Changeset 932 for trunk/yat/statistics


Ignore:
Timestamp:
Oct 5, 2007, 11:03:46 PM (16 years ago)
Author:
Peter
Message:

median and percentile functions now take iterators rather than vectors

Location:
trunk/yat/statistics
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/yat/statistics/VectorFunction.cc

    r865 r932  
    5454  {
    5555    assert(vec.size());
    56     return median(vec);
     56    return median(vec.begin(), vec.end());
    5757  }
    5858 
  • trunk/yat/statistics/WilcoxonFoldChange.cc

    r865 r932  
    5656    }
    5757    if (absolute_)
    58       return fabs(median(distance));
    59     return median(distance);
     58      return fabs(median(distance.begin(), distance.end()));
     59    return median(distance.begin(), distance.end());
    6060  }
    6161
  • trunk/yat/statistics/utility.cc

    r865 r932  
    5050  double mad(const utility::vector& vec, const bool sorted)
    5151  {
    52     double m = median(vec, sorted);
     52    double m = median(vec.begin(), vec.end(), sorted);
    5353    std::vector<double> ad;
    5454    ad.reserve(vec.size());
     
    5656      ad.push_back(fabs(vec[i]-m));
    5757    std::sort(ad.begin(), ad.end());
    58     return median(ad,true);
     58    return median(ad.begin(), ad.end(),true);
    5959  }
    6060 
    61 
    62   double median(const utility::vector& vec, const bool sorted)
    63   {
    64     if (!sorted){
    65       utility::vector vec_copy(vec);
    66       utility::sort(vec_copy);
    67       return gsl_stats_median_from_sorted_data (vec_copy.gsl_vector_p()->data,
    68                                                 vec_copy.gsl_vector_p()->stride,
    69                                                 vec_copy.gsl_vector_p()->size);
    70     }
    71     return gsl_stats_median_from_sorted_data (vec.gsl_vector_p()->data,
    72                                               vec.gsl_vector_p()->stride,
    73                                               vec.gsl_vector_p()->size);
    74   }
    75 
    76   double percentile(const utility::vector& vec, const double p,
    77                     const bool sorted)
    78   {
    79     if (!sorted){
    80       utility::vector vec_c(vec);
    81       utility::sort(vec_c);
    82       return gsl_stats_quantile_from_sorted_data(vec_c.gsl_vector_p()->data,
    83                                                  vec_c.gsl_vector_p()->stride,
    84                                                  vec_c.gsl_vector_p()->size,
    85                                                  p);
    86     }
    87     return gsl_stats_quantile_from_sorted_data (vec.gsl_vector_p()->data,
    88                                               vec.gsl_vector_p()->stride,
    89                                               vec.gsl_vector_p()->size,
    90                                               p);
    91   }
    9261
    9362  double skewness(const utility::vector& v)
  • trunk/yat/statistics/utility.h

    r865 r932  
    3131#include "yat/classifier/Target.h"
    3232#include "yat/utility/vector.h"
     33#include "yat/utility/yat_assert.h"
    3334
    3435#include <algorithm>
     
    4445  //forward declarations
    4546  template <class T>
    46   double median(const std::vector<T>& v, const bool sorted=false);
     47  double median(T first, T last, const bool sorted=false);
    4748
    4849  template <class T>
    49   double percentile(const std::vector<T>& vec, const double p,
    50                     const bool sorted=false);
     50  double percentile(T first, T last, double p, bool sorted=false);
    5151 
    5252  /**
     
    113113      ad.push_back(fabs(vec[i]-m));
    114114    std::sort(ad.begin(), ad.end());
    115     return median(ad,true);
     115    return median(ad.begin(), ad.end(),true);
    116116  }
    117117 
     
    126126  /// Median is defined to be value in the middle. If number of values
    127127  /// is even median is the average of the two middle values.  the
    128   /// median value is given by p equal to 50. If @a sorted is false
    129   /// (default), the vector is copied, the copy is sorted, and then
     128  /// median value is given by p equal to 50. If \a sorted is false
     129  /// (default), the range is copied, the copy is sorted, and then
    130130  /// used to calculate the median.
    131131  ///
    132   /// @return median
     132  /// Requirements: T should be an iterator over a range of doubles (or
     133  /// any type being convertible to double). If \a sorted is false
     134  /// iterator must be mutable, else read-only iterator is also ok.
    133135  ///
    134   /// @note interface will change
     136  /// @return median of range
    135137  ///
    136138  template <class T>
    137   double median(const std::vector<T>& v, const bool sorted=false)
    138   { return percentile(v, 50.0, sorted); }
    139 
    140   ///
    141   /// Median is defined to be value in the middle. If number of values
    142   /// is even median is the average of the two middle values. If @a
    143   /// sorted is true, the function assumes vector @a vec to be
    144   /// sorted. If @a sorted is false, the vector is copied, the copy is
    145   /// sorted (default), and then used to calculate the median.
    146   ///
    147   /// @return median
    148   ///
    149   double median(const utility::vector& vec, const bool sorted=false);
     139  double median(T first, T last, const bool sorted=false)
     140  { return percentile(first, last, 50.0, sorted); }
    150141
    151142  /**
     
    159150     is false (default), the vector is copied, the copy is sorted,
    160151     and then used to calculate the median.
     152
     153     Requirements: T should be an iterator over a range of doubles (or
     154     any type being convertible to double). If \a sorted is false
     155     iterator must be mutable, else read-only iterator is also ok.
    161156     
    162      @return \a p'th percentile
     157     @return \a p'th percentile of range
    163158  */
    164159  template <class T>
    165   double percentile(const std::vector<T>& vec, const double p,
    166                     const bool sorted=false)
     160  double percentile(T first, T last, double p, bool sorted=false)
    167161  {
     162    yat_assert(first<last && "range is invalid");
     163    yat_assert(p>=0);
     164    yat_assert(p<=100);
    168165    if (sorted){
    169166      if (p>=100)
    170         return vec.back();
    171       double j = p/100 * (vec.size()-1);
     167        return *(--last);
     168      double j = p/100 * (std::distance(first,last)-1);
    172169      int i = static_cast<int>(j);
    173       return (1-j+floor(j))*vec[i] + (j-floor(j))*vec[i+1];
     170      return (1-j+floor(j))*first[i] + (j-floor(j))*first[i+1];
    174171    }
    175     if (p==100)
    176       return  *std::max_element(vec.begin(),vec.end());
    177     std::vector<T> v_copy(vec);
     172
     173    std::vector<double> v_copy;
     174    v_copy.reserve(std::distance(first,last));
     175    std::copy(first, last, std::back_inserter(v_copy));
    178176    double j = p/100 * (v_copy.size()-1);
    179177    int i = static_cast<int>(j);
    180178    std::partial_sort(v_copy.begin(),v_copy.begin()+i+2 , v_copy.end());
    181     return (1-j+floor(j))*v_copy[i] + (j-floor(j))*v_copy[i+1];
    182  
     179    return percentile(v_copy.begin(), v_copy.end(), p, true);
    183180  }
    184 
    185   /**
    186      The percentile is determined by the \a p, a number between 0 and
    187      100. The percentile is found by interpolation, using the formula
    188      \f$ percentile = (1 - \delta) x_i + \delta x_{i+1} \f$ where \a
    189      p is floor\f$((n - 1)p/100)\f$ and \f$ \delta \f$ is \f$
    190      (n-1)p/100 - i \f$.Thus the minimum value of the vector is given
    191      by p equal to zero, the maximum is given by p equal to 100 and
    192      the median value is given by p equal to 50. If @a sorted
    193      is false (default), the vector is copied, the copy is sorted,
    194      and then used to calculate the median.
    195      
    196      @return \a p'th percentile
    197   */
    198   double percentile(const utility::vector& vec, const double,
    199                     const bool sorted=false);
    200181
    201182  ///
Note: See TracChangeset for help on using the changeset viewer.