Changeset 502


Ignore:
Timestamp:
Jan 30, 2006, 11:05:20 PM (17 years ago)
Author:
Peter
Message:

improved median and percentile functions

Location:
trunk
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/classifier/ConsensusInputRanker.cc

    r500 r502  
    4343        ranks[j]=input_rankers_[j].rank(i);
    4444      }
    45       medians[i].first = statistics::median_TMP(ranks);
     45      medians[i].first = statistics::median(ranks);
    4646      medians[i].second = i;
    4747    }
  • trunk/lib/statistics/WilcoxonFoldChange.cc

    r500 r502  
    3535    }
    3636    if (absolute_)
    37       return fabs(median_TMP(distance));
    38     return median_TMP(distance);
     37      return fabs(median(distance));
     38    return median(distance);
    3939  }
    4040
  • trunk/lib/statistics/utility.h

    r500 r502  
    77
    88#include <algorithm>
     9#include <cassert>
    910#include <cmath>
    1011#include <vector>
     
    2728
    2829  ///
    29   /// The percentile is determined by the \a p, a number between 0
    30   /// and 100. The percentile is found by interpolation, using the
    31   /// formula \f$ percentile = (1 - \delta) x_i + \delta x_{i+1} \f$
    32   /// where \a p is floor\f$((n - 1)p/100)\f$ and \f$ \delta \f$ is
    33   /// \f$ (n-1)p/100 - i \f$.Thus the minimum value of the vector is
    34   /// given by p equal to zero, the maximum is given by p equal to
    35   /// 100 and the median value is given by p equal to 50.
     30  /// The percentile is determined by \a p, a number between 0 and
     31  /// 100. The percentile is found by interpolation, using the formula
     32  /// \f$ percentile = (1 - \delta) x_i + \delta x_{i+1} \f$ where \f$
     33  /// i \f$ is floor\f$((n - 1)p/100)\f$ and \f$ \delta \f$ is \f$
     34  /// (n-1)p/100 - i \f$. Thus the minimum value of the vector is given
     35  /// by p equal to zero, the maximum is given by p equal to 100 and
     36  /// the median value is given by p equal to 50. If @a sorted
     37  /// is false (default), the vector is copied, the copy is sorted,
     38  /// and then used to calculate the percentile.
    3639  ///
    3740  /// @return \a p'th percentile
    3841  ///
    39   /// @note interface will change
    40   ///
    4142  template <class T>
    42   double percentile_TMP(std::vector<T>& vec, double p)
     43  double percentile(const std::vector<T>& vec, const double p,
     44                    const bool sorted=false)
    4345  {
     46    assert(!(p>100 && p<0));
     47    if (sorted){
     48      if (p>=100)
     49        return vec.back();
     50      double j = p/100 * (vec.size()-1);
     51      int i = static_cast<int>(j);
     52      return (1-j+floor(j))*vec[i] + (j-floor(j))*vec[i+1];
     53    }
    4454    if (p==100)
    45       return  vec[vec.size()-1];
    46     sort(vec.begin(), vec.end());
    47     double j = p/100 * (vec.size()-1);
     55      return  *std::max_element(vec.begin(),vec.end());
     56    std::vector<T> v_copy(vec);
     57    double j = p/100 * (v_copy.size()-1);
    4858    int i = static_cast<int>(j);
    49     return (1-j+floor(j))*vec[i] + (j-floor(j))*vec[i+1];
     59    std::partial_sort(v_copy.begin(),v_copy.begin()+i+2 , v_copy.end());
     60    return (1-j+floor(j))*v_copy[i] + (j-floor(j))*v_copy[i+1];
    5061 
    5162  }
    5263
    5364  ///
    54   /// Median is defined to be value in the middle. If number of
    55   /// values is even median is the average of the two middle
    56   /// values. @return median
     65  /// The median is defined to be the value in the middle. If the number
     66  /// of values is even, the median is the average of the two middle
     67  /// values. It equals the percentile with p equal to 50. If @a sorted
     68  /// is false (default), the vector is copied, the copy is sorted, and
     69  /// then used to calculate the median.
     70  ///
     71  /// @return median
    5772  ///
    5873  /// @note interface will change
    5974  ///
    6075  template <class T>
    61   inline double median_TMP(std::vector<T>& v) { return percentile_TMP(v,50.0); }
     76  inline double median(const std::vector<T>& v, const bool sorted=false)
     77  { return percentile(v, 50.0, sorted); }
    6278
    6379  ///
     
    6581  /// is even median is the average of the two middle values. If @a
    6682  /// sorted is true, the function assumes vector @a vec to be
    67   /// sorted. If @a sorted is false, the vector is copied, the copy
    68   /// is sorted, and then used to calculate the median.
     83  /// sorted. If @a sorted is false (default), the vector is copied, the
     84  /// copy is sorted, and then used to calculate the median.
    6985  ///
    7086  /// @return median
    7187  ///
    72   double median(const gslapi::vector& vec, const bool sorted=true);
     88  double median(const gslapi::vector& vec, const bool sorted=false);
    7389
    7490}} // of namespace statistics and namespace theplu
  • trunk/test/statistics_test.cc

    r500 r502  
    1515  for (unsigned int i=0; i<10; i++)
    1616    data.push_back(static_cast<double>(i));
    17   double m=theplu::statistics::median_TMP(data);
     17  double m=theplu::statistics::median(data);
    1818  if (m!=4.5)
    1919    return -1;
Note: See TracChangeset for help on using the changeset viewer.