Changeset 464


Ignore:
Timestamp:
Dec 16, 2005, 11:15:25 PM (16 years ago)
Author:
Peter
Message:

made median template

Location:
trunk/lib
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/classifier/ConsensusInputRanker.cc

    r461 r464  
    3232    }
    3333    // Sorting with respect to median rank
    34     std::vector<std::pair<size_t,double> > median(data.rows());
     34    std::vector<std::pair<size_t,double> > medians(data.rows());
    3535    for (size_t i=0; i<data.rows(); i++){
    3636      std::vector<size_t> ranks(nof_rankers_);
     
    3838        ranks[j]=input_rankers_[j].rank(i);
    3939     
    40       median[i].first = i;
    41       median[i].second = statistics::median(ranks);
     40      medians[i].first = i;
     41      medians[i].second = statistics::median(ranks);
    4242    }
    4343   
    4444    //sort medians and assign id_ and rank_
    45     sort(median.begin(), median.end(),
     45    sort(medians.begin(), medians.end(),
    4646         utility::pair_value_compare<size_t, double>());
    4747    id_.resize(data.rows());
    4848    rank_.resize(data.rows());
    4949    for (size_t i=0; i<data.rows(); i++){
    50       id_[i]=median[i].first;
     50      id_[i]=medians[i].first;
    5151      rank_[id_[i]]=i;           
    5252    }
  • trunk/lib/statistics/utility.cc

    r298 r464  
    44#include <c++_tools/statistics/utility.h>
    55
    6 #include <vector>
    7 #include <algorithm>
    8 
    9 #include <cmath>
    10 #include <cstdlib>
    116#include <gsl/gsl_randist.h>
    12 #include <iostream>
    137
    148
     
    2418  }
    2519
    26 
    27   double median(std::vector<double>& vec)
    28   {
    29     return percentile(vec, 50.0);
    30   }
    31 
    32   double median(std::vector<size_t>& vec)
    33   {
    34     return percentile(vec, 50.0);
    35   }
    36 
    37   double percentile(std::vector<double>& vec, double percentile)
    38   {
    39     if (percentile==100)
    40       return  vec[vec.size()-1];
    41     else{
    42       sort(vec.begin(), vec.end());
    43       double j = percentile/100 * (vec.size()-1);
    44       int i = static_cast<int>(j);
    45       return (1-j+floor(j))*vec[i] + (j-floor(j))*vec[i+1];
    46     }
    47   }
    48 
    49   double percentile(std::vector<size_t>& vec, double percentile)
    50   {
    51     if (percentile==100)
    52       return vec[vec.size()-1];
    53     else{
    54       sort(vec.begin(), vec.end());
    55       double j = percentile/100 * (vec.size()-1);
    56       int i = static_cast<int>(j);
    57       return (1-j+floor(j))*vec[i] + (j-floor(j))*vec[i+1];
    58     }
    59    
    60   }
    61 
    62 
    6320}} // of namespace statistics and namespace theplu
  • trunk/lib/statistics/utility.h

    r303 r464  
    44#define _theplu_statistics_utility_
    55
#include <algorithm>
#include <cmath>
#include <limits>
#include <vector>
    79
     
    911namespace statistics { 
    1012
    11     ///
    12     /// Calculates the probabilty to get \a k or smaller from a
    13     /// hypergeometric distribution with parameters \a n1 \a n2 \a
    14     /// t. Hypergeomtric situation you get in the following situation:
    15     /// Let there be \a n1 ways for a "good" selection and \a n2 ways
    16     /// for a "bad" selection out of a total of possibilities. Take \a
    17     /// t samples without replacement and \a k of those are "good"
    18     /// samples. \a k will follow a hypergeomtric distribution.
    19     /// @cumulative hypergeomtric distribution functions P(k).
    20     ///
    21     double cdf_hypergeometric_P(u_int k, u_int n1, u_int n2, u_int t);
     13  ///
     14  /// Calculates the probability to get \a k or smaller from a
     15  /// hypergeometric distribution with parameters \a n1 \a n2 \a
     16  /// t. A hypergeometric situation arises in the following case:
     17  /// Let there be \a n1 ways for a "good" selection and \a n2 ways
     18  /// for a "bad" selection out of a total of possibilities. Take \a
     19  /// t samples without replacement and \a k of those are "good"
     20  /// samples. \a k will follow a hypergeometric distribution.
     21  /// @return cumulative hypergeometric distribution function P(k).
     22  ///
     23  double cdf_hypergeometric_P(u_int k, u_int n1, u_int n2, u_int t);
    2224
    2325
    24     ///
    25     /// Median is defined to be value in the middle. If number of
    26     /// values is even median is the average of the two middle
    27     /// values. @return median
    28     ///
    29     double median(std::vector<double>&); 
     26  ///
     27  /// The percentile is determined by the \a p, a number between 0
     28  /// and 100. The percentile is found by interpolation, using the
     29  /// formula \f$ percentile = (1 - \delta) x_i + \delta x_{i+1} \f$
     30  /// where \a p is floor\f$((n - 1)p/100)\f$ and \f$ \delta \f$ is
     31  /// \f$ (n-1)p/100 - i \f$.Thus the minimum value of the vector is
     32  /// given by p equal to zero, the maximum is given by p equal to
     33  /// 100 and the median value is given by p equal to 50.
     34  ///
     35  /// @return \a p'th percentile
     36  ///
     37  template <class T>
     38  double percentile(std::vector<T>& vec, double p)
     39  {
     40    if (p==100)
     41      return  vec[vec.size()-1];
     42    sort(vec.begin(), vec.end());
     43    double j = p/100 * (vec.size()-1);
     44    int i = static_cast<int>(j);
     45    return (1-j+floor(j))*vec[i] + (j-floor(j))*vec[i+1];
     46 
     47  }
    3048
    31     ///
    32     /// Median is defined to be value in the middle. If number of
    33     /// values is even median is the average of the two middle
    34     /// values. @return median
    35     ///
    36     double median(std::vector<size_t>&); 
     49  ///
     50  /// Median is defined to be value in the middle. If number of
     51  /// values is even median is the average of the two middle
     52  /// values. @return median
     53  ///
     54  template <class T>
     55  inline double median(std::vector<T>& v) { return percentile(v,50.0); }
    3756
    38     ///
    39     /// The percentile is determined by the \a p, a number between 0
    40     /// and 100. The percentile is found by interpolation, using the
    41     /// formula \f$ percentile = (1 - \delta) x_i + \delta x_{i+1} \f$
    42     /// where \a p is floor\f$((n - 1)p/100)\f$ and \f$ \delta \f$ is
    43     /// \f$ (n-1)p/100 - i \f$.Thus the minimum value of the vector is
    44     /// given by p equal to zero, the maximum is given by p equal to
    45     /// 100 and the median value is given by p equal to 50.
    46     ///
    47     /// @return \a i'th percentile
    48     ///
    49     double percentile(std::vector<double>&, double p);
    50 
    51     ///
    52     /// The percentile is determined by the \a p, a number between 0
    53     /// and 100. The percentile is found by interpolation, using the
    54     /// formula \f$ percentile = (1 - \delta) x_i + \delta x_{i+1} \f$
    55     /// where \a p is floor\f$((n - 1)p/100)\f$ and \f$ \delta \f$ is
    56     /// \f$ (n-1)p/100 - i \f$.Thus the minimum value of the vector is
    57     /// given by p equal to zero, the maximum is given by p equal to
    58     /// 100 and the median value is given by p equal to 50.
    59     ///
    60     /// @return \a i'th percentile
    61     ///
    62     double percentile(std::vector<size_t>&, double i);
    63 
     57 
    6458}} // of namespace statistics and namespace theplu
    6559
Note: See TracChangeset for help on using the changeset viewer.