- Timestamp:
- Dec 16, 2005, 11:15:25 PM (17 years ago)
- Location:
- trunk/lib
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/lib/classifier/ConsensusInputRanker.cc
r461 r464 32 32 } 33 33 // Sorting with respect to median rank 34 std::vector<std::pair<size_t,double> > median (data.rows());34 std::vector<std::pair<size_t,double> > medians(data.rows()); 35 35 for (size_t i=0; i<data.rows(); i++){ 36 36 std::vector<size_t> ranks(nof_rankers_); … … 38 38 ranks[j]=input_rankers_[j].rank(i); 39 39 40 median [i].first = i;41 median [i].second = statistics::median(ranks);40 medians[i].first = i; 41 medians[i].second = statistics::median(ranks); 42 42 } 43 43 44 44 //sort medians and assign id_ and rank_ 45 sort(median .begin(), median.end(),45 sort(medians.begin(), medians.end(), 46 46 utility::pair_value_compare<size_t, double>()); 47 47 id_.resize(data.rows()); 48 48 rank_.resize(data.rows()); 49 49 for (size_t i=0; i<data.rows(); i++){ 50 id_[i]=median [i].first;50 id_[i]=medians[i].first; 51 51 rank_[id_[i]]=i; 52 52 } -
trunk/lib/statistics/utility.cc
r298 r464 4 4 #include <c++_tools/statistics/utility.h> 5 5 6 #include <vector>7 #include <algorithm>8 9 #include <cmath>10 #include <cstdlib>11 6 #include <gsl/gsl_randist.h> 12 #include <iostream>13 7 14 8 … … 24 18 } 25 19 26 27 double median(std::vector<double>& vec)28 {29 return percentile(vec, 50.0);30 }31 32 double median(std::vector<size_t>& vec)33 {34 return percentile(vec, 50.0);35 }36 37 double percentile(std::vector<double>& vec, double percentile)38 {39 if (percentile==100)40 return vec[vec.size()-1];41 else{42 sort(vec.begin(), vec.end());43 double j = percentile/100 * (vec.size()-1);44 int i = static_cast<int>(j);45 return (1-j+floor(j))*vec[i] + (j-floor(j))*vec[i+1];46 }47 }48 49 double percentile(std::vector<size_t>& vec, double percentile)50 {51 if (percentile==100)52 return vec[vec.size()-1];53 else{54 sort(vec.begin(), vec.end());55 double j = percentile/100 * (vec.size()-1);56 int i = static_cast<int>(j);57 return (1-j+floor(j))*vec[i] + (j-floor(j))*vec[i+1];58 }59 60 }61 62 63 20 }} // of namespace statistics and namespace theplu -
trunk/lib/statistics/utility.h
r303 r464 4 4 #define _theplu_statistics_utility_ 5 5 6 #include <algorithm> 7 #include <cmath> 6 8 #include <vector> 7 9 … … 9 11 namespace statistics { 10 12 11 12 13 14 15 16 17 18 19 20 21 13 /// 14 /// Calculates the probabilty to get \a k or smaller from a 15 /// hypergeometric distribution with parameters \a n1 \a n2 \a 16 /// t. Hypergeomtric situation you get in the following situation: 17 /// Let there be \a n1 ways for a "good" selection and \a n2 ways 18 /// for a "bad" selection out of a total of possibilities. Take \a 19 /// t samples without replacement and \a k of those are "good" 20 /// samples. \a k will follow a hypergeomtric distribution. 21 /// @cumulative hypergeomtric distribution functions P(k). 22 /// 23 double cdf_hypergeometric_P(u_int k, u_int n1, u_int n2, u_int t); 22 24 23 25 24 /// 25 /// Median is defined to be value in the middle. If number of 26 /// values is even median is the average of the two middle 27 /// values. @return median 28 /// 29 double median(std::vector<double>&); 26 /// 27 /// The percentile is determined by the \a p, a number between 0 28 /// and 100. The percentile is found by interpolation, using the 29 /// formula \f$ percentile = (1 - \delta) x_i + \delta x_{i+1} \f$ 30 /// where \a p is floor\f$((n - 1)p/100)\f$ and \f$ \delta \f$ is 31 /// \f$ (n-1)p/100 - i \f$.Thus the minimum value of the vector is 32 /// given by p equal to zero, the maximum is given by p equal to 33 /// 100 and the median value is given by p equal to 50. 34 /// 35 /// @return \a p'th percentile 36 /// 37 template <class T> 38 double percentile(std::vector<T>& vec, double p) 39 { 40 if (p==100) 41 return vec[vec.size()-1]; 42 sort(vec.begin(), vec.end()); 43 double j = p/100 * (vec.size()-1); 44 int i = static_cast<int>(j); 45 return (1-j+floor(j))*vec[i] + (j-floor(j))*vec[i+1]; 46 47 } 30 48 31 /// 32 /// Median is defined to be value in the middle. 
If number of 33 /// values is even median is the average of the two middle 34 /// values. @return median 35 /// 36 double median(std::vector<size_t>&); 49 /// 50 /// Median is defined to be value in the middle. If number of 51 /// values is even median is the average of the two middle 52 /// values. @return median 53 /// 54 template <class T> 55 inline double median(std::vector<T>& v) { return percentile(v,50.0); } 37 56 38 /// 39 /// The percentile is determined by the \a p, a number between 0 40 /// and 100. The percentile is found by interpolation, using the 41 /// formula \f$ percentile = (1 - \delta) x_i + \delta x_{i+1} \f$ 42 /// where \a p is floor\f$((n - 1)p/100)\f$ and \f$ \delta \f$ is 43 /// \f$ (n-1)p/100 - i \f$.Thus the minimum value of the vector is 44 /// given by p equal to zero, the maximum is given by p equal to 45 /// 100 and the median value is given by p equal to 50. 46 /// 47 /// @return \a i'th percentile 48 /// 49 double percentile(std::vector<double>&, double p); 50 51 /// 52 /// The percentile is determined by the \a p, a number between 0 53 /// and 100. The percentile is found by interpolation, using the 54 /// formula \f$ percentile = (1 - \delta) x_i + \delta x_{i+1} \f$ 55 /// where \a p is floor\f$((n - 1)p/100)\f$ and \f$ \delta \f$ is 56 /// \f$ (n-1)p/100 - i \f$.Thus the minimum value of the vector is 57 /// given by p equal to zero, the maximum is given by p equal to 58 /// 100 and the median value is given by p equal to 50. 59 /// 60 /// @return \a i'th percentile 61 /// 62 double percentile(std::vector<size_t>&, double i); 63 57 64 58 }} // of namespace statistics and namespace theplu 65 59
Note: See TracChangeset for help on using the changeset viewer.