Changeset 932 for trunk/yat/statistics
- Timestamp: Oct 5, 2007, 11:03:46 PM (16 years ago)
- Location: trunk/yat/statistics
- Files: 4 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/yat/statistics/VectorFunction.cc
r865 r932 54 54 { 55 55 assert(vec.size()); 56 return median(vec );56 return median(vec.begin(), vec.end()); 57 57 } 58 58 -
trunk/yat/statistics/WilcoxonFoldChange.cc
r865 r932 56 56 } 57 57 if (absolute_) 58 return fabs(median(distance ));59 return median(distance );58 return fabs(median(distance.begin(), distance.end())); 59 return median(distance.begin(), distance.end()); 60 60 } 61 61 -
trunk/yat/statistics/utility.cc
r865 r932 50 50 double mad(const utility::vector& vec, const bool sorted) 51 51 { 52 double m = median(vec , sorted);52 double m = median(vec.begin(), vec.end(), sorted); 53 53 std::vector<double> ad; 54 54 ad.reserve(vec.size()); … … 56 56 ad.push_back(fabs(vec[i]-m)); 57 57 std::sort(ad.begin(), ad.end()); 58 return median(ad ,true);58 return median(ad.begin(), ad.end(),true); 59 59 } 60 60 61 62 double median(const utility::vector& vec, const bool sorted)63 {64 if (!sorted){65 utility::vector vec_copy(vec);66 utility::sort(vec_copy);67 return gsl_stats_median_from_sorted_data (vec_copy.gsl_vector_p()->data,68 vec_copy.gsl_vector_p()->stride,69 vec_copy.gsl_vector_p()->size);70 }71 return gsl_stats_median_from_sorted_data (vec.gsl_vector_p()->data,72 vec.gsl_vector_p()->stride,73 vec.gsl_vector_p()->size);74 }75 76 double percentile(const utility::vector& vec, const double p,77 const bool sorted)78 {79 if (!sorted){80 utility::vector vec_c(vec);81 utility::sort(vec_c);82 return gsl_stats_quantile_from_sorted_data(vec_c.gsl_vector_p()->data,83 vec_c.gsl_vector_p()->stride,84 vec_c.gsl_vector_p()->size,85 p);86 }87 return gsl_stats_quantile_from_sorted_data (vec.gsl_vector_p()->data,88 vec.gsl_vector_p()->stride,89 vec.gsl_vector_p()->size,90 p);91 }92 61 93 62 double skewness(const utility::vector& v) -
trunk/yat/statistics/utility.h
r865 r932 31 31 #include "yat/classifier/Target.h" 32 32 #include "yat/utility/vector.h" 33 #include "yat/utility/yat_assert.h" 33 34 34 35 #include <algorithm> … … 44 45 //forward declarations 45 46 template <class T> 46 double median( const std::vector<T>& v, const bool sorted=false);47 double median(T first, T last, const bool sorted=false); 47 48 48 49 template <class T> 49 double percentile(const std::vector<T>& vec, const double p, 50 const bool sorted=false); 50 double percentile(T first, T last, double p, bool sorted=false); 51 51 52 52 /** … … 113 113 ad.push_back(fabs(vec[i]-m)); 114 114 std::sort(ad.begin(), ad.end()); 115 return median(ad ,true);115 return median(ad.begin(), ad.end(),true); 116 116 } 117 117 … … 126 126 /// Median is defined to be value in the middle. If number of values 127 127 /// is even median is the average of the two middle values. the 128 /// median value is given by p equal to 50. If @a sorted is false129 /// (default), the vectoris copied, the copy is sorted, and then128 /// median value is given by p equal to 50. If \a sorted is false 129 /// (default), the range is copied, the copy is sorted, and then 130 130 /// used to calculate the median. 131 131 /// 132 /// @return median 132 /// Requirements: T should be an iterator over a range of doubles (or 133 /// any type being convertable to double). If \a sorted is false 134 /// iterator must be mutable, else read-only iterator is also ok. 133 135 /// 134 /// @ note interface will change136 /// @return median of range 135 137 /// 136 138 template <class T> 137 double median(const std::vector<T>& v, const bool sorted=false) 138 { return percentile(v, 50.0, sorted); } 139 140 /// 141 /// Median is defined to be value in the middle. If number of values 142 /// is even median is the average of the two middle values. If @a 143 /// sorted is true, the function assumes vector @a vec to be 144 /// sorted. 
If @a sorted is false, the vector is copied, the copy is 145 /// sorted (default), and then used to calculate the median. 146 /// 147 /// @return median 148 /// 149 double median(const utility::vector& vec, const bool sorted=false); 139 double median(T first, T last, const bool sorted=false) 140 { return percentile(first, last, 50.0, sorted); } 150 141 151 142 /** … … 159 150 is false (default), the vector is copied, the copy is sorted, 160 151 and then used to calculate the median. 152 153 Requirements: T should be an iterator over a range of doubles (or 154 any type being convertable to double). If \a sorted is false 155 iterator must be mutable, else read-only iterator is also ok. 161 156 162 @return \a p'th percentile 157 @return \a p'th percentile of range 163 158 */ 164 159 template <class T> 165 double percentile(const std::vector<T>& vec, const double p, 166 const bool sorted=false) 160 double percentile(T first, T last, double p, bool sorted=false) 167 161 { 162 yat_assert(first<last && "range is invalid"); 163 yat_assert(p>=0); 164 yat_assert(p<=100); 168 165 if (sorted){ 169 166 if (p>=100) 170 return vec.back();171 double j = p/100 * ( vec.size()-1);167 return *(--last); 168 double j = p/100 * (std::distance(first,last)-1); 172 169 int i = static_cast<int>(j); 173 return (1-j+floor(j))* vec[i] + (j-floor(j))*vec[i+1];170 return (1-j+floor(j))*first[i] + (j-floor(j))*first[i+1]; 174 171 } 175 if (p==100) 176 return *std::max_element(vec.begin(),vec.end()); 177 std::vector<T> v_copy(vec); 172 173 std::vector<double> v_copy; 174 v_copy.reserve(std::distance(first,last)); 175 std::copy(first, last, std::back_inserter(v_copy)); 178 176 double j = p/100 * (v_copy.size()-1); 179 177 int i = static_cast<int>(j); 180 178 std::partial_sort(v_copy.begin(),v_copy.begin()+i+2 , v_copy.end()); 181 return (1-j+floor(j))*v_copy[i] + (j-floor(j))*v_copy[i+1]; 182 179 return percentile(v_copy.begin(), v_copy.end(), p, true); 183 180 } 184 185 /**186 The percentile is 
determined by the \a p, a number between 0 and187 100. The percentile is found by interpolation, using the formula188 \f$ percentile = (1 - \delta) x_i + \delta x_{i+1} \f$ where \a189 p is floor\f$((n - 1)p/100)\f$ and \f$ \delta \f$ is \f$190 (n-1)p/100 - i \f$.Thus the minimum value of the vector is given191 by p equal to zero, the maximum is given by p equal to 100 and192 the median value is given by p equal to 50. If @a sorted193 is false (default), the vector is copied, the copy is sorted,194 and then used to calculate the median.195 196 @return \a p'th percentile197 */198 double percentile(const utility::vector& vec, const double,199 const bool sorted=false);200 181 201 182 ///
Note: See TracChangeset for help on using the changeset viewer.