Changeset 1404 for trunk/yat/statistics
 Timestamp:
 Aug 8, 2008, 12:47:18 AM (14 years ago)
 Location:
 trunk/yat/statistics
 Files:

 2 edited
Legend:
 Unmodified
 Added
 Removed

trunk/yat/statistics/Percentiler.h
r1339 r1404 25 25 */ 26 26 27 #include "yat/utility/DataWeight.h" 27 28 #include "yat/utility/iterator_traits.h" 29 #include "yat/utility/yat_assert.h" 30 #include "yat/utility/WeightIterator.h" 28 31 29 32 #include <algorithm> 30 33 #include <cassert> 31 34 #include <cmath> 35 #include <numeric> 36 #include <stdexcept> 32 37 #include <vector> 38 39 #include <iostream> 33 40 34 41 namespace theplu { … … 47 54 \param perc percentile to calculate [0,100]. Defaule value is 48 55 50, which implies class will calculate median. 49 \param sorted if true class assu es that ranges are already56 \param sorted if true class assumes that ranges are already 50 57 sorted, if false the range will copied to a new range which is 51 58 sorted. … … 54 61 55 62 /** 56 The percentile is found by interpolation, using the formula \f$57 percentile = (1  \delta) x_i + \delta x_{i+1} \f$ where \a p58 is floor\f$((n  1)p/100)\f$ and \f$ \delta \f$ is \f$59 (n1)p/100  i \f$. Thus the minimum value of the range is60 given by perc equal to zero, the maximum is given by perc equal61 to 100 and the median value is given by perc equal to 50.62 63 63 Function is a nonmutable function, i.e., \a first and \a last 64 64 can be const_iterators. … … 68 68 template<typename RandomAccessIterator> 69 69 double operator()(RandomAccessIterator first, 70 RandomAccessIterator last) const; 71 /* 70 RandomAccessIterator last) const 72 71 { 73 72 return calculate(first, last, sorted_, 74 73 typename utility::weighted_iterator_traits<RandomAccessIterator>::type()); 75 74 } 76 */77 75 private: 78 76 double perc_; 79 77 bool sorted_; 80 78 81 // unweighted version  NOTE range must be sorted79 // unweighted version 82 80 template<typename RandomAccessIterator> 83 81 double calculate(RandomAccessIterator first, RandomAccessIterator last, 84 utility::unweighted_iterator_tag tag) const;82 bool sorted, utility::unweighted_iterator_tag tag) const; 85 83 86 // weighted version  NOTE range must be sorted84 // weighted version 87 85 template<typename RandomAccessIterator> 88 86 double calculate(RandomAccessIterator first, RandomAccessIterator last, 89 utility::weighted_iterator_tag tag) const;87 bool sorted, utility::weighted_iterator_tag tag) const; 90 88 91 89 // using compiler generated copy … … 100 98 // unweighted version 101 99 template<typename RandomAccessIterator> 102 double Percentiler::operator()(RandomAccessIterator first, 103 RandomAccessIterator last) const 100 double 101 Percentiler::calculate(RandomAccessIterator first, 102 RandomAccessIterator last, 103 bool sorted, 104 utility::unweighted_iterator_tag tag) const 104 105 { 105 106 // range is one value only is a special case 106 107 if (first+1 == last) 107 return *first; 108 // just for convenience 109 typedef 110 typename utility::weighted_iterator_traits<RandomAccessIterator>::type 111 weighted_tag; 112 113 if (sorted_){ 114 if (perc_>=100) 108 return utility::iterator_traits<RandomAccessIterator>().data(first); 109 if (sorted) { 110 size_t n = last  first; 111 // have to take care of this special case 112 if (perc_>= 100.0  50.0/n) 115 113 return *(last); 116 return calculate(first, last, weighted_tag()); 114 if (perc_<= 50.0/n) 115 return *first; 116 double j = n * (perc_50.0/n) / 100.0; 117 int i = static_cast<int>(j); 118 return (1j+floor(j))*first[i] + (jfloor(j))*first[i+1]; 117 119 } 118 120 119 std::vector<typename std::iterator_traits<RandomAccessIterator>::value_type> 120 v_copy; 121 std::vector<double> v_copy; 121 122 v_copy.reserve(std::distance(first,last)); 122 123 std::copy(first, last, std::back_inserter(v_copy)); 123 size_t i = static_cast<size_t>(perc_/100 * (v_copy.size()1)); 124 if (i+2 < v_copy.size()) { 125 std::partial_sort(v_copy.begin(), v_copy.begin()+i+2, v_copy.end()); 126 } 127 else 128 std::sort(v_copy.begin(), v_copy.end()); 129 return calculate(v_copy.begin(), v_copy.end(), weighted_tag()); 130 } 131 132 // unweighted version 133 template<typename RandomAccessIterator> 134 double Percentiler::calculate(RandomAccessIterator first, 135 RandomAccessIterator last, 136 utility::unweighted_iterator_tag tag) const 137 { 138 double j = perc_/100 * (std::distance(first,last)1); 139 int i = static_cast<int>(j); 140 return (1j+floor(j))*first[i] + (jfloor(j))*first[i+1]; 124 std::sort(v_copy.begin(), v_copy.end()); 125 return calculate(v_copy.begin(), v_copy.end(), true, tag); 141 126 } 142 127 … … 146 131 double Percentiler::calculate(RandomAccessIterator first, 147 132 RandomAccessIterator last, 133 bool sorted, 148 134 utility::weighted_iterator_tag tag) const 149 135 { 150 assert(false && "not implemented"); 151 return 0; 136 if (sorted) { 137 utility::iterator_traits<RandomAccessIterator> trait; 138 std::vector<double> accum_w; 139 accum_w.reserve(lastfirst); 140 std::partial_sum(weight_iterator(first), 141 weight_iterator(last), 142 std::back_inserter(accum_w)); 143 144 double sum_w=0; 145 double w_bound=perc_/100.0*accum_w.back(); 146 RandomAccessIterator upper(first); 147 while (sum_w + 0.5*trait.weight(upper) < w_bound) { 148 sum_w += trait.weight(upper); 149 ++upper; 150 } 151 if (upper==last) 152 return trait.data(last); 153 if (sum_w==0.0) { 154 while (first!=last) 155 if (trait.weight(first)) 156 return trait.data(first); 157 // all weights zero, hmm 158 return 0; 159 } 160 // lower should have nonzero weight 161 // and (lower, upper) should have zero weights 162 RandomAccessIterator lower=upper1; 163 while (trait.weight(lower)==0) 164 lower; 165 166 double p1=100.0/accum_w.back()*(sum_w + 0.5*trait.weight(upper)); 167 double p0=100.0/accum_w.back()*(sum_w  0.5*trait.weight(lower)); 168 double alpha = (perc_p0)/(p1p0); 169 utility::yat_assert<std::runtime_error>(alpha<=1.0, 170 "Percentile: alpha out of bounds"); 171 return trait.data(lower)+alpha*(trait.data(upper)trait.data(lower)); 172 } 173 174 std::vector<utility::DataWeight> v_copy; 175 v_copy.reserve(lastfirst); 176 std::copy(first, last, std::back_inserter(v_copy)); 177 std::sort(v_copy.begin(), v_copy.end()); 178 return calculate(v_copy.begin(), v_copy.end(), true, tag); 152 179 } 153 180 
trunk/yat/statistics/utility.h
r1338 r1404 153 153 template <class T> 154 154 double median(T first, T last, const bool sorted=false) 155 { return percentile (first, last, 50.0, sorted); }155 { return percentile2(first, last, 50.0, sorted); } 156 156 157 157 /** … … 170 170 171 171 Requirements: T should be an iterator over a range of doubles (or 172 any type being convertable to double). If \a sorted is false 173 iterator must be mutable, else readonly iterator is also ok. 172 any type being convertable to double). 174 173 175 174 @return \a p'th percentile of range 176 */ 177 template <class T> 178 double percentile(T first, T last, double p, bool sorted=false) 175 176 \deprecated percentile2 will replace this function in the future 177 178 \note the definition of percentile used here is not identical to 179 that one used in percentile2 and Percentile. The difference is 180 smaller for large ranges. 181 */ 182 template <class RandomAccessIterator> 183 double percentile(RandomAccessIterator first, RandomAccessIterator last, 184 double p, bool sorted=false) 185 { 186 // range is one value only is a special case 187 if (first+1 == last) 188 return utility::iterator_traits<RandomAccessIterator>().data(first); 189 if (sorted) { 190 // have to take care of this special case 191 if (p>=100) 192 return utility::iterator_traits<RandomAccessIterator>().data(last); 193 double j = p/100 * (std::distance(first,last)1); 194 int i = static_cast<int>(j); 195 return (1j+floor(j))*first[i] + (jfloor(j))*first[i+1]; 196 } 197 198 std::vector<typename std::iterator_traits<RandomAccessIterator>::value_type> 199 v_copy; 200 v_copy.reserve(std::distance(first,last)); 201 std::copy(first, last, std::back_inserter(v_copy)); 202 size_t i = static_cast<size_t>(p/100 * (v_copy.size()1)); 203 if (i+2 < v_copy.size()) { 204 std::partial_sort(v_copy.begin(), v_copy.begin()+i+2, v_copy.end()); 205 } 206 else 207 std::sort(v_copy.begin(), v_copy.end()); 208 return percentile(v_copy.begin(), v_copy.end(), p, true); 209 } 210 211 212 /** 213 \see Percentiler 214 215 \since new in yat 0.5 216 */ 217 template <class RandomAccessIterator> 218 double percentile2(RandomAccessIterator first, RandomAccessIterator last, 219 double p, bool sorted=false) 179 220 { 180 221 Percentiler percentiler(p, sorted);
Note: See TracChangeset
for help on using the changeset viewer.