# Changeset 1404 for trunk/yat

Ignore:
Timestamp:
Aug 8, 2008, 12:47:18 AM (13 years ago)
Message:

refs #366 - weighted percentile
Added a class Percentile that can calculate the percentile for both
unweighted and weighted ranges. In order to make sense of the weighted
case, I had to modify the definition of percentile (slightly). The old
function percentile is using the old definition, but a new function,
percentile2, is using the new function and is simply calling the
Percentile class. The median is the same for the two definitions and
therefore it makes no difference which function to call, but to enable
calculation of weighted median percentile2 is called.

Location:
trunk/yat
Files:
5 edited

Unmodified
Removed
• ## trunk/yat/statistics/Percentiler.h

 r1339 */ #include "yat/utility/DataWeight.h" #include "yat/utility/iterator_traits.h" #include "yat/utility/yat_assert.h" #include "yat/utility/WeightIterator.h" #include #include #include #include #include #include #include namespace theplu { \param perc percentile to calculate [0,100]. Defaule value is 50, which implies class will calculate median. \param sorted if true class assues that ranges are already \param sorted if true class assumes that ranges are already sorted, if false the range will copied to a new range which is sorted. /** The percentile is found by interpolation, using the formula \f$percentile = (1 - \delta) x_i + \delta x_{i+1} \f$ where \a p is floor\f$((n - 1)p/100)\f$ and \f$\delta \f$ is \f$(n-1)p/100 - i \f$. Thus the minimum value of the range is given by perc equal to zero, the maximum is given by perc equal to 100 and the median value is given by perc equal to 50. Function is a non-mutable function, i.e., \a first and \a last can be const_iterators. template double operator()(RandomAccessIterator first, RandomAccessIterator last) const; /* RandomAccessIterator last) const { return calculate(first, last, sorted_, typename utility::weighted_iterator_traits::type()); typename utility::weighted_iterator_traits::type()); } */ private: double perc_; bool sorted_; // unweighted version - NOTE range must be sorted // unweighted version template double calculate(RandomAccessIterator first, RandomAccessIterator last, utility::unweighted_iterator_tag tag) const; bool sorted, utility::unweighted_iterator_tag tag) const; // weighted version - NOTE range must be sorted // weighted version template double calculate(RandomAccessIterator first, RandomAccessIterator last, utility::weighted_iterator_tag tag) const; bool sorted, utility::weighted_iterator_tag tag) const; // using compiler generated copy // unweighted version template double Percentiler::operator()(RandomAccessIterator first, RandomAccessIterator last) const double Percentiler::calculate(RandomAccessIterator first, RandomAccessIterator last, bool sorted, utility::unweighted_iterator_tag tag) const { // range is one value only is a special case if (first+1 == last) return *first; // just for convenience typedef typename utility::weighted_iterator_traits::type weighted_tag; if (sorted_){ if (perc_>=100) return utility::iterator_traits().data(first); if (sorted) { size_t n = last - first; // have to take care of this special case if (perc_>= 100.0 - 50.0/n) return *(--last); return calculate(first, last, weighted_tag()); if (perc_<= 50.0/n) return *first; double j = n * (perc_-50.0/n) / 100.0; int i = static_cast(j); return (1-j+floor(j))*first[i] + (j-floor(j))*first[i+1]; } std::vector::value_type> v_copy; std::vector v_copy; v_copy.reserve(std::distance(first,last)); std::copy(first, last, std::back_inserter(v_copy)); size_t i = static_cast(perc_/100 * (v_copy.size()-1)); if (i+2 < v_copy.size()) { std::partial_sort(v_copy.begin(), v_copy.begin()+i+2, v_copy.end()); } else std::sort(v_copy.begin(), v_copy.end()); return calculate(v_copy.begin(), v_copy.end(), weighted_tag()); } // unweighted version template double Percentiler::calculate(RandomAccessIterator first, RandomAccessIterator last, utility::unweighted_iterator_tag tag) const { double j = perc_/100 * (std::distance(first,last)-1); int i = static_cast(j); return (1-j+floor(j))*first[i] + (j-floor(j))*first[i+1]; std::sort(v_copy.begin(), v_copy.end()); return calculate(v_copy.begin(), v_copy.end(), true, tag); } double Percentiler::calculate(RandomAccessIterator first, RandomAccessIterator last, bool sorted, utility::weighted_iterator_tag tag) const { assert(false && "not implemented"); return 0; if (sorted) { utility::iterator_traits trait; std::vector accum_w; accum_w.reserve(last-first); std::partial_sum(weight_iterator(first), weight_iterator(last), std::back_inserter(accum_w)); double sum_w=0; double w_bound=perc_/100.0*accum_w.back(); RandomAccessIterator upper(first); while (sum_w + 0.5*trait.weight(upper) < w_bound) { sum_w += trait.weight(upper); ++upper; } if (upper==last) return trait.data(--last); if (sum_w==0.0) { while (first!=last) if (trait.weight(first)) return trait.data(first); // all weights zero, hmm return 0; } // lower should have non-zero weight // and (lower, upper) should have zero weights RandomAccessIterator lower=upper-1; while (trait.weight(lower)==0) --lower; double p1=100.0/accum_w.back()*(sum_w + 0.5*trait.weight(upper)); double p0=100.0/accum_w.back()*(sum_w - 0.5*trait.weight(lower)); double alpha = (perc_-p0)/(p1-p0); utility::yat_assert(alpha<=1.0, "Percentile: alpha out of bounds"); return trait.data(lower)+alpha*(trait.data(upper)-trait.data(lower)); } std::vector v_copy; v_copy.reserve(last-first); std::copy(first, last, std::back_inserter(v_copy)); std::sort(v_copy.begin(), v_copy.end()); return calculate(v_copy.begin(), v_copy.end(), true, tag); }
• ## trunk/yat/statistics/utility.h

 r1338 template double median(T first, T last, const bool sorted=false) { return percentile(first, last, 50.0, sorted); } { return percentile2(first, last, 50.0, sorted); } /** Requirements: T should be an iterator over a range of doubles (or any type being convertable to double). If \a sorted is false iterator must be mutable, else read-only iterator is also ok. any type being convertable to double). @return \a p'th percentile of range */ template double percentile(T first, T last, double p, bool sorted=false) \deprecated percentile2 will replace this function in the future \note the definition of percentile used here is not identical to that one used in percentile2 and Percentile. The difference is smaller for large ranges. */ template double percentile(RandomAccessIterator first, RandomAccessIterator last, double p, bool sorted=false) { // range is one value only is a special case if (first+1 == last) return utility::iterator_traits().data(first); if (sorted) { // have to take care of this special case if (p>=100) return utility::iterator_traits().data(--last); double j = p/100 * (std::distance(first,last)-1); int i = static_cast(j); return (1-j+floor(j))*first[i] + (j-floor(j))*first[i+1]; } std::vector::value_type> v_copy; v_copy.reserve(std::distance(first,last)); std::copy(first, last, std::back_inserter(v_copy)); size_t i = static_cast(p/100 * (v_copy.size()-1)); if (i+2 < v_copy.size()) { std::partial_sort(v_copy.begin(), v_copy.begin()+i+2, v_copy.end()); } else std::sort(v_copy.begin(), v_copy.end()); return percentile(v_copy.begin(), v_copy.end(), p, true); } /** \see Percentiler \since new in yat 0.5 */ template double percentile2(RandomAccessIterator first, RandomAccessIterator last, double p, bool sorted=false) { Percentiler percentiler(p, sorted);
• ## trunk/yat/utility/DataIterator.h

 r1377 DataIterator               // Derived , Base                          // Base , boost::remove_reference::data_reference> , typename boost::remove_reference::data_reference>::type , boost::forward_traversal_tag    // CategoryOrTraversal , typename iterator_traits::data_reference // Reference */ template DataIterator make_data_iterator(Base base) DataIterator data_iterator(Base base) { return DataIterator(base);
• ## trunk/yat/utility/MatrixWeighted.cc

 r1384 columns_ = data.columns(); resize(data.columns(), data.rows()); std::copy(data.begin(), data.end(), make_data_iterator(vec_.begin())); std::copy(weight.begin(), weight.end(), make_weight_iterator(vec_.begin())); std::copy(data.begin(), data.end(), data_iterator(vec_.begin())); std::copy(weight.begin(), weight.end(), weight_iterator(vec_.begin())); }
• ## trunk/yat/utility/WeightIterator.h

 r1377 WeightIterator               // Derived , Base                          // Base , boost::remove_reference::weight_reference> , typename boost::remove_reference::weight_reference>::type , boost::forward_traversal_tag    // CategoryOrTraversal , typename iterator_traits::weight_reference // Reference */ template WeightIterator make_weight_iterator(Base base) WeightIterator weight_iterator(Base base) { return WeightIterator(base);
Note: See TracChangeset for help on using the changeset viewer.