source: trunk/yat/statistics/Percentiler.h @ 1404

Last change on this file since 1404 was 1404, checked in by Peter, 13 years ago

refs #366 - weighted percentile
Added a class Percentile that can calculate the percentile for both
unweighted and weighted ranges. In order to make sense of the weighted
case, I had to modify the definition of percentile (slightly). The old
function percentile is using the old definition, but a new function,
percentile2, is using the new function and is simply calling the
Percentile class. The median is the same for the two definitions and
therefore it makes no difference which function to call, but to enable
calculation of weighted median percentile2 is called.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.3 KB
Line 
1#ifndef _theplu_yat_statistics_percentiler_
2#define _theplu_yat_statistics_percentiler_
3
4// $Id: Percentiler.h 1404 2008-08-07 22:47:18Z peter $
5
6/*
7  Copyright (C) 2008 Peter Johansson
8
9  This file is part of the yat library, http://trac.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 2 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program; if not, write to the Free Software
23  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24  02111-1307, USA.
25*/
26
27#include "yat/utility/DataWeight.h"
28#include "yat/utility/iterator_traits.h"
29#include "yat/utility/yat_assert.h"
30#include "yat/utility/WeightIterator.h"
31
32#include <algorithm>
33#include <cassert>
34#include <cmath>
35#include <numeric>
36#include <stdexcept>
37#include <vector>
38
39#include <iostream>
40
41namespace theplu {
42namespace yat {
43namespace statistics { 
44
45  /**
46     \brief Functor to calculate percentile of a range
47
48     \since New in yat 0.5
49   */
50  class Percentiler
51  {
52  public:
53    /**
54       \param perc percentile to calculate [0,100]. Defaule value is
55       50, which implies class will calculate median.
56       \param sorted if true class assumes that ranges are already
57       sorted, if false the range will copied to a new range which is
58       sorted.
59     */
60    Percentiler(double perc=50, bool sorted=false);
61
62    /**
63       Function is a non-mutable function, i.e., \a first and \a last
64       can be const_iterators.
65
66       \return percentile of range
67     */
68    template<typename RandomAccessIterator>
69    double operator()(RandomAccessIterator first, 
70                      RandomAccessIterator last) const
71    {
72      return calculate(first, last, sorted_, 
73       typename utility::weighted_iterator_traits<RandomAccessIterator>::type());
74    }
75  private:
76    double perc_;
77    bool sorted_;
78
79    // unweighted version
80    template<typename RandomAccessIterator>
81    double calculate(RandomAccessIterator first, RandomAccessIterator last,
82                     bool sorted, utility::unweighted_iterator_tag tag) const;
83
84    // weighted version
85    template<typename RandomAccessIterator>
86    double calculate(RandomAccessIterator first, RandomAccessIterator last,
87                     bool sorted, utility::weighted_iterator_tag tag) const;
88
89    // using compiler generated copy
90    //Percentiler(const Percentiler&);
91    //Percentiler& operator=(const Percentiler&);
92
93  };
94
95  // template implementations
96  // /////////////////////////
97
98  // unweighted version
99  template<typename RandomAccessIterator>
100  double 
101  Percentiler::calculate(RandomAccessIterator first, 
102                         RandomAccessIterator last,
103                         bool sorted,
104                         utility::unweighted_iterator_tag tag) const
105  {
106    // range is one value only is a special case
107    if (first+1 == last)
108      return utility::iterator_traits<RandomAccessIterator>().data(first);
109    if (sorted) {
110      size_t n = last - first;
111      // have to take care of this special case
112      if (perc_>= 100.0 - 50.0/n)
113        return *(--last);
114      if (perc_<= 50.0/n)
115        return *first;
116      double j = n * (perc_-50.0/n) / 100.0;
117      int i = static_cast<int>(j);
118      return (1-j+floor(j))*first[i] + (j-floor(j))*first[i+1];
119    }
120
121    std::vector<double> v_copy;
122    v_copy.reserve(std::distance(first,last));
123    std::copy(first, last, std::back_inserter(v_copy));
124    std::sort(v_copy.begin(), v_copy.end());
125    return calculate(v_copy.begin(), v_copy.end(), true, tag);
126  }
127
128
129  // weighted version
130  template<typename RandomAccessIterator>
131  double Percentiler::calculate(RandomAccessIterator first, 
132                                RandomAccessIterator last,
133                                bool sorted,
134                                utility::weighted_iterator_tag tag) const
135  {
136    if (sorted) {
137      utility::iterator_traits<RandomAccessIterator> trait;
138      std::vector<double> accum_w;
139      accum_w.reserve(last-first);
140      std::partial_sum(weight_iterator(first),
141                       weight_iterator(last),
142                       std::back_inserter(accum_w));
143
144      double sum_w=0;
145      double w_bound=perc_/100.0*accum_w.back();
146      RandomAccessIterator upper(first);
147      while (sum_w + 0.5*trait.weight(upper) < w_bound) {
148        sum_w += trait.weight(upper);
149        ++upper;
150      }
151      if (upper==last)
152        return trait.data(--last);
153      if (sum_w==0.0) {
154        while (first!=last)
155          if (trait.weight(first))
156            return trait.data(first);
157        // all weights zero, hmm
158        return 0;
159      }
160      // lower should have non-zero weight
161      // and (lower, upper) should have zero weights
162      RandomAccessIterator lower=upper-1;
163      while (trait.weight(lower)==0)
164        --lower;
165
166      double p1=100.0/accum_w.back()*(sum_w + 0.5*trait.weight(upper));
167      double p0=100.0/accum_w.back()*(sum_w - 0.5*trait.weight(lower));
168      double alpha = (perc_-p0)/(p1-p0);
169      utility::yat_assert<std::runtime_error>(alpha<=1.0, 
170                                              "Percentile: alpha out of bounds");
171      return trait.data(lower)+alpha*(trait.data(upper)-trait.data(lower));
172    }
173
174    std::vector<utility::DataWeight> v_copy;
175    v_copy.reserve(last-first);
176    std::copy(first, last, std::back_inserter(v_copy));
177    std::sort(v_copy.begin(), v_copy.end());
178    return calculate(v_copy.begin(), v_copy.end(), true, tag);
179  }
180
181
182}}} // of namespace statistics, yat, and theplu
183
184#endif
Note: See TracBrowser for help on using the repository browser.