source: trunk/yat/statistics/Percentiler.h

Last change on this file was 3875, checked in by Peter, 19 months ago

merge release 0.17 into trunk

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.1 KB
Line 
1#ifndef _theplu_yat_statistics_percentiler_
2#define _theplu_yat_statistics_percentiler_
3
4// $Id: Percentiler.h 3875 2020-03-05 01:40:57Z peter $
5
6/*
7  Copyright (C) 2008, 2009 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2010, 2011, 2014, 2016, 2020 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include "yat/utility/concept_check.h"
27#include "yat/utility/DataWeight.h"
28#include "yat/utility/iterator_traits.h"
29#include "yat/utility/yat_assert.h"
30#include "yat/utility/WeightIterator.h"
31
32#include <boost/concept_check.hpp>
33#include <boost/iterator/iterator_concepts.hpp>
34
35#include <algorithm>
36#include <cmath>
37#include <limits>
38#include <numeric>
39#include <stdexcept>
40#include <vector>
41
42namespace theplu {
43namespace yat {
44namespace statistics {
45
46  /**
47     \brief Functor to calculate percentile of a range
48
49     \since New in yat 0.5
50   */
51  class Percentiler
52  {
53  public:
54    /**
55       \param perc percentile to calculate [0,100]. Default value is
56       50, which implies class will calculate median.
57       \param sorted if true class assumes that ranges are already
58       sorted, if false the range will copied to a new range which is
59       sorted.
60     */
61    Percentiler(double perc=50, bool sorted=false);
62
63    /**
64       Function is a non-mutable function, i.e., \a first and \a last
65       can be const_iterators.
66
67       The \a Nth percentile is defined such that, for example, when
68       having four numbers \f$ 0.69 < 1.41 < 3.14 < 28 \f$, the \a Nth
69       percentile is:
70
71       - \f$ 0.69 \f$ if \f$ N < 25 \f$
72       - \f$ (0.69+1.41)/2 \f$ if \f$ N=25 \f$
73       - \f$ 1.41 \f$ if \f$ 25 < N < 50 \f$
74       - \f$ (1.41+3.14)/2 \f$ if \f$ N=50 \f$
75       - \f$ 3.14 \f$ if \f$ 50 < N < 75 \f$
76       - \f$ (3.14+28)/2 \f$ if \f$ N=75 \f$
77       - \f$ 28 \f$ if \f$ 75 < N \f$
78
79       Similarily, if we have a weighted range \f$ x_0=0.69, w_0=1 ;
80       x_1=1.41, w_1=0 ; x_2=3.14, w_2=0.5 ; x_3=28, w_3=0.5 \f$, we
81       calculate the accumulated normalized weight \f$ S_k = \frac
82       {100}{\sum w_i}\sum_{i=0}^k w_i \f$ and the percentile is
83
84       - \f$ 0.69 \f$ if \f$ N < S_0 \f$
85       - \f$ (0.69+3.14)/2 \f$ if \f$ N=S_0  \f$
86       - \f$ 3.14 \f$ if \f$ S_0 < N < S_2 \f$
87       - \f$ (3.14+28)/2 \f$ if \f$ N=S_2 \f$
88       - \f$ 28 \f$ if \f$ S_2 < N \f$
89
90       Note, that data point with weight zero is completely ignored.
91
92       Type Requirements:
93       - \c RandomAccessIterator must be a \ref
94       concept_data_iterator
95       - \c RandomAccessIterator must be a \random_access_traversal_iterator
96       - \c For unweighted iterator, RandomAccessIterator must be an
97         \lvalue_iterator as implied by \forward_iterator.
98
99       \return percentile of range
100     */
101    template<typename RandomAccessIterator>
102    double operator()(RandomAccessIterator first,
103                      RandomAccessIterator last) const
104    {
105      BOOST_CONCEPT_ASSERT((utility::DataIteratorConcept<RandomAccessIterator>));
106      BOOST_CONCEPT_ASSERT((boost_concepts::RandomAccessTraversal<RandomAccessIterator>));
107      //
108      if (first==last)
109        return std::numeric_limits<double>::quiet_NaN();
110      // just to avoid long line
111      using utility::weighted_iterator_traits;
112      typedef typename weighted_iterator_traits<RandomAccessIterator>::type tag;
113      return calculate(first, last, sorted_, tag());
114    }
115  private:
116    double perc_;
117    bool sorted_;
118
119    // unweighted version
120    template<typename RandomAccessIterator>
121    double calculate(RandomAccessIterator first, RandomAccessIterator last,
122                     bool sorted, utility::unweighted_iterator_tag tag) const;
123
124    // weighted version
125    template<typename RandomAccessIterator>
126    double calculate(RandomAccessIterator first, RandomAccessIterator last,
127                     bool sorted, utility::weighted_iterator_tag tag) const;
128
129    // using compiler generated copy
130    //Percentiler(const Percentiler&);
131    //Percentiler& operator=(const Percentiler&);
132
133  };
134
135  // template implementations
136  // /////////////////////////
137
138  // unweighted version
139  template<typename RandomAccessIterator>
140  double
141  Percentiler::calculate(RandomAccessIterator first,
142                         RandomAccessIterator last,
143                         bool sorted,
144                         utility::unweighted_iterator_tag tag) const
145  {
146    BOOST_CONCEPT_ASSERT((boost_concepts::LvalueIterator<RandomAccessIterator>));
147    size_t n = last - first;
148    // range is one value only is a special case
149    if (n == 1)
150      *first;
151
152    double j = n * perc_ / 100.0;
153
154    // Some special cases
155    if (j > n-1) {
156      if (sorted)
157        return *(--last);
158      return *std::max_element(first, last);
159    }
160    if (j < 1.0) {
161      if (sorted)
162        return *first;
163      return *std::min_element(first, last);
164    }
165
166    size_t i = static_cast<size_t>(j);
167    // for border case (j integer) we take the average of adjacent bins
168    size_t k = (i==j) ? i-1 : i;
169
170    if (sorted) {
171      if (i==k)
172        return first[i];
173      return (first[i]+first[k])/2;
174    }
175
176    std::vector<double> vec(first, last);
177    // find the ith element
178    std::nth_element(vec.begin(), vec.begin()+i, vec.end());
179    // if i==k simply return vec[i]
180    if (i==k)
181      return vec[i];
182    YAT_ASSERT(k==i-1);
183    // otherwise return average of vec[i] and vec[k].
184
185    // We need to find the kth element. Since we have called
186    // nth_element above, we know that all elements in (begin+i, end)
187    // are >= vec[i] and all elements in [begin, begin+i) are <=
188    // vec[i]. Consequently, kth (k=i-1) element is the max element in
189    // range [begin, begin+i).
190    return (vec[i] + *std::max_element(vec.begin(), vec.begin()+i))/2;
191  }
192
193
194  // weighted version
195  template<typename RandomAccessIterator>
196  double Percentiler::calculate(RandomAccessIterator first,
197                                RandomAccessIterator last,
198                                bool sorted,
199                                utility::weighted_iterator_tag tag) const
200  {
201    if (sorted) {
202      utility::iterator_traits<RandomAccessIterator> trait;
203      std::vector<double> accum_w;
204      accum_w.reserve(last-first);
205      std::partial_sum(weight_iterator(first),
206                       weight_iterator(last),
207                       std::back_inserter(accum_w));
208
209      double w_bound=perc_/100.0*accum_w.back();
210      std::vector<double>::const_iterator upper(accum_w.begin());
211      double margin=1e-10;
212      while (upper!=accum_w.end() && *upper <= w_bound+margin)
213        ++upper;
214      while (upper!=accum_w.begin() &&
215             (upper==accum_w.end() ||
216              trait.weight(first+(upper-accum_w.begin()))==0.0))
217        --upper;
218      std::vector<double>::const_iterator lower(upper);
219      while ( *(lower-1)>=w_bound-margin && lower>accum_w.begin())
220        --lower;
221
222      return (trait.data(first+(upper-accum_w.begin()))+
223              trait.data(first+(lower-accum_w.begin())))/2;
224    }
225
226    std::vector<utility::DataWeight> v_copy(first, last);
227    std::sort(v_copy.begin(), v_copy.end());
228    return calculate(v_copy.begin(), v_copy.end(), true, tag);
229  }
230
231
232}}} // of namespace statistics, yat, and theplu
233
234#endif
Note: See TracBrowser for help on using the repository browser.