source: trunk/yat/statistics/Percentiler.h @ 1701

Last change on this file since 1701 was 1487, checked in by Jari Häkkinen, 13 years ago

Addresses #436. GPL license copy reference should also be updated.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.7 KB
Line 
1#ifndef _theplu_yat_statistics_percentiler_
2#define _theplu_yat_statistics_percentiler_
3
4// $Id: Percentiler.h 1487 2008-09-10 08:41:36Z jari $
5
6/*
7  Copyright (C) 2008 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with yat. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "yat/utility/DataWeight.h"
26#include "yat/utility/iterator_traits.h"
27#include "yat/utility/yat_assert.h"
28#include "yat/utility/WeightIterator.h"
29
30#include <algorithm>
31#include <cassert>
32#include <cmath>
33#include <numeric>
34#include <stdexcept>
35#include <vector>
36
37#include <iostream>
38
39namespace theplu {
40namespace yat {
41namespace statistics { 
42
43  /**
44     \brief Functor to calculate percentile of a range
45
46     \since New in yat 0.5
47   */
48  class Percentiler
49  {
50  public:
51    /**
52       \param perc percentile to calculate [0,100]. Default value is
53       50, which implies class will calculate median.
54       \param sorted if true class assumes that ranges are already
55       sorted, if false the range will copied to a new range which is
56       sorted.
57     */
58    Percentiler(double perc=50, bool sorted=false);
59
60    /**
61       Function is a non-mutable function, i.e., \a first and \a last
62       can be const_iterators.
63
64       The \a Nth percentile is defined such that, for example, when
65       having four numbers \f$ 0.69 < 1.41 < 3.14 < 28 \f$, the \a Nth
66       percentile is:
67
68       - \f$ 0.69 \f$ if \f$ N < 25 \f$
69       - \f$ (0.69+1.41)/2 \f$ if \f$ N=25 \f$
70       - \f$ 1.41 \f$ if \f$ 25 < N < 50 \f$
71       - \f$ (1.41+3.14)/2 \f$ if \f$ N=50 \f$
72       - \f$ 3.14 \f$ if \f$ 50 < N < 75 \f$
73       - \f$ (3.14+28)/2 \f$ if \f$ N=75 \f$
74       - \f$ 28 \f$ if \f$ 75 < N \f$
75
76       Similarily, if we have a weighted range \f$ x_0=0.69, w_0=1 ;
77       x_1=1.41, w_1=0 ; x_2=3.14, w_2=0.5 ; x_3=28, w_3=0.5 \f$, we
78       calculate the accumulated normalized weight \f$ S_k = \frac
79       {100}{\sum w_i}\sum_{i=0}^k w_i \f$ and the percentile is
80
81       - \f$ 0.69 \f$ if \f$ N < S_0 \f$
82       - \f$ (0.69+3.14)/2 \f$ if \f$ N=S_0  \f$
83       - \f$ 3.14 \f$ if \f$ S_0 < N < S_2 \f$
84       - \f$ (3.14+28)/2 \f$ if \f$ N=S_2 \f$
85       - \f$ 28 \f$ if \f$ S_2 < N \f$
86       
87       Note, that data point with weight zero is completely ignored.
88
89       \return percentile of range
90     */
91    template<typename RandomAccessIterator>
92    double operator()(RandomAccessIterator first, 
93                      RandomAccessIterator last) const
94    {
95      return calculate(first, last, sorted_, 
96       typename utility::weighted_iterator_traits<RandomAccessIterator>::type());
97    }
98  private:
99    double perc_;
100    bool sorted_;
101
102    // unweighted version
103    template<typename RandomAccessIterator>
104    double calculate(RandomAccessIterator first, RandomAccessIterator last,
105                     bool sorted, utility::unweighted_iterator_tag tag) const;
106
107    // weighted version
108    template<typename RandomAccessIterator>
109    double calculate(RandomAccessIterator first, RandomAccessIterator last,
110                     bool sorted, utility::weighted_iterator_tag tag) const;
111
112    // using compiler generated copy
113    //Percentiler(const Percentiler&);
114    //Percentiler& operator=(const Percentiler&);
115
116  };
117
118  // template implementations
119  // /////////////////////////
120
121  // unweighted version
122  template<typename RandomAccessIterator>
123  double 
124  Percentiler::calculate(RandomAccessIterator first, 
125                         RandomAccessIterator last,
126                         bool sorted,
127                         utility::unweighted_iterator_tag tag) const
128  {
129    // range is one value only is a special case
130    if (first+1 == last)
131      *first;
132    if (sorted) {
133      size_t n = last - first;
134      // have to take care of this special case
135      if (perc_>= 100.0)
136        return *(--last);
137      if (perc_<= 0.0)
138        return *first;
139      double j = n * perc_ / 100.0;
140      size_t i = static_cast<size_t>(j);
141      if (i==j)
142        return (first[i]+first[i-1])/2;
143      return first[i];
144    }
145
146    std::vector<double> v_copy;
147    v_copy.reserve(std::distance(first,last));
148    std::copy(first, last, std::back_inserter(v_copy));
149    std::sort(v_copy.begin(), v_copy.end());
150    return calculate(v_copy.begin(), v_copy.end(), true, tag);
151  }
152
153
154  // weighted version
155  template<typename RandomAccessIterator>
156  double Percentiler::calculate(RandomAccessIterator first, 
157                                RandomAccessIterator last,
158                                bool sorted,
159                                utility::weighted_iterator_tag tag) const
160  {
161    if (sorted) {
162      utility::iterator_traits<RandomAccessIterator> trait;
163      std::vector<double> accum_w;
164      accum_w.reserve(last-first);
165      std::partial_sum(weight_iterator(first),
166                       weight_iterator(last),
167                       std::back_inserter(accum_w));
168
169      double w_bound=perc_/100.0*accum_w.back();
170      std::vector<double>::const_iterator upper(accum_w.begin());
171      double margin=1e-10;
172      while (*upper <= w_bound+margin && upper!=accum_w.end())
173        ++upper;
174      if (upper==accum_w.end())
175        --upper;
176      std::vector<double>::const_iterator lower(upper);
177      while ( *(lower-1)>=w_bound-margin && lower>accum_w.begin())
178        --lower;
179     
180      return (trait.data(first+(upper-accum_w.begin()))+
181              trait.data(first+(lower-accum_w.begin())))/2;
182    }
183
184    std::vector<utility::DataWeight> v_copy;
185    v_copy.reserve(last-first);
186    std::copy(first, last, std::back_inserter(v_copy));
187    std::sort(v_copy.begin(), v_copy.end());
188    return calculate(v_copy.begin(), v_copy.end(), true, tag);
189  }
190
191
192}}} // of namespace statistics, yat, and theplu
193
194#endif
Note: See TracBrowser for help on using the repository browser.