source: trunk/yat/statistics/Percentiler.h @ 2076

Last change on this file since 2076 was 2076, checked in by Peter, 14 years ago

prefer including iosfwd instead of iostream when possible

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.7 KB
Line 
1#ifndef _theplu_yat_statistics_percentiler_
2#define _theplu_yat_statistics_percentiler_
3
4// $Id: Percentiler.h 2076 2009-10-06 22:17:22Z peter $
5
6/*
7  Copyright (C) 2008 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2009 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include "yat/utility/DataWeight.h"
27#include "yat/utility/iterator_traits.h"
28#include "yat/utility/yat_assert.h"
29#include "yat/utility/WeightIterator.h"
30
31#include <algorithm>
32#include <cassert>
33#include <cmath>
34#include <numeric>
35#include <stdexcept>
36#include <vector>
37
38namespace theplu {
39namespace yat {
40namespace statistics { 
41
42  /**
43     \brief Functor to calculate percentile of a range
44
45     \since New in yat 0.5
46   */
47  class Percentiler
48  {
49  public:
50    /**
51       \param perc percentile to calculate [0,100]. Default value is
52       50, which implies class will calculate median.
53       \param sorted if true class assumes that ranges are already
54       sorted, if false the range will copied to a new range which is
55       sorted.
56     */
57    Percentiler(double perc=50, bool sorted=false);
58
59    /**
60       Function is a non-mutable function, i.e., \a first and \a last
61       can be const_iterators.
62
63       The \a Nth percentile is defined such that, for example, when
64       having four numbers \f$ 0.69 < 1.41 < 3.14 < 28 \f$, the \a Nth
65       percentile is:
66
67       - \f$ 0.69 \f$ if \f$ N < 25 \f$
68       - \f$ (0.69+1.41)/2 \f$ if \f$ N=25 \f$
69       - \f$ 1.41 \f$ if \f$ 25 < N < 50 \f$
70       - \f$ (1.41+3.14)/2 \f$ if \f$ N=50 \f$
71       - \f$ 3.14 \f$ if \f$ 50 < N < 75 \f$
72       - \f$ (3.14+28)/2 \f$ if \f$ N=75 \f$
73       - \f$ 28 \f$ if \f$ 75 < N \f$
74
75       Similarily, if we have a weighted range \f$ x_0=0.69, w_0=1 ;
76       x_1=1.41, w_1=0 ; x_2=3.14, w_2=0.5 ; x_3=28, w_3=0.5 \f$, we
77       calculate the accumulated normalized weight \f$ S_k = \frac
78       {100}{\sum w_i}\sum_{i=0}^k w_i \f$ and the percentile is
79
80       - \f$ 0.69 \f$ if \f$ N < S_0 \f$
81       - \f$ (0.69+3.14)/2 \f$ if \f$ N=S_0  \f$
82       - \f$ 3.14 \f$ if \f$ S_0 < N < S_2 \f$
83       - \f$ (3.14+28)/2 \f$ if \f$ N=S_2 \f$
84       - \f$ 28 \f$ if \f$ S_2 < N \f$
85       
86       Note, that data point with weight zero is completely ignored.
87
88       \return percentile of range
89     */
90    template<typename RandomAccessIterator>
91    double operator()(RandomAccessIterator first, 
92                      RandomAccessIterator last) const
93    {
94      return calculate(first, last, sorted_, 
95       typename utility::weighted_iterator_traits<RandomAccessIterator>::type());
96    }
97  private:
98    double perc_;
99    bool sorted_;
100
101    // unweighted version
102    template<typename RandomAccessIterator>
103    double calculate(RandomAccessIterator first, RandomAccessIterator last,
104                     bool sorted, utility::unweighted_iterator_tag tag) const;
105
106    // weighted version
107    template<typename RandomAccessIterator>
108    double calculate(RandomAccessIterator first, RandomAccessIterator last,
109                     bool sorted, utility::weighted_iterator_tag tag) const;
110
111    // using compiler generated copy
112    //Percentiler(const Percentiler&);
113    //Percentiler& operator=(const Percentiler&);
114
115  };
116
117  // template implementations
118  // /////////////////////////
119
120  // unweighted version
121  template<typename RandomAccessIterator>
122  double 
123  Percentiler::calculate(RandomAccessIterator first, 
124                         RandomAccessIterator last,
125                         bool sorted,
126                         utility::unweighted_iterator_tag tag) const
127  {
128    // range is one value only is a special case
129    if (first+1 == last)
130      *first;
131    if (sorted) {
132      size_t n = last - first;
133      // have to take care of this special case
134      if (perc_>= 100.0)
135        return *(--last);
136      if (perc_<= 0.0)
137        return *first;
138      double j = n * perc_ / 100.0;
139      size_t i = static_cast<size_t>(j);
140      if (i==j)
141        return (first[i]+first[i-1])/2;
142      return first[i];
143    }
144
145    std::vector<double> v_copy;
146    v_copy.reserve(std::distance(first,last));
147    std::copy(first, last, std::back_inserter(v_copy));
148    std::sort(v_copy.begin(), v_copy.end());
149    return calculate(v_copy.begin(), v_copy.end(), true, tag);
150  }
151
152
153  // weighted version
154  template<typename RandomAccessIterator>
155  double Percentiler::calculate(RandomAccessIterator first, 
156                                RandomAccessIterator last,
157                                bool sorted,
158                                utility::weighted_iterator_tag tag) const
159  {
160    if (sorted) {
161      utility::iterator_traits<RandomAccessIterator> trait;
162      std::vector<double> accum_w;
163      accum_w.reserve(last-first);
164      std::partial_sum(weight_iterator(first),
165                       weight_iterator(last),
166                       std::back_inserter(accum_w));
167
168      double w_bound=perc_/100.0*accum_w.back();
169      std::vector<double>::const_iterator upper(accum_w.begin());
170      double margin=1e-10;
171      while (*upper <= w_bound+margin && upper!=accum_w.end())
172        ++upper;
173      if (upper==accum_w.end())
174        --upper;
175      std::vector<double>::const_iterator lower(upper);
176      while ( *(lower-1)>=w_bound-margin && lower>accum_w.begin())
177        --lower;
178     
179      return (trait.data(first+(upper-accum_w.begin()))+
180              trait.data(first+(lower-accum_w.begin())))/2;
181    }
182
183    std::vector<utility::DataWeight> v_copy;
184    v_copy.reserve(last-first);
185    std::copy(first, last, std::back_inserter(v_copy));
186    std::sort(v_copy.begin(), v_copy.end());
187    return calculate(v_copy.begin(), v_copy.end(), true, tag);
188  }
189
190
191}}} // of namespace statistics, yat, and theplu
192
193#endif
Note: See TracBrowser for help on using the repository browser.