source: trunk/yat/statistics/Percentiler.h @ 2078

Last change on this file since 2078 was 2078, checked in by Peter, 14 years ago

remove inclusions not needed

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.7 KB
Line 
1#ifndef _theplu_yat_statistics_percentiler_
2#define _theplu_yat_statistics_percentiler_
3
4// $Id: Percentiler.h 2078 2009-10-07 16:33:56Z peter $
5
6/*
7  Copyright (C) 2008 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2009 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include "yat/utility/DataWeight.h"
27#include "yat/utility/iterator_traits.h"
28#include "yat/utility/yat_assert.h"
29#include "yat/utility/WeightIterator.h"
30
31#include <algorithm>
32#include <cmath>
33#include <numeric>
34#include <stdexcept>
35#include <vector>
36
37namespace theplu {
38namespace yat {
39namespace statistics { 
40
41  /**
42     \brief Functor to calculate percentile of a range
43
44     \since New in yat 0.5
45   */
46  class Percentiler
47  {
48  public:
49    /**
50       \param perc percentile to calculate [0,100]. Default value is
51       50, which implies class will calculate median.
52       \param sorted if true class assumes that ranges are already
53       sorted, if false the range will copied to a new range which is
54       sorted.
55     */
56    Percentiler(double perc=50, bool sorted=false);
57
58    /**
59       Function is a non-mutable function, i.e., \a first and \a last
60       can be const_iterators.
61
62       The \a Nth percentile is defined such that, for example, when
63       having four numbers \f$ 0.69 < 1.41 < 3.14 < 28 \f$, the \a Nth
64       percentile is:
65
66       - \f$ 0.69 \f$ if \f$ N < 25 \f$
67       - \f$ (0.69+1.41)/2 \f$ if \f$ N=25 \f$
68       - \f$ 1.41 \f$ if \f$ 25 < N < 50 \f$
69       - \f$ (1.41+3.14)/2 \f$ if \f$ N=50 \f$
70       - \f$ 3.14 \f$ if \f$ 50 < N < 75 \f$
71       - \f$ (3.14+28)/2 \f$ if \f$ N=75 \f$
72       - \f$ 28 \f$ if \f$ 75 < N \f$
73
74       Similarily, if we have a weighted range \f$ x_0=0.69, w_0=1 ;
75       x_1=1.41, w_1=0 ; x_2=3.14, w_2=0.5 ; x_3=28, w_3=0.5 \f$, we
76       calculate the accumulated normalized weight \f$ S_k = \frac
77       {100}{\sum w_i}\sum_{i=0}^k w_i \f$ and the percentile is
78
79       - \f$ 0.69 \f$ if \f$ N < S_0 \f$
80       - \f$ (0.69+3.14)/2 \f$ if \f$ N=S_0  \f$
81       - \f$ 3.14 \f$ if \f$ S_0 < N < S_2 \f$
82       - \f$ (3.14+28)/2 \f$ if \f$ N=S_2 \f$
83       - \f$ 28 \f$ if \f$ S_2 < N \f$
84       
85       Note, that data point with weight zero is completely ignored.
86
87       \return percentile of range
88     */
89    template<typename RandomAccessIterator>
90    double operator()(RandomAccessIterator first, 
91                      RandomAccessIterator last) const
92    {
93      return calculate(first, last, sorted_, 
94       typename utility::weighted_iterator_traits<RandomAccessIterator>::type());
95    }
96  private:
97    double perc_;
98    bool sorted_;
99
100    // unweighted version
101    template<typename RandomAccessIterator>
102    double calculate(RandomAccessIterator first, RandomAccessIterator last,
103                     bool sorted, utility::unweighted_iterator_tag tag) const;
104
105    // weighted version
106    template<typename RandomAccessIterator>
107    double calculate(RandomAccessIterator first, RandomAccessIterator last,
108                     bool sorted, utility::weighted_iterator_tag tag) const;
109
110    // using compiler generated copy
111    //Percentiler(const Percentiler&);
112    //Percentiler& operator=(const Percentiler&);
113
114  };
115
116  // template implementations
117  // /////////////////////////
118
119  // unweighted version
120  template<typename RandomAccessIterator>
121  double 
122  Percentiler::calculate(RandomAccessIterator first, 
123                         RandomAccessIterator last,
124                         bool sorted,
125                         utility::unweighted_iterator_tag tag) const
126  {
127    // range is one value only is a special case
128    if (first+1 == last)
129      *first;
130    if (sorted) {
131      size_t n = last - first;
132      // have to take care of this special case
133      if (perc_>= 100.0)
134        return *(--last);
135      if (perc_<= 0.0)
136        return *first;
137      double j = n * perc_ / 100.0;
138      size_t i = static_cast<size_t>(j);
139      if (i==j)
140        return (first[i]+first[i-1])/2;
141      return first[i];
142    }
143
144    std::vector<double> v_copy;
145    v_copy.reserve(std::distance(first,last));
146    std::copy(first, last, std::back_inserter(v_copy));
147    std::sort(v_copy.begin(), v_copy.end());
148    return calculate(v_copy.begin(), v_copy.end(), true, tag);
149  }
150
151
152  // weighted version
153  template<typename RandomAccessIterator>
154  double Percentiler::calculate(RandomAccessIterator first, 
155                                RandomAccessIterator last,
156                                bool sorted,
157                                utility::weighted_iterator_tag tag) const
158  {
159    if (sorted) {
160      utility::iterator_traits<RandomAccessIterator> trait;
161      std::vector<double> accum_w;
162      accum_w.reserve(last-first);
163      std::partial_sum(weight_iterator(first),
164                       weight_iterator(last),
165                       std::back_inserter(accum_w));
166
167      double w_bound=perc_/100.0*accum_w.back();
168      std::vector<double>::const_iterator upper(accum_w.begin());
169      double margin=1e-10;
170      while (*upper <= w_bound+margin && upper!=accum_w.end())
171        ++upper;
172      if (upper==accum_w.end())
173        --upper;
174      std::vector<double>::const_iterator lower(upper);
175      while ( *(lower-1)>=w_bound-margin && lower>accum_w.begin())
176        --lower;
177     
178      return (trait.data(first+(upper-accum_w.begin()))+
179              trait.data(first+(lower-accum_w.begin())))/2;
180    }
181
182    std::vector<utility::DataWeight> v_copy;
183    v_copy.reserve(last-first);
184    std::copy(first, last, std::back_inserter(v_copy));
185    std::sort(v_copy.begin(), v_copy.end());
186    return calculate(v_copy.begin(), v_copy.end(), true, tag);
187  }
188
189
190}}} // of namespace statistics, yat, and theplu
191
192#endif
Note: See TracBrowser for help on using the repository browser.