source: trunk/yat/statistics/Percentiler.h @ 1486

Last change on this file since 1486 was 1486, checked in by Jari Häkkinen, 13 years ago

Addresses #436.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.8 KB
Line 
1#ifndef _theplu_yat_statistics_percentiler_
2#define _theplu_yat_statistics_percentiler_
3
4// $Id: Percentiler.h 1486 2008-09-09 21:17:19Z jari $
5
6/*
7  Copyright (C) 2008 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program; if not, write to the Free Software
23  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24  02111-1307, USA.
25*/
26
27#include "yat/utility/DataWeight.h"
28#include "yat/utility/iterator_traits.h"
29#include "yat/utility/yat_assert.h"
30#include "yat/utility/WeightIterator.h"
31
32#include <algorithm>
33#include <cassert>
34#include <cmath>
35#include <numeric>
36#include <stdexcept>
37#include <vector>
38
39#include <iostream>
40
41namespace theplu {
42namespace yat {
43namespace statistics { 
44
45  /**
46     \brief Functor to calculate percentile of a range
47
48     \since New in yat 0.5
49   */
50  class Percentiler
51  {
52  public:
53    /**
54       \param perc percentile to calculate [0,100]. Default value is
55       50, which implies class will calculate median.
56       \param sorted if true class assumes that ranges are already
57       sorted, if false the range will copied to a new range which is
58       sorted.
59     */
60    Percentiler(double perc=50, bool sorted=false);
61
62    /**
63       Function is a non-mutable function, i.e., \a first and \a last
64       can be const_iterators.
65
66       The \a Nth percentile is defined such that, for example, when
67       having four numbers \f$ 0.69 < 1.41 < 3.14 < 28 \f$, the \a Nth
68       percentile is:
69
70       - \f$ 0.69 \f$ if \f$ N < 25 \f$
71       - \f$ (0.69+1.41)/2 \f$ if \f$ N=25 \f$
72       - \f$ 1.41 \f$ if \f$ 25 < N < 50 \f$
73       - \f$ (1.41+3.14)/2 \f$ if \f$ N=50 \f$
74       - \f$ 3.14 \f$ if \f$ 50 < N < 75 \f$
75       - \f$ (3.14+28)/2 \f$ if \f$ N=75 \f$
76       - \f$ 28 \f$ if \f$ 75 < N \f$
77
78       Similarily, if we have a weighted range \f$ x_0=0.69, w_0=1 ;
79       x_1=1.41, w_1=0 ; x_2=3.14, w_2=0.5 ; x_3=28, w_3=0.5 \f$, we
80       calculate the accumulated normalized weight \f$ S_k = \frac
81       {100}{\sum w_i}\sum_{i=0}^k w_i \f$ and the percentile is
82
83       - \f$ 0.69 \f$ if \f$ N < S_0 \f$
84       - \f$ (0.69+3.14)/2 \f$ if \f$ N=S_0  \f$
85       - \f$ 3.14 \f$ if \f$ S_0 < N < S_2 \f$
86       - \f$ (3.14+28)/2 \f$ if \f$ N=S_2 \f$
87       - \f$ 28 \f$ if \f$ S_2 < N \f$
88       
89       Note, that data point with weight zero is completely ignored.
90
91       \return percentile of range
92     */
93    template<typename RandomAccessIterator>
94    double operator()(RandomAccessIterator first, 
95                      RandomAccessIterator last) const
96    {
97      return calculate(first, last, sorted_, 
98       typename utility::weighted_iterator_traits<RandomAccessIterator>::type());
99    }
100  private:
101    double perc_;
102    bool sorted_;
103
104    // unweighted version
105    template<typename RandomAccessIterator>
106    double calculate(RandomAccessIterator first, RandomAccessIterator last,
107                     bool sorted, utility::unweighted_iterator_tag tag) const;
108
109    // weighted version
110    template<typename RandomAccessIterator>
111    double calculate(RandomAccessIterator first, RandomAccessIterator last,
112                     bool sorted, utility::weighted_iterator_tag tag) const;
113
114    // using compiler generated copy
115    //Percentiler(const Percentiler&);
116    //Percentiler& operator=(const Percentiler&);
117
118  };
119
120  // template implementations
121  // /////////////////////////
122
123  // unweighted version
124  template<typename RandomAccessIterator>
125  double 
126  Percentiler::calculate(RandomAccessIterator first, 
127                         RandomAccessIterator last,
128                         bool sorted,
129                         utility::unweighted_iterator_tag tag) const
130  {
131    // range is one value only is a special case
132    if (first+1 == last)
133      *first;
134    if (sorted) {
135      size_t n = last - first;
136      // have to take care of this special case
137      if (perc_>= 100.0)
138        return *(--last);
139      if (perc_<= 0.0)
140        return *first;
141      double j = n * perc_ / 100.0;
142      size_t i = static_cast<size_t>(j);
143      if (i==j)
144        return (first[i]+first[i-1])/2;
145      return first[i];
146    }
147
148    std::vector<double> v_copy;
149    v_copy.reserve(std::distance(first,last));
150    std::copy(first, last, std::back_inserter(v_copy));
151    std::sort(v_copy.begin(), v_copy.end());
152    return calculate(v_copy.begin(), v_copy.end(), true, tag);
153  }
154
155
156  // weighted version
157  template<typename RandomAccessIterator>
158  double Percentiler::calculate(RandomAccessIterator first, 
159                                RandomAccessIterator last,
160                                bool sorted,
161                                utility::weighted_iterator_tag tag) const
162  {
163    if (sorted) {
164      utility::iterator_traits<RandomAccessIterator> trait;
165      std::vector<double> accum_w;
166      accum_w.reserve(last-first);
167      std::partial_sum(weight_iterator(first),
168                       weight_iterator(last),
169                       std::back_inserter(accum_w));
170
171      double w_bound=perc_/100.0*accum_w.back();
172      std::vector<double>::const_iterator upper(accum_w.begin());
173      double margin=1e-10;
174      while (*upper <= w_bound+margin && upper!=accum_w.end())
175        ++upper;
176      if (upper==accum_w.end())
177        --upper;
178      std::vector<double>::const_iterator lower(upper);
179      while ( *(lower-1)>=w_bound-margin && lower>accum_w.begin())
180        --lower;
181     
182      return (trait.data(first+(upper-accum_w.begin()))+
183              trait.data(first+(lower-accum_w.begin())))/2;
184    }
185
186    std::vector<utility::DataWeight> v_copy;
187    v_copy.reserve(last-first);
188    std::copy(first, last, std::back_inserter(v_copy));
189    std::sort(v_copy.begin(), v_copy.end());
190    return calculate(v_copy.begin(), v_copy.end(), true, tag);
191  }
192
193
194}}} // of namespace statistics, yat, and theplu
195
196#endif
Note: See TracBrowser for help on using the repository browser.