source: branches/0.7-stable/yat/statistics/Percentiler.h @ 2466

Last change on this file since 2466 was 2466, checked in by Peter, 11 years ago

Fixed so percentiler ignores element with zero weight

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 6.2 KB
Line 
1#ifndef _theplu_yat_statistics_percentiler_
2#define _theplu_yat_statistics_percentiler_
3
4// $Id: Percentiler.h 2466 2011-04-12 00:10:59Z peter $
5
6/*
7  Copyright (C) 2008, 2009 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2010 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include "yat/utility/concept_check.h"
27#include "yat/utility/DataWeight.h"
28#include "yat/utility/iterator_traits.h"
29#include "yat/utility/yat_assert.h"
30#include "yat/utility/WeightIterator.h"
31
32#include <boost/concept_check.hpp>
33
34#include <algorithm>
35#include <cmath>
36#include <limits>
37#include <numeric>
38#include <stdexcept>
39#include <vector>
40
41namespace theplu {
42namespace yat {
43namespace statistics { 
44
45  /**
46     \brief Functor to calculate percentile of a range
47
48     \since New in yat 0.5
49   */
50  class Percentiler
51  {
52  public:
53    /**
54       \param perc percentile to calculate [0,100]. Default value is
55       50, which implies class will calculate median.
56       \param sorted if true class assumes that ranges are already
57       sorted, if false the range will copied to a new range which is
58       sorted.
59     */
60    Percentiler(double perc=50, bool sorted=false);
61
62    /**
63       Function is a non-mutable function, i.e., \a first and \a last
64       can be const_iterators.
65
66       The \a Nth percentile is defined such that, for example, when
67       having four numbers \f$ 0.69 < 1.41 < 3.14 < 28 \f$, the \a Nth
68       percentile is:
69
70       - \f$ 0.69 \f$ if \f$ N < 25 \f$
71       - \f$ (0.69+1.41)/2 \f$ if \f$ N=25 \f$
72       - \f$ 1.41 \f$ if \f$ 25 < N < 50 \f$
73       - \f$ (1.41+3.14)/2 \f$ if \f$ N=50 \f$
74       - \f$ 3.14 \f$ if \f$ 50 < N < 75 \f$
75       - \f$ (3.14+28)/2 \f$ if \f$ N=75 \f$
76       - \f$ 28 \f$ if \f$ 75 < N \f$
77
78       Similarily, if we have a weighted range \f$ x_0=0.69, w_0=1 ;
79       x_1=1.41, w_1=0 ; x_2=3.14, w_2=0.5 ; x_3=28, w_3=0.5 \f$, we
80       calculate the accumulated normalized weight \f$ S_k = \frac
81       {100}{\sum w_i}\sum_{i=0}^k w_i \f$ and the percentile is
82
83       - \f$ 0.69 \f$ if \f$ N < S_0 \f$
84       - \f$ (0.69+3.14)/2 \f$ if \f$ N=S_0  \f$
85       - \f$ 3.14 \f$ if \f$ S_0 < N < S_2 \f$
86       - \f$ (3.14+28)/2 \f$ if \f$ N=S_2 \f$
87       - \f$ 28 \f$ if \f$ S_2 < N \f$
88       
89       Note, that data point with weight zero is completely ignored.
90
91       Requirements: \c RandomAccessIterator should be a \ref
92       concept_data_iterator and \random_access_iterator
93
94       \return percentile of range
95     */
96    template<typename RandomAccessIterator>
97    double operator()(RandomAccessIterator first, 
98                      RandomAccessIterator last) const
99    {
100      BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
101      BOOST_CONCEPT_ASSERT((utility::DataIteratorConcept<RandomAccessIterator>));
102      if (first==last)
103        return std::numeric_limits<double>::quiet_NaN();
104      return calculate(first, last, sorted_, 
105       typename utility::weighted_iterator_traits<RandomAccessIterator>::type());
106    }
107  private:
108    double perc_;
109    bool sorted_;
110
111    // unweighted version
112    template<typename RandomAccessIterator>
113    double calculate(RandomAccessIterator first, RandomAccessIterator last,
114                     bool sorted, utility::unweighted_iterator_tag tag) const;
115
116    // weighted version
117    template<typename RandomAccessIterator>
118    double calculate(RandomAccessIterator first, RandomAccessIterator last,
119                     bool sorted, utility::weighted_iterator_tag tag) const;
120
121    // using compiler generated copy
122    //Percentiler(const Percentiler&);
123    //Percentiler& operator=(const Percentiler&);
124
125  };
126
127  // template implementations
128  // /////////////////////////
129
130  // unweighted version
131  template<typename RandomAccessIterator>
132  double 
133  Percentiler::calculate(RandomAccessIterator first, 
134                         RandomAccessIterator last,
135                         bool sorted,
136                         utility::unweighted_iterator_tag tag) const
137  {
138    // range is one value only is a special case
139    if (first+1 == last)
140      *first;
141    if (sorted) {
142      size_t n = last - first;
143      // have to take care of this special case
144      if (perc_>= 100.0)
145        return *(--last);
146      if (perc_<= 0.0)
147        return *first;
148      double j = n * perc_ / 100.0;
149      size_t i = static_cast<size_t>(j);
150      if (i==j)
151        return (first[i]+first[i-1])/2;
152      return first[i];
153    }
154
155    std::vector<double> v_copy;
156    v_copy.reserve(std::distance(first,last));
157    std::copy(first, last, std::back_inserter(v_copy));
158    std::sort(v_copy.begin(), v_copy.end());
159    return calculate(v_copy.begin(), v_copy.end(), true, tag);
160  }
161
162
163  // weighted version
164  template<typename RandomAccessIterator>
165  double Percentiler::calculate(RandomAccessIterator first, 
166                                RandomAccessIterator last,
167                                bool sorted,
168                                utility::weighted_iterator_tag tag) const
169  {
170    if (sorted) {
171      utility::iterator_traits<RandomAccessIterator> trait;
172      std::vector<double> accum_w;
173      accum_w.reserve(last-first);
174      std::partial_sum(weight_iterator(first),
175                       weight_iterator(last),
176                       std::back_inserter(accum_w));
177
178      double w_bound=perc_/100.0*accum_w.back();
179      std::vector<double>::const_iterator upper(accum_w.begin());
180      double margin=1e-10;
181      while (upper!=accum_w.end() && *upper <= w_bound+margin)
182        ++upper;
183      while (upper!=accum_w.begin() &&
184             (upper==accum_w.end() || 
185              trait.weight(first+(upper-accum_w.begin()))==0.0))
186        --upper;
187      std::vector<double>::const_iterator lower(upper);
188      while ( *(lower-1)>=w_bound-margin && lower>accum_w.begin())
189        --lower;
190     
191      return (trait.data(first+(upper-accum_w.begin()))+
192              trait.data(first+(lower-accum_w.begin())))/2;
193    }
194
195    std::vector<utility::DataWeight> v_copy;
196    v_copy.reserve(last-first);
197    std::copy(first, last, std::back_inserter(v_copy));
198    std::sort(v_copy.begin(), v_copy.end());
199    return calculate(v_copy.begin(), v_copy.end(), true, tag);
200  }
201
202
203}}} // of namespace statistics, yat, and theplu
204
205#endif
Note: See TracBrowser for help on using the repository browser.