source: branches/0.7-stable/yat/statistics/Percentiler.h @ 2465

Last change on this file since 2465 was 2465, checked in by Peter, 11 years ago

avoid dereference end of range iterator.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 6.1 KB
Line 
1#ifndef _theplu_yat_statistics_percentiler_
2#define _theplu_yat_statistics_percentiler_
3
4// $Id: Percentiler.h 2465 2011-04-11 20:55:41Z peter $
5
6/*
7  Copyright (C) 2008, 2009 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2010 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include "yat/utility/concept_check.h"
27#include "yat/utility/DataWeight.h"
28#include "yat/utility/iterator_traits.h"
29#include "yat/utility/yat_assert.h"
30#include "yat/utility/WeightIterator.h"
31
32#include <boost/concept_check.hpp>
33
34#include <algorithm>
35#include <cmath>
36#include <limits>
37#include <numeric>
38#include <stdexcept>
39#include <vector>
40
41namespace theplu {
42namespace yat {
43namespace statistics { 
44
45  /**
46     \brief Functor to calculate percentile of a range
47
48     \since New in yat 0.5
49   */
50  class Percentiler
51  {
52  public:
53    /**
54       \param perc percentile to calculate [0,100]. Default value is
55       50, which implies class will calculate median.
56       \param sorted if true class assumes that ranges are already
57       sorted, if false the range will copied to a new range which is
58       sorted.
59     */
60    Percentiler(double perc=50, bool sorted=false);
61
62    /**
63       Function is a non-mutable function, i.e., \a first and \a last
64       can be const_iterators.
65
66       The \a Nth percentile is defined such that, for example, when
67       having four numbers \f$ 0.69 < 1.41 < 3.14 < 28 \f$, the \a Nth
68       percentile is:
69
70       - \f$ 0.69 \f$ if \f$ N < 25 \f$
71       - \f$ (0.69+1.41)/2 \f$ if \f$ N=25 \f$
72       - \f$ 1.41 \f$ if \f$ 25 < N < 50 \f$
73       - \f$ (1.41+3.14)/2 \f$ if \f$ N=50 \f$
74       - \f$ 3.14 \f$ if \f$ 50 < N < 75 \f$
75       - \f$ (3.14+28)/2 \f$ if \f$ N=75 \f$
76       - \f$ 28 \f$ if \f$ 75 < N \f$
77
78       Similarily, if we have a weighted range \f$ x_0=0.69, w_0=1 ;
79       x_1=1.41, w_1=0 ; x_2=3.14, w_2=0.5 ; x_3=28, w_3=0.5 \f$, we
80       calculate the accumulated normalized weight \f$ S_k = \frac
81       {100}{\sum w_i}\sum_{i=0}^k w_i \f$ and the percentile is
82
83       - \f$ 0.69 \f$ if \f$ N < S_0 \f$
84       - \f$ (0.69+3.14)/2 \f$ if \f$ N=S_0  \f$
85       - \f$ 3.14 \f$ if \f$ S_0 < N < S_2 \f$
86       - \f$ (3.14+28)/2 \f$ if \f$ N=S_2 \f$
87       - \f$ 28 \f$ if \f$ S_2 < N \f$
88       
89       Note, that data point with weight zero is completely ignored.
90
91       Requirements: \c RandomAccessIterator should be a \ref
92       concept_data_iterator and \random_access_iterator
93
94       \return percentile of range
95     */
96    template<typename RandomAccessIterator>
97    double operator()(RandomAccessIterator first, 
98                      RandomAccessIterator last) const
99    {
100      BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
101      BOOST_CONCEPT_ASSERT((utility::DataIteratorConcept<RandomAccessIterator>));
102      if (first==last)
103        return std::numeric_limits<double>::quiet_NaN();
104      return calculate(first, last, sorted_, 
105       typename utility::weighted_iterator_traits<RandomAccessIterator>::type());
106    }
107  private:
108    double perc_;
109    bool sorted_;
110
111    // unweighted version
112    template<typename RandomAccessIterator>
113    double calculate(RandomAccessIterator first, RandomAccessIterator last,
114                     bool sorted, utility::unweighted_iterator_tag tag) const;
115
116    // weighted version
117    template<typename RandomAccessIterator>
118    double calculate(RandomAccessIterator first, RandomAccessIterator last,
119                     bool sorted, utility::weighted_iterator_tag tag) const;
120
121    // using compiler generated copy
122    //Percentiler(const Percentiler&);
123    //Percentiler& operator=(const Percentiler&);
124
125  };
126
127  // template implementations
128  // /////////////////////////
129
130  // unweighted version
131  template<typename RandomAccessIterator>
132  double 
133  Percentiler::calculate(RandomAccessIterator first, 
134                         RandomAccessIterator last,
135                         bool sorted,
136                         utility::unweighted_iterator_tag tag) const
137  {
138    // range is one value only is a special case
139    if (first+1 == last)
140      *first;
141    if (sorted) {
142      size_t n = last - first;
143      // have to take care of this special case
144      if (perc_>= 100.0)
145        return *(--last);
146      if (perc_<= 0.0)
147        return *first;
148      double j = n * perc_ / 100.0;
149      size_t i = static_cast<size_t>(j);
150      if (i==j)
151        return (first[i]+first[i-1])/2;
152      return first[i];
153    }
154
155    std::vector<double> v_copy;
156    v_copy.reserve(std::distance(first,last));
157    std::copy(first, last, std::back_inserter(v_copy));
158    std::sort(v_copy.begin(), v_copy.end());
159    return calculate(v_copy.begin(), v_copy.end(), true, tag);
160  }
161
162
163  // weighted version
164  template<typename RandomAccessIterator>
165  double Percentiler::calculate(RandomAccessIterator first, 
166                                RandomAccessIterator last,
167                                bool sorted,
168                                utility::weighted_iterator_tag tag) const
169  {
170    if (sorted) {
171      utility::iterator_traits<RandomAccessIterator> trait;
172      std::vector<double> accum_w;
173      accum_w.reserve(last-first);
174      std::partial_sum(weight_iterator(first),
175                       weight_iterator(last),
176                       std::back_inserter(accum_w));
177
178      double w_bound=perc_/100.0*accum_w.back();
179      std::vector<double>::const_iterator upper(accum_w.begin());
180      double margin=1e-10;
181      while (upper!=accum_w.end() && *upper <= w_bound+margin)
182        ++upper;
183      if (upper==accum_w.end())
184        --upper;
185      std::vector<double>::const_iterator lower(upper);
186      while ( *(lower-1)>=w_bound-margin && lower>accum_w.begin())
187        --lower;
188     
189      return (trait.data(first+(upper-accum_w.begin()))+
190              trait.data(first+(lower-accum_w.begin())))/2;
191    }
192
193    std::vector<utility::DataWeight> v_copy;
194    v_copy.reserve(last-first);
195    std::copy(first, last, std::back_inserter(v_copy));
196    std::sort(v_copy.begin(), v_copy.end());
197    return calculate(v_copy.begin(), v_copy.end(), true, tag);
198  }
199
200
201}}} // of namespace statistics, yat, and theplu
202
203#endif
Note: See TracBrowser for help on using the repository browser.