source: trunk/yat/statistics/Percentiler.h @ 2263

Last change on this file since 2263 was 2263, checked in by Peter, 13 years ago

adding a class DataIteratorConcept? for concept data iterator. refs #624

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 6.1 KB
Line 
1#ifndef _theplu_yat_statistics_percentiler_
2#define _theplu_yat_statistics_percentiler_
3
4// $Id: Percentiler.h 2263 2010-05-26 22:13:07Z peter $
5
6/*
7  Copyright (C) 2008, 2009 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2010 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include "yat/utility/concept_check.h"
27#include "yat/utility/DataWeight.h"
28#include "yat/utility/iterator_traits.h"
29#include "yat/utility/yat_assert.h"
30#include "yat/utility/WeightIterator.h"
31
32#include <boost/concept_check.hpp>
33
34#include <algorithm>
35#include <cmath>
36#include <numeric>
37#include <stdexcept>
38#include <vector>
39
40namespace theplu {
41namespace yat {
42namespace statistics { 
43
44  /**
45     \brief Functor to calculate percentile of a range
46
47     \since New in yat 0.5
48   */
49  class Percentiler
50  {
51  public:
52    /**
53       \param perc percentile to calculate [0,100]. Default value is
54       50, which implies class will calculate median.
55       \param sorted if true class assumes that ranges are already
56       sorted, if false the range will copied to a new range which is
57       sorted.
58     */
59    Percentiler(double perc=50, bool sorted=false);
60
61    /**
62       Function is a non-mutable function, i.e., \a first and \a last
63       can be const_iterators.
64
65       The \a Nth percentile is defined such that, for example, when
66       having four numbers \f$ 0.69 < 1.41 < 3.14 < 28 \f$, the \a Nth
67       percentile is:
68
69       - \f$ 0.69 \f$ if \f$ N < 25 \f$
70       - \f$ (0.69+1.41)/2 \f$ if \f$ N=25 \f$
71       - \f$ 1.41 \f$ if \f$ 25 < N < 50 \f$
72       - \f$ (1.41+3.14)/2 \f$ if \f$ N=50 \f$
73       - \f$ 3.14 \f$ if \f$ 50 < N < 75 \f$
74       - \f$ (3.14+28)/2 \f$ if \f$ N=75 \f$
75       - \f$ 28 \f$ if \f$ 75 < N \f$
76
77       Similarily, if we have a weighted range \f$ x_0=0.69, w_0=1 ;
78       x_1=1.41, w_1=0 ; x_2=3.14, w_2=0.5 ; x_3=28, w_3=0.5 \f$, we
79       calculate the accumulated normalized weight \f$ S_k = \frac
80       {100}{\sum w_i}\sum_{i=0}^k w_i \f$ and the percentile is
81
82       - \f$ 0.69 \f$ if \f$ N < S_0 \f$
83       - \f$ (0.69+3.14)/2 \f$ if \f$ N=S_0  \f$
84       - \f$ 3.14 \f$ if \f$ S_0 < N < S_2 \f$
85       - \f$ (3.14+28)/2 \f$ if \f$ N=S_2 \f$
86       - \f$ 28 \f$ if \f$ S_2 < N \f$
87       
88       Note, that data point with weight zero is completely ignored.
89
90       Requirements: \c RandomAccessIterator should be a \ref
91       concept_data_iterator and \random_access_iterator
92
93       \return percentile of range
94     */
95    template<typename RandomAccessIterator>
96    double operator()(RandomAccessIterator first, 
97                      RandomAccessIterator last) const
98    {
99      BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
100      BOOST_CONCEPT_ASSERT((utility::DataIteratorConcept<RandomAccessIterator>));
101      return calculate(first, last, sorted_, 
102       typename utility::weighted_iterator_traits<RandomAccessIterator>::type());
103    }
104  private:
105    double perc_;
106    bool sorted_;
107
108    // unweighted version
109    template<typename RandomAccessIterator>
110    double calculate(RandomAccessIterator first, RandomAccessIterator last,
111                     bool sorted, utility::unweighted_iterator_tag tag) const;
112
113    // weighted version
114    template<typename RandomAccessIterator>
115    double calculate(RandomAccessIterator first, RandomAccessIterator last,
116                     bool sorted, utility::weighted_iterator_tag tag) const;
117
118    // using compiler generated copy
119    //Percentiler(const Percentiler&);
120    //Percentiler& operator=(const Percentiler&);
121
122  };
123
124  // template implementations
125  // /////////////////////////
126
127  // unweighted version
128  template<typename RandomAccessIterator>
129  double 
130  Percentiler::calculate(RandomAccessIterator first, 
131                         RandomAccessIterator last,
132                         bool sorted,
133                         utility::unweighted_iterator_tag tag) const
134  {
135    // range is one value only is a special case
136    if (first+1 == last)
137      *first;
138    if (sorted) {
139      size_t n = last - first;
140      // have to take care of this special case
141      if (perc_>= 100.0)
142        return *(--last);
143      if (perc_<= 0.0)
144        return *first;
145      double j = n * perc_ / 100.0;
146      size_t i = static_cast<size_t>(j);
147      if (i==j)
148        return (first[i]+first[i-1])/2;
149      return first[i];
150    }
151
152    std::vector<double> v_copy;
153    v_copy.reserve(std::distance(first,last));
154    std::copy(first, last, std::back_inserter(v_copy));
155    std::sort(v_copy.begin(), v_copy.end());
156    return calculate(v_copy.begin(), v_copy.end(), true, tag);
157  }
158
159
160  // weighted version
161  template<typename RandomAccessIterator>
162  double Percentiler::calculate(RandomAccessIterator first, 
163                                RandomAccessIterator last,
164                                bool sorted,
165                                utility::weighted_iterator_tag tag) const
166  {
167    if (sorted) {
168      utility::iterator_traits<RandomAccessIterator> trait;
169      std::vector<double> accum_w;
170      accum_w.reserve(last-first);
171      std::partial_sum(weight_iterator(first),
172                       weight_iterator(last),
173                       std::back_inserter(accum_w));
174
175      double w_bound=perc_/100.0*accum_w.back();
176      std::vector<double>::const_iterator upper(accum_w.begin());
177      double margin=1e-10;
178      while (*upper <= w_bound+margin && upper!=accum_w.end())
179        ++upper;
180      if (upper==accum_w.end())
181        --upper;
182      std::vector<double>::const_iterator lower(upper);
183      while ( *(lower-1)>=w_bound-margin && lower>accum_w.begin())
184        --lower;
185     
186      return (trait.data(first+(upper-accum_w.begin()))+
187              trait.data(first+(lower-accum_w.begin())))/2;
188    }
189
190    std::vector<utility::DataWeight> v_copy;
191    v_copy.reserve(last-first);
192    std::copy(first, last, std::back_inserter(v_copy));
193    std::sort(v_copy.begin(), v_copy.end());
194    return calculate(v_copy.begin(), v_copy.end(), true, tag);
195  }
196
197
198}}} // of namespace statistics, yat, and theplu
199
200#endif
Note: See TracBrowser for help on using the repository browser.