source: trunk/lib/statistics/utility.h @ 519

Last change on this file since 519 was 519, checked in by Peter, 17 years ago

percentile is now called percentile

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.9 KB
Line 
1// $Id: utility.h 519 2006-02-22 09:28:12Z peter $
2
3#ifndef _theplu_statistics_utility_
4#define _theplu_statistics_utility_
5
#include <c++_tools/gslapi/vector.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <limits>
#include <vector>
12
13namespace theplu {
14namespace statistics { 
15
16  // Forward declaration: median() below calls percentile() ahead of its definition.
17  template <class T>
18  double percentile(const std::vector<T>& vec, const double p, 
19                    const bool sorted=false);
20 
21
22  ///
23  /// Calculates the probability of getting \a k or fewer "good"
24  /// samples from a hypergeometric distribution with parameters
25  /// \a n1, \a n2 and \a t. The hypergeometric situation arises as
26  /// follows: let there be \a n1 ways for a "good" selection and \a n2
27  /// ways for a "bad" selection out of a total of \a n1 + \a n2
28  /// possibilities. Take \a t samples without replacement; the number
29  /// \a k of "good" samples follows a hypergeometric distribution.
30  /// @return cumulative hypergeometric distribution function P(k).
31  ///
32  double cdf_hypergeometric_P(u_int k, u_int n1, u_int n2, u_int t);
33
34
35  ///
36  /// Median is defined to be value in the middle. If number of values
37  /// is even median is the average of the two middle values.  the
38  /// median value is given by p equal to 50. If @a sorted is false
39  /// (default), the vector is copied, the copy is sorted, and then
40  /// used to calculate the median.
41  ///
42  /// @return median
43  ///
44  /// @note interface will change
45  ///
46  template <class T> 
47  inline double median(const std::vector<T>& v, const bool sorted=false) 
48  { return percentile(v, 50.0, sorted); }
49
50  ///
51  /// Median is defined to be value in the middle. If number of values
52  /// is even median is the average of the two middle values. If @a
53  /// sorted is true, the function assumes vector @a vec to be
54  /// sorted. If @a sorted is false (default), the vector is copied,
55  /// the copy is sorted, and then used to calculate the median.
56  ///
57  /// @return median
58  ///
59  double median(const gslapi::vector& vec, const bool sorted=false);
60
61  ///
62  /// The percentile is determined by the \a p, a number between 0 and
63  /// 100. The percentile is found by interpolation, using the formula
64  /// \f$ percentile = (1 - \delta) x_i + \delta x_{i+1} \f$ where \a
65  /// p is floor\f$((n - 1)p/100)\f$ and \f$ \delta \f$ is \f$
66  /// (n-1)p/100 - i \f$.Thus the minimum value of the vector is given
67  /// by p equal to zero, the maximum is given by p equal to 100 and
68  /// the median value is given by p equal to 50. If @a sorted
69  /// is false (default), the vector is copied, the copy is sorted,
70  /// and then used to calculate the median.
71  ///
72  /// @return \a p'th percentile
73  ///
74  template <class T>
75  double percentile(const std::vector<T>& vec, const double p, 
76                    const bool sorted=false)
77  {
78    assert(!(p>100 && p<0));
79    if (sorted){
80      if (p>=100)
81        return vec.back();
82      double j = p/100 * (vec.size()-1);
83      int i = static_cast<int>(j);
84      return (1-j+floor(j))*vec[i] + (j-floor(j))*vec[i+1];
85    }
86    if (p==100)
87      return  *std::max_element(vec.begin(),vec.end());
88    std::vector<T> v_copy(vec);
89    double j = p/100 * (v_copy.size()-1);
90    int i = static_cast<int>(j);
91    std::partial_sort(v_copy.begin(),v_copy.begin()+i+2 , v_copy.end());
92    return (1-j+floor(j))*v_copy[i] + (j-floor(j))*v_copy[i+1];
93 
94  }
95
96  ///
97  /// The percentile is determined by \a p, a number between 0 and
98  /// 100. The percentile is found by interpolation, using the formula
99  /// \f$ percentile = (1 - \delta) x_i + \delta x_{i+1} \f$ where \a
100  /// i is floor\f$((n - 1)p/100)\f$ and \f$ \delta \f$ is \f$
101  /// (n-1)p/100 - i \f$. Thus the minimum value of the vector is given
102  /// by p equal to zero, the maximum is given by p equal to 100 and
103  /// the median value is given by p equal to 50. If @a sorted
104  /// is false (default), the vector is copied, the copy is sorted,
105  /// and then used to calculate the percentile.
106  ///
107  /// @return \a p'th percentile
108  ///
109  double percentile(const gslapi::vector& vec, const double, 
110                    const bool sorted=false);
111
112}} // of namespace statistics and namespace theplu
113
114#endif
115
Note: See TracBrowser for help on using the repository browser.