source: trunk/lib/statistics/utility.h @ 502

Last change on this file since 502 was 502, checked in by Peter, 16 years ago

improved median and percentile functions

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.0 KB
Line 
1// $Id: utility.h 502 2006-01-30 22:05:20Z peter $
2
3#ifndef _theplu_statistics_utility_
4#define _theplu_statistics_utility_
5
6#include <c++_tools/gslapi/vector.h>
7
8#include <algorithm>
9#include <cassert>
10#include <cmath>
11#include <vector>
12
13namespace theplu {
14namespace statistics { 
15
16  ///
17  /// Calculates the probabilty to get \a k or smaller from a
18  /// hypergeometric distribution with parameters \a n1 \a n2 \a
19  /// t. Hypergeomtric situation you get in the following situation:
20  /// Let there be \a n1 ways for a "good" selection and \a n2 ways
21  /// for a "bad" selection out of a total of possibilities. Take \a
22  /// t samples without replacement and \a k of those are "good"
23  /// samples. \a k will follow a hypergeomtric distribution.
24  /// @cumulative hypergeomtric distribution functions P(k).
25  ///
26  double cdf_hypergeometric_P(u_int k, u_int n1, u_int n2, u_int t);
27
28
29  ///
30  /// The percentile is determined by the \a p, a number between 0 and
31  /// 100. The percentile is found by interpolation, using the formula
32  /// \f$ percentile = (1 - \delta) x_i + \delta x_{i+1} \f$ where \a
33  /// p is floor\f$((n - 1)p/100)\f$ and \f$ \delta \f$ is \f$
34  /// (n-1)p/100 - i \f$.Thus the minimum value of the vector is given
35  /// by p equal to zero, the maximum is given by p equal to 100 and
36  /// the median value is given by p equal to 50. If @a sorted
37  /// is false (default), the vector is copied, the copy is sorted,
38  /// and then used to calculate the median.
39  ///
40  /// @return \a p'th percentile
41  ///
42  template <class T>
43  double percentile(const std::vector<T>& vec, const double p, 
44                    const bool sorted=false)
45  {
46    assert(!(p>100 && p<0));
47    if (sorted){
48      if (p>=100)
49        return vec.back();
50      double j = p/100 * (vec.size()-1);
51      int i = static_cast<int>(j);
52      return (1-j+floor(j))*vec[i] + (j-floor(j))*vec[i+1];
53    }
54    if (p==100)
55      return  *std::max_element(vec.begin(),vec.end());
56    std::vector<T> v_copy(vec);
57    double j = p/100 * (v_copy.size()-1);
58    int i = static_cast<int>(j);
59    std::partial_sort(v_copy.begin(),v_copy.begin()+i+2 , v_copy.end());
60    return (1-j+floor(j))*v_copy[i] + (j-floor(j))*v_copy[i+1];
61 
62  }
63
64  ///
65  /// Median is defined to be value in the middle. If number of values
66  /// is even median is the average of the two middle values.  the
67  /// median value is given by p equal to 50. If @a sorted is false
68  /// (default), the vector is copied, the copy is sorted, and then
69  /// used to calculate the median.
70  ///
71  /// @return median
72  ///
73  /// @note interface will change
74  ///
75  template <class T> 
76  inline double median(const std::vector<T>& v, const bool sorted=false) 
77  { return percentile(v, 50.0, sorted); }
78
79  ///
80  /// Median is defined to be value in the middle. If number of values
81  /// is even median is the average of the two middle values. If @a
82  /// sorted is true, the function assumes vector @a vec to be
83  /// sorted. If @a sorted is false, the vector is copied, the copy is
84  /// sorted (default), and then used to calculate the median.
85  ///
86  /// @return median
87  ///
88  double median(const gslapi::vector& vec, const bool sorted=false);
89
90}} // of namespace statistics and namespace theplu
91
92#endif
93
Note: See TracBrowser for help on using the repository browser.