source: trunk/yat/statistics/KolmogorovSmirnov.h @ 1874

Last change on this file since 1874 was 1874, checked in by Peter, 14 years ago

merge patch release 0.5.2 into trunk. Delta 0.5.2 - 0.5.1

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.3 KB
Line 
1#ifndef _theplu_yat_statistics_kolmogorov_smirnov_
2#define _theplu_yat_statistics_kolmogorov_smirnov_
3
4// $Id: KolmogorovSmirnov.h 1874 2009-03-17 22:09:01Z peter $
5
6/*
7  Copyright (C) 2008 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2009 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include <set>
27#include <vector>
28
29namespace theplu {
30namespace yat {
31namespace statistics { 
32
33  /**
34     \brief Kolmogow Smirnov Test
35   */
36  class KolmogorovSmirnov
37  {
38  public:
39    /**
40       struct used to store data in KolmogorovSmirnov.
41       
42       This struct is public to make usage of the add range function
43       more convenient.
44
45       \since New in yat 0.5
46     */
47    struct Element
48    {
49      /**
50         \brief default constructor
51      */
52      inline Element(void) {};
53
54      /**
55         \brief Constructor
56       */
57      inline Element(double x, bool class_label, double w=1.0)
58        : value(x), label(class_label), weight(w) {};
59
60      /**
61         \brief data value
62       */
63      double value;
64     
65      /**
66          bool telling which group the data point belong to
67      */
68      bool label;
69
70      /**
71         weight for the data point
72       */
73      double weight;
74
75      /**
76         \return true if value is less than rhs.value
77       */
78      bool operator<(const Element rhs) const;
79    };
80
81    /**
82       \brief Constructor
83     */ 
84    KolmogorovSmirnov(void);
85
86    /**
87       \brief add a value
88     */
89    void add(double value, bool class_label, double weight=1.0);
90
91    /**
92       \brief add a range
93       
94       value_type of \a ForwardIterator must be convertible to
95       KolmogorovSmirnov::Element
96       
97       Insertion takes typically N*log(N). However, if range is
98       sorted, insertion takes linear time. A common use case of this
99       function is when calculating several KS statistics on a data
100       set (values and weights) with different class labels.
101
102       \since New in yat 0.5
103    */
104    template <typename ForwardIterator>
105    void add(ForwardIterator first, ForwardIterator last); 
106
107    /**
108       \brief Large-Sample Approximation
109
110       This analytical approximation of p-value can be used when all
111       weight equal unity and sample sizes \a n and \a m are
112       large. The p-value is calcuated as \f$ P = \displaystyle - 2
113       \sum_{k=1}^{\infty} (-1)^ke^{-2k^2s^2}\f$, where s is the
114       scaled score:
115
116       \f$ s = \sqrt\frac{nm}{n+m} \f$ score().
117
118       \since New in yat 0.5
119
120       Following Hollander and Wolfe
121    */
122    double p_value(void) const;
123
124    /**
125       \brief p-value
126
127       Performs a permutation test using \a perm label randomizations
128       and calculate how often a score equal or larger than score() is
129       obtained.
130
131       \see shuffle
132    */
133    double p_value(size_t perm) const;
134
135    /**
136       \brief resets everything to zero
137    */
138    void reset(void);
139
140    /**
141       \brief Kolmogorov Smirnov statistic
142
143       \f$ sup_x | F_{\textrm{True}}(x) - F_{\textrm{False}}(x) | \f$ where
144       \f$ F(x) = \frac{\sum_{i:x_i\le x}w_i}{ \sum w_i} \f$
145    */
146    double score(void) const;
147
148    /**
149       \brief shuffle class labels
150
151       This is equivalent to reset and re-add values with shuffled
152       class labels.
153
154       \since New in yat 0.5
155     */
156    void shuffle(void);
157
158    /**
159       Same as score() but keeping the sign, in other words,
160       abs(signed_score())==score()
161
162       A positive score implies that values in class \c true \em on
163       \em average are smaller than values in class \c false.
164
165       \since New in yat 0.5
166     */
167    double signed_score(void) const;
168
169  private:
170    void scores(std::vector<double>&) const;
171    // add weights to sum_w1 and sum_w2 respectively depending on
172    // label in element.
173    template <typename ForwardIterator>
174    void add_sum_w(ForwardIterator first, ForwardIterator last);
175   
176    mutable bool cached_;
177    mutable double score_;
178    typedef std::multiset<Element> data_w;
179    data_w data_;
180    double sum_w1_;
181    double sum_w2_;
182
183    friend std::ostream& operator<<(std::ostream&, const KolmogorovSmirnov&);
184
185    // using compiler generated copy and assignment
186    //KolmogorovSmirnov(const KolmogorovSmirnov&);
187    //KolmogorovSmirnov& operator=(const KolmogorovSmirnov&);
188  };
189
190  /**
191     \brief output operator
192   */
193  std::ostream& operator<<(std::ostream&, const KolmogorovSmirnov&);
194
195
196  //  template implementations
197
198  template <typename ForwardIterator>
199  void KolmogorovSmirnov::add(ForwardIterator first, ForwardIterator last)
200  {
201    ForwardIterator iter(first);
202    typename data_w::const_iterator hint(data_.begin());
203    for ( ; iter!=last; ++iter) 
204      if (iter->weight) // ignore data points with zero weight
205        hint = data_.insert(hint, *iter);
206    add_sum_w(first, last);
207    cached_=false;
208  }
209
210
211  template <typename ForwardIterator>
212  void KolmogorovSmirnov::add_sum_w(ForwardIterator first, 
213                                    ForwardIterator last)
214  {
215    while (first!=last) {
216      if (first->label)
217        sum_w1_ += first->weight;
218      else
219        sum_w2_ += first->weight;
220      ++first;
221    }
222  }
223
224}}} // of namespace theplu yat statistics
225
226#endif
Note: See TracBrowser for help on using the repository browser.