source: trunk/yat/statistics/KolmogorovSmirnov.h @ 1797

Last change on this file since 1797 was 1797, checked in by Peter, 12 years ago

updating copyright statements

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.2 KB
Line 
1#ifndef _theplu_yat_statistics_kolmogorov_smirnov_
2#define _theplu_yat_statistics_kolmogorov_smirnov_
3
4// $Id: KolmogorovSmirnov.h 1797 2009-02-12 18:07:10Z peter $
5
6/*
7  Copyright (C) 2008 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2009 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include <set>
27#include <vector>
28
29namespace theplu {
30namespace yat {
31namespace statistics { 
32
33  /**
34     \brief Kolmogow Smirnov Test
35   */
36  class KolmogorovSmirnov
37  {
38  public:
39    /**
40       struct used to store data in KolmogorovSmirnov.
41       
42       This struct is public to make usage of the add range function
43       more convenient.
44
45       \since New in yat 0.5
46     */
47    struct Element
48    {
49      /**
50         \brief default constructor
51      */
52      inline Element(void) {};
53
54      /**
55         \brief Constructor
56       */
57      inline Element(double x, bool class_label, double w=1.0)
58        : value(x), label(class_label), weight(w) {};
59
60      /**
61         \brief data value
62       */
63      double value;
64     
65      /**
66          bool telling which group the data point belong to
67      */
68      bool label;
69
70      /**
71         weight for the data point
72       */
73      double weight;
74
75      /**
76         \return true if value is less than rhs.value
77       */
78      bool operator<(const Element rhs) const;
79    };
80
81    /**
82       \brief Constructor
83     */ 
84    KolmogorovSmirnov(void);
85
86    /**
87       \brief add a value
88     */
89    void add(double value, bool class_label, double weight=1.0);
90
91    /**
92       \brief add a range
93       
94       value_type of \a ForwardIterator must be convertible to
95       KolmogorovSmirnov::Element
96       
97       Insertion takes typically N*log(N). However, if range is
98       sorted, insertion takes linear time. A common use case of this
99       function is when calculating several KS statistics on a data
100       set (values and weights) with different class labels.
101
102       \since New in yat 0.5
103    */
104    template <typename ForwardIterator>
105    void add(ForwardIterator first, ForwardIterator last); 
106
107    /**
108       \brief Large-Sample Approximation
109
110       This analytical approximation of p-value can be used when all
111       weight equal unity and sample sizes \a n and \a m are
112       large. The p-value is calcuated as \f$ P = \displaystyle - 2
113       \sum_{k=1}^{\infty} (-1)^ke^{-2k^2s^2}\f$, where s is the
114       scaled score:
115
116       \f$ s = \sqrt\frac{nm}{n+m} \f$ score().
117
118       \since New in yat 0.5
119
120       Following Hollander and Wolfe
121    */
122    double p_value(void) const;
123
124    /**
125       \brief p-value
126
127       Performs a permutation test using \a perm label randomizations
128       and calculate how often a score equal or larger than score() is
129       obtained.
130
131       \see shuffle
132    */
133    double p_value(size_t perm) const;
134
135    /**
136       \brief resets everything to zero
137    */
138    void reset(void);
139
140    /**
141       \brief Kolmogorov Smirnov statistic
142
143       \f$ sup_x | F_{\textrm{True}}(x) - F_{\textrm{False}}(x) | \f$ where
144       \f$ F(x) = \frac{\sum_{i:x_i\le x}w_i}{ \sum w_i} \f$
145    */
146    double score(void) const;
147
148    /**
149       \brief shuffle class labels
150
151       This is equivalent to reset and re-add values with shuffled
152       class labels.
153
154       \since New in yat 0.5
155     */
156    void shuffle(void);
157
158    /**
159       Same as score() but keeping the sign, in other words,
160       abs(signed_score())==score()
161
162       \since New in yat 0.5
163     */
164    double signed_score(void) const;
165
166  private:
167    void scores(std::vector<double>&) const;
168    // add weights to sum_w1 and sum_w2 respectively depending on
169    // label in element.
170    template <typename ForwardIterator>
171    void add_sum_w(ForwardIterator first, ForwardIterator last);
172   
173    mutable bool cached_;
174    mutable double score_;
175    typedef std::multiset<Element> data_w;
176    data_w data_;
177    double sum_w1_;
178    double sum_w2_;
179
180    friend std::ostream& operator<<(std::ostream&, const KolmogorovSmirnov&);
181
182    // using compiler generated copy and assignment
183    //KolmogorovSmirnov(const KolmogorovSmirnov&);
184    //KolmogorovSmirnov& operator=(const KolmogorovSmirnov&);
185  };
186
187  /**
188     \brief output operator
189   */
190  std::ostream& operator<<(std::ostream&, const KolmogorovSmirnov&);
191
192
193  //  template implementations
194
195  template <typename ForwardIterator>
196  void KolmogorovSmirnov::add(ForwardIterator first, ForwardIterator last)
197  {
198    ForwardIterator iter(first);
199    typename data_w::const_iterator hint(data_.begin());
200    for ( ; iter!=last; ++iter) 
201      if (iter->weight) // ignore data points with zero weight
202        hint = data_.insert(hint, *iter);
203    add_sum_w(first, last);
204    cached_=false;
205  }
206
207
208  template <typename ForwardIterator>
209  void KolmogorovSmirnov::add_sum_w(ForwardIterator first, 
210                                    ForwardIterator last)
211  {
212    while (first!=last) {
213      if (first->label)
214        sum_w1_ += first->weight;
215      else
216        sum_w2_ += first->weight;
217      ++first;
218    }
219  }
220
221}}} // of namespace theplu yat statistics
222
223#endif
Note: See TracBrowser for help on using the repository browser.