source: trunk/yat/statistics/KolmogorovSmirnov.h @ 2064

Last change on this file since 2064 was 2064, checked in by Peter, 12 years ago

passing by reference

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.3 KB
Line 
1#ifndef _theplu_yat_statistics_kolmogorov_smirnov_
2#define _theplu_yat_statistics_kolmogorov_smirnov_
3
4// $Id: KolmogorovSmirnov.h 2064 2009-09-16 17:19:09Z peter $
5
6/*
7  Copyright (C) 2008 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2009 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include <iosfwd>
27#include <set>
28#include <vector>
29
30namespace theplu {
31namespace yat {
32namespace statistics { 
33
34  /**
35     \brief Kolmogow Smirnov Test
36   */
37  class KolmogorovSmirnov
38  {
39  public:
40    /**
41       struct used to store data in KolmogorovSmirnov.
42       
43       This struct is public to make usage of the add range function
44       more convenient.
45
46       \since New in yat 0.5
47     */
48    struct Element
49    {
50      /**
51         \brief default constructor
52      */
53      inline Element(void) {};
54
55      /**
56         \brief Constructor
57       */
58      inline Element(double x, bool class_label, double w=1.0)
59        : value(x), label(class_label), weight(w) {};
60
61      /**
62         \brief data value
63       */
64      double value;
65     
66      /**
67          bool telling which group the data point belong to
68      */
69      bool label;
70
71      /**
72         weight for the data point
73       */
74      double weight;
75
76      /**
77         \return true if value is less than rhs.value
78       */
79      bool operator<(const Element& rhs) const;
80    };
81
82    /**
83       \brief Constructor
84     */ 
85    KolmogorovSmirnov(void);
86
87    /**
88       \brief add a value
89     */
90    void add(double value, bool class_label, double weight=1.0);
91
92    /**
93       \brief add a range
94       
95       value_type of \a ForwardIterator must be convertible to
96       KolmogorovSmirnov::Element
97       
98       Insertion takes typically N*log(N). However, if range is
99       sorted, insertion takes linear time. A common use case of this
100       function is when calculating several KS statistics on a data
101       set (values and weights) with different class labels.
102
103       \since New in yat 0.5
104    */
105    template <typename ForwardIterator>
106    void add(ForwardIterator first, ForwardIterator last); 
107
108    /**
109       \brief Large-Sample Approximation
110
111       This analytical approximation of p-value can be used when all
112       weight equal unity and sample sizes \a n and \a m are
113       large. The p-value is calcuated as \f$ P = \displaystyle - 2
114       \sum_{k=1}^{\infty} (-1)^ke^{-2k^2s^2}\f$, where s is the
115       scaled score:
116
117       \f$ s = \sqrt\frac{nm}{n+m} \f$ score().
118
119       \since New in yat 0.5
120
121       Following Hollander and Wolfe
122    */
123    double p_value(void) const;
124
125    /**
126       \brief p-value
127
128       Performs a permutation test using \a perm label randomizations
129       and calculate how often a score equal or larger than score() is
130       obtained.
131
132       \see shuffle
133    */
134    double p_value(size_t perm) const;
135
136    /**
137       \brief resets everything to zero
138    */
139    void reset(void);
140
141    /**
142       \brief Kolmogorov Smirnov statistic
143
144       \f$ sup_x | F_{\textrm{True}}(x) - F_{\textrm{False}}(x) | \f$ where
145       \f$ F(x) = \frac{\sum_{i:x_i\le x}w_i}{ \sum w_i} \f$
146    */
147    double score(void) const;
148
149    /**
150       \brief shuffle class labels
151
152       This is equivalent to reset and re-add values with shuffled
153       class labels.
154
155       \since New in yat 0.5
156     */
157    void shuffle(void);
158
159    /**
160       Same as score() but keeping the sign, in other words,
161       abs(signed_score())==score()
162
163       A positive score implies that values in class \c true \em on
164       \em average are smaller than values in class \c false.
165
166       \since New in yat 0.5
167     */
168    double signed_score(void) const;
169
170  private:
171    void scores(std::vector<double>&) const;
172    // add weights to sum_w1 and sum_w2 respectively depending on
173    // label in element.
174    template <typename ForwardIterator>
175    void add_sum_w(ForwardIterator first, ForwardIterator last);
176   
177    mutable bool cached_;
178    mutable double score_;
179    typedef std::multiset<Element> data_w;
180    data_w data_;
181    double sum_w1_;
182    double sum_w2_;
183
184    friend std::ostream& operator<<(std::ostream&, const KolmogorovSmirnov&);
185
186    // using compiler generated copy and assignment
187    //KolmogorovSmirnov(const KolmogorovSmirnov&);
188    //KolmogorovSmirnov& operator=(const KolmogorovSmirnov&);
189  };
190
191  /**
192     \brief output operator
193
194     \relates KolmogorovSmirnov
195   */
196  std::ostream& operator<<(std::ostream&, const KolmogorovSmirnov&);
197
198
199  //  template implementations
200
201  template <typename ForwardIterator>
202  void KolmogorovSmirnov::add(ForwardIterator first, ForwardIterator last)
203  {
204    ForwardIterator iter(first);
205    typename data_w::const_iterator hint(data_.begin());
206    for ( ; iter!=last; ++iter) 
207      if (iter->weight) // ignore data points with zero weight
208        hint = data_.insert(hint, *iter);
209    add_sum_w(first, last);
210    cached_=false;
211  }
212
213
214  template <typename ForwardIterator>
215  void KolmogorovSmirnov::add_sum_w(ForwardIterator first, 
216                                    ForwardIterator last)
217  {
218    while (first!=last) {
219      if (first->label)
220        sum_w1_ += first->weight;
221      else
222        sum_w2_ += first->weight;
223      ++first;
224    }
225  }
226
227}}} // of namespace theplu yat statistics
228
229#endif
Note: See TracBrowser for help on using the repository browser.