source: trunk/yat/statistics/KolmogorovSmirnov.h @ 2515

Last change on this file since 2515 was 2515, checked in by Peter, 10 years ago

avoid inline

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.5 KB
Line 
1#ifndef _theplu_yat_statistics_kolmogorov_smirnov_
2#define _theplu_yat_statistics_kolmogorov_smirnov_
3
4// $Id: KolmogorovSmirnov.h 2515 2011-07-11 02:36:49Z peter $
5
6/*
7  Copyright (C) 2008 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2009, 2010 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include <boost/concept_check.hpp>
27
28#include <iosfwd>
29#include <set>
30#include <vector>
31
32namespace theplu {
33namespace yat {
34namespace statistics { 
35
36  /**
37     \brief Kolmogow Smirnov Test
38   */
39  class KolmogorovSmirnov
40  {
41  public:
42    /**
43       struct used to store data in KolmogorovSmirnov.
44       
45       This struct is public to make usage of the add range function
46       more convenient.
47
48       \since New in yat 0.5
49     */
50    struct Element
51    {
52      /**
53         \brief default constructor
54      */
55      Element(void);
56
57      /**
58         \brief Constructor
59       */
60      Element(double x, bool class_label, double w=1.0);
61
62      /**
63         \brief data value
64       */
65      double value;
66     
67      /**
68          bool telling which group the data point belongs to
69      */
70      bool label;
71
72      /**
73         weight for the data point
74       */
75      double weight;
76
77      /**
78         \return true if value is less than rhs.value
79       */
80      bool operator<(const Element& rhs) const;
81    };
82
83    /**
84       \brief Constructor
85     */ 
86    KolmogorovSmirnov(void);
87
88    /**
89       \brief add a value
90     */
91    void add(double value, bool class_label, double weight=1.0);
92
93    /**
94       \brief add a range
95       
96       value_type of \a ForwardIterator must be convertible to
97       KolmogorovSmirnov::Element
98       
99       Insertion takes typically N*log(N). However, if range is
100       sorted, insertion takes linear time. A common use case of this
101       function is when calculating several KS statistics on a data
102       set (values and weights) with different class labels.
103
104       \since New in yat 0.5
105    */
106    template <typename ForwardIterator>
107    void add(ForwardIterator first, ForwardIterator last); 
108
109    /**
110       \brief Large-Sample Approximation
111
112       This analytical approximation of p-value can be used when all
113       weight equal unity and sample sizes \a n and \a m are
114       large. The p-value is calcuated as \f$ P = \displaystyle - 2
115       \sum_{k=1}^{\infty} (-1)^ke^{-2k^2s^2}\f$, where s is the
116       scaled score:
117
118       \f$ s = \sqrt\frac{nm}{n+m} \f$ score().
119
120       \since New in yat 0.5
121
122       Following Hollander and Wolfe
123    */
124    double p_value(void) const;
125
126    /**
127       \brief p-value
128
129       Performs a permutation test using \a perm label randomizations
130       and calculate how often a score equal or larger than score() is
131       obtained.
132
133       \see shuffle
134    */
135    double p_value(size_t perm) const;
136
137    /**
138       \brief resets everything to zero
139    */
140    void reset(void);
141
142    /**
143       \brief Kolmogorov Smirnov statistic
144
145       \f$ sup_x | F_{\textrm{True}}(x) - F_{\textrm{False}}(x) | \f$ where
146       \f$ F(x) = \frac{\sum_{i:x_i\le x}w_i}{ \sum w_i} \f$
147    */
148    double score(void) const;
149
150    /**
151       \brief shuffle class labels
152
153       This is equivalent to reset and re-add values with shuffled
154       class labels.
155
156       \since New in yat 0.5
157     */
158    void shuffle(void);
159
160    /**
161       Same as score() but keeping the sign, in other words,
162       abs(signed_score())==score()
163
164       A positive score implies that values in class \c true \em on
165       \em average are smaller than values in class \c false.
166
167       \since New in yat 0.5
168     */
169    double signed_score(void) const;
170
171  private:
172    void scores(std::vector<double>&) const;
173    // add weights to sum_w1 and sum_w2 respectively depending on
174    // label in element.
175    template <typename ForwardIterator>
176    void add_sum_w(ForwardIterator first, ForwardIterator last);
177   
178    mutable bool cached_;
179    mutable double score_;
180    typedef std::multiset<Element> data_w;
181    data_w data_;
182    double sum_w1_;
183    double sum_w2_;
184
185    friend std::ostream& operator<<(std::ostream&, const KolmogorovSmirnov&);
186
187    // using compiler generated copy and assignment
188    //KolmogorovSmirnov(const KolmogorovSmirnov&);
189    //KolmogorovSmirnov& operator=(const KolmogorovSmirnov&);
190  };
191
192  /**
193     \brief output operator
194
195     \relates KolmogorovSmirnov
196   */
197  std::ostream& operator<<(std::ostream&, const KolmogorovSmirnov&);
198
199
200  //  template implementations
201
202  template <typename ForwardIterator>
203  void KolmogorovSmirnov::add(ForwardIterator first, ForwardIterator last)
204  {
205    BOOST_CONCEPT_ASSERT((boost::ForwardIterator<ForwardIterator>));
206    typedef typename std::iterator_traits<ForwardIterator>::reference ref;
207    BOOST_CONCEPT_ASSERT((boost::Convertible<ref, KolmogorovSmirnov::Element>));
208    ForwardIterator iter(first);
209    typename data_w::const_iterator hint(data_.begin());
210    for ( ; iter!=last; ++iter) 
211      if ((*iter).weight) // ignore data points with zero weight
212        hint = data_.insert(hint, *iter);
213    add_sum_w(first, last);
214    cached_=false;
215  }
216
217
218  template <typename ForwardIterator>
219  void KolmogorovSmirnov::add_sum_w(ForwardIterator first, 
220                                    ForwardIterator last)
221  {
222    while (first!=last) {
223      if ((*first).label)
224        sum_w1_ += (*first).weight;
225      else
226        sum_w2_ += (*first).weight;
227      ++first;
228    }
229  }
230
231}}} // of namespace theplu yat statistics
232
233#endif
Note: See TracBrowser for help on using the repository browser.