source: trunk/yat/statistics/KolmogorovSmirnov.h @ 2202

Last change on this file since 2202 was 2202, checked in by Peter, 12 years ago

merging release 0.6 into trunk

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.6 KB
Line 
1#ifndef _theplu_yat_statistics_kolmogorov_smirnov_
2#define _theplu_yat_statistics_kolmogorov_smirnov_
3
4// $Id: KolmogorovSmirnov.h 2202 2010-02-21 18:39:13Z peter $
5
6/*
7  Copyright (C) 2008 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2009, 2010 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include <boost/concept_check.hpp>
27
28#include <iosfwd>
29#include <set>
30#include <vector>
31
32namespace theplu {
33namespace yat {
34namespace statistics { 
35
36  /**
37     \brief Kolmogow Smirnov Test
38   */
39  class KolmogorovSmirnov
40  {
41  public:
42    /**
43       struct used to store data in KolmogorovSmirnov.
44       
45       This struct is public to make usage of the add range function
46       more convenient.
47
48       \since New in yat 0.5
49     */
50    struct Element
51    {
52      /**
53         \brief default constructor
54      */
55      inline Element(void) {};
56
57      /**
58         \brief Constructor
59       */
60      inline Element(double x, bool class_label, double w=1.0)
61        : value(x), label(class_label), weight(w) {};
62
63      /**
64         \brief data value
65       */
66      double value;
67     
68      /**
69          bool telling which group the data point belong to
70      */
71      bool label;
72
73      /**
74         weight for the data point
75       */
76      double weight;
77
78      /**
79         \return true if value is less than rhs.value
80       */
81      bool operator<(const Element& rhs) const;
82    };
83
84    /**
85       \brief Constructor
86     */ 
87    KolmogorovSmirnov(void);
88
89    /**
90       \brief add a value
91     */
92    void add(double value, bool class_label, double weight=1.0);
93
94    /**
95       \brief add a range
96       
97       value_type of \a ForwardIterator must be convertible to
98       KolmogorovSmirnov::Element
99       
100       Insertion takes typically N*log(N). However, if range is
101       sorted, insertion takes linear time. A common use case of this
102       function is when calculating several KS statistics on a data
103       set (values and weights) with different class labels.
104
105       \since New in yat 0.5
106    */
107    template <typename ForwardIterator>
108    void add(ForwardIterator first, ForwardIterator last); 
109
110    /**
111       \brief Large-Sample Approximation
112
113       This analytical approximation of p-value can be used when all
114       weight equal unity and sample sizes \a n and \a m are
115       large. The p-value is calcuated as \f$ P = \displaystyle - 2
116       \sum_{k=1}^{\infty} (-1)^ke^{-2k^2s^2}\f$, where s is the
117       scaled score:
118
119       \f$ s = \sqrt\frac{nm}{n+m} \f$ score().
120
121       \since New in yat 0.5
122
123       Following Hollander and Wolfe
124    */
125    double p_value(void) const;
126
127    /**
128       \brief p-value
129
130       Performs a permutation test using \a perm label randomizations
131       and calculate how often a score equal or larger than score() is
132       obtained.
133
134       \see shuffle
135    */
136    double p_value(size_t perm) const;
137
138    /**
139       \brief resets everything to zero
140    */
141    void reset(void);
142
143    /**
144       \brief Kolmogorov Smirnov statistic
145
146       \f$ sup_x | F_{\textrm{True}}(x) - F_{\textrm{False}}(x) | \f$ where
147       \f$ F(x) = \frac{\sum_{i:x_i\le x}w_i}{ \sum w_i} \f$
148    */
149    double score(void) const;
150
151    /**
152       \brief shuffle class labels
153
154       This is equivalent to reset and re-add values with shuffled
155       class labels.
156
157       \since New in yat 0.5
158     */
159    void shuffle(void);
160
161    /**
162       Same as score() but keeping the sign, in other words,
163       abs(signed_score())==score()
164
165       A positive score implies that values in class \c true \em on
166       \em average are smaller than values in class \c false.
167
168       \since New in yat 0.5
169     */
170    double signed_score(void) const;
171
172  private:
173    void scores(std::vector<double>&) const;
174    // add weights to sum_w1 and sum_w2 respectively depending on
175    // label in element.
176    template <typename ForwardIterator>
177    void add_sum_w(ForwardIterator first, ForwardIterator last);
178   
179    mutable bool cached_;
180    mutable double score_;
181    typedef std::multiset<Element> data_w;
182    data_w data_;
183    double sum_w1_;
184    double sum_w2_;
185
186    friend std::ostream& operator<<(std::ostream&, const KolmogorovSmirnov&);
187
188    // using compiler generated copy and assignment
189    //KolmogorovSmirnov(const KolmogorovSmirnov&);
190    //KolmogorovSmirnov& operator=(const KolmogorovSmirnov&);
191  };
192
193  /**
194     \brief output operator
195
196     \relates KolmogorovSmirnov
197   */
198  std::ostream& operator<<(std::ostream&, const KolmogorovSmirnov&);
199
200
201  //  template implementations
202
203  template <typename ForwardIterator>
204  void KolmogorovSmirnov::add(ForwardIterator first, ForwardIterator last)
205  {
206    BOOST_CONCEPT_ASSERT((boost::ForwardIterator<ForwardIterator>));
207    typedef typename std::iterator_traits<ForwardIterator>::reference ref;
208    BOOST_CONCEPT_ASSERT((boost::Convertible<ref, KolmogorovSmirnov::Element>));
209    ForwardIterator iter(first);
210    typename data_w::const_iterator hint(data_.begin());
211    for ( ; iter!=last; ++iter) 
212      if ((*iter).weight) // ignore data points with zero weight
213        hint = data_.insert(hint, *iter);
214    add_sum_w(first, last);
215    cached_=false;
216  }
217
218
219  template <typename ForwardIterator>
220  void KolmogorovSmirnov::add_sum_w(ForwardIterator first, 
221                                    ForwardIterator last)
222  {
223    while (first!=last) {
224      if ((*first).label)
225        sum_w1_ += (*first).weight;
226      else
227        sum_w2_ += (*first).weight;
228      ++first;
229    }
230  }
231
232}}} // of namespace theplu yat statistics
233
234#endif
Note: See TracBrowser for help on using the repository browser.