source: trunk/yat/statistics/KolmogorovSmirnov.cc @ 1600

Last change on this file since 1600 was 1600, checked in by Peter, 13 years ago
  • Fixed bug that data disappeared because data internally was stored in a set (is now a multiset).
  • Changed ordering of data, which implies that sign of signed_score is flipped.
  • Added some tests for KolmogorovSmirnov?
  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 3.6 KB
Line 
1// $Id: KolmogorovSmirnov.cc 1600 2008-10-27 19:10:49Z peter $
2
3/*
4  Copyright (C) 2008 Peter Johansson
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with yat. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include "KolmogorovSmirnov.h"
23#include "yat/random/random.h"
24#include "yat/utility/stl_utility.h"
25
26#include <algorithm>
27#include <cassert>
28#include <cmath>
29#include <deque>
30#include <functional>
31
32namespace theplu {
33namespace yat {
34namespace statistics {
35
36
37  KolmogorovSmirnov::KolmogorovSmirnov(void)
38    : cached_(true), score_(0.0), sum_w1_(0.0), sum_w2_(0.0)
39  {
40  }
41
42
43  void KolmogorovSmirnov::add(double x, bool target, double w)
44  {
45    if (w==0) // ignoring zero weight data
46      return;
47
48    data_.insert(std::make_pair(std::make_pair(x,w),target));
49    if (target){
50      sum_w1_+=w;
51    }
52    else {
53      sum_w2_+=w;
54    }
55    cached_=false;
56  }
57
58
59  double KolmogorovSmirnov::p_value(void) const
60  {
61    double res=0;
62    double res2=0;
63    double s2 = score() * score() * sum_w1_*sum_w2_/(sum_w1_+sum_w2_);
64    int sign = 1;
65    for (size_t k = 1; k<100; ++k) {
66      res += sign * 2 * exp(-2.0 * k * k * s2);
67      sign *= -1;
68      // ignore remaining terms as they are small
69      if (res==res2)
70        return res;
71      res2 = res;
72    }
73
74    return res;
75  }
76
77
78  double KolmogorovSmirnov::p_value(size_t perm) const
79  {
80    size_t count=0;
81    std::deque<bool> targets;
82    typedef data_w::const_iterator const_iter;
83    for (const_iter i(data_.begin()); i!=data_.end(); ++i)
84      targets.push_back(i->second);
85
86    for (size_t i=0; i<perm; ++i){
87      random::random_shuffle(targets.begin(), targets.end()); 
88      KolmogorovSmirnov ks;
89      std::deque<bool>::const_iterator target_i(targets.begin());
90      const_iter end(data_.end());
91      for (const_iter i(data_.begin()); i!=end; ++i, ++target_i) 
92        ks.add(i->first.first, *target_i, i->first.second);
93     
94      if (ks.score()>=score())
95        ++count;
96    }
97    return static_cast<double>(count)/perm;
98  }
99
100
101  double KolmogorovSmirnov::score(void) const
102  {
103    return std::abs(signed_score());
104  }
105
106
107  double KolmogorovSmirnov::signed_score(void) const
108  {
109    if (cached_)
110      return score_;
111    if (!sum_w1_ || !sum_w2_)
112      return 0.0;
113    score_=0;
114    std::vector<double> v;
115    scores(v);
116    std::less<double> l;
117    utility::abs<double> a;
118    using utility::make_compose_f_gx_hy;
119    score_ = *std::max_element(v.begin(), v.end(), 
120                               make_compose_f_gx_hy(l, a, a));
121    cached_=true;
122    return score_;
123  }
124
125
126  void KolmogorovSmirnov::scores(std::vector<double>& res) const
127  {
128    res.clear();
129    res.reserve(data_.size());
130    data_w::const_iterator iter(data_.begin());
131    double f1=0;
132    double f2=0;
133    // Peter, we should take care of ties!
134    while(iter!=data_.end()){
135      if (iter->second)
136        f1 += iter->first.second;
137      else
138        f2 += iter->first.second;
139      res.push_back(f1/sum_w1_-f2/sum_w2_);
140      ++iter;
141    }
142  }
143
144
145  void KolmogorovSmirnov::reset(void)
146  {
147    cached_=false;
148    sum_w1_=0;
149    sum_w2_=0;
150  }
151
152
153  std::ostream& operator<<(std::ostream& os, const KolmogorovSmirnov& ks)
154  {
155    std::vector<double> s;
156    ks.scores(s);
157    for (size_t i=0; i<s.size(); ++i)
158      os << i << "\t" << s[i] << "\n";
159    return os;
160  }
161
162
163}}} // of namespace theplu yat statistics
Note: See TracBrowser for help on using the repository browser.