source: trunk/yat/statistics/KolmogorovSmirnov.cc @ 1687

Last change on this file since 1687 was 1687, checked in by Peter, 12 years ago

fixes #455

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 3.8 KB
Line 
1// $Id: KolmogorovSmirnov.cc 1687 2008-12-30 22:00:24Z peter $
2
3/*
4  Copyright (C) 2008 Peter Johansson
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with yat. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include "KolmogorovSmirnov.h"
23#include "yat/random/random.h"
24#include "yat/utility/stl_utility.h"
25
26#include <algorithm>
27#include <cassert>
28#include <cmath>
29#include <deque>
30#include <functional>
31#include <limits>
32
33namespace theplu {
34namespace yat {
35namespace statistics {
36
37
38  KolmogorovSmirnov::KolmogorovSmirnov(void)
39    : cached_(true), score_(0.0), sum_w1_(0.0), sum_w2_(0.0)
40  {
41  }
42
43
44  void KolmogorovSmirnov::add(double x, bool target, double w)
45  {
46    if (w==0) // ignoring zero weight data
47      return;
48
49    data_.insert(std::make_pair(std::make_pair(x,w),target));
50    if (target){
51      sum_w1_+=w;
52    }
53    else {
54      sum_w2_+=w;
55    }
56    cached_=false;
57  }
58
59
60  double KolmogorovSmirnov::p_value(void) const
61  {
62    double res=0;
63    double res2=0;
64    double s2 = score() * score() * sum_w1_*sum_w2_/(sum_w1_+sum_w2_);
65    int sign = 1;
66    for (size_t k = 1; k<100; ++k) {
67      res += sign * 2 * exp(-2.0 * k * k * s2);
68      sign *= -1;
69      // ignore remaining terms as they are small
70      if (res==res2)
71        return res;
72      res2 = res;
73    }
74
75    return res;
76  }
77
78
79  double KolmogorovSmirnov::p_value(size_t perm) const
80  {
81    size_t count=0;
82    std::deque<bool> targets;
83    typedef data_w::const_iterator const_iter;
84    for (const_iter i(data_.begin()); i!=data_.end(); ++i)
85      targets.push_back(i->second);
86
87    double score_threshold = score()-10*std::numeric_limits<double>().epsilon();
88
89    for (size_t i=0; i<perm; ++i){
90      random::random_shuffle(targets.begin(), targets.end()); 
91      KolmogorovSmirnov ks;
92      std::deque<bool>::const_iterator target_i(targets.begin());
93      const_iter end(data_.end());
94      for (const_iter iter(data_.begin()); iter!=end; ++iter, ++target_i) 
95        ks.add(iter->first.first, *target_i, iter->first.second);
96     
97      if (ks.score()>=score_threshold)
98        ++count;
99    }
100    return static_cast<double>(count)/perm;
101  }
102
103
104  void KolmogorovSmirnov::reset(void)
105  {
106    cached_=false;
107    data_.clear();
108    sum_w1_=0;
109    sum_w2_=0;
110  }
111
112
113  double KolmogorovSmirnov::score(void) const
114  {
115    return std::abs(signed_score());
116  }
117
118
119  double KolmogorovSmirnov::signed_score(void) const
120  {
121    if (cached_)
122      return score_;
123    if (!sum_w1_ || !sum_w2_)
124      return 0.0;
125    score_=0;
126    std::vector<double> v;
127    scores(v);
128    std::less<double> l;
129    utility::abs<double> a;
130    using utility::make_compose_f_gx_hy;
131    score_ = *std::max_element(v.begin(), v.end(), 
132                               make_compose_f_gx_hy(l, a, a));
133    cached_=true;
134    return score_;
135  }
136
137
138  void KolmogorovSmirnov::scores(std::vector<double>& res) const
139  {
140    res.clear();
141    res.reserve(data_.size());
142    data_w::const_iterator iter(data_.begin());
143    double f1=0;
144    double f2=0;
145    while(iter!=data_.end()){
146      size_t count=0;
147      double value = iter->first.first;
148      while (iter!=data_.end() && iter->first.first==value) {
149        if (iter->second) {
150          f1 += iter->first.second;
151        }
152        else {
153          f2 += iter->first.second;
154        }
155        ++count;
156        ++iter;
157      }
158      res.resize(res.size()+count, f1/sum_w1_-f2/sum_w2_);
159    }
160  }
161
162
163  std::ostream& operator<<(std::ostream& os, const KolmogorovSmirnov& ks)
164  {
165    std::vector<double> s;
166    ks.scores(s);
167    for (size_t i=0; i<s.size(); ++i)
168      os << i << "\t" << s[i] << "\n";
169    return os;
170  }
171
172
173}}} // of namespace theplu yat statistics
Note: See TracBrowser for help on using the repository browser.