source: trunk/yat/statistics/KolmogorovSmirnov.cc @ 1004

Last change on this file since 1004 was 1004, checked in by Peter, 14 years ago

adding a wrapper around std::random_shuffle that uses the RNG class. Changed all calls to random_shuffle to use the added function. Acctually all calls already used the RNG class so essentially this change is only cosmetic, but by providing a function I think it is easier to avoid using multiple random generators.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 3.2 KB
Line 
1// $Id: KolmogorovSmirnov.cc 1004 2008-01-23 18:24:53Z peter $
2
3/*
4  Copyright (C) 2008 Peter Johansson
5
6  This file is part of the yat library, http://trac.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 2 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with this program; if not, write to the Free Software
20  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21  02111-1307, USA.
22*/
23
24#include "KolmogorovSmirnov.h"
25#include "yat/random/random.h"
26#include "yat/utility/stl_utility.h"
27
28#include <algorithm>
29#include <cassert>
30#include <cmath>
31#include <deque>
32#include <functional>
33
34
35namespace theplu {
36namespace yat {
37namespace statistics {
38
39
40  KolmogorovSmirnov::KolmogorovSmirnov(void)
41    : cached_(true), score_(0.0), sum_w1_(0.0), sum_w2_(0.0)
42  {
43  }
44
45
46  void KolmogorovSmirnov::add(double x, bool target, double w)
47  {
48    if (w==0) // ignoring zero weight data
49      return;
50
51    data_.insert(std::make_pair(std::make_pair(x,w),target));
52    if (target){
53      ++n1_;
54      sum_w1_+=w;
55    }
56    else {
57      ++n2_;
58      sum_w2_+=w;
59    }
60    cached_=false;
61  }
62
63
64  double KolmogorovSmirnov::p_value(size_t perm) const
65  {
66    size_t count=0;
67    std::deque<bool> targets;
68    typedef data_w::const_iterator const_iter;
69    for (const_iter i(data_.begin()); i!=data_.end(); ++i)
70      targets.push_back(i->second);
71
72    for (size_t i=0; i<perm; ++i){
73      yat::random::DiscreteUniform d;
74      std::random_shuffle(targets.begin(), targets.end(), d); 
75      KolmogorovSmirnov ks;
76      std::deque<bool>::const_iterator target_i(targets.begin());
77      const_iter end(data_.end());
78      for (const_iter i(data_.begin()); i!=end; ++i, ++target_i) 
79        ks.add(i->first.first, *target_i, i->first.second);
80     
81      if (ks.score()>=score())
82        ++count;
83    }
84    return static_cast<double>(count)/perm;
85  }
86
87
88  double KolmogorovSmirnov::score(void) const
89  {
90    if (cached_)
91      return score_;
92    if (!sum_w1_ || !sum_w2_)
93      return 0.0;
94    score_=0;
95    std::vector<double> v;
96    scores(v);
97    std::less<double> l;
98    utility::abs<double> a;
99    using utility::make_compose_f_gx_hy;
100    score_ = std::abs(*std::max_element(v.begin(), v.end(), 
101                                        make_compose_f_gx_hy(l, a, a)));
102    return score_;
103  }
104
105
106  void KolmogorovSmirnov::scores(std::vector<double>& res) const
107  {
108    res.clear();
109    res.reserve(data_.size());
110    data_w::const_iterator iter(data_.begin());
111    double f1=0;
112    double f2=0;
113    // Peter, we should take care of ties!
114    while(iter!=data_.end()){
115      if (iter->second)
116        f1 += iter->first.second;
117      else
118        f2 += iter->first.second;
119      res.push_back(f1/sum_w1_-f2/sum_w2_);
120    }
121  }
122
123
124  void KolmogorovSmirnov::reset(void)
125  {
126    cached_=false;
127    sum_w1_=0;
128    sum_w2_=0;
129  }
130
131
132  std::ostream& operator<<(std::ostream& os, const KolmogorovSmirnov& ks)
133  {
134    std::vector<double> s;
135    ks.scores(s);
136    for (size_t i=0; i<s.size(); ++i)
137      os << i << "\t" << s[i] << "\n";
138    return os;
139  }
140
141
142}}} // of namespace theplu yat statistics
Note: See TracBrowser for help on using the repository browser.