source: trunk/test/kolmogorov_smirnov_test.cc @ 1701

Last change on this file since 1701 was 1701, checked in by Peter, 12 years ago

class KolmogorovSmirnov?:
add (sorted) range (fixes #462)
allow copy and assignment
add a shuffle function
p_value calculation now scales linearly with number of data points and
not N*logN as before

configure.ac: added a test to see if std::multiset::iterator is mutable

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.6 KB
Line 
1// $Id: kolmogorov_smirnov_test.cc 1701 2009-01-07 22:53:05Z peter $
2
3/*
4  Copyright (C) 2008, 2009 Peter Johansson
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with yat. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include "Suite.h"
23
24#include "yat/random/random.h"
25#include "yat/statistics/Averager.h"
26#include "yat/statistics/KolmogorovSmirnov.h"
27
28#include <cmath>
29#include <deque>
30#include <iostream>
31#include <vector>
32
33using namespace theplu::yat;
34
35void test_one_sample(test::Suite&);
36void test_two_sample(test::Suite&);
37void test_p_value(test::Suite&);
38void test_shuffle(test::Suite&);
39void test_range(test::Suite&);
40void test_reset(test::Suite&);
41void test_ties(test::Suite&);
42
43int main(int argc, char* argv[])
44{ 
45  test::Suite suite(argc, argv);
46
47  test_one_sample(suite);
48  test_two_sample(suite);
49  test_p_value(suite);
50  test_shuffle(suite);
51  test_range(suite);
52  test_reset(suite);
53  test_ties(suite);
54
55  return suite.return_value();
56}
57
58void test_one_sample(test::Suite& suite)
59{
60  std::vector<double> correct(11);
61  for (size_t i=0; i<correct.size(); ++i) {
62    double s1 = 1.0 - i/10.0;
63    double s2 = 0.0-i/10.0;
64    if (std::abs(s1)>std::abs(s2))
65      correct[i] = s1;
66    else
67      correct[i] = s2;
68  }
69
70  for (size_t i=0; i<11; ++i) {
71    statistics::KolmogorovSmirnov ks;
72    for (size_t j=0; j<11; ++j) {
73      ks.add(j, i==j);
74    }     
75    double score = ks.signed_score();
76    if (!suite.add(suite.equal(score, correct[i]))) {
77      suite.err() << "signed_score(void) failed\n";
78    }
79  }
80
81  statistics::KolmogorovSmirnov ks;
82  for (size_t i=0; i<11; ++i) {
83    ks.add(i, i==0);
84  }     
85  size_t n=110000;
86  double p = ks.p_value(n);
87  double p_correct = 2.0/11.0;
88  double margin = 10*std::sqrt(p_correct*(1-p_correct)/n);
89  if (p>p_correct+margin || p<p_correct-margin) {
90    suite.err() << "Error: p-value: " << p << "\n"
91                << "expected approximately: " << p_correct << "\n"
92                << "and at most " << margin << "deviation\n";
93    suite.add(false);
94  } 
95}
96
97
98void test_two_sample(test::Suite& suite)
99{
100  suite.err() << "testing two sample\n";
101  statistics::KolmogorovSmirnov ks;
102  for (size_t i=0; i<5; ++i)
103    ks.add(i+0.5, i<2);
104  suite.add(suite.equal(ks.score(), 1.0));
105  size_t n=100000;
106  double p = ks.p_value(n);
107  double p_correct = 0.2;
108  double margin=10*std::sqrt(p_correct*(1-p_correct)/n);
109  if (std::abs(p-p_correct)>margin) {
110    suite.add(false);
111    suite.err() << "Error: p = " << p << "\n"
112                << "correct p would be: " << p_correct << "\n"
113                << "expected a difference less than " << margin << "margin\n";
114    suite.err() << p << std::endl;
115  }
116}
117
118
119void test_p_value(test::Suite& suite)
120{
121  statistics::KolmogorovSmirnov ks;
122  for (size_t i=0; i<100; ++i) {
123    ks.add(i, true);
124    ks.add(i+14.5, false);
125  } 
126  suite.add(suite.equal(ks.score(), 0.15, 10));
127
128  statistics::Averager a;
129  for (size_t n=0; n<100; ++n) {
130    a.add(ks.p_value(100));
131  }
132  double margin = 5 * a.standard_error(); 
133  double p_approx = ks.p_value();
134  if (std::abs(a.mean()-p_approx)>margin) {
135    suite.add(false);
136    suite.err() << "Error: unexpected large deviation between p_values\n"
137                << "permutation p-value: " << a.mean() << "\n"
138                << "analytical approximation: " << p_approx << "\n"
139                << "expected deviation to be smaller than " << margin << "\n";
140  }
141 
142}
143
144
145void test_range(test::Suite& suite)
146{
147  suite.err() << "testing range" << std::endl;
148  std::deque<bool> labels;
149  for (size_t i=0; i<10; ++i) {
150    labels.push_back(i<5);
151  }
152  std::vector<statistics::KolmogorovSmirnov::Element> data;
153  statistics::KolmogorovSmirnov::Element elem;
154  elem.weight = 1.0;
155  for (size_t i=0; i<10; ++i) {
156    elem.value = i;
157    elem.label = labels[i];
158    data.push_back(elem);
159  }
160  statistics::KolmogorovSmirnov ks;
161  ks.add(data.begin(), data.end());
162  suite.add(suite.equal(ks.score(), 1.0));
163 
164  // testing that adding a range gives same result as adding elements
165  // sequentially
166  statistics::KolmogorovSmirnov ks2;
167  for (size_t i=0; i<data.size(); ++i)
168    ks2.add(data[i].value, data[i].label, data[i].weight);
169  suite.add(suite.equal(ks2.signed_score(), ks.signed_score()));
170
171  theplu::yat::random::random_shuffle(labels.begin(), labels.end());
172  for (size_t i=0; i<data.size(); ++i) {
173    data[i].label = labels[i];
174  }
175  ks.reset();
176  ks.add(data.begin(), data.end());
177  ks2.reset();
178  for (size_t i=0; i<data.size(); ++i)
179    ks2.add(data[i].value, data[i].label, data[i].weight);
180  suite.add(suite.equal(ks2.signed_score(), ks.signed_score()));
181}
182
183
184void test_reset(test::Suite& suite)
185{
186  suite.err() << "testing reset\n";
187  statistics::KolmogorovSmirnov ks;
188  ks.add(1.0, true);
189  ks.add(2.0, false);
190  ks.add(3.0, true);
191  double score = ks.score();
192  double p = ks.p_value();
193  ks.reset();
194  ks.add(1.0, true);
195  ks.add(2.0, false);
196  ks.add(3.0, true);
197  suite.add(suite.equal(ks.score(), score));
198  suite.add(suite.equal(ks.p_value(), p));
199}
200
201
202void test_ties(test::Suite& suite)
203{
204  suite.err() << "test ties" << std::endl;
205  statistics::KolmogorovSmirnov ks;
206  for (size_t i=0; i<5; ++i)
207    ks.add(i, true);
208  ks.add(0, false);
209  suite.add(suite.equal(ks.score(), 1.0-0.2));
210}
211
212
213void test_shuffle(test::Suite& suite)
214{
215  suite.err() << "testing shuffle" << std::endl;
216  statistics::KolmogorovSmirnov ks;
217  for (size_t i=0; i<10; ++i)
218    ks.add(i, i<5);
219  ks.shuffle();
220}
221
222
Note: See TracBrowser for help on using the repository browser.