source: trunk/test/statistics.cc @ 3236

Last change on this file since 3236 was 3236, checked in by Peter, 9 years ago

BH correction on unsorted range. closes #796

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 11.9 KB
Line 
1// $Id: statistics.cc 3236 2014-05-23 13:42:51Z peter $
2
3/*
4  Copyright (C) 2004 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2005 Peter Johansson
6  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
7  Copyright (C) 2007, 2008, 2009 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2010, 2011, 2012, 2013 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include <config.h>
27
28#include "Suite.h"
29
30#include "yat/classifier/Target.h"
31#include "yat/statistics/Average.h"
32#include "yat/statistics/utility.h"
33#include "yat/statistics/tTest.h"
34#include "yat/utility/DataWeight.h"
35#include "yat/utility/Matrix.h"
36#include "yat/utility/MatrixWeighted.h"
37#include "yat/utility/Vector.h"
38
39#include <boost/concept_archetype.hpp>
40
41#include <cmath>
42#include <cstdlib>
43#include <iostream>
44#include <limits>
45#include <map>
46#include <vector>
47
48using namespace theplu::yat;
49void test_benjamini_hochberg(test::Suite&);
50void test_benjamini_hochberg_unsorted(test::Suite&);
51void test_entropy(test::Suite&);
52void test_mad(test::Suite&);
53void test_mutual_information(test::Suite&);
54
55void test_median_empty(test::Suite&);
56void test_percentiler(test::Suite&);
57void test_percentiler_nan(test::Suite&);
58
59template<typename RandomAccessIterator>
60void test_percentiler(test::Suite&, RandomAccessIterator,
61                      RandomAccessIterator,
62                      double p, double correct);
63
64template<typename RandomAccessIterator1, typename RandomAccessIterator2>
65void cmp_percentiler(test::Suite&,
66                     RandomAccessIterator1,
67                     RandomAccessIterator1,
68                     RandomAccessIterator2,
69                     RandomAccessIterator2);
70
71int main(int argc, char* argv[])
72{
73  test::Suite suite(argc, argv);
74
75  utility::Vector gsl_vec(10);
76  std::vector<double> data;
77  for (unsigned int i=0; i<10; i++){
78    data.push_back(static_cast<double>(i));
79    gsl_vec(i)=i;
80  }
81
82  double m=statistics::median(data.begin(), data.end());
83  double m_gsl=statistics::median(gsl_vec.begin(), gsl_vec.end());
84  if (m!=4.5 || m!=m_gsl)
85    suite.add(false);
86  if (false) {
87    using statistics::median;
88    double x = median(boost::random_access_iterator_archetype<double>(),
89                      boost::random_access_iterator_archetype<double>());
90    test::dummie_function(x);
91    x = median(boost::random_access_iterator_archetype<utility::DataWeight>(),
92               boost::random_access_iterator_archetype<utility::DataWeight>());
93    test::dummie_function(x);
94  }
95  statistics::percentile2(data.begin(), data.end(), 100);
96  data.resize(1);
97  statistics::median(data.begin(), data.end());
98  // testing percentile2
99  test_percentiler(suite);
100
101  // test weighted percentiler with NaNs
102  test_percentiler_nan(suite);
103
104  double skewness_gsl=statistics::skewness(gsl_vec);
105  if (!suite.equal(1-skewness_gsl, 1.0) )
106    suite.add(false);
107  double kurtosis_gsl=statistics::kurtosis(gsl_vec);
108  suite.add(suite.equal_fix(kurtosis_gsl,-1.5616363636363637113,1e-10));
109  statistics::Average func;
110  suite.add(suite.equal(func(gsl_vec.begin(), gsl_vec.end()),4.5));
111  // easiest way to get a weighted iterator
112  classifier::MatrixLookupWeighted mlw(10,20,2.0, 1.0);
113  suite.add(suite.equal(func(mlw.begin(), mlw.end()),2.0));
114  // do not run compiler test
115  if (false) {
116    statistics::Average average;
117    double x = average(boost::input_iterator_archetype<double>(),
118                       boost::input_iterator_archetype<double>());
119    test::dummie_function(x);
120    using utility::DataWeight;
121    x = average(boost::input_iterator_archetype_no_proxy<DataWeight>(),
122                boost::input_iterator_archetype_no_proxy<DataWeight>());
123    test::dummie_function(x);
124  }
125
126  test_mad(suite);
127
128  // do not run compiler test
129  if (false) {
130    statistics::tTest t_test;
131    classifier::Target target;
132    add(t_test, boost::forward_iterator_archetype<double>(),
133        boost::forward_iterator_archetype<double>(), target);
134    add(t_test, boost::forward_iterator_archetype<utility::DataWeight>(),
135        boost::forward_iterator_archetype<utility::DataWeight>(), target);
136  }
137  test_benjamini_hochberg(suite);
138  test_benjamini_hochberg_unsorted(suite);
139  test_entropy(suite);
140  test_median_empty(suite);
141  test_mutual_information(suite);
142  return suite.return_value();
143}
144
145
146void test_benjamini_hochberg(test::Suite& suite)
147{
148  std::vector<double> p;
149  p.push_back(0.0001);
150  p.push_back(0.01);
151  p.push_back(0.015);
152  p.push_back(0.5);
153  p.push_back(0.99);
154  std::vector<double> q(p.size());
155  statistics::benjamini_hochberg(p.begin(), p.end(), q.begin());
156  suite.add(suite.equal(q[0], p[0]*5));
157  suite.add(suite.equal(q[1], p[1]*2.5));
158  suite.add(suite.equal(q[2], 0.025));
159  suite.add(suite.equal(q[3], p[3]*1.25));
160  suite.add(suite.equal(q[4], 0.99));
161
162  // do nut run compiler test
163  if (false) {
164    using statistics::benjamini_hochberg;
165    benjamini_hochberg(boost::bidirectional_iterator_archetype<double>(),
166                       boost::bidirectional_iterator_archetype<double>(),
167                       boost::mutable_bidirectional_iterator_archetype<double>());
168  }
169}
170
171
172void test_benjamini_hochberg_unsorted(test::Suite& suite)
173{
174  std::vector<double> p;
175  p.push_back(0.015);
176  p.push_back(0.0001);
177  p.push_back(0.01);
178  p.push_back(0.5);
179  p.push_back(0.99);
180  std::vector<double> q(p.size());
181  statistics::benjamini_hochberg_unsorted(p.begin(), p.end(), q.begin());
182  suite.add(suite.equal(q[1], p[1]*5));
183  suite.add(suite.equal(q[2], p[2]*2.5));
184  suite.add(suite.equal(q[0], 0.025));
185  suite.add(suite.equal(q[3], p[3]*1.25));
186  suite.add(suite.equal(q[4], 0.99));
187
188  // do nut run compiler test
189  if (false) {
190    using statistics::benjamini_hochberg_unsorted;
191    boost::random_access_iterator_archetype<double> input;
192    boost::mutable_random_access_iterator_archetype<double> result;
193    benjamini_hochberg_unsorted(input, input, result);
194  }
195}
196
197
198void test_entropy(test::Suite& suite)
199{
200  suite.out() << "testing entropy(2)\n";
201  using statistics::entropy;
202  std::vector<int> x(10000,0);
203  x[512] = 42;
204  double e = entropy(x.begin(), x.end());
205  if (e>1e-15) {
206    suite.add(false);
207    suite.out() << "entropy: " << e << " expected close to 0\n";
208  }
209  x[0] = 42;
210  e = entropy(x.begin(), x.end());
211  if (e<=0) {
212    suite.add(false);
213    suite.out() << "entropy: " << e << " expected > 0\n";
214  }
215
216  // do not run compiler test
217  if (false) {
218    entropy(boost::input_iterator_archetype<double>(),
219            boost::input_iterator_archetype<double>());
220  }
221}
222
223
224void test_mad(test::Suite& suite)
225{
226  suite.err() << "testing mad" << std::endl;
227  utility::Vector x(3);
228  x(0) = 3;
229  x(1) = 1;
230  x(2) = 100;
231  suite.add(suite.equal(statistics::mad(x.begin(), x.end()), 2));
232
233  std::vector<utility::DataWeight> wx(3);
234  wx[0] = utility::DataWeight(3, 0.4);
235  wx[1] = utility::DataWeight(1, 0.4);
236  wx[2] = utility::DataWeight(100, 0.6);
237  suite.add(suite.equal(statistics::mad(wx.begin(), wx.end()), 2));
238  // do not run compiler test
239  if (false) {
240    using statistics::mad;
241    double x = mad(boost::random_access_iterator_archetype<double>(),
242                   boost::random_access_iterator_archetype<double>());
243    test::dummie_function(x);
244    x = mad(boost::random_access_iterator_archetype<utility::DataWeight>(),
245            boost::random_access_iterator_archetype<utility::DataWeight>());
246    test::dummie_function(x);
247  }
248}
249
250
251void test_mutual_information(test::Suite& suite)
252{
253  suite.out() << "testing mutual_information\n";
254  using statistics::mutual_information;
255  utility::Matrix x(2,2);
256  x(0,0) = 100;
257  x(1,1) = 100;
258  double mi = mutual_information(x);
259  if (!suite.add(suite.equal(mi,1.0,100))) {
260    suite.err() << "error: mutual information: " << mi << "\n";
261  }
262
263  // testing a non-square Matrix
264  x.resize(3,4,0);
265  x(0,0) = 1;
266  x(1,1) = 1;
267  x(2,2) = 1;
268  x(2,3) = 1;
269  mi = mutual_information(x);
270  suite.out() << "mi: " << mi << "\n";
271}
272
273
274// test for ticket #660
275void test_median_empty(test::Suite& suite)
276{
277  std::vector<double> x;
278  double m = 0;
279  m = statistics::median(x.begin(), x.end(), true);
280  test::dummie_function(m);
281}
282
283
284void test_percentiler(test::Suite& suite)
285{
286  suite.err() << "testing unweighted percentile2" << std::endl;
287  std::vector<double> x;
288  x.reserve(6);
289  for (unsigned int i=0; i<5; i++){
290    x.push_back(static_cast<double>(i+1));
291  }
292  test_percentiler(suite, x.begin(), x.end(), 50, 3);
293  x.push_back(6);
294  test_percentiler(suite, x.begin(), x.end(), 50, 3.5);
295  test_percentiler(suite, x.begin(), x.end(), 25, 2);
296  test_percentiler(suite, x.begin(), x.end(), 0, 1);
297  test_percentiler(suite, x.begin(), x.end(), 10, 1);
298
299  suite.err() << "testing duplication of data\n";
300  std::vector<double> x2(x);
301  for (size_t i=0; i<x.size(); ++i)
302    x2.push_back(x[i]);
303  cmp_percentiler(suite, x.begin(), x.end(), x2.begin(), x2.end());
304
305
306  // testing weighted
307
308  suite.err() << "testing weighted percentile2" << std::endl;
309  std::vector<utility::DataWeight> xw(x.size());
310  for (size_t i=0; i<xw.size(); ++i) {
311    xw[i].data() = x[i];
312    xw[i].weight() = 1.0;
313  }
314  const std::vector<utility::DataWeight> xw_orig(xw);
315  suite.err() << "testing weighted" << std::endl;
316  test_percentiler(suite, xw.begin(), xw.end(), 0, 1);
317  test_percentiler(suite, xw.begin(), xw.end(), 100, 6);
318  test_percentiler(suite, xw.begin(), xw.end(), 49, 3);
319  test_percentiler(suite, xw.begin(), xw.end(), 51, 4);
320  test_percentiler(suite, xw.begin(), xw.end(), 50, 3.5);
321  test_percentiler(suite, x.begin(), x.end(), 10, 1);
322
323  suite.err() << "testing weighted with unity weights" << std::endl;
324  cmp_percentiler(suite, x.begin(), x.end(), xw.begin(), xw.end());
325
326  suite.err() << "testing that w=0 equals removed data point\n";
327  xw=xw_orig;
328  std::vector<utility::DataWeight> xw2(xw_orig);
329  xw[3].weight() = 0.0;
330  xw2.erase(xw2.begin()+3);
331  cmp_percentiler(suite, xw.begin(), xw.end(), xw2.begin(), xw2.end());
332
333  suite.err() << "testing rescaling of weights\n";
334  xw2 = xw;
335  for (size_t i=0; i<xw2.size(); ++i)
336    xw2[i].weight()*=2;
337  cmp_percentiler(suite, xw.begin(), xw.end(), xw2.begin(), xw2.end());
338
339  // do not run compiler test
340  if (false) {
341    statistics::Percentiler percentiler(50);
342    using boost::random_access_iterator_archetype;
343    typedef random_access_iterator_archetype<double> Iterator;
344    double x = percentiler(Iterator(), Iterator()); 
345    test::dummie_function(x);
346    using utility::DataWeight;
347    typedef random_access_iterator_archetype<DataWeight> WeigtedItererator;
348    x = percentiler(WeigtedItererator(), WeigtedItererator());
349    test::dummie_function(x);
350  }
351}
352
353void test_percentiler_nan(test::Suite& suite)
354{
355  using utility::DataWeight;
356  std::vector<double> v;
357  v.push_back(1);
358  v.push_back(10);
359  v.push_back(4);
360  v.push_back(2);
361  std::vector<DataWeight> wv(5);
362  wv[0] = DataWeight(v[0]);
363  wv[1] = DataWeight(v[1]);
364  wv[2] = DataWeight(std::numeric_limits<double>::quiet_NaN(), 0.0);
365  wv[3] = DataWeight(v[2]);
366  wv[4] = DataWeight(v[3]);
367
368  cmp_percentiler(suite, v.begin(), v.end(), wv.begin(), wv.end());
369}
370
371template<typename RandomAccessIterator>
372void test_percentiler(test::Suite& suite, 
373                      RandomAccessIterator first, 
374                      RandomAccessIterator last,
375                      double p, double correct)
376{
377  using statistics::percentile2;
378  double x = percentile2(first, last, p);
379  if (!suite.add(suite.equal(x, correct, 10))) {
380    suite.err() << "Error in percentile2 for " << p << "th percentile \n";
381    suite.err() << "  calculated value: " << x << "\n";
382    suite.err() << "  expected value: " << correct << "\n";
383  }
384}
385
386template<typename RandomAccessIterator1, typename RandomAccessIterator2>
387void cmp_percentiler(test::Suite& suite, 
388                     RandomAccessIterator1 first1, 
389                     RandomAccessIterator1 last1,
390                     RandomAccessIterator2 first2,
391                     RandomAccessIterator2 last2)
392{
393  for (double p=0; p<=100; p+=10) {
394    double correct=statistics::percentile2(first1, last1, p);
395    test_percentiler(suite, first2, last2, p, correct);
396  }
397
398}
Note: See TracBrowser for help on using the repository browser.