source: trunk/test/statistics.cc @ 3550

Last change on this file since 3550 was 3550, checked in by Peter, 5 years ago

Update copyright years. Happy New Year

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 12.5 KB
Line 
1// $Id: statistics.cc 3550 2017-01-03 05:41:02Z peter $
2
3/*
4  Copyright (C) 2004 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2005 Peter Johansson
6  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
7  Copyright (C) 2007, 2008, 2009 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include <config.h>
27
28#include "Suite.h"
29
30#include "yat/classifier/Target.h"
31#include "yat/statistics/Average.h"
32#include "yat/statistics/utility.h"
33#include "yat/statistics/tTest.h"
34#include "yat/utility/DataWeight.h"
35#include "yat/utility/Matrix.h"
36#include "yat/utility/MatrixWeighted.h"
37#include "yat/utility/Vector.h"
38
39#include <boost/concept_archetype.hpp>
40#include <boost/iterator/iterator_archetypes.hpp>
41
42#include <cmath>
43#include <cstdlib>
44#include <iostream>
45#include <limits>
46#include <map>
47#include <vector>
48
49using namespace theplu::yat;
50void test_benjamini_hochberg(test::Suite&);
51void test_benjamini_hochberg_unsorted(test::Suite&);
52void test_entropy(test::Suite&);
53void test_mad(test::Suite&);
54void test_mutual_information(test::Suite&);
55
56void test_median_empty(test::Suite&);
57void test_percentiler(test::Suite&);
58void test_percentiler_nan(test::Suite&);
59
60template<typename RandomAccessIterator>
61void test_percentiler(test::Suite&, RandomAccessIterator,
62                      RandomAccessIterator,
63                      double p, double correct);
64
65template<typename RandomAccessIterator1, typename RandomAccessIterator2>
66void cmp_percentiler(test::Suite&,
67                     RandomAccessIterator1,
68                     RandomAccessIterator1,
69                     RandomAccessIterator2,
70                     RandomAccessIterator2);
71
72int main(int argc, char* argv[])
73{
74  test::Suite suite(argc, argv);
75
76  utility::Vector gsl_vec(10);
77  std::vector<double> data;
78  for (unsigned int i=0; i<10; i++){
79    data.push_back(static_cast<double>(i));
80    gsl_vec(i)=i;
81  }
82
83  double m=statistics::median(data.begin(), data.end());
84  double m_gsl=statistics::median(gsl_vec.begin(), gsl_vec.end());
85  if (m!=4.5 || m!=m_gsl)
86    suite.add(false);
87  if (false) {
88    using statistics::median;
89    typedef boost::iterator_archetypes::readable_iterator_t Access;
90    typedef boost::random_access_traversal_tag Traversal;
91    boost::iterator_archetype<double, Access, Traversal> input;
92    double x = median(input, input);
93    boost::iterator_archetype<utility::DataWeight, Access, Traversal> input2;
94    x = median(input2, input2);
95    test::avoid_compiler_warning(x);
96  }
97  statistics::percentile2(data.begin(), data.end(), 100);
98  data.resize(1);
99  statistics::median(data.begin(), data.end());
100  // testing percentile2
101  test_percentiler(suite);
102
103  // test weighted percentiler with NaNs
104  test_percentiler_nan(suite);
105
106  double skewness_gsl=statistics::skewness(gsl_vec);
107  if (!suite.equal(1-skewness_gsl, 1.0) )
108    suite.add(false);
109  double kurtosis_gsl=statistics::kurtosis(gsl_vec);
110  suite.add(suite.equal_fix(kurtosis_gsl,-1.5616363636363637113,1e-10));
111  statistics::Average func;
112  suite.add(suite.equal(func(gsl_vec.begin(), gsl_vec.end()),4.5));
113  // easiest way to get a weighted iterator
114  classifier::MatrixLookupWeighted mlw(10,20,2.0, 1.0);
115  suite.add(suite.equal(func(mlw.begin(), mlw.end()),2.0));
116  // do not run compiler test
117  if (false) {
118    statistics::Average average;
119    double x = average(boost::input_iterator_archetype<double>(),
120                       boost::input_iterator_archetype<double>());
121    test::avoid_compiler_warning(x);
122    using utility::DataWeight;
123    x = average(boost::input_iterator_archetype_no_proxy<DataWeight>(),
124                boost::input_iterator_archetype_no_proxy<DataWeight>());
125    test::avoid_compiler_warning(x);
126  }
127
128  test_mad(suite);
129
130  // do not run compiler test
131  if (false) {
132    statistics::tTest t_test;
133    classifier::Target target;
134    add(t_test, boost::forward_iterator_archetype<double>(),
135        boost::forward_iterator_archetype<double>(), target);
136    add(t_test, boost::forward_iterator_archetype<utility::DataWeight>(),
137        boost::forward_iterator_archetype<utility::DataWeight>(), target);
138  }
139  test_benjamini_hochberg(suite);
140  test_benjamini_hochberg_unsorted(suite);
141  test_entropy(suite);
142  test_median_empty(suite);
143  test_mutual_information(suite);
144  return suite.return_value();
145}
146
147
148void test_benjamini_hochberg(test::Suite& suite)
149{
150  std::vector<double> p;
151  p.push_back(0.0001);
152  p.push_back(0.01);
153  p.push_back(0.015);
154  p.push_back(0.5);
155  p.push_back(0.99);
156  std::vector<double> q(p.size());
157  statistics::benjamini_hochberg(p.begin(), p.end(), q.begin());
158  suite.add(suite.equal(q[0], p[0]*5));
159  suite.add(suite.equal(q[1], p[1]*2.5));
160  suite.add(suite.equal(q[2], 0.025));
161  suite.add(suite.equal(q[3], p[3]*1.25));
162  suite.add(suite.equal(q[4], 0.99));
163
164  // do nut run compiler test
165  if (false) {
166    using statistics::benjamini_hochberg;
167
168    typedef double Value;
169    typedef boost::iterator_archetypes::readable_iterator_t Access;
170    typedef boost::bidirectional_traversal_tag Traversal;
171    typedef boost::iterator_archetypes::readable_writable_iterator_t Access2;
172    typedef boost::bidirectional_traversal_tag Traversal2;
173
174    boost::iterator_archetype<Value, Access, Traversal> iterator;
175    boost::iterator_archetype<Value, Access2, Traversal2> iterator2;
176
177    benjamini_hochberg(iterator, iterator, iterator2);
178  }
179}
180
181
182void test_benjamini_hochberg_unsorted(test::Suite& suite)
183{
184  std::vector<double> p;
185  p.push_back(0.015);
186  p.push_back(0.0001);
187  p.push_back(0.01);
188  p.push_back(0.5);
189  p.push_back(0.99);
190  std::vector<double> q(p.size());
191  statistics::benjamini_hochberg_unsorted(p.begin(), p.end(), q.begin());
192  suite.add(suite.equal(q[1], p[1]*5));
193  suite.add(suite.equal(q[2], p[2]*2.5));
194  suite.add(suite.equal(q[0], 0.025));
195  suite.add(suite.equal(q[3], p[3]*1.25));
196  suite.add(suite.equal(q[4], 0.99));
197
198  // do nut run compiler test
199  if (false) {
200    using statistics::benjamini_hochberg_unsorted;
201
202    typedef double Value;
203    typedef boost::iterator_archetypes::readable_iterator_t Access;
204    typedef boost::random_access_traversal_tag Traversal;
205    typedef boost::iterator_archetypes::readable_writable_iterator_t Access2;
206    typedef boost::random_access_traversal_tag Traversal2;
207
208    boost::iterator_archetype<Value, Access, Traversal> input;
209    boost::iterator_archetype<Value, Access2, Traversal2> result;
210
211    benjamini_hochberg_unsorted(input, input, result);
212  }
213}
214
215
216void test_entropy(test::Suite& suite)
217{
218  suite.out() << "testing entropy(2)\n";
219  using statistics::entropy;
220  std::vector<int> x(10000,0);
221  x[512] = 42;
222  double e = entropy(x.begin(), x.end());
223  if (e>1e-15) {
224    suite.add(false);
225    suite.out() << "entropy: " << e << " expected close to 0\n";
226  }
227  x[0] = 42;
228  e = entropy(x.begin(), x.end());
229  if (e<=0) {
230    suite.add(false);
231    suite.out() << "entropy: " << e << " expected > 0\n";
232  }
233
234  // do not run compiler test
235  if (false) {
236    entropy(boost::input_iterator_archetype<double>(),
237            boost::input_iterator_archetype<double>());
238  }
239}
240
241
242void test_mad(test::Suite& suite)
243{
244  suite.err() << "testing mad" << std::endl;
245  utility::Vector x(3);
246  x(0) = 3;
247  x(1) = 1;
248  x(2) = 100;
249  suite.add(suite.equal(statistics::mad(x.begin(), x.end()), 2));
250
251  std::vector<utility::DataWeight> wx(3);
252  wx[0] = utility::DataWeight(3, 0.4);
253  wx[1] = utility::DataWeight(1, 0.4);
254  wx[2] = utility::DataWeight(100, 0.6);
255  suite.add(suite.equal(statistics::mad(wx.begin(), wx.end()), 2));
256  // do not run compiler test
257  if (false) {
258    using statistics::mad;
259    typedef boost::iterator_archetypes::readable_iterator_t Access;
260    typedef boost::random_access_traversal_tag Traversal;
261    boost::iterator_archetype<double, Access, Traversal> input;
262    double x = mad(input, input);
263    boost::iterator_archetype<utility::DataWeight, Access, Traversal> input2;
264    x = mad(input2, input2);
265    test::avoid_compiler_warning(x);
266  }
267}
268
269
270void test_mutual_information(test::Suite& suite)
271{
272  suite.out() << "testing mutual_information\n";
273  using statistics::mutual_information;
274  utility::Matrix x(2,2);
275  x(0,0) = 100;
276  x(1,1) = 100;
277  double mi = mutual_information(x);
278  if (!suite.add(suite.equal(mi,1.0,100))) {
279    suite.err() << "error: mutual information: " << mi << "\n";
280  }
281
282  // testing a non-square Matrix
283  x.resize(3,4,0);
284  x(0,0) = 1;
285  x(1,1) = 1;
286  x(2,2) = 1;
287  x(2,3) = 1;
288  mi = mutual_information(x);
289  suite.out() << "mi: " << mi << "\n";
290}
291
292
293// test for ticket #660
294void test_median_empty(test::Suite& suite)
295{
296  std::vector<double> x;
297  double m = 0;
298  m = statistics::median(x.begin(), x.end(), true);
299  test::avoid_compiler_warning(m);
300}
301
302
303void test_percentiler(test::Suite& suite)
304{
305  suite.err() << "testing unweighted percentile2" << std::endl;
306  std::vector<double> x;
307  x.reserve(6);
308  for (unsigned int i=0; i<5; i++){
309    x.push_back(static_cast<double>(i+1));
310  }
311  test_percentiler(suite, x.begin(), x.end(), 50, 3);
312  x.push_back(6);
313  test_percentiler(suite, x.begin(), x.end(), 50, 3.5);
314  test_percentiler(suite, x.begin(), x.end(), 25, 2);
315  test_percentiler(suite, x.begin(), x.end(), 0, 1);
316  test_percentiler(suite, x.begin(), x.end(), 10, 1);
317
318  suite.err() << "testing duplication of data\n";
319  std::vector<double> x2(x);
320  for (size_t i=0; i<x.size(); ++i)
321    x2.push_back(x[i]);
322  cmp_percentiler(suite, x.begin(), x.end(), x2.begin(), x2.end());
323
324
325  // testing weighted
326
327  suite.err() << "testing weighted percentile2" << std::endl;
328  std::vector<utility::DataWeight> xw(x.size());
329  for (size_t i=0; i<xw.size(); ++i) {
330    xw[i].data() = x[i];
331    xw[i].weight() = 1.0;
332  }
333  const std::vector<utility::DataWeight> xw_orig(xw);
334  suite.err() << "testing weighted" << std::endl;
335  test_percentiler(suite, xw.begin(), xw.end(), 0, 1);
336  test_percentiler(suite, xw.begin(), xw.end(), 100, 6);
337  test_percentiler(suite, xw.begin(), xw.end(), 49, 3);
338  test_percentiler(suite, xw.begin(), xw.end(), 51, 4);
339  test_percentiler(suite, xw.begin(), xw.end(), 50, 3.5);
340  test_percentiler(suite, x.begin(), x.end(), 10, 1);
341
342  suite.err() << "testing weighted with unity weights" << std::endl;
343  cmp_percentiler(suite, x.begin(), x.end(), xw.begin(), xw.end());
344
345  suite.err() << "testing that w=0 equals removed data point\n";
346  xw=xw_orig;
347  std::vector<utility::DataWeight> xw2(xw_orig);
348  xw[3].weight() = 0.0;
349  xw2.erase(xw2.begin()+3);
350  cmp_percentiler(suite, xw.begin(), xw.end(), xw2.begin(), xw2.end());
351
352  suite.err() << "testing rescaling of weights\n";
353  xw2 = xw;
354  for (size_t i=0; i<xw2.size(); ++i)
355    xw2[i].weight()*=2;
356  cmp_percentiler(suite, xw.begin(), xw.end(), xw2.begin(), xw2.end());
357
358  // do not run compiler test
359  if (false) {
360    statistics::Percentiler percentiler(50);
361    typedef boost::iterator_archetypes::readable_iterator_t Access;
362    typedef boost::random_access_traversal_tag Traversal;
363    boost::iterator_archetype<double, Access, Traversal> input;
364    double x = percentiler(input, input);
365    boost::iterator_archetype<utility::DataWeight, Access, Traversal> input2;
366    x = percentiler(input2, input2);
367    test::avoid_compiler_warning(x);
368  }
369}
370
371void test_percentiler_nan(test::Suite& suite)
372{
373  using utility::DataWeight;
374  std::vector<double> v;
375  v.push_back(1);
376  v.push_back(10);
377  v.push_back(4);
378  v.push_back(2);
379  std::vector<DataWeight> wv(5);
380  wv[0] = DataWeight(v[0]);
381  wv[1] = DataWeight(v[1]);
382  wv[2] = DataWeight(std::numeric_limits<double>::quiet_NaN(), 0.0);
383  wv[3] = DataWeight(v[2]);
384  wv[4] = DataWeight(v[3]);
385
386  cmp_percentiler(suite, v.begin(), v.end(), wv.begin(), wv.end());
387}
388
389template<typename RandomAccessIterator>
390void test_percentiler(test::Suite& suite, 
391                      RandomAccessIterator first, 
392                      RandomAccessIterator last,
393                      double p, double correct)
394{
395  using statistics::percentile2;
396  double x = percentile2(first, last, p);
397  if (!suite.add(suite.equal(x, correct, 10))) {
398    suite.err() << "Error in percentile2 for " << p << "th percentile \n";
399    suite.err() << "  calculated value: " << x << "\n";
400    suite.err() << "  expected value: " << correct << "\n";
401  }
402}
403
404template<typename RandomAccessIterator1, typename RandomAccessIterator2>
405void cmp_percentiler(test::Suite& suite, 
406                     RandomAccessIterator1 first1, 
407                     RandomAccessIterator1 last1,
408                     RandomAccessIterator2 first2,
409                     RandomAccessIterator2 last2)
410{
411  for (double p=0; p<=100; p+=10) {
412    double correct=statistics::percentile2(first1, last1, p);
413    test_percentiler(suite, first2, last2, p, correct);
414  }
415
416}
Note: See TracBrowser for help on using the repository browser.