source: trunk/test/statistics.cc @ 3745

Last change on this file since 3745 was 3745, checked in by Peter, 5 years ago

add function calculating correlation matrix

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 12.8 KB
Line 
1// $Id: statistics.cc 3745 2018-07-27 00:27:02Z peter $
2
3/*
4  Copyright (C) 2004 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2005 Peter Johansson
6  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
7  Copyright (C) 2007, 2008, 2009 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2010, 2011, 2012, 2013, 2014, 2016 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include <config.h>
27
28#include "Suite.h"
29
30#include "yat/classifier/Target.h"
31#include "yat/statistics/Average.h"
32#include "yat/statistics/utility.h"
33#include "yat/statistics/tTest.h"
34#include "yat/utility/DataWeight.h"
35#include "yat/utility/Matrix.h"
36#include "yat/utility/MatrixWeighted.h"
37#include "yat/utility/Vector.h"
38
39#include <boost/concept_archetype.hpp>
40#include <boost/iterator/iterator_archetypes.hpp>
41
42#include <cmath>
43#include <cstdlib>
44#include <iostream>
45#include <limits>
46#include <map>
47#include <vector>
48
49using namespace theplu::yat;
50void test_benjamini_hochberg(test::Suite&);
51void test_benjamini_hochberg_unsorted(test::Suite&);
52void test_correlation(test::Suite& suite);
53void test_entropy(test::Suite&);
54void test_mad(test::Suite&);
55void test_mutual_information(test::Suite&);
56
57void test_median_empty(test::Suite&);
58void test_percentiler(test::Suite&);
59void test_percentiler_nan(test::Suite&);
60
61template<typename RandomAccessIterator>
62void test_percentiler(test::Suite&, RandomAccessIterator,
63                      RandomAccessIterator,
64                      double p, double correct);
65
66template<typename RandomAccessIterator1, typename RandomAccessIterator2>
67void cmp_percentiler(test::Suite&,
68                     RandomAccessIterator1,
69                     RandomAccessIterator1,
70                     RandomAccessIterator2,
71                     RandomAccessIterator2);
72
73int main(int argc, char* argv[])
74{
75  test::Suite suite(argc, argv);
76
77  utility::Vector gsl_vec(10);
78  std::vector<double> data;
79  for (unsigned int i=0; i<10; i++){
80    data.push_back(static_cast<double>(i));
81    gsl_vec(i)=i;
82  }
83
84  double m=statistics::median(data.begin(), data.end());
85  double m_gsl=statistics::median(gsl_vec.begin(), gsl_vec.end());
86  if (m!=4.5 || m!=m_gsl)
87    suite.add(false);
88  if (false) {
89    using statistics::median;
90    typedef boost::iterator_archetypes::readable_iterator_t Access;
91    typedef boost::random_access_traversal_tag Traversal;
92    boost::iterator_archetype<double, Access, Traversal> input;
93    double x = median(input, input);
94    boost::iterator_archetype<utility::DataWeight, Access, Traversal> input2;
95    x = median(input2, input2);
96    test::avoid_compiler_warning(x);
97  }
98  statistics::percentile2(data.begin(), data.end(), 100);
99  data.resize(1);
100  statistics::median(data.begin(), data.end());
101  // testing percentile2
102  test_percentiler(suite);
103
104  // test weighted percentiler with NaNs
105  test_percentiler_nan(suite);
106
107  double skewness_gsl=statistics::skewness(gsl_vec);
108  if (!suite.equal(1-skewness_gsl, 1.0) )
109    suite.add(false);
110  double kurtosis_gsl=statistics::kurtosis(gsl_vec);
111  suite.add(suite.equal_fix(kurtosis_gsl,-1.5616363636363637113,1e-10));
112  statistics::Average func;
113  suite.add(suite.equal(func(gsl_vec.begin(), gsl_vec.end()),4.5));
114  // easiest way to get a weighted iterator
115  classifier::MatrixLookupWeighted mlw(10,20,2.0, 1.0);
116  suite.add(suite.equal(func(mlw.begin(), mlw.end()),2.0));
117  // do not run compiler test
118  if (false) {
119    statistics::Average average;
120    double x = average(boost::input_iterator_archetype<double>(),
121                       boost::input_iterator_archetype<double>());
122    test::avoid_compiler_warning(x);
123    using utility::DataWeight;
124    x = average(boost::input_iterator_archetype_no_proxy<DataWeight>(),
125                boost::input_iterator_archetype_no_proxy<DataWeight>());
126    test::avoid_compiler_warning(x);
127  }
128
129  test_mad(suite);
130
131  // do not run compiler test
132  if (false) {
133    statistics::tTest t_test;
134    classifier::Target target;
135    add(t_test, boost::forward_iterator_archetype<double>(),
136        boost::forward_iterator_archetype<double>(), target);
137    add(t_test, boost::forward_iterator_archetype<utility::DataWeight>(),
138        boost::forward_iterator_archetype<utility::DataWeight>(), target);
139  }
140  test_benjamini_hochberg(suite);
141  test_benjamini_hochberg_unsorted(suite);
142  test_entropy(suite);
143  test_median_empty(suite);
144  test_mutual_information(suite);
145  test_correlation(suite);
146  return suite.return_value();
147}
148
149
150void test_correlation(test::Suite& suite)
151{
152  utility::Matrix x(10,100);
153  utility::Matrix C = statistics::correlation(x);
154  if (!suite.equal(C(0,0), 1.0)) {
155    suite.add(false);
156    suite.err() << "correlation element(0, 0) not unity\n";
157  }
158}
159
160
161void test_benjamini_hochberg(test::Suite& suite)
162{
163  std::vector<double> p;
164  p.push_back(0.0001);
165  p.push_back(0.01);
166  p.push_back(0.015);
167  p.push_back(0.5);
168  p.push_back(0.99);
169  std::vector<double> q(p.size());
170  statistics::benjamini_hochberg(p.begin(), p.end(), q.begin());
171  suite.add(suite.equal(q[0], p[0]*5));
172  suite.add(suite.equal(q[1], p[1]*2.5));
173  suite.add(suite.equal(q[2], 0.025));
174  suite.add(suite.equal(q[3], p[3]*1.25));
175  suite.add(suite.equal(q[4], 0.99));
176
177  // do nut run compiler test
178  if (false) {
179    using statistics::benjamini_hochberg;
180
181    typedef double Value;
182    typedef boost::iterator_archetypes::readable_iterator_t Access;
183    typedef boost::bidirectional_traversal_tag Traversal;
184    typedef boost::iterator_archetypes::readable_writable_iterator_t Access2;
185    typedef boost::bidirectional_traversal_tag Traversal2;
186
187    boost::iterator_archetype<Value, Access, Traversal> iterator;
188    boost::iterator_archetype<Value, Access2, Traversal2> iterator2;
189
190    benjamini_hochberg(iterator, iterator, iterator2);
191  }
192}
193
194
195void test_benjamini_hochberg_unsorted(test::Suite& suite)
196{
197  std::vector<double> p;
198  p.push_back(0.015);
199  p.push_back(0.0001);
200  p.push_back(0.01);
201  p.push_back(0.5);
202  p.push_back(0.99);
203  std::vector<double> q(p.size());
204  statistics::benjamini_hochberg_unsorted(p.begin(), p.end(), q.begin());
205  suite.add(suite.equal(q[1], p[1]*5));
206  suite.add(suite.equal(q[2], p[2]*2.5));
207  suite.add(suite.equal(q[0], 0.025));
208  suite.add(suite.equal(q[3], p[3]*1.25));
209  suite.add(suite.equal(q[4], 0.99));
210
211  // do nut run compiler test
212  if (false) {
213    using statistics::benjamini_hochberg_unsorted;
214
215    typedef double Value;
216    typedef boost::iterator_archetypes::readable_iterator_t Access;
217    typedef boost::random_access_traversal_tag Traversal;
218    typedef boost::iterator_archetypes::readable_writable_iterator_t Access2;
219    typedef boost::random_access_traversal_tag Traversal2;
220
221    boost::iterator_archetype<Value, Access, Traversal> input;
222    boost::iterator_archetype<Value, Access2, Traversal2> result;
223
224    benjamini_hochberg_unsorted(input, input, result);
225  }
226}
227
228
229void test_entropy(test::Suite& suite)
230{
231  suite.out() << "testing entropy(2)\n";
232  using statistics::entropy;
233  std::vector<int> x(10000,0);
234  x[512] = 42;
235  double e = entropy(x.begin(), x.end());
236  if (e>1e-15) {
237    suite.add(false);
238    suite.out() << "entropy: " << e << " expected close to 0\n";
239  }
240  x[0] = 42;
241  e = entropy(x.begin(), x.end());
242  if (e<=0) {
243    suite.add(false);
244    suite.out() << "entropy: " << e << " expected > 0\n";
245  }
246
247  // do not run compiler test
248  if (false) {
249    entropy(boost::input_iterator_archetype<double>(),
250            boost::input_iterator_archetype<double>());
251  }
252}
253
254
255void test_mad(test::Suite& suite)
256{
257  suite.err() << "testing mad" << std::endl;
258  utility::Vector x(3);
259  x(0) = 3;
260  x(1) = 1;
261  x(2) = 100;
262  suite.add(suite.equal(statistics::mad(x.begin(), x.end()), 2));
263
264  std::vector<utility::DataWeight> wx(3);
265  wx[0] = utility::DataWeight(3, 0.4);
266  wx[1] = utility::DataWeight(1, 0.4);
267  wx[2] = utility::DataWeight(100, 0.6);
268  suite.add(suite.equal(statistics::mad(wx.begin(), wx.end()), 2));
269  // do not run compiler test
270  if (false) {
271    using statistics::mad;
272    typedef boost::iterator_archetypes::readable_iterator_t Access;
273    typedef boost::random_access_traversal_tag Traversal;
274    boost::iterator_archetype<double, Access, Traversal> input;
275    double x = mad(input, input);
276    boost::iterator_archetype<utility::DataWeight, Access, Traversal> input2;
277    x = mad(input2, input2);
278    test::avoid_compiler_warning(x);
279  }
280}
281
282
283void test_mutual_information(test::Suite& suite)
284{
285  suite.out() << "testing mutual_information\n";
286  using statistics::mutual_information;
287  utility::Matrix x(2,2);
288  x(0,0) = 100;
289  x(1,1) = 100;
290  double mi = mutual_information(x);
291  if (!suite.add(suite.equal(mi,1.0,100))) {
292    suite.err() << "error: mutual information: " << mi << "\n";
293  }
294
295  // testing a non-square Matrix
296  x.resize(3,4,0);
297  x(0,0) = 1;
298  x(1,1) = 1;
299  x(2,2) = 1;
300  x(2,3) = 1;
301  mi = mutual_information(x);
302  suite.out() << "mi: " << mi << "\n";
303}
304
305
306// test for ticket #660
307void test_median_empty(test::Suite& suite)
308{
309  std::vector<double> x;
310  double m = 0;
311  m = statistics::median(x.begin(), x.end(), true);
312  test::avoid_compiler_warning(m);
313}
314
315
316void test_percentiler(test::Suite& suite)
317{
318  suite.err() << "testing unweighted percentile2" << std::endl;
319  std::vector<double> x;
320  x.reserve(6);
321  for (unsigned int i=0; i<5; i++){
322    x.push_back(static_cast<double>(i+1));
323  }
324  test_percentiler(suite, x.begin(), x.end(), 50, 3);
325  x.push_back(6);
326  test_percentiler(suite, x.begin(), x.end(), 50, 3.5);
327  test_percentiler(suite, x.begin(), x.end(), 25, 2);
328  test_percentiler(suite, x.begin(), x.end(), 0, 1);
329  test_percentiler(suite, x.begin(), x.end(), 10, 1);
330
331  suite.err() << "testing duplication of data\n";
332  std::vector<double> x2(x);
333  for (size_t i=0; i<x.size(); ++i)
334    x2.push_back(x[i]);
335  cmp_percentiler(suite, x.begin(), x.end(), x2.begin(), x2.end());
336
337
338  // testing weighted
339
340  suite.err() << "testing weighted percentile2" << std::endl;
341  std::vector<utility::DataWeight> xw(x.size());
342  for (size_t i=0; i<xw.size(); ++i) {
343    xw[i].data() = x[i];
344    xw[i].weight() = 1.0;
345  }
346  const std::vector<utility::DataWeight> xw_orig(xw);
347  suite.err() << "testing weighted" << std::endl;
348  test_percentiler(suite, xw.begin(), xw.end(), 0, 1);
349  test_percentiler(suite, xw.begin(), xw.end(), 100, 6);
350  test_percentiler(suite, xw.begin(), xw.end(), 49, 3);
351  test_percentiler(suite, xw.begin(), xw.end(), 51, 4);
352  test_percentiler(suite, xw.begin(), xw.end(), 50, 3.5);
353  test_percentiler(suite, x.begin(), x.end(), 10, 1);
354
355  suite.err() << "testing weighted with unity weights" << std::endl;
356  cmp_percentiler(suite, x.begin(), x.end(), xw.begin(), xw.end());
357
358  suite.err() << "testing that w=0 equals removed data point\n";
359  xw=xw_orig;
360  std::vector<utility::DataWeight> xw2(xw_orig);
361  xw[3].weight() = 0.0;
362  xw2.erase(xw2.begin()+3);
363  cmp_percentiler(suite, xw.begin(), xw.end(), xw2.begin(), xw2.end());
364
365  suite.err() << "testing rescaling of weights\n";
366  xw2 = xw;
367  for (size_t i=0; i<xw2.size(); ++i)
368    xw2[i].weight()*=2;
369  cmp_percentiler(suite, xw.begin(), xw.end(), xw2.begin(), xw2.end());
370
371  // do not run compiler test
372  if (false) {
373    statistics::Percentiler percentiler(50);
374    typedef boost::iterator_archetypes::readable_iterator_t Access;
375    typedef boost::random_access_traversal_tag Traversal;
376    boost::iterator_archetype<double, Access, Traversal> input;
377    double x = percentiler(input, input);
378    boost::iterator_archetype<utility::DataWeight, Access, Traversal> input2;
379    x = percentiler(input2, input2);
380    test::avoid_compiler_warning(x);
381  }
382}
383
384void test_percentiler_nan(test::Suite& suite)
385{
386  using utility::DataWeight;
387  std::vector<double> v;
388  v.push_back(1);
389  v.push_back(10);
390  v.push_back(4);
391  v.push_back(2);
392  std::vector<DataWeight> wv(5);
393  wv[0] = DataWeight(v[0]);
394  wv[1] = DataWeight(v[1]);
395  wv[2] = DataWeight(std::numeric_limits<double>::quiet_NaN(), 0.0);
396  wv[3] = DataWeight(v[2]);
397  wv[4] = DataWeight(v[3]);
398
399  cmp_percentiler(suite, v.begin(), v.end(), wv.begin(), wv.end());
400}
401
402template<typename RandomAccessIterator>
403void test_percentiler(test::Suite& suite, 
404                      RandomAccessIterator first, 
405                      RandomAccessIterator last,
406                      double p, double correct)
407{
408  using statistics::percentile2;
409  double x = percentile2(first, last, p);
410  if (!suite.add(suite.equal(x, correct, 10))) {
411    suite.err() << "Error in percentile2 for " << p << "th percentile \n";
412    suite.err() << "  calculated value: " << x << "\n";
413    suite.err() << "  expected value: " << correct << "\n";
414  }
415}
416
417template<typename RandomAccessIterator1, typename RandomAccessIterator2>
418void cmp_percentiler(test::Suite& suite, 
419                     RandomAccessIterator1 first1, 
420                     RandomAccessIterator1 last1,
421                     RandomAccessIterator2 first2,
422                     RandomAccessIterator2 last2)
423{
424  for (double p=0; p<=100; p+=10) {
425    double correct=statistics::percentile2(first1, last1, p);
426    test_percentiler(suite, first2, last2, p, correct);
427  }
428
429}
Note: See TracBrowser for help on using the repository browser.