source: trunk/test/statistics.cc @ 3137

Last change on this file since 3137 was 3137, checked in by Peter, 8 years ago

closes #772. new function that calculates mutual information

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 11.0 KB
Line 
1// $Id: statistics.cc 3137 2013-11-28 04:11:23Z peter $
2
3/*
4  Copyright (C) 2004 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2005 Peter Johansson
6  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
7  Copyright (C) 2007, 2008, 2009 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2010, 2011, 2012, 2013 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include <config.h>
27
28#include "Suite.h"
29
30#include "yat/classifier/Target.h"
31#include "yat/statistics/Average.h"
32#include "yat/statistics/utility.h"
33#include "yat/statistics/tTest.h"
34#include "yat/utility/DataWeight.h"
35#include "yat/utility/Matrix.h"
36#include "yat/utility/MatrixWeighted.h"
37#include "yat/utility/Vector.h"
38
39#include <boost/concept_archetype.hpp>
40
41#include <cmath>
42#include <cstdlib>
43#include <iostream>
44#include <limits>
45#include <map>
46#include <vector>
47
48using namespace theplu::yat;
49void test_benjamini_hochberg(test::Suite&);
50void test_entropy(test::Suite&);
51void test_mad(test::Suite&);
52void test_mutual_information(test::Suite&);
53
54void test_median_empty(test::Suite&);
55void test_percentiler(test::Suite&);
56void test_percentiler_nan(test::Suite&);
57
58template<typename RandomAccessIterator>
59void test_percentiler(test::Suite&, RandomAccessIterator,
60                      RandomAccessIterator,
61                      double p, double correct);
62
63template<typename RandomAccessIterator1, typename RandomAccessIterator2>
64void cmp_percentiler(test::Suite&,
65                     RandomAccessIterator1,
66                     RandomAccessIterator1,
67                     RandomAccessIterator2,
68                     RandomAccessIterator2);
69
70int main(int argc, char* argv[])
71{
72  test::Suite suite(argc, argv);
73
74  utility::Vector gsl_vec(10);
75  std::vector<double> data;
76  for (unsigned int i=0; i<10; i++){
77    data.push_back(static_cast<double>(i));
78    gsl_vec(i)=i;
79  }
80
81  double m=statistics::median(data.begin(), data.end());
82  double m_gsl=statistics::median(gsl_vec.begin(), gsl_vec.end());
83  if (m!=4.5 || m!=m_gsl)
84    suite.add(false);
85  if (false) {
86    using statistics::median;
87    double x = median(boost::random_access_iterator_archetype<double>(),
88                      boost::random_access_iterator_archetype<double>());
89    test::dummie_function(x);
90    x = median(boost::random_access_iterator_archetype<utility::DataWeight>(),
91               boost::random_access_iterator_archetype<utility::DataWeight>());
92    test::dummie_function(x);
93  }
94  statistics::percentile2(data.begin(), data.end(), 100);
95  data.resize(1);
96  statistics::median(data.begin(), data.end());
97  // testing percentile2
98  test_percentiler(suite);
99
100  // test weighted percentiler with NaNs
101  test_percentiler_nan(suite);
102
103  double skewness_gsl=statistics::skewness(gsl_vec);
104  if (!suite.equal(1-skewness_gsl, 1.0) )
105    suite.add(false);
106  double kurtosis_gsl=statistics::kurtosis(gsl_vec);
107  suite.add(suite.equal_fix(kurtosis_gsl,-1.5616363636363637113,1e-10));
108  statistics::Average func;
109  suite.add(suite.equal(func(gsl_vec.begin(), gsl_vec.end()),4.5));
110  // easiest way to get a weighted iterator
111  classifier::MatrixLookupWeighted mlw(10,20,2.0, 1.0);
112  suite.add(suite.equal(func(mlw.begin(), mlw.end()),2.0));
113  // do not run compiler test
114  if (false) {
115    statistics::Average average;
116    double x = average(boost::input_iterator_archetype<double>(),
117                       boost::input_iterator_archetype<double>());
118    test::dummie_function(x);
119    using utility::DataWeight;
120    x = average(boost::input_iterator_archetype_no_proxy<DataWeight>(),
121                boost::input_iterator_archetype_no_proxy<DataWeight>());
122    test::dummie_function(x);
123  }
124
125  test_mad(suite);
126
127  // do not run compiler test
128  if (false) {
129    statistics::tTest t_test;
130    classifier::Target target;
131    add(t_test, boost::forward_iterator_archetype<double>(),
132        boost::forward_iterator_archetype<double>(), target);
133    add(t_test, boost::forward_iterator_archetype<utility::DataWeight>(),
134        boost::forward_iterator_archetype<utility::DataWeight>(), target);
135  }
136  test_benjamini_hochberg(suite);
137  test_entropy(suite);
138  test_median_empty(suite);
139  test_mutual_information(suite);
140  return suite.return_value();
141}
142
143
144void test_benjamini_hochberg(test::Suite& suite)
145{
146  std::vector<double> p;
147  p.push_back(0.0001);
148  p.push_back(0.01);
149  p.push_back(0.015);
150  p.push_back(0.5);
151  p.push_back(0.99);
152  std::vector<double> q(p.size());
153  statistics::benjamini_hochberg(p.begin(), p.end(), q.begin());
154  suite.add(suite.equal(q[0], p[0]*5));
155  suite.add(suite.equal(q[1], p[1]*2.5));
156  suite.add(suite.equal(q[2], 0.025));
157  suite.add(suite.equal(q[3], p[3]*1.25));
158  suite.add(suite.equal(q[4], 0.99));
159
160  // do nut run compiler test
161  if (false) {
162    using statistics::benjamini_hochberg;
163    benjamini_hochberg(boost::bidirectional_iterator_archetype<double>(),
164                       boost::bidirectional_iterator_archetype<double>(),
165                       boost::mutable_bidirectional_iterator_archetype<double>());
166  }
167}
168
169
170void test_entropy(test::Suite& suite)
171{
172  suite.out() << "testing entropy(2)\n";
173  using statistics::entropy;
174  std::vector<int> x(10000,0);
175  x[512] = 42;
176  double e = entropy(x.begin(), x.end());
177  if (e>1e-15) {
178    suite.add(false);
179    suite.out() << "entropy: " << e << " expected close to 0\n";
180  }
181  x[0] = 42;
182  e = entropy(x.begin(), x.end());
183  if (e<=0) {
184    suite.add(false);
185    suite.out() << "entropy: " << e << " expected > 0\n";
186  }
187
188  // do not run compiler test
189  if (false) {
190    entropy(boost::input_iterator_archetype<double>(),
191            boost::input_iterator_archetype<double>());
192  }
193}
194
195
196void test_mad(test::Suite& suite)
197{
198  suite.err() << "testing mad" << std::endl;
199  utility::Vector x(3);
200  x(0) = 3;
201  x(1) = 1;
202  x(2) = 100;
203  suite.add(suite.equal(statistics::mad(x.begin(), x.end()), 2));
204
205  std::vector<utility::DataWeight> wx(3);
206  wx[0] = utility::DataWeight(3, 0.4);
207  wx[1] = utility::DataWeight(1, 0.4);
208  wx[2] = utility::DataWeight(100, 0.6);
209  suite.add(suite.equal(statistics::mad(wx.begin(), wx.end()), 2));
210  // do not run compiler test
211  if (false) {
212    using statistics::mad;
213    double x = mad(boost::random_access_iterator_archetype<double>(),
214                   boost::random_access_iterator_archetype<double>());
215    test::dummie_function(x);
216    x = mad(boost::random_access_iterator_archetype<utility::DataWeight>(),
217            boost::random_access_iterator_archetype<utility::DataWeight>());
218    test::dummie_function(x);
219  }
220}
221
222
223void test_mutual_information(test::Suite& suite)
224{
225  suite.out() << "testing mutual_information\n";
226  using statistics::mutual_information;
227  utility::Matrix x(2,2);
228  x(0,0) = 100;
229  x(1,1) = 100;
230  double mi = mutual_information(x);
231  if (!suite.add(suite.equal(mi,1.0,100))) {
232    suite.err() << "error: mutual information: " << mi << "\n";
233  }
234
235  // testing a non-square Matrix
236  x.resize(3,4,0);
237  x(0,0) = 1;
238  x(1,1) = 1;
239  x(2,2) = 1;
240  x(2,3) = 1;
241  mi = mutual_information(x);
242  suite.out() << "mi: " << mi << "\n";
243}
244
245
246// test for ticket #660
247void test_median_empty(test::Suite& suite)
248{
249  std::vector<double> x;
250  double m = 0;
251  m = statistics::median(x.begin(), x.end(), true);
252  test::dummie_function(m);
253}
254
255
256void test_percentiler(test::Suite& suite)
257{
258  suite.err() << "testing unweighted percentile2" << std::endl;
259  std::vector<double> x;
260  x.reserve(6);
261  for (unsigned int i=0; i<5; i++){
262    x.push_back(static_cast<double>(i+1));
263  }
264  test_percentiler(suite, x.begin(), x.end(), 50, 3);
265  x.push_back(6);
266  test_percentiler(suite, x.begin(), x.end(), 50, 3.5);
267  test_percentiler(suite, x.begin(), x.end(), 25, 2);
268  test_percentiler(suite, x.begin(), x.end(), 0, 1);
269  test_percentiler(suite, x.begin(), x.end(), 10, 1);
270
271  suite.err() << "testing duplication of data\n";
272  std::vector<double> x2(x);
273  for (size_t i=0; i<x.size(); ++i)
274    x2.push_back(x[i]);
275  cmp_percentiler(suite, x.begin(), x.end(), x2.begin(), x2.end());
276
277
278  // testing weighted
279
280  suite.err() << "testing weighted percentile2" << std::endl;
281  std::vector<utility::DataWeight> xw(x.size());
282  for (size_t i=0; i<xw.size(); ++i) {
283    xw[i].data() = x[i];
284    xw[i].weight() = 1.0;
285  }
286  const std::vector<utility::DataWeight> xw_orig(xw);
287  suite.err() << "testing weighted" << std::endl;
288  test_percentiler(suite, xw.begin(), xw.end(), 0, 1);
289  test_percentiler(suite, xw.begin(), xw.end(), 100, 6);
290  test_percentiler(suite, xw.begin(), xw.end(), 49, 3);
291  test_percentiler(suite, xw.begin(), xw.end(), 51, 4);
292  test_percentiler(suite, xw.begin(), xw.end(), 50, 3.5);
293  test_percentiler(suite, x.begin(), x.end(), 10, 1);
294
295  suite.err() << "testing weighted with unity weights" << std::endl;
296  cmp_percentiler(suite, x.begin(), x.end(), xw.begin(), xw.end());
297
298  suite.err() << "testing that w=0 equals removed data point\n";
299  xw=xw_orig;
300  std::vector<utility::DataWeight> xw2(xw_orig);
301  xw[3].weight() = 0.0;
302  xw2.erase(xw2.begin()+3);
303  cmp_percentiler(suite, xw.begin(), xw.end(), xw2.begin(), xw2.end());
304
305  suite.err() << "testing rescaling of weights\n";
306  xw2 = xw;
307  for (size_t i=0; i<xw2.size(); ++i)
308    xw2[i].weight()*=2;
309  cmp_percentiler(suite, xw.begin(), xw.end(), xw2.begin(), xw2.end());
310
311  // do not run compiler test
312  if (false) {
313    statistics::Percentiler percentiler(50);
314    using boost::random_access_iterator_archetype;
315    typedef random_access_iterator_archetype<double> Iterator;
316    double x = percentiler(Iterator(), Iterator()); 
317    test::dummie_function(x);
318    using utility::DataWeight;
319    typedef random_access_iterator_archetype<DataWeight> WeigtedItererator;
320    x = percentiler(WeigtedItererator(), WeigtedItererator());
321    test::dummie_function(x);
322  }
323}
324
325void test_percentiler_nan(test::Suite& suite)
326{
327  using utility::DataWeight;
328  std::vector<double> v;
329  v.push_back(1);
330  v.push_back(10);
331  v.push_back(4);
332  v.push_back(2);
333  std::vector<DataWeight> wv(5);
334  wv[0] = DataWeight(v[0]);
335  wv[1] = DataWeight(v[1]);
336  wv[2] = DataWeight(std::numeric_limits<double>::quiet_NaN(), 0.0);
337  wv[3] = DataWeight(v[2]);
338  wv[4] = DataWeight(v[3]);
339
340  cmp_percentiler(suite, v.begin(), v.end(), wv.begin(), wv.end());
341}
342
343template<typename RandomAccessIterator>
344void test_percentiler(test::Suite& suite, 
345                      RandomAccessIterator first, 
346                      RandomAccessIterator last,
347                      double p, double correct)
348{
349  using statistics::percentile2;
350  double x = percentile2(first, last, p);
351  if (!suite.add(suite.equal(x, correct, 10))) {
352    suite.err() << "Error in percentile2 for " << p << "th percentile \n";
353    suite.err() << "  calculated value: " << x << "\n";
354    suite.err() << "  expected value: " << correct << "\n";
355  }
356}
357
358template<typename RandomAccessIterator1, typename RandomAccessIterator2>
359void cmp_percentiler(test::Suite& suite, 
360                     RandomAccessIterator1 first1, 
361                     RandomAccessIterator1 last1,
362                     RandomAccessIterator2 first2,
363                     RandomAccessIterator2 last2)
364{
365  for (double p=0; p<=100; p+=10) {
366    double correct=statistics::percentile2(first1, last1, p);
367    test_percentiler(suite, first2, last2, p, correct);
368  }
369
370}
Note: See TracBrowser for help on using the repository browser.