source: trunk/test/statistics.cc @ 3135

Last change on this file since 3135 was 3135, checked in by Peter, 8 years ago

add function that calculates entropy from a range

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 10.4 KB
Line 
1// $Id: statistics.cc 3135 2013-11-27 07:58:49Z peter $
2
3/*
4  Copyright (C) 2004 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2005 Peter Johansson
6  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
7  Copyright (C) 2007, 2008, 2009 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2010, 2011, 2012, 2013 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include <config.h>
27
28#include "Suite.h"
29
30#include "yat/classifier/Target.h"
31#include "yat/statistics/Average.h"
32#include "yat/statistics/utility.h"
33#include "yat/statistics/tTest.h"
34#include "yat/utility/DataWeight.h"
35#include "yat/utility/MatrixWeighted.h"
36#include "yat/utility/Vector.h"
37
38#include <boost/concept_archetype.hpp>
39
40#include <cmath>
41#include <cstdlib>
42#include <iostream>
43#include <limits>
44#include <map>
45#include <vector>
46
47using namespace theplu::yat;
48void test_benjamini_hochberg(test::Suite&);
49void test_entropy(test::Suite&);
50void test_mad(test::Suite&);
51
52void test_median_empty(test::Suite&);
53void test_percentiler(test::Suite&);
54void test_percentiler_nan(test::Suite&);
55
56template<typename RandomAccessIterator>
57void test_percentiler(test::Suite&, RandomAccessIterator, 
58                      RandomAccessIterator,
59                      double p, double correct);
60
61template<typename RandomAccessIterator1, typename RandomAccessIterator2>
62void cmp_percentiler(test::Suite&, 
63                     RandomAccessIterator1, 
64                     RandomAccessIterator1,
65                     RandomAccessIterator2,
66                     RandomAccessIterator2);
67
68int main(int argc, char* argv[])
69{ 
70  test::Suite suite(argc, argv);
71
72  utility::Vector gsl_vec(10);
73  std::vector<double> data;
74  for (unsigned int i=0; i<10; i++){
75    data.push_back(static_cast<double>(i));
76    gsl_vec(i)=i;
77  }
78
79  double m=statistics::median(data.begin(), data.end());
80  double m_gsl=statistics::median(gsl_vec.begin(), gsl_vec.end());
81  if (m!=4.5 || m!=m_gsl)
82    suite.add(false);
83  if (false) {
84    using statistics::median;
85    double x = median(boost::random_access_iterator_archetype<double>(),
86                      boost::random_access_iterator_archetype<double>());
87    test::dummie_function(x);
88    x = median(boost::random_access_iterator_archetype<utility::DataWeight>(),
89               boost::random_access_iterator_archetype<utility::DataWeight>());
90    test::dummie_function(x);
91  }
92  statistics::percentile2(data.begin(), data.end(), 100);
93  data.resize(1);
94  statistics::median(data.begin(), data.end());
95  // testing percentile2
96  test_percentiler(suite);
97
98  // test weighted percentiler with NaNs
99  test_percentiler_nan(suite);
100
101  double skewness_gsl=statistics::skewness(gsl_vec);
102  if (!suite.equal(1-skewness_gsl, 1.0) )
103    suite.add(false);
104  double kurtosis_gsl=statistics::kurtosis(gsl_vec);
105  suite.add(suite.equal_fix(kurtosis_gsl,-1.5616363636363637113,1e-10));
106  statistics::Average func;
107  suite.add(suite.equal(func(gsl_vec.begin(), gsl_vec.end()),4.5));
108  // easiest way to get a weighted iterator
109  classifier::MatrixLookupWeighted mlw(10,20,2.0, 1.0);
110  suite.add(suite.equal(func(mlw.begin(), mlw.end()),2.0));
111  // do not run compiler test
112  if (false) {
113    statistics::Average average;
114    double x = average(boost::input_iterator_archetype<double>(),
115                       boost::input_iterator_archetype<double>());
116    test::dummie_function(x);
117    using utility::DataWeight;
118    x = average(boost::input_iterator_archetype_no_proxy<DataWeight>(),
119                boost::input_iterator_archetype_no_proxy<DataWeight>());
120    test::dummie_function(x);
121  }
122
123  test_mad(suite);
124
125  // do not run compiler test
126  if (false) {
127    statistics::tTest t_test;
128    classifier::Target target;
129    add(t_test, boost::forward_iterator_archetype<double>(),
130        boost::forward_iterator_archetype<double>(), target);
131    add(t_test, boost::forward_iterator_archetype<utility::DataWeight>(),
132        boost::forward_iterator_archetype<utility::DataWeight>(), target);
133  }
134  test_benjamini_hochberg(suite);
135  test_entropy(suite);
136  test_median_empty(suite);
137  return suite.return_value();
138}
139
140
141void test_benjamini_hochberg(test::Suite& suite)
142{
143  std::vector<double> p;
144  p.push_back(0.0001);
145  p.push_back(0.01);
146  p.push_back(0.015);
147  p.push_back(0.5);
148  p.push_back(0.99);
149  std::vector<double> q(p.size());
150  statistics::benjamini_hochberg(p.begin(), p.end(), q.begin());
151  suite.add(suite.equal(q[0], p[0]*5));
152  suite.add(suite.equal(q[1], p[1]*2.5));
153  suite.add(suite.equal(q[2], 0.025));
154  suite.add(suite.equal(q[3], p[3]*1.25));
155  suite.add(suite.equal(q[4], 0.99));
156
157  // do nut run compiler test
158  if (false) {
159    using statistics::benjamini_hochberg;
160    benjamini_hochberg(boost::bidirectional_iterator_archetype<double>(),
161                       boost::bidirectional_iterator_archetype<double>(),
162                       boost::mutable_bidirectional_iterator_archetype<double>());
163  }
164}
165
166
167void test_entropy(test::Suite& suite)
168{
169  suite.out() << "testing entropy(2)\n";
170  using statistics::entropy;
171  std::vector<int> x(10000,0);
172  x[512] = 42;
173  double e = entropy(x.begin(), x.end());
174  if (e>1e-15) {
175    suite.add(false);
176    suite.out() << "entropy: " << e << " expected close to 0\n";
177  }
178  x[0] = 42;
179  e = entropy(x.begin(), x.end());
180  if (e<=0) {
181    suite.add(false);
182    suite.out() << "entropy: " << e << " expected > 0\n";
183  }
184
185  // do not run compiler test
186  if (false) {
187    entropy(boost::input_iterator_archetype<double>(),
188            boost::input_iterator_archetype<double>());
189  }
190}
191
192
193void test_mad(test::Suite& suite)
194{
195  suite.err() << "testing mad" << std::endl;
196  utility::Vector x(3);
197  x(0) = 3;
198  x(1) = 1;
199  x(2) = 100;
200  suite.add(suite.equal(statistics::mad(x.begin(), x.end()), 2));
201
202  std::vector<utility::DataWeight> wx(3);
203  wx[0] = utility::DataWeight(3, 0.4);
204  wx[1] = utility::DataWeight(1, 0.4);
205  wx[2] = utility::DataWeight(100, 0.6);
206  suite.add(suite.equal(statistics::mad(wx.begin(), wx.end()), 2));
207  // do not run compiler test
208  if (false) {
209    using statistics::mad;
210    double x = mad(boost::random_access_iterator_archetype<double>(),
211                   boost::random_access_iterator_archetype<double>());
212    test::dummie_function(x);
213    x = mad(boost::random_access_iterator_archetype<utility::DataWeight>(),
214            boost::random_access_iterator_archetype<utility::DataWeight>());
215    test::dummie_function(x);
216  }
217}
218
219
220// test for ticket #660
221void test_median_empty(test::Suite& suite)
222{
223  std::vector<double> x;
224  double m = 0;
225  m = statistics::median(x.begin(), x.end(), true);
226  test::dummie_function(m);
227}
228
229
230void test_percentiler(test::Suite& suite)
231{
232  suite.err() << "testing unweighted percentile2" << std::endl;
233  std::vector<double> x;
234  x.reserve(6);
235  for (unsigned int i=0; i<5; i++){
236    x.push_back(static_cast<double>(i+1));
237  }
238  test_percentiler(suite, x.begin(), x.end(), 50, 3);
239  x.push_back(6);
240  test_percentiler(suite, x.begin(), x.end(), 50, 3.5);
241  test_percentiler(suite, x.begin(), x.end(), 25, 2);
242  test_percentiler(suite, x.begin(), x.end(), 0, 1);
243  test_percentiler(suite, x.begin(), x.end(), 10, 1);
244
245  suite.err() << "testing duplication of data\n";
246  std::vector<double> x2(x);
247  for (size_t i=0; i<x.size(); ++i)
248    x2.push_back(x[i]);
249  cmp_percentiler(suite, x.begin(), x.end(), x2.begin(), x2.end());
250
251
252  // testing weighted
253
254  suite.err() << "testing weighted percentile2" << std::endl;
255  std::vector<utility::DataWeight> xw(x.size());
256  for (size_t i=0; i<xw.size(); ++i) {
257    xw[i].data() = x[i];
258    xw[i].weight() = 1.0;
259  }
260  const std::vector<utility::DataWeight> xw_orig(xw);
261  suite.err() << "testing weighted" << std::endl;
262  test_percentiler(suite, xw.begin(), xw.end(), 0, 1);
263  test_percentiler(suite, xw.begin(), xw.end(), 100, 6);
264  test_percentiler(suite, xw.begin(), xw.end(), 49, 3);
265  test_percentiler(suite, xw.begin(), xw.end(), 51, 4);
266  test_percentiler(suite, xw.begin(), xw.end(), 50, 3.5);
267  test_percentiler(suite, x.begin(), x.end(), 10, 1);
268
269  suite.err() << "testing weighted with unity weights" << std::endl;
270  cmp_percentiler(suite, x.begin(), x.end(), xw.begin(), xw.end());
271
272  suite.err() << "testing that w=0 equals removed data point\n";
273  xw=xw_orig;
274  std::vector<utility::DataWeight> xw2(xw_orig);
275  xw[3].weight() = 0.0;
276  xw2.erase(xw2.begin()+3);
277  cmp_percentiler(suite, xw.begin(), xw.end(), xw2.begin(), xw2.end());
278
279  suite.err() << "testing rescaling of weights\n";
280  xw2 = xw;
281  for (size_t i=0; i<xw2.size(); ++i)
282    xw2[i].weight()*=2;
283  cmp_percentiler(suite, xw.begin(), xw.end(), xw2.begin(), xw2.end());
284
285  // do not run compiler test
286  if (false) {
287    statistics::Percentiler percentiler(50);
288    using boost::random_access_iterator_archetype;
289    typedef random_access_iterator_archetype<double> Iterator;
290    double x = percentiler(Iterator(), Iterator()); 
291    test::dummie_function(x);
292    using utility::DataWeight;
293    typedef random_access_iterator_archetype<DataWeight> WeigtedItererator;
294    x = percentiler(WeigtedItererator(), WeigtedItererator());
295    test::dummie_function(x);
296  }
297}
298
299void test_percentiler_nan(test::Suite& suite)
300{
301  using utility::DataWeight;
302  std::vector<double> v;
303  v.push_back(1);
304  v.push_back(10);
305  v.push_back(4);
306  v.push_back(2);
307  std::vector<DataWeight> wv(5);
308  wv[0] = DataWeight(v[0]);
309  wv[1] = DataWeight(v[1]);
310  wv[2] = DataWeight(std::numeric_limits<double>::quiet_NaN(), 0.0);
311  wv[3] = DataWeight(v[2]);
312  wv[4] = DataWeight(v[3]);
313
314  cmp_percentiler(suite, v.begin(), v.end(), wv.begin(), wv.end());
315}
316
317template<typename RandomAccessIterator>
318void test_percentiler(test::Suite& suite, 
319                      RandomAccessIterator first, 
320                      RandomAccessIterator last,
321                      double p, double correct)
322{
323  using statistics::percentile2;
324  double x = percentile2(first, last, p);
325  if (!suite.add(suite.equal(x, correct, 10))) {
326    suite.err() << "Error in percentile2 for " << p << "th percentile \n";
327    suite.err() << "  calculated value: " << x << "\n";
328    suite.err() << "  expected value: " << correct << "\n";
329  }
330}
331
332template<typename RandomAccessIterator1, typename RandomAccessIterator2>
333void cmp_percentiler(test::Suite& suite, 
334                     RandomAccessIterator1 first1, 
335                     RandomAccessIterator1 last1,
336                     RandomAccessIterator2 first2,
337                     RandomAccessIterator2 last2)
338{
339  for (double p=0; p<=100; p+=10) {
340    double correct=statistics::percentile2(first1, last1, p);
341    test_percentiler(suite, first2, last2, p, correct);
342  }
343
344}
Note: See TracBrowser for help on using the repository browser.