source: branches/0.7-stable/test/statistics.cc @ 2392

Last change on this file since 2392 was 2370, checked in by Peter, 12 years ago

closes #646

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 8.5 KB
Line 
1// $Id: statistics.cc 2370 2010-12-11 04:00:49Z peter $
2
3/*
4  Copyright (C) 2004 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2005 Peter Johansson
6  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
7  Copyright (C) 2007, 2008, 2009 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2010 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include "Suite.h"
27
28#include "yat/classifier/Target.h"
29#include "yat/statistics/Average.h"
30#include "yat/statistics/utility.h"
31#include "yat/statistics/tTest.h"
32#include "yat/utility/DataWeight.h"
33#include "yat/utility/MatrixWeighted.h"
34#include "yat/utility/Vector.h"
35
36#include <boost/concept_archetype.hpp>
37
38#include <cmath>
39#include <cstdlib>
40#include <iostream>
41#include <limits>
42#include <map>
43#include <vector>
44
45using namespace theplu::yat;
46void test_mad(test::Suite&);
47
48void test_percentiler(test::Suite&);
49void test_percentiler_nan(test::Suite&);
50
51template<typename RandomAccessIterator>
52void test_percentiler(test::Suite&, RandomAccessIterator, 
53                      RandomAccessIterator,
54                      double p, double correct);
55
56template<typename RandomAccessIterator1, typename RandomAccessIterator2>
57void cmp_percentiler(test::Suite&, 
58                     RandomAccessIterator1, 
59                     RandomAccessIterator1,
60                     RandomAccessIterator2,
61                     RandomAccessIterator2);
62
63int main(int argc, char* argv[])
64{ 
65  test::Suite suite(argc, argv);
66
67  utility::Vector gsl_vec(10);
68  std::vector<double> data;
69  for (unsigned int i=0; i<10; i++){
70    data.push_back(static_cast<double>(i));
71    gsl_vec(i)=i;
72  }
73
74  double m=statistics::median(data.begin(), data.end());
75  double m_gsl=statistics::median(gsl_vec.begin(), gsl_vec.end());
76  if (m!=4.5 || m!=m_gsl)
77    suite.add(false);
78  if (false) {
79    using statistics::median;
80    double x = median(boost::random_access_iterator_archetype<double>(),
81                      boost::random_access_iterator_archetype<double>());
82    x = median(boost::random_access_iterator_archetype<utility::DataWeight>(),
83               boost::random_access_iterator_archetype<utility::DataWeight>());
84  }
85  statistics::percentile2(data.begin(), data.end(), 100);
86  data.resize(1);
87  statistics::median(data.begin(), data.end());
88  // testing percentile2
89  test_percentiler(suite);
90
91  // test weighted percentiler with NaNs
92  test_percentiler_nan(suite);
93
94  double skewness_gsl=statistics::skewness(gsl_vec);
95  if (!suite.equal(1-skewness_gsl, 1.0) )
96    suite.add(false);
97  double kurtosis_gsl=statistics::kurtosis(gsl_vec);
98  suite.add(suite.equal_fix(kurtosis_gsl,-1.5616363636363637113,1e-10));
99  statistics::Average func;
100  suite.add(suite.equal(func(gsl_vec.begin(), gsl_vec.end()),4.5));
101  // easiest way to get a weighted iterator
102  classifier::MatrixLookupWeighted mlw(10,20,2.0, 1.0);
103  suite.add(suite.equal(func(mlw.begin(), mlw.end()),2.0));
104  // do not run compiler test
105  if (false) {
106    statistics::Average average;
107    double x = average(boost::input_iterator_archetype<double>(),
108                       boost::input_iterator_archetype<double>());
109    x = average(boost::input_iterator_archetype_no_proxy<utility::DataWeight>(),
110                boost::input_iterator_archetype_no_proxy<utility::DataWeight>());
111           
112  }
113 
114  test_mad(suite);
115
116  // do not run compiler test
117  if (false) {
118    statistics::tTest t_test;
119    classifier::Target target;
120    add(t_test, boost::forward_iterator_archetype<double>(), 
121        boost::forward_iterator_archetype<double>(), target);
122    add(t_test, boost::forward_iterator_archetype<utility::DataWeight>(), 
123        boost::forward_iterator_archetype<utility::DataWeight>(), target);
124           
125  }
126  return suite.return_value();
127}
128
129
130void test_mad(test::Suite& suite)
131{
132  suite.err() << "testing mad" << std::endl;
133  utility::Vector x(3);
134  x(0) = 3;
135  x(1) = 1;
136  x(2) = 100;
137  suite.add(suite.equal(statistics::mad(x.begin(), x.end()), 2));
138
139  std::vector<utility::DataWeight> wx(3);
140  wx[0] = utility::DataWeight(3, 0.4);
141  wx[1] = utility::DataWeight(1, 0.4);
142  wx[2] = utility::DataWeight(100, 0.6);
143  suite.add(suite.equal(statistics::mad(wx.begin(), wx.end()), 2));
144  // do not run compiler test
145  if (false) {
146    using statistics::mad;
147    double x = mad(boost::random_access_iterator_archetype<double>(),
148                   boost::random_access_iterator_archetype<double>());
149    x = mad(boost::random_access_iterator_archetype<utility::DataWeight>(),
150            boost::random_access_iterator_archetype<utility::DataWeight>());
151  }
152}
153
154
155void test_percentiler(test::Suite& suite)
156{
157  suite.err() << "testing unweighted percentile2" << std::endl;
158  std::vector<double> x;
159  x.reserve(6);
160  for (unsigned int i=0; i<5; i++){
161    x.push_back(static_cast<double>(i+1));
162  }
163  test_percentiler(suite, x.begin(), x.end(), 50, 3);
164  x.push_back(6);
165  test_percentiler(suite, x.begin(), x.end(), 50, 3.5);
166  test_percentiler(suite, x.begin(), x.end(), 25, 2);
167  test_percentiler(suite, x.begin(), x.end(), 0, 1);
168  test_percentiler(suite, x.begin(), x.end(), 10, 1);
169
170  suite.err() << "testing duplication of data\n";
171  std::vector<double> x2(x);
172  for (size_t i=0; i<x.size(); ++i)
173    x2.push_back(x[i]);
174  cmp_percentiler(suite, x.begin(), x.end(), x2.begin(), x2.end());
175
176
177  // testing weighted
178
179  suite.err() << "testing weighted percentile2" << std::endl;
180  std::vector<utility::DataWeight> xw(x.size());
181  for (size_t i=0; i<xw.size(); ++i) {
182    xw[i].data() = x[i];
183    xw[i].weight() = 1.0;
184  }
185  const std::vector<utility::DataWeight> xw_orig(xw);
186  suite.err() << "testing weighted" << std::endl;
187  test_percentiler(suite, xw.begin(), xw.end(), 0, 1);
188  test_percentiler(suite, xw.begin(), xw.end(), 100, 6);
189  test_percentiler(suite, xw.begin(), xw.end(), 49, 3);
190  test_percentiler(suite, xw.begin(), xw.end(), 51, 4);
191  test_percentiler(suite, xw.begin(), xw.end(), 50, 3.5);
192  test_percentiler(suite, x.begin(), x.end(), 10, 1);
193
194  suite.err() << "testing weighted with unity weights" << std::endl;
195  cmp_percentiler(suite, x.begin(), x.end(), xw.begin(), xw.end());
196
197  suite.err() << "testing that w=0 equals removed data point\n";
198  xw=xw_orig;
199  std::vector<utility::DataWeight> xw2(xw_orig);
200  xw[3].weight() = 0.0;
201  xw2.erase(xw2.begin()+3);
202  cmp_percentiler(suite, xw.begin(), xw.end(), xw2.begin(), xw2.end());
203
204  suite.err() << "testing rescaling of weights\n";
205  xw2 = xw;
206  for (size_t i=0; i<xw2.size(); ++i)
207    xw2[i].weight()*=2;
208  cmp_percentiler(suite, xw.begin(), xw.end(), xw2.begin(), xw2.end());
209
210  // do not run compiler test
211  if (false) {
212    statistics::Percentiler percentiler(50);
213    using boost::random_access_iterator_archetype;
214    typedef random_access_iterator_archetype<double> Iterator;
215    double x = percentiler(Iterator(), Iterator()); 
216    using utility::DataWeight;
217    typedef random_access_iterator_archetype<DataWeight> WeigtedItererator;
218    x = percentiler(WeigtedItererator(), WeigtedItererator());
219  }
220}
221
222void test_percentiler_nan(test::Suite& suite)
223{
224  using utility::DataWeight;
225  std::vector<double> v;
226  v.push_back(1);
227  v.push_back(10);
228  v.push_back(4);
229  v.push_back(2);
230  std::vector<DataWeight> wv(5);
231  wv[0] = DataWeight(v[0]);
232  wv[1] = DataWeight(v[1]);
233  wv[2] = DataWeight(std::numeric_limits<double>::quiet_NaN(), 0.0);
234  wv[3] = DataWeight(v[2]);
235  wv[4] = DataWeight(v[3]);
236
237  cmp_percentiler(suite, v.begin(), v.end(), wv.begin(), wv.end());
238}
239
240template<typename RandomAccessIterator>
241void test_percentiler(test::Suite& suite, 
242                      RandomAccessIterator first, 
243                      RandomAccessIterator last,
244                      double p, double correct)
245{
246  using statistics::percentile2;
247  double x = percentile2(first, last, p);
248  if (!suite.add(suite.equal(x, correct, 10))) {
249    suite.err() << "Error in percentile2 for " << p << "th percentile \n";
250    suite.err() << "  calculated value: " << x << "\n";
251    suite.err() << "  expected value: " << correct << "\n";
252  }
253}
254
255template<typename RandomAccessIterator1, typename RandomAccessIterator2>
256void cmp_percentiler(test::Suite& suite, 
257                     RandomAccessIterator1 first1, 
258                     RandomAccessIterator1 last1,
259                     RandomAccessIterator2 first2,
260                     RandomAccessIterator2 last2)
261{
262  for (double p=0; p<100; p+=10) {
263    double correct=statistics::percentile2(first1, last1, p);
264    test_percentiler(suite, first2, last2, p, correct);
265  }
266
267}
Note: See TracBrowser for help on using the repository browser.