source: branches/0.7-stable/test/statistics.cc @ 2466

Last change on this file since 2466 was 2466, checked in by Peter, 11 years ago

Fixed so percentiler ignores element with zero weight

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 8.7 KB
Line 
1// $Id: statistics.cc 2466 2011-04-12 00:10:59Z peter $
2
3/*
4  Copyright (C) 2004 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2005 Peter Johansson
6  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
7  Copyright (C) 2007, 2008, 2009 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2010 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include "Suite.h"
27
28#include "yat/classifier/Target.h"
29#include "yat/statistics/Average.h"
30#include "yat/statistics/utility.h"
31#include "yat/statistics/tTest.h"
32#include "yat/utility/DataWeight.h"
33#include "yat/utility/MatrixWeighted.h"
34#include "yat/utility/Vector.h"
35
36#include <boost/concept_archetype.hpp>
37
38#include <cmath>
39#include <cstdlib>
40#include <iostream>
41#include <limits>
42#include <map>
43#include <vector>
44
45using namespace theplu::yat;
46void test_mad(test::Suite&);
47
48void test_median_empty(test::Suite&);
49void test_percentiler(test::Suite&);
50void test_percentiler_nan(test::Suite&);
51
52template<typename RandomAccessIterator>
53void test_percentiler(test::Suite&, RandomAccessIterator, 
54                      RandomAccessIterator,
55                      double p, double correct);
56
57template<typename RandomAccessIterator1, typename RandomAccessIterator2>
58void cmp_percentiler(test::Suite&, 
59                     RandomAccessIterator1, 
60                     RandomAccessIterator1,
61                     RandomAccessIterator2,
62                     RandomAccessIterator2);
63
64int main(int argc, char* argv[])
65{ 
66  test::Suite suite(argc, argv);
67
68  utility::Vector gsl_vec(10);
69  std::vector<double> data;
70  for (unsigned int i=0; i<10; i++){
71    data.push_back(static_cast<double>(i));
72    gsl_vec(i)=i;
73  }
74
75  double m=statistics::median(data.begin(), data.end());
76  double m_gsl=statistics::median(gsl_vec.begin(), gsl_vec.end());
77  if (m!=4.5 || m!=m_gsl)
78    suite.add(false);
79  if (false) {
80    using statistics::median;
81    double x = median(boost::random_access_iterator_archetype<double>(),
82                      boost::random_access_iterator_archetype<double>());
83    x = median(boost::random_access_iterator_archetype<utility::DataWeight>(),
84               boost::random_access_iterator_archetype<utility::DataWeight>());
85  }
86  statistics::percentile2(data.begin(), data.end(), 100);
87  data.resize(1);
88  statistics::median(data.begin(), data.end());
89  // testing percentile2
90  test_percentiler(suite);
91
92  // test weighted percentiler with NaNs
93  test_percentiler_nan(suite);
94
95  double skewness_gsl=statistics::skewness(gsl_vec);
96  if (!suite.equal(1-skewness_gsl, 1.0) )
97    suite.add(false);
98  double kurtosis_gsl=statistics::kurtosis(gsl_vec);
99  suite.add(suite.equal_fix(kurtosis_gsl,-1.5616363636363637113,1e-10));
100  statistics::Average func;
101  suite.add(suite.equal(func(gsl_vec.begin(), gsl_vec.end()),4.5));
102  // easiest way to get a weighted iterator
103  classifier::MatrixLookupWeighted mlw(10,20,2.0, 1.0);
104  suite.add(suite.equal(func(mlw.begin(), mlw.end()),2.0));
105  // do not run compiler test
106  if (false) {
107    statistics::Average average;
108    double x = average(boost::input_iterator_archetype<double>(),
109                       boost::input_iterator_archetype<double>());
110    x = average(boost::input_iterator_archetype_no_proxy<utility::DataWeight>(),
111                boost::input_iterator_archetype_no_proxy<utility::DataWeight>());
112           
113  }
114 
115  test_mad(suite);
116
117  // do not run compiler test
118  if (false) {
119    statistics::tTest t_test;
120    classifier::Target target;
121    add(t_test, boost::forward_iterator_archetype<double>(), 
122        boost::forward_iterator_archetype<double>(), target);
123    add(t_test, boost::forward_iterator_archetype<utility::DataWeight>(), 
124        boost::forward_iterator_archetype<utility::DataWeight>(), target);
125           
126  }
127  test_median_empty(suite);
128  return suite.return_value();
129}
130
131
132void test_mad(test::Suite& suite)
133{
134  suite.err() << "testing mad" << std::endl;
135  utility::Vector x(3);
136  x(0) = 3;
137  x(1) = 1;
138  x(2) = 100;
139  suite.add(suite.equal(statistics::mad(x.begin(), x.end()), 2));
140
141  std::vector<utility::DataWeight> wx(3);
142  wx[0] = utility::DataWeight(3, 0.4);
143  wx[1] = utility::DataWeight(1, 0.4);
144  wx[2] = utility::DataWeight(100, 0.6);
145  suite.add(suite.equal(statistics::mad(wx.begin(), wx.end()), 2));
146  // do not run compiler test
147  if (false) {
148    using statistics::mad;
149    double x = mad(boost::random_access_iterator_archetype<double>(),
150                   boost::random_access_iterator_archetype<double>());
151    x = mad(boost::random_access_iterator_archetype<utility::DataWeight>(),
152            boost::random_access_iterator_archetype<utility::DataWeight>());
153  }
154}
155
156
157// test for ticket #660
158void test_median_empty(test::Suite& suite)
159{
160  std::vector<double> x;
161  double m = 0;
162  m = statistics::median(x.begin(), x.end(), true);
163}
164
165
166void test_percentiler(test::Suite& suite)
167{
168  suite.err() << "testing unweighted percentile2" << std::endl;
169  std::vector<double> x;
170  x.reserve(6);
171  for (unsigned int i=0; i<5; i++){
172    x.push_back(static_cast<double>(i+1));
173  }
174  test_percentiler(suite, x.begin(), x.end(), 50, 3);
175  x.push_back(6);
176  test_percentiler(suite, x.begin(), x.end(), 50, 3.5);
177  test_percentiler(suite, x.begin(), x.end(), 25, 2);
178  test_percentiler(suite, x.begin(), x.end(), 0, 1);
179  test_percentiler(suite, x.begin(), x.end(), 10, 1);
180
181  suite.err() << "testing duplication of data\n";
182  std::vector<double> x2(x);
183  for (size_t i=0; i<x.size(); ++i)
184    x2.push_back(x[i]);
185  cmp_percentiler(suite, x.begin(), x.end(), x2.begin(), x2.end());
186
187
188  // testing weighted
189
190  suite.err() << "testing weighted percentile2" << std::endl;
191  std::vector<utility::DataWeight> xw(x.size());
192  for (size_t i=0; i<xw.size(); ++i) {
193    xw[i].data() = x[i];
194    xw[i].weight() = 1.0;
195  }
196  const std::vector<utility::DataWeight> xw_orig(xw);
197  suite.err() << "testing weighted" << std::endl;
198  test_percentiler(suite, xw.begin(), xw.end(), 0, 1);
199  test_percentiler(suite, xw.begin(), xw.end(), 100, 6);
200  test_percentiler(suite, xw.begin(), xw.end(), 49, 3);
201  test_percentiler(suite, xw.begin(), xw.end(), 51, 4);
202  test_percentiler(suite, xw.begin(), xw.end(), 50, 3.5);
203  test_percentiler(suite, x.begin(), x.end(), 10, 1);
204
205  suite.err() << "testing weighted with unity weights" << std::endl;
206  cmp_percentiler(suite, x.begin(), x.end(), xw.begin(), xw.end());
207
208  suite.err() << "testing that w=0 equals removed data point\n";
209  xw=xw_orig;
210  std::vector<utility::DataWeight> xw2(xw_orig);
211  xw[3].weight() = 0.0;
212  xw2.erase(xw2.begin()+3);
213  cmp_percentiler(suite, xw.begin(), xw.end(), xw2.begin(), xw2.end());
214
215  suite.err() << "testing rescaling of weights\n";
216  xw2 = xw;
217  for (size_t i=0; i<xw2.size(); ++i)
218    xw2[i].weight()*=2;
219  cmp_percentiler(suite, xw.begin(), xw.end(), xw2.begin(), xw2.end());
220
221  // do not run compiler test
222  if (false) {
223    statistics::Percentiler percentiler(50);
224    using boost::random_access_iterator_archetype;
225    typedef random_access_iterator_archetype<double> Iterator;
226    double x = percentiler(Iterator(), Iterator()); 
227    using utility::DataWeight;
228    typedef random_access_iterator_archetype<DataWeight> WeigtedItererator;
229    x = percentiler(WeigtedItererator(), WeigtedItererator());
230  }
231}
232
233void test_percentiler_nan(test::Suite& suite)
234{
235  using utility::DataWeight;
236  std::vector<double> v;
237  v.push_back(1);
238  v.push_back(10);
239  v.push_back(4);
240  v.push_back(2);
241  std::vector<DataWeight> wv(5);
242  wv[0] = DataWeight(v[0]);
243  wv[1] = DataWeight(v[1]);
244  wv[2] = DataWeight(std::numeric_limits<double>::quiet_NaN(), 0.0);
245  wv[3] = DataWeight(v[2]);
246  wv[4] = DataWeight(v[3]);
247
248  cmp_percentiler(suite, v.begin(), v.end(), wv.begin(), wv.end());
249}
250
251template<typename RandomAccessIterator>
252void test_percentiler(test::Suite& suite, 
253                      RandomAccessIterator first, 
254                      RandomAccessIterator last,
255                      double p, double correct)
256{
257  using statistics::percentile2;
258  double x = percentile2(first, last, p);
259  if (!suite.add(suite.equal(x, correct, 10))) {
260    suite.err() << "Error in percentile2 for " << p << "th percentile \n";
261    suite.err() << "  calculated value: " << x << "\n";
262    suite.err() << "  expected value: " << correct << "\n";
263  }
264}
265
266template<typename RandomAccessIterator1, typename RandomAccessIterator2>
267void cmp_percentiler(test::Suite& suite, 
268                     RandomAccessIterator1 first1, 
269                     RandomAccessIterator1 last1,
270                     RandomAccessIterator2 first2,
271                     RandomAccessIterator2 last2)
272{
273  for (double p=0; p<=100; p+=10) {
274    double correct=statistics::percentile2(first1, last1, p);
275    test_percentiler(suite, first2, last2, p, correct);
276  }
277
278}
Note: See TracBrowser for help on using the repository browser.