source: trunk/test/normalization.cc @ 3543

Last change on this file since 3543 was 3543, checked in by Peter, 5 years ago

refs #803. use boost iterator categories in normalizer::Spearman

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 17.6 KB
Line 
1// $Id: normalization.cc 3543 2016-12-23 07:04:15Z peter $
2
3/*
4  Copyright (C) 2008, 2009 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2010, 2012, 2014 Peter Johansson
6
7  This file is part of the yat library, http://dev.thep.lu.se/yat
8
9  The yat library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 3 of the
12  License, or (at your option) any later version.
13
14  The yat library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  General Public License for more details.
18
19  You should have received a copy of the GNU General Public License
20  along with yat. If not, see <http://www.gnu.org/licenses/>.
21*/
22
23#include <config.h>
24
25#include "Suite.h"
26
27#include "yat/normalizer/Centralizer.h"
28#include "yat/normalizer/ColumnNormalizer.h"
29#include "yat/normalizer/Gauss.h"
30#include "yat/normalizer/qQuantileNormalizer.h"
31#include "yat/normalizer/QuantileNormalizer.h"
32#include "yat/normalizer/RowNormalizer.h"
33#include "yat/normalizer/Spearman.h"
34#include "yat/normalizer/Zscore.h"
35
36#include "yat/utility/DataIterator.h"
37#include "yat/utility/FileUtil.h"
38#include "yat/utility/Matrix.h"
39#include "yat/utility/MatrixWeighted.h"
40#include "yat/utility/WeightIterator.h"
41#include "yat/utility/WeightedIteratorArchetype.h"
42
43#include <boost/concept_archetype.hpp>
44#include <boost/iterator/iterator_archetypes.hpp>
45
46#include <climits>
47#include <fstream>
48#include <iterator>
49#include <limits>
50#include <vector>
51
52using namespace theplu::yat;
53void test_centralizer(test::Suite&);
54void test_column_normalize(test::Suite&);
55void test_gauss_normalize(test::Suite&);
56void test_qquantile_normalize(test::Suite&);
57void test_qquantile_normalize_weighted(test::Suite&);
58void test_quantile_normalize(test::Suite&);
59void test_row_normalize(test::Suite&);
60void test_spearman(test::Suite&);
61void test_spearman_weighted(test::Suite&);
62void test_z_score(test::Suite&);
63
64int main(int argc, char* argv[])
65{
66  test::Suite suite(argc, argv);
67  suite.err() << "testing normalizations ... " << std::endl;
68
69  test_centralizer(suite);
70  test_column_normalize(suite);
71  test_qquantile_normalize(suite);
72  test_qquantile_normalize_weighted(suite);
73  test_quantile_normalize(suite);
74  test_gauss_normalize(suite);
75  test_row_normalize(suite);
76  test_spearman(suite);
77  test_z_score(suite);
78
79  return suite.return_value();
80}
81
82
83void test_centralizer(test::Suite& suite)
84{
85  suite.err() << "Testing Centralizer\n";
86  std::vector<double> vec;
87  vec.push_back(1);
88  vec.push_back(2);
89  vec.push_back(3);
90  normalizer::Centralizer<> c;
91  c(vec.begin(), vec.end(), vec.begin());
92  for (size_t i=0; i<vec.size(); ++i)
93    suite.add(suite.equal(vec[i], static_cast<double>(i)-1.0));
94
95  std::vector<utility::DataWeight> vec2;
96  vec2.push_back(utility::DataWeight(1,1));
97  vec2.push_back(utility::DataWeight(2,0.5));
98  vec2.push_back(utility::DataWeight(2,0.5));
99  std::vector<utility::DataWeight> vec3(vec2.size());
100  c(vec2.begin(), vec2.end(), vec3.begin());
101  for (size_t i=0; i<vec2.size(); ++i)
102    suite.add(suite.equal(vec3[i].weight(), vec2[i].weight()));
103  suite.add(suite.equal(vec3[0].data(), -0.5));
104  suite.add(suite.equal(vec3[1].data(), 0.5));
105  suite.add(suite.equal(vec3[2].data(), 0.5));
106
107  // compile test should not be run
108  if (false) {
109    typedef test::DataIterator<boost::forward_traversal_tag> trait;
110    trait::unweighted_const_iterator unweighted_const_iterator;
111    trait::unweighted_iterator unweighted_mutable_iterator;
112    trait::weighted_const_iterator weighted_const_iterator;
113    trait::weighted_iterator weighted_mutable_iterator;
114
115    c(unweighted_const_iterator, unweighted_const_iterator,
116      unweighted_mutable_iterator);
117    c(unweighted_const_iterator, unweighted_const_iterator,
118      weighted_mutable_iterator);
119    c(weighted_const_iterator, weighted_const_iterator,
120      unweighted_mutable_iterator);
121    c(weighted_const_iterator, weighted_const_iterator,
122      weighted_mutable_iterator);
123  }
124}
125
126
127void test_column_normalize(test::Suite& suite)
128{
129  using namespace normalizer;
130  suite.err() << "Testing ColumnNormalizer\n";
131
132  utility::Matrix m(2,2);
133  m(0,0) = 0;
134  m(0,1) = 10;
135  m(1,0) = 2;
136  m(1,1) = 4;
137  ColumnNormalizer<Centralizer<> > qn;
138  qn(m, m);
139  suite.err() << "Testing m(0,0)\n";
140  suite.add(suite.equal(m(0,0), -1));
141  suite.err() << "Testing m(0,1)\n";
142  suite.add(suite.equal(m(0,1), 3));
143  suite.err() << "Testing m(1,0)\n";
144  suite.add(suite.equal(m(1,0), 1));
145  suite.err() << "Testing m(1,1)\n";
146  suite.add(suite.equal(m(1,1), -3));
147
148  if (false) { // do not run compile tests
149    test::container2d_archetype<double> container2d;
150    test::mutable_container2d_archetype<double> mutable_container2d;
151    qn(container2d, mutable_container2d);
152  }
153}
154
155
156void test_qquantile_normalize(test::Suite& suite)
157{
158  using namespace normalizer;
159
160  suite.err() << "Testing qQuantileNormalizer\n";
161  std::string data(test::filename("data/normalization_test.data"));
162  if (utility::FileUtil(data.c_str()).permissions("r")) {
163    suite.add(false);
164    suite.err() << "Cannot access file " << data << '\n';
165    return;
166  }
167  std::ifstream data_stream(data.c_str());
168
169  utility::Matrix m(data_stream);
170
171  suite.err() << "testing number of parts (Q) boundary conditions\n";
172  qQuantileNormalizer(m.begin_column(0), m.end_column(0), m.rows());
173  qQuantileNormalizer(m.begin_column(0), m.end_column(0), 3);
174
175  // first column as target
176  qQuantileNormalizer qqn(m.begin_column(0), m.end_column(0) ,9);
177  ColumnNormalizer<qQuantileNormalizer> cn(qqn);
178  utility::Matrix result(m.rows(),m.columns());
179  cn(m, result);
180
181  suite.err() << "test that result can be stored in the source matrix...";
182  cn(m,m);
183  if (suite.add(result==m))
184    suite.err() << " ok.\n";
185  else
186    suite.err() << " failed.\n";
187
188  // Enough iteration will make all columns to have the same values as
189  // the target.
190  suite.err() << "Testing that q=matrix rows gives QuantileNormalization\n";
191  utility::Matrix m2(4,2);
192  m2(0,0) = 0; m2(0,1) = 10;
193  m2(1,0) = 2; m2(1,1) = 4;
194  m2(2,0) = 1; m2(2,1) = 0;
195  m2(3,0) = 3; m2(3,1) = 7;
196  qQuantileNormalizer qqn2(m2.begin_column(0), m2.end_column(0), m2.rows());
197  ColumnNormalizer<qQuantileNormalizer> cn2(qqn2);
198  utility::Matrix result2(m2.rows(),m2.columns());
199  cn2(m2,result2);
200  suite.add( suite.equal_fix(m2(0,0),result2(2,1),1.0e-12) &&
201             suite.equal_fix(m2(1,0),result2(3,1),1.0e-12) &&
202             suite.equal_fix(m2(2,0),result2(1,1),1.0e-12) &&
203             suite.equal_fix(m2(3,0),result2(0,1),1.0e-12) );
204  // compile test should not be run
205  if (false) {
206    qQuantileNormalizer qqn3(boost::forward_iterator_archetype<double>(),
207                             boost::forward_iterator_archetype<double>(),
208                             100);
209    qqn3(boost::random_access_iterator_archetype<double>(),
210         boost::random_access_iterator_archetype<double>(),
211         boost::mutable_random_access_iterator_archetype<double>());
212    using utility::DataWeight;
213    qQuantileNormalizer qqn4(boost::forward_iterator_archetype<DataWeight>(),
214                             boost::forward_iterator_archetype<DataWeight>(),
215                             100);
216    qqn4(boost::random_access_iterator_archetype<DataWeight>(),
217         boost::random_access_iterator_archetype<DataWeight>(),
218         boost::mutable_random_access_iterator_archetype<DataWeight>());
219    qqn4(boost::random_access_iterator_archetype<DataWeight>(),
220         boost::random_access_iterator_archetype<DataWeight>(),
221         boost::mutable_random_access_iterator_archetype<double>());
222  }
223}
224
225
226void test_qquantile_normalize_weighted(test::Suite& suite)
227{
228  using namespace normalizer;
229
230  suite.err() << "Testing qQuantileNormalizer weighted\n";
231
232  // test with unweighted target and source
233  std::vector<double> target;
234  target.reserve(1000);
235  while (target.size()<1000)
236    target.push_back(target.size());
237  qQuantileNormalizer qQN(target.begin(), target.end(), 4);
238  std::vector<double> source;
239  while (source.size()<10)
240    source.push_back(source.size()*10);
241  std::vector<double> result(source.size());
242
243  qQN(source.begin(), source.end(), result.begin());
244
245  using utility::DataWeight;
246  suite.err() << "Testing with unweighted target and weighted source\n";
247  std::vector<utility::DataWeight> src_w(source.size(), DataWeight(0, 1));
248  std::copy(source.begin(), source.end(),
249            utility::data_iterator(src_w.begin()));
250
251  std::vector<utility::DataWeight> result_w(src_w.size());
252  qQN(src_w.begin(), src_w.end(), result_w.begin());
253  suite.add(suite.equal_range(result.begin(), result.end(),
254                              utility::data_iterator(result_w.begin())));
255
256  suite.err() << "Testing with missing value in source\n";
257  // adding a missing value
258  std::vector<utility::DataWeight>::iterator MWi=src_w.begin();
259  MWi+=5;
260  src_w.insert(MWi, DataWeight(std::numeric_limits<double>::quiet_NaN(), 0.0));
261  std::vector<utility::DataWeight> result_w2(src_w.size());
262  qQN(src_w.begin(), src_w.end(), result_w2.begin());
263  // excluding missing value from comparison in suite.equal_range
264  MWi=result_w2.begin();
265  MWi+=5;
266  result_w2.erase(MWi);
267  suite.add(suite.equal_range(utility::data_iterator(result_w.begin()),
268                              utility::data_iterator(result_w.end()),
269                              utility::data_iterator(result_w2.begin())));
270
271  suite.err() << "testing with weighted target" << std::endl;
272  std::vector<utility::DataWeight> target_w(target.size()+1, DataWeight(0, 1));
273  target_w[0] = DataWeight(5.3, 0);
274  std::copy(target.begin(), target.end(),
275            utility::data_iterator(target_w.begin()+1));
276  qQuantileNormalizer qQNw(target_w.begin(), target_w.end(), 4);
277  std::vector<utility::DataWeight> result_w3(src_w.size());
278  qQNw(src_w.begin(), src_w.end(), result_w3.begin());
279  // excluding missing value from comparison in suite.equal_range
280  MWi=result_w3.begin();
281  MWi+=5;
282  result_w3.erase(MWi);
283  suite.add(suite.equal_range(utility::data_iterator(result_w3.begin()),
284                              utility::data_iterator(result_w3.end()),
285                              utility::data_iterator(result_w2.begin())));
286
287}
288
289
290void test_quantile_normalize(test::Suite& suite)
291{
292  suite.err() << "Testing QuantileNormalizer\n";
293  utility::Matrix m(2,2);
294  m(0,0) = 0;
295  m(0,1) = 10;
296  m(1,0) = 2;
297  m(1,1) = 4;
298  normalizer::QuantileNormalizer qn;
299  qn(m, m);
300  suite.err() << "Testing m(0,0)\n";
301  suite.add(suite.equal(m(0,0), 2));
302  suite.err() << "Testing m(0,1)\n";
303  suite.add(suite.equal(m(0,1), 6));
304  suite.err() << "Testing m(1,0)\n";
305  suite.add(suite.equal(m(1,0), 6));
306  suite.err() << "Testing m(1,1)\n";
307  suite.add(suite.equal(m(1,1), 2));
308
309  // testing with landscape-shaped data matrix
310  m.resize(2, 10);
311  qn(m, m);
312}
313
314void test_row_normalize(test::Suite& suite)
315{
316  using namespace normalizer;
317  suite.err() << "Testing RowNormalizer\n";
318
319  utility::Matrix m(2,3);
320  m(0,0) = 0;
321  m(0,1) = 10;
322  m(1,0) = 2;
323  m(1,1) = 4;
324  utility::Matrix m2(m);
325  m2.transpose();
326  ColumnNormalizer<Centralizer<> > cn;
327  RowNormalizer<Centralizer<> > rn;
328  cn(m, m);
329  rn(m2, m2);
330  m2.transpose();
331  suite.equal_range(m.begin(), m.end(), m2.begin());
332  if (false) { // do not run compile tests
333    test::container2d_archetype<double> container2d;
334    test::mutable_container2d_archetype<double> mutable_container2d;
335    rn(container2d, mutable_container2d);
336  }
337}
338
339void test_spearman(test::Suite& suite)
340{
341  suite.err() << "Testing Spearman\n";
342  normalizer::Spearman spearman;
343  std::vector<double> vec;
344  vec.push_back(0);
345  vec.push_back(2);
346  vec.push_back(3);
347  vec.push_back(1);
348  spearman(vec.begin(), vec.end(), vec.begin());
349  std::vector<double> correct;
350  correct.push_back(1.0/8);
351  correct.push_back(5.0/8);
352  correct.push_back(7.0/8);
353  correct.push_back(3.0/8);
354  suite.add(suite.equal_range(vec.begin(), vec.end(), correct.begin()));
355  suite.err() << "Testing Spearman with ties\n";
356  vec[1]=vec[2];
357  correct[1] = correct[2] = (correct[1]+correct[2])/2;
358  spearman(vec.begin(), vec.end(), vec.begin());
359  suite.add(suite.equal_range(vec.begin(), vec.end(), correct.begin()));
360  test_spearman_weighted(suite);
361
362}
363
364
365void test_gauss_normalize(test::Suite& suite)
366{
367  suite.err() << "Testing Gauss\n";
368  normalizer::Gauss gauss;
369  std::vector<double> vec;
370  vec.push_back(1);
371  gauss(vec.begin(), vec.end(), vec.begin());
372  suite.add(suite.equal(vec.front(), 0));
373  vec.push_back(1);
374  gauss(vec.begin(), vec.end(), vec.begin());
375  suite.add(suite.equal(vec.front(), -vec.back()));
376  // compile test should not be run
377  if (false) {
378    using boost::iterator_archetype;
379    iterator_archetype<double,
380                       boost::iterator_archetypes::readable_iterator_t,
381                       boost::random_access_traversal_tag>
382      readable_iterator;
383
384    iterator_archetype<double,
385                       boost::iterator_archetypes::readable_writable_iterator_t,
386                       boost::random_access_traversal_tag>
387      writable_iterator;
388
389    // We have to use lvalue here because otherwise proxy classes
390    // provided by boost kick in and they do not provide the needed
391    // data() and weight() functions that e.g. DataWeightProxy does.
392    iterator_archetype<utility::DataWeight,
393                       boost::iterator_archetypes::readable_lvalue_iterator_t,
394                       boost::random_access_traversal_tag>
395      weighted_readable_iterator;
396
397    iterator_archetype<utility::DataWeight,
398                       boost::iterator_archetypes::writable_lvalue_iterator_t,
399                       boost::random_access_traversal_tag>
400      weighted_writable_iterator;
401
402    gauss(readable_iterator, readable_iterator, writable_iterator);
403    gauss(readable_iterator, readable_iterator, weighted_writable_iterator);
404    gauss(weighted_readable_iterator, weighted_readable_iterator,
405          writable_iterator);
406    gauss(weighted_readable_iterator, weighted_readable_iterator,
407          weighted_writable_iterator);
408  }
409}
410
411void test_spearman_weighted(test::Suite& suite)
412{
413  suite.err() << "Testing Weighted Spearman\n";
414  normalizer::Spearman spearman;
415
416  suite.err() << "Testing that unity weights reproduces unweighted case\n";
417  utility::MatrixWeighted m(1,4,0,1);
418  utility::MatrixWeighted res(m.rows(), m.columns(),3.14,0);
419  m(0,0).data()=0;
420  m(0,1).data()=2;
421  m(0,2).data()=3;
422  m(0,3).data()=1;
423  std::vector<double> correct(m.columns());
424  std::vector<double> correct_w(m.columns(), 1.0);
425  std::copy(utility::data_iterator(m.begin_row(0)),
426            utility::data_iterator(m.end_row(0)),
427            correct.begin());
428  spearman(correct.begin(), correct.end(), correct.begin());
429  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
430
431  using utility::data_iterator;
432  suite.add(suite.equal_range(data_iterator(res.begin_row(0)),
433                               data_iterator(res.end_row(0)),
434                               correct.begin()));
435  using utility::weight_iterator;
436  suite.add(suite.equal_range(weight_iterator(res.begin_row(0)),
437                               weight_iterator(res.end_row(0)),
438                               correct_w.begin()));
439
440  suite.err() << "Testing rescaling of weights\n";
441  for (size_t i=0; i<m.columns(); ++i) {
442    m(0,i).weight() *= 2;
443    correct_w[i] *= 2;
444  }
445  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
446  suite.add(suite.equal_range(data_iterator(res.begin_row(0)),
447                               data_iterator(res.end_row(0)),
448                               correct.begin()));
449  suite.add(suite.equal_range(weight_iterator(res.begin_row(0)),
450                               weight_iterator(res.end_row(0)),
451                               correct_w.begin()));
452
453
454  suite.err() << "Testing case with a zero weight\n";
455  m(0,1).data() = std::numeric_limits<double>::quiet_NaN();
456  m(0,1).weight() = 0.0;
457  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
458  suite.add(suite.equal(res(0,0).data(), 0.5/3));
459  suite.add(suite.equal(res(0,2).data(), 2.5/3));
460  suite.add(suite.equal(res(0,3).data(), 1.5/3));
461
462  suite.err() << "Testing case with ties\n";
463  m(0,0).data() = m(0,2).data();
464  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
465  suite.add(suite.equal(res(0,0).data(), 2.0/3));
466  suite.add(suite.equal(res(0,2).data(), 2.0/3));
467  suite.add(suite.equal(res(0,3).data(), 0.5/3));
468  // do not run compile test
469  if (false) {
470    typedef test::DataIterator<boost::random_access_traversal_tag> trait;
471    trait::unweighted_const_iterator unweighted_const_iterator;
472    trait::unweighted_iterator unweighted_mutable_iterator;
473    trait::weighted_const_iterator weighted_const_iterator;
474    trait::weighted_iterator weighted_mutable_iterator;
475
476    spearman(unweighted_const_iterator, unweighted_const_iterator,
477             unweighted_mutable_iterator);
478    spearman(unweighted_const_iterator, unweighted_const_iterator,
479             weighted_mutable_iterator);
480    spearman(weighted_const_iterator, weighted_const_iterator,
481             unweighted_mutable_iterator);
482    spearman(weighted_const_iterator, weighted_const_iterator,
483             weighted_mutable_iterator);
484  }
485}
486
487
488void test_z_score(test::Suite& suite)
489{
490  suite.err() << "Testing Zscore\n";
491  std::vector<double> vec;
492  vec.push_back(0);
493  vec.push_back(3.14);
494  normalizer::Zscore zscore;
495  zscore(vec.begin(), vec.end(), vec.begin());
496  for (size_t i=0; i<vec.size(); ++i)
497    suite.add(suite.equal(vec[i], 2.0*i-1.0));
498
499  std::vector<utility::DataWeight> vec2;
500  vec2.push_back(utility::DataWeight(1,1));
501  vec2.push_back(utility::DataWeight(2.13,0.5));
502  vec2.push_back(utility::DataWeight(2.13,0.5));
503  std::vector<utility::DataWeight> vec3(vec2.size());
504  zscore(vec2.begin(), vec2.end(), vec3.begin());
505  for (size_t i=0; i<vec2.size(); ++i)
506    suite.add(suite.equal(vec3[i].weight(), vec2[i].weight()));
507  suite.add(suite.equal(vec3[0].data(), -1.0));
508  suite.add(suite.equal(vec3[1].data(), 1.0));
509  suite.add(suite.equal(vec3[2].data(), 1.0));
510  // compile test should not be run
511  if (false) {
512    typedef test::DataIterator<boost::forward_traversal_tag> trait;
513    trait::unweighted_const_iterator unweighted_const_iterator;
514    trait::unweighted_iterator unweighted_mutable_iterator;
515    trait::weighted_const_iterator weighted_const_iterator;
516    trait::weighted_iterator weighted_mutable_iterator;
517
518    zscore(unweighted_const_iterator, unweighted_const_iterator,
519           unweighted_mutable_iterator);
520    zscore(unweighted_const_iterator, unweighted_const_iterator,
521           weighted_mutable_iterator);
522    zscore(weighted_const_iterator, weighted_const_iterator,
523           unweighted_mutable_iterator);
524    zscore(weighted_const_iterator, weighted_const_iterator,
525           weighted_mutable_iterator);
526  }
527}
Note: See TracBrowser for help on using the repository browser.