source: trunk/test/normalization.cc @ 3544

Last change on this file since 3544 was 3544, checked in by Peter, 5 years ago

refs #803. use boost iterator categories in normalizer::Gauss

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 17.0 KB
Line 
1// $Id: normalization.cc 3544 2016-12-23 07:42:56Z peter $
2
3/*
4  Copyright (C) 2008, 2009 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2010, 2012, 2014 Peter Johansson
6
7  This file is part of the yat library, http://dev.thep.lu.se/yat
8
9  The yat library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 3 of the
12  License, or (at your option) any later version.
13
14  The yat library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  General Public License for more details.
18
19  You should have received a copy of the GNU General Public License
20  along with yat. If not, see <http://www.gnu.org/licenses/>.
21*/
22
23#include <config.h>
24
25#include "Suite.h"
26
27#include "yat/normalizer/Centralizer.h"
28#include "yat/normalizer/ColumnNormalizer.h"
29#include "yat/normalizer/Gauss.h"
30#include "yat/normalizer/qQuantileNormalizer.h"
31#include "yat/normalizer/QuantileNormalizer.h"
32#include "yat/normalizer/RowNormalizer.h"
33#include "yat/normalizer/Spearman.h"
34#include "yat/normalizer/Zscore.h"
35
36#include "yat/utility/DataIterator.h"
37#include "yat/utility/FileUtil.h"
38#include "yat/utility/Matrix.h"
39#include "yat/utility/MatrixWeighted.h"
40#include "yat/utility/WeightIterator.h"
41#include "yat/utility/WeightedIteratorArchetype.h"
42
43#include <boost/concept_archetype.hpp>
44#include <boost/iterator/iterator_archetypes.hpp>
45
46#include <climits>
47#include <fstream>
48#include <iterator>
49#include <limits>
50#include <vector>
51
52using namespace theplu::yat;
53void test_centralizer(test::Suite&);
54void test_column_normalize(test::Suite&);
55void test_gauss_normalize(test::Suite&);
56void test_qquantile_normalize(test::Suite&);
57void test_qquantile_normalize_weighted(test::Suite&);
58void test_quantile_normalize(test::Suite&);
59void test_row_normalize(test::Suite&);
60void test_spearman(test::Suite&);
61void test_spearman_weighted(test::Suite&);
62void test_z_score(test::Suite&);
63
64int main(int argc, char* argv[])
65{
66  test::Suite suite(argc, argv);
67  suite.err() << "testing normalizations ... " << std::endl;
68
69  test_centralizer(suite);
70  test_column_normalize(suite);
71  test_qquantile_normalize(suite);
72  test_qquantile_normalize_weighted(suite);
73  test_quantile_normalize(suite);
74  test_gauss_normalize(suite);
75  test_row_normalize(suite);
76  test_spearman(suite);
77  test_z_score(suite);
78
79  return suite.return_value();
80}
81
82
83void test_centralizer(test::Suite& suite)
84{
85  suite.err() << "Testing Centralizer\n";
86  std::vector<double> vec;
87  vec.push_back(1);
88  vec.push_back(2);
89  vec.push_back(3);
90  normalizer::Centralizer<> c;
91  c(vec.begin(), vec.end(), vec.begin());
92  for (size_t i=0; i<vec.size(); ++i)
93    suite.add(suite.equal(vec[i], static_cast<double>(i)-1.0));
94
95  std::vector<utility::DataWeight> vec2;
96  vec2.push_back(utility::DataWeight(1,1));
97  vec2.push_back(utility::DataWeight(2,0.5));
98  vec2.push_back(utility::DataWeight(2,0.5));
99  std::vector<utility::DataWeight> vec3(vec2.size());
100  c(vec2.begin(), vec2.end(), vec3.begin());
101  for (size_t i=0; i<vec2.size(); ++i)
102    suite.add(suite.equal(vec3[i].weight(), vec2[i].weight()));
103  suite.add(suite.equal(vec3[0].data(), -0.5));
104  suite.add(suite.equal(vec3[1].data(), 0.5));
105  suite.add(suite.equal(vec3[2].data(), 0.5));
106
107  // compile test should not be run
108  if (false) {
109    typedef test::DataIterator<boost::forward_traversal_tag> trait;
110    trait::unweighted_const_iterator unweighted_const_iterator;
111    trait::unweighted_iterator unweighted_mutable_iterator;
112    trait::weighted_const_iterator weighted_const_iterator;
113    trait::weighted_iterator weighted_mutable_iterator;
114
115    c(unweighted_const_iterator, unweighted_const_iterator,
116      unweighted_mutable_iterator);
117    c(unweighted_const_iterator, unweighted_const_iterator,
118      weighted_mutable_iterator);
119    c(weighted_const_iterator, weighted_const_iterator,
120      unweighted_mutable_iterator);
121    c(weighted_const_iterator, weighted_const_iterator,
122      weighted_mutable_iterator);
123  }
124}
125
126
127void test_column_normalize(test::Suite& suite)
128{
129  using namespace normalizer;
130  suite.err() << "Testing ColumnNormalizer\n";
131
132  utility::Matrix m(2,2);
133  m(0,0) = 0;
134  m(0,1) = 10;
135  m(1,0) = 2;
136  m(1,1) = 4;
137  ColumnNormalizer<Centralizer<> > qn;
138  qn(m, m);
139  suite.err() << "Testing m(0,0)\n";
140  suite.add(suite.equal(m(0,0), -1));
141  suite.err() << "Testing m(0,1)\n";
142  suite.add(suite.equal(m(0,1), 3));
143  suite.err() << "Testing m(1,0)\n";
144  suite.add(suite.equal(m(1,0), 1));
145  suite.err() << "Testing m(1,1)\n";
146  suite.add(suite.equal(m(1,1), -3));
147
148  if (false) { // do not run compile tests
149    test::container2d_archetype<double> container2d;
150    test::mutable_container2d_archetype<double> mutable_container2d;
151    qn(container2d, mutable_container2d);
152  }
153}
154
155
156void test_qquantile_normalize(test::Suite& suite)
157{
158  using namespace normalizer;
159
160  suite.err() << "Testing qQuantileNormalizer\n";
161  std::string data(test::filename("data/normalization_test.data"));
162  if (utility::FileUtil(data.c_str()).permissions("r")) {
163    suite.add(false);
164    suite.err() << "Cannot access file " << data << '\n';
165    return;
166  }
167  std::ifstream data_stream(data.c_str());
168
169  utility::Matrix m(data_stream);
170
171  suite.err() << "testing number of parts (Q) boundary conditions\n";
172  qQuantileNormalizer(m.begin_column(0), m.end_column(0), m.rows());
173  qQuantileNormalizer(m.begin_column(0), m.end_column(0), 3);
174
175  // first column as target
176  qQuantileNormalizer qqn(m.begin_column(0), m.end_column(0) ,9);
177  ColumnNormalizer<qQuantileNormalizer> cn(qqn);
178  utility::Matrix result(m.rows(),m.columns());
179  cn(m, result);
180
181  suite.err() << "test that result can be stored in the source matrix...";
182  cn(m,m);
183  if (suite.add(result==m))
184    suite.err() << " ok.\n";
185  else
186    suite.err() << " failed.\n";
187
188  // Enough iteration will make all columns to have the same values as
189  // the target.
190  suite.err() << "Testing that q=matrix rows gives QuantileNormalization\n";
191  utility::Matrix m2(4,2);
192  m2(0,0) = 0; m2(0,1) = 10;
193  m2(1,0) = 2; m2(1,1) = 4;
194  m2(2,0) = 1; m2(2,1) = 0;
195  m2(3,0) = 3; m2(3,1) = 7;
196  qQuantileNormalizer qqn2(m2.begin_column(0), m2.end_column(0), m2.rows());
197  ColumnNormalizer<qQuantileNormalizer> cn2(qqn2);
198  utility::Matrix result2(m2.rows(),m2.columns());
199  cn2(m2,result2);
200  suite.add( suite.equal_fix(m2(0,0),result2(2,1),1.0e-12) &&
201             suite.equal_fix(m2(1,0),result2(3,1),1.0e-12) &&
202             suite.equal_fix(m2(2,0),result2(1,1),1.0e-12) &&
203             suite.equal_fix(m2(3,0),result2(0,1),1.0e-12) );
204  // compile test should not be run
205  if (false) {
206    qQuantileNormalizer qqn3(boost::forward_iterator_archetype<double>(),
207                             boost::forward_iterator_archetype<double>(),
208                             100);
209    qqn3(boost::random_access_iterator_archetype<double>(),
210         boost::random_access_iterator_archetype<double>(),
211         boost::mutable_random_access_iterator_archetype<double>());
212    using utility::DataWeight;
213    qQuantileNormalizer qqn4(boost::forward_iterator_archetype<DataWeight>(),
214                             boost::forward_iterator_archetype<DataWeight>(),
215                             100);
216    qqn4(boost::random_access_iterator_archetype<DataWeight>(),
217         boost::random_access_iterator_archetype<DataWeight>(),
218         boost::mutable_random_access_iterator_archetype<DataWeight>());
219    qqn4(boost::random_access_iterator_archetype<DataWeight>(),
220         boost::random_access_iterator_archetype<DataWeight>(),
221         boost::mutable_random_access_iterator_archetype<double>());
222  }
223}
224
225
226void test_qquantile_normalize_weighted(test::Suite& suite)
227{
228  using namespace normalizer;
229
230  suite.err() << "Testing qQuantileNormalizer weighted\n";
231
232  // test with unweighted target and source
233  std::vector<double> target;
234  target.reserve(1000);
235  while (target.size()<1000)
236    target.push_back(target.size());
237  qQuantileNormalizer qQN(target.begin(), target.end(), 4);
238  std::vector<double> source;
239  while (source.size()<10)
240    source.push_back(source.size()*10);
241  std::vector<double> result(source.size());
242
243  qQN(source.begin(), source.end(), result.begin());
244
245  using utility::DataWeight;
246  suite.err() << "Testing with unweighted target and weighted source\n";
247  std::vector<utility::DataWeight> src_w(source.size(), DataWeight(0, 1));
248  std::copy(source.begin(), source.end(),
249            utility::data_iterator(src_w.begin()));
250
251  std::vector<utility::DataWeight> result_w(src_w.size());
252  qQN(src_w.begin(), src_w.end(), result_w.begin());
253  suite.add(suite.equal_range(result.begin(), result.end(),
254                              utility::data_iterator(result_w.begin())));
255
256  suite.err() << "Testing with missing value in source\n";
257  // adding a missing value
258  std::vector<utility::DataWeight>::iterator MWi=src_w.begin();
259  MWi+=5;
260  src_w.insert(MWi, DataWeight(std::numeric_limits<double>::quiet_NaN(), 0.0));
261  std::vector<utility::DataWeight> result_w2(src_w.size());
262  qQN(src_w.begin(), src_w.end(), result_w2.begin());
263  // excluding missing value from comparison in suite.equal_range
264  MWi=result_w2.begin();
265  MWi+=5;
266  result_w2.erase(MWi);
267  suite.add(suite.equal_range(utility::data_iterator(result_w.begin()),
268                              utility::data_iterator(result_w.end()),
269                              utility::data_iterator(result_w2.begin())));
270
271  suite.err() << "testing with weighted target" << std::endl;
272  std::vector<utility::DataWeight> target_w(target.size()+1, DataWeight(0, 1));
273  target_w[0] = DataWeight(5.3, 0);
274  std::copy(target.begin(), target.end(),
275            utility::data_iterator(target_w.begin()+1));
276  qQuantileNormalizer qQNw(target_w.begin(), target_w.end(), 4);
277  std::vector<utility::DataWeight> result_w3(src_w.size());
278  qQNw(src_w.begin(), src_w.end(), result_w3.begin());
279  // excluding missing value from comparison in suite.equal_range
280  MWi=result_w3.begin();
281  MWi+=5;
282  result_w3.erase(MWi);
283  suite.add(suite.equal_range(utility::data_iterator(result_w3.begin()),
284                              utility::data_iterator(result_w3.end()),
285                              utility::data_iterator(result_w2.begin())));
286
287}
288
289
290void test_quantile_normalize(test::Suite& suite)
291{
292  suite.err() << "Testing QuantileNormalizer\n";
293  utility::Matrix m(2,2);
294  m(0,0) = 0;
295  m(0,1) = 10;
296  m(1,0) = 2;
297  m(1,1) = 4;
298  normalizer::QuantileNormalizer qn;
299  qn(m, m);
300  suite.err() << "Testing m(0,0)\n";
301  suite.add(suite.equal(m(0,0), 2));
302  suite.err() << "Testing m(0,1)\n";
303  suite.add(suite.equal(m(0,1), 6));
304  suite.err() << "Testing m(1,0)\n";
305  suite.add(suite.equal(m(1,0), 6));
306  suite.err() << "Testing m(1,1)\n";
307  suite.add(suite.equal(m(1,1), 2));
308
309  // testing with landscape-shaped data matrix
310  m.resize(2, 10);
311  qn(m, m);
312}
313
314void test_row_normalize(test::Suite& suite)
315{
316  using namespace normalizer;
317  suite.err() << "Testing RowNormalizer\n";
318
319  utility::Matrix m(2,3);
320  m(0,0) = 0;
321  m(0,1) = 10;
322  m(1,0) = 2;
323  m(1,1) = 4;
324  utility::Matrix m2(m);
325  m2.transpose();
326  ColumnNormalizer<Centralizer<> > cn;
327  RowNormalizer<Centralizer<> > rn;
328  cn(m, m);
329  rn(m2, m2);
330  m2.transpose();
331  suite.equal_range(m.begin(), m.end(), m2.begin());
332  if (false) { // do not run compile tests
333    test::container2d_archetype<double> container2d;
334    test::mutable_container2d_archetype<double> mutable_container2d;
335    rn(container2d, mutable_container2d);
336  }
337}
338
339void test_spearman(test::Suite& suite)
340{
341  suite.err() << "Testing Spearman\n";
342  normalizer::Spearman spearman;
343  std::vector<double> vec;
344  vec.push_back(0);
345  vec.push_back(2);
346  vec.push_back(3);
347  vec.push_back(1);
348  spearman(vec.begin(), vec.end(), vec.begin());
349  std::vector<double> correct;
350  correct.push_back(1.0/8);
351  correct.push_back(5.0/8);
352  correct.push_back(7.0/8);
353  correct.push_back(3.0/8);
354  suite.add(suite.equal_range(vec.begin(), vec.end(), correct.begin()));
355  suite.err() << "Testing Spearman with ties\n";
356  vec[1]=vec[2];
357  correct[1] = correct[2] = (correct[1]+correct[2])/2;
358  spearman(vec.begin(), vec.end(), vec.begin());
359  suite.add(suite.equal_range(vec.begin(), vec.end(), correct.begin()));
360  test_spearman_weighted(suite);
361
362}
363
364
365void test_gauss_normalize(test::Suite& suite)
366{
367  suite.err() << "Testing Gauss\n";
368  normalizer::Gauss gauss;
369  std::vector<double> vec;
370  vec.push_back(1);
371  gauss(vec.begin(), vec.end(), vec.begin());
372  suite.add(suite.equal(vec.front(), 0));
373  vec.push_back(1);
374  gauss(vec.begin(), vec.end(), vec.begin());
375  suite.add(suite.equal(vec.front(), -vec.back()));
376  // compile test should not be run
377  if (false) {
378    typedef test::DataIterator<boost::random_access_traversal_tag> trait;
379    trait::unweighted_const_iterator readable_iterator;
380    trait::unweighted_iterator writable_iterator;
381    trait::weighted_const_iterator weighted_readable_iterator;
382    trait::weighted_iterator weighted_writable_iterator;
383
384    gauss(readable_iterator, readable_iterator, writable_iterator);
385    gauss(readable_iterator, readable_iterator, weighted_writable_iterator);
386    gauss(weighted_readable_iterator, weighted_readable_iterator,
387          writable_iterator);
388    gauss(weighted_readable_iterator, weighted_readable_iterator,
389          weighted_writable_iterator);
390  }
391}
392
393void test_spearman_weighted(test::Suite& suite)
394{
395  suite.err() << "Testing Weighted Spearman\n";
396  normalizer::Spearman spearman;
397
398  suite.err() << "Testing that unity weights reproduces unweighted case\n";
399  utility::MatrixWeighted m(1,4,0,1);
400  utility::MatrixWeighted res(m.rows(), m.columns(),3.14,0);
401  m(0,0).data()=0;
402  m(0,1).data()=2;
403  m(0,2).data()=3;
404  m(0,3).data()=1;
405  std::vector<double> correct(m.columns());
406  std::vector<double> correct_w(m.columns(), 1.0);
407  std::copy(utility::data_iterator(m.begin_row(0)),
408            utility::data_iterator(m.end_row(0)),
409            correct.begin());
410  spearman(correct.begin(), correct.end(), correct.begin());
411  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
412
413  using utility::data_iterator;
414  suite.add(suite.equal_range(data_iterator(res.begin_row(0)),
415                               data_iterator(res.end_row(0)),
416                               correct.begin()));
417  using utility::weight_iterator;
418  suite.add(suite.equal_range(weight_iterator(res.begin_row(0)),
419                               weight_iterator(res.end_row(0)),
420                               correct_w.begin()));
421
422  suite.err() << "Testing rescaling of weights\n";
423  for (size_t i=0; i<m.columns(); ++i) {
424    m(0,i).weight() *= 2;
425    correct_w[i] *= 2;
426  }
427  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
428  suite.add(suite.equal_range(data_iterator(res.begin_row(0)),
429                               data_iterator(res.end_row(0)),
430                               correct.begin()));
431  suite.add(suite.equal_range(weight_iterator(res.begin_row(0)),
432                               weight_iterator(res.end_row(0)),
433                               correct_w.begin()));
434
435
436  suite.err() << "Testing case with a zero weight\n";
437  m(0,1).data() = std::numeric_limits<double>::quiet_NaN();
438  m(0,1).weight() = 0.0;
439  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
440  suite.add(suite.equal(res(0,0).data(), 0.5/3));
441  suite.add(suite.equal(res(0,2).data(), 2.5/3));
442  suite.add(suite.equal(res(0,3).data(), 1.5/3));
443
444  suite.err() << "Testing case with ties\n";
445  m(0,0).data() = m(0,2).data();
446  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
447  suite.add(suite.equal(res(0,0).data(), 2.0/3));
448  suite.add(suite.equal(res(0,2).data(), 2.0/3));
449  suite.add(suite.equal(res(0,3).data(), 0.5/3));
450  // do not run compile test
451  if (false) {
452    typedef test::DataIterator<boost::random_access_traversal_tag> trait;
453    trait::unweighted_const_iterator unweighted_const_iterator;
454    trait::unweighted_iterator unweighted_mutable_iterator;
455    trait::weighted_const_iterator weighted_const_iterator;
456    trait::weighted_iterator weighted_mutable_iterator;
457
458    spearman(unweighted_const_iterator, unweighted_const_iterator,
459             unweighted_mutable_iterator);
460    spearman(unweighted_const_iterator, unweighted_const_iterator,
461             weighted_mutable_iterator);
462    spearman(weighted_const_iterator, weighted_const_iterator,
463             unweighted_mutable_iterator);
464    spearman(weighted_const_iterator, weighted_const_iterator,
465             weighted_mutable_iterator);
466  }
467}
468
469
470void test_z_score(test::Suite& suite)
471{
472  suite.err() << "Testing Zscore\n";
473  std::vector<double> vec;
474  vec.push_back(0);
475  vec.push_back(3.14);
476  normalizer::Zscore zscore;
477  zscore(vec.begin(), vec.end(), vec.begin());
478  for (size_t i=0; i<vec.size(); ++i)
479    suite.add(suite.equal(vec[i], 2.0*i-1.0));
480
481  std::vector<utility::DataWeight> vec2;
482  vec2.push_back(utility::DataWeight(1,1));
483  vec2.push_back(utility::DataWeight(2.13,0.5));
484  vec2.push_back(utility::DataWeight(2.13,0.5));
485  std::vector<utility::DataWeight> vec3(vec2.size());
486  zscore(vec2.begin(), vec2.end(), vec3.begin());
487  for (size_t i=0; i<vec2.size(); ++i)
488    suite.add(suite.equal(vec3[i].weight(), vec2[i].weight()));
489  suite.add(suite.equal(vec3[0].data(), -1.0));
490  suite.add(suite.equal(vec3[1].data(), 1.0));
491  suite.add(suite.equal(vec3[2].data(), 1.0));
492  // compile test should not be run
493  if (false) {
494    typedef test::DataIterator<boost::forward_traversal_tag> trait;
495    trait::unweighted_const_iterator unweighted_const_iterator;
496    trait::unweighted_iterator unweighted_mutable_iterator;
497    trait::weighted_const_iterator weighted_const_iterator;
498    trait::weighted_iterator weighted_mutable_iterator;
499
500    zscore(unweighted_const_iterator, unweighted_const_iterator,
501           unweighted_mutable_iterator);
502    zscore(unweighted_const_iterator, unweighted_const_iterator,
503           weighted_mutable_iterator);
504    zscore(weighted_const_iterator, weighted_const_iterator,
505           unweighted_mutable_iterator);
506    zscore(weighted_const_iterator, weighted_const_iterator,
507           weighted_mutable_iterator);
508  }
509}
Note: See TracBrowser for help on using the repository browser.