source: trunk/test/normalization.cc @ 3547

Last change on this file since 3547 was 3547, checked in by Peter, 6 years ago

refs #803; use boost iterator concepts in qQuantileNormalizer.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 17.8 KB
Line 
1// $Id: normalization.cc 3547 2016-12-30 11:47:33Z peter $
2
3/*
4  Copyright (C) 2008, 2009 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2010, 2012, 2014 Peter Johansson
6
7  This file is part of the yat library, http://dev.thep.lu.se/yat
8
9  The yat library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 3 of the
12  License, or (at your option) any later version.
13
14  The yat library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  General Public License for more details.
18
19  You should have received a copy of the GNU General Public License
20  along with yat. If not, see <http://www.gnu.org/licenses/>.
21*/
22
23#include <config.h>
24
25#include "Suite.h"
26
27#include "yat/normalizer/Centralizer.h"
28#include "yat/normalizer/ColumnNormalizer.h"
29#include "yat/normalizer/Gauss.h"
30#include "yat/normalizer/qQuantileNormalizer.h"
31#include "yat/normalizer/QuantileNormalizer.h"
32#include "yat/normalizer/RowNormalizer.h"
33#include "yat/normalizer/Spearman.h"
34#include "yat/normalizer/Zscore.h"
35
36#include "yat/utility/DataIterator.h"
37#include "yat/utility/FileUtil.h"
38#include "yat/utility/Matrix.h"
39#include "yat/utility/MatrixWeighted.h"
40#include "yat/utility/WeightIterator.h"
41#include "yat/utility/WeightedIteratorArchetype.h"
42
43#include <boost/concept_archetype.hpp>
44#include <boost/iterator/iterator_archetypes.hpp>
45
46#include <climits>
47#include <fstream>
48#include <iterator>
49#include <limits>
50#include <vector>
51
52using namespace theplu::yat;
53void test_centralizer(test::Suite&);
54void test_column_normalize(test::Suite&);
55void test_gauss_normalize(test::Suite&);
56void test_qquantile_normalize(test::Suite&);
57void test_qquantile_normalize_weighted(test::Suite&);
58void test_quantile_normalize(test::Suite&);
59void test_row_normalize(test::Suite&);
60void test_spearman(test::Suite&);
61void test_spearman_weighted(test::Suite&);
62void test_z_score(test::Suite&);
63
64int main(int argc, char* argv[])
65{
66  test::Suite suite(argc, argv);
67  suite.err() << "testing normalizations ... " << std::endl;
68
69  test_centralizer(suite);
70  test_column_normalize(suite);
71  test_qquantile_normalize(suite);
72  test_qquantile_normalize_weighted(suite);
73  test_quantile_normalize(suite);
74  test_gauss_normalize(suite);
75  test_row_normalize(suite);
76  test_spearman(suite);
77  test_z_score(suite);
78
79  return suite.return_value();
80}
81
82
83void test_centralizer(test::Suite& suite)
84{
85  suite.err() << "Testing Centralizer\n";
86  std::vector<double> vec;
87  vec.push_back(1);
88  vec.push_back(2);
89  vec.push_back(3);
90  normalizer::Centralizer<> c;
91  c(vec.begin(), vec.end(), vec.begin());
92  for (size_t i=0; i<vec.size(); ++i)
93    suite.add(suite.equal(vec[i], static_cast<double>(i)-1.0));
94
95  std::vector<utility::DataWeight> vec2;
96  vec2.push_back(utility::DataWeight(1,1));
97  vec2.push_back(utility::DataWeight(2,0.5));
98  vec2.push_back(utility::DataWeight(2,0.5));
99  std::vector<utility::DataWeight> vec3(vec2.size());
100  c(vec2.begin(), vec2.end(), vec3.begin());
101  for (size_t i=0; i<vec2.size(); ++i)
102    suite.add(suite.equal(vec3[i].weight(), vec2[i].weight()));
103  suite.add(suite.equal(vec3[0].data(), -0.5));
104  suite.add(suite.equal(vec3[1].data(), 0.5));
105  suite.add(suite.equal(vec3[2].data(), 0.5));
106
107  // compile test should not be run
108  if (false) {
109    typedef test::DataIterator<boost::forward_traversal_tag> trait;
110    trait::unweighted_const_iterator unweighted_const_iterator;
111    trait::unweighted_iterator unweighted_mutable_iterator;
112    trait::weighted_const_iterator weighted_const_iterator;
113    trait::weighted_iterator weighted_mutable_iterator;
114
115    c(unweighted_const_iterator, unweighted_const_iterator,
116      unweighted_mutable_iterator);
117    c(unweighted_const_iterator, unweighted_const_iterator,
118      weighted_mutable_iterator);
119    c(weighted_const_iterator, weighted_const_iterator,
120      unweighted_mutable_iterator);
121    c(weighted_const_iterator, weighted_const_iterator,
122      weighted_mutable_iterator);
123  }
124}
125
126
127void test_column_normalize(test::Suite& suite)
128{
129  using namespace normalizer;
130  suite.err() << "Testing ColumnNormalizer\n";
131
132  utility::Matrix m(2,2);
133  m(0,0) = 0;
134  m(0,1) = 10;
135  m(1,0) = 2;
136  m(1,1) = 4;
137  ColumnNormalizer<Centralizer<> > qn;
138  qn(m, m);
139  suite.err() << "Testing m(0,0)\n";
140  suite.add(suite.equal(m(0,0), -1));
141  suite.err() << "Testing m(0,1)\n";
142  suite.add(suite.equal(m(0,1), 3));
143  suite.err() << "Testing m(1,0)\n";
144  suite.add(suite.equal(m(1,0), 1));
145  suite.err() << "Testing m(1,1)\n";
146  suite.add(suite.equal(m(1,1), -3));
147
148  if (false) { // do not run compile tests
149    test::container2d_archetype<double> container2d;
150    test::mutable_container2d_archetype<double> mutable_container2d;
151    qn(container2d, mutable_container2d);
152  }
153}
154
155
156void test_qquantile_normalize(test::Suite& suite)
157{
158  using namespace normalizer;
159
160  suite.err() << "Testing qQuantileNormalizer\n";
161  std::string data(test::filename("data/normalization_test.data"));
162  if (utility::FileUtil(data.c_str()).permissions("r")) {
163    suite.add(false);
164    suite.err() << "Cannot access file " << data << '\n';
165    return;
166  }
167  std::ifstream data_stream(data.c_str());
168
169  utility::Matrix m(data_stream);
170
171  suite.err() << "testing number of parts (Q) boundary conditions\n";
172  qQuantileNormalizer(m.begin_column(0), m.end_column(0), m.rows());
173  qQuantileNormalizer(m.begin_column(0), m.end_column(0), 3);
174
175  // first column as target
176  qQuantileNormalizer qqn(m.begin_column(0), m.end_column(0) ,9);
177  ColumnNormalizer<qQuantileNormalizer> cn(qqn);
178  utility::Matrix result(m.rows(),m.columns());
179  cn(m, result);
180
181  suite.err() << "test that result can be stored in the source matrix...";
182  cn(m,m);
183  if (suite.add(result==m))
184    suite.err() << " ok.\n";
185  else
186    suite.err() << " failed.\n";
187
188  // Enough iteration will make all columns to have the same values as
189  // the target.
190  suite.err() << "Testing that q=matrix rows gives QuantileNormalization\n";
191  utility::Matrix m2(4,2);
192  m2(0,0) = 0; m2(0,1) = 10;
193  m2(1,0) = 2; m2(1,1) = 4;
194  m2(2,0) = 1; m2(2,1) = 0;
195  m2(3,0) = 3; m2(3,1) = 7;
196  qQuantileNormalizer qqn2(m2.begin_column(0), m2.end_column(0), m2.rows());
197  ColumnNormalizer<qQuantileNormalizer> cn2(qqn2);
198  utility::Matrix result2(m2.rows(),m2.columns());
199  cn2(m2,result2);
200  suite.add( suite.equal_fix(m2(0,0),result2(2,1),1.0e-12) &&
201             suite.equal_fix(m2(1,0),result2(3,1),1.0e-12) &&
202             suite.equal_fix(m2(2,0),result2(1,1),1.0e-12) &&
203             suite.equal_fix(m2(3,0),result2(0,1),1.0e-12) );
204  // compile test should not be run
205  if (false) {
206    qQuantileNormalizer qqn3(boost::forward_iterator_archetype<double>(),
207                             boost::forward_iterator_archetype<double>(),
208                             100);
209    qqn3(boost::random_access_iterator_archetype<double>(),
210         boost::random_access_iterator_archetype<double>(),
211         boost::mutable_random_access_iterator_archetype<double>());
212    using utility::DataWeight;
213    qQuantileNormalizer qqn4(boost::forward_iterator_archetype<DataWeight>(),
214                             boost::forward_iterator_archetype<DataWeight>(),
215                             100);
216    qqn4(boost::random_access_iterator_archetype<DataWeight>(),
217         boost::random_access_iterator_archetype<DataWeight>(),
218         boost::mutable_random_access_iterator_archetype<DataWeight>());
219    qqn4(boost::random_access_iterator_archetype<DataWeight>(),
220         boost::random_access_iterator_archetype<DataWeight>(),
221         boost::mutable_random_access_iterator_archetype<double>());
222
223    // test against boost iterator concepts
224    typedef test::DataIterator<boost::forward_traversal_tag> trait1;
225    trait1::unweighted_const_iterator input;
226    trait1::weighted_const_iterator weighted_input;
227    qQuantileNormalizer qqn5(input, input, 100);
228    qQuantileNormalizer qqn6(weighted_input, weighted_input, 100);
229
230    typedef test::DataIterator<boost::random_access_traversal_tag> trait2;
231    trait2::unweighted_const_iterator const_iterator;
232    trait2::unweighted_iterator iterator;
233    trait2::weighted_const_iterator const_weighted_iterator;
234    trait2::weighted_iterator weighted_iterator;
235    qqn5(const_iterator, const_iterator, iterator);
236    qqn5(const_iterator, const_iterator, weighted_iterator);
237    qqn5(const_weighted_iterator, const_weighted_iterator, iterator);
238    qqn5(const_iterator, const_iterator, weighted_iterator);
239  }
240}
241
242
243void test_qquantile_normalize_weighted(test::Suite& suite)
244{
245  using namespace normalizer;
246
247  suite.err() << "Testing qQuantileNormalizer weighted\n";
248
249  // test with unweighted target and source
250  std::vector<double> target;
251  target.reserve(1000);
252  while (target.size()<1000)
253    target.push_back(target.size());
254  qQuantileNormalizer qQN(target.begin(), target.end(), 4);
255  std::vector<double> source;
256  while (source.size()<10)
257    source.push_back(source.size()*10);
258  std::vector<double> result(source.size());
259
260  qQN(source.begin(), source.end(), result.begin());
261
262  using utility::DataWeight;
263  suite.err() << "Testing with unweighted target and weighted source\n";
264  std::vector<utility::DataWeight> src_w(source.size(), DataWeight(0, 1));
265  std::copy(source.begin(), source.end(),
266            utility::data_iterator(src_w.begin()));
267
268  std::vector<utility::DataWeight> result_w(src_w.size());
269  qQN(src_w.begin(), src_w.end(), result_w.begin());
270  suite.add(suite.equal_range(result.begin(), result.end(),
271                              utility::data_iterator(result_w.begin())));
272
273  suite.err() << "Testing with missing value in source\n";
274  // adding a missing value
275  std::vector<utility::DataWeight>::iterator MWi=src_w.begin();
276  MWi+=5;
277  src_w.insert(MWi, DataWeight(std::numeric_limits<double>::quiet_NaN(), 0.0));
278  std::vector<utility::DataWeight> result_w2(src_w.size());
279  qQN(src_w.begin(), src_w.end(), result_w2.begin());
280  // excluding missing value from comparison in suite.equal_range
281  MWi=result_w2.begin();
282  MWi+=5;
283  result_w2.erase(MWi);
284  suite.add(suite.equal_range(utility::data_iterator(result_w.begin()),
285                              utility::data_iterator(result_w.end()),
286                              utility::data_iterator(result_w2.begin())));
287
288  suite.err() << "testing with weighted target" << std::endl;
289  std::vector<utility::DataWeight> target_w(target.size()+1, DataWeight(0, 1));
290  target_w[0] = DataWeight(5.3, 0);
291  std::copy(target.begin(), target.end(),
292            utility::data_iterator(target_w.begin()+1));
293  qQuantileNormalizer qQNw(target_w.begin(), target_w.end(), 4);
294  std::vector<utility::DataWeight> result_w3(src_w.size());
295  qQNw(src_w.begin(), src_w.end(), result_w3.begin());
296  // excluding missing value from comparison in suite.equal_range
297  MWi=result_w3.begin();
298  MWi+=5;
299  result_w3.erase(MWi);
300  suite.add(suite.equal_range(utility::data_iterator(result_w3.begin()),
301                              utility::data_iterator(result_w3.end()),
302                              utility::data_iterator(result_w2.begin())));
303
304}
305
306
307void test_quantile_normalize(test::Suite& suite)
308{
309  suite.err() << "Testing QuantileNormalizer\n";
310  utility::Matrix m(2,2);
311  m(0,0) = 0;
312  m(0,1) = 10;
313  m(1,0) = 2;
314  m(1,1) = 4;
315  normalizer::QuantileNormalizer qn;
316  qn(m, m);
317  suite.err() << "Testing m(0,0)\n";
318  suite.add(suite.equal(m(0,0), 2));
319  suite.err() << "Testing m(0,1)\n";
320  suite.add(suite.equal(m(0,1), 6));
321  suite.err() << "Testing m(1,0)\n";
322  suite.add(suite.equal(m(1,0), 6));
323  suite.err() << "Testing m(1,1)\n";
324  suite.add(suite.equal(m(1,1), 2));
325
326  // testing with landscape-shaped data matrix
327  m.resize(2, 10);
328  qn(m, m);
329}
330
331void test_row_normalize(test::Suite& suite)
332{
333  using namespace normalizer;
334  suite.err() << "Testing RowNormalizer\n";
335
336  utility::Matrix m(2,3);
337  m(0,0) = 0;
338  m(0,1) = 10;
339  m(1,0) = 2;
340  m(1,1) = 4;
341  utility::Matrix m2(m);
342  m2.transpose();
343  ColumnNormalizer<Centralizer<> > cn;
344  RowNormalizer<Centralizer<> > rn;
345  cn(m, m);
346  rn(m2, m2);
347  m2.transpose();
348  suite.equal_range(m.begin(), m.end(), m2.begin());
349  if (false) { // do not run compile tests
350    test::container2d_archetype<double> container2d;
351    test::mutable_container2d_archetype<double> mutable_container2d;
352    rn(container2d, mutable_container2d);
353  }
354}
355
356void test_spearman(test::Suite& suite)
357{
358  suite.err() << "Testing Spearman\n";
359  normalizer::Spearman spearman;
360  std::vector<double> vec;
361  vec.push_back(0);
362  vec.push_back(2);
363  vec.push_back(3);
364  vec.push_back(1);
365  spearman(vec.begin(), vec.end(), vec.begin());
366  std::vector<double> correct;
367  correct.push_back(1.0/8);
368  correct.push_back(5.0/8);
369  correct.push_back(7.0/8);
370  correct.push_back(3.0/8);
371  suite.add(suite.equal_range(vec.begin(), vec.end(), correct.begin()));
372  suite.err() << "Testing Spearman with ties\n";
373  vec[1]=vec[2];
374  correct[1] = correct[2] = (correct[1]+correct[2])/2;
375  spearman(vec.begin(), vec.end(), vec.begin());
376  suite.add(suite.equal_range(vec.begin(), vec.end(), correct.begin()));
377  test_spearman_weighted(suite);
378
379}
380
381
382void test_gauss_normalize(test::Suite& suite)
383{
384  suite.err() << "Testing Gauss\n";
385  normalizer::Gauss gauss;
386  std::vector<double> vec;
387  vec.push_back(1);
388  gauss(vec.begin(), vec.end(), vec.begin());
389  suite.add(suite.equal(vec.front(), 0));
390  vec.push_back(1);
391  gauss(vec.begin(), vec.end(), vec.begin());
392  suite.add(suite.equal(vec.front(), -vec.back()));
393  // compile test should not be run
394  if (false) {
395    typedef test::DataIterator<boost::random_access_traversal_tag> trait;
396    trait::unweighted_const_iterator readable_iterator;
397    trait::unweighted_iterator writable_iterator;
398    trait::weighted_const_iterator weighted_readable_iterator;
399    trait::weighted_iterator weighted_writable_iterator;
400
401    gauss(readable_iterator, readable_iterator, writable_iterator);
402    gauss(readable_iterator, readable_iterator, weighted_writable_iterator);
403    gauss(weighted_readable_iterator, weighted_readable_iterator,
404          writable_iterator);
405    gauss(weighted_readable_iterator, weighted_readable_iterator,
406          weighted_writable_iterator);
407  }
408}
409
410void test_spearman_weighted(test::Suite& suite)
411{
412  suite.err() << "Testing Weighted Spearman\n";
413  normalizer::Spearman spearman;
414
415  suite.err() << "Testing that unity weights reproduces unweighted case\n";
416  utility::MatrixWeighted m(1,4,0,1);
417  utility::MatrixWeighted res(m.rows(), m.columns(),3.14,0);
418  m(0,0).data()=0;
419  m(0,1).data()=2;
420  m(0,2).data()=3;
421  m(0,3).data()=1;
422  std::vector<double> correct(m.columns());
423  std::vector<double> correct_w(m.columns(), 1.0);
424  std::copy(utility::data_iterator(m.begin_row(0)),
425            utility::data_iterator(m.end_row(0)),
426            correct.begin());
427  spearman(correct.begin(), correct.end(), correct.begin());
428  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
429
430  using utility::data_iterator;
431  suite.add(suite.equal_range(data_iterator(res.begin_row(0)),
432                               data_iterator(res.end_row(0)),
433                               correct.begin()));
434  using utility::weight_iterator;
435  suite.add(suite.equal_range(weight_iterator(res.begin_row(0)),
436                               weight_iterator(res.end_row(0)),
437                               correct_w.begin()));
438
439  suite.err() << "Testing rescaling of weights\n";
440  for (size_t i=0; i<m.columns(); ++i) {
441    m(0,i).weight() *= 2;
442    correct_w[i] *= 2;
443  }
444  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
445  suite.add(suite.equal_range(data_iterator(res.begin_row(0)),
446                               data_iterator(res.end_row(0)),
447                               correct.begin()));
448  suite.add(suite.equal_range(weight_iterator(res.begin_row(0)),
449                               weight_iterator(res.end_row(0)),
450                               correct_w.begin()));
451
452
453  suite.err() << "Testing case with a zero weight\n";
454  m(0,1).data() = std::numeric_limits<double>::quiet_NaN();
455  m(0,1).weight() = 0.0;
456  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
457  suite.add(suite.equal(res(0,0).data(), 0.5/3));
458  suite.add(suite.equal(res(0,2).data(), 2.5/3));
459  suite.add(suite.equal(res(0,3).data(), 1.5/3));
460
461  suite.err() << "Testing case with ties\n";
462  m(0,0).data() = m(0,2).data();
463  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
464  suite.add(suite.equal(res(0,0).data(), 2.0/3));
465  suite.add(suite.equal(res(0,2).data(), 2.0/3));
466  suite.add(suite.equal(res(0,3).data(), 0.5/3));
467  // do not run compile test
468  if (false) {
469    typedef test::DataIterator<boost::random_access_traversal_tag> trait;
470    trait::unweighted_const_iterator unweighted_const_iterator;
471    trait::unweighted_iterator unweighted_mutable_iterator;
472    trait::weighted_const_iterator weighted_const_iterator;
473    trait::weighted_iterator weighted_mutable_iterator;
474
475    spearman(unweighted_const_iterator, unweighted_const_iterator,
476             unweighted_mutable_iterator);
477    spearman(unweighted_const_iterator, unweighted_const_iterator,
478             weighted_mutable_iterator);
479    spearman(weighted_const_iterator, weighted_const_iterator,
480             unweighted_mutable_iterator);
481    spearman(weighted_const_iterator, weighted_const_iterator,
482             weighted_mutable_iterator);
483  }
484}
485
486
487void test_z_score(test::Suite& suite)
488{
489  suite.err() << "Testing Zscore\n";
490  std::vector<double> vec;
491  vec.push_back(0);
492  vec.push_back(3.14);
493  normalizer::Zscore zscore;
494  zscore(vec.begin(), vec.end(), vec.begin());
495  for (size_t i=0; i<vec.size(); ++i)
496    suite.add(suite.equal(vec[i], 2.0*i-1.0));
497
498  std::vector<utility::DataWeight> vec2;
499  vec2.push_back(utility::DataWeight(1,1));
500  vec2.push_back(utility::DataWeight(2.13,0.5));
501  vec2.push_back(utility::DataWeight(2.13,0.5));
502  std::vector<utility::DataWeight> vec3(vec2.size());
503  zscore(vec2.begin(), vec2.end(), vec3.begin());
504  for (size_t i=0; i<vec2.size(); ++i)
505    suite.add(suite.equal(vec3[i].weight(), vec2[i].weight()));
506  suite.add(suite.equal(vec3[0].data(), -1.0));
507  suite.add(suite.equal(vec3[1].data(), 1.0));
508  suite.add(suite.equal(vec3[2].data(), 1.0));
509  // compile test should not be run
510  if (false) {
511    typedef test::DataIterator<boost::forward_traversal_tag> trait;
512    trait::unweighted_const_iterator unweighted_const_iterator;
513    trait::unweighted_iterator unweighted_mutable_iterator;
514    trait::weighted_const_iterator weighted_const_iterator;
515    trait::weighted_iterator weighted_mutable_iterator;
516
517    zscore(unweighted_const_iterator, unweighted_const_iterator,
518           unweighted_mutable_iterator);
519    zscore(unweighted_const_iterator, unweighted_const_iterator,
520           weighted_mutable_iterator);
521    zscore(weighted_const_iterator, weighted_const_iterator,
522           unweighted_mutable_iterator);
523    zscore(weighted_const_iterator, weighted_const_iterator,
524           weighted_mutable_iterator);
525  }
526}
Note: See TracBrowser for help on using the repository browser.