source: trunk/test/normalization.cc @ 3342

Last change on this file since 3342 was 3342, checked in by Peter, 8 years ago

merge 0.12.2 into trunk

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 18.4 KB
Line 
1// $Id: normalization.cc 3342 2014-11-06 05:26:24Z peter $
2
3/*
4  Copyright (C) 2008, 2009 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2010, 2012, 2014 Peter Johansson
6
7  This file is part of the yat library, http://dev.thep.lu.se/yat
8
9  The yat library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 3 of the
12  License, or (at your option) any later version.
13
14  The yat library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  General Public License for more details.
18
19  You should have received a copy of the GNU General Public License
20  along with yat. If not, see <http://www.gnu.org/licenses/>.
21*/
22
23#include <config.h>
24
25#include "Suite.h"
26
27#include "yat/normalizer/Centralizer.h"
28#include "yat/normalizer/ColumnNormalizer.h"
29#include "yat/normalizer/Gauss.h"
30#include "yat/normalizer/qQuantileNormalizer.h"
31#include "yat/normalizer/QuantileNormalizer.h"
32#include "yat/normalizer/RowNormalizer.h"
33#include "yat/normalizer/Spearman.h"
34#include "yat/normalizer/Zscore.h"
35
36#include "yat/utility/DataIterator.h"
37#include "yat/utility/FileUtil.h"
38#include "yat/utility/Matrix.h"
39#include "yat/utility/MatrixWeighted.h"
40#include "yat/utility/WeightIterator.h"
41
42#include <boost/concept_archetype.hpp>
43#include <boost/iterator/iterator_archetypes.hpp>
44
45#include <climits>
46#include <fstream>
47#include <iterator>
48#include <limits>
49#include <vector>
50
51using namespace theplu::yat;
52void test_centralizer(test::Suite&);
53void test_column_normalize(test::Suite&);
54void test_gauss_normalize(test::Suite&);
55void test_qquantile_normalize(test::Suite&);
56void test_qquantile_normalize_weighted(test::Suite&);
57void test_quantile_normalize(test::Suite&);
58void test_row_normalize(test::Suite&);
59void test_spearman(test::Suite&);
60void test_spearman_weighted(test::Suite&);
61void test_z_score(test::Suite&);
62
63int main(int argc, char* argv[])
64{
65  test::Suite suite(argc, argv);
66  suite.err() << "testing normalizations ... " << std::endl;
67
68  test_centralizer(suite);
69  test_column_normalize(suite);
70  test_qquantile_normalize(suite);
71  test_qquantile_normalize_weighted(suite);
72  test_quantile_normalize(suite);
73  test_gauss_normalize(suite);
74  test_row_normalize(suite);
75  test_spearman(suite);
76  test_z_score(suite);
77
78  return suite.return_value();
79}
80
81
82void test_centralizer(test::Suite& suite)
83{
84  suite.err() << "Testing Centralizer\n";
85  std::vector<double> vec;
86  vec.push_back(1);
87  vec.push_back(2);
88  vec.push_back(3);
89  normalizer::Centralizer<> c;
90  c(vec.begin(), vec.end(), vec.begin());
91  for (size_t i=0; i<vec.size(); ++i)
92    suite.add(suite.equal(vec[i], static_cast<double>(i)-1.0));
93
94  std::vector<utility::DataWeight> vec2;
95  vec2.push_back(utility::DataWeight(1,1));
96  vec2.push_back(utility::DataWeight(2,0.5));
97  vec2.push_back(utility::DataWeight(2,0.5));
98  std::vector<utility::DataWeight> vec3(vec2.size());
99  c(vec2.begin(), vec2.end(), vec3.begin());
100  for (size_t i=0; i<vec2.size(); ++i)
101    suite.add(suite.equal(vec3[i].weight(), vec2[i].weight()));
102  suite.add(suite.equal(vec3[0].data(), -0.5));
103  suite.add(suite.equal(vec3[1].data(), 0.5));
104  suite.add(suite.equal(vec3[2].data(), 0.5));
105
106  // compile test should not be run
107  if (false) {
108
109    using boost::detail::dummy_constructor;
110    c(boost::forward_iterator_archetype<double>(),
111      boost::forward_iterator_archetype<double>(),
112      boost::mutable_forward_iterator_archetype<double>());
113
114    c(boost::forward_iterator_archetype<double>(),
115      boost::forward_iterator_archetype<double>(),
116      boost::mutable_forward_iterator_archetype<utility::DataWeight>());
117
118    c(boost::forward_iterator_archetype<utility::DataWeight>(),
119      boost::forward_iterator_archetype<utility::DataWeight>(),
120      boost::mutable_forward_iterator_archetype<double>());
121
122    c(boost::forward_iterator_archetype<utility::DataWeight>(),
123      boost::forward_iterator_archetype<utility::DataWeight>(),
124      boost::mutable_forward_iterator_archetype<utility::DataWeight>());
125  }
126}
127
128
129void test_column_normalize(test::Suite& suite)
130{
131  using namespace normalizer;
132  suite.err() << "Testing ColumnNormalizer\n";
133
134  utility::Matrix m(2,2);
135  m(0,0) = 0;
136  m(0,1) = 10;
137  m(1,0) = 2;
138  m(1,1) = 4;
139  ColumnNormalizer<Centralizer<> > qn;
140  qn(m, m);
141  suite.err() << "Testing m(0,0)\n";
142  suite.add(suite.equal(m(0,0), -1));
143  suite.err() << "Testing m(0,1)\n";
144  suite.add(suite.equal(m(0,1), 3));
145  suite.err() << "Testing m(1,0)\n";
146  suite.add(suite.equal(m(1,0), 1));
147  suite.err() << "Testing m(1,1)\n";
148  suite.add(suite.equal(m(1,1), -3));
149
150  if (false) { // do not run compile tests
151    test::container2d_archetype<double> container2d; 
152    test::mutable_container2d_archetype<double> mutable_container2d; 
153    qn(container2d, mutable_container2d);
154  }
155}
156
157
158void test_qquantile_normalize(test::Suite& suite)
159{
160  using namespace normalizer;
161
162  suite.err() << "Testing qQuantileNormalizer\n";
163  std::string data(test::filename("data/normalization_test.data"));
164  if (utility::FileUtil(data.c_str()).permissions("r")) {
165    suite.add(false);
166    suite.err() << "Cannot access file " << data << '\n';
167    return;
168  }
169  std::ifstream data_stream(data.c_str());
170
171  utility::Matrix m(data_stream);
172
173  suite.err() << "testing number of parts (Q) boundary conditions\n";
174  qQuantileNormalizer(m.begin_column(0), m.end_column(0), m.rows());
175  qQuantileNormalizer(m.begin_column(0), m.end_column(0), 3);
176
177  // first column as target
178  qQuantileNormalizer qqn(m.begin_column(0), m.end_column(0) ,9); 
179  ColumnNormalizer<qQuantileNormalizer> cn(qqn);
180  utility::Matrix result(m.rows(),m.columns());
181  cn(m, result);
182
183  suite.err() << "test that result can be stored in the source matrix...";
184  cn(m,m);
185  if (suite.add(result==m))
186    suite.err() << " ok.\n";
187  else 
188    suite.err() << " failed.\n";
189
190  // Enough iteration will make all columns to have the same values as
191  // the target.
192  suite.err() << "Testing that q=matrix rows gives QuantileNormalization\n";
193  utility::Matrix m2(4,2);
194  m2(0,0) = 0; m2(0,1) = 10;
195  m2(1,0) = 2; m2(1,1) = 4;
196  m2(2,0) = 1; m2(2,1) = 0;
197  m2(3,0) = 3; m2(3,1) = 7;
198  qQuantileNormalizer qqn2(m2.begin_column(0), m2.end_column(0), m2.rows());
199  ColumnNormalizer<qQuantileNormalizer> cn2(qqn2);
200  utility::Matrix result2(m2.rows(),m2.columns());
201  cn2(m2,result2);
202  suite.add( suite.equal_fix(m2(0,0),result2(2,1),1.0e-12) &&
203             suite.equal_fix(m2(1,0),result2(3,1),1.0e-12) &&
204             suite.equal_fix(m2(2,0),result2(1,1),1.0e-12) &&
205             suite.equal_fix(m2(3,0),result2(0,1),1.0e-12) );
206  // compile test should not be run
207  if (false) {
208    qQuantileNormalizer qqn3(boost::forward_iterator_archetype<double>(),
209                             boost::forward_iterator_archetype<double>(),
210                             100);
211    qqn3(boost::random_access_iterator_archetype<double>(), 
212         boost::random_access_iterator_archetype<double>(),
213         boost::mutable_random_access_iterator_archetype<double>());
214    using utility::DataWeight;
215    qQuantileNormalizer qqn4(boost::forward_iterator_archetype<DataWeight>(),
216                             boost::forward_iterator_archetype<DataWeight>(),
217                             100);
218    qqn4(boost::random_access_iterator_archetype<DataWeight>(), 
219         boost::random_access_iterator_archetype<DataWeight>(),
220         boost::mutable_random_access_iterator_archetype<DataWeight>());
221    qqn4(boost::random_access_iterator_archetype<DataWeight>(), 
222         boost::random_access_iterator_archetype<DataWeight>(),
223         boost::mutable_random_access_iterator_archetype<double>());
224  }
225}
226
227
228void test_qquantile_normalize_weighted(test::Suite& suite)
229{
230  using namespace normalizer;
231
232  suite.err() << "Testing qQuantileNormalizer weighted\n";
233
234  // test with unweighted target and source
235  std::vector<double> target;
236  target.reserve(1000);
237  while (target.size()<1000)
238    target.push_back(target.size());
239  qQuantileNormalizer qQN(target.begin(), target.end(), 4);
240  std::vector<double> source;
241  while (source.size()<10)
242    source.push_back(source.size()*10);
243  std::vector<double> result(source.size());
244 
245  qQN(source.begin(), source.end(), result.begin());
246 
247  using utility::DataWeight;
248  suite.err() << "Testing with unweighted target and weighted source\n";
249  std::vector<utility::DataWeight> src_w(source.size(), DataWeight(0, 1));
250  std::copy(source.begin(), source.end(),
251            utility::data_iterator(src_w.begin()));
252
253  std::vector<utility::DataWeight> result_w(src_w.size());
254  qQN(src_w.begin(), src_w.end(), result_w.begin());
255  suite.add(suite.equal_range(result.begin(), result.end(),
256                              utility::data_iterator(result_w.begin())));
257
258  suite.err() << "Testing with missing value in source\n";
259  // adding a missing value
260  std::vector<utility::DataWeight>::iterator MWi=src_w.begin();
261  MWi+=5;
262  src_w.insert(MWi, DataWeight(std::numeric_limits<double>::quiet_NaN(), 0.0));
263  std::vector<utility::DataWeight> result_w2(src_w.size());
264  qQN(src_w.begin(), src_w.end(), result_w2.begin());
265  // excluding missing value from comparison in suite.equal_range
266  MWi=result_w2.begin();
267  MWi+=5;
268  result_w2.erase(MWi);
269  suite.add(suite.equal_range(utility::data_iterator(result_w.begin()), 
270                              utility::data_iterator(result_w.end()),
271                              utility::data_iterator(result_w2.begin())));
272
273  suite.err() << "testing with weighted target" << std::endl;
274  std::vector<utility::DataWeight> target_w(target.size()+1, DataWeight(0, 1));
275  target_w[0] = DataWeight(5.3, 0);
276  std::copy(target.begin(), target.end(),
277            utility::data_iterator(target_w.begin()+1));
278  qQuantileNormalizer qQNw(target_w.begin(), target_w.end(), 4);
279  std::vector<utility::DataWeight> result_w3(src_w.size());
280  qQNw(src_w.begin(), src_w.end(), result_w3.begin());
281  // excluding missing value from comparison in suite.equal_range
282  MWi=result_w3.begin();
283  MWi+=5;
284  result_w3.erase(MWi);
285  suite.add(suite.equal_range(utility::data_iterator(result_w3.begin()), 
286                              utility::data_iterator(result_w3.end()),
287                              utility::data_iterator(result_w2.begin())));
288 
289}
290
291
292void test_quantile_normalize(test::Suite& suite)
293{
294  suite.err() << "Testing QuantileNormalizer\n";
295  utility::Matrix m(2,2);
296  m(0,0) = 0;
297  m(0,1) = 10;
298  m(1,0) = 2;
299  m(1,1) = 4;
300  normalizer::QuantileNormalizer qn;
301  qn(m, m);
302  suite.err() << "Testing m(0,0)\n";
303  suite.add(suite.equal(m(0,0), 2));
304  suite.err() << "Testing m(0,1)\n";
305  suite.add(suite.equal(m(0,1), 6));
306  suite.err() << "Testing m(1,0)\n";
307  suite.add(suite.equal(m(1,0), 6));
308  suite.err() << "Testing m(1,1)\n";
309  suite.add(suite.equal(m(1,1), 2));
310
311  // testing with landscape-shaped data matrix
312  m.resize(2, 10);
313  qn(m, m);
314}
315
316void test_row_normalize(test::Suite& suite)
317{
318  using namespace normalizer;
319  suite.err() << "Testing RowNormalizer\n";
320 
321  utility::Matrix m(2,3);
322  m(0,0) = 0;
323  m(0,1) = 10;
324  m(1,0) = 2;
325  m(1,1) = 4;
326  utility::Matrix m2(m);
327  m2.transpose();
328  ColumnNormalizer<Centralizer<> > cn;
329  RowNormalizer<Centralizer<> > rn;
330  cn(m, m);
331  rn(m2, m2);
332  m2.transpose();
333  suite.equal_range(m.begin(), m.end(), m2.begin());
334  if (false) { // do not run compile tests
335    test::container2d_archetype<double> container2d; 
336    test::mutable_container2d_archetype<double> mutable_container2d; 
337    rn(container2d, mutable_container2d);
338  }
339}
340
341void test_spearman(test::Suite& suite)
342{
343  suite.err() << "Testing Spearman\n";
344  normalizer::Spearman spearman;
345  std::vector<double> vec;
346  vec.push_back(0);
347  vec.push_back(2);
348  vec.push_back(3);
349  vec.push_back(1);
350  spearman(vec.begin(), vec.end(), vec.begin());
351  std::vector<double> correct;
352  correct.push_back(1.0/8);
353  correct.push_back(5.0/8);
354  correct.push_back(7.0/8);
355  correct.push_back(3.0/8);
356  suite.add(suite.equal_range(vec.begin(), vec.end(), correct.begin()));
357  suite.err() << "Testing Spearman with ties\n";
358  vec[1]=vec[2];
359  correct[1] = correct[2] = (correct[1]+correct[2])/2;
360  spearman(vec.begin(), vec.end(), vec.begin());
361  suite.add(suite.equal_range(vec.begin(), vec.end(), correct.begin()));
362  test_spearman_weighted(suite);
363}
364
365
366void test_gauss_normalize(test::Suite& suite)
367{
368  suite.err() << "Testing Gauss\n";
369  normalizer::Gauss gauss;
370  std::vector<double> vec;
371  vec.push_back(1);
372  gauss(vec.begin(), vec.end(), vec.begin());
373  suite.add(suite.equal(vec.front(), 0));
374  vec.push_back(1);
375  gauss(vec.begin(), vec.end(), vec.begin());
376  suite.add(suite.equal(vec.front(), -vec.back()));
377  // compile test should not be run
378  if (false) {
379    using boost::iterator_archetype;
380    iterator_archetype<double,
381                       boost::iterator_archetypes::readable_iterator_t,
382                       boost::random_access_traversal_tag>
383      readable_iterator;
384
385    iterator_archetype<double,
386                       boost::iterator_archetypes::readable_writable_iterator_t,
387                       boost::random_access_traversal_tag>
388      writable_iterator;
389
390    // We have to use lvalue here because otherwise proxy classes
391    // provided by boost kick in and they do not provide the needed
392    // data() and weight() functions that e.g. DataWeightProxy does.
393    iterator_archetype<utility::DataWeight,
394                       boost::iterator_archetypes::readable_lvalue_iterator_t,
395                       boost::random_access_traversal_tag>
396      weighted_readable_iterator;
397
398    iterator_archetype<utility::DataWeight,
399                       boost::iterator_archetypes::writable_lvalue_iterator_t,
400                       boost::random_access_traversal_tag>
401      weighted_writable_iterator;
402
403    gauss(readable_iterator, readable_iterator, writable_iterator);
404    gauss(readable_iterator, readable_iterator, weighted_writable_iterator);
405    gauss(weighted_readable_iterator, weighted_readable_iterator,
406          writable_iterator);
407    gauss(weighted_readable_iterator, weighted_readable_iterator,
408          weighted_writable_iterator);
409  }
410}
411
412void test_spearman_weighted(test::Suite& suite)
413{
414  suite.err() << "Testing Weighted Spearman\n";
415  normalizer::Spearman spearman;
416
417  suite.err() << "Testing that unity weights reproduces unweighted case\n";
418  utility::MatrixWeighted m(1,4,0,1);
419  utility::MatrixWeighted res(m.rows(), m.columns(),3.14,0);
420  m(0,0).data()=0;
421  m(0,1).data()=2;
422  m(0,2).data()=3;
423  m(0,3).data()=1;
424  std::vector<double> correct(m.columns());
425  std::vector<double> correct_w(m.columns(), 1.0);
426  std::copy(utility::data_iterator(m.begin_row(0)),
427            utility::data_iterator(m.end_row(0)),
428            correct.begin());
429  spearman(correct.begin(), correct.end(), correct.begin());
430  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
431
432  using utility::data_iterator;
433  suite.add(suite.equal_range(data_iterator(res.begin_row(0)),
434                               data_iterator(res.end_row(0)),
435                               correct.begin()));
436  using utility::weight_iterator;
437  suite.add(suite.equal_range(weight_iterator(res.begin_row(0)),
438                               weight_iterator(res.end_row(0)),
439                               correct_w.begin()));
440
441  suite.err() << "Testing rescaling of weights\n";
442  for (size_t i=0; i<m.columns(); ++i) {
443    m(0,i).weight() *= 2;
444    correct_w[i] *= 2;
445  }   
446  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
447  suite.add(suite.equal_range(data_iterator(res.begin_row(0)),
448                               data_iterator(res.end_row(0)),
449                               correct.begin()));
450  suite.add(suite.equal_range(weight_iterator(res.begin_row(0)),
451                               weight_iterator(res.end_row(0)),
452                               correct_w.begin()));
453
454 
455  suite.err() << "Testing case with a zero weight\n";
456  m(0,1).data() = std::numeric_limits<double>::quiet_NaN();
457  m(0,1).weight() = 0.0;
458  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
459  suite.add(suite.equal(res(0,0).data(), 0.5/3)); 
460  suite.add(suite.equal(res(0,2).data(), 2.5/3)); 
461  suite.add(suite.equal(res(0,3).data(), 1.5/3)); 
462
463  suite.err() << "Testing case with ties\n";
464  m(0,0).data() = m(0,2).data();
465  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
466  suite.add(suite.equal(res(0,0).data(), 2.0/3)); 
467  suite.add(suite.equal(res(0,2).data(), 2.0/3)); 
468  suite.add(suite.equal(res(0,3).data(), 0.5/3)); 
469  // compile test should not be run
470  if (false) {
471    using boost::iterator_archetype;
472    iterator_archetype<double,
473                       boost::iterator_archetypes::readable_iterator_t,
474                       boost::random_access_traversal_tag>
475      readable_iterator;
476
477    iterator_archetype<double,
478                       boost::iterator_archetypes::readable_writable_iterator_t,
479                       boost::random_access_traversal_tag>
480      writable_iterator;
481
482    // We have to use lvalue here because otherwise proxy classes
483    // provided by boost kick in and they do not provide the needed
484    // data() and weight() functions that e.g. DataWeightProxy does.
485    iterator_archetype<utility::DataWeight,
486                       boost::iterator_archetypes::readable_lvalue_iterator_t,
487                       boost::random_access_traversal_tag>
488      weighted_readable_iterator;
489
490    iterator_archetype<utility::DataWeight,
491                       boost::iterator_archetypes::writable_lvalue_iterator_t,
492                       boost::random_access_traversal_tag>
493      weighted_writable_iterator;
494
495    spearman(readable_iterator, readable_iterator, writable_iterator);
496    spearman(readable_iterator, readable_iterator, weighted_writable_iterator);
497    spearman(weighted_readable_iterator, weighted_readable_iterator,
498             writable_iterator);
499    spearman(weighted_readable_iterator, weighted_readable_iterator,
500             weighted_writable_iterator);
501  }
502
503}
504
505void test_z_score(test::Suite& suite)
506{
507  suite.err() << "Testing Zscore\n";
508  std::vector<double> vec;
509  vec.push_back(0);
510  vec.push_back(3.14);
511  normalizer::Zscore zscore;
512  zscore(vec.begin(), vec.end(), vec.begin());
513  for (size_t i=0; i<vec.size(); ++i)
514    suite.add(suite.equal(vec[i], 2.0*i-1.0));
515
516  std::vector<utility::DataWeight> vec2;
517  vec2.push_back(utility::DataWeight(1,1));
518  vec2.push_back(utility::DataWeight(2.13,0.5));
519  vec2.push_back(utility::DataWeight(2.13,0.5));
520  std::vector<utility::DataWeight> vec3(vec2.size());
521  zscore(vec2.begin(), vec2.end(), vec3.begin());
522  for (size_t i=0; i<vec2.size(); ++i)
523    suite.add(suite.equal(vec3[i].weight(), vec2[i].weight()));
524  suite.add(suite.equal(vec3[0].data(), -1.0));
525  suite.add(suite.equal(vec3[1].data(), 1.0));
526  suite.add(suite.equal(vec3[2].data(), 1.0));
527  // compile test should not be run
528  if (false) {
529    boost::detail::dummy_constructor dummy_cons;
530    zscore(boost::forward_iterator_archetype<double>(), 
531           boost::forward_iterator_archetype<double>(),
532           boost::output_iterator_archetype<double>(dummy_cons));
533    using utility::DataWeight;
534    zscore(boost::forward_iterator_archetype<DataWeight>(), 
535           boost::forward_iterator_archetype<DataWeight>(),
536           boost::mutable_forward_iterator_archetype<DataWeight>());
537    zscore(boost::random_access_iterator_archetype<DataWeight>(), 
538           boost::random_access_iterator_archetype<DataWeight>(),
539           boost::mutable_random_access_iterator_archetype<double>());
540    zscore(boost::random_access_iterator_archetype<double>(), 
541           boost::random_access_iterator_archetype<double>(),
542           boost::mutable_random_access_iterator_archetype<DataWeight>());
543  }
544}
Note: See TracBrowser for help on using the repository browser.