source: branches/0.12-stable/test/normalization.cc @ 3321

Last change on this file since 3321 was 3321, checked in by Peter, 9 years ago

fixes bug #815

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 17.3 KB
Line 
1// $Id: normalization.cc 3321 2014-09-19 06:59:59Z peter $
2
3/*
4  Copyright (C) 2008, 2009 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2010, 2012, 2014 Peter Johansson
6
7  This file is part of the yat library, http://dev.thep.lu.se/yat
8
9  The yat library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 3 of the
12  License, or (at your option) any later version.
13
14  The yat library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  General Public License for more details.
18
19  You should have received a copy of the GNU General Public License
20  along with yat. If not, see <http://www.gnu.org/licenses/>.
21*/
22
23#include <config.h>
24
25#include "Suite.h"
26
27#include "yat/normalizer/Centralizer.h"
28#include "yat/normalizer/ColumnNormalizer.h"
29#include "yat/normalizer/Gauss.h"
30#include "yat/normalizer/qQuantileNormalizer.h"
31#include "yat/normalizer/QuantileNormalizer.h"
32#include "yat/normalizer/RowNormalizer.h"
33#include "yat/normalizer/Spearman.h"
34#include "yat/normalizer/Zscore.h"
35
36#include "yat/utility/DataIterator.h"
37#include "yat/utility/FileUtil.h"
38#include "yat/utility/Matrix.h"
39#include "yat/utility/MatrixWeighted.h"
40#include "yat/utility/WeightIterator.h"
41
42#include <boost/concept_archetype.hpp>
43
44#include <climits>
45#include <fstream>
46#include <limits>
47#include <vector>
48
49using namespace theplu::yat;
50void test_centralizer(test::Suite&);
51void test_column_normalize(test::Suite&);
52void test_gauss_normalize(test::Suite&);
53void test_qquantile_normalize(test::Suite&);
54void test_qquantile_normalize_weighted(test::Suite&);
55void test_quantile_normalize(test::Suite&);
56void test_row_normalize(test::Suite&);
57void test_spearman(test::Suite&);
58void test_spearman_weighted(test::Suite&);
59void test_z_score(test::Suite&);
60
61int main(int argc, char* argv[])
62{ 
63  test::Suite suite(argc, argv);
64  suite.err() << "testing normalizations ... " << std::endl;
65
66  test_centralizer(suite);
67  test_column_normalize(suite);
68  test_qquantile_normalize(suite);
69  test_qquantile_normalize_weighted(suite);
70  test_quantile_normalize(suite);
71  test_gauss_normalize(suite);
72  test_row_normalize(suite);
73  test_spearman(suite);
74  test_z_score(suite);
75
76  return suite.return_value();
77}
78
79
80void test_centralizer(test::Suite& suite)
81{
82  suite.err() << "Testing Centralizer\n";
83  std::vector<double> vec;
84  vec.push_back(1);
85  vec.push_back(2);
86  vec.push_back(3);
87  normalizer::Centralizer<> c;
88  c(vec.begin(), vec.end(), vec.begin());
89  for (size_t i=0; i<vec.size(); ++i)
90    suite.add(suite.equal(vec[i], static_cast<double>(i)-1.0));
91
92  std::vector<utility::DataWeight> vec2;
93  vec2.push_back(utility::DataWeight(1,1));
94  vec2.push_back(utility::DataWeight(2,0.5));
95  vec2.push_back(utility::DataWeight(2,0.5));
96  std::vector<utility::DataWeight> vec3(vec2.size());
97  c(vec2.begin(), vec2.end(), vec3.begin());
98  for (size_t i=0; i<vec2.size(); ++i)
99    suite.add(suite.equal(vec3[i].weight(), vec2[i].weight()));
100  suite.add(suite.equal(vec3[0].data(), -0.5));
101  suite.add(suite.equal(vec3[1].data(), 0.5));
102  suite.add(suite.equal(vec3[2].data(), 0.5));
103
104  // compile test should not be run
105  if (false) {
106    c(boost::input_iterator_archetype<double>(), 
107      boost::input_iterator_archetype<double>(),
108      boost::mutable_forward_iterator_archetype<double>());
109
110    c(boost::input_iterator_archetype<double>(), 
111      boost::input_iterator_archetype<double>(),
112      boost::mutable_forward_iterator_archetype<utility::DataWeight>());
113
114    c(boost::input_iterator_archetype_no_proxy<utility::DataWeight>(), 
115      boost::input_iterator_archetype_no_proxy<utility::DataWeight>(),
116      boost::mutable_forward_iterator_archetype<double>());
117
118    c(boost::input_iterator_archetype_no_proxy<utility::DataWeight>(), 
119      boost::input_iterator_archetype_no_proxy<utility::DataWeight>(),
120      boost::mutable_forward_iterator_archetype<utility::DataWeight>());
121  }
122}
123
124
125void test_column_normalize(test::Suite& suite)
126{
127  using namespace normalizer;
128  suite.err() << "Testing ColumnNormalizer\n";
129 
130  utility::Matrix m(2,2);
131  m(0,0) = 0;
132  m(0,1) = 10;
133  m(1,0) = 2;
134  m(1,1) = 4;
135  ColumnNormalizer<Centralizer<> > qn;
136  qn(m, m);
137  suite.err() << "Testing m(0,0)\n";
138  suite.add(suite.equal(m(0,0), -1));
139  suite.err() << "Testing m(0,1)\n";
140  suite.add(suite.equal(m(0,1), 3));
141  suite.err() << "Testing m(1,0)\n";
142  suite.add(suite.equal(m(1,0), 1));
143  suite.err() << "Testing m(1,1)\n";
144  suite.add(suite.equal(m(1,1), -3));
145
146  if (false) { // do not run compile tests
147    test::container2d_archetype<double> container2d; 
148    test::mutable_container2d_archetype<double> mutable_container2d; 
149    qn(container2d, mutable_container2d);
150  }
151}
152
153
154void test_qquantile_normalize(test::Suite& suite)
155{
156  using namespace normalizer;
157
158  suite.err() << "Testing qQuantileNormalizer\n";
159  std::string data(test::filename("data/normalization_test.data"));
160  if (utility::FileUtil(data.c_str()).permissions("r")) {
161    suite.add(false);
162    suite.err() << "Cannot access file " << data << '\n';
163    return;
164  }
165  std::ifstream data_stream(data.c_str());
166
167  utility::Matrix m(data_stream);
168
169  suite.err() << "testing number of parts (Q) boundary conditions\n";
170  qQuantileNormalizer(m.begin_column(0), m.end_column(0), m.rows());
171  qQuantileNormalizer(m.begin_column(0), m.end_column(0), 3);
172
173  // first column as target
174  qQuantileNormalizer qqn(m.begin_column(0), m.end_column(0) ,9); 
175  ColumnNormalizer<qQuantileNormalizer> cn(qqn);
176  utility::Matrix result(m.rows(),m.columns());
177  cn(m, result);
178
179  suite.err() << "test that result can be stored in the source matrix...";
180  cn(m,m);
181  if (suite.add(result==m))
182    suite.err() << " ok.\n";
183  else 
184    suite.err() << " failed.\n";
185
186  // Enough iteration will make all columns to have the same values as
187  // the target.
188  suite.err() << "Testing that q=matrix rows gives QuantileNormalization\n";
189  utility::Matrix m2(4,2);
190  m2(0,0) = 0; m2(0,1) = 10;
191  m2(1,0) = 2; m2(1,1) = 4;
192  m2(2,0) = 1; m2(2,1) = 0;
193  m2(3,0) = 3; m2(3,1) = 7;
194  qQuantileNormalizer qqn2(m2.begin_column(0), m2.end_column(0), m2.rows());
195  ColumnNormalizer<qQuantileNormalizer> cn2(qqn2);
196  utility::Matrix result2(m2.rows(),m2.columns());
197  cn2(m2,result2);
198  suite.add( suite.equal_fix(m2(0,0),result2(2,1),1.0e-12) &&
199             suite.equal_fix(m2(1,0),result2(3,1),1.0e-12) &&
200             suite.equal_fix(m2(2,0),result2(1,1),1.0e-12) &&
201             suite.equal_fix(m2(3,0),result2(0,1),1.0e-12) );
202  // compile test should not be run
203  if (false) {
204    qQuantileNormalizer qqn3(boost::forward_iterator_archetype<double>(),
205                             boost::forward_iterator_archetype<double>(),
206                             100);
207    qqn3(boost::random_access_iterator_archetype<double>(), 
208         boost::random_access_iterator_archetype<double>(),
209         boost::mutable_random_access_iterator_archetype<double>());
210    using utility::DataWeight;
211    qQuantileNormalizer qqn4(boost::forward_iterator_archetype<DataWeight>(),
212                             boost::forward_iterator_archetype<DataWeight>(),
213                             100);
214    qqn4(boost::random_access_iterator_archetype<DataWeight>(), 
215         boost::random_access_iterator_archetype<DataWeight>(),
216         boost::mutable_random_access_iterator_archetype<DataWeight>());
217    qqn4(boost::random_access_iterator_archetype<DataWeight>(), 
218         boost::random_access_iterator_archetype<DataWeight>(),
219         boost::mutable_random_access_iterator_archetype<double>());
220  }
221}
222
223
224void test_qquantile_normalize_weighted(test::Suite& suite)
225{
226  using namespace normalizer;
227
228  suite.err() << "Testing qQuantileNormalizer weighted\n";
229
230  // test with unweighted target and source
231  std::vector<double> target;
232  target.reserve(1000);
233  while (target.size()<1000)
234    target.push_back(target.size());
235  qQuantileNormalizer qQN(target.begin(), target.end(), 4);
236  std::vector<double> source;
237  while (source.size()<10)
238    source.push_back(source.size()*10);
239  std::vector<double> result(source.size());
240 
241  qQN(source.begin(), source.end(), result.begin());
242 
243  using utility::DataWeight;
244  suite.err() << "Testing with unweighted target and weighted source\n";
245  std::vector<utility::DataWeight> src_w(source.size(), DataWeight(0, 1));
246  std::copy(source.begin(), source.end(),
247            utility::data_iterator(src_w.begin()));
248
249  std::vector<utility::DataWeight> result_w(src_w.size());
250  qQN(src_w.begin(), src_w.end(), result_w.begin());
251  suite.add(suite.equal_range(result.begin(), result.end(),
252                              utility::data_iterator(result_w.begin())));
253
254  suite.err() << "Testing with missing value in source\n";
255  // adding a missing value
256  std::vector<utility::DataWeight>::iterator MWi=src_w.begin();
257  MWi+=5;
258  src_w.insert(MWi, DataWeight(std::numeric_limits<double>::quiet_NaN(), 0.0));
259  std::vector<utility::DataWeight> result_w2(src_w.size());
260  qQN(src_w.begin(), src_w.end(), result_w2.begin());
261  // excluding missing value from comparison in suite.equal_range
262  MWi=result_w2.begin();
263  MWi+=5;
264  result_w2.erase(MWi);
265  suite.add(suite.equal_range(utility::data_iterator(result_w.begin()), 
266                              utility::data_iterator(result_w.end()),
267                              utility::data_iterator(result_w2.begin())));
268
269  suite.err() << "testing with weighted target" << std::endl;
270  std::vector<utility::DataWeight> target_w(target.size()+1, DataWeight(0, 1));
271  target_w[0] = DataWeight(5.3, 0);
272  std::copy(target.begin(), target.end(),
273            utility::data_iterator(target_w.begin()+1));
274  qQuantileNormalizer qQNw(target_w.begin(), target_w.end(), 4);
275  std::vector<utility::DataWeight> result_w3(src_w.size());
276  qQNw(src_w.begin(), src_w.end(), result_w3.begin());
277  // excluding missing value from comparison in suite.equal_range
278  MWi=result_w3.begin();
279  MWi+=5;
280  result_w3.erase(MWi);
281  suite.add(suite.equal_range(utility::data_iterator(result_w3.begin()), 
282                              utility::data_iterator(result_w3.end()),
283                              utility::data_iterator(result_w2.begin())));
284 
285}
286
287
288void test_quantile_normalize(test::Suite& suite)
289{
290  suite.err() << "Testing QuantileNormalizer\n";
291  utility::Matrix m(2,2);
292  m(0,0) = 0;
293  m(0,1) = 10;
294  m(1,0) = 2;
295  m(1,1) = 4;
296  normalizer::QuantileNormalizer qn;
297  qn(m, m);
298  suite.err() << "Testing m(0,0)\n";
299  suite.add(suite.equal(m(0,0), 2));
300  suite.err() << "Testing m(0,1)\n";
301  suite.add(suite.equal(m(0,1), 6));
302  suite.err() << "Testing m(1,0)\n";
303  suite.add(suite.equal(m(1,0), 6));
304  suite.err() << "Testing m(1,1)\n";
305  suite.add(suite.equal(m(1,1), 2));
306
307  // testing with landscape-shaped data matrix
308  m.resize(2, 10);
309  qn(m, m);
310}
311
312void test_row_normalize(test::Suite& suite)
313{
314  using namespace normalizer;
315  suite.err() << "Testing RowNormalizer\n";
316 
317  utility::Matrix m(2,3);
318  m(0,0) = 0;
319  m(0,1) = 10;
320  m(1,0) = 2;
321  m(1,1) = 4;
322  utility::Matrix m2(m);
323  m2.transpose();
324  ColumnNormalizer<Centralizer<> > cn;
325  RowNormalizer<Centralizer<> > rn;
326  cn(m, m);
327  rn(m2, m2);
328  m2.transpose();
329  suite.equal_range(m.begin(), m.end(), m2.begin());
330  if (false) { // do not run compile tests
331    test::container2d_archetype<double> container2d; 
332    test::mutable_container2d_archetype<double> mutable_container2d; 
333    rn(container2d, mutable_container2d);
334  }
335}
336
337void test_spearman(test::Suite& suite)
338{
339  suite.err() << "Testing Spearman\n";
340  normalizer::Spearman spearman;
341  std::vector<double> vec;
342  vec.push_back(0);
343  vec.push_back(2);
344  vec.push_back(3);
345  vec.push_back(1);
346  spearman(vec.begin(), vec.end(), vec.begin());
347  std::vector<double> correct;
348  correct.push_back(1.0/8);
349  correct.push_back(5.0/8);
350  correct.push_back(7.0/8);
351  correct.push_back(3.0/8);
352  suite.add(suite.equal_range(vec.begin(), vec.end(), correct.begin()));
353  suite.err() << "Testing Spearman with ties\n";
354  vec[1]=vec[2];
355  correct[1] = correct[2] = (correct[1]+correct[2])/2;
356  spearman(vec.begin(), vec.end(), vec.begin());
357  suite.add(suite.equal_range(vec.begin(), vec.end(), correct.begin()));
358  test_spearman_weighted(suite);
359}
360
361
362void test_gauss_normalize(test::Suite& suite)
363{
364  suite.err() << "Testing Gauss\n";
365  normalizer::Gauss gauss;
366  std::vector<double> vec;
367  vec.push_back(1);
368  gauss(vec.begin(), vec.end(), vec.begin());
369  suite.add(suite.equal(vec.front(), 0));
370  vec.push_back(1);
371  gauss(vec.begin(), vec.end(), vec.begin());
372  suite.add(suite.equal(vec.front(), -vec.back()));
373  // compile test should not be run
374  if (false) {
375    gauss(boost::random_access_iterator_archetype<double>(), 
376          boost::random_access_iterator_archetype<double>(),
377          boost::mutable_random_access_iterator_archetype<double>());
378    using utility::DataWeight;
379    gauss(boost::random_access_iterator_archetype<DataWeight>(), 
380          boost::random_access_iterator_archetype<DataWeight>(),
381          boost::mutable_random_access_iterator_archetype<DataWeight>());
382    gauss(boost::random_access_iterator_archetype<DataWeight>(), 
383          boost::random_access_iterator_archetype<DataWeight>(),
384          boost::mutable_random_access_iterator_archetype<double>());
385    gauss(boost::random_access_iterator_archetype<double>(), 
386          boost::random_access_iterator_archetype<double>(),
387          boost::mutable_random_access_iterator_archetype<DataWeight>());
388  }
389}
390
391void test_spearman_weighted(test::Suite& suite)
392{
393  suite.err() << "Testing Weighted Spearman\n";
394  normalizer::Spearman spearman;
395
396  suite.err() << "Testing that unity weights reproduces unweighted case\n";
397  utility::MatrixWeighted m(1,4,0,1);
398  utility::MatrixWeighted res(m.rows(), m.columns(),3.14,0);
399  m(0,0).data()=0;
400  m(0,1).data()=2;
401  m(0,2).data()=3;
402  m(0,3).data()=1;
403  std::vector<double> correct(m.columns());
404  std::vector<double> correct_w(m.columns(), 1.0);
405  std::copy(utility::data_iterator(m.begin_row(0)),
406            utility::data_iterator(m.end_row(0)),
407            correct.begin());
408  spearman(correct.begin(), correct.end(), correct.begin());
409  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
410
411  using utility::data_iterator;
412  suite.add(suite.equal_range(data_iterator(res.begin_row(0)),
413                               data_iterator(res.end_row(0)),
414                               correct.begin()));
415  using utility::weight_iterator;
416  suite.add(suite.equal_range(weight_iterator(res.begin_row(0)),
417                               weight_iterator(res.end_row(0)),
418                               correct_w.begin()));
419
420  suite.err() << "Testing rescaling of weights\n";
421  for (size_t i=0; i<m.columns(); ++i) {
422    m(0,i).weight() *= 2;
423    correct_w[i] *= 2;
424  }   
425  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
426  suite.add(suite.equal_range(data_iterator(res.begin_row(0)),
427                               data_iterator(res.end_row(0)),
428                               correct.begin()));
429  suite.add(suite.equal_range(weight_iterator(res.begin_row(0)),
430                               weight_iterator(res.end_row(0)),
431                               correct_w.begin()));
432
433 
434  suite.err() << "Testing case with a zero weight\n";
435  m(0,1).data() = std::numeric_limits<double>::quiet_NaN();
436  m(0,1).weight() = 0.0;
437  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
438  suite.add(suite.equal(res(0,0).data(), 0.5/3)); 
439  suite.add(suite.equal(res(0,2).data(), 2.5/3)); 
440  suite.add(suite.equal(res(0,3).data(), 1.5/3)); 
441
442  suite.err() << "Testing case with ties\n";
443  m(0,0).data() = m(0,2).data();
444  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
445  suite.add(suite.equal(res(0,0).data(), 2.0/3)); 
446  suite.add(suite.equal(res(0,2).data(), 2.0/3)); 
447  suite.add(suite.equal(res(0,3).data(), 0.5/3)); 
448  // compile test should not be run
449  if (false) {
450    spearman(boost::random_access_iterator_archetype<double>(), 
451             boost::random_access_iterator_archetype<double>(),
452             boost::mutable_random_access_iterator_archetype<double>());
453    using utility::DataWeight;
454    spearman(boost::random_access_iterator_archetype<DataWeight>(), 
455             boost::random_access_iterator_archetype<DataWeight>(),
456             boost::mutable_random_access_iterator_archetype<DataWeight>());
457    spearman(boost::random_access_iterator_archetype<double>(), 
458             boost::random_access_iterator_archetype<double>(),
459             boost::mutable_random_access_iterator_archetype<DataWeight>());
460    spearman(boost::random_access_iterator_archetype<DataWeight>(), 
461             boost::random_access_iterator_archetype<DataWeight>(),
462             boost::mutable_random_access_iterator_archetype<double>());
463  }
464
465}
466
467void test_z_score(test::Suite& suite)
468{
469  suite.err() << "Testing Zscore\n";
470  std::vector<double> vec;
471  vec.push_back(0);
472  vec.push_back(3.14);
473  normalizer::Zscore zscore;
474  zscore(vec.begin(), vec.end(), vec.begin());
475  for (size_t i=0; i<vec.size(); ++i)
476    suite.add(suite.equal(vec[i], 2.0*i-1.0));
477
478  std::vector<utility::DataWeight> vec2;
479  vec2.push_back(utility::DataWeight(1,1));
480  vec2.push_back(utility::DataWeight(2.13,0.5));
481  vec2.push_back(utility::DataWeight(2.13,0.5));
482  std::vector<utility::DataWeight> vec3(vec2.size());
483  zscore(vec2.begin(), vec2.end(), vec3.begin());
484  for (size_t i=0; i<vec2.size(); ++i)
485    suite.add(suite.equal(vec3[i].weight(), vec2[i].weight()));
486  suite.add(suite.equal(vec3[0].data(), -1.0));
487  suite.add(suite.equal(vec3[1].data(), 1.0));
488  suite.add(suite.equal(vec3[2].data(), 1.0));
489  // compile test should not be run
490  if (false) {
491    boost::detail::dummy_constructor dummy_cons;
492    zscore(boost::forward_iterator_archetype<double>(), 
493           boost::forward_iterator_archetype<double>(),
494           boost::output_iterator_archetype<double>(dummy_cons));
495    using utility::DataWeight;
496    zscore(boost::forward_iterator_archetype<DataWeight>(), 
497           boost::forward_iterator_archetype<DataWeight>(),
498           boost::mutable_forward_iterator_archetype<DataWeight>());
499    zscore(boost::random_access_iterator_archetype<DataWeight>(), 
500           boost::random_access_iterator_archetype<DataWeight>(),
501           boost::mutable_random_access_iterator_archetype<double>());
502    zscore(boost::random_access_iterator_archetype<double>(), 
503           boost::random_access_iterator_archetype<double>(),
504           boost::mutable_random_access_iterator_archetype<DataWeight>());
505  }
506}
Note: See TracBrowser for help on using the repository browser.