source: trunk/test/normalization_test.cc @ 2158

Last change on this file since 2158 was 2158, checked in by Peter, 13 years ago

qQuantileNormalizer now works with mixed iterators. closes #498

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 16.7 KB
Line 
1// $Id: normalization_test.cc 2158 2010-01-18 00:33:15Z peter $
2
3/*
4  Copyright (C) 2008, 2009 Jari Häkkinen, Peter Johansson
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with yat. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include "Suite.h"
23
24#include "yat/normalizer/Centralizer.h"
25#include "yat/normalizer/ColumnNormalizer.h"
26#include "yat/normalizer/Gauss.h"
27#include "yat/normalizer/qQuantileNormalizer.h"
28#include "yat/normalizer/QuantileNormalizer.h"
29#include "yat/normalizer/RowNormalizer.h"
30#include "yat/normalizer/Spearman.h"
31#include "yat/normalizer/Zscore.h"
32
33#include "yat/utility/DataIterator.h"
34#include "yat/utility/FileUtil.h"
35#include "yat/utility/Matrix.h"
36#include "yat/utility/MatrixWeighted.h"
37#include "yat/utility/WeightIterator.h"
38
39#include <boost/concept_archetype.hpp>
40
41#include <climits>
42#include <fstream>
43#include <limits>
44#include <vector>
45
46using namespace theplu::yat;
47void test_centralizer(test::Suite&);
48void test_column_normalize(test::Suite&);
49void test_gauss_normalize(test::Suite&);
50void test_qquantile_normalize(test::Suite&);
51void test_qquantile_normalize_weighted(test::Suite&);
52void test_quantile_normalize(test::Suite&);
53void test_row_normalize(test::Suite&);
54void test_spearman(test::Suite&);
55void test_spearman_weighted(test::Suite&);
56void test_z_score(test::Suite&);
57
58int main(int argc, char* argv[])
59{ 
60  test::Suite suite(argc, argv);
61  suite.err() << "testing normalizations ... " << std::endl;
62
63  test_centralizer(suite);
64  test_column_normalize(suite);
65  test_qquantile_normalize(suite);
66  test_qquantile_normalize_weighted(suite);
67  test_quantile_normalize(suite);
68  test_gauss_normalize(suite);
69  test_row_normalize(suite);
70  test_spearman(suite);
71  test_z_score(suite);
72
73  return suite.return_value();
74}
75
76
77void test_centralizer(test::Suite& suite)
78{
79  suite.err() << "Testing Centralizer\n";
80  std::vector<double> vec;
81  vec.push_back(1);
82  vec.push_back(2);
83  vec.push_back(3);
84  normalizer::Centralizer<> c;
85  c(vec.begin(), vec.end(), vec.begin());
86  for (size_t i=0; i<vec.size(); ++i)
87    suite.add(suite.equal(vec[i], static_cast<double>(i)-1.0));
88
89  std::vector<utility::DataWeight> vec2;
90  vec2.push_back(utility::DataWeight(1,1));
91  vec2.push_back(utility::DataWeight(2,0.5));
92  vec2.push_back(utility::DataWeight(2,0.5));
93  std::vector<utility::DataWeight> vec3(vec2.size());
94  c(vec2.begin(), vec2.end(), vec3.begin());
95  for (size_t i=0; i<vec2.size(); ++i)
96    suite.add(suite.equal(vec3[i].weight(), vec2[i].weight()));
97  suite.add(suite.equal(vec3[0].data(), -0.5));
98  suite.add(suite.equal(vec3[1].data(), 0.5));
99  suite.add(suite.equal(vec3[2].data(), 0.5));
100
101  // compile test should not be run
102  if (false) {
103    boost::detail::dummy_constructor dummy_cons;
104    c(boost::input_iterator_archetype<double>(), 
105      boost::input_iterator_archetype<double>(),
106      boost::output_iterator_archetype<double>(dummy_cons));
107  }
108}
109
110
111void test_column_normalize(test::Suite& suite)
112{
113  using namespace normalizer;
114  suite.err() << "Testing ColumnNormalizer\n";
115 
116  utility::Matrix m(2,2);
117  m(0,0) = 0;
118  m(0,1) = 10;
119  m(1,0) = 2;
120  m(1,1) = 4;
121  ColumnNormalizer<Centralizer<> > qn;
122  qn(m, m);
123  suite.err() << "Testing m(0,0)\n";
124  suite.add(suite.equal(m(0,0), -1));
125  suite.err() << "Testing m(0,1)\n";
126  suite.add(suite.equal(m(0,1), 3));
127  suite.err() << "Testing m(1,0)\n";
128  suite.add(suite.equal(m(1,0), 1));
129  suite.err() << "Testing m(1,1)\n";
130  suite.add(suite.equal(m(1,1), -3));
131
132  if (false) { // do not run compile tests
133    test::container2d_archetype<double> container2d; 
134    test::mutable_container2d_archetype<double> mutable_container2d; 
135    qn(container2d, mutable_container2d);
136  }
137}
138
139
140void test_qquantile_normalize(test::Suite& suite)
141{
142  using namespace normalizer;
143
144  suite.err() << "Testing qQuantileNormalizer\n";
145  std::string data(test::filename("data/normalization_test.data"));
146  if (utility::FileUtil(data.c_str()).permissions("r")) {
147    suite.add(false);
148    suite.err() << "Cannot access file " << data << '\n';
149    return;
150  }
151  std::ifstream data_stream(data.c_str());
152
153  utility::Matrix m(data_stream);
154
155  suite.err() << "testing number of parts (Q) boundary conditions\n";
156  qQuantileNormalizer(m.begin_column(0), m.end_column(0), m.rows());
157  qQuantileNormalizer(m.begin_column(0), m.end_column(0), 3);
158
159  // first column as target
160  qQuantileNormalizer qqn(m.begin_column(0), m.end_column(0) ,9); 
161  ColumnNormalizer<qQuantileNormalizer> cn(qqn);
162  utility::Matrix result(m.rows(),m.columns());
163  cn(m, result);
164
165  suite.err() << "test that result can be stored in the source matrix...";
166  cn(m,m);
167  if (suite.add(result==m))
168    suite.err() << " ok.\n";
169  else 
170    suite.err() << " failed.\n";
171
172  // Enough iteration will make all columns to have the same values as
173  // the target.
174  suite.err() << "Testing that q=matrix rows gives QuantileNormalization\n";
175  utility::Matrix m2(4,2);
176  m2(0,0) = 0; m2(0,1) = 10;
177  m2(1,0) = 2; m2(1,1) = 4;
178  m2(2,0) = 1; m2(2,1) = 0;
179  m2(3,0) = 3; m2(3,1) = 7;
180  qQuantileNormalizer qqn2(m2.begin_column(0), m2.end_column(0), m2.rows());
181  ColumnNormalizer<qQuantileNormalizer> cn2(qqn2);
182  utility::Matrix result2(m2.rows(),m2.columns());
183  cn2(m2,result2);
184  suite.add( suite.equal_fix(m2(0,0),result2(2,1),1.0e-12) &&
185             suite.equal_fix(m2(1,0),result2(3,1),1.0e-12) &&
186             suite.equal_fix(m2(2,0),result2(1,1),1.0e-12) &&
187             suite.equal_fix(m2(3,0),result2(0,1),1.0e-12) );
188  // compile test should not be run
189  if (false) {
190    qQuantileNormalizer qqn3(boost::forward_iterator_archetype<double>(),
191                             boost::forward_iterator_archetype<double>(),
192                             100);
193    qqn3(boost::random_access_iterator_archetype<double>(), 
194         boost::random_access_iterator_archetype<double>(),
195         boost::mutable_random_access_iterator_archetype<double>());
196    using utility::DataWeight;
197    qQuantileNormalizer qqn4(boost::forward_iterator_archetype<DataWeight>(),
198                             boost::forward_iterator_archetype<DataWeight>(),
199                             100);
200    qqn4(boost::random_access_iterator_archetype<DataWeight>(), 
201         boost::random_access_iterator_archetype<DataWeight>(),
202         boost::mutable_random_access_iterator_archetype<DataWeight>());
203    qqn4(boost::random_access_iterator_archetype<DataWeight>(), 
204         boost::random_access_iterator_archetype<DataWeight>(),
205         boost::mutable_random_access_iterator_archetype<double>());
206  }
207}
208
209
210void test_qquantile_normalize_weighted(test::Suite& suite)
211{
212  using namespace normalizer;
213
214  suite.err() << "Testing qQuantileNormalizer weighted\n";
215
216  // test with unweighted target and source
217  std::vector<double> target;
218  target.reserve(1000);
219  while (target.size()<1000)
220    target.push_back(target.size());
221  qQuantileNormalizer qQN(target.begin(), target.end(), 4);
222  std::vector<double> source;
223  while (source.size()<10)
224    source.push_back(source.size()*10);
225  std::vector<double> result(source.size());
226 
227  qQN(source.begin(), source.end(), result.begin());
228 
229  using utility::DataWeight;
230  suite.err() << "Testing with unweighted target and weighted source\n";
231  std::vector<utility::DataWeight> src_w(source.size(), DataWeight(0, 1));
232  std::copy(source.begin(), source.end(),
233            utility::data_iterator(src_w.begin()));
234
235  std::vector<utility::DataWeight> result_w(src_w.size());
236  qQN(src_w.begin(), src_w.end(), result_w.begin());
237  suite.add(suite.equal_range(result.begin(), result.end(),
238                              utility::data_iterator(result_w.begin())));
239
240  suite.err() << "Testing with missing value in source\n";
241  // adding a missing value
242  std::vector<utility::DataWeight>::iterator MWi=src_w.begin();
243  MWi+=5;
244  src_w.insert(MWi, DataWeight(std::numeric_limits<double>::quiet_NaN(), 0.0));
245  std::vector<utility::DataWeight> result_w2(src_w.size());
246  qQN(src_w.begin(), src_w.end(), result_w2.begin());
247  // excluding missing value from comparison in suite.equal_range
248  MWi=result_w2.begin();
249  MWi+=5;
250  result_w2.erase(MWi);
251  suite.add(suite.equal_range(utility::data_iterator(result_w.begin()), 
252                              utility::data_iterator(result_w.end()),
253                              utility::data_iterator(result_w2.begin())));
254
255  suite.err() << "testing with weighted target" << std::endl;
256  std::vector<utility::DataWeight> target_w(target.size()+1, DataWeight(0, 1));
257  target_w[0] = DataWeight(5.3, 0);
258  std::copy(target.begin(), target.end(),
259            utility::data_iterator(target_w.begin()+1));
260  qQuantileNormalizer qQNw(target_w.begin(), target_w.end(), 4);
261  std::vector<utility::DataWeight> result_w3(src_w.size());
262  qQNw(src_w.begin(), src_w.end(), result_w3.begin());
263  // excluding missing value from comparison in suite.equal_range
264  MWi=result_w3.begin();
265  MWi+=5;
266  result_w3.erase(MWi);
267  suite.add(suite.equal_range(utility::data_iterator(result_w3.begin()), 
268                              utility::data_iterator(result_w3.end()),
269                              utility::data_iterator(result_w2.begin())));
270 
271}
272
273
274void test_quantile_normalize(test::Suite& suite)
275{
276  suite.err() << "Testing QuantileNormalizer\n";
277 
278  utility::Matrix m(2,2);
279  m(0,0) = 0;
280  m(0,1) = 10;
281  m(1,0) = 2;
282  m(1,1) = 4;
283  normalizer::QuantileNormalizer qn;
284  qn(m, m);
285  suite.err() << "Testing m(0,0)\n";
286  suite.add(suite.equal(m(0,0), 2));
287  suite.err() << "Testing m(0,1)\n";
288  suite.add(suite.equal(m(0,1), 6));
289  suite.err() << "Testing m(1,0)\n";
290  suite.add(suite.equal(m(1,0), 6));
291  suite.err() << "Testing m(1,1)\n";
292  suite.add(suite.equal(m(1,1), 2));
293}
294
295void test_row_normalize(test::Suite& suite)
296{
297  using namespace normalizer;
298  suite.err() << "Testing RowNormalizer\n";
299 
300  utility::Matrix m(2,3);
301  m(0,0) = 0;
302  m(0,1) = 10;
303  m(1,0) = 2;
304  m(1,1) = 4;
305  utility::Matrix m2(m);
306  m2.transpose();
307  ColumnNormalizer<Centralizer<> > cn;
308  RowNormalizer<Centralizer<> > rn;
309  cn(m, m);
310  rn(m2, m2);
311  m2.transpose();
312  suite.equal_range(m.begin(), m.end(), m2.begin());
313  if (false) { // do not run compile tests
314    test::container2d_archetype<double> container2d; 
315    test::mutable_container2d_archetype<double> mutable_container2d; 
316    rn(container2d, mutable_container2d);
317  }
318}
319
320void test_spearman(test::Suite& suite)
321{
322  suite.err() << "Testing Spearman\n";
323  normalizer::Spearman spearman;
324  std::vector<double> vec;
325  vec.push_back(0);
326  vec.push_back(2);
327  vec.push_back(3);
328  vec.push_back(1);
329  spearman(vec.begin(), vec.end(), vec.begin());
330  std::vector<double> correct;
331  correct.push_back(1.0/8);
332  correct.push_back(5.0/8);
333  correct.push_back(7.0/8);
334  correct.push_back(3.0/8);
335  suite.add(suite.equal_range(vec.begin(), vec.end(), correct.begin()));
336  suite.err() << "Testing Spearman with ties\n";
337  vec[1]=vec[2];
338  correct[1] = correct[2] = (correct[1]+correct[2])/2;
339  spearman(vec.begin(), vec.end(), vec.begin());
340  suite.add(suite.equal_range(vec.begin(), vec.end(), correct.begin()));
341  test_spearman_weighted(suite);
342}
343
344
345void test_gauss_normalize(test::Suite& suite)
346{
347  suite.err() << "Testing Gauss\n";
348  normalizer::Gauss gauss;
349  std::vector<double> vec;
350  vec.push_back(1);
351  gauss(vec.begin(), vec.end(), vec.begin());
352  suite.add(suite.equal(vec.front(), 0));
353  vec.push_back(1);
354  gauss(vec.begin(), vec.end(), vec.begin());
355  suite.add(suite.equal(vec.front(), -vec.back()));
356  // compile test should not be run
357  if (false) {
358    gauss(boost::random_access_iterator_archetype<double>(), 
359          boost::random_access_iterator_archetype<double>(),
360          boost::mutable_random_access_iterator_archetype<double>());
361    using utility::DataWeight;
362    gauss(boost::random_access_iterator_archetype<DataWeight>(), 
363          boost::random_access_iterator_archetype<DataWeight>(),
364          boost::mutable_random_access_iterator_archetype<DataWeight>());
365    gauss(boost::random_access_iterator_archetype<DataWeight>(), 
366          boost::random_access_iterator_archetype<DataWeight>(),
367          boost::mutable_random_access_iterator_archetype<double>());
368    gauss(boost::random_access_iterator_archetype<double>(), 
369          boost::random_access_iterator_archetype<double>(),
370          boost::mutable_random_access_iterator_archetype<DataWeight>());
371  }
372}
373
374void test_spearman_weighted(test::Suite& suite)
375{
376  suite.err() << "Testing Weighted Spearman\n";
377  normalizer::Spearman spearman;
378
379  suite.err() << "Testing that unity weights reproduces unweighted case\n";
380  utility::MatrixWeighted m(1,4,0,1);
381  utility::MatrixWeighted res(m.rows(), m.columns(),3.14,0);
382  m(0,0).data()=0;
383  m(0,1).data()=2;
384  m(0,2).data()=3;
385  m(0,3).data()=1;
386  std::vector<double> correct(m.columns());
387  std::vector<double> correct_w(m.columns(), 1.0);
388  std::copy(utility::data_iterator(m.begin_row(0)),
389            utility::data_iterator(m.end_row(0)),
390            correct.begin());
391  spearman(correct.begin(), correct.end(), correct.begin());
392  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
393
394  using utility::data_iterator;
395  suite.add(suite.equal_range(data_iterator(res.begin_row(0)),
396                               data_iterator(res.end_row(0)),
397                               correct.begin()));
398  using utility::weight_iterator;
399  suite.add(suite.equal_range(weight_iterator(res.begin_row(0)),
400                               weight_iterator(res.end_row(0)),
401                               correct_w.begin()));
402
403  suite.err() << "Testing rescaling of weights\n";
404  for (size_t i=0; i<m.columns(); ++i) {
405    m(0,i).weight() *= 2;
406    correct_w[i] *= 2;
407  }   
408  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
409  suite.add(suite.equal_range(data_iterator(res.begin_row(0)),
410                               data_iterator(res.end_row(0)),
411                               correct.begin()));
412  suite.add(suite.equal_range(weight_iterator(res.begin_row(0)),
413                               weight_iterator(res.end_row(0)),
414                               correct_w.begin()));
415
416 
417  suite.err() << "Testing case with a zero weight\n";
418  m(0,1).data() = std::numeric_limits<double>::quiet_NaN();
419  m(0,1).weight() = 0.0;
420  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
421  suite.add(suite.equal(res(0,0).data(), 0.5/3)); 
422  suite.add(suite.equal(res(0,2).data(), 2.5/3)); 
423  suite.add(suite.equal(res(0,3).data(), 1.5/3)); 
424
425  suite.err() << "Testing case with ties\n";
426  m(0,0).data() = m(0,2).data();
427  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
428  suite.add(suite.equal(res(0,0).data(), 2.0/3)); 
429  suite.add(suite.equal(res(0,2).data(), 2.0/3)); 
430  suite.add(suite.equal(res(0,3).data(), 0.5/3)); 
431  // compile test should not be run
432  if (false) {
433    spearman(boost::random_access_iterator_archetype<double>(), 
434             boost::random_access_iterator_archetype<double>(),
435             boost::mutable_random_access_iterator_archetype<double>());
436    using utility::DataWeight;
437    spearman(boost::random_access_iterator_archetype<DataWeight>(), 
438             boost::random_access_iterator_archetype<DataWeight>(),
439             boost::mutable_random_access_iterator_archetype<DataWeight>());
440    spearman(boost::random_access_iterator_archetype<double>(), 
441             boost::random_access_iterator_archetype<double>(),
442             boost::mutable_random_access_iterator_archetype<DataWeight>());
443    spearman(boost::random_access_iterator_archetype<DataWeight>(), 
444             boost::random_access_iterator_archetype<DataWeight>(),
445             boost::mutable_random_access_iterator_archetype<double>());
446  }
447
448}
449
450void test_z_score(test::Suite& suite)
451{
452  suite.err() << "Testing Zscore\n";
453  std::vector<double> vec;
454  vec.push_back(0);
455  vec.push_back(3.14);
456  normalizer::Zscore zscore;
457  zscore(vec.begin(), vec.end(), vec.begin());
458  for (size_t i=0; i<vec.size(); ++i)
459    suite.add(suite.equal(vec[i], 2.0*i-1.0));
460
461  std::vector<utility::DataWeight> vec2;
462  vec2.push_back(utility::DataWeight(1,1));
463  vec2.push_back(utility::DataWeight(2.13,0.5));
464  vec2.push_back(utility::DataWeight(2.13,0.5));
465  std::vector<utility::DataWeight> vec3(vec2.size());
466  zscore(vec2.begin(), vec2.end(), vec3.begin());
467  for (size_t i=0; i<vec2.size(); ++i)
468    suite.add(suite.equal(vec3[i].weight(), vec2[i].weight()));
469  suite.add(suite.equal(vec3[0].data(), -1.0));
470  suite.add(suite.equal(vec3[1].data(), 1.0));
471  suite.add(suite.equal(vec3[2].data(), 1.0));
472  // compile test should not be run
473  if (false) {
474    boost::detail::dummy_constructor dummy_cons;
475    zscore(boost::forward_iterator_archetype<double>(), 
476           boost::forward_iterator_archetype<double>(),
477           boost::output_iterator_archetype<double>(dummy_cons));
478    using utility::DataWeight;
479    zscore(boost::forward_iterator_archetype<DataWeight>(), 
480           boost::forward_iterator_archetype<DataWeight>(),
481           boost::mutable_forward_iterator_archetype<DataWeight>());
482    zscore(boost::random_access_iterator_archetype<DataWeight>(), 
483           boost::random_access_iterator_archetype<DataWeight>(),
484           boost::mutable_random_access_iterator_archetype<double>());
485    zscore(boost::random_access_iterator_archetype<double>(), 
486           boost::random_access_iterator_archetype<double>(),
487           boost::mutable_random_access_iterator_archetype<DataWeight>());
488  }
489}
490
491
Note: See TracBrowser for help on using the repository browser.