source: trunk/test/normalization_test.cc @ 2119

Last change on this file since 2119 was 2119, checked in by Peter, 12 years ago

converted files to utf-8. fixes #577

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 12.6 KB
Line 
1// $Id: normalization_test.cc 2119 2009-12-12 23:11:43Z peter $
2
3/*
4  Copyright (C) 2008, 2009 Jari Häkkinen, Peter Johansson
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with yat. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include "Suite.h"
23
24#include "yat/normalizer/Centralizer.h"
25#include "yat/normalizer/ColumnNormalizer.h"
26#include "yat/normalizer/Gauss.h"
27#include "yat/normalizer/qQuantileNormalizer.h"
28#include "yat/normalizer/QuantileNormalizer.h"
29#include "yat/normalizer/RowNormalizer.h"
30#include "yat/normalizer/Spearman.h"
31#include "yat/normalizer/Zscore.h"
32
33#include "yat/utility/DataIterator.h"
34#include "yat/utility/FileUtil.h"
35#include "yat/utility/Matrix.h"
36#include "yat/utility/MatrixWeighted.h"
37#include "yat/utility/WeightIterator.h"
38
39#include <climits>
40#include <fstream>
41#include <limits>
42#include <vector>
43
44using namespace theplu::yat;
45void test_centralizer(test::Suite&);
46void test_column_normalize(test::Suite&);
47void test_gauss_normalize(test::Suite&);
48void test_qquantile_normalize(test::Suite&);
49void test_qquantile_normalize_weighted(test::Suite&);
50void test_quantile_normalize(test::Suite&);
51void test_row_normalize(test::Suite&);
52void test_spearman(test::Suite&);
53void test_spearman_weighted(test::Suite&);
54void test_z_score(test::Suite&);
55
56int main(int argc, char* argv[])
57{ 
58  test::Suite suite(argc, argv);
59  suite.err() << "testing normalizations ... " << std::endl;
60
61  test_centralizer(suite);
62  test_column_normalize(suite);
63  test_qquantile_normalize(suite);
64  test_qquantile_normalize_weighted(suite);
65  test_quantile_normalize(suite);
66  test_gauss_normalize(suite);
67  test_row_normalize(suite);
68  test_spearman(suite);
69  test_z_score(suite);
70
71  return suite.return_value();
72}
73
74
75void test_centralizer(test::Suite& suite)
76{
77  suite.err() << "Testing Centralizer\n";
78  std::vector<double> vec;
79  vec.push_back(1);
80  vec.push_back(2);
81  vec.push_back(3);
82  normalizer::Centralizer<> c;
83  c(vec.begin(), vec.end(), vec.begin());
84  for (size_t i=0; i<vec.size(); ++i)
85    suite.add(suite.equal(vec[i], static_cast<double>(i)-1.0));
86
87  std::vector<utility::DataWeight> vec2;
88  vec2.push_back(utility::DataWeight(1,1));
89  vec2.push_back(utility::DataWeight(2,0.5));
90  vec2.push_back(utility::DataWeight(2,0.5));
91  std::vector<utility::DataWeight> vec3(vec2.size());
92  c(vec2.begin(), vec2.end(), vec3.begin());
93  for (size_t i=0; i<vec2.size(); ++i)
94    suite.add(suite.equal(vec3[i].weight(), vec2[i].weight()));
95  suite.add(suite.equal(vec3[0].data(), -0.5));
96  suite.add(suite.equal(vec3[1].data(), 0.5));
97  suite.add(suite.equal(vec3[2].data(), 0.5));
98
99}
100
101
102void test_column_normalize(test::Suite& suite)
103{
104  using namespace normalizer;
105  suite.err() << "Testing ColumnNormalizer\n";
106 
107  utility::Matrix m(2,2);
108  m(0,0) = 0;
109  m(0,1) = 10;
110  m(1,0) = 2;
111  m(1,1) = 4;
112  ColumnNormalizer<Centralizer<> > qn;
113  qn(m, m);
114  suite.err() << "Testing m(0,0)\n";
115  suite.add(suite.equal(m(0,0), -1));
116  suite.err() << "Testing m(0,1)\n";
117  suite.add(suite.equal(m(0,1), 3));
118  suite.err() << "Testing m(1,0)\n";
119  suite.add(suite.equal(m(1,0), 1));
120  suite.err() << "Testing m(1,1)\n";
121  suite.add(suite.equal(m(1,1), -3));
122}
123
124
125void test_qquantile_normalize(test::Suite& suite)
126{
127  using namespace normalizer;
128
129  suite.err() << "Testing qQuantileNormalizer\n";
130  std::string data(test::filename("data/normalization_test.data"));
131  if (utility::FileUtil(data.c_str()).permissions("r")) {
132    suite.add(false);
133    suite.err() << "Cannot access file " << data << '\n';
134    return;
135  }
136  std::ifstream data_stream(data.c_str());
137
138  utility::Matrix m(data_stream);
139
140  suite.err() << "testing number of parts (Q) boundary conditions\n";
141  qQuantileNormalizer(m.begin_column(0), m.end_column(0), m.rows());
142  qQuantileNormalizer(m.begin_column(0), m.end_column(0), 3);
143
144  // first column as target
145  qQuantileNormalizer qqn(m.begin_column(0), m.end_column(0) ,9); 
146  ColumnNormalizer<qQuantileNormalizer> cn(qqn);
147  utility::Matrix result(m.rows(),m.columns());
148  cn(m, result);
149
150  suite.err() << "test that result can be stored in the source matrix...";
151  cn(m,m);
152  if (suite.add(result==m))
153    suite.err() << " ok.\n";
154  else 
155    suite.err() << " failed.\n";
156
157  // Enough iteration will make all columns to have the same values as
158  // the target.
159  suite.err() << "Testing that q=matrix rows gives QuantileNormalization\n";
160  utility::Matrix m2(4,2);
161  m2(0,0) = 0; m2(0,1) = 10;
162  m2(1,0) = 2; m2(1,1) = 4;
163  m2(2,0) = 1; m2(2,1) = 0;
164  m2(3,0) = 3; m2(3,1) = 7;
165  qQuantileNormalizer qqn2(m2.begin_column(0), m2.end_column(0), m2.rows());
166  ColumnNormalizer<qQuantileNormalizer> cn2(qqn2);
167  utility::Matrix result2(m2.rows(),m2.columns());
168  cn2(m2,result2);
169  suite.add( suite.equal_fix(m2(0,0),result2(2,1),1.0e-12) &&
170             suite.equal_fix(m2(1,0),result2(3,1),1.0e-12) &&
171             suite.equal_fix(m2(2,0),result2(1,1),1.0e-12) &&
172             suite.equal_fix(m2(3,0),result2(0,1),1.0e-12) );
173}
174
175
176void test_qquantile_normalize_weighted(test::Suite& suite)
177{
178  using namespace normalizer;
179
180  suite.err() << "Testing qQuantileNormalizer weighted\n";
181
182  // test with unweighted target and source
183  std::vector<double> target;
184  target.reserve(1000);
185  while (target.size()<1000)
186    target.push_back(target.size());
187  qQuantileNormalizer qQN(target.begin(), target.end(), 4);
188  std::vector<double> source;
189  while (source.size()<10)
190    source.push_back(source.size()*10);
191  std::vector<double> result(source.size());
192 
193  qQN(source.begin(), source.end(), result.begin());
194 
195  using utility::DataWeight;
196  suite.err() << "Testing with unweighted target and weighted source\n";
197  std::vector<utility::DataWeight> src_w(source.size(), DataWeight(0, 1));
198  std::copy(source.begin(), source.end(),
199            utility::data_iterator(src_w.begin()));
200
201  std::vector<utility::DataWeight> result_w(src_w.size());
202  qQN(src_w.begin(), src_w.end(), result_w.begin());
203  suite.add(suite.equal_range(result.begin(), result.end(),
204                              utility::data_iterator(result_w.begin())));
205
206  suite.err() << "Testing with missing value in source\n";
207  // adding a missing value
208  std::vector<utility::DataWeight>::iterator MWi=src_w.begin();
209  MWi+=5;
210  src_w.insert(MWi, DataWeight(std::numeric_limits<double>::quiet_NaN(), 0.0));
211  std::vector<utility::DataWeight> result_w2(src_w.size());
212  qQN(src_w.begin(), src_w.end(), result_w2.begin());
213  // excluding missing value from comparison in suite.equal_range
214  MWi=result_w2.begin();
215  MWi+=5;
216  result_w2.erase(MWi);
217  suite.add(suite.equal_range(utility::data_iterator(result_w.begin()), 
218                              utility::data_iterator(result_w.end()),
219                              utility::data_iterator(result_w2.begin())));
220
221  suite.err() << "testing with weighted target" << std::endl;
222  std::vector<utility::DataWeight> target_w(target.size()+1, DataWeight(0, 1));
223  target_w[0] = DataWeight(5.3, 0);
224  std::copy(target.begin(), target.end(),
225            utility::data_iterator(target_w.begin()+1));
226  qQuantileNormalizer qQNw(target_w.begin(), target_w.end(), 4);
227  std::vector<utility::DataWeight> result_w3(src_w.size());
228  qQNw(src_w.begin(), src_w.end(), result_w3.begin());
229  // excluding missing value from comparison in suite.equal_range
230  MWi=result_w3.begin();
231  MWi+=5;
232  result_w3.erase(MWi);
233  suite.add(suite.equal_range(utility::data_iterator(result_w3.begin()), 
234                              utility::data_iterator(result_w3.end()),
235                              utility::data_iterator(result_w2.begin())));
236 
237}
238
239
240void test_quantile_normalize(test::Suite& suite)
241{
242  suite.err() << "Testing QuantileNormalizer\n";
243 
244  utility::Matrix m(2,2);
245  m(0,0) = 0;
246  m(0,1) = 10;
247  m(1,0) = 2;
248  m(1,1) = 4;
249  normalizer::QuantileNormalizer qn;
250  qn(m, m);
251  suite.err() << "Testing m(0,0)\n";
252  suite.add(suite.equal(m(0,0), 2));
253  suite.err() << "Testing m(0,1)\n";
254  suite.add(suite.equal(m(0,1), 6));
255  suite.err() << "Testing m(1,0)\n";
256  suite.add(suite.equal(m(1,0), 6));
257  suite.err() << "Testing m(1,1)\n";
258  suite.add(suite.equal(m(1,1), 2));
259}
260
261void test_row_normalize(test::Suite& suite)
262{
263  using namespace normalizer;
264  suite.err() << "Testing RowNormalizer\n";
265 
266  utility::Matrix m(2,3);
267  m(0,0) = 0;
268  m(0,1) = 10;
269  m(1,0) = 2;
270  m(1,1) = 4;
271  utility::Matrix m2(m);
272  m2.transpose();
273  ColumnNormalizer<Centralizer<> > cn;
274  RowNormalizer<Centralizer<> > rn;
275  cn(m, m);
276  rn(m2, m2);
277  m2.transpose();
278  suite.equal_range(m.begin(), m.end(), m2.begin());
279}
280
281void test_spearman(test::Suite& suite)
282{
283  suite.err() << "Testing Spearman\n";
284  normalizer::Spearman spearman;
285  std::vector<double> vec;
286  vec.push_back(0);
287  vec.push_back(2);
288  vec.push_back(3);
289  vec.push_back(1);
290  spearman(vec.begin(), vec.end(), vec.begin());
291  std::vector<double> correct;
292  correct.push_back(1.0/8);
293  correct.push_back(5.0/8);
294  correct.push_back(7.0/8);
295  correct.push_back(3.0/8);
296  suite.add(suite.equal_range(vec.begin(), vec.end(), correct.begin()));
297  suite.err() << "Testing Spearman with ties\n";
298  vec[1]=vec[2];
299  correct[1] = correct[2] = (correct[1]+correct[2])/2;
300  spearman(vec.begin(), vec.end(), vec.begin());
301  suite.add(suite.equal_range(vec.begin(), vec.end(), correct.begin()));
302  test_spearman_weighted(suite);
303}
304
305
306void test_gauss_normalize(test::Suite& suite)
307{
308  suite.err() << "Testing Gauss\n";
309  normalizer::Gauss gauss;
310  std::vector<double> vec;
311  vec.push_back(1);
312  gauss(vec.begin(), vec.end(), vec.begin());
313  suite.add(suite.equal(vec.front(), 0));
314  vec.push_back(1);
315  gauss(vec.begin(), vec.end(), vec.begin());
316  suite.add(suite.equal(vec.front(), -vec.back()));
317
318}
319
320void test_spearman_weighted(test::Suite& suite)
321{
322  suite.err() << "Testing Weighted Spearman\n";
323  normalizer::Spearman spearman;
324
325  suite.err() << "Testing that unity weights reproduces unweighted case\n";
326  utility::MatrixWeighted m(1,4,0,1);
327  utility::MatrixWeighted res(m.rows(), m.columns(),3.14,0);
328  m(0,0).data()=0;
329  m(0,1).data()=2;
330  m(0,2).data()=3;
331  m(0,3).data()=1;
332  std::vector<double> correct(m.columns());
333  std::vector<double> correct_w(m.columns(), 1.0);
334  std::copy(utility::data_iterator(m.begin_row(0)),
335            utility::data_iterator(m.end_row(0)),
336            correct.begin());
337  spearman(correct.begin(), correct.end(), correct.begin());
338  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
339
340  using utility::data_iterator;
341  suite.add(suite.equal_range(data_iterator(res.begin_row(0)),
342                               data_iterator(res.end_row(0)),
343                               correct.begin()));
344  using utility::weight_iterator;
345  suite.add(suite.equal_range(weight_iterator(res.begin_row(0)),
346                               weight_iterator(res.end_row(0)),
347                               correct_w.begin()));
348
349  suite.err() << "Testing rescaling of weights\n";
350  for (size_t i=0; i<m.columns(); ++i) {
351    m(0,i).weight() *= 2;
352    correct_w[i] *= 2;
353  }   
354  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
355  suite.add(suite.equal_range(data_iterator(res.begin_row(0)),
356                               data_iterator(res.end_row(0)),
357                               correct.begin()));
358  suite.add(suite.equal_range(weight_iterator(res.begin_row(0)),
359                               weight_iterator(res.end_row(0)),
360                               correct_w.begin()));
361
362 
363  suite.err() << "Testing case with a zero weight\n";
364  m(0,1).data() = std::numeric_limits<double>::quiet_NaN();
365  m(0,1).weight() = 0.0;
366  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
367  suite.add(suite.equal(res(0,0).data(), 0.5/3)); 
368  suite.add(suite.equal(res(0,2).data(), 2.5/3)); 
369  suite.add(suite.equal(res(0,3).data(), 1.5/3)); 
370
371  suite.err() << "Testing case with ties\n";
372  m(0,0).data() = m(0,2).data();
373  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
374  suite.add(suite.equal(res(0,0).data(), 2.0/3)); 
375  suite.add(suite.equal(res(0,2).data(), 2.0/3)); 
376  suite.add(suite.equal(res(0,3).data(), 0.5/3)); 
377}
378
379void test_z_score(test::Suite& suite)
380{
381  suite.err() << "Testing Zscore\n";
382  std::vector<double> vec;
383  vec.push_back(0);
384  vec.push_back(3.14);
385  normalizer::Zscore zscore;
386  zscore(vec.begin(), vec.end(), vec.begin());
387  for (size_t i=0; i<vec.size(); ++i)
388    suite.add(suite.equal(vec[i], 2.0*i-1.0));
389
390  std::vector<utility::DataWeight> vec2;
391  vec2.push_back(utility::DataWeight(1,1));
392  vec2.push_back(utility::DataWeight(2.13,0.5));
393  vec2.push_back(utility::DataWeight(2.13,0.5));
394  std::vector<utility::DataWeight> vec3(vec2.size());
395  zscore(vec2.begin(), vec2.end(), vec3.begin());
396  for (size_t i=0; i<vec2.size(); ++i)
397    suite.add(suite.equal(vec3[i].weight(), vec2[i].weight()));
398  suite.add(suite.equal(vec3[0].data(), -1.0));
399  suite.add(suite.equal(vec3[1].data(), 1.0));
400  suite.add(suite.equal(vec3[2].data(), 1.0));
401}
402
403
Note: See TracBrowser for help on using the repository browser.