source: trunk/test/normalization_test.cc @ 1778

Last change on this file since 1778 was 1778, checked in by Peter, 12 years ago

refs #478. Fixed so the tests for weighted qQN are now OK. To align the
weighted and unweighted versions, I needed to change the definition of
index. The weighted index is more naturally defined as the sum of the
weights of values less than x plus half of the weight of x. Including
half of its own weight makes it quite symmetric. For unity weights it
becomes, e.g., for a vector of size 4: 0.5, 1.5, 2.5, 3.5. Obviously
this doesn't change anything in the behavior since we are just adding
0.5 to every index.

Also, I decided to re-scale the index inside the Partitioner, so the
indices are now within the range (0, 1).

The weighted version of normalize had to be implemented separately and
could not reuse the unweighted one because the weights also come into
play in this step.

I want to test with a weighted range as target as well, and when that
works I think we can close this ticket.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 11.7 KB
Line 
1// $Id: normalization_test.cc 1778 2009-02-06 13:28:33Z peter $
2
3/*
4  Copyright (C) 2008, 2009 Jari Häkkinen, Peter Johansson
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with yat. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include "Suite.h"
23
24#include "yat/normalizer/Centralizer.h"
25#include "yat/normalizer/ColumnNormalizer.h"
26#include "yat/normalizer/Gauss.h"
27#include "yat/normalizer/qQuantileNormalizer.h"
28#include "yat/normalizer/QuantileNormalizer.h"
29#include "yat/normalizer/RowNormalizer.h"
30#include "yat/normalizer/Spearman.h"
31#include "yat/normalizer/Zscore.h"
32
33#include "yat/utility/DataIterator.h"
34#include "yat/utility/FileUtil.h"
35#include "yat/utility/Matrix.h"
36#include "yat/utility/MatrixWeighted.h"
37#include "yat/utility/WeightIterator.h"
38
39#include <fstream>
40#include <limits>
41#include <vector>
42
43using namespace theplu::yat;
44void test_centralizer(test::Suite&);
45void test_column_normalize(test::Suite&);
46void test_gauss_normalize(test::Suite&);
47void test_qquantile_normalize(test::Suite&);
48void test_qquantile_normalize_weighted(test::Suite&);
49void test_quantile_normalize(test::Suite&);
50void test_row_normalize(test::Suite&);
51void test_spearman(test::Suite&);
52void test_spearman_weighted(test::Suite&);
53void test_z_score(test::Suite&);
54
55int main(int argc, char* argv[])
56{ 
57  test::Suite suite(argc, argv);
58  suite.err() << "testing normalizations ... " << std::endl;
59
60  test_centralizer(suite);
61  test_column_normalize(suite);
62  test_qquantile_normalize(suite);
63  test_qquantile_normalize_weighted(suite);
64  test_quantile_normalize(suite);
65  test_gauss_normalize(suite);
66  test_row_normalize(suite);
67  test_spearman(suite);
68  test_z_score(suite);
69
70  return suite.return_value();
71}
72
73
74void test_centralizer(test::Suite& suite)
75{
76  suite.err() << "Testing Centralizer\n";
77  std::vector<double> vec;
78  vec.push_back(1);
79  vec.push_back(2);
80  vec.push_back(3);
81  normalizer::Centralizer<> c;
82  c(vec.begin(), vec.end(), vec.begin());
83  for (size_t i=0; i<vec.size(); ++i)
84    suite.add(suite.equal(vec[i], static_cast<double>(i)-1.0));
85
86  std::vector<utility::DataWeight> vec2;
87  vec2.push_back(utility::DataWeight(1,1));
88  vec2.push_back(utility::DataWeight(2,0.5));
89  vec2.push_back(utility::DataWeight(2,0.5));
90  std::vector<utility::DataWeight> vec3(vec2.size());
91  c(vec2.begin(), vec2.end(), vec3.begin());
92  for (size_t i=0; i<vec2.size(); ++i)
93    suite.add(suite.equal(vec3[i].weight(), vec2[i].weight()));
94  suite.add(suite.equal(vec3[0].data(), -0.5));
95  suite.add(suite.equal(vec3[1].data(), 0.5));
96  suite.add(suite.equal(vec3[2].data(), 0.5));
97
98}
99
100
101void test_column_normalize(test::Suite& suite)
102{
103  using namespace normalizer;
104  suite.err() << "Testing ColumnNormalizer\n";
105 
106  utility::Matrix m(2,2);
107  m(0,0) = 0;
108  m(0,1) = 10;
109  m(1,0) = 2;
110  m(1,1) = 4;
111  ColumnNormalizer<Centralizer<> > qn;
112  qn(m, m);
113  suite.err() << "Testing m(0,0)\n";
114  suite.add(suite.equal(m(0,0), -1));
115  suite.err() << "Testing m(0,1)\n";
116  suite.add(suite.equal(m(0,1), 3));
117  suite.err() << "Testing m(1,0)\n";
118  suite.add(suite.equal(m(1,0), 1));
119  suite.err() << "Testing m(1,1)\n";
120  suite.add(suite.equal(m(1,1), -3));
121}
122
123
124void test_qquantile_normalize(test::Suite& suite)
125{
126  using namespace normalizer;
127
128  suite.err() << "Testing qQuantileNormalizer\n";
129  std::string data(test::filename("data/normalization_test.data"));
130  if (utility::FileUtil(data.c_str()).permissions("r")) {
131    suite.add(false);
132    suite.err() << "Cannot access file " << data << '\n';
133    return;
134  }
135  std::ifstream data_stream(data.c_str());
136
137  utility::Matrix m(data_stream);
138
139  suite.err() << "testing number of parts (Q) boundary conditions\n";
140  qQuantileNormalizer(m.begin_column(0), m.end_column(0), m.rows());
141  qQuantileNormalizer(m.begin_column(0), m.end_column(0), 3);
142
143  // first column as target
144  qQuantileNormalizer qqn(m.begin_column(0), m.end_column(0) ,9); 
145  ColumnNormalizer<qQuantileNormalizer> cn(qqn);
146  utility::Matrix result(m.rows(),m.columns());
147  cn(m, result);
148
149  suite.err() << "test that result can be stored in the source matrix...";
150  cn(m,m);
151  if (suite.add(result==m))
152    suite.err() << " ok.\n";
153  else 
154    suite.err() << " failed.\n";
155
156  // Enough iteration will make all columns to have the same values as
157  // the target.
158  suite.err() << "Testing that q=matrix rows gives QuantileNormalization\n";
159  utility::Matrix m2(4,2);
160  m2(0,0) = 0; m2(0,1) = 10;
161  m2(1,0) = 2; m2(1,1) = 4;
162  m2(2,0) = 1; m2(2,1) = 0;
163  m2(3,0) = 3; m2(3,1) = 7;
164  qQuantileNormalizer qqn2(m2.begin_column(0), m2.end_column(0), m2.rows());
165  ColumnNormalizer<qQuantileNormalizer> cn2(qqn2);
166  utility::Matrix result2(m2.rows(),m2.columns());
167  cn2(m2,result2);
168  suite.add( suite.equal_fix(m2(0,0),result2(2,1),1.0e-12) &&
169             suite.equal_fix(m2(1,0),result2(3,1),1.0e-12) &&
170             suite.equal_fix(m2(2,0),result2(1,1),1.0e-12) &&
171             suite.equal_fix(m2(3,0),result2(0,1),1.0e-12) );
172}
173
174
175void test_qquantile_normalize_weighted(test::Suite& suite)
176{
177  using namespace normalizer;
178
179  suite.err() << "Testing qQuantileNormalizer weighted\n";
180
181  // test with unweighted target and source
182  std::vector<double> target;
183  target.reserve(1000);
184  while (target.size()<1000)
185    target.push_back(target.size());
186  qQuantileNormalizer qQN(target.begin(), target.end(), 4);
187  std::vector<double> source;
188  while (source.size()<10)
189    source.push_back(source.size()*10);
190  std::vector<double> result(source.size());
191 
192  qQN(source.begin(), source.end(), result.begin());
193 
194  using utility::DataWeight;
195  suite.err() << "Testing with unweighted target and weighted source\n";
196  std::vector<utility::DataWeight> src_w(source.size(), DataWeight(0, 1));
197  std::copy(source.begin(), source.end(),
198            utility::data_iterator(src_w.begin()));
199
200  std::vector<utility::DataWeight> result_w(src_w.size());
201  qQN(src_w.begin(), src_w.end(), result_w.begin());
202  suite.add(suite.equal_range(result.begin(), result.end(),
203                              utility::data_iterator(result_w.begin())));
204
205  suite.err() << "Testing with missing value in source\n";
206  // adding a missing value
207  src_w.insert(src_w.begin(), DataWeight(5.2, 0.0));
208  std::vector<utility::DataWeight> result_w2(src_w.size());
209  qQN(src_w.begin(), src_w.end(), result_w2.begin());
210  // excluding missing value (result_w[0])
211  suite.add(suite.equal_range(utility::data_iterator(result_w.begin()), 
212                               utility::data_iterator(result_w.end()),
213                               ++utility::data_iterator(result_w2.begin())));
214}
215
216
217void test_quantile_normalize(test::Suite& suite)
218{
219  suite.err() << "Testing QuantileNormalizer\n";
220 
221  utility::Matrix m(2,2);
222  m(0,0) = 0;
223  m(0,1) = 10;
224  m(1,0) = 2;
225  m(1,1) = 4;
226  normalizer::QuantileNormalizer qn;
227  qn(m, m);
228  suite.err() << "Testing m(0,0)\n";
229  suite.add(suite.equal(m(0,0), 2));
230  suite.err() << "Testing m(0,1)\n";
231  suite.add(suite.equal(m(0,1), 6));
232  suite.err() << "Testing m(1,0)\n";
233  suite.add(suite.equal(m(1,0), 6));
234  suite.err() << "Testing m(1,1)\n";
235  suite.add(suite.equal(m(1,1), 2));
236}
237
238void test_row_normalize(test::Suite& suite)
239{
240  using namespace normalizer;
241  suite.err() << "Testing RowNormalizer\n";
242 
243  utility::Matrix m(2,3);
244  m(0,0) = 0;
245  m(0,1) = 10;
246  m(1,0) = 2;
247  m(1,1) = 4;
248  utility::Matrix m2(m);
249  m2.transpose();
250  ColumnNormalizer<Centralizer<> > cn;
251  RowNormalizer<Centralizer<> > rn;
252  cn(m, m);
253  rn(m2, m2);
254  m2.transpose();
255  suite.equal_range(m.begin(), m.end(), m2.begin());
256}
257
258void test_spearman(test::Suite& suite)
259{
260  suite.err() << "Testing Spearman\n";
261  normalizer::Spearman spearman;
262  std::vector<double> vec;
263  vec.push_back(0);
264  vec.push_back(2);
265  vec.push_back(3);
266  vec.push_back(1);
267  spearman(vec.begin(), vec.end(), vec.begin());
268  std::vector<double> correct;
269  correct.push_back(1.0/8);
270  correct.push_back(5.0/8);
271  correct.push_back(7.0/8);
272  correct.push_back(3.0/8);
273  suite.add(suite.equal_range(vec.begin(), vec.end(), correct.begin()));
274  suite.err() << "Testing Spearman with ties\n";
275  vec[1]=vec[2];
276  correct[1] = correct[2] = (correct[1]+correct[2])/2;
277  spearman(vec.begin(), vec.end(), vec.begin());
278  suite.add(suite.equal_range(vec.begin(), vec.end(), correct.begin()));
279  test_spearman_weighted(suite);
280}
281
282
283void test_gauss_normalize(test::Suite& suite)
284{
285  suite.err() << "Testing Gauss\n";
286  normalizer::Gauss gauss;
287  std::vector<double> vec;
288  vec.push_back(1);
289  gauss(vec.begin(), vec.end(), vec.begin());
290  suite.add(suite.equal(vec.front(), 0));
291  vec.push_back(1);
292  gauss(vec.begin(), vec.end(), vec.begin());
293  suite.add(suite.equal(vec.front(), -vec.back()));
294
295}
296
297void test_spearman_weighted(test::Suite& suite)
298{
299  suite.err() << "Testing Weighted Spearman\n";
300  normalizer::Spearman spearman;
301
302  suite.err() << "Testing that unity weights reproduces unweighted case\n";
303  utility::MatrixWeighted m(1,4,0,1);
304  utility::MatrixWeighted res(m.rows(), m.columns(),3.14,0);
305  m(0,0).data()=0;
306  m(0,1).data()=2;
307  m(0,2).data()=3;
308  m(0,3).data()=1;
309  std::vector<double> correct(m.columns());
310  std::vector<double> correct_w(m.columns(), 1.0);
311  std::copy(utility::data_iterator(m.begin_row(0)),
312            utility::data_iterator(m.end_row(0)),
313            correct.begin());
314  spearman(correct.begin(), correct.end(), correct.begin());
315  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
316
317  using utility::data_iterator;
318  suite.add(suite.equal_range(data_iterator(res.begin_row(0)),
319                               data_iterator(res.end_row(0)),
320                               correct.begin()));
321  using utility::weight_iterator;
322  suite.add(suite.equal_range(weight_iterator(res.begin_row(0)),
323                               weight_iterator(res.end_row(0)),
324                               correct_w.begin()));
325
326  suite.err() << "Testing rescaling of weights\n";
327  for (size_t i=0; i<m.columns(); ++i) {
328    m(0,i).weight() *= 2;
329    correct_w[i] *= 2;
330  }   
331  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
332  suite.add(suite.equal_range(data_iterator(res.begin_row(0)),
333                               data_iterator(res.end_row(0)),
334                               correct.begin()));
335  suite.add(suite.equal_range(weight_iterator(res.begin_row(0)),
336                               weight_iterator(res.end_row(0)),
337                               correct_w.begin()));
338
339 
340  suite.err() << "Testing case with a zero weight\n";
341  m(0,1).data() = std::numeric_limits<double>::quiet_NaN();
342  m(0,1).weight() = 0.0;
343  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
344  suite.add(suite.equal(res(0,0).data(), 0.5/3)); 
345  suite.add(suite.equal(res(0,2).data(), 2.5/3)); 
346  suite.add(suite.equal(res(0,3).data(), 1.5/3)); 
347
348  suite.err() << "Testing case with ties\n";
349  m(0,0).data() = m(0,2).data();
350  spearman(m.begin_row(0), m.end_row(0), res.begin_row(0));
351  suite.add(suite.equal(res(0,0).data(), 2.0/3)); 
352  suite.add(suite.equal(res(0,2).data(), 2.0/3)); 
353  suite.add(suite.equal(res(0,3).data(), 0.5/3)); 
354}
355
356void test_z_score(test::Suite& suite)
357{
358  suite.err() << "Testing Zscore\n";
359  std::vector<double> vec;
360  vec.push_back(0);
361  vec.push_back(3.14);
362  normalizer::Zscore zscore;
363  zscore(vec.begin(), vec.end(), vec.begin());
364  for (size_t i=0; i<vec.size(); ++i)
365    suite.add(suite.equal(vec[i], 2.0*i-1.0));
366
367  std::vector<utility::DataWeight> vec2;
368  vec2.push_back(utility::DataWeight(1,1));
369  vec2.push_back(utility::DataWeight(2.13,0.5));
370  vec2.push_back(utility::DataWeight(2.13,0.5));
371  std::vector<utility::DataWeight> vec3(vec2.size());
372  zscore(vec2.begin(), vec2.end(), vec3.begin());
373  for (size_t i=0; i<vec2.size(); ++i)
374    suite.add(suite.equal(vec3[i].weight(), vec2[i].weight()));
375  suite.add(suite.equal(vec3[0].data(), -1.0));
376  suite.add(suite.equal(vec3[1].data(), 1.0));
377  suite.add(suite.equal(vec3[2].data(), 1.0));
378}
379
380
Note: See TracBrowser for help on using the repository browser.