source: trunk/test/ncc_test.cc @ 2119

Last change on this file since 2119 was 2119, checked in by Peter, 13 years ago

converted files to utf-8. fixes #577

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 11.0 KB
Line 
1// $Id: ncc_test.cc 2119 2009-12-12 23:11:43Z peter $
2
3/*
4  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér
5  Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér
6
7  This file is part of the yat library, http://dev.thep.lu.se/yat
8
9  The yat library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 3 of the
12  License, or (at your option) any later version.
13
14  The yat library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  General Public License for more details.
18
19  You should have received a copy of the GNU General Public License
20  along with yat. If not, see <http://www.gnu.org/licenses/>.
21*/
22
23#include "Suite.h"
24
25#include "yat/classifier/MatrixLookup.h"
26#include "yat/classifier/MatrixLookupWeighted.h"
27#include "yat/classifier/NCC.h"
28#include "yat/classifier/Target.h"
29#include "yat/utility/DataIterator.h"
30#include "yat/utility/DataWeight.h"
31#include "yat/utility/Matrix.h"
32#include "yat/utility/MatrixWeighted.h"
33#include "yat/statistics/EuclideanDistance.h"
34#include "yat/statistics/PearsonDistance.h"
35#include "yat/utility/utility.h"
36
37#include <cassert>
38#include <fstream>
39#include <iostream>
40#include <stdexcept>
41#include <sstream>
42#include <string>
43#include <limits>
44#include <cmath>
45
46using namespace theplu::yat;
47
48void predict_nan_data_unweighted_data(test::Suite& suite);
49
50int main(int argc,char* argv[])
51{ 
52  test::Suite suite(argc, argv);
53  suite.err() << "testing ncc" << std::endl;
54
55  predict_nan_data_unweighted_data(suite);
56
57  /////////////////////////////////////////////
58  // First test of constructor and training 
59  /////////////////////////////////////////////
60  classifier::MatrixLookup ml(4,4);
61  std::vector<std::string> vec(4, "pos");
62  vec[3]="bjds";
63  classifier::Target target(vec);
64  classifier::NCC<statistics::EuclideanDistance> ncctmp;
65  suite.err() << "training...\n";
66  ncctmp.train(ml,target);
67  suite.err() << "done\n";
68
69  /////////////////////////////////////////////
70  // A test of predictions using unweighted data
71  /////////////////////////////////////////////
72  suite.err() << "test of predictions using unweighted test data\n";
73  utility::Matrix data1(3,4);
74  for(size_t i=0;i<3;i++) {
75    data1(i,0)=3-i;
76    data1(i,1)=5-i;
77    data1(i,2)=i+1;
78    data1(i,3)=i+3;
79  }
80  std::vector<std::string> vec1(4, "pos");
81  vec1[0]="neg";
82  vec1[1]="neg";
83
84  classifier::MatrixLookup ml1(data1);
85  classifier::Target target1(vec1);
86
87  classifier::NCC<statistics::EuclideanDistance> ncc1;
88  ncc1.train(ml1,target1);
89  utility::Matrix prediction1;
90  ncc1.predict(ml1,prediction1);
91  utility::Matrix result1(2,4);
92  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=sqrt(3.0);
93  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=sqrt(11.0);
94  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
95                         result1.begin())) {
96    suite.add(false);
97    suite.err() << "Difference to expected prediction too large\n";
98  }
99
100  //////////////////////////////////////////////////////////////////////////
101  // A test of predictions using unweighted training and weighted test data
102  //////////////////////////////////////////////////////////////////////////
103  suite.err() << "test of predictions using unweighted training and weighted test data\n";
104  utility::MatrixWeighted xw11(3,4);
105  xw11(0,0)=xw11(1,1)=xw11(2,2)=xw11(1,3)=utility::DataWeight(0,0);
106  std::copy(data1.begin(), data1.end(), utility::data_iterator(xw11.begin()));
107  classifier::MatrixLookupWeighted mlw1(xw11);
108  //classifier::MatrixLookupWeighted mlw1(data1,weights1);
109  ncc1.predict(mlw1,prediction1); 
110  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=sqrt(15.0);
111  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
112                         result1.begin())) {
113    suite.add(false);
114    suite.err() << "Difference to expected prediction too large\n";
115  }
116
117  //////////////////////////////////////////////////////////////////////////
118  // A test of predictions using weighted training resulting in NaN's
119  // in centroids and unweighted test data
120  //////////////////////////////////////////////////////////////////////////
121  suite.err() << "test of predictions using nan centroids and unweighted test data\n";
122  utility::MatrixWeighted xw12(3,4);
123  xw12(1,0)=xw12(1,1)=utility::DataWeight(0,0);
124  std::copy(data1.begin(), data1.end(), utility::data_iterator(xw12.begin()));
125  classifier::MatrixLookupWeighted mlw2(xw12);
126  //classifier::MatrixLookupWeighted mlw2(data1,weights2);
127  classifier::NCC<statistics::EuclideanDistance> ncc2;
128  ncc2.train(mlw2,target1);
129  ncc2.predict(ml1,prediction1); 
130  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=sqrt(3.0);
131  result1(1,0)=result1(1,1)=sqrt(11.0);
132  result1(0,2)=result1(0,3)=sqrt(15.0);
133  if(!std::isnan(ncc2.centroids()(1,0))) 
134    suite.add(false);
135  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
136                         result1.begin())) {
137    suite.add(false);
138    suite.err() << "Difference to expected prediction too large\n";
139  }
140
141  //////////////////////////////////////////////////////////////////////////
142  // A test of predictions when a centroid has nan for all variables that a
143  // test sample has non-zero weights for.
144  //////////////////////////////////////////////////////////////////////////
145  suite.err() << "test of predictions using nan centroids and weighted test data\n";
146  suite.err() << "... using EuclideanDistance" << std::endl;
147  xw11(0,0).weight() = xw11(2,0).weight()=0;
148  classifier::NCC<statistics::EuclideanDistance> ncc3;
149  ncc3.train(mlw2,target1);
150  ncc3.predict(mlw1,prediction1); 
151  if(!std::isnan(ncc3.centroids()(1,0))) {
152    suite.add(false);
153    suite.err() << "Training failed: expected nan in centroid" << std::endl;
154  }
155  if(!(std::isnan(prediction1(0,0)) &&
156       suite.equal(prediction1(1,0),sqrt(3.0)) &&
157       suite.equal(prediction1(0,1),sqrt(3.0)) &&
158       suite.equal(prediction1(1,1),sqrt(15.0)) &&
159       suite.equal(prediction1(0,2),sqrt(27.0)) )) { 
160    suite.add(false);
161    if (!std::isnan(prediction1(0,0)))
162      suite.err() << "prediction1(0,0): " << prediction1(0,0) << "  "
163                  << "expected NaN\n";
164    suite.err() << "Test failed: predictions incorrect" << std::endl;
165  }
166  suite.err() << "... using PearsonDistance" << std::endl;;
167  classifier::NCC<statistics::PearsonDistance> ncc4;
168  ncc4.train(mlw2,target1);
169  ncc4.predict(mlw1,prediction1); 
170  if(!std::isnan(ncc4.centroids()(1,0))) {
171    suite.add(false);
172    suite.err() << "Training failed: expected nan in centroid" << std::endl;
173  }
174  if(!(std::isnan(prediction1(0,0)) &&
175       std::isnan(prediction1(0,2)) &&
176       std::isnan(prediction1(1,0)) &&
177       suite.equal(prediction1(0,1), 0) &&
178       suite.equal(prediction1(1,2), 0) &&
179       suite.equal(prediction1(1,3), 0) && 
180       suite.equal(prediction1(0,3), 2.0) &&
181       suite.equal(prediction1(1,1), 2.0) )) {
182    suite.add(false); 
183    suite.err() << "Test failed: predictions incorrect" << std::endl;
184  }
185
186  ////////////////////////////////////////////////////////////////
187  // A test of when a class has no training samples, should give nan
188  // in predictions.
189  ////////////////////////////////////////////////////////////////
190  //Keep only the second class in the training samples
191  std::vector<size_t> ind(2,2);
192  ind[1]=3;
193  classifier::Target target2(target1,utility::Index(ind));
194  classifier::MatrixLookupWeighted mlw3(xw12,
195                                        utility::Index(data1.rows()),
196                                        utility::Index(ind));
197  classifier::NCC<statistics::PearsonDistance> ncc5;
198  ncc5.train(mlw3,target2);
199  ncc5.predict(mlw1,prediction1); 
200  if (!(std::isnan(prediction1(0,0)) && std::isnan(prediction1(0,1)) && 
201        std::isnan(prediction1(0,2)) && std::isnan(prediction1(0,3)) &&
202        std::isnan(prediction1(1,0)) &&
203        suite.equal(prediction1(1,1), 2.0) &&
204        suite.equal(prediction1(1,2),0) &&
205        suite.equal(prediction1(1,3),0) )) {
206    suite.err() << "Difference to expected prediction too large\n";
207    suite.add(false);
208  }
209
210  //////////////////////////////////////////////////////////////////////////
211  // A test of predictions using Sorlie data
212  //////////////////////////////////////////////////////////////////////////
213  suite.err() << "test with Sorlie data\n";
214  std::ifstream is(test::filename("data/sorlie_centroid_data.txt").c_str());
215  utility::MatrixWeighted data_weight(is,'\t');
216  is.close();
217
218  is.open(test::filename("data/sorlie_centroid_classes.txt").c_str());
219  classifier::Target targets(is);
220  is.close();
221
222  classifier::MatrixLookupWeighted dataviewweighted(data_weight);
223  classifier::NCC<statistics::PearsonDistance> ncc;
224  suite.err() << "training...\n";
225  ncc.train(dataviewweighted,targets);
226
227  // Comparing the centroids to stored result
228  is.open(test::filename("data/sorlie_centroids.txt").c_str());
229  utility::Matrix centroids(is);
230  is.close();
231
232  if(centroids.rows() != ncc.centroids().rows() ||
233     centroids.columns() != ncc.centroids().columns()) {
234    suite.err() << "Error in the dimensionality of centroids\n";
235    suite.err() << "Nof rows: " << centroids.rows() << " expected: " 
236           << ncc.centroids().rows() << std::endl;
237    suite.err() << "Nof columns: " << centroids.columns() << " expected: " 
238           << ncc.centroids().columns() << std::endl;
239  }
240
241  if (!suite.equal_range_fix(centroids.begin(), centroids.end(),
242                             ncc.centroids().begin(), 1e-11)) {
243    suite.add(false);
244    suite.err() << "Difference to stored centroids too large\n";
245  }
246
247  suite.err() << "...predicting...\n";
248  utility::Matrix prediction;
249  ncc.predict(dataviewweighted,prediction);
250 
251  // Comparing the prediction to stored result
252  is.open(test::filename("data/sorlie_centroid_predictions.txt").c_str());
253  utility::Matrix result(is,'\t');
254  is.close();
255
256  if (!suite.equal_range_fix(result.begin(), result.end(),
257                             prediction.begin(), 1e-11)) {
258    suite.add(false);
259    suite.err() << "Difference to stored prediction too large\n";
260  }
261
262  return suite.return_value();
263}
264
265void predict_nan_data_unweighted_data(test::Suite& suite)
266{
267  //////////////////////////////////////////////////////////////////////////
268  // A test of predictions using weighted training resulting in NaN's
269  // in centroids and unweighted test data
270  //////////////////////////////////////////////////////////////////////////
271  suite.err() << "test of predictions using nan centroids and unweighted test data\n";
272  utility::Matrix data1(3,4);
273  for(size_t i=0;i<3;i++) {
274    data1(i,0)=3-i;
275    data1(i,1)=5-i;
276    data1(i,2)=i+1;
277    data1(i,3)=i+3;
278  }
279  utility::MatrixWeighted xw(data1);
280  std::vector<std::string> vec1(4, "pos");
281  vec1[0]="neg";
282  vec1[1]="neg";
283
284  classifier::MatrixLookup ml1(data1);
285  classifier::Target target1(vec1);
286  utility::Matrix prediction1;
287  utility::Matrix result1(2,4);
288
289  xw(1,0).weight()=xw(1,1).weight()=0.0;
290 
291
292  classifier::MatrixLookupWeighted mlw2(xw);
293  classifier::NCC<statistics::EuclideanDistance> ncc2;
294  ncc2.train(mlw2,target1);
295  ncc2.predict(ml1,prediction1); 
296  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=sqrt(3.0);
297  result1(1,0)=result1(1,1)=sqrt(11.0);
298  result1(0,2)=result1(0,3)=sqrt(15.0);
299  if(!std::isnan(ncc2.centroids()(1,0))) 
300    suite.add(false);
301  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
302                         result1.begin())) {
303    suite.add(false);
304    suite.err() << "Difference to expected prediction too large\n";
305  }
306}
Note: See TracBrowser for help on using the repository browser.