source: trunk/test/ncc_test.cc @ 1483

Last change on this file since 1483 was 1483, checked in by Peter, 13 years ago

refs #396 - removing a constructor in MatrixLookupWeighted? - needed to re-organize tests in ncc_test to track down an error

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 10.7 KB
Line 
1// $Id: ncc_test.cc 1483 2008-09-09 16:03:15Z peter $
2
3/*
4  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér
5  Copyright (C) 2007 Jari Häkkinen, Peter Johansson, Markus Ringnér
6  Copyright (C) 2008 Peter Johansson, Markus Ringnér
7
8  This file is part of the yat library, http://dev.thep.lu.se/yat
9
10  The yat library is free software; you can redistribute it and/or
11  modify it under the terms of the GNU General Public License as
12  published by the Free Software Foundation; either version 2 of the
13  License, or (at your option) any later version.
14
15  The yat library is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  General Public License for more details.
19
20  You should have received a copy of the GNU General Public License
21  along with this program; if not, write to the Free Software
22  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23  02111-1307, USA.
24*/
25
26#include "Suite.h"
27
28#include "yat/classifier/MatrixLookup.h"
29#include "yat/classifier/MatrixLookupWeighted.h"
30#include "yat/classifier/NCC.h"
31#include "yat/classifier/Target.h"
32#include "yat/utility/Matrix.h"
33#include "yat/statistics/EuclideanDistance.h"
34#include "yat/statistics/PearsonDistance.h"
35#include "yat/utility/utility.h"
36
37#include <cassert>
38#include <fstream>
39#include <iostream>
40#include <stdexcept>
41#include <sstream>
42#include <string>
43#include <limits>
44#include <cmath>
45
46using namespace theplu::yat;
47
// Forward declaration: the function is defined after main(), which
// calls it as its first test.
48void predict_nan_data_unweighted_data(test::Suite& suite);
49
50int main(int argc,char* argv[])
51{ 
52  test::Suite suite(argc, argv);
53  suite.err() << "testing ncc" << std::endl;
54
55  predict_nan_data_unweighted_data(suite);
56
57  /////////////////////////////////////////////
58  // First test of constructor and training 
59  /////////////////////////////////////////////
60  classifier::MatrixLookup ml(4,4);
61  std::vector<std::string> vec(4, "pos");
62  vec[3]="bjds";
63  classifier::Target target(vec);
64  classifier::NCC<statistics::EuclideanDistance> ncctmp;
65  suite.err() << "training...\n";
66  ncctmp.train(ml,target);
67  suite.err() << "done\n";
68
69  /////////////////////////////////////////////
70  // A test of predictions using unweighted data
71  /////////////////////////////////////////////
72  suite.err() << "test of predictions using unweighted test data\n";
73  utility::Matrix data1(3,4);
74  for(size_t i=0;i<3;i++) {
75    data1(i,0)=3-i;
76    data1(i,1)=5-i;
77    data1(i,2)=i+1;
78    data1(i,3)=i+3;
79  }
80  std::vector<std::string> vec1(4, "pos");
81  vec1[0]="neg";
82  vec1[1]="neg";
83
84  classifier::MatrixLookup ml1(data1);
85  classifier::Target target1(vec1);
86
87  classifier::NCC<statistics::EuclideanDistance> ncc1;
88  ncc1.train(ml1,target1);
89  utility::Matrix prediction1;
90  ncc1.predict(ml1,prediction1);
91  utility::Matrix result1(2,4);
92  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=sqrt(3.0);
93  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=sqrt(11.0);
94  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
95                         result1.begin())) {
96    suite.add(false);
97    suite.err() << "Difference to expected prediction too large\n";
98  }
99
100  //////////////////////////////////////////////////////////////////////////
101  // A test of predictions using unweighted training and weighted test data
102  //////////////////////////////////////////////////////////////////////////
103  suite.err() << "test of predictions using unweighted training and weighted test data\n";
104  utility::Matrix weights1(3,4,1.0);
105  weights1(0,0)=weights1(1,1)=weights1(2,2)=weights1(1,3)=0.0;
106  classifier::MatrixLookupWeighted mlw1(data1,weights1);
107  ncc1.predict(mlw1,prediction1); 
108  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=sqrt(15.0);
109  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
110                         result1.begin())) {
111    suite.add(false);
112    suite.err() << "Difference to expected prediction too large\n";
113  }
114
115  //////////////////////////////////////////////////////////////////////////
116  // A test of predictions using weighted training resulting in NaN's
117  // in centroids and unweighted test data
118  //////////////////////////////////////////////////////////////////////////
119  suite.err() << "test of predictions using nan centroids and unweighted test data\n";
120  utility::Matrix weights2(3,4,1.0);
121  weights2(1,0)=weights2(1,1)=0.0;
122  classifier::MatrixLookupWeighted mlw2(data1,weights2);
123  classifier::NCC<statistics::EuclideanDistance> ncc2;
124  ncc2.train(mlw2,target1);
125  ncc2.predict(ml1,prediction1); 
126  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=sqrt(3.0);
127  result1(1,0)=result1(1,1)=sqrt(11.0);
128  result1(0,2)=result1(0,3)=sqrt(15.0);
129  if(!std::isnan(ncc2.centroids()(1,0))) 
130    suite.add(false);
131  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
132                         result1.begin())) {
133    suite.add(false);
134    suite.err() << "Difference to expected prediction too large\n";
135  }
136
137  //////////////////////////////////////////////////////////////////////////
138  // A test of predictions when a centroid has nan for all variables that a
139  // test sample has non-zero weights for.
140  //////////////////////////////////////////////////////////////////////////
141  suite.err() << "test of predictions using nan centroids and weighted test data\n";
142  suite.err() << "... using EuclideanDistance" << std::endl;
143  weights1(0,0)=weights1(2,0)=0;
144  classifier::NCC<statistics::EuclideanDistance> ncc3;
145  ncc3.train(mlw2,target1);
146  ncc3.predict(mlw1,prediction1); 
147  if(!std::isnan(ncc3.centroids()(1,0))) {
148    suite.add(false);
149    suite.err() << "Training failed: expected nan in centroid" << std::endl;
150  }
151  if(!(std::isnan(prediction1(0,0)) &&
152       suite.equal(prediction1(1,0),sqrt(3.0)) &&
153       suite.equal(prediction1(0,1),sqrt(3.0)) &&
154       suite.equal(prediction1(1,1),sqrt(15.0)) &&
155       suite.equal(prediction1(0,2),sqrt(27.0)) )) { 
156    suite.add(false);
157    suite.err() << "Test failed: predictions incorrect" << std::endl;
158  }
159  suite.err() << "... using PearsonDistance" << std::endl;;
160  classifier::NCC<statistics::PearsonDistance> ncc4;
161  ncc4.train(mlw2,target1);
162  ncc4.predict(mlw1,prediction1); 
163  if(!std::isnan(ncc4.centroids()(1,0))) {
164    suite.add(false);
165    suite.err() << "Training failed: expected nan in centroid" << std::endl;
166  }
167  if(!(std::isnan(prediction1(0,0)) &&
168       std::isnan(prediction1(0,2)) &&
169       std::isnan(prediction1(1,0)) &&
170       suite.equal(prediction1(0,1), 0) &&
171       suite.equal(prediction1(1,2), 0) &&
172       suite.equal(prediction1(1,3), 0) && 
173       suite.equal(prediction1(0,3), 2.0) &&
174       suite.equal(prediction1(1,1), 2.0) )) {
175    suite.add(false); 
176    suite.err() << "Test failed: predictions incorrect" << std::endl;
177  }
178
179  ////////////////////////////////////////////////////////////////
180  // A test of when a class has no training samples, should give nan
181  // in predictions.
182  ////////////////////////////////////////////////////////////////
183  //Keep only the second class in the training samples
184  std::vector<size_t> ind(2,2);
185  ind[1]=3;
186  classifier::Target target2(target1,utility::Index(ind));
187  classifier::MatrixLookupWeighted mlw3(data1,weights2,utility::Index(ind),false);
188  classifier::NCC<statistics::PearsonDistance> ncc5;
189  ncc5.train(mlw3,target2);
190  ncc5.predict(mlw1,prediction1); 
191  if (!(std::isnan(prediction1(0,0)) && std::isnan(prediction1(0,1)) && 
192        std::isnan(prediction1(0,2)) && std::isnan(prediction1(0,3)) &&
193        std::isnan(prediction1(1,0)) &&
194        suite.equal(prediction1(1,1), 2.0) &&
195        suite.equal(prediction1(1,2),0) &&
196        suite.equal(prediction1(1,3),0) )) {
197    suite.err() << "Difference to expected prediction too large\n";
198    suite.add(false);
199  }
200
201  //////////////////////////////////////////////////////////////////////////
202  // A test of predictions using Sorlie data
203  //////////////////////////////////////////////////////////////////////////
204  suite.err() << "test with Sorlie data\n";
205  std::ifstream is(test::filename("data/sorlie_centroid_data.txt").c_str());
206  utility::Matrix data(is,'\t');
207  is.close();
208
209  is.open(test::filename("data/sorlie_centroid_classes.txt").c_str());
210  classifier::Target targets(is);
211  is.close();
212
213  // Generate weight matrix with 0 for missing values and 1 for others.
214  utility::Matrix weights(data.rows(),data.columns(),0.0);
215  utility::nan(data,weights);
216     
217  classifier::MatrixLookupWeighted dataviewweighted(data,weights);
218  classifier::NCC<statistics::PearsonDistance> ncc;
219  suite.err() << "training...\n";
220  ncc.train(dataviewweighted,targets);
221
222  // Comparing the centroids to stored result
223  is.open(test::filename("data/sorlie_centroids.txt").c_str());
224  utility::Matrix centroids(is);
225  is.close();
226
227  if(centroids.rows() != ncc.centroids().rows() ||
228     centroids.columns() != ncc.centroids().columns()) {
229    suite.err() << "Error in the dimensionality of centroids\n";
230    suite.err() << "Nof rows: " << centroids.rows() << " expected: " 
231           << ncc.centroids().rows() << std::endl;
232    suite.err() << "Nof columns: " << centroids.columns() << " expected: " 
233           << ncc.centroids().columns() << std::endl;
234  }
235
236  if (!suite.equal_range(centroids.begin(), centroids.end(),
237                         ncc.centroids().begin(), 100000)) {
238    suite.add(false);
239    suite.err() << "Difference to stored centroids too large\n";
240  }
241
242  suite.err() << "...predicting...\n";
243  utility::Matrix prediction;
244  ncc.predict(dataviewweighted,prediction);
245 
246  // Comparing the prediction to stored result
247  is.open(test::filename("data/sorlie_centroid_predictions.txt").c_str());
248  utility::Matrix result(is,'\t');
249  is.close();
250
251  if (!suite.equal_range(result.begin(), result.end(),
252                         prediction.begin(), 100000)) {
253    suite.add(false);
254    suite.err() << "Difference to stored prediction too large\n";
255  }
256
257  return suite.return_value();
258}
259
260void predict_nan_data_unweighted_data(test::Suite& suite)
261{
262  //////////////////////////////////////////////////////////////////////////
263  // A test of predictions using weighted training resulting in NaN's
264  // in centroids and unweighted test data
265  //////////////////////////////////////////////////////////////////////////
266  suite.err() << "test of predictions using nan centroids and unweighted test data\n";
267  utility::Matrix data1(3,4);
268  for(size_t i=0;i<3;i++) {
269    data1(i,0)=3-i;
270    data1(i,1)=5-i;
271    data1(i,2)=i+1;
272    data1(i,3)=i+3;
273  }
274  std::vector<std::string> vec1(4, "pos");
275  vec1[0]="neg";
276  vec1[1]="neg";
277
278  classifier::MatrixLookup ml1(data1);
279  classifier::Target target1(vec1);
280  utility::Matrix prediction1;
281  utility::Matrix result1(2,4);
282
283  utility::Matrix weights2(3,4,1.0);
284  weights2(1,0)=weights2(1,1)=0.0;
285
286  classifier::MatrixLookupWeighted mlw2(data1,weights2);
287  classifier::NCC<statistics::EuclideanDistance> ncc2;
288  ncc2.train(mlw2,target1);
289  ncc2.predict(ml1,prediction1); 
290  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=sqrt(3.0);
291  result1(1,0)=result1(1,1)=sqrt(11.0);
292  result1(0,2)=result1(0,3)=sqrt(15.0);
293  if(!std::isnan(ncc2.centroids()(1,0))) 
294    suite.add(false);
295  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
296                         result1.begin())) {
297    suite.add(false);
298    suite.err() << "Difference to expected prediction too large\n";
299  }
300}
Note: See TracBrowser for help on using the repository browser.