source: trunk/test/ncc_test.cc @ 1607

Last change on this file since 1607 was 1587, checked in by Peter, 13 years ago

closes #396

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 11.1 KB
Line 
1// $Id: ncc_test.cc 1587 2008-10-17 15:31:42Z peter $
2
3/*
4  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér
5  Copyright (C) 2007 Jari Häkkinen, Peter Johansson, Markus Ringnér
6  Copyright (C) 2008 Peter Johansson, Markus Ringnér
7
8  This file is part of the yat library, http://dev.thep.lu.se/yat
9
10  The yat library is free software; you can redistribute it and/or
11  modify it under the terms of the GNU General Public License as
12  published by the Free Software Foundation; either version 3 of the
13  License, or (at your option) any later version.
14
15  The yat library is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  General Public License for more details.
19
20  You should have received a copy of the GNU General Public License
21  along with yat. If not, see <http://www.gnu.org/licenses/>.
22*/
23
24#include "Suite.h"
25
26#include "yat/classifier/MatrixLookup.h"
27#include "yat/classifier/MatrixLookupWeighted.h"
28#include "yat/classifier/NCC.h"
29#include "yat/classifier/Target.h"
30#include "yat/utility/DataIterator.h"
31#include "yat/utility/DataWeight.h"
32#include "yat/utility/Matrix.h"
33#include "yat/utility/MatrixWeighted.h"
34#include "yat/statistics/EuclideanDistance.h"
35#include "yat/statistics/PearsonDistance.h"
36#include "yat/utility/utility.h"
37
38#include <cassert>
39#include <fstream>
40#include <iostream>
41#include <stdexcept>
42#include <sstream>
43#include <string>
44#include <limits>
45#include <cmath>
46
47using namespace theplu::yat;
48
49void predict_nan_data_unweighted_data(test::Suite& suite);
50
51int main(int argc,char* argv[])
52{ 
53  test::Suite suite(argc, argv);
54  suite.err() << "testing ncc" << std::endl;
55
56  predict_nan_data_unweighted_data(suite);
57
58  /////////////////////////////////////////////
59  // First test of constructor and training 
60  /////////////////////////////////////////////
61  classifier::MatrixLookup ml(4,4);
62  std::vector<std::string> vec(4, "pos");
63  vec[3]="bjds";
64  classifier::Target target(vec);
65  classifier::NCC<statistics::EuclideanDistance> ncctmp;
66  suite.err() << "training...\n";
67  ncctmp.train(ml,target);
68  suite.err() << "done\n";
69
70  /////////////////////////////////////////////
71  // A test of predictions using unweighted data
72  /////////////////////////////////////////////
73  suite.err() << "test of predictions using unweighted test data\n";
74  utility::Matrix data1(3,4);
75  for(size_t i=0;i<3;i++) {
76    data1(i,0)=3-i;
77    data1(i,1)=5-i;
78    data1(i,2)=i+1;
79    data1(i,3)=i+3;
80  }
81  std::vector<std::string> vec1(4, "pos");
82  vec1[0]="neg";
83  vec1[1]="neg";
84
85  classifier::MatrixLookup ml1(data1);
86  classifier::Target target1(vec1);
87
88  classifier::NCC<statistics::EuclideanDistance> ncc1;
89  ncc1.train(ml1,target1);
90  utility::Matrix prediction1;
91  ncc1.predict(ml1,prediction1);
92  utility::Matrix result1(2,4);
93  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=sqrt(3.0);
94  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=sqrt(11.0);
95  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
96                         result1.begin())) {
97    suite.add(false);
98    suite.err() << "Difference to expected prediction too large\n";
99  }
100
101  //////////////////////////////////////////////////////////////////////////
102  // A test of predictions using unweighted training and weighted test data
103  //////////////////////////////////////////////////////////////////////////
104  suite.err() << "test of predictions using unweighted training and weighted test data\n";
105  utility::MatrixWeighted xw11(3,4);
106  xw11(0,0)=xw11(1,1)=xw11(2,2)=xw11(1,3)=utility::DataWeight(0,0);
107  std::copy(data1.begin(), data1.end(), utility::data_iterator(xw11.begin()));
108  classifier::MatrixLookupWeighted mlw1(xw11);
109  //classifier::MatrixLookupWeighted mlw1(data1,weights1);
110  ncc1.predict(mlw1,prediction1); 
111  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=sqrt(15.0);
112  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
113                         result1.begin())) {
114    suite.add(false);
115    suite.err() << "Difference to expected prediction too large\n";
116  }
117
118  //////////////////////////////////////////////////////////////////////////
119  // A test of predictions using weighted training resulting in NaN's
120  // in centroids and unweighted test data
121  //////////////////////////////////////////////////////////////////////////
122  suite.err() << "test of predictions using nan centroids and unweighted test data\n";
123  utility::MatrixWeighted xw12(3,4);
124  xw12(1,0)=xw12(1,1)=utility::DataWeight(0,0);
125  std::copy(data1.begin(), data1.end(), utility::data_iterator(xw12.begin()));
126  classifier::MatrixLookupWeighted mlw2(xw12);
127  //classifier::MatrixLookupWeighted mlw2(data1,weights2);
128  classifier::NCC<statistics::EuclideanDistance> ncc2;
129  ncc2.train(mlw2,target1);
130  ncc2.predict(ml1,prediction1); 
131  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=sqrt(3.0);
132  result1(1,0)=result1(1,1)=sqrt(11.0);
133  result1(0,2)=result1(0,3)=sqrt(15.0);
134  if(!std::isnan(ncc2.centroids()(1,0))) 
135    suite.add(false);
136  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
137                         result1.begin())) {
138    suite.add(false);
139    suite.err() << "Difference to expected prediction too large\n";
140  }
141
142  //////////////////////////////////////////////////////////////////////////
143  // A test of predictions when a centroid has nan for all variables that a
144  // test sample has non-zero weights for.
145  //////////////////////////////////////////////////////////////////////////
146  suite.err() << "test of predictions using nan centroids and weighted test data\n";
147  suite.err() << "... using EuclideanDistance" << std::endl;
148  xw11(0,0).weight() = xw11(2,0).weight()=0;
149  classifier::NCC<statistics::EuclideanDistance> ncc3;
150  ncc3.train(mlw2,target1);
151  ncc3.predict(mlw1,prediction1); 
152  if(!std::isnan(ncc3.centroids()(1,0))) {
153    suite.add(false);
154    suite.err() << "Training failed: expected nan in centroid" << std::endl;
155  }
156  if(!(std::isnan(prediction1(0,0)) &&
157       suite.equal(prediction1(1,0),sqrt(3.0)) &&
158       suite.equal(prediction1(0,1),sqrt(3.0)) &&
159       suite.equal(prediction1(1,1),sqrt(15.0)) &&
160       suite.equal(prediction1(0,2),sqrt(27.0)) )) { 
161    suite.add(false);
162    if (!std::isnan(prediction1(0,0)))
163      suite.err() << "prediction1(0,0): " << prediction1(0,0) << "  "
164                  << "expected NaN\n";
165    suite.err() << "Test failed: predictions incorrect" << std::endl;
166  }
167  suite.err() << "... using PearsonDistance" << std::endl;;
168  classifier::NCC<statistics::PearsonDistance> ncc4;
169  ncc4.train(mlw2,target1);
170  ncc4.predict(mlw1,prediction1); 
171  if(!std::isnan(ncc4.centroids()(1,0))) {
172    suite.add(false);
173    suite.err() << "Training failed: expected nan in centroid" << std::endl;
174  }
175  if(!(std::isnan(prediction1(0,0)) &&
176       std::isnan(prediction1(0,2)) &&
177       std::isnan(prediction1(1,0)) &&
178       suite.equal(prediction1(0,1), 0) &&
179       suite.equal(prediction1(1,2), 0) &&
180       suite.equal(prediction1(1,3), 0) && 
181       suite.equal(prediction1(0,3), 2.0) &&
182       suite.equal(prediction1(1,1), 2.0) )) {
183    suite.add(false); 
184    suite.err() << "Test failed: predictions incorrect" << std::endl;
185  }
186
187  ////////////////////////////////////////////////////////////////
188  // A test of when a class has no training samples, should give nan
189  // in predictions.
190  ////////////////////////////////////////////////////////////////
191  //Keep only the second class in the training samples
192  std::vector<size_t> ind(2,2);
193  ind[1]=3;
194  classifier::Target target2(target1,utility::Index(ind));
195  classifier::MatrixLookupWeighted mlw3(xw12,
196                                        utility::Index(data1.rows()),
197                                        utility::Index(ind));
198  classifier::NCC<statistics::PearsonDistance> ncc5;
199  ncc5.train(mlw3,target2);
200  ncc5.predict(mlw1,prediction1); 
201  if (!(std::isnan(prediction1(0,0)) && std::isnan(prediction1(0,1)) && 
202        std::isnan(prediction1(0,2)) && std::isnan(prediction1(0,3)) &&
203        std::isnan(prediction1(1,0)) &&
204        suite.equal(prediction1(1,1), 2.0) &&
205        suite.equal(prediction1(1,2),0) &&
206        suite.equal(prediction1(1,3),0) )) {
207    suite.err() << "Difference to expected prediction too large\n";
208    suite.add(false);
209  }
210
211  //////////////////////////////////////////////////////////////////////////
212  // A test of predictions using Sorlie data
213  //////////////////////////////////////////////////////////////////////////
214  suite.err() << "test with Sorlie data\n";
215  std::ifstream is(test::filename("data/sorlie_centroid_data.txt").c_str());
216  utility::MatrixWeighted data_weight(is,'\t');
217  is.close();
218
219  is.open(test::filename("data/sorlie_centroid_classes.txt").c_str());
220  classifier::Target targets(is);
221  is.close();
222
223  classifier::MatrixLookupWeighted dataviewweighted(data_weight);
224  classifier::NCC<statistics::PearsonDistance> ncc;
225  suite.err() << "training...\n";
226  ncc.train(dataviewweighted,targets);
227
228  // Comparing the centroids to stored result
229  is.open(test::filename("data/sorlie_centroids.txt").c_str());
230  utility::Matrix centroids(is);
231  is.close();
232
233  if(centroids.rows() != ncc.centroids().rows() ||
234     centroids.columns() != ncc.centroids().columns()) {
235    suite.err() << "Error in the dimensionality of centroids\n";
236    suite.err() << "Nof rows: " << centroids.rows() << " expected: " 
237           << ncc.centroids().rows() << std::endl;
238    suite.err() << "Nof columns: " << centroids.columns() << " expected: " 
239           << ncc.centroids().columns() << std::endl;
240  }
241
242  if (!suite.equal_range(centroids.begin(), centroids.end(),
243                         ncc.centroids().begin(), 100000)) {
244    suite.add(false);
245    suite.err() << "Difference to stored centroids too large\n";
246  }
247
248  suite.err() << "...predicting...\n";
249  utility::Matrix prediction;
250  ncc.predict(dataviewweighted,prediction);
251 
252  // Comparing the prediction to stored result
253  is.open(test::filename("data/sorlie_centroid_predictions.txt").c_str());
254  utility::Matrix result(is,'\t');
255  is.close();
256
257  if (!suite.equal_range(result.begin(), result.end(),
258                         prediction.begin(), 100000)) {
259    suite.add(false);
260    suite.err() << "Difference to stored prediction too large\n";
261  }
262
263  return suite.return_value();
264}
265
266void predict_nan_data_unweighted_data(test::Suite& suite)
267{
268  //////////////////////////////////////////////////////////////////////////
269  // A test of predictions using weighted training resulting in NaN's
270  // in centroids and unweighted test data
271  //////////////////////////////////////////////////////////////////////////
272  suite.err() << "test of predictions using nan centroids and unweighted test data\n";
273  utility::Matrix data1(3,4);
274  for(size_t i=0;i<3;i++) {
275    data1(i,0)=3-i;
276    data1(i,1)=5-i;
277    data1(i,2)=i+1;
278    data1(i,3)=i+3;
279  }
280  utility::MatrixWeighted xw(data1);
281  std::vector<std::string> vec1(4, "pos");
282  vec1[0]="neg";
283  vec1[1]="neg";
284
285  classifier::MatrixLookup ml1(data1);
286  classifier::Target target1(vec1);
287  utility::Matrix prediction1;
288  utility::Matrix result1(2,4);
289
290  xw(1,0).weight()=xw(1,1).weight()=0.0;
291 
292
293  classifier::MatrixLookupWeighted mlw2(xw);
294  classifier::NCC<statistics::EuclideanDistance> ncc2;
295  ncc2.train(mlw2,target1);
296  ncc2.predict(ml1,prediction1); 
297  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=sqrt(3.0);
298  result1(1,0)=result1(1,1)=sqrt(11.0);
299  result1(0,2)=result1(0,3)=sqrt(15.0);
300  if(!std::isnan(ncc2.centroids()(1,0))) 
301    suite.add(false);
302  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
303                         result1.begin())) {
304    suite.add(false);
305    suite.err() << "Difference to expected prediction too large\n";
306  }
307}
Note: See TracBrowser for help on using the repository browser.