source: trunk/test/ncc_test.cc @ 1437

Last change on this file since 1437 was 1437, checked in by Peter, 13 years ago

merge patch release 0.4.2 to trunk. Delta 0.4.2-0.4.1

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 9.3 KB
Line 
1// $Id: ncc_test.cc 1437 2008-08-25 17:55:00Z peter $
2
3/*
4  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér
5  Copyright (C) 2007 Jari Häkkinen, Peter Johansson, Markus Ringnér
6  Copyright (C) 2008 Peter Johansson, Markus Ringnér
7
8  This file is part of the yat library, http://dev.thep.lu.se/yat
9
10  The yat library is free software; you can redistribute it and/or
11  modify it under the terms of the GNU General Public License as
12  published by the Free Software Foundation; either version 2 of the
13  License, or (at your option) any later version.
14
15  The yat library is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  General Public License for more details.
19
20  You should have received a copy of the GNU General Public License
21  along with this program; if not, write to the Free Software
22  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23  02111-1307, USA.
24*/
25
26#include "Suite.h"
27
28#include "yat/classifier/MatrixLookup.h"
29#include "yat/classifier/MatrixLookupWeighted.h"
30#include "yat/classifier/NCC.h"
31#include "yat/classifier/Target.h"
32#include "yat/utility/Matrix.h"
33#include "yat/statistics/EuclideanDistance.h"
34#include "yat/statistics/PearsonDistance.h"
35#include "yat/utility/utility.h"
36
37#include <cassert>
38#include <fstream>
39#include <iostream>
40#include <stdexcept>
41#include <sstream>
42#include <string>
43#include <limits>
44#include <cmath>
45
46using namespace theplu::yat;
47
48int main(int argc,char* argv[])
49{ 
50  test::Suite suite(argc, argv);
51  suite.err() << "testing ncc" << std::endl;
52
53  /////////////////////////////////////////////
54  // First test of constructor and training 
55  /////////////////////////////////////////////
56  classifier::MatrixLookup ml(4,4);
57  std::vector<std::string> vec(4, "pos");
58  vec[3]="bjds";
59  classifier::Target target(vec);
60  classifier::NCC<statistics::EuclideanDistance> ncctmp;
61  suite.err() << "training...\n";
62  ncctmp.train(ml,target);
63  suite.err() << "done\n";
64
65  /////////////////////////////////////////////
66  // A test of predictions using unweighted data
67  /////////////////////////////////////////////
68  suite.err() << "test of predictions using unweighted test data\n";
69  utility::Matrix data1(3,4);
70  for(size_t i=0;i<3;i++) {
71    data1(i,0)=3-i;
72    data1(i,1)=5-i;
73    data1(i,2)=i+1;
74    data1(i,3)=i+3;
75  }
76  std::vector<std::string> vec1(4, "pos");
77  vec1[0]="neg";
78  vec1[1]="neg";
79
80  classifier::MatrixLookup ml1(data1);
81  classifier::Target target1(vec1);
82
83  classifier::NCC<statistics::EuclideanDistance> ncc1;
84  ncc1.train(ml1,target1);
85  utility::Matrix prediction1;
86  ncc1.predict(ml1,prediction1);
87  utility::Matrix result1(2,4);
88  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=sqrt(3.0);
89  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=sqrt(11.0);
90  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
91                         result1.begin())) {
92    suite.add(false);
93    suite.err() << "Difference to expected prediction too large\n";
94  }
95
96  //////////////////////////////////////////////////////////////////////////
97  // A test of predictions using unweighted training and weighted test data
98  //////////////////////////////////////////////////////////////////////////
99  suite.err() << "test of predictions using unweighted training and weighted test data\n";
100  utility::Matrix weights1(3,4,1.0);
101  weights1(0,0)=weights1(1,1)=weights1(2,2)=weights1(1,3)=0.0;
102  classifier::MatrixLookupWeighted mlw1(data1,weights1);
103  ncc1.predict(mlw1,prediction1); 
104  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=sqrt(15.0);
105  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
106                         result1.begin())) {
107    suite.add(false);
108    suite.err() << "Difference to expected prediction too large\n";
109  }
110
111  //////////////////////////////////////////////////////////////////////////
112  // A test of predictions using weighted training resulting in NaN's
113  // in centroids and unweighted test data
114  //////////////////////////////////////////////////////////////////////////
115  suite.err() << "test of predictions using nan centroids and unweighted test data\n";
116  utility::Matrix weights2(3,4,1.0);
117  weights2(1,0)=weights2(1,1)=0.0;
118  classifier::MatrixLookupWeighted mlw2(data1,weights2);
119  classifier::NCC<statistics::EuclideanDistance> ncc2;
120  ncc2.train(mlw2,target1);
121  ncc2.predict(ml1,prediction1); 
122  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=sqrt(3.0);
123  result1(1,0)=result1(1,1)=sqrt(11.0);
124  result1(0,2)=result1(0,3)=sqrt(15.0);
125  if(!std::isnan(ncc2.centroids()(1,0))) 
126    suite.add(false);
127  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
128                         result1.begin())) {
129    suite.add(false);
130    suite.err() << "Difference to expected prediction too large\n";
131  }
132
133  //////////////////////////////////////////////////////////////////////////
134  // A test of predictions when a centroid has nan for all variables that a
135  // test sample has non-zero weights for.
136  //////////////////////////////////////////////////////////////////////////
137  suite.err() << "test of predictions using nan centroids and weighted test data\n";
138  suite.err() << "... using EuclideanDistance" << std::endl;
139  weights1(0,0)=weights1(2,0)=0;
140  classifier::NCC<statistics::EuclideanDistance> ncc3;
141  ncc3.train(mlw2,target1);
142  ncc3.predict(mlw1,prediction1); 
143  if(!std::isnan(ncc3.centroids()(1,0))) {
144    suite.add(false);
145    suite.err() << "Training failed: expected nan in centroid" << std::endl;
146  }
147  if(!(std::isnan(prediction1(0,0)) &&
148       suite.equal(prediction1(1,0),sqrt(3.0)) &&
149       suite.equal(prediction1(0,1),sqrt(3.0)) &&
150       suite.equal(prediction1(1,1),sqrt(15.0)) &&
151       suite.equal(prediction1(0,2),sqrt(27.0)) )) { 
152    suite.add(false);
153    suite.err() << "Test failed: predictions incorrect" << std::endl;
154  }
155  suite.err() << "... using PearsonDistance" << std::endl;;
156  classifier::NCC<statistics::PearsonDistance> ncc4;
157  ncc4.train(mlw2,target1);
158  ncc4.predict(mlw1,prediction1); 
159  if(!std::isnan(ncc4.centroids()(1,0))) {
160    suite.add(false);
161    suite.err() << "Training failed: expected nan in centroid" << std::endl;
162  }
163  if(!(std::isnan(prediction1(0,0)) &&
164       std::isnan(prediction1(0,2)) &&
165       std::isnan(prediction1(1,0)) &&
166       suite.equal(prediction1(0,1), 0) &&
167       suite.equal(prediction1(1,2), 0) &&
168       suite.equal(prediction1(1,3), 0) && 
169       suite.equal(prediction1(0,3), 2.0) &&
170       suite.equal(prediction1(1,1), 2.0) )) {
171    suite.add(false); 
172    suite.err() << "Test failed: predictions incorrect" << std::endl;
173  }
174
175  ////////////////////////////////////////////////////////////////
176  // A test of when a class has no training samples, should give nan
177  // in predictions.
178  ////////////////////////////////////////////////////////////////
179  //Keep only the second class in the training samples
180  std::vector<size_t> ind(2,2);
181  ind[1]=3;
182  classifier::Target target2(target1,utility::Index(ind));
183  classifier::MatrixLookupWeighted mlw3(data1,weights2,utility::Index(ind),false);
184  classifier::NCC<statistics::PearsonDistance> ncc5;
185  ncc5.train(mlw3,target2);
186  ncc5.predict(mlw1,prediction1); 
187  if (!(std::isnan(prediction1(0,0)) && std::isnan(prediction1(0,1)) && 
188        std::isnan(prediction1(0,2)) && std::isnan(prediction1(0,3)) &&
189        std::isnan(prediction1(1,0)) &&
190        suite.equal(prediction1(1,1), 2.0) &&
191        suite.equal(prediction1(1,2),0) &&
192        suite.equal(prediction1(1,3),0) )) {
193    suite.err() << "Difference to expected prediction too large\n";
194    suite.add(false);
195  }
196
197  //////////////////////////////////////////////////////////////////////////
198  // A test of predictions using Sorlie data
199  //////////////////////////////////////////////////////////////////////////
200  suite.err() << "test with Sorlie data\n";
201  std::ifstream is(test::filename("data/sorlie_centroid_data.txt").c_str());
202  utility::Matrix data(is,'\t');
203  is.close();
204
205  is.open(test::filename("data/sorlie_centroid_classes.txt").c_str());
206  classifier::Target targets(is);
207  is.close();
208
209  // Generate weight matrix with 0 for missing values and 1 for others.
210  utility::Matrix weights(data.rows(),data.columns(),0.0);
211  utility::nan(data,weights);
212     
213  classifier::MatrixLookupWeighted dataviewweighted(data,weights);
214  classifier::NCC<statistics::PearsonDistance> ncc;
215  suite.err() << "training...\n";
216  ncc.train(dataviewweighted,targets);
217
218  // Comparing the centroids to stored result
219  is.open(test::filename("data/sorlie_centroids.txt").c_str());
220  utility::Matrix centroids(is);
221  is.close();
222
223  if(centroids.rows() != ncc.centroids().rows() ||
224     centroids.columns() != ncc.centroids().columns()) {
225    suite.err() << "Error in the dimensionality of centroids\n";
226    suite.err() << "Nof rows: " << centroids.rows() << " expected: " 
227           << ncc.centroids().rows() << std::endl;
228    suite.err() << "Nof columns: " << centroids.columns() << " expected: " 
229           << ncc.centroids().columns() << std::endl;
230  }
231
232  if (!suite.equal_range(centroids.begin(), centroids.end(),
233                         ncc.centroids().begin(), 100000)) {
234    suite.add(false);
235    suite.err() << "Difference to stored centroids too large\n";
236  }
237
238  suite.err() << "...predicting...\n";
239  utility::Matrix prediction;
240  ncc.predict(dataviewweighted,prediction);
241 
242  // Comparing the prediction to stored result
243  is.open(test::filename("data/sorlie_centroid_predictions.txt").c_str());
244  utility::Matrix result(is,'\t');
245  is.close();
246
247  if (!suite.equal_range(result.begin(), result.end(),
248                         prediction.begin(), 100000)) {
249    suite.add(false);
250    suite.err() << "Difference to stored prediction too large\n";
251  }
252
253  return suite.return_value();
254}
Note: See TracBrowser for help on using the repository browser.