source: trunk/test/ncc_test.cc @ 1158

Last change on this file since 1158 was 1158, checked in by Markus Ringnér, 14 years ago

Fixed #322

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 10.1 KB
Line 
1// $Id: ncc_test.cc 1158 2008-02-26 13:49:38Z markus $
2
3/*
4  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér
5
6  This file is part of the yat library, http://trac.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 2 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with this program; if not, write to the Free Software
20  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21  02111-1307, USA.
22*/
23
24#include "yat/classifier/Kernel_MEV.h"
25#include "yat/classifier/KernelLookup.h"
26#include "yat/classifier/MatrixLookup.h"
27#include "yat/classifier/MatrixLookupWeighted.h"
28#include "yat/classifier/NCC.h"
29#include "yat/classifier/PolynomialKernelFunction.h"
30#include "yat/classifier/Target.h"
31#include "yat/utility/Matrix.h"
32#include "yat/statistics/EuclideanDistance.h"
33#include "yat/statistics/PearsonDistance.h"
34#include "yat/utility/utility.h"
35
36#include <cassert>
37#include <fstream>
38#include <iostream>
39#include <stdexcept>
40#include <sstream>
41#include <string>
42#include <limits>
43#include <cmath>
44
45using namespace theplu::yat;
46
47double deviation(const utility::Matrix& a, const utility::Matrix& b) {
48  double sl=0;
49  for (size_t i=0; i<a.rows(); i++){
50    for (size_t j=0; j<a.columns(); j++){
51      sl += fabs(a(i,j)-b(i,j));
52    }
53  }
54  sl /= (a.columns()*a.rows());
55  return sl;
56}
57
58int main(const int argc,const char* argv[])
59{ 
60
61  std::ostream* error;
62  if (argc>1 && argv[1]==std::string("-v"))
63    error = &std::cerr;
64  else {
65    error = new std::ofstream("/dev/null");
66    if (argc>1)
67      std::cout << "ncc_test -v : for printing extra information\n";
68  }
69  *error << "testing ncc" << std::endl;
70  bool ok = true;
71
72  /////////////////////////////////////////////
73  // First test of constructor and training 
74  /////////////////////////////////////////////
75  classifier::MatrixLookup ml(4,4);
76  std::vector<std::string> vec(4, "pos");
77  vec[3]="bjds";
78  classifier::Target target(vec);
79  classifier::NCC<statistics::EuclideanDistance> ncctmp;
80  *error << "training...\n";
81  ncctmp.train(ml,target);
82  *error << "done\n";
83
84  /////////////////////////////////////////////
85  // A test of predictions using unweighted data
86  /////////////////////////////////////////////
87  *error << "test of predictions using unweighted test data\n";
88  utility::Matrix data1(3,4);
89  for(size_t i=0;i<3;i++) {
90    data1(i,0)=3-i;
91    data1(i,1)=5-i;
92    data1(i,2)=i+1;
93    data1(i,3)=i+3;
94  }
95  std::vector<std::string> vec1(4, "pos");
96  vec1[0]="neg";
97  vec1[1]="neg";
98
99  classifier::MatrixLookup ml1(data1);
100  classifier::Target target1(vec1);
101
102  classifier::NCC<statistics::EuclideanDistance> ncc1;
103  ncc1.train(ml1,target1);
104  utility::Matrix prediction1;
105  ncc1.predict(ml1,prediction1);
106  double slack_bound=2e-7;
107  utility::Matrix result1(2,4);
108  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=sqrt(3.0);
109  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=sqrt(11.0);
110  double slack = deviation(prediction1,result1); 
111  if (slack > slack_bound || std::isnan(slack)){
112    *error << "Difference to expected prediction too large\n";
113    *error << "slack: " << slack << std::endl;
114    *error << "expected less than " << slack_bound << std::endl;
115    ok = false;
116  }
117
118  //////////////////////////////////////////////////////////////////////////
119  // A test of predictions using unweighted training and weighted test data
120  //////////////////////////////////////////////////////////////////////////
121  *error << "test of predictions using unweighted training and weighted test data\n";
122  utility::Matrix weights1(3,4,1.0);
123  weights1(0,0)=weights1(1,1)=weights1(2,2)=weights1(1,3)=0.0;
124  classifier::MatrixLookupWeighted mlw1(data1,weights1);
125  ncc1.predict(mlw1,prediction1); 
126  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=sqrt(15.0);
127  slack = deviation(prediction1,result1);
128  if (slack > slack_bound || std::isnan(slack)){
129    *error << "Difference to expected prediction too large\n";
130    *error << "slack: " << slack << std::endl;
131    *error << "expected less than " << slack_bound << std::endl;
132    ok = false;
133  }
134
135  //////////////////////////////////////////////////////////////////////////
136  // A test of predictions using weighted training resulting in NaN's
137  // in centroids and unweighted test data
138  //////////////////////////////////////////////////////////////////////////
139  *error << "test of predictions using nan centroids and unweighted test data\n";
140  utility::Matrix weights2(3,4,1.0);
141  weights2(1,0)=weights2(1,1)=0.0;
142  classifier::MatrixLookupWeighted mlw2(data1,weights2);
143  classifier::NCC<statistics::EuclideanDistance> ncc2;
144  ncc2.train(mlw2,target1);
145  ncc2.predict(ml1,prediction1); 
146  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=sqrt(3.0);
147  result1(1,0)=result1(1,1)=sqrt(11.0);
148  result1(0,2)=result1(0,3)=sqrt(15.0);
149  slack = deviation(prediction1,result1);
150  if(!std::isnan(ncc2.centroids()(1,0))) 
151    ok=false;
152  if (slack > slack_bound || std::isnan(slack)){
153    *error << "Difference to expected prediction too large\n";
154    *error << "slack: " << slack << std::endl;
155    *error << "expected less than " << slack_bound << std::endl;
156    ok = false;
157  }
158
159  //////////////////////////////////////////////////////////////////////////
160  // A test of predictions when a centroid has nan for all variables that a
161  // test sample has non-zero weights for.
162  //////////////////////////////////////////////////////////////////////////
163  *error << "test of predictions using nan centroids and weighted test data\n";
164  *error << "... using EuclideanDistance" << std::endl;
165  weights1(0,0)=weights1(2,0)=0;
166  classifier::NCC<statistics::EuclideanDistance> ncc3;
167  ncc3.train(mlw2,target1);
168  ncc3.predict(mlw1,prediction1); 
169  if(!std::isnan(ncc3.centroids()(1,0))) {
170    ok=false;
171    *error << "Training failed: expected nan in centroid" << std::endl;
172  }
173  if(!(std::isnan(prediction1(0,0)) &&
174       fabs(prediction1(1,0)-sqrt(3.0))<slack_bound &&
175       fabs(prediction1(0,1)-sqrt(3.0))<slack_bound &&
176       fabs(prediction1(1,1)-sqrt(15.0))<slack_bound &&
177       fabs(prediction1(0,2)-sqrt(27.0))<slack_bound)) { 
178    ok=false; 
179    *error << "Test failed: predictions incorrect" << std::endl;
180  }
181  *error << "... using PearsonDistance" << std::endl;;
182  classifier::NCC<statistics::PearsonDistance> ncc4;
183  ncc4.train(mlw2,target1);
184  ncc4.predict(mlw1,prediction1); 
185  if(!std::isnan(ncc4.centroids()(1,0))) {
186    ok=false;
187    *error << "Training failed: expected nan in centroid" << std::endl;
188  }
189  if(!(std::isnan(prediction1(0,0)) &&
190       std::isnan(prediction1(0,2)) &&
191       std::isnan(prediction1(1,0)) &&
192       fabs(prediction1(0,1))<slack_bound &&
193       fabs(prediction1(1,2))<slack_bound &&
194       fabs(prediction1(1,3))<slack_bound && 
195       fabs(prediction1(0,3)-2.0)<slack_bound &&
196       fabs(prediction1(1,1)-2.0)<slack_bound)) {
197    ok=false; 
198    *error << "Test failed: predictions incorrect" << std::endl;
199  }
200
201  ////////////////////////////////////////////////////////////////
202  // A test of when a class has no training samples, should give nan
203  // in predictions.
204  ////////////////////////////////////////////////////////////////
205  //Keep only the second class in the training samples
206  std::vector<size_t> ind(2,2);
207  ind[1]=3;
208  classifier::Target target2(target1,utility::Index(ind));
209  classifier::MatrixLookupWeighted mlw3(data1,weights2,utility::Index(ind),false);
210  classifier::NCC<statistics::PearsonDistance> ncc5;
211  ncc5.train(mlw3,target2);
212  ncc5.predict(mlw1,prediction1); 
213  if (!(std::isnan(prediction1(0,0)) && std::isnan(prediction1(0,1)) && 
214        std::isnan(prediction1(0,2)) && std::isnan(prediction1(0,3)) &&
215        std::isnan(prediction1(1,0)) &&
216        fabs(prediction1(1,1)-2.0)<slack_bound &&
217        fabs(prediction1(1,2))<slack_bound &&
218        fabs(prediction1(1,3))<slack_bound)) {
219    *error << "Difference to expected prediction too large\n";
220    ok = false;
221  }
222
223  //////////////////////////////////////////////////////////////////////////
224  // A test of predictions using Sorlie data
225  //////////////////////////////////////////////////////////////////////////
226  *error << "test with Sorlie data\n";
227  std::ifstream is("data/sorlie_centroid_data.txt");
228  utility::Matrix data(is,'\t');
229  is.close();
230
231  is.open("data/sorlie_centroid_classes.txt");
232  classifier::Target targets(is);
233  is.close();
234
235  // Generate weight matrix with 0 for missing values and 1 for others.
236  utility::Matrix weights(data.rows(),data.columns(),0.0);
237  utility::nan(data,weights);
238     
239  classifier::MatrixLookupWeighted dataviewweighted(data,weights);
240  classifier::NCC<statistics::PearsonDistance> ncc;
241  *error << "training...\n";
242  ncc.train(dataviewweighted,targets);
243
244  // Comparing the centroids to stored result
245  is.open("data/sorlie_centroids.txt");
246  utility::Matrix centroids(is);
247  is.close();
248
249  if(centroids.rows() != ncc.centroids().rows() ||
250     centroids.columns() != ncc.centroids().columns()) {
251    *error << "Error in the dimensionality of centroids\n";
252    *error << "Nof rows: " << centroids.rows() << " expected: " 
253           << ncc.centroids().rows() << std::endl;
254    *error << "Nof columns: " << centroids.columns() << " expected: " 
255           << ncc.centroids().columns() << std::endl;
256  }
257
258  slack = deviation(centroids,ncc.centroids());
259  if (slack > slack_bound || std::isnan(slack)){
260    *error << "Difference to stored centroids too large\n";
261    *error << "slack: " << slack << std::endl;
262    *error << "expected less than " << slack_bound << std::endl;
263    ok = false;
264  }
265
266  *error << "...predicting...\n";
267  utility::Matrix prediction;
268  ncc.predict(dataviewweighted,prediction);
269 
270  // Comparing the prediction to stored result
271  is.open("data/sorlie_centroid_predictions.txt");
272  utility::Matrix result(is,'\t');
273  is.close();
274
275  slack = deviation(result,prediction);
276  if (slack > slack_bound || std::isnan(slack)){
277    *error << "Difference to stored prediction too large\n";
278    *error << "slack: " << slack << std::endl;
279    *error << "expected less than " << slack_bound << std::endl;
280    ok = false;
281  }
282  *error << "done\n";
283
284  if(ok)
285    *error << "OK" << std::endl;
286  else
287    *error << "FAILED" << std::endl;
288
289  if (error!=&std::cerr)
290    delete error;
291
292  if(ok) 
293    return 0;
294  return -1; 
295}
Note: See TracBrowser for help on using the repository browser.