source: trunk/test/ncc_test.cc @ 1142

Last change on this file since 1142 was 1142, checked in by Markus Ringnér, 15 years ago

Refs #335, fixed for NCC, working on KNN

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 9.2 KB
Line 
1// $Id: ncc_test.cc 1142 2008-02-25 14:32:35Z markus $
2
3/*
4  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér
5
6  This file is part of the yat library, http://trac.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 2 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with this program; if not, write to the Free Software
20  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21  02111-1307, USA.
22*/
23
24#include "yat/classifier/IGP.h"
25#include "yat/classifier/Kernel_MEV.h"
26#include "yat/classifier/KernelLookup.h"
27#include "yat/classifier/MatrixLookup.h"
28#include "yat/classifier/MatrixLookupWeighted.h"
29#include "yat/classifier/NCC.h"
30#include "yat/classifier/PolynomialKernelFunction.h"
31#include "yat/classifier/Target.h"
32#include "yat/utility/Matrix.h"
33#include "yat/statistics/EuclideanDistance.h"
34#include "yat/statistics/PearsonDistance.h"
35#include "yat/utility/utility.h"
36
37#include <cassert>
38#include <fstream>
39#include <iostream>
40#include <stdexcept>
41#include <sstream>
42#include <string>
43#include <limits>
44#include <cmath>
45
46using namespace theplu::yat;
47
48double deviation(const utility::Matrix& a, const utility::Matrix& b) {
49  double sl=0;
50  for (size_t i=0; i<a.rows(); i++){
51    for (size_t j=0; j<a.columns(); j++){
52      sl += fabs(a(i,j)-b(i,j));
53    }
54  }
55  sl /= (a.columns()*a.rows());
56  return sl;
57}
58
59int main(const int argc,const char* argv[])
60{ 
61
62  std::ostream* error;
63  if (argc>1 && argv[1]==std::string("-v"))
64    error = &std::cerr;
65  else {
66    error = new std::ofstream("/dev/null");
67    if (argc>1)
68      std::cout << "ncc_test -v : for printing extra information\n";
69  }
70  *error << "testing ncc" << std::endl;
71  bool ok = true;
72
73  /////////////////////////////////////////////
74  // First test of constructor and training 
75  /////////////////////////////////////////////
76  classifier::MatrixLookup ml(4,4);
77  std::vector<std::string> vec(4, "pos");
78  vec[3]="bjds";
79  classifier::Target target(vec);
80  classifier::NCC<statistics::EuclideanDistance> ncctmp(ml,target);
81  *error << "training...\n";
82  ncctmp.train();
83  *error << "done\n";
84
85  /////////////////////////////////////////////
86  // A test of predictions using unweighted data
87  /////////////////////////////////////////////
88  *error << "test of predictions using unweighted test data\n";
89  utility::Matrix data1(3,4);
90  for(size_t i=0;i<3;i++) {
91    data1(i,0)=3-i;
92    data1(i,1)=5-i;
93    data1(i,2)=i+1;
94    data1(i,3)=i+3;
95  }
96  std::vector<std::string> vec1(4, "pos");
97  vec1[0]="neg";
98  vec1[1]="neg";
99
100  classifier::MatrixLookup ml1(data1);
101  classifier::Target target1(vec1);
102
103  classifier::NCC<statistics::EuclideanDistance> ncc1(ml1,target1);
104  ncc1.train();
105  utility::Matrix prediction1;
106  ncc1.predict(ml1,prediction1);
107  double slack_bound=2e-7;
108  utility::Matrix result1(2,4);
109  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=sqrt(3.0);
110  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=sqrt(11.0);
111  double slack = deviation(prediction1,result1); 
112  if (slack > slack_bound || std::isnan(slack)){
113    *error << "Difference to expected prediction too large\n";
114    *error << "slack: " << slack << std::endl;
115    *error << "expected less than " << slack_bound << std::endl;
116    ok = false;
117  }
118
119  //////////////////////////////////////////////////////////////////////////
120  // A test of predictions using unweighted training and weighted test data
121  //////////////////////////////////////////////////////////////////////////
122  *error << "test of predictions using unweighted training and weighted test data\n";
123  utility::Matrix weights1(3,4,1.0);
124  weights1(0,0)=weights1(1,1)=weights1(2,2)=weights1(1,3)=0.0;
125  classifier::MatrixLookupWeighted mlw1(data1,weights1);
126  ncc1.predict(mlw1,prediction1); 
127  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=sqrt(15.0);
128  slack = deviation(prediction1,result1);
129  if (slack > slack_bound || std::isnan(slack)){
130    *error << "Difference to expected prediction too large\n";
131    *error << "slack: " << slack << std::endl;
132    *error << "expected less than " << slack_bound << std::endl;
133    ok = false;
134  }
135
136  //////////////////////////////////////////////////////////////////////////
137  // A test of predictions using weighted training resulting in NaN's
138  // in centroids and unweighted test data
139  //////////////////////////////////////////////////////////////////////////
140  *error << "test of predictions using nan centroids and unweighted test data\n";
141  utility::Matrix weights2(3,4,1.0);
142  weights2(1,0)=weights2(1,1)=0.0;
143  classifier::MatrixLookupWeighted mlw2(data1,weights2);
144  classifier::NCC<statistics::EuclideanDistance> ncc2(mlw2,target1);
145  ncc2.train();
146  ncc2.predict(ml1,prediction1); 
147  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=sqrt(3.0);
148  result1(1,0)=result1(1,1)=sqrt(11.0);
149  result1(0,2)=result1(0,3)=sqrt(15.0);
150  slack = deviation(prediction1,result1);
151  if(!std::isnan(ncc2.centroids()(1,0))) 
152    ok=false;
153  if (slack > slack_bound || std::isnan(slack)){
154    *error << "Difference to expected prediction too large\n";
155    *error << "slack: " << slack << std::endl;
156    *error << "expected less than " << slack_bound << std::endl;
157    ok = false;
158  }
159
160  //////////////////////////////////////////////////////////////////////////
161  // A test of predictions when a centroid has nan for all variables that a
162  // test sample has non-zero weights for.
163  //////////////////////////////////////////////////////////////////////////
164  *error << "test of predictions using nan centroids and weighted test data\n";
165  *error << "... using EuclideanDistance" << std::endl;
166  weights1(0,0)=weights1(2,0)=0;
167  classifier::NCC<statistics::EuclideanDistance> ncc3(mlw2,target1);
168  ncc3.train();
169  ncc3.predict(mlw1,prediction1); 
170  if(!std::isnan(ncc3.centroids()(1,0))) {
171    ok=false;
172    *error << "Training failed: expected nan in centroid" << std::endl;
173  }
174  if(!(std::isnan(prediction1(0,0)) &&
175       fabs(prediction1(1,0)-sqrt(3.0))<slack_bound &&
176       fabs(prediction1(0,1)-sqrt(3.0))<slack_bound &&
177       fabs(prediction1(1,1)-sqrt(15.0))<slack_bound &&
178       fabs(prediction1(0,2)-sqrt(27.0))<slack_bound)) { 
179    ok=false; 
180    *error << "Test failed: predictions incorrect" << std::endl;
181  }
182  *error << "... using PearsonDistance" << std::endl;;
183  classifier::NCC<statistics::PearsonDistance> ncc4(mlw2,target1);
184  ncc4.train();
185  ncc4.predict(mlw1,prediction1); 
186  if(!std::isnan(ncc4.centroids()(1,0))) {
187    ok=false;
188    *error << "Training failed: expected nan in centroid" << std::endl;
189  }
190  if(!(std::isnan(prediction1(0,0)) &&
191       std::isnan(prediction1(0,2)) &&
192       std::isnan(prediction1(1,0)) &&
193       fabs(prediction1(0,1))<slack_bound &&
194       fabs(prediction1(1,2))<slack_bound &&
195       fabs(prediction1(1,3))<slack_bound && 
196       fabs(prediction1(0,3)-2.0)<slack_bound &&
197       fabs(prediction1(1,1)-2.0)<slack_bound)) {
198    ok=false; 
199    *error << "Test failed: predictions incorrect" << std::endl;
200  }
201
202
203  //////////////////////////////////////////////////////////////////////////
204  // A test of predictions using Sorlie data
205  //////////////////////////////////////////////////////////////////////////
206  *error << "test with Sorlie data\n";
207  std::ifstream is("data/sorlie_centroid_data.txt");
208  utility::Matrix data(is,'\t');
209  is.close();
210
211  is.open("data/sorlie_centroid_classes.txt");
212  classifier::Target targets(is);
213  is.close();
214
215  // Generate weight matrix with 0 for missing values and 1 for others.
216  utility::Matrix weights(data.rows(),data.columns(),0.0);
217  utility::nan(data,weights);
218     
219  classifier::MatrixLookupWeighted dataviewweighted(data,weights);
220  classifier::NCC<statistics::PearsonDistance> ncc(dataviewweighted,targets);
221  *error << "training...\n";
222  ncc.train();
223
224  // Comparing the centroids to stored result
225  is.open("data/sorlie_centroids.txt");
226  utility::Matrix centroids(is);
227  is.close();
228
229  if(centroids.rows() != ncc.centroids().rows() ||
230     centroids.columns() != ncc.centroids().columns()) {
231    *error << "Error in the dimensionality of centroids\n";
232    *error << "Nof rows: " << centroids.rows() << " expected: " 
233           << ncc.centroids().rows() << std::endl;
234    *error << "Nof columns: " << centroids.columns() << " expected: " 
235           << ncc.centroids().columns() << std::endl;
236  }
237
238  slack = deviation(centroids,ncc.centroids());
239  if (slack > slack_bound || std::isnan(slack)){
240    *error << "Difference to stored centroids too large\n";
241    *error << "slack: " << slack << std::endl;
242    *error << "expected less than " << slack_bound << std::endl;
243    ok = false;
244  }
245
246  *error << "...predicting...\n";
247  utility::Matrix prediction;
248  ncc.predict(dataviewweighted,prediction);
249 
250  // Comparing the prediction to stored result
251  is.open("data/sorlie_centroid_predictions.txt");
252  utility::Matrix result(is,'\t');
253  is.close();
254
255  slack = deviation(result,prediction);
256  if (slack > slack_bound || std::isnan(slack)){
257    *error << "Difference to stored prediction too large\n";
258    *error << "slack: " << slack << std::endl;
259    *error << "expected less than " << slack_bound << std::endl;
260    ok = false;
261  }
262  *error << "done\n";
263
264  if(ok)
265    *error << "OK" << std::endl;
266  else
267    *error << "FAILED" << std::endl;
268
269  if (error!=&std::cerr)
270    delete error;
271
272  if(ok) 
273    return 0;
274  return -1; 
275}
Note: See TracBrowser for help on using the repository browser.