source: trunk/test/ncc_test.cc @ 2338

Last change on this file since 2338 was 2338, checked in by Peter, 12 years ago

Adding an archetype class for the Distance concept and using that class in KNN and NCC. Adding CopyConstructible to the requirements for the Distance concept.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 11.3 KB
Line 
1// $Id: ncc_test.cc 2338 2010-10-16 05:00:12Z peter $
2
3/*
4  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér
5  Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér
6
7  This file is part of the yat library, http://dev.thep.lu.se/yat
8
9  The yat library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 3 of the
12  License, or (at your option) any later version.
13
14  The yat library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  General Public License for more details.
18
19  You should have received a copy of the GNU General Public License
20  along with yat. If not, see <http://www.gnu.org/licenses/>.
21*/
22
23#include "Suite.h"
24
25#include "yat/classifier/MatrixLookup.h"
26#include "yat/classifier/MatrixLookupWeighted.h"
27#include "yat/classifier/NCC.h"
28#include "yat/classifier/Target.h"
29#include "yat/utility/DataIterator.h"
30#include "yat/utility/DataWeight.h"
31#include "yat/utility/Matrix.h"
32#include "yat/utility/MatrixWeighted.h"
33#include "yat/statistics/EuclideanDistance.h"
34#include "yat/statistics/PearsonDistance.h"
35#include "yat/utility/utility.h"
36
37#include <cassert>
38#include <fstream>
39#include <iostream>
40#include <stdexcept>
41#include <sstream>
42#include <string>
43#include <limits>
44#include <cmath>
45
46using namespace theplu::yat;
47
48void predict_nan_data_unweighted_data(test::Suite& suite);
49void compile_test(test::Suite& suite);
50
51int main(int argc,char* argv[])
52{ 
53  test::Suite suite(argc, argv);
54  suite.err() << "testing ncc" << std::endl;
55
56  predict_nan_data_unweighted_data(suite);
57
58  /////////////////////////////////////////////
59  // First test of constructor and training 
60  /////////////////////////////////////////////
61  classifier::MatrixLookup ml(4,4);
62  std::vector<std::string> vec(4, "pos");
63  vec[3]="bjds";
64  classifier::Target target(vec);
65  classifier::NCC<statistics::EuclideanDistance> ncctmp;
66  suite.err() << "training...\n";
67  ncctmp.train(ml,target);
68  suite.err() << "done\n";
69
70  /////////////////////////////////////////////
71  // A test of predictions using unweighted data
72  /////////////////////////////////////////////
73  suite.err() << "test of predictions using unweighted test data\n";
74  utility::Matrix data1(3,4);
75  for(size_t i=0;i<3;i++) {
76    data1(i,0)=3-i;
77    data1(i,1)=5-i;
78    data1(i,2)=i+1;
79    data1(i,3)=i+3;
80  }
81  std::vector<std::string> vec1(4, "pos");
82  vec1[0]="neg";
83  vec1[1]="neg";
84
85  classifier::MatrixLookup ml1(data1);
86  classifier::Target target1(vec1);
87
88  classifier::NCC<statistics::EuclideanDistance> ncc1;
89  ncc1.train(ml1,target1);
90  utility::Matrix prediction1;
91  ncc1.predict(ml1,prediction1);
92  utility::Matrix result1(2,4);
93  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=sqrt(3.0);
94  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=sqrt(11.0);
95  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
96                         result1.begin())) {
97    suite.add(false);
98    suite.err() << "Difference to expected prediction too large\n";
99  }
100
101  //////////////////////////////////////////////////////////////////////////
102  // A test of predictions using unweighted training and weighted test data
103  //////////////////////////////////////////////////////////////////////////
104  suite.err() << "test of predictions using unweighted training and weighted test data\n";
105  utility::MatrixWeighted xw11(3,4);
106  xw11(0,0)=xw11(1,1)=xw11(2,2)=xw11(1,3)=utility::DataWeight(0,0);
107  std::copy(data1.begin(), data1.end(), utility::data_iterator(xw11.begin()));
108  classifier::MatrixLookupWeighted mlw1(xw11);
109  //classifier::MatrixLookupWeighted mlw1(data1,weights1);
110  ncc1.predict(mlw1,prediction1); 
111  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=sqrt(15.0);
112  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
113                         result1.begin())) {
114    suite.add(false);
115    suite.err() << "Difference to expected prediction too large\n";
116  }
117
118  //////////////////////////////////////////////////////////////////////////
119  // A test of predictions using weighted training resulting in NaN's
120  // in centroids and unweighted test data
121  //////////////////////////////////////////////////////////////////////////
122  suite.err() << "test of predictions using nan centroids and unweighted test data\n";
123  utility::MatrixWeighted xw12(3,4);
124  xw12(1,0)=xw12(1,1)=utility::DataWeight(0,0);
125  std::copy(data1.begin(), data1.end(), utility::data_iterator(xw12.begin()));
126  classifier::MatrixLookupWeighted mlw2(xw12);
127  //classifier::MatrixLookupWeighted mlw2(data1,weights2);
128  classifier::NCC<statistics::EuclideanDistance> ncc2;
129  ncc2.train(mlw2,target1);
130  ncc2.predict(ml1,prediction1); 
131  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=sqrt(3.0);
132  result1(1,0)=result1(1,1)=sqrt(11.0);
133  result1(0,2)=result1(0,3)=sqrt(15.0);
134  if(!std::isnan(ncc2.centroids()(1,0))) 
135    suite.add(false);
136  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
137                         result1.begin())) {
138    suite.add(false);
139    suite.err() << "Difference to expected prediction too large\n";
140  }
141
142  //////////////////////////////////////////////////////////////////////////
143  // A test of predictions when a centroid has nan for all variables that a
144  // test sample has non-zero weights for.
145  //////////////////////////////////////////////////////////////////////////
146  suite.err() << "test of predictions using nan centroids and weighted test data\n";
147  suite.err() << "... using EuclideanDistance" << std::endl;
148  xw11(0,0).weight() = xw11(2,0).weight()=0;
149  classifier::NCC<statistics::EuclideanDistance> ncc3;
150  ncc3.train(mlw2,target1);
151  ncc3.predict(mlw1,prediction1); 
152  if(!std::isnan(ncc3.centroids()(1,0))) {
153    suite.add(false);
154    suite.err() << "Training failed: expected nan in centroid" << std::endl;
155  }
156  if(!(std::isnan(prediction1(0,0)) &&
157       suite.equal(prediction1(1,0),sqrt(3.0)) &&
158       suite.equal(prediction1(0,1),sqrt(3.0)) &&
159       suite.equal(prediction1(1,1),sqrt(15.0)) &&
160       suite.equal(prediction1(0,2),sqrt(27.0)) )) { 
161    suite.add(false);
162    if (!std::isnan(prediction1(0,0)))
163      suite.err() << "prediction1(0,0): " << prediction1(0,0) << "  "
164                  << "expected NaN\n";
165    suite.err() << "Test failed: predictions incorrect" << std::endl;
166  }
167  suite.err() << "... using PearsonDistance" << std::endl;;
168  classifier::NCC<statistics::PearsonDistance> ncc4;
169  ncc4.train(mlw2,target1);
170  ncc4.predict(mlw1,prediction1); 
171  if(!std::isnan(ncc4.centroids()(1,0))) {
172    suite.add(false);
173    suite.err() << "Training failed: expected nan in centroid" << std::endl;
174  }
175  if(!(std::isnan(prediction1(0,0)) &&
176       std::isnan(prediction1(0,2)) &&
177       std::isnan(prediction1(1,0)) &&
178       suite.equal(prediction1(0,1), 0) &&
179       suite.equal(prediction1(1,2), 0) &&
180       suite.equal(prediction1(1,3), 0) && 
181       suite.equal(prediction1(0,3), 2.0) &&
182       suite.equal(prediction1(1,1), 2.0) )) {
183    suite.add(false); 
184    suite.err() << "Test failed: predictions incorrect" << std::endl;
185  }
186
187  ////////////////////////////////////////////////////////////////
188  // A test of when a class has no training samples, should give nan
189  // in predictions.
190  ////////////////////////////////////////////////////////////////
191  //Keep only the second class in the training samples
192  std::vector<size_t> ind(2,2);
193  ind[1]=3;
194  classifier::Target target2(target1,utility::Index(ind));
195  classifier::MatrixLookupWeighted mlw3(xw12,
196                                        utility::Index(data1.rows()),
197                                        utility::Index(ind));
198  classifier::NCC<statistics::PearsonDistance> ncc5;
199  ncc5.train(mlw3,target2);
200  ncc5.predict(mlw1,prediction1); 
201  if (!(std::isnan(prediction1(0,0)) && std::isnan(prediction1(0,1)) && 
202        std::isnan(prediction1(0,2)) && std::isnan(prediction1(0,3)) &&
203        std::isnan(prediction1(1,0)) &&
204        suite.equal(prediction1(1,1), 2.0) &&
205        suite.equal(prediction1(1,2),0) &&
206        suite.equal(prediction1(1,3),0) )) {
207    suite.err() << "Difference to expected prediction too large\n";
208    suite.add(false);
209  }
210
211  //////////////////////////////////////////////////////////////////////////
212  // A test of predictions using Sorlie data
213  //////////////////////////////////////////////////////////////////////////
214  suite.err() << "test with Sorlie data\n";
215  std::ifstream is(test::filename("data/sorlie_centroid_data.txt").c_str());
216  utility::MatrixWeighted data_weight(is,'\t');
217  is.close();
218
219  is.open(test::filename("data/sorlie_centroid_classes.txt").c_str());
220  classifier::Target targets(is);
221  is.close();
222
223  classifier::MatrixLookupWeighted dataviewweighted(data_weight);
224  classifier::NCC<statistics::PearsonDistance> ncc;
225  suite.err() << "training...\n";
226  ncc.train(dataviewweighted,targets);
227
228  // Comparing the centroids to stored result
229  is.open(test::filename("data/sorlie_centroids.txt").c_str());
230  utility::Matrix centroids(is);
231  is.close();
232
233  if(centroids.rows() != ncc.centroids().rows() ||
234     centroids.columns() != ncc.centroids().columns()) {
235    suite.err() << "Error in the dimensionality of centroids\n";
236    suite.err() << "Nof rows: " << centroids.rows() << " expected: " 
237           << ncc.centroids().rows() << std::endl;
238    suite.err() << "Nof columns: " << centroids.columns() << " expected: " 
239           << ncc.centroids().columns() << std::endl;
240  }
241
242  if (!suite.equal_range_fix(centroids.begin(), centroids.end(),
243                             ncc.centroids().begin(), 1e-11)) {
244    suite.add(false);
245    suite.err() << "Difference to stored centroids too large\n";
246  }
247
248  suite.err() << "...predicting...\n";
249  utility::Matrix prediction;
250  ncc.predict(dataviewweighted,prediction);
251 
252  // Comparing the prediction to stored result
253  is.open(test::filename("data/sorlie_centroid_predictions.txt").c_str());
254  utility::Matrix result(is,'\t');
255  is.close();
256
257  if (!suite.equal_range_fix(result.begin(), result.end(),
258                             prediction.begin(), 1e-11)) {
259    suite.add(false);
260    suite.err() << "Difference to stored prediction too large\n";
261  }
262  compile_test(suite);
263
264  return suite.return_value();
265}
266
267
268void compile_test(test::Suite& suite)
269{
270  if (false) {
271    boost::detail::dummy_constructor dummy;
272    test::distance_archetype distance(dummy);
273    classifier::NCC<test::distance_archetype> ncc(distance);
274  }
275}
276
277
278void predict_nan_data_unweighted_data(test::Suite& suite)
279{
280  //////////////////////////////////////////////////////////////////////////
281  // A test of predictions using weighted training resulting in NaN's
282  // in centroids and unweighted test data
283  //////////////////////////////////////////////////////////////////////////
284  suite.err() << "test of predictions using nan centroids and unweighted test data\n";
285  utility::Matrix data1(3,4);
286  for(size_t i=0;i<3;i++) {
287    data1(i,0)=3-i;
288    data1(i,1)=5-i;
289    data1(i,2)=i+1;
290    data1(i,3)=i+3;
291  }
292  utility::MatrixWeighted xw(data1);
293  std::vector<std::string> vec1(4, "pos");
294  vec1[0]="neg";
295  vec1[1]="neg";
296
297  classifier::MatrixLookup ml1(data1);
298  classifier::Target target1(vec1);
299  utility::Matrix prediction1;
300  utility::Matrix result1(2,4);
301
302  xw(1,0).weight()=xw(1,1).weight()=0.0;
303 
304
305  classifier::MatrixLookupWeighted mlw2(xw);
306  classifier::NCC<statistics::EuclideanDistance> ncc2;
307  ncc2.train(mlw2,target1);
308  ncc2.predict(ml1,prediction1); 
309  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=sqrt(3.0);
310  result1(1,0)=result1(1,1)=sqrt(11.0);
311  result1(0,2)=result1(0,3)=sqrt(15.0);
312  if(!std::isnan(ncc2.centroids()(1,0))) 
313    suite.add(false);
314  if (!suite.equal_range(prediction1.begin(), prediction1.end(),
315                         result1.begin())) {
316    suite.add(false);
317    suite.err() << "Difference to expected prediction too large\n";
318  }
319}
Note: See TracBrowser for help on using the repository browser.