source: trunk/test/knn_test.cc @ 1157

Last change on this file since 1157 was 1157, checked in by Markus Ringnér, 14 years ago

Refs #318

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 8.4 KB
Line 
1// $Id: knn_test.cc 1157 2008-02-26 13:25:19Z markus $
2
3/*
4  Copyright (C) 2007 Peter Johansson, Markus Ringnér
5
6  This file is part of the yat library, http://trac.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 2 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with this program; if not, write to the Free Software
20  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21  02111-1307, USA.
22*/
23
#include "yat/classifier/KNN.h"
#include "yat/classifier/KNN_ReciprocalDistance.h"
#include "yat/classifier/KNN_ReciprocalRank.h"
#include "yat/classifier/MatrixLookup.h"
#include "yat/classifier/MatrixLookupWeighted.h"
#include "yat/statistics/EuclideanDistance.h"
#include "yat/utility/Matrix.h"


#include <cassert>
#include <cmath>
#include <fstream>
#include <iostream>
#include <limits>
#include <list>
#include <string>
#include <vector>
39
40
41using namespace theplu::yat;
42
43double deviation(const utility::Matrix& a, const utility::Matrix& b) {
44  double sl=0;
45  for (size_t i=0; i<a.rows(); i++){
46    for (size_t j=0; j<a.columns(); j++){
47      sl += fabs(a(i,j)-b(i,j));
48    }
49  }
50  sl /= (a.columns()*a.rows());
51  return sl;
52}
53
54int main(const int argc,const char* argv[])
55
56{ 
57  std::ostream* error;
58  if (argc>1 && argv[1]==std::string("-v"))
59    error = &std::cerr;
60  else {
61    error = new std::ofstream("/dev/null");
62    if (argc>1)
63      std::cout << "knn_test -v : for printing extra information\n";
64  }
65  *error << "testing knn" << std::endl;
66  bool ok = true;
67
68  ////////////////////////////////////////////////////////////////
69  // A test of training and predictions using unweighted data
70  ////////////////////////////////////////////////////////////////
71  *error << "test of predictions using unweighted training and test data\n";
72  utility::Matrix data1(3,4);
73  for(size_t i=0;i<3;i++) {
74    data1(i,0)=3-i;
75    data1(i,1)=5-i;
76    data1(i,2)=i+1;
77    data1(i,3)=i+3;
78  }
79  std::vector<std::string> vec1(4, "pos");
80  vec1[0]="neg";
81  vec1[1]="neg";
82 
83  classifier::MatrixLookup ml1(data1);
84  classifier::Target target1(vec1);
85 
86  classifier::KNN<statistics::EuclideanDistance> knn1;
87  knn1.k(3);
88  knn1.train(ml1,target1);
89  utility::Matrix prediction1;
90  knn1.predict(ml1,prediction1);
91  double slack_bound=2e-7;
92  utility::Matrix result1(2,4);
93  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=2.0;
94  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=1.0;
95  double slack = deviation(prediction1,result1); 
96  if (slack > slack_bound || std::isnan(slack)){
97    *error << "Difference to expected prediction too large\n";
98    *error << "slack: " << slack << std::endl;
99    *error << "expected less than " << slack_bound << std::endl;
100    ok = false;
101  }
102 
103
104  ////////////////////////////////////////////////////////////////
105  // A test of training unweighted and test weighted
106  ////////////////////////////////////////////////////////////////
107  *error << "test of predictions using unweighted training and weighted test data\n";
108  utility::Matrix weights1(3,4,1.0);
109  weights1(2,0)=0;
110  classifier::MatrixLookupWeighted mlw1(data1,weights1);
111  knn1.predict(mlw1,prediction1); 
112  result1(0,0)=1.0;
113  result1(1,0)=2.0;
114  slack = deviation(prediction1,result1);
115  if (slack > slack_bound || std::isnan(slack)){
116    *error << "Difference to expected prediction too large\n";
117    *error << "slack: " << slack << std::endl;
118    *error << "expected less than " << slack_bound << std::endl;
119    ok = false;
120  } 
121
122  ////////////////////////////////////////////////////////////////
123  // A test of training and test both weighted
124  ////////////////////////////////////////////////////////////////
125  *error << "test of predictions using weighted training and test data\n";
126  *error << "... uniform neighbor weighting" << std::endl;
127  weights1(0,1)=0;
128  utility::Matrix weights2(3,4,1.0);
129  weights2(2,3)=0;
130  classifier::MatrixLookupWeighted mlw2(data1,weights2);
131  classifier::KNN<statistics::EuclideanDistance> knn2;
132  knn2.k(3);
133  knn2.train(mlw2,target1);
134  knn2.predict(mlw1,prediction1); 
135  result1(0,1)=1.0;
136  result1(1,1)=2.0;
137  slack = deviation(prediction1,result1);
138  if (slack > slack_bound || std::isnan(slack)){
139    *error << "Difference to expected prediction too large\n";
140    *error << "slack: " << slack << std::endl;
141    *error << "expected less than " << slack_bound << std::endl;
142    ok = false;
143  } 
144
145
146  ////////////////////////////////////////////////////////////////
147  // A test of reciprocal ranks weighting with training and test both weighted
148  ////////////////////////////////////////////////////////////////
149  *error << "... reciprokal rank neighbor weighting" << std::endl;
150  utility::Matrix data2(data1);
151  data2(1,3)=7;
152  classifier::MatrixLookupWeighted mlw3(data2,weights2);
153  classifier::KNN<statistics::EuclideanDistance,classifier::KNN_ReciprocalRank> knn3;
154  knn3.k(3);
155  knn3.train(mlw2,target1);
156  knn3.predict(mlw3,prediction1); 
157  result1(0,0)=result1(1,3)=1.0;
158  result1(0,3)=result1(1,0)=5.0/6.0;
159  result1(0,2)=result1(1,1)=1.0/2.0;
160  result1(0,1)=result1(1,2)=4.0/3.0;
161  slack = deviation(prediction1,result1); 
162  if (slack > slack_bound || std::isnan(slack)){
163    *error << "Difference to expected prediction too large\n";
164    *error << "slack: " << slack << std::endl;
165    *error << "expected less than " << slack_bound << std::endl;
166    ok = false;
167  }
168
169 
170  ////////////////////////////////////////////////////////////////
171  // A test of reciprocal distance weighting with training and test both weighted
172  ////////////////////////////////////////////////////////////////
173  *error << "... reciprokal distance neighbor weighting" << std::endl;
174  classifier::KNN<statistics::EuclideanDistance,classifier::KNN_ReciprocalDistance> 
175    knn4;
176  knn4.k(3);
177  knn4.train(mlw2,target1);
178  knn4.predict(mlw3,prediction1); 
179  if (!(std::isinf(prediction1(0,0)) && std::isinf(prediction1(0,1)) && 
180        std::isinf(prediction1(1,2)) && 
181        fabs(prediction1(1,3)-(1.0/3.67423461417))<slack_bound &&
182        fabs(prediction1(1,0)-(1.0/2.82842712475+1.0/2.44948974278))<slack_bound)){
183    *error << "Difference to expected prediction too large\n";
184    ok = false;
185  }
186
187
188  ////////////////////////////////////////////////////////////////
189  // A test of when a class has no training samples, should give nan
190  // in predictions. Also tests that k is reduced if not enough
191  // training samples.
192  ////////////////////////////////////////////////////////////////
193  //Keep only the second class in the training samples
194  std::vector<size_t> ind(2,2);
195  ind[1]=3;
196  classifier::Target target2(target1,utility::Index(ind));
197  classifier::MatrixLookupWeighted mlw4(data1,weights2,utility::Index(ind),false);
198  classifier::KNN<statistics::EuclideanDistance> knn5;
199  knn5.k(3);
200  knn5.train(mlw4,target2);
201  knn5.predict(mlw3,prediction1); 
202  if (!(std::isnan(prediction1(0,0)) && std::isnan(prediction1(0,1)) && 
203        std::isnan(prediction1(0,2)) && std::isnan(prediction1(0,3)) &&
204        fabs(prediction1(1,0)-2.0)<slack_bound &&
205        fabs(prediction1(1,1)-2.0)<slack_bound &&
206        fabs(prediction1(1,2)-2.0)<slack_bound &&
207        fabs(prediction1(1,3)-2.0)<slack_bound)) {
208    *error << "Difference to expected prediction too large\n";
209    ok = false;
210  }
211
212  ////////////////////////////////////////////////////////////////
213  // A test of when a test sample has no variables with non-zero
214  // weights in common with training samples: should not vote
215  ////////////////////////////////////////////////////////////////
216  *error << "test of predictions with nan distances (set to infinity in KNN)\n";
217  weights1.all(1);
218  weights1(1,0)=weights1(1,1)=weights1(2,0)=weights1(2,1)=0.0;
219  weights2.all(1);
220  weights2(0,0)=0.0;
221  classifier::KNN<statistics::EuclideanDistance> knn6;
222  knn6.k(3);
223  knn6.train(mlw1,target1);
224  knn6.predict(mlw3,prediction1); 
225  result1(0,0)=0;
226  result1(0,2)=result1(1,1)=result1(1,3)=1.0;
227  result1(0,1)=result1(0,3)=result1(1,0)=result1(1,2)=2.0;
228  slack = deviation(prediction1,result1); 
229  if (slack > slack_bound || std::isnan(slack)){
230    *error << "Difference to expected prediction too large\n";
231    *error << "slack: " << slack << std::endl;
232    *error << "expected less than " << slack_bound << std::endl;
233    ok = false;
234  }
235 
236
237
238  if(!ok) {
239    *error << "knn_test failed" << std::endl;
240  }
241  else {
242    *error << "OK" << std::endl;
243  }
244  if (error!=&std::cerr)
245    delete error;
246  if (ok=true) 
247    return 0;
248  return -1;
249}
250
251
Note: See TracBrowser for help on using the repository browser.