source: trunk/test/knn_test.cc @ 1155

Last change on this file since 1155 was 1155, checked in by Markus Ringnér, 14 years ago

Refs. #335, fixed for KNN

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 8.4 KB
Line 
1// $Id: knn_test.cc 1155 2008-02-26 08:46:43Z markus $
2
3/*
4  Copyright (C) 2007 Peter Johansson, Markus Ringnér
5
6  This file is part of the yat library, http://trac.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 2 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with this program; if not, write to the Free Software
20  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21  02111-1307, USA.
22*/
23
24#include "yat/classifier/KNN.h"
25#include "yat/classifier/KNN_ReciprocalDistance.h"
26#include "yat/classifier/KNN_ReciprocalRank.h"
27#include "yat/classifier/MatrixLookup.h"
28#include "yat/classifier/MatrixLookupWeighted.h"
29#include "yat/statistics/EuclideanDistance.h"
30#include "yat/utility/Matrix.h"
31
32
33#include <cassert>
34#include <fstream>
35#include <iostream>
36#include <list>
37#include <string>
38#include <vector>
39
40
41using namespace theplu::yat;
42
43double deviation(const utility::Matrix& a, const utility::Matrix& b) {
44  double sl=0;
45  for (size_t i=0; i<a.rows(); i++){
46    for (size_t j=0; j<a.columns(); j++){
47      sl += fabs(a(i,j)-b(i,j));
48    }
49  }
50  sl /= (a.columns()*a.rows());
51  return sl;
52}
53
54int main(const int argc,const char* argv[])
55
56{ 
57  std::ostream* error;
58  if (argc>1 && argv[1]==std::string("-v"))
59    error = &std::cerr;
60  else {
61    error = new std::ofstream("/dev/null");
62    if (argc>1)
63      std::cout << "knn_test -v : for printing extra information\n";
64  }
65  *error << "testing knn" << std::endl;
66  bool ok = true;
67
68  ////////////////////////////////////////////////////////////////
69  // A test of training and predictions using unweighted data
70  ////////////////////////////////////////////////////////////////
71  *error << "test of predictions using unweighted training and test data\n";
72  utility::Matrix data1(3,4);
73  for(size_t i=0;i<3;i++) {
74    data1(i,0)=3-i;
75    data1(i,1)=5-i;
76    data1(i,2)=i+1;
77    data1(i,3)=i+3;
78  }
79  std::vector<std::string> vec1(4, "pos");
80  vec1[0]="neg";
81  vec1[1]="neg";
82 
83  classifier::MatrixLookup ml1(data1);
84  classifier::Target target1(vec1);
85 
86  classifier::KNN<statistics::EuclideanDistance> knn1(ml1,target1);
87  knn1.k(3);
88  knn1.train();
89  utility::Matrix prediction1;
90  knn1.predict(ml1,prediction1);
91  double slack_bound=2e-7;
92  utility::Matrix result1(2,4);
93  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=2.0;
94  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=1.0;
95  double slack = deviation(prediction1,result1); 
96  if (slack > slack_bound || std::isnan(slack)){
97    *error << "Difference to expected prediction too large\n";
98    *error << "slack: " << slack << std::endl;
99    *error << "expected less than " << slack_bound << std::endl;
100    ok = false;
101  }
102 
103
104  ////////////////////////////////////////////////////////////////
105  // A test of training unweighted and test weighted
106  ////////////////////////////////////////////////////////////////
107  *error << "test of predictions using unweighted training and weighted test data\n";
108  utility::Matrix weights1(3,4,1.0);
109  weights1(2,0)=0;
110  classifier::MatrixLookupWeighted mlw1(data1,weights1);
111  knn1.predict(mlw1,prediction1); 
112  result1(0,0)=1.0;
113  result1(1,0)=2.0;
114  slack = deviation(prediction1,result1);
115  if (slack > slack_bound || std::isnan(slack)){
116    *error << "Difference to expected prediction too large\n";
117    *error << "slack: " << slack << std::endl;
118    *error << "expected less than " << slack_bound << std::endl;
119    ok = false;
120  } 
121
122  ////////////////////////////////////////////////////////////////
123  // A test of training and test both weighted
124  ////////////////////////////////////////////////////////////////
125  *error << "test of predictions using weighted training and test data\n";
126  *error << "... uniform neighbor weighting" << std::endl;
127  weights1(0,1)=0;
128  utility::Matrix weights2(3,4,1.0);
129  weights2(2,3)=0;
130  classifier::MatrixLookupWeighted mlw2(data1,weights2);
131  classifier::KNN<statistics::EuclideanDistance> knn2(mlw2,target1);
132  knn2.k(3);
133  knn2.train();
134  knn2.predict(mlw1,prediction1); 
135  result1(0,1)=1.0;
136  result1(1,1)=2.0;
137  slack = deviation(prediction1,result1);
138  if (slack > slack_bound || std::isnan(slack)){
139    *error << "Difference to expected prediction too large\n";
140    *error << "slack: " << slack << std::endl;
141    *error << "expected less than " << slack_bound << std::endl;
142    ok = false;
143  } 
144
145
146  ////////////////////////////////////////////////////////////////
147  // A test of reciprocal ranks weighting with training and test both weighted
148  ////////////////////////////////////////////////////////////////
149  *error << "... reciprokal rank neighbor weighting" << std::endl;
150  utility::Matrix data2(data1);
151  data2(1,3)=7;
152  classifier::MatrixLookupWeighted mlw3(data2,weights2);
153  classifier::KNN<statistics::EuclideanDistance,classifier::KNN_ReciprocalRank> 
154    knn3(mlw2,target1);
155  knn3.k(3);
156  knn3.train();
157  knn3.predict(mlw3,prediction1); 
158  result1(0,0)=result1(1,3)=1.0;
159  result1(0,3)=result1(1,0)=5.0/6.0;
160  result1(0,2)=result1(1,1)=1.0/2.0;
161  result1(0,1)=result1(1,2)=4.0/3.0;
162  slack = deviation(prediction1,result1); 
163  if (slack > slack_bound || std::isnan(slack)){
164    *error << "Difference to expected prediction too large\n";
165    *error << "slack: " << slack << std::endl;
166    *error << "expected less than " << slack_bound << std::endl;
167    ok = false;
168  }
169
170 
171  ////////////////////////////////////////////////////////////////
172  // A test of reciprocal distance weighting with training and test both weighted
173  ////////////////////////////////////////////////////////////////
174  *error << "... reciprokal distance neighbor weighting" << std::endl;
175  classifier::KNN<statistics::EuclideanDistance,classifier::KNN_ReciprocalDistance> 
176    knn4(mlw2,target1);
177  knn4.k(3);
178  knn4.train();
179  knn4.predict(mlw3,prediction1); 
180  if (!(std::isinf(prediction1(0,0)) && std::isinf(prediction1(0,1)) && 
181        std::isinf(prediction1(1,2)) && 
182        fabs(prediction1(1,3)-(1.0/3.67423461417))<slack_bound &&
183        fabs(prediction1(1,0)-(1.0/2.82842712475+1.0/2.44948974278))<slack_bound)){
184    *error << "Difference to expected prediction too large\n";
185    ok = false;
186  }
187
188
189  ////////////////////////////////////////////////////////////////
190  // A test of when a class has no training samples, should give nan
191  // in predictions. Also tests that k is reduced if not enough
192  // training samples.
193  ////////////////////////////////////////////////////////////////
194  //Keep only the second class in the training samples
195  std::vector<size_t> ind(2,2);
196  ind[1]=3;
197  classifier::Target target2(target1,utility::Index(ind));
198  classifier::MatrixLookupWeighted mlw4(data1,weights2,utility::Index(ind),false);
199  classifier::KNN<statistics::EuclideanDistance> knn5(mlw4,target2);
200  knn5.k(3);
201  knn5.train();
202  knn5.predict(mlw3,prediction1); 
203  if (!(std::isnan(prediction1(0,0)) && std::isnan(prediction1(0,1)) && 
204        std::isnan(prediction1(0,2)) && std::isnan(prediction1(0,3)) &&
205        fabs(prediction1(1,0)-2.0)<slack_bound &&
206        fabs(prediction1(1,1)-2.0)<slack_bound &&
207        fabs(prediction1(1,2)-2.0)<slack_bound &&
208        fabs(prediction1(1,3)-2.0)<slack_bound)) {
209    *error << "Difference to expected prediction too large\n";
210    ok = false;
211  }
212
213  ////////////////////////////////////////////////////////////////
214  // A test of when a test sample has no variables with non-zero
215  // weights in common with training samples: should not vote
216  ////////////////////////////////////////////////////////////////
217  *error << "test of predictions with nan distances (set to infinity in KNN)\n";
218  weights1.all(1);
219  weights1(1,0)=weights1(1,1)=weights1(2,0)=weights1(2,1)=0.0;
220  weights2.all(1);
221  weights2(0,0)=0.0;
222  classifier::KNN<statistics::EuclideanDistance> knn6(mlw1,target1);
223  knn6.k(3);
224  knn6.train();
225  knn6.predict(mlw3,prediction1); 
226  result1(0,0)=0;
227  result1(0,2)=result1(1,1)=result1(1,3)=1.0;
228  result1(0,1)=result1(0,3)=result1(1,0)=result1(1,2)=2.0;
229  slack = deviation(prediction1,result1); 
230  if (slack > slack_bound || std::isnan(slack)){
231    *error << "Difference to expected prediction too large\n";
232    *error << "slack: " << slack << std::endl;
233    *error << "expected less than " << slack_bound << std::endl;
234    ok = false;
235  }
236 
237
238
239  if(!ok) {
240    *error << "knn_test failed" << std::endl;
241  }
242  else {
243    *error << "OK" << std::endl;
244  }
245  if (error!=&std::cerr)
246    delete error;
247  if (ok=true) 
248    return 0;
249  return -1;
250}
251
252
Note: See TracBrowser for help on using the repository browser.