source: trunk/test/knn_test.cc @ 2119

Last change on this file since 2119 was 2119, checked in by Peter, 13 years ago

converted files to utf-8. fixes #577

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 10.4 KB
// $Id: knn_test.cc 2119 2009-12-12 23:11:43Z peter $

/*
  Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér

  This file is part of the yat library, http://dev.thep.lu.se/yat

  The yat library is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 3 of the
  License, or (at your option) any later version.

  The yat library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with yat. If not, see <http://www.gnu.org/licenses/>.
*/

#include "Suite.h"

#include "yat/classifier/KNN.h"
#include "yat/classifier/KNN_ReciprocalDistance.h"
#include "yat/classifier/KNN_ReciprocalRank.h"
#include "yat/classifier/MatrixLookup.h"
#include "yat/classifier/MatrixLookupWeighted.h"
#include "yat/statistics/EuclideanDistance.h"
#include "yat/utility/DataIterator.h"
#include "yat/utility/Matrix.h"
#include "yat/utility/MatrixWeighted.h"


#include <cassert>
#include <fstream>
#include <iostream>
#include <list>
#include <string>
#include <vector>


using namespace theplu::yat;

utility::Matrix data(void);
utility::MatrixWeighted data_weighted(void);
double deviation(const utility::Matrix& a, const utility::Matrix& b);
void test_unweighted(test::Suite&);
void test_unweighted_weighted(test::Suite&);
void test_weighted(test::Suite&);
void test_reciprocal_ranks(test::Suite&);
void test_reciprocal_distance(test::Suite&);
void test_no_samples(test::Suite&);
void test_no_features(test::Suite&);
std::vector<std::string> vec_target(void);

int main(int argc, char* argv[])
{
  test::Suite suite(argc, argv);
  suite.err() << "testing knn" << std::endl;
  test_unweighted(suite);
  test_unweighted_weighted(suite);
  test_weighted(suite);
  test_reciprocal_ranks(suite);
  test_reciprocal_distance(suite);
  test_no_samples(suite);
  test_no_features(suite);
  return suite.return_value();
}

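// Construct the 3x4 data matrix used by all tests; each of the four
// columns is one sample (cf. vec_target()):
//
//   3  5  1  3
//   2  4  2  4
//   1  3  3  5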
utility::Matrix data(void)
{
  utility::Matrix data1(3,4);
  for(size_t i=0;i<3;i++) {
    data1(i,0)=3-i;
    data1(i,1)=5-i;
    data1(i,2)=i+1;
    data1(i,3)=i+3;
  }
  return data1;
}

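// Copy the values of data() into a weighted matrix; only the data parts
// are written, so the weights keep MatrixWeighted's default value
// (presumably 1.0, i.e. fully weighted).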
utility::MatrixWeighted data_weighted(void)
{
  utility::Matrix x = data();
  utility::MatrixWeighted result(x.rows(), x.columns());
  std::copy(x.begin(), x.end(), utility::data_iterator(result.begin()));
  return result;
}

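// Mean absolute element-wise difference between two equally sized
// matrices (currently unused by the tests below).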
double deviation(const utility::Matrix& a, const utility::Matrix& b) {
  assert(a.rows()==b.rows());
  assert(a.columns()==b.columns());
  double sl=0;
  for (size_t i=0; i<a.rows(); i++){
    for (size_t j=0; j<a.columns(); j++){
      sl += std::abs(a(i,j)-b(i,j));
    }
  }
  sl /= (a.columns()*a.rows());
  return sl;
}

void test_unweighted(test::Suite& suite)
{
  ////////////////////////////////////////////////////////////////
  // A test of training and predictions using unweighted data
  ////////////////////////////////////////////////////////////////
  suite.err() << "test of predictions using unweighted training "
              << "and test data\n";
  utility::Matrix data1 = data();
  classifier::MatrixLookup ml1(data1);
  classifier::Target target1(vec_target());

  classifier::KNN<statistics::EuclideanDistance> knn1;
  knn1.k(3);
  knn1.train(ml1,target1);
  utility::Matrix prediction1;
  knn1.predict(ml1,prediction1);
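  // prediction1 has one row per class and one column per test sample;
  // with k=3 the votes in each column sum to three (row order presumably
  // follows the order in which labels first appear in the target, i.e.
  // "neg" then "pos").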
  utility::Matrix result1(2,4);
  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=2.0;
  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=1.0;
  suite.add(suite.equal_range(result1.begin(), result1.end(),
                              prediction1.begin(), 1));
}

void test_unweighted_weighted(test::Suite& suite)
{
  suite.err() << "test of predictions using unweighted training "
              << "and weighted test data\n";
  utility::MatrixWeighted xw = data_weighted();
  xw(2,0).weight()=0;

  classifier::MatrixLookupWeighted mlw1(xw);
  classifier::KNN<statistics::EuclideanDistance> knn1;
  knn1.k(3);
  utility::Matrix data1 = data();
  classifier::MatrixLookup ml1(data1);
  classifier::Target target1(vec_target());
  knn1.train(ml1,target1);
  utility::Matrix prediction1;
  knn1.predict(mlw1,prediction1);
  utility::Matrix result1(2,4);
  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=2.0;
  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=1.0;
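  // Zeroing the weight of element (2,0) changes which training samples
  // are nearest to test sample 0, so its expected votes differ from the
  // unweighted case above: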
  result1(0,0)=1.0;
  result1(1,0)=2.0;
  suite.add(suite.equal_range(result1.begin(), result1.end(),
                              prediction1.begin(), 1));
}

void test_weighted(test::Suite& suite)
{
  ////////////////////////////////////////////////////////////////
  // A test of predictions where both training and test data are weighted
  ////////////////////////////////////////////////////////////////
  suite.err() << "test of predictions using weighted training and test data\n";
  suite.err() << "... uniform neighbor weighting" << std::endl;
  utility::MatrixWeighted xw = data_weighted();
  xw(2,0).weight()=0;
  xw(0,1).weight()=0;
  classifier::MatrixLookupWeighted mlw1(xw);

  utility::MatrixWeighted xw2 = data_weighted();
  xw2(2,3).weight()=0;
  classifier::MatrixLookupWeighted mlw2(xw2);
  classifier::KNN<statistics::EuclideanDistance> knn2;
  knn2.k(3);
  classifier::Target target1(vec_target());
  knn2.train(mlw2,target1);
  utility::Matrix prediction1;
  knn2.predict(mlw1,prediction1);
  utility::Matrix result1(2,4);
  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=2.0;
  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=1.0;
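  // With weights zeroed in both training and test data, the nearest
  // neighbours of test samples 0 and 1 change, so their expected votes
  // differ from the unweighted case: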
  result1(0,0)=1.0;
  result1(1,0)=2.0;
  result1(0,1)=1.0;
  result1(1,1)=2.0;
  suite.add(suite.equal_range(result1.begin(), result1.end(),
                              prediction1.begin(), 1));
}

void test_reciprocal_ranks(test::Suite& suite)
{
  ////////////////////////////////////////////////////////////////
  // A test of reciprocal rank weighting with both training and test data weighted
  ////////////////////////////////////////////////////////////////
  suite.err() << "... reciprocal rank neighbor weighting" << std::endl;
  utility::MatrixWeighted xw2 = data_weighted();
  xw2(2,3).weight()=0;
  classifier::MatrixLookupWeighted mlw2(xw2);
  utility::MatrixWeighted xw3 = data_weighted();
  xw3(1,3).data()=7;
  xw3(2,3).weight()=0;
  classifier::MatrixLookupWeighted mlw3(xw3);
  classifier::KNN<statistics::EuclideanDistance
    ,classifier::KNN_ReciprocalRank> knn3;
  knn3.k(3);
  classifier::Target target1(vec_target());
  knn3.train(mlw2,target1);
  utility::Matrix prediction1;
  knn3.predict(mlw3,prediction1);
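  // With reciprocal rank weighting the i:th closest training sample
  // contributes a vote of 1/i, so with k=3 each column of the expected
  // prediction sums to 1 + 1/2 + 1/3 = 11/6.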
  utility::Matrix result1(2,4);
  result1(0,0)=result1(1,3)=1.0;
  result1(0,3)=result1(1,0)=5.0/6.0;
  result1(0,2)=result1(1,1)=1.0/2.0;
  result1(0,1)=result1(1,2)=4.0/3.0;
  suite.add(suite.equal_range(result1.begin(), result1.end(),
                              prediction1.begin(), 1));
}

void test_reciprocal_distance(test::Suite& suite)
{
  ////////////////////////////////////////////////////////////////
  // A test of reciprocal distance weighting with both training and test data weighted
  ////////////////////////////////////////////////////////////////
  suite.err() << "... reciprocal distance neighbor weighting" << std::endl;
  classifier::KNN<statistics::EuclideanDistance,classifier::KNN_ReciprocalDistance>
    knn4;
  knn4.k(3);
  utility::MatrixWeighted xw2 = data_weighted();
  xw2(2,3).weight()=0;
  classifier::MatrixLookupWeighted mlw2(xw2);
  utility::MatrixWeighted xw3 = data_weighted();
  xw3(1,3).data()=7;
  xw3(2,3).weight()=0;
  classifier::MatrixLookupWeighted mlw3(xw3);
  classifier::Target target1(vec_target());
  knn4.train(mlw2,target1);
  utility::Matrix prediction1;
  knn4.predict(mlw3,prediction1);
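  // With reciprocal distance weighting each neighbour contributes a vote
  // of 1/distance; training samples at zero distance from a test sample
  // therefore give infinite votes, hence the isinf checks below.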
  if (!(std::isinf(prediction1(0,0)) && std::isinf(prediction1(0,1)) &&
        std::isinf(prediction1(1,2)) &&
        suite.equal_fix(prediction1(1,3), 1.0/3.6742346141747673, 1e-16) &&
        suite.equal_fix(prediction1(1,0),
                        1.0/2.82842712475+1.0/2.4494897427831779, 1e-11)
        )){
    suite.err() << "Difference to expected prediction too large\n";
    suite.add(false);
  }
}

void test_no_samples(test::Suite& suite)
{
  ////////////////////////////////////////////////////////////////
  // A test of the case when a class has no training samples: the
  // predictions for that class should be NaN. Also tests that k is
  // reduced when there are not enough training samples.
  ////////////////////////////////////////////////////////////////
  // Keep only the second class in the training samples
  std::vector<size_t> ind(2,2);
  ind[1]=3;
  classifier::Target target1(vec_target());
  classifier::Target target2(target1,utility::Index(ind));

  utility::MatrixWeighted xw = data_weighted();
  xw(2,3).weight()=0.0;

  classifier::MatrixLookupWeighted mlw4(xw, utility::Index(xw.rows()),
                                        utility::Index(ind));
  classifier::KNN<statistics::EuclideanDistance> knn5;
  knn5.k(3);
  knn5.train(mlw4,target2);
  utility::MatrixWeighted xw3 = data_weighted();
  xw3(1,3).data()=7;
  xw3(2,3).weight()=0;
  classifier::MatrixLookupWeighted mlw3(xw3);
  utility::Matrix prediction1;
  knn5.predict(mlw3,prediction1);
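  // Only the two "pos" samples were used for training: the "neg" row of
  // the prediction should be NaN and, with k reduced to the two available
  // training samples, every column should get two "pos" votes.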
  if (!(std::isnan(prediction1(0,0)) && std::isnan(prediction1(0,1)) &&
        std::isnan(prediction1(0,2)) && std::isnan(prediction1(0,3)) &&
        suite.equal(prediction1(1,0),2.0) &&
        suite.equal(prediction1(1,1),2.0) &&
        suite.equal(prediction1(1,2),2.0) &&
        suite.equal(prediction1(1,3),2.0) )) {
    suite.err() << "Difference to expected prediction too large\n";
    suite.add(false);
  }
}

void test_no_features(test::Suite& suite)
{
  ////////////////////////////////////////////////////////////////
  // A test of the case when a test sample has no variables with non-zero
  // weights in common with a training sample: that training sample
  // should not vote.
  ////////////////////////////////////////////////////////////////
  suite.err() << "test of predictions with nan distances (set to infinity in KNN)\n";
  utility::MatrixWeighted xw1 = data_weighted();
  xw1(1,0).weight()=xw1(1,1).weight()=xw1(2,0).weight()=xw1(2,1).weight()=0.0;
  classifier::MatrixLookupWeighted mlw1(xw1);

  classifier::KNN<statistics::EuclideanDistance> knn6;
  knn6.k(3);
  classifier::Target target1(vec_target());
  knn6.train(mlw1,target1);

  utility::MatrixWeighted xw3 = data_weighted();
  xw3(1,3).data()=7;
  xw3(0,0).weight()=0;
  classifier::MatrixLookupWeighted mlw3(xw3);
  utility::Matrix prediction1;
  knn6.predict(mlw3,prediction1);
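  // Test sample 0 has no weighted variables in common with the two "neg"
  // training samples, so only the two "pos" samples vote for it (two votes
  // instead of k=3); the remaining columns get the usual three votes.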
  utility::Matrix result1(2,4);
  result1(0,0)=0;
  result1(0,2)=result1(1,1)=result1(1,3)=1.0;
  result1(0,1)=result1(0,3)=result1(1,0)=result1(1,2)=2.0;
  suite.add(suite.equal_range(result1.begin(), result1.end(),
                              prediction1.begin(), 1));
}

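// Class labels for the four samples: {"neg", "neg", "pos", "pos"}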
std::vector<std::string> vec_target(void)
{
  std::vector<std::string> vec1(4, "pos");
  vec1[0]="neg";
  vec1[1]="neg";
  return vec1;
}