source: trunk/test/knn_test.cc @ 2339

Last change on this file since 2339 was 2339, checked in by Peter, 11 years ago

refs #627 extending the NeighborWeighting concept to require a default constructor and an assignment operator. Added an archetype class in test.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 11.1 KB
Line 
1// $Id: knn_test.cc 2339 2010-10-16 14:17:30Z peter $
2
3/*
4  Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with yat. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include "Suite.h"
23
24#include "yat/classifier/KNN.h"
25#include "yat/classifier/KNN_ReciprocalDistance.h"
26#include "yat/classifier/KNN_ReciprocalRank.h"
27#include "yat/classifier/MatrixLookup.h"
28#include "yat/classifier/MatrixLookupWeighted.h"
29#include "yat/statistics/EuclideanDistance.h"
30#include "yat/utility/DataIterator.h"
31#include "yat/utility/Matrix.h"
32#include "yat/utility/MatrixWeighted.h"
33
34
35#include <cassert>
36#include <fstream>
37#include <iostream>
38#include <list>
39#include <string>
40#include <vector>
41
42
43using namespace theplu::yat;
44
45void compile_test(test::Suite&);
46utility::Matrix data(void);
47utility::MatrixWeighted data_weighted(void);
48double deviation(const utility::Matrix& a, const utility::Matrix& b);
49void test_unweighted(test::Suite&);
50void test_unweighted_weighted(test::Suite&);
51void test_weighted(test::Suite&);
52void test_reciprocal_ranks(test::Suite&);
53void test_reciprocal_distance(test::Suite&);
54void test_no_samples(test::Suite&);
55void test_no_features(test::Suite&);
56std::vector<std::string> vec_target(void);
57
58
59int main(int argc, char* argv[])
60{ 
61  test::Suite suite(argc, argv);
62  suite.err() << "testing knn" << std::endl;
63  test_unweighted(suite);
64  test_unweighted_weighted(suite);
65  test_weighted(suite);
66  test_reciprocal_ranks(suite);
67  test_reciprocal_distance(suite);
68  test_no_samples(suite);
69  test_no_features(suite);
70  compile_test(suite);
71  return suite.return_value();
72} 
73
74
75void compile_test(test::Suite& suite)
76{
77  if (false) {
78    boost::detail::dummy_constructor dummy;
79    test::distance_archetype distance(dummy);
80    classifier::KNN<test::distance_archetype
81                    , test::neighbor_weighting_archetype> knn(distance);
82    knn.k(3);
83    knn.k();
84    classifier::SupervisedClassifier* knn2 = knn.make_classifier();
85    delete knn2;
86    utility::Matrix result;
87    knn.train(classifier::MatrixLookup(data()), classifier::Target());
88    knn.train(classifier::MatrixLookupWeighted(data_weighted()), 
89              classifier::Target());
90    knn.predict(classifier::MatrixLookup(data()), result);
91    knn.predict(classifier::MatrixLookupWeighted(data_weighted()), result);
92  }
93}
94
95
96utility::Matrix data(void)
97{
98  utility::Matrix data1(3,4);
99  for(size_t i=0;i<3;i++) {
100    data1(i,0)=3-i;
101    data1(i,1)=5-i;
102    data1(i,2)=i+1;
103    data1(i,3)=i+3;
104  }
105  return data1;
106}
107
108
109utility::MatrixWeighted data_weighted(void)
110{
111  utility::Matrix x = data();
112  utility::MatrixWeighted result(x.rows(), x.columns());
113  std::copy(x.begin(), x.end(), utility::data_iterator(result.begin()));
114  return result;
115}
116
117
118double deviation(const utility::Matrix& a, const utility::Matrix& b) {
119  assert(a.rows()==b.rows());
120  assert(b.columns()==b.columns());
121  double sl=0;
122  for (size_t i=0; i<a.rows(); i++){
123    for (size_t j=0; j<a.columns(); j++){
124      sl += std::abs(a(i,j)-b(i,j));
125    }
126  }
127  sl /= (a.columns()*a.rows());
128  return sl;
129}
130
131void test_unweighted(test::Suite& suite)
132{
133  ////////////////////////////////////////////////////////////////
134  // A test of training and predictions using unweighted data
135  ////////////////////////////////////////////////////////////////
136  suite.err() << "test of predictions using unweighted training " 
137              << "and test data\n";
138  utility::Matrix data1 = data();
139  classifier::MatrixLookup ml1(data1);
140  classifier::Target target1(vec_target());
141 
142  classifier::KNN<statistics::EuclideanDistance> knn1;
143  knn1.k(3);
144  knn1.train(ml1,target1);
145  utility::Matrix prediction1;
146  knn1.predict(ml1,prediction1);
147  utility::Matrix result1(2,4);
148  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=2.0;
149  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=1.0;
150  suite.add(suite.equal_range(result1.begin(), result1.end(), 
151                              prediction1.begin(), 1));
152}
153
154void test_unweighted_weighted(test::Suite& suite)
155{
156  suite.err() << "test of predictions using unweighted training "
157              << "and weighted test data\n";
158  utility::MatrixWeighted xw = data_weighted();
159  xw(2,0).weight()=0;
160 
161  classifier::MatrixLookupWeighted mlw1(xw);
162  classifier::KNN<statistics::EuclideanDistance> knn1;
163  knn1.k(3);
164  utility::Matrix data1 = data();
165  classifier::MatrixLookup ml1(data1);
166  classifier::Target target1(vec_target());
167  knn1.train(ml1,target1);
168  utility::Matrix prediction1;
169  knn1.predict(mlw1,prediction1); 
170  utility::Matrix result1(2,4);
171  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=2.0;
172  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=1.0;
173  result1(0,0)=1.0;
174  result1(1,0)=2.0;
175  suite.add(suite.equal_range(result1.begin(), result1.end(), 
176                              prediction1.begin(), 1));
177}
178
179void test_weighted(test::Suite& suite)
180{
181  ////////////////////////////////////////////////////////////////
182  // A test of training and test both weighted
183  ////////////////////////////////////////////////////////////////
184  suite.err() << "test of predictions using weighted training and test data\n";
185  suite.err() << "... uniform neighbor weighting" << std::endl;
186  utility::MatrixWeighted xw = data_weighted();
187  xw(2,0).weight()=0;
188  xw(0,1).weight()=0;
189  classifier::MatrixLookupWeighted mlw1(xw);
190   
191  utility::MatrixWeighted xw2 = data_weighted();
192  xw2(2,3).weight()=0;
193  classifier::MatrixLookupWeighted mlw2(xw2);
194  classifier::KNN<statistics::EuclideanDistance> knn2;
195  knn2.k(3);
196  classifier::Target target1(vec_target());
197  knn2.train(mlw2,target1);
198  utility::Matrix prediction1;
199  knn2.predict(mlw1,prediction1); 
200  utility::Matrix result1(2,4);
201  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=2.0;
202  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=1.0;
203  result1(0,0)=1.0;
204  result1(1,0)=2.0;
205  result1(0,1)=1.0;
206  result1(1,1)=2.0;
207  suite.add(suite.equal_range(result1.begin(), result1.end(), 
208                              prediction1.begin(), 1));
209}
210
211
212void test_reciprocal_ranks(test::Suite& suite)
213{
214  ////////////////////////////////////////////////////////////////
215  // A test of reciprocal ranks weighting with training and test both weighted
216  ////////////////////////////////////////////////////////////////
217  suite.err() << "... reciprokal rank neighbor weighting" << std::endl;
218  utility::MatrixWeighted xw2 = data_weighted();
219  xw2(2,3).weight()=0;
220  classifier::MatrixLookupWeighted mlw2(xw2);
221  utility::MatrixWeighted xw3 = data_weighted();
222  xw3(1,3).data()=7;
223  xw3(2,3).weight()=0;
224  classifier::MatrixLookupWeighted mlw3(xw3);
225  classifier::KNN<statistics::EuclideanDistance
226    ,classifier::KNN_ReciprocalRank> knn3;
227  knn3.k(3);
228  classifier::Target target1(vec_target());
229  knn3.train(mlw2,target1);
230  utility::Matrix prediction1;
231  knn3.predict(mlw3,prediction1); 
232  utility::Matrix result1(2,4);
233  result1(0,0)=result1(1,3)=1.0;
234  result1(0,3)=result1(1,0)=5.0/6.0;
235  result1(0,2)=result1(1,1)=1.0/2.0;
236  result1(0,1)=result1(1,2)=4.0/3.0;
237  suite.add(suite.equal_range(result1.begin(), result1.end(), 
238                              prediction1.begin(), 1));
239}
240
241void test_reciprocal_distance(test::Suite& suite)
242{
243  ////////////////////////////////////////////////////////////////
244  // A test of reciprocal distance weighting with training and test both weighted
245  ////////////////////////////////////////////////////////////////
246  suite.err() << "... reciprocal distance neighbor weighting" << std::endl;
247  classifier::KNN<statistics::EuclideanDistance,classifier::KNN_ReciprocalDistance> 
248    knn4;
249  knn4.k(3);
250  utility::MatrixWeighted xw2 = data_weighted();
251  xw2(2,3).weight()=0;
252  classifier::MatrixLookupWeighted mlw2(xw2);
253  utility::MatrixWeighted xw3 = data_weighted();
254  xw3(1,3).data()=7;
255  xw3(2,3).weight()=0;
256  classifier::MatrixLookupWeighted mlw3(xw3);
257  classifier::Target target1(vec_target());
258  knn4.train(mlw2,target1);
259  utility::Matrix prediction1;
260  knn4.predict(mlw3,prediction1); 
261  if (!(std::isinf(prediction1(0,0)) && std::isinf(prediction1(0,1)) && 
262        std::isinf(prediction1(1,2)) && 
263        suite.equal_fix(prediction1(1,3), 1.0/3.6742346141747673, 1e-16) &&
264        suite.equal_fix(prediction1(1,0), 
265                        1.0/2.82842712475+1.0/2.4494897427831779, 1e-11)
266        )){
267    suite.err() << "Difference to expected prediction too large\n";
268    suite.add(false);
269  }
270}
271
272
273void test_no_samples(test::Suite& suite)
274{
275  ////////////////////////////////////////////////////////////////
276  // A test of when a class has no training samples, should give nan
277  // in predictions. Also tests that k is reduced if not enough
278  // training samples.
279  ////////////////////////////////////////////////////////////////
280  //Keep only the second class in the training samples
281  std::vector<size_t> ind(2,2);
282  ind[1]=3;
283  classifier::Target target1(vec_target());
284  classifier::Target target2(target1,utility::Index(ind));
285
286  utility::MatrixWeighted xw = data_weighted();
287  xw(2,3).weight()=0.0;
288
289  classifier::MatrixLookupWeighted mlw4(xw, utility::Index(xw.rows()),
290                                        utility::Index(ind));
291  classifier::KNN<statistics::EuclideanDistance> knn5;
292  knn5.k(3);
293  knn5.train(mlw4,target2);
294  utility::MatrixWeighted xw3 = data_weighted();
295  xw3(1,3).data()=7;
296  xw3(2,3).weight()=0;
297  classifier::MatrixLookupWeighted mlw3(xw3);
298  utility::Matrix prediction1;
299  knn5.predict(mlw3,prediction1); 
300  if (!(std::isnan(prediction1(0,0)) && std::isnan(prediction1(0,1)) && 
301        std::isnan(prediction1(0,2)) && std::isnan(prediction1(0,3)) &&
302        suite.equal(prediction1(1,0),2.0) &&
303        suite.equal(prediction1(1,1),2.0) &&
304        suite.equal(prediction1(1,2),2.0) &&
305        suite.equal(prediction1(1,3),2.0) )) {
306    suite.err() << "Difference to expected prediction too large\n";
307    suite.add(false);
308  }
309}
310
311void test_no_features(test::Suite& suite)
312{
313  ////////////////////////////////////////////////////////////////
314  // A test of when a test sample has no variables with non-zero
315  // weights in common with training samples: should not vote
316  ////////////////////////////////////////////////////////////////
317  suite.err() << "test of predictions with nan distances (set to infinity in KNN)\n";
318  utility::MatrixWeighted xw1 = data_weighted();
319  xw1(1,0).weight()=xw1(1,1).weight()=xw1(2,0).weight()=xw1(2,1).weight()=0.0;
320  classifier::MatrixLookupWeighted mlw1(xw1);
321
322  classifier::KNN<statistics::EuclideanDistance> knn6;
323  knn6.k(3);
324  classifier::Target target1(vec_target());
325  knn6.train(mlw1,target1);
326
327  utility::MatrixWeighted xw3 = data_weighted();
328  xw3(1,3).data()=7;
329  xw3(0,0).weight()=0;
330  classifier::MatrixLookupWeighted mlw3(xw3);
331  utility::Matrix prediction1;
332  knn6.predict(mlw3,prediction1); 
333  utility::Matrix result1(2,4);
334  result1(0,0)=0;
335  result1(0,2)=result1(1,1)=result1(1,3)=1.0;
336  result1(0,1)=result1(0,3)=result1(1,0)=result1(1,2)=2.0;
337  suite.add(suite.equal_range(result1.begin(), result1.end(), 
338                              prediction1.begin(), 1));
339}
340
// Class labels for the four columns of the fixture data:
// {"neg", "neg", "pos", "pos"}.
std::vector<std::string> vec_target(void)
{
  std::vector<std::string> labels(2, "neg");
  labels.insert(labels.end(), 2, "pos");
  return labels;
}
348
Note: See TracBrowser for help on using the repository browser.