source: trunk/test/knn_test.cc @ 1658

Last change on this file since 1658 was 1586, checked in by Peter, 13 years ago

fixing knn_test - refs #396

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 10.4 KB
// $Id: knn_test.cc 1586 2008-10-16 21:06:26Z peter $

/*
  Copyright (C) 2007 Jari Häkkinen, Peter Johansson, Markus Ringnér
  Copyright (C) 2008 Peter Johansson, Markus Ringnér

  This file is part of the yat library, http://dev.thep.lu.se/yat

  The yat library is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 3 of the
  License, or (at your option) any later version.

  The yat library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with yat. If not, see <http://www.gnu.org/licenses/>.
*/

#include "Suite.h"

#include "yat/classifier/KNN.h"
#include "yat/classifier/KNN_ReciprocalDistance.h"
#include "yat/classifier/KNN_ReciprocalRank.h"
#include "yat/classifier/MatrixLookup.h"
#include "yat/classifier/MatrixLookupWeighted.h"
#include "yat/statistics/EuclideanDistance.h"
#include "yat/utility/DataIterator.h"
#include "yat/utility/Matrix.h"
#include "yat/utility/MatrixWeighted.h"


#include <algorithm> // std::copy
#include <cassert>
#include <cmath>     // std::abs, std::isinf, std::isnan
#include <fstream>
#include <iostream>
#include <list>
#include <string>
#include <vector>


using namespace theplu::yat;

utility::Matrix data(void);
utility::MatrixWeighted data_weighted(void);
double deviation(const utility::Matrix& a, const utility::Matrix& b);
void test_unweighted(test::Suite&);
void test_unweighted_weighted(test::Suite&);
void test_weighted(test::Suite&);
void test_reciprocal_ranks(test::Suite&);
void test_reciprocal_distance(test::Suite&);
void test_no_samples(test::Suite&);
void test_no_features(test::Suite&);
std::vector<std::string> vec_target(void);

int main(int argc, char* argv[])
{
  test::Suite suite(argc, argv);
  suite.err() << "testing knn" << std::endl;
  test_unweighted(suite);
  test_unweighted_weighted(suite);
  test_weighted(suite);
  test_reciprocal_ranks(suite);
  test_reciprocal_distance(suite);
  test_no_samples(suite);
  test_no_features(suite);
  return suite.return_value();
}


// Create the 3x4 data matrix (one sample per column) used throughout the tests
utility::Matrix data(void)
{
  utility::Matrix data1(3,4);
  for(size_t i=0;i<3;i++) {
    data1(i,0)=3-i;
    data1(i,1)=5-i;
    data1(i,2)=i+1;
    data1(i,3)=i+3;
  }
  return data1;
}


// Same values as data(), copied into a MatrixWeighted (weights left at their
// default value)
utility::MatrixWeighted data_weighted(void)
{
  utility::Matrix x = data();
  utility::MatrixWeighted result(x.rows(), x.columns());
  std::copy(x.begin(), x.end(), utility::data_iterator(result.begin()));
  return result;
}


// Mean absolute element-wise difference between two equally sized matrices
double deviation(const utility::Matrix& a, const utility::Matrix& b) {
  assert(a.rows()==b.rows());
  assert(a.columns()==b.columns());
  double sl=0;
  for (size_t i=0; i<a.rows(); i++){
    for (size_t j=0; j<a.columns(); j++){
      sl += std::abs(a(i,j)-b(i,j));
    }
  }
  sl /= (a.columns()*a.rows());
  return sl;
}

void test_unweighted(test::Suite& suite)
{
  ////////////////////////////////////////////////////////////////
  // A test of training and predictions using unweighted data
  ////////////////////////////////////////////////////////////////
  suite.err() << "test of predictions using unweighted training "
              << "and test data\n";
  utility::Matrix data1 = data();
  classifier::MatrixLookup ml1(data1);
  classifier::Target target1(vec_target());

  classifier::KNN<statistics::EuclideanDistance> knn1;
  knn1.k(3);
  knn1.train(ml1,target1);
  utility::Matrix prediction1;
  knn1.predict(ml1,prediction1);
  utility::Matrix result1(2,4);
  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=2.0;
  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=1.0;
  suite.add(suite.equal_range(result1.begin(), result1.end(),
                              prediction1.begin(), 1));
}

void test_unweighted_weighted(test::Suite& suite)
{
  suite.err() << "test of predictions using unweighted training "
              << "and weighted test data\n";
  utility::MatrixWeighted xw = data_weighted();
  xw(2,0).weight()=0;

  classifier::MatrixLookupWeighted mlw1(xw);
  classifier::KNN<statistics::EuclideanDistance> knn1;
  knn1.k(3);
  utility::Matrix data1 = data();
  classifier::MatrixLookup ml1(data1);
  classifier::Target target1(vec_target());
  knn1.train(ml1,target1);
  utility::Matrix prediction1;
  knn1.predict(mlw1,prediction1);
  utility::Matrix result1(2,4);
  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=2.0;
  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=1.0;
  result1(0,0)=1.0;
  result1(1,0)=2.0;
  suite.add(suite.equal_range(result1.begin(), result1.end(),
                              prediction1.begin(), 1));
}

void test_weighted(test::Suite& suite)
{
  ////////////////////////////////////////////////////////////////
  // A test of training and test both weighted
  ////////////////////////////////////////////////////////////////
  suite.err() << "test of predictions using weighted training and test data\n";
  suite.err() << "... uniform neighbor weighting" << std::endl;
  utility::MatrixWeighted xw = data_weighted();
  xw(2,0).weight()=0;
  xw(0,1).weight()=0;
  classifier::MatrixLookupWeighted mlw1(xw);

  utility::MatrixWeighted xw2 = data_weighted();
  xw2(2,3).weight()=0;
  classifier::MatrixLookupWeighted mlw2(xw2);
  classifier::KNN<statistics::EuclideanDistance> knn2;
  knn2.k(3);
  classifier::Target target1(vec_target());
  knn2.train(mlw2,target1);
  utility::Matrix prediction1;
  knn2.predict(mlw1,prediction1);
  utility::Matrix result1(2,4);
  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=2.0;
  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=1.0;
  result1(0,0)=1.0;
  result1(1,0)=2.0;
  result1(0,1)=1.0;
  result1(1,1)=2.0;
  suite.add(suite.equal_range(result1.begin(), result1.end(),
                              prediction1.begin(), 1));
}


void test_reciprocal_ranks(test::Suite& suite)
{
  ////////////////////////////////////////////////////////////////
  // A test of reciprocal ranks weighting with training and test both weighted
  ////////////////////////////////////////////////////////////////
  suite.err() << "... reciprocal rank neighbor weighting" << std::endl;
  utility::MatrixWeighted xw2 = data_weighted();
  xw2(2,3).weight()=0;
  classifier::MatrixLookupWeighted mlw2(xw2);
  utility::MatrixWeighted xw3 = data_weighted();
  xw3(1,3).data()=7;
  xw3(2,3).weight()=0;
  classifier::MatrixLookupWeighted mlw3(xw3);
  classifier::KNN<statistics::EuclideanDistance
    ,classifier::KNN_ReciprocalRank> knn3;
  knn3.k(3);
  classifier::Target target1(vec_target());
  knn3.train(mlw2,target1);
  utility::Matrix prediction1;
  knn3.predict(mlw3,prediction1);
  utility::Matrix result1(2,4);
  result1(0,0)=result1(1,3)=1.0;
  result1(0,3)=result1(1,0)=5.0/6.0;
  result1(0,2)=result1(1,1)=1.0/2.0;
  result1(0,1)=result1(1,2)=4.0/3.0;
  suite.add(suite.equal_range(result1.begin(), result1.end(),
                              prediction1.begin(), 1));
}

void test_reciprocal_distance(test::Suite& suite)
{
  ////////////////////////////////////////////////////////////////
  // A test of reciprocal distance weighting with training and test both weighted
  ////////////////////////////////////////////////////////////////
  suite.err() << "... reciprocal distance neighbor weighting" << std::endl;
  classifier::KNN<statistics::EuclideanDistance,classifier::KNN_ReciprocalDistance>
    knn4;
  knn4.k(3);
  utility::MatrixWeighted xw2 = data_weighted();
  xw2(2,3).weight()=0;
  classifier::MatrixLookupWeighted mlw2(xw2);
  utility::MatrixWeighted xw3 = data_weighted();
  xw3(1,3).data()=7;
  xw3(2,3).weight()=0;
  classifier::MatrixLookupWeighted mlw3(xw3);
  classifier::Target target1(vec_target());
  knn4.train(mlw2,target1);
  utility::Matrix prediction1;
  knn4.predict(mlw3,prediction1);
  if (!(std::isinf(prediction1(0,0)) && std::isinf(prediction1(0,1)) &&
        std::isinf(prediction1(1,2)) &&
        suite.equal(prediction1(1,3), 1.0/3.6742346141747673, 5000) &&
        suite.equal(prediction1(1,0), 1.0/2.82842712475+1.0/2.4494897427831779,
                    5000)
        )){
    suite.err() << "Difference to expected prediction too large\n";
    suite.add(false);
  }
}


void test_no_samples(test::Suite& suite)
{
  ////////////////////////////////////////////////////////////////
  // A test of when a class has no training samples, should give nan
  // in predictions. Also tests that k is reduced if not enough
  // training samples.
  ////////////////////////////////////////////////////////////////
  //Keep only the second class in the training samples
  std::vector<size_t> ind(2,2);
  ind[1]=3;
  classifier::Target target1(vec_target());
  classifier::Target target2(target1,utility::Index(ind));

  utility::MatrixWeighted xw = data_weighted();
  xw(2,3).weight()=0.0;

  classifier::MatrixLookupWeighted mlw4(xw, utility::Index(xw.rows()),
                                        utility::Index(ind));
  classifier::KNN<statistics::EuclideanDistance> knn5;
  knn5.k(3);
  knn5.train(mlw4,target2);
  utility::MatrixWeighted xw3 = data_weighted();
  xw3(1,3).data()=7;
  xw3(2,3).weight()=0;
  classifier::MatrixLookupWeighted mlw3(xw3);
  utility::Matrix prediction1;
  knn5.predict(mlw3,prediction1);
  if (!(std::isnan(prediction1(0,0)) && std::isnan(prediction1(0,1)) &&
        std::isnan(prediction1(0,2)) && std::isnan(prediction1(0,3)) &&
        suite.equal(prediction1(1,0),2.0) &&
        suite.equal(prediction1(1,1),2.0) &&
        suite.equal(prediction1(1,2),2.0) &&
        suite.equal(prediction1(1,3),2.0) )) {
    suite.err() << "Difference to expected prediction too large\n";
    suite.add(false);
  }
}

void test_no_features(test::Suite& suite)
{
  ////////////////////////////////////////////////////////////////
  // A test of when a test sample has no variables with non-zero
  // weights in common with training samples: should not vote
  ////////////////////////////////////////////////////////////////
  suite.err() << "test of predictions with nan distances (set to infinity in KNN)\n";
  utility::MatrixWeighted xw1 = data_weighted();
  xw1(1,0).weight()=xw1(1,1).weight()=xw1(2,0).weight()=xw1(2,1).weight()=0.0;
  classifier::MatrixLookupWeighted mlw1(xw1);

  classifier::KNN<statistics::EuclideanDistance> knn6;
  knn6.k(3);
  classifier::Target target1(vec_target());
  knn6.train(mlw1,target1);

  utility::MatrixWeighted xw3 = data_weighted();
  xw3(1,3).data()=7;
  xw3(0,0).weight()=0;
  classifier::MatrixLookupWeighted mlw3(xw3);
  utility::Matrix prediction1;
  knn6.predict(mlw3,prediction1);
  utility::Matrix result1(2,4);
  result1(0,0)=0;
  result1(0,2)=result1(1,1)=result1(1,3)=1.0;
  result1(0,1)=result1(0,3)=result1(1,0)=result1(1,2)=2.0;
  suite.add(suite.equal_range(result1.begin(), result1.end(),
                              prediction1.begin(), 1));
}

// Class labels for the four samples: neg, neg, pos, pos
std::vector<std::string> vec_target(void)
{
  std::vector<std::string> vec1(4, "pos");
  vec1[0]="neg";
  vec1[1]="neg";
  return vec1;
}