source: trunk/test/regression_test.cc @ 1659

Last change on this file since 1659 was 1487, checked in by Jari Häkkinen, 13 years ago

Addresses #436. GPL license copy reference should also be updated.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 16.8 KB
Line 
1// $Id: regression_test.cc 1487 2008-09-10 08:41:36Z jari $
2
3/*
4  Copyright (C) 2005, 2006, 2007 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2008 Peter Johansson
6
7  This file is part of the yat library, http://dev.thep.lu.se/yat
8
9  The yat library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 3 of the
12  License, or (at your option) any later version.
13
14  The yat library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  General Public License for more details.
18
19  You should have received a copy of the GNU General Public License
20  along with yat. If not, see <http://www.gnu.org/licenses/>.
21*/
22
23#include "Suite.h"
24
25#include "yat/regression/KernelBox.h"
26#include "yat/regression/Linear.h"
27#include "yat/regression/LinearWeighted.h"
28#include "yat/regression/Local.h"
29#include "yat/regression/Naive.h"
30#include "yat/regression/NaiveWeighted.h"
31#include "yat/regression/Polynomial.h"
32#include "yat/regression/PolynomialWeighted.h"
33#include "yat/utility/Matrix.h"
34#include "yat/utility/Vector.h"
35
36#include <cmath>
37
38#include <fstream>
39#include <iostream>
40
41
42using namespace theplu::yat;
43
44void equal(regression::OneDimensional&, regression::OneDimensionalWeighted&, 
45           test::Suite&); 
46
47void multidim(test::Suite& suite);
48
49void unity_weights(regression::OneDimensional&, 
50                   regression::OneDimensionalWeighted&, 
51                   const utility::Vector&, const utility::Vector&,
52                   test::Suite&); 
53
54void rescale_weights(regression::OneDimensionalWeighted&, 
55                     const utility::Vector&, const utility::Vector&,
56                     test::Suite&); 
57
58void zero_weights(regression::OneDimensionalWeighted&, 
59                  const utility::Vector&, const utility::Vector&,
60                  test::Suite&); 
61
62
63bool Local_test(regression::OneDimensionalWeighted&, 
64                regression::Kernel&, test::Suite&);
65
66int main(int argc, char* argv[])
67{
68  test::Suite suite(argc, argv);
69
70  suite.err() << "  testing regression" << std::endl;
71
72  // test data for Linear and Naive (Weighted and non-weighted)
73  utility::Vector x(4); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000;
74  utility::Vector y(4); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;
75  utility::Vector w(4); w(0)=0.1;  w(1)=0.2;  w(2)=0.3;  w(3)=0.4;
76
77  // Comparing linear and polynomial(1)
78  regression::Linear linear;
79  linear.fit(x,y);
80  regression::Polynomial polynomial(1);
81  polynomial.fit(x,y);
82  if ( !suite.equal(linear.beta(),polynomial.fit_parameters()(1), 1000) ){
83    suite.err() << "error: beta and fit_parameters(1) not equal" << std::endl;
84    suite.err() << "       beta = " << linear.beta() << std::endl;
85    suite.err() << "       fit_parameters(1) = " 
86           << polynomial.fit_parameters()(1) << std::endl;
87    suite.add(false);
88  }
89  if (!suite.equal(polynomial.fit_parameters()(0),
90                   linear.alpha()-linear.beta()*1985, 10000)){
91    suite.err() << "error: fit_parameters(0) = " 
92           << polynomial.fit_parameters()(0)<< std::endl;
93    suite.err() << "error: alpha-beta*m_x = " 
94           << linear.alpha()-linear.beta()*1985 << std::endl;
95    suite.add(false);
96  }
97  if ( !suite.equal(polynomial.chisq(), linear.chisq(), 100) ){
98    suite.err() << "error: chisq not same in linear and polynomial(1)" 
99           << std::endl;
100    suite.add(false);
101  }
102  if ( !suite.equal(polynomial.predict(1.0),linear.predict(1.0), 1000) ){
103    suite.err() << "error: predict not same in linear and polynomial(1)" 
104           << std::endl;
105    suite.add(false);
106  }
107  if (!suite.equal(polynomial.standard_error2(1985),
108                   linear.standard_error2(1985), 100000)){
109    suite.err() << "error: standard_error not same in linear and polynomial(1)" 
110                << "\n  polynomial: " << polynomial.standard_error2(1985)
111                << "\n  linear: " << linear.standard_error2(1985)
112                << std::endl;
113    suite.add(false);
114  }
115
116  suite.err() << "  testing regression::LinearWeighted" << std::endl;
117  regression::LinearWeighted linear_w;
118  equal(linear, linear_w, suite);
119  linear_w.fit(x,y,w);
120  double y_predicted = linear_w.predict(1990);
121  double y_predicted_err = linear_w.prediction_error2(1990);
122  if (!suite.equal(y_predicted,12.8) ){
123    suite.err() << "error: cannot reproduce fit." << std::endl;
124    suite.err() << "predicted value: " << y_predicted << " expected 12.8" 
125           << std::endl;
126    suite.add(false);
127  }
128
129  // Comparing LinearWeighted and PolynomialWeighted(1)
130  suite.err() << "    comparing LinearWeighted and PolynomialWeighted(1)"
131              << std::endl;
132  linear_w.fit(x,y,w);
133  regression::PolynomialWeighted polynomial_w(1);
134  polynomial_w.fit(x,y,w);
135  if ( !suite.equal(linear_w.beta(), polynomial_w.fit_parameters()(1),10000) ){
136    suite.err() << "error: beta and fit_parameters(1) not equal" << std::endl;
137    suite.err() << "       beta = " << linear_w.beta() << std::endl;
138    suite.err() << "       fit_parameters(1) = " 
139           << polynomial_w.fit_parameters()(1) << std::endl;
140    suite.add(false);
141  }
142  if ( !suite.equal(polynomial_w.fit_parameters()(0),
143                    linear_w.alpha()-linear_w.beta()*1990, 10000) ){
144    suite.err() << "error: fit_parameters(0) = " 
145           << polynomial.fit_parameters()(0)<< std::endl;
146    suite.err() << "error: alpha-beta*m_x = " 
147           << linear.alpha()-linear.beta()*1990 << std::endl;
148    suite.add(false);
149  }
150  if ( !suite.equal(polynomial_w.s2(),linear_w.s2(), 10) ){
151    suite.err() << "error: chisq not same in linear and polynomial(1)" 
152           << std::endl;
153    suite.add(false);
154  }
155  if ( !suite.equal(polynomial_w.predict(1.0), linear_w.predict(1.0), 10000) ){
156    suite.err() << "error: predict not same in linear and polynomial(1)" 
157           << std::endl;
158    suite.add(false);
159  }
160  if ( !suite.equal(polynomial_w.standard_error2(1985), 
161                    linear_w.standard_error2(1985), 100000) ){
162    suite.err() << "error: standard_error not same in linear and polynomial(1)" 
163           << "\n  polynomial: " << polynomial_w.standard_error2(1985)
164           << "\n  linear: " << linear_w.standard_error2(1985)
165           << std::endl;
166    suite.add(false);
167  }
168
169  // testing regression::NaiveWeighted
170  suite.err() << "  testing regression::NaiveWeighted" << std::endl;
171  regression::NaiveWeighted naive_w;
172  regression::Naive naive;
173  equal(naive, naive_w, suite);
174  naive_w.fit(x,y,w);
175
176  y_predicted=naive_w.predict(0.0);
177  y_predicted_err=naive_w.prediction_error2(0.0);
178  if (!suite.equal(y_predicted,0.1*12+0.2*11+0.3*14+0.4*13)) {
179    suite.err() << "regression_NaiveWeighted: cannot reproduce fit.\n";
180    suite.err() << "returned value: " << y_predicted << std::endl;
181    suite.err() << "expected: " << 0.1*12+0.2*11+0.3*14+0.4*13 << std::endl;
182    suite.add(false);
183  }
184
185  // testing regression::Local
186  suite.err() << "  testing regression::Local" << std::endl;
187  regression::KernelBox kb;
188  regression::LinearWeighted rl;
189  if (!Local_test(rl,kb, suite)) {
190    suite.err() << "regression_Local: Linear cannot reproduce fit." << std::endl;
191    suite.add(false);
192  }
193  regression::NaiveWeighted rn;
194  if (!Local_test(rn,kb, suite)) {
195    suite.err() << "regression_Local: Naive cannot reproduce fit." << std::endl;
196    suite.add(false);
197  }
198
199  // testing regression::Polynomial
200  suite.err() << "  testing regression::Polynomial" << std::endl;
201  {
202    std::ifstream s(test::filename("data/regression_gauss.data").c_str());
203    utility::Matrix data(s);
204    utility::Vector x(data.rows());
205    utility::Vector ln_y(data.rows());
206    for (size_t i=0; i<data.rows(); ++i) {
207      x(i)=data(i,0);
208      ln_y(i)=log(data(i,1));
209    }
210
211    regression::Polynomial polynomialfit(2);
212    polynomialfit.fit(x,ln_y);
213    utility::Vector fit=polynomialfit.fit_parameters();
214    if (std::abs(fit(0)-1.012229646706 + fit(1)-0.012561322528 +
215                 fit(2)+1.159674470130)>1e-11) {
216      suite.err() << "regression_Polynomial: cannot reproduce fit." << std::endl;
217      suite.add(false);
218    }
219  }
220
221  suite.err() << "  testing regression::PolynomialWeighted" << std::endl;
222  regression::PolynomialWeighted pol_weighted(2);
223  regression::Polynomial polynomial2(2);
224  equal(polynomial2, pol_weighted, suite);
225
226  multidim(suite);
227
228  return suite.return_value();
229}
230
231
232void equal(regression::OneDimensional& r, 
233           regression::OneDimensionalWeighted& wr, 
234           test::Suite& suite)
235{
236  utility::Vector x(5); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000; x(4)=2010;
237  utility::Vector y(5); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;   y(4)=15;
238
239  unity_weights(r, wr, x, y, suite);
240  rescale_weights(wr, x, y, suite);
241  zero_weights(wr, x, y, suite);
242}
243
244
245void unity_weights(regression::OneDimensional& r, 
246                   regression::OneDimensionalWeighted& rw, 
247                   const utility::Vector& x, const utility::Vector& y,
248                   test::Suite& suite)
249{
250  suite.err() << "    testing unity weights equal to non-weighted version.\n"; 
251  utility::Vector w(x.size(), 1.0);
252  r.fit(x,y);
253  rw.fit(x,y,w);
254  if (!suite.equal(r.predict(2000), rw.predict(2000)) ) {
255    suite.add(false);
256    suite.err() << "Error: predict not equal\n" 
257           << "   weighted: " << rw.predict(2000) << "\n"
258           << "   non-weighted: " << r.predict(2000)
259           << std::endl;
260  }
261  if (!suite.equal(r.s2(), rw.s2(1.0)) ){
262    suite.add(false);
263    suite.err() << "Error: s2 not equal non-weighted version." << std::endl;
264    suite.err() << "weighted s2 = " << rw.s2(1.0) << std::endl;
265    suite.err() << "non-weighted s2 = " << r.s2() << std::endl;
266  }
267  if (!suite.equal(r.standard_error2(2000), rw.standard_error2(2000), 10) ){
268    suite.add(false);
269    suite.err() << "Error: standard_error not equal non-weighted version." 
270           << std::endl;
271  }
272  if (!suite.equal(r.r2(), rw.r2()) ){
273    suite.add(false);
274    suite.err() << "Error: r2 not equal non-weighted version." << std::endl;
275    suite.err() << "weighted r2 = " << rw.r2() << std::endl;
276    suite.err() << "non-weighted r2 = " << r.r2() << std::endl;
277  }
278  if (!suite.equal(r.prediction_error2(2000), rw.prediction_error2(2000,1),
279                   100) ){
280    suite.add(false);
281    suite.err() << "Error: prediction_error2 not equal non-weighted version.\n" 
282           << "       weighted: " << rw.prediction_error2(2000,1) << "\n"
283           << "   non-weighted: " << r.prediction_error2(2000)
284           << std::endl;
285  }
286} 
287
288
289void rescale_weights(regression::OneDimensionalWeighted& wr, 
290                     const utility::Vector& x, const utility::Vector& y,
291                     test::Suite& suite)
292{
293  suite.err() << "    testing rescaling weights.\n"; 
294  utility::Vector w(5);  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0.2;
295  wr.fit(x,y,w);
296  double predict = wr.predict(2000);
297  double prediction_error2 = wr.prediction_error2(2000);
298  double r2 = wr.r2();
299  double s2 = wr.s2();
300  double standard_error2 = wr.standard_error2(2000);
301
302  w*=2;
303  wr.fit(x,y,w);
304  if (!suite.equal(wr.predict(2000), predict, 10000) ){
305    suite.add(false);
306    suite.err() << "Error: predict not equal after rescaling.\n";
307    suite.err() << "       predict = " << predict
308           << " and after doubling weights.\n";
309    suite.err() << "       predict = " << wr.predict(2000) << "\n";
310  }
311  if (!suite.equal(wr.s2(2), s2, 1000) ){
312    suite.add(false);
313    suite.err() << "Error: s2 not equal after rescaling.\n";
314    suite.err() << "       s2 = " << s2 << " and after doubling weights.\n";
315    suite.err() << "       s2 = " << wr.s2(2) << "\n";
316    suite.err() << "difference " << s2-wr.s2(2.0) << std::endl;
317  }
318  if (!suite.equal_sqrt(wr.standard_error2(2000), standard_error2, 100) ){
319    suite.add(false);
320    suite.err() << "Error: standard_error2 not equal after rescaling.\n";
321    suite.err() << "       standard_error2 = " << standard_error2
322           << " and after doubling weights.\n";
323    suite.err() << "       standard_error2 = " 
324           << wr.standard_error2(2000) << "\n";
325    suite.err() << " difference: " << wr.standard_error2(2000)-standard_error2
326           << std::endl;
327  }
328  if (!suite.equal(wr.r2(), r2, 1000) ){
329    suite.add(false);
330    suite.err() << "Error: r2 not equal after rescaling.\n";
331  }
332  if (!suite.equal_sqrt(wr.prediction_error2(2000,2), prediction_error2, 10) ){
333    suite.add(false);
334    suite.err() << "Error: prediction_error2 not equal after rescaling.\n";
335    suite.err() << "       prediction_error2 = " << prediction_error2
336           << " and after doubling weights.\n";
337    suite.err() << "       prediction_error2 = " 
338           << wr.prediction_error2(2000,2) << "\n";
339  }
340}
341
342
343void zero_weights(regression::OneDimensionalWeighted& wr, 
344                  const utility::Vector& x, const utility::Vector& y,
345                  test::Suite& suite)
346{
347  suite.err() << "    testing zero weights equal to missing value.\n"; 
348  utility::Vector w(5);  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0;
349  wr.fit(x,y,w);
350  double predict = wr.predict(2000);
351  double prediction_error2 = wr.prediction_error2(2000);
352  double r2 = wr.r2();
353  double s2 = wr.s2();
354  double standard_error2 = wr.standard_error2(2000);
355
356  utility::Vector x2(4);
357  utility::Vector y2(4);
358  utility::Vector w2(4);
359  for (size_t i=0; i<4; ++i){
360    x2(i) = x(i);
361    y2(i) = y(i);
362    w2(i) = w(i);
363  }
364
365  wr.fit(x2,y2,w2);
366  if (!suite.equal(wr.predict(2000), predict, 10000) ) {
367    suite.add(false);
368    suite.err() << "Error: predict not equal.\n";
369    suite.err() << "       weighted predict: " << wr.predict(2000) << "\n";
370    suite.err() << "       unweighted predict: " << predict << "\n";
371    suite.err() << "       difference: " << wr.predict(2000)-predict << "\n";
372
373  }
374  if (!suite.equal(wr.prediction_error2(2000), prediction_error2) ) {
375    suite.add(false);
376    suite.err() << "Error: prediction_error2 not equal.\n";
377  }
378  if (!suite.equal(wr.r2(), r2) ) {
379    suite.add(false);
380    suite.err() << "Error: r2 not equal.\n";
381    suite.err() << "   r2: " << r2 << "\n";
382    suite.err() << "   r2: " << wr.r2() << "\n";
383  }
384  if (!suite.equal(wr.s2(), s2) ) {
385    suite.add(false);
386    suite.err() << "Error: s2 not equal.\n";
387  }
388  if (!suite.equal(wr.standard_error2(2000), standard_error2) ) {
389    suite.add(false);
390    suite.err() << "Error: standard_error2 not equal.\n";
391  }
392}
393
394
395void multidim(test::Suite& suite)
396{
397  suite.err() << "  testing regression::MultiDimensionalWeighted" << std::endl;
398  utility::Vector x(5); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000; x(4)=2010;
399  utility::Vector y(5); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;   y(4)=15;
400  utility::Vector w(5,1.0);
401 
402  utility::Matrix data(5,3);
403  for (size_t i=0; i<data.rows(); ++i){
404    data(i,0)=1;
405    data(i,1)=x(i);
406    data(i,2)=x(i)*x(i);
407  }
408  regression::MultiDimensional md;
409  md.fit(data,y);
410  regression::MultiDimensionalWeighted mdw;
411  mdw.fit(data,y,w);
412  utility::Vector z(3,1);
413  z(1)=2000;
414  z(2)=2000*2000;
415  if (!suite.equal(md.predict(z), mdw.predict(z))){
416    suite.add(false);
417    suite.err() << "Error: predict not equal\n" 
418           << "   weighted: " << mdw.predict(z) << "\n"
419           << "   non-weighted: " << md.predict(z)
420           << std::endl;
421  }
422
423  if (!suite.equal(md.standard_error2(z), mdw.standard_error2(z)) ){
424    suite.add(false);
425    suite.err() << "Error: standard_error2 not equal\n" 
426           << "   weighted: " << mdw.standard_error2(z) << "\n"
427           << "   non-weighted: " << md.standard_error2(z)
428           << std::endl;
429  }
430  if (!suite.equal(md.prediction_error2(z), mdw.prediction_error2(z,1.0)) ){
431    suite.add(false);
432    suite.err() << "Error: prediction_error2 not equal\n" 
433           << "   weighted: " << mdw.prediction_error2(z,1.0) << "\n"
434           << "   non-weighted: " << md.prediction_error2(z)
435           << std::endl;
436  }
437
438  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0.2;
439  mdw.fit(data,y,w);
440  double predict = mdw.predict(z);
441  double prediction_error2 = mdw.prediction_error2(z, 1.0);
442  double s2 = mdw.s2(1.0);
443  double standard_error2 = mdw.standard_error2(z);
444
445  w*=2;
446  mdw.fit(data,y,w);
447  if (!suite.equal(mdw.predict(z), predict, 10000) ){
448    suite.add(false);
449    suite.err() << "Error: predict not equal after rescaling.\n";
450    suite.err() << "   predict = " << predict
451                << " and after doubling weights.\n";
452    suite.err() << "   predict = " << mdw.predict(z) << "\n";
453  }
454  if (!suite.equal_sqrt(mdw.prediction_error2(z,2), prediction_error2,10) ){ 
455    suite.add(false);
456    suite.err() << "Error: prediction_error2 not equal after rescaling.\n";
457    suite.err() << "       predict_error2 = " << prediction_error2
458           << " and after doubling weights.\n";
459    suite.err() << "       predict_error2 = " 
460                << mdw.prediction_error2(z,2) << "\n";
461  }
462  if (!suite.equal(mdw.s2(2), s2, 1000) ){
463    suite.add(false);
464    suite.err() << "Error: s2 not equal after rescaling.\n";
465    suite.err() << "       s2 = " << s2 << " and after doubling weights.\n";
466    suite.err() << "       s2 = " << mdw.s2(2) << "\n";
467  }
468  if (!suite.equal_sqrt(mdw.standard_error2(z), standard_error2, 100) ){
469    suite.add(false);
470    suite.err() << "Error: standard_error2 not equal after rescaling.\n";
471    suite.err() << " standard_error2 = " << standard_error2
472           << " and after doubling weights.\n";
473    suite.err() << " standard_error2 = " << mdw.standard_error2(z) << "\n";
474  }
475}
476
477
478bool Local_test(regression::OneDimensionalWeighted& r, 
479                regression::Kernel& k, test::Suite& suite)
480{
481  regression::Local rl(r,k);
482  for (size_t i=0; i<1000; i++){
483    rl.add(i, 10);
484  }
485
486  rl.fit(10, 100);
487  if (rl.x().size()!=1000/10)
488    return false;
489  for (size_t i=0; i+1<rl.x().size(); ++i)
490    if (!suite.equal(rl.x()(i+1)-rl.x()(i),10.0))
491      return false;
492
493  utility::Vector y(rl.y_predicted());
494  for (size_t i=0; i<y.size(); i++) 
495    if (!suite.equal(y(i),10.0)){
496      return false;
497    }
498
499  return true;
500}
Note: See TracBrowser for help on using the repository browser.