source: branches/0.4-stable/test/regression_test.cc @ 1433

Last change on this file since 1433 was 1433, checked in by Peter, 14 years ago

fixes #423

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 16.9 KB
Line 
1// $Id: regression_test.cc 1433 2008-08-25 15:23:42Z peter $
2
3/*
4  Copyright (C) 2005, 2006, 2007 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2008 Peter Johansson
6
7  This file is part of the yat library, http://dev.thep.lu.se/yat
8
9  The yat library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 2 of the
12  License, or (at your option) any later version.
13
14  The yat library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  General Public License for more details.
18
19  You should have received a copy of the GNU General Public License
20  along with this program; if not, write to the Free Software
21  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22  02111-1307, USA.
23*/
24
25#include "Suite.h"
26
27#include "yat/regression/KernelBox.h"
28#include "yat/regression/Linear.h"
29#include "yat/regression/LinearWeighted.h"
30#include "yat/regression/Local.h"
31#include "yat/regression/Naive.h"
32#include "yat/regression/NaiveWeighted.h"
33#include "yat/regression/Polynomial.h"
34#include "yat/regression/PolynomialWeighted.h"
35#include "yat/utility/Matrix.h"
36#include "yat/utility/Vector.h"
37
38#include <cmath>
39
40#include <fstream>
41#include <iostream>
42
43
44using namespace theplu::yat;
45
46void equal(regression::OneDimensional&, regression::OneDimensionalWeighted&, 
47           test::Suite&); 
48
49void multidim(test::Suite& suite);
50
51void unity_weights(regression::OneDimensional&, 
52                   regression::OneDimensionalWeighted&, 
53                   const utility::Vector&, const utility::Vector&,
54                   test::Suite&); 
55
56void rescale_weights(regression::OneDimensionalWeighted&, 
57                     const utility::Vector&, const utility::Vector&,
58                     test::Suite&); 
59
60void zero_weights(regression::OneDimensionalWeighted&, 
61                  const utility::Vector&, const utility::Vector&,
62                  test::Suite&); 
63
64
65bool Local_test(regression::OneDimensionalWeighted&, 
66                regression::Kernel&, test::Suite&);
67
68int main(int argc, char* argv[])
69{
70  test::Suite suite(argc, argv);
71
72  suite.err() << "  testing regression" << std::endl;
73
74  // test data for Linear and Naive (Weighted and non-weighted)
75  utility::Vector x(4); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000;
76  utility::Vector y(4); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;
77  utility::Vector w(4); w(0)=0.1;  w(1)=0.2;  w(2)=0.3;  w(3)=0.4;
78
79  // Comparing linear and polynomial(1)
80  regression::Linear linear;
81  linear.fit(x,y);
82  regression::Polynomial polynomial(1);
83  polynomial.fit(x,y);
84  if ( !suite.equal(linear.beta(),polynomial.fit_parameters()(1), 1000) ){
85    suite.err() << "error: beta and fit_parameters(1) not equal" << std::endl;
86    suite.err() << "       beta = " << linear.beta() << std::endl;
87    suite.err() << "       fit_parameters(1) = " 
88           << polynomial.fit_parameters()(1) << std::endl;
89    suite.add(false);
90  }
91  if (!suite.equal(polynomial.fit_parameters()(0),
92                   linear.alpha()-linear.beta()*1985, 10000)){
93    suite.err() << "error: fit_parameters(0) = " 
94           << polynomial.fit_parameters()(0)<< std::endl;
95    suite.err() << "error: alpha-beta*m_x = " 
96           << linear.alpha()-linear.beta()*1985 << std::endl;
97    suite.add(false);
98  }
99  if ( !suite.equal(polynomial.chisq(), linear.chisq(), 100) ){
100    suite.err() << "error: chisq not same in linear and polynomial(1)" 
101           << std::endl;
102    suite.add(false);
103  }
104  if ( !suite.equal(polynomial.predict(1.0),linear.predict(1.0), 1000) ){
105    suite.err() << "error: predict not same in linear and polynomial(1)" 
106           << std::endl;
107    suite.add(false);
108  }
109  if (!suite.equal(polynomial.standard_error2(1985),
110                   linear.standard_error2(1985), 100000)){
111    suite.err() << "error: standard_error not same in linear and polynomial(1)" 
112                << "\n  polynomial: " << polynomial.standard_error2(1985)
113                << "\n  linear: " << linear.standard_error2(1985)
114                << std::endl;
115    suite.add(false);
116  }
117
118  suite.err() << "  testing regression::LinearWeighted" << std::endl;
119  regression::LinearWeighted linear_w;
120  equal(linear, linear_w, suite);
121  linear_w.fit(x,y,w);
122  double y_predicted = linear_w.predict(1990);
123  double y_predicted_err = linear_w.prediction_error2(1990);
124  if (!suite.equal(y_predicted,12.8) ){
125    suite.err() << "error: cannot reproduce fit." << std::endl;
126    suite.err() << "predicted value: " << y_predicted << " expected 12.8" 
127           << std::endl;
128    suite.add(false);
129  }
130
131  // Comparing LinearWeighted and PolynomialWeighted(1)
132  suite.err() << "    comparing LinearWeighted and PolynomialWeighted(1)"
133              << std::endl;
134  linear_w.fit(x,y,w);
135  regression::PolynomialWeighted polynomial_w(1);
136  polynomial_w.fit(x,y,w);
137  if ( !suite.equal(linear_w.beta(), polynomial_w.fit_parameters()(1),10000) ){
138    suite.err() << "error: beta and fit_parameters(1) not equal" << std::endl;
139    suite.err() << "       beta = " << linear_w.beta() << std::endl;
140    suite.err() << "       fit_parameters(1) = " 
141           << polynomial_w.fit_parameters()(1) << std::endl;
142    suite.add(false);
143  }
144  if ( !suite.equal(polynomial_w.fit_parameters()(0),
145                    linear_w.alpha()-linear_w.beta()*1990, 10000) ){
146    suite.err() << "error: fit_parameters(0) = " 
147           << polynomial.fit_parameters()(0)<< std::endl;
148    suite.err() << "error: alpha-beta*m_x = " 
149           << linear.alpha()-linear.beta()*1990 << std::endl;
150    suite.add(false);
151  }
152  if ( !suite.equal(polynomial_w.s2(),linear_w.s2(), 10) ){
153    suite.err() << "error: chisq not same in linear and polynomial(1)" 
154           << std::endl;
155    suite.add(false);
156  }
157  if ( !suite.equal(polynomial_w.predict(1.0), linear_w.predict(1.0), 10000) ){
158    suite.err() << "error: predict not same in linear and polynomial(1)" 
159           << std::endl;
160    suite.add(false);
161  }
162  if ( !suite.equal(polynomial_w.standard_error2(1985), 
163                    linear_w.standard_error2(1985), 100000) ){
164    suite.err() << "error: standard_error not same in linear and polynomial(1)" 
165           << "\n  polynomial: " << polynomial_w.standard_error2(1985)
166           << "\n  linear: " << linear_w.standard_error2(1985)
167           << std::endl;
168    suite.add(false);
169  }
170
171  // testing regression::NaiveWeighted
172  suite.err() << "  testing regression::NaiveWeighted" << std::endl;
173  regression::NaiveWeighted naive_w;
174  regression::Naive naive;
175  equal(naive, naive_w, suite);
176  naive_w.fit(x,y,w);
177
178  y_predicted=naive_w.predict(0.0);
179  y_predicted_err=naive_w.prediction_error2(0.0);
180  if (!suite.equal(y_predicted,0.1*12+0.2*11+0.3*14+0.4*13)) {
181    suite.err() << "regression_NaiveWeighted: cannot reproduce fit.\n";
182    suite.err() << "returned value: " << y_predicted << std::endl;
183    suite.err() << "expected: " << 0.1*12+0.2*11+0.3*14+0.4*13 << std::endl;
184    suite.add(false);
185  }
186
187  // testing regression::Local
188  suite.err() << "  testing regression::Local" << std::endl;
189  regression::KernelBox kb;
190  regression::LinearWeighted rl;
191  if (!Local_test(rl,kb, suite)) {
192    suite.err() << "regression_Local: Linear cannot reproduce fit." << std::endl;
193    suite.add(false);
194  }
195  regression::NaiveWeighted rn;
196  if (!Local_test(rn,kb, suite)) {
197    suite.err() << "regression_Local: Naive cannot reproduce fit." << std::endl;
198    suite.add(false);
199  }
200
201  // testing regression::Polynomial
202  suite.err() << "  testing regression::Polynomial" << std::endl;
203  {
204    std::ifstream s(test::filename("data/regression_gauss.data").c_str());
205    utility::Matrix data(s);
206    utility::Vector x(data.rows());
207    utility::Vector ln_y(data.rows());
208    for (size_t i=0; i<data.rows(); ++i) {
209      x(i)=data(i,0);
210      ln_y(i)=log(data(i,1));
211    }
212
213    regression::Polynomial polynomialfit(2);
214    polynomialfit.fit(x,ln_y);
215    utility::Vector fit=polynomialfit.fit_parameters();
216    if (std::abs(fit(0)-1.012229646706 + fit(1)-0.012561322528 +
217                 fit(2)+1.159674470130)>1e-11) {
218      suite.err() << "regression_Polynomial: cannot reproduce fit." << std::endl;
219      suite.add(false);
220    }
221  }
222
223  suite.err() << "  testing regression::PolynomialWeighted" << std::endl;
224  regression::PolynomialWeighted pol_weighted(2);
225  regression::Polynomial polynomial2(2);
226  equal(polynomial2, pol_weighted, suite);
227
228  multidim(suite);
229
230  return suite.return_value();
231}
232
233
234void equal(regression::OneDimensional& r, 
235           regression::OneDimensionalWeighted& wr, 
236           test::Suite& suite)
237{
238  utility::Vector x(5); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000; x(4)=2010;
239  utility::Vector y(5); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;   y(4)=15;
240
241  unity_weights(r, wr, x, y, suite);
242  rescale_weights(wr, x, y, suite);
243  zero_weights(wr, x, y, suite);
244}
245
246
247void unity_weights(regression::OneDimensional& r, 
248                   regression::OneDimensionalWeighted& rw, 
249                   const utility::Vector& x, const utility::Vector& y,
250                   test::Suite& suite)
251{
252  suite.err() << "    testing unity weights equal to non-weighted version.\n"; 
253  utility::Vector w(x.size(), 1.0);
254  r.fit(x,y);
255  rw.fit(x,y,w);
256  if (!suite.equal(r.predict(2000), rw.predict(2000)) ) {
257    suite.add(false);
258    suite.err() << "Error: predict not equal\n" 
259           << "   weighted: " << rw.predict(2000) << "\n"
260           << "   non-weighted: " << r.predict(2000)
261           << std::endl;
262  }
263  if (!suite.equal(r.s2(), rw.s2(1.0)) ){
264    suite.add(false);
265    suite.err() << "Error: s2 not equal non-weighted version." << std::endl;
266    suite.err() << "weighted s2 = " << rw.s2(1.0) << std::endl;
267    suite.err() << "non-weighted s2 = " << r.s2() << std::endl;
268  }
269  if (!suite.equal(r.standard_error2(2000), rw.standard_error2(2000), 10) ){
270    suite.add(false);
271    suite.err() << "Error: standard_error not equal non-weighted version." 
272           << std::endl;
273  }
274  if (!suite.equal(r.r2(), rw.r2()) ){
275    suite.add(false);
276    suite.err() << "Error: r2 not equal non-weighted version." << std::endl;
277    suite.err() << "weighted r2 = " << rw.r2() << std::endl;
278    suite.err() << "non-weighted r2 = " << r.r2() << std::endl;
279  }
280  if (!suite.equal(r.prediction_error2(2000), rw.prediction_error2(2000,1),
281                   100) ){
282    suite.add(false);
283    suite.err() << "Error: prediction_error2 not equal non-weighted version.\n" 
284           << "       weighted: " << rw.prediction_error2(2000,1) << "\n"
285           << "   non-weighted: " << r.prediction_error2(2000)
286           << std::endl;
287  }
288} 
289
290
291void rescale_weights(regression::OneDimensionalWeighted& wr, 
292                     const utility::Vector& x, const utility::Vector& y,
293                     test::Suite& suite)
294{
295  suite.err() << "    testing rescaling weights.\n"; 
296  utility::Vector w(5);  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0.2;
297  wr.fit(x,y,w);
298  double predict = wr.predict(2000);
299  double prediction_error2 = wr.prediction_error2(2000);
300  double r2 = wr.r2();
301  double s2 = wr.s2();
302  double standard_error2 = wr.standard_error2(2000);
303
304  w*=2;
305  wr.fit(x,y,w);
306  if (!suite.equal(wr.predict(2000), predict, 10000) ){
307    suite.add(false);
308    suite.err() << "Error: predict not equal after rescaling.\n";
309    suite.err() << "       predict = " << predict
310           << " and after doubling weights.\n";
311    suite.err() << "       predict = " << wr.predict(2000) << "\n";
312  }
313  if (!suite.equal(wr.s2(2), s2, 1000) ){
314    suite.add(false);
315    suite.err() << "Error: s2 not equal after rescaling.\n";
316    suite.err() << "       s2 = " << s2 << " and after doubling weights.\n";
317    suite.err() << "       s2 = " << wr.s2(2) << "\n";
318    suite.err() << "difference " << s2-wr.s2(2.0) << std::endl;
319  }
320  if (!suite.equal_sqrt(wr.standard_error2(2000), standard_error2, 100) ){
321    suite.add(false);
322    suite.err() << "Error: standard_error2 not equal after rescaling.\n";
323    suite.err() << "       standard_error2 = " << standard_error2
324           << " and after doubling weights.\n";
325    suite.err() << "       standard_error2 = " 
326           << wr.standard_error2(2000) << "\n";
327    suite.err() << " difference: " << wr.standard_error2(2000)-standard_error2
328           << std::endl;
329  }
330  if (!suite.equal(wr.r2(), r2, 1000) ){
331    suite.add(false);
332    suite.err() << "Error: r2 not equal after rescaling.\n";
333  }
334  if (!suite.equal_sqrt(wr.prediction_error2(2000,2), prediction_error2, 10) ){
335    suite.add(false);
336    suite.err() << "Error: prediction_error2 not equal after rescaling.\n";
337    suite.err() << "       prediction_error2 = " << prediction_error2
338           << " and after doubling weights.\n";
339    suite.err() << "       prediction_error2 = " 
340           << wr.prediction_error2(2000,2) << "\n";
341  }
342}
343
344
345void zero_weights(regression::OneDimensionalWeighted& wr, 
346                  const utility::Vector& x, const utility::Vector& y,
347                  test::Suite& suite)
348{
349  suite.err() << "    testing zero weights equal to missing value.\n"; 
350  utility::Vector w(5);  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0;
351  wr.fit(x,y,w);
352  double predict = wr.predict(2000);
353  double prediction_error2 = wr.prediction_error2(2000);
354  double r2 = wr.r2();
355  double s2 = wr.s2();
356  double standard_error2 = wr.standard_error2(2000);
357
358  utility::Vector x2(4);
359  utility::Vector y2(4);
360  utility::Vector w2(4);
361  for (size_t i=0; i<4; ++i){
362    x2(i) = x(i);
363    y2(i) = y(i);
364    w2(i) = w(i);
365  }
366
367  wr.fit(x2,y2,w2);
368  if (!suite.equal(wr.predict(2000), predict, 10000) ) {
369    suite.add(false);
370    suite.err() << "Error: predict not equal.\n";
371    suite.err() << "       weighted predict: " << wr.predict(2000) << "\n";
372    suite.err() << "       unweighted predict: " << predict << "\n";
373    suite.err() << "       difference: " << wr.predict(2000)-predict << "\n";
374
375  }
376  if (!suite.equal(wr.prediction_error2(2000), prediction_error2) ) {
377    suite.add(false);
378    suite.err() << "Error: prediction_error2 not equal.\n";
379  }
380  if (!suite.equal(wr.r2(), r2) ) {
381    suite.add(false);
382    suite.err() << "Error: r2 not equal.\n";
383    suite.err() << "   r2: " << r2 << "\n";
384    suite.err() << "   r2: " << wr.r2() << "\n";
385  }
386  if (!suite.equal(wr.s2(), s2) ) {
387    suite.add(false);
388    suite.err() << "Error: s2 not equal.\n";
389  }
390  if (!suite.equal(wr.standard_error2(2000), standard_error2) ) {
391    suite.add(false);
392    suite.err() << "Error: standard_error2 not equal.\n";
393  }
394}
395
396
397void multidim(test::Suite& suite)
398{
399  suite.err() << "  testing regression::MultiDimensionalWeighted" << std::endl;
400  utility::Vector x(5); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000; x(4)=2010;
401  utility::Vector y(5); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;   y(4)=15;
402  utility::Vector w(5,1.0);
403 
404  utility::Matrix data(5,3);
405  for (size_t i=0; i<data.rows(); ++i){
406    data(i,0)=1;
407    data(i,1)=x(i);
408    data(i,2)=x(i)*x(i);
409  }
410  regression::MultiDimensional md;
411  md.fit(data,y);
412  regression::MultiDimensionalWeighted mdw;
413  mdw.fit(data,y,w);
414  utility::Vector z(3,1);
415  z(1)=2000;
416  z(2)=2000*2000;
417  if (!suite.equal(md.predict(z), mdw.predict(z))){
418    suite.add(false);
419    suite.err() << "Error: predict not equal\n" 
420           << "   weighted: " << mdw.predict(z) << "\n"
421           << "   non-weighted: " << md.predict(z)
422           << std::endl;
423  }
424
425  if (!suite.equal(md.standard_error2(z), mdw.standard_error2(z)) ){
426    suite.add(false);
427    suite.err() << "Error: standard_error2 not equal\n" 
428           << "   weighted: " << mdw.standard_error2(z) << "\n"
429           << "   non-weighted: " << md.standard_error2(z)
430           << std::endl;
431  }
432  if (!suite.equal(md.prediction_error2(z), mdw.prediction_error2(z,1.0)) ){
433    suite.add(false);
434    suite.err() << "Error: prediction_error2 not equal\n" 
435           << "   weighted: " << mdw.prediction_error2(z,1.0) << "\n"
436           << "   non-weighted: " << md.prediction_error2(z)
437           << std::endl;
438  }
439
440  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0.2;
441  mdw.fit(data,y,w);
442  double predict = mdw.predict(z);
443  double prediction_error2 = mdw.prediction_error2(z, 1.0);
444  double s2 = mdw.s2(1.0);
445  double standard_error2 = mdw.standard_error2(z);
446
447  w*=2;
448  mdw.fit(data,y,w);
449  if (!suite.equal(mdw.predict(z), predict, 10000) ){
450    suite.add(false);
451    suite.err() << "Error: predict not equal after rescaling.\n";
452    suite.err() << "   predict = " << predict
453                << " and after doubling weights.\n";
454    suite.err() << "   predict = " << mdw.predict(z) << "\n";
455  }
456  if (!suite.equal_sqrt(mdw.prediction_error2(z,2), prediction_error2,10) ){ 
457    suite.add(false);
458    suite.err() << "Error: prediction_error2 not equal after rescaling.\n";
459    suite.err() << "       predict_error2 = " << prediction_error2
460           << " and after doubling weights.\n";
461    suite.err() << "       predict_error2 = " 
462                << mdw.prediction_error2(z,2) << "\n";
463  }
464  if (!suite.equal(mdw.s2(2), s2, 1000) ){
465    suite.add(false);
466    suite.err() << "Error: s2 not equal after rescaling.\n";
467    suite.err() << "       s2 = " << s2 << " and after doubling weights.\n";
468    suite.err() << "       s2 = " << mdw.s2(2) << "\n";
469  }
470  if (!suite.equal_sqrt(mdw.standard_error2(z), standard_error2, 100) ){
471    suite.add(false);
472    suite.err() << "Error: standard_error2 not equal after rescaling.\n";
473    suite.err() << " standard_error2 = " << standard_error2
474           << " and after doubling weights.\n";
475    suite.err() << " standard_error2 = " << mdw.standard_error2(z) << "\n";
476  }
477}
478
479
480bool Local_test(regression::OneDimensionalWeighted& r, 
481                regression::Kernel& k, test::Suite& suite)
482{
483  regression::Local rl(r,k);
484  for (size_t i=0; i<1000; i++){
485    rl.add(i, 10);
486  }
487
488  rl.fit(10, 100);
489  if (rl.x().size()!=1000/10)
490    return false;
491  for (size_t i=0; i+1<rl.x().size(); ++i)
492    if (!suite.equal(rl.x()(i+1)-rl.x()(i),10.0))
493      return false;
494
495  utility::Vector y(rl.y_predicted());
496  for (size_t i=0; i<y.size(); i++) 
497    if (!suite.equal(y(i),10.0)){
498      return false;
499    }
500
501  return true;
502}
Note: See TracBrowser for help on using the repository browser.