// $Id: regression_test.cc 741 2007-01-13 14:41:40Z peter $

/*
  Copyright (C) The authors contributing to this file.

  This file is part of the yat library, http://lev.thep.lu.se/trac/yat

  The yat library is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  The yat library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  02111-1307, USA.
*/

24#include "yat/regression/KernelBox.h"
25#include "yat/regression/Linear.h"
26#include "yat/regression/LinearWeighted.h"
27#include "yat/regression/Local.h"
28#include "yat/regression/Naive.h"
29#include "yat/regression/NaiveWeighted.h"
30#include "yat/regression/Polynomial.h"
31#include "yat/regression/PolynomialWeighted.h"
32#include "yat/utility/matrix.h"
33#include "yat/utility/vector.h"

#include <cmath>

#include <fstream>
#include <iostream>


using namespace theplu::yat;

bool equal(regression::OneDimensional&, regression::OneDimensionalWeighted&,
           std::ostream*);

bool multidim(std::ostream* error);

bool unity_weights(regression::OneDimensional&,
                   regression::OneDimensionalWeighted&,
                   const utility::vector&, const utility::vector&,
                   std::ostream*);

bool rescale_weights(regression::OneDimensionalWeighted&,
                     const utility::vector&, const utility::vector&,
                     std::ostream*);

bool zero_weights(regression::OneDimensionalWeighted&,
                  const utility::vector&, const utility::vector&,
                  std::ostream*);


bool Local_test(regression::OneDimensionalWeighted&,
                regression::Kernel&);

int main(const int argc,const char* argv[])
{
  std::ostream* error;
  if (argc>1 && argv[1]==std::string("-v"))
    error = &std::cerr;
  else {
    error = new std::ofstream("/dev/null");
    if (argc>1)
      std::cout << "regression_test -v : for printing extra information\n";
  }
  *error << "  testing regression" << std::endl;
  bool ok = true;

  // test data for Linear and Naive (Weighted and non-weighted)
  utility::vector x(4); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000;
  utility::vector y(4); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;
  utility::vector w(4); w(0)=0.1;  w(1)=0.2;  w(2)=0.3;  w(3)=0.4;

  // Comparing linear and polynomial(1)
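  // The checks below verify that a first-degree Polynomial agrees with
  // Linear: the slope fit_parameters()(1) should equal beta(), and the
  // constant term fit_parameters()(0) should equal alpha()-beta()*1985,
  // 1985 being the mean of x (Linear appears to report alpha at the mean
  // of x rather than at x=0).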
  regression::Linear linear;
  linear.fit(x,y);
  regression::Polynomial polynomial(1);
  polynomial.fit(x,y);
  if ( fabs(linear.beta()-polynomial.fit_parameters()(1))>0.0001 ){
    *error << "error: beta and fit_parameters(1) not equal" << std::endl;
    *error << "       beta = " << linear.beta() << std::endl;
    *error << "       fit_parameters(1) = "
           << polynomial.fit_parameters()(1) << std::endl;
    ok = false;
  }
  if ( fabs(polynomial.fit_parameters()(0)-linear.alpha()+
            linear.beta()*1985)>0.0001){
    *error << "error: fit_parameters(0) = "
           << polynomial.fit_parameters()(0)<< std::endl;
    *error << "error: alpha-beta*m_x = "
           << linear.alpha()-linear.beta()*1985 << std::endl;
    ok = false;
  }
  if ( fabs(polynomial.chisq()-linear.chisq())>0.0001){
    *error << "error: chisq not same in linear and polynomial(1)"
           << std::endl;
    ok = false;
  }
  if ( fabs(polynomial.predict(1.0)-linear.predict(1.0))>0.0001){
    *error << "error: predict not same in linear and polynomial(1)"
           << std::endl;
    ok = false;
  }
  if ( fabs(polynomial.standard_error2(1985)-linear.standard_error2(1985))
       >0.0001){
    *error << "error: standard_error not same in linear and polynomial(1)"
           << "\n  polynomial: " << polynomial.standard_error2(1985)
           << "\n  linear: " << linear.standard_error2(1985)
           << std::endl;
    ok = false;
  }

  *error << "  testing regression::LinearWeighted" << std::endl;
  regression::LinearWeighted linear_w;
  ok = equal(linear, linear_w, error) && ok;
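  // With these weights the weighted mean of x is 1990, so the fit evaluated
  // there should return the weighted mean of y:
  // 0.1*12+0.2*11+0.3*14+0.4*13 = 12.8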
  linear_w.fit(x,y,w);
  double y_predicted = linear_w.predict(1990);
  double y_predicted_err = linear_w.prediction_error2(1990);
  if (fabs(y_predicted-12.8)>0.001){
    *error << "error: cannot reproduce fit." << std::endl;
    *error << "predicted value: " << y_predicted << " expected 12.8"
           << std::endl;
    ok=false;
  }

  // testing regression::NaiveWeighted
  *error << "  testing regression::NaiveWeighted" << std::endl;
  regression::NaiveWeighted naive_w;
  regression::Naive naive;
  ok = equal(naive, naive_w, error) && ok;
  naive_w.fit(x,y,w);

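  // The naive estimator is expected to ignore the query point and simply
  // return the weighted mean of y (here evaluated at x=0).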
  y_predicted=naive_w.predict(0.0);
  y_predicted_err=naive_w.prediction_error2(0.0);
  if (y_predicted!=(0.1*12+0.2*11+0.3*14+0.4*13)) {
    *error << "regression_NaiveWeighted: cannot reproduce fit." << std::endl;
    *error << "returned value: " << y_predicted << std::endl;
    *error << "expected: " << 0.1*12+0.2*11+0.3*14+0.4*13 << std::endl;
    ok=false;
  }

  // testing regression::Local
  *error << "  testing regression::Local" << std::endl;
  regression::KernelBox kb;
  regression::LinearWeighted rl;
  if (!Local_test(rl,kb)) {
    *error << "regression_Local: Linear cannot reproduce fit." << std::endl;
    ok=false;
  }
  regression::NaiveWeighted rn;
  if (!Local_test(rn,kb)) {
    *error << "regression_Local: Naive cannot reproduce fit." << std::endl;
    ok=false;
  }

  // testing regression::Polynomial
  *error << "  testing regression::Polynomial" << std::endl;
  {
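    // regression_gauss.data presumably holds a sampled Gaussian; the
    // logarithm of a Gaussian is quadratic in x, so fitting ln(y) with a
    // second-degree polynomial should reproduce the tabulated coefficients.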
    std::ifstream s("data/regression_gauss.data");
    utility::matrix data(s);
    utility::vector x(data.rows());
    utility::vector ln_y(data.rows());
    for (size_t i=0; i<data.rows(); ++i) {
      x(i)=data(i,0);
      ln_y(i)=log(data(i,1));
    }

    regression::Polynomial polynomialfit(2);
    polynomialfit.fit(x,ln_y);
    utility::vector fit=polynomialfit.fit_parameters();
    if (fabs(fit[0]-1.012229646706 + fit[1]-0.012561322528 +
             fit[2]+1.159674470130)>1e-11) {  // Jari, fix number!
      *error << "regression_Polynomial: cannot reproduce fit." << std::endl;
      ok=false;
    }
  }

  *error << "  testing regression::PolynomialWeighted" << std::endl;
  regression::PolynomialWeighted pol_weighted(2);
  regression::Polynomial polynomial2(2);
  ok = equal(polynomial2, pol_weighted, error) && ok;

  ok = multidim(error) && ok;

  if (!ok)
    *error << "Test failed\n" << std::endl;

  if (error!=&std::cerr)
    delete error;

  return (ok ? 0 : -1);
}


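// equal() fits a weighted regressor and its unweighted counterpart to the
// same data and checks three invariants: unity weights reproduce the
// unweighted fit, rescaling all weights leaves the fit unchanged, and a
// zero weight behaves like a missing data point.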
bool equal(regression::OneDimensional& r,
           regression::OneDimensionalWeighted& wr,
           std::ostream* error)
{
  bool ok=true;
  utility::vector x(5); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000; x(4)=2010;
  utility::vector y(5); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;   y(4)=15;

  ok = unity_weights(r, wr, x, y, error) && ok;
  ok = rescale_weights(wr, x, y, error) && ok;
  ok = zero_weights(wr, x, y, error) && ok;
  return ok;
}


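// With all weights equal to one the weighted fit should agree with the
// unweighted one: exactly for predict(), and within a small numerical
// tolerance for s2, standard_error2, r2 and prediction_error2.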
bool unity_weights(regression::OneDimensional& r,
                   regression::OneDimensionalWeighted& rw,
                   const utility::vector& x, const utility::vector& y,
                   std::ostream* error)
{
  *error << "    testing unity weights equal to non-weighted version.\n";
  bool ok=true;
  utility::vector w(x.size(), 1.0);
  r.fit(x,y);
  rw.fit(x,y,w);
  if (r.predict(2000) != rw.predict(2000)){
    ok = false;
    *error << "Error: predict not equal\n"
           << "   weighted: " << rw.predict(2000) << "\n"
           << "   non-weighted: " << r.predict(2000)
           << std::endl;
  }
  if (fabs(r.s2()-rw.s2(1.0))>10E-7){
    ok = false;
    *error << "Error: s2 not equal non-weighted version." << std::endl;
    *error << "weighted s2 = " << rw.s2(1.0) << std::endl;
    *error << "non-weighted s2 = " << r.s2() << std::endl;
  }
  if (fabs(r.standard_error2(2000)-rw.standard_error2(2000))>10e-7){
    ok = false;
    *error << "Error: standard_error not equal non-weighted version."
           << std::endl;
  }
  if (fabs(r.r2()-rw.r2())>10E-7){
    ok = false;
    *error << "Error: r2 not equal non-weighted version." << std::endl;
    *error << "weighted r2 = " << rw.r2() << std::endl;
    *error << "non-weighted r2 = " << r.r2() << std::endl;
  }
  if (fabs(r.prediction_error2(2000)-rw.prediction_error2(2000,1))>10e-7){
    ok = false;
    *error << "Error: prediction_error2 not equal non-weighted version.\n"
           << "       weighted: " << rw.prediction_error2(2000,1) << "\n"
           << "   non-weighted: " << r.prediction_error2(2000)
           << std::endl;
  }
  return ok;
}


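// Multiplying all weights by a common factor should leave the fit unchanged.
// Quantities that depend on the absolute scale of the weights take that
// factor as an argument, e.g. s2(2) and prediction_error2(2000,2) after the
// weights have been doubled.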
bool rescale_weights(regression::OneDimensionalWeighted& wr,
                     const utility::vector& x, const utility::vector& y,
                     std::ostream* error)
{
  *error << "    testing rescaling weights.\n";
  bool ok = true;
  utility::vector w(5);  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0.2;
  wr.fit(x,y,w);
  double predict = wr.predict(2000);
  double prediction_error2 = wr.prediction_error2(2000);
  double r2 = wr.r2();
  double s2 = wr.s2();
  double standard_error2 = wr.standard_error2(2000);

  w.scale(2);
  wr.fit(x,y,w);
  if (fabs(wr.predict(2000)-predict)>10e-11){
    ok = false;
    *error << "Error: predict not equal after rescaling.\n";
    *error << "       predict = " << predict
           << " and after doubling weights.\n";
    *error << "       predict = " << wr.predict(2000) << "\n";
  }
  if (fabs(wr.s2(2)-s2)>10e-11){
    ok = false;
    *error << "Error: s2 not equal after rescaling.\n";
    *error << "       s2 = " << s2 << " and after doubling weights.\n";
    *error << "       s2 = " << wr.s2(2) << "\n";
    *error << "difference " << s2-wr.s2(2.0) << std::endl;
  }
  if (fabs(wr.standard_error2(2000)-standard_error2)>10e-6){
    ok = false;
    *error << "Error: standard_error2 not equal after rescaling.\n";
    *error << "       standard_error2 = " << standard_error2
           << " and after doubling weights.\n";
    *error << "       standard_error2 = "
           << wr.standard_error2(2000) << "\n";
    *error << " difference: " << wr.standard_error2(2000)-standard_error2
           << std::endl;
  }
  if (fabs(wr.r2()-r2)>10e-6){
    ok = false;
    *error << "Error: r2 not equal after rescaling.\n";
  }
  if (fabs(wr.prediction_error2(2000,2)-prediction_error2)>10e-6){
    ok = false;
    *error << "Error: prediction_error2 not equal after rescaling.\n";
    *error << "       prediction_error2 = " << prediction_error2
           << " and after doubling weights.\n";
    *error << "       prediction_error2 = "
           << wr.prediction_error2(2000,2) << "\n";
  }
  return ok;
}


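// A data point with zero weight should have no influence on the fit:
// fitting all five points (the last with weight zero) must give exactly the
// same results as fitting the first four points only.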
bool zero_weights(regression::OneDimensionalWeighted& wr,
                  const utility::vector& x, const utility::vector& y,
                  std::ostream* error)
{
  *error << "    testing zero weights equal to missing value.\n";
  bool ok = true;
  utility::vector w(5);  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0;
  wr.fit(x,y,w);
  double predict = wr.predict(2000);
  double prediction_error2 = wr.prediction_error2(2000);
  double r2 = wr.r2();
  double s2 = wr.s2();
  double standard_error2 = wr.standard_error2(2000);

  utility::vector x2(4);
  utility::vector y2(4);
  utility::vector w2(4);
  for (size_t i=0; i<4; ++i){
    x2(i) = x(i);
    y2(i) = y(i);
    w2(i) = w(i);
  }

  wr.fit(x2,y2,w2);
  if (wr.predict(2000) != predict){
    ok = false;
    *error << "Error: predict not equal.\n";
  }
  if (wr.prediction_error2(2000) != prediction_error2){
    ok = false;
    *error << "Error: prediction_error2 not equal.\n";
  }
  if (wr.r2() != r2){
    ok = false;
    *error << "Error: r2 not equal.\n";
    *error << "   r2: " << r2 << "\n";
    *error << "   r2: " << wr.r2() << "\n";
  }
  if (wr.s2() != s2){
    ok = false;
    *error << "Error: s2 not equal.\n";
  }
  if (wr.standard_error2(2000) != standard_error2){
    ok = false;
    *error << "Error: standard_error2 not equal.\n";
  }
  return ok;
}


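// The multi-dimensional regressors are checked with the same invariants as
// the one-dimensional ones: unity weights reproduce the unweighted fit and
// a common rescaling of the weights leaves predictions and (suitably
// scaled) error estimates unchanged.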
bool multidim(std::ostream* error)
{
  bool ok = true;
  *error << "  testing regression::MultiDimensionalWeighted" << std::endl;
  utility::vector x(5); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000; x(4)=2010;
  utility::vector y(5); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;   y(4)=15;
  utility::vector w(5,1.0);

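  // design matrix with the columns 1, x and x^2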
  utility::matrix data(5,3);
  for (size_t i=0; i<data.rows(); ++i){
    data(i,0)=1;
    data(i,1)=x(i);
    data(i,2)=x(i)*x(i);
  }
  regression::MultiDimensional md;
  md.fit(data,y);
  regression::MultiDimensionalWeighted mdw;
  mdw.fit(data,y,w);
  utility::vector z(3,1);
  z(1)=2000;
  z(2)=2000*2000;
  if (md.predict(z) != mdw.predict(z)){
    ok = false;
    *error << "Error: predict not equal\n"
           << "   weighted: " << mdw.predict(z) << "\n"
           << "   non-weighted: " << md.predict(z)
           << std::endl;
  }

  if (md.standard_error2(z) != mdw.standard_error2(z)){
    ok = false;
    *error << "Error: standard_error2 not equal\n"
           << "   weighted: " << mdw.standard_error2(z) << "\n"
           << "   non-weighted: " << md.standard_error2(z)
           << std::endl;
  }
  if (fabs(md.prediction_error2(z)-mdw.prediction_error2(z,1.0))>10e-7){
    ok = false;
    *error << "Error: prediction_error2 not equal\n"
           << "   weighted: " << mdw.prediction_error2(z,1.0) << "\n"
           << "   non-weighted: " << md.prediction_error2(z)
           << std::endl;
  }

  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0.2;
  mdw.fit(data,y,w);
  double predict = mdw.predict(z);
  double prediction_error2 = mdw.prediction_error2(z, 1.0);
  double s2 = mdw.s2(1.0);
  double standard_error2 = mdw.standard_error2(z);

  w.scale(2);
  mdw.fit(data,y,w);
  if (fabs(mdw.predict(z)-predict)>10e-10){
    ok = false;
    *error << "Error: predict not equal after rescaling.\n";
    *error << "   predict = " << predict << " and after doubling weights.\n";
    *error << "   predict = " << mdw.predict(z) << "\n";
  }
  if (fabs(mdw.prediction_error2(z,2)-prediction_error2)>10e-7){
    ok = false;
    *error << "Error: prediction_error2 not equal after rescaling.\n";
    *error << "       predict_error2 = " << prediction_error2
           << " and after doubling weights.\n";
    *error << "       predict_error2 = " << mdw.prediction_error2(z,2) << "\n";
  }
  if (fabs(mdw.s2(2)-s2)>10e-10){
    ok = false;
    *error << "Error: s2 not equal after rescaling.\n";
    *error << "       s2 = " << s2 << " and after doubling weights.\n";
    *error << "       s2 = " << mdw.s2(2) << "\n";
  }
  if (fabs(mdw.standard_error2(z)-standard_error2)>10e-7){
    ok = false;
    *error << "Error: standard_error2 not equal after rescaling.\n";
    *error << " standard_error2 = " << standard_error2
           << " and after doubling weights.\n";
    *error << " standard_error2 = " << mdw.standard_error2(z) << "\n";
  }

  return ok;
}


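// Local_test() feeds Local 1000 points with constant y=10; regardless of
// kernel and window arguments, every predicted value should then be
// exactly 10.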
bool Local_test(regression::OneDimensionalWeighted& r,
                regression::Kernel& k)
{
  regression::Local rl(r,k);
  for (size_t i=0; i<1000; i++){
    rl.add(i, 10);
  }

  rl.fit(10, 100);

  utility::vector y = rl.y_predicted();
  for (size_t i=0; i<y.size(); i++)
    if (y(i)!=10.0){
      return false;
    }
  return true;
}