source: trunk/test/regression_test.cc @ 789

Last change on this file since 789 was 789, checked in by Jari Häkkinen, 16 years ago

Addresses #193. vector now works as outlined here. Added some
functionality. Added a clone function that facilitates resizing of
vectors. clone is needed since assignement operator functionality is
changed.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 15.9 KB
Line 
1// $Id: regression_test.cc 789 2007-03-10 20:07:13Z jari $
2
3/*
4  Copyright (C) The authors contributing to this file.
5
6  This file is part of the yat library, http://lev.thep.lu.se/trac/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 2 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with this program; if not, write to the Free Software
20  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21  02111-1307, USA.
22*/
23
24#include "yat/regression/KernelBox.h"
25#include "yat/regression/Linear.h"
26#include "yat/regression/LinearWeighted.h"
27#include "yat/regression/Local.h"
28#include "yat/regression/Naive.h"
29#include "yat/regression/NaiveWeighted.h"
30#include "yat/regression/Polynomial.h"
31#include "yat/regression/PolynomialWeighted.h"
32#include "yat/utility/matrix.h"
33#include "yat/utility/vector.h"
34
35#include <cmath>
36
37#include <fstream>
38#include <iostream>
39
40
41using namespace theplu::yat;
42
43bool equal(regression::OneDimensional&, regression::OneDimensionalWeighted&, 
44           std::ostream*); 
45
46bool multidim(std::ostream* error);
47
48bool unity_weights(regression::OneDimensional&, 
49                   regression::OneDimensionalWeighted&, 
50                   const utility::vector&, const utility::vector&,
51                   std::ostream*); 
52
53bool rescale_weights(regression::OneDimensionalWeighted&, 
54                     const utility::vector&, const utility::vector&,
55                     std::ostream*); 
56
57bool zero_weights(regression::OneDimensionalWeighted&, 
58                  const utility::vector&, const utility::vector&,
59                  std::ostream*); 
60
61
62bool Local_test(regression::OneDimensionalWeighted&, 
63                regression::Kernel&);
64
65int main(const int argc,const char* argv[])
66{
67  std::ostream* error;
68  if (argc>1 && argv[1]==std::string("-v"))
69    error = &std::cerr;
70  else {
71    error = new std::ofstream("/dev/null");
72    if (argc>1)
73      std::cout << "regression_test -v : for printing extra information\n";
74  }
75  *error << "  testing regression" << std::endl;
76  bool ok = true;
77
78  // test data for Linear and Naive (Weighted and non-weighted)
79  utility::vector x(4); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000;
80  utility::vector y(4); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;
81  utility::vector w(4); w(0)=0.1;  w(1)=0.2;  w(2)=0.3;  w(3)=0.4;
82
83  // Comparing linear and polynomial(1)
84  regression::Linear linear;
85  linear.fit(x,y);
86  regression::Polynomial polynomial(1);
87  polynomial.fit(x,y);
88  if ( fabs(linear.beta()-polynomial.fit_parameters()(1))>0.0001 ){
89    *error << "error: beta and fit_parameters(1) not equal" << std::endl;
90    *error << "       beta = " << linear.beta() << std::endl;
91    *error << "       fit_parameters(1) = " 
92           << polynomial.fit_parameters()(1) << std::endl;
93    ok = false;
94  }
95  if ( fabs(polynomial.fit_parameters()(0)-linear.alpha()+
96            linear.beta()*1985)>0.0001){
97    *error << "error: fit_parameters(0) = " 
98           << polynomial.fit_parameters()(0)<< std::endl;
99    *error << "error: alpha-beta*m_x = " 
100           << linear.alpha()-linear.beta()*1985 << std::endl;
101    ok = false;
102  }
103  if ( fabs(polynomial.chisq()-linear.chisq())>0.0001){
104    *error << "error: chisq not same in linear and polynomial(1)" 
105           << std::endl;
106    ok = false;
107  }
108  if ( fabs(polynomial.predict(1.0)-linear.predict(1.0))>0.0001){
109    *error << "error: predict not same in linear and polynomial(1)" 
110           << std::endl;
111    ok = false;
112  }
113  if ( fabs(polynomial.standard_error2(1985)-linear.standard_error2(1985))
114       >0.0001){
115    *error << "error: standard_error not same in linear and polynomial(1)" 
116           << "\n  polynomial: " << polynomial.standard_error2(1985)
117           << "\n  linear: " << linear.standard_error2(1985)
118           << std::endl;
119    ok = false;
120  }
121
122  *error << "  testing regression::LinearWeighted" << std::endl;
123  regression::LinearWeighted linear_w;
124  ok = equal(linear, linear_w, error) && ok;
125  linear_w.fit(x,y,w);
126  double y_predicted = linear_w.predict(1990);
127  double y_predicted_err = linear_w.prediction_error2(1990);
128  if (fabs(y_predicted-12.8)>0.001){
129    *error << "error: cannot reproduce fit." << std::endl;
130    *error << "predicted value: " << y_predicted << " expected 12.8" 
131           << std::endl;
132    ok=false;
133  }
134
135  // Comparing LinearWeighted and PolynomialWeighted(1)
136  *error << "    comparing LinearWeighted and PolynomialWeighted(1)"
137         << std::endl;
138  linear_w.fit(x,y,w);
139  regression::PolynomialWeighted polynomial_w(1);
140  polynomial_w.fit(x,y,w);
141  if ( fabs(linear_w.beta()-polynomial_w.fit_parameters()(1))>10e-7 ){
142    *error << "error: beta and fit_parameters(1) not equal" << std::endl;
143    *error << "       beta = " << linear_w.beta() << std::endl;
144    *error << "       fit_parameters(1) = " 
145           << polynomial_w.fit_parameters()(1) << std::endl;
146    ok = false;
147  }
148  if ( fabs(polynomial_w.fit_parameters()(0)-linear_w.alpha()+
149            linear_w.beta()*1990)>0.0001){
150    *error << "error: fit_parameters(0) = " 
151           << polynomial.fit_parameters()(0)<< std::endl;
152    *error << "error: alpha-beta*m_x = " 
153           << linear.alpha()-linear.beta()*1990 << std::endl;
154    ok = false;
155  }
156  if ( fabs(polynomial_w.s2()-linear_w.s2())>0.0001){
157    *error << "error: chisq not same in linear and polynomial(1)" 
158           << std::endl;
159    ok = false;
160  }
161  if ( fabs(polynomial_w.predict(1.0)-linear_w.predict(1.0))>0.0001){
162    *error << "error: predict not same in linear and polynomial(1)" 
163           << std::endl;
164    ok = false;
165  }
166  if ( fabs(polynomial_w.standard_error2(1985)-linear_w.standard_error2(1985))
167       >0.0001){
168    *error << "error: standard_error not same in linear and polynomial(1)" 
169           << "\n  polynomial: " << polynomial.standard_error2(1985)
170           << "\n  linear: " << linear.standard_error2(1985)
171           << std::endl;
172    ok = false;
173  }
174
175  // testing regression::NaiveWeighted
176  *error << "  testing regression::NaiveWeighted" << std::endl;
177  regression::NaiveWeighted naive_w;
178  regression::Naive naive;
179  ok = equal(naive, naive_w, error) && ok;
180  naive_w.fit(x,y,w);
181
182  y_predicted=naive_w.predict(0.0);
183  y_predicted_err=naive_w.prediction_error2(0.0);
184  if (y_predicted!=(0.1*12+0.2*11+0.3*14+0.4*13)) {
185    *error << "regression_NaiveWeighted: cannot reproduce fit." << std::endl;
186    *error << "returned value: " << y_predicted << std::endl;
187    *error << "expected: " << 0.1*12+0.2*11+0.3*14+0.4*13 << std::endl;
188    ok=false;
189  }
190
191  // testing regression::Local
192  *error << "  testing regression::Local" << std::endl;
193  regression::KernelBox kb;
194  regression::LinearWeighted rl;
195  if (!Local_test(rl,kb)) {
196    *error << "regression_Local: Linear cannot reproduce fit." << std::endl;
197    ok=false;
198  }
199  regression::NaiveWeighted rn;
200  if (!Local_test(rn,kb)) {
201    *error << "regression_Local: Naive cannot reproduce fit." << std::endl;
202    ok=false;
203  }
204
205  // testing regression::Polynomial
206  *error << "  testing regression::Polynomial" << std::endl;
207  {
208    std::ifstream s("data/regression_gauss.data");
209    utility::matrix data(s);
210    utility::vector x(data.rows());
211    utility::vector ln_y(data.rows());
212    for (size_t i=0; i<data.rows(); ++i) {
213      x(i)=data(i,0);
214      ln_y(i)=log(data(i,1));
215    }
216
217    regression::Polynomial polynomialfit(2);
218    polynomialfit.fit(x,ln_y);
219    utility::vector fit=polynomialfit.fit_parameters();
220    if (fabs(fit[0]-1.012229646706 + fit[1]-0.012561322528 +
221             fit[2]+1.159674470130)>1e-11) {
222      *error << "regression_Polynomial: cannot reproduce fit." << std::endl;
223      ok=false;
224    }
225  }
226
227  *error << "  testing regression::PolynomialWeighted" << std::endl;
228  regression::PolynomialWeighted pol_weighted(2);
229  regression::Polynomial polynomial2(2);
230  ok = equal(polynomial2, pol_weighted, error) && ok;
231
232  ok = multidim(error) && ok;
233
234  if (!ok)
235    *error << "Test failed\n" << std::endl;
236
237  if (error!=&std::cerr)
238    delete error;
239
240  return (ok ? 0 : -1);
241}
242
243
244bool equal(regression::OneDimensional& r, 
245           regression::OneDimensionalWeighted& wr, 
246           std::ostream* error)
247{
248  bool ok=true;
249  utility::vector x(5); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000; x(4)=2010;
250  utility::vector y(5); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;   y(4)=15;
251
252  ok = unity_weights(r, wr, x, y, error) && ok;
253  ok = rescale_weights(wr, x, y, error) && ok;
254  ok = zero_weights(wr, x, y, error) && ok;
255  return ok;
256}
257
258
259bool unity_weights(regression::OneDimensional& r, 
260                   regression::OneDimensionalWeighted& rw, 
261                   const utility::vector& x, const utility::vector& y,
262                   std::ostream* error)
263{
264  *error << "    testing unity weights equal to non-weighted version.\n"; 
265  bool ok=true;
266  utility::vector w(x.size(), 1.0);
267  r.fit(x,y);
268  rw.fit(x,y,w);
269  if (r.predict(2000) != rw.predict(2000)){
270    ok = false;
271    *error << "Error: predict not equal\n" 
272           << "   weighted: " << rw.predict(2000) << "\n"
273           << "   non-weighted: " << r.predict(2000)
274           << std::endl;
275  }
276  if (fabs(r.s2()-rw.s2(1.0))>10E-7){
277    ok = false;
278    *error << "Error: s2 not equal non-weighted version." << std::endl;
279    *error << "weighted s2 = " << rw.s2(1.0) << std::endl;
280    *error << "non-weighted s2 = " << r.s2() << std::endl;
281  }
282  if (fabs(r.standard_error2(2000)-rw.standard_error2(2000))>10e-7){
283    ok = false;
284    *error << "Error: standard_error not equal non-weighted version." 
285           << std::endl;
286  }
287  if (fabs(r.r2()-rw.r2())>10E-7){
288    ok = false;
289    *error << "Error: r2 not equal non-weighted version." << std::endl;
290    *error << "weighted r2 = " << rw.r2() << std::endl;
291    *error << "non-weighted r2 = " << r.r2() << std::endl;
292  }
293  if (fabs(r.prediction_error2(2000)-rw.prediction_error2(2000,1))>10e-7){
294    ok = false;
295    *error << "Error: prediction_error2 not equal non-weighted version.\n" 
296           << "       weighted: " << rw.prediction_error2(2000,1) << "\n"
297           << "   non-weighted: " << r.prediction_error2(2000)
298           << std::endl;
299  }
300  return ok;
301} 
302
303
304bool rescale_weights(regression::OneDimensionalWeighted& wr, 
305                     const utility::vector& x, const utility::vector& y,
306                     std::ostream* error)
307{
308  *error << "    testing rescaling weights.\n"; 
309  bool ok = true;
310  utility::vector w(5);  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0.2;
311  wr.fit(x,y,w);
312  double predict = wr.predict(2000);
313  double prediction_error2 = wr.prediction_error2(2000);
314  double r2 = wr.r2();
315  double s2 = wr.s2();
316  double standard_error2 = wr.standard_error2(2000);
317
318  w*=2;
319  wr.fit(x,y,w);
320  if (fabs(wr.predict(2000)-predict)>10e-11){
321    ok = false;
322    *error << "Error: predict not equal after rescaling.\n";
323    *error << "       predict = " << predict
324           << " and after doubling weights.\n";
325    *error << "       predict = " << wr.predict(2000) << "\n";
326  }
327  if (fabs(wr.s2(2)-s2)>10e-11){
328    ok = false;
329    *error << "Error: s2 not equal after rescaling.\n";
330    *error << "       s2 = " << s2 << " and after doubling weights.\n";
331    *error << "       s2 = " << wr.s2(2) << "\n";
332    *error << "difference " << s2-wr.s2(2.0) << std::endl;
333  }
334  if (fabs(wr.standard_error2(2000)-standard_error2)>10e-6){
335    ok = false;
336    *error << "Error: standard_error2 not equal after rescaling.\n";
337    *error << "       standard_error2 = " << standard_error2
338           << " and after doubling weights.\n";
339    *error << "       standard_error2 = " 
340           << wr.standard_error2(2000) << "\n";
341    *error << " difference: " << wr.standard_error2(2000)-standard_error2
342           << std::endl;
343  }
344  if (fabs(wr.r2()-r2)>10e-6){
345    ok = false;
346    *error << "Error: r2 not equal after rescaling.\n";
347  }
348  if (fabs(wr.prediction_error2(2000,2)-prediction_error2)>10e-6){
349    ok = false;
350    *error << "Error: prediction_error2 not equal after rescaling.\n";
351    *error << "       prediction_error2 = " << prediction_error2
352           << " and after doubling weights.\n";
353    *error << "       prediction_error2 = " 
354           << wr.prediction_error2(2000,2) << "\n";
355  }
356  return ok; 
357}
358
359
360bool zero_weights(regression::OneDimensionalWeighted& wr, 
361                  const utility::vector& x, const utility::vector& y,
362                  std::ostream* error)
363{
364  *error << "    testing zero weights equal to missing value.\n"; 
365  bool ok = true;
366  utility::vector w(5);  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0;
367  wr.fit(x,y,w);
368  double predict = wr.predict(2000);
369  double prediction_error2 = wr.prediction_error2(2000);
370  double r2 = wr.r2();
371  double s2 = wr.s2();
372  double standard_error2 = wr.standard_error2(2000);
373
374  utility::vector x2(4);
375  utility::vector y2(4);
376  utility::vector w2(4);
377  for (size_t i=0; i<4; ++i){
378    x2(i) = x(i);
379    y2(i) = y(i);
380    w2(i) = w(i);
381  }
382
383  wr.fit(x2,y2,w2);
384  if (wr.predict(2000) != predict){
385    ok = false;
386    *error << "Error: predict not equal.\n";
387  }
388  if (wr.prediction_error2(2000) != prediction_error2){
389    ok = false;
390    *error << "Error: prediction_error2 not equal.\n";
391  }
392  if (wr.r2() != r2){
393    ok = false;
394    *error << "Error: r2 not equal.\n";
395    *error << "   r2: " << r2 << "\n";
396    *error << "   r2: " << wr.r2() << "\n";
397  }
398  if (wr.s2() != s2){
399    ok = false;
400    *error << "Error: s2 not equal.\n";
401  }
402  if (wr.standard_error2(2000) != standard_error2){
403    ok = false;
404    *error << "Error: standard_error2 not equal.\n";
405  }
406  return ok; 
407}
408
409
410bool multidim(std::ostream* error)
411{
412  bool ok = true;
413  *error << "  testing regression::MultiDimensionalWeighted" << std::endl;
414  utility::vector x(5); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000; x(4)=2010;
415  utility::vector y(5); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;   y(4)=15;
416  utility::vector w(5,1.0);
417 
418  utility::matrix data(5,3);
419  for (size_t i=0; i<data.rows(); ++i){
420    data(i,0)=1;
421    data(i,1)=x(i);
422    data(i,2)=x(i)*x(i);
423  }
424  regression::MultiDimensional md;
425  md.fit(data,y);
426  regression::MultiDimensionalWeighted mdw;
427  mdw.fit(data,y,w);
428  utility::vector z(3,1);
429  z(1)=2000;
430  z(2)=2000*2000;
431  if (md.predict(z) != mdw.predict(z)){
432    ok = false;
433    *error << "Error: predict not equal\n" 
434           << "   weighted: " << mdw.predict(z) << "\n"
435           << "   non-weighted: " << md.predict(z)
436           << std::endl;
437  }
438
439  if (md.standard_error2(z) != mdw.standard_error2(z)){
440    ok = false;
441    *error << "Error: standard_error2 not equal\n" 
442           << "   weighted: " << mdw.standard_error2(z) << "\n"
443           << "   non-weighted: " << md.standard_error2(z)
444           << std::endl;
445  }
446  if (fabs(md.prediction_error2(z)-mdw.prediction_error2(z,1.0))>10e-7){
447    ok = false;
448    *error << "Error: prediction_error2 not equal\n" 
449           << "   weighted: " << mdw.prediction_error2(z,1.0) << "\n"
450           << "   non-weighted: " << md.prediction_error2(z)
451           << std::endl;
452  }
453
454  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0.2;
455  mdw.fit(data,y,w);
456  double predict = mdw.predict(z);
457  double prediction_error2 = mdw.prediction_error2(z, 1.0);
458  double s2 = mdw.s2(1.0);
459  double standard_error2 = mdw.standard_error2(z);
460
461  w*=2;
462  mdw.fit(data,y,w);
463  if (fabs(mdw.predict(z)-predict)>10e-10){
464    ok = false;
465    *error << "Error: predict not equal after rescaling.\n";
466    *error << "   predict = " << predict << " and after doubling weights.\n";
467    *error << "   predict = " << mdw.predict(z) << "\n";
468  }
469  if (fabs(mdw.prediction_error2(z,2)-prediction_error2)>10e-7){ 
470    ok = false;
471    *error << "Error: prediction_error2 not equal after rescaling.\n";
472    *error << "       predict_error2 = " << prediction_error2
473           << " and after doubling weights.\n";
474    *error << "       predict_error2 = " << mdw.prediction_error2(z,2) << "\n";
475  }
476  if (fabs(mdw.s2(2)-s2)>10e-10){
477    ok = false;
478    *error << "Error: s2 not equal after rescaling.\n";
479    *error << "       s2 = " << s2 << " and after doubling weights.\n";
480    *error << "       s2 = " << mdw.s2(2) << "\n";
481  }
482  if (fabs(mdw.standard_error2(z)-standard_error2)>10e-7){
483    ok = false;
484    *error << "Error: standard_error2 not equal after rescaling.\n";
485    *error << " standard_error2 = " << standard_error2
486           << " and after doubling weights.\n";
487    *error << " standard_error2 = " << mdw.standard_error2(z) << "\n";
488  }
489
490  return ok;
491}
492
493
494bool Local_test(regression::OneDimensionalWeighted& r, 
495                regression::Kernel& k)
496{
497  regression::Local rl(r,k);
498  for (size_t i=0; i<1000; i++){
499    rl.add(i, 10);
500  }
501
502  rl.fit(10, 100);
503
504  utility::vector y(rl.y_predicted());
505  for (size_t i=0; i<y.size(); i++) 
506    if (y(i)!=10.0){
507      return false;
508    }
509  return true;
510}
Note: See TracBrowser for help on using the repository browser.