source: branches/0.8-stable/test/regression.cc @ 2811

Last change on this file since 2811 was 2811, checked in by Jari Häkkinen, 10 years ago

Resolving precision issues on my macbook pro.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 16.9 KB
Line 
1// $Id: regression.cc 2811 2012-08-16 14:06:05Z jari $
2
3/*
4  Copyright (C) 2005, 2006, 2007, 2008 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2009 Peter Johansson
6
7  This file is part of the yat library, http://dev.thep.lu.se/yat
8
9  The yat library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 3 of the
12  License, or (at your option) any later version.
13
14  The yat library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  General Public License for more details.
18
19  You should have received a copy of the GNU General Public License
20  along with yat. If not, see <http://www.gnu.org/licenses/>.
21*/
22
23#include "Suite.h"
24
25#include "yat/regression/KernelBox.h"
26#include "yat/regression/Linear.h"
27#include "yat/regression/LinearWeighted.h"
28#include "yat/regression/Local.h"
29#include "yat/regression/Naive.h"
30#include "yat/regression/NaiveWeighted.h"
31#include "yat/regression/Polynomial.h"
32#include "yat/regression/PolynomialWeighted.h"
33#include "yat/utility/Matrix.h"
34#include "yat/utility/Vector.h"
35
36#include <cmath>
37
38#include <fstream>
39#include <iostream>
40
41
42using namespace theplu::yat;
43
44void equal(regression::OneDimensional&, regression::OneDimensionalWeighted&, 
45           test::Suite&); 
46
47void multidim(test::Suite& suite);
48
49void unity_weights(regression::OneDimensional&, 
50                   regression::OneDimensionalWeighted&, 
51                   const utility::Vector&, const utility::Vector&,
52                   test::Suite&); 
53
54void rescale_weights(regression::OneDimensionalWeighted&, 
55                     const utility::Vector&, const utility::Vector&,
56                     test::Suite&); 
57
58void zero_weights(regression::OneDimensionalWeighted&, 
59                  const utility::Vector&, const utility::Vector&,
60                  test::Suite&); 
61
62
63bool Local_test(regression::OneDimensionalWeighted&, 
64                regression::Kernel&, test::Suite&);
65
66int main(int argc, char* argv[])
67{
68  test::Suite suite(argc, argv);
69
70  suite.err() << "  testing regression" << std::endl;
71
72  // test data for Linear and Naive (Weighted and non-weighted)
73  utility::Vector x(4); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000;
74  utility::Vector y(4); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;
75  utility::Vector w(4); w(0)=0.1;  w(1)=0.2;  w(2)=0.3;  w(3)=0.4;
76
77  // Comparing linear and polynomial(1)
78  regression::Linear linear;
79  linear.fit(x,y);
80  regression::Polynomial polynomial(1);
81  polynomial.fit(x,y);
82  if ( !suite.equal(linear.beta(),polynomial.fit_parameters()(1), 1000) ){
83    suite.err() << "error: beta and fit_parameters(1) not equal" << std::endl;
84    suite.err() << "       beta = " << linear.beta() << std::endl;
85    suite.err() << "       fit_parameters(1) = " 
86           << polynomial.fit_parameters()(1) << std::endl;
87    suite.add(false);
88  }
89  if (!suite.equal(polynomial.fit_parameters()(0),
90                   linear.alpha()-linear.beta()*1985, 10000)){
91    suite.err() << "error: fit_parameters(0) = " 
92           << polynomial.fit_parameters()(0)<< std::endl;
93    suite.err() << "error: alpha-beta*m_x = " 
94           << linear.alpha()-linear.beta()*1985 << std::endl;
95    suite.add(false);
96  }
97  if ( !suite.equal(polynomial.chisq(), linear.chisq(), 100) ){
98    suite.err() << "error: chisq not same in linear and polynomial(1)" 
99           << std::endl;
100    suite.add(false);
101  }
102  if ( !suite.equal(polynomial.predict(1.0),linear.predict(1.0), 1000) ){
103    suite.err() << "error: predict not same in linear and polynomial(1)" 
104           << std::endl;
105    suite.add(false);
106  }
107  if (!suite.equal(polynomial.standard_error2(1985),
108                   linear.standard_error2(1985), 100000)){
109    suite.err() << "error: standard_error not same in linear and polynomial(1)" 
110                << "\n  polynomial: " << polynomial.standard_error2(1985)
111                << "\n  linear: " << linear.standard_error2(1985)
112                << std::endl;
113    suite.add(false);
114  }
115
116  suite.err() << "  testing regression::LinearWeighted" << std::endl;
117  regression::LinearWeighted linear_w;
118  equal(linear, linear_w, suite);
119  linear_w.fit(x,y,w);
120  double y_predicted = linear_w.predict(1990);
121  double y_predicted_err = linear_w.prediction_error2(1990);
122  if (!suite.equal(y_predicted,12.8) ){
123    suite.err() << "error: cannot reproduce fit." << std::endl;
124    suite.err() << "predicted value: " << y_predicted << " expected 12.8" 
125           << std::endl;
126    suite.add(false);
127  }
128
129  // Comparing LinearWeighted and PolynomialWeighted(1)
130  suite.err() << "    comparing LinearWeighted and PolynomialWeighted(1)"
131              << std::endl;
132  linear_w.fit(x,y,w);
133  regression::PolynomialWeighted polynomial_w(1);
134  polynomial_w.fit(x,y,w);
135  if ( !suite.equal(linear_w.beta(), polynomial_w.fit_parameters()(1),10000) ){
136    suite.err() << "error: beta and fit_parameters(1) not equal" << std::endl;
137    suite.err() << "       beta = " << linear_w.beta() << std::endl;
138    suite.err() << "       fit_parameters(1) = " 
139           << polynomial_w.fit_parameters()(1) << std::endl;
140    suite.add(false);
141  }
142  if ( !suite.equal(polynomial_w.fit_parameters()(0),
143                    linear_w.alpha()-linear_w.beta()*1990, 10000) ){
144    suite.err() << "error: fit_parameters(0) = " 
145           << polynomial.fit_parameters()(0)<< std::endl;
146    suite.err() << "error: alpha-beta*m_x = " 
147           << linear.alpha()-linear.beta()*1990 << std::endl;
148    suite.add(false);
149  }
150  if ( !suite.equal(polynomial_w.s2(),linear_w.s2(), 10) ){
151    suite.err() << "error: chisq not same in linear and polynomial(1)" 
152           << std::endl;
153    suite.add(false);
154  }
155  if ( !suite.equal(polynomial_w.predict(1.0), linear_w.predict(1.0), 10000) ){
156    suite.err() << "error: predict not same in linear and polynomial(1)" 
157           << std::endl;
158    suite.add(false);
159  }
160  if ( !suite.equal(polynomial_w.standard_error2(1985), 
161                    linear_w.standard_error2(1985), 100000) ){
162    suite.err() << "error: standard_error not same in linear and polynomial(1)" 
163           << "\n  polynomial: " << polynomial_w.standard_error2(1985)
164           << "\n  linear: " << linear_w.standard_error2(1985)
165           << std::endl;
166    suite.add(false);
167  }
168
169  // testing regression::NaiveWeighted
170  suite.err() << "  testing regression::NaiveWeighted" << std::endl;
171  regression::NaiveWeighted naive_w;
172  regression::Naive naive;
173  equal(naive, naive_w, suite);
174  naive_w.fit(x,y,w);
175
176  y_predicted=naive_w.predict(0.0);
177  y_predicted_err=naive_w.prediction_error2(0.0);
178  if (!suite.equal(y_predicted,0.1*12+0.2*11+0.3*14+0.4*13)) {
179    suite.err() << "regression_NaiveWeighted: cannot reproduce fit.\n";
180    suite.err() << "returned value: " << y_predicted << std::endl;
181    suite.err() << "expected: " << 0.1*12+0.2*11+0.3*14+0.4*13 << std::endl;
182    suite.add(false);
183  }
184
185  // testing regression::Local
186  suite.err() << "  testing regression::Local" << std::endl;
187  regression::KernelBox kb;
188  regression::LinearWeighted rl;
189  if (!Local_test(rl,kb, suite)) {
190    suite.err() << "regression_Local: Linear cannot reproduce fit." << std::endl;
191    suite.add(false);
192  }
193  regression::NaiveWeighted rn;
194  if (!Local_test(rn,kb, suite)) {
195    suite.err() << "regression_Local: Naive cannot reproduce fit." << std::endl;
196    suite.add(false);
197  }
198
199  // testing regression::Polynomial
200  suite.err() << "  testing regression::Polynomial" << std::endl;
201  {
202    std::ifstream s(test::filename("data/regression_gauss.data").c_str());
203    utility::Matrix data(s);
204    utility::Vector x(data.rows());
205    utility::Vector ln_y(data.rows());
206    for (size_t i=0; i<data.rows(); ++i) {
207      x(i)=data(i,0);
208      ln_y(i)=log(data(i,1));
209    }
210
211    regression::Polynomial polynomialfit(2);
212    polynomialfit.fit(x,ln_y);
213    utility::Vector fit=polynomialfit.fit_parameters();
214    suite.add(suite.equal_fix(fit(0), 1.012229646706, 1e-11));
215    suite.add(suite.equal_fix(fit(1), 0.012561322528, 1e-11));
216    suite.add(suite.equal_fix(fit(2), -1.159674470130, 1e-11));
217  }
218
219  suite.err() << "  testing regression::PolynomialWeighted" << std::endl;
220  regression::PolynomialWeighted pol_weighted(2);
221  regression::Polynomial polynomial2(2);
222  equal(polynomial2, pol_weighted, suite);
223
224  multidim(suite);
225
226  return suite.return_value();
227}
228
229
230void equal(regression::OneDimensional& r, 
231           regression::OneDimensionalWeighted& wr, 
232           test::Suite& suite)
233{
234  utility::Vector x(5); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000; x(4)=2010;
235  utility::Vector y(5); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;   y(4)=15;
236
237  unity_weights(r, wr, x, y, suite);
238  rescale_weights(wr, x, y, suite);
239  zero_weights(wr, x, y, suite);
240}
241
242
243void unity_weights(regression::OneDimensional& r, 
244                   regression::OneDimensionalWeighted& rw, 
245                   const utility::Vector& x, const utility::Vector& y,
246                   test::Suite& suite)
247{
248  suite.err() << "    testing unity weights equal to non-weighted version.\n"; 
249  utility::Vector w(x.size(), 1.0);
250  r.fit(x,y);
251  rw.fit(x,y,w);
252  if (!suite.equal(r.predict(2000), rw.predict(2000)) ) {
253    suite.add(false);
254    suite.err() << "Error: predict not equal\n" 
255           << "   weighted: " << rw.predict(2000) << "\n"
256           << "   non-weighted: " << r.predict(2000)
257           << std::endl;
258  }
259  if (!suite.equal(r.s2(), rw.s2(1.0)) ){
260    suite.add(false);
261    suite.err() << "Error: s2 not equal non-weighted version." << std::endl;
262    suite.err() << "weighted s2 = " << rw.s2(1.0) << std::endl;
263    suite.err() << "non-weighted s2 = " << r.s2() << std::endl;
264  }
265  if (!suite.equal_sqrt(r.standard_error2(2000), rw.standard_error2(2000), 20)){
266    suite.add(false);
267    suite.err() << "Error: standard_error not equal non-weighted version." 
268           << std::endl;
269  }
270  if (!suite.equal(r.r2(), rw.r2()) ){
271    suite.add(false);
272    suite.err() << "Error: r2 not equal non-weighted version." << std::endl;
273    suite.err() << "weighted r2 = " << rw.r2() << std::endl;
274    suite.err() << "non-weighted r2 = " << r.r2() << std::endl;
275  }
276  if (!suite.equal_sqrt(r.prediction_error2(2000), rw.prediction_error2(2000,1),
277                        100) ){
278    suite.add(false);
279    suite.err() << "Error: prediction_error2 not equal non-weighted version.\n" 
280           << "       weighted: " << rw.prediction_error2(2000,1) << "\n"
281           << "   non-weighted: " << r.prediction_error2(2000)
282           << std::endl;
283  }
284} 
285
286
287void rescale_weights(regression::OneDimensionalWeighted& wr, 
288                     const utility::Vector& x, const utility::Vector& y,
289                     test::Suite& suite)
290{
291  suite.err() << "    testing rescaling weights.\n"; 
292  utility::Vector w(5);  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0.2;
293  wr.fit(x,y,w);
294  double predict = wr.predict(2000);
295  double prediction_error2 = wr.prediction_error2(2000);
296  double r2 = wr.r2();
297  double s2 = wr.s2();
298  double standard_error2 = wr.standard_error2(2000);
299
300  w*=2;
301  wr.fit(x,y,w);
302  if (!suite.equal(wr.predict(2000), predict, 10000) ){
303    suite.add(false);
304    suite.err() << "Error: predict not equal after rescaling.\n";
305    suite.err() << "       predict = " << predict
306           << " and after doubling weights.\n";
307    suite.err() << "       predict = " << wr.predict(2000) << "\n";
308  }
309  if (!suite.equal(wr.s2(2), s2, 14000) ){
310    suite.add(false);
311    suite.err() << "Error: s2 not equal after rescaling.\n";
312    suite.err() << "       s2 = " << s2 << " and after doubling weights.\n";
313    suite.err() << "       s2 = " << wr.s2(2) << "\n";
314    suite.err() << "difference " << s2-wr.s2(2.0) << std::endl;
315  }
316  if (!suite.equal_sqrt(wr.standard_error2(2000), standard_error2, 100) ){
317    suite.add(false);
318    suite.err() << "Error: standard_error2 not equal after rescaling.\n";
319    suite.err() << "       standard_error2 = " << standard_error2
320           << " and after doubling weights.\n";
321    suite.err() << "       standard_error2 = " 
322           << wr.standard_error2(2000) << "\n";
323    suite.err() << " difference: " << wr.standard_error2(2000)-standard_error2
324           << std::endl;
325  }
326  if (!suite.equal(wr.r2(), r2, 10000) ){
327    suite.add(false);
328    suite.err() << "Error: r2 not equal after rescaling.\n";
329  }
330  if (!suite.equal_sqrt(wr.prediction_error2(2000,2), prediction_error2, 10) ){
331    suite.add(false);
332    suite.err() << "Error: prediction_error2 not equal after rescaling.\n";
333    suite.err() << "       prediction_error2 = " << prediction_error2
334           << " and after doubling weights.\n";
335    suite.err() << "       prediction_error2 = " 
336           << wr.prediction_error2(2000,2) << "\n";
337  }
338}
339
340
341void zero_weights(regression::OneDimensionalWeighted& wr, 
342                  const utility::Vector& x, const utility::Vector& y,
343                  test::Suite& suite)
344{
345  suite.err() << "    testing zero weights equal to missing value.\n"; 
346  utility::Vector w(5);  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0;
347  wr.fit(x,y,w);
348  double predict = wr.predict(2000);
349  double prediction_error2 = wr.prediction_error2(2000);
350  double r2 = wr.r2();
351  double s2 = wr.s2();
352  double standard_error2 = wr.standard_error2(2000);
353
354  utility::Vector x2(4);
355  utility::Vector y2(4);
356  utility::Vector w2(4);
357  for (size_t i=0; i<4; ++i){
358    x2(i) = x(i);
359    y2(i) = y(i);
360    w2(i) = w(i);
361  }
362
363  wr.fit(x2,y2,w2);
364  if (!suite.equal(wr.predict(2000), predict, 10000) ) {
365    suite.add(false);
366    suite.err() << "Error: predict not equal.\n";
367    suite.err() << "       weighted predict: " << wr.predict(2000) << "\n";
368    suite.err() << "       unweighted predict: " << predict << "\n";
369    suite.err() << "       difference: " << wr.predict(2000)-predict << "\n";
370
371  }
372  if (!suite.equal(wr.prediction_error2(2000), prediction_error2) ) {
373    suite.add(false);
374    suite.err() << "Error: prediction_error2 not equal.\n";
375  }
376  if (!suite.equal(wr.r2(), r2) ) {
377    suite.add(false);
378    suite.err() << "Error: r2 not equal.\n";
379    suite.err() << "   r2: " << r2 << "\n";
380    suite.err() << "   r2: " << wr.r2() << "\n";
381  }
382  if (!suite.equal(wr.s2(), s2) ) {
383    suite.add(false);
384    suite.err() << "Error: s2 not equal.\n";
385  }
386  if (!suite.equal(wr.standard_error2(2000), standard_error2) ) {
387    suite.add(false);
388    suite.err() << "Error: standard_error2 not equal.\n";
389  }
390}
391
392
393void multidim(test::Suite& suite)
394{
395  suite.err() << "  testing regression::MultiDimensionalWeighted" << std::endl;
396  utility::Vector x(5); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000; x(4)=2010;
397  utility::Vector y(5); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;   y(4)=15;
398  utility::Vector w(5,1.0);
399 
400  utility::Matrix data(5,3);
401  for (size_t i=0; i<data.rows(); ++i){
402    data(i,0)=1;
403    data(i,1)=x(i);
404    data(i,2)=x(i)*x(i);
405  }
406  regression::MultiDimensional md;
407  md.fit(data,y);
408  regression::MultiDimensionalWeighted mdw;
409  mdw.fit(data,y,w);
410  utility::Vector z(3,1);
411  z(1)=2000;
412  z(2)=2000*2000;
413  if (!suite.equal(md.predict(z), mdw.predict(z))){
414    suite.add(false);
415    suite.err() << "Error: predict not equal\n" 
416           << "   weighted: " << mdw.predict(z) << "\n"
417           << "   non-weighted: " << md.predict(z)
418           << std::endl;
419  }
420
421  if (!suite.equal_sqrt(md.standard_error2(z), mdw.standard_error2(z),20) ){
422    suite.add(false);
423    suite.err() << "Error: standard_error2 not equal\n" 
424           << "   weighted: " << mdw.standard_error2(z) << "\n"
425           << "   non-weighted: " << md.standard_error2(z)
426           << std::endl;
427  }
428  if (!suite.equal_sqrt(md.prediction_error2(z), mdw.prediction_error2(z,1.0),
429                        20) ){
430    suite.add(false);
431    suite.err() << "Error: prediction_error2 not equal\n" 
432           << "   weighted: " << mdw.prediction_error2(z,1.0) << "\n"
433           << "   non-weighted: " << md.prediction_error2(z)
434           << std::endl;
435  }
436
437  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0.2;
438  mdw.fit(data,y,w);
439  double predict = mdw.predict(z);
440  double prediction_error2 = mdw.prediction_error2(z, 1.0);
441  double s2 = mdw.s2(1.0);
442  double standard_error2 = mdw.standard_error2(z);
443
444  w*=2;
445  mdw.fit(data,y,w);
446  if (!suite.equal(mdw.predict(z), predict, 10000) ){
447    suite.add(false);
448    suite.err() << "Error: predict not equal after rescaling.\n";
449    suite.err() << "   predict = " << predict
450                << " and after doubling weights.\n";
451    suite.err() << "   predict = " << mdw.predict(z) << "\n";
452  }
453  if (!suite.equal_sqrt(mdw.prediction_error2(z,2), prediction_error2,10) ){ 
454    suite.add(false);
455    suite.err() << "Error: prediction_error2 not equal after rescaling.\n";
456    suite.err() << "       predict_error2 = " << prediction_error2
457           << " and after doubling weights.\n";
458    suite.err() << "       predict_error2 = " 
459                << mdw.prediction_error2(z,2) << "\n";
460  }
461  if (!suite.equal(mdw.s2(2), s2, 14000) ){
462    suite.add(false);
463    suite.err() << "Error: s2 not equal after rescaling.\n";
464    suite.err() << "       s2 = " << s2 << " and after doubling weights.\n";
465    suite.err() << "       s2 = " << mdw.s2(2) << "\n";
466  }
467  if (!suite.equal_sqrt(mdw.standard_error2(z), standard_error2, 100) ){
468    suite.add(false);
469    suite.err() << "Error: standard_error2 not equal after rescaling.\n";
470    suite.err() << " standard_error2 = " << standard_error2
471           << " and after doubling weights.\n";
472    suite.err() << " standard_error2 = " << mdw.standard_error2(z) << "\n";
473  }
474}
475
476
477bool Local_test(regression::OneDimensionalWeighted& r, 
478                regression::Kernel& k, test::Suite& suite)
479{
480  regression::Local rl(r,k);
481  for (size_t i=0; i<1000; i++){
482    rl.add(i, 10);
483  }
484
485  rl.fit(10, 100);
486  if (rl.x().size()!=1000/10)
487    return false;
488  for (size_t i=0; i+1<rl.x().size(); ++i)
489    if (!suite.equal(rl.x()(i+1)-rl.x()(i),10.0))
490      return false;
491
492  utility::Vector y(rl.y_predicted());
493  for (size_t i=0; i<y.size(); i++) 
494    if (!suite.equal(y(i),10.0)){
495      return false;
496    }
497
498  return true;
499}
Note: See TracBrowser for help on using the repository browser.