source: branches/0.8-stable/test/regression.cc @ 2813

Last change on this file since 2813 was 2813, checked in by Peter, 10 years ago

update copyright years

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 16.9 KB
Line 
1// $Id: regression.cc 2813 2012-08-16 23:18:37Z peter $
2
3/*
4  Copyright (C) 2005, 2006, 2007, 2008 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2009 Peter Johansson
6  Copyright (C) 2012 Jari Häkkinen
7
8  This file is part of the yat library, http://dev.thep.lu.se/yat
9
10  The yat library is free software; you can redistribute it and/or
11  modify it under the terms of the GNU General Public License as
12  published by the Free Software Foundation; either version 3 of the
13  License, or (at your option) any later version.
14
15  The yat library is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  General Public License for more details.
19
20  You should have received a copy of the GNU General Public License
21  along with yat. If not, see <http://www.gnu.org/licenses/>.
22*/
23
24#include "Suite.h"
25
26#include "yat/regression/KernelBox.h"
27#include "yat/regression/Linear.h"
28#include "yat/regression/LinearWeighted.h"
29#include "yat/regression/Local.h"
30#include "yat/regression/Naive.h"
31#include "yat/regression/NaiveWeighted.h"
32#include "yat/regression/Polynomial.h"
33#include "yat/regression/PolynomialWeighted.h"
34#include "yat/utility/Matrix.h"
35#include "yat/utility/Vector.h"
36
37#include <cmath>
38
39#include <fstream>
40#include <iostream>
41
42
43using namespace theplu::yat;
44
45void equal(regression::OneDimensional&, regression::OneDimensionalWeighted&, 
46           test::Suite&); 
47
48void multidim(test::Suite& suite);
49
50void unity_weights(regression::OneDimensional&, 
51                   regression::OneDimensionalWeighted&, 
52                   const utility::Vector&, const utility::Vector&,
53                   test::Suite&); 
54
55void rescale_weights(regression::OneDimensionalWeighted&, 
56                     const utility::Vector&, const utility::Vector&,
57                     test::Suite&); 
58
59void zero_weights(regression::OneDimensionalWeighted&, 
60                  const utility::Vector&, const utility::Vector&,
61                  test::Suite&); 
62
63
64bool Local_test(regression::OneDimensionalWeighted&, 
65                regression::Kernel&, test::Suite&);
66
67int main(int argc, char* argv[])
68{
69  test::Suite suite(argc, argv);
70
71  suite.err() << "  testing regression" << std::endl;
72
73  // test data for Linear and Naive (Weighted and non-weighted)
74  utility::Vector x(4); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000;
75  utility::Vector y(4); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;
76  utility::Vector w(4); w(0)=0.1;  w(1)=0.2;  w(2)=0.3;  w(3)=0.4;
77
78  // Comparing linear and polynomial(1)
79  regression::Linear linear;
80  linear.fit(x,y);
81  regression::Polynomial polynomial(1);
82  polynomial.fit(x,y);
83  if ( !suite.equal(linear.beta(),polynomial.fit_parameters()(1), 1000) ){
84    suite.err() << "error: beta and fit_parameters(1) not equal" << std::endl;
85    suite.err() << "       beta = " << linear.beta() << std::endl;
86    suite.err() << "       fit_parameters(1) = " 
87           << polynomial.fit_parameters()(1) << std::endl;
88    suite.add(false);
89  }
90  if (!suite.equal(polynomial.fit_parameters()(0),
91                   linear.alpha()-linear.beta()*1985, 10000)){
92    suite.err() << "error: fit_parameters(0) = " 
93           << polynomial.fit_parameters()(0)<< std::endl;
94    suite.err() << "error: alpha-beta*m_x = " 
95           << linear.alpha()-linear.beta()*1985 << std::endl;
96    suite.add(false);
97  }
98  if ( !suite.equal(polynomial.chisq(), linear.chisq(), 100) ){
99    suite.err() << "error: chisq not same in linear and polynomial(1)" 
100           << std::endl;
101    suite.add(false);
102  }
103  if ( !suite.equal(polynomial.predict(1.0),linear.predict(1.0), 1000) ){
104    suite.err() << "error: predict not same in linear and polynomial(1)" 
105           << std::endl;
106    suite.add(false);
107  }
108  if (!suite.equal(polynomial.standard_error2(1985),
109                   linear.standard_error2(1985), 100000)){
110    suite.err() << "error: standard_error not same in linear and polynomial(1)" 
111                << "\n  polynomial: " << polynomial.standard_error2(1985)
112                << "\n  linear: " << linear.standard_error2(1985)
113                << std::endl;
114    suite.add(false);
115  }
116
117  suite.err() << "  testing regression::LinearWeighted" << std::endl;
118  regression::LinearWeighted linear_w;
119  equal(linear, linear_w, suite);
120  linear_w.fit(x,y,w);
121  double y_predicted = linear_w.predict(1990);
122  double y_predicted_err = linear_w.prediction_error2(1990);
123  if (!suite.equal(y_predicted,12.8) ){
124    suite.err() << "error: cannot reproduce fit." << std::endl;
125    suite.err() << "predicted value: " << y_predicted << " expected 12.8" 
126           << std::endl;
127    suite.add(false);
128  }
129
130  // Comparing LinearWeighted and PolynomialWeighted(1)
131  suite.err() << "    comparing LinearWeighted and PolynomialWeighted(1)"
132              << std::endl;
133  linear_w.fit(x,y,w);
134  regression::PolynomialWeighted polynomial_w(1);
135  polynomial_w.fit(x,y,w);
136  if ( !suite.equal(linear_w.beta(), polynomial_w.fit_parameters()(1),10000) ){
137    suite.err() << "error: beta and fit_parameters(1) not equal" << std::endl;
138    suite.err() << "       beta = " << linear_w.beta() << std::endl;
139    suite.err() << "       fit_parameters(1) = " 
140           << polynomial_w.fit_parameters()(1) << std::endl;
141    suite.add(false);
142  }
143  if ( !suite.equal(polynomial_w.fit_parameters()(0),
144                    linear_w.alpha()-linear_w.beta()*1990, 10000) ){
145    suite.err() << "error: fit_parameters(0) = " 
146           << polynomial.fit_parameters()(0)<< std::endl;
147    suite.err() << "error: alpha-beta*m_x = " 
148           << linear.alpha()-linear.beta()*1990 << std::endl;
149    suite.add(false);
150  }
151  if ( !suite.equal(polynomial_w.s2(),linear_w.s2(), 10) ){
152    suite.err() << "error: chisq not same in linear and polynomial(1)" 
153           << std::endl;
154    suite.add(false);
155  }
156  if ( !suite.equal(polynomial_w.predict(1.0), linear_w.predict(1.0), 10000) ){
157    suite.err() << "error: predict not same in linear and polynomial(1)" 
158           << std::endl;
159    suite.add(false);
160  }
161  if ( !suite.equal(polynomial_w.standard_error2(1985), 
162                    linear_w.standard_error2(1985), 100000) ){
163    suite.err() << "error: standard_error not same in linear and polynomial(1)" 
164           << "\n  polynomial: " << polynomial_w.standard_error2(1985)
165           << "\n  linear: " << linear_w.standard_error2(1985)
166           << std::endl;
167    suite.add(false);
168  }
169
170  // testing regression::NaiveWeighted
171  suite.err() << "  testing regression::NaiveWeighted" << std::endl;
172  regression::NaiveWeighted naive_w;
173  regression::Naive naive;
174  equal(naive, naive_w, suite);
175  naive_w.fit(x,y,w);
176
177  y_predicted=naive_w.predict(0.0);
178  y_predicted_err=naive_w.prediction_error2(0.0);
179  if (!suite.equal(y_predicted,0.1*12+0.2*11+0.3*14+0.4*13)) {
180    suite.err() << "regression_NaiveWeighted: cannot reproduce fit.\n";
181    suite.err() << "returned value: " << y_predicted << std::endl;
182    suite.err() << "expected: " << 0.1*12+0.2*11+0.3*14+0.4*13 << std::endl;
183    suite.add(false);
184  }
185
186  // testing regression::Local
187  suite.err() << "  testing regression::Local" << std::endl;
188  regression::KernelBox kb;
189  regression::LinearWeighted rl;
190  if (!Local_test(rl,kb, suite)) {
191    suite.err() << "regression_Local: Linear cannot reproduce fit." << std::endl;
192    suite.add(false);
193  }
194  regression::NaiveWeighted rn;
195  if (!Local_test(rn,kb, suite)) {
196    suite.err() << "regression_Local: Naive cannot reproduce fit." << std::endl;
197    suite.add(false);
198  }
199
200  // testing regression::Polynomial
201  suite.err() << "  testing regression::Polynomial" << std::endl;
202  {
203    std::ifstream s(test::filename("data/regression_gauss.data").c_str());
204    utility::Matrix data(s);
205    utility::Vector x(data.rows());
206    utility::Vector ln_y(data.rows());
207    for (size_t i=0; i<data.rows(); ++i) {
208      x(i)=data(i,0);
209      ln_y(i)=log(data(i,1));
210    }
211
212    regression::Polynomial polynomialfit(2);
213    polynomialfit.fit(x,ln_y);
214    utility::Vector fit=polynomialfit.fit_parameters();
215    suite.add(suite.equal_fix(fit(0), 1.012229646706, 1e-11));
216    suite.add(suite.equal_fix(fit(1), 0.012561322528, 1e-11));
217    suite.add(suite.equal_fix(fit(2), -1.159674470130, 1e-11));
218  }
219
220  suite.err() << "  testing regression::PolynomialWeighted" << std::endl;
221  regression::PolynomialWeighted pol_weighted(2);
222  regression::Polynomial polynomial2(2);
223  equal(polynomial2, pol_weighted, suite);
224
225  multidim(suite);
226
227  return suite.return_value();
228}
229
230
231void equal(regression::OneDimensional& r, 
232           regression::OneDimensionalWeighted& wr, 
233           test::Suite& suite)
234{
235  utility::Vector x(5); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000; x(4)=2010;
236  utility::Vector y(5); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;   y(4)=15;
237
238  unity_weights(r, wr, x, y, suite);
239  rescale_weights(wr, x, y, suite);
240  zero_weights(wr, x, y, suite);
241}
242
243
244void unity_weights(regression::OneDimensional& r, 
245                   regression::OneDimensionalWeighted& rw, 
246                   const utility::Vector& x, const utility::Vector& y,
247                   test::Suite& suite)
248{
249  suite.err() << "    testing unity weights equal to non-weighted version.\n"; 
250  utility::Vector w(x.size(), 1.0);
251  r.fit(x,y);
252  rw.fit(x,y,w);
253  if (!suite.equal(r.predict(2000), rw.predict(2000)) ) {
254    suite.add(false);
255    suite.err() << "Error: predict not equal\n" 
256           << "   weighted: " << rw.predict(2000) << "\n"
257           << "   non-weighted: " << r.predict(2000)
258           << std::endl;
259  }
260  if (!suite.equal(r.s2(), rw.s2(1.0)) ){
261    suite.add(false);
262    suite.err() << "Error: s2 not equal non-weighted version." << std::endl;
263    suite.err() << "weighted s2 = " << rw.s2(1.0) << std::endl;
264    suite.err() << "non-weighted s2 = " << r.s2() << std::endl;
265  }
266  if (!suite.equal_sqrt(r.standard_error2(2000), rw.standard_error2(2000), 20)){
267    suite.add(false);
268    suite.err() << "Error: standard_error not equal non-weighted version." 
269           << std::endl;
270  }
271  if (!suite.equal(r.r2(), rw.r2()) ){
272    suite.add(false);
273    suite.err() << "Error: r2 not equal non-weighted version." << std::endl;
274    suite.err() << "weighted r2 = " << rw.r2() << std::endl;
275    suite.err() << "non-weighted r2 = " << r.r2() << std::endl;
276  }
277  if (!suite.equal_sqrt(r.prediction_error2(2000), rw.prediction_error2(2000,1),
278                        100) ){
279    suite.add(false);
280    suite.err() << "Error: prediction_error2 not equal non-weighted version.\n" 
281           << "       weighted: " << rw.prediction_error2(2000,1) << "\n"
282           << "   non-weighted: " << r.prediction_error2(2000)
283           << std::endl;
284  }
285} 
286
287
288void rescale_weights(regression::OneDimensionalWeighted& wr, 
289                     const utility::Vector& x, const utility::Vector& y,
290                     test::Suite& suite)
291{
292  suite.err() << "    testing rescaling weights.\n"; 
293  utility::Vector w(5);  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0.2;
294  wr.fit(x,y,w);
295  double predict = wr.predict(2000);
296  double prediction_error2 = wr.prediction_error2(2000);
297  double r2 = wr.r2();
298  double s2 = wr.s2();
299  double standard_error2 = wr.standard_error2(2000);
300
301  w*=2;
302  wr.fit(x,y,w);
303  if (!suite.equal(wr.predict(2000), predict, 10000) ){
304    suite.add(false);
305    suite.err() << "Error: predict not equal after rescaling.\n";
306    suite.err() << "       predict = " << predict
307           << " and after doubling weights.\n";
308    suite.err() << "       predict = " << wr.predict(2000) << "\n";
309  }
310  if (!suite.equal(wr.s2(2), s2, 14000) ){
311    suite.add(false);
312    suite.err() << "Error: s2 not equal after rescaling.\n";
313    suite.err() << "       s2 = " << s2 << " and after doubling weights.\n";
314    suite.err() << "       s2 = " << wr.s2(2) << "\n";
315    suite.err() << "difference " << s2-wr.s2(2.0) << std::endl;
316  }
317  if (!suite.equal_sqrt(wr.standard_error2(2000), standard_error2, 100) ){
318    suite.add(false);
319    suite.err() << "Error: standard_error2 not equal after rescaling.\n";
320    suite.err() << "       standard_error2 = " << standard_error2
321           << " and after doubling weights.\n";
322    suite.err() << "       standard_error2 = " 
323           << wr.standard_error2(2000) << "\n";
324    suite.err() << " difference: " << wr.standard_error2(2000)-standard_error2
325           << std::endl;
326  }
327  if (!suite.equal(wr.r2(), r2, 10000) ){
328    suite.add(false);
329    suite.err() << "Error: r2 not equal after rescaling.\n";
330  }
331  if (!suite.equal_sqrt(wr.prediction_error2(2000,2), prediction_error2, 10) ){
332    suite.add(false);
333    suite.err() << "Error: prediction_error2 not equal after rescaling.\n";
334    suite.err() << "       prediction_error2 = " << prediction_error2
335           << " and after doubling weights.\n";
336    suite.err() << "       prediction_error2 = " 
337           << wr.prediction_error2(2000,2) << "\n";
338  }
339}
340
341
342void zero_weights(regression::OneDimensionalWeighted& wr, 
343                  const utility::Vector& x, const utility::Vector& y,
344                  test::Suite& suite)
345{
346  suite.err() << "    testing zero weights equal to missing value.\n"; 
347  utility::Vector w(5);  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0;
348  wr.fit(x,y,w);
349  double predict = wr.predict(2000);
350  double prediction_error2 = wr.prediction_error2(2000);
351  double r2 = wr.r2();
352  double s2 = wr.s2();
353  double standard_error2 = wr.standard_error2(2000);
354
355  utility::Vector x2(4);
356  utility::Vector y2(4);
357  utility::Vector w2(4);
358  for (size_t i=0; i<4; ++i){
359    x2(i) = x(i);
360    y2(i) = y(i);
361    w2(i) = w(i);
362  }
363
364  wr.fit(x2,y2,w2);
365  if (!suite.equal(wr.predict(2000), predict, 10000) ) {
366    suite.add(false);
367    suite.err() << "Error: predict not equal.\n";
368    suite.err() << "       weighted predict: " << wr.predict(2000) << "\n";
369    suite.err() << "       unweighted predict: " << predict << "\n";
370    suite.err() << "       difference: " << wr.predict(2000)-predict << "\n";
371
372  }
373  if (!suite.equal(wr.prediction_error2(2000), prediction_error2) ) {
374    suite.add(false);
375    suite.err() << "Error: prediction_error2 not equal.\n";
376  }
377  if (!suite.equal(wr.r2(), r2) ) {
378    suite.add(false);
379    suite.err() << "Error: r2 not equal.\n";
380    suite.err() << "   r2: " << r2 << "\n";
381    suite.err() << "   r2: " << wr.r2() << "\n";
382  }
383  if (!suite.equal(wr.s2(), s2) ) {
384    suite.add(false);
385    suite.err() << "Error: s2 not equal.\n";
386  }
387  if (!suite.equal(wr.standard_error2(2000), standard_error2) ) {
388    suite.add(false);
389    suite.err() << "Error: standard_error2 not equal.\n";
390  }
391}
392
393
394void multidim(test::Suite& suite)
395{
396  suite.err() << "  testing regression::MultiDimensionalWeighted" << std::endl;
397  utility::Vector x(5); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000; x(4)=2010;
398  utility::Vector y(5); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;   y(4)=15;
399  utility::Vector w(5,1.0);
400 
401  utility::Matrix data(5,3);
402  for (size_t i=0; i<data.rows(); ++i){
403    data(i,0)=1;
404    data(i,1)=x(i);
405    data(i,2)=x(i)*x(i);
406  }
407  regression::MultiDimensional md;
408  md.fit(data,y);
409  regression::MultiDimensionalWeighted mdw;
410  mdw.fit(data,y,w);
411  utility::Vector z(3,1);
412  z(1)=2000;
413  z(2)=2000*2000;
414  if (!suite.equal(md.predict(z), mdw.predict(z))){
415    suite.add(false);
416    suite.err() << "Error: predict not equal\n" 
417           << "   weighted: " << mdw.predict(z) << "\n"
418           << "   non-weighted: " << md.predict(z)
419           << std::endl;
420  }
421
422  if (!suite.equal_sqrt(md.standard_error2(z), mdw.standard_error2(z),20) ){
423    suite.add(false);
424    suite.err() << "Error: standard_error2 not equal\n" 
425           << "   weighted: " << mdw.standard_error2(z) << "\n"
426           << "   non-weighted: " << md.standard_error2(z)
427           << std::endl;
428  }
429  if (!suite.equal_sqrt(md.prediction_error2(z), mdw.prediction_error2(z,1.0),
430                        20) ){
431    suite.add(false);
432    suite.err() << "Error: prediction_error2 not equal\n" 
433           << "   weighted: " << mdw.prediction_error2(z,1.0) << "\n"
434           << "   non-weighted: " << md.prediction_error2(z)
435           << std::endl;
436  }
437
438  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0.2;
439  mdw.fit(data,y,w);
440  double predict = mdw.predict(z);
441  double prediction_error2 = mdw.prediction_error2(z, 1.0);
442  double s2 = mdw.s2(1.0);
443  double standard_error2 = mdw.standard_error2(z);
444
445  w*=2;
446  mdw.fit(data,y,w);
447  if (!suite.equal(mdw.predict(z), predict, 10000) ){
448    suite.add(false);
449    suite.err() << "Error: predict not equal after rescaling.\n";
450    suite.err() << "   predict = " << predict
451                << " and after doubling weights.\n";
452    suite.err() << "   predict = " << mdw.predict(z) << "\n";
453  }
454  if (!suite.equal_sqrt(mdw.prediction_error2(z,2), prediction_error2,10) ){ 
455    suite.add(false);
456    suite.err() << "Error: prediction_error2 not equal after rescaling.\n";
457    suite.err() << "       predict_error2 = " << prediction_error2
458           << " and after doubling weights.\n";
459    suite.err() << "       predict_error2 = " 
460                << mdw.prediction_error2(z,2) << "\n";
461  }
462  if (!suite.equal(mdw.s2(2), s2, 14000) ){
463    suite.add(false);
464    suite.err() << "Error: s2 not equal after rescaling.\n";
465    suite.err() << "       s2 = " << s2 << " and after doubling weights.\n";
466    suite.err() << "       s2 = " << mdw.s2(2) << "\n";
467  }
468  if (!suite.equal_sqrt(mdw.standard_error2(z), standard_error2, 100) ){
469    suite.add(false);
470    suite.err() << "Error: standard_error2 not equal after rescaling.\n";
471    suite.err() << " standard_error2 = " << standard_error2
472           << " and after doubling weights.\n";
473    suite.err() << " standard_error2 = " << mdw.standard_error2(z) << "\n";
474  }
475}
476
477
478bool Local_test(regression::OneDimensionalWeighted& r, 
479                regression::Kernel& k, test::Suite& suite)
480{
481  regression::Local rl(r,k);
482  for (size_t i=0; i<1000; i++){
483    rl.add(i, 10);
484  }
485
486  rl.fit(10, 100);
487  if (rl.x().size()!=1000/10)
488    return false;
489  for (size_t i=0; i+1<rl.x().size(); ++i)
490    if (!suite.equal(rl.x()(i+1)-rl.x()(i),10.0))
491      return false;
492
493  utility::Vector y(rl.y_predicted());
494  for (size_t i=0; i<y.size(); i++) 
495    if (!suite.equal(y(i),10.0)){
496      return false;
497    }
498
499  return true;
500}
Note: See TracBrowser for help on using the repository browser.