source: trunk/test/regression.cc @ 3814

Last change on this file since 3814 was 3814, checked in by Peter, 3 years ago

decrease stringency in test, to avoid spurious test failure on osx
with clang v9.

reported by jari

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 17.2 KB
Line 
1// $Id: regression.cc 3814 2019-07-10 23:56:55Z peter $
2
3/*
4  Copyright (C) 2005, 2006, 2007, 2008 Jari Häkkinen, Peter Johansson
5  Copyright (C) 2009 Peter Johansson
6  Copyright (C) 2012 Jari Häkkinen, Peter Johansson
7  Copyright (C) 2013 Peter Johansson
8  Copyright (C) 2016 Jari Häkkinen
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include <config.h>
27
28#include "Suite.h"
29
30#include "yat/regression/KernelBox.h"
31#include "yat/regression/Linear.h"
32#include "yat/regression/LinearWeighted.h"
33#include "yat/regression/Local.h"
34#include "yat/regression/Naive.h"
35#include "yat/regression/NaiveWeighted.h"
36#include "yat/regression/Polynomial.h"
37#include "yat/regression/PolynomialWeighted.h"
38#include "yat/utility/Matrix.h"
39#include "yat/utility/Vector.h"
40
41#include <cmath>
42
43#include <fstream>
44#include <iostream>
45
46
47using namespace theplu::yat;
48
49void equal(regression::OneDimensional&, regression::OneDimensionalWeighted&, 
50           test::Suite&); 
51
52void multidim(test::Suite& suite);
53
54void unity_weights(regression::OneDimensional&, 
55                   regression::OneDimensionalWeighted&, 
56                   const utility::Vector&, const utility::Vector&,
57                   test::Suite&); 
58
59void rescale_weights(regression::OneDimensionalWeighted&, 
60                     const utility::Vector&, const utility::Vector&,
61                     test::Suite&); 
62
63void zero_weights(regression::OneDimensionalWeighted&, 
64                  const utility::Vector&, const utility::Vector&,
65                  test::Suite&); 
66
67
68bool Local_test(regression::OneDimensionalWeighted&, 
69                regression::Kernel&, test::Suite&);
70
71int main(int argc, char* argv[])
72{
73  test::Suite suite(argc, argv);
74
75  suite.err() << "  testing regression" << std::endl;
76
77  // test data for Linear and Naive (Weighted and non-weighted)
78  utility::Vector x(4); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000;
79  utility::Vector y(4); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;
80  utility::Vector w(4); w(0)=0.1;  w(1)=0.2;  w(2)=0.3;  w(3)=0.4;
81
82  // Comparing linear and polynomial(1)
83  regression::Linear linear;
84  linear.fit(x,y);
85  regression::Polynomial polynomial(1);
86  polynomial.fit(x,y);
87  if ( !suite.equal(linear.beta(),polynomial.fit_parameters()(1), 1000) ){
88    suite.err() << "error: beta and fit_parameters(1) not equal" << std::endl;
89    suite.err() << "       beta = " << linear.beta() << std::endl;
90    suite.err() << "       fit_parameters(1) = " 
91           << polynomial.fit_parameters()(1) << std::endl;
92    suite.add(false);
93  }
94  if (!suite.equal(polynomial.fit_parameters()(0),
95                   linear.alpha()-linear.beta()*1985, 10000)){
96    suite.err() << "error: fit_parameters(0) = " 
97           << polynomial.fit_parameters()(0)<< std::endl;
98    suite.err() << "error: alpha-beta*m_x = " 
99           << linear.alpha()-linear.beta()*1985 << std::endl;
100    suite.add(false);
101  }
102  if ( !suite.equal(polynomial.chisq(), linear.chisq(), 100) ){
103    suite.err() << "error: chisq not same in linear and polynomial(1)" 
104           << std::endl;
105    suite.add(false);
106  }
107  if ( !suite.equal(polynomial.predict(1.0),linear.predict(1.0), 1000) ){
108    suite.err() << "error: predict not same in linear and polynomial(1)" 
109           << std::endl;
110    suite.add(false);
111  }
112  if (!suite.equal(polynomial.standard_error2(1985),
113                   linear.standard_error2(1985), 100000)){
114    suite.err() << "error: standard_error not same in linear and polynomial(1)" 
115                << "\n  polynomial: " << polynomial.standard_error2(1985)
116                << "\n  linear: " << linear.standard_error2(1985)
117                << std::endl;
118    suite.add(false);
119  }
120
121  suite.err() << "  testing regression::LinearWeighted" << std::endl;
122  regression::LinearWeighted linear_w;
123  equal(linear, linear_w, suite);
124  linear_w.fit(x,y,w);
125  double y_predicted = linear_w.predict(1990);
126  if (!suite.equal(y_predicted,12.8) ){
127    suite.err() << "error: cannot reproduce fit." << std::endl;
128    suite.err() << "predicted value: " << y_predicted << " expected 12.8" 
129           << std::endl;
130    suite.add(false);
131  }
132  double y_predicted_err = linear_w.prediction_error2(1990);
133  if (!suite.equal_sqrt(y_predicted_err,1.2) ){
134    suite.err() << "error: cannot reproduce fit." << std::endl;
135    suite.err() << "predicted error: " << y_predicted_err << " expected 1.2\n";
136    suite.add(false);
137  }
138
139  // Comparing LinearWeighted and PolynomialWeighted(1)
140  suite.err() << "    comparing LinearWeighted and PolynomialWeighted(1)"
141              << std::endl;
142  linear_w.fit(x,y,w);
143  regression::PolynomialWeighted polynomial_w(1);
144  polynomial_w.fit(x,y,w);
145  if ( !suite.equal(linear_w.beta(), polynomial_w.fit_parameters()(1),10000) ){
146    suite.err() << "error: beta and fit_parameters(1) not equal" << std::endl;
147    suite.err() << "       beta = " << linear_w.beta() << std::endl;
148    suite.err() << "       fit_parameters(1) = " 
149           << polynomial_w.fit_parameters()(1) << std::endl;
150    suite.add(false);
151  }
152  if ( !suite.equal(polynomial_w.fit_parameters()(0),
153                    linear_w.alpha()-linear_w.beta()*1990, 10000) ){
154    suite.err() << "error: fit_parameters(0) = " 
155           << polynomial.fit_parameters()(0)<< std::endl;
156    suite.err() << "error: alpha-beta*m_x = " 
157           << linear.alpha()-linear.beta()*1990 << std::endl;
158    suite.add(false);
159  }
160  if ( !suite.equal(polynomial_w.s2(),linear_w.s2(), 14) ){
161    suite.err() << "error: chisq not same in linear and polynomial(1)" 
162           << std::endl;
163    suite.add(false);
164  }
165  if ( !suite.equal(polynomial_w.predict(1.0), linear_w.predict(1.0), 10000) ){
166    suite.err() << "error: predict not same in linear and polynomial(1)" 
167           << std::endl;
168    suite.add(false);
169  }
170  if ( !suite.equal(polynomial_w.standard_error2(1985), 
171                    linear_w.standard_error2(1985), 100000) ){
172    suite.err() << "error: standard_error not same in linear and polynomial(1)" 
173           << "\n  polynomial: " << polynomial_w.standard_error2(1985)
174           << "\n  linear: " << linear_w.standard_error2(1985)
175           << std::endl;
176    suite.add(false);
177  }
178
179  // testing regression::NaiveWeighted
180  suite.err() << "  testing regression::NaiveWeighted" << std::endl;
181  regression::NaiveWeighted naive_w;
182  regression::Naive naive;
183  equal(naive, naive_w, suite);
184  naive_w.fit(x,y,w);
185
186  y_predicted=naive_w.predict(0.0);
187  y_predicted_err=naive_w.prediction_error2(0.0);
188  if (!suite.equal(y_predicted,0.1*12+0.2*11+0.3*14+0.4*13)) {
189    suite.err() << "regression_NaiveWeighted: cannot reproduce fit.\n";
190    suite.err() << "returned value: " << y_predicted << std::endl;
191    suite.err() << "expected: " << 0.1*12+0.2*11+0.3*14+0.4*13 << std::endl;
192    suite.add(false);
193  }
194
195  // testing regression::Local
196  suite.err() << "  testing regression::Local" << std::endl;
197  regression::KernelBox kb;
198  regression::LinearWeighted rl;
199  if (!Local_test(rl,kb, suite)) {
200    suite.err() << "regression_Local: Linear cannot reproduce fit." << std::endl;
201    suite.add(false);
202  }
203  regression::NaiveWeighted rn;
204  if (!Local_test(rn,kb, suite)) {
205    suite.err() << "regression_Local: Naive cannot reproduce fit." << std::endl;
206    suite.add(false);
207  }
208
209  // testing regression::Polynomial
210  suite.err() << "  testing regression::Polynomial" << std::endl;
211  {
212    std::ifstream s(test::filename("data/regression_gauss.data").c_str());
213    utility::Matrix data(s);
214    utility::Vector x(data.rows());
215    utility::Vector ln_y(data.rows());
216    for (size_t i=0; i<data.rows(); ++i) {
217      x(i)=data(i,0);
218      ln_y(i)=log(data(i,1));
219    }
220
221    regression::Polynomial polynomialfit(2);
222    polynomialfit.fit(x,ln_y);
223    utility::Vector fit=polynomialfit.fit_parameters();
224    suite.add(suite.equal_fix(fit(0), 1.012229646706, 1e-11));
225    suite.add(suite.equal_fix(fit(1), 0.012561322528, 1e-11));
226    suite.add(suite.equal_fix(fit(2), -1.159674470130, 1e-11));
227  }
228
229  suite.err() << "  testing regression::PolynomialWeighted" << std::endl;
230  regression::PolynomialWeighted pol_weighted(2);
231  regression::Polynomial polynomial2(2);
232  equal(polynomial2, pol_weighted, suite);
233
234  multidim(suite);
235
236  return suite.return_value();
237}
238
239
240void equal(regression::OneDimensional& r, 
241           regression::OneDimensionalWeighted& wr, 
242           test::Suite& suite)
243{
244  utility::Vector x(5); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000; x(4)=2010;
245  utility::Vector y(5); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;   y(4)=15;
246
247  unity_weights(r, wr, x, y, suite);
248  rescale_weights(wr, x, y, suite);
249  zero_weights(wr, x, y, suite);
250}
251
252
253void unity_weights(regression::OneDimensional& r, 
254                   regression::OneDimensionalWeighted& rw, 
255                   const utility::Vector& x, const utility::Vector& y,
256                   test::Suite& suite)
257{
258  suite.err() << "    testing unity weights equal to non-weighted version.\n"; 
259  utility::Vector w(x.size(), 1.0);
260  r.fit(x,y);
261  rw.fit(x,y,w);
262  if (!suite.equal(r.predict(2000), rw.predict(2000)) ) {
263    suite.add(false);
264    suite.err() << "Error: predict not equal\n" 
265           << "   weighted: " << rw.predict(2000) << "\n"
266           << "   non-weighted: " << r.predict(2000)
267           << std::endl;
268  }
269  if (!suite.equal(r.s2(), rw.s2(1.0)) ){
270    suite.add(false);
271    suite.err() << "Error: s2 not equal non-weighted version." << std::endl;
272    suite.err() << "weighted s2 = " << rw.s2(1.0) << std::endl;
273    suite.err() << "non-weighted s2 = " << r.s2() << std::endl;
274  }
275  if (!suite.equal_sqrt(r.standard_error2(2000), rw.standard_error2(2000), 20)){
276    suite.add(false);
277    suite.err() << "Error: standard_error not equal non-weighted version." 
278           << std::endl;
279  }
280  if (!suite.equal(r.r2(), rw.r2()) ){
281    suite.add(false);
282    suite.err() << "Error: r2 not equal non-weighted version." << std::endl;
283    suite.err() << "weighted r2 = " << rw.r2() << std::endl;
284    suite.err() << "non-weighted r2 = " << r.r2() << std::endl;
285  }
286  if (!suite.equal_sqrt(r.prediction_error2(2000), rw.prediction_error2(2000,1),
287                        100) ){
288    suite.add(false);
289    suite.err() << "Error: prediction_error2 not equal non-weighted version.\n" 
290           << "       weighted: " << rw.prediction_error2(2000,1) << "\n"
291           << "   non-weighted: " << r.prediction_error2(2000)
292           << std::endl;
293  }
294} 
295
296
297void rescale_weights(regression::OneDimensionalWeighted& wr, 
298                     const utility::Vector& x, const utility::Vector& y,
299                     test::Suite& suite)
300{
301  suite.err() << "    testing rescaling weights.\n"; 
302  utility::Vector w(5);  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0.2;
303  wr.fit(x,y,w);
304  double predict = wr.predict(2000);
305  double prediction_error2 = wr.prediction_error2(2000);
306  double r2 = wr.r2();
307  double s2 = wr.s2();
308  double standard_error2 = wr.standard_error2(2000);
309
310  w*=2;
311  wr.fit(x,y,w);
312  if (!suite.equal(wr.predict(2000), predict, 10000) ){
313    suite.add(false);
314    suite.err() << "Error: predict not equal after rescaling.\n";
315    suite.err() << "       predict = " << predict
316           << " and after doubling weights.\n";
317    suite.err() << "       predict = " << wr.predict(2000) << "\n";
318  }
319  if (!suite.equal(wr.s2(2), s2, 14000) ){
320    suite.add(false);
321    suite.err() << "Error: s2 not equal after rescaling.\n";
322    suite.err() << "       s2 = " << s2 << " and after doubling weights.\n";
323    suite.err() << "       s2 = " << wr.s2(2) << "\n";
324    suite.err() << "difference " << s2-wr.s2(2.0) << std::endl;
325  }
326  if (!suite.equal_sqrt(wr.standard_error2(2000), standard_error2, 100) ){
327    suite.add(false);
328    suite.err() << "Error: standard_error2 not equal after rescaling.\n";
329    suite.err() << "       standard_error2 = " << standard_error2
330           << " and after doubling weights.\n";
331    suite.err() << "       standard_error2 = " 
332           << wr.standard_error2(2000) << "\n";
333    suite.err() << " difference: " << wr.standard_error2(2000)-standard_error2
334           << std::endl;
335  }
336  if (!suite.equal(wr.r2(), r2, 10000) ){
337    suite.add(false);
338    suite.err() << "Error: r2 not equal after rescaling.\n";
339  }
340  if (!suite.equal_sqrt(wr.prediction_error2(2000,2), prediction_error2, 20) ){
341    suite.add(false);
342    suite.err() << "Error: prediction_error2 not equal after rescaling.\n";
343    suite.err() << "       prediction_error2 = " << prediction_error2
344           << " and after doubling weights.\n";
345    suite.err() << "       prediction_error2 = " 
346           << wr.prediction_error2(2000,2) << "\n";
347  }
348}
349
350
351void zero_weights(regression::OneDimensionalWeighted& wr, 
352                  const utility::Vector& x, const utility::Vector& y,
353                  test::Suite& suite)
354{
355  suite.err() << "    testing zero weights equal to missing value.\n"; 
356  utility::Vector w(5);  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0;
357  wr.fit(x,y,w);
358  double predict = wr.predict(2000);
359  double prediction_error2 = wr.prediction_error2(2000);
360  double r2 = wr.r2();
361  double s2 = wr.s2();
362  double standard_error2 = wr.standard_error2(2000);
363
364  utility::Vector x2(4);
365  utility::Vector y2(4);
366  utility::Vector w2(4);
367  for (size_t i=0; i<4; ++i){
368    x2(i) = x(i);
369    y2(i) = y(i);
370    w2(i) = w(i);
371  }
372
373  wr.fit(x2,y2,w2);
374  if (!suite.equal(wr.predict(2000), predict, 10000) ) {
375    suite.add(false);
376    suite.err() << "Error: predict not equal.\n";
377    suite.err() << "       weighted predict: " << wr.predict(2000) << "\n";
378    suite.err() << "       unweighted predict: " << predict << "\n";
379    suite.err() << "       difference: " << wr.predict(2000)-predict << "\n";
380
381  }
382  if (!suite.equal(wr.prediction_error2(2000), prediction_error2) ) {
383    suite.add(false);
384    suite.err() << "Error: prediction_error2 not equal.\n";
385  }
386  if (!suite.equal(wr.r2(), r2) ) {
387    suite.add(false);
388    suite.err() << "Error: r2 not equal.\n";
389    suite.err() << "   r2: " << r2 << "\n";
390    suite.err() << "   r2: " << wr.r2() << "\n";
391  }
392  if (!suite.equal(wr.s2(), s2) ) {
393    suite.add(false);
394    suite.err() << "Error: s2 not equal.\n";
395  }
396  if (!suite.equal(wr.standard_error2(2000), standard_error2) ) {
397    suite.add(false);
398    suite.err() << "Error: standard_error2 not equal.\n";
399  }
400}
401
402
403void multidim(test::Suite& suite)
404{
405  suite.err() << "  testing regression::MultiDimensionalWeighted" << std::endl;
406  utility::Vector x(5); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000; x(4)=2010;
407  utility::Vector y(5); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;   y(4)=15;
408  utility::Vector w(5,1.0);
409 
410  utility::Matrix data(5,3);
411  for (size_t i=0; i<data.rows(); ++i){
412    data(i,0)=1;
413    data(i,1)=x(i);
414    data(i,2)=x(i)*x(i);
415  }
416  regression::MultiDimensional md;
417  md.fit(data,y);
418  regression::MultiDimensionalWeighted mdw;
419  mdw.fit(data,y,w);
420  utility::Vector z(3,1);
421  z(1)=2000;
422  z(2)=2000*2000;
423  if (!suite.equal(md.predict(z), mdw.predict(z))){
424    suite.add(false);
425    suite.err() << "Error: predict not equal\n" 
426           << "   weighted: " << mdw.predict(z) << "\n"
427           << "   non-weighted: " << md.predict(z)
428           << std::endl;
429  }
430
431  if (!suite.equal_sqrt(md.standard_error2(z), mdw.standard_error2(z),20) ){
432    suite.add(false);
433    suite.err() << "Error: standard_error2 not equal\n" 
434           << "   weighted: " << mdw.standard_error2(z) << "\n"
435           << "   non-weighted: " << md.standard_error2(z)
436           << std::endl;
437  }
438  if (!suite.equal_sqrt(md.prediction_error2(z), mdw.prediction_error2(z,1.0),
439                        20) ){
440    suite.add(false);
441    suite.err() << "Error: prediction_error2 not equal\n" 
442           << "   weighted: " << mdw.prediction_error2(z,1.0) << "\n"
443           << "   non-weighted: " << md.prediction_error2(z)
444           << std::endl;
445  }
446
447  w(0)=1.0;  w(1)=1.0;  w(2)=0.5;  w(3)=0.2;  w(4)=0.2;
448  mdw.fit(data,y,w);
449  double predict = mdw.predict(z);
450  double prediction_error2 = mdw.prediction_error2(z, 1.0);
451  double s2 = mdw.s2(1.0);
452  double standard_error2 = mdw.standard_error2(z);
453
454  w*=2;
455  mdw.fit(data,y,w);
456  if (!suite.equal(mdw.predict(z), predict, 10000) ){
457    suite.add(false);
458    suite.err() << "Error: predict not equal after rescaling.\n";
459    suite.err() << "   predict = " << predict
460                << " and after doubling weights.\n";
461    suite.err() << "   predict = " << mdw.predict(z) << "\n";
462  }
463  if (!suite.equal_sqrt(mdw.prediction_error2(z,2), prediction_error2,20) ){ 
464    suite.add(false);
465    suite.err() << "Error: prediction_error2 not equal after rescaling.\n";
466    suite.err() << "       predict_error2 = " << prediction_error2
467           << " and after doubling weights.\n";
468    suite.err() << "       predict_error2 = " 
469                << mdw.prediction_error2(z,2) << "\n";
470  }
471  if (!suite.equal(mdw.s2(2), s2, 14000) ){
472    suite.add(false);
473    suite.err() << "Error: s2 not equal after rescaling.\n";
474    suite.err() << "       s2 = " << s2 << " and after doubling weights.\n";
475    suite.err() << "       s2 = " << mdw.s2(2) << "\n";
476  }
477  if (!suite.equal_sqrt(mdw.standard_error2(z), standard_error2, 100) ){
478    suite.add(false);
479    suite.err() << "Error: standard_error2 not equal after rescaling.\n";
480    suite.err() << " standard_error2 = " << standard_error2
481           << " and after doubling weights.\n";
482    suite.err() << " standard_error2 = " << mdw.standard_error2(z) << "\n";
483  }
484}
485
486
487bool Local_test(regression::OneDimensionalWeighted& r, 
488                regression::Kernel& k, test::Suite& suite)
489{
490  regression::Local rl(r,k);
491  for (size_t i=0; i<1000; i++){
492    rl.add(i, 10);
493  }
494
495  rl.fit(10, 100);
496  if (rl.x().size()!=1000/10)
497    return false;
498  for (size_t i=0; i+1<rl.x().size(); ++i)
499    if (!suite.equal(rl.x()(i+1)-rl.x()(i),10.0))
500      return false;
501
502  utility::Vector y(rl.y_predicted());
503  for (size_t i=0; i<y.size(); i++) 
504    if (!suite.equal(y(i),10.0)){
505      return false;
506    }
507
508  return true;
509}
Note: See TracBrowser for help on using the repository browser.