1 | #ifndef _theplu_yat_regression_onedimensioanlweighted_ |
---|
2 | #define _theplu_yat_regression_onedimensioanlweighted_ |
---|
3 | |
---|
4 | // $Id: OneDimensionalWeighted.h 702 2006-10-26 14:04:35Z peter $ |
---|
5 | |
---|
6 | /* |
---|
7 | Copyright (C) The authors contributing to this file. |
---|
8 | |
---|
9 | This file is part of the yat library, http://lev.thep.lu.se/trac/yat |
---|
10 | |
---|
11 | The yat library is free software; you can redistribute it and/or |
---|
12 | modify it under the terms of the GNU General Public License as |
---|
13 | published by the Free Software Foundation; either version 2 of the |
---|
14 | License, or (at your option) any later version. |
---|
15 | |
---|
16 | The yat library is distributed in the hope that it will be useful, |
---|
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
19 | General Public License for more details. |
---|
20 | |
---|
21 | You should have received a copy of the GNU General Public License |
---|
22 | along with this program; if not, write to the Free Software |
---|
23 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA |
---|
24 | 02111-1307, USA. |
---|
25 | */ |
---|
26 | |
---|
#include "yat/statistics/AveragerPairWeighted.h"

#include <cmath>
#include <ostream>
---|
30 | |
---|
31 | namespace theplu { |
---|
32 | namespace yat { |
---|
33 | namespace utility { |
---|
34 | class vector; |
---|
35 | } |
---|
36 | namespace regression { |
---|
37 | |
---|
38 | /// |
---|
39 | /// Abstract Base Class for One Dimensional fitting in a weighted |
---|
40 | /// fashion. |
---|
41 | /// |
---|
42 | class OneDimensionalWeighted |
---|
43 | { |
---|
44 | |
---|
45 | public: |
---|
46 | /// |
---|
47 | /// Default Constructor. |
---|
48 | /// |
---|
49 | inline OneDimensionalWeighted(void){} |
---|
50 | |
---|
51 | /// |
---|
52 | /// Destructor |
---|
53 | /// |
---|
54 | virtual ~OneDimensionalWeighted(void) {}; |
---|
55 | |
---|
56 | /** |
---|
57 | This function computes the best-fit given a model (see |
---|
58 | specific class for details) by minimizing \f$ |
---|
59 | \sum{w_i(\hat{y_i}-y_i)^2} \f$, where \f$ \hat{y} \f$ is the |
---|
60 | fitted value. The weight \f$ w_i \f$ should be proportional |
---|
61 | to the inverse of the variance for \f$ y_i \f$ |
---|
62 | */ |
---|
63 | virtual void fit(const utility::vector& x, const utility::vector& y, |
---|
64 | const utility::vector& w)=0; |
---|
65 | |
---|
66 | /** |
---|
67 | @brief Mean Squared Error |
---|
68 | |
---|
69 | Mean Squared Error is defined as the weighted mean of the |
---|
70 | squared residiuals \f$ \frac{\sum w_i(y_i-\hat{y}_i)^2}{\sum |
---|
71 | w_i} \f$, which is minimized when fitting the regression model. |
---|
72 | */ |
---|
73 | virtual double mse(void) const=0; |
---|
74 | |
---|
75 | /// |
---|
76 | /// @return expected value in @a x according to the fitted model |
---|
77 | /// |
---|
78 | virtual double predict(const double x) const=0; |
---|
79 | |
---|
80 | /** |
---|
81 | The prediction error is defined as the square root of the |
---|
82 | expected squared deviation a new data point will have from |
---|
83 | value the model provides. The expected squared deviation is |
---|
84 | defined as \f$ E((Y|x,w - \hat{y}(x))^2) \f$ which is equal to |
---|
85 | \f$ E((Y|x,w - E(Y|x))^2) + E((E(Y|x) - \hat{y}(x))^2) \f$, |
---|
86 | which is the conditional variance given \f$ x \f$ and the |
---|
87 | squared standard error (see standard_error()) of the model |
---|
88 | estimation in \f$ x \f$, respectively. |
---|
89 | |
---|
90 | The conditional variance is inversely proportional to the |
---|
91 | weight \f$ w \f$ and is calculated as \f$ Var(Y|x,w) = |
---|
92 | \frac{1}{w}\frac{\sum w_i(y_i-\hat{y}_i)^2\sum w_i^2} |
---|
93 | {\left(\sum w_i\right)^2} =\frac{\sum w_i^2}{w\sum w_i}mse\f$ |
---|
94 | |
---|
95 | @return expected prediction error for a new data point in @a x |
---|
96 | */ |
---|
97 | double prediction_error(const double x, const double w=1.0) const |
---|
98 | { return sqrt(mse()+pow(standard_error(x),2)); } |
---|
99 | |
---|
100 | /** |
---|
101 | r-squared is defined as \f$ \frac{\sum |
---|
102 | w_i(y_i-\hat{y}_i)^2}{\sum w_i(y_i-m_y)^2} \f$ or the fraction |
---|
103 | of the variance explained by the regression model. |
---|
104 | */ |
---|
105 | inline double r_squared(void) const |
---|
106 | { return mse()/variance(); } |
---|
107 | |
---|
108 | /** |
---|
109 | The standard error is defined as \f$ \sqrt{E((Y|x - |
---|
110 | \hat{y}(x))^2) }\f$ |
---|
111 | |
---|
112 | @return error of model value in @a x |
---|
113 | */ |
---|
114 | virtual double standard_error(const double x) const=0; |
---|
115 | |
---|
116 | protected: |
---|
117 | /// |
---|
118 | /// Averager for pair of x and y |
---|
119 | /// |
---|
120 | statistics::AveragerPairWeighted ap_; |
---|
121 | |
---|
122 | private: |
---|
123 | inline double variance(double w=1) const |
---|
124 | { return ap_.y_averager().variance(); } |
---|
125 | |
---|
126 | |
---|
127 | }; |
---|
128 | |
---|
129 | }}} // of namespaces regression, yat, and theplu |
---|
130 | |
---|
131 | #endif |
---|