Changeset 221
- Timestamp:
- Dec 30, 2004, 11:36:25 PM (18 years ago)
- Location:
- trunk/src
- Files:
-
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/Regression.cc
r213 r221 8 8 9 9 Regression::Regression() 10 : sumsq_(0)11 10 { 12 11 } -
trunk/src/Regression.h
r216 r221 34 34 virtual ~Regression(void) {}; 35 35 36 virtual void estimate(const double x, double& y, double& y_err) const=0;37 38 36 /// 39 37 /// This function computes the best-fit given a model (see … … 41 39 /// \sum{(\hat{y_i}-y_i)^2} \f$, where \f$ \hat{y} \f$ is the fitted value. 42 40 /// 43 virtual intfit(const gslapi::vector& x, const gslapi::vector& y)=0;41 virtual void fit(const gslapi::vector& x, const gslapi::vector& y)=0; 44 42 45 43 /// … … 50 48 /// to the inverse of the variance for \f$ y_i \f$ 51 49 /// 52 virtual int fit_weighted(const gslapi::vector& x, const gslapi::vector& y, 53 const gslapi::vector& w)=0; 54 55 inline double sumsq() const { return sumsq_; } 56 50 virtual void fit(const gslapi::vector& x, const gslapi::vector& y, 51 const gslapi::vector& w)=0; 52 /// 53 /// 54 /// 55 virtual void predict(const double x, double& y, double& y_err, 56 const double w=1) const=0; 57 57 58 protected: 58 double sumsq_; 59 59 60 60 }; 61 61 -
trunk/src/RegressionKernelBox.h
r216 r221 24 24 25 25 /// 26 /// Function calculating the weight 26 /// Function calculating the weight as \f$ w(x)=1\f$ if \f$|x|\le 1 27 /// \f$, \f$ w(x)=0 \f$ otherwise. 27 28 /// 28 29 double weight(const double) const; -
trunk/src/RegressionLinear.cc
r216 r221 5 5 6 6 // C++_tools 7 #include "AveragerPair.h" 7 8 #include "Regression.h" 8 9 #include "vector.h" … … 16 17 17 18 RegressionLinear::RegressionLinear(void) 18 : Regression(), cov00_(0), cov01_(0), cov11_(0), k_(0), m_(0) 19 : Regression(), alpha_(0), alpha_var_(0), beta_(0), beta_var_(0), m_x_(0), 20 s2_(0) 19 21 { 20 22 } 21 23 24 void RegressionLinear::fit(const gslapi::vector& x, const gslapi::vector& y) 25 { 26 statistics::AveragerPair ap; 27 for (size_t i=0; i<x.size(); i++) 28 ap.add(x(i),y(i)); 29 30 alpha_ = ap.y_averager().mean(); 31 beta_ = ap.covariance() / ap.x_averager().variance(); 32 33 // estimating the noise level, i.e. the conditional variance of y 34 // given x, Var(y|x). 35 double Q = (ap.y_averager().sum_xsqr_centered() - ap.sum_xy_centered() * 36 ap.sum_xy_centered()/ap.x_averager().sum_xsqr_centered() ) ; 37 s2_ = Q/(x.size()-2); 38 r2_= 1; 39 alpha_var_ = s2_ / x.size(); 40 beta_var_ = s2_ / ap.x_averager().sum_xsqr_centered(); 41 m_x_ = ap.x_averager().mean(); 42 } 43 44 void RegressionLinear::fit(const gslapi::vector& x, const gslapi::vector& y, 45 const gslapi::vector& w) 46 { 47 double m_x = w*x /w.sum(); 48 double m_y = w*y /w.sum(); 49 50 double sxy = 0; 51 for (size_t i=0; i<x.size(); i++) 52 sxy += w(i)*(x(i)-m_x)*(y(i)-m_y); 53 54 double sxx = 0; 55 for (size_t i=0; i<x.size(); i++) 56 sxx += w(i)*(x(i)-m_x)*(x(i)-m_x); 57 58 double syy = 0; 59 for (size_t i=0; i<y.size(); i++) 60 syy += w(i)*(y(i)-m_y)*(y(i)-m_y); 61 62 // estimating the noise level. see attached document for motivation 63 // of the expression. 64 s2_= (syy-sxy*sxy/sxx)/(w.sum()-2*(w*w)/w.sum()) ; 65 66 alpha_ = m_y; 67 beta_ = sxy/sxx; 68 alpha_var_ = s2_/w.sum(); 69 beta_var_ = s2_/sxx; 70 m_x_=m_x; 71 } 72 73 22 74 }} // of namespace statistics and namespace theplu -
trunk/src/RegressionLinear.h
r216 r221 11 11 // Standard C++ includes 12 12 //////////////////////// 13 #include < gsl/gsl_fit.h>13 #include <cmath> 14 14 15 15 namespace theplu { … … 19 19 /// Class for Regression. 20 20 /// 21 22 21 23 22 class RegressionLinear : public Regression … … 40 39 virtual ~RegressionLinear(void) {}; 41 40 42 inline void estimate(const double x, double& y, double& y_err) const 43 { 44 gsl_fit_linear_est(x, m_, k_, cov00_, cov01_, cov11_, &y, &y_err); 41 /// 42 /// @return the parameter \f$ \alpha \f$ 43 /// 44 inline double alpha(void) const { return alpha_; } 45 46 /// 47 /// @return standard deviation of parameter \f$ \alpha \f$ 48 /// 49 inline double alpha_var(void) const { return sqrt(alpha_var_); } 50 51 /// 52 /// @return the parameter \f$ \beta 53 /// 54 inline double beta(void) const { return beta_; } 55 56 /// 57 /// @return standard devaition of parameter \f$ \beta \f$ 58 /// 59 inline double beta_var(void) const { return sqrt(beta_var_); } 60 61 /// 62 /// This function computes the best-fit linear regression 63 /// coefficients \f$ (\alpha, \beta)\f$ of the model \f$ y = 64 /// \alpha + \beta (x-m_x) \f$ from vectors \a x and \a y, by 65 /// minimizing \f$ \sum{(y_i - \alpha - \beta (x-m_x))^2} \f$. 66 /// 67 void fit(const gslapi::vector& x, const gslapi::vector& y) ; 68 69 /// 70 /// This function computes the best-fit linear regression 71 /// coefficients \f$ (\alpha, \beta)\f$ of the model \f$ y = 72 /// \alpha + \beta (x-m_x) \f$ from vectors \a x and \a y, by 73 /// minimizing \f$ \sum{w_i(y_i - \alpha - \beta (x-m_x))^2} \f$. 74 /// 75 void fit(const gslapi::vector& x, const gslapi::vector& y, 76 const gslapi::vector& w); 77 78 /// 79 /// Function predicting value using the linear model. \a y_err is 80 /// the expected deviation from the line for a new data point. If 81 /// weights are used a weight can be specified for the new point. 82 /// 83 inline void predict(const double x, double& y, double& y_err, 84 const double w=1.0) const 85 { 86 y = alpha_ + beta_ * (x-m_x_); 87 y_err = sqrt( alpha_var_+beta_var_*(x-m_x_)*(x-m_x_)+s2_/w ); 45 88 } 46 89 47 90 /// 48 /// This function computes the best-fit linear regression 49 /// coefficients (k,m) of the model \f$ y = kx + m \f$ from 50 /// vectors \a x and \a y, by minimizing \f$ \sum{(kx_i+m-y_i)^2} 51 /// \f$. 91 /// Function returning the coefficient of determination, 92 /// i.e. share of variance explained by the linear model. 52 93 /// 53 inline int fit(const gslapi::vector& x, const gslapi::vector& y) 54 { 55 return gsl_fit_linear(x.gsl_vector_pointer()->data, 56 x.gsl_vector_pointer()->stride, 57 y.gsl_vector_pointer()->data, 58 y.gsl_vector_pointer()->stride, 59 x.gsl_vector_pointer()->size, 60 &m_, &k_, &cov00_, &cov01_, &cov11_, &sumsq_); } 94 inline double r2(void) const { return r2_; } 61 95 62 inline double k(void) const { return k_; }63 inline double m(void) const { return m_; }64 65 66 ///67 /// This function computes the best-fit linear regression68 /// coefficients (k,m) of the model \f$ y = kx + m \f$ from69 /// vectors \a x and \a y, by minimizing \f$ \sum w_i(kx_i+m-y_i)^270 /// \f$. The weight \f$ w_i \f$ is the inverse of the variance for71 /// \f$ y_i \f$72 ///73 inline int fit_weighted(const gslapi::vector& x, const gslapi::vector& y,74 const gslapi::vector& w)75 {76 return gsl_fit_wlinear(x.gsl_vector_pointer()->data,77 x.gsl_vector_pointer()->stride,78 w.gsl_vector_pointer()->data,79 w.gsl_vector_pointer()->stride,80 y.gsl_vector_pointer()->data,81 y.gsl_vector_pointer()->stride,82 x.gsl_vector_pointer()->size,83 &m_, &k_, &cov00_, &cov01_, &cov11_, &sumsq_); }84 85 86 96 private: 87 double cov00_; 88 double cov01_; 89 double cov11_; 90 double k_; 91 double m_; 92 97 double alpha_; 98 double alpha_var_; 99 double beta_; 100 double beta_var_; 101 double m_x_; // average of x values 102 double s2_; // var(y|x) 103 double r2_; // coefficient of determination 93 104 }; 94 105 -
trunk/src/RegressionLocal.cc
r216 r221 17 17 Regression& r, 18 18 RegressionKernel& k, 19 const gslapi::vector& estimated_x) 20 : kernel_(&k), regression_(&r), estimated_x_(estimated_x), 21 estimated_y_(estimated_x), estimated_y_err_(estimated_x) 19 const size_t nof_predictions) 20 : kernel_(&k), regression_(&r) 22 21 { 22 estimated_x_ = gslapi::vector(nof_predictions); 23 estimated_y_ = estimated_x_; 24 estimated_y_err_ = estimated_x_; 23 25 //sorting data with respect to x 24 26 // Peter, could this be done without all the copying … … 29 31 } 30 32 sort(data_.begin(), data_.end()); 33 for (size_t i=0; i<nof_predictions; i++) 34 estimated_x_(i) = 35 data_[static_cast<size_t>((i+0.5)/nof_predictions*data_.size())].first; 31 36 } 32 37 … … 63 68 // copying data 64 69 // Peter, this is a stupid solution which could be 65 // improved when the vector class is updated.70 // improved by using a moving view (in the vector class). 66 71 gslapi::vector x(index_max-index_min+1); 67 72 gslapi::vector y(index_max-index_min+1); … … 69 74 for (size_t j=0; j<x.size(); j++) 70 75 x(j)=data_[index_min+j].first; 71 for (size_t j=0; j<y.size(); j++) 76 for (size_t j=0; j<y.size(); j++){ 72 77 y(j)=data_[index_min+j].second; 78 if (y(j)>10) 79 std::cout << x(j) << "\t" << y(j) << std::endl; 80 } 73 81 // calculating weights 74 82 for (size_t j=0; j<y.size(); j++) … … 79 87 80 88 // fitting the regressor locally 81 regression_->fit_weighted(x,y,w); 82 regression_->estimate(estimated_x_(i), 89 std::cout << "x: " << estimated_x_(i) << std::endl; 90 std::cout << "x.begin: " << x(0) << std::endl; 91 std::cout << "x.end: " << x(x.size()-1) << std::endl; 92 regression_->fit(x,y,w); 93 std::cout << "estimating value\n"; 94 regression_->predict(estimated_x_(i), 83 95 estimated_y_(i), 84 96 estimated_y_err_(i)); 85 97 } 86 98 } 87 88 99 89 100 std::ostream& operator<< (std::ostream& s, const RegressionLocal& r) 90 101 { -
trunk/src/RegressionLocal.h
r216 r221 32 32 33 33 /// 34 /// Constructor loading the object with data, type of regressor 35 /// and type kernel.34 /// Constructor loading the object with data, type of regressor, 35 /// type of kernel and in how many points to predict. 36 36 /// 37 37 RegressionLocal(const gslapi::vector& x, const gslapi::vector& y, 38 Regression& r, RegressionKernel& k, 39 const gslapi::vector&); 38 Regression& r, RegressionKernel& k, const size_t); 40 39 41 40 /// … … 49 48 virtual ~RegressionLocal(void) {}; 50 49 50 /// 51 /// Function returning the points where to predict 52 /// 51 53 inline gslapi::vector estimated_x(void) const { return estimated_x_; } 54 55 /// 56 /// Function returning predicted values 57 /// 52 58 inline gslapi::vector estimated_y(void) const { return estimated_y_; } 59 60 /// 61 /// Function returning error of predictions 62 /// 53 63 inline gslapi::vector estimated_y_err(void) const {return estimated_y_err_;} 54 64 55 65 /// 56 /// Function 66 /// Function performing the fit, using a \a fraction of the data 67 /// point and regression method defined in the constructor. The 68 /// algorithm uses equally many points around the point to 69 /// predict. If this is not possible (because the point is too far 70 /// left/right) the points to the extreme left/right is used. 57 71 /// 58 72 void fit(const double fraction); 73 59 74 60 75 private: -
trunk/src/RegressionNaive.h
r216 r221 1 1 // $Id$ 2 2 3 #ifndef _theplu_statistics_regression_ linear_4 #define _theplu_statistics_regression_ linear_3 #ifndef _theplu_statistics_regression_naive_ 4 #define _theplu_statistics_regression_naive_ 5 5 6 6 // C++ tools include … … 12 12 // Standard C++ includes 13 13 //////////////////////// 14 #include <gsl/gsl_fit.h>14 //#include <gsl/gsl_fit.h> 15 15 #include <utility> 16 16 … … 21 21 /// Class for Regression. 22 22 /// 23 24 23 25 24 class RegressionNaive : public Regression … … 41 40 /// 42 41 virtual ~RegressionNaive(void) {}; 43 44 inline void estimate(const double x, double& y, double& y_err)45 { y=m_; y_err=sqrt(var_); }46 42 47 43 /// … … 50 46 /// \sum{(y_i-m)^2} \f$. 51 47 /// 52 inline intfit(const gslapi::vector& x, const gslapi::vector& y)48 inline void fit(const gslapi::vector& x, const gslapi::vector& y) 53 49 { 54 50 Averager a; … … 56 52 a.add(y(i)); 57 53 m_=a.mean(); 58 var_=a.standard_error()*a.standard_error();59 return 0;54 s2_=a.variance(); 55 m_err_=a.standard_error(); 60 56 } 61 57 … … 63 59 /// This function computes the best-fit for the naive model \f$ y 64 60 /// = m \f$ from vectors \a x and \a y, by minimizing \f$ \sum 65 /// w_i(y_i-m)^2 \f$. The weight \f$ w_i \f$ is the inverse of the66 /// variance for \f$ y_i \f$61 /// w_i(y_i-m)^2 \f$. The weight \f$ w_i \f$ is proportional to 62 /// the inverse of the variance for \f$ y_i \f$ 67 63 /// 68 inline int fit_weighted(const gslapi::vector& x,69 70 64 inline void fit(const gslapi::vector& x, 65 const gslapi::vector& y, 66 const gslapi::vector& w) 71 67 { 72 68 WeightedAverager a; … … 74 70 a.add(y(i), w(i)); 75 71 m_=a.mean(); 76 var_=a.standard_error()*a.standard_error(); 77 return 0; 72 m_err_=a.standard_error(); 73 s2_=m_err_*m_err_*w.sum(); 74 } 75 76 /// 77 /// @return the parameter m 78 /// 79 inline double m(void) const { return m_; } 80 81 /// 82 /// @return standard deviation of parameter m 83 /// 84 inline double alpha_var(void) const { return m_err_; } 85 86 /// 87 /// Function predicting value using the naive model. \a y_err is 88 /// the expected deviation from the line for a new data point. If 89 /// weights are used a weight can be specified for the new point. 90 /// 91 inline void predict(const double x, double& y, double& y_err, 92 const double w=1.0) const 93 { 94 y = m_; 95 y_err = sqrt(m_err_*m_err_ + s2_/w); 78 96 } 79 97 80 98 81 99 private: 82 double var_;100 double s2_; // noise level ( var = s2/w in weighted case) 83 101 double m_; 84 102 double m_err_; 85 103 }; 86 104
Note: See TracChangeset
for help on using the changeset viewer.