# Changeset 586

Ignore:
Timestamp:
Jun 19, 2006, 11:56:04 AM (15 years ago)
Message:

closes #23 redesign of regression classes

Location:
trunk
Files:
19 edited

Unmodified
Removed
• ## trunk/c++_tools/statistics/Linear.cc

 r483 beta_ = ap_.covariance() / ap_.x_averager().variance(); // estimating the noise level, i.e. the conditional variance of y // given x, Var(y|x). double Q = (ap_.y_averager().sum_xx_centered() - ap_.sum_xy_centered() * ap_.sum_xy_centered()/ap_.x_averager().sum_xx_centered() ) ; s2_ = Q/(x.size()-2); r2_= 1; alpha_var_ = s2_ / x.size(); beta_var_ = s2_ / ap_.x_averager().sum_xx_centered(); // calculating deviation between data and model msd_ = (ap_.y_averager().sum_xx_centered() - ap_.sum_xy_centered() * ap_.sum_xy_centered()/ap_.x_averager().sum_xx_centered() )/x.size(); r2_= 1-msd_/ap_.x_averager().variance(); alpha_var_ = msd_ / x.size(); beta_var_ = msd_ / ap_.x_averager().sum_xx_centered(); m_x_ = ap_.x_averager().mean(); } void Linear::predict(const double x, double& y, double& y_err) const double Linear::predict(const double x) const { y = alpha_ + beta_ * (x-m_x_); y_err = sqrt( alpha_var_+beta_var_*(x-m_x_)*(x-m_x_)+s2_ ); return alpha_ + beta_ * (x-m_x_); } std::ostream& Linear::print_header(std::ostream& s) const double Linear::prediction_error(const double x) const { s << "# column 1: x\n" << "# column 2: y\n" << "# column 3: y_err\n" << "# column 4: alpha\n" << "# column 5: alpha_err\n" << "# column 6: beta\n" << "# column 7: beta_err\n" << "# column 8: s2 (var(y|x))\n" << "# column 9: r2 (coefficient of determination)"; return s; return sqrt( alpha_var_+beta_var_*(x-m_x_)*(x-m_x_)+msd_ ); } double Linear::standard_error(const double x) const { return sqrt( alpha_var_+beta_var_*(x-m_x_)*(x-m_x_)); } }}} // of namespaces regression, statisitcs and thep
• ## trunk/c++_tools/statistics/Linear.h

 r430 inline Linear(void) : OneDimensional(), alpha_(0), alpha_var_(0), beta_(0), beta_var_(0), m_x_(0), s2_(0) {} m_x_(0){} /// /// /// Function predicting value using the linear model. \a y_err is /// the expected deviation from the line for a new data point. /// @return value in @a x of model /// void predict(const double x, double& y, double& y_err) const; double predict(const double x) const; /// /// @return prediction value and parameters /// @return expected prediction error for a new data point in @a x /// std::ostream& print(std::ostream&) const; double prediction_error(const double x) const; /// /// @return header for print() /// @return error of model value in @a x /// std::ostream& print_header(std::ostream&) const; double standard_error(const double x) const; /// /// Function returning the coefficient of determination, /// i.e. fraction of variance explained by the linear model. /// @todo implement r2's calculation in fit function /// inline double r2(void) const { return r2_; } double beta_var_; double m_x_; // average of x values double s2_; // var(y|x) double r2_; // coefficient of determination };
• ## trunk/c++_tools/statistics/LinearWeighted.cc

 r582 } std::ostream& LinearWeighted::print(std::ostream& s) const { s << x_ << "\t" << y_ << "\t" << y_err_ << "\t" << alpha_ << "\t" << alpha_err() << "\t" << beta_ << "\t" << beta_err() << "\t" << s2_ << "\t" << r2_; return s; } std::ostream& LinearWeighted::print_header(std::ostream& s) const { s << "# column 1: x\n" << "# column 2: y\n" << "# column 3: y_err\n" << "# column 4: alpha\n" << "# column 5: alpha_err\n" << "# column 6: beta\n" << "# column 7: beta_err\n" << "# column 8: s2 (var(y|x))\n" << "# column 9: r2 (coefficient of determination)"; return s; } }}} // of namespaces regression, statisitcs and thep
• ## trunk/c++_tools/statistics/LinearWeighted.h

 r495 : OneDimensionalWeighted(), alpha_(0), alpha_var_(0), beta_(0), beta_var_(0), m_x_(0), s2_(0) {} m_x_(0){} /// /// /// Function predicting value using the linear model. \a y_err is /// the estimated error of y(x) /// inline void predict(const double x, double& y, double& y_err, const double w=1.0) { y = alpha_ + beta_ * (x-m_x_); y_err_ = y_err = sqrt( alpha_var_+beta_var_*(x-m_x_)*(x-m_x_)); x_=x; y_=y; } /// Function predicting value using the linear model: \f$y = /// \alpha + \beta (x - m) double predict(const double x) const { return alpha_ + beta_ * (x-m_x_); } /// /// @return prediction value and parameters /// estimated deviation from predicted value for a new data point /// in @a x with weight @a w /// std::ostream& print(std::ostream&) const; inline double prediction_error(const double x, const double w=1) const { return sqrt(alpha_var_ + beta_var_*(x-m_x_)*(x-m_x_)+s2_/w); } /// /// @return header for print() /// estimated error @a y_err \f$ y_err = \sqrt{ Var(\alpha) + /// Var(\beta)*(x-m)^2 }. /// std::ostream& print_header(std::ostream&) const; inline double standard_error(const double x) const { return sqrt(alpha_var_ + beta_var_*(x-m_x_)*(x-m_x_) ); } /// double beta_var_; double m_x_; // average of x values double s2_; // var(y|x) double r2_; // coefficient of determination };
• ## trunk/c++_tools/statistics/Local.cc

 r501 assert(ipredict(x(i*step_size), y_predicted_(i),y_err_(i)); y_predicted_(i) = regressor_->predict(x(i*step_size)); y_err_(i) = regressor_->standard_error(x(i*step_size)); } }
• ## trunk/c++_tools/statistics/Makefile.am

 r582 Histogram.cc Kernel.cc KernelBox.cc KernelTriCube.cc Linear.cc \ LinearWeighted.cc Local.cc \ MultiDimensional.cc Naive.cc NaiveWeighted.cc OneDimensional.cc\ MultiDimensional.cc \ MultiDimensionalWeighted.cc \ Naive.cc NaiveWeighted.cc OneDimensional.cc\ Pearson.cc PearsonDistance.cc \ Polynomial.cc ROC.cc Score.cc tScore.cc SNR.cc utility.cc \ Polynomial.cc \ PolynomialWeighted.cc \ ROC.cc Score.cc tScore.cc SNR.cc utility.cc \ WilcoxonFoldChange.cc Fisher.h FoldChange.h \ Histogram.h Kernel.h KernelBox.h KernelTriCube.h Linear.h LinearWeighted.h \ Local.h MultiDimensional.h Naive.h NaiveWeighted.h OneDimensional.h \ Local.h MultiDimensional.h \ MultiDimensionalWeighted.h \ Naive.h NaiveWeighted.h OneDimensional.h \ OneDimensionalWeighted.h Pearson.h  PearsonDistance.h \ Polynomial.h ROC.h Score.h SNR.h tScore.h \ Polynomial.h \ PolynomialWeighted.h \ ROC.h Score.h SNR.h tScore.h \ utility.h WilcoxonFoldChange.h
• ## trunk/c++_tools/statistics/MultiDimensional.cc

 r420 double MultiDimensional::prediction_error(const gslapi::vector& x) const { double s2 = 0; for (size_t i=0; i
• ## trunk/c++_tools/statistics/MultiDimensional.h

 r420 gslapi::vector fit_parameters(void) { return fit_parameters_; } /// /// @return value in @a x according to fitted model /// inline double predict(const gslapi::vector& x) const { return fit_parameters_ * x; } /// /// @return expected prediction error for a new data point in @a x /// double prediction_error(const gslapi::vector& x) const; /// /// @return error of model value in @a x /// double standard_error(const gslapi::vector& x) const; private: double chisquare_;
• ## trunk/c++_tools/statistics/Naive.cc

 r430 } void Naive::predict(const double x, double& y, double& y_err) const double Naive::predict(const double x) const { y = ap_.x_averager().mean(); y_err = ap_.x_averager().std()*(1.0+1.0/ap_.n()); return ap_.y_averager().mean(); } std::ostream& Naive::print_header(std::ostream& s) const double Naive::prediction_error(const double x) const { s << "# column 1: x\n" << "# column 2: y\n" << "# column 3: y_err"; return s; return sqrt(msd_*(1.0+1.0/ap_.n())); } double Naive::standard_error(const double x) const { return sqrt(msd_/ap_.n()); }
• ## trunk/c++_tools/statistics/Naive.h

 r430 /// /// Function predicting value using the naive model. \a y_err is /// the expected deviation from the line for a new data point. The /// Function predicting value using the naive model. /// double predict(const double x) const; /// /// The expected deviation from the line for a new data point. The /// error has two components: the variance of point and error in /// estimation of the mean. /// estimation of the mean. /// void predict(const double x, double& y, double& y_err) const; double prediction_error(const double x) const; /// /// @return header for print() /// @return standard error /// std::ostream& print_header(std::ostream&) const; double standard_error(const double x) const; private: double s2_; // noise level - the typical variance for a point with // weight w is s2/w double m_; double m_err_; // error of estimation of mean m_
• ## trunk/c++_tools/statistics/NaiveWeighted.cc

 r495 } void NaiveWeighted::predict(const double x, double& y, double& y_err, const double w) { x_ = x; y = m_; y_err = m_err_; } std::ostream& NaiveWeighted::print(std::ostream& s) const { s << x_ << "\t" << m_ << "\t" << sqrt(m_err_*m_err_ + s2_); return s; } std::ostream& NaiveWeighted::print_header(std::ostream& s) const { s << "# column 1: x\n" << "# column 2: y\n" << "# column 3: y_err"; return s; } }}} // of namespaces regression, statisitcs and thep
• ## trunk/c++_tools/statistics/NaiveWeighted.h

 r495 //#include #include #include #include /// /// Function predicting value using the naive model, i.e. a /// weighted average. \a y_err is the standard error of the /// weighted mean /// weighted average. /// /// @see AveragerWeighted::standard_error /// void predict(const double x, double& y, double& y_err, const double w=1) ; inline double predict(const double x) const { return m_; } /// /// @return prediction value and parameters /// @return expected prediction error for a new data point in @a x /// with weight @a w /// std::ostream& print(std::ostream&) const; inline double prediction_error(const double x, const double w=1) const { return sqrt(m_err_*m_err_ + s2_/w); } /// /// @return header for print() /// @return estimation of error of model value in @a x /// std::ostream& print_header(std::ostream&) const; inline double standard_error(const double x) const { return m_err_; } private: /// NaiveWeighted(const NaiveWeighted&); double s2_; // noise level - the typical variance for a point with // weight w is s2/w double m_; double m_err_; // error of estimation of mean m_
• ## trunk/c++_tools/statistics/OneDimensional.cc

 r429 namespace regression { std::ostream& OneDimensional::print(std::ostream& os, const double min, double max, const u_int n) const std::ostream& OneDimensional::print(std::ostream& os, const double min, double max, const u_int n) const { double dx; } double y,y_err; for ( double x=min; x<=max; x+=dx) { predict(x,y,y_err); double y = predict(x); double y_err = prediction_error(x); os << x << "\t" << y << "\t" << y_err << "\n"; }
• ## trunk/c++_tools/statistics/OneDimensional.h

 r429 /// function predicting in one point /// virtual void predict(const double x, double& y, double& y_err) const=0; virtual double predict(const double x) const=0; /// /// /// @return expected prediction error for a new data point in @a x /// /// @return stream of prediction values and parameters virtual double prediction_error(const double x) const=0; std::ostream& print(std::ostream& os,const double min, double max, const u_int n) const; /// std::ostream& print(std::ostream&,const double min, const double max, const u_int n) const; /// @return error of model value in @a x /// /// @return header for print() /// virtual std::ostream& print_header(std::ostream& s) const=0; virtual double standard_error(const double x) const=0; protected: /// AveragerPair ap_; double msd_; // mean squared deviation (model from data points) };
• ## trunk/c++_tools/statistics/OneDimensionalWeighted.h

 r495 /// Default Constructor. /// inline OneDimensionalWeighted(void) : x_(0.0), y_(0.0), y_err_(0.0) {} inline OneDimensionalWeighted(void):s2_(0)  {} /// /// specific class for details) by minimizing \f$/// \sum{w_i(\hat{y_i}-y_i)^2} \f$, where \f$\hat{y} \f$ is the /// fitted value. The weight \f$w_i \f$ is should be proportional /// fitted value. The weight \f$w_i \f$ should be proportional /// to the inverse of the variance for \f$y_i \f$ /// virtual void fit(const gslapi::vector& x, const gslapi::vector& y, const gslapi::vector& w)=0; const gslapi::vector& w)=0; /// /// function predicting in one point. y will contain the y(x) /// according to the model. y_err will contain the estimated error /// of y(x). /// function predicting in one point. /// virtual void predict(const double x, double& y, double& y_err, const double w=1) =0; virtual double predict(const double x) const=0; /// /// @return prediction value and parameters /// @return expected prediction error for a new data point in @a x /// with weight @a w /// virtual std::ostream& print(std::ostream&) const=0; virtual double prediction_error(const double x, const double w=1) const=0; /// /// @return header for print() /// @return error of model value in @a x /// virtual std::ostream& print_header(std::ostream& s) const=0; virtual double standard_error(const double x) const=0; protected: /// /// x for predicted point /// double x_; /// /// y for predicted point /// double y_; /// /// estimated error of predicted point (in y). /// double y_err_; double s2_; // noise level - the typical variance for a point with // weight w is s2/w };
• ## trunk/c++_tools/statistics/Polynomial.cc

 r386 } double Polynomial::predict(const double x) const { gslapi::vector vec(power_+1,1); for (size_t i=1; i<=power_; ++i) vec(i) = vec(i-1)*x; return md_.predict(vec); } double Polynomial::prediction_error(const double x) const { gslapi::vector vec(power_+1,1); for (size_t i=1; i<=power_; ++i) vec(i) = vec(i-1)*x; return md_.prediction_error(vec); } double Polynomial::standard_error(const double x) const { gslapi::vector vec(power_+1,1); for (size_t i=1; i<=power_; ++i) vec(i) = vec(i-1)*x; return md_.standard_error(vec); } }}} // of namespaces regression, statisitcs and thep
• ## trunk/c++_tools/statistics/Polynomial.h

 r440 /// /// @todo implement /// @return value in @a x of model /// inline void predict(const double x, double& y, double& y_err) const { assert(0); } double predict(const double x) const; /// /// @todo implement /// @return expected prediction error for a new data point in @a x /// inline std::ostream& print_header(std::ostream& s) const { return s; } double prediction_error(const double x) const; /// /// @return error of model value in @a x /// double standard_error(const double x) const; private:
• ## trunk/doc/Statistics.tex

 r494 The first group is when some of the measurements are known to be more precise than others. The more precise a measuremtns is the larger precise than others. The more precise a measurement is, the larger weight it is given. The simplest case is when the weight are given before the measurements and they can be treated as deterministic. It can be treated as independent of the observable. Since there are various origin for a weight occuring in a statistical analysis, there are various way to treat the weights and in general Since there are various origins for a weight occuring in a statistical analysis, there are various ways to treat the weights and in general the analysis should be tailored to treat the weights correctly. We have not chosen one situation for our implementations, so see specific \end{itemize} An important case is when weights are binary (either 1 or 0). Then we get same result using the weighted version as using the data with get the same result using the weighted version as using the data with weight not equal to zero and the non-weighted version. Hence, using binary weights and the weighted version missing values can be treated the variance is estimated as $\sigma_x^2=\frac{\sum w_xw_y(x-m_x)^2}{\sum w_xw_y}$. As in the non-weighted case we define the correlation to be the ratio between the covariance and geomtrical avergae of the variances the correlation to be the ratio between the covariance and geometrical average of the variances \$\frac{\sum w_xw_y(x-m_x)(y-m_y)}{\sqrt{\sum w_xw_y(x-m_x)^2\sum
• ## trunk/test/regression_test.cc

 r495 #include #include #include #include statistics::regression::LinearWeighted linear_w; linear_w.fit(x,y,w); double y_predicted=0; double y_predicted_err=0; linear_w.predict(1990,y_predicted,y_predicted_err); double y_predicted = linear_w.predict(1990); double y_predicted_err = linear_w.prediction_error(1990); if (fabs(y_predicted-12.8)>0.001){ *error << "error: cannot reproduce fit." << std::endl; statistics::regression::NaiveWeighted naive_w; naive_w.fit(x,y,w); y_predicted=0; y_predicted_err=0; naive_w.predict(0.0,y_predicted,y_predicted_err); y_predicted=naive_w.predict(0.0); y_predicted_err=naive_w.prediction_error(0.0); if (y_predicted!=(0.1*12+0.2*11+0.3*14+0.4*13)) { *error << "regression_NaiveWeighted: cannot reproduce fit." << std::endl; statistics::regression::Naive naive; *error << "testing regression::Polynomial" << std::endl; statistics::regression::Polynomial pol(2); *error << "testing regression::PolynomialWeighted" << std::endl; statistics::regression::PolynomialWeighted pol_weighted(2); if (error!=&std::cerr) delete error;
Note: See TracChangeset for help on using the changeset viewer.