Changeset 235
- Timestamp:
- Feb 21, 2005, 3:53:48 PM (18 years ago)
- Location:
- trunk/src
- Files:
-
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/Regression.cc
r221 r235 8 8 9 9 Regression::Regression() 10 : x_(0.0), y_(0.0), y_err_(0.0) 10 11 { 11 12 } -
trunk/src/Regression.h
r221 r235 4 4 #define _theplu_statistics_regression_ 5 5 6 // C++ tools include7 /////////////////////8 6 #include "vector.h" 9 10 // Standard C++ includes11 ////////////////////////12 7 13 8 … … 54 49 /// 55 50 virtual void predict(const double x, double& y, double& y_err, 56 const double w=1) const=0; 51 const double w=1) =0; 52 /// 53 /// @return prediction value and parameters 54 /// 55 virtual std::ostream& print(std::ostream&) const=0; 56 57 /// 58 /// @return header for print() 59 /// 60 virtual std::ostream& print_header(std::ostream& s) const=0; 57 61 58 62 protected: 59 63 double x_; 64 double y_; 65 double y_err_; 60 66 }; 61 67 -
trunk/src/RegressionLinear.cc
r221 r235 71 71 } 72 72 73 std::ostream& RegressionLinear::print(std::ostream& s) const 74 { 75 s << x_ << "\t" 76 << y_ << "\t" 77 << y_err_ << "\t" 78 << alpha_ << "\t" 79 << alpha_err() << "\t" 80 << beta_ << "\t" 81 << beta_err() << "\t" 82 << s2_ << "\t" 83 << r2_; 84 85 return s; 86 } 87 88 std::ostream& RegressionLinear::print_header(std::ostream& s) const 89 { 90 s << "# column 1: x\n" 91 << "# column 2: y\n" 92 << "# column 3: y_err\n" 93 << "# column 4: alpha\n" 94 << "# column 5: alpha_err\n" 95 << "# column 6: beta\n" 96 << "# column 7: beta_err\n" 97 << "# column 8: s2 (var(y|x))\n" 98 << "# column 9: r2 (coefficient of determination)"; 99 return s; 100 } 101 73 102 74 103 }} // of namespace statistics and namespace theplu -
trunk/src/RegressionLinear.h
r221 r235 47 47 /// @return standard deviation of parameter \f$ \alpha \f$ 48 48 /// 49 inline double alpha_ var(void) const { return sqrt(alpha_var_); }49 inline double alpha_err(void) const { return sqrt(alpha_var_); } 50 50 51 51 /// 52 /// @return the parameter \f$ \beta 52 /// @return the parameter \f$ \beta \f$ 53 53 /// 54 54 inline double beta(void) const { return beta_; } 55 55 56 56 /// 57 /// @return standard dev aition of parameter \f$ \beta \f$57 /// @return standard deviation of parameter \f$ \beta \f$ 58 58 /// 59 inline double beta_ var(void) const { return sqrt(beta_var_); }59 inline double beta_err(void) const { return sqrt(beta_var_); } 60 60 61 61 /// … … 63 63 /// coefficients \f$ (\alpha, \beta)\f$ of the model \f$ y = 64 64 /// \alpha + \beta (x-m_x) \f$ from vectors \a x and \a y, by 65 /// minimizing \f$ \sum{(y_i - \alpha - \beta (x-m_x))^2} \f$. 65 /// minimizing \f$ \sum{(y_i - \alpha - \beta (x-m_x))^2} \f$. By 66 /// construction \f$ \alpha \f$ and \f$ \beta \f$ are independent. 66 67 /// 67 68 void fit(const gslapi::vector& x, const gslapi::vector& y) ; … … 71 72 /// coefficients \f$ (\alpha, \beta)\f$ of the model \f$ y = 72 73 /// \alpha + \beta (x-m_x) \f$ from vectors \a x and \a y, by 73 /// minimizing \f$ \sum{w_i(y_i - \alpha - \beta (x-m_x))^2} \f$. 74 /// minimizing \f$ \sum{w_i(y_i - \alpha - \beta (x-m_x))^2} \f$, 75 /// where \f$ m_x \f$ is the weighted average. By construction \f$ 76 /// \alpha \f$ and \f$ \beta \f$ are independent. 74 77 /// 75 78 void fit(const gslapi::vector& x, const gslapi::vector& y, … … 82 85 /// 83 86 inline void predict(const double x, double& y, double& y_err, 84 const double w=1.0) const87 const double w=1.0) 85 88 { 86 89 y = alpha_ + beta_ * (x-m_x_); 87 90 y_err = sqrt( alpha_var_+beta_var_*(x-m_x_)*(x-m_x_)+s2_/w ); 88 91 } 92 93 /// 94 /// @return prediction value and parameters 95 /// 96 std::ostream& print(std::ostream&) const; 97 98 /// 99 /// @return header for print() 100 /// 101 std::ostream& print_header(std::ostream&) const; 89 102 90 103 /// -
trunk/src/RegressionLocal.cc
r221 r235 13 13 namespace statistics { 14 14 15 RegressionLocal::RegressionLocal(const gslapi::vector& x, 16 const gslapi::vector& y, 17 Regression& r, 18 RegressionKernel& k, 19 const size_t nof_predictions) 20 : kernel_(&k), regression_(&r) 15 RegressionLocal::RegressionLocal(Regression& r, 16 RegressionKernel& k) 17 : kernel_(&k), regressor_(&r) 21 18 { 22 estimated_x_ = gslapi::vector(nof_predictions);23 estimated_y_ = estimated_x_;24 estimated_y_err_ = estimated_x_;25 //sorting data with respect to x26 // Peter, could this be done without all the copying27 std::vector<std::pair<double, double> > data;28 for (size_t i=0; i<x.size(); i++){29 std::pair<double, double> tmp(x(i),y(i));30 data_.push_back(tmp);31 }32 sort(data_.begin(), data_.end());33 for (size_t i=0; i<nof_predictions; i++)34 estimated_x_(i) =35 data_[static_cast<size_t>((i+0.5)/nof_predictions*data_.size())].first;36 19 } 37 20 38 void RegressionLocal::fit(const double f )21 void RegressionLocal::fit(const double f, const u_int step_size) 39 22 { 23 sort(data_.begin(), data_.end()); 24 40 25 // number of points on each side 41 size_t nof_data_points = static_cast<size_t>(f/2*data_.size()); 42 size_t index_min = 0; 43 size_t index_max = 0; 44 for (size_t i=0; i<estimated_x_.size(); i++) { 26 size_t nof_points = static_cast<size_t>(f*data_.size()); 27 double width = 0; 28 int min_index = 0; 29 for (size_t i=0; i<data_.size(); i+=step_size) { 30 x_.push_back(data_[i].first); 31 32 // determining boundaries window/kernel 33 45 34 // extreme left case 46 if (data_[nof_data_points-1].first>estimated_x_(i)){ 47 index_min = 0; 48 index_max = 2*nof_data_points-1; 35 if (data_[i].first < 36 (data_[nof_points-1].first+data_[nof_points-1].first)/2 ){ 37 min_index=0; 38 width = data_[nof_points-1].first-data_[i].first; 49 39 } 50 51 40 // extreme right case 52 else if (data_[ data_.size()-estimated_x_.size()-1].first<53 estimated_x_(i)){54 index_min = data_.size()-2*nof_data_points;55 index_max = data_.size()-1;41 else if (data_[i].first > (data_[data_.size()-1].first+ 42 data_[data_.size()-nof_points].first)/2 ){ 43 min_index = data_.size() - nof_points; 44 width = data_[i].first - data_[data_.size()-nof_points].first; 56 45 } 57 58 // normal case - window in the middle59 46 else { 60 for (size_t j=nof_data_points; j<data_.size(); j++) 61 if (data_[j].first>estimated_x_(i)) { 62 index_min = j-nof_data_points; 63 index_max = j+nof_data_points-1; 64 break; 65 } 47 // stepping forward until x is in the middle 48 while(data_[min_index+nof_points-1].first-data_[i].first 49 < data_[i].first-data_[min_index].first) 50 min_index++; 51 width = data_[min_index+nof_points-1].first-data_[i].first; 52 // Peter, should check if x gets closer to centrum if we step 53 // back one step 66 54 } 67 55 56 68 57 // copying data 69 // Peter, t his is a stupid solution which could be70 // improved by using a moving view (in thevector class).71 gslapi::vector x( index_max-index_min+1);72 gslapi::vector y( index_max-index_min+1);73 gslapi::vector w( index_max-index_min+1);58 // Peter, too much copying. Move the copying outside loop and 59 // use a vector view (when available in gslapi::vector class). 60 gslapi::vector x(nof_points); 61 gslapi::vector y(nof_points); 62 gslapi::vector w(nof_points); 74 63 for (size_t j=0; j<x.size(); j++) 75 x(j)=data_[ index_min+j].first;64 x(j)=data_[min_index+j].first; 76 65 for (size_t j=0; j<y.size(); j++){ 77 y(j)=data_[index_min+j].second; 78 if (y(j)>10) 79 std::cout << x(j) << "\t" << y(j) << std::endl; 66 y(j)=data_[min_index+j].second; 80 67 } 81 68 // calculating weights 82 69 for (size_t j=0; j<y.size(); j++) 83 w(j) = kernel_->weight( 2*(x(j)-data_[index_min].first)/ 84 (data_[index_max].first- 85 data_[index_min].first)-1); 86 70 w(j) = kernel_->weight( (x(j)-(data_[min_index].first+ 71 data_[min_index+nof_points-1].first) /2) 72 /width ); 87 73 88 74 // fitting the regressor locally 89 std::cout << "x: " << estimated_x_(i) << std::endl; 90 std::cout << "x.begin: " << x(0) << std::endl; 91 std::cout << "x.end: " << x(x.size()-1) << std::endl; 92 regression_->fit(x,y,w); 93 std::cout << "estimating value\n"; 94 regression_->predict(estimated_x_(i), 95 estimated_y_(i), 96 estimated_y_err_(i)); 75 regressor_->fit(x,y,w); 76 regressor_->predict(x_[i], y_[i], y_err_[i]); 97 77 } 98 78 } 99 100 std::ostream& operator<< (std::ostream& s, const RegressionLocal& r)79 80 std::ostream& RegressionLocal::print(std::ostream& s) const 101 81 { 102 gslapi::vector x=r.estimated_x(); 103 gslapi::vector y(r.estimated_y()); 104 gslapi::vector y_err(r.estimated_y_err()); 105 for (size_t i=0; i<x.size(); i++){ 106 s << x(i) << "\t"; 107 s << y(i) << "\t"; 108 s << y_err(i) << "\n"; 82 for (size_t i=0; i<x_.size(); i++) { 83 regressor_->print(s); 84 s << std::endl; 109 85 } 110 86 return s; 111 87 } 112 88 89 113 90 }} // of namespace cpptools and namespace theplu -
trunk/src/RegressionLocal.h
r221 r235 32 32 33 33 /// 34 /// Constructor loading the object with data, type ofregressor,35 /// type of kernel and in how many points to predict.34 /// Constructor taking type of \a regressor, 35 /// type of \a kernel. 36 36 /// 37 RegressionLocal(const gslapi::vector& x, const gslapi::vector& y, 38 Regression& r, RegressionKernel& k, const size_t); 37 RegressionLocal(Regression& regressor, RegressionKernel& kernel); 39 38 40 39 /// … … 48 47 virtual ~RegressionLocal(void) {}; 49 48 49 50 /// 51 /// adding a data point 52 /// 53 inline void add(const double x, const double y) 54 { data_.push_back(std::make_pair(x,y)); } 55 50 56 /// 51 57 /// Function returning the points where to predict 52 58 /// 53 inline gslapi::vector estimated_x(void) const { return estimated_x_; }59 inline const std::vector<double>& x(void) const { return x_; } 54 60 55 61 /// 56 62 /// Function returning predicted values 57 63 /// 58 inline gslapi::vector estimated_y(void) const { return estimated_y_; }64 inline const std::vector<double>& y(void) const { return y_; } 59 65 60 66 /// 61 67 /// Function returning error of predictions 62 68 /// 63 inline gslapi::vector estimated_y_err(void) const {return estimated_y_err_;}69 inline const std::vector<double>& y_err(void) const { return y_err_; } 64 70 65 71 /// 66 /// Function performing the fit, using a \a fraction of the data 67 /// point and regression method defined in the constructor. The 68 /// algorithm uses equally many points around the point to 69 /// predict. If this is not possible (because the point is too far 70 /// left/right) the points to the extreme left/right is used. 72 /// Performs the fit in data defined by add using a kernel and a 73 /// regression method defined in the constructor. The algorithm 74 /// selects boundaries for the kernel such that \a fraction of the 75 /// data points are used and the point where the fit is done is in 76 /// the middle. Starting with the smallest x, the function jumps 77 /// \a step_size point in each iteration to do the next fit 71 78 /// 72 void fit(const double fraction); 73 79 void fit(const double fraction, const u_int step_size=1); 80 81 /// 82 /// @return prediction values and parameters 83 /// 84 std::ostream& print(std::ostream&) const; 85 86 /// 87 /// @return header for print() 88 /// 89 inline std::ostream& print_header(std::ostream& s) const 90 { return regressor_->print_header(s); } 91 74 92 75 93 private: … … 77 95 gslapi::vector data_y_; 78 96 RegressionKernel* kernel_; 79 Regression* regress ion_;80 gslapi::vector estimated_x_;81 gslapi::vector estimated_y_;82 gslapi::vector estimated_y_err_;97 Regression* regressor_; 98 std::vector<double> x_; 99 std::vector<double> y_; 100 std::vector<double> y_err_; 83 101 84 102 85 103 }; 86 87 ///88 /// The output operator for Local Regression Class.89 ///90 std::ostream& operator<< (std::ostream& s, const RegressionLocal&);91 104 92 105 }} // of namespace statistics and namespace theplu -
trunk/src/RegressionNaive.cc
r204 r235 1 1 // $Id$ 2 2 3 // Header 3 4 4 #include "RegressionNaive.h" 5 5 6 // C++_tools7 6 #include "Averager.h" 8 7 #include "Regression.h" … … 10 9 #include "WeightedAverager.h" 11 10 11 #include <iostream> 12 12 13 13 … … 17 17 18 18 RegressionNaive::RegressionNaive(void) 19 : Regression() 19 : Regression(), m_(0.0), m_err_(0.0) 20 20 { 21 21 } 22 22 23 void RegressionNaive::fit(const gslapi::vector& x, const gslapi::vector& y) 24 { 25 Averager a; 26 for (size_t i=0; i<y.size(); i++) 27 a.add(y(i)); 28 m_=a.mean(); 29 s2_=a.variance(); 30 m_err_=a.standard_error(); 31 } 23 32 33 void RegressionNaive::fit(const gslapi::vector& x, 34 const gslapi::vector& y, 35 const gslapi::vector& w) 36 { 37 WeightedAverager a; 38 for (size_t i=0; i<y.size(); i++) 39 a.add(y(i), w(i)); 40 m_=a.mean(); 41 m_err_=a.standard_error(); 42 s2_=m_err_*m_err_*w.sum(); 43 } 24 44 45 void RegressionNaive::predict(const double x, double& y, double& y_err, 46 const double w) 47 { 48 x_ = x; 49 y = m_; 50 y_err = sqrt(m_err_*m_err_ + s2_/w); 51 } 52 53 std::ostream& RegressionNaive::print(std::ostream& s) const 54 { 55 s << x_ << "\t" 56 << m_ << "\t" 57 << sqrt(m_err_*m_err_ + s2_); 58 return s; 59 } 60 61 std::ostream& RegressionNaive::print_header(std::ostream& s) const 62 { 63 s << "# column 1: x\n" 64 << "# column 2: y\n" 65 << "# column 3: y_err"; 66 return s; 67 } 68 25 69 }} // of namespace cpptools and namespace theplu -
trunk/src/RegressionNaive.h
r221 r235 13 13 //////////////////////// 14 14 //#include <gsl/gsl_fit.h> 15 #include <iostream> 15 16 #include <utility> 17 18 16 19 17 20 namespace theplu { … … 42 45 43 46 /// 44 /// This function computes the best-fit for the naive model \f$ 45 /// y = m \f$ from vectors \a x and \a y, by minimizing \f$ 46 /// \sum{(y_i-m)^2} \f$. 47 /// This function computes the best-fit for the naive model \f$ y 48 /// = m \f$ from vectors \a x and \a y, by minimizing \f$ 49 /// \sum{(y_i-m)^2} \f$. This function is the same as using the 50 /// weighted version with unity weights. 47 51 /// 48 inline void fit(const gslapi::vector& x, const gslapi::vector& y) 49 { 50 Averager a; 51 for (size_t i=0; i<y.size(); i++) 52 a.add(y(i)); 53 m_=a.mean(); 54 s2_=a.variance(); 55 m_err_=a.standard_error(); 56 } 52 void fit(const gslapi::vector& x, const gslapi::vector& y); 57 53 58 54 /// … … 62 58 /// the inverse of the variance for \f$ y_i \f$ 63 59 /// 64 inline void fit(const gslapi::vector& x, 65 const gslapi::vector& y, 66 const gslapi::vector& w) 67 { 68 WeightedAverager a; 69 for (size_t i=0; i<y.size(); i++) 70 a.add(y(i), w(i)); 71 m_=a.mean(); 72 m_err_=a.standard_error(); 73 s2_=m_err_*m_err_*w.sum(); 74 } 75 76 /// 77 /// @return the parameter m 78 /// 79 inline double m(void) const { return m_; } 80 81 /// 82 /// @return standard deviation of parameter m 83 /// 84 inline double alpha_var(void) const { return m_err_; } 60 void fit(const gslapi::vector& x, 61 const gslapi::vector& y, 62 const gslapi::vector& w); 85 63 86 64 /// 87 65 /// Function predicting value using the naive model. \a y_err is 88 /// the expected deviation from the line for a new data point. If 89 /// weights are used a weight can be specified for the new point. 66 /// the expected deviation from the line for a new data point. The 67 /// weight for the new point can be specified. A smaller weight 68 /// means larger error. The error has two components: the variance 69 /// of point and error in estimation of m_. 90 70 /// 91 inline void predict(const double x, double& y, double& y_err, 92 const double w=1.0) const 93 { 94 y = m_; 95 y_err = sqrt(m_err_*m_err_ + s2_/w); 96 } 71 void predict(const double x, double& y, double& y_err, 72 const double w) ; 97 73 74 /// 75 /// @return prediction value and parameters 76 /// 77 std::ostream& print(std::ostream&) const; 78 79 /// 80 /// @return header for print() 81 /// 82 std::ostream& print_header(std::ostream&) const; 83 98 84 99 85 private: 100 double s2_; // noise level ( var = s2/w in weighted case) 86 double s2_; // noise level - the typical variance for a point with 87 // weight w is s2/w 101 88 double m_; 102 double m_err_; 89 double m_err_; // error of estimation of mean m_ 90 103 91 }; 104 92
Note: See TracChangeset
for help on using the changeset viewer.