Changeset 221


Ignore:
Timestamp:
Dec 30, 2004, 11:36:25 PM (18 years ago)
Author:
Peter
Message:

interface to regression modified

Location:
trunk/src
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/Regression.cc

    r213 r221  
    88 
    99  Regression::Regression()
    10     : sumsq_(0)
    1110  {
    1211  }
  • trunk/src/Regression.h

    r216 r221  
    3434    virtual ~Regression(void) {};
    3535         
    36     virtual void estimate(const double x, double& y, double& y_err) const=0;
    37 
    3836    ///
    3937    /// This function computes the best-fit given a model (see
     
    4139    /// \sum{(\hat{y_i}-y_i)^2} \f$, where \f$ \hat{y} \f$ is the fitted value.
    4240    ///
    43     virtual int fit(const gslapi::vector& x, const gslapi::vector& y)=0;
     41    virtual void fit(const gslapi::vector& x, const gslapi::vector& y)=0;
    4442   
    4543    ///
     
    5048    /// to the inverse of the variance for \f$ y_i \f$
    5149    ///
    52     virtual int fit_weighted(const gslapi::vector& x, const gslapi::vector& y,
    53                              const gslapi::vector& w)=0;
    54 
    55     inline double sumsq() const { return sumsq_; }
    56          
     50    virtual void fit(const gslapi::vector& x, const gslapi::vector& y,
     51                    const gslapi::vector& w)=0;
     52    ///
     53    ///
     54    ///
     55    virtual void predict(const double x, double& y, double& y_err,
     56                         const double w=1) const=0;
     57             
    5758  protected:
    58     double sumsq_;
    59 
     59   
    6060  };
    6161
  • trunk/src/RegressionKernelBox.h

    r216 r221  
    2424
    2525    ///
    26     /// Function calculating the weight
     26    /// Function calculating the weight as \f$ w(x)=1\f$ if \f$|x|\le 1
     27    /// \f$, \f$ w(x)=0 \f$ otherwise.
    2728    ///
    2829    double weight(const double) const;
  • trunk/src/RegressionLinear.cc

    r216 r221  
    55
    66// C++_tools
     7#include "AveragerPair.h"
    78#include "Regression.h"
    89#include "vector.h"
     
    1617 
    1718  RegressionLinear::RegressionLinear(void)
    18     : Regression(), cov00_(0), cov01_(0), cov11_(0), k_(0), m_(0)
     19    : Regression(), alpha_(0), alpha_var_(0), beta_(0), beta_var_(0), m_x_(0),
     20      s2_(0)
    1921  {
    2022  }
    2123
     24  void RegressionLinear::fit(const gslapi::vector& x, const gslapi::vector& y)
     25  {
     26    statistics::AveragerPair ap;
     27    for (size_t i=0; i<x.size(); i++)
     28      ap.add(x(i),y(i));
     29
     30    alpha_ = ap.y_averager().mean();
     31    beta_ = ap.covariance() / ap.x_averager().variance();
     32
     33    // estimating the noise level, i.e. the conditional variance of y
     34    // given x, Var(y|x).
     35    double Q = (ap.y_averager().sum_xsqr_centered() - ap.sum_xy_centered() *
     36                ap.sum_xy_centered()/ap.x_averager().sum_xsqr_centered() ) ;
     37    s2_ = Q/(x.size()-2);
     38    r2_= 1;
     39    alpha_var_ = s2_ / x.size();
     40    beta_var_ = s2_ / ap.x_averager().sum_xsqr_centered();
     41    m_x_ = ap.x_averager().mean();
     42  }
     43
     44  void RegressionLinear::fit(const gslapi::vector& x, const gslapi::vector& y,
     45                             const gslapi::vector& w)
     46  {
     47    double m_x = w*x /w.sum();
     48    double m_y = w*y /w.sum();
     49   
     50    double sxy = 0;
     51    for (size_t i=0; i<x.size(); i++)
     52      sxy += w(i)*(x(i)-m_x)*(y(i)-m_y);
     53
     54    double sxx = 0;
     55    for (size_t i=0; i<x.size(); i++)
     56      sxx += w(i)*(x(i)-m_x)*(x(i)-m_x);
     57
     58    double syy = 0;
     59    for (size_t i=0; i<y.size(); i++)
     60      syy += w(i)*(y(i)-m_y)*(y(i)-m_y);
     61
     62    // estimating the noise level. see attached document for motivation
     63    // of the expression.
     64    s2_= (syy-sxy*sxy/sxx)/(w.sum()-2*(w*w)/w.sum()) ;
     65   
     66    alpha_ = m_y;
     67    beta_ = sxy/sxx;
     68    alpha_var_ = s2_/w.sum();
     69    beta_var_ = s2_/sxx; 
     70    m_x_=m_x;
     71  }
     72
     73   
    2274}} // of namespace statistics and namespace theplu
  • trunk/src/RegressionLinear.h

    r216 r221  
    1111// Standard C++ includes
    1212////////////////////////
    13 #include <gsl/gsl_fit.h>
     13#include <cmath>
    1414
    1515namespace theplu {
     
    1919  /// Class for Regression.   
    2020  ///
    21 
    2221 
    2322  class RegressionLinear : public Regression
     
    4039    virtual ~RegressionLinear(void) {};
    4140         
    42     inline void estimate(const double x, double& y, double& y_err) const
    43     {
    44       gsl_fit_linear_est(x, m_, k_, cov00_, cov01_, cov11_, &y, &y_err);
     41    ///
     42    /// @return the parameter \f$ \alpha \f$
     43    ///
     44    inline double alpha(void) const { return alpha_; }
     45
     46    ///
     47    /// @return standard deviation of parameter \f$ \alpha \f$
     48    ///
     49    inline double alpha_var(void) const { return sqrt(alpha_var_); }
     50
     51    ///
     52    /// @return the parameter \f$ \beta \f$
     53    ///
     54    inline double beta(void) const { return beta_; }
     55
     56    ///
     57    /// @return standard deviation of parameter \f$ \beta \f$
     58    ///
     59    inline double beta_var(void) const { return sqrt(beta_var_); }
     60   
     61    ///
     62    /// This function computes the best-fit linear regression
     63    /// coefficients \f$ (\alpha, \beta)\f$ of the model \f$ y =
     64    /// \alpha + \beta (x-m_x) \f$ from vectors \a x and \a y, by
     65    /// minimizing \f$ \sum{(y_i - \alpha - \beta (x-m_x))^2} \f$.
     66    ///
     67    void fit(const gslapi::vector& x, const gslapi::vector& y) ;
     68   
     69    ///
     70    /// This function computes the best-fit linear regression
     71    /// coefficients \f$ (\alpha, \beta)\f$ of the model \f$ y =
     72    /// \alpha + \beta (x-m_x) \f$ from vectors \a x and \a y, by
     73    /// minimizing \f$ \sum{w_i(y_i - \alpha - \beta (x-m_x))^2} \f$.
     74    ///
     75    void fit(const gslapi::vector& x, const gslapi::vector& y,
     76             const gslapi::vector& w);
     77   
     78    ///
     79    /// Function predicting value using the linear model. \a y_err is
     80    /// the expected deviation from the line for a new data point. If
     81    /// weights are used a weight can be specified for the new point.
     82    ///
     83    inline void predict(const double x, double& y, double& y_err,
     84                        const double w=1.0) const
     85    {
     86      y = alpha_ + beta_ * (x-m_x_);
     87      y_err = sqrt( alpha_var_+beta_var_*(x-m_x_)*(x-m_x_)+s2_/w );
    4588    }
    4689
    4790    ///
    48     /// This function computes the best-fit linear regression
    49     /// coefficients (k,m) of the model  \f$ y = kx + m \f$ from
    50     /// vectors \a x and \a y, by minimizing \f$ \sum{(kx_i+m-y_i)^2}
    51     /// \f$.
     91    /// Function returning the coefficient of determination,
     92    /// i.e. share of variance explained by the linear model.
    5293    ///
    53     inline int fit(const gslapi::vector& x, const gslapi::vector& y)
    54     {
    55       return gsl_fit_linear(x.gsl_vector_pointer()->data,
    56                             x.gsl_vector_pointer()->stride,
    57                             y.gsl_vector_pointer()->data,
    58                             y.gsl_vector_pointer()->stride,
    59                             x.gsl_vector_pointer()->size,
    60                             &m_, &k_, &cov00_, &cov01_, &cov11_, &sumsq_); }
     94    inline double r2(void) const { return r2_; }
    6195
    62     inline double k(void) const { return k_; }
    63     inline double m(void) const { return m_; }
    64 
    65 
    66     ///
    67     /// This function computes the best-fit linear regression
    68     /// coefficients (k,m) of the model \f$ y = kx + m \f$ from
    69     /// vectors \a x and \a y, by minimizing \f$ \sum w_i(kx_i+m-y_i)^2
    70     /// \f$. The weight \f$ w_i \f$ is the inverse of the variance for
    71     /// \f$ y_i \f$
    72     ///
    73     inline int fit_weighted(const gslapi::vector& x, const gslapi::vector& y,
    74                             const gslapi::vector& w)
    75     {
    76       return gsl_fit_wlinear(x.gsl_vector_pointer()->data,
    77                              x.gsl_vector_pointer()->stride,
    78                              w.gsl_vector_pointer()->data,
    79                              w.gsl_vector_pointer()->stride,
    80                              y.gsl_vector_pointer()->data,
    81                              y.gsl_vector_pointer()->stride,
    82                              x.gsl_vector_pointer()->size,
    83                              &m_, &k_,  &cov00_, &cov01_, &cov11_, &sumsq_); }
    84 
    85          
    8696  private:
    87     double cov00_;
    88     double cov01_;
    89     double cov11_;   
    90     double k_;
    91     double m_;
    92 
     97    double alpha_;
     98    double alpha_var_;
     99    double beta_;
     100    double beta_var_;
     101    double m_x_; // average of x values
     102    double s2_; // var(y|x)
     103    double r2_; // coefficient of determination
    93104  };
    94105
  • trunk/src/RegressionLocal.cc

    r216 r221  
    1717                                   Regression& r,
    1818                                   RegressionKernel& k,
    19                                    const gslapi::vector& estimated_x)
    20     : kernel_(&k), regression_(&r), estimated_x_(estimated_x),
    21       estimated_y_(estimated_x), estimated_y_err_(estimated_x)
     19                                   const size_t nof_predictions)
     20    : kernel_(&k), regression_(&r)
    2221  {
     22    estimated_x_ = gslapi::vector(nof_predictions);
     23    estimated_y_ = estimated_x_;
     24    estimated_y_err_ = estimated_x_;
    2325    //sorting data with respect to x
    2426    // Peter, could this be done without all the copying
     
    2931    }
    3032    sort(data_.begin(), data_.end());
     33    for (size_t i=0; i<nof_predictions; i++)
     34      estimated_x_(i) =
     35        data_[static_cast<size_t>((i+0.5)/nof_predictions*data_.size())].first;
    3136  }
    3237
     
    6368      // copying data
    6469      // Peter, this is a stupid solution which could be
    65       // improved when the vector class is updated.
     70      // improved by using a moving view (in the vector class).
    6671      gslapi::vector x(index_max-index_min+1);
    6772      gslapi::vector y(index_max-index_min+1);
     
    6974      for (size_t j=0; j<x.size(); j++)
    7075        x(j)=data_[index_min+j].first;
    71       for (size_t j=0; j<y.size(); j++)
     76      for (size_t j=0; j<y.size(); j++){
    7277        y(j)=data_[index_min+j].second;
     78        if (y(j)>10)
     79          std::cout << x(j) << "\t" << y(j) << std::endl;
     80      }
    7381      // calculating weights
    7482      for (size_t j=0; j<y.size(); j++)
     
    7987     
    8088      // fitting the regressor locally
    81       regression_->fit_weighted(x,y,w);
    82       regression_->estimate(estimated_x_(i),
     89      std::cout << "x: " << estimated_x_(i) << std::endl;
     90      std::cout << "x.begin: " << x(0) << std::endl;
     91      std::cout << "x.end: " << x(x.size()-1) << std::endl;
     92      regression_->fit(x,y,w);
     93      std::cout << "estimating value\n";
     94      regression_->predict(estimated_x_(i),
    8395                            estimated_y_(i),
    8496                            estimated_y_err_(i));
    8597    }
    8698  }
    87 
    88 
     99 
    89100  std::ostream& operator<< (std::ostream& s, const RegressionLocal& r)
    90101  {
  • trunk/src/RegressionLocal.h

    r216 r221  
    3232
    3333    ///
    34     /// Constructor loading the object with data, type of regressor
    35     /// and type kernel.
     34    /// Constructor loading the object with data, type of regressor,
     35    /// type of kernel and in how many points to predict.
    3636    ///
    3737    RegressionLocal(const gslapi::vector& x, const gslapi::vector& y,
    38                     Regression& r, RegressionKernel& k,
    39                     const gslapi::vector&);
     38                    Regression& r, RegressionKernel& k, const size_t);
    4039
    4140    ///
     
    4948    virtual ~RegressionLocal(void) {};
    5049
     50    ///
     51    /// Function returning the points where to predict
     52    ///
    5153    inline gslapi::vector estimated_x(void) const { return estimated_x_; }
     54
     55    ///
     56    /// Function returning predicted values
     57    ///
    5258    inline gslapi::vector estimated_y(void) const { return estimated_y_; }
     59
     60    ///
     61    /// Function returning error of predictions
     62    ///
    5363    inline gslapi::vector estimated_y_err(void) const {return estimated_y_err_;}
    5464 
    5565    ///
    56     /// Function
     66    /// Function performing the fit, using a \a fraction of the data
     67    /// points and the regression method defined in the constructor. The
     68    /// algorithm uses equally many points around the point to
     69    /// predict. If this is not possible (because the point is too far
     70    /// left/right) the points to the extreme left/right is used.
    5771    ///
    5872    void fit(const double fraction);
     73   
    5974         
    6075  private:
  • trunk/src/RegressionNaive.h

    r216 r221  
    11// $Id$
    22
    3 #ifndef _theplu_statistics_regression_linear_
    4 #define _theplu_statistics_regression_linear_
     3#ifndef _theplu_statistics_regression_naive_
     4#define _theplu_statistics_regression_naive_
    55
    66// C++ tools include
     
    1212// Standard C++ includes
    1313////////////////////////
    14 #include <gsl/gsl_fit.h>
     14//#include <gsl/gsl_fit.h>
    1515#include <utility>
    1616
     
    2121  /// Class for Regression.   
    2222  ///
    23 
    2423 
    2524  class RegressionNaive : public Regression
     
    4140    ///
    4241    virtual ~RegressionNaive(void) {};
    43 
    44     inline void estimate(const double x, double& y, double& y_err)
    45     { y=m_; y_err=sqrt(var_); }
    4642         
    4743    ///
     
    5046    /// \sum{(y_i-m)^2} \f$.
    5147    ///
    52     inline int fit(const gslapi::vector& x, const gslapi::vector& y)
     48    inline void fit(const gslapi::vector& x, const gslapi::vector& y)
    5349    {
    5450      Averager a;
     
    5652        a.add(y(i));
    5753      m_=a.mean();
    58       var_=a.standard_error()*a.standard_error();
    59       return 0;
     54      s2_=a.variance();
     55      m_err_=a.standard_error();
    6056    }
    6157
     
    6359    /// This function computes the best-fit for the naive model \f$ y
    6460    /// = m \f$ from vectors \a x and \a y, by minimizing \f$ \sum
    65     /// w_i(y_i-m)^2 \f$. The weight \f$ w_i \f$ is the inverse of the
    66     /// variance for \f$ y_i \f$
     61    /// w_i(y_i-m)^2 \f$. The weight \f$ w_i \f$ is proportional to
     62    /// the inverse of the variance for \f$ y_i \f$
    6763    ///
    68     inline int fit_weighted(const gslapi::vector& x,
    69                             const gslapi::vector& y,
    70                             const gslapi::vector& w)
     64    inline void fit(const gslapi::vector& x,
     65                    const gslapi::vector& y,
     66                    const gslapi::vector& w)
    7167    {
    7268      WeightedAverager a;
     
    7470        a.add(y(i), w(i));
    7571      m_=a.mean();
    76       var_=a.standard_error()*a.standard_error();
    77       return 0;
     72      m_err_=a.standard_error();
     73      s2_=m_err_*m_err_*w.sum();
     74    }
     75
     76    ///
     77    /// @return the parameter m
     78    ///
     79    inline double m(void) const { return m_; }
     80
     81    ///
     82    /// @return standard deviation of parameter m
     83    ///
     84    inline double alpha_var(void) const { return m_err_; }
     85
     86    ///
     87    /// Function predicting value using the naive model. \a y_err is
     88    /// the expected deviation from the line for a new data point. If
     89    /// weights are used a weight can be specified for the new point.
     90    ///
     91    inline void predict(const double x, double& y, double& y_err,
     92                        const double w=1.0) const
     93    {
     94      y = m_;
     95      y_err = sqrt(m_err_*m_err_ + s2_/w);
    7896    }
    7997
    8098         
    8199  private:
    82     double var_;
     100    double s2_; // noise level ( var = s2/w in weighted case)
    83101    double m_;
    84 
     102    double m_err_;
    85103  };
    86104
Note: See TracChangeset for help on using the changeset viewer.