Changeset 586


Ignore:
Timestamp:
Jun 19, 2006, 11:56:04 AM (15 years ago)
Author:
Peter
Message:

closes #23 redesign of regression classes

Location:
trunk
Files:
4 added
19 edited

Legend:

Unmodified
Added
Removed
  • trunk/c++_tools/statistics/Linear.cc

    r483 r586  
    2323    beta_ = ap_.covariance() / ap_.x_averager().variance();
    2424
    25     // estimating the noise level, i.e. the conditional variance of y
    26     // given x, Var(y|x).
    27     double Q = (ap_.y_averager().sum_xx_centered() - ap_.sum_xy_centered() *
    28                 ap_.sum_xy_centered()/ap_.x_averager().sum_xx_centered() ) ;
    29     s2_ = Q/(x.size()-2);
    30     r2_= 1;
    31     alpha_var_ = s2_ / x.size();
    32     beta_var_ = s2_ / ap_.x_averager().sum_xx_centered();
     25    // calculating deviation between data and model
     26    msd_ = (ap_.y_averager().sum_xx_centered() - ap_.sum_xy_centered() *
     27            ap_.sum_xy_centered()/ap_.x_averager().sum_xx_centered() )/x.size();
     28    r2_= 1-msd_/ap_.x_averager().variance();
     29    alpha_var_ = msd_ / x.size();
     30    beta_var_ = msd_ / ap_.x_averager().sum_xx_centered();
    3331    m_x_ = ap_.x_averager().mean();
    3432  }
    3533
    36   void Linear::predict(const double x, double& y, double& y_err) const
     34  double Linear::predict(const double x) const
    3735  {
    38     y = alpha_ + beta_ * (x-m_x_);
    39     y_err = sqrt( alpha_var_+beta_var_*(x-m_x_)*(x-m_x_)+s2_ );
     36    return alpha_ + beta_ * (x-m_x_);
    4037  }
    4138
    42   std::ostream& Linear::print_header(std::ostream& s) const
     39  double Linear::prediction_error(const double x) const
    4340  {
    44     s << "# column 1: x\n"
    45       << "# column 2: y\n"
    46       << "# column 3: y_err\n"
    47       << "# column 4: alpha\n"
    48       << "# column 5: alpha_err\n"
    49       << "# column 6: beta\n"
    50       << "# column 7: beta_err\n"
    51       << "# column 8: s2 (var(y|x))\n"
    52       << "# column 9: r2 (coefficient of determination)";
    53     return s;
     41    return sqrt( alpha_var_+beta_var_*(x-m_x_)*(x-m_x_)+msd_ );
    5442  }
    5543
     44  double Linear::standard_error(const double x) const
     45  {
     46    return sqrt( alpha_var_+beta_var_*(x-m_x_)*(x-m_x_));
     47  }
    5648
    5749}}} // of namespaces regression, statisitcs and thep
  • trunk/c++_tools/statistics/Linear.h

    r430 r586  
    2727    inline Linear(void)
    2828      : OneDimensional(), alpha_(0), alpha_var_(0), beta_(0), beta_var_(0),
    29         m_x_(0), s2_(0) {}
     29        m_x_(0){}
    3030
    3131    ///
     
    6464   
    6565    ///
    66     /// Function predicting value using the linear model. \a y_err is
    67     /// the expected deviation from the line for a new data point.
     66    /// @return value in @a x of model
    6867    ///
    69     void predict(const double x, double& y, double& y_err) const;
     68    double predict(const double x) const;
    7069
    7170    ///
    72     /// @return prediction value and parameters
     71    /// @return expected prediction error for a new data point in @a x
    7372    ///
    74     std::ostream& print(std::ostream&) const;
    75              
     73    double prediction_error(const double x) const;
     74
    7675    ///
    77     /// @return header for print()
     76    /// @return error of model value in @a x
    7877    ///
    79     std::ostream& print_header(std::ostream&) const;
     78    double standard_error(const double x) const;
    8079
    8180    ///
    8281    /// Function returning the coefficient of determination,
    8382    /// i.e. fraction of variance explained by the linear model.
    84     /// @todo implement r2's calculation in fit function
    8583    ///
    8684    inline double r2(void) const { return r2_; }
     
    9795    double beta_var_;
    9896    double m_x_; // average of x values
    99     double s2_; // var(y|x)
    10097    double r2_; // coefficient of determination
    10198  };
  • trunk/c++_tools/statistics/LinearWeighted.cc

    r582 r586  
    4545  }
    4646
    47   std::ostream& LinearWeighted::print(std::ostream& s) const
    48   {
    49     s << x_ << "\t"
    50       << y_ << "\t"
    51       << y_err_ << "\t"
    52       << alpha_ << "\t"
    53       << alpha_err() << "\t"
    54       << beta_ << "\t"
    55       << beta_err() << "\t"
    56       << s2_ << "\t"
    57       << r2_;
    58    
    59     return s;
    60   }
    61    
    62   std::ostream& LinearWeighted::print_header(std::ostream& s) const
    63   {
    64     s << "# column 1: x\n"
    65       << "# column 2: y\n"
    66       << "# column 3: y_err\n"
    67       << "# column 4: alpha\n"
    68       << "# column 5: alpha_err\n"
    69       << "# column 6: beta\n"
    70       << "# column 7: beta_err\n"
    71       << "# column 8: s2 (var(y|x))\n"
    72       << "# column 9: r2 (coefficient of determination)";
    73     return s;
    74   }
    75 
    7647
    7748}}} // of namespaces regression, statisitcs and thep
  • trunk/c++_tools/statistics/LinearWeighted.h

    r495 r586  
    2828      : OneDimensionalWeighted(), alpha_(0), alpha_var_(0), beta_(0),
    2929        beta_var_(0),
    30         m_x_(0), s2_(0) {}
     30        m_x_(0){}
    3131
    3232    ///
     
    6767   
    6868    ///
    69     /// Function predicting value using the linear model. \a y_err is
    70     /// the estimated error of y(x)
    71     ///
    72     inline void predict(const double x, double& y, double& y_err,
    73                         const double w=1.0)
    74     {
    75       y = alpha_ + beta_ * (x-m_x_);
    76       y_err_ = y_err = sqrt( alpha_var_+beta_var_*(x-m_x_)*(x-m_x_));
    77       x_=x;
    78       y_=y;
    79     }
     69    /// Function predicting value using the linear model: \f$ y =
     70    /// \alpha + \beta (x - m)
     71    double predict(const double x) const { return alpha_ + beta_ * (x-m_x_); }
    8072
    8173    ///
    82     /// @return prediction value and parameters
     74    /// estimated deviation from predicted value for a new data point
     75    /// in @a x with weight @a w
    8376    ///
    84     std::ostream& print(std::ostream&) const;
    85              
     77    inline double prediction_error(const double x, const double w=1) const
     78    { return sqrt(alpha_var_ + beta_var_*(x-m_x_)*(x-m_x_)+s2_/w); }
     79
    8680    ///
    87     /// @return header for print()
     81    /// estimated error @a y_err \f$ y_err = \sqrt{ Var(\alpha) +
     82    /// Var(\beta)*(x-m)^2 }.
    8883    ///
    89     std::ostream& print_header(std::ostream&) const;
     84    inline double standard_error(const double x) const
     85    { return sqrt(alpha_var_ + beta_var_*(x-m_x_)*(x-m_x_) ); }
    9086
    9187    ///
     
    106102    double beta_var_;
    107103    double m_x_; // average of x values
    108     double s2_; // var(y|x)
    109104    double r2_; // coefficient of determination
    110105  };
  • trunk/c++_tools/statistics/Local.cc

    r501 r586  
    8181      assert(i<y_predicted_.size());
    8282      assert(i<y_err_.size());
    83       regressor_->predict(x(i*step_size), y_predicted_(i),y_err_(i));
     83      y_predicted_(i) = regressor_->predict(x(i*step_size));
     84      y_err_(i) = regressor_->standard_error(x(i*step_size));
    8485    }
    8586  }
  • trunk/c++_tools/statistics/Makefile.am

    r582 r586  
    3232  Histogram.cc Kernel.cc KernelBox.cc KernelTriCube.cc Linear.cc \
    3333  LinearWeighted.cc Local.cc \
    34   MultiDimensional.cc Naive.cc NaiveWeighted.cc OneDimensional.cc\
     34  MultiDimensional.cc \
     35  MultiDimensionalWeighted.cc \
     36  Naive.cc NaiveWeighted.cc OneDimensional.cc\
    3537  Pearson.cc PearsonDistance.cc \
    36   Polynomial.cc ROC.cc Score.cc tScore.cc SNR.cc utility.cc \
     38  Polynomial.cc \
     39  PolynomialWeighted.cc \
     40  ROC.cc Score.cc tScore.cc SNR.cc utility.cc \
    3741  WilcoxonFoldChange.cc
    3842
     
    4549  Fisher.h FoldChange.h \
    4650  Histogram.h Kernel.h KernelBox.h KernelTriCube.h Linear.h LinearWeighted.h \
    47   Local.h MultiDimensional.h Naive.h NaiveWeighted.h OneDimensional.h \
     51  Local.h MultiDimensional.h \
     52  MultiDimensionalWeighted.h \
     53  Naive.h NaiveWeighted.h OneDimensional.h \
    4854  OneDimensionalWeighted.h Pearson.h  PearsonDistance.h \
    49   Polynomial.h ROC.h Score.h SNR.h tScore.h \
     55  Polynomial.h \
     56  PolynomialWeighted.h \
     57  ROC.h Score.h SNR.h tScore.h \
    5058  utility.h WilcoxonFoldChange.h
  • trunk/c++_tools/statistics/MultiDimensional.cc

    r420 r586  
    2323
    2424
     25  double MultiDimensional::prediction_error(const gslapi::vector& x) const
     26  {
     27    double s2 = 0;
     28    for (size_t i=0; i<x.size(); ++i){
     29      s2 += covariance_(i,i)*x(i)*x(i);
     30      for (size_t j=i+1; j<x.size(); ++j)
     31        s2 += 2*covariance_(i,j)*x(i)*x(j);
     32    }
     33    return sqrt(s2+chisquare_);
     34  }
     35
     36
     37  double MultiDimensional::standard_error(const gslapi::vector& x) const
     38  {
     39    double s2 = 0;
     40    for (size_t i=0; i<x.size(); ++i){
     41      s2 += covariance_(i,i)*x(i)*x(i);
     42      for (size_t j=i+1; j<x.size(); ++j)
     43        s2 += 2*covariance_(i,j)*x(i)*x(j);
     44    }
     45    return sqrt(s2);
     46  }
     47
    2548}}} // of namespaces regression, statisitcs and thep
  • trunk/c++_tools/statistics/MultiDimensional.h

    r420 r586  
    4141    gslapi::vector fit_parameters(void) { return fit_parameters_; }
    4242
     43    ///
     44    /// @return value in @a x according to fitted model
     45    ///
     46    inline double predict(const gslapi::vector& x) const
     47    { return fit_parameters_ * x; }
     48
     49    ///
     50    /// @return expected prediction error for a new data point in @a x
     51    ///
     52    double prediction_error(const gslapi::vector& x) const;
     53
     54    ///
     55    /// @return error of model value in @a x
     56    ///
     57    double standard_error(const gslapi::vector& x) const;
     58
    4359  private:
    4460    double chisquare_;
  • trunk/c++_tools/statistics/Naive.cc

    r430 r586  
    2323  }
    2424
    25   void Naive::predict(const double x, double& y, double& y_err) const
     25  double Naive::predict(const double x) const
    2626  {
    27     y = ap_.x_averager().mean();
    28     y_err = ap_.x_averager().std()*(1.0+1.0/ap_.n());
     27    return ap_.y_averager().mean();
    2928  }
    3029 
    31   std::ostream& Naive::print_header(std::ostream& s) const
     30  double Naive::prediction_error(const double x) const
    3231  {
    33     s << "# column 1: x\n"
    34       << "# column 2: y\n"
    35       << "# column 3: y_err";
    36     return s;
     32    return sqrt(msd_*(1.0+1.0/ap_.n()));
     33  }
     34
     35  double Naive::standard_error(const double x) const
     36  {
     37    return sqrt(msd_/ap_.n());
    3738  }
    3839
  • trunk/c++_tools/statistics/Naive.h

    r430 r586  
    4949
    5050    ///
    51     /// Function predicting value using the naive model. \a y_err is
    52     /// the expected deviation from the line for a new data point. The
     51    /// Function predicting value using the naive model.
     52    ///
     53    double predict(const double x) const;
     54 
     55    ///
     56    /// The expected deviation from the line for a new data point. The
    5357    /// error has two components: the variance of point and error in
    54     /// estimation of the mean.
     58    /// estimation of the mean. 
    5559    ///
    56     void predict(const double x, double& y, double& y_err) const;
     60    double prediction_error(const double x) const;
    5761
    5862    ///
    59     /// @return header for print()
     63    /// @return standard error
    6064    ///
    61     std::ostream& print_header(std::ostream&) const;
    62              
    63          
     65    double standard_error(const double x) const;
     66
    6467  private:
    65     double s2_; // noise level - the typical variance for a point with
    66                 // weight w is s2/w
    6768    double m_;
    6869    double m_err_; // error of estimation of mean m_
  • trunk/c++_tools/statistics/NaiveWeighted.cc

    r495 r586  
    2727  }
    2828
    29   void NaiveWeighted::predict(const double x, double& y, double& y_err,
    30                               const double w)
    31   {
    32     x_ = x;
    33     y = m_;
    34     y_err = m_err_;
    35   }
    36  
    37   std::ostream& NaiveWeighted::print(std::ostream& s) const
    38   {
    39     s << x_ << "\t"
    40       << m_ << "\t"
    41       << sqrt(m_err_*m_err_ + s2_);
    42     return s;
    43   }
    44  
    45   std::ostream& NaiveWeighted::print_header(std::ostream& s) const
    46   {
    47     s << "# column 1: x\n"
    48       << "# column 2: y\n"
    49       << "# column 3: y_err";
    50     return s;
    51   }
    52 
    5329
    5430}}} // of namespaces regression, statisitcs and thep
  • trunk/c++_tools/statistics/NaiveWeighted.h

    r495 r586  
    99//#include <c++_tools/statistics/AveragerPairWeighted.h>
    1010
     11#include <cmath>
    1112#include <iostream>
    1213#include <utility>
     
    4950    ///
    5051    /// Function predicting value using the naive model, i.e. a
    51     /// weighted average. \a y_err is the standard error of the
    52     /// weighted mean
     52    /// weighted average.
    5353    ///
    54     /// @see AveragerWeighted::standard_error
    55     ///
    56     void predict(const double x, double& y, double& y_err,
    57                  const double w=1) ;
     54    inline double predict(const double x) const { return m_; }
    5855
    5956    ///
    60     /// @return prediction value and parameters
     57    /// @return expected prediction error for a new data point in @a x
     58    /// with weight @a w
    6159    ///
    62     std::ostream& print(std::ostream&) const;
    63              
     60    inline double prediction_error(const double x, const double w=1) const
     61    { return sqrt(m_err_*m_err_ + s2_/w); }
     62
    6463    ///
    65     /// @return header for print()
     64    /// @return estimation of error of model value in @a x
    6665    ///
    67     std::ostream& print_header(std::ostream&) const;
    68              
    69          
     66    inline double standard_error(const double x) const
     67    { return m_err_; }
     68
    7069  private:
    7170    ///
     
    7473    NaiveWeighted(const NaiveWeighted&);
    7574
    76     double s2_; // noise level - the typical variance for a point with
    77                 // weight w is s2/w
    7875    double m_;
    7976    double m_err_; // error of estimation of mean m_
  • trunk/c++_tools/statistics/OneDimensional.cc

    r429 r586  
    77namespace regression {
    88
    9   std::ostream& OneDimensional::print(std::ostream& os,
    10                                     const double min,
    11                                     double max,
    12                                     const u_int n) const
     9  std::ostream& OneDimensional::print(std::ostream& os, const double min,
     10                                      double max, const u_int n) const
    1311  {
    1412    double dx;
     
    2018    }
    2119
    22     double y,y_err;
    2320    for ( double x=min; x<=max; x+=dx) {
    24       predict(x,y,y_err);
     21      double y = predict(x);
     22      double y_err = prediction_error(x);
    2523      os << x << "\t" << y << "\t" << y_err << "\n";
    2624    }
  • trunk/c++_tools/statistics/OneDimensional.h

    r429 r586  
    4545    /// function predicting in one point
    4646    ///
    47     virtual void predict(const double x, double& y, double& y_err) const=0;
     47    virtual double predict(const double x) const=0;
    4848
    4949    ///
    50     ///
     50    /// @return expected prediction error for a new data point in @a x
    5151    ///
    52     /// @return stream of prediction values and parameters
     52    virtual double prediction_error(const double x) const=0;
     53
     54    std::ostream& print(std::ostream& os,const double min,
     55                        double max, const u_int n) const;
     56
    5357    ///
    54     std::ostream& print(std::ostream&,const double min,
    55                         const double max, const u_int n) const;
    56              
     58    /// @return error of model value in @a x
    5759    ///
    58     /// @return header for print()
    59     ///
    60     virtual std::ostream& print_header(std::ostream& s) const=0;
    61              
     60    virtual double standard_error(const double x) const=0;
     61
    6262  protected:
    6363    ///
     
    6666    AveragerPair ap_;
    6767
     68    double msd_; // mean squared deviation (model from data points)
    6869  };
    6970
  • trunk/c++_tools/statistics/OneDimensionalWeighted.h

    r495 r586  
    2727    /// Default Constructor.
    2828    ///
    29     inline OneDimensionalWeighted(void) : x_(0.0), y_(0.0), y_err_(0.0) {}
     29    inline OneDimensionalWeighted(void):s2_(0)  {}
    3030
    3131    ///
     
    3838    /// specific class for details) by minimizing \f$
    3939    /// \sum{w_i(\hat{y_i}-y_i)^2} \f$, where \f$ \hat{y} \f$ is the
    40     /// fitted value. The weight \f$ w_i \f$ is should be proportional
     40    /// fitted value. The weight \f$ w_i \f$ should be proportional
    4141    /// to the inverse of the variance for \f$ y_i \f$
    4242    ///
    4343    virtual void fit(const gslapi::vector& x, const gslapi::vector& y,
    44                     const gslapi::vector& w)=0;
     44                     const gslapi::vector& w)=0;
     45
    4546    ///
    46     /// function predicting in one point. y will contain the y(x)
    47     /// according to the model. y_err will contain the estimated error
    48     /// of y(x).
     47    /// function predicting in one point.
    4948    ///
    50     virtual void predict(const double x, double& y, double& y_err,
    51                          const double w=1) =0;
     49    virtual double predict(const double x) const=0;
     50
    5251    ///
    53     /// @return prediction value and parameters
     52    /// @return expected prediction error for a new data point in @a x
     53    /// with weight @a w
    5454    ///
    55     virtual std::ostream& print(std::ostream&) const=0;
    56              
     55    virtual double prediction_error(const double x, const double w=1) const=0;
     56
    5757    ///
    58     /// @return header for print()
     58    /// @return error of model value in @a x
    5959    ///
    60     virtual std::ostream& print_header(std::ostream& s) const=0;
    61              
     60    virtual double standard_error(const double x) const=0;
     61
    6262  protected:
    63     ///
    64     /// x for predicted point
    65     ///
    66     double x_;
    67     ///
    68     /// y for predicted point
    69     ///
    70     double y_;
    71     ///
    72     /// estimated error of predicted point (in y).
    73     ///
    74     double y_err_;
     63    double s2_; // noise level - the typical variance for a point with
     64                // weight w is s2/w
     65
    7566  };
    7667
  • trunk/c++_tools/statistics/Polynomial.cc

    r386 r586  
    1919  }
    2020
     21  double Polynomial::predict(const double x) const
     22  {
     23    gslapi::vector vec(power_+1,1);
     24    for (size_t i=1; i<=power_; ++i)
     25      vec(i) = vec(i-1)*x;
     26    return md_.predict(vec);
     27  }
     28
     29  double Polynomial::prediction_error(const double x) const
     30  {
     31    gslapi::vector vec(power_+1,1);
     32    for (size_t i=1; i<=power_; ++i)
     33      vec(i) = vec(i-1)*x;
     34    return md_.prediction_error(vec);
     35  }
     36
     37  double Polynomial::standard_error(const double x) const
     38  {
     39    gslapi::vector vec(power_+1,1);
     40    for (size_t i=1; i<=power_; ++i)
     41      vec(i) = vec(i-1)*x;
     42    return md_.standard_error(vec);
     43  }
    2144
    2245}}} // of namespaces regression, statisitcs and thep
  • trunk/c++_tools/statistics/Polynomial.h

    r440 r586  
    4747
    4848    ///
    49     /// @todo implement
     49    /// @return value in @a x of model
    5050    ///
    51     inline void predict(const double x, double& y, double& y_err) const
    52     { assert(0); }
     51    double predict(const double x) const;
    5352
    5453    ///
    55     /// @todo implement
     54    /// @return expected prediction error for a new data point in @a x
    5655    ///
    57     inline std::ostream& print_header(std::ostream& s) const
    58       { return s; }
     56    double prediction_error(const double x) const;
    5957
     58    ///
     59    /// @return error of model value in @a x
     60    ///
     61    double standard_error(const double x) const;
    6062
    6163  private:
  • trunk/doc/Statistics.tex

    r494 r586  
    4949
    5050The first group is when some of the measurements are known to be more
    51 precise than others. The more precise a measuremtns is the larger
     51precise than others. The more precise a measurement is, the larger
    5252weight it is given. The simplest case is when the weight are given
    5353before the measurements and they can be treated as deterministic. It
     
    7070can be treated as independent of the observable.
    7171
    72 Since there are various origin for a weight occuring in a statistical
    73 analysis, there are various way to treat the weights and in general
     72Since there are various origins for a weight occuring in a statistical
     73analysis, there are various ways to treat the weights and in general
    7474the analysis should be tailored to treat the weights correctly. We
    7575have not chosen one situation for our implementations, so see specific
     
    8383\end{itemize}
    8484An important case is when weights are binary (either 1 or 0). Then we
    85 get same result using the weighted version as using the data with
     85get the same result using the weighted version as using the data with
    8686weight not equal to zero and the non-weighted version. Hence, using
    8787binary weights and the weighted version missing values can be treated
     
    182182the variance is estimated as $\sigma_x^2=\frac{\sum
    183183w_xw_y(x-m_x)^2}{\sum w_xw_y}$. As in the non-weighted case we define
    184 the correlation to be the ratio between the covariance and geomtrical
    185 avergae of the variances
     184the correlation to be the ratio between the covariance and geometrical
     185average of the variances
    186186
    187187$\frac{\sum w_xw_y(x-m_x)(y-m_y)}{\sqrt{\sum w_xw_y(x-m_x)^2\sum
  • trunk/test/regression_test.cc

    r495 r586  
    1010#include <c++_tools/statistics/NaiveWeighted.h>
    1111#include <c++_tools/statistics/Polynomial.h>
     12#include <c++_tools/statistics/PolynomialWeighted.h>
    1213
    1314#include <cmath>
     
    4546  statistics::regression::LinearWeighted linear_w;
    4647  linear_w.fit(x,y,w);
    47   double y_predicted=0;
    48   double y_predicted_err=0;
    49   linear_w.predict(1990,y_predicted,y_predicted_err);
     48  double y_predicted = linear_w.predict(1990);
     49  double y_predicted_err = linear_w.prediction_error(1990);
    5050  if (fabs(y_predicted-12.8)>0.001){
    5151    *error << "error: cannot reproduce fit." << std::endl;
     
    5959  statistics::regression::NaiveWeighted naive_w;
    6060  naive_w.fit(x,y,w);
    61   y_predicted=0;
    62   y_predicted_err=0;
    63   naive_w.predict(0.0,y_predicted,y_predicted_err);
     61  y_predicted=naive_w.predict(0.0);
     62  y_predicted_err=naive_w.prediction_error(0.0);
    6463  if (y_predicted!=(0.1*12+0.2*11+0.3*14+0.4*13)) {
    6564    *error << "regression_NaiveWeighted: cannot reproduce fit." << std::endl;
     
    109108  statistics::regression::Naive naive;
    110109
     110  *error << "testing regression::Polynomial" << std::endl;
     111  statistics::regression::Polynomial pol(2);
     112
     113  *error << "testing regression::PolynomialWeighted" << std::endl;
     114  statistics::regression::PolynomialWeighted pol_weighted(2);
     115
    111116  if (error!=&std::cerr)
    112117    delete error;
Note: See TracChangeset for help on using the changeset viewer.