Changeset 429


Ignore:
Timestamp:
Dec 8, 2005, 8:50:11 PM (17 years ago)
Author:
Peter
Message:

separating weighted and non-weighted regression to different classes.

Location:
trunk
Files:
6 added
11 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/statistics/Linear.cc

    r428 r429  
    1616  void Linear::fit(const gslapi::vector& x, const gslapi::vector& y)
    1717  {
    18     statistics::AveragerPair ap;
     18    ap_.reset();
    1919    for (size_t i=0; i<x.size(); i++)
    20       ap.add(x(i),y(i));
     20      ap_.add(x(i),y(i));
    2121
    22     alpha_ = ap.y_averager().mean();
    23     beta_ = ap.covariance() / ap.x_averager().variance();
     22    alpha_ = ap_.y_averager().mean();
     23    beta_ = ap_.covariance() / ap_.x_averager().variance();
    2424
    2525    // estimating the noise level, i.e. the conditional variance of y
    2626    // given x, Var(y|x).
    27     double Q = (ap.y_averager().sum_xsqr_centered() - ap.sum_xy_centered() *
    28                 ap.sum_xy_centered()/ap.x_averager().sum_xsqr_centered() ) ;
     27    double Q = (ap_.y_averager().sum_xsqr_centered() - ap_.sum_xy_centered() *
     28                ap_.sum_xy_centered()/ap_.x_averager().sum_xsqr_centered() ) ;
    2929    s2_ = Q/(x.size()-2);
    3030    r2_= 1;
    3131    alpha_var_ = s2_ / x.size();
    32     beta_var_ = s2_ / ap.x_averager().sum_xsqr_centered();
    33     m_x_ = ap.x_averager().mean();
     32    beta_var_ = s2_ / ap_.x_averager().sum_xsqr_centered();
     33    m_x_ = ap_.x_averager().mean();
    3434  }
    3535
     
    3737  {
    3838    y = alpha_ + beta_ * (x-m_x_);
    39     y_err = sqrt( alpha_var_+beta_var_*(x-m_x_)*(x-m_x_)+s2_/w );
    40     x_=x;
    41     y_=y;
    42     y_err_=y_err;
     39    y_err = sqrt( alpha_var_+beta_var_*(x-m_x_)*(x-m_x_)+s2_ );
    4340  }
    4441
    45   std::ostream& Linear::print(std::ostream& s) const
    46   {
    47     s << x_ << "\t"
    48       << y_ << "\t"
    49       << y_err_ << "\t"
    50       << alpha_ << "\t"
    51       << alpha_err() << "\t"
    52       << beta_ << "\t"
    53       << beta_err() << "\t"
    54       << s2_ << "\t"
    55       << r2_;
    56    
    57     return s;
    58   }
    59    
    6042  std::ostream& Linear::print_header(std::ostream& s) const
    6143  {
  • trunk/lib/statistics/Linear.h

    r428 r429  
    6767    /// the expected deviation from the line for a new data point.
    6868    ///
    69     void predict(const double x, double& y, double& y_err)
     69    void predict(const double x, double& y, double& y_err);
    7070
    7171    ///
     
    8282    /// Function returning the coefficient of determination,
    8383    /// i.e. fraction of variance explained by the linear model.
     84    /// @todo implement r2's calculation in fit function
    8485    ///
    8586    inline double r2(void) const { return r2_; }
  • trunk/lib/statistics/Local.cc

    r412 r429  
    55#include <c++_tools/gslapi/vector.h>
    66#include <c++_tools/statistics/Kernel.h>
    7 #include <c++_tools/statistics/OneDimensional.h>
     7#include <c++_tools/statistics/OneDimensionalWeighted.h>
    88
    99//#include <algorithm>
  • trunk/lib/statistics/Local.h

    r411 r429  
    55
    66#include <c++_tools/statistics/Kernel.h>
    7 #include <c++_tools/statistics/OneDimensional.h>
     7#include <c++_tools/statistics/OneDimensionalWeighted.h>
    88#include <c++_tools/gslapi/vector.h>
    99
     
    3232    /// type of \a kernel.
    3333    ///
    34     inline Local(OneDimensional& r, Kernel& k) : kernel_(&k), regressor_(&r) {}
     34    inline Local(OneDimensionalWeighted& r, Kernel& k)
     35      : kernel_(&k), regressor_(&r) {}
    3536
    3637    ///
     
    8283    std::vector<std::pair<double, double> > data_;
    8384    Kernel* kernel_;
    84     OneDimensional* regressor_;
     85    OneDimensionalWeighted* regressor_;
    8586    std::vector<double> x_;
    8687    std::vector<double> y_;
  • trunk/lib/statistics/Makefile.am

    r414 r429  
    99libstatistics_la_SOURCES = \
    1010  Averager.cc AveragerPair.cc AveragerWeighted.cc Fisher.cc FoldChange.cc \
    11   Histogram.cc Kernel.cc KernelBox.cc KernelTriCube.cc Linear.cc Local.cc \
    12   MultiDimensional.cc Naive.cc \
     11  Histogram.cc Kernel.cc KernelBox.cc KernelTriCube.cc Linear.cc \
     12  LinearWeighted.cc Local.cc \
     13  MultiDimensional.cc Naive.cc NaiveWeighted.cc OneDimensional.cc\
    1314  Pearson.cc Polynomial.cc ROC.cc Score.cc tScore.cc utility.cc
    1415
     
    1718include_statistics_HEADERS = \
    1819  Averager.h AveragerPair.h AveragerWeighted.h Fisher.h FoldChange.h \
    19   Histogram.h Kernel.h KernelBox.h KernelTriCube.h Linear.h Local.h \
    20   MultiDimensional.h Naive.h \
    21   OneDimensional.h Pearson.h Polynomial.h ROC.h Score.h tScore.h utility.h
     20  Histogram.h Kernel.h KernelBox.h KernelTriCube.h Linear.h LinearWeighted.h \
     21  Local.h MultiDimensional.h Naive.h NaiveWeighted.h OneDimensional.h \
     22  OneDimensionalWeighted.h Pearson.h Polynomial.h ROC.h Score.h tScore.h \
     23  utility.h
  • trunk/lib/statistics/Naive.cc

    r383 r429  
    1818  void Naive::fit(const gslapi::vector& x, const gslapi::vector& y)
    1919  {
    20     Averager a;
     20    ap_.reset();
    2121    for (size_t i=0; i<y.size(); i++)
    22       a.add(y(i));
    23     m_=a.mean();
    24     s2_=a.variance();
    25     m_err_=a.standard_error();
     22      ap_.add(x(i),y(i));
    2623  }
    2724
    28   void Naive::fit(const gslapi::vector& x,
    29                   const gslapi::vector& y,
    30                   const gslapi::vector& w)
    31   {
    32     AveragerWeighted a;
    33     for (size_t i=0; i<y.size(); i++)
    34       a.add(y(i), w(i));
    35     m_=a.mean();
    36     m_err_=a.standard_error();
    37     s2_=m_err_*m_err_*w.sum();
    38   }
    39 
    40   void Naive::predict(const double x, double& y, double& y_err, const double w)
     25  void Naive::predict(const double x, double& y, double& y_err)
    4126  {
    42     x_ = x;
    43     y = m_;
    44     y_err = sqrt(m_err_*m_err_ + s2_/w);
    45   }
    46  
    47   std::ostream& Naive::print(std::ostream& s) const
    48   {
    49     s << x_ << "\t"
    50       << m_ << "\t"
    51       << sqrt(m_err_*m_err_ + s2_);
    52     return s;
     27    y = ap_.x_averager().mean();
     28    y_err = ap_.x_averager().std()*(1.0+1.0/ap_.n());
    5329  }
    5430 
  • trunk/lib/statistics/Naive.h

    r389 r429  
    77
    88#include <c++_tools/gslapi/vector.h>
    9 #include <c++_tools/statistics/Averager.h>
    10 #include <c++_tools/statistics/AveragerWeighted.h>
    119
    1210#include <iostream>
     
    5149
    5250    ///
    53     /// This function computes the best-fit for the naive model \f$ y
    54     /// = m \f$ from vectors \a x and \a y, by minimizing \f$ \sum
    55     /// w_i(y_i-m)^2 \f$. The weight \f$ w_i \f$ is proportional to
    56     /// the inverse of the variance for \f$ y_i \f$
    57     ///
    58     void fit(const gslapi::vector& x,
    59              const gslapi::vector& y,
    60              const gslapi::vector& w);
    61 
    62     ///
    6351    /// Function predicting value using the naive model. \a y_err is
    6452    /// the expected deviation from the line for a new data point. The
    65     /// weight for the new point can be specified. A smaller weight
    66     /// means larger error. The error has two components: the variance
    67     /// of point and error in estimation of m_.
     53    /// error has two components: the variance of point and error in
     54    /// estimation of the mean.
    6855    ///
    69     void predict(const double x, double& y, double& y_err,
    70                  const double w=1) ;
     56    void predict(const double x, double& y, double& y_err) ;
    7157
    72     ///
    73     /// @return prediction value and parameters
    74     ///
    75     std::ostream& print(std::ostream&) const;
    76              
    7758    ///
    7859    /// @return header for print()
  • trunk/lib/statistics/OneDimensional.h

    r428 r429  
    33#ifndef _theplu_statistics_regression_onedimensioanl_
    44#define _theplu_statistics_regression_onedimensioanl_
     5
     6#include <c++_tools/statistics/AveragerPair.h>
    57
    68#include <ostream>
     
    2628    /// Default Constructor.
    2729    ///
    28     inline OneDimensional(void) : x_(0.0), y_(0.0), y_err_(0.0) {}
     30    inline OneDimensional(void) {}
    2931
    3032    ///
     
    4143   
    4244    ///
    43     /// This function computes the best-fit given a model (see
    44     /// specific class for details) by minimizing \f$
    45     /// \sum{w_i(\hat{y_i}-y_i)^2} \f$, where \f$ \hat{y} \f$ is the
    46     /// fitted value. The weight \f$ w_i \f$ is should be proportional
    47     /// to the inverse of the variance for \f$ y_i \f$
    48     ///
    49     virtual void fit(const gslapi::vector& x, const gslapi::vector& y,
    50                     const gslapi::vector& w)=0;
    51     ///
    5245    /// function predicting in one point
    5346    ///
    54     virtual void predict(const double x, double& y, double& y_err,
    55                          const double w=1) =0;
     47    virtual void predict(const double x, double& y, double& y_err) const=0;
     48
    5649    ///
    57     /// @return prediction value and parameters
     50    ///
    5851    ///
    59     virtual std::ostream& print(std::ostream&) const=0;
     52    /// @return stream of prediction values and parameters
     53    ///
     54    std::ostream& print(std::ostream&,const double min,
     55                        const double max, const u_int n) const;
    6056             
    6157    ///
     
    6662  protected:
    6763    ///
    68     /// x for predicted point
     64    /// Averager for pair of x and y
    6965    ///
    70     //double x_;
    71     ///
    72     /// y for predicted point
    73     ///
    74     //double y_;
    75     ///
    76     /// estimated error of predicted point (in y).
    77     ///
    78     //double y_err_;
    79   };
     66    AveragerPair ap_;
     67
     68  };
    8069
    8170}}} // of namespaces regression, statisitcs and thep
  • trunk/lib/statistics/Polynomial.h

    r389 r429  
    4444    gslapi::vector fit_parameters(void) { return md_.fit_parameters(); }
    4545
    46     inline void fit(const gslapi::vector& x, const gslapi::vector& y,
    47                     const gslapi::vector& w) { assert(0); }
     46    ///
     47    /// @todo implement
     48    ///
     49    inline void predict(const double x, double& y, double& y_err) const
     50    { assert(0); }
    4851
    49     inline void predict(const double x, double& y, double& y_err,
    50                          const double w=1) { assert(0); }
    51 
    52     inline std::ostream& print(std::ostream& s) const
    53       { assert(0); return s; }
    54 
     52    ///
     53    /// @todo implement
     54    ///
    5555    inline std::ostream& print_header(std::ostream& s) const
    56       { assert(0); return s; }
     56      { return s; }
    5757
    5858
  • trunk/lib/svm/Kernel.h

    r350 r429  
    5050   
    5151    ///
    52     ///   @todo Constructor taking the \a data matrix, the KernelFunction and a
    53     ///   \a weight matrix as input. Each column in the data matrix
    54     ///   corresponds to one sample.
    55     Kernel(const gslapi::matrix& data, const KernelFunction&,
    56            const gslapi::matrix& weight);
    57 
    58     ///
    5952    ///   Destructor
    6053    ///
  • trunk/test/regression_test.cc

    r399 r429  
    55#include <c++_tools/statistics/KernelBox.h>
    66#include <c++_tools/statistics/Linear.h>
     7#include <c++_tools/statistics/LinearWeighted.h>
    78#include <c++_tools/statistics/Local.h>
    89#include <c++_tools/statistics/Naive.h>
     10#include <c++_tools/statistics/NaiveWeighted.h>
    911#include <c++_tools/statistics/Polynomial.h>
    1012
     
    1719using namespace theplu;
    1820
    19 bool Local_test(statistics::regression::OneDimensional&,
     21bool Local_test(statistics::regression::OneDimensionalWeighted&,
    2022                statistics::regression::Kernel&);
    2123
     
    3537  bool ok = true;
    3638
    37   // test data for Linear and Naive
     39  // test data for Linear and Naive (Weighted and non-weighted)
    3840  gslapi::vector x(4); x(0)=1970; x(1)=1980; x(2)=1990; x(3)=2000;
    3941  gslapi::vector y(4); y(0)=12;   y(1)=11;   y(2)=14;   y(3)=13;
    4042  gslapi::vector w(4); w(0)=0.1;  w(1)=0.2;  w(2)=0.3;  w(3)=0.4;
    4143
    42   // testing regression::Linear
    43   statistics::regression::Linear linear;
    44   linear.fit(x,y,w);
     44  // testing regression::LinearWeighted
     45  statistics::regression::LinearWeighted linear_w;
     46  linear_w.fit(x,y,w);
    4547  double y_predicted=0;
    4648  double y_predicted_err=0;
    47   linear.predict(1990,y_predicted,y_predicted_err);
     49  linear_w.predict(1990,y_predicted,y_predicted_err);
    4850  if (y_predicted!=12.8){
    4951    *error << "regression_Linear: cannot reproduce fit." << std::endl;
     
    5153  }
    5254
    53   // testing regression::Naive
    54   statistics::regression::Naive naive;
    55   naive.fit(x,y,w);
     55  // testing regression::NaiveWeighted
     56  statistics::regression::NaiveWeighted naive_w;
     57  naive_w.fit(x,y,w);
    5658  y_predicted=0;
    5759  y_predicted_err=0;
    58   naive.predict(0.0,y_predicted,y_predicted_err);
     60  naive_w.predict(0.0,y_predicted,y_predicted_err);
    5961  if (y_predicted!=(0.1*12+0.2*11+0.3*14+0.4*13)) {
    60     *error << "regression_Naive: cannot reproduce fit." << std::endl;
     62    *error << "regression_NaiveWeighted: cannot reproduce fit." << std::endl;
    6163    ok=false;
    6264  }
     
    6466  // testing regression::Local
    6567  statistics::regression::KernelBox kb;
    66   statistics::regression::Linear rl;
     68  statistics::regression::LinearWeighted rl;
    6769  if (!Local_test(rl,kb)) {
    6870    *error << "regression_Local: Linear cannot reproduce fit." << std::endl;
    6971    ok=false;
    7072  }
    71   statistics::regression::Naive rn;
     73  statistics::regression::NaiveWeighted rn;
    7274  if (!Local_test(rn,kb)) {
    7375    *error << "regression_Local: Naive cannot reproduce fit." << std::endl;
     
    104106
    105107
    106 bool Local_test(statistics::regression::OneDimensional& r,
     108bool Local_test(statistics::regression::OneDimensionalWeighted& r,
    107109                statistics::regression::Kernel& k)
    108110{
Note: See TracChangeset for help on using the changeset viewer.