Changeset 702 for trunk/yat/regression


Ignore:
Timestamp:
Oct 26, 2006, 4:04:35 PM (17 years ago)
Author:
Peter
Message:

Refs #81 moved mse_ to inherited classes and made mse() pure virtual because mse is calculated different for different classes and therefore this design is more logic. Fixed docs and other things...

Location:
trunk/yat/regression
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • trunk/yat/regression/Linear.h

    r697 r702  
    5050    inline Linear(void)
    5151      : OneDimensional(), alpha_(0), alpha_var_(0), beta_(0), beta_var_(0),
    52         m_x_(0){}
     52        mse_(0), m_x_(0){}
    5353
    5454    ///
     
    8787   
    8888    ///
     89    /// @brief Mean Squared Error
     90    ///
     91    inline double mse(void) const { return mse_; }
     92
     93    ///
    8994    /// @return value in @a x of model
    9095    ///
     
    112117    double beta_;
    113118    double beta_var_;
     119    double mse_;
    114120    double m_x_; // average of x values
    115121    double r2_; // coefficient of determination
  • trunk/yat/regression/LinearWeighted.cc

    r682 r702  
    3939    // product wx*wy, so we can send in w and a dummie to get what we
    4040    // want.
    41     utility::vector dummie(w.size(),1);
    42     statistics::AveragerPairWeighted ap;
    43     ap.add_values(x,y,w,dummie);
    44 
    45     double m_x = ap.x_averager().mean();
    46     double m_y = ap.y_averager().mean();
    47    
    48     double sxy = ap.sum_xy_centered();
    49 
    50     double sxx = ap.x_averager().sum_xx_centered();
    51     double syy = ap.y_averager().sum_xx_centered();
     41    ap_.reset();
     42    ap_.add_values(x,y,utility::vector(x.size(),1),w);
    5243
    5344    // estimating the noise level. see attached document for motivation
    5445    // of the expression.
    55     s2_= (syy-sxy*sxy/sxx)/(w.sum()-2*(w*w)/w.sum()) ;
     46    s2_= (syy()-sxy()*sxy()/sxx())/(w.sum()-2*(w*w)/w.sum()) ;
    5647   
    57     alpha_ = m_y;
    58     beta_ = sxy/sxx;
    59     alpha_var_ = ap.y_averager().standard_error() *
    60       ap.y_averager().standard_error();
    61     beta_var_ = s2_/sxx
    62     m_x_=m_x;
     48    alpha_ = m_y();
     49    beta_ = sxy()/sxx();
     50    alpha_var_ = ap_.y_averager().standard_error() *
     51      ap_.y_averager().standard_error();
     52    beta_var_ = s2_/sxx()
     53    m_x_=m_x();
    6354  }
    6455
  • trunk/yat/regression/LinearWeighted.h

    r682 r702  
    5151      : OneDimensionalWeighted(), alpha_(0), alpha_var_(0), beta_(0),
    5252        beta_var_(0),
    53         m_x_(0){}
     53        m_x_(0), s2_(0) {}
    5454
    5555    ///
     
    8686       \alpha \f$ and \f$ \beta \f$ are independent.
    8787    **/
     88    /// @todo calculate mse
    8889    void fit(const utility::vector& x, const utility::vector& y,
    8990             const utility::vector& w);
    9091   
     92    ///
     93    /// @brief Mean Squared Error
     94    ///
     95    inline double mse(void) const { return mse_; }
     96
    9197    ///
    9298    ///  Function predicting value using the linear model:
     
    100106    ///
    101107    inline double prediction_error(const double x, const double w=1) const
    102     { return sqrt(alpha_var_ + beta_var_*(x-m_x_)*(x-m_x_)+s2_/w); }
     108    { return sqrt(alpha_var_ + beta_var_*(x-m_x_)*(x-m_x_)+s2(w)); }
    103109
    104110    /**
    105111       estimated error \f$ y_{err} = \sqrt{ Var(\alpha) +
    106112       Var(\beta)*(x-m)} \f$.
    107     **/
     113    */
    108114    inline double standard_error(const double x) const
    109115    { return sqrt(alpha_var_ + beta_var_*(x-m_x_)*(x-m_x_) ); }
    110116
    111     ///
    112     /// Function returning the coefficient of determination,
    113     /// i.e. fraction of variance explained by the linear model.
    114     ///
    115     inline double r2(void) const { return r2_; }
     117    /**
     118       Noise level for points with weight @a w.
     119    */
     120    inline double s2(double w=1) const { return s2_/w; }
    116121
    117122  private:
     
    121126    LinearWeighted(const LinearWeighted&);
    122127
     128    inline double m_x(void) const {return ap_.x_averager().mean(); }
     129    inline double m_y(void) const {return ap_.y_averager().mean(); }
     130    inline double sxx(void) const {return ap_.x_averager().sum_xx_centered(); }
     131    inline double syy(void) const {return ap_.y_averager().sum_xx_centered(); }
     132    inline double sxy(void) const {return ap_.sum_xy_centered(); }
     133   
    123134    double alpha_;
    124135    double alpha_var_;
     
    127138    double m_x_; // average of x values
    128139    double r2_; // coefficient of determination
     140    double s2_;
     141    double mse_;
    129142  };
    130143
  • trunk/yat/regression/Naive.cc

    r698 r702  
    5050  double Naive::standard_error(const double x) const
    5151  {
    52     return sqrt(mse_/ap_.n());
     52    return ap_.y_averager().standard_error();
    5353  }
    5454
  • trunk/yat/regression/Naive.h

    r697 r702  
    4949    /// Default Constructor.
    5050    ///
    51     inline Naive(void) : OneDimensional(), m_(0.0), m_err_(0.0) {}
     51    inline Naive(void) : OneDimensional(), mse_(0.0) {}
    5252
    5353    ///
     
    7070
    7171    ///
     72    /// @brief Mean Squared Error
     73    ///
     74    inline double mse(void) const { return mse_; }
     75
     76    ///
    7277    /// Function predicting value using the naive model.
    7378    ///
     
    8085
    8186  private:
    82     double m_;
    83     double m_err_; // error of estimation of mean m_
    84 
     87    double mse_;
    8588  };
    8689
  • trunk/yat/regression/NaiveWeighted.cc

    r682 r702  
    2727#include "yat/utility/vector.h"
    2828
     29#include <cassert>
     30
    2931namespace theplu {
    3032namespace yat {
     
    3537                          const utility::vector& w)
    3638  {
    37     statistics::AveragerWeighted a;
    38     for (size_t i=0; i<y.size(); i++)
    39       a.add(y(i), w(i));
    40     m_=a.mean();
    41     m_err_=a.standard_error();
    42     s2_=m_err_*m_err_*w.sum();
     39    assert(x.size()==y.size());
     40    assert(y.size()==w.size());
     41    ap_.reset();
     42    ap_.add_values(x,y,utility::vector(x.size(),1.0), w);
    4343  }
    4444
  • trunk/yat/regression/NaiveWeighted.h

    r682 r702  
    5151    ///
    5252    inline NaiveWeighted(void)
    53       : OneDimensionalWeighted(), m_(0.0), m_err_(0.0) {}
     53      : OneDimensionalWeighted() {}
    5454
    5555    ///
     
    5858    virtual ~NaiveWeighted(void) {};
    5959         
    60     ///
    61     /// This function computes the best-fit for the naive model \f$ y
    62     /// = m \f$ from vectors \a x and \a y, by minimizing \f$ \sum
    63     /// w_i(y_i-m)^2 \f$. The weight \f$ w_i \f$ is proportional to
    64     /// the inverse of the variance for \f$ y_i \f$
    65     ///
     60    /**
     61      This function computes the best-fit for the naive model \f$ y
     62      = m \f$ from vectors \a x and \a y, by minimizing \f$ \sum
     63      w_i(y_i-m)^2 \f$. The weight \f$ w_i \f$ is proportional to
     64      the inverse of the variance for \f$ y_i \f$
     65    */
    6666    void fit(const utility::vector& x,
    6767             const utility::vector& y,
     
    7272    /// weighted average.
    7373    ///
    74     inline double predict(const double x) const { return m_; }
     74    inline double predict(const double x) const
     75    { return ap_.y_averager().mean(); }
    7576
    7677    ///
    77     /// @return expected prediction error for a new data point in @a x
    78     /// with weight @a w
     78    /// @brief For a naive model mse is identical to standard deviation
     79    ///
     80    /// @see AveragerWeighted
    7981    ///
    80     inline double prediction_error(const double x, const double w=1) const
    81     { return sqrt(m_err_*m_err_ + s2_/w); }
     82    inline double mse(void) const { return ap_.y_averager().std(); }
    8283
    8384    ///
     
    8586    ///
    8687    inline double standard_error(const double x) const
    87     { return m_err_; }
     88    { return ap_.y_averager().standard_error(); }
    8889
    8990  private:
     
    9394    NaiveWeighted(const NaiveWeighted&);
    9495
    95     double m_;
    96     double m_err_; // error of estimation of mean m_
    97 
    9896  };
    9997
  • trunk/yat/regression/OneDimensional.h

    r698 r702  
    6565       @brief Mean Squared Error
    6666
    67        msd is defined as the mean of the squared residiuals \f$
    68        \frac{1}{N}\sum{(y_i-\hat{y}_i)^2} \f$, which is minimized when
    69        fitting the regression model.
    70 
    71        @return data values mean squared deviation from the model
     67       Mean Squared Error is defined as the \f$ \frac
     68       \sum{(\hat{y_i}-y_i)^2{df} \f$ where \f$ df \f$ number of
     69       degree of freedom typically is number data points minus number
     70       of paramers in model.
    7271    */
    73     inline double mse(void) const { return mse_; }
     72    virtual double mse(void) const=0;
    7473
    7574    ///
     
    103102    /// given the x-value (see prediction_error()).
    104103    ///
     104    /// @param os Ostream printout is sent to
    105105    /// @param n number of points printed
    106106    /// @param min smallest x-value for which the model is printed
     
    114114       fraction of the variance explained by the regression model.
    115115    */
    116     inline double r_squared(void) const { return mse()/variance_; }
     116    inline double r_squared(void) const { return mse()/variance(); }
    117117
    118118    /**
     
    126126  protected:
    127127    ///
     128    /// Variance of y
     129    ///
     130    inline double variance(void) const { return ap_.y_averager().variance(); }
     131
     132    ///
    128133    /// Averager for pair of x and y
    129134    ///
    130135    statistics::AveragerPair ap_;
    131136
    132     ///
    133     /// mean squared error (see mse())
    134     ///
    135     double mse_;
    136 
    137     ///
    138     /// variance of y
    139     ///
    140     double variance_;
    141137  };
    142138
  • trunk/yat/regression/OneDimensionalWeighted.h

    r696 r702  
    2525*/
    2626
     27#include "yat/statistics/AveragerPairWeighted.h"
     28
    2729#include <ostream>
    2830
     
    3537 
    3638  ///
    37   /// Abstract Base Class for One Dimensional fitting in a weighted
    38   /// fashion.
    39   ///
    40   /// @todo document
     39  /// Abstract Base Class for One Dimensional fitting in a weighted
     40  /// fashion.
    4141  ///
    4242  class OneDimensionalWeighted
     
    4444 
    4545  public:
    46     ///
    47     /// Default Constructor.
    48     ///
    49     inline OneDimensionalWeighted(void):s2_(0)  {}
     46    ///
     47    /// Default Constructor.
     48    ///
     49    inline OneDimensionalWeighted(void){}
    5050
    5151    ///
    5252    /// Destructor
    5353    ///
    54     virtual ~OneDimensionalWeighted(void) {};
     54    virtual ~OneDimensionalWeighted(void) {};
    5555         
    5656    /**
     
    6464                     const utility::vector& w)=0;
    6565
     66    /**
     67       @brief Mean Squared Error
     68
     69       Mean Squared Error is defined as the weighted mean of the
     70       squared residiuals \f$ \frac{\sum w_i(y_i-\hat{y}_i)^2}{\sum
     71       w_i} \f$, which is minimized when fitting the regression model.
     72    */
     73    virtual double mse(void) const=0;
     74
    6675    ///
    67     /// function predicting in one point.
     76    /// @return expected value in @a x according to the fitted model
    6877    ///
    6978    virtual double predict(const double x) const=0;
    7079
    71     ///
    72     /// @return expected prediction error for a new data point in @a x
    73     /// with weight @a w
    74     ///
    75     virtual double prediction_error(const double x, const double w=1) const=0;
     80    /**
     81       The prediction error is defined as the square root of the
     82       expected squared deviation a new data point will have from
     83       value the model provides. The expected squared deviation is
     84       defined as \f$ E((Y|x,w - \hat{y}(x))^2) \f$ which is equal to
     85       \f$ E((Y|x,w - E(Y|x))^2) + E((E(Y|x) - \hat{y}(x))^2) \f$,
     86       which is the conditional variance given \f$ x \f$ and the
     87       squared standard error (see standard_error()) of the model
     88       estimation in \f$ x \f$, respectively.
     89       
     90       The conditional variance is inversely proportional to the
     91       weight \f$ w \f$ and is calculated as \f$ Var(Y|x,w) =
     92       \frac{1}{w}\frac{\sum w_i(y_i-\hat{y}_i)^2\sum w_i^2}
     93       {\left(\sum w_i\right)^2} =\frac{\sum w_i^2}{w\sum w_i}mse\f$
     94
     95       @return expected prediction error for a new data point in @a x
     96    */
     97    double prediction_error(const double x, const double w=1.0) const
     98    { return sqrt(mse()+pow(standard_error(x),2)); }
    7699
    77100    /**
    78        The standard error is defined as \f$ \sqrt{E(Y|x -
    79        \hat{y}(x))^2 }\f$
     101       r-squared is defined as \f$ \frac{\sum
     102       w_i(y_i-\hat{y}_i)^2}{\sum w_i(y_i-m_y)^2} \f$ or the fraction
     103       of the variance explained by the regression model.
     104    */
     105    inline double r_squared(void) const
     106    { return mse()/variance(); }
     107
     108    /**
     109       The standard error is defined as \f$ \sqrt{E((Y|x -
     110       \hat{y}(x))^2) }\f$
    80111
    81112       @return error of model value in @a x
     
    83114    virtual double standard_error(const double x) const=0;
    84115
    85   protected:
     116  protected:
    86117    ///
    87     /// noise level - the typical variance for a point with weight w
    88     /// is s2/w
     118    /// Averager for pair of x and y
    89119    ///
    90     double s2_;
     120    statistics::AveragerPairWeighted ap_;
     121
     122  private:
     123    inline double variance(double w=1) const
     124    { return ap_.y_averager().variance(); }
     125
     126
    91127  };
    92128
  • trunk/yat/regression/Polynomial.h

    r697 r702  
    4848    ///
    4949    inline Polynomial(size_t power)
    50       : OneDimensional(), power_(power) {}
     50      : OneDimensional(), mse_(0), power_(power) {}
    5151
    5252    ///
     
    6767
    6868    ///
     69    /// @todo
     70    /// @brief Mean Squared Error
     71    ///
     72    inline double mse(void) const { return mse_; }
     73
     74    ///
    6975    /// @return value in @a x of model
    7076    ///
     
    7884  private:
    7985    MultiDimensional md_;
     86    double mse_;
    8087    size_t power_;
    8188
  • trunk/yat/regression/PolynomialWeighted.h

    r682 r702  
    4646    ///
    4747    inline PolynomialWeighted(size_t power)
    48       : OneDimensionalWeighted(), power_(power) {}
     48      : OneDimensionalWeighted(), mse_(0), power_(power) {}
    4949
    5050    ///
     
    6969
    7070    ///
     71    /// @todo
     72    /// @brief Mean Squared Error
     73    ///
     74    inline double mse(void) const { return mse_; }
     75
     76    ///
    7177    /// function predicting in one point.
    7278    ///
     
    8692  private:
    8793    MultiDimensionalWeighted md_;
     94    double mse_;
    8895    size_t power_;
    8996
Note: See TracChangeset for help on using the changeset viewer.