Changeset 713 for trunk/yat/regression


Timestamp: Dec 21, 2006, 3:43:31 PM
Author:    Peter
Message:   Fixes #77, #78, #158, and #163, and refs #81
Location:  trunk/yat/regression
Files:     10 edited

  • trunk/yat/regression/Linear.cc

    r703 → r713

    @@ -34 +34 @@
       Linear::Linear(void)
         : OneDimensional(), alpha_(0), alpha_var_(0), beta_(0), beta_var_(0),
    -      mse_(0), m_x_(0)
    +      chisq_(0), m_x_(0)
       {
       }

    @@ -42 +42 @@
       }

    +
    +  double Linear::chisq(void) const
    +  {
    +    return chisq_;
    +  }
    +
    +
       void Linear::fit(const utility::vector& x, const utility::vector& y)
       {

    @@ -52 +59 @@

         // calculating deviation between data and model
    -    mse_ = (ap_.y_averager().sum_xx_centered() - ap_.sum_xy_centered() *
    -            ap_.sum_xy_centered()/ap_.x_averager().sum_xx_centered() )/x.size();
    -    r2_= 1-mse_/ap_.x_averager().variance();
    -    alpha_var_ = mse_ / x.size();
    -    beta_var_ = mse_ / ap_.x_averager().sum_xx_centered();
    +    chisq_ = ( (ap_.y_averager().sum_xx_centered() - ap_.sum_xy_centered()*
    +               ap_.sum_xy_centered()/ap_.x_averager().sum_xx_centered() ) /
    +              (x.size()-2) );
    +    r2_= 1-chisq_/ap_.x_averager().variance();
    +    alpha_var_ = chisq_ / x.size();
    +    beta_var_ = chisq_ / ap_.x_averager().sum_xx_centered();
         m_x_ = ap_.x_averager().mean();
       }
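
The substantive change in the last hunk is the denominator of the residual variance: the sum of squared residuals is now divided by the degrees of freedom n-2 (two parameters, \f$ \alpha \f$ and \f$ \beta \f$, are fitted) rather than by n, i.e. \f$ \chi^2 = \frac{\sum_i (y_i-\alpha-\beta(x_i-m_x))^2}{n-2} \f$, which matches the documentation added to Linear.h below.
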
  • trunk/yat/regression/Linear.h

    r703 → r713

    @@ -36 +36 @@
     namespace regression {

    -  ///
    -  /// @brief linear regression.
    -  ///
    -  /// @todo document
    -  ///
    +  /**
    +     @brief linear regression.
    +
    +     Data are modeled as \f$ y_i = \alpha + \beta (x_i-m_x) +
    +     \epsilon_i \f$.
    +  */
       class Linear : public OneDimensional
       {

    @@ -55 +56 @@
         virtual ~Linear(void);

    -    ///
    -    /// @return the parameter \f$ \alpha \f$
    -    ///
    +    /**
    +       The parameter \f$ \alpha \f$ is estimated as \f$
    +       \frac{1}{n}\sum y_i \f$
    +
    +       @return the parameter \f$ \alpha \f$
    +    */
         inline double alpha(void) const { return alpha_; }

    -    ///
    -    /// @return standard deviation of parameter \f$ \alpha \f$
    -    ///
    +    /**
    +       The standard deviation is estimated as \f$
    +       \sqrt{\frac{\chi^2}{n}} \f$
    +
    +       @return standard deviation of parameter \f$ \alpha \f$
    +    */
         inline double alpha_err(void) const { return sqrt(alpha_var_); }

    -    ///
    -    /// @return the parameter \f$ \beta \f$
    -    ///
    +    /**
    +       The parameter \f$ \beta \f$ is estimated as \f$
    +       \frac{\textrm{Cov}(x,y)}{\textrm{Var}(x)} \f$
    +
    +       @return the parameter \f$ \beta \f$
    +    */
         inline double beta(void) const { return beta_; }

    -    ///
    -    /// @return standard deviation of parameter \f$ \beta \f$
    -    ///
    +    /**
    +       The standard deviation is estimated as \f$
    +       \sqrt{\frac{\chi^2}{\sum (x_i-m_x)^2}} \f$
    +
    +       @return standard deviation of parameter \f$ \beta \f$
    +    */
         inline double beta_err(void) const { return sqrt(beta_var_); }

    -    ///
    -    /// This function computes the best-fit linear regression
    -    /// coefficients \f$ (\alpha, \beta)\f$ of the model \f$ y =
    -    /// \alpha + \beta (x-m_x) \f$ from vectors \a x and \a y, by
    -    /// minimizing \f$ \sum{(y_i - \alpha - \beta (x-m_x))^2} \f$. By
    -    /// construction \f$ \alpha \f$ and \f$ \beta \f$ are independent.
    -    ///
    +
    +    /**
    +       @brief Mean Squared Error
    +
    +       Chisq is calculated as \f$ \frac{\sum
    +       (y_i-\alpha-\beta(x_i-m_x))^2}{n-2} \f$
    +    */
    +    double chisq(void) const;
    +
    +    /**
    +       Model is fitted by minimizing \f$ \sum{(y_i - \alpha - \beta
    +       (x-m_x))^2} \f$. By construction \f$ \alpha \f$ and \f$ \beta \f$
    +       are independent.
    +    */
         void fit(const utility::vector& x, const utility::vector& y) ;

         ///
    -    /// @brief Mean Squared Error
    -    ///
    -    inline double mse(void) const { return mse_; }
    -
    -    ///
    -    /// @return value in @a x of model
    +    /// @return \f$ \alpha + \beta x \f$
         ///
         double predict(const double x) const;

    -    ///
    -    /// @return error of model value in @a x
    -    ///
    +    /**
    +       The error of the model is estimated as \f$ \sqrt{
    +       \textrm{alpha\_err}^2+\textrm{beta\_err}^2*(x-m_x)*(x-m_x)}\f$
    +
    +       @return estimated error of model in @a x
    +    */
         double standard_error(const double x) const;

    @@ -115 +133 @@
         double beta_;
         double beta_var_;
    -    double mse_;
    +    double chisq_;
         double m_x_; // average of x values
         double r2_; // coefficient of determination
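
A minimal usage sketch of the renamed interface follows. It is illustrative only: the data values, variable names, and the bare regression/utility qualifiers are assumptions, not part of the changeset.

    utility::vector x(10), y(10);
    for (size_t i=0; i<x.size(); ++i) {
      x(i) = i;              // predictor
      y(i) = 0.5*i + 1.0;    // response
    }
    regression::Linear linear;
    linear.fit(x, y);                    // minimizes sum (y_i - alpha - beta*(x_i - m_x))^2
    double a    = linear.alpha();        // estimate of alpha
    double b    = linear.beta();         // estimate of beta
    double chi2 = linear.chisq();        // residual variance, replaces the removed mse()
    double se   = linear.standard_error(2.5);
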
  • trunk/yat/regression/MultiDimensional.cc

    r703 → r713

    @@ -56 +56 @@
       }

    +  const utility::vector& MultiDimensional::fit_parameters(void) const
    +  {
    +    return fit_parameters_;
    +  }
    +
    +
    +  double MultiDimensional::chisq(void) const
    +  {
    +    return chisquare_;
    +  }
    +

       double MultiDimensional::prediction_error(const utility::vector& x) const
  • trunk/yat/regression/MultiDimensional.h

    r703 → r713

    @@ -60 +60 @@
         /// @return parameters of the model
         ///
    -    utility::vector fit_parameters(void) { return fit_parameters_; }
    +    const utility::vector& fit_parameters(void) const;
    +
    +    /**
    +       @brief Mean Squared Error
    +    */
    +    double chisq(void) const;

         ///
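
A short, hypothetical sketch of the changed accessor (the object md is assumed to be a MultiDimensional that has already been fitted): the parameters are now exposed by const reference, so callers can bind them without copying, and the residual variance is available through chisq().

    const utility::vector& params = md.fit_parameters();  // read-only access, no copy
    double chi2 = md.chisq();                              // mean squared error of the fit
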
  • trunk/yat/regression/Naive.cc

    r703 → r713

    @@ -44 +44 @@
       }

    +
    +  double Naive::chisq(void) const
    +  {
    +    return ap_.y_averager().sum_xx_centered()/(ap_.n()-1);
    +  }
    +
    +
       void Naive::fit(const utility::vector& x, const utility::vector& y)
       {

    @@ -49 +56 @@
         for (size_t i=0; i<y.size(); i++)
           ap_.add(x(i),y(i));
    +
       }
    +

       double Naive::predict(const double x) const
  • trunk/yat/regression/Naive.h

    r703 → r713

    @@ -37 +37 @@
     namespace regression {

    -  ///
    -  /// @bief naive fitting.
    -  ///
    -  /// @todo document
    -  ///
    +  /**
    +     @brief Naive Regression
    +
    +     Data are modeled as \f$ y_i = \alpha + \epsilon_i \f$
    +
    +  */
       class Naive : public OneDimensional
       {

    @@ -56 +57 @@
         virtual ~Naive(void);

    +    /**
    +        \f$\frac{1}{N-1} \sum (x_i-m)^2 \f$
    +
    +        @brief Mean Squared Error
    +    */
    +    double chisq(void) const;
    +
         ///
         /// This function computes the best-fit for the naive model \f$ y
         /// = m \f$ from vectors \a x and \a y, by minimizing \f$
    -    /// \sum{(y_i-m)^2} \f$. This function is the same as using the
    -    /// weighted version with unity weights.
    +    /// \sum{(y_i-m)^2} \f$.
         ///
         void fit(const utility::vector& x, const utility::vector& y);

         ///
    -    /// @brief Mean Squared Error
    -    ///
    -    inline double mse(void) const { return mse_; }
    -
    -    ///
    -    /// Function predicting value using the naive model.
    +    /// The predicted value is the average \f$ m \f$
         ///
         double predict(const double x) const;

    @@ -76 +78 @@
         ///
         /// @return standard error
    +    ///
    +    /// @see statistics::Averager
         ///
         double standard_error(const double x) const;
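
An illustrative sketch of the Naive interface after this change (the vectors x and y are assumed to be filled utility::vector objects; include and namespace details are omitted):

    regression::Naive naive;
    naive.fit(x, y);                   // fits only the mean of y: y_i = alpha + epsilon_i
    double m    = naive.predict(0.0);  // the fitted average, independent of the argument
    double chi2 = naive.chisq();       // sum (y_i - m)^2 / (n - 1), replaces the old mse()
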
  • trunk/yat/regression/OneDimensional.cc

    r703 → r713

    @@ -36 +36 @@
       }

    +
    +  double OneDimensional::prediction_error(const double x) const
    +  {
    +    return sqrt(chisq()+pow(standard_error(x),2));
    +  }
    +
    +
       std::ostream& OneDimensional::print(std::ostream& os, const double min,
                                           double max, const u_int n) const
  • trunk/yat/regression/OneDimensional.h

    r703 → r713
    (several lines in this file changed only in indentation or trailing whitespace; they are shown as unchanged context below)

    @@ -35 +35 @@
     }
     namespace regression {

       ///
       /// Abstract Base Class for One Dimensional fitting.
       ///
       /// @see OneDimensionalWeighted.
       ///
       class OneDimensional
       {

       public:
         ///
         /// @brief The default constructor
         ///
         OneDimensional(void);

    @@ -53 +53 @@
         /// @brief The destructor
         ///
         virtual ~OneDimensional(void);

    +    /**
    +       @brief Mean Squared Error
    +
    +       Mean Squared Error is defined as the \f$ \frac
    +       {\sum{(\hat{y_i}-y_i)^2}}{df} \f$ where \f$ df \f$ number of
    +       degree of freedom typically is number data points minus number
    +       of paramers in model.
    +    */
    +    virtual double chisq(void) const=0;
    +
         /**
            This function computes the best-fit given a model (see

    @@ -62 +72 @@
         virtual void fit(const utility::vector& x, const utility::vector& y)=0;

    -    /**
    -       @brief Mean Squared Error
    -
    -       Mean Squared Error is defined as the \f$ \frac
    -       \sum{(\hat{y_i}-y_i)^2{df} \f$ where \f$ df \f$ number of
    -       degree of freedom typically is number data points minus number
    -       of paramers in model.
    -    */
    -    virtual double mse(void) const=0;
    -
         ///
         /// @return expected value in @a x accrding to the fitted model
         ///
         virtual double predict(const double x) const=0;

         /**
            The prediction error is defined as the square root of the

    @@ -84 +84 @@
            divided into two terms \f$ E(Y|x - E(Y|x))^2 \f$ and \f$
            E(E(Y|x) - \hat{y}(x))^2 \f$, which is the conditional variance
    -       in \f$ x \f$ and the squared standard error (see
    +       given \f$ x \f$ and the squared standard error (see
            standard_error()) of the model estimation in \f$ x \f$,
            respectively.

            @return expected prediction error for a new data point in @a x
         */
    -    double prediction_error(const double x) const
    -    { return sqrt(mse()+pow(standard_error(x),2)); }
    +    double prediction_error(const double x) const;

         ///

    @@ -114 +113 @@
            fraction of the variance explained by the regression model.
         */
    -    inline double r_squared(void) const { return mse()/variance(); }
    +    inline double r_squared(void) const { return chisq()/variance(); }

         /**

    @@ -120 +119 @@
            \hat{y}(x))^2 }\f$

    -       @return error of model value in @a x
    +       @return expected error of model value in @a x
         */
         virtual double standard_error(const double x) const=0;

       protected:
         ///
         /// Variance of y
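
Since chisq() is now part of the abstract interface, the derived error measures can be used polymorphically through the base class. A minimal sketch; the helper function and its name are illustrative only, not part of the changeset:

    // illustrative helper, works for any OneDimensional model
    double report_error(const regression::OneDimensional& model, double x)
    {
      // prediction_error() combines the residual variance with the standard
      // error of the fitted curve: sqrt(chisq() + standard_error(x)^2)
      return model.prediction_error(x);
    }
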
  • trunk/yat/regression/Polynomial.cc

    r703 → r713

    @@ -31 +31 @@

       Polynomial::Polynomial(size_t power)
    -    : OneDimensional(), mse_(0), power_(power)
    +    : OneDimensional(), power_(power)
       {
       }
    +
       Polynomial::~Polynomial(void)
       {
       }

    +
    +
    +  double Polynomial::chisq(void) const
    +  {
    +    return md_.chisq();
    +  }
    +

       void Polynomial::fit(const utility::vector& x, const utility::vector& y)

    @@ -47 +55 @@
         md_.fit(X,y);
       }
    +
    +
    +  const utility::vector& Polynomial::fit_parameters(void) const
    +  {
    +    return md_.fit_parameters();
    +  }
    +

       double Polynomial::predict(const double x) const
  • trunk/yat/regression/Polynomial.h

    r703 → r713

    @@ -37 +37 @@
     namespace regression {

    -  ///
    -  /// @todo document
    -  ///
    +  /**
    +     @brief Polynomial regression
    +
    +     Data are modeled as \f$ y = \alpha + \beta x + \gamma x^2 +
    +     ... + \delta x_i^{\textrm{power}} + \epsilon_i \f$
    +  */
       class Polynomial : public OneDimensional
       {

    @@ -55 +58 @@

         ///
    -    /// fit the model by minimizing the mean squared deviation between
    +    /// Fit the model by minimizing the mean squared deviation between
         /// model and data.
         ///

    @@ -63 +66 @@
         /// @return parameters of the model
         ///
    -    utility::vector fit_parameters(void) { return md_.fit_parameters(); }
    +    /// @see MultiDimensional
    +    ///
    +    const utility::vector& fit_parameters(void) const;

         ///
    -    /// @todo
    -    /// @brief Mean Squared Error
    +    /// @brief Variance of residuals
         ///
    -    inline double mse(void) const { return mse_; }
    +    double chisq(void) const;

         ///

    @@ -83 +87 @@
       private:
         MultiDimensional md_;
    -    double mse_;
         size_t power_;
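
An illustrative sketch of the updated Polynomial interface (the data vectors x and y are assumed; namespace and include details are omitted):

    regression::Polynomial quadratic(2);                        // y = alpha + beta*x + gamma*x^2
    quadratic.fit(x, y);                                        // delegates to MultiDimensional internally
    const utility::vector& coef = quadratic.fit_parameters();   // now returned by const reference
    double chi2 = quadratic.chisq();                            // forwarded from md_.chisq()
    double y0   = quadratic.predict(0.5);
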