Changeset 235


Ignore:
Timestamp:
Feb 21, 2005, 3:53:48 PM (17 years ago)
Author:
Peter
Message:

Major modifications

Location:
trunk/src
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/Regression.cc

    r221 r235  
    88 
    99  Regression::Regression()
     10    : x_(0.0), y_(0.0), y_err_(0.0)
    1011  {
    1112  }
  • trunk/src/Regression.h

    r221 r235  
    44#define _theplu_statistics_regression_
    55
    6 // C++ tools include
    7 /////////////////////
    86#include "vector.h"
    9 
    10 // Standard C++ includes
    11 ////////////////////////
    127
    138
     
    5449    ///
    5550    virtual void predict(const double x, double& y, double& y_err,
    56                          const double w=1) const=0;
     51                         const double w=1) =0;
     52    ///
     53    /// @return prediction value and parameters
     54    ///
     55    virtual std::ostream& print(std::ostream&) const=0;
     56             
     57    ///
     58    /// @return header for print()
     59    ///
     60    virtual std::ostream& print_header(std::ostream& s) const=0;
    5761             
    5862  protected:
    59    
     63    double x_;
     64    double y_;
     65    double y_err_;
    6066  };
    6167
  • trunk/src/RegressionLinear.cc

    r221 r235  
    7171  }
    7272
     73  std::ostream& RegressionLinear::print(std::ostream& s) const
     74  {
     75    s << x_ << "\t"
     76      << y_ << "\t"
     77      << y_err_ << "\t"
     78      << alpha_ << "\t"
     79      << alpha_err() << "\t"
     80      << beta_ << "\t"
     81      << beta_err() << "\t"
     82      << s2_ << "\t"
     83      << r2_;
     84   
     85    return s;
     86  }
     87   
     88  std::ostream& RegressionLinear::print_header(std::ostream& s) const
     89  {
     90    s << "# column 1: x\n"
     91      << "# column 2: y\n"
     92      << "# column 3: y_err\n"
     93      << "# column 4: alpha\n"
     94      << "# column 5: alpha_err\n"
     95      << "# column 6: beta\n"
     96      << "# column 7: beta_err\n"
     97      << "# column 8: s2 (var(y|x))\n"
     98      << "# column 9: r2 (coefficient of determination)";
     99    return s;
     100  }
     101
    73102   
    74103}} // of namespace statistics and namespace theplu
  • trunk/src/RegressionLinear.h

    r221 r235  
    4747    /// @return standard deviation of parameter \f$ \alpha \f$
    4848    ///
    49     inline double alpha_var(void) const { return sqrt(alpha_var_); }
     49    inline double alpha_err(void) const { return sqrt(alpha_var_); }
    5050
    5151    ///
    52     /// @return the parameter \f$ \beta
     52    /// @return the parameter \f$ \beta \f$
    5353    ///
    5454    inline double beta(void) const { return beta_; }
    5555
    5656    ///
    57     /// @return standard devaition of parameter \f$ \beta \f$
     57    /// @return standard deviation of parameter \f$ \beta \f$
    5858    ///
    59     inline double beta_var(void) const { return sqrt(beta_var_); }
     59    inline double beta_err(void) const { return sqrt(beta_var_); }
    6060   
    6161    ///
     
    6363    /// coefficients \f$ (\alpha, \beta)\f$ of the model \f$ y =
    6464    /// \alpha + \beta (x-m_x) \f$ from vectors \a x and \a y, by
    65     /// minimizing \f$ \sum{(y_i - \alpha - \beta (x-m_x))^2} \f$.
     65    /// minimizing \f$ \sum{(y_i - \alpha - \beta (x-m_x))^2} \f$. By
     66    /// construction \f$ \alpha \f$ and \f$ \beta \f$ are independent.
    6667    ///
    6768    void fit(const gslapi::vector& x, const gslapi::vector& y) ;
     
    7172    /// coefficients \f$ (\alpha, \beta)\f$ of the model \f$ y =
    7273    /// \alpha + \beta (x-m_x) \f$ from vectors \a x and \a y, by
    73     /// minimizing \f$ \sum{w_i(y_i - \alpha - \beta (x-m_x))^2} \f$.
     74    /// minimizing \f$ \sum{w_i(y_i - \alpha - \beta (x-m_x))^2} \f$,
     75    /// where \f$ m_x \f$ is the weighted average. By construction \f$
     76    /// \alpha \f$ and \f$ \beta \f$ are independent.
    7477    ///
    7578    void fit(const gslapi::vector& x, const gslapi::vector& y,
     
    8285    ///
    8386    inline void predict(const double x, double& y, double& y_err,
    84                         const double w=1.0) const
     87                        const double w=1.0)
    8588    {
    8689      y = alpha_ + beta_ * (x-m_x_);
    8790      y_err = sqrt( alpha_var_+beta_var_*(x-m_x_)*(x-m_x_)+s2_/w );
    8891    }
     92
     93    ///
     94    /// @return prediction value and parameters
     95    ///
     96    std::ostream& print(std::ostream&) const;
     97             
     98    ///
     99    /// @return header for print()
     100    ///
     101    std::ostream& print_header(std::ostream&) const;
    89102
    90103    ///
  • trunk/src/RegressionLocal.cc

    r221 r235  
    1313namespace statistics {
    1414
    15   RegressionLocal::RegressionLocal(const gslapi::vector& x,
    16                                    const gslapi::vector& y,
    17                                    Regression& r,
    18                                    RegressionKernel& k,
    19                                    const size_t nof_predictions)
    20     : kernel_(&k), regression_(&r)
     15  RegressionLocal::RegressionLocal(Regression& r,
     16                                   RegressionKernel& k)
     17    : kernel_(&k), regressor_(&r)
    2118  {
    22     estimated_x_ = gslapi::vector(nof_predictions);
    23     estimated_y_ = estimated_x_;
    24     estimated_y_err_ = estimated_x_;
    25     //sorting data with respect to x
    26     // Peter, could this be done without all the copying
    27     std::vector<std::pair<double, double> > data;
    28     for (size_t i=0; i<x.size(); i++){
    29       std::pair<double, double> tmp(x(i),y(i));
    30       data_.push_back(tmp);
    31     }
    32     sort(data_.begin(), data_.end());
    33     for (size_t i=0; i<nof_predictions; i++)
    34       estimated_x_(i) =
    35         data_[static_cast<size_t>((i+0.5)/nof_predictions*data_.size())].first;
    3619  }
    3720
    38   void RegressionLocal::fit(const double f)
     21  void RegressionLocal::fit(const double f, const u_int step_size)
    3922  {
     23    sort(data_.begin(), data_.end());
     24
    4025    // number of points on each side
    41     size_t nof_data_points = static_cast<size_t>(f/2*data_.size());
    42     size_t index_min = 0;
    43     size_t index_max = 0;
    44     for (size_t i=0; i<estimated_x_.size(); i++) {
     26    size_t nof_points = static_cast<size_t>(f*data_.size());
     27    double width = 0;
     28    int min_index = 0;
     29    for (size_t i=0; i<data_.size(); i+=step_size) {
     30      x_.push_back(data_[i].first);
     31     
     32      // determining boundaries window/kernel
     33     
    4534      // extreme left case
    46       if (data_[nof_data_points-1].first>estimated_x_(i)){
    47         index_min = 0;
    48         index_max = 2*nof_data_points-1;
     35      if (data_[i].first <
     36          (data_[nof_points-1].first+data_[nof_points-1].first)/2 ){
     37        min_index=0;
     38        width = data_[nof_points-1].first-data_[i].first;
    4939      }
    50      
    5140      // extreme right case
    52       else if (data_[data_.size()-estimated_x_.size()-1].first<
    53                estimated_x_(i)){
    54         index_min = data_.size()-2*nof_data_points;
    55         index_max = data_.size()-1;
     41      else if (data_[i].first > (data_[data_.size()-1].first+
     42                            data_[data_.size()-nof_points].first)/2 ){
     43        min_index = data_.size() - nof_points;
     44        width = data_[i].first - data_[data_.size()-nof_points].first;
    5645      }
    57      
    58       // normal case - window in the middle
    5946      else {
    60         for (size_t j=nof_data_points; j<data_.size(); j++)
    61           if (data_[j].first>estimated_x_(i)) {
    62             index_min = j-nof_data_points;
    63             index_max = j+nof_data_points-1;
    64             break;
    65           }
     47        // stepping forward until x is in the middle
     48        while(data_[min_index+nof_points-1].first-data_[i].first
     49              < data_[i].first-data_[min_index].first)
     50          min_index++;
     51        width = data_[min_index+nof_points-1].first-data_[i].first;
     52        // Peter, should check if x gets closer to centrum if we step
     53        // back one step
    6654      }
    67      
     55
     56
    6857      // copying data
    69       // Peter, this is a stupid solution which could be
    70       // improved by using a moving view (in the vector class).
    71       gslapi::vector x(index_max-index_min+1);
    72       gslapi::vector y(index_max-index_min+1);
    73       gslapi::vector w(index_max-index_min+1);
     58      // Peter, too much copying. Move the copying outside loop and
     59      // use a vector view (when available in gslapi::vector class).
     60      gslapi::vector x(nof_points);
     61      gslapi::vector y(nof_points);
     62      gslapi::vector w(nof_points);
    7463      for (size_t j=0; j<x.size(); j++)
    75         x(j)=data_[index_min+j].first;
     64        x(j)=data_[min_index+j].first;
    7665      for (size_t j=0; j<y.size(); j++){
    77         y(j)=data_[index_min+j].second;
    78         if (y(j)>10)
    79           std::cout << x(j) << "\t" << y(j) << std::endl;
     66        y(j)=data_[min_index+j].second;
    8067      }
    8168      // calculating weights
    8269      for (size_t j=0; j<y.size(); j++)
    83         w(j) = kernel_->weight( 2*(x(j)-data_[index_min].first)/
    84                                 (data_[index_max].first-
    85                                  data_[index_min].first)-1);
    86 
     70        w(j) = kernel_->weight( (x(j)-(data_[min_index].first+
     71                                       data_[min_index+nof_points-1].first) /2)
     72                                /width );
    8773     
    8874      // fitting the regressor locally
    89       std::cout << "x: " << estimated_x_(i) << std::endl;
    90       std::cout << "x.begin: " << x(0) << std::endl;
    91       std::cout << "x.end: " << x(x.size()-1) << std::endl;
    92       regression_->fit(x,y,w);
    93       std::cout << "estimating value\n";
    94       regression_->predict(estimated_x_(i),
    95                             estimated_y_(i),
    96                             estimated_y_err_(i));
     75      regressor_->fit(x,y,w);
     76      regressor_->predict(x_[i], y_[i], y_err_[i]);
    9777    }
    9878  }
    99  
    100   std::ostream& operator<< (std::ostream& s, const RegressionLocal& r)
     79
     80  std::ostream& RegressionLocal::print(std::ostream& s) const
    10181  {
    102     gslapi::vector x=r.estimated_x();
    103     gslapi::vector y(r.estimated_y());
    104     gslapi::vector y_err(r.estimated_y_err());
    105     for (size_t i=0; i<x.size(); i++){
    106       s << x(i) << "\t";
    107       s << y(i) << "\t";
    108       s << y_err(i) << "\n";
     82    for (size_t i=0; i<x_.size(); i++) {
     83      regressor_->print(s);
     84      s << std::endl;
    10985    }
    11086    return s;
    11187  }
    11288
     89
    11390}} // of namespace cpptools and namespace theplu
  • trunk/src/RegressionLocal.h

    r221 r235  
    3232
    3333    ///
    34     /// Constructor loading the object with data, type of regressor,
    35     /// type of kernel and in how many points to predict.
     34    /// Constructor taking type of \a regressor,
     35    /// type of \a kernel.
    3636    ///
    37     RegressionLocal(const gslapi::vector& x, const gslapi::vector& y,
    38                     Regression& r, RegressionKernel& k, const size_t);
     37    RegressionLocal(Regression& regressor, RegressionKernel& kernel);
    3938
    4039    ///
     
    4847    virtual ~RegressionLocal(void) {};
    4948
     49
     50    ///
     51    /// adding a data point
     52    ///
     53    inline void add(const double x, const double y)
     54    { data_.push_back(std::make_pair(x,y)); }
     55
    5056    ///
    5157    /// Function returning the points where to predict
    5258    ///
    53     inline gslapi::vector estimated_x(void) const { return estimated_x_; }
     59    inline const std::vector<double>& x(void) const { return x_; }
    5460
    5561    ///
    5662    /// Function returning predicted values
    5763    ///
    58     inline gslapi::vector estimated_y(void) const { return estimated_y_; }
     64    inline const std::vector<double>& y(void) const { return y_; }
    5965
    6066    ///
    6167    /// Function returning error of predictions
    6268    ///
    63     inline gslapi::vector estimated_y_err(void) const {return estimated_y_err_;}
     69    inline const std::vector<double>& y_err(void) const { return y_err_; }
    6470 
    6571    ///
    66     /// Function performing the fit, using a \a fraction of the data
    67     /// point and regression method defined in the constructor. The
    68     /// algorithm uses equally many points around the point to
    69     /// predict. If this is not possible (because the point is too far
    70     /// left/right) the points to the extreme left/right is used.
     72    /// Performs the fit in data defined by add using a kernel and a
     73    /// regression method defined in the constructor. The algorithm
     74    /// selects boundaries for the kernel such that \a fraction of the
     75    /// data points are used and the point where the fit is done is in
     76    /// the middle. Starting with the smallest x, the function jumps
     77    /// \a step_size point in each iteration to do the next fit
    7178    ///
    72     void fit(const double fraction);
    73    
     79    void fit(const double fraction, const u_int step_size=1);
     80
     81    ///
     82    /// @return prediction values and parameters
     83    ///
     84    std::ostream& print(std::ostream&) const;
     85             
     86    ///
     87    /// @return header for print()
     88    ///
     89    inline std::ostream& print_header(std::ostream& s) const
     90    { return regressor_->print_header(s); }
     91
    7492         
    7593  private:
     
    7795    gslapi::vector data_y_;
    7896    RegressionKernel* kernel_;
    79     Regression* regression_;
    80     gslapi::vector estimated_x_;
    81     gslapi::vector estimated_y_;
    82     gslapi::vector estimated_y_err_;
     97    Regression* regressor_;
     98    std::vector<double> x_;
     99    std::vector<double> y_;
     100    std::vector<double> y_err_;
    83101   
    84102   
    85103  };
    86 
    87   ///
    88   /// The output operator for Local Regression Class.
    89   ///
    90   std::ostream& operator<< (std::ostream& s, const RegressionLocal&);
    91104
    92105}} // of namespace statistics and namespace theplu
  • trunk/src/RegressionNaive.cc

    r204 r235  
    11// $Id$
    22
    3 // Header
     3
    44#include "RegressionNaive.h"
    55
    6 // C++_tools
    76#include "Averager.h"
    87#include "Regression.h"
     
    109#include "WeightedAverager.h"
    1110
     11#include <iostream>
    1212
    1313
     
    1717 
    1818  RegressionNaive::RegressionNaive(void)
    19     : Regression()
     19    : Regression(), m_(0.0), m_err_(0.0)
    2020  {
    2121  }
    2222
     23  void RegressionNaive::fit(const gslapi::vector& x, const gslapi::vector& y)
     24  {
     25    Averager a;
     26    for (size_t i=0; i<y.size(); i++)
     27      a.add(y(i));
     28    m_=a.mean();
     29    s2_=a.variance();
     30    m_err_=a.standard_error();
     31  }
    2332
     33  void RegressionNaive::fit(const gslapi::vector& x,
     34                            const gslapi::vector& y,
     35                            const gslapi::vector& w)
     36  {
     37    WeightedAverager a;
     38    for (size_t i=0; i<y.size(); i++)
     39      a.add(y(i), w(i));
     40    m_=a.mean();
     41    m_err_=a.standard_error();
     42    s2_=m_err_*m_err_*w.sum();
     43  }
    2444
     45  void RegressionNaive::predict(const double x, double& y, double& y_err,
     46                                const double w)
     47  {
     48    x_ = x;
     49    y = m_;
     50    y_err = sqrt(m_err_*m_err_ + s2_/w);
     51  }
     52 
     53  std::ostream& RegressionNaive::print(std::ostream& s) const
     54  {
     55    s << x_ << "\t"
     56      << m_ << "\t"
     57      << sqrt(m_err_*m_err_ + s2_);
     58    return s;
     59  }
     60 
     61  std::ostream& RegressionNaive::print_header(std::ostream& s) const
     62  {
     63    s << "# column 1: x\n"
     64      << "# column 2: y\n"
     65      << "# column 3: y_err";
     66    return s;
     67  }
     68 
    2569}} // of namespace cpptools and namespace theplu
  • trunk/src/RegressionNaive.h

    r221 r235  
    1313////////////////////////
    1414//#include <gsl/gsl_fit.h>
     15#include <iostream>
    1516#include <utility>
     17
     18
    1619
    1720namespace theplu {
     
    4245         
    4346    ///
    44     /// This function computes the best-fit for the naive model \f$
    45     /// y = m \f$ from vectors \a x and \a y, by minimizing \f$
    46     /// \sum{(y_i-m)^2} \f$.
     47    /// This function computes the best-fit for the naive model \f$ y
     48    /// = m \f$ from vectors \a x and \a y, by minimizing \f$
     49    /// \sum{(y_i-m)^2} \f$. This function is the same as using the
     50    /// weighted version with unity weights.
    4751    ///
    48     inline void fit(const gslapi::vector& x, const gslapi::vector& y)
    49     {
    50       Averager a;
    51       for (size_t i=0; i<y.size(); i++)
    52         a.add(y(i));
    53       m_=a.mean();
    54       s2_=a.variance();
    55       m_err_=a.standard_error();
    56     }
     52    void fit(const gslapi::vector& x, const gslapi::vector& y);
    5753
    5854    ///
     
    6258    /// the inverse of the variance for \f$ y_i \f$
    6359    ///
    64     inline void fit(const gslapi::vector& x,
    65                     const gslapi::vector& y,
    66                     const gslapi::vector& w)
    67     {
    68       WeightedAverager a;
    69       for (size_t i=0; i<y.size(); i++)
    70         a.add(y(i), w(i));
    71       m_=a.mean();
    72       m_err_=a.standard_error();
    73       s2_=m_err_*m_err_*w.sum();
    74     }
    75 
    76     ///
    77     /// @return the parameter m
    78     ///
    79     inline double m(void) const { return m_; }
    80 
    81     ///
    82     /// @return standard deviation of parameter m
    83     ///
    84     inline double alpha_var(void) const { return m_err_; }
     60    void fit(const gslapi::vector& x,
     61             const gslapi::vector& y,
     62             const gslapi::vector& w);
    8563
    8664    ///
    8765    /// Function predicting value using the naive model. \a y_err is
    88     /// the expected deviation from the line for a new data point. If
    89     /// weights are used a weight can be specified for the new point.
     66    /// the expected deviation from the line for a new data point. The
     67    /// weight for the new point can be specified. A smaller weight
     68    /// means larger error. The error has two components: the variance
     69    /// of point and error in estimation of m_.
    9070    ///
    91     inline void predict(const double x, double& y, double& y_err,
    92                         const double w=1.0) const
    93     {
    94       y = m_;
    95       y_err = sqrt(m_err_*m_err_ + s2_/w);
    96     }
     71    void predict(const double x, double& y, double& y_err,
     72                 const double w) ;
    9773
     74    ///
     75    /// @return prediction value and parameters
     76    ///
     77    std::ostream& print(std::ostream&) const;
     78             
     79    ///
     80    /// @return header for print()
     81    ///
     82    std::ostream& print_header(std::ostream&) const;
     83             
    9884         
    9985  private:
    100     double s2_; // noise level ( var = s2/w in weighted case)
     86    double s2_; // noise level - the typical variance for a point with
     87                // weight w is s2/w
    10188    double m_;
    102     double m_err_;
     89    double m_err_; // error of estimation of mean m_
     90
    10391  };
    10492
Note: See TracChangeset for help on using the changeset viewer.