Changeset 486


Ignore:
Timestamp:
Jan 4, 2006, 5:41:48 PM (16 years ago)
Author:
Peter
Message:

aligned AveragerWeighted? with Averager and added tests for checking this see class doc

Location:
trunk
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/statistics/AveragerWeighted.cc

    r420 r486  
    33#include <c++_tools/statistics/AveragerWeighted.h>
    44#include <c++_tools/statistics/Averager.h>
     5#include <c++_tools/gslapi/vector.h>
    56
    67#include <ostream>
     
    1011namespace statistics{
    1112 
     13
     14  void AveragerWeighted::add(const gslapi::vector& x, const gslapi::vector& w)
     15  {
     16    assert(x.size()==w.size());
     17    for (size_t i=0; i<x.size(); i++)
     18      add(x(i),w(i));
     19  }
     20
     21
    1222
    1323  AveragerWeighted AveragerWeighted::operator+=(const AveragerWeighted& a)
  • trunk/lib/statistics/AveragerWeighted.h

    r483 r486  
    77
    88#include <cmath>
    9 #include <ostream>
     9//#include <ostream>
    1010
    1111namespace theplu{
     12  class gslapi::vector;
     13
    1214namespace statistics{
    1315
    1416  ///
    15   /// Class to calulate simple (first and second moments) averages
    16   /// with weights.
     17  /// @brief Class to calulate averages with weights.
    1718  ///
    18   /// @see Averager
     19  /// There are several different reasons why a statistical analysis
     20  /// needs to adjust for weighting. In the litterature reasons are
     21  /// mainly divided into two kinds of weights - probablity weights
     22  /// and analytical weights. 1) Analytical weights are appropriate
     23  /// for scientific experiments where some measurements are known to
     24  /// be more precise than others. The larger weight a measurement has
     25  /// the more precise is is assumed to be, or more formally the
     26  /// weight is proportional to the reciprocal variance
     27  /// \f$ \sigma_i^2 = \frac{\sigma^2}{w_i} \f$. 2) Probablity weights
     28  /// are used for the situation when calculating averages over a
     29  /// distribution \f$ f \f$ , but sampling from a distribution \f$ f'
     30  /// \f$. Compensating for this discrepancy averages of observables
     31  /// are taken to be \f$ \sum \frac{f}{f'}X \f$ For further discussion:
     32  /// <a href="../Statistics/index.html">Weighted Statistics document</a><br>
     33  ///
     34  /// If nothing else stated, each function fulfills the
     35  /// following:<br> <ul><li>Setting a weight to zero corresponds to
     36  /// removing the data point from the dataset.</li><li> Setting all
     37  /// weights to unity, the yields the same result as from
     38  /// corresponding function in Averager.</li><li> Rescaling weights
     39  /// does not change the performance of the object.</li></ul>
     40  ///
     41  /// @see Averager AveragerPair AveragerPairWeighted
    1942  ///
    2043  class AveragerWeighted
     
    4366
    4467    ///
     68    /// Adding each value in vector \a x and corresponding value in
     69    /// weight vector \a w.
     70    ///
     71    void add(const gslapi::vector& x, const gslapi::vector& w);
     72
     73    ///
    4574    /// Calculating the weighted mean
    4675    ///
     
    6291
    6392    ///
    64     /// The standard error is calculated as \f$ \sqrt{\frac{\sum
    65     /// w_i^2}{(\sum w_i)^2-\sum w_i^2}\frac{\sum w_i(x_i-m)^2}{\sum
    66     /// w_i}} \f$
    67     ///
    68     /// @return standard deviation of mean
    69     ///
    70     inline double standard_error(void)  const
    71     { return sqrt(sum_ww()/((sum_w()*sum_w())-sum_ww()) *
    72                   sum_xx_centered()/sum_w()); }
    73 
    74     ///
    7593    /// The standard deviation is defined as the square root of the
    76     /// variance.
     94    /// variance().
    7795    ///
    7896    /// @return The standard deviation, root of the variance().
     
    8199
    82100    ///
    83     /// Calculating the sum of weights: \f$ \sum
    84     /// w_i \f$ @return sum of weights
     101    /// Calculates standard deviation of the mean(). Variance from the
     102    /// weights are here neglected. This is true when the weight is
     103    /// known before the measurement. In case this is not a good
     104    /// approximation, use bootstrapping to estimate the error.
     105    ///
     106    /// @return \f$ \frac{\sum w^2}{\left(\sum w\right)^3}\sum w(x-m)^2 \f$
     107    /// where \f$ m \f$ is the mean()
     108    ///
     109    inline double standard_error(void)  const
     110    { return sqrt(sum_ww()/(sum_w()*sum_w()*sum_w()) *
     111                  sum_xx_centered()); }
     112
     113    ///
     114    /// Calculating the sum of weights 
     115    ///
     116    /// @return \f$ \sum w_i \f$
    85117    ///
    86118    inline double sum_w(void) const
     
    88120
    89121    ///
     122    /// @return \f$ \sum_i w_i (x_i-m)^2\f$
     123    ///
     124    inline double sum_xx_centered(void) const
     125    { return sum_wxx() - mean()*mean()*sum_w(); }
     126
     127    ///
    90128    /// The variance is calculated as \f$ \frac{\sum w_i (x_i - m)^2
    91     /// }{\sum w_i} \f$.
     129    /// }{\sum w_i} \f$, where \a m is the known mean.
     130    ///
     131    /// @return Variance when the mean is known to be \a m.
     132    ///
     133    inline double variance(const double m) const
     134    { return (sum_wxx()-2*m*sum_wx())/sum_w()+m*m; }
     135
     136    ///
     137    /// The variance is calculated as \f$ \frac{\sum w_i (x_i - m)^2
     138    /// }{\sum w_i} \f$, where \a m is the mean(). Here the weight are
     139    /// interpreted as probability weights. For analytical weights the
     140    /// variance has no meaning as each data point has its own
     141    /// variance.
    92142    ///
    93143    /// @return The variance.
     
    95145    inline double variance(void) const
    96146    { return sum_xx_centered()/sum_w(); }
    97 
    98     ///
    99     /// The variance is calculated as \f$ \frac{\sum w_i}{\left(\sum
    100     /// w_i\right)^2 - \sum w_i^2} \sum w_i (x_i - m)^2 \f$. This
    101     /// variance is the same as the one implemented in GSL.
    102     ///
    103     /// @return The variance.
    104     ///
    105     inline double variance_unbiased(void) const
    106     { return sum_ww() / (sum_w()*sum_w()-sum_ww()) * sum_xx_centered(); }
    107 
    108     ///
    109     /// @return \f$ \sum_i (x_i-m)^2\f$
    110     ///
    111     /// @note this function is likely to be changed, when this class
    112     /// will be revised.
    113     ///
    114     inline double sum_xsqr_centered(void) const { return sum_xx_centered(); }
    115147
    116148
     
    143175
    144176    ///
    145     /// @return \f$ \sum_i w_i (x_i-m)^2\f$
    146     ///
    147     inline double sum_xx_centered(void) const
    148     { return sum_wxx() - mean()*mean()*sum_w(); }
    149 
    150     ///
    151177    /// operator to add a AveragerWeighted
    152178    ///
     
    167193/// The AveragerWeighted output operator
    168194///
    169 std::ostream& operator<<(std::ostream& s,const AveragerWeighted&);
     195///std::ostream& operator<<(std::ostream& s,const AveragerWeighted&);
    170196
    171197}} // of namespace statistics and namespace theplu
  • trunk/lib/statistics/tScore.cc

    r483 r486  
    5757    }
    5858    double diff = positive.mean() - negative.mean();
    59     double s=sqrt((positive.sum_xsqr_centered()+negative.sum_xsqr_centered())/
     59    double s=sqrt((positive.sum_xx_centered()+negative.sum_xx_centered())/
    6060                  (positive.sum_w()+negative.sum_w()-2));
    6161    t_=diff/s/(1.0/positive.sum_w()+1.0/negative.sum_w());
  • trunk/test/averager_test.cc

    r484 r486  
    44#include <c++_tools/statistics/AveragerPair.h>
    55#include <c++_tools/statistics/AveragerWeighted.h>
     6#include <c++_tools/gslapi/vector.h>
    67
    78#include <fstream>
     
    1213bool equal(const Averager& a, const Averager& b)
    1314{
    14   return a.n()==b.n() && a.mean()==b.mean() && a.variance()==b.variance();
     15  return (a.n()==b.n() && a.mean()==b.mean() && a.variance()==b.variance());
     16}
     17
     18bool equal(const AveragerWeighted& a, const AveragerWeighted& b, double tol,
     19           std::ostream* error)
     20{
     21  bool equal = true;
     22  if  ( fabs(a.mean()-b.mean())>tol){
     23    equal=false;
     24    *error << "mean:\t" << a.mean() << "\t" << b.mean() << std::endl;
     25  }
     26  if ( fabs(a.variance()-b.variance())>tol ) {
     27    equal=false;
     28    *error << "error for variance:\t" << a.variance() << " " << b.variance()
     29           << std::endl;
     30  }
     31  if ( fabs(a.standard_error()-b.standard_error())>tol ) {
     32    equal =false;
     33    *error << "error for standard error:\t" << std::endl;
     34  }
     35  return equal;
    1536}
    1637
     
    2849  bool ok = true;
    2950
     51  // Testing Averager
    3052  *error << "testing Averager" << std::endl;
    3153  Averager a;
     
    7496  }
    7597
     98  // Testing AveragerWeighted
     99  *error << "testing AveragerWeighted" << std::endl;
     100  theplu::gslapi::vector x(3,0);
     101  x(0)=0;
     102  x(1)=1;
     103  x(2)=2;
     104  theplu::gslapi::vector w(3,1);
    76105  theplu::statistics::AveragerWeighted aw;
     106  aw.add(x,w);
     107  a.reset();
     108  a.add(x);
     109  double tol=0.0001;
     110  if (fabs(a.mean()-aw.mean())>tol ||
     111      fabs(a.variance()-aw.variance())>tol ||
     112      fabs(a.standard_error()-aw.standard_error())>tol ||
     113      fabs(a.std()-aw.std())>tol){
     114    *error << "error: AveragerWeighted with unitary weights should "
     115           << "be equal to Averager" << std::endl;
     116    *error << "\tAverager\tAveragerWeighted" << std::endl;
     117    *error << "mean:\t" << a.mean() << "\t" << aw.mean() << std::endl;
     118    *error << "var:\t" << a.variance() << "\t" << aw.variance() << std::endl;
     119    *error << "ste:\t" << a.standard_error() << "\t" << aw.standard_error()
     120           << std::endl;
     121    *error << "std:\t" << a.std() << "\t" << aw.std() << std::endl;
     122
     123    ok=false;
     124  }
     125
     126  AveragerWeighted* aw2 = new AveragerWeighted(aw);
     127  if (!equal(aw,*aw2,tol,error)){
     128    *error << "error: AveragerWeighted copy constructor " << std::endl;
     129    ok=false;
     130  }
     131   
     132  aw2->add(12,0);
     133  if (!equal(aw,*aw2,tol,error)){
     134    *error << "error: AveragerWeighted adding a data point with weight=0 "
     135           << "should make no change " << std::endl;
     136    ok=false;
     137  }
     138     
     139  aw2->reset();
     140  w.scale(17);
     141  aw2->add(x,w);
     142  if (!equal(aw,*aw2,tol,error)){
     143    *error << "error: AveragerWeighted rescaling weights "
     144           << "should make no change " << std::endl;
     145    ok=false;
     146  }
     147  delete aw2;
     148 
     149
    77150  theplu::statistics::AveragerPair ap;
    78151  for (int i=0; i<10; i++)
Note: See TracChangeset for help on using the changeset viewer.