source: trunk/yat/statistics/AveragerWeighted.h @ 718

Last change on this file since 718 was 718, checked in by Jari Häkkinen, 17 years ago

Addresses #170.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 6.3 KB
Line 
1#ifndef _theplu_yat_statistics_averagerweighted_
2#define _theplu_yat_statistics_averagerweighted_
3
4// $Id: AveragerWeighted.h 718 2006-12-26 09:56:26Z jari $
5
6/*
7  Copyright (C) The authors contributing to this file.
8
9  This file is part of the yat library, http://lev.thep.lu.se/trac/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 2 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program; if not, write to the Free Software
23  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24  02111-1307, USA.
25*/
26
27#include "Averager.h"
28
29#include <cmath>
30
31namespace theplu{
32namespace yat{
33namespace statistics{
34
35  ///
36  /// @brief Class to calulate averages with weights.
37  ///
38  /// There are several different reasons why a statistical analysis
39  /// needs to adjust for weighting. In the litterature reasons are
40  /// mainly divided into two kinds of weights - probablity weights
41  /// and analytical weights. 1) Analytical weights are appropriate
42  /// for scientific experiments where some measurements are known to
43  /// be more precise than others. The larger weight a measurement has
44  /// the more precise is is assumed to be, or more formally the
45  /// weight is proportional to the reciprocal variance
46  /// \f$ \sigma_i^2 = \frac{\sigma^2}{w_i} \f$. 2) Probability weights
47  /// are used for the situation when calculating averages over a
48  /// distribution \f$ f \f$ , but sampling from a distribution \f$ f'
49  /// \f$. Compensating for this discrepancy averages of observables
50  /// are taken to be \f$ \sum \frac{f}{f'}X \f$ For further discussion:
51  /// <a href="Statistics/index.html">Weighted Statistics document</a><br>
52  ///
53  /// If nothing else stated, each function fulfills the
54  /// following:<br> <ul><li>Setting a weight to zero corresponds to
55  /// removing the data point from the dataset.</li><li> Setting all
56  /// weights to unity, the yields the same result as from
57  /// corresponding function in Averager.</li><li> Rescaling weights
58  /// does not change the performance of the object.</li></ul>
59  ///
60  /// @see Averager AveragerPair AveragerPairWeighted
61  ///
62  class AveragerWeighted
63  {
64  public:
65
66    ///
67    /// @brief The default constructor
68    ///
69    AveragerWeighted(void);
70
71    ///
72    /// @brief The copy constructor
73    ///
74    AveragerWeighted(const AveragerWeighted&);
75
76    ///
77    /// Adding a data point \a d, with weight \a w (default is 1)
78    ///
79    void  add(const double d,const double w=1);
80
81    ///
82    /// Adding each value in an array \a x and corresponding value in
83    /// weight array \a w.
84    ///
85    /// The requirements for the types T1 and T2 of the arrays \a x
86    /// and \a w are: operator[] returning an element and function
87    /// size() returning the number of elements.
88    ///
89    template <typename T1, typename T2>
90    void add_values(const T1& x, const T2& w);
91
92    ///
93    /// @brief Calculate the weighted mean
94    ///
95    /// @return \f$ \frac{\sum w_ix_i}{\sum w_i} \f$
96    ///
97    double mean(void) const;
98
99    ///
100    /// @brief Weighted version of number of data points.
101    ///
102    /// If all
103    /// weights are equal, the unweighted version is identical to the
104    /// non-weighted version. Adding a data point with zero weight
105    /// does not change n(). The calculated value is always smaller
106    /// than the actual number of data points added to object.
107    ///
108    /// @return \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$
109    ///
110    double n(void) const;
111
112    ///
113    /// @brief Rescale object.
114    ///
115    /// Each data point is rescaled as \f$ x = a * x \f$
116    ///
117    void rescale(double a);
118
119    ///
120    /// @brief Reset everything to zero.
121    ///
122    void reset(void);
123
124    ///
125    /// @brief The standard deviation is defined as the square root of
126    /// the variance().
127    ///
128    /// @return The standard deviation, root of the variance().
129    ///
130    double std(void) const;
131
132    ///
133    /// @brief Calculates standard deviation of the mean().
134    ///
135    /// Variance from the
136    /// weights are here neglected. This is true when the weight is
137    /// known before the measurement. In case this is not a good
138    /// approximation, use bootstrapping to estimate the error.
139    ///
140    /// @return \f$ \frac{\sum w^2}{\left(\sum w\right)^3}\sum w(x-m)^2 \f$
141    /// where \f$ m \f$ is the mean()
142    ///
143    double standard_error(void) const;
144
145    ///
146    /// Calculating the sum of weights 
147    ///
148    /// @return \f$ \sum w_i \f$
149    ///
150    double sum_w(void) const;
151
152    ///
153    /// @return \f$ \sum w_i^2 \f$
154    ///
155    double sum_ww(void) const;
156
157    ///
158    /// \f$ \sum w_ix_i \f$
159    ///
160    /// @return weighted sum of x
161    ///
162    double sum_wx(void) const;
163
164    ///
165    /// @return \f$ \sum_i w_i (x_i-m)^2\f$
166    ///
167    double sum_xx_centered(void) const;
168
169    /**
170       The variance is calculated as \f$ \frac{\sum w_i (x_i - m)^2
171       }{\sum w_i} \f$, where \a m is the known mean.
172       
173       @return Variance when the mean is known to be \a m.
174    */
175    double variance(const double m) const;
176
177    /**
178       The variance is calculated as \f$ \frac{\sum w_i (x_i - m)^2
179       }{\sum w_i} \f$, where \a m is the mean(). Here the weight are
180       interpreted as probability weights. For analytical weights the
181       variance has no meaning as each data point has its own
182       variance.
183       
184       @return The variance.
185    */
186    double variance(void) const;
187
188
189  private:
190    ///
191    ///  @return \f$ \sum w_i^2x_i \f$
192    ///
193    double sum_wwx(void) const;
194
195    ///
196    ///  @return \f$ \sum w_i^2x_i^2 \f$
197    ///
198    double sum_wwxx(void) const;
199   
200    ///
201    ///  @return \f$ \sum w_i x_i^2 \f$
202    ///
203    double sum_wxx(void) const;
204
205    const Averager& wx(void) const;
206    const Averager& w(void) const;
207
208    ///
209    /// operator to add a AveragerWeighted
210    ///
211    const AveragerWeighted& operator+=(const AveragerWeighted&);
212   
213    Averager w_;
214    Averager wx_;
215    double wwx_;
216    double wxx_;
217  };
218
219  // Template implementations
220  template <typename T1, typename T2>
221  void AveragerWeighted::add_values(const T1& x, const T2& w)
222  {
223    assert(x.size()==w.size());
224    for (size_t i=0; i<x.size(); i++) 
225      add(x[i],w[i]);
226  }
227
228///
229/// The AveragerWeighted output operator
230///
231///std::ostream& operator<<(std::ostream& s,const AveragerWeighted&);
232
233}}} // of namespace statistics, yat, and theplu
234
235#endif
Note: See TracBrowser for help on using the repository browser.