source: trunk/yat/statistics/AveragerWeighted.h @ 1703

Last change on this file since 1703 was 1487, checked in by Jari Häkkinen, 13 years ago

Addresses #436. GPL license copy reference should also be updated.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 6.6 KB
Line 
1#ifndef _theplu_yat_statistics_averagerweighted_
2#define _theplu_yat_statistics_averagerweighted_
3
4// $Id: AveragerWeighted.h 1487 2008-09-10 08:41:36Z jari $
5
6/*
7  Copyright (C) 2004 Jari Häkkinen, Peter Johansson, Cecilia Ritz
8  Copyright (C) 2005, 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
9  Copyright (C) 2007 Jari Häkkinen, Peter Johansson
10  Copyright (C) 2008 Peter Johansson
11
12  This file is part of the yat library, http://dev.thep.lu.se/yat
13
14  The yat library is free software; you can redistribute it and/or
15  modify it under the terms of the GNU General Public License as
16  published by the Free Software Foundation; either version 3 of the
17  License, or (at your option) any later version.
18
19  The yat library is distributed in the hope that it will be useful,
20  but WITHOUT ANY WARRANTY; without even the implied warranty of
21  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  General Public License for more details.
23
24  You should have received a copy of the GNU General Public License
25  along with yat. If not, see <http://www.gnu.org/licenses/>.
26*/
27
28#include "Averager.h"
29#include "yat/utility/iterator_traits.h"
30
31#include <cmath>
32
33namespace theplu{
34namespace yat{
35namespace statistics{
36
37  ///
38  /// @brief Class to calulate averages with weights.
39  ///
40  /// There are several different reasons why a statistical analysis
41  /// needs to adjust for weighting. In the litterature reasons are
42  /// mainly divided into two kinds of weights - probablity weights
43  /// and analytical weights. 1) Analytical weights are appropriate
44  /// for scientific experiments where some measurements are known to
45  /// be more precise than others. The larger weight a measurement has
46  /// the more precise is is assumed to be, or more formally the
47  /// weight is proportional to the reciprocal variance
48  /// \f$ \sigma_i^2 = \frac{\sigma^2}{w_i} \f$. 2) Probability weights
49  /// are used for the situation when calculating averages over a
50  /// distribution \f$ f \f$ , but sampling from a distribution \f$ f' \f$.
51  /// Compensating for this discrepancy averages of observables
52  /// are taken to be \f$ \sum \frac{f}{f'}X \f$ For further discussion:
53  /// see \ref weighted_statistics
54  ///
55  /// If nothing else stated, each function fulfills the
56  /// following:<br> <ul><li>Setting a weight to zero corresponds to
57  /// removing the data point from the dataset.</li><li> Setting all
58  /// weights to unity, the yields the same result as from
59  /// corresponding function in Averager.</li><li> Rescaling weights
60  /// does not change the performance of the object.</li></ul>
61  ///
62  /// @see Averager AveragerPair AveragerPairWeighted
63  ///
64  class AveragerWeighted
65  {
66  public:
67
68    ///
69    /// @brief The default constructor
70    ///
71    AveragerWeighted(void);
72
73    ///
74    /// @brief The copy constructor
75    ///
76    AveragerWeighted(const AveragerWeighted&);
77
78    ///
79    /// Adding a data point \a d, with weight \a w (default is 1)
80    ///
81    void  add(const double d,const double w=1);
82
83    ///
84    /// @brief Calculate the weighted mean
85    ///
86    /// @return \f$ \frac{\sum w_ix_i}{\sum w_i} \f$
87    ///
88    double mean(void) const;
89
90    ///
91    /// @brief Weighted version of number of data points.
92    ///
93    /// If all
94    /// weights are equal, the unweighted version is identical to the
95    /// non-weighted version. Adding a data point with zero weight
96    /// does not change n(). The calculated value is always smaller
97    /// than the actual number of data points added to object.
98    ///
99    /// @return \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$
100    ///
101    double n(void) const;
102
103    ///
104    /// @brief Rescale object.
105    ///
106    /// Each data point is rescaled as \f$ x = a * x \f$
107    ///
108    void rescale(double a);
109
110    ///
111    /// @brief Reset everything to zero.
112    ///
113    void reset(void);
114
115    ///
116    /// @brief The standard deviation is defined as the square root of
117    /// the variance().
118    ///
119    /// @return The standard deviation, root of the variance().
120    ///
121    double std(void) const;
122
123    ///
124    /// @brief Calculates standard deviation of the mean().
125    ///
126    /// Variance from the
127    /// weights are here neglected. This is true when the weight is
128    /// known before the measurement. In case this is not a good
129    /// approximation, use bootstrapping to estimate the error.
130    ///
131    /// @return \f$ \frac{\sum w^2}{\left(\sum w\right)^3}\sum w(x-m)^2 \f$
132    /// where \f$ m \f$ is the mean()
133    ///
134    double standard_error(void) const;
135
136    ///
137    /// Calculating the sum of weights 
138    ///
139    /// @return \f$ \sum w_i \f$
140    ///
141    double sum_w(void) const;
142
143    ///
144    /// @return \f$ \sum w_i^2 \f$
145    ///
146    double sum_ww(void) const;
147
148    ///
149    /// \f$ \sum w_ix_i \f$
150    ///
151    /// @return weighted sum of x
152    ///
153    double sum_wx(void) const;
154
155    ///
156    ///  @return \f$ \sum w_i x_i^2 \f$
157    ///
158    double sum_wxx(void) const;
159
160    ///
161    /// @return \f$ \sum_i w_i (x_i-m)^2\f$
162    ///
163    double sum_xx_centered(void) const;
164
165    /**
166       The variance is calculated as \f$ \frac{\sum w_i (x_i - m)^2
167       }{\sum w_i} \f$, where \a m is the known mean.
168       
169       @return Variance when the mean is known to be \a m.
170    */
171    double variance(const double m) const;
172
173    /**
174       The variance is calculated as \f$ \frac{\sum w_i (x_i - m)^2
175       }{\sum w_i} \f$, where \a m is the mean(). Here the weight are
176       interpreted as probability weights. For analytical weights the
177       variance has no meaning as each data point has its own
178       variance.
179       
180       @return The variance.
181    */
182    double variance(void) const;
183
184
185  private:
186    ///
187    ///  @return \f$ \sum w_i^2x_i \f$
188    ///
189    double sum_wwx(void) const;
190
191    ///
192    ///  @return \f$ \sum w_i^2x_i^2 \f$
193    ///
194    double sum_wwxx(void) const;
195   
196    const Averager& wx(void) const;
197    const Averager& w(void) const;
198
199    ///
200    /// operator to add a AveragerWeighted
201    ///
202    const AveragerWeighted& operator+=(const AveragerWeighted&);
203   
204    Averager w_;
205    Averager wx_;
206    double wwx_;
207    double wxx_;
208  };
209
210  /**
211     \brief adding a range of values to AveragerWeighted \a a
212
213     If iterator is non-weighted unitary weights are used.
214   */
215  template <typename Iter>
216  void add(AveragerWeighted& a, Iter first, Iter last)
217  {
218    for ( ; first != last; ++first)
219      a.add(utility::iterator_traits<Iter>().data(first),
220            utility::iterator_traits<Iter>().weight(first));
221  }
222
223  /**
224     \brief add values from two ranges to AveragerWeighted \a a
225
226     Add data from range [first1, last1) with their corresponding
227     weight in range [first2, first2 + distance(first, last) ).
228
229     Requirement: Iter1 and Iter2 are unweighted iterators.
230   */
231  template <typename Iter1, typename Iter2>
232  void add(AveragerWeighted& a, Iter1 first1, Iter1 last1, Iter2 first2)
233  {
234    utility::check_iterator_is_unweighted(first1);
235    utility::check_iterator_is_unweighted(first2);
236    for ( ; first1 != last1; ++first1, ++first2)
237      a.add(*first1, *first2);
238  }
239
240}}} // of namespace statistics, yat, and theplu
241
242#endif
Note: See TracBrowser for help on using the repository browser.