source: trunk/yat/statistics/TukeyBiweightEstimator.h @ 2510

Last change on this file since 2510 was 2510, checked in by Peter, 10 years ago

added TukeyBiweightEstimator?. closes #666

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.2 KB
Line 
1#ifndef _theplu_yat_statistics_tukey_biweight_estimator
2#define _theplu_yat_statistics_tukey_biweight_estimator
3
4// $Id: TukeyBiweightEstimator.h 2510 2011-07-08 23:41:43Z peter $
5
6/*
7  Copyright (C) 2011 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with yat. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "AveragerWeighted.h"
26#include "utility.h"
27
28#include "yat/regression/TukeyBiweight.h"
29
30#include "yat/utility/concept_check.h"
31#include "yat/utility/Exception.h"
32#include "yat/utility/iterator_traits.h"
33
34#include <boost/concept_check.hpp>
35
36#include <algorithm>
37#include <iterator>
38#include <limits>
39#include <vector>
40
41namespace theplu {
42namespace yat {
43namespace statistics { 
44
45  /**
46     \brief Tukey's Biweight Estimator
47
48     Calculates the estimate as a weighted sum
49     \f$ m = \frac{\sum w_i(1-u_i^2)^2x_i}{\sum w_i(1-u_i^2)^2 } \f$
50
51     where the sum runs over data points with |u|<1 and u is calculated as
52
53     \f$ u_i = \frac{x_i-m}{k*\textrm{mad}} \f$
54
55     As the estimate \a m also appears on left-hand side, the estimate
56     is calculated in an iterative manner.
57
58     \see regression::TukeyBiweight
59     \see mad
60
61     \since New in yat 0.8
62   */
63  class TukeyBiweightEstimator
64  {
65  public:
66    /**
67       \brief Constructor
68
69       \param cutoff defines how close to the center (estimate) a data
70       point needs to be (in terms of MADs) to influence the estimate.
71       \param sorted if true object pressumes input is a sorted range;
72       otherwise object works on a copy of the range which will be
73       sorted.
74     */
75    explicit TukeyBiweightEstimator(double cutoff=4.685, bool sorted=false)
76      : cutoff_(cutoff), sorted_(sorted) {}
77
78    /**
79       \return Tukey's Biweight Estimate
80     */
81    template<typename RandomAccessIterator>
82    double operator()(RandomAccessIterator first, 
83                      RandomAccessIterator last) const;
84
85  private:
86    double cutoff_;
87    bool sorted_;
88
89    template<typename RandomAccessIterator>
90    double estimate(RandomAccessIterator first, 
91                    RandomAccessIterator last) const;
92
93    template<typename InputIterator>
94    double estimate(InputIterator first, InputIterator last, 
95                    double center, double spread) const;
96  };
97
98  template<typename RandomAccessIterator>
99  double TukeyBiweightEstimator::operator()(RandomAccessIterator first, 
100                                            RandomAccessIterator last) const
101  {
102    BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
103    using utility::DataIteratorConcept;
104    BOOST_CONCEPT_ASSERT((DataIteratorConcept<RandomAccessIterator>));
105
106    if (sorted_)
107      return estimate(first, last);
108
109    // if not sorted, create a sorted copy
110    typedef typename std::iterator_traits<RandomAccessIterator> traits;
111    std::vector<typename traits::value_type> vec;
112    vec.reserve(last-first);
113    std::copy(first, last, std::back_inserter(vec));
114    std::sort(vec.begin(), vec.end());
115    return estimate(vec.begin(), vec.end());
116
117    const double scale = mad(first, last, true);
118    double m = median(first, last, true);
119    if (scale==0)
120      return m;
121    // ensure m != m0
122    double m0 = m+1.0;
123    size_t epoch = 0;
124    while (m!=m0) {
125      m0 = m;
126      m = estimate(first, last, m, scale);
127      ++epoch;
128      if (epoch>1000)
129        utility::runtime_error("TukeyBiweightIterator: too many epochs");
130    }
131    return m;
132  }
133
134
135  template<typename RandomAccessIterator>
136  double TukeyBiweightEstimator::estimate(RandomAccessIterator first, 
137                                          RandomAccessIterator last) const
138  {
139    BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
140    using utility::DataIteratorConcept;
141    BOOST_CONCEPT_ASSERT((DataIteratorConcept<RandomAccessIterator>));
142
143    const double scale = mad(first, last, true);
144    double m = median(first, last, true);
145    // if mad is zero all (non-zero weight) data points are equal and
146    // median is the only sensible estimate. Also, calculations below
147    // would "divide by zero" so we need to interupt here.
148    if (scale==0)
149      return m;
150    double m0 = m+1.0;
151    size_t epoch = 0;
152    // FIXME: let user define convergence requirement
153    while (m!=m0) {
154      m0 = m;
155      m = estimate(first, last, m, scale);
156      ++epoch;
157      // FIXME: let user define maximal epochs
158      if (epoch>1000)
159        utility::runtime_error("TukeyBiweightIterator: too many epochs");
160    }
161    return m;
162  }
163
164
165  template<typename InputIterator>
166  double TukeyBiweightEstimator::estimate(InputIterator first, 
167                                          InputIterator last, 
168                                          double center, double spread) const
169  {
170    double scale = spread*cutoff_;
171    AveragerWeighted averager;
172    regression::TukeyBiweight biweight;
173    utility::iterator_traits<InputIterator> traits;
174    for ( ; first!=last; ++first) {
175      double x = traits.data(first);
176      double w = traits.weight(first) * biweight((x-center)/scale);
177      averager.add(x, w); 
178    }
179    return averager.mean();
180  }
181
182}}} // end of namespace theplu yat statistics
183# endif
Note: See TracBrowser for help on using the repository browser.