1 | #ifndef _theplu_yat_statistics_tukey_biweight_estimator |
---|
2 | #define _theplu_yat_statistics_tukey_biweight_estimator |
---|
3 | |
---|
4 | // $Id: TukeyBiweightEstimator.h 2817 2012-08-29 00:38:17Z peter $ |
---|
5 | |
---|
6 | /* |
---|
7 | Copyright (C) 2011, 2012 Peter Johansson |
---|
8 | |
---|
9 | This file is part of the yat library, http://dev.thep.lu.se/yat |
---|
10 | |
---|
11 | The yat library is free software; you can redistribute it and/or |
---|
12 | modify it under the terms of the GNU General Public License as |
---|
13 | published by the Free Software Foundation; either version 3 of the |
---|
14 | License, or (at your option) any later version. |
---|
15 | |
---|
16 | The yat library is distributed in the hope that it will be useful, |
---|
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
19 | General Public License for more details. |
---|
20 | |
---|
21 | You should have received a copy of the GNU General Public License |
---|
22 | along with yat. If not, see <http://www.gnu.org/licenses/>. |
---|
23 | */ |
---|
24 | |
---|
25 | #include "AveragerWeighted.h" |
---|
26 | #include "utility.h" |
---|
27 | |
---|
28 | #include "yat/regression/TukeyBiweight.h" |
---|
29 | |
---|
30 | #include "yat/utility/concept_check.h" |
---|
31 | #include "yat/utility/Exception.h" |
---|
32 | #include "yat/utility/iterator_traits.h" |
---|
33 | |
---|
34 | #include <boost/concept_check.hpp> |
---|
35 | |
---|
36 | #include <algorithm> |
---|
37 | #include <iterator> |
---|
38 | #include <limits> |
---|
39 | #include <vector> |
---|
40 | |
---|
41 | namespace theplu { |
---|
42 | namespace yat { |
---|
43 | namespace statistics { |
---|
44 | |
---|
45 | /** |
---|
46 | \brief Tukey's Biweight Estimator |
---|
47 | |
---|
48 | Calculates the estimate as a weighted sum |
---|
49 | \f$ m = \frac{\sum w_i(1-u_i^2)^2x_i}{\sum w_i(1-u_i^2)^2 } \f$ |
---|
50 | |
---|
51 | where the sums run over data points with |u|<1 and u is calculated as |
---|
52 | |
---|
53 | \f$ u_i = \frac{x_i-m}{k*\textrm{mad}} \f$ |
---|
54 | |
---|
55 | As the estimate \a m also appears on right-hand side, the estimate |
---|
56 | is calculated in an iterative manner. |
---|
57 | |
---|
58 | \see regression::TukeyBiweight |
---|
59 | \see mad |
---|
60 | |
---|
61 | \since New in yat 0.8 |
---|
62 | */ |
---|
63 | class TukeyBiweightEstimator |
---|
64 | { |
---|
65 | public: |
---|
66 | /** |
---|
67 | \brief Constructor |
---|
68 | |
---|
69 | \param cutoff defines how close to the center (estimate) a data |
---|
70 | point needs to be (in terms of MADs) to influence the estimate. |
---|
71 | \param sorted if true object pressumes input is a sorted range; |
---|
72 | otherwise object works on a copy of the range which will be |
---|
73 | sorted. |
---|
74 | */ |
---|
75 | explicit TukeyBiweightEstimator(double cutoff=4.685, bool sorted=false) |
---|
76 | : cutoff_(cutoff), sorted_(sorted) {} |
---|
77 | |
---|
78 | /** |
---|
79 | \return Tukey's Biweight Estimate |
---|
80 | */ |
---|
81 | template<typename RandomAccessIterator> |
---|
82 | double operator()(RandomAccessIterator first, |
---|
83 | RandomAccessIterator last) const; |
---|
84 | |
---|
85 | private: |
---|
86 | double cutoff_; |
---|
87 | bool sorted_; |
---|
88 | |
---|
89 | template<typename RandomAccessIterator> |
---|
90 | double estimate(RandomAccessIterator first, |
---|
91 | RandomAccessIterator last) const; |
---|
92 | |
---|
93 | template<typename InputIterator> |
---|
94 | double estimate(InputIterator first, InputIterator last, |
---|
95 | double center, double spread) const; |
---|
96 | }; |
---|
97 | |
---|
98 | template<typename RandomAccessIterator> |
---|
99 | double TukeyBiweightEstimator::operator()(RandomAccessIterator first, |
---|
100 | RandomAccessIterator last) const |
---|
101 | { |
---|
102 | BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>)); |
---|
103 | using utility::DataIteratorConcept; |
---|
104 | BOOST_CONCEPT_ASSERT((DataIteratorConcept<RandomAccessIterator>)); |
---|
105 | |
---|
106 | if (sorted_) |
---|
107 | return estimate(first, last); |
---|
108 | |
---|
109 | // if not sorted, create a sorted copy |
---|
110 | typedef typename std::iterator_traits<RandomAccessIterator> traits; |
---|
111 | std::vector<typename traits::value_type> vec; |
---|
112 | vec.reserve(last-first); |
---|
113 | std::copy(first, last, std::back_inserter(vec)); |
---|
114 | std::sort(vec.begin(), vec.end()); |
---|
115 | return estimate(vec.begin(), vec.end()); |
---|
116 | |
---|
117 | const double scale = mad(first, last, true); |
---|
118 | double m = median(first, last, true); |
---|
119 | if (scale==0) |
---|
120 | return m; |
---|
121 | // ensure m != m0 |
---|
122 | double m0 = m+1.0; |
---|
123 | size_t epoch = 0; |
---|
124 | while (m!=m0) { |
---|
125 | m0 = m; |
---|
126 | m = estimate(first, last, m, scale); |
---|
127 | ++epoch; |
---|
128 | if (epoch>1000) |
---|
129 | utility::runtime_error("TukeyBiweightIterator: too many epochs"); |
---|
130 | } |
---|
131 | return m; |
---|
132 | } |
---|
133 | |
---|
134 | |
---|
135 | template<typename RandomAccessIterator> |
---|
136 | double TukeyBiweightEstimator::estimate(RandomAccessIterator first, |
---|
137 | RandomAccessIterator last) const |
---|
138 | { |
---|
139 | BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>)); |
---|
140 | using utility::DataIteratorConcept; |
---|
141 | BOOST_CONCEPT_ASSERT((DataIteratorConcept<RandomAccessIterator>)); |
---|
142 | |
---|
143 | const double scale = mad(first, last, true); |
---|
144 | double m = median(first, last, true); |
---|
145 | // if mad is zero all (non-zero weight) data points are equal and |
---|
146 | // median is the only sensible estimate. Also, calculations below |
---|
147 | // would "divide by zero" so we need to interupt here. |
---|
148 | if (scale==0) |
---|
149 | return m; |
---|
150 | double m0 = m+1.0; |
---|
151 | size_t epoch = 0; |
---|
152 | // FIXME: let user define convergence requirement |
---|
153 | while (m!=m0) { |
---|
154 | m0 = m; |
---|
155 | m = estimate(first, last, m, scale); |
---|
156 | ++epoch; |
---|
157 | // FIXME: let user define maximal epochs |
---|
158 | if (epoch>1000) |
---|
159 | utility::runtime_error("TukeyBiweightIterator: too many epochs"); |
---|
160 | } |
---|
161 | return m; |
---|
162 | } |
---|
163 | |
---|
164 | |
---|
165 | template<typename InputIterator> |
---|
166 | double TukeyBiweightEstimator::estimate(InputIterator first, |
---|
167 | InputIterator last, |
---|
168 | double center, double spread) const |
---|
169 | { |
---|
170 | double scale = spread*cutoff_; |
---|
171 | AveragerWeighted averager; |
---|
172 | regression::TukeyBiweight biweight; |
---|
173 | utility::iterator_traits<InputIterator> traits; |
---|
174 | for ( ; first!=last; ++first) { |
---|
175 | double x = traits.data(first); |
---|
176 | double w = traits.weight(first) * biweight((x-center)/scale); |
---|
177 | averager.add(x, w); |
---|
178 | } |
---|
179 | return averager.mean(); |
---|
180 | } |
---|
181 | |
---|
182 | }}} // end of namespace theplu yat statistics |
---|
183 | # endif |
---|