1 | #ifndef _theplu_yat_normalizer_z_score_ |
---|
2 | #define _theplu_yat_normalizer_z_score_ |
---|
3 | |
---|
4 | // $Id: Zscore.h 4089 2021-09-07 00:56:40Z peter $ |
---|
5 | |
---|
6 | /* |
---|
7 | Copyright (C) 2008 Jari Häkkinen, Peter Johansson |
---|
8 | Copyright (C) 2009, 2010, 2016, 2021 Peter Johansson |
---|
9 | |
---|
10 | This file is part of the yat library, http://dev.thep.lu.se/yat |
---|
11 | |
---|
12 | The yat library is free software; you can redistribute it and/or |
---|
13 | modify it under the terms of the GNU General Public License as |
---|
14 | published by the Free Software Foundation; either version 3 of the |
---|
15 | License, or (at your option) any later version. |
---|
16 | |
---|
17 | The yat library is distributed in the hope that it will be useful, |
---|
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
20 | General Public License for more details. |
---|
21 | |
---|
22 | You should have received a copy of the GNU General Public License |
---|
23 | along with yat. If not, see <http://www.gnu.org/licenses/>. |
---|
24 | */ |
---|
25 | |
---|
26 | #include "utility.h" |
---|
27 | |
---|
28 | #include "yat/statistics/Averager.h" |
---|
29 | #include "yat/statistics/AveragerWeighted.h" |
---|
30 | |
---|
31 | #include "yat/utility/concept_check.h" |
---|
32 | #include "yat/utility/iterator_traits.h" |
---|
33 | |
---|
34 | #include <boost/concept_check.hpp> |
---|
35 | |
---|
36 | namespace theplu { |
---|
37 | namespace yat { |
---|
38 | namespace normalizer { |
---|
39 | |
---|
40 | /** |
---|
41 | \brief Zero mean and unity variance |
---|
42 | |
---|
43 | Shift and scale the values in a range as: \f$ y_i = |
---|
44 | \frac{x_i-m}{k*s} \f$ where \a m is the mean, \a s is the |
---|
45 | standard deviation, and \a k is set in constructor (default: |
---|
46 | 1.0). After normalization, the range will have zero mean and |
---|
47 | variance = \f$ 1/k^2 \f$. |
---|
48 | |
---|
49 | Type Requirements: |
---|
50 | - \c InputIterator is \ref concept_data_iterator |
---|
51 | - \c InputIterator is \forward_traversal_iterator |
---|
52 | - \c OutputIterator is \writable_iterator |
---|
53 | - \c OutputIterator is \forward_traversal_iterator |
---|
54 | |
---|
55 | \since New in yat 0.5 |
---|
56 | */ |
---|
57 | class Zscore |
---|
58 | { |
---|
59 | public: |
---|
60 | /** |
---|
61 | \param k In the scaling step elements are divided by \c k times |
---|
62 | the standard deviation |
---|
63 | |
---|
64 | \since New in yat 0.19 |
---|
65 | */ |
---|
66 | explicit Zscore(double k=1.0); |
---|
67 | |
---|
68 | /** |
---|
69 | \return the k-factor set in constructor |
---|
70 | |
---|
71 | \since new in yat 0.19 |
---|
72 | */ |
---|
73 | double k(void) const; |
---|
74 | |
---|
75 | /** |
---|
76 | The element in range [result, result + (last-first)) is |
---|
77 | calculated as result[i] = (first[i] - m) / s where m and std |
---|
78 | are the mean and standard deviation, respectively, of the range |
---|
79 | [first, last). |
---|
80 | |
---|
81 | It is possible to centralize a range "in place"; it is |
---|
82 | permissible for the iterators \a first and \a result to be the |
---|
83 | same. \see std::transform |
---|
84 | */ |
---|
85 | template<class InputIterator, class OutputIterator> |
---|
86 | void operator()(InputIterator first, InputIterator last, |
---|
87 | OutputIterator result) const |
---|
88 | { |
---|
89 | BOOST_CONCEPT_ASSERT((utility::DataIteratorConcept<InputIterator>)); |
---|
90 | BOOST_CONCEPT_ASSERT((utility::DataIteratorConcept<OutputIterator>)); |
---|
91 | |
---|
92 | // needed for weighted_if_any2 |
---|
93 | typename utility::weighted_if_any2<InputIterator, OutputIterator>::type tag; |
---|
94 | normalize(first, last, result, tag); |
---|
95 | } |
---|
96 | |
---|
97 | private: |
---|
98 | double k_; |
---|
99 | |
---|
100 | |
---|
101 | template<class ForwardIterator, class OutputIterator> |
---|
102 | void normalize(ForwardIterator first, ForwardIterator last, |
---|
103 | OutputIterator result, |
---|
104 | utility::unweighted_iterator_tag tag) const |
---|
105 | { |
---|
106 | // we need to traverse the input range once in calculating m |
---|
107 | // (and std) and a second time when assigning new values, so |
---|
108 | // single pass iterator will not suffice. |
---|
109 | BOOST_CONCEPT_ASSERT((boost_concepts::ForwardTraversal<ForwardIterator>)); |
---|
110 | |
---|
111 | BOOST_CONCEPT_ASSERT((boost_concepts::WritableIterator<OutputIterator>)); |
---|
112 | |
---|
113 | statistics::Averager a; |
---|
114 | add(a, first, last); |
---|
115 | double m = a.mean(); |
---|
116 | double factor = 1.0 / (a.std() * k_); |
---|
117 | while (first!=last) { |
---|
118 | *result = (*first - m) * factor; |
---|
119 | ++first; |
---|
120 | ++result; |
---|
121 | } |
---|
122 | } |
---|
123 | |
---|
124 | template<class ForwardIterator, class OutputIterator> |
---|
125 | void normalize(ForwardIterator first, ForwardIterator last, |
---|
126 | OutputIterator result, |
---|
127 | utility::weighted_iterator_tag tag) const |
---|
128 | { |
---|
129 | // we need to traverse the input range once in calculating m |
---|
130 | // (and std) and a second time when assigning new values, so |
---|
131 | // single pass iterator will not suffice. |
---|
132 | BOOST_CONCEPT_ASSERT((boost_concepts::ForwardTraversal<ForwardIterator>)); |
---|
133 | |
---|
134 | BOOST_CONCEPT_ASSERT((boost_concepts::WritableIterator<OutputIterator>)); |
---|
135 | // we traverse output range twice |
---|
136 | BOOST_CONCEPT_ASSERT((boost_concepts::ForwardTraversal<OutputIterator>)); |
---|
137 | detail::copy_weight_if_weighted(first, last, result); |
---|
138 | statistics::AveragerWeighted a; |
---|
139 | add(a, first, last); |
---|
140 | double m = a.mean(); |
---|
141 | double factor = 1.0 / (a.std() * k_); |
---|
142 | utility::iterator_traits<ForwardIterator> in_trait; |
---|
143 | utility::iterator_traits<OutputIterator> out_trait; |
---|
144 | while (first!=last) { |
---|
145 | out_trait.data(result) = (in_trait.data(first) - m) * factor; |
---|
146 | ++first; |
---|
147 | ++result; |
---|
148 | } |
---|
149 | } |
---|
150 | }; |
---|
151 | |
---|
152 | }}} // end of namespace normalizer, yat and theplu |
---|
153 | #endif |
---|