1 | #ifndef _theplu_yat_normalizer_spearman_ |
---|
2 | #define _theplu_yat_normalizer_spearman_ |
---|
3 | |
---|
4 | /* |
---|
5 | Copyright (C) 2008 Peter Johansson |
---|
6 | |
---|
7 | This file is part of the yat library, http://dev.thep.lu.se/yat |
---|
8 | |
---|
9 | The yat library is free software; you can redistribute it and/or |
---|
10 | modify it under the terms of the GNU General Public License as |
---|
11 | published by the Free Software Foundation; either version 3 of the |
---|
12 | License, or (at your option) any later version. |
---|
13 | |
---|
14 | The yat library is distributed in the hope that it will be useful, |
---|
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
17 | General Public License for more details. |
---|
18 | |
---|
19 | You should have received a copy of the GNU General Public License |
---|
20 | along with yat. If not, see <http://www.gnu.org/licenses/>. |
---|
21 | */ |
---|
22 | |
---|
23 | #include "yat/utility/DataIterator.h" |
---|
24 | #include "yat/utility/iterator_traits.h" |
---|
25 | #include "yat/utility/sort_index.h" |
---|
26 | #include "yat/utility/WeightIterator.h" |
---|
27 | |
---|
28 | #include <algorithm> |
---|
29 | #include <functional> |
---|
30 | #include <vector> |
---|
31 | |
---|
32 | namespace theplu { |
---|
33 | namespace yat { |
---|
34 | namespace normalizer { |
---|
35 | |
---|
36 | /** |
---|
37 | \brief Replace elements with normalized rank |
---|
38 | |
---|
39 | \since New in yat 0.5 |
---|
40 | */ |
---|
41 | class Spearman |
---|
42 | { |
---|
43 | public: |
---|
44 | /** |
---|
45 | \brief default constructor |
---|
46 | */ |
---|
47 | Spearman(void){} |
---|
48 | |
---|
49 | /** |
---|
50 | It is possible to centralize a range "in place"; it is |
---|
51 | permissible for the iterators \a first and \a result to be the |
---|
52 | same. |
---|
53 | |
---|
54 | Each element x is replaced by \f$ sum(w_i)/sum(w) \f$ where the |
---|
55 | first sum runs over elements for which \f$ x_i < x \f$. |
---|
56 | |
---|
57 | In the unweighted case that can be simplified to \f$ rank/n |
---|
58 | \f$, i.e., the samllest element is assigned to 0, next smallest |
---|
59 | \f$ 1/n \f$ etc. |
---|
60 | |
---|
61 | \return result + (last-first) |
---|
62 | */ |
---|
63 | template<typename ForwardIterator, typename RandomAccessIterator> |
---|
64 | RandomAccessIterator operator()(ForwardIterator first, ForwardIterator last, |
---|
65 | RandomAccessIterator result) const |
---|
66 | { |
---|
67 | typename utility::weighted_iterator_traits<ForwardIterator>::type tag; |
---|
68 | return normalize(first, last, result, tag); |
---|
69 | } |
---|
70 | |
---|
71 | |
---|
72 | private: |
---|
73 | // unweighted version |
---|
74 | template<typename ForwardIterator, typename RandomAccessIterator> |
---|
75 | RandomAccessIterator normalize(ForwardIterator first, ForwardIterator last, |
---|
76 | RandomAccessIterator result, |
---|
77 | utility::unweighted_iterator_tag) const |
---|
78 | { |
---|
79 | std::vector<size_t> perm; |
---|
80 | utility::sort_index(first, last, perm); |
---|
81 | double n = perm.size(); |
---|
82 | for ( size_t i=0; i<perm.size(); ++i) |
---|
83 | result[perm[i]] = static_cast<double>(i)/n; |
---|
84 | return result + std::distance(first, last); |
---|
85 | } |
---|
86 | |
---|
87 | |
---|
88 | // weighted version |
---|
89 | template<typename ForwardIterator, typename RandomAccessIterator> |
---|
90 | RandomAccessIterator normalize(ForwardIterator first, ForwardIterator last, |
---|
91 | RandomAccessIterator result, |
---|
92 | utility::weighted_iterator_tag) const |
---|
93 | { |
---|
94 | std::copy(utility::weight_iterator(first), |
---|
95 | utility::weight_iterator(last), |
---|
96 | utility::weight_iterator(result)); |
---|
97 | // set values with w=0 to 0 to avoid problems with NaNs |
---|
98 | utility::iterator_traits<ForwardIterator> forward_trait; |
---|
99 | for (ForwardIterator i=first; i!=last; ++i) |
---|
100 | if (forward_trait.weight(i)==0) |
---|
101 | forward_trait.data(i)=0.0; |
---|
102 | |
---|
103 | std::vector<size_t> index(std::distance(first, last)); |
---|
104 | utility::sort_index(utility::data_iterator(first), |
---|
105 | utility::data_iterator(last), index); |
---|
106 | utility::iterator_traits<RandomAccessIterator> trait; |
---|
107 | trait.data(result+index[0])=0; |
---|
108 | for (size_t i=1; i<index.size(); ++i) |
---|
109 | trait.data(result+index[i]) = |
---|
110 | trait.data(result+index[i-1]) + trait.weight(result+index[i-1]); |
---|
111 | size_t n = std::distance(first, last); |
---|
112 | double w_sum = trait.data(result+index.back()) + |
---|
113 | trait.weight(result+index.back()); |
---|
114 | std::transform(utility::data_iterator(result), |
---|
115 | utility::data_iterator(result+n), |
---|
116 | utility::data_iterator(result), |
---|
117 | std::bind2nd(std::divides<double>(), w_sum)); |
---|
118 | return result + n; |
---|
119 | } |
---|
120 | |
---|
121 | }; |
---|
122 | |
---|
123 | }}} // end of namespace normalizer, yat and thep |
---|
124 | #endif |
---|