1 | #ifndef _theplu_yat_normalizer_spearman_ |
---|
2 | #define _theplu_yat_normalizer_spearman_ |
---|
3 | |
---|
4 | // $Id: Spearman.h 2154 2010-01-17 22:21:29Z peter $ |
---|
5 | |
---|
6 | /* |
---|
7 | Copyright (C) 2008 Jari Häkkinen, Peter Johansson |
---|
8 | Copyright (C) 2009 Peter Johansson |
---|
9 | |
---|
10 | This file is part of the yat library, http://dev.thep.lu.se/yat |
---|
11 | |
---|
12 | The yat library is free software; you can redistribute it and/or |
---|
13 | modify it under the terms of the GNU General Public License as |
---|
14 | published by the Free Software Foundation; either version 3 of the |
---|
15 | License, or (at your option) any later version. |
---|
16 | |
---|
17 | The yat library is distributed in the hope that it will be useful, |
---|
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
20 | General Public License for more details. |
---|
21 | |
---|
22 | You should have received a copy of the GNU General Public License |
---|
23 | along with yat. If not, see <http://www.gnu.org/licenses/>. |
---|
24 | */ |
---|
25 | |
---|
26 | #include "yat/utility/DataIterator.h" |
---|
27 | #include "yat/utility/iterator_traits.h" |
---|
28 | #include "yat/utility/sort_index.h" |
---|
29 | #include "yat/utility/WeightIterator.h" |
---|
30 | |
---|
31 | #include <boost/concept_check.hpp> |
---|
32 | |
---|
33 | #include <algorithm> |
---|
34 | #include <functional> |
---|
35 | #include <vector> |
---|
36 | |
---|
37 | namespace theplu { |
---|
38 | namespace yat { |
---|
39 | namespace normalizer { |
---|
40 | |
---|
41 | /** |
---|
42 | \brief Replace elements with normalized rank |
---|
43 | |
---|
44 | Each element x is replaced by \f$ \frac{\sum I(x_i-x) w_i}{\sum |
---|
45 | w_i} \f$ where \f$ I(x) = 1 \f$ for \f$ x>0 \f$, \f$I(x) = 0.5 |
---|
46 | \f$ for \f$ x=0 \f$, and \f$ I(x) = 0 \f$ for \f$ x<0 \f$. |
---|
47 | |
---|
48 | \since New in yat 0.5 |
---|
49 | */ |
---|
50 | class Spearman |
---|
51 | { |
---|
52 | public: |
---|
53 | /** |
---|
54 | \brief default constructor |
---|
55 | */ |
---|
56 | Spearman(void){} |
---|
57 | |
---|
58 | /** |
---|
59 | It is possible to centralize a range "in place"; it is |
---|
60 | permissible for the iterators \a first and \a result to be the |
---|
61 | same. |
---|
62 | |
---|
63 | Type Requirements: |
---|
64 | - RandomAccessIter1 must be a \random_access_iterator |
---|
65 | - RandomAccessIter2 must be a mutable \random_access_iterator |
---|
66 | |
---|
67 | */ |
---|
68 | template<typename RandomAccessIter1, typename RandomAccessIter2> |
---|
69 | void operator()(RandomAccessIter1 first, RandomAccessIter1 last, |
---|
70 | RandomAccessIter2 result) const |
---|
71 | { |
---|
72 | BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIter1>)); |
---|
73 | BOOST_CONCEPT_ASSERT((boost::Mutable_RandomAccessIterator<RandomAccessIter2>)); |
---|
74 | typename utility::weighted_iterator_traits<RandomAccessIter1>::type tag; |
---|
75 | normalize(first, last, result, tag); |
---|
76 | } |
---|
77 | |
---|
78 | |
---|
79 | private: |
---|
80 | // unweighted version |
---|
81 | template<typename RandomAccessIter1, typename RandomAccessIter2> |
---|
82 | void normalize(RandomAccessIter1 first, RandomAccessIter1 last, |
---|
83 | RandomAccessIter2 result, |
---|
84 | utility::unweighted_iterator_tag) const |
---|
85 | { |
---|
86 | std::vector<size_t> perm; |
---|
87 | utility::sort_index(first, last, perm); |
---|
88 | double n = perm.size(); |
---|
89 | size_t i=0; |
---|
90 | while ( i<perm.size() ) { |
---|
91 | size_t min_i = i; |
---|
92 | while (i<perm.size() && first[perm[i]]<=first[perm[min_i]]) |
---|
93 | ++i; |
---|
94 | double res = static_cast<double>(i + min_i)/(2*n); |
---|
95 | for ( ; min_i < i; ++min_i) |
---|
96 | result[perm[min_i]] = res; |
---|
97 | } |
---|
98 | } |
---|
99 | |
---|
100 | |
---|
101 | // weighted version |
---|
102 | template<typename RandomAccessIter1, typename RandomAccessIter2> |
---|
103 | void normalize(RandomAccessIter1 first, RandomAccessIter1 last, |
---|
104 | RandomAccessIter2 result, |
---|
105 | utility::weighted_iterator_tag) const |
---|
106 | { |
---|
107 | std::copy(utility::weight_iterator(first), |
---|
108 | utility::weight_iterator(last), |
---|
109 | utility::weight_iterator(result)); |
---|
110 | std::vector<double> input_vec; |
---|
111 | input_vec.reserve(last-first); |
---|
112 | // set values with w=0 to 0 to avoid problems with NaNs |
---|
113 | utility::iterator_traits<RandomAccessIter1> trait; |
---|
114 | for (RandomAccessIter1 i=first; i!=last; ++i) { |
---|
115 | if (trait.weight(i)==0) |
---|
116 | input_vec.push_back(0.0); |
---|
117 | else |
---|
118 | input_vec.push_back(trait.data(i)); |
---|
119 | } |
---|
120 | |
---|
121 | std::vector<size_t> perm(input_vec.size()); |
---|
122 | utility::sort_index(input_vec.begin(), input_vec.end(), perm); |
---|
123 | utility::iterator_traits<RandomAccessIter2> rtrait; |
---|
124 | |
---|
125 | double sum_w=0; |
---|
126 | size_t i=0; |
---|
127 | while ( i<perm.size() ) { |
---|
128 | double w=0; |
---|
129 | size_t min_i = i; |
---|
130 | while (i<perm.size() && (trait.weight(first+perm[i])==0 || |
---|
131 | trait.data(first+perm[i]) <= |
---|
132 | trait.data(first+perm[min_i])) ) { |
---|
133 | w += trait.weight(first+perm[i]); |
---|
134 | ++i; |
---|
135 | } |
---|
136 | double res=sum_w + 0.5*w; |
---|
137 | for ( size_t j=min_i; j<i; ++j) |
---|
138 | rtrait.data(result+perm[j]) = res; |
---|
139 | sum_w += w; |
---|
140 | } |
---|
141 | |
---|
142 | size_t n = std::distance(first, last); |
---|
143 | std::transform(utility::data_iterator(result), |
---|
144 | utility::data_iterator(result+n), |
---|
145 | utility::data_iterator(result), |
---|
146 | std::bind2nd(std::divides<double>(), sum_w)); |
---|
147 | } |
---|
148 | |
---|
149 | }; |
---|
150 | |
---|
151 | }}} // end of namespace normalizer, yat and thep |
---|
152 | #endif |
---|