source: branches/0.7-stable/yat/normalizer/Spearman.h @ 2407

Last change on this file since 2407 was 2161, checked in by Peter, 12 years ago

updating copyright years

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 4.5 KB
Line 
1#ifndef _theplu_yat_normalizer_spearman_
2#define _theplu_yat_normalizer_spearman_
3
4// $Id: Spearman.h 2161 2010-01-19 23:59:48Z peter $
5
6/*
7  Copyright (C) 2008 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2009, 2010 Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include "utility.h"
27
28#include "yat/utility/DataIterator.h"
29#include "yat/utility/iterator_traits.h"
30#include "yat/utility/sort_index.h"
31#include "yat/utility/WeightIterator.h"
32
33#include <boost/concept_check.hpp>
34
35#include <algorithm>
36#include <functional>
37#include <vector>
38
39namespace theplu {
40namespace yat {
41namespace normalizer {
42
43  /**
44     \brief Replace elements with normalized rank
45
46     Each element x is replaced by \f$ \frac{\sum I(x_i-x) w_i}{\sum
47     w_i} \f$ where \f$ I(x) = 1 \f$ for \f$ x>0 \f$, \f$I(x) = 0.5
48     \f$ for \f$ x=0 \f$, and \f$ I(x) = 0 \f$ for \f$ x<0 \f$.
49
50     \since New in yat 0.5
51   */
52  class Spearman
53  {
54  public:
55    /**
56       \brief default constructor
57     */
58    Spearman(void){}
59
60    /**
61       It is possible to centralize a range "in place"; it is
62       permissible for the iterators \a first and \a result to be the
63       same.
64
65       Type Requirements:
66       - RandomAccessIter1 must be a \random_access_iterator
67       - RandomAccessIter2 must be a mutable \random_access_iterator
68
69     */
70    template<typename RandomAccessIter1, typename RandomAccessIter2>
71    void operator()(RandomAccessIter1 first, RandomAccessIter1 last,
72                    RandomAccessIter2 result) const
73    {
74      BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIter1>));
75      BOOST_CONCEPT_ASSERT((boost::Mutable_RandomAccessIterator<RandomAccessIter2>));
76      using utility::weighted_if_any2;
77      typename weighted_if_any2<RandomAccessIter1,RandomAccessIter2>::type tag;
78      normalize(first, last, result, tag);
79    }
80
81
82  private:
83    // unweighted version
84    template<typename RandomAccessIter1, typename RandomAccessIter2>
85    void normalize(RandomAccessIter1 first, RandomAccessIter1 last,
86                   RandomAccessIter2 result, 
87                   utility::unweighted_iterator_tag) const
88    {
89      std::vector<size_t> perm;
90      utility::sort_index(first, last, perm);
91      double n = perm.size();
92      size_t i=0; 
93      while ( i<perm.size() ) {
94        size_t min_i = i;
95        while (i<perm.size() && first[perm[i]]<=first[perm[min_i]])
96          ++i;
97        double res = static_cast<double>(i + min_i)/(2*n);
98        for ( ; min_i < i; ++min_i)
99          result[perm[min_i]] = res;
100      }
101    }
102
103
104    // weighted version
105    template<typename RandomAccessIter1, typename RandomAccessIter2>
106    void normalize(RandomAccessIter1 first, RandomAccessIter1 last,
107                   RandomAccessIter2 result, 
108                   utility::weighted_iterator_tag) const
109    {
110      detail::copy_weight_if_weighted(first, last, result);
111      std::vector<double> input_vec;
112      input_vec.reserve(last-first);
113      // set values with w=0 to 0 to avoid problems with NaNs
114      utility::iterator_traits<RandomAccessIter1> trait;
115      for (RandomAccessIter1 i=first; i!=last; ++i) {
116        if (trait.weight(i)==0)
117          input_vec.push_back(0.0);
118        else
119          input_vec.push_back(trait.data(i));
120      }
121
122      std::vector<size_t> perm(input_vec.size());
123      utility::sort_index(input_vec.begin(), input_vec.end(), perm);
124      utility::iterator_traits<RandomAccessIter2> rtrait;
125
126      double sum_w=0;
127      size_t i=0; 
128      while ( i<perm.size() ) {
129        double w=0;
130        size_t min_i = i;
131        while (i<perm.size() && (trait.weight(first+perm[i])==0 ||
132                                 trait.data(first+perm[i]) <= 
133                                 trait.data(first+perm[min_i])) ) {
134          w += trait.weight(first+perm[i]);
135          ++i;
136        }
137        double res=sum_w + 0.5*w;
138        for ( size_t j=min_i; j<i; ++j)
139          rtrait.data(result+perm[j]) = res;
140        sum_w += w;
141      }       
142
143      size_t n = std::distance(first, last);
144      std::transform(utility::data_iterator(result),
145                     utility::data_iterator(result+n),
146                     utility::data_iterator(result),
147                     std::bind2nd(std::divides<double>(), sum_w));
148    }
149
150  };
151
152}}} // end of namespace normalizer, yat and thep
153#endif
Note: See TracBrowser for help on using the repository browser.