Ignore:
Timestamp:
Sep 19, 2008, 4:29:24 PM (13 years ago)
Author:
Peter
Message:

fixes #439 - and also took care of ties

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/yat/normalizer/Spearman.h

    r1510 r1512  
    5252       same.
    5353
    54        Each element x is replaced by \f$ sum(w_i)/sum(w) \f$ where the
    55        first sum runs over elements for which \f$ x_i < x \f$.
     54       Each element x is replaced by \f$ \frac{\sum I(x-x_i)
     55       w_i}{\sum w_i} \f$ where I(x) = 1 for x>0, I(x) = 0.5 for x=0,
     56       and I(x) = 0 for x<0.
    5657
    57        In the unweighted case that can be simplified to \f$ rank/n
    58        \f$, i.e., the smallest element is assigned to 0, next smallest
    59        \f$ 1/n \f$ etc.
    60        
    6158       \return result + (last-first)
    6259     */
     
    8077      utility::sort_index(first, last, perm);
    8178      double n = perm.size();
    82       for ( size_t i=0; i<perm.size(); ++i)
    83         result[perm[i]] = static_cast<double>(i)/n;
     79      size_t i=0;
     80      while ( i<perm.size() ) {
     81        size_t min_i = i;
     82        while (i<perm.size() && first[perm[i]]<=first[perm[min_i]])
     83          ++i;
     84        double res = static_cast<double>(i + min_i)/(2*n);
     85        for ( ; min_i < i; ++min_i)
     86          result[perm[min_i]] = res;
     87      }
    8488      return result + std::distance(first, last);
    8589    }
     
    96100                utility::weight_iterator(result));
    97101      // set values with w=0 to 0 to avoid problems with NaNs
    98       utility::iterator_traits<ForwardIterator> forward_trait;
     102      utility::iterator_traits<ForwardIterator> trait;
    99103      for (ForwardIterator i=first; i!=last; ++i)
    100         if (forward_trait.weight(i)==0)
    101           forward_trait.data(i)=0.0;
     104        if (trait.weight(i)==0)
     105          trait.data(i)=0.0;
    102106
    103       std::vector<size_t> index(std::distance(first, last));
     107      std::vector<size_t> perm(std::distance(first, last));
    104108      utility::sort_index(utility::data_iterator(first),
    105                           utility::data_iterator(last), index);
    106       utility::iterator_traits<RandomAccessIterator> trait;
    107       trait.data(result+index[0])=0;
    108       for (size_t i=1; i<index.size(); ++i)
    109         trait.data(result+index[i]) =
    110           trait.data(result+index[i-1]) + trait.weight(result+index[i-1]);
     109                          utility::data_iterator(last), perm);
     110      utility::iterator_traits<RandomAccessIterator> rtrait;
     111
     112      double sum_w=0;
     113      size_t i=0;
     114      while ( i<perm.size() ) {
     115        double w=0;
     116        size_t min_i = i;
     117        while (i<perm.size() && (trait.weight(first+perm[i])==0 ||
     118                                 trait.data(first+perm[i]) <=
     119                                 trait.data(first+perm[min_i])) ) {
     120          w += trait.weight(first+perm[i]);
     121          ++i;
     122        }
     123        double res=sum_w + 0.5*w;
     124        for ( size_t j=min_i; j<i; ++j)
     125          rtrait.data(result+perm[j]) = res;
     126        sum_w += w;
     127      }       
     128
    111129      size_t n = std::distance(first, last);
    112       double w_sum = trait.data(result+index.back()) +
    113         trait.weight(result+index.back());
    114130      std::transform(utility::data_iterator(result),
    115131                     utility::data_iterator(result+n),
    116132                     utility::data_iterator(result),
    117                      std::bind2nd(std::divides<double>(), w_sum));
     133                     std::bind2nd(std::divides<double>(), sum_w));
    118134      return result + n;
    119135    }
Note: See TracChangeset for help on using the changeset viewer.