Ignore:
Timestamp:
Sep 25, 2008, 9:31:57 PM (13 years ago)
Author:
Peter
Message:

adding a Gauss normalizer

File:
1 copied

Legend:

Unmodified
Added
Removed
  • trunk/yat/normalizer/Gauss.h

    r1533 r1536  
    1 #ifndef _theplu_yat_normalizer_spearman_
    2 #define _theplu_yat_normalizer_spearman_
     1#ifndef _theplu_yat_normalizer_gauss_
     2#define _theplu_yat_normalizer_gauss_
    33
    44/*
     
    2121*/
    2222
    23 #include "yat/utility/DataIterator.h"
     23#include "Spearman.h"
    2424#include "yat/utility/iterator_traits.h"
    25 #include "yat/utility/sort_index.h"
    26 #include "yat/utility/WeightIterator.h"
    2725
    28 #include <algorithm>
    29 #include <functional>
    30 #include <vector>
     26#include <gsl/gsl_cdf.h>
    3127
    3228namespace theplu {
     
    3531
    3632  /**
    37      \brief Replace elements with normalized rank
     33     \brief Gaussian Normalizer
    3834
    3935     \since New in yat 0.5
    4036   */
    41   class Spearman
     37  class Gauss
    4238  {
    4339  public:
    44     /**
    45        \brief default constructor
    46      */
    47     Spearman(void){}
    48 
    4940    /**
    5041       It is possible to centralize a range "in place"; it is
     
    5243       same.
    5344
    54        Each element x is replaced by \f$ \frac{\sum I(x_i-x)
    55        w_i}{\sum w_i} \f$ where I(x) = 1 for x>0, I(x) = 0.5 for x=0,
    56        and I(x) = 0 for x<0.
     45       The range is first rank normalized using Spearman, after which
     46       each element is between 0 and unity. Second each element is
     47       replaced by inverse cumulative standard Gaussian distribution.
     48       
     49       After normalization the range will follow a standard Gaussian
     50       distribution (mean zero and unity variance).
     51
     52       \see gsl_cdf_ugaussian_Pinv
    5753
    5854       \return result + (last-first)
     
    6258                                    RandomAccessIterator result) const
    6359    {
    64       typename utility::weighted_iterator_traits<ForwardIterator>::type tag;
    65       return normalize(first, last, result, tag);
     60      Spearman spearman;
     61      spearman(first, last, result);
     62      RandomAccessIterator end = result + (last-first);
     63      utility::iterator_traits<RandomAccessIterator> trait;
     64      while (result != end) {
     65        trait.data(result) = gsl_cdf_ugaussian_Pinv(trait.data(result));
     66        ++result;
     67      }
     68     
     69      return result;
    6670    }
    6771
    6872
    6973  private:
    70     // unweighted version
    71     template<typename ForwardIterator, typename RandomAccessIterator>
    72     RandomAccessIterator normalize(ForwardIterator first, ForwardIterator last,
    73                                    RandomAccessIterator result,
    74                                    utility::unweighted_iterator_tag) const
    75     {
    76       std::vector<size_t> perm;
    77       utility::sort_index(first, last, perm);
    78       double n = perm.size();
    79       size_t i=0;
    80       while ( i<perm.size() ) {
    81         size_t min_i = i;
    82         while (i<perm.size() && first[perm[i]]<=first[perm[min_i]])
    83           ++i;
    84         double res = static_cast<double>(i + min_i)/(2*n);
    85         for ( ; min_i < i; ++min_i)
    86           result[perm[min_i]] = res;
    87       }
    88       return result + std::distance(first, last);
    89     }
    90 
    91 
    92     // weighted version
    93     template<typename ForwardIterator, typename RandomAccessIterator>
    94     RandomAccessIterator normalize(ForwardIterator first, ForwardIterator last,
    95                                    RandomAccessIterator result,
    96                                    utility::weighted_iterator_tag) const
    97     {
    98       std::copy(utility::weight_iterator(first),
    99                 utility::weight_iterator(last),
    100                 utility::weight_iterator(result));
    101       // set values with w=0 to 0 to avoid problems with NaNs
    102       utility::iterator_traits<ForwardIterator> trait;
    103       for (ForwardIterator i=first; i!=last; ++i)
    104         if (trait.weight(i)==0)
    105           trait.data(i)=0.0;
    106 
    107       std::vector<size_t> perm(std::distance(first, last));
    108       utility::sort_index(utility::data_iterator(first),
    109                           utility::data_iterator(last), perm);
    110       utility::iterator_traits<RandomAccessIterator> rtrait;
    111 
    112       double sum_w=0;
    113       size_t i=0;
    114       while ( i<perm.size() ) {
    115         double w=0;
    116         size_t min_i = i;
    117         while (i<perm.size() && (trait.weight(first+perm[i])==0 ||
    118                                  trait.data(first+perm[i]) <=
    119                                  trait.data(first+perm[min_i])) ) {
    120           w += trait.weight(first+perm[i]);
    121           ++i;
    122         }
    123         double res=sum_w + 0.5*w;
    124         for ( size_t j=min_i; j<i; ++j)
    125           rtrait.data(result+perm[j]) = res;
    126         sum_w += w;
    127       }       
    128 
    129       size_t n = std::distance(first, last);
    130       std::transform(utility::data_iterator(result),
    131                      utility::data_iterator(result+n),
    132                      utility::data_iterator(result),
    133                      std::bind2nd(std::divides<double>(), sum_w));
    134       return result + n;
    135     }
    136 
    13774  };
    13875
Note: See TracChangeset for help on using the changeset viewer.