Changeset 1145 for trunk/yat/statistics


Ignore:
Timestamp:
Feb 25, 2008, 9:23:47 PM (16 years ago)
Author:
Peter
Message:

fixes #292

Location:
trunk/yat/statistics
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • trunk/yat/statistics/Histogram.h

    r1000 r1145  
    2929
    3030#include "AveragerWeighted.h"
     31#include "yat/utility/iterator_traits.h"
    3132
    3233#include <string>
     
    191192  };
    192193
     194  /**
     195     Add a range [first, last) of values to Histogram.
     196   */
     197  template<typename ForwardIterator>
     198  void add(Histogram& h,
     199           ForwardIterator first, ForwardIterator last)
     200  {
     201    while (first!=last) {
     202      h.add(utility::iterator_traits<ForwardIterator>().data(),
     203            utility::iterator_traits<ForwardIterator>().weight());
     204      ++first;
     205    }
     206  }
     207
    193208///
    194209/// The Histogram output operator
  • trunk/yat/statistics/KolmogorovSmirnov.h

    r1125 r1145  
    2626
    2727#include <set>
    28 #include <utility>
    2928#include <vector>
    3029
  • trunk/yat/statistics/PearsonCorrelation.cc

    r1139 r1145  
    2525
    2626#include "PearsonCorrelation.h"
    27 #include "Pearson.h"
    28 #include "AveragerPair.h"
    29 #include "AveragerPairWeighted.h"
    30 #include "yat/utility/VectorBase.h"
    31 #include "yat/classifier/DataLookupWeighted1D.h"
    32 #include "yat/classifier/Target.h"
    33 
    34 #include <cmath>
    35 #include <gsl/gsl_cdf.h>
     27#include "utility.h"
    3628
    3729namespace theplu {
     
    4032
    4133  PearsonCorrelation::PearsonCorrelation(void)
    42     : r_(0), nof_samples_(0)
    4334  {
    4435  }
     
    5041  double PearsonCorrelation::p_value_one_sided() const
    5142  {
    52     if(nof_samples_<=2)
    53       return 1;
    54 
    55     double t = sqrt(nof_samples_ - 2)*fabs(r_) /sqrt(1-r_*r_);
    56     return gsl_cdf_tdist_Q(t, nof_samples_ -2 );
     43    return pearson_p_value(ap_.correlation(),
     44                           static_cast<u_int>(ap_.n()));
    5745  }
    5846
  • trunk/yat/statistics/PearsonCorrelation.h

    r1140 r1145  
    2727*/
    2828
    29 #include "AveragerPair.h"
    3029#include "AveragerPairWeighted.h"
    31 #include "yat/classifier/Target.h"
    32 #include "yat/utility/iterator_traits.h"
    33 
    3430
    3531namespace theplu {
    3632namespace yat {
    37 namespace utility {
    38   class VectorBase;
    39 }
    4033namespace statistics {
    4134
     
    5851   
    5952    /**
    60        \f$ \frac{\vert \sum_i(x_i-\bar{x})(y_i-\bar{y})\vert
    61        }{\sqrt{\sum_i (x_i-\bar{x})^2\sum_i (x_i-\bar{x})^2}} \f$.
     53       Adding a data value to PearsonCorrelation.
     54    */
     55    void add(double value, bool target, double weight=1.0);
    6256
     57    /**
     58       \brief correlation
    6359
    64        If ForwardIterator is weighted correlation is calculated as
    65        \f$ \frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert }
    66        {\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}}
    67        \f$, where \f$ m_x = \frac{\sum w_ix_i}{\sum w_i} \f$ and \f$
    68        m_x = \frac{\sum w_ix_i}{\sum w_i} \f$. This expression is
    69        chosen to get a correlation equal to unity when \a x and \a y
    70        are equal.
     60       Correlation is calculated as implemented in AveragerPairWeighted
    7161
    72        @return Pearson correlation, if absolute=true absolute value
    73        of Pearson is used.
     62       @return Pearson correlation.
    7463    */
    75     template<typename ForwardIterator>
    76     double score(const classifier::Target& target,
    77                  ForwardIterator first, ForwardIterator last);
    78    
    79     /**
    80        \f$ \frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert }
    81        {\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}}
    82        \f$, where \f$ m_x = \frac{\sum w_ix_i}{\sum w_i} \f$ and \f$
    83        m_x = \frac{\sum w_ix_i}{\sum w_i} \f$. This expression is
    84        chosen to get a correlation equal to unity when \a x and \a y
    85        are equal.
    86 
    87        \return absolute value of weighted version of Pearson
    88        correlation.
    89 
    90        \note ietartors must be non-weighted
    91     */
    92     template<typename ForwardIterator1, typename ForwardIterator2>
    93     double score(const classifier::Target& target,
    94                  ForwardIterator1 first1, ForwardIterator1 last1,
    95                  ForwardIterator2 first2);
     64    double score(void) const;
    9665   
    9766    /**
     
    10069       correlation is zero (and the data is Gaussian).
    10170       
    102        @note This function can only be used together with the
    103        unweighted score.
    104        
     71       P-value is calculated using function pearson_p_value(double,
     72       u_int) where degrees of freedom is calculated using n(void) in
     73       AveragerPairWeighted.
     74
    10575       @return one-sided p-value
    10676    */
     
    10878   
    10979  private:
    110     double r_;
    111     int nof_samples_;
    112    
    113     template<typename ForwardIterator>
    114     double score(const classifier::Target& target,
    115                  ForwardIterator first, ForwardIterator last,
    116                  utility::unweighted_iterator_tag);
    117    
    118     template<typename ForwardIterator>
    119     double score(const classifier::Target& target,
    120                  ForwardIterator first, ForwardIterator last,
    121                  utility::weighted_iterator_tag);
     80    AveragerPairWeighted ap_;
    12281   
    12382  };
    124  
    125   template<typename ForwardIterator>
    126   double PearsonCorrelation::score(const classifier::Target& target,
    127                                    ForwardIterator first,
    128                                    ForwardIterator last)
    129   {
    130     nof_samples_ = target.size();
    131       using utility::yat_assert;
    132       yat_assert<std::runtime_error>("PearsonCorrelation: sizes mismatch");
    133       r_ = score(target, first, last,
    134                  utility::iterator_traits<ForwardIterator>::type());
    135       return r_;
    136   }
    137    
    138 
    139   template<typename ForwardIterator>
    140   double PearsonCorrelation::score(const classifier::Target& target,
    141                                    ForwardIterator first,
    142                                    ForwardIterator last,
    143                                    utility::unweighted_iterator_tag tag)
    144    
    145   {
    146     AveragerPair ap;
    147     for (size_t i=0; first!=last; ++first, ++i)
    148       ap.add(target.binary(i), *first);
    149     nof_samples_ = ap.n();
    150     return ap.correlation();
    151   }
    152    
    153 
    154   template<typename ForwardIterator>
    155   double PearsonCorrelation::score(const classifier::Target& target,
    156                                    ForwardIterator first,
    157                                    ForwardIterator last,
    158                                    utility::weighted_iterator_tag tag)
    159    
    160   {
    161     AveragerPairWeighted ap;
    162     for (size_t i=0; first!=last; ++first, ++i)
    163       ap.add(target.binary(i), first.data(), 1.0, first.weight());
    164     nof_samples_ = ap.n();
    165     return ap.correlation();
    166   }
    167    
    168   template<typename ForwardIterator1, typename ForwardIterator2>
    169   double PearsonCorrelation::score(const classifier::Target& target,
    170                                    ForwardIterator1 first1,
    171                                    ForwardIterator1 last1,
    172                                    ForwardIterator2 first2)
    173   {
    174     utility::check_iterator_is_unweighted(first1);
    175     utility::check_iterator_is_unweighted(first2);
    176     AveragerPairWeighted ap;
    177     for (size_t i=0; first1!=last1; ++first1, ++i, ++first2)
    178       ap.add(target.binary(i), *first1, 1.0, *first2);
    179     nof_samples_ = ap.n();
    180     r_ = ap.correlation();
    181     return r_;
    182   }
    18383 
    18484}}} // of namespace statistics, yat, and theplu
  • trunk/yat/statistics/ROC.h

    r1141 r1145  
    150150  };
    151151
    152   /**
    153      Add a range [first, last) of values to ROC. The first last-first
    154      elements in Target.binary are used.
    155    */
    156   template<typename ForwardIterator>
    157   void add(ROC& roc,
    158            ForwardIterator first, ForwardIterator last,
    159            const classifier::Target& target)
    160   {
    161     for (size_t i=0; first!=last; ++i, ++first)
    162       roc.add(utility::iterator_traits<ForwardIterator>().data(),
    163               target.binary(i),
    164               utility::iterator_traits<ForwardIterator>().weight());
    165   }
    166152}}} // of namespace statistics, yat, and theplu
    167153
  • trunk/yat/statistics/tTest.h

    r1000 r1145  
    3636
    3737  ///
    38   /// @brief Class for Fisher's t-test.
     38  /// @brief Class for Student's t-test.
    3939  ///   
    4040  /// See <a href="http://en.wikipedia.org/wiki/Student's_t-test">
  • trunk/yat/statistics/utility.cc

    r1025 r1145  
    2727#include "utility.h"
    2828
     29#include <gsl/gsl_cdf.h>
    2930#include <gsl/gsl_randist.h>
    3031#include <gsl/gsl_statistics_double.h>
     32
     33#include <cassert>
    3134
    3235namespace theplu {
     
    4144    return p;
    4245  }
     46
     47
     48  double pearson_p_value(double r, u_int n)
     49  {
     50    assert(n>=2);
     51    if (n<2)
     52      return std::numeric_limits<double>::quiet_NaN();
     53    return gsl_cdf_tdist_Q(r*sqrt((n-2)/(1-r*r)), n-2);
     54  }
     55
    4356
    4457  double kurtosis(const utility::VectorBase& v)
  • trunk/yat/statistics/utility.h

    r1039 r1145  
    5252 
    5353  /**
    54      Adding each value in an array \a v \a to an object \a o.  The
    55      requirements for the type T1 is to have an add(double, bool)
    56      function, and for T2 of the array \a v are: operator[] returning
    57      an element and function size() returning the number of elements.
     54     Adding a range [\a first, \a last) into an object of type T. The
     55     requirements for the type T is to have an add(double, bool, double)
     56     function.
    5857  */
    59   template <typename T1, typename T2>
    60   void add(T1& o, const T2& v, const classifier::Target& target)
    61   {
    62     for (size_t i=0; i<v.size(); ++i)
    63       o.add(v[i],target.binary(i));
    64   }
    65 
    66   /**
    67      Adding each value in an array \a v \a to an object \a o.  The
    68      requirements for the type T1 is to have an add(double, bool)
    69      function, and for T2 of the array \a v are: operator[] returning
    70      an element and function size() returning the number of elements.
    71   */
    72   template <typename T1>
    73   void add(T1& o, const classifier::DataLookupWeighted1D& v,
     58  template <typename T, typename ForwardIterator>
     59  void add(T& o, ForwardIterator first, ForwardIterator last,
    7460           const classifier::Target& target)
    7561  {
    76     for (size_t i=0; i<v.size(); ++i)
    77       o.add(v.data(i),target.binary(i),v.weight(i));
     62    for (size_t i=0; first!=last; ++i, ++first)
     63      o.add(utility::iterator_traits<ForwardIterator>().data(first),
     64            target.binary(i),
     65            utility::iterator_traits<ForwardIterator>().weight(first));
    7866  }
    7967
     
    9179  double cdf_hypergeometric_P(u_int k, u_int n1, u_int n2, u_int t);
    9280
     81
     82  /**
     83     \brief one-sided p-value
     84
     85     This function uses the t-distribution to calculate the one-sided
     86     p-value. Given that the true correlation is zero (Null
     87     hypothesis) the estimated correlation, r, after a transformation
     88     is t-distributed:
     89
     90     \f$ \sqrt{(n-2)} \frac{r}{\sqrt{(1-r^2)}} \in t(n-2) \f$
     91
     92     \return Probability that correlation is larger than \a r by
     93     chance when having \a n samples.
     94   */
     95  double pearson_p_value(double r, u_int n);
    9396
    9497  ///
Note: See TracChangeset for help on using the changeset viewer.