Changeset 779 for trunk/yat


Ignore:
Timestamp:
Mar 5, 2007, 7:58:30 PM (15 years ago)
Author:
Peter
Message:

Refs #101

Location:
trunk/yat
Files:
21 edited
4 copied
2 moved

Legend:

Unmodified
Added
Removed
  • trunk/yat/classifier/DataLookupWeighted1D.cc

    r720 r779  
    7878
    7979
     80  double sum_weight(const DataLookupWeighted1D& x)
     81  {
     82    double r=0;
     83    for (size_t i=0; i<x.size(); ++i)
     84      r += x.weight(i);
     85    return r;
     86  }
     87
     88
    8089  double DataLookupWeighted1D::weight(const size_t i) const
    8190  {
  • trunk/yat/classifier/DataLookupWeighted1D.h

    r767 r779  
    105105  };
    106106
     107  ///
     108  /// @return sum of weights
     109  ///
     110  double sum_weight(const DataLookupWeighted1D&);
     111
    107112}}} // of namespace classifier, yat, and theplu
    108113
  • trunk/yat/classifier/InputRanker.cc

    r720 r779  
    2828#include "DataLookupWeighted1D.h"
    2929#include "Target.h"
    30 #include "yat/statistics/ROC.h"
     30#include "yat/statistics/Score.h"
    3131#include "yat/utility/stl_utility.h"
    3232
  • trunk/yat/classifier/utility.cc

    r680 r779  
    2424#include "utility.h"
    2525#include "DataLookup1D.h"
     26#include "DataLookupWeighted1D.h"
    2627#include "yat/utility/vector.h"
    2728
     
    3839  }
    3940
     41  void convert(const DataLookupWeighted1D& lookup, utility::vector& value,
     42               utility::vector& weight)
     43  {
     44   
     45    value=utility::vector(lookup.size());
     46    weight=utility::vector(lookup.size());
     47    for(u_int i=0; i<lookup.size(); i++){
     48      value(i)=lookup.data(i);
     49      weight(i)=lookup.weight(i);
     50    }
     51  }
     52
    4053}}} // of namespace classifier, yat, and theplu
  • trunk/yat/classifier/utility.h

    r680 r779  
    3535
    3636  class DataLookup1D;
     37  class DataLookupWeighted1D;
    3738
    3839  ///
     
    4142  void convert(const DataLookup1D&, utility::vector&);
    4243
     44  ///
     45  /// Converts a DataLookupWeighted1D to two utility::vector
     46  ///
     47  void convert(const DataLookupWeighted1D&, utility::vector& value,
     48               utility::vector& weight);
     49
    4350}}} // of namespace classifier, yat, and theplu
    4451
  • trunk/yat/statistics/FoldChange.cc

    r747 r779  
    3939  }
    4040
    41   FoldChange::FoldChange(const FoldChange& other)
    42     : Score(other)
     41  double FoldChange::score(const classifier::Target& target,
     42                           const utility::vector& value) const
    4343  {
    44   }
    45 
    46   FoldChange& FoldChange::operator=(const FoldChange& other)
    47   {
    48     Score::operator=(other);
    49     return *this;
    50   }
    51 
    52   double FoldChange::score(const classifier::Target& target,
    53                            const utility::vector& value)
    54   {
    55     weighted_=false;
    5644    Averager pos;
    5745    Averager neg;
     
    6957
    7058  double FoldChange::score(const classifier::Target& target,
    71                            const classifier::DataLookupWeighted1D& value)
     59                           const classifier::DataLookupWeighted1D& value) const
    7260  {
    73     weighted_=true;
    7461    AveragerWeighted pos;
    7562    AveragerWeighted neg;
     
    8875  double FoldChange::score(const classifier::Target& target,
    8976                           const utility::vector& value,
    90                            const utility::vector& weight)
     77                           const utility::vector& weight) const
    9178  {
    92     weighted_=true;
    9379    AveragerWeighted pos;
    9480    AveragerWeighted neg;
  • trunk/yat/statistics/FoldChange.h

    r767 r779  
    5353    ///
    5454    double score(const classifier::Target& target,
    55                  const utility::vector& value);
     55                 const utility::vector& value) const;
    5656 
    5757    ///
     
    6262    ///
    6363    double score(const classifier::Target& target,
    64                  const classifier::DataLookupWeighted1D& value);
     64                 const classifier::DataLookupWeighted1D& value) const;
    6565 
    6666    ///
     
    7373    double score(const classifier::Target& target,
    7474                 const utility::vector& value,
    75                  const utility::vector& weight);
     75                 const utility::vector& weight) const;
    7676 
    7777  private:
  • trunk/yat/statistics/Makefile.am

    r778 r779  
    2727  AveragerWeighted.cc AveragerPairWeighted.cc Distance.cc   \
    2828  Euclidean.cc Fisher.cc FoldChange.cc Histogram.cc Pearson.cc      \
    29   PearsonDistance.cc ROC.cc ROCscore.cc \
    30   SAM.cc Score.cc SNR.cc tScore.cc  \
     29  PearsonCorrelation.cc PearsonDistance.cc ROC.cc ROCscore.cc \
     30  SAMScore.cc Score.cc SNR.cc tScore.cc tTest.cc \
    3131  utility.cc WilcoxonFoldChange.cc
    3232
     
    3636  AveragerWeighted.h AveragerPairWeighted.h Distance.h Euclidean.h \
    3737  Fisher.h  \
    38   FoldChange.h Histogram.h Pearson.h PearsonDistance.h ROC.h  ROCscore.h\
    39   SAM.h Score.h SNR.h tScore.h \
     38  FoldChange.h Histogram.h Pearson.h PearsonCorrelation.h \
     39  PearsonDistance.h ROC.h ROCscore.h \
     40  SAMScore.h Score.h SNR.h tScore.h tTest.h \
    4041  utility.h WilcoxonFoldChange.h
  • trunk/yat/statistics/Pearson.cc

    r703 r779  
    3030
    3131#include <cmath>
    32 #include <gsl/gsl_cdf.h>
    3332
    3433namespace theplu {
     
    3736
    3837  Pearson::Pearson(bool b)
    39     : Score(b), r_(0), nof_samples_(0)
     38    : Score(b)
    4039  {
    4140  }
     
    4544  }
    4645
    47   double Pearson::p_value() const
     46  double Pearson::score(const classifier::Target& target,
     47                        const utility::vector& value) const
    4848  {
    49     if(weighted_)
    50       return 1;
    51     if(nof_samples_<2){
    52       std::cerr << "Warning: Only " << nof_samples_ << "samples. "
    53                 << "Need at lest 3.\n";
    54       return 1;
    55     }
    56 
    57     double t = sqrt(nof_samples_ - 2)*fabs(r_) /sqrt(1-r_*r_);
    58     double p = gsl_cdf_tdist_Q(t, nof_samples_ -2 );
    59     if (absolute_)
    60       return 2*p;
    61     if (r_<0)
    62       return 1-p;
    63     return p;
    64 
    65   }
    66 
    67   double Pearson::score(const classifier::Target& target,
    68                         const utility::vector& value)
    69   {
    70     weighted_=false;
    7149    AveragerPair ap;
    7250    for (size_t i=0; i<target.size(); i++){
     
    7553      else
    7654        ap.add(-1, value(i));
    77       nof_samples_ = target.size();
    7855    }
    79     r_ = ap.correlation();
    80     if (r_<0 && absolute_)
    81       return -r_;
    82      
    83     return r_;
     56    double r = ap.correlation();
     57    if (r<0 && absolute_)
     58      return -r;
     59    return r;
    8460  }
    8561   
    8662  double Pearson::score(const classifier::Target& target,
    87                         const classifier::DataLookupWeighted1D& value)
     63                        const classifier::DataLookupWeighted1D& value) const
    8864  {
    89     weighted_=true;
    9065    AveragerPairWeighted ap;
    9166    for (size_t i=0; i<target.size(); i++){
     
    9469      else
    9570        ap.add(-1, value.data(i),1,value.weight(i));
    96       nof_samples_ = target.size();
    9771    }
    98     r_ = ap.correlation();
    99     if (r_<0 && absolute_)
    100       return -r_;
    101      
    102     return r_;
     72    double r = ap.correlation();
     73    if (r<0 && absolute_)
     74      return -r;
     75    return r;
    10376  }
    10477
    10578  double Pearson::score(const classifier::Target& target,
    10679                        const utility::vector& value,
    107                         const utility::vector& weight)
     80                        const utility::vector& weight) const
    10881  {
    109     weighted_=true;
    11082    AveragerPairWeighted ap;
    11183    for (size_t i=0; i<target.size(); i++){
     
    11486      else
    11587        ap.add(-1, value(i),1,weight(i));
    116       nof_samples_ = target.size();
    117     }
    118     r_ = ap.correlation();
    119     if (r_<0 && absolute_)
    120       return -r_;
    121      
    122     return r_;
     88    }
     89    double r = ap.correlation();
     90    if (r<0 && absolute_)
     91      return -r;
     92    return r;
    12393  }
    12494
  • trunk/yat/statistics/Pearson.h

    r767 r779  
    3232  class vector;
    3333}
    34 namespace classifier {
    35   class VectorAbstract;
    36 }
    3734namespace statistics { 
    3835
     
    5552         
    5653   
    57     ///
    58     /// \f$ \frac{\vert \sum_i(x_i-\bar{x})(y_i-\bar{y})\vert
    59     /// }{\sqrt{\sum_i (x_i-\bar{x})^2\sum_i (x_i-\bar{x})^2}} \f$.
    60     /// @return Pearson correlation, if absolute=true absolute value
    61     /// of Pearson is used.
    62     ///
     54    /**
     55      \f$ \frac{\vert \sum_i(x_i-\bar{x})(y_i-\bar{y})\vert
     56      }{\sqrt{\sum_i (x_i-\bar{x})^2\sum_i (x_i-\bar{x})^2}} \f$.
     57      @return Pearson correlation, if absolute=true absolute value
     58      of Pearson is used.
     59    */
    6360    double score(const classifier::Target& target,
    64                  const utility::vector& value);
     61                 const utility::vector& value) const;
    6562
    66     ///
    67     /// \f$ \frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert }
    68     /// {\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}}
    69     /// \f$, where \f$ m_x = \frac{\sum w_ix_i}{\sum w_i} \f$ and \f$
    70     /// m_x = \frac{\sum w_ix_i}{\sum w_i} \f$. This expression is
    71     /// chosen to get a correlation equal to unity when \a x and \a y
    72     /// are equal. @return absolute value of weighted version of
    73     /// Pearson correlation.
    74     ///
     63    /**
     64      \f$ \frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert }
     65      {\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}}
     66      \f$, where \f$ m_x = \frac{\sum w_ix_i}{\sum w_i} \f$ and \f$
     67      m_x = \frac{\sum w_ix_i}{\sum w_i} \f$. This expression is
     68      chosen to get a correlation equal to unity when \a x and \a y
     69      are equal. @return absolute value of weighted version of
     70      Pearson correlation.
     71    */
    7572    double score(const classifier::Target& target,
    76                  const classifier::DataLookupWeighted1D& value);
     73                 const classifier::DataLookupWeighted1D& value) const;
    7774
    78     ///
    79     /// \f$ \frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert }
    80     /// {\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}}
    81     /// \f$, where \f$ m_x = \frac{\sum w_ix_i}{\sum w_i} \f$ and \f$
    82     /// m_x = \frac{\sum w_ix_i}{\sum w_i} \f$. This expression is
    83     /// chosen to get a correlation equal to unity when \a x and \a y
    84     /// are equal. @return absolute value of weighted version of
    85     /// Pearson correlation.
    86     ///
     75    /**
     76      \f$ \frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert }
     77      {\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}}
     78      \f$, where \f$ m_x = \frac{\sum w_ix_i}{\sum w_i} \f$ and \f$
     79      m_x = \frac{\sum w_ix_i}{\sum w_i} \f$. This expression is
     80      chosen to get a correlation equal to unity when \a x and \a y
     81      are equal. @return absolute value of weighted version of
     82      Pearson correlation.
     83    */
    8784    double score(const classifier::Target& target,
    8885                 const utility::vector& value,
    89                  const utility::vector& weight);
     86                 const utility::vector& weight) const;
    9087
    91     ///
    92     /// The p-value is the probability of getting a correlation as
    93     /// large as the observed value by random chance, when the true
    94     /// correlation is zero (and the data is Gaussian). Note that this
    95     /// function can only be used together with the unweighted
    96     /// score. @return two-sided p-value
    97     ///
    98     double p_value() const;
    99          
    100   private:
    101     double r_;
    102     int nof_samples_;
    103 
    104     //    void centralize(utility::vector&, const utility::vector&);
    10588  };
    10689
  • trunk/yat/statistics/PearsonCorrelation.cc

    r776 r779  
    2222*/
    2323
     24#include "PearsonCorrelation.h"
    2425#include "Pearson.h"
    2526#include "AveragerPair.h"
     
    3637namespace statistics { 
    3738
    38   Pearson::Pearson(bool b)
    39     : Score(b), r_(0), nof_samples_(0)
     39  PearsonCorrelation::PearsonCorrelation(void)
     40    : r_(0), nof_samples_(0)
    4041  {
    4142  }
    4243
    43   Pearson::~Pearson(void)
     44  PearsonCorrelation::~PearsonCorrelation(void)
    4445  {
    4546  }
    4647
    47   double Pearson::p_value() const
     48  double PearsonCorrelation::p_value_one_sided() const
    4849  {
    49     if(weighted_)
    50       return 1;
    5150    if(nof_samples_<2){
    5251      std::cerr << "Warning: Only " << nof_samples_ << "samples. "
     
    5655
    5756    double t = sqrt(nof_samples_ - 2)*fabs(r_) /sqrt(1-r_*r_);
    58     double p = gsl_cdf_tdist_Q(t, nof_samples_ -2 );
    59     if (absolute_)
    60       return 2*p;
    61     if (r_<0)
    62       return 1-p;
    63     return p;
    64 
     57    return gsl_cdf_tdist_Q(t, nof_samples_ -2 );
    6558  }
    6659
    67   double Pearson::score(const classifier::Target& target,
    68                         const utility::vector& value)
     60  double PearsonCorrelation::score(const classifier::Target& target,
     61                                   const utility::vector& value)
    6962  {
    70     weighted_=false;
    71     AveragerPair ap;
    72     for (size_t i=0; i<target.size(); i++){
    73       if (target.binary(i))
    74         ap.add(1, value(i));
    75       else
    76         ap.add(-1, value(i));
    77       nof_samples_ = target.size();
    78     }
    79     r_ = ap.correlation();
    80     if (r_<0 && absolute_)
    81       return -r_;
    82      
    83     return r_;
     63    nof_samples_ = value.size();
     64    Pearson pearson;
     65    return pearson.score(target,value);
    8466  }
    8567   
    86   double Pearson::score(const classifier::Target& target,
    87                         const classifier::DataLookupWeighted1D& value)
     68  double
     69  PearsonCorrelation::score(const classifier::Target& target,
     70                            const classifier::DataLookupWeighted1D& value)
    8871  {
    89     weighted_=true;
    90     AveragerPairWeighted ap;
    91     for (size_t i=0; i<target.size(); i++){
    92       if (target.binary(i))
    93         ap.add(1, value.data(i),1,value.weight(i));
    94       else
    95         ap.add(-1, value.data(i),1,value.weight(i));
    96       nof_samples_ = target.size();
    97     }
    98     r_ = ap.correlation();
    99     if (r_<0 && absolute_)
    100       return -r_;
    101      
    102     return r_;
     72    // Peter what should nof_samples be in weighted case?
     73    nof_samples_=0;
     74    Pearson pearson;
     75    return pearson.score(target,value);
    10376  }
    10477
    105   double Pearson::score(const classifier::Target& target,
    106                         const utility::vector& value,
    107                         const utility::vector& weight)
     78  double PearsonCorrelation::score(const classifier::Target& target,
     79                                   const utility::vector& value,
     80                                   const utility::vector& weight)
    10881  {
    109     weighted_=true;
    110     AveragerPairWeighted ap;
    111     for (size_t i=0; i<target.size(); i++){
    112       if (target.binary(i))
    113         ap.add(1, value(i),1,weight(i));
    114       else
    115         ap.add(-1, value(i),1,weight(i));
    116       nof_samples_ = target.size();
    117     }
    118     r_ = ap.correlation();
    119     if (r_<0 && absolute_)
    120       return -r_;
    121      
    122     return r_;
     82    // Peter what should nof_samples be in weighted case?
     83    nof_samples_ = 0;
     84    Pearson pearson;
     85    return pearson.score(target,value);
    12386  }
    12487
  • trunk/yat/statistics/PearsonCorrelation.h

    r776 r779  
    1 #ifndef _theplu_yat_statistics_pearson_
    2 #define _theplu_yat_statistics_pearson_
     1#ifndef _theplu_yat_statistics_pearson_correlation_
     2#define _theplu_yat_statistics_pearson_correlation_
    33
    44// $Id$
     
    4141  ///   
    4242 
    43   class Pearson : public Score
     43  class PearsonCorrelation
    4444  {
    4545  public:
     
    4747    /// @brief The default constructor.
    4848    ///
    49     Pearson(bool absolute=true);
     49    PearsonCorrelation(void);
    5050
    5151    ///
    5252    /// @brief The destructor.
    5353    ///
    54     virtual ~Pearson(void);
     54    virtual ~PearsonCorrelation(void);
    5555         
    5656   
     
    9191    ///
    9292    /// The p-value is the probability of getting a correlation as
    93     /// large as the observed value by random chance, when the true
    94     /// correlation is zero (and the data is Gaussian). Note that this
    95     /// function can only be used together with the unweighted
    96     /// score. @return two-sided p-value
     93    /// large (or larger) as the observed value by random chance, when the true
     94    /// correlation is zero (and the data is Gaussian).
     95    ///
     96    /// @Note This function can only be used together with the
     97    /// unweighted score.
    9798    ///
    98     double p_value() const;
    99          
     99    /// @return one-sided p-value
     100    ///
     101    double p_value_one_sided() const;
     102
    100103  private:
    101104    double r_;
  • trunk/yat/statistics/ROC.cc

    r747 r779  
    3838namespace statistics { 
    3939
    40   ROC::ROC(bool b)
    41     : Score(b), area_(0.5), minimum_size_(10), nof_pos_(0) 
     40  ROC::ROC(void)
     41    : area_(0.5), minimum_size_(10), nof_pos_(0) 
    4242  {
    4343  }
     
    133133              (nof_pos_*(n()-nof_pos_)) );
    134134   
    135     //Returning score larger 0.5 that you get by random
    136     if (area_<0.5 && absolute_)
    137       area_=1.0-area_;
    138 
    139135    return area_;
    140136  }
     
    171167    area_/=max_area;
    172168   
    173     if (area_<0.5 && absolute_)
    174       area_=1.0-area_;
    175    
    176169    return area_;
    177170  }
     
    208201   
    209202    area_/=max_area;
    210    
    211     if (area_<0.5 && absolute_)
    212       area_=1.0-area_;
    213203   
    214204    return area_;
  • trunk/yat/statistics/ROC.h

    r767 r779  
    2525*/
    2626
    27 #include "Score.h"
    28 
    2927#include <utility>
    3028#include <vector>
     
    3331namespace yat {
    3432namespace classifier {
     33  class DataLookup1D;
     34  class DataLookupWeighted1D;
    3535  class Target;
    3636}
     
    4747  /// U-test (aka Wilcoxon).
    4848  ///
    49   class ROC : public Score
     49  class ROC
    5050  {
    5151 
     
    5454    /// @brief Default constructor
    5555    ///
    56     ROC(bool absolute=true);
     56    ROC(void);
    5757         
    5858    ///
     
    155155    u_int nof_pos_;
    156156    std::vector<std::pair<bool, double> > vec_pair_; // class-value-pair
     157    bool weighted_;
    157158  };
    158159
  • trunk/yat/statistics/ROCscore.cc

    r778 r779  
    2828#include "yat/utility/vector.h"
    2929
     30#include <cassert>
    3031#include <cmath>
    3132#include <utility>
    32 #include <vector>
     33#include <map>
    3334
    3435namespace theplu {
     
    3637namespace statistics { 
    3738
     39  ROCscore::ROCscore(bool absolute)
     40    : Score(absolute)
     41  {
     42  }
     43
    3844  double ROCscore::score(const classifier::Target& target,
    39                          const utility::vector& value)
     45                         const utility::vector& value) const
    4046  {
    4147    assert(target.size()==value.size());
    42     weighted_=false;
     48    // key data, pair<target, weight>
     49    std::multimap<double, std::pair<bool, double> > m;
     50    for (unsigned int i=0; i<target.size(); i++)
     51      m.insert(std::make_pair(value(i),
     52                              std::make_pair(target.binary(i),1.0)));
     53       
     54    return score(m);
     55  }
    4356
    44     std::vector<std::pair<bool, double> > class_value; // class-value-pair
    45     class_value.reserve(target.size());
    46     for (size_t i=0; i<target.size(); i++)
    47       class_value.push_back(std::make_pair(target.binary(i),value(i)));
    4857
    49     std::sort(class_value.begin(),class_value.end(),
    50               utility::pair_value_compare<bool, double>());
    51     double area = 0;
    52     u_int nof_pos =0;
    53     for (size_t i=0; i<class_value.size(); i++){
    54       if (class_value[i].first){
    55         area+=i;
    56         ++nof_pos;
     58
     59  double ROCscore::score(const classifier::Target& target,
     60                         const classifier::DataLookupWeighted1D& value) const
     61  {
     62    assert(target.size()==value.size());
     63    // key data, pair<target, weight>
     64    std::multimap<double, std::pair<bool, double> > m;
     65    for (unsigned int i=0; i<target.size(); i++)
     66      if (value.weight(i))
     67        m.insert(std::make_pair(value.data(i),
     68                                std::make_pair(target.binary(i),
     69                                               value.weight(i))));
     70       
     71    return score(m);
     72  }
     73
     74
     75  double ROCscore::score(const classifier::Target& target,
     76                         const utility::vector& value,
     77                         const utility::vector& weight) const
     78  {
     79    assert(target.size()==value.size());
     80    assert(target.size()==weight.size());
     81    // key data, pair<target, weight>
     82    std::multimap<double, std::pair<bool, double> > m;
     83    for (unsigned int i=0; i<target.size(); i++)
     84      if (weight(i))
     85        m.insert(std::make_pair(value(i),
     86                                std::make_pair(target.binary(i), weight(i))));
     87       
     88    return score(m);
     89  }
     90
     91
     92  double
     93  ROCscore::score(const MultiMap& m) const
     94  {
     95    double area=0;
     96    double cumsum_pos_w=0;
     97    double cumsum_neg_w=0;
     98    typedef MultiMap::const_iterator iter;
     99
     100    for (iter i=m.begin(); i!=m.end(); ++i)
     101      if (i->second.first){
     102        area += i->second.second * cumsum_neg_w;
     103        cumsum_pos_w += i->second.second;
    57104      }
    58     }
     105      else
     106        cumsum_neg_w += i->second.second;
    59107
    60     // Normalizing the area to [0,1]
    61     area = ( (area-nof_pos*(nof_pos-1)/2 ) /
    62              (nof_pos*(class_value.size()-nof_pos)) );
     108    // max area is cumsum_neg_w * cumsum_pos_w
     109    area/=(cumsum_neg_w*cumsum_pos_w);
    63110   
    64111    if (area<0.5 && absolute_)
    65112      return 1.0-area;
    66113    return area;
    67   }
    68 
    69 
    70   // Peter, should be possible to do this in NlogN
    71   double ROCscore::score(const classifier::Target& target,
    72                     const classifier::DataLookupWeighted1D& value)
    73   {
    74 
    75     std::vector<std::pair<bool, double> > class_value; // class-value-pair
    76     class_value.clear();
    77     class_value.reserve(target.size());
    78     for (unsigned int i=0; i<target.size(); i++)
    79       if (value.weight(i))
    80         class_value.push_back(std::make_pair(target.binary(i),value.data(i)));
    81 
    82     std::sort(class_value.begin(),class_value.end(),
    83               utility::pair_value_compare<int, double>());
    84 
    85     double area=0;
    86     double max_area=0;
    87 
    88     for (size_t i=0; i<n(); i++)
    89       if (target.binary(i))
    90         for (size_t j=0; j<n(); j++)
    91           if (!target.binary(j)){
    92             if (value.data(i)>value.data(j))
    93               area+=value.weight(i)*value.weight(j);
    94             max_area+=value.weight(i)*value.weight(j);
    95           }
    96    
    97     area/=max_area;
    98    
    99     if (area<0.5 && absolute_)
    100       return 1.0-area;
    101     return area;
    102   }
    103 
    104 
    105   // Peter, should be possible to do this in NlogN
    106   double ROCscore::score(const classifier::Target& target,
    107                          const utility::vector& value,
    108                          const utility::vector& weight)
    109   {
    110     weighted_=true;
    111 
    112 
    113     std::vector<std::pair<bool, double> > class_value; // class-value-pair
    114     class_value.reserve(target.size());
    115     for (unsigned int i=0; i<target.size(); i++)
    116       if (weight(i))
    117         class_value.push_back(std::make_pair(target.binary(i),value(i)));
    118 
    119     std::sort(class_value.begin(),class_value.end(),
    120               utility::pair_value_compare<int, double>());
    121 
    122     double area=0;
    123     double max_area=0;
    124 
    125     for (size_t i=0; i<n(); i++)
    126       if (target.binary(i))
    127         for (size_t j=0; j<n(); j++)
    128           if (!target.binary(j)){
    129             if (value(i)>value(j))
    130               area+=weight(i)*weight(j);
    131             max_area+=weight(i)*weight(j);
    132           }
    133    
    134     area/=max_area;
    135    
    136     if (area<0.5 && absolute_)
    137       return 1.0-area;
    138     return area;
    139   }
     114}
    140115
    141116}}} // of namespace statistics, yat, and theplu
  • trunk/yat/statistics/ROCscore.h

    r778 r779  
    2626
    2727#include "Score.h"
     28#include "yat/utility/stl_utility.h"
    2829
    29 #include <cctype>
     30#include <utility>
     31#include <map>
    3032
    3133namespace theplu {
     
    4749  public:
    4850    ///
    49     /// @return number of samples
    5051    ///
    51     size_t n(void) const;
    52 
    5352    ///
    54     /// @return number of positive samples (Target.binary()==true)
    55     ///
    56     size_t n_pos(void) const;
     53    ROCscore(bool absolute=true);
    5754
    5855    /// Function taking \a value, \a target (+1 or -1) and vector
     
    6461    ///
    6562    double score(const classifier::Target& target,
    66                  const utility::vector& value);
     63                 const utility::vector& value) const;
    6764   
    6865    /**
     
    7976    */
    8077    double score(const classifier::Target& target,
    81                  const classifier::DataLookupWeighted1D& value);
     78                 const classifier::DataLookupWeighted1D& value) const;
    8279
    8380    /**
     
    9592    double score(const classifier::Target& target,
    9693                 const utility::vector& value,
    97                  const utility::vector& weight);
     94                 const utility::vector& weight) const;
    9895
    9996  private:
     97    typedef std::multimap<double, std::pair<bool, double> > MultiMap;
     98    double score(const MultiMap&) const;
    10099   
    101100  };
  • trunk/yat/statistics/SAMScore.cc

    r776 r779  
    2222*/
    2323
    24 #include "SAM.h"
     24#include "SAMScore.h"
    2525#include "Averager.h"
    2626#include "AveragerWeighted.h"
     
    3535namespace statistics { 
    3636
    37   SAM::SAM(const double s0, bool b)
     37  SAMScore::SAMScore(const double s0, bool b)
    3838    : Score(b), s0_(s0)
    3939  {
    4040  }
    4141
    42   double SAM::score(const classifier::Target& target,
    43                     const utility::vector& value)
     42  double SAMScore::score(const classifier::Target& target,
     43                         const utility::vector& value) const
    4444  {
    45     weighted_=false;
    4645    statistics::Averager positive;
    4746    statistics::Averager negative;
     
    5251        negative.add(value(i));
    5352    }
    54     if(positive.n()+negative.n()<=2)
    55       return 0;
    56     double diff = positive.mean() - negative.mean();
    57     double s2 = ( (1.0/positive.n()+1.0/negative.n()) *
    58                  (positive.sum_xx_centered()+negative.sum_xx_centered()) /
    59                  (positive.n()+negative.n()-2) );
    60     if (diff<0 && absolute_)
    61       return -diff/(sqrt(s2)+s0_);
    62     return diff/(sqrt(s2)+s0_);
     53    return score(positive, negative);
    6354  }
    6455
    65   double SAM::score(const classifier::Target& target,
    66                     const classifier::DataLookupWeighted1D& value)
     56  double SAMScore::score(const classifier::Target& target,
     57                         const classifier::DataLookupWeighted1D& value) const
    6758  {
    68     weighted_=true;
    6959    statistics::AveragerWeighted positive;
    7060    statistics::AveragerWeighted negative;
     
    7565        negative.add(value.data(i),value.weight(i));
    7666    }
    77     if(positive.n()+negative.n()<=2)
    78       return 0;
    79     double diff = positive.mean() - negative.mean();
    80     double s2 = ( (1.0/positive.n()+1.0/negative.n()) *
    81                  (positive.sum_xx_centered()+negative.sum_xx_centered()) /
    82                  (positive.n()+negative.n()-2) );
    83     if (diff<0 && absolute_)
    84       return -diff/(sqrt(s2)+s0_);
    85     return diff/(sqrt(s2)+s0_);
     67    return score(positive, negative);
    8668  }
    8769
    8870
    8971
    90   double SAM::score(const classifier::Target& target,
    91                     const utility::vector& value,
    92                     const utility::vector& weight)
     72  double SAMScore::score(const classifier::Target& target,
     73                         const utility::vector& value,
     74                         const utility::vector& weight) const
    9375  {
    94     weighted_=true;
    9576    statistics::AveragerWeighted positive;
    9677    statistics::AveragerWeighted negative;
     
    10182        negative.add(value(i),weight(i));
    10283    }
    103     if(positive.n()+negative.n()<=2)
    104       return 0;
    105     double diff = positive.mean() - negative.mean();
    106     double s2 = ( (1.0/positive.n()+1.0/negative.n()) *
    107                  (positive.sum_xx_centered()+negative.sum_xx_centered()) /
    108                  (positive.n()+negative.n()-2) );
    109     if (diff<0 && absolute_)
    110       return -diff/(sqrt(s2)+s0_);
    111     return diff/(sqrt(s2)+s0_);
     84    return score(positive, negative);
    11285  }
    11386
  • trunk/yat/statistics/SAMScore.h

    r776 r779  
    1 #ifndef _theplu_yat_statistics_sam
    2 #define _theplu_yat_statistics_sam
     1#ifndef _theplu_yat_statistics_sam_score_
     2#define _theplu_yat_statistics_sam_score_
    33
    44// $Id$
     
    2727#include "Score.h"
    2828
     29#include <cmath>
     30
    2931namespace theplu {
    3032namespace yat {
     
    4850     details
    4951  */   
    50   class SAM : public Score
     52  class SAMScore : public Score
    5153  {
    5254
     
    5557    /// @param s0 \f$ s_0 \f$ is a fudge factor
    5658    ///
    57     SAM(const double s0, bool absolute=true);
     59    SAMScore(const double s0, bool absolute=true);
    5860
    5961    /**
     
    6769    */
    6870    double score(const classifier::Target& target,
    69                  const utility::vector& value);
     71                 const utility::vector& value) const;
    7072
    7173    /**
     
    8183    */
    8284    double score(const classifier::Target& target,
    83                  const classifier::DataLookupWeighted1D& value);         
     85                 const classifier::DataLookupWeighted1D& value) const;         
    8486
    8587    /**
     
    9698    double score(const classifier::Target& target,
    9799                 const utility::vector& value,
    98                  const utility::vector& weight);         
     100                 const utility::vector& weight) const;         
    99101  private:
    100102    double s0_;
     103
     104    template<class T>
     105    double score(const T& positive, const T& negative) const
     106    {
     107      if(positive.n()+negative.n()<=2)
     108        return 0;
     109      double diff = positive.mean() - negative.mean();
     110      double s2 = ( (1.0/positive.n()+1.0/negative.n()) *
     111                    (positive.sum_xx_centered()+negative.sum_xx_centered()) /
     112                    (positive.n()+negative.n()-2) );
     113      if (diff<0 && absolute_)
     114        return -diff/(sqrt(s2)+s0_);
     115      return diff/(sqrt(s2)+s0_);
     116    }
    101117  };
    102118
  • trunk/yat/statistics/SNR.cc

    r747 r779  
    4141                    const utility::vector& value)
    4242  {
    43     weighted_=false;
    4443    statistics::Averager positive;
    4544    statistics::Averager negative;
     
    6362                    const classifier::DataLookupWeighted1D& value)
    6463  {
    65     weighted_=true;
    6664    statistics::AveragerWeighted positive;
    6765    statistics::AveragerWeighted negative;
     
    8987                    const utility::vector& weight)
    9088  {
    91     weighted_=true;
    9289    statistics::AveragerWeighted positive;
    9390    statistics::AveragerWeighted negative;
  • trunk/yat/statistics/Score.cc

    r747 r779  
    2525
    2626#include "yat/classifier/DataLookup1D.h"
     27#include "yat/classifier/DataLookupWeighted1D.h"
    2728#include "yat/classifier/Target.h"
    2829#include "yat/classifier/utility.h"
    2930#include "yat/utility/vector.h"
    3031
     32#include <cassert>
    3133
    3234namespace theplu {
     
    3537
    3638  Score::Score(bool absolute)
    37     : absolute_(absolute), weighted_(false)
     39    : absolute_(absolute)
    3840  {
    3941  }
     
    4345  }
    4446
     47  void Score::absolute(bool absolute)
     48  {
     49    absolute_=absolute;
     50  }
     51
    4552  double Score::score(const classifier::Target& target,
    46                       const classifier::DataLookup1D& value)
     53                      const classifier::DataLookup1D& value) const
    4754  {
    4855    assert(target.size()==value.size());
     
    5259  }
    5360
    54   void Score::absolute(bool absolute)
     61
     62  double Score::score(const classifier::Target& target,
     63                      const classifier::DataLookupWeighted1D& value) const
    5564  {
    56     absolute_=absolute;
     65    assert(target.size()==value.size());
     66    utility::vector a;
     67    utility::vector b;
     68    classifier::convert(value,a,b);
     69    return score(target,a,b);
    5770  }
     71
    5872
    5973  double Score::score(const classifier::Target& target,
    6074                      const classifier::DataLookup1D& value,
    61                       const classifier::DataLookup1D& weight)
     75                      const classifier::DataLookup1D& weight) const
    6276  {
    6377    utility::vector a;
     
    6882  }
    6983
    70   bool Score::weighted(void) const
    71   {
    72     return weighted_;
    73   }
    74 
    7584}}} // of namespace statistics, yat, and theplu
  • trunk/yat/statistics/Score.h

    r767 r779  
    4747    /// @brief Constructor
    4848    ///   
    49     Score(bool absolute=true) ;
     49    Score(bool) ;
    5050   
    5151    ///
     
    6262    /// Function calculating the score. In absolute mode, also the
    6363    /// score using negated class labels is calculated, and the
    64     /// largest of the two scores are calculated. Absolute mode should
    65     /// be used when two-tailed test is wanted.
     64    /// largest of the two scores are returned.
    6665    ///
    6766    virtual double
    6867    score(const classifier::Target& target,
    69           const utility::vector& value) = 0;
     68          const utility::vector& value) const = 0;
    7069 
    7170    ///
    7271    /// Function calculating the score. In absolute mode, also the
    7372    /// score using negated class labels is calculated, and the
    74     /// largest of the two scores are calculated. Absolute mode should
    75     /// be used when two-tailed test is wanted.
     73    /// largest of the two scores are calculated.
     74    ///
     75    /// @a value is copied to a utility::vector and that operator is
     76    /// called. If speed is important this operator should be
     77    /// implemented in inherited class to avoid copying.
    7678    ///
    77     /// @return statistica.
     79    /// @return score
    7880    ///
    79     /// @param target vector of targets (most often +1 -1)
    80     /// @param value vector of the values
    81     ///
    82     double score(const classifier::Target& target,
    83                  const classifier::DataLookup1D& value);
     81    virtual double score(const classifier::Target& target,
     82                         const classifier::DataLookup1D& value) const;
    8483 
    8584    ///
     
    9089    /// is wanted.
    9190    ///
     91    /// @a value is copied to two utility::vector and that operator is
     92    /// called. If speed is important this operator should be
     93    /// implemented in inherited class to avoid copying.
     94    ///
    9295    virtual double
    9396    score(const classifier::Target& target,
    94           const classifier::DataLookupWeighted1D& value) = 0;
     97          const classifier::DataLookupWeighted1D& value) const;
    9598 
    9699    ///
     
    104107    score(const classifier::Target& target,
    105108          const utility::vector& value,
    106           const utility::vector& weight) = 0;
     109          const utility::vector& weight) const = 0;
    107110
    108111    ///
     
    113116    /// is wanted.
    114117    ///
     118    /// @a value and @weight are copied to utility::vector and the
     119    /// corresponding operator is called. If speed is important this
     120    /// operator should be implemented in inherited class to avoid
     121    /// copying.
     122    ///
    115123    double score(const classifier::Target& target,
    116124                 const classifier::DataLookup1D& value,
    117                  const classifier::DataLookup1D& weight);
     125                 const classifier::DataLookup1D& weight) const;
    118126
    119127  protected:
    120     /// return true if method is weighted
    121     bool weighted(void) const;
    122 
    123128    /// true if method is absolute, which means if score is below
    124     /// expected value (by chance) E, score returns E-score instead.
     129    /// expected value (by chance) E, score returns E-score+E instead.
    125130    bool absolute_;
    126     /// true if method is weighted
    127     bool weighted_;
    128131
    129132  }; // class Score
  • trunk/yat/statistics/WilcoxonFoldChange.cc

    r680 r779  
    4242
    4343  double WilcoxonFoldChange::score(const classifier::Target& target,
    44                                    const utility::vector& value)
     44                                   const utility::vector& value) const
    4545  {
    4646    std::vector<double> distance;
    4747    //Peter, should reserve the vector to avoid reallocations   
    48     weighted_=false;
    4948    for (size_t i=0; i<target.size(); i++) {
    5049      if (target(i)!=1) continue;
     
    6059
    6160
    62   double WilcoxonFoldChange::score(const classifier::Target& target,
    63                                    const classifier::DataLookupWeighted1D& value)
     61  double WilcoxonFoldChange::score
     62  (const classifier::Target& target,
     63   const classifier::DataLookupWeighted1D& value) const
    6464  {
    6565    std::cerr << " WilcoxonFoldChange::score  not implemented" << std::endl;
     
    7070  double WilcoxonFoldChange::score(const classifier::Target& target,
    7171                                   const utility::vector& value,
    72                                    const utility::vector& weight)
     72                                   const utility::vector& weight) const
    7373  {
    7474    std::cerr << " WilcoxonFoldChange::score  not implemented" << std::endl;
  • trunk/yat/statistics/WilcoxonFoldChange.h

    r680 r779  
    5353    ///
    5454    double score(const classifier::Target& target,
    55                  const utility::vector& value);
     55                 const utility::vector& value) const;
    5656 
    5757    ///
     
    6464    ///
    6565    double score(const classifier::Target& target,
    66                  const classifier::DataLookupWeighted1D& value);
     66                 const classifier::DataLookupWeighted1D& value) const;
    6767 
    6868    ///
     
    7777    double score(const classifier::Target& target,
    7878                 const utility::vector& value,
    79                  const utility::vector& weight);
     79                 const utility::vector& weight) const;
    8080 
    8181  private:
  • trunk/yat/statistics/tScore.cc

    r747 r779  
    2525#include "Averager.h"
    2626#include "AveragerWeighted.h"
     27#include "yat/classifier/DataLookup1D.h"
    2728#include "yat/classifier/DataLookupWeighted1D.h"
    2829#include "yat/classifier/Target.h"
     
    3738
    3839  tScore::tScore(bool b)
    39     : Score(b),  t_(0)
     40    : Score(b)
    4041  {
    4142  }
    4243
     44
    4345  double tScore::score(const classifier::Target& target,
    44                        const utility::vector& value)
     46                       const utility::vector& value) const
    4547  {
    46     weighted_=false;
     48    return score(target, value, NULL);
     49  }
     50
     51
     52  double tScore::score(const classifier::Target& target,
     53                       const utility::vector& value,
     54                       double* dof) const
     55  {
    4756    statistics::Averager positive;
    4857    statistics::Averager negative;
     
    5362        negative.add(value(i));
    5463    }
    55     double diff = positive.mean() - negative.mean();
    56     dof_=positive.n()+negative.n()-2;
    57     double s2=(positive.sum_xx_centered()+negative.sum_xx_centered())/dof_;
    58 
    59     t_=diff/sqrt(s2/positive.n()+s2/negative.n());
    60     if (t_<0 && absolute_)
    61       t_=-t_;
    62      
    63     return t_;
     64    return score(positive, negative, dof);
    6465  }
    6566
    6667
    6768  double tScore::score(const classifier::Target& target,
    68                        const classifier::DataLookupWeighted1D& value)
     69                       const classifier::DataLookupWeighted1D& value) const
    6970  {
    70     weighted_=true;
     71    return score(target, value, NULL);
     72  }
    7173
     74
     75  double tScore::score(const classifier::Target& target,
     76                       const classifier::DataLookupWeighted1D& value,
     77                       double* dof) const
     78  {
    7279    statistics::AveragerWeighted positive;
    7380    statistics::AveragerWeighted negative;
     
    7885        negative.add(value.data(i),value.weight(i));
    7986    }
    80     double diff = positive.mean() - negative.mean();
    81     dof_=positive.n()+negative.n()-2;
    82     double s2=(positive.sum_xx_centered()+negative.sum_xx_centered())/dof_;
    83     t_=diff/sqrt(s2/positive.n()+s2/(negative.n()));
    84     if (t_<0 && absolute_)
    85       t_=-t_;
    86 
    87     if(positive.sum_w()==0 || negative.sum_w()==0)
    88       t_=0;
    89     return t_;
     87    return score(positive, negative, dof);
    9088  }
    9189
     
    9391  double tScore::score(const classifier::Target& target,
    9492                       const utility::vector& value,
    95                        const utility::vector& weight)
     93                       const utility::vector& weight) const
    9694  {
    97     weighted_=true;
     95    return score(target, value, weight, NULL);
     96  }
    9897
     98
     99  double tScore::score(const classifier::Target& target,
     100                       const utility::vector& value,
     101                       const utility::vector& weight,
     102                       double* dof) const
     103  {
    99104    statistics::AveragerWeighted positive;
    100105    statistics::AveragerWeighted negative;
     
    105110        negative.add(value(i),weight(i));
    106111    }
    107     double diff = positive.mean() - negative.mean();
    108     dof_=positive.n()+negative.n()-2;
    109     double s2=(positive.sum_xx_centered()+negative.sum_xx_centered())/dof_;
    110     t_=diff/sqrt(s2/positive.n()+s2/(negative.n()));
    111     if (t_<0 && absolute_)
    112       t_=-t_;
    113 
    114     if(positive.sum_w()==0 || negative.sum_w()==0)
    115       t_=0;
    116     return t_;
    117   }
    118 
    119   double tScore::p_value(void) const
    120   {
    121     double p = gsl_cdf_tdist_Q(t_, dof_);
    122     return (dof_ > 0 && !weighted_) ? p : 1;
     112    return score(positive, negative, dof);
    123113  }
    124114
  • trunk/yat/statistics/tScore.h

    r767 r779  
    2727#include "Score.h"
    2828
     29#include <cmath>
    2930#include <gsl/gsl_cdf.h>
    3031
     
    4849  public:
    4950    ///
    50     /// 2brief Default Constructor.
     51    /// @brief Default Constructor.
    5152    ///
    5253    tScore(bool absolute=true);
     
    6162       \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y -
    6263       2 } \f$
    63        
     64
    6465       @return t-score. If absolute=true absolute value of t-score
    6566       is returned
    6667    */
    6768    double score(const classifier::Target& target,
    68                  const utility::vector& value);
     69                 const utility::vector& value) const;
     70
     71    /**
     72       Calculates the value of t-score, i.e. the ratio between
     73       difference in mean and standard deviation of this
     74       difference. \f$ t = \frac{ m_x - m_y }
     75       {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
     76       mean, \f$ n \f$ is the number of data points and \f$ s^2 =
     77       \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y -
     78       2 } \f$
     79       
     80       @param dof double pointer in which approximation of degrees of
     81       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
     82
     83       @return t-score. If absolute=true absolute value of t-score
     84       is returned
     85    */
     86    double score(const classifier::Target& target,
     87                 const utility::vector& value, double* dof) const;
     88
     89    /**
     90       Calculates the weighted t-score, i.e. the ratio between
     91       difference in mean and standard deviation of this
     92       difference. \f$ t = \frac{ m_x - m_y }{
     93       s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
     94       weighted mean, n is the weighted version of number of data
     95       points \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and
     96       \f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{
     97       \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2
     98       } \f$. See AveragerWeighted for details.
     99       
     100       @param dof double pointer in which approximation of degrees of
     101       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
     102
     103       @return t-score. If absolute=true absolute value of t-score
     104       is returned
     105    */
     106    double score(const classifier::Target& target,
     107                 const classifier::DataLookupWeighted1D& value,
     108                 double* dof=0) const;
    69109
    70110    /**
     
    83123    */
    84124    double score(const classifier::Target& target,
    85                  const classifier::DataLookupWeighted1D& value);
     125                 const classifier::DataLookupWeighted1D& value) const;
    86126
    87     ///
    88     /// Calculates the weighted t-score, i.e. the ratio between
    89     /// difference in mean and standard deviation of this
    90     /// difference. \f$ t = \frac{ m_x - m_y }{
    91     /// \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the
    92     /// weighted mean, n is the weighted version of number of data
    93     /// points and \f$ s2 \f$ is an estimation of the variance \f$ s^2
    94     /// = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
    95     /// + n_y - 2 } \f$. See AveragerWeighted for details.
    96     ///
    97     /// @return t-score if absolute=true absolute value of t-score
    98     /// is returned
    99     ///
     127    /**
     128      Calculates the weighted t-score, i.e. the ratio between
     129      difference in mean and standard deviation of this
     130      difference. \f$ t = \frac{ m_x - m_y }{
     131      \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the
     132      weighted mean, n is the weighted version of number of data
     133      points and \f$ s2 \f$ is an estimation of the variance \f$ s^2
     134      = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
     135      + n_y - 2 } \f$. See AveragerWeighted for details.
     136     
     137      @return t-score if absolute=true absolute value of t-score
     138      is returned
     139    */
    100140    double score(const classifier::Target& target,
    101141                 const utility::vector& value,
    102                  const utility::vector& weight);
     142                 const utility::vector& weight) const;
    103143
    104     ///
    105     /// Calculates the p-value, i.e. the probability of observing a
    106     /// t-score equally or larger if the null hypothesis is true. If P
    107     /// is near zero, this casts doubt on this hypothesis. The null
    108     /// hypothesis is that the means of the two distributions are
    109     /// equal. Assumtions for this test is that the two distributions
    110     /// are normal distributions with equal variance. The latter
    111     /// assumtion is dropped in Welch's t-test.
    112     ///
    113     /// @return the one-sided p-value( if absolute=true is used
    114     /// the two-sided p-value)
    115     ///
    116     double p_value() const;
     144    /**
     145       Calculates the weighted t-score, i.e. the ratio between
     146       difference in mean and standard deviation of this
     147       difference. \f$ t = \frac{ m_x - m_y }{
     148       \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the
     149       weighted mean, n is the weighted version of number of data
     150       points and \f$ s2 \f$ is an estimation of the variance \f$ s^2
     151       = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
     152       + n_y - 2 } \f$. See AveragerWeighted for details.
     153     
     154       @param dof double pointer in which approximation of degrees of
     155       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
     156
     157       @return t-score if absolute=true absolute value of t-score
     158       is returned
     159    */
     160    double score(const classifier::Target& target,
     161                 const utility::vector& value,
     162                 const utility::vector& weight,
     163                 double* dof=0) const;
    117164
    118165  private:
    119     double t_;
    120     double dof_;
    121        
     166
     167    template<class T>
     168    double score(const T& pos, const T& neg, double* dof) const
     169    {
     170      double diff = pos.mean() - neg.mean();
     171      if (dof)
     172        *dof=pos.n()+neg.n()-2;
     173      double s2=( (pos.sum_xx_centered()+neg.sum_xx_centered())/
     174                  (pos.n()+neg.n()-2));
     175      double t=diff/sqrt(s2/pos.n()+s2/(neg.n()));
     176      if (t<0 && absolute_)
     177        return -t;
     178      return t;
     179    }
    122180  };
    123181
  • trunk/yat/statistics/tTest.cc

    r776 r779  
    2222*/
    2323
     24#include "tTest.h"
    2425#include "tScore.h"
    25 #include "Averager.h"
    26 #include "AveragerWeighted.h"
    27 #include "yat/classifier/DataLookupWeighted1D.h"
    28 #include "yat/classifier/Target.h"
    29 #include "yat/utility/vector.h"
     26//#include "Averager.h"
     27//#include "AveragerWeighted.h"
     28//#include "yat/classifier/DataLookupWeighted1D.h"
     29//#include "yat/classifier/Target.h"
     30//#include "yat/utility/vector.h"
    3031
    3132#include <cassert>
     
    3637namespace statistics { 
    3738
    38   tScore::tScore(bool b)
    39     : Score(b),  t_(0)
     39  tTest::tTest(bool b)
     40    : t_(0)
    4041  {
    4142  }
    4243
    43   double tScore::score(const classifier::Target& target,
    44                        const utility::vector& value)
     44  double tTest::score(const classifier::Target& target,
     45                      const utility::vector& value)
    4546  {
    46     weighted_=false;
    47     statistics::Averager positive;
    48     statistics::Averager negative;
    49     for(size_t i=0; i<target.size(); i++){
    50       if (target.binary(i))
    51         positive.add(value(i));
    52       else
    53         negative.add(value(i));
    54     }
    55     double diff = positive.mean() - negative.mean();
    56     dof_=positive.n()+negative.n()-2;
    57     double s2=(positive.sum_xx_centered()+negative.sum_xx_centered())/dof_;
    58 
    59     t_=diff/sqrt(s2/positive.n()+s2/negative.n());
    60     if (t_<0 && absolute_)
    61       t_=-t_;
    62      
     47    tScore score;
     48    t_ = score.score(target, value, &dof_);
    6349    return t_;
    6450  }
    6551
    6652
    67   double tScore::score(const classifier::Target& target,
    68                        const classifier::DataLookupWeighted1D& value)
     53  double tTest::score(const classifier::Target& target,
     54                      const classifier::DataLookupWeighted1D& value)
    6955  {
    70     weighted_=true;
    71 
    72     statistics::AveragerWeighted positive;
    73     statistics::AveragerWeighted negative;
    74     for(size_t i=0; i<target.size(); i++){
    75       if (target.binary(i))
    76         positive.add(value.data(i),value.weight(i));
    77       else
    78         negative.add(value.data(i),value.weight(i));
    79     }
    80     double diff = positive.mean() - negative.mean();
    81     dof_=positive.n()+negative.n()-2;
    82     double s2=(positive.sum_xx_centered()+negative.sum_xx_centered())/dof_;
    83     t_=diff/sqrt(s2/positive.n()+s2/(negative.n()));
    84     if (t_<0 && absolute_)
    85       t_=-t_;
    86 
    87     if(positive.sum_w()==0 || negative.sum_w()==0)
    88       t_=0;
     56    tScore score;
     57    t_ = score.score(target, value, &dof_);
    8958    return t_;
    9059  }
    9160
    9261
    93   double tScore::score(const classifier::Target& target,
    94                        const utility::vector& value,
    95                        const utility::vector& weight)
     62  double tTest::score(const classifier::Target& target,
     63                      const utility::vector& value,
     64                      const utility::vector& weight)
    9665  {
    97     weighted_=true;
    98 
    99     statistics::AveragerWeighted positive;
    100     statistics::AveragerWeighted negative;
    101     for(size_t i=0; i<target.size(); i++){
    102       if (target.binary(i))
    103         positive.add(value(i),weight(i));
    104       else
    105         negative.add(value(i),weight(i));
    106     }
    107     double diff = positive.mean() - negative.mean();
    108     dof_=positive.n()+negative.n()-2;
    109     double s2=(positive.sum_xx_centered()+negative.sum_xx_centered())/dof_;
    110     t_=diff/sqrt(s2/positive.n()+s2/(negative.n()));
    111     if (t_<0 && absolute_)
    112       t_=-t_;
    113 
    114     if(positive.sum_w()==0 || negative.sum_w()==0)
    115       t_=0;
     66    tScore score;
     67    t_ = score.score(target, value, weight, &dof_);
    11668    return t_;
    11769  }
    11870
    119   double tScore::p_value(void) const
     71  double tTest::p_value(void) const
    12072  {
    121     double p = gsl_cdf_tdist_Q(t_, dof_);
    122     return (dof_ > 0 && !weighted_) ? p : 1;
     73    if (!dof_)
     74      return 1.0;
     75    return gsl_cdf_tdist_Q(t_, dof_);
    12376  }
    12477
  • trunk/yat/statistics/tTest.h

    r776 r779  
    1 #ifndef _theplu_yat_statistics_tscore_
    2 #define _theplu_yat_statistics_tscore_
     1#ifndef _theplu_yat_statistics_ttest_
     2#define _theplu_yat_statistics_ttest_
    33
    44// $Id$
     
    2525*/
    2626
    27 #include "Score.h"
    28 
    2927#include <gsl/gsl_cdf.h>
    3028
    3129namespace theplu {
    3230namespace yat {
     31  namespace classifier {
     32    class DataLookup1D;
     33    class DataLookupWeighted1D;
     34    class Target;
     35  }
    3336  namespace utility {
    3437    class vector;
     
    4346  /// details on the t-test.
    4447  ///
    45   class tScore : public Score
     48  class tTest
    4649  {
    4750 
    4851  public:
    4952    ///
    50     /// 2brief Default Constructor.
     53    /// @brief Default Constructor.
    5154    ///
    52     tScore(bool absolute=true);
     55    tTest(bool absolute=true);
    5356
    5457   
     
    111114    /// assumtion is dropped in Welch's t-test.
    112115    ///
    113     /// @return the one-sided p-value( if absolute=true is used
    114     /// the two-sided p-value)
     116    /// @return the two-sided p-value
    115117    ///
    116118    double p_value() const;
     119
     120    ///
     121    /// @return One-sided P-value
     122    ///
     123    double p_value_one_sided(void) const;
    117124
    118125  private:
     
    120127    double dof_;
    121128       
     129
    122130  };
    123131
Note: See TracChangeset for help on using the changeset viewer.