Changeset 822


Ignore:
Timestamp:
Mar 19, 2007, 12:44:09 AM (15 years ago)
Author:
Peter
Message:

redesigned tTest fixes #212 and #156

Location:
trunk
Files:
1 added
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/test/Makefile.am

    r816 r822  
    3535  score_test      \
    3636  statistics_test subset_generator_test svd_test svm_test target_test \
     37  ttest_test \
    3738  utility_test vector_test
    3839
     
    4748alignment_test_SOURCES = alignment_test.cc
    4849averager_test_SOURCES = averager_test.cc
    49 commandline_test_SOURCES = commandline_test.cc
    5050consensus_inputranker_test_SOURCES = consensus_inputranker_test.cc
    5151crossvalidation_test_SOURCES = crossvalidation_test.cc
     
    7373svm_test_SOURCES = svm_test.cc
    7474target_test_SOURCES = target_test.cc
     75ttest_test_SOURCES = ttest_test.cc
    7576utility_test_SOURCES = utility_test.cc
    7677vector_test_SOURCES = vector_test.cc
  • trunk/test/roc_test.cc

    r821 r822  
    2222*/
    2323
     24#include "yat/classifier/DataLookupWeighted1D.h"
    2425#include "yat/classifier/Target.h"
    2526#include "yat/statistics/ROC.h"
     27#include "yat/statistics/utility.h"
    2628#include "yat/utility/matrix.h"
    2729#include "yat/utility/vector.h"
     
    5557    value(i)=i;
    5658  statistics::ROC roc;
    57   for (size_t i=0; i<value.size(); ++i){
    58     roc.add(value(i), target.binary(i));
    59   }
     59  add(roc, value, target);
    6060  double area = roc.area();
    6161  if (area!=0.0){
     
    6767  target.set_binary(1,true);
    6868  roc.reset();
    69   for (size_t i=0; i<value.size(); ++i)
    70     roc.add(value(i), target.binary(i));
     69  add(roc, value, target);
    7170  area = roc.area();
    7271  if (area!=1.0){
     
    10099  }
    101100 
     101  classifier::DataLookupWeighted1D dlw(target.size(),1.3);
     102  add(roc, dlw, target);
     103
    102104  if (ok)
    103105    return 0;
  • trunk/yat/statistics/tScore.h

    r779 r822  
    163163                 double* dof=0) const;
    164164
     165    /**
     166       Calculate t-score from Averager-like objects. Requirements for
     167       T1 and T2 are: double mean(), double n(), double sum_xx_centered()
     168       
     169       If \a dof is not a null pointer, it is assigned the number of
     170       degrees of freedom.
     171    */
     172    template<typename T1, typename T2>
     173    double score(const T1& pos, const T2& neg, double* dof=0) const;
     174
    165175  private:
     176   
     177  };
    166178
    167     template<class T>
    168     double score(const T& pos, const T& neg, double* dof) const
    169     {
    170       double diff = pos.mean() - neg.mean();
    171       if (dof)
    172         *dof=pos.n()+neg.n()-2;
    173       double s2=( (pos.sum_xx_centered()+neg.sum_xx_centered())/
    174                   (pos.n()+neg.n()-2));
    175       double t=diff/sqrt(s2/pos.n()+s2/(neg.n()));
    176       if (t<0 && absolute_)
    177         return -t;
    178       return t;
    179     }
    180   };
     179  template<typename T1, typename T2>
     180  double tScore::score(const T1& pos, const T2& neg, double* dof) const
     181  {
     182    double diff = pos.mean() - neg.mean();
     183    if (dof)
     184      *dof=pos.n()+neg.n()-2;
     185    double s2=( (pos.sum_xx_centered()+neg.sum_xx_centered())/
     186                (pos.n()+neg.n()-2));
     187    double t=diff/sqrt(s2/pos.n()+s2/neg.n());
     188    if (t<0 && absolute_)
     189      return -t;
     190    return t;
     191  }
    181192
    182193}}} // of namespace statistics, yat, and theplu
  • trunk/yat/statistics/tTest.cc

    r779 r822  
    2323
    2424#include "tTest.h"
     25#include "AveragerWeighted.h"
    2526#include "tScore.h"
    26 //#include "Averager.h"
    27 //#include "AveragerWeighted.h"
    28 //#include "yat/classifier/DataLookupWeighted1D.h"
    29 //#include "yat/classifier/Target.h"
    30 //#include "yat/utility/vector.h"
    3127
     28#include <algorithm>
    3229#include <cassert>
    3330#include <cmath>
     
    3734namespace statistics { 
    3835
    39   tTest::tTest(bool b)
    40     : t_(0)
     36  tTest::tTest(void)
     37    : updated_(false)
    4138  {
    4239  }
    4340
    44   double tTest::score(const classifier::Target& target,
    45                       const utility::vector& value)
     41 
     42  void tTest::add(double x, bool target, double w)
    4643  {
    47     tScore score;
    48     t_ = score.score(target, value, &dof_);
     44    if (!w)
     45      return;
     46    if (target)
     47      pos_.add(x,w);
     48    else
     49      neg_.add(x,w);
     50    updated_=false;
     51  }
     52
     53  double tTest::score(void)
     54  {
     55    if (!updated_){
     56      tScore score(false);
     57      t_ = score.score(pos_, neg_, &dof_);
     58      updated_=true;
     59    }
    4960    return t_;
    5061  }
    5162
    5263
    53   double tTest::score(const classifier::Target& target,
    54                       const classifier::DataLookupWeighted1D& value)
     64  double tTest::p_value(void) const
    5565  {
    56     tScore score;
    57     t_ = score.score(target, value, &dof_);
    58     return t_;
     66    double p=2*p_value_one_sided();
     67    return std::min(p,2-p);
    5968  }
    6069
    6170
    62   double tTest::score(const classifier::Target& target,
    63                       const utility::vector& value,
    64                       const utility::vector& weight)
     71  double tTest::p_value_one_sided(void) const
    6572  {
    66     tScore score;
    67     t_ = score.score(target, value, weight, &dof_);
    68     return t_;
    69   }
    70 
    71   double tTest::p_value(void) const
    72   {
    73     if (!dof_)
     73    double t=t_;
     74    double dof=dof_;
     75    if (!updated_){
     76      tScore score(false);
     77      t = score.score(pos_, neg_, &dof);
     78    }
     79    if (!dof)
    7480      return 1.0;
    75     return gsl_cdf_tdist_Q(t_, dof_);
     81    return gsl_cdf_tdist_Q(t, dof);
    7682  }
    7783
  • trunk/yat/statistics/tTest.h

    r779 r822  
    2525*/
    2626
     27#include "AveragerWeighted.h"
     28
    2729#include <gsl/gsl_cdf.h>
    2830
    2931namespace theplu {
    3032namespace yat {
    31   namespace classifier {
    32     class DataLookup1D;
    33     class DataLookupWeighted1D;
    34     class Target;
    35   }
    36   namespace utility {
    37     class vector;
    38   }
    3933namespace statistics { 
    4034
     
    5347    /// @brief Default Constructor.
    5448    ///
    55     tTest(bool absolute=true);
     49    tTest(void);
    5650
    5751   
    5852    /**
    59        Calculates the value of t-score, i.e. the ratio between
    60        difference in mean and standard deviation of this
    61        difference. \f$ t = \frac{ m_x - m_y }
    62        {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
    63        mean, \f$ n \f$ is the number of data points and \f$ s^2 =
    64        \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y -
    65        2 } \f$
    66        
    67        @return t-score. If absolute=true absolute value of t-score
    68        is returned
      53       Adds a data value to tTest; a value with zero weight is ignored.
    6954    */
    70     double score(const classifier::Target& target,
    71                  const utility::vector& value);
     55    void add(double value, bool target, double weight=1.0);
    7256
    73     /**
    74        Calculates the weighted t-score, i.e. the ratio between
    75        difference in mean and standard deviation of this
    76        difference. \f$ t = \frac{ m_x - m_y }{
     57    /**
     58       Calculates the t-score, i.e. the ratio between difference in
     59       mean and standard deviation of this difference. The t-score is
     60       calculated as
     61       \f$ t = \frac{ m_x - m_y }{
    7762       s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
    7863       weighted mean, n is the weighted version of number of data
     
    8065       \f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{
    8166       \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2
    82        } \f$. See AveragerWeighted for details.
     67       } \f$
     68
     69       \see AveragerWeighted
     70
     71       If all weights are equal to unity this boils down to
     72       \f$ t = \frac{ m_x - m_y }
     73       {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is
     74       the mean, \f$ n \f$ is the number of data points and \f$ s^2 =
     75       \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y - 2
     76       } \f$
     77
     78       \see Averager
    8379       
    84        @return t-score. If absolute=true absolute value of t-score
    85        is returned
     80       \return t-score.
    8681    */
    87     double score(const classifier::Target& target,
    88                  const classifier::DataLookupWeighted1D& value);
    89 
    90     ///
    91     /// Calculates the weighted t-score, i.e. the ratio between
    92     /// difference in mean and standard deviation of this
    93     /// difference. \f$ t = \frac{ m_x - m_y }{
    94     /// \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the
    95     /// weighted mean, n is the weighted version of number of data
    96     /// points and \f$ s2 \f$ is an estimation of the variance \f$ s^2
    97     /// = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
    98     /// + n_y - 2 } \f$. See AveragerWeighted for details.
    99     ///
    100     /// @return t-score if absolute=true absolute value of t-score
    101     /// is returned
    102     ///
    103     double score(const classifier::Target& target,
    104                  const utility::vector& value,
    105                  const utility::vector& weight);
     82    double score(void);
    10683
    10784    ///
     
    124101
    125102  private:
     103
     104    double dof_;
     105    bool updated_;
    126106    double t_;
    127     double dof_;
    128        
     107    AveragerWeighted pos_;
     108    AveragerWeighted neg_;
    129109
    130110  };
  • trunk/yat/statistics/utility.h

    r757 r822  
    2525*/
    2626
     27#include "yat/classifier/DataLookupWeighted1D.h"
     28#include "yat/classifier/target.h"
    2729#include "yat/utility/vector.h"
    2830
     
    4547                    const bool sorted=false);
    4648 
     49  /**
     50     Adds each value in an array \a v to an object \a o.  The
     51     requirement for the type T1 is to have an add(double, bool)
     52     function; the requirements for T2 of the array \a v are operator[]
     53     returning an element and size() returning the number of elements.
     54  */
     55  template <typename T1, typename T2>
     56  void add(T1& o, const T2& v, const classifier::Target& target)
     57  {
     58    for (size_t i=0; i<v.size(); ++i)
     59      o.add(v[i],target.binary(i));
     60  }
     61
     62  /**
     63     Adds each data value in the weighted lookup \a v, together with
     64     its weight, to an object \a o.  The requirement for the type T1
     65     is to have an add(double, bool, double) function taking the value,
     66     the binary target, and the weight.
     67  */
     68  template <typename T1>
     69  void add(T1& o, const classifier::DataLookupWeighted1D& v,
     70           const classifier::Target& target)
     71  {
     72    for (size_t i=0; i<v.size(); ++i)
     73      o.add(v.data(i),target.binary(i),v.weight(i));
     74  }
     75
    4776  ///
    4877  /// Calculates the probability to get \a k or smaller from a
Note: See TracChangeset for help on using the changeset viewer.