Ignore:
Timestamp:
Mar 5, 2007, 7:58:30 PM (15 years ago)
Author:
Peter
Message:

Refs #101

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/yat/statistics/tScore.h

    r767 r779  
    2727#include "Score.h"
    2828
     29#include <cmath>
    2930#include <gsl/gsl_cdf.h>
    3031
     
    4849  public:
    4950    ///
    50     /// 2brief Default Constructor.
     51    /// @brief Default Constructor.
    5152    ///
    5253    tScore(bool absolute=true);
     
    6162       \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y -
    6263       2 } \f$
    63        
     64
    6465       @return t-score. If absolute=true absolute value of t-score
    6566       is returned
    6667    */
    6768    double score(const classifier::Target& target,
    68                  const utility::vector& value);
     69                 const utility::vector& value) const;
     70
     71    /**
     72       Calculates the value of t-score, i.e. the ratio between
     73       difference in mean and standard deviation of this
     74       difference. \f$ t = \frac{ m_x - m_y }
     75       {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
     76       mean, \f$ n \f$ is the number of data points and \f$ s^2 =
     77       \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y -
     78       2 } \f$
     79       
     80       @param dof double pointer in which approximation of degrees of
     81       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
     82
     83       @return t-score. If absolute=true absolute value of t-score
     84       is returned
     85    */
     86    double score(const classifier::Target& target,
     87                 const utility::vector& value, double* dof) const;
     88
     89    /**
     90       Calculates the weighted t-score, i.e. the ratio between
     91       difference in mean and standard deviation of this
     92       difference. \f$ t = \frac{ m_x - m_y }{
     93       s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the
     94       weighted mean, n is the weighted version of number of data
     95       points \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and
     96       \f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{
     97       \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2
     98       } \f$. See AveragerWeighted for details.
     99       
     100       @param dof double pointer in which approximation of degrees of
     101       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
     102
     103       @return t-score. If absolute=true absolute value of t-score
     104       is returned
     105    */
     106    double score(const classifier::Target& target,
     107                 const classifier::DataLookupWeighted1D& value,
     108                 double* dof=0) const;
    69109
    70110    /**
     
    83123    */
    84124    double score(const classifier::Target& target,
    85                  const classifier::DataLookupWeighted1D& value);
     125                 const classifier::DataLookupWeighted1D& value) const;
    86126
    87     ///
    88     /// Calculates the weighted t-score, i.e. the ratio between
    89     /// difference in mean and standard deviation of this
    90     /// difference. \f$ t = \frac{ m_x - m_y }{
    91     /// \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the
    92     /// weighted mean, n is the weighted version of number of data
    93     /// points and \f$ s2 \f$ is an estimation of the variance \f$ s^2
    94     /// = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
    95     /// + n_y - 2 } \f$. See AveragerWeighted for details.
    96     ///
    97     /// @return t-score if absolute=true absolute value of t-score
    98     /// is returned
    99     ///
     127    /**
     128      Calculates the weighted t-score, i.e. the ratio between
     129      difference in mean and standard deviation of this
     130      difference. \f$ t = \frac{ m_x - m_y }{
     131      \sqrt{\frac{s^2}{n_x}+\frac{s^2}{n_y}}} \f$ where \f$ m \f$ is the
     132      weighted mean, n is the weighted version of number of data
     133      points and \f$ s^2 \f$ is an estimation of the variance \f$ s^2
     134      = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
     135      + n_y - 2 } \f$. See AveragerWeighted for details.
     136     
     137      @return t-score. If absolute=true absolute value of t-score
     138      is returned
     139    */
    100140    double score(const classifier::Target& target,
    101141                 const utility::vector& value,
    102                  const utility::vector& weight);
     142                 const utility::vector& weight) const;
    103143
    104     ///
    105     /// Calculates the p-value, i.e. the probability of observing a
    106     /// t-score equally or larger if the null hypothesis is true. If P
    107     /// is near zero, this casts doubt on this hypothesis. The null
    108     /// hypothesis is that the means of the two distributions are
    109     /// equal. Assumtions for this test is that the two distributions
    110     /// are normal distributions with equal variance. The latter
    111     /// assumtion is dropped in Welch's t-test.
    112     ///
    113     /// @return the one-sided p-value( if absolute=true is used
    114     /// the two-sided p-value)
    115     ///
    116     double p_value() const;
     144    /**
     145       Calculates the weighted t-score, i.e. the ratio between
     146       difference in mean and standard deviation of this
     147       difference. \f$ t = \frac{ m_x - m_y }{
     148       \sqrt{\frac{s^2}{n_x}+\frac{s^2}{n_y}}} \f$ where \f$ m \f$ is the
     149       weighted mean, n is the weighted version of number of data
     150       points and \f$ s^2 \f$ is an estimation of the variance \f$ s^2
     151       = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x
     152       + n_y - 2 } \f$. See AveragerWeighted for details.
     153     
     154       @param dof double pointer in which approximation of degrees of
     155       freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted.
     156
     157       @return t-score. If absolute=true absolute value of t-score
     158       is returned
     159    */
     160    double score(const classifier::Target& target,
     161                 const utility::vector& value,
     162                 const utility::vector& weight,
     163                 double* dof=0) const;
    117164
    118165  private:
    119     double t_;
    120     double dof_;
    121        
     166
     167    template<class T>
     168    double score(const T& pos, const T& neg, double* dof) const
     169    {
              // Shared t-score computation. T only needs to provide mean(),
              // n() and sum_xx_centered(); those are the only members used
              // here. pos and neg summarise the two groups being compared.

              // Difference between the two group means (numerator of t).
     170      double diff = pos.mean() - neg.mean();
              // Report the degrees of freedom only when the caller passed a
              // non-null pointer.
     171      if (dof)
     172        *dof=pos.n()+neg.n()-2;
              // Pooled variance estimate: the summed centered squares of both
              // groups divided by n_pos + n_neg - 2.
              // NOTE(review): the denominator is zero when
              // pos.n()+neg.n()==2 and nothing guards against it here;
              // confirm that callers rule this case out.
     173      double s2=( (pos.sum_xx_centered()+neg.sum_xx_centered())/
     174                  (pos.n()+neg.n()-2));
              // t = diff / sqrt(s2/n_pos + s2/n_neg)
     175      double t=diff/sqrt(s2/pos.n()+s2/(neg.n()));
              // When absolute_ (declared outside this block) is set, a
              // negative t is returned with its sign flipped; otherwise t is
              // returned as computed.
     176      if (t<0 && absolute_)
     177        return -t;
     178      return t;
     179    }
    122180  };
    123181
Note: See TracChangeset for help on using the changeset viewer.