Changeset 449


Ignore:
Timestamp:
Dec 15, 2005, 9:06:10 PM (17 years ago)
Author:
Peter
Message:

added approximative p-value calculation using Chi2, fixed some bugs, and extended the interface slightly

Location:
trunk/lib/statistics
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/statistics/Fisher.cc

    r448 r449  
    55#include <c++_tools/statistics/utility.h>
    66
     7#include <gsl/gsl_cdf.h>
     8#include <gsl/gsl_randist.h>
     9
    710namespace theplu {
    811namespace statistics {
     
    1013 
    1114  Fisher::Fisher(bool b)
    12     : Score(b), a_(0), b_(0), c_(0), d_(0), cutoff_column_(0), cutoff_row_(0)
     15    : Score(b), a_(0), b_(0), c_(0), d_(0), cutoff_column_(0), cutoff_row_(0),
     16      oddsratio_(1.0)
    1317  {
    1418  }
    1519
    1620
    17   void Fisher::expected(double& a, double& b, double& c, double& d)
     21  double Fisher::Chi2() const
     22  {
     23    double a,b,c,d;
     24    expected(a,b,c,d);
     25    return (a-a_)*(a-a_)/a + (b-b_)*(b-b_)/b +
     26      (c-c_)*(c-c_)/c + (d-d_)*(d-d_)/d;
     27  }
     28
     29
     30  void Fisher::expected(double& a, double& b, double& c, double& d) const
    1831  {
    1932    double N = a_+b_+c_+d_;
     
    2740                           const double b,
    2841                           const double c,
    29                            const double d) const
     42                           const double d)
    3043  {
    3144    // If a column sum or a row sum is zero, the table is nonsense
     
    3346      //Peter, should through exception
    3447      std::cerr << "Warning: Fisher: Table is not valid\n";
    35       return 1.0;
     48      return oddsratio_ = 1.0;
    3649    }
    3750
    38     double ratio=(a*d)/(b*d);
    39     if (absolute_ && ratio<1)
    40       return 1/ratio;
    41     return ratio;
     51    oddsratio_=(a*d)/(b*d);
     52    if (absolute_ && oddsratio_<1)
     53      return 1/oddsratio_;
     54    return oddsratio_;
    4255  }
    4356
     
    5467      p=p_value_approximative();
    5568
    56     if (absolute_)
    57       return fabs(2*(p-.5));
     69    if (!absolute_){
     70      p=p/2;
     71      if (oddsratio_<0.5){
     72        // last term because >= not equal to !(<=)
     73        return 1-p+gsl_ran_hypergeometric_pdf(a_, a_+b_, c_+d_, a_+c_);
     74      }
     75    }
    5876    return p;
    5977  }
     
    6280  double Fisher::p_value_approximative() const
    6381  {
    64     return 1.0;
     82    return gsl_cdf_chisq_Q(Chi2(), 1.0);
    6583  }
    66 
    6784
    6885  double Fisher::p_value_exact() const
     
    7087    // Since the calculation is symmetric and cdf_hypergeometric_P
    7188    // loops up to k we choose the smallest number to be k and mirror
    72     // the matrix.
     89    // the matrix. This choice makes the p-value two-sided.
    7390    if (a_<b_ && a_<c_ && a_<d_)
    7491      return statistics::cdf_hypergeometric_P(a_,a_+b_,c_+d_,a_+c_);
    7592    else if (b_<a_ && b_<c_ && b_<d_)
    76       return statistics::cdf_hypergeometric_P(b_,a_+b_,c_+d_,b_+d_);
     93      return  statistics::cdf_hypergeometric_P(b_,a_+b_,c_+d_,b_+d_);
    7794    else if (c_<a_ && c_<b_ && c_<d_)
    78       return statistics::cdf_hypergeometric_P(c_,c_+d_,a_+b_,a_+c_);
    79     else
    80       return statistics::cdf_hypergeometric_P(d_,c_+d_,a_+b_,b_+d_);
     95      return  statistics::cdf_hypergeometric_P(c_,c_+d_,a_+b_,a_+c_);
     96
     97    return statistics::cdf_hypergeometric_P(d_,c_+d_,a_+b_,b_+d_);
     98
    8199  }
    82100
  • trunk/lib/statistics/Fisher.h

    r448 r449  
    5858   
    5959    ///
     60    /// @return Chi2 score
     61    ///
     62    double Chi2(void) const;
     63
     64    ///
    6065    /// Cutoff sets the limit whether a value should go into the left
    6166    /// or the right column. @see score
     
    7782    /// a' = (a+c)(a+b)/(a+b+c+d)
    7883    ///
    79     void expected(double& a, double& b, double& c, double& d);
     84    void expected(double& a, double& b, double& c, double& d) const;
    8085
    8186    ///
     
    136141  private:
    137142    double oddsratio(const double a, const double b,
    138                      const double c, const double d) const;
     143                     const double c, const double d);
    139144
     145    // two-sided
    140146    double p_value_approximative(void) const;
     147    //two-sided
    141148    double p_value_exact(void) const;
    142149
     
    150157    double cutoff_row_;
    151158    u_int minimum_size_;
    152 
     159    double oddsratio_;
    153160  };
    154161
Note: See TracChangeset for help on using the changeset viewer.