Ignore:
Timestamp:
Nov 12, 2008, 11:10:52 PM (15 years ago)
Author:
Peter
Message:

fixes #461. Also modified implementation of cdf_hypergeometric_P, which may cause conflict with modifications done in trunk (refs #87). If so, go with the trunk version (which uses GSL 1.8).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/0.4-stable/yat/statistics/Fisher.cc

    r1621 r1624  
    3030#include <gsl/gsl_cdf.h>
    3131#include <gsl/gsl_randist.h>
     32
     33#include <algorithm>
    3234
    3335namespace theplu {
     
    105107  double Fisher::p_value() const
    106108  {
    107     if (oddsratio_>1.0)
    108       return 2*p_value_one_sided();
    109     if (oddsratio_<1.0){
    110       // If oddsratio is less than unity
    111       // Two-sided p-value is
    112       // P(X <= oddsratio) + P(X >= 1/oddsratio) =
    113       // P(X <= oddsratio) + P(X <= oddsratio)
    114       // 2 * (P(X < oddsratio) + P(X = oddsratio))
    115       // 2 * (1 - P(X >= oddsratio) + P(X = oddsratio))
    116       if (calculate_p_exact())
    117         return 2*(1-p_value_one_sided()+
    118                   gsl_ran_hypergeometric_pdf(a_,a_+b_,c_+d_, a_+c_)
    119                   );
    120       // if p is calculated approximatively correction is not needed
    121       return 2*(1-p_value_one_sided());
    122 
    123     }
    124     return 1.0;
     109    if (calculate_p_exact())
     110      return p_value_exact();
     111    return p_value_approximative();
    125112  }
    126113
     
    128115  double Fisher::p_value_one_sided() const
    129116  {
    130     if ( calculate_p_exact() )
    131       return p_value_exact();
    132     return p_value_approximative();
     117    if (!calculate_p_exact()) {
     118      if (oddsratio_<1.0)
     119        return 1.0-p_value_approximative()/2.0;
     120      return p_value_approximative()/2.0;
     121    }
     122    return statistics::cdf_hypergeometric_P(c_, c_+d_, a_+b_, a_+c_);
    133123  }
    134124
     
    141131  double Fisher::p_value_exact() const
    142132  {
    143     // Since the calculation is symmetric and cdf_hypergeometric_P
    144     // loops up to k we choose the smallest number to be k and mirror
    145     // the matrix. This choice makes the p-value two-sided.
     133    double res=0;
     134    double a, c, tmp;
     135    expected(a, tmp, c, tmp);
     136    // sum P(x) over x for which abs(x-E(a))>=abs(a-E(a)) 
    146137
    147     if (a_<b_ && a_<c_ && a_<d_)
    148       return statistics::cdf_hypergeometric_P(a_,a_+b_,c_+d_,a_+c_);
    149     else if (b_<a_ && b_<c_ && b_<d_)
    150       return  statistics::cdf_hypergeometric_P(b_,a_+b_,c_+d_,b_+d_);
    151     else if (c_<a_ && c_<b_ && c_<d_)
    152       return  statistics::cdf_hypergeometric_P(c_,c_+d_,a_+b_,a_+c_);
     138    // counting left tail
     139    int k = static_cast<int>(a - std::abs(a_-a));
     140    if (k>=0)
     141      res = cdf_hypergeometric_P(k, a_+b_, c_+d_, a_+c_);
     142    // counting right tail
     143    k = static_cast<int>(c - std::abs(c_-c));
     144    if (k>=0)
     145      res += cdf_hypergeometric_P(k, c_+d_, a_+b_, a_+c_);
    153146
    154     return statistics::cdf_hypergeometric_P(d_,c_+d_,a_+b_,b_+d_);
    155 
     147    // avoid p>1 due to double counting middle one
     148    return std::min(1.0, res);
    156149  }
    157150
Note: See TracChangeset for help on using the changeset viewer.