Changeset 1624


Ignore:
Timestamp:
Nov 12, 2008, 11:10:52 PM (12 years ago)
Author:
Peter
Message:

fixes #461. Also modified implementation of cdf_hypergeometric_P, which may cause conflict with modifications done in trunk (refs #87). If so, go with the trunk version (which uses GSL 1.8).

Location:
branches/0.4-stable
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • branches/0.4-stable/build_support/version.m4

    r1436 r1624  
    5454# yat-0.4.1  1:0:0
    5555# yat-0.4.2  1:1:0
     56# yat-0.4.3  1:2:0
    5657#
    57 m4_define([LT_VERSION], [1:1:0])
     58m4_define([LT_VERSION], [1:2:0])
    5859
    5960
  • branches/0.4-stable/test/fisher_test.cc

    r1621 r1624  
    2424#include "yat/statistics/Fisher.h"
    2525
     26using namespace theplu::yat;
     27void test_p_value(test::Suite&);
     28void test_p_value_approximative(test::Suite&);
     29void test_p_value_exact(test::Suite&);
     30
    2631int main(int argc, char* argv[])
    2732
    28   using namespace theplu::yat;
    2933  test::Suite suite(argc, argv);
    3034
     
    6064    suite.err() << "minimum_size failed\n";
    6165  }
     66  test_p_value(suite);
    6267  return suite.return_value();
    6368}
     69
     70
     71void test_p_value(test::Suite& suite)
     72{
     73  test_p_value_exact(suite);
     74  test_p_value_approximative(suite);
     75}
     76
     77
     78void test_p_value_approximative(test::Suite& suite)
     79{
     80  suite.err() << "testing p_value_approximative\n";
     81  statistics::Fisher f;
     82  f.minimum_size() = 0;
     83  f.oddsratio(10,20,20,50);
     84  suite.add(suite.equal(f.p_value(), 2*f.p_value_one_sided()));
     85  f.oddsratio(10,20,10,20);
     86  suite.add(suite.equal(f.p_value(), 1.0));
     87  suite.add(suite.equal(f.p_value_one_sided(), 0.5));
     88}
     89
     90
     91void test_p_value_exact(test::Suite& suite)
     92{
     93  suite.err() << "test p_value_exact\n";
     94  statistics::Fisher f;
     95  f.minimum_size() = 1000;
     96  f.oddsratio(10,20,10,20);
     97  suite.add(suite.equal(f.p_value(), 1.0));
     98
     99  f.oddsratio(10, 20, 20, 200);
     100  suite.add(suite.equal(f.p_value(), 0.0008119060627676223));
     101  suite.add(suite.equal(f.p_value_one_sided(), 0.0008119060627676223));
     102
     103  // testing symmetry
     104  statistics::Fisher f2;
     105  f2.minimum_size() = 1000;
     106  f2.oddsratio(20, 200, 10, 20);
     107  suite.add(suite.equal(f2.p_value(), f.p_value()));
     108
     109  f.oddsratio(1, 1, 1, 2);
     110  suite.add(suite.equal(f.p_value(), 1.0));
     111  suite.add(suite.equal(f.p_value_one_sided(), 0.7));
     112
     113  f.oddsratio(1, 1, 2, 1);
     114  suite.add(suite.equal(f.p_value(), 1.0));
     115  suite.add(suite.equal(f.p_value_one_sided(), 0.9));
     116
     117}
  • branches/0.4-stable/yat/statistics/Fisher.cc

    r1621 r1624  
    3030#include <gsl/gsl_cdf.h>
    3131#include <gsl/gsl_randist.h>
     32
     33#include <algorithm>
    3234
    3335namespace theplu {
     
    105107  double Fisher::p_value() const
    106108  {
    107     if (oddsratio_>1.0)
    108       return 2*p_value_one_sided();
    109     if (oddsratio_<1.0){
    110       // If oddsratio is less than unity
    111       // Two-sided p-value is
    112       // P(X <= oddsratio) + P(X >= 1/oddsratio) =
    113       // P(X <= oddsratio) + P(X <= oddsratio)
    114       // 2 * (P(X < oddsratio) + P(X = oddsratio))
    115       // 2 * (1 - P(X >= oddsratio) + P(X = oddsratio))
    116       if (calculate_p_exact())
    117         return 2*(1-p_value_one_sided()+
    118                   gsl_ran_hypergeometric_pdf(a_,a_+b_,c_+d_, a_+c_)
    119                   );
    120       // if p is calculated approximatively correction is not needed
    121       return 2*(1-p_value_one_sided());
    122 
    123     }
    124     return 1.0;
     109    if (calculate_p_exact())
     110      return p_value_exact();
     111    return p_value_approximative();
    125112  }
    126113
     
    128115  double Fisher::p_value_one_sided() const
    129116  {
    130     if ( calculate_p_exact() )
    131       return p_value_exact();
    132     return p_value_approximative();
     117    if (!calculate_p_exact()) {
     118      if (oddsratio_<1.0)
     119        return 1.0-p_value_approximative()/2.0;
     120      return p_value_approximative()/2.0;
     121    }
     122    return statistics::cdf_hypergeometric_P(c_, c_+d_, a_+b_, a_+c_);
    133123  }
    134124
     
    141131  double Fisher::p_value_exact() const
    142132  {
    143     // Since the calculation is symmetric and cdf_hypergeometric_P
    144     // loops up to k we choose the smallest number to be k and mirror
    145     // the matrix. This choice makes the p-value two-sided.
     133    double res=0;
     134    double a, c, tmp;
     135    expected(a, tmp, c, tmp);
     136    // sum P(x) over x for which abs(x-E(a))>=abs(a-E(a)) 
    146137
    147     if (a_<b_ && a_<c_ && a_<d_)
    148       return statistics::cdf_hypergeometric_P(a_,a_+b_,c_+d_,a_+c_);
    149     else if (b_<a_ && b_<c_ && b_<d_)
    150       return  statistics::cdf_hypergeometric_P(b_,a_+b_,c_+d_,b_+d_);
    151     else if (c_<a_ && c_<b_ && c_<d_)
    152       return  statistics::cdf_hypergeometric_P(c_,c_+d_,a_+b_,a_+c_);
     138    // counting left tail
     139    int k = static_cast<int>(a - std::abs(a_-a));
     140    if (k>=0)
     141      res = cdf_hypergeometric_P(k, a_+b_, c_+d_, a_+c_);
     142    // counting right tail
     143    k = static_cast<int>(c - std::abs(c_-c));
     144    if (k>=0)
     145      res += cdf_hypergeometric_P(k, c_+d_, a_+b_, a_+c_);
    153146
    154     return statistics::cdf_hypergeometric_P(d_,c_+d_,a_+b_,b_+d_);
    155 
     147    // avoid p>1 due to double counting middle one
     148    return std::min(1.0, res);
    156149  }
    157150
  • branches/0.4-stable/yat/statistics/Fisher.h

    r1621 r1624  
    118118    const unsigned int& minimum_size(void) const;
    119119
    120     ///
    121     /// If oddsratio is larger than unity, two-sided p-value is equal
    122     /// to 2*p_value_one_sided(). If oddsratio is smaller than unity
    123     /// two-sided p-value is equal to 2*(1-p_value_one_sided()). If
    124     /// oddsratio is unity two-sided p-value is equal to unity.
    125     ///
    126     /// If all elements in table is at least minimum_size(), a Chi2
    127     /// approximation is used.
    128     ///
    129     /// @return 2-sided p-value
    130     ///
     120    /**
     121       If all elements in table is at least minimum_size(), a Chi2
     122       approximation is used.
     123       
     124       Otherwise a two-sided p-value is calculated using the
     125       hypergeometric distribution
     126       \f$ \sum_k P(k) \f$ where summation runs over \a k such that
     127       \f$ |k-<a>| \ge |a-<a>| \f$.
     128
     129       \return two-sided p-value
     130    */
    131131    double p_value() const;
    132132   
  • branches/0.4-stable/yat/statistics/utility.cc

    r1392 r1624  
    3131#include <gsl/gsl_statistics_double.h>
    3232
     33#include <algorithm>
    3334#include <cassert>
    3435
     
    4142  {
    4243    double p=0;
    43     for (size_t i=0; i<=k; i++)
     44    size_t top = std::min(k, std::min(n1, t));
     45    for (size_t i=std::max(0, static_cast<int>(t-n2)); i<=top; i++)
    4446      p+= gsl_ran_hypergeometric_pdf(i, n1, n2, t);
    4547    return p;
Note: See TracChangeset for help on using the changeset viewer.