Changeset 3739 for branches/0.15-stable


Ignore:
Timestamp:
Jul 5, 2018, 7:59:40 AM (5 years ago)
Author:
Peter
Message:

fixes #908

Location:
branches/0.15-stable
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/0.15-stable/test/fisher.cc

    r3455 r3739  
    2626#include "yat/statistics/Fisher.h"
    2727
     28#include <gsl/gsl_cdf.h>
     29#include <gsl/gsl_randist.h>
    2830#include <climits>
    2931
     
    3436void test_large_numbers(test::Suite&);
    3537void test_yates(test::Suite&);
     38void test_cdf_hypergeometric_Q(test::Suite&);
     39void test_ticket908(test::Suite&);
    3640
    3741int main(int argc, char* argv[])
     
    7377  test_large_numbers(suite);
    7478  test_yates(suite);
     79  test_cdf_hypergeometric_Q(suite);
     80  test_ticket908(suite);
    7581  return suite.return_value();
    7682}
     
    168174  }
    169175}
     176
     177
     178void test_cdf_hypergeometric_Q(test::Suite& suite)
     179{
     180  suite.out() << "\ntest " << __func__ << "\n";
     181  unsigned int n1 = 16;
     182  unsigned int n2 = 3;
     183  unsigned int t = 16;
     184  for (unsigned int k=0; k<=t; ++k) {
     185    // P(X=k)
     186    double p = gsl_ran_hypergeometric_pdf(k, n1, n2, t);
     187    // P(X<=k)
     188    double P = gsl_cdf_hypergeometric_P(k, n1, n2, t);
     189    // P(X>k)
     190    double Q = gsl_cdf_hypergeometric_Q(k, n1, n2, t);
     191    suite.out() << k << " " << p << " " << P << " " << Q << "\n";
     192    double sum = 0;
     193    for (unsigned int x=0; x<=k; ++x)
     194      sum += gsl_ran_hypergeometric_pdf(x, n1, n2, t);
     195    if (!suite.equal(sum, P, 20)) {
     196      suite.add(false);
     197      suite.err() << "error: gsl_cdf_hypergeometric_P for k=" << k << "\n";
     198    }
     199    if (!suite.equal(Q, 1-P, 1000)) {
     200      suite.add(false);
     201      suite.err() << "failed: Q = 1-P for k=" << k << "\n";
     202    }
     203  }
     204}
     205
     206
     207// test bug #908 reported in http://dev.thep.lu.se/yat/ticket/908
     208void test_ticket908(test::Suite& suite)
     209{
     210  suite.out() << "===\ntesting ticket 908\n";
     211  statistics::Fisher f;
     212  int a1 = 13;
     213  int a2 = 3;
     214  int b1 = 3;
     215  int b2 = 0;
     216
     217  double correct_p = 0;
     218  unsigned int n1 = a1 + a2;
     219  unsigned int n2 = b1 + b2;
     220  unsigned int t = a1+b1;
     221  for (unsigned int k=0; k<=n1; ++k) {
     222    double P = gsl_ran_hypergeometric_pdf(k, n1, n2, t);
     223    if (P <=  gsl_ran_hypergeometric_pdf(a1, n1, n2, t)) {
     224      suite.out() << "counting " << k << " " << P << "\n";
     225      correct_p += P;
     226    }
     227  }
     228  statistics::Fisher fisher;
     229  std::vector<double> p;
     230  fisher.oddsratio(a1, a2, b1, b2);
     231  p.push_back(fisher.p_value());
     232  fisher.oddsratio(a2, a1, b2, b1);
     233  p.push_back(fisher.p_value());
     234  fisher.oddsratio(b1, b2, a1, a2);
     235  p.push_back(fisher.p_value());
     236  fisher.oddsratio(b2, b1, a2, a1);
     237  p.push_back(fisher.p_value());
     238  for (size_t i=0; i<p.size(); ++i) {
     239    if (!suite.add(suite.equal(correct_p, p[i], 20)))
     240      suite.err() << "error: incorrect p: " << p[i] << " for case "
     241                  << i << "; expected: " << correct_p << "\n";
     242  }
     243}
  • branches/0.15-stable/yat/statistics/Fisher.cc

    r3455 r3739  
    8787
    8888  /*
     89   (n)       n!
     90   ( )  = --------
     91   (k)    k!(n-k)!
     92
     93
     94
    8995    ( n )
    90     (k+1)   (k+1)!(n-k)!
    91     ----- = ------------
    92      (n)    k!(n-k-1)!
     96    (k+1)   k!(n-k)!
     97    ----- = ------------ = (n-k) / (k+1)
     98     (n)    (k+1)!(n-k-1)!
    9399     (k)
    94100   */
    95101  double Fisher::choose_ratio(unsigned int n, unsigned int k) const
    96102  {
    97     if (k==0)
    98       return n;
    99     if (k==n-1)
    100       return k+1;
     103    assert(k+1 <= n);
    101104    return static_cast<double>(n-k) / (k+1);
    102105  }
     
    106109                                      unsigned int n2, unsigned int t) const
    107110  {
     111    // P(X = k+1) / P(X = k) =
     112    // (choose(n1, k+1) * choose(n2, t-k-1) / choose(n1+n2, t) ) /
     113    // (choose(n1, k) * choose(n2, t-k) / choose(n1+n2, t) ) =
     114    // choose(n1, k+1) / choose(n1, k) /( choose(n2, t-k)/choose(n2,t-k-1) ) =
     115    // choose(n1,k+1)/choose(n1,k) / (choose(n2,(t-k-1)+1)/choose(n2,t-k-1))
     116    // choose_ratio(n1,k) / choose_ratio(n2, t-k-1)
     117    // where choose_ratio(a, b) = choose(a, b+1) / choose(a,b)
    108118    return choose_ratio(n1, k) / choose_ratio(n2, t-k-1);
    109119  }
     
    240250  {
    241251    // The hypergeometric distribution is unimodal (only one peak) and the
    242     // peak (mode) occurs at: (t+1)*(n1+1)/(n+1)
    243 
    244     double mode = static_cast<double>(a_+c_+1)*static_cast<double>(a_+b_+1)/
    245       static_cast<double>(a_+b_+c_+d_+1);
     252    // peak (mode) occurs at: floor((t+1)*(n1+1)/(n+1))
     253
     254    double mode = std::floor((a_+c_+1.0)*(a_+b_+1.0)/(a_+b_+c_+d_+1.0));
    246255    if (a_ >= mode)
    247256      return p_value_exact(a_, a_+b_, c_+d_, a_+c_);
     
    267276    assert(n1);
    268277    assert(n2);
     278
     279    // special case when k=0 and mode (peak of distribution) is at
     280    // zero as well. If mode is not zero 2x2 table should have been
     281    // mirrored before calling this function.
     282    if (k == 0) {
     283      assert(std::floor((a_+c_+1.0)*(a_+b_+1.0)/(a_+b_+c_+d_+1.0)) == 0.0);
     284      return 1.0;
     285    }
    269286
    270287    assert(k);
Note: See TracChangeset for help on using the changeset viewer.