Changeset 169


Ignore:
Timestamp:
Sep 23, 2004, 6:19:14 PM (17 years ago)
Author:
Peter
Message:

Fisher's exact test

Location:
trunk/src
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/Statistics.cc

    r168 r169  
    1010#include <cmath>
    1111#include <cstdlib>
     12#include <gsl/gsl_randist.h>
    1213#include <iostream>
    1314
     
    2021  {
    2122  }
     23
     24  double cdf_hypergeometric_P(u_int k, u_int n1, u_int n2, u_int t)
     25  {
     26    double p=0;
     27    for (u_int i=0; i<=k; i++)
     28      p+= gsl_ran_hypergeometric_pdf(i, n1, n2, t);
     29    return p;
     30  }
     31
     32  double fisher(u_int a, u_int b, u_int c, u_int d)
     33  {
     34    // Since the calculation is symmetric and cdf_hypergeometric_P
     35    // loops up to k we choose the samllest number to be k and mirror
     36    // the matrix.
     37    if (a<b && a<c && a<d)
     38      return cdf_hypergeometric_P(a,a+b,c+d,a+c);
     39    else if (b<a && b<c && b<d)
     40      return cdf_hypergeometric_P(b,a+b,c+d,b+d);
     41    else if (c<a && c<b && c<d)
     42      return cdf_hypergeometric_P(c,c+d,a+b,a+c);
     43    else
     44      return cdf_hypergeometric_P(d,c+d,a+b,b+d);
     45  }
     46
    2247
    2348  double Statistics::median(std::vector<double>& vec)
  • trunk/src/Statistics.h

    r168 r169  
    2525    ///
    2626    Statistics();
     27
     28    ///
     29    /// Calculates the probability of getting \a k or fewer from a
     30    /// hypergeometric distribution with parameters \a n1, \a n2 and
     31    /// \a t. The hypergeometric distribution arises as follows:
     32    /// Let there be \a n1 ways for a "good" selection and \a n2 ways
     33    /// for a "bad" selection out of a total of n1+n2 possibilities.
     34    /// Take \a t samples without replacement; \a k of those are "good"
     35    /// samples. \a k will follow a hypergeometric distribution.
     36    /// @return cumulative hypergeometric distribution function P(k).
     37    ///
     38    double cdf_hypergeometric_P(u_int k, u_int n1, u_int n2, u_int t);
     39
     40    ///
     41    /// Fisher's Exact Test is a procedure that you can use for data
     42    /// in a two by two contingency table: \f$ \\
     43    /// \begin{tabular}{|c|c|} \hline a&b\\ \hline c&d\\ \hline
     44    /// \end{tabular} \f$ Fisher's Exact Test is based on exact
     45    /// probabilities from a specific distribution (the hypergeometric
     46    /// distribution). There's really no lower bound on the amount of
     47    /// data that is needed for Fisher's Exact Test. You do have to
     48    /// have at least one data value in each row and one data value in
     49    /// each column. If an entire row or column is zero, then you
     50    /// don't really have a 2 by 2 table. But you can use Fisher's
     51    /// Exact Test when one of the cells in your table has a zero in
     52    /// it. Fisher's Exact Test is also very useful for highly
     53    /// imbalanced tables. If one or two of the cells in a two by two
     54    /// table have numbers in the thousands and one or two of the
     55    /// other cells have numbers less than 5, you can still use
     56    /// Fisher's Exact Test. For very large tables (where all four
     57    /// entries in the two by two table are large), your computer may
     58    /// take too much time to compute Fisher's Exact Test. In these
     59    /// situations, though, you might as well use the Chi-square test
     60    /// because a large sample approximation (that the Chi-square test
     61    /// relies on) is very reasonable. If all elements are larger than
     62    /// 10, a Chi-square test is reasonable to use. @return One-sided
     63    /// p-value for Fisher's exact test (see cdf_hypergeometric_P()).
     64    ///
     65    double fisher(u_int a, u_int b, u_int c, u_int d); 
    2766
    2867    ///
     
    69108
    70109  private:
    71    
     110
    72111         
    73112  };
Note: See TracChangeset for help on using the changeset viewer.