Changeset 447


Ignore:
Timestamp:
Dec 15, 2005, 7:51:18 PM (17 years ago)
Author:
Peter
Message:

added copy constructor for KernelView? and added construction of KernelView? in test

Location:
trunk
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/statistics/Fisher.cc

    r297 r447  
    1010 
    1111  Fisher::Fisher(bool b)
    12     : Score(b), a_(0), b_(0), c_(0), d_(0)
     12    : Score(b), a_(0), b_(0), c_(0), d_(0), cutoff_column_(0), cutoff_row_(0)
    1313  {
    1414  }
    1515
     16
     17  void expected(double& a, double& b, double& c, double& d)
     18  {
     19    double N=a_+b_+c_+d_;
     20    a =((a_+b_)*(a_+c_)) / N;
     21    b =((a_+b_)*(b_+d_)) / N;
     22    c =((c_+d_)*(a_+c_)) / N;
     23    d =((c_+d_)*(b_+d_)) / N;
     24  }
    1625
    1726  double Fisher::oddsratio(const double a,
     
    2231    // If a column sum or a row sum is zero, the table is nonsense
    2332    if ((a==0 || d==0) && (c==0 || b==0)){
     33      //Peter, should through exception
    2434      std::cerr << "Warning: Fisher: Table is not valid\n";
    2535      return 1.0;
     
    2939    if (absolute_ && ratio<1)
    3040      return 1/ratio;
    31     else
    32       return ratio;
     41    return ratio;
    3342  }
    3443
    3544
    3645  double Fisher::p_value() const
     46  {
     47    double p=1;
     48    if (a<minimum_size_ || b<minimum_size_ ||
     49        c<minimum_size_ || d<minimum_size_){
     50
     51      p=p_value_exact();
     52    }
     53    else
     54      p=p_value_approximative();
     55
     56    if (absolute_)
     57      return fabs(2*(p-.5));
     58    return p;
     59  }
     60
     61
     62  double Fisher::p_value_approximative() const
     63  {
     64   
     65  }
     66
     67
     68  double Fisher::p_value_exact() const
    3769  {
    3870    // Since the calculation is symmetric and cdf_hypergeometric_P
     
    6395    a_=b_=c_=d_=0;
    6496    for (size_t i=0; i<train_set_.size(); i++)
    65       if (x(train_set_[i])==1)
    66         if (y(train_set_[i])==1)
     97      if (x(train_set_[i])<cutoff_column_)
     98        if (y(train_set_[i])<cutoff_row_)
    6799          a_++;
    68100        else
    69101          c_++;
    70102      else
    71         if (y(train_set_[i])==1)
     103        if (y(train_set_[i])<cutoff_row_)
    72104          b_++;
    73105        else
     
    96128      train_set_ = train_set;
    97129
    98     // Since table elements might be non-integer, we choose to not
    99     // store the table as member variables, which means p_value
    100     // function can not be used (which makes sense since p_value for
    101     // weighted version is not defined).
     130    for (size_t i=0; i<train_set_.size(); i++)
     131      if (x(train_set_[i]) < cutoff_column_)
     132        if (y(train_set_[i]) < cutoff_row_)
     133          a_+=w(train_set_[i]);
     134        else
     135          c_+=w(train_set_[i]);
     136      else
     137        if (y(train_set_[i]) < cutoff_column_)
     138          b_+=w(train_set_[i]);
     139        else
     140          d_+=w(train_set_[i]);
    102141
    103     double a=0;
    104     double b=0;
    105     double c=0;
    106     double d=0;
    107     for (size_t i=0; i<train_set_.size(); i++)
    108       if (x(train_set_[i])==1)
    109         if (y(train_set_[i])==1)
    110           a+=w(train_set_[i]);
    111         else
    112           c+=w(train_set_[i]);
    113       else
    114         if (y(train_set_[i])==1)
    115           b+=w(train_set_[i]);
    116         else
    117           d+=w(train_set_[i]);
    118 
    119     return oddsratio(a,b,c,d);
     142    return oddsratio(a_,b_,c_,d_);
    120143  }
    121144
    122   double Fisher::score(const u_int a, const u_int b,
    123                        const u_int c, const u_int d)
     145  double Fisher::score(const double a, const double b,
     146                       const double c, const double d)
    124147  {
    125148    a_=a;
  • trunk/lib/statistics/Fisher.h

    r295 r447  
    1111namespace statistics { 
    1212  ///
    13   /// Class for Fisher's exact test.   
     13  /// @brief Fisher's exact test.   
    1414  /// Fisher's Exact test is a procedure that you can use for data
    1515  /// in a two by two contingency table: \f[ \begin{tabular}{|c|c|}
     
    3535  /// 10 a Chi-square test is reasonable to use.
    3636  ///
    37 
     37  /// @note The statistica assumes that each column and row sum,
     38  /// respectively, are fixed. Just because you have a 2x2 table, this
     39  /// assumtion does not necessarily match you experimental upset. See
     40  /// e.g. Barnard's test for alternative.
     41  ///
    3842 
    3943  class Fisher : public Score
     
    5357   
    5458    ///
     59    /// Cutoff sets the limit whether a value should go into the left
     60    /// or the right column. @see score
     61    ///
     62    /// @return reference to cutoff for column
     63    ///
     64    inline double& cutoff_column(void) { return cutoff_column_; }
     65
     66    ///
     67    /// Cutoff sets the limit whether a value should go into the left
     68    /// or the right row. @see score
     69    ///
     70    /// @return reference to cutoff for row
     71    ///
     72    inline double& cutoff_row(void) { return cutoff_row_; }
     73
     74    ///
     75    /// Calculates the expected values under the null hypothesis.
     76    /// a' = (a+c)(a+b)/(a+b+c+d)
     77    ///
     78    void expected(u_int& a, u_int& b, u_unt& c, u_int& d);
     79
     80    ///
     81    /// minimum_size is the threshold for when the p-value calculation
     82    /// is performed using a Chi2 approximation.
     83    ///
     84    /// @return reference to minimum_size
     85    ///
     86    inline u_int& minimum_size(void){ return minimum_size_; } 
     87
     88    ///
     89    /// If absolute, the p-value is the two-sided p-value. If all
     90    /// elements in table is at least minimum_size, a Chi2
     91    /// approximation is used.
     92    ///
    5593    /// @return p-value
    56     ///
    57     double p_value() const;
     94    ///
     95    double p_value() const;
    5896   
    5997    ///
     
    75113    /// calculated as \f$ \sum w_i \f$, so when each weight is
    76114    /// unitary the same table is created as in the unweighted version
     115    ///
    77116    /// @return odds ratio
     117    ///
     118    /// @note
    78119    ///
    79120    double score(const gslapi::vector& x, const gslapi::vector& y, 
     
    93134         
    94135  private:
     136    double oddsratio(const double a, const double b,
     137                     const double c, const double d) const;
     138
     139    double p_value_approximative(void) const;
     140    double p_value_exact(void) const;
     141
    95142    std::vector<size_t> train_set_;
    96143    gslapi::vector weight_;
    97     u_int a_;
    98     u_int b_;
    99     u_int c_;
    100     u_int d_;
    101 
    102     double oddsratio(const double a, const double b,
    103                      const double c, const double d) const;
     144    double a_;
     145    double b_;
     146    double c_;
     147    double d_;
     148    double cutoff_column_;
     149    double cutoff_row_;
     150    double minimum_size_;
    104151
    105152  };
  • trunk/lib/statistics/ROC.h

    r414 r447  
    7777
    7878    ///
    79     /// Changes minimum_size , i.e. the threshold when a normal
     79    /// minimum_size is the threshold for when a normal
    8080    /// approximation is used for the p-value calculation.
    8181    ///
    82     inline void minimum_size(const u_int minimum_size)
    83       { minimum_size_ = minimum_size; } 
     82    /// @return reference to minimum_size
     83    ///
     84    inline u_int& minimum_size(void){ return minimum_size_; } 
    8485
    8586  private:
  • trunk/test/score_test.cc

    r420 r447  
    5757    ok = false;
    5858  }
    59   roc.minimum_size(20);
     59  roc.minimum_size() = 20;
    6060  p = roc.p_value();
    6161  if (p > pow(10, -8.0) | p < pow(10, -9.0)){
Note: See TracChangeset for help on using the changeset viewer.