1 | // $Id: Fisher.cc 297 2005-04-29 10:03:53Z peter $ |
---|
2 | |
---|
3 | #include <c++_tools/statistics/Fisher.h> |
---|
4 | #include <c++_tools/statistics/Score.h> |
---|
5 | #include <c++_tools/statistics/utility.h> |
---|
6 | |
---|
7 | namespace theplu { |
---|
8 | namespace statistics { |
---|
9 | |
---|
10 | |
---|
11 | Fisher::Fisher(bool b) |
---|
12 | : Score(b), a_(0), b_(0), c_(0), d_(0) |
---|
13 | { |
---|
14 | } |
---|
15 | |
---|
16 | |
---|
17 | double Fisher::oddsratio(const double a, |
---|
18 | const double b, |
---|
19 | const double c, |
---|
20 | const double d) const |
---|
21 | { |
---|
22 | // If a column sum or a row sum is zero, the table is nonsense |
---|
23 | if ((a==0 || d==0) && (c==0 || b==0)){ |
---|
24 | std::cerr << "Warning: Fisher: Table is not valid\n"; |
---|
25 | return 1.0; |
---|
26 | } |
---|
27 | |
---|
28 | double ratio=(a*d)/(b*d); |
---|
29 | if (absolute_ && ratio<1) |
---|
30 | return 1/ratio; |
---|
31 | else |
---|
32 | return ratio; |
---|
33 | } |
---|
34 | |
---|
35 | |
---|
36 | double Fisher::p_value() const |
---|
37 | { |
---|
38 | // Since the calculation is symmetric and cdf_hypergeometric_P |
---|
39 | // loops up to k we choose the smallest number to be k and mirror |
---|
40 | // the matrix. |
---|
41 | if (a_<b_ && a_<c_ && a_<d_) |
---|
42 | return statistics::cdf_hypergeometric_P(a_,a_+b_,c_+d_,a_+c_); |
---|
43 | else if (b_<a_ && b_<c_ && b_<d_) |
---|
44 | return statistics::cdf_hypergeometric_P(b_,a_+b_,c_+d_,b_+d_); |
---|
45 | else if (c_<a_ && c_<b_ && c_<d_) |
---|
46 | return statistics::cdf_hypergeometric_P(c_,c_+d_,a_+b_,a_+c_); |
---|
47 | else |
---|
48 | return statistics::cdf_hypergeometric_P(d_,c_+d_,a_+b_,b_+d_); |
---|
49 | } |
---|
50 | |
---|
51 | |
---|
52 | double Fisher::score(const gslapi::vector& x, |
---|
53 | const gslapi::vector& y, |
---|
54 | const std::vector<size_t>& train_set) |
---|
55 | { |
---|
56 | if (!train_set.size()){ |
---|
57 | train_set_.resize(0); |
---|
58 | for (size_t i=0; i<target_.size(); i++) |
---|
59 | train_set_.push_back(i); |
---|
60 | } |
---|
61 | else |
---|
62 | train_set_ = train_set; |
---|
63 | a_=b_=c_=d_=0; |
---|
64 | for (size_t i=0; i<train_set_.size(); i++) |
---|
65 | if (x(train_set_[i])==1) |
---|
66 | if (y(train_set_[i])==1) |
---|
67 | a_++; |
---|
68 | else |
---|
69 | c_++; |
---|
70 | else |
---|
71 | if (y(train_set_[i])==1) |
---|
72 | b_++; |
---|
73 | else |
---|
74 | d_++; |
---|
75 | |
---|
76 | // If a column sum or a row sum is zero, the table is nonsense |
---|
77 | if ((a_==0 || d_==0) && (c_==0 || b_==0)){ |
---|
78 | std::cerr << "Warning: Fisher: Table is not valid\n"; |
---|
79 | return 0; |
---|
80 | } |
---|
81 | |
---|
82 | return oddsratio(a_,b_,c_,d_); |
---|
83 | } |
---|
84 | |
---|
85 | double Fisher::score(const gslapi::vector& x, |
---|
86 | const gslapi::vector& y, |
---|
87 | const gslapi::vector& w, |
---|
88 | const std::vector<size_t>& train_set) |
---|
89 | { |
---|
90 | if (!train_set.size()){ |
---|
91 | train_set_.resize(0); |
---|
92 | for (size_t i=0; i<target_.size(); i++) |
---|
93 | train_set_.push_back(i); |
---|
94 | } |
---|
95 | else |
---|
96 | train_set_ = train_set; |
---|
97 | |
---|
98 | // Since table elements might be non-integer, we choose to not |
---|
99 | // store the table as member variables, which means p_value |
---|
100 | // function can not be used (which makes sense since p_value for |
---|
101 | // weighted version is not defined). |
---|
102 | |
---|
103 | double a=0; |
---|
104 | double b=0; |
---|
105 | double c=0; |
---|
106 | double d=0; |
---|
107 | for (size_t i=0; i<train_set_.size(); i++) |
---|
108 | if (x(train_set_[i])==1) |
---|
109 | if (y(train_set_[i])==1) |
---|
110 | a+=w(train_set_[i]); |
---|
111 | else |
---|
112 | c+=w(train_set_[i]); |
---|
113 | else |
---|
114 | if (y(train_set_[i])==1) |
---|
115 | b+=w(train_set_[i]); |
---|
116 | else |
---|
117 | d+=w(train_set_[i]); |
---|
118 | |
---|
119 | return oddsratio(a,b,c,d); |
---|
120 | } |
---|
121 | |
---|
122 | double Fisher::score(const u_int a, const u_int b, |
---|
123 | const u_int c, const u_int d) |
---|
124 | { |
---|
125 | a_=a; |
---|
126 | b_=b; |
---|
127 | c_=c; |
---|
128 | d_=d; |
---|
129 | return oddsratio(a,b,c,d); |
---|
130 | } |
---|
131 | |
---|
132 | }} // of namespace statistics and namespace theplu |
---|