1 | // $Id: Pearson.cc 623 2006-09-05 02:13:12Z peter $ |
---|
2 | |
---|
3 | #include <c++_tools/statistics/Pearson.h> |
---|
4 | #include <c++_tools/statistics/AveragerPair.h> |
---|
5 | #include <c++_tools/statistics/AveragerPairWeighted.h> |
---|
6 | #include <c++_tools/utility/vector.h> |
---|
7 | #include <c++_tools/classifier/DataLookupWeighted1D.h> |
---|
8 | #include <c++_tools/classifier/Target.h> |
---|
9 | |
---|
10 | #include <cmath> |
---|
11 | #include <gsl/gsl_cdf.h> |
---|
12 | |
---|
13 | |
---|
14 | namespace theplu { |
---|
15 | namespace statistics { |
---|
16 | |
---|
17 | Pearson::Pearson(bool b) |
---|
18 | : Score(b), r_(0), nof_samples_(0) |
---|
19 | { |
---|
20 | } |
---|
21 | |
---|
22 | double Pearson::p_value() const |
---|
23 | { |
---|
24 | if(weighted_) |
---|
25 | return 1; |
---|
26 | if(nof_samples_<2){ |
---|
27 | std::cerr << "Warning: Only " << nof_samples_ << "samples. " |
---|
28 | << "Need at lest 3.\n"; |
---|
29 | return 1; |
---|
30 | } |
---|
31 | |
---|
32 | double t = sqrt(nof_samples_ - 2)*fabs(r_) /sqrt(1-r_*r_); |
---|
33 | double p = gsl_cdf_tdist_Q(t, nof_samples_ -2 ); |
---|
34 | if (absolute_) |
---|
35 | return 2*p; |
---|
36 | if (r_<0) |
---|
37 | return 1-p; |
---|
38 | return p; |
---|
39 | |
---|
40 | } |
---|
41 | |
---|
42 | double Pearson::score(const classifier::Target& target, |
---|
43 | const utility::vector& value) |
---|
44 | { |
---|
45 | weighted_=false; |
---|
46 | AveragerPair ap; |
---|
47 | for (size_t i=0; i<target.size(); i++){ |
---|
48 | if (target.binary(i)) |
---|
49 | ap.add(1, value(i)); |
---|
50 | else |
---|
51 | ap.add(-1, value(i)); |
---|
52 | nof_samples_ = target.size(); |
---|
53 | } |
---|
54 | r_ = ap.correlation(); |
---|
55 | if (r_<0 && absolute_) |
---|
56 | return -r_; |
---|
57 | |
---|
58 | return r_; |
---|
59 | } |
---|
60 | |
---|
61 | double Pearson::score(const classifier::Target& target, |
---|
62 | const classifier::DataLookupWeighted1D& value) |
---|
63 | { |
---|
64 | weighted_=true; |
---|
65 | AveragerPairWeighted ap; |
---|
66 | for (size_t i=0; i<target.size(); i++){ |
---|
67 | if (target.binary(i)) |
---|
68 | ap.add(1, value.data(i),1,value.weight(i)); |
---|
69 | else |
---|
70 | ap.add(-1, value.data(i),1,value.weight(i)); |
---|
71 | nof_samples_ = target.size(); |
---|
72 | } |
---|
73 | r_ = ap.correlation(); |
---|
74 | if (r_<0 && absolute_) |
---|
75 | return -r_; |
---|
76 | |
---|
77 | return r_; |
---|
78 | } |
---|
79 | |
---|
80 | double Pearson::score(const classifier::Target& target, |
---|
81 | const utility::vector& value, |
---|
82 | const utility::vector& weight) |
---|
83 | { |
---|
84 | weighted_=true; |
---|
85 | AveragerPairWeighted ap; |
---|
86 | for (size_t i=0; i<target.size(); i++){ |
---|
87 | if (target.binary(i)) |
---|
88 | ap.add(1, value(i),1,weight(i)); |
---|
89 | else |
---|
90 | ap.add(-1, value(i),1,weight(i)); |
---|
91 | nof_samples_ = target.size(); |
---|
92 | } |
---|
93 | r_ = ap.correlation(); |
---|
94 | if (r_<0 && absolute_) |
---|
95 | return -r_; |
---|
96 | |
---|
97 | return r_; |
---|
98 | } |
---|
99 | |
---|
100 | }} // of namespace statistics and namespace theplu |
---|