1 | // $Id: Pearson.h 414 2005-12-01 15:17:49Z peter $ |
---|
2 | |
---|
3 | #ifndef _theplu_statistics_pearson_ |
---|
4 | #define _theplu_statistics_pearson_ |
---|
5 | |
---|
6 | #include <c++_tools/statistics/Score.h> |
---|
7 | |
---|
8 | namespace theplu { |
---|
// Forward declaration of gslapi::vector. A class cannot be
// forward-declared through a qualified name ("class gslapi::vector;"),
// so the nested namespace is reopened instead.
namespace gslapi { class vector; }
---|
10 | |
---|
11 | namespace statistics { |
---|
12 | |
---|
13 | /// |
---|
14 | /// Class for calculating Pearson correlation. |
---|
15 | /// |
---|
16 | |
---|
17 | class Pearson : public Score |
---|
18 | { |
---|
19 | |
---|
20 | public: |
---|
21 | /// |
---|
22 | /// Default Constructor. |
---|
23 | /// |
---|
24 | Pearson(bool absolute=true); |
---|
25 | |
---|
26 | /// |
---|
27 | /// Destructor |
---|
28 | /// |
---|
29 | virtual ~Pearson(void) {}; |
---|
30 | |
---|
31 | |
---|
32 | /// |
---|
33 | /// \f$ \frac{\vert \sum_i(x_i-\bar{x})(y_i-\bar{y})\vert |
---|
34 | /// }{\sqrt{\sum_i (x_i-\bar{x})^2\sum_i (x_i-\bar{x})^2}}\f$. |
---|
35 | /// @return Pearson correlation, if absolute=true absolute value |
---|
36 | /// of Pearson is used. |
---|
37 | /// |
---|
38 | double score(const gslapi::vector&, const gslapi::vector&, |
---|
39 | const std::vector<size_t>& = std::vector<size_t>()); |
---|
40 | |
---|
41 | /// |
---|
42 | /// \f$ \frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert } |
---|
43 | /// {\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}}\f$, |
---|
44 | /// where \f$m_x = \frac{\sum w_ix_i}{\sum w_i}\f$ and \f$m_x = |
---|
45 | /// \frac{\sum w_ix_i}{\sum w_i}\f$. This expression is chosen to |
---|
46 | /// get a correlation equal to unity when \a x and \a y are |
---|
47 | /// equal. @return absolute value of weighted version of Pearson |
---|
48 | /// correlation. |
---|
49 | /// |
---|
50 | inline double score(const gslapi::vector& x, const gslapi::vector& y, |
---|
51 | const gslapi::vector& w, |
---|
52 | const std::vector<size_t>& train_set = |
---|
53 | std::vector<size_t>()) |
---|
54 | { return score(x,y,w,w,train_set); } |
---|
55 | |
---|
56 | /// |
---|
57 | /// \f$ \frac{\vert \sum_iw^x_iw^y_i(x_i-m_x)(y_i-m_y)\vert } |
---|
58 | /// {\sqrt{\sum_iw^x_iw^y_i(x_i-m_x)^2 /// |
---|
59 | /// \sum_iw^x_iw^y_i(y_i-m_y)^2}}\f$, where \f$m_x = \frac{\sum |
---|
60 | /// w^x_iw^y_ix_i}{\sum w^x_iw^y_i}\f$ and \f$m_y = \frac{\sum |
---|
61 | /// w_ix_i}{\sum w_i}\f$. This expression is chosen to get a |
---|
62 | /// correlation equal to unity when \a x and \a y are |
---|
63 | /// equal. @return absolute value of weighted version of Pearson |
---|
64 | /// correlation. |
---|
65 | /// |
---|
66 | double score(const gslapi::vector& x, const gslapi::vector& y, |
---|
67 | const gslapi::vector& wx, const gslapi::vector& wy, |
---|
68 | const std::vector<size_t>&); |
---|
69 | |
---|
70 | /// |
---|
71 | /// The p-value is the probability of getting a correlation as |
---|
72 | /// large as the observed value by random chance, when the true |
---|
73 | /// correlation is zero (and the data is Gaussian). Note that this |
---|
74 | /// function can only be used together with the unweighted |
---|
75 | /// score. @return two-sided p-value |
---|
76 | /// |
---|
77 | double p_value() const; |
---|
78 | |
---|
79 | private: |
---|
80 | double r_; |
---|
81 | int nof_samples_; |
---|
82 | |
---|
83 | |
---|
84 | void centralize(gslapi::vector&, const gslapi::vector&); |
---|
85 | }; |
---|
86 | |
---|
87 | }} // of namespace statistics and namespace theplu |
---|
88 | |
---|
89 | #endif |
---|
90 | |
---|