source: trunk/lib/statistics/Pearson.h @ 414

Last change on this file since 414 was 414, checked in by Peter, 16 years ago

Adding FoldChange class and fixing bugs in Score classes.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 2.6 KB
Line 
1// $Id: Pearson.h 414 2005-12-01 15:17:49Z peter $
2
3#ifndef _theplu_statistics_pearson_
4#define _theplu_statistics_pearson_
5
6#include <c++_tools/statistics/Score.h>
7
8namespace theplu {
9  class gslapi::vector;
10
11namespace statistics { 
12
13  ///
14  /// Class for calculating Pearson correlation.
15  ///   
16 
17  class Pearson : public Score
18  {
19 
20  public:
21    ///
22    /// Default Constructor.
23    ///
24    Pearson(bool absolute=true);
25
26    ///
27    /// Destructor
28    ///
29    virtual ~Pearson(void) {};
30         
31   
32    ///
33    /// \f$ \frac{\vert \sum_i(x_i-\bar{x})(y_i-\bar{y})\vert
34    /// }{\sqrt{\sum_i (x_i-\bar{x})^2\sum_i (x_i-\bar{x})^2}}\f$.
35    /// @return Pearson correlation, if absolute=true absolute value
36    /// of Pearson is used.
37    ///
38    double score(const gslapi::vector&, const gslapi::vector&, 
39                 const std::vector<size_t>& = std::vector<size_t>());
40
41    ///
42    /// \f$ \frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert }
43    /// {\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}}\f$,
44    /// where \f$m_x = \frac{\sum w_ix_i}{\sum w_i}\f$ and \f$m_x =
45    /// \frac{\sum w_ix_i}{\sum w_i}\f$. This expression is chosen to
46    /// get a correlation equal to unity when \a x and \a y are
47    /// equal. @return absolute value of weighted version of Pearson
48    /// correlation.
49    ///
50    inline double score(const gslapi::vector& x, const gslapi::vector& y, 
51                        const gslapi::vector& w,
52                        const std::vector<size_t>& train_set = 
53                        std::vector<size_t>()) 
54    { return score(x,y,w,w,train_set); }
55
56    ///
57    /// \f$ \frac{\vert \sum_iw^x_iw^y_i(x_i-m_x)(y_i-m_y)\vert }
58    /// {\sqrt{\sum_iw^x_iw^y_i(x_i-m_x)^2 ///
59    /// \sum_iw^x_iw^y_i(y_i-m_y)^2}}\f$, where \f$m_x = \frac{\sum
60    /// w^x_iw^y_ix_i}{\sum w^x_iw^y_i}\f$ and \f$m_y = \frac{\sum
61    /// w_ix_i}{\sum w_i}\f$. This expression is chosen to get a
62    /// correlation equal to unity when \a x and \a y are
63    /// equal. @return absolute value of weighted version of Pearson
64    /// correlation.
65    ///
66    double score(const gslapi::vector& x, const gslapi::vector& y, 
67                 const gslapi::vector& wx, const gslapi::vector& wy,
68                 const std::vector<size_t>&);
69   
70    ///
71    /// The p-value is the probability of getting a correlation as
72    /// large as the observed value by random chance, when the true
73    /// correlation is zero (and the data is Gaussian). Note that this
74    /// function can only be used together with the unweighted
75    /// score. @return two-sided p-value
76    ///
77    double p_value() const;
78         
79  private:
80    double r_;
81    int nof_samples_;
82
83
84    void centralize(gslapi::vector&, const gslapi::vector&);
85  };
86
87}} // of namespace statistics and namespace theplu
88
89#endif
90
Note: See TracBrowser for help on using the repository browser.