source: trunk/yat/statistics/PearsonCorrelation.h @ 1139

Last change on this file since 1139 was 1139, checked in by Peter, 14 years ago

iterators in PearsonCorrelation interface

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 5.6 KB
Line 
1#ifndef _theplu_yat_statistics_pearson_correlation_
2#define _theplu_yat_statistics_pearson_correlation_
3
4// $Id: PearsonCorrelation.h 1139 2008-02-24 01:59:27Z peter $
5
6/*
7  Copyright (C) 2004, 2005 Peter Johansson
8  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér, Peter Johansson
9  Copyright (C) 2007, 2008 Peter Johansson
10
11  This file is part of the yat library, http://trac.thep.lu.se/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 2 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with this program; if not, write to the Free Software
25  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26  02111-1307, USA.
27*/
28
29#include "AveragerPair.h"
30#include "AveragerPairWeighted.h"
31#include "yat/classifier/Target.h"
32#include "yat/utility/iterator_traits.h"
33
34
35namespace theplu {
36namespace yat {
37namespace utility {
38  class VectorBase;
39}
40namespace statistics {
41
42  ///
43  /// @brief Class for calculating Pearson correlation.
44  ///
45  class PearsonCorrelation
46  {
47  public:
48    ///
49    /// @brief The default constructor.
50    ///
51    PearsonCorrelation(void);
52   
53    ///
54    /// @brief The destructor.
55    ///
56    virtual ~PearsonCorrelation(void);
57   
58   
59    /**
60       \f$ \frac{\vert \sum_i(x_i-\bar{x})(y_i-\bar{y})\vert
61       }{\sqrt{\sum_i (x_i-\bar{x})^2\sum_i (x_i-\bar{x})^2}} \f$.
62
63
64       If ForwardIterator is weighted correlation is calculated as
65       \f$ \frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert }
66       {\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}}
67       \f$, where \f$ m_x = \frac{\sum w_ix_i}{\sum w_i} \f$ and \f$
68       m_x = \frac{\sum w_ix_i}{\sum w_i} \f$. This expression is
69       chosen to get a correlation equal to unity when \a x and \a y
70       are equal.
71
72       @return Pearson correlation, if absolute=true absolute value
73       of Pearson is used.
74    */
75    template<typename ForwardIterator>
76    double score(const classifier::Target& target, 
77                 ForwardIterator first, ForwardIterator last);
78   
79    /**
80       \f$ \frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert }
81       {\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}}
82       \f$, where \f$ m_x = \frac{\sum w_ix_i}{\sum w_i} \f$ and \f$
83       m_x = \frac{\sum w_ix_i}{\sum w_i} \f$. This expression is
84       chosen to get a correlation equal to unity when \a x and \a y
85       are equal.
86
87       \return absolute value of weighted version of Pearson
88       correlation.
89
90       \note ietartors must be non-weighted
91    */
92    template<typename ForwardIterator1, typename ForwardIterator2>
93    double score(const classifier::Target& target, 
94                 ForwardIterator1 first1, ForwardIterator1 last1,
95                 ForwardIterator2 first2);
96   
97    /**
98       The p-value is the probability of getting a correlation as
99       large (or larger) as the observed value by random chance, when the true
100       correlation is zero (and the data is Gaussian).
101       
102       @note This function can only be used together with the
103       unweighted score.
104       
105       @return one-sided p-value
106    */
107    double p_value_one_sided() const;
108   
109  private:
110    double r_;
111    int nof_samples_;
112   
113    template<typename ForwardIterator>
114    double score(const classifier::Target& target, 
115                 ForwardIterator first, ForwardIterator last, 
116                 utility::unweighted_iterator_tag);
117   
118    template<typename ForwardIterator>
119    double score(const classifier::Target& target, 
120                 ForwardIterator first, ForwardIterator last,
121                 utility::weighted_iterator_tag);
122   
123  };
124 
125  template<typename ForwardIterator>
126  double PearsonCorrelation::score(const classifier::Target& target, 
127                                   ForwardIterator first, 
128                                   ForwardIterator last)
129  {
130    nof_samples_ = target.size();
131      using utility::yat_assert;
132      yat_assert<std::runtime_error>("PearsonCorrelation: sizes mismatch");
133      r_ = score(target, first, last, 
134                 utility::iterator_traits<ForwardIterator>::type());
135      return r_;
136  }
137   
138
139  template<typename ForwardIterator>
140  double PearsonCorrelation::score(const classifier::Target& target, 
141                                   ForwardIterator first, 
142                                   ForwardIterator last,
143                                   utility::unweighted_iterator_tag tag)
144   
145  {
146    AveragerPair ap;
147    for (size_t i=0; first!=last; ++first, ++i)
148      ap.add(target.binary(i), *first);
149    nof_samples_ = ap.n();
150    return ap.correlation();
151  }
152   
153
154  template<typename ForwardIterator>
155  double PearsonCorrelation::score(const classifier::Target& target, 
156                                   ForwardIterator first, 
157                                   ForwardIterator last,
158                                   utility::weighted_iterator_tag tag)
159   
160  {
161    AveragerPairWeighted ap;
162    for (size_t i=0; first!=last; ++first, ++i)
163      ap.add(target.binary(i), first.data(), 1.0, first.weight());
164    nof_samples_ = ap.n();
165    return ap.correlation();
166  }
167   
168  template<typename ForwardIterator1, typename ForwardIterator2>
169  double PearsonCorrelation::score(const classifier::Target& target, 
170                                   ForwardIterator1 first1, 
171                                   ForwardIterator1 last1,
172                                   ForwardIterator2 first2)
173  {
174    utility::check_iterator_is_unweighted(first1);
175    utility::check_iterator_is_unweighted(first2);
176    AveragerPairWeighted ap;
177    for (size_t i=0; first1!=last1; ++first1, ++i, ++first2)
178      ap.add(target.binary(i), *first1, 1.0, *first2);
179    nof_samples_ = ap.n();
180    r_ = ap.correlation();
181  }
182 
183}}} // of namespace statistics, yat, and theplu
184
185#endif
Note: See TracBrowser for help on using the repository browser.