Changeset 1145 for trunk/yat/statistics
- Timestamp:
- Feb 25, 2008, 9:23:47 PM (16 years ago)
- Location:
- trunk/yat/statistics
- Files:
-
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/yat/statistics/Histogram.h
r1000 r1145 29 29 30 30 #include "AveragerWeighted.h" 31 #include "yat/utility/iterator_traits.h" 31 32 32 33 #include <string> … … 191 192 }; 192 193 194 /** 195 Add a range [first, last) of values to Histogram. 196 */ 197 template<typename ForwardIterator> 198 void add(Histogram& h, 199 ForwardIterator first, ForwardIterator last) 200 { 201 while (first!=last) { 202 h.add(utility::iterator_traits<ForwardIterator>().data(), 203 utility::iterator_traits<ForwardIterator>().weight()); 204 ++first; 205 } 206 } 207 193 208 /// 194 209 /// The Histogram output operator -
trunk/yat/statistics/KolmogorovSmirnov.h
r1125 r1145 26 26 27 27 #include <set> 28 #include <utility>29 28 #include <vector> 30 29 -
trunk/yat/statistics/PearsonCorrelation.cc
r1139 r1145 25 25 26 26 #include "PearsonCorrelation.h" 27 #include "Pearson.h" 28 #include "AveragerPair.h" 29 #include "AveragerPairWeighted.h" 30 #include "yat/utility/VectorBase.h" 31 #include "yat/classifier/DataLookupWeighted1D.h" 32 #include "yat/classifier/Target.h" 33 34 #include <cmath> 35 #include <gsl/gsl_cdf.h> 27 #include "utility.h" 36 28 37 29 namespace theplu { … … 40 32 41 33 PearsonCorrelation::PearsonCorrelation(void) 42 : r_(0), nof_samples_(0)43 34 { 44 35 } … … 50 41 double PearsonCorrelation::p_value_one_sided() const 51 42 { 52 if(nof_samples_<=2) 53 return 1; 54 55 double t = sqrt(nof_samples_ - 2)*fabs(r_) /sqrt(1-r_*r_); 56 return gsl_cdf_tdist_Q(t, nof_samples_ -2 ); 43 return pearson_p_value(ap_.correlation(), 44 static_cast<u_int>(ap_.n())); 57 45 } 58 46 -
trunk/yat/statistics/PearsonCorrelation.h
r1140 r1145 27 27 */ 28 28 29 #include "AveragerPair.h"30 29 #include "AveragerPairWeighted.h" 31 #include "yat/classifier/Target.h"32 #include "yat/utility/iterator_traits.h"33 34 30 35 31 namespace theplu { 36 32 namespace yat { 37 namespace utility {38 class VectorBase;39 }40 33 namespace statistics { 41 34 … … 58 51 59 52 /** 60 \f$ \frac{\vert \sum_i(x_i-\bar{x})(y_i-\bar{y})\vert 61 }{\sqrt{\sum_i (x_i-\bar{x})^2\sum_i (x_i-\bar{x})^2}} \f$. 53 Adding a data value to PearsonCorrelation. 54 */ 55 void add(double value, bool target, double weight=1.0); 62 56 57 /** 58 \brief correlation 63 59 64 If ForwardIterator is weighted correlation is calculated as 65 \f$ \frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert } 66 {\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}} 67 \f$, where \f$ m_x = \frac{\sum w_ix_i}{\sum w_i} \f$ and \f$ 68 m_x = \frac{\sum w_ix_i}{\sum w_i} \f$. This expression is 69 chosen to get a correlation equal to unity when \a x and \a y 70 are equal. 60 Correlation is calculated as implemented in AveragerPairWeighted 71 61 72 @return Pearson correlation, if absolute=true absolute value 73 of Pearson is used. 62 @return Pearson correlation. 74 63 */ 75 template<typename ForwardIterator> 76 double score(const classifier::Target& target, 77 ForwardIterator first, ForwardIterator last); 78 79 /** 80 \f$ \frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert } 81 {\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}} 82 \f$, where \f$ m_x = \frac{\sum w_ix_i}{\sum w_i} \f$ and \f$ 83 m_x = \frac{\sum w_ix_i}{\sum w_i} \f$. This expression is 84 chosen to get a correlation equal to unity when \a x and \a y 85 are equal. 86 87 \return absolute value of weighted version of Pearson 88 correlation. 89 90 \note ietartors must be non-weighted 91 */ 92 template<typename ForwardIterator1, typename ForwardIterator2> 93 double score(const classifier::Target& target, 94 ForwardIterator1 first1, ForwardIterator1 last1, 95 ForwardIterator2 first2); 64 double score(void) const; 96 65 97 66 /** … … 100 69 correlation is zero (and the data is Gaussian). 101 70 102 @note This function can only be used together with the 103 unweighted score. 104 71 P-value is calculated using function pearson_p_value(double, 72 u_int) where degrees of freedom is calculated using n(void) in 73 AveragerPairWeighted. 74 105 75 @return one-sided p-value 106 76 */ … … 108 78 109 79 private: 110 double r_; 111 int nof_samples_; 112 113 template<typename ForwardIterator> 114 double score(const classifier::Target& target, 115 ForwardIterator first, ForwardIterator last, 116 utility::unweighted_iterator_tag); 117 118 template<typename ForwardIterator> 119 double score(const classifier::Target& target, 120 ForwardIterator first, ForwardIterator last, 121 utility::weighted_iterator_tag); 80 AveragerPairWeighted ap_; 122 81 123 82 }; 124 125 template<typename ForwardIterator>126 double PearsonCorrelation::score(const classifier::Target& target,127 ForwardIterator first,128 ForwardIterator last)129 {130 nof_samples_ = target.size();131 using utility::yat_assert;132 yat_assert<std::runtime_error>("PearsonCorrelation: sizes mismatch");133 r_ = score(target, first, last,134 utility::iterator_traits<ForwardIterator>::type());135 return r_;136 }137 138 139 template<typename ForwardIterator>140 double PearsonCorrelation::score(const classifier::Target& target,141 ForwardIterator first,142 ForwardIterator last,143 utility::unweighted_iterator_tag tag)144 145 {146 AveragerPair ap;147 for (size_t i=0; first!=last; ++first, ++i)148 ap.add(target.binary(i), *first);149 nof_samples_ = ap.n();150 return ap.correlation();151 }152 153 154 template<typename ForwardIterator>155 double PearsonCorrelation::score(const classifier::Target& target,156 ForwardIterator first,157 ForwardIterator last,158 utility::weighted_iterator_tag tag)159 160 {161 AveragerPairWeighted ap;162 for (size_t i=0; first!=last; ++first, ++i)163 ap.add(target.binary(i), first.data(), 1.0, first.weight());164 nof_samples_ = ap.n();165 return ap.correlation();166 }167 168 template<typename ForwardIterator1, typename ForwardIterator2>169 double PearsonCorrelation::score(const classifier::Target& target,170 ForwardIterator1 first1,171 ForwardIterator1 last1,172 ForwardIterator2 first2)173 {174 utility::check_iterator_is_unweighted(first1);175 utility::check_iterator_is_unweighted(first2);176 AveragerPairWeighted ap;177 for (size_t i=0; first1!=last1; ++first1, ++i, ++first2)178 ap.add(target.binary(i), *first1, 1.0, *first2);179 nof_samples_ = ap.n();180 r_ = ap.correlation();181 return r_;182 }183 83 184 84 }}} // of namespace statistics, yat, and theplu -
trunk/yat/statistics/ROC.h
r1141 r1145 150 150 }; 151 151 152 /**153 Add a range [first, last) of values to ROC. The first last-first154 elements in Target.binary are used.155 */156 template<typename ForwardIterator>157 void add(ROC& roc,158 ForwardIterator first, ForwardIterator last,159 const classifier::Target& target)160 {161 for (size_t i=0; first!=last; ++i, ++first)162 roc.add(utility::iterator_traits<ForwardIterator>().data(),163 target.binary(i),164 utility::iterator_traits<ForwardIterator>().weight());165 }166 152 }}} // of namespace statistics, yat, and theplu 167 153 -
trunk/yat/statistics/tTest.h
r1000 r1145 36 36 37 37 /// 38 /// @brief Class for Fisher's t-test.38 /// @brief Class for Student's t-test. 39 39 /// 40 40 /// See <a href="http://en.wikipedia.org/wiki/Student's_t-test"> -
trunk/yat/statistics/utility.cc
r1025 r1145 27 27 #include "utility.h" 28 28 29 #include <gsl/gsl_cdf.h> 29 30 #include <gsl/gsl_randist.h> 30 31 #include <gsl/gsl_statistics_double.h> 32 33 #include <cassert> 31 34 32 35 namespace theplu { … … 41 44 return p; 42 45 } 46 47 48 double pearson_p_value(double r, u_int n) 49 { 50 assert(n>=2); 51 if (n<2) 52 return std::numeric_limits<double>::quiet_NaN(); 53 return gsl_cdf_tdist_Q(r*sqrt((n-2)/(1-r*r)), n-2); 54 } 55 43 56 44 57 double kurtosis(const utility::VectorBase& v) -
trunk/yat/statistics/utility.h
r1039 r1145 52 52 53 53 /** 54 Adding each value in an array \a v \a to an object \a o. The 55 requirements for the type T1 is to have an add(double, bool) 56 function, and for T2 of the array \a v are: operator[] returning 57 an element and function size() returning the number of elements. 54 Adding a range [\a first, \a last) into an object of type T. The 55 requirements for the type T is to have an add(double, bool, double) 56 function. 58 57 */ 59 template <typename T1, typename T2> 60 void add(T1& o, const T2& v, const classifier::Target& target) 61 { 62 for (size_t i=0; i<v.size(); ++i) 63 o.add(v[i],target.binary(i)); 64 } 65 66 /** 67 Adding each value in an array \a v \a to an object \a o. The 68 requirements for the type T1 is to have an add(double, bool) 69 function, and for T2 of the array \a v are: operator[] returning 70 an element and function size() returning the number of elements. 71 */ 72 template <typename T1> 73 void add(T1& o, const classifier::DataLookupWeighted1D& v, 58 template <typename T, typename ForwardIterator> 59 void add(T& o, ForwardIterator first, ForwardIterator last, 74 60 const classifier::Target& target) 75 61 { 76 for (size_t i=0; i<v.size(); ++i) 77 o.add(v.data(i),target.binary(i),v.weight(i)); 62 for (size_t i=0; first!=last; ++i, ++first) 63 o.add(utility::iterator_traits<ForwardIterator>().data(first), 64 target.binary(i), 65 utility::iterator_traits<ForwardIterator>().weight(first)); 78 66 } 79 67 … … 91 79 double cdf_hypergeometric_P(u_int k, u_int n1, u_int n2, u_int t); 92 80 81 82 /** 83 \brief one-sided p-value 84 85 This function uses the t-distribution to calculate the one-sided 86 p-value. Given that the true correlation is zero (Null 87 hypothesis) the estimated correlation, r, after a transformation 88 is t-distributed: 89 90 \f$ \sqrt{(n-2)} \frac{r}{\sqrt{(1-r^2)}} \in t(n-2) \f$ 91 92 \return Probability that correlation is larger than \a r by 93 chance when having \a n samples. 94 */ 95 double pearson_p_value(double r, u_int n); 93 96 94 97 ///
Note: See TracChangeset
for help on using the changeset viewer.