# Changeset 1145 for trunk/yat/statistics

Ignore:
Timestamp:
Feb 25, 2008, 9:23:47 PM (13 years ago)
Message:

fixes #292

Location:
trunk/yat/statistics
Files:
8 edited

Unmodified
Removed
• ## trunk/yat/statistics/Histogram.h

 r1000 #include "AveragerWeighted.h" #include "yat/utility/iterator_traits.h" #include }; /** Add a range [first, last) of values to Histogram. */ template void add(Histogram& h, ForwardIterator first, ForwardIterator last) { while (first!=last) { h.add(utility::iterator_traits().data(), utility::iterator_traits().weight()); ++first; } } /// /// The Histogram output operator
• ## trunk/yat/statistics/KolmogorovSmirnov.h

 r1125 #include #include #include
• ## trunk/yat/statistics/PearsonCorrelation.cc

 r1139 #include "PearsonCorrelation.h" #include "Pearson.h" #include "AveragerPair.h" #include "AveragerPairWeighted.h" #include "yat/utility/VectorBase.h" #include "yat/classifier/DataLookupWeighted1D.h" #include "yat/classifier/Target.h" #include #include #include "utility.h" namespace theplu { PearsonCorrelation::PearsonCorrelation(void) : r_(0), nof_samples_(0) { } double PearsonCorrelation::p_value_one_sided() const { if(nof_samples_<=2) return 1; double t = sqrt(nof_samples_ - 2)*fabs(r_) /sqrt(1-r_*r_); return gsl_cdf_tdist_Q(t, nof_samples_ -2 ); return pearson_p_value(ap_.correlation(), static_cast(ap_.n())); }
• ## trunk/yat/statistics/PearsonCorrelation.h

 r1140 */ #include "AveragerPair.h" #include "AveragerPairWeighted.h" #include "yat/classifier/Target.h" #include "yat/utility/iterator_traits.h" namespace theplu { namespace yat { namespace utility { class VectorBase; } namespace statistics { /** \f$\frac{\vert \sum_i(x_i-\bar{x})(y_i-\bar{y})\vert }{\sqrt{\sum_i (x_i-\bar{x})^2\sum_i (y_i-\bar{y})^2}} \f$. Adding a data value to PearsonCorrelation. */ void add(double value, bool target, double weight=1.0); /** \brief correlation If ForwardIterator is weighted correlation is calculated as \f$\frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert } {\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}} \f$, where \f$\bar{x} = \frac{\sum w_ix_i}{\sum w_i} \f$ and \f$\bar{y} = \frac{\sum w_iy_i}{\sum w_i} \f$. This expression is chosen to get a correlation equal to unity when \a x and \a y are equal. Correlation is calculated as implemented in AveragerPairWeighted @return Pearson correlation, if absolute=true absolute value of Pearson is used. @return Pearson correlation. */ template double score(const classifier::Target& target, ForwardIterator first, ForwardIterator last); /** \f$\frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert } {\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}} \f$, where \f$\bar{x} = \frac{\sum w_ix_i}{\sum w_i} \f$ and \f$\bar{y} = \frac{\sum w_iy_i}{\sum w_i} \f$. This expression is chosen to get a correlation equal to unity when \a x and \a y are equal. \return absolute value of weighted version of Pearson correlation. \note iterators must be non-weighted */ template double score(const classifier::Target& target, ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2); double score(void) const; /** correlation is zero (and the data is Gaussian). @note This function can only be used together with the unweighted score. P-value is calculated using function pearson_p_value(double, u_int) where degrees of freedom is calculated using n(void) in AveragerPairWeighted. 
@return one-sided p-value */ private: double r_; int nof_samples_; template double score(const classifier::Target& target, ForwardIterator first, ForwardIterator last, utility::unweighted_iterator_tag); template double score(const classifier::Target& target, ForwardIterator first, ForwardIterator last, utility::weighted_iterator_tag); AveragerPairWeighted ap_; }; template double PearsonCorrelation::score(const classifier::Target& target, ForwardIterator first, ForwardIterator last) { nof_samples_ = target.size(); using utility::yat_assert; yat_assert("PearsonCorrelation: sizes mismatch"); r_ = score(target, first, last, utility::iterator_traits::type()); return r_; } template double PearsonCorrelation::score(const classifier::Target& target, ForwardIterator first, ForwardIterator last, utility::unweighted_iterator_tag tag) { AveragerPair ap; for (size_t i=0; first!=last; ++first, ++i) ap.add(target.binary(i), *first); nof_samples_ = ap.n(); return ap.correlation(); } template double PearsonCorrelation::score(const classifier::Target& target, ForwardIterator first, ForwardIterator last, utility::weighted_iterator_tag tag) { AveragerPairWeighted ap; for (size_t i=0; first!=last; ++first, ++i) ap.add(target.binary(i), first.data(), 1.0, first.weight()); nof_samples_ = ap.n(); return ap.correlation(); } template double PearsonCorrelation::score(const classifier::Target& target, ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2) { utility::check_iterator_is_unweighted(first1); utility::check_iterator_is_unweighted(first2); AveragerPairWeighted ap; for (size_t i=0; first1!=last1; ++first1, ++i, ++first2) ap.add(target.binary(i), *first1, 1.0, *first2); nof_samples_ = ap.n(); r_ = ap.correlation(); return r_; } }}} // of namespace statistics, yat, and theplu
• ## trunk/yat/statistics/ROC.h

 r1141 }; /** Add a range [first, last) of values to ROC. The first last-first elements in Target.binary are used. */ template void add(ROC& roc, ForwardIterator first, ForwardIterator last, const classifier::Target& target) { for (size_t i=0; first!=last; ++i, ++first) roc.add(utility::iterator_traits().data(), target.binary(i), utility::iterator_traits().weight()); } }}} // of namespace statistics, yat, and theplu
• ## trunk/yat/statistics/tTest.h

 r1000 /// /// @brief Class for Fisher's t-test. /// @brief Class for Student's t-test. /// /// See
• ## trunk/yat/statistics/utility.cc

 r1025 #include "utility.h" #include #include #include #include namespace theplu { return p; } double pearson_p_value(double r, u_int n) { assert(n>=2); if (n<2) return std::numeric_limits::quiet_NaN(); return gsl_cdf_tdist_Q(r*sqrt((n-2)/(1-r*r)), n-2); } double kurtosis(const utility::VectorBase& v)
• ## trunk/yat/statistics/utility.h

 r1039 /** Adding each value in an array \a v to an object \a o.  The requirement for the type T1 is to have an add(double, bool) function, and the requirements for the type T2 of the array \a v are: operator[] returning an element and function size() returning the number of elements. Adding a range [\a first, \a last) into an object of type T. The requirement for the type T is to have an add(double, bool, double) function. */ template void add(T1& o, const T2& v, const classifier::Target& target) { for (size_t i=0; i void add(T1& o, const classifier::DataLookupWeighted1D& v, template void add(T& o, ForwardIterator first, ForwardIterator last, const classifier::Target& target) { for (size_t i=0; i().data(first), target.binary(i), utility::iterator_traits().weight(first)); } double cdf_hypergeometric_P(u_int k, u_int n1, u_int n2, u_int t); /** \brief one-sided p-value This function uses the t-distribution to calculate the one-sided p-value. Given that the true correlation is zero (Null hypothesis) the estimated correlation, r, after a transformation is t-distributed: \f$\sqrt{(n-2)} \frac{r}{\sqrt{(1-r^2)}} \in t(n-2) \f$ \return Probability that correlation is larger than \a r by chance when having \a n samples. */ double pearson_p_value(double r, u_int n); ///
Note: See TracChangeset for help on using the changeset viewer.