Changeset 779
- Timestamp:
- Mar 5, 2007, 7:58:30 PM (17 years ago)
- Location:
- trunk
- Files:
-
- 1 added
- 27 edited
- 4 copied
- 2 moved
Legend:
- Unmodified
- Added
- Removed
-
trunk/test/Makefile.am
r723 r779 7 7 # Copyright (C) 2005 Jari Häkkinen, Peter Johansson 8 8 # Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér 9 # Copyright (C) 2007 Peter Johansson 9 10 # 10 11 # This file is part of the yat library, http://lev.thep.lu.se/trac/yat … … 29 30 ensemble_test feature_selection_test fileutil_test inputranker_test \ 30 31 kernel_test kernel_lookup_test matrix_test matrix_lookup_test \ 31 ncc_test nni_test pca_test regression_test rnd_test score_test \ 32 ncc_test nni_test pca_test regression_test rnd_test roc_test \ 33 score_test \ 32 34 statistics_test subset_generator_test svd_test svm_test target_test \ 33 35 utility_test vector_test … … 60 62 regression_test_SOURCES = regression_test.cc 61 63 rnd_test_SOURCES = rnd_test.cc 64 roc_test_SOURCES = roc_test.cc 62 65 score_test_SOURCES = score_test.cc 63 66 statistics_test_SOURCES = statistics_test.cc -
trunk/test/consensus_inputranker_test.cc
r680 r779 23 23 24 24 #include "yat/classifier/ConsensusInputRanker.h" 25 #include "yat/statistics/ROC .h"25 #include "yat/statistics/ROCscore.h" 26 26 #include "yat/utility/matrix.h" 27 27 #include "yat/classifier/MatrixLookup.h" … … 59 59 60 60 61 theplu::yat::statistics::ROC roc;61 theplu::yat::statistics::ROCscore roc; 62 62 theplu::yat::classifier::CrossValidationSampler sampler(target,30,3); 63 63 *error << "Building Consensus_Inputranker" << std::endl; -
trunk/test/feature_selection_test.cc
r680 r779 24 24 #include "yat/classifier/FeatureSelectorIR.h" 25 25 #include "yat/classifier/FeatureSelectorRandom.h" 26 #include "yat/statistics/ROC .h"26 #include "yat/statistics/ROCscore.h" 27 27 28 28 #include <fstream> … … 45 45 bool ok = true; 46 46 47 statistics::ROC roc;47 statistics::ROCscore roc; 48 48 classifier::FeatureSelectorIR f(roc, 12); 49 49 classifier::FeatureSelectorRandom f2(12); -
trunk/test/inputranker_test.cc
r680 r779 23 23 24 24 #include "yat/classifier/InputRanker.h" 25 #include "yat/statistics/ROC .h"25 #include "yat/statistics/ROCscore.h" 26 26 #include "yat/utility/matrix.h" 27 27 #include "yat/classifier/MatrixLookup.h" … … 57 57 is.close(); 58 58 59 statistics::ROC roc;59 statistics::ROCscore roc; 60 60 classifier::InputRanker ir(data,target,roc); 61 61 if (ir.id()[0]!=2 || ir.id()[1]!=0 || ir.id()[2]!=1){ -
trunk/test/score_test.cc
r747 r779 25 25 #include "yat/statistics/FoldChange.h" 26 26 #include "yat/statistics/Pearson.h" 27 #include "yat/statistics/ROC .h"28 #include "yat/statistics/SAM .h"27 #include "yat/statistics/ROCscore.h" 28 #include "yat/statistics/SAMScore.h" 29 29 #include "yat/statistics/tScore.h" 30 30 #include "yat/statistics/WilcoxonFoldChange.h" … … 52 52 bool ok = true; 53 53 54 *error << "testing ROC " << std::endl;54 *error << "testing ROCscore" << std::endl; 55 55 utility::vector value(31); 56 56 std::vector<std::string> label(31,"negative"); … … 60 60 for (size_t i=0; i<value.size(); i++) 61 61 value(i)=i; 62 statistics::ROC roc;62 statistics::ROCscore roc; 63 63 double area = roc.score(target, value); 64 64 if (area!=1.0){ … … 73 73 *error << "test_roc: area is " << area << " should be 1.0" 74 74 << std::endl; 75 ok = false;76 }77 78 double p = roc.p_value();79 double p_matlab = 0.00000115;80 if (p/p_matlab > 1.01 | p/p_matlab < 0.99){81 *error << "get_p_approx: p-value not correct" << std::endl;82 ok = false;83 }84 roc.minimum_size() = 20;85 p = roc.p_value();86 if (p > pow(10, -8.0) | p < pow(10, -9.0)){87 *error << "get_p_exact: p-value not correct" << std::endl;88 75 ok = false; 89 76 } … … 141 128 statistics::WilcoxonFoldChange wfc(true); 142 129 143 *error << "testing SAM " << std::endl;144 statistics::SAM sam(1.0,true);130 *error << "testing SAMScore" << std::endl; 131 statistics::SAMScore sam(1.0,true); 145 132 146 133 -
trunk/test/subset_generator_test.cc
r722 r779 32 32 #include "yat/classifier/SVM.h" 33 33 #include "yat/classifier/NCC.h" 34 #include "yat/statistics/ROC .h"34 #include "yat/statistics/ROCscore.h" 35 35 #include "yat/statistics/PearsonDistance.h" 36 36 #include "yat/utility/matrix.h" … … 78 78 classifier::CrossValidationSampler sampler(target, 30, 3); 79 79 80 statistics::ROC score;80 statistics::ROCscore score; 81 81 classifier::FeatureSelectorIR fs(score, 96, 0); 82 82 *error << "building SubsetGenerator" << std::endl; … … 96 96 for (size_t i = 0; i<out.size(); ++i) 97 97 out(i)=ensemble_svm.validate()[0][i].mean(); 98 statistics::ROC roc;98 statistics::ROCscore roc; 99 99 *error << roc.score(target,out) << std::endl; 100 100 -
trunk/yat/classifier/DataLookupWeighted1D.cc
r720 r779 78 78 79 79 80 double sum_weight(const DataLookupWeighted1D& x) 81 { 82 double r=0; 83 for (size_t i=0; i<x.size(); ++i) 84 r += x.weight(i); 85 return r; 86 } 87 88 80 89 double DataLookupWeighted1D::weight(const size_t i) const 81 90 { -
trunk/yat/classifier/DataLookupWeighted1D.h
r767 r779 105 105 }; 106 106 107 /// 108 /// @return sum of weights 109 /// 110 double sum_weight(const DataLookupWeighted1D&); 111 107 112 }}} // of namespace classifier, yat, and theplu 108 113 -
trunk/yat/classifier/InputRanker.cc
r720 r779 28 28 #include "DataLookupWeighted1D.h" 29 29 #include "Target.h" 30 #include "yat/statistics/ ROC.h"30 #include "yat/statistics/Score.h" 31 31 #include "yat/utility/stl_utility.h" 32 32 -
trunk/yat/classifier/utility.cc
r680 r779 24 24 #include "utility.h" 25 25 #include "DataLookup1D.h" 26 #include "DataLookupWeighted1D.h" 26 27 #include "yat/utility/vector.h" 27 28 … … 38 39 } 39 40 41 void convert(const DataLookupWeighted1D& lookup, utility::vector& value, 42 utility::vector& weight) 43 { 44 45 value=utility::vector(lookup.size()); 46 weight=utility::vector(lookup.size()); 47 for(u_int i=0; i<lookup.size(); i++){ 48 value(i)=lookup.data(i); 49 weight(i)=lookup.weight(i); 50 } 51 } 52 40 53 }}} // of namespace classifier, yat, and theplu -
trunk/yat/classifier/utility.h
r680 r779 35 35 36 36 class DataLookup1D; 37 class DataLookupWeighted1D; 37 38 38 39 /// … … 41 42 void convert(const DataLookup1D&, utility::vector&); 42 43 44 /// 45 /// Converts a DataLookupWeighted1D to two utility::vector 46 /// 47 void convert(const DataLookupWeighted1D&, utility::vector& value, 48 utility::vector& weight); 49 43 50 }}} // of namespace classifier, yat, and theplu 44 51 -
trunk/yat/statistics/FoldChange.cc
r747 r779 39 39 } 40 40 41 FoldChange::FoldChange(const FoldChange& other)42 : Score(other)41 double FoldChange::score(const classifier::Target& target, 42 const utility::vector& value) const 43 43 { 44 }45 46 FoldChange& FoldChange::operator=(const FoldChange& other)47 {48 Score::operator=(other);49 return *this;50 }51 52 double FoldChange::score(const classifier::Target& target,53 const utility::vector& value)54 {55 weighted_=false;56 44 Averager pos; 57 45 Averager neg; … … 69 57 70 58 double FoldChange::score(const classifier::Target& target, 71 const classifier::DataLookupWeighted1D& value) 59 const classifier::DataLookupWeighted1D& value) const 72 60 { 73 weighted_=true;74 61 AveragerWeighted pos; 75 62 AveragerWeighted neg; … … 88 75 double FoldChange::score(const classifier::Target& target, 89 76 const utility::vector& value, 90 const utility::vector& weight) 77 const utility::vector& weight) const 91 78 { 92 weighted_=true;93 79 AveragerWeighted pos; 94 80 AveragerWeighted neg; -
trunk/yat/statistics/FoldChange.h
r767 r779 53 53 /// 54 54 double score(const classifier::Target& target, 55 const utility::vector& value) ;55 const utility::vector& value) const; 56 56 57 57 /// … … 62 62 /// 63 63 double score(const classifier::Target& target, 64 const classifier::DataLookupWeighted1D& value) ;64 const classifier::DataLookupWeighted1D& value) const; 65 65 66 66 /// … … 73 73 double score(const classifier::Target& target, 74 74 const utility::vector& value, 75 const utility::vector& weight) ;75 const utility::vector& weight) const; 76 76 77 77 private: -
trunk/yat/statistics/Makefile.am
r778 r779 27 27 AveragerWeighted.cc AveragerPairWeighted.cc Distance.cc \ 28 28 Euclidean.cc Fisher.cc FoldChange.cc Histogram.cc Pearson.cc \ 29 Pearson Distance.cc ROC.cc ROCscore.cc \30 SAM .cc Score.cc SNR.cc tScore.cc\29 PearsonCorrelation.cc PearsonDistance.cc ROC.cc ROCscore.cc \ 30 SAMScore.cc Score.cc SNR.cc tScore.cc tTest.cc \ 31 31 utility.cc WilcoxonFoldChange.cc 32 32 … … 36 36 AveragerWeighted.h AveragerPairWeighted.h Distance.h Euclidean.h \ 37 37 Fisher.h \ 38 FoldChange.h Histogram.h Pearson.h PearsonDistance.h ROC.h ROCscore.h\ 39 SAM.h Score.h SNR.h tScore.h \ 38 FoldChange.h Histogram.h Pearson.h PearsonCorrelation.h \ 39 PearsonDistance.h ROC.h ROCscore.h \ 40 SAMScore.h Score.h SNR.h tScore.h tTest.h \ 40 41 utility.h WilcoxonFoldChange.h -
trunk/yat/statistics/Pearson.cc
r703 r779 30 30 31 31 #include <cmath> 32 #include <gsl/gsl_cdf.h>33 32 34 33 namespace theplu { … … 37 36 38 37 Pearson::Pearson(bool b) 39 : Score(b) , r_(0), nof_samples_(0)38 : Score(b) 40 39 { 41 40 } … … 45 44 } 46 45 47 double Pearson::p_value() const 46 double Pearson::score(const classifier::Target& target, 47 const utility::vector& value) const 48 48 { 49 if(weighted_)50 return 1;51 if(nof_samples_<2){52 std::cerr << "Warning: Only " << nof_samples_ << "samples. "53 << "Need at lest 3.\n";54 return 1;55 }56 57 double t = sqrt(nof_samples_ - 2)*fabs(r_) /sqrt(1-r_*r_);58 double p = gsl_cdf_tdist_Q(t, nof_samples_ -2 );59 if (absolute_)60 return 2*p;61 if (r_<0)62 return 1-p;63 return p;64 65 }66 67 double Pearson::score(const classifier::Target& target,68 const utility::vector& value)69 {70 weighted_=false;71 49 AveragerPair ap; 72 50 for (size_t i=0; i<target.size(); i++){ … … 75 53 else 76 54 ap.add(-1, value(i)); 77 nof_samples_ = target.size();78 55 } 79 r_ = ap.correlation(); 80 if (r_<0 && absolute_) 81 return -r_; 82 83 return r_; 56 double r = ap.correlation(); 57 if (r<0 && absolute_) 58 return -r; 59 return r; 84 60 } 85 61 86 62 double Pearson::score(const classifier::Target& target, 87 const classifier::DataLookupWeighted1D& value) 63 const classifier::DataLookupWeighted1D& value) const 88 64 { 89 weighted_=true;90 65 AveragerPairWeighted ap; 91 66 for (size_t i=0; i<target.size(); i++){ … … 94 69 else 95 70 ap.add(-1, value.data(i),1,value.weight(i)); 96 nof_samples_ = target.size();97 71 } 98 r_ = ap.correlation(); 99 if (r_<0 && absolute_) 100 return -r_; 101 102 return r_; 72 double r = ap.correlation(); 73 if (r<0 && absolute_) 74 return -r; 75 return r; 103 76 } 104 77 105 78 double Pearson::score(const classifier::Target& target, 106 79 const utility::vector& value, 107 const utility::vector& weight) 80 const utility::vector& weight) const 108 81 { 109 weighted_=true;110 82 AveragerPairWeighted ap; 111 83 for (size_t i=0; i<target.size(); i++){ … … 114 86 else 115 87 ap.add(-1, value(i),1,weight(i)); 116 nof_samples_ = target.size(); 117 } 118 r_ = ap.correlation(); 119 if (r_<0 && absolute_) 120 return -r_; 121 122 return r_; 88 } 89 double r = ap.correlation(); 90 if (r<0 && absolute_) 91 return -r; 92 return r; 123 93 } 124 94 -
trunk/yat/statistics/Pearson.h
r767 r779 32 32 class vector; 33 33 } 34 namespace classifier {35 class VectorAbstract;36 }37 34 namespace statistics { 38 35 … … 55 52 56 53 57 / //58 ///\f$ \frac{\vert \sum_i(x_i-\bar{x})(y_i-\bar{y})\vert59 ///}{\sqrt{\sum_i (x_i-\bar{x})^2\sum_i (x_i-\bar{x})^2}} \f$.60 ///@return Pearson correlation, if absolute=true absolute value61 ///of Pearson is used.62 ///54 /** 55 \f$ \frac{\vert \sum_i(x_i-\bar{x})(y_i-\bar{y})\vert 56 }{\sqrt{\sum_i (x_i-\bar{x})^2\sum_i (x_i-\bar{x})^2}} \f$. 57 @return Pearson correlation, if absolute=true absolute value 58 of Pearson is used. 59 */ 63 60 double score(const classifier::Target& target, 64 const utility::vector& value) ;61 const utility::vector& value) const; 65 62 66 / //67 ///\f$ \frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert }68 ///{\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}}69 ///\f$, where \f$ m_x = \frac{\sum w_ix_i}{\sum w_i} \f$ and \f$70 ///m_x = \frac{\sum w_ix_i}{\sum w_i} \f$. This expression is71 ///chosen to get a correlation equal to unity when \a x and \a y72 ///are equal. @return absolute value of weighted version of73 ///Pearson correlation.74 ///63 /** 64 \f$ \frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert } 65 {\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}} 66 \f$, where \f$ m_x = \frac{\sum w_ix_i}{\sum w_i} \f$ and \f$ 67 m_x = \frac{\sum w_ix_i}{\sum w_i} \f$. This expression is 68 chosen to get a correlation equal to unity when \a x and \a y 69 are equal. @return absolute value of weighted version of 70 Pearson correlation. 71 */ 75 72 double score(const classifier::Target& target, 76 const classifier::DataLookupWeighted1D& value) ;73 const classifier::DataLookupWeighted1D& value) const; 77 74 78 / //79 ///\f$ \frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert }80 ///{\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}}81 ///\f$, where \f$ m_x = \frac{\sum w_ix_i}{\sum w_i} \f$ and \f$82 ///m_x = \frac{\sum w_ix_i}{\sum w_i} \f$. This expression is83 ///chosen to get a correlation equal to unity when \a x and \a y84 ///are equal. @return absolute value of weighted version of85 ///Pearson correlation.86 ///75 /** 76 \f$ \frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert } 77 {\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}} 78 \f$, where \f$ m_x = \frac{\sum w_ix_i}{\sum w_i} \f$ and \f$ 79 m_x = \frac{\sum w_ix_i}{\sum w_i} \f$. This expression is 80 chosen to get a correlation equal to unity when \a x and \a y 81 are equal. @return absolute value of weighted version of 82 Pearson correlation. 83 */ 87 84 double score(const classifier::Target& target, 88 85 const utility::vector& value, 89 const utility::vector& weight) ;86 const utility::vector& weight) const; 90 87 91 ///92 /// The p-value is the probability of getting a correlation as93 /// large as the observed value by random chance, when the true94 /// correlation is zero (and the data is Gaussian). Note that this95 /// function can only be used together with the unweighted96 /// score. @return two-sided p-value97 ///98 double p_value() const;99 100 private:101 double r_;102 int nof_samples_;103 104 // void centralize(utility::vector&, const utility::vector&);105 88 }; 106 89 -
trunk/yat/statistics/PearsonCorrelation.cc
r776 r779 22 22 */ 23 23 24 #include "PearsonCorrelation.h" 24 25 #include "Pearson.h" 25 26 #include "AveragerPair.h" … … 36 37 namespace statistics { 37 38 38 Pearson ::Pearson(bool b)39 : Score(b),r_(0), nof_samples_(0)39 PearsonCorrelation::PearsonCorrelation(void) 40 : r_(0), nof_samples_(0) 40 41 { 41 42 } 42 43 43 Pearson ::~Pearson(void)44 PearsonCorrelation::~PearsonCorrelation(void) 44 45 { 45 46 } 46 47 47 double Pearson ::p_value() const48 double PearsonCorrelation::p_value_one_sided() const 48 49 { 49 if(weighted_)50 return 1;51 50 if(nof_samples_<2){ 52 51 std::cerr << "Warning: Only " << nof_samples_ << "samples. " … … 56 55 57 56 double t = sqrt(nof_samples_ - 2)*fabs(r_) /sqrt(1-r_*r_); 58 double p = gsl_cdf_tdist_Q(t, nof_samples_ -2 ); 59 if (absolute_) 60 return 2*p; 61 if (r_<0) 62 return 1-p; 63 return p; 64 57 return gsl_cdf_tdist_Q(t, nof_samples_ -2 ); 65 58 } 66 59 67 double Pearson ::score(const classifier::Target& target,68 const utility::vector& value)60 double PearsonCorrelation::score(const classifier::Target& target, 61 const utility::vector& value) 69 62 { 70 weighted_=false; 71 AveragerPair ap; 72 for (size_t i=0; i<target.size(); i++){ 73 if (target.binary(i)) 74 ap.add(1, value(i)); 75 else 76 ap.add(-1, value(i)); 77 nof_samples_ = target.size(); 78 } 79 r_ = ap.correlation(); 80 if (r_<0 && absolute_) 81 return -r_; 82 83 return r_; 63 nof_samples_ = value.size(); 64 Pearson pearson; 65 return pearson.score(target,value); 84 66 } 85 67 86 double Pearson::score(const classifier::Target& target, 87 const classifier::DataLookupWeighted1D& value) 68 double 69 PearsonCorrelation::score(const classifier::Target& target, 70 const classifier::DataLookupWeighted1D& value) 88 71 { 89 weighted_=true; 90 AveragerPairWeighted ap; 91 for (size_t i=0; i<target.size(); i++){ 92 if (target.binary(i)) 93 ap.add(1, value.data(i),1,value.weight(i)); 94 else 95 ap.add(-1, value.data(i),1,value.weight(i)); 96 nof_samples_ = target.size(); 97 } 98 r_ = ap.correlation(); 99 if (r_<0 && absolute_) 100 return -r_; 101 102 return r_; 72 // Peter what should nof_samples be in weighted case? 73 nof_samples_=0; 74 Pearson pearson; 75 return pearson.score(target,value); 103 76 } 104 77 105 double Pearson ::score(const classifier::Target& target,106 const utility::vector& value,107 const utility::vector& weight)78 double PearsonCorrelation::score(const classifier::Target& target, 79 const utility::vector& value, 80 const utility::vector& weight) 108 81 { 109 weighted_=true; 110 AveragerPairWeighted ap; 111 for (size_t i=0; i<target.size(); i++){ 112 if (target.binary(i)) 113 ap.add(1, value(i),1,weight(i)); 114 else 115 ap.add(-1, value(i),1,weight(i)); 116 nof_samples_ = target.size(); 117 } 118 r_ = ap.correlation(); 119 if (r_<0 && absolute_) 120 return -r_; 121 122 return r_; 82 // Peter what should nof_samples be in weighted case? 83 nof_samples_ = 0; 84 Pearson pearson; 85 return pearson.score(target,value); 123 86 } 124 87 -
trunk/yat/statistics/PearsonCorrelation.h
r776 r779 1 #ifndef _theplu_yat_statistics_pearson_ 2 #define _theplu_yat_statistics_pearson_ 1 #ifndef _theplu_yat_statistics_pearson_correlation_ 2 #define _theplu_yat_statistics_pearson_correlation_ 3 3 4 4 // $Id$ … … 41 41 /// 42 42 43 class Pearson : public Score43 class PearsonCorrelation 44 44 { 45 45 public: … … 47 47 /// @brief The default constructor. 48 48 /// 49 Pearson (bool absolute=true);49 PearsonCorrelation(void); 50 50 51 51 /// 52 52 /// @brief The destructor. 53 53 /// 54 virtual ~Pearson (void);54 virtual ~PearsonCorrelation(void); 55 55 56 56 … … 91 91 /// 92 92 /// The p-value is the probability of getting a correlation as 93 /// large as the observed value by random chance, when the true 94 /// correlation is zero (and the data is Gaussian). Note that this 95 /// function can only be used together with the unweighted 96 /// score. @return two-sided p-value 93 /// large (or larger) as the observed value by random chance, when the true 94 /// correlation is zero (and the data is Gaussian). 95 /// 96 /// @Note This function can only be used together with the 97 /// unweighted score. 97 98 /// 98 double p_value() const; 99 99 /// @return one-sided p-value 100 /// 101 double p_value_one_sided() const; 102 100 103 private: 101 104 double r_; -
trunk/yat/statistics/ROC.cc
r747 r779 38 38 namespace statistics { 39 39 40 ROC::ROC( bool b)41 : Score(b),area_(0.5), minimum_size_(10), nof_pos_(0)40 ROC::ROC(void) 41 : area_(0.5), minimum_size_(10), nof_pos_(0) 42 42 { 43 43 } … … 133 133 (nof_pos_*(n()-nof_pos_)) ); 134 134 135 //Returning score larger 0.5 that you get by random136 if (area_<0.5 && absolute_)137 area_=1.0-area_;138 139 135 return area_; 140 136 } … … 171 167 area_/=max_area; 172 168 173 if (area_<0.5 && absolute_)174 area_=1.0-area_;175 176 169 return area_; 177 170 } … … 208 201 209 202 area_/=max_area; 210 211 if (area_<0.5 && absolute_)212 area_=1.0-area_;213 203 214 204 return area_; -
trunk/yat/statistics/ROC.h
r767 r779 25 25 */ 26 26 27 #include "Score.h"28 29 27 #include <utility> 30 28 #include <vector> … … 33 31 namespace yat { 34 32 namespace classifier { 33 class DataLookup1D; 34 class DataLookupWeighted1D; 35 35 class Target; 36 36 } … … 47 47 /// U-test (aka Wilcoxon). 48 48 /// 49 class ROC : public Score49 class ROC 50 50 { 51 51 … … 54 54 /// @brief Default constructor 55 55 /// 56 ROC( bool absolute=true);56 ROC(void); 57 57 58 58 /// … … 155 155 u_int nof_pos_; 156 156 std::vector<std::pair<bool, double> > vec_pair_; // class-value-pair 157 bool weighted_; 157 158 }; 158 159 -
trunk/yat/statistics/ROCscore.cc
r778 r779 28 28 #include "yat/utility/vector.h" 29 29 30 #include <cassert> 30 31 #include <cmath> 31 32 #include <utility> 32 #include < vector>33 #include <map> 33 34 34 35 namespace theplu { … … 36 37 namespace statistics { 37 38 39 ROCscore::ROCscore(bool absolute) 40 : Score(absolute) 41 { 42 } 43 38 44 double ROCscore::score(const classifier::Target& target, 39 const utility::vector& value) 45 const utility::vector& value) const 40 46 { 41 47 assert(target.size()==value.size()); 42 weighted_=false; 48 // key data, pair<target, weight> 49 std::multimap<double, std::pair<bool, double> > m; 50 for (unsigned int i=0; i<target.size(); i++) 51 m.insert(std::make_pair(value(i), 52 std::make_pair(target.binary(i),1.0))); 53 54 return score(m); 55 } 43 56 44 std::vector<std::pair<bool, double> > class_value; // class-value-pair45 class_value.reserve(target.size());46 for (size_t i=0; i<target.size(); i++)47 class_value.push_back(std::make_pair(target.binary(i),value(i)));48 57 49 std::sort(class_value.begin(),class_value.end(), 50 utility::pair_value_compare<bool, double>()); 51 double area = 0; 52 u_int nof_pos =0; 53 for (size_t i=0; i<class_value.size(); i++){ 54 if (class_value[i].first){ 55 area+=i; 56 ++nof_pos; 58 59 double ROCscore::score(const classifier::Target& target, 60 const classifier::DataLookupWeighted1D& value) const 61 { 62 assert(target.size()==value.size()); 63 // key data, pair<target, weight> 64 std::multimap<double, std::pair<bool, double> > m; 65 for (unsigned int i=0; i<target.size(); i++) 66 if (value.weight(i)) 67 m.insert(std::make_pair(value.data(i), 68 std::make_pair(target.binary(i), 69 value.weight(i)))); 70 71 return score(m); 72 } 73 74 75 double ROCscore::score(const classifier::Target& target, 76 const utility::vector& value, 77 const utility::vector& weight) const 78 { 79 assert(target.size()==value.size()); 80 assert(target.size()==weight.size()); 81 // key data, pair<target, weight> 82 std::multimap<double, std::pair<bool, double> > m; 83 for (unsigned int i=0; i<target.size(); i++) 84 if (weight(i)) 85 m.insert(std::make_pair(value(i), 86 std::make_pair(target.binary(i), weight(i)))); 87 88 return score(m); 89 } 90 91 92 double 93 ROCscore::score(const MultiMap& m) const 94 { 95 double area=0; 96 double cumsum_pos_w=0; 97 double cumsum_neg_w=0; 98 typedef MultiMap::const_iterator iter; 99 100 for (iter i=m.begin(); i!=m.end(); ++i) 101 if (i->second.first){ 102 area += i->second.second * cumsum_neg_w; 103 cumsum_pos_w += i->second.second; 57 104 } 58 } 105 else 106 cumsum_neg_w += i->second.second; 59 107 60 // Normalizing the area to [0,1] 61 area = ( (area-nof_pos*(nof_pos-1)/2 ) / 62 (nof_pos*(class_value.size()-nof_pos)) ); 108 // max area is cumsum_neg_w * cumsum_pos_w 109 area/=(cumsum_neg_w*cumsum_pos_w); 63 110 64 111 if (area<0.5 && absolute_) 65 112 return 1.0-area; 66 113 return area; 67 } 68 69 70 // Peter, should be possible to do this in NlogN 71 double ROCscore::score(const classifier::Target& target, 72 const classifier::DataLookupWeighted1D& value) 73 { 74 75 std::vector<std::pair<bool, double> > class_value; // class-value-pair 76 class_value.clear(); 77 class_value.reserve(target.size()); 78 for (unsigned int i=0; i<target.size(); i++) 79 if (value.weight(i)) 80 class_value.push_back(std::make_pair(target.binary(i),value.data(i))); 81 82 std::sort(class_value.begin(),class_value.end(), 83 utility::pair_value_compare<int, double>()); 84 85 double area=0; 86 double max_area=0; 87 88 for (size_t i=0; i<n(); i++) 89 if (target.binary(i)) 90 for (size_t j=0; j<n(); j++) 91 if (!target.binary(j)){ 92 if (value.data(i)>value.data(j)) 93 area+=value.weight(i)*value.weight(j); 94 max_area+=value.weight(i)*value.weight(j); 95 } 96 97 area/=max_area; 98 99 if (area<0.5 && absolute_) 100 return 1.0-area; 101 return area; 102 } 103 104 105 // Peter, should be possible to do this in NlogN 106 double ROCscore::score(const classifier::Target& target, 107 const utility::vector& value, 108 const utility::vector& weight) 109 { 110 weighted_=true; 111 112 113 std::vector<std::pair<bool, double> > class_value; // class-value-pair 114 class_value.reserve(target.size()); 115 for (unsigned int i=0; i<target.size(); i++) 116 if (weight(i)) 117 class_value.push_back(std::make_pair(target.binary(i),value(i))); 118 119 std::sort(class_value.begin(),class_value.end(), 120 utility::pair_value_compare<int, double>()); 121 122 double area=0; 123 double max_area=0; 124 125 for (size_t i=0; i<n(); i++) 126 if (target.binary(i)) 127 for (size_t j=0; j<n(); j++) 128 if (!target.binary(j)){ 129 if (value(i)>value(j)) 130 area+=weight(i)*weight(j); 131 max_area+=weight(i)*weight(j); 132 } 133 134 area/=max_area; 135 136 if (area<0.5 && absolute_) 137 return 1.0-area; 138 return area; 139 } 114 } 140 115 141 116 }}} // of namespace statistics, yat, and theplu -
trunk/yat/statistics/ROCscore.h
r778 r779 26 26 27 27 #include "Score.h" 28 #include "yat/utility/stl_utility.h" 28 29 29 #include <cctype> 30 #include <utility> 31 #include <map> 30 32 31 33 namespace theplu { … … 47 49 public: 48 50 /// 49 /// @return number of samples50 51 /// 51 size_t n(void) const;52 53 52 /// 54 /// @return number of positive samples (Target.binary()==true) 55 /// 56 size_t n_pos(void) const; 53 ROCscore(bool absolute=true); 57 54 58 55 /// Function taking \a value, \a target (+1 or -1) and vector … … 64 61 /// 65 62 double score(const classifier::Target& target, 66 const utility::vector& value) ;63 const utility::vector& value) const; 67 64 68 65 /** … … 79 76 */ 80 77 double score(const classifier::Target& target, 81 const classifier::DataLookupWeighted1D& value) ;78 const classifier::DataLookupWeighted1D& value) const; 82 79 83 80 /** … … 95 92 double score(const classifier::Target& target, 96 93 const utility::vector& value, 97 const utility::vector& weight) ;94 const utility::vector& weight) const; 98 95 99 96 private: 97 typedef std::multimap<double, std::pair<bool, double> > MultiMap; 98 double score(const MultiMap&) const; 100 99 101 100 }; -
trunk/yat/statistics/SAMScore.cc
r776 r779 22 22 */ 23 23 24 #include "SAM .h"24 #include "SAMScore.h" 25 25 #include "Averager.h" 26 26 #include "AveragerWeighted.h" … … 35 35 namespace statistics { 36 36 37 SAM ::SAM(const double s0, bool b)37 SAMScore::SAMScore(const double s0, bool b) 38 38 : Score(b), s0_(s0) 39 39 { 40 40 } 41 41 42 double SAM ::score(const classifier::Target& target,43 const utility::vector& value)42 double SAMScore::score(const classifier::Target& target, 43 const utility::vector& value) const 44 44 { 45 weighted_=false;46 45 statistics::Averager positive; 47 46 statistics::Averager negative; … … 52 51 negative.add(value(i)); 53 52 } 54 if(positive.n()+negative.n()<=2) 55 return 0; 56 double diff = positive.mean() - negative.mean(); 57 double s2 = ( (1.0/positive.n()+1.0/negative.n()) * 58 (positive.sum_xx_centered()+negative.sum_xx_centered()) / 59 (positive.n()+negative.n()-2) ); 60 if (diff<0 && absolute_) 61 return -diff/(sqrt(s2)+s0_); 62 return diff/(sqrt(s2)+s0_); 53 return score(positive, negative); 63 54 } 64 55 65 double SAM ::score(const classifier::Target& target,66 const classifier::DataLookupWeighted1D& value)56 double SAMScore::score(const classifier::Target& target, 57 const classifier::DataLookupWeighted1D& value) const 67 58 { 68 weighted_=true;69 59 statistics::AveragerWeighted positive; 70 60 statistics::AveragerWeighted negative; … … 75 65 negative.add(value.data(i),value.weight(i)); 76 66 } 77 if(positive.n()+negative.n()<=2) 78 return 0; 79 double diff = positive.mean() - negative.mean(); 80 double s2 = ( (1.0/positive.n()+1.0/negative.n()) * 81 (positive.sum_xx_centered()+negative.sum_xx_centered()) / 82 (positive.n()+negative.n()-2) ); 83 if (diff<0 && absolute_) 84 return -diff/(sqrt(s2)+s0_); 85 return diff/(sqrt(s2)+s0_); 67 return score(positive, negative); 86 68 } 87 69 88 70 89 71 90 double SAM ::score(const classifier::Target& target,91 const utility::vector& value,92 const utility::vector& weight)72 double SAMScore::score(const classifier::Target& target, 73 const utility::vector& value, 74 const utility::vector& weight) const 93 75 { 94 weighted_=true;95 76 statistics::AveragerWeighted positive; 96 77 statistics::AveragerWeighted negative; … … 101 82 negative.add(value(i),weight(i)); 102 83 } 103 if(positive.n()+negative.n()<=2) 104 return 0; 105 double diff = positive.mean() - negative.mean(); 106 double s2 = ( (1.0/positive.n()+1.0/negative.n()) * 107 (positive.sum_xx_centered()+negative.sum_xx_centered()) / 108 (positive.n()+negative.n()-2) ); 109 if (diff<0 && absolute_) 110 return -diff/(sqrt(s2)+s0_); 111 return diff/(sqrt(s2)+s0_); 84 return score(positive, negative); 112 85 } 113 86 -
trunk/yat/statistics/SAMScore.h
r776 r779 1 #ifndef _theplu_yat_statistics_sam 2 #define _theplu_yat_statistics_sam 1 #ifndef _theplu_yat_statistics_sam_score_ 2 #define _theplu_yat_statistics_sam_score_ 3 3 4 4 // $Id$ … … 27 27 #include "Score.h" 28 28 29 #include <cmath> 30 29 31 namespace theplu { 30 32 namespace yat { … … 48 50 details 49 51 */ 50 class SAM : public Score52 class SAMScore : public Score 51 53 { 52 54 … … 55 57 /// @param s0 \f$ s_0 \f$ is a fudge factor 56 58 /// 57 SAM (const double s0, bool absolute=true);59 SAMScore(const double s0, bool absolute=true); 58 60 59 61 /** … … 67 69 */ 68 70 double score(const classifier::Target& target, 69 const utility::vector& value) ;71 const utility::vector& value) const; 70 72 71 73 /** … … 81 83 */ 82 84 double score(const classifier::Target& target, 83 const classifier::DataLookupWeighted1D& value) ;85 const classifier::DataLookupWeighted1D& value) const; 84 86 85 87 /** … … 96 98 double score(const classifier::Target& target, 97 99 const utility::vector& value, 98 const utility::vector& weight) ;100 const utility::vector& weight) const; 99 101 private: 100 102 double s0_; 103 104 template<class T> 105 double score(const T& positive, const T& negative) const 106 { 107 if(positive.n()+negative.n()<=2) 108 return 0; 109 double diff = positive.mean() - negative.mean(); 110 double s2 = ( (1.0/positive.n()+1.0/negative.n()) * 111 (positive.sum_xx_centered()+negative.sum_xx_centered()) / 112 (positive.n()+negative.n()-2) ); 113 if (diff<0 && absolute_) 114 return -diff/(sqrt(s2)+s0_); 115 return diff/(sqrt(s2)+s0_); 116 } 101 117 }; 102 118 -
trunk/yat/statistics/SNR.cc
r747 r779 41 41 const utility::vector& value) 42 42 { 43 weighted_=false;44 43 statistics::Averager positive; 45 44 statistics::Averager negative; … … 63 62 const classifier::DataLookupWeighted1D& value) 64 63 { 65 weighted_=true;66 64 statistics::AveragerWeighted positive; 67 65 statistics::AveragerWeighted negative; … … 89 87 const utility::vector& weight) 90 88 { 91 weighted_=true;92 89 statistics::AveragerWeighted positive; 93 90 statistics::AveragerWeighted negative; -
trunk/yat/statistics/Score.cc
r747 r779 25 25 26 26 #include "yat/classifier/DataLookup1D.h" 27 #include "yat/classifier/DataLookupWeighted1D.h" 27 28 #include "yat/classifier/Target.h" 28 29 #include "yat/classifier/utility.h" 29 30 #include "yat/utility/vector.h" 30 31 32 #include <cassert> 31 33 32 34 namespace theplu { … … 35 37 36 38 Score::Score(bool absolute) 37 : absolute_(absolute) , weighted_(false)39 : absolute_(absolute) 38 40 { 39 41 } … … 43 45 } 44 46 47 void Score::absolute(bool absolute) 48 { 49 absolute_=absolute; 50 } 51 45 52 double Score::score(const classifier::Target& target, 46 const classifier::DataLookup1D& value) 53 const classifier::DataLookup1D& value) const 47 54 { 48 55 assert(target.size()==value.size()); … … 52 59 } 53 60 54 void Score::absolute(bool absolute) 61 62 double Score::score(const classifier::Target& target, 63 const classifier::DataLookupWeighted1D& value) const 55 64 { 56 absolute_=absolute; 65 assert(target.size()==value.size()); 66 utility::vector a; 67 utility::vector b; 68 classifier::convert(value,a,b); 69 return score(target,a,b); 57 70 } 71 58 72 59 73 double Score::score(const classifier::Target& target, 60 74 const classifier::DataLookup1D& value, 61 const classifier::DataLookup1D& weight) 75 const classifier::DataLookup1D& weight) const 62 76 { 63 77 utility::vector a; … … 68 82 } 69 83 70 bool Score::weighted(void) const71 {72 return weighted_;73 }74 75 84 }}} // of namespace statistics, yat, and theplu -
trunk/yat/statistics/Score.h
r767 r779 47 47 /// @brief Constructor 48 48 /// 49 Score(bool absolute=true) ;49 Score(bool) ; 50 50 51 51 /// … … 62 62 /// Function calculating the score. In absolute mode, also the 63 63 /// score using negated class labels is calculated, and the 64 /// largest of the two scores are calculated. Absolute mode should 65 /// be used when two-tailed test is wanted. 64 /// largest of the two scores are returned. 66 65 /// 67 66 virtual double 68 67 score(const classifier::Target& target, 69 const utility::vector& value) = 0;68 const utility::vector& value) const = 0; 70 69 71 70 /// 72 71 /// Function calculating the score. In absolute mode, also the 73 72 /// score using negated class labels is calculated, and the 74 /// largest of the two scores are calculated. Absolute mode should 75 /// be used when two-tailed test is wanted. 73 /// largest of the two scores are calculated. 74 /// 75 /// @a value is copied to a utility::vector and that operator is 76 /// called. If speed is important this operator should be 77 /// implemented in inherited class to avoid copying. 76 78 /// 77 /// @return s tatistica.79 /// @return score 78 80 /// 79 /// @param target vector of targets (most often +1 -1) 80 /// @param value vector of the values 81 /// 82 double score(const classifier::Target& target, 83 const classifier::DataLookup1D& value); 81 virtual double score(const classifier::Target& target, 82 const classifier::DataLookup1D& value) const; 84 83 85 84 /// … … 90 89 /// is wanted. 91 90 /// 91 /// @a value is copied to two utility::vector and that operator is 92 /// called. If speed is important this operator should be 93 /// implemented in inherited class to avoid copying. 94 /// 92 95 virtual double 93 96 score(const classifier::Target& target, 94 const classifier::DataLookupWeighted1D& value) = 0;97 const classifier::DataLookupWeighted1D& value) const; 95 98 96 99 /// … … 104 107 score(const classifier::Target& target, 105 108 const utility::vector& value, 106 const utility::vector& weight) = 0;109 const utility::vector& weight) const = 0; 107 110 108 111 /// … … 113 116 /// is wanted. 114 117 /// 118 /// @a value and @weight are copied to utility::vector and the 119 /// corresponding operator is called. If speed is important this 120 /// operator should be implemented in inherited class to avoid 121 /// copying. 122 /// 115 123 double score(const classifier::Target& target, 116 124 const classifier::DataLookup1D& value, 117 const classifier::DataLookup1D& weight) ;125 const classifier::DataLookup1D& weight) const; 118 126 119 127 protected: 120 /// return true if method is weighted121 bool weighted(void) const;122 123 128 /// true if method is absolute, which means if score is below 124 /// expected value (by chance) E, score returns E-score instead.129 /// expected value (by chance) E, score returns E-score+E instead. 125 130 bool absolute_; 126 /// true if method is weighted127 bool weighted_;128 131 129 132 }; // class Score -
trunk/yat/statistics/WilcoxonFoldChange.cc
r680 r779 42 42 43 43 double WilcoxonFoldChange::score(const classifier::Target& target, 44 const utility::vector& value) 44 const utility::vector& value) const 45 45 { 46 46 std::vector<double> distance; 47 47 //Peter, should reserve the vector to avoid reallocations 48 weighted_=false;49 48 for (size_t i=0; i<target.size(); i++) { 50 49 if (target(i)!=1) continue; … … 60 59 61 60 62 double WilcoxonFoldChange::score(const classifier::Target& target, 63 const classifier::DataLookupWeighted1D& value) 61 double WilcoxonFoldChange::score 62 (const classifier::Target& target, 63 const classifier::DataLookupWeighted1D& value) const 64 64 { 65 65 std::cerr << " WilcoxonFoldChange::score not implemented" << std::endl; … … 70 70 double WilcoxonFoldChange::score(const classifier::Target& target, 71 71 const utility::vector& value, 72 const utility::vector& weight) 72 const utility::vector& weight) const 73 73 { 74 74 std::cerr << " WilcoxonFoldChange::score not implemented" << std::endl; -
trunk/yat/statistics/WilcoxonFoldChange.h
r680 r779 53 53 /// 54 54 double score(const classifier::Target& target, 55 const utility::vector& value) ;55 const utility::vector& value) const; 56 56 57 57 /// … … 64 64 /// 65 65 double score(const classifier::Target& target, 66 const classifier::DataLookupWeighted1D& value) ;66 const classifier::DataLookupWeighted1D& value) const; 67 67 68 68 /// … … 77 77 double score(const classifier::Target& target, 78 78 const utility::vector& value, 79 const utility::vector& weight) ;79 const utility::vector& weight) const; 80 80 81 81 private: -
trunk/yat/statistics/tScore.cc
r747 r779 25 25 #include "Averager.h" 26 26 #include "AveragerWeighted.h" 27 #include "yat/classifier/DataLookup1D.h" 27 28 #include "yat/classifier/DataLookupWeighted1D.h" 28 29 #include "yat/classifier/Target.h" … … 37 38 38 39 tScore::tScore(bool b) 39 : Score(b) , t_(0)40 : Score(b) 40 41 { 41 42 } 42 43 44 43 45 double tScore::score(const classifier::Target& target, 44 const utility::vector& value) 46 const utility::vector& value) const 45 47 { 46 weighted_=false; 48 return score(target, value, NULL); 49 } 50 51 52 double tScore::score(const classifier::Target& target, 53 const utility::vector& value, 54 double* dof) const 55 { 47 56 statistics::Averager positive; 48 57 statistics::Averager negative; … … 53 62 negative.add(value(i)); 54 63 } 55 double diff = positive.mean() - negative.mean(); 56 dof_=positive.n()+negative.n()-2; 57 double s2=(positive.sum_xx_centered()+negative.sum_xx_centered())/dof_; 58 59 t_=diff/sqrt(s2/positive.n()+s2/negative.n()); 60 if (t_<0 && absolute_) 61 t_=-t_; 62 63 return t_; 64 return score(positive, negative, dof); 64 65 } 65 66 66 67 67 68 double tScore::score(const classifier::Target& target, 68 const classifier::DataLookupWeighted1D& value) 69 const classifier::DataLookupWeighted1D& value) const 69 70 { 70 weighted_=true; 71 return score(target, value, NULL); 72 } 71 73 74 75 double tScore::score(const classifier::Target& target, 76 const classifier::DataLookupWeighted1D& value, 77 double* dof) const 78 { 72 79 statistics::AveragerWeighted positive; 73 80 statistics::AveragerWeighted negative; … … 78 85 negative.add(value.data(i),value.weight(i)); 79 86 } 80 double diff = positive.mean() - negative.mean(); 81 dof_=positive.n()+negative.n()-2; 82 double s2=(positive.sum_xx_centered()+negative.sum_xx_centered())/dof_; 83 t_=diff/sqrt(s2/positive.n()+s2/(negative.n())); 84 if (t_<0 && absolute_) 85 t_=-t_; 86 87 if(positive.sum_w()==0 || negative.sum_w()==0) 88 t_=0; 89 return t_; 87 return score(positive, negative, dof); 90 88 } 91 89 … … 93 91 double tScore::score(const classifier::Target& target, 94 92 const utility::vector& value, 95 const utility::vector& weight) 93 const utility::vector& weight) const 96 94 { 97 weighted_=true; 95 return score(target, value, weight, NULL); 96 } 98 97 98 99 double tScore::score(const classifier::Target& target, 100 const utility::vector& value, 101 const utility::vector& weight, 102 double* dof) const 103 { 99 104 statistics::AveragerWeighted positive; 100 105 statistics::AveragerWeighted negative; … … 105 110 negative.add(value(i),weight(i)); 106 111 } 107 double diff = positive.mean() - negative.mean(); 108 dof_=positive.n()+negative.n()-2; 109 double s2=(positive.sum_xx_centered()+negative.sum_xx_centered())/dof_; 110 t_=diff/sqrt(s2/positive.n()+s2/(negative.n())); 111 if (t_<0 && absolute_) 112 t_=-t_; 113 114 if(positive.sum_w()==0 || negative.sum_w()==0) 115 t_=0; 116 return t_; 117 } 118 119 double tScore::p_value(void) const 120 { 121 double p = gsl_cdf_tdist_Q(t_, dof_); 122 return (dof_ > 0 && !weighted_) ? p : 1; 112 return score(positive, negative, dof); 123 113 } 124 114 -
trunk/yat/statistics/tScore.h
r767 r779 27 27 #include "Score.h" 28 28 29 #include <cmath> 29 30 #include <gsl/gsl_cdf.h> 30 31 … … 48 49 public: 49 50 /// 50 /// 2brief Default Constructor.51 /// @brief Default Constructor. 51 52 /// 52 53 tScore(bool absolute=true); … … 61 62 \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y - 62 63 2 } \f$ 63 64 64 65 @return t-score. If absolute=true absolute value of t-score 65 66 is returned 66 67 */ 67 68 double score(const classifier::Target& target, 68 const utility::vector& value); 69 const utility::vector& value) const; 70 71 /** 72 Calculates the value of t-score, i.e. the ratio between 73 difference in mean and standard deviation of this 74 difference. \f$ t = \frac{ m_x - m_y } 75 {s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the 76 mean, \f$ n \f$ is the number of data points and \f$ s^2 = 77 \frac{ \sum_i (x_i-m_x)^2 + \sum_i (y_i-m_y)^2 }{ n_x + n_y - 78 2 } \f$ 79 80 @param dof double pointer in which approximation of degrees of 81 freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted. 82 83 @return t-score. If absolute=true absolute value of t-score 84 is returned 85 */ 86 double score(const classifier::Target& target, 87 const utility::vector& value, double* dof) const; 88 89 /** 90 Calculates the weighted t-score, i.e. the ratio between 91 difference in mean and standard deviation of this 92 difference. \f$ t = \frac{ m_x - m_y }{ 93 s\sqrt{\frac{1}{n_x}+\frac{1}{n_y}}} \f$ where \f$ m \f$ is the 94 weighted mean, n is the weighted version of number of data 95 points \f$ \frac{\left(\sum w_i\right)^2}{\sum w_i^2} \f$, and 96 \f$ s^2 \f$ is an estimation of the variance \f$ s^2 = \frac{ 97 \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x + n_y - 2 98 } \f$. See AveragerWeighted for details. 99 100 @param dof double pointer in which approximation of degrees of 101 freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted. 102 103 @return t-score. If absolute=true absolute value of t-score 104 is returned 105 */ 106 double score(const classifier::Target& target, 107 const classifier::DataLookupWeighted1D& value, 108 double* dof=0) const; 69 109 70 110 /** … … 83 123 */ 84 124 double score(const classifier::Target& target, 85 const classifier::DataLookupWeighted1D& value) ;125 const classifier::DataLookupWeighted1D& value) const; 86 126 87 / //88 ///Calculates the weighted t-score, i.e. the ratio between89 ///difference in mean and standard deviation of this90 ///difference. \f$ t = \frac{ m_x - m_y }{91 ///\frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the92 ///weighted mean, n is the weighted version of number of data93 ///points and \f$ s2 \f$ is an estimation of the variance \f$ s^294 ///= \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x95 ///+ n_y - 2 } \f$. See AveragerWeighted for details.96 ///97 ///@return t-score if absolute=true absolute value of t-score98 ///is returned99 ///127 /** 128 Calculates the weighted t-score, i.e. the ratio between 129 difference in mean and standard deviation of this 130 difference. \f$ t = \frac{ m_x - m_y }{ 131 \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the 132 weighted mean, n is the weighted version of number of data 133 points and \f$ s2 \f$ is an estimation of the variance \f$ s^2 134 = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x 135 + n_y - 2 } \f$. See AveragerWeighted for details. 136 137 @return t-score if absolute=true absolute value of t-score 138 is returned 139 */ 100 140 double score(const classifier::Target& target, 101 141 const utility::vector& value, 102 const utility::vector& weight) ;142 const utility::vector& weight) const; 103 143 104 /// 105 /// Calculates the p-value, i.e. the probability of observing a 106 /// t-score equally or larger if the null hypothesis is true. If P 107 /// is near zero, this casts doubt on this hypothesis. The null 108 /// hypothesis is that the means of the two distributions are 109 /// equal. Assumtions for this test is that the two distributions 110 /// are normal distributions with equal variance. The latter 111 /// assumtion is dropped in Welch's t-test. 112 /// 113 /// @return the one-sided p-value( if absolute=true is used 114 /// the two-sided p-value) 115 /// 116 double p_value() const; 144 /** 145 Calculates the weighted t-score, i.e. the ratio between 146 difference in mean and standard deviation of this 147 difference. \f$ t = \frac{ m_x - m_y }{ 148 \frac{s2}{n_x}+\frac{s2}{n_y}} \f$ where \f$ m \f$ is the 149 weighted mean, n is the weighted version of number of data 150 points and \f$ s2 \f$ is an estimation of the variance \f$ s^2 151 = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x 152 + n_y - 2 } \f$. See AveragerWeighted for details. 153 154 @param dof double pointer in which approximation of degrees of 155 freedom is returned: pos.n()+neg.n()-2. See AveragerWeighted. 156 157 @return t-score if absolute=true absolute value of t-score 158 is returned 159 */ 160 double score(const classifier::Target& target, 161 const utility::vector& value, 162 const utility::vector& weight, 163 double* dof=0) const; 117 164 118 165 private: 119 double t_; 120 double dof_; 121 166 167 template<class T> 168 double score(const T& pos, const T& neg, double* dof) const 169 { 170 double diff = pos.mean() - neg.mean(); 171 if (dof) 172 *dof=pos.n()+neg.n()-2; 173 double s2=( (pos.sum_xx_centered()+neg.sum_xx_centered())/ 174 (pos.n()+neg.n()-2)); 175 double t=diff/sqrt(s2/pos.n()+s2/(neg.n())); 176 if (t<0 && absolute_) 177 return -t; 178 return t; 179 } 122 180 }; 123 181 -
trunk/yat/statistics/tTest.cc
r776 r779 22 22 */ 23 23 24 #include "tTest.h" 24 25 #include "tScore.h" 25 #include "Averager.h"26 #include "AveragerWeighted.h"27 #include "yat/classifier/DataLookupWeighted1D.h"28 #include "yat/classifier/Target.h"29 #include "yat/utility/vector.h"26 //#include "Averager.h" 27 //#include "AveragerWeighted.h" 28 //#include "yat/classifier/DataLookupWeighted1D.h" 29 //#include "yat/classifier/Target.h" 30 //#include "yat/utility/vector.h" 30 31 31 32 #include <cassert> … … 36 37 namespace statistics { 37 38 38 t Score::tScore(bool b)39 : Score(b),t_(0)39 tTest::tTest(bool b) 40 : t_(0) 40 41 { 41 42 } 42 43 43 double t Score::score(const classifier::Target& target,44 44 double tTest::score(const classifier::Target& target, 45 const utility::vector& value) 45 46 { 46 weighted_=false; 47 statistics::Averager positive; 48 statistics::Averager negative; 49 for(size_t i=0; i<target.size(); i++){ 50 if (target.binary(i)) 51 positive.add(value(i)); 52 else 53 negative.add(value(i)); 54 } 55 double diff = positive.mean() - negative.mean(); 56 dof_=positive.n()+negative.n()-2; 57 double s2=(positive.sum_xx_centered()+negative.sum_xx_centered())/dof_; 58 59 t_=diff/sqrt(s2/positive.n()+s2/negative.n()); 60 if (t_<0 && absolute_) 61 t_=-t_; 62 47 tScore score; 48 t_ = score.score(target, value, &dof_); 63 49 return t_; 64 50 } 65 51 66 52 67 double t Score::score(const classifier::Target& target,68 53 double tTest::score(const classifier::Target& target, 54 const classifier::DataLookupWeighted1D& value) 69 55 { 70 weighted_=true; 71 72 statistics::AveragerWeighted positive; 73 statistics::AveragerWeighted negative; 74 for(size_t i=0; i<target.size(); i++){ 75 if (target.binary(i)) 76 positive.add(value.data(i),value.weight(i)); 77 else 78 negative.add(value.data(i),value.weight(i)); 79 } 80 double diff = positive.mean() - negative.mean(); 81 dof_=positive.n()+negative.n()-2; 82 double s2=(positive.sum_xx_centered()+negative.sum_xx_centered())/dof_; 83 t_=diff/sqrt(s2/positive.n()+s2/(negative.n())); 84 if (t_<0 && absolute_) 85 t_=-t_; 86 87 if(positive.sum_w()==0 || negative.sum_w()==0) 88 t_=0; 56 tScore score; 57 t_ = score.score(target, value, &dof_); 89 58 return t_; 90 59 } 91 60 92 61 93 double t Score::score(const classifier::Target& target,94 95 62 double tTest::score(const classifier::Target& target, 63 const utility::vector& value, 64 const utility::vector& weight) 96 65 { 97 weighted_=true; 98 99 statistics::AveragerWeighted positive; 100 statistics::AveragerWeighted negative; 101 for(size_t i=0; i<target.size(); i++){ 102 if (target.binary(i)) 103 positive.add(value(i),weight(i)); 104 else 105 negative.add(value(i),weight(i)); 106 } 107 double diff = positive.mean() - negative.mean(); 108 dof_=positive.n()+negative.n()-2; 109 double s2=(positive.sum_xx_centered()+negative.sum_xx_centered())/dof_; 110 t_=diff/sqrt(s2/positive.n()+s2/(negative.n())); 111 if (t_<0 && absolute_) 112 t_=-t_; 113 114 if(positive.sum_w()==0 || negative.sum_w()==0) 115 t_=0; 66 tScore score; 67 t_ = score.score(target, value, weight, &dof_); 116 68 return t_; 117 69 } 118 70 119 double t Score::p_value(void) const71 double tTest::p_value(void) const 120 72 { 121 double p = gsl_cdf_tdist_Q(t_, dof_); 122 return (dof_ > 0 && !weighted_) ? p : 1; 73 if (!dof_) 74 return 1.0; 75 return gsl_cdf_tdist_Q(t_, dof_); 123 76 } 124 77 -
trunk/yat/statistics/tTest.h
r776 r779 1 #ifndef _theplu_yat_statistics_t score_2 #define _theplu_yat_statistics_t score_1 #ifndef _theplu_yat_statistics_ttest_ 2 #define _theplu_yat_statistics_ttest_ 3 3 4 4 // $Id$ … … 25 25 */ 26 26 27 #include "Score.h"28 29 27 #include <gsl/gsl_cdf.h> 30 28 31 29 namespace theplu { 32 30 namespace yat { 31 namespace classifier { 32 class DataLookup1D; 33 class DataLookupWeighted1D; 34 class Target; 35 } 33 36 namespace utility { 34 37 class vector; … … 43 46 /// details on the t-test. 44 47 /// 45 class t Score : public Score48 class tTest 46 49 { 47 50 48 51 public: 49 52 /// 50 /// 2brief Default Constructor.53 /// @brief Default Constructor. 51 54 /// 52 t Score(bool absolute=true);55 tTest(bool absolute=true); 53 56 54 57 … … 111 114 /// assumtion is dropped in Welch's t-test. 112 115 /// 113 /// @return the one-sided p-value( if absolute=true is used 114 /// the two-sided p-value) 116 /// @return the two-sided p-value 115 117 /// 116 118 double p_value() const; 119 120 /// 121 /// @return One-sided P-value 122 /// 123 double p_value_one_sided(void) const; 117 124 118 125 private: … … 120 127 double dof_; 121 128 129 122 130 }; 123 131
Note: See TracChangeset
for help on using the changeset viewer.