- Timestamp:
- Sep 5, 2006, 4:13:12 AM (17 years ago)
- Location:
- trunk/c++_tools/statistics
- Files:
-
- 15 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/c++_tools/statistics/Fisher.cc
r616 r623 4 4 #include <c++_tools/statistics/Score.h> 5 5 #include <c++_tools/statistics/utility.h> 6 #include <c++_tools/classifier/DataLookupWeighted1D.h> 6 7 #include <c++_tools/classifier/Target.h> 7 8 … … 136 137 } 137 138 138 double Fisher::score(const classifier::Target& target, 139 const utility::vector& value, 140 const utility::vector& weight) 139 double Fisher::score(const classifier::Target& target, 140 const classifier::DataLookupWeighted1D& value) 141 141 { 142 142 weighted_=true; … … 144 144 for (size_t i=0; i<target.size(); i++) 145 145 if (target.binary(i)) 146 if (value.data(i)>value_cutoff_) 147 a_+=value.weight(i); 148 else 149 c_+=value.weight(i); 150 else 151 if (value.data(i)>value_cutoff_) 152 b_+=value.weight(i); 153 else 154 d_+=value.weight(i); 155 156 // If a column sum or a row sum is zero, the table is non-sense 157 if ((a_==0 || d_==0) && (c_==0 || b_==0)){ 158 // Peter should throw an exception here 159 std::cerr << "Warning: Fisher: Table is not valid\n"; 160 return 1; 161 } 162 163 return oddsratio(a_,b_,c_,d_); 164 } 165 166 double Fisher::score(const classifier::Target& target, 167 const utility::vector& value, 168 const utility::vector& weight) 169 { 170 weighted_=true; 171 a_=b_=c_=d_=0; 172 for (size_t i=0; i<target.size(); i++) 173 if (target.binary(i)) 146 174 if (value(i)>value_cutoff_) 147 175 a_+=weight(i); … … 156 184 // If a column sum or a row sum is zero, the table is non-sense 157 185 if ((a_==0 || d_==0) && (c_==0 || b_==0)){ 186 // Peter should throw an exception 158 187 std::cerr << "Warning: Fisher: Table is not valid\n"; 159 188 return 1; -
trunk/c++_tools/statistics/Fisher.h
r616 r623 118 118 /// 119 119 double score(const classifier::Target& target, 120 const classifier::DataLookupWeighted1D& value); 121 122 123 /// 124 /// Weighted version of score. Each element in 2x2 table is 125 /// calculated as \f$ \sum w_i \f$, so when each weight is 126 /// unitary the same table is created as in the unweighted version 127 /// 128 /// @return odds ratio 129 /// 130 /// @see score 131 /// 132 double score(const classifier::Target& target, 120 133 const utility::vector& value, 121 134 const utility::vector& weight); -
trunk/c++_tools/statistics/FoldChange.cc
r616 r623 5 5 #include <c++_tools/statistics/Averager.h> 6 6 #include <c++_tools/statistics/AveragerWeighted.h> 7 #include <c++_tools/classifier/DataLookupWeighted1D.h> 7 8 #include <c++_tools/classifier/Target.h> 8 9 … … 50 51 51 52 double FoldChange::score(const classifier::Target& target, 53 const classifier::DataLookupWeighted1D& value) 54 { 55 weighted_=true; 56 AveragerWeighted pos; 57 AveragerWeighted neg; 58 59 for (size_t i=0; i<value.size(); i++) 60 if (target.binary(i)) 61 pos.add(value.data(i),value.weight(i)); 62 else 63 neg.add(value.data(i),value.weight(i)); 64 65 if (absolute_) 66 return fabs(pos.mean()-neg.mean()); 67 return pos.mean()-neg.mean(); 68 } 69 70 71 double FoldChange::score(const classifier::Target& target, 52 72 const utility::vector& value, 53 73 const utility::vector& weight) -
trunk/c++_tools/statistics/FoldChange.h
r616 r623 30 30 /// @return difference of the means of the two classes 31 31 /// 32 /// @param target is +1 or -1 32 /// @param target 33 33 /// @param value vector of the values 34 34 /// … … 37 37 38 38 /// 39 /// @return difference of the means of the two classes 40 /// 41 /// @param target 42 /// @param value vector of the values (with weights) 43 /// 44 double score(const classifier::Target& target, 45 const classifier::DataLookupWeighted1D& value); 46 47 /// 39 48 /// @return difference of the weighted means of the two classes 40 49 /// 41 /// @param target is +1 or -1 50 /// @param target 42 51 /// @param value vector of the values 43 52 /// @param weight vector of accompanied weight to the values -
trunk/c++_tools/statistics/Pearson.cc
r616 r623 5 5 #include <c++_tools/statistics/AveragerPairWeighted.h> 6 6 #include <c++_tools/utility/vector.h> 7 #include <c++_tools/classifier/DataLookupWeighted1D.h> 7 8 #include <c++_tools/classifier/Target.h> 8 9 … … 59 60 60 61 double Pearson::score(const classifier::Target& target, 62 const classifier::DataLookupWeighted1D& value) 63 { 64 weighted_=true; 65 AveragerPairWeighted ap; 66 for (size_t i=0; i<target.size(); i++){ 67 if (target.binary(i)) 68 ap.add(1, value.data(i),1,value.weight(i)); 69 else 70 ap.add(-1, value.data(i),1,value.weight(i)); 71 nof_samples_ = target.size(); 72 } 73 r_ = ap.correlation(); 74 if (r_<0 && absolute_) 75 return -r_; 76 77 return r_; 78 } 79 80 double Pearson::score(const classifier::Target& target, 61 81 const utility::vector& value, 62 82 const utility::vector& weight) -
trunk/c++_tools/statistics/Pearson.h
r616 r623 53 53 /// 54 54 double score(const classifier::Target& target, 55 const classifier::DataLookupWeighted1D& value); 56 57 /// 58 /// \f$ \frac{\vert \sum_iw^2_i(x_i-\bar{x})(y_i-\bar{y})\vert } 59 /// {\sqrt{\sum_iw^2_i(x_i-\bar{x})^2\sum_iw^2_i(y_i-\bar{y})^2}} 60 /// \f$, where \f$ m_x = \frac{\sum w_ix_i}{\sum w_i} \f$ and \f$ 61 /// m_y = \frac{\sum w_iy_i}{\sum w_i} \f$. This expression is 62 /// chosen to get a correlation equal to unity when \a x and \a y 63 /// are equal. @return absolute value of weighted version of 64 /// Pearson correlation. 65 /// 66 double score(const classifier::Target& target, 55 67 const utility::vector& value, 56 68 const utility::vector& weight); -
trunk/c++_tools/statistics/ROC.cc
r616 r623 1 1 // $Id$ 2 2 3 #include <c++_tools/statistics/ROC.h> 4 #include <c++_tools/utility/stl_utility.h> 5 #include <c++_tools/utility/vector.h> 3 #include "c++_tools/statistics/ROC.h" 4 5 #include "c++_tools/classifier/DataLookupWeighted1D.h" 6 #include "c++_tools/utility/stl_utility.h" 7 #include "c++_tools/utility/vector.h" 6 8 7 9 #include <gsl/gsl_cdf.h> … … 97 99 return area_; 98 100 } 101 102 103 // Peter, should be possible to do this in NlogN 104 double ROC::score(const classifier::Target& target, 105 const classifier::DataLookupWeighted1D& value) 106 { 107 weighted_=true; 108 109 vec_pair_.clear(); 110 vec_pair_.reserve(target.size()); 111 for (unsigned int i=0; i<target.size(); i++) 112 if (value.weight(i)) 113 vec_pair_.push_back(std::make_pair(target.binary(i),value.data(i))); 114 115 std::sort(vec_pair_.begin(),vec_pair_.end(), 116 utility::pair_value_compare<int, double>()); 117 118 area_=0; 119 nof_pos_=0; 120 double max_area=0; 121 122 for (size_t i=0; i<n(); i++) 123 if (target.binary(i)) 124 for (size_t j=0; j<n(); j++) 125 if (!target.binary(j)){ 126 if (value.data(i)>value.data(j)) 127 area_+=value.weight(i)*value.weight(j); 128 max_area+=value.weight(i)*value.weight(j); 129 } 130 131 area_/=max_area; 132 133 if (area_<0.5 && absolute_) 134 area_=1.0-area_; 135 136 return area_; 137 } 138 99 139 100 140 // Peter, should be possible to do this in NlogN -
trunk/c++_tools/statistics/ROC.h
r616 r623 47 47 const utility::vector& value); 48 48 49 /// Function taking values, target, weight and a vector defining 50 /// what samples to use. The area is defined as \f$ \frac{\sum 51 /// w^+w^-}{\sum w^+w^-}\f$, where the sum in the numerator goes 52 /// over all pairs where value+ is larger than value-. The 53 /// denominator goes over all pairs. If target is equal to 1, 54 /// sample belongs to class + otherwise sample belongs to class 55 /// -. @return weighted version of area under the ROC curve. If 56 /// the area is less than 0.5 and absolute=true, 1-area is 57 /// returned. Complexity is \f$ N^2 \f$ where \f$ N \f$ is number 58 /// of samples. 59 /// 60 double score(const classifier::Target& target, 61 const classifier::DataLookupWeighted1D& value); 62 63 49 64 /// Function taking values, target, weight and a vector defining 50 65 /// what samples to use. The area is defined as \f$ \frac{\sum -
trunk/c++_tools/statistics/SNR.cc
r616 r623 1 1 // $Id$ 2 2 3 4 // Thep C++ Tools 5 #include <c++_tools/statistics/SNR.h> 6 #include <c++_tools/statistics/Averager.h> 7 #include <c++_tools/statistics/AveragerWeighted.h> 8 #include <c++_tools/classifier/Target.h> 3 #include "c++_tools/statistics/SNR.h" 4 #include "c++_tools/statistics/Averager.h" 5 #include "c++_tools/statistics/AveragerWeighted.h" 6 #include "c++_tools/classifier/DataLookupWeighted1D.h" 7 #include "c++_tools/classifier/Target.h" 9 8 10 9 namespace theplu { … … 39 38 40 39 double SNR::score(const classifier::Target& target, 40 const classifier::DataLookupWeighted1D& value) 41 { 42 weighted_=true; 43 statistics::AveragerWeighted positive; 44 statistics::AveragerWeighted negative; 45 for(size_t i=0; i<target.size(); i++){ 46 if (target.binary(i)) 47 positive.add(value.data(i),value.weight(i)); 48 else 49 negative.add(value.data(i),value.weight(i)); 50 } 51 double diff = positive.mean() - negative.mean(); 52 double denom=positive.std()+negative.std(); 53 assert(denom); 54 score_=diff/denom; 55 if(positive.sum_w()==0 || negative.sum_w()==0) 56 score_=0; 57 if (score_<0 && absolute_) 58 score_=-score_; 59 return score_; 60 } 61 62 63 64 double SNR::score(const classifier::Target& target, 41 65 const utility::vector& value, 42 66 const utility::vector& weight) -
trunk/c++_tools/statistics/SNR.h
r616 r623 11 11 namespace utility { 12 12 class vector; 13 } 14 namespace classifier { 15 class DataLookWeighted1D; 13 16 } 14 17 namespace statistics { … … 45 48 /// 46 49 double score(const classifier::Target& target, 50 const classifier::DataLookupWeighted1D& value); 51 52 /// 53 /// Weighted version of SNR @return t-score if absolute=true 54 /// absolute value of t-score is returned. 55 /// 56 double score(const classifier::Target& target, 47 57 const utility::vector& value, 48 58 const utility::vector& weight); -
trunk/c++_tools/statistics/Score.h
r616 r623 17 17 class Target; 18 18 class DataLookup1D; 19 class DataLookupWeighted1D; 19 20 } 20 21 … … 75 76 76 77 /// 78 /// Function calculating the score in a weighted fashion. In 79 /// absolute mode, also the score using negated class labels is 80 /// calculated, and the largest of the two scores are 81 /// calculated. Absolute mode should be used when two-tailed test 82 /// is wanted. 83 /// 84 virtual double 85 score(const classifier::Target& target, 86 const classifier::DataLookupWeighted1D& value) = 0; 87 88 /// 77 89 /// Function calculating the weighted version of score. In 78 90 /// absolute mode, also the score using negated class labels is -
trunk/c++_tools/statistics/WilcoxonFoldChange.cc
r616 r623 38 38 } 39 39 40 41 double WilcoxonFoldChange::score(const classifier::Target& target, 42 const classifier::DataLookupWeighted1D& value) 43 { 44 std::cerr << " WilcoxonFoldChange::score not implemented" << std::endl; 45 return 0; 46 } 47 48 40 49 double WilcoxonFoldChange::score(const classifier::Target& target, 41 50 const utility::vector& value, -
trunk/c++_tools/statistics/WilcoxonFoldChange.h
r616 r623 38 38 /// @return difference of the weighted means of the two classes 39 39 /// 40 /// @param value vector of the values (with weights) 41 /// @train_set defining which values to use (number of values used 42 /// in the calculation is equal to size of \a train_set) 43 /// 44 /// @note not implemented 45 /// 46 double score(const classifier::Target& target, 47 const classifier::DataLookupWeighted1D& value); 48 49 /// 50 /// @return difference of the weighted means of the two classes 51 /// 40 52 /// @param value vector of the values 41 53 /// @param weight vector of accompanied weight to the values -
trunk/c++_tools/statistics/tScore.cc
r616 r623 1 1 // $Id$ 2 2 3 // System includes 3 #include "c++_tools/statistics/tScore.h" 4 #include "c++_tools/statistics/Averager.h" 5 #include "c++_tools/statistics/AveragerWeighted.h" 6 #include "c++_tools/classifier/DataLookupWeighted1D.h" 7 #include "c++_tools/classifier/Target.h" 8 4 9 #include <cassert> 5 10 #include <cmath> 6 11 7 // Thep C++ Tools8 #include <c++_tools/statistics/tScore.h>9 #include <c++_tools/statistics/Averager.h>10 #include <c++_tools/statistics/AveragerWeighted.h>11 #include <c++_tools/classifier/Target.h>12 12 13 13 namespace theplu { … … 41 41 return t_; 42 42 } 43 44 45 double tScore::score(const classifier::Target& target, 46 const classifier::DataLookupWeighted1D& value) 47 { 48 weighted_=true; 49 50 statistics::AveragerWeighted positive; 51 statistics::AveragerWeighted negative; 52 for(size_t i=0; i<target.size(); i++){ 53 if (target.binary(i)) 54 positive.add(value.data(i),value.weight(i)); 55 else 56 negative.add(value.data(i),value.weight(i)); 57 } 58 double diff = positive.mean() - negative.mean(); 59 dof_=positive.n()+negative.n()-2; 60 double s2=(positive.sum_xx_centered()+negative.sum_xx_centered())/dof_; 61 t_=diff/sqrt(s2/positive.n()+s2/(negative.n())); 62 if (t_<0 && absolute_) 63 t_=-t_; 64 65 if(positive.sum_w()==0 || negative.sum_w()==0) 66 t_=0; 67 return t_; 68 } 69 43 70 44 71 double tScore::score(const classifier::Target& target, -
trunk/c++_tools/statistics/tScore.h
r616 r623 62 62 /// 63 63 double score(const classifier::Target& target, 64 const classifier::DataLookupWeighted1D& value); 65 66 /// 67 /// Calculates the weighted t-score, i.e. the ratio between 68 /// difference in mean and standard deviation of this 69 /// difference. \f$ t = \frac{ m_x - m_y }{ 70 /// \sqrt{\frac{s^2}{n_x}+\frac{s^2}{n_y}}} \f$ where \f$ m \f$ is the 71 /// weighted mean, n is the weighted version of number of data 72 /// points and \f$ s^2 \f$ is an estimation of the variance \f$ s^2 73 /// = \frac{ \sum_i w_i(x_i-m_x)^2 + \sum_i w_i(y_i-m_y)^2 }{ n_x 74 /// + n_y - 2 } \f$. See AveragerWeighted for details. 75 /// 76 /// @return t-score; if absolute=true, the absolute value of the t-score 77 /// is returned 78 /// 79 double score(const classifier::Target& target, 64 80 const utility::vector& value, 65 81 const utility::vector& weight);
Note: See TracChangeset
for help on using the changeset viewer.