- Timestamp:
- Mar 4, 2007, 2:20:01 PM (16 years ago)
- Location:
- trunk/yat/statistics
- Files:
-
- 1 edited
- 2 copied
Legend:
- Unmodified
- Added
- Removed
-
trunk/yat/statistics/Makefile.am
r772 r778 27 27 AveragerWeighted.cc AveragerPairWeighted.cc Distance.cc \ 28 28 Euclidean.cc Fisher.cc FoldChange.cc Histogram.cc Pearson.cc \ 29 PearsonDistance.cc ROC.cc SAM.cc Score.cc SNR.cc tScore.cc \ 29 PearsonDistance.cc ROC.cc ROCscore.cc \ 30 SAM.cc Score.cc SNR.cc tScore.cc \ 30 31 utility.cc WilcoxonFoldChange.cc 31 32 … … 35 36 AveragerWeighted.h AveragerPairWeighted.h Distance.h Euclidean.h \ 36 37 Fisher.h \ 37 FoldChange.h Histogram.h Pearson.h PearsonDistance.h ROC.h \ 38 SAM.h Score.h SNR.h tScore.h utility.h WilcoxonFoldChange.h 38 FoldChange.h Histogram.h Pearson.h PearsonDistance.h ROC.h ROCscore.h\ 39 SAM.h Score.h SNR.h tScore.h \ 40 utility.h WilcoxonFoldChange.h -
trunk/yat/statistics/ROCscore.cc
r776 r778 22 22 */ 23 23 24 #include "ROC .h"24 #include "ROCscore.h" 25 25 #include "yat/classifier/DataLookupWeighted1D.h" 26 26 #include "yat/classifier/Target.h" 27 27 #include "yat/utility/stl_utility.h" 28 28 #include "yat/utility/vector.h" 29 30 #include <gsl/gsl_cdf.h>31 29 32 30 #include <cmath> … … 38 36 namespace statistics { 39 37 40 ROC::ROC(bool b) 41 : Score(b), area_(0.5), minimum_size_(10), nof_pos_(0) 42 { 43 } 44 45 ROC::~ROC(void) 46 { 47 } 48 49 double ROC::get_p_approx(const double area) const 50 { 51 double x = area - 0.5; 52 // Not integrating from the middle of the bin, but from the inner edge. 53 if (x>0) 54 x -= 0.5/nof_pos_/(n()-nof_pos_); 55 else if(x<0) 56 x += 0.5/nof_pos_/(n()-nof_pos_); 57 58 double sigma = (std::sqrt( (n()-nof_pos_)*nof_pos_* 59 (n()+1.0)/12 ) / 60 ( n() - nof_pos_ ) / nof_pos_ ); 61 double p = gsl_cdf_gaussian_Q(x, sigma); 62 63 return p; 64 } 65 66 double ROC::get_p_exact(const double block, const double nof_pos, 67 const double nof_neg) const 68 { 69 double p; 70 if (block <= 0.0) 71 p = 1.0; 72 else if (block > nof_neg*nof_pos) 73 p = 0.0; 74 else { 75 double p1 = get_p_exact(block-nof_neg, nof_pos-1, nof_neg); 76 double p2 = get_p_exact(block, nof_pos, nof_neg-1); 77 p = nof_pos/(nof_pos+nof_neg)*p1 + nof_neg/(nof_pos+nof_neg)*p2; 78 } 79 return p; 80 } 81 82 u_int& ROC::minimum_size(void) 83 { 84 return minimum_size_; 85 } 86 87 size_t ROC::n(void) const 88 { 89 return vec_pair_.size(); 90 } 91 92 size_t ROC::n_pos(void) const 93 { 94 return nof_pos_; 95 } 96 97 double ROC::p_value(void) const 98 { 99 if (weighted_) 100 return 1.0; 101 else if (nof_pos_ < minimum_size_ & n()-nof_pos_ < minimum_size_) 102 return get_p_exact(area_*nof_pos_*(n()-nof_pos_), 103 nof_pos_, n()-nof_pos_); 104 else 105 return get_p_approx(area_); 106 107 } 108 109 double ROC::score(const classifier::Target& target, 110 const utility::vector& value) 38 double ROCscore::score(const classifier::Target& target, 39 const utility::vector& value) 111 40 { 112 41 assert(target.size()==value.size()); 113 42 weighted_=false; 114 43 115 vec_pair_.clear(); 116 vec_pair_.reserve(target.size());44 std::vector<std::pair<bool, double> > class_value; // class-value-pair 45 class_value.reserve(target.size()); 117 46 for (size_t i=0; i<target.size(); i++) 118 vec_pair_.push_back(std::make_pair(target.binary(i),value(i)));47 class_value.push_back(std::make_pair(target.binary(i),value(i))); 119 48 120 std::sort( vec_pair_.begin(),vec_pair_.end(),49 std::sort(class_value.begin(),class_value.end(), 121 50 utility::pair_value_compare<bool, double>()); 122 area_= 0;123 nof_pos_=0;124 for (size_t i=0; i< n(); i++){125 if ( vec_pair_[i].first){126 area _+=i;127 nof_pos_++;51 double area = 0; 52 u_int nof_pos =0; 53 for (size_t i=0; i<class_value.size(); i++){ 54 if (class_value[i].first){ 55 area+=i; 56 ++nof_pos; 128 57 } 129 58 } 130 59 131 60 // Normalizing the area to [0,1] 132 area _ = ( (area_-nof_pos_*(nof_pos_-1)/2 ) /133 (nof_pos_*(n()-nof_pos_)) );61 area = ( (area-nof_pos*(nof_pos-1)/2 ) / 62 (nof_pos*(class_value.size()-nof_pos)) ); 134 63 135 //Returning score larger 0.5 that you get by random 136 if (area_<0.5 && absolute_) 137 area_=1.0-area_; 138 139 return area_; 64 if (area<0.5 && absolute_) 65 return 1.0-area; 66 return area; 140 67 } 141 68 142 69 143 70 // Peter, should be possible to do this in NlogN 144 double ROC ::score(const classifier::Target& target,71 double ROCscore::score(const classifier::Target& target, 145 72 const classifier::DataLookupWeighted1D& value) 146 73 { 147 weighted_=true;148 74 149 vec_pair_.clear(); 150 vec_pair_.reserve(target.size()); 75 std::vector<std::pair<bool, double> > class_value; // class-value-pair 76 class_value.clear(); 77 class_value.reserve(target.size()); 151 78 for (unsigned int i=0; i<target.size(); i++) 152 79 if (value.weight(i)) 153 vec_pair_.push_back(std::make_pair(target.binary(i),value.data(i)));80 class_value.push_back(std::make_pair(target.binary(i),value.data(i))); 154 81 155 std::sort( vec_pair_.begin(),vec_pair_.end(),82 std::sort(class_value.begin(),class_value.end(), 156 83 utility::pair_value_compare<int, double>()); 157 84 158 area_=0; 159 nof_pos_=0; 85 double area=0; 160 86 double max_area=0; 161 87 … … 165 91 if (!target.binary(j)){ 166 92 if (value.data(i)>value.data(j)) 167 area _+=value.weight(i)*value.weight(j);93 area+=value.weight(i)*value.weight(j); 168 94 max_area+=value.weight(i)*value.weight(j); 169 95 } 170 96 171 area _/=max_area;97 area/=max_area; 172 98 173 if (area_<0.5 && absolute_) 174 area_=1.0-area_; 175 176 return area_; 99 if (area<0.5 && absolute_) 100 return 1.0-area; 101 return area; 177 102 } 178 103 179 104 180 105 // Peter, should be possible to do this in NlogN 181 double ROC ::score(const classifier::Target& target,182 const utility::vector& value,183 const utility::vector& weight)106 double ROCscore::score(const classifier::Target& target, 107 const utility::vector& value, 108 const utility::vector& weight) 184 109 { 185 110 weighted_=true; 186 111 187 vec_pair_.clear(); 188 vec_pair_.reserve(target.size()); 112 113 std::vector<std::pair<bool, double> > class_value; // class-value-pair 114 class_value.reserve(target.size()); 189 115 for (unsigned int i=0; i<target.size(); i++) 190 116 if (weight(i)) 191 vec_pair_.push_back(std::make_pair(target.binary(i),value(i)));117 class_value.push_back(std::make_pair(target.binary(i),value(i))); 192 118 193 std::sort( vec_pair_.begin(),vec_pair_.end(),119 std::sort(class_value.begin(),class_value.end(), 194 120 utility::pair_value_compare<int, double>()); 195 121 196 area_=0; 197 nof_pos_=0; 122 double area=0; 198 123 double max_area=0; 199 124 … … 203 128 if (!target.binary(j)){ 204 129 if (value(i)>value(j)) 205 area _+=weight(i)*weight(j);130 area+=weight(i)*weight(j); 206 131 max_area+=weight(i)*weight(j); 207 132 } 208 133 209 area _/=max_area;134 area/=max_area; 210 135 211 if (area_<0.5 && absolute_) 212 area_=1.0-area_; 213 214 return area_; 215 } 216 217 bool ROC::target(const size_t i) const 218 { 219 return vec_pair_[i].first; 220 } 221 222 std::ostream& operator<<(std::ostream& s, const ROC& r) 223 { 224 s.setf( std::ios::dec ); 225 s.precision(12); 226 double sens = 1; 227 double spec = 0; 228 size_t n = r.n(); 229 double nof_pos = r.n_pos(); 230 for(size_t i=0; i<n-1; ++i) { 231 s << sens << "\t"; 232 s << spec << "\n"; 233 if (r.target(i)) 234 spec -= 1/(n-nof_pos); 235 else 236 sens -= 1/nof_pos; 237 } 238 s << sens << "\t"; 239 s << spec ; 240 return s; 136 if (area<0.5 && absolute_) 137 return 1.0-area; 138 return area; 241 139 } 242 140 -
trunk/yat/statistics/ROCscore.h
r776 r778 1 #ifndef _theplu_yat_statistics_roc_ 2 #define _theplu_yat_statistics_roc_ 1 #ifndef _theplu_yat_statistics_roc_score_ 2 #define _theplu_yat_statistics_roc_score_ 3 3 4 4 // $Id$ … … 27 27 #include "Score.h" 28 28 29 #include <utility> 30 #include <vector> 29 #include <cctype> 31 30 32 31 namespace theplu { … … 43 42 /// @brief Class for Reciever Operating Characteristic. 44 43 /// 45 /// As the area under an ROC curve is equivalent to Mann-Whitney U 46 /// statistica, this class can be used to perform a Mann-Whitney 47 /// U-test (aka Wilcoxon). 48 /// 49 class ROC : public Score 44 class ROCscore : public Score 50 45 { 51 46 52 47 public: 53 ///54 /// @brief Default constructor55 ///56 ROC(bool absolute=true);57 58 ///59 /// @brief The destructor60 ///61 virtual ~ROC(void);62 63 ///64 /// minimum_size is the threshold for when a normal65 /// approximation is used for the p-value calculation.66 ///67 /// @return reference to minimum_size68 ///69 u_int& minimum_size(void);70 71 48 /// 72 49 /// @return number of samples … … 79 56 size_t n_pos(void) const; 80 57 81 ///82 ///Calculates the p-value, i.e. the probability of observing an83 ///area equally or larger if the null hypothesis is true. If P is84 ///near zero, this casts doubt on this hypothesis. The null85 ///hypothesis is that the values from the 2 classes are generated86 ///from 2 identical distributions. The alternative is that the87 ///median of the first distribution is shifted from the median of88 ///the second distribution by a non-zero amount. If the smallest89 ///group size is larger than minimum_size (default = 10), then P90 ///is calculated using a normal approximation. @return the91 ///one-sided p-value( if absolute true is used this is equivalent92 ///to the two-sided p-value.)93 ///94 double p_value(void) const;95 96 58 /// Function taking \a value, \a target (+1 or -1) and vector 97 59 /// defining what samples to use. The score is equivalent to … … 135 97 const utility::vector& weight); 136 98 137 ///138 /// Function returning true if target is positive (binary()) for139 /// the sample with ith lowest data value, so i=0 corresponds to140 /// the sample with the lowest data value and i=n()-1 the sample141 /// with highest data value.142 ///143 bool target(const size_t i) const;144 145 99 private: 146 100 147 /// Implemented as in MatLab 13.1148 double get_p_approx(const double) const;149 150 /// Implemented as in MatLab 13.1151 double get_p_exact(const double, const double, const double) const;152 153 double area_;154 u_int minimum_size_;155 u_int nof_pos_;156 std::vector<std::pair<bool, double> > vec_pair_; // class-value-pair157 101 }; 158 159 ///160 /// The output operator for the ROC class. The output is an Nx2161 /// matrix, where the first column is the sensitivity and second162 /// is the specificity.163 ///164 std::ostream& operator<< (std::ostream& s, const ROC&);165 102 166 103 }}} // of namespace statistics, yat, and theplu
Note: See TracChangeset
for help on using the changeset viewer.