Changeset 778 for trunk/yat/statistics/ROCscore.cc
- Timestamp:
- Mar 4, 2007, 2:20:01 PM (16 years ago)
- File:
-
- 1 copied
Legend:
- Unmodified
- Added
- Removed
-
trunk/yat/statistics/ROCscore.cc
r776 r778 22 22 */ 23 23 24 #include "ROC .h"24 #include "ROCscore.h" 25 25 #include "yat/classifier/DataLookupWeighted1D.h" 26 26 #include "yat/classifier/Target.h" 27 27 #include "yat/utility/stl_utility.h" 28 28 #include "yat/utility/vector.h" 29 30 #include <gsl/gsl_cdf.h>31 29 32 30 #include <cmath> … … 38 36 namespace statistics { 39 37 40 ROC::ROC(bool b) 41 : Score(b), area_(0.5), minimum_size_(10), nof_pos_(0) 42 { 43 } 44 45 ROC::~ROC(void) 46 { 47 } 48 49 double ROC::get_p_approx(const double area) const 50 { 51 double x = area - 0.5; 52 // Not integrating from the middle of the bin, but from the inner edge. 53 if (x>0) 54 x -= 0.5/nof_pos_/(n()-nof_pos_); 55 else if(x<0) 56 x += 0.5/nof_pos_/(n()-nof_pos_); 57 58 double sigma = (std::sqrt( (n()-nof_pos_)*nof_pos_* 59 (n()+1.0)/12 ) / 60 ( n() - nof_pos_ ) / nof_pos_ ); 61 double p = gsl_cdf_gaussian_Q(x, sigma); 62 63 return p; 64 } 65 66 double ROC::get_p_exact(const double block, const double nof_pos, 67 const double nof_neg) const 68 { 69 double p; 70 if (block <= 0.0) 71 p = 1.0; 72 else if (block > nof_neg*nof_pos) 73 p = 0.0; 74 else { 75 double p1 = get_p_exact(block-nof_neg, nof_pos-1, nof_neg); 76 double p2 = get_p_exact(block, nof_pos, nof_neg-1); 77 p = nof_pos/(nof_pos+nof_neg)*p1 + nof_neg/(nof_pos+nof_neg)*p2; 78 } 79 return p; 80 } 81 82 u_int& ROC::minimum_size(void) 83 { 84 return minimum_size_; 85 } 86 87 size_t ROC::n(void) const 88 { 89 return vec_pair_.size(); 90 } 91 92 size_t ROC::n_pos(void) const 93 { 94 return nof_pos_; 95 } 96 97 double ROC::p_value(void) const 98 { 99 if (weighted_) 100 return 1.0; 101 else if (nof_pos_ < minimum_size_ & n()-nof_pos_ < minimum_size_) 102 return get_p_exact(area_*nof_pos_*(n()-nof_pos_), 103 nof_pos_, n()-nof_pos_); 104 else 105 return get_p_approx(area_); 106 107 } 108 109 double ROC::score(const classifier::Target& target, 110 const utility::vector& value) 38 double ROCscore::score(const classifier::Target& target, 39 const utility::vector& value) 111 40 { 112 41 assert(target.size()==value.size()); 113 42 weighted_=false; 114 43 115 vec_pair_.clear(); 116 vec_pair_.reserve(target.size());44 std::vector<std::pair<bool, double> > class_value; // class-value-pair 45 class_value.reserve(target.size()); 117 46 for (size_t i=0; i<target.size(); i++) 118 vec_pair_.push_back(std::make_pair(target.binary(i),value(i)));47 class_value.push_back(std::make_pair(target.binary(i),value(i))); 119 48 120 std::sort( vec_pair_.begin(),vec_pair_.end(),49 std::sort(class_value.begin(),class_value.end(), 121 50 utility::pair_value_compare<bool, double>()); 122 area_= 0;123 nof_pos_=0;124 for (size_t i=0; i< n(); i++){125 if ( vec_pair_[i].first){126 area _+=i;127 nof_pos_++;51 double area = 0; 52 u_int nof_pos =0; 53 for (size_t i=0; i<class_value.size(); i++){ 54 if (class_value[i].first){ 55 area+=i; 56 ++nof_pos; 128 57 } 129 58 } 130 59 131 60 // Normalizing the area to [0,1] 132 area _ = ( (area_-nof_pos_*(nof_pos_-1)/2 ) /133 (nof_pos_*(n()-nof_pos_)) );61 area = ( (area-nof_pos*(nof_pos-1)/2 ) / 62 (nof_pos*(class_value.size()-nof_pos)) ); 134 63 135 //Returning score larger 0.5 that you get by random 136 if (area_<0.5 && absolute_) 137 area_=1.0-area_; 138 139 return area_; 64 if (area<0.5 && absolute_) 65 return 1.0-area; 66 return area; 140 67 } 141 68 142 69 143 70 // Peter, should be possible to do this in NlogN 144 double ROC ::score(const classifier::Target& target,71 double ROCscore::score(const classifier::Target& target, 145 72 const classifier::DataLookupWeighted1D& value) 146 73 { 147 weighted_=true;148 74 149 vec_pair_.clear(); 150 vec_pair_.reserve(target.size()); 75 std::vector<std::pair<bool, double> > class_value; // class-value-pair 76 class_value.clear(); 77 class_value.reserve(target.size()); 151 78 for (unsigned int i=0; i<target.size(); i++) 152 79 if (value.weight(i)) 153 vec_pair_.push_back(std::make_pair(target.binary(i),value.data(i)));80 class_value.push_back(std::make_pair(target.binary(i),value.data(i))); 154 81 155 std::sort( vec_pair_.begin(),vec_pair_.end(),82 std::sort(class_value.begin(),class_value.end(), 156 83 utility::pair_value_compare<int, double>()); 157 84 158 area_=0; 159 nof_pos_=0; 85 double area=0; 160 86 double max_area=0; 161 87 … … 165 91 if (!target.binary(j)){ 166 92 if (value.data(i)>value.data(j)) 167 area _+=value.weight(i)*value.weight(j);93 area+=value.weight(i)*value.weight(j); 168 94 max_area+=value.weight(i)*value.weight(j); 169 95 } 170 96 171 area _/=max_area;97 area/=max_area; 172 98 173 if (area_<0.5 && absolute_) 174 area_=1.0-area_; 175 176 return area_; 99 if (area<0.5 && absolute_) 100 return 1.0-area; 101 return area; 177 102 } 178 103 179 104 180 105 // Peter, should be possible to do this in NlogN 181 double ROC ::score(const classifier::Target& target,182 const utility::vector& value,183 const utility::vector& weight)106 double ROCscore::score(const classifier::Target& target, 107 const utility::vector& value, 108 const utility::vector& weight) 184 109 { 185 110 weighted_=true; 186 111 187 vec_pair_.clear(); 188 vec_pair_.reserve(target.size()); 112 113 std::vector<std::pair<bool, double> > class_value; // class-value-pair 114 class_value.reserve(target.size()); 189 115 for (unsigned int i=0; i<target.size(); i++) 190 116 if (weight(i)) 191 vec_pair_.push_back(std::make_pair(target.binary(i),value(i)));117 class_value.push_back(std::make_pair(target.binary(i),value(i))); 192 118 193 std::sort( vec_pair_.begin(),vec_pair_.end(),119 std::sort(class_value.begin(),class_value.end(), 194 120 utility::pair_value_compare<int, double>()); 195 121 196 area_=0; 197 nof_pos_=0; 122 double area=0; 198 123 double max_area=0; 199 124 … … 203 128 if (!target.binary(j)){ 204 129 if (value(i)>value(j)) 205 area _+=weight(i)*weight(j);130 area+=weight(i)*weight(j); 206 131 max_area+=weight(i)*weight(j); 207 132 } 208 133 209 area _/=max_area;134 area/=max_area; 210 135 211 if (area_<0.5 && absolute_) 212 area_=1.0-area_; 213 214 return area_; 215 } 216 217 bool ROC::target(const size_t i) const 218 { 219 return vec_pair_[i].first; 220 } 221 222 std::ostream& operator<<(std::ostream& s, const ROC& r) 223 { 224 s.setf( std::ios::dec ); 225 s.precision(12); 226 double sens = 1; 227 double spec = 0; 228 size_t n = r.n(); 229 double nof_pos = r.n_pos(); 230 for(size_t i=0; i<n-1; ++i) { 231 s << sens << "\t"; 232 s << spec << "\n"; 233 if (r.target(i)) 234 spec -= 1/(n-nof_pos); 235 else 236 sens -= 1/nof_pos; 237 } 238 s << sens << "\t"; 239 s << spec ; 240 return s; 136 if (area<0.5 && absolute_) 137 return 1.0-area; 138 return area; 241 139 } 242 140
Note: See TracChangeset
for help on using the changeset viewer.