Changeset 119
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/ROC.cc
r112 r119 16 16 namespace cpptools { 17 17 18 ROC::ROC(const gslapi::vector& target, const gslapi::vector& data,19 const std::vector<size_t>& train_set)20 21 : Score(), area_(-1), data_(data), minimum_size_(10), nof_pos_(0),22 target_(target), train_set_(train_set),23 value_(std::vector<std::pair<double, double> >())24 25 {26 if (!train_set_.size())27 for (size_t i=0; i<target_.size(); i++)28 train_set_.push_back(i);29 sort();30 }31 32 18 ROC::ROC() 33 19 : Score(), area_(-1), data_(), minimum_size_(10), nof_pos_(0), target_(), 34 20 train_set_(std::vector<size_t>()), 35 value_(std::vector<std::pair<double, double> >()) 21 value_(std::vector<std::pair<double, double> >()), 22 weight_(gslapi::vector()) 36 23 37 24 { … … 51 38 ( value_.size() - nof_pos_ ) / nof_pos_ ); 52 39 double p = gsl_cdf_gaussian_Q(x, sigma); 53 40 54 41 return p; 55 42 } … … 73 60 double ROC::p_value(void) 74 61 { 75 if (area_==-1)76 area_ = score();77 62 double p; 78 63 if (nof_pos_ < minimum_size_ & value_.size()-nof_pos_ < minimum_size_) … … 80 65 nof_pos_, value_.size()-nof_pos_); 81 66 else 82 p = get_p_approx(area_);67 p = get_p_approx(area_); 83 68 return p; 84 }85 86 double ROC::score()87 {88 if (area_==-1){89 double area_ = 0;90 for (unsigned int i=0; i<value_.size(); i++)91 if (value_[i].first==1)92 area_+=i;93 // Normalizing the area to 0-194 area_ = (area_/nof_pos_ - (nof_pos_ - 1)/2 )/(value_.size() - nof_pos_);95 }96 return area_;97 69 } 98 70 … … 110 82 train_set_ = train_set; 111 83 sort(); 112 area_ = score(); 84 area_ = 0; 85 for (size_t i=0; i<value_.size(); i++) 86 if (value_[i].first==1) 87 area_+=i; 88 // Normalizing the area to 0-1 89 area_ = (area_/nof_pos_-(nof_pos_-1)/2 )/(value_.size()-nof_pos_); 90 return area_; 91 } 92 93 double ROC::score(const gslapi::vector& target, const gslapi::vector& data, 94 const gslapi::vector& weight, 95 const std::vector<size_t>& train_set) 96 { 97 target_ = target; 98 data_ = data; 99 weight_=weight; 100 if (!train_set.size()){ 101 
train_set_.resize(0); 102 for (size_t i=0; i<target_.size(); i++) 103 train_set_.push_back(i); 104 } 105 else 106 train_set_ = train_set; 107 sort(); 108 area_=0; 109 double max_area=0; 110 //Peter, use the sort to skip some ifs and loops 111 for (size_t i=0; i<value_.size(); i++){ 112 if (target_(train_set_[i])==1){ 113 for (size_t j=0; j<value_.size(); j++){ 114 if (target_(train_set_[j])==-1){ 115 if (data_(train_set_[i]) > data_(train_set_[j])){ 116 area_+=weight_(train_set_[i])*weight_(train_set_[j]); 117 } 118 max_area+=weight_(train_set_[i])*weight_(train_set_[j]); 119 } 120 } 121 } 122 } 123 area_/=max_area; 113 124 return area_; 114 125 } -
trunk/src/ROC.h
r112 r119 31 31 32 32 /// 33 /// Constructor taking a value vector, a target vector (+1 or -1)34 /// and a vector defining what samples to use.35 ///36 ROC(const gslapi::vector&, const gslapi::vector&,37 const std::vector<size_t>& = std::vector<size_t>());38 39 ///40 33 /// Destructor 41 34 /// 42 35 virtual ~ROC(void) {}; 43 36 44 /// Equivalent to the Mann-Whitney score, but normalized to be 45 /// between zero and one. @return the area under the ROC curve 37 /// Function taking \a value, target (+1 or -1) and vector 38 /// defining what samples to use. The score is equivalent to the 39 /// Mann-Whitney score but normalized to be between zero and 40 /// one. @return the area under the ROC curve 46 41 /// 47 double score() ; 42 double score(const gslapi::vector& value, const gslapi::vector& target, 43 const std::vector<size_t>& = std::vector<size_t>()); 48 44 49 /// Function taking a vector of values and a vector of target (+1 50 /// or -1). The score is equivalent to the Mann-Whitney score but 51 /// normalized to be between zero and one. @return the area under 52 /// the ROC curve 45 /// Function taking values, target, weight and a vector defining 46 /// what samples to use. The area is defined as \f$ \frac{\sum 47 /// w^+w^-}{\sum w^+w^-}\f$, where the sum in the numerator goes 48 /// over all pairs where value+ is larger than value-. The 49 /// denominator goes over all pairs. @return weighted version of 50 /// area under the ROC curve 53 51 /// 54 double score(const gslapi::vector&, const gslapi::vector&, 52 double score(const gslapi::vector&, const gslapi::vector&, 53 const gslapi::vector&, 55 54 const std::vector<size_t>& = std::vector<size_t>()); 56 55 … … 88 87 gslapi::vector target_; 89 88 std::vector<size_t> train_set_; 89 std::vector<std::pair<double, double> > value_; 90 90 /// pair of target and data. should always be sorted with respect to 91 91 /// data.
92 std::vector<std::pair<double, double> > value_; 93 92 gslapi::vector weight_; 93 94 94 /// 95 95 /// -
trunk/src/Score.h
r112 r119 32 32 const std::vector<size_t>& = std::vector<size_t>()) = 0; 33 33 34 virtual double p_value() = 0; 34 virtual double 35 score(const gslapi::vector&, 36 const gslapi::vector&, 37 const gslapi::vector&, 38 const std::vector<size_t>& = std::vector<size_t>()) = 0; 39 40 35 41 36 42 private: -
trunk/src/tScore.cc
r112 r119 6 6 // Thep C++ Tools 7 7 #include "tScore.h" 8 #include "Averager.h" 8 9 #include "vector.h" 9 #include " Averager.h"10 #include "WeightedAverager.h" 10 11 11 12 namespace theplu { … … 13 14 14 15 tScore::tScore() 15 : Score(), value_(), target_(), train_set_()16 : Score(), t_(0), target_(), train_set_(), value_(), weight_() 16 17 { 17 18 } 18 19 19 tScore::tScore( const gslapi::vector& target, 20 const gslapi::vector& value, 21 const std::vector<size_t>& train_set) 22 : Score(), value_(value), target_(target) 20 double tScore::score(const gslapi::vector& target, 21 const gslapi::vector& value, 22 const std::vector<size_t>& train_set) 23 23 { 24 24 if (!train_set_.size()) 25 25 for (size_t i=0; i<target_.size(); i++) 26 26 train_set_.push_back(i); 27 } 28 29 double tScore::score() 30 { 27 else 28 train_set_=train_set; 29 target_ = target; 30 value_ = value; 31 weight_ = gslapi::vector(target.size(),1); 31 32 Averager positive; 32 33 Averager negative; … … 40 41 double s=sqrt((positive.sum_xsqr()+negative.sum_xsqr()) 41 42 /(positive.n()-1+negative.n()-1)); 42 return diff/s; 43 t_=diff/s; 44 return t_; 43 45 } 44 46 45 47 double tScore::score(const gslapi::vector& target, 46 48 const gslapi::vector& value, 49 const gslapi::vector& weight, 47 50 const std::vector<size_t>& train_set) 48 51 { 49 train_set_=train_set;50 52 if (!train_set_.size()) 51 53 for (size_t i=0; i<target_.size(); i++) 52 54 train_set_.push_back(i); 55 else 56 train_set_=train_set; 53 57 target_ = target; 54 58 value_ = value; 55 return score(); 59 weight_ = weight; 60 WeightedAverager positive; 61 WeightedAverager negative; 62 for(size_t i=0; i<train_set_.size(); i++){ 63 if (target_[train_set_[i]]==1) 64 positive.add(value_(train_set_[i]),weight_(train_set_[i])); 65 else 66 negative.add(value_(train_set_[i]),weight_(train_set_[i])); 67 } 68 double diff = positive.mean() - negative.mean(); 69 double s=sqrt((positive.squared_sum()+negative.squared_sum())/ 70 
(positive.sum_w()+negative.sum_w())); 71 t_=diff/s; 72 return t_; 56 73 } 57 74 58 75 double tScore::p_value(void) 59 76 { 60 double t = score();61 77 double dof = target_.size()-2; 62 double p = gsl_cdf_tdist_Q(t , dof);78 double p = gsl_cdf_tdist_Q(t_, dof); 63 79 return dof > 0 ? p : 1; 64 80 } -
trunk/src/tScore.h
r112 r119 31 31 32 32 /// 33 /// Constructor taking a value vector and a target vector (+1 or -1).34 ///35 tScore(const gslapi::vector&, const gslapi::vector&,36 const std::vector<size_t>& = std::vector<size_t>());37 38 ///39 33 /// Destructor 40 34 /// … … 48 42 /// {\frac{\sum x_i^2 + \sum y_i^2}{n_x-1+n_y-1}} \f$ 49 43 /// 50 double score(); 51 44 double score(const gslapi::vector&, const gslapi::vector&, 45 const std::vector<size_t>& = std::vector<size_t>()); 46 52 47 /// 53 /// Calculates the t-score, i.e. the ratio between difference in 54 /// mean and standard deviation of this difference. 55 /// @return \f$ \frac{\frac{1}{n_x}\sum x_i - \frac{1}{n_y}\sum y_i} 56 /// {\frac{\sum x_i^2 + \sum y_i^2}{n_x-1+n_y-1}} \f$ 48 /// Weighted version of t-Score 57 49 /// 58 50 double score(const gslapi::vector&, const gslapi::vector&, 51 const gslapi::vector&, 59 52 const std::vector<size_t>& = std::vector<size_t>()); 60 53 … … 68 61 69 62 private: 70 gslapi::vector value_;63 double t_; 71 64 gslapi::vector target_; 72 65 std::vector<size_t> train_set_; 66 gslapi::vector value_; 67 gslapi::vector weight_; 73 68 74 69 };
Note: See TracChangeset
for help on using the changeset viewer.