Changeset 112
Legend:
- Unmodified
- Added
- Removed
- trunk/src/ROC.cc
r110 r112 4 4 #include <iostream> 5 5 //#include <algorithm> 6 //#include <utility> 7 //#include <vector> 6 #include <utility> 7 #include <vector> 8 #include <cmath> 8 9 9 10 // Thep C++ Tools … … 15 16 namespace cpptools { 16 17 17 ROC::ROC(const gslapi::vector& target, const gslapi::vector& value) 18 ROC::ROC(const gslapi::vector& target, const gslapi::vector& data, 19 const std::vector<size_t>& train_set) 18 20 19 : Score(), value_(), nof_pos_(0), minimum_size_(10), area_(-1) 21 : Score(), area_(-1), data_(data), minimum_size_(10), nof_pos_(0), 22 target_(target), train_set_(train_set), 23 value_(std::vector<std::pair<double, double> >()) 20 24 21 25 { 22 sort(target, value); 26 if (!train_set_.size()) 27 for (size_t i=0; i<target_.size(); i++) 28 train_set_.push_back(i); 29 sort(); 23 30 } 24 31 25 32 ROC::ROC() 26 : Score(), value_(), nof_pos_(0), minimum_size_(10), area_(-1) 33 : Score(), area_(-1), data_(), minimum_size_(10), nof_pos_(0), target_(), 34 train_set_(std::vector<size_t>()), 35 value_(std::vector<std::pair<double, double> >()) 27 36 28 37 { … … 38 47 x += 0.5/nof_pos_/(value_.size()-nof_pos_); 39 48 40 double sigma = (std::sqrt( (value_.size()-nof_pos_)* nof_pos_*41 (value_.size()+1)/12) /42 ( value_.size() - nof_pos_ ) / nof_pos_);49 double sigma = (std::sqrt( (value_.size()-nof_pos_)*nof_pos_* 50 (value_.size()+1.0)/12 ) / 51 ( value_.size() - nof_pos_ ) / nof_pos_ ); 43 52 double p = gsl_cdf_gaussian_Q(x, sigma); 44 53 … … 78 87 { 79 88 if (area_==-1){ 80 double area_ =0;89 double area_ = 0; 81 90 for (unsigned int i=0; i<value_.size(); i++) 82 91 if (value_[i].first==1) … … 88 97 } 89 98 90 double ROC::score(const gslapi::vector& target, const gslapi::vector& value) 99 double ROC::score(const gslapi::vector& target, const gslapi::vector& data, 100 const std::vector<size_t>& train_set) 91 101 { 92 sort(target, value); 93 double area_=0; 94 for (unsigned int i=0; i<value_.size(); i++) 95 if (value_[i].first==1) 96 area_+=i; 97 // Normalizing 
the area to 0-1 98 area_ = (area_/nof_pos_ - (nof_pos_ - 1)/2 )/(value_.size() - nof_pos_); 99 102 target_ = target; 103 data_ = data; 104 if (!train_set.size()){ 105 train_set_.resize(0); 106 for (size_t i=0; i<target_.size(); i++) 107 train_set_.push_back(i); 108 } 109 else 110 train_set_ = train_set; 111 sort(); 112 area_ = score(); 100 113 return area_; 101 114 } 102 115 103 void ROC::sort( const gslapi::vector& target, const gslapi::vector& value)116 void ROC::sort() 104 117 { 105 for (unsigned int i=0; i<target.size(); i++){106 int targ=static_cast<int>(target(i));107 std::pair< int, double> tmp(targ, value(i));118 value_.resize(0); 119 for (unsigned int i=0; i<train_set_.size(); i++){ 120 std::pair<double, double> tmp(target_(train_set_[i]), data_(train_set_[i])); 108 121 value_.push_back(tmp); 109 if (targ ==1)122 if (target_(train_set_[i])==1) 110 123 nof_pos_++; 111 124 } 112 125 std::sort(value_.begin(),value_.end(), 113 pair_value_compare< int,double>());126 pair_value_compare<double, double>()); 114 127 } 115 128 -
trunk/src/ROC.h
r103 r112 28 28 /// Default constructor 29 29 /// 30 ROC( void);30 ROC(); 31 31 32 32 /// 33 /// Constructor taking a value vector and a target vector (+1 or -1). 33 /// Constructor taking a value vector, a target vector (+1 or -1) 34 /// and a vector defining what samples to use. 34 35 /// 35 ROC(const gslapi::vector&, const gslapi::vector&); 36 ROC(const gslapi::vector&, const gslapi::vector&, 37 const std::vector<size_t>& = std::vector<size_t>()); 36 38 37 39 /// … … 50 52 /// the ROC curve 51 53 /// 52 double score(const gslapi::vector&, const gslapi::vector&); 54 double score(const gslapi::vector&, const gslapi::vector&, 55 const std::vector<size_t>& = std::vector<size_t>()); 53 56 54 57 /// … … 66 69 67 70 /// 68 /// @ return a vector of outputs that is sorted with respect to69 /// the corresponding score value71 /// @return the targets in train_set sorted with respect to the 72 /// corresponding data 70 73 /// 71 74 gslapi::vector ROC::target(void) const; … … 75 78 /// approximation is used for the p-value calculation. 76 79 /// 77 inline void minimum_size(const u_int minimum_size) {minimum_size_ = minimum_size; } 80 inline void minimum_size(const u_int minimum_size) 81 {minimum_size_ = minimum_size; } 78 82 79 83 private: 80 std::vector<std::pair<int, double> > value_; //sorted pair of id and value 81 double nof_pos_;84 double area_; 85 gslapi::vector data_; 82 86 u_int minimum_size_; 83 double area_; 87 u_int nof_pos_; 88 gslapi::vector target_; 89 std::vector<size_t> train_set_; 90 /// pair of target and data. should always be sorted with respect to 91 /// data. 92 std::vector<std::pair<double, double> > value_; 93 84 94 /// 85 95 /// … … 96 106 97 107 /// 98 /// sorting 108 /// sorting value_, should always be done when changing train_set_ 99 109 /// 100 void ROC::sort( const gslapi::vector&, const gslapi::vector&);110 void ROC::sort(); 101 111 102 112 }; -
trunk/src/Score.h
r102 r112 27 27 virtual ~Score(void) {}; 28 28 29 virtual double score(const gslapi::vector&, 30 const gslapi::vector&) = 0; 29 virtual double 30 score(const gslapi::vector&, 31 const gslapi::vector&, 32 const std::vector<size_t>& = std::vector<size_t>()) = 0; 31 33 32 34 virtual double p_value() = 0; -
trunk/src/tScore.cc
r102 r112 13 13 14 14 tScore::tScore() 15 : Score(), value_(), target_()15 : Score(), value_(), target_(), train_set_() 16 16 { 17 17 } 18 18 19 19 tScore::tScore( const gslapi::vector& target, 20 const gslapi::vector& value) 20 const gslapi::vector& value, 21 const std::vector<size_t>& train_set) 21 22 : Score(), value_(value), target_(target) 22 23 { 24 if (!train_set_.size()) 25 for (size_t i=0; i<target_.size(); i++) 26 train_set_.push_back(i); 23 27 } 24 28 … … 27 31 Averager positive; 28 32 Averager negative; 29 for(size_t i=0; i<t arget_.size(); i++){30 if (target_[ i]==1)31 positive.add(value_[ i]);33 for(size_t i=0; i<train_set_.size(); i++){ 34 if (target_[train_set_[i]]==1) 35 positive.add(value_[train_set_[i]]); 32 36 else 33 negative.add(value_[ i]);37 negative.add(value_[train_set_[i]]); 34 38 } 35 39 double diff = positive.mean() - negative.mean(); … … 39 43 } 40 44 45 double tScore::score(const gslapi::vector& target, 46 const gslapi::vector& value, 47 const std::vector<size_t>& train_set) 48 { 49 train_set_=train_set; 50 if (!train_set_.size()) 51 for (size_t i=0; i<target_.size(); i++) 52 train_set_.push_back(i); 53 target_ = target; 54 value_ = value; 55 return score(); 56 } 41 57 42 58 double tScore::p_value(void) -
trunk/src/tScore.h
r102 r112 33 33 /// Constructor taking a value vector and a target vector (+1 or -1). 34 34 /// 35 tScore(const gslapi::vector&, const gslapi::vector&); 35 tScore(const gslapi::vector&, const gslapi::vector&, 36 const std::vector<size_t>& = std::vector<size_t>()); 36 37 37 38 /// … … 49 50 double score(); 50 51 52 /// 53 /// Calculates the t-score, i.e. the ratio between difference in 54 /// mean and standard deviation of this difference. 55 /// @return \f$ \frac{\frac{1}{n_x}\sum x_i - \frac{1}{n_y}\sum y_i} 56 /// {\frac{\sum x_i^2 + \sum y_i^2}{n_x-1+n_y-1}} \f$ 57 /// 58 double score(const gslapi::vector&, const gslapi::vector&, 59 const std::vector<size_t>& = std::vector<size_t>()); 60 51 61 /// 52 62 ///Calculates the p-value, i.e. the probability of observing a t-score … … 60 70 gslapi::vector value_; 61 71 gslapi::vector target_; 62 72 std::vector<size_t> train_set_; 63 73 64 74 };
Note: See TracChangeset for help on using the changeset viewer.