Changeset 179
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/Pearson.cc
r149 r179 63 63 r_ = a.correlation(); 64 64 weighted_=false; 65 r_=abs(r_); 65 if (r<0 && absolute_) 66 r_=-r_; 67 66 68 return r_; 67 69 } … … 97 99 weighted_=true; 98 100 99 r_=abs(r_); 101 if (r<0 && absolute_) 102 r_=-r_; 103 100 104 return r_; 101 105 } -
trunk/src/Pearson.h
r149 r179 39 39 40 40 /// 41 /// \f$ \frac{\vert \sum_i(x_i-\bar{x})(y_i-\bar{y})\vert }{\sqrt{\sum_i 42 /// (x_i-\bar{x})^2\sum_i (x_i-\bar{x})^2}}\f$. 43 /// @return absolute value of Pearson correlation. 41 /// \f$ \frac{\vert \sum_i(x_i-\bar{x})(y_i-\bar{y})\vert 42 /// }{\sqrt{\sum_i (x_i-\bar{x})^2\sum_i (x_i-\bar{x})^2}}\f$. 43 /// @return Pearson correlation, if absolute=true absolute value 44 /// of Pearson is used. 44 45 /// 45 46 double score(const gslapi::vector&, const gslapi::vector&, … … 73 74 const std::vector<size_t>& = std::vector<size_t>()); 74 75 76 /// 77 /// @return 1 if data correlates with target, other wise -1 78 /// 79 inline int sign(void) {return (r_>0) ? 1 : -1; } 80 81 75 82 /// 76 83 /// The p-value is the probability of getting a correlation as -
trunk/src/ROC.cc
r159 r179 18 18 19 19 ROC::ROC() 20 : Score(), area_(-1), data_(), minimum_size_(10), nof_pos_(0), target_(),20 : Score(), area_(-1), minimum_size_(10), nof_pos_(0), 21 21 train_set_(std::vector<size_t>()), 22 22 value_(std::vector<std::pair<double, double> >()), … … 93 93 94 94 //Returning score larger 0.5 that you get by random 95 if (area_ >0.5)96 returnarea_;97 else98 return 1.0-area_;95 if (area_<0.5 && absolute_) 96 area_=1.0-area_; 97 98 return area_; 99 99 } 100 100 … … 130 130 } 131 131 area_/=max_area; 132 if (area_>0.5) 133 return area_; 134 else 135 return 1-area_; 132 133 if (area_<0.5 && absolute_) 134 area_=1.0-area_; 135 136 return area_; 136 137 } 137 138 -
trunk/src/ROC.h
r160 r179 36 36 /// Function taking \a value, \a target (+1 or -1) and vector 37 37 /// defining what samples to use. The score is equivalent to 38 /// Mann-Whitney statistics. If target is equal to 1, 39 /// samplebelonges to class + otherwise sample belongs to class40 /// -. @return the area under the ROC 41 /// curve. If the area is less than 0.5, is 1-areareturned.38 /// Mann-Whitney statistics. If target is equal to 1, sample 39 /// belonges to class + otherwise sample belongs to class 40 /// -. @return the area under the ROC curve. If the area is less 41 /// than 0.5 and absolute=true, 1-area is returned. 42 42 /// 43 43 double score(const gslapi::vector& target, const gslapi::vector& value, … … 51 51 /// sample belonges to class + otherwise sample belongs to class 52 52 /// -. @return wheighted version of area under the ROC curve. If 53 /// the area is less than 0.5, is 1-area returned. 53 /// the area is less than 0.5 and absolute=true, 1-area is 54 /// returned. 54 55 /// 55 56 double score(const gslapi::vector& target, const gslapi::vector& value, … … 57 58 const std::vector<size_t>& = std::vector<size_t>()); 58 59 60 59 61 /// 60 ///Calculates the p-value, i.e. the probability of observing an area 61 ///equally or larger if the null hypothesis is true. If P is near zero, 62 ///this casts doubt on this hypothesis. The null hypothesis is that the 63 ///values from the 2 classes are generated from 2 identical 64 ///distributions. The alternative is that the median of the first 65 ///distribution is shifted from the median of the second distribution by a 66 ///non-zero amount. If the smallest group size is larger than minimum_size 67 ///(default = 10), then P is calculated using a normal approximation. 68 /// @return the one-sided p-value 62 ///Calculates the p-value, i.e. the probability of observing an 63 ///area equally or larger if the null hypothesis is true. If P is 64 ///near zero, this casts doubt on this hypothesis. The null 65 ///hypothesis is that the values from the 2 classes are generated 66 ///from 2 identical distributions. The alternative is that the 67 ///median of the first distribution is shifted from the median of 68 ///the second distribution by a non-zero amount. If the smallest 69 ///group size is larger than minimum_size (default = 10), then P 70 ///is calculated using a normal approximation. @return the 71 ///one-sided p-value( if absolute true is used this is equivalent 72 ///to the two-sided p-value.) 69 73 /// 70 double p_value( ) ;74 double p_value(void) ; 71 75 72 76 /// … … 85 89 private: 86 90 double area_; 87 gslapi::vector data_;88 91 u_int minimum_size_; 89 92 u_int nof_pos_; 90 gslapi::vector target_; 91 std::vector<size_t> train_set_; 93 std::vector<size_t> train_set_; 92 94 std::vector<std::pair<double, double> > value_; 93 95 /// pair of target and data. should always be sorted with respect to -
trunk/src/Score.h
r119 r179 19 19 /// Constructor 20 20 /// 21 Score(void) {}; 22 21 Score(bool absolute=true) ; 23 22 24 23 /// … … 27 26 virtual ~Score(void) {}; 28 27 29 virtual double 28 /// 29 /// Function changing mode of Score 30 /// 31 inline void absolute(bool absolute) {absolute_=absolute;} 32 33 virtual double 30 34 score(const gslapi::vector&, 31 35 const gslapi::vector&, … … 40 44 41 45 42 private: 46 protected: 47 bool absolute_; 43 48 gslapi::vector data_; 44 49 gslapi::vector target_; 50 45 51 46 52 }; // class Score -
trunk/src/tScore.cc
r148 r179 14 14 15 15 tScore::tScore() 16 : Score(), t_(0), t arget_(), train_set_(), value_(), weight_()16 : Score(), t_(0), train_set_(), weight_() 17 17 { 18 18 } 19 19 20 20 double tScore::score(const gslapi::vector& target, 21 const gslapi::vector& value,21 const gslapi::vector& data, 22 22 const std::vector<size_t>& train_set) 23 23 { … … 28 28 train_set_=train_set; 29 29 target_ = target; 30 value_ = value;30 data_ = data; 31 31 weight_ = gslapi::vector(target.size(),1); 32 32 Averager positive; … … 34 34 for(size_t i=0; i<train_set_.size(); i++){ 35 35 if (target_[train_set_[i]]==1) 36 positive.add( value_[train_set_[i]]);36 positive.add(data_[train_set_[i]]); 37 37 else 38 negative.add( value_[train_set_[i]]);38 negative.add(data_[train_set_[i]]); 39 39 } 40 40 double diff = positive.mean() - negative.mean(); … … 42 42 /(positive.n()-1+negative.n()-1)); 43 43 t_=diff/s; 44 45 if (t_>0) 46 return t_; 47 else 48 return -t_; 44 if (t_<0 && absolute_) 45 t_=-t_; 46 47 return t_; 49 48 } 50 49 … … 60 59 train_set_=train_set; 61 60 target_ = target; 62 value_ = value;63 61 weight_ = weight; 64 62 WeightedAverager positive; … … 66 64 for(size_t i=0; i<train_set_.size(); i++){ 67 65 if (target_[train_set_[i]]==1) 68 positive.add( value_(train_set_[i]),weight_(train_set_[i]));66 positive.add(data_(train_set_[i]),weight_(train_set_[i])); 69 67 else 70 negative.add( value_(train_set_[i]),weight_(train_set_[i]));68 negative.add(data_(train_set_[i]),weight_(train_set_[i])); 71 69 } 72 70 double diff = positive.mean() - negative.mean(); … … 74 72 (positive.sum_w()+negative.sum_w())); 75 73 t_=diff/s; 76 if (t_ >0)77 returnt_;78 else79 return -t_;74 if (t_<0 && absolute_) 75 t_=-t_; 76 77 return t_; 80 78 } 81 79 -
trunk/src/tScore.h
r148 r179 37 37 38 38 /// 39 /// Calculates the absolute value of t-score, i.e. the ratio 40 /// between difference in mean and standard deviation of this 41 /// difference. /// @return \f$ \frac{ \vert \frac{1}{n_x}\sum x_i 42 /// - \frac{1}{n_y}\sum y_i \vert } {\frac{\sum x_i^2 + \sum 43 /// y_i^2}{n_x-1+n_y-1}} \f$ 39 /// Calculates the value of t-score, i.e. the ratio between 40 /// difference in mean and standard deviation of this 41 /// difference. \f$ \frac{ \vert \frac{1}{n_x}\sum x_i - 42 /// \frac{1}{n_y}\sum y_i \vert } {\frac{\sum x_i^2 + \sum 43 /// y_i^2}{n_x-1+n_y-1}} \f$ @return t-score if absolute=true 44 /// absolute value of t-score is returned 44 45 /// 45 46 double score(const gslapi::vector&, const gslapi::vector&, … … 47 48 48 49 /// 49 /// Weighted version of t-Score 50 /// Weighted version of t-Score @return t-score if absolute=true 51 /// absolute value of t-score is returned 50 52 /// 51 53 double score(const gslapi::vector&, const gslapi::vector&, … … 54 56 55 57 /// 56 ///Calculates the p-value, i.e. the probability of observing a t-score 57 ///equally or larger if the null hypothesis is true. If P is near zero, 58 ///this casts doubt on this hypothesis. The null hypothesis is ... 59 /// @return the one-sided p-value 58 ///Calculates the p-value, i.e. the probability of observing a 59 ///t-score equally or larger if the null hypothesis is true. If P 60 ///is near zero, this casts doubt on this hypothesis. The null 61 ///hypothesis is ... @return the one-sided p-value( if 62 ///absolute=true is used the two-sided p-value) 60 63 /// 61 64 double p_value(); 65 66 62 67 63 68 private:
Note: See TracChangeset
for help on using the changeset viewer.