Changeset 1701 for trunk/yat/statistics
- Timestamp:
- Jan 7, 2009, 11:53:05 PM (14 years ago)
- Location:
- trunk/yat/statistics
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/yat/statistics/KolmogorovSmirnov.cc
r1687 r1701 2 2 3 3 /* 4 Copyright (C) 2008 Peter Johansson4 Copyright (C) 2008, 2009 Peter Johansson 5 5 6 6 This file is part of the yat library, http://dev.thep.lu.se/yat … … 19 19 along with yat. If not, see <http://www.gnu.org/licenses/>. 20 20 */ 21 22 #include <config.h> 21 23 22 24 #include "KolmogorovSmirnov.h" … … 47 49 return; 48 50 49 data_.insert( std::make_pair(std::make_pair(x,w),target));51 data_.insert(Element(x,target,w)); 50 52 if (target){ 51 53 sum_w1_+=w; … … 80 82 { 81 83 size_t count=0; 82 std::deque<bool> targets;83 typedef data_w::const_iterator const_iter;84 for (const_iter i(data_.begin()); i!=data_.end(); ++i)85 targets.push_back(i->second);86 87 84 double score_threshold = score()-10*std::numeric_limits<double>().epsilon(); 85 KolmogorovSmirnov ks(*this); 88 86 89 87 for (size_t i=0; i<perm; ++i){ 90 random::random_shuffle(targets.begin(), targets.end()); 91 KolmogorovSmirnov ks; 92 std::deque<bool>::const_iterator target_i(targets.begin()); 93 const_iter end(data_.end()); 94 for (const_iter iter(data_.begin()); iter!=end; ++iter, ++target_i) 95 ks.add(iter->first.first, *target_i, iter->first.second); 96 88 ks.shuffle(); 97 89 if (ks.score()>=score_threshold) 98 90 ++count; … … 117 109 118 110 111 void KolmogorovSmirnov::scores(std::vector<double>& res) const 112 { 113 res.clear(); 114 res.reserve(data_.size()); 115 data_w::const_iterator iter(data_.begin()); 116 double f1=0; 117 double f2=0; 118 while(iter!=data_.end()){ 119 size_t count=0; 120 double value = iter->value; 121 while (iter!=data_.end() && iter->value==value) { 122 if (iter->label) { 123 f1 += iter->weight; 124 } 125 else { 126 f2 += iter->weight; 127 } 128 ++count; 129 ++iter; 130 } 131 res.resize(res.size()+count, f1/sum_w1_-f2/sum_w2_); 132 } 133 } 134 135 136 void KolmogorovSmirnov::shuffle(void) 137 { 138 std::deque<bool> labels; 139 for (data_w::const_iterator iter=data_.begin(); 140 iter!=data_.end(); ++iter) { 141 labels.push_back(iter->label); 142 } 143 random::random_shuffle(labels.begin(), labels.end()); 144 std::deque<bool>::const_iterator label = labels.begin(); 145 for (data_w::iterator iter=data_.begin(); 146 iter!=data_.end(); ++iter, ++label) { 147 // label does not affect sorting of set, so modifying label is safe 148 #ifdef MULTISET_ITERATOR_IS_MUTABLE 149 (*iter).label = *label; 150 #else 151 // standard requires that multiset::iterator is modifiable, but 152 // some implementations do not fulfill the requirement 153 const_cast<Element&>(*iter).label = *label; 154 #endif 155 } 156 sum_w1_ = 0; 157 sum_w2_ = 0; 158 add_sum_w(data_.begin(), data_.end()); 159 cached_ = false; 160 } 161 162 119 163 double KolmogorovSmirnov::signed_score(void) const 120 164 { … … 136 180 137 181 138 void KolmogorovSmirnov::scores(std::vector<double>& res) const139 {140 res.clear();141 res.reserve(data_.size());142 data_w::const_iterator iter(data_.begin());143 double f1=0;144 double f2=0;145 while(iter!=data_.end()){146 size_t count=0;147 double value = iter->first.first;148 while (iter!=data_.end() && iter->first.first==value) {149 if (iter->second) {150 f1 += iter->first.second;151 }152 else {153 f2 += iter->first.second;154 }155 ++count;156 ++iter;157 }158 res.resize(res.size()+count, f1/sum_w1_-f2/sum_w2_);159 }160 }161 162 163 182 std::ostream& operator<<(std::ostream& os, const KolmogorovSmirnov& ks) 164 183 { … … 171 190 172 191 192 bool KolmogorovSmirnov::Element::operator< 193 (const KolmogorovSmirnov::Element rhs) const 194 { 195 return value<rhs.value; 196 } 197 173 198 }}} // of namespace theplu yat statistics -
trunk/yat/statistics/KolmogorovSmirnov.h
r1677 r1701 5 5 6 6 /* 7 Copyright (C) 2008 Peter Johansson7 Copyright (C) 2008, 2009 Peter Johansson 8 8 9 9 This file is part of the yat library, http://dev.thep.lu.se/yat … … 36 36 { 37 37 public: 38 /** 39 struct used to store data in KolmogorovSmirnov. 40 41 This struct is public to make usage of the add range function 42 more convenient. 43 44 \since New in yat 0.5 45 */ 46 struct Element 47 { 48 /** 49 \brief default constructor 50 */ 51 inline Element(void) {}; 52 53 /** 54 \brief Constructor 55 */ 56 inline Element(double x, bool class_label, double w=1.0) 57 : value(x), label(class_label), weight(w) {}; 58 59 /** 60 \brief data value 61 */ 62 double value; 63 64 /** 65 bool telling which group the data point belong to 66 */ 67 bool label; 68 69 /** 70 weight for the data point 71 */ 72 double weight; 73 74 /** 75 \return true if value is less than rhs.value 76 */ 77 bool operator<(const Element rhs) const; 78 }; 79 38 80 /** 39 81 \brief Constructor … … 45 87 */ 46 88 void add(double value, bool class_label, double weight=1.0); 89 90 /** 91 \brief add a range 92 93 value_type of \a ForwardIterator must be convertible to 94 KolmogorovSmirnov::Element 95 96 Insertion takes typically N*log(N). However, if range is 97 sorted, insertion takes linear time. A common use case of this 98 function is when calculating several KS statistics on a data 99 set (values and weights) with different class labels. 100 101 \since New in yat 0.5 102 */ 103 template <typename ForwardIterator> 104 void add(ForwardIterator first, ForwardIterator last); 47 105 48 106 /** … … 69 127 and calculate how often a score equal or larger than score() is 70 128 obtained. 129 130 \see shuffle 71 131 */ 72 132 double p_value(size_t perm) const; … … 86 146 87 147 /** 148 \brief shuffle class labels 149 150 This is equivalent to reset and re-add values with shuffled 151 class labels. 152 153 \since New in yat 0.5 154 */ 155 void shuffle(void); 156 157 /** 88 158 Same as score() but keeping the sign, in other words, 89 159 abs(signed_score())==score() … … 95 165 private: 96 166 void scores(std::vector<double>&) const; 97 167 // add weights to sum_w1 and sum_w2 respectively depending on 168 // label in element. 169 template <typename ForwardIterator> 170 void add_sum_w(ForwardIterator first, ForwardIterator last); 171 98 172 mutable bool cached_; 99 173 mutable double score_; 100 typedef std::pair<std::pair<double,double>, bool> trip; 101 typedef std::multiset<trip> data_w; 174 typedef std::multiset<Element> data_w; 102 175 data_w data_; 103 176 double sum_w1_; … … 106 179 friend std::ostream& operator<<(std::ostream&, const KolmogorovSmirnov&); 107 180 108 // Copy not allowed109 KolmogorovSmirnov(const KolmogorovSmirnov&);110 KolmogorovSmirnov& operator=(const KolmogorovSmirnov&);181 // using compiler generated copy and assignment 182 //KolmogorovSmirnov(const KolmogorovSmirnov&); 183 //KolmogorovSmirnov& operator=(const KolmogorovSmirnov&); 111 184 }; 112 185 … … 117 190 118 191 192 // template implementations 193 194 template <typename ForwardIterator> 195 void KolmogorovSmirnov::add(ForwardIterator first, ForwardIterator last) 196 { 197 ForwardIterator iter(first); 198 typename data_w::const_iterator hint(data_.begin()); 199 for ( ; iter!=last; ++iter) 200 if (iter->weight) // ignore data points with zero weight 201 hint = data_.insert(hint, *iter); 202 add_sum_w(first, last); 203 cached_=false; 204 } 205 206 207 template <typename ForwardIterator> 208 void KolmogorovSmirnov::add_sum_w(ForwardIterator first, 209 ForwardIterator last) 210 { 211 while (first!=last) { 212 if (first->label) 213 sum_w1_ += first->weight; 214 else 215 sum_w2_ += first->weight; 216 ++first; 217 } 218 } 219 119 220 }}} // of namespace theplu yat statistics 120 221
Note: See TracChangeset
for help on using the changeset viewer.