Changeset 1701
- Timestamp:
- Jan 7, 2009, 11:53:05 PM (15 years ago)
- Location:
- trunk
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/NEWS
r1651 r1701 5 5 Version 0.5 (released DATE) 6 6 7 - Copy and assignment is now allowed for KolmogorovSmirnov 7 8 - Imlicit conversion between Vector and its Views disabled (ticket:467) 8 9 - Averager::add (and siblings) take a (signed) long (ticket:361) … … 73 74 {{{ 74 75 Copyright (C) 2003, 2006 Jari Häkkinen 75 Copyright (C) 2007 Jari Häkkinen, Peter Johansson76 Copyright (C) 200 8 Jari Häkkinen,Peter Johansson76 Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson 77 Copyright (C) 2009 Peter Johansson 77 78 78 79 This file is part of yat library, http://dev.thep.lu.se/yat -
trunk/configure.ac
r1693 r1701 274 274 AC_MSG_CHECKING([g++ deprecation attribute]) 275 275 AC_TRY_COMPILE([void f() __attribute__ ((deprecated));], [], 276 AC_DEFINE([YAT_HAVE_GCC_DEPRECATED], [1],276 [AC_DEFINE([YAT_HAVE_GCC_DEPRECATED], [1], 277 277 [Define if compiler supports deprecated attribute, as in g++ 4.0]) 278 AC_MSG_RESULT([yes]), 279 AC_MSG_RESULT([no]) ) 278 AC_MSG_RESULT([yes])], 279 [AC_MSG_RESULT([no])] ) 280 281 AC_MSG_CHECKING([if std::multiset::iterator is mutable]) 282 AC_TRY_COMPILE([@%:@include <set> 283 @%:@include <vector> 284 ], 285 [std::set<std::vector<double> > s; 286 std::vector<double> v; 287 s.insert(v); 288 s.begin()->reserve(100); 289 ], 290 [AC_DEFINE([MULTISET_ITERATOR_IS_MUTABLE], [1], 291 [Define if std::multiset::iterator is mutable]) 292 AC_MSG_RESULT([yes]) 293 ], 294 [AC_MSG_RESULT([no])] ) 280 295 281 296 -
trunk/test/kolmogorov_smirnov_test.cc
r1688 r1701 2 2 3 3 /* 4 Copyright (C) 2008 Peter Johansson4 Copyright (C) 2008, 2009 Peter Johansson 5 5 6 6 This file is part of the yat library, http://dev.thep.lu.se/yat … … 22 22 #include "Suite.h" 23 23 24 #include "yat/random/random.h" 24 25 #include "yat/statistics/Averager.h" 25 26 #include "yat/statistics/KolmogorovSmirnov.h" 26 #include "yat/random/random.h"27 27 28 28 #include <cmath> 29 #include <deque> 29 30 #include <iostream> 31 #include <vector> 30 32 31 33 using namespace theplu::yat; … … 34 36 void test_two_sample(test::Suite&); 35 37 void test_p_value(test::Suite&); 38 void test_shuffle(test::Suite&); 39 void test_range(test::Suite&); 36 40 void test_reset(test::Suite&); 37 41 void test_ties(test::Suite&); … … 44 48 test_two_sample(suite); 45 49 test_p_value(suite); 50 test_shuffle(suite); 51 test_range(suite); 46 52 test_reset(suite); 47 53 test_ties(suite); … … 136 142 } 137 143 144 145 void test_range(test::Suite& suite) 146 { 147 suite.err() << "testing range" << std::endl; 148 std::deque<bool> labels; 149 for (size_t i=0; i<10; ++i) { 150 labels.push_back(i<5); 151 } 152 std::vector<statistics::KolmogorovSmirnov::Element> data; 153 statistics::KolmogorovSmirnov::Element elem; 154 elem.weight = 1.0; 155 for (size_t i=0; i<10; ++i) { 156 elem.value = i; 157 elem.label = labels[i]; 158 data.push_back(elem); 159 } 160 statistics::KolmogorovSmirnov ks; 161 ks.add(data.begin(), data.end()); 162 suite.add(suite.equal(ks.score(), 1.0)); 163 164 // testing that adding a range gives same result as adding elements 165 // sequentially 166 statistics::KolmogorovSmirnov ks2; 167 for (size_t i=0; i<data.size(); ++i) 168 ks2.add(data[i].value, data[i].label, data[i].weight); 169 suite.add(suite.equal(ks2.signed_score(), ks.signed_score())); 170 171 theplu::yat::random::random_shuffle(labels.begin(), labels.end()); 172 for (size_t i=0; i<data.size(); ++i) { 173 data[i].label = labels[i]; 174 } 175 ks.reset(); 176 ks.add(data.begin(), data.end()); 177 ks2.reset(); 178 for (size_t i=0; i<data.size(); ++i) 179 ks2.add(data[i].value, data[i].label, data[i].weight); 180 suite.add(suite.equal(ks2.signed_score(), ks.signed_score())); 181 } 182 183 138 184 void test_reset(test::Suite& suite) 139 185 { … … 163 209 suite.add(suite.equal(ks.score(), 1.0-0.2)); 164 210 } 211 212 213 void test_shuffle(test::Suite& suite) 214 { 215 suite.err() << "testing shuffle" << std::endl; 216 statistics::KolmogorovSmirnov ks; 217 for (size_t i=0; i<10; ++i) 218 ks.add(i, i<5); 219 ks.shuffle(); 220 } 221 222 -
trunk/yat/statistics/KolmogorovSmirnov.cc
r1687 r1701 2 2 3 3 /* 4 Copyright (C) 2008 Peter Johansson4 Copyright (C) 2008, 2009 Peter Johansson 5 5 6 6 This file is part of the yat library, http://dev.thep.lu.se/yat … … 19 19 along with yat. If not, see <http://www.gnu.org/licenses/>. 20 20 */ 21 22 #include <config.h> 21 23 22 24 #include "KolmogorovSmirnov.h" … … 47 49 return; 48 50 49 data_.insert( std::make_pair(std::make_pair(x,w),target));51 data_.insert(Element(x,target,w)); 50 52 if (target){ 51 53 sum_w1_+=w; … … 80 82 { 81 83 size_t count=0; 82 std::deque<bool> targets;83 typedef data_w::const_iterator const_iter;84 for (const_iter i(data_.begin()); i!=data_.end(); ++i)85 targets.push_back(i->second);86 87 84 double score_threshold = score()-10*std::numeric_limits<double>().epsilon(); 85 KolmogorovSmirnov ks(*this); 88 86 89 87 for (size_t i=0; i<perm; ++i){ 90 random::random_shuffle(targets.begin(), targets.end()); 91 KolmogorovSmirnov ks; 92 std::deque<bool>::const_iterator target_i(targets.begin()); 93 const_iter end(data_.end()); 94 for (const_iter iter(data_.begin()); iter!=end; ++iter, ++target_i) 95 ks.add(iter->first.first, *target_i, iter->first.second); 96 88 ks.shuffle(); 97 89 if (ks.score()>=score_threshold) 98 90 ++count; … … 117 109 118 110 111 void KolmogorovSmirnov::scores(std::vector<double>& res) const 112 { 113 res.clear(); 114 res.reserve(data_.size()); 115 data_w::const_iterator iter(data_.begin()); 116 double f1=0; 117 double f2=0; 118 while(iter!=data_.end()){ 119 size_t count=0; 120 double value = iter->value; 121 while (iter!=data_.end() && iter->value==value) { 122 if (iter->label) { 123 f1 += iter->weight; 124 } 125 else { 126 f2 += iter->weight; 127 } 128 ++count; 129 ++iter; 130 } 131 res.resize(res.size()+count, f1/sum_w1_-f2/sum_w2_); 132 } 133 } 134 135 136 void KolmogorovSmirnov::shuffle(void) 137 { 138 std::deque<bool> labels; 139 for (data_w::const_iterator iter=data_.begin(); 140 iter!=data_.end(); ++iter) { 141 labels.push_back(iter->label); 142 } 143 random::random_shuffle(labels.begin(), labels.end()); 144 std::deque<bool>::const_iterator label = labels.begin(); 145 for (data_w::iterator iter=data_.begin(); 146 iter!=data_.end(); ++iter, ++label) { 147 // label does not affect sorting of set, so modifying label is safe 148 #ifdef MULTISET_ITERATOR_IS_MUTABLE 149 (*iter).label = *label; 150 #else 151 // standard requires that multiset::iterator is modifiable, but 152 // some implementations do not fulfill the requirement 153 const_cast<Element&>(*iter).label = *label; 154 #endif 155 } 156 sum_w1_ = 0; 157 sum_w2_ = 0; 158 add_sum_w(data_.begin(), data_.end()); 159 cached_ = false; 160 } 161 162 119 163 double KolmogorovSmirnov::signed_score(void) const 120 164 { … … 136 180 137 181 138 void KolmogorovSmirnov::scores(std::vector<double>& res) const139 {140 res.clear();141 res.reserve(data_.size());142 data_w::const_iterator iter(data_.begin());143 double f1=0;144 double f2=0;145 while(iter!=data_.end()){146 size_t count=0;147 double value = iter->first.first;148 while (iter!=data_.end() && iter->first.first==value) {149 if (iter->second) {150 f1 += iter->first.second;151 }152 else {153 f2 += iter->first.second;154 }155 ++count;156 ++iter;157 }158 res.resize(res.size()+count, f1/sum_w1_-f2/sum_w2_);159 }160 }161 162 163 182 std::ostream& operator<<(std::ostream& os, const KolmogorovSmirnov& ks) 164 183 { … … 171 190 172 191 192 bool KolmogorovSmirnov::Element::operator< 193 (const KolmogorovSmirnov::Element rhs) const 194 { 195 return value<rhs.value; 196 } 197 173 198 }}} // of namespace theplu yat statistics -
trunk/yat/statistics/KolmogorovSmirnov.h
r1677 r1701 5 5 6 6 /* 7 Copyright (C) 2008 Peter Johansson7 Copyright (C) 2008, 2009 Peter Johansson 8 8 9 9 This file is part of the yat library, http://dev.thep.lu.se/yat … … 36 36 { 37 37 public: 38 /** 39 struct used to store data in KolmogorovSmirnov. 40 41 This struct is public to make usage of the add range function 42 more convenient. 43 44 \since New in yat 0.5 45 */ 46 struct Element 47 { 48 /** 49 \brief default constructor 50 */ 51 inline Element(void) {}; 52 53 /** 54 \brief Constructor 55 */ 56 inline Element(double x, bool class_label, double w=1.0) 57 : value(x), label(class_label), weight(w) {}; 58 59 /** 60 \brief data value 61 */ 62 double value; 63 64 /** 65 bool telling which group the data point belong to 66 */ 67 bool label; 68 69 /** 70 weight for the data point 71 */ 72 double weight; 73 74 /** 75 \return true if value is less than rhs.value 76 */ 77 bool operator<(const Element rhs) const; 78 }; 79 38 80 /** 39 81 \brief Constructor … … 45 87 */ 46 88 void add(double value, bool class_label, double weight=1.0); 89 90 /** 91 \brief add a range 92 93 value_type of \a ForwardIterator must be convertible to 94 KolmogorovSmirnov::Element 95 96 Insertion takes typically N*log(N). However, if range is 97 sorted, insertion takes linear time. A common use case of this 98 function is when calculating several KS statistics on a data 99 set (values and weights) with different class labels. 100 101 \since New in yat 0.5 102 */ 103 template <typename ForwardIterator> 104 void add(ForwardIterator first, ForwardIterator last); 47 105 48 106 /** … … 69 127 and calculate how often a score equal or larger than score() is 70 128 obtained. 129 130 \see shuffle 71 131 */ 72 132 double p_value(size_t perm) const; … … 86 146 87 147 /** 148 \brief shuffle class labels 149 150 This is equivalent to reset and re-add values with shuffled 151 class labels. 152 153 \since New in yat 0.5 154 */ 155 void shuffle(void); 156 157 /** 88 158 Same as score() but keeping the sign, in other words, 89 159 abs(signed_score())==score() … … 95 165 private: 96 166 void scores(std::vector<double>&) const; 97 167 // add weights to sum_w1 and sum_w2 respectively depending on 168 // label in element. 169 template <typename ForwardIterator> 170 void add_sum_w(ForwardIterator first, ForwardIterator last); 171 98 172 mutable bool cached_; 99 173 mutable double score_; 100 typedef std::pair<std::pair<double,double>, bool> trip; 101 typedef std::multiset<trip> data_w; 174 typedef std::multiset<Element> data_w; 102 175 data_w data_; 103 176 double sum_w1_; … … 106 179 friend std::ostream& operator<<(std::ostream&, const KolmogorovSmirnov&); 107 180 108 // Copy not allowed109 KolmogorovSmirnov(const KolmogorovSmirnov&);110 KolmogorovSmirnov& operator=(const KolmogorovSmirnov&);181 // using compiler generated copy and assignment 182 //KolmogorovSmirnov(const KolmogorovSmirnov&); 183 //KolmogorovSmirnov& operator=(const KolmogorovSmirnov&); 111 184 }; 112 185 … … 117 190 118 191 192 // template implementations 193 194 template <typename ForwardIterator> 195 void KolmogorovSmirnov::add(ForwardIterator first, ForwardIterator last) 196 { 197 ForwardIterator iter(first); 198 typename data_w::const_iterator hint(data_.begin()); 199 for ( ; iter!=last; ++iter) 200 if (iter->weight) // ignore data points with zero weight 201 hint = data_.insert(hint, *iter); 202 add_sum_w(first, last); 203 cached_=false; 204 } 205 206 207 template <typename ForwardIterator> 208 void KolmogorovSmirnov::add_sum_w(ForwardIterator first, 209 ForwardIterator last) 210 { 211 while (first!=last) { 212 if (first->label) 213 sum_w1_ += first->weight; 214 else 215 sum_w2_ += first->weight; 216 ++first; 217 } 218 } 219 119 220 }}} // of namespace theplu yat statistics 120 221
Note: See TracChangeset
for help on using the changeset viewer.