Changeset 1160
- Timestamp:
- Feb 26, 2008, 4:29:50 PM (16 years ago)
- Location:
- trunk/yat/classifier
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/yat/classifier/KNN.h
r1158 r1160 107 107 /// class. 108 108 /// 109 /// 110 void predict(const DataLookup2D&, utility::Matrix&) const; 109 void predict(const MatrixLookup&, utility::Matrix&) const; 110 111 /// 112 /// For each sample, calculate the number of neighbors for each 113 /// class. 114 /// 115 void predict(const MatrixLookupWeighted&, utility::Matrix&) const; 111 116 112 117 113 118 private: 114 119 115 // data_ has to be of type DataLookup2D to accomodate both 116 // MatrixLookup and MatrixLookupWeighted 117 const DataLookup2D* data_; 120 const MatrixLookup* data_ml_; 121 const MatrixLookupWeighted* data_mlw_; 118 122 const Target* target_; 119 123 … … 124 128 125 129 NeighborWeighting weighting_; 126 127 ///128 /// Calculates the distances between a data set and the training129 /// data. The rows are training and the columns test samples,130 /// respectively. The returned distance matrix is dynamically131 /// generated and needs to be deleted by the caller.132 ///133 utility::Matrix* calculate_distances(const DataLookup2D&) const;134 130 135 131 void calculate_unweighted(const MatrixLookup&, … … 139 135 const MatrixLookupWeighted&, 140 136 utility::Matrix*) const; 137 138 void predict_common(const utility::Matrix& distances, 139 utility::Matrix& prediction) const; 140 141 141 }; 142 142 … … 146 146 template <typename Distance, typename NeighborWeighting> 147 147 KNN<Distance, NeighborWeighting>::KNN() 148 : SupervisedClassifier(),data_ (0),target_(0),k_(3)148 : SupervisedClassifier(),data_ml_(0),data_mlw_(0),target_(0),k_(3) 149 149 { 150 150 } … … 152 152 template <typename Distance, typename NeighborWeighting> 153 153 KNN<Distance, NeighborWeighting>::KNN(const Distance& dist) 154 : SupervisedClassifier(),data_ (0),target_(0),k_(3), distance_(dist)154 : SupervisedClassifier(),data_ml_(0),data_mlw_(0),target_(0),k_(3), distance_(dist) 155 155 { 156 156 } … … 162 162 } 163 163 164 template <typename Distance, typename NeighborWeighting>165 utility::Matrix* KNN<Distance, 
NeighborWeighting>::calculate_distances166 (const DataLookup2D& test) const167 {168 // matrix with training samples as rows and test samples as columns169 utility::Matrix* distances =170 new utility::Matrix(data_->columns(),test.columns());171 172 173 // unweighted test data174 if(const MatrixLookup* test_unweighted =175 dynamic_cast<const MatrixLookup*>(&test)) {176 // unweighted training data177 if(const MatrixLookup* training_unweighted =178 dynamic_cast<const MatrixLookup*>(data_))179 calculate_unweighted(*training_unweighted,*test_unweighted,distances);180 // weighted training data181 else if(const MatrixLookupWeighted* training_weighted =182 dynamic_cast<const MatrixLookupWeighted*>(data_))183 calculate_weighted(*training_weighted,MatrixLookupWeighted(*test_unweighted),184 distances);185 // Training data can not be of incorrect type186 }187 // weighted test data188 else if (const MatrixLookupWeighted* test_weighted =189 dynamic_cast<const MatrixLookupWeighted*>(&test)) {190 // unweighted training data191 if(const MatrixLookup* training_unweighted =192 dynamic_cast<const MatrixLookup*>(data_)) {193 calculate_weighted(MatrixLookupWeighted(*training_unweighted),194 *test_weighted,distances);195 }196 // weighted training data197 else if(const MatrixLookupWeighted* training_weighted =198 dynamic_cast<const MatrixLookupWeighted*>(data_))199 calculate_weighted(*training_weighted,*test_weighted,distances);200 // Training data can not be of incorrect type201 }202 else {203 std::string str;204 str = "Error in KNN::calculate_distances: test data has to be either MatrixLookup or MatrixLookupWeighted";205 throw std::runtime_error(str);206 }207 return distances;208 }209 164 210 165 template <typename Distance, typename NeighborWeighting> … … 214 169 { 215 170 for(size_t i=0; i<training.columns(); i++) { 216 classifier::DataLookup1D training1(training,i,false);217 171 for(size_t j=0; j<test.columns(); j++) { 218 classifier::DataLookup1D test1(test,j,false);219 
(*distances)(i,j) = distance_(training1.begin(), training1.end(), test1.begin());172 (*distances)(i,j) = distance_(training.begin_column(i), training.end_column(i), 173 test.begin_column(j)); 220 174 utility::yat_assert<std::runtime_error>(!std::isnan((*distances)(i,j))); 221 175 } 222 176 } 223 177 } 178 224 179 225 180 template <typename Distance, typename NeighborWeighting> … … 229 184 utility::Matrix* distances) const 230 185 { 231 for(size_t i=0; i<training.columns(); i++) { 232 classifier::DataLookupWeighted1D training1(training,i,false); 186 for(size_t i=0; i<training.columns(); i++) { 233 187 for(size_t j=0; j<test.columns(); j++) { 234 classifier::DataLookupWeighted1D test1(test,j,false); 235 (*distances)(i,j) = distance_(training1.begin(), training1.end(), 236 test1.begin()); 188 (*distances)(i,j) = distance_(training.begin_column(i), training.end_column(i), 189 test.begin_column(j)); 237 190 // If the distance is NaN (no common variables with non-zero weights), 238 191 // the distance is set to infinity to be sorted as a neighbor at the end … … 277 230 if(data.columns()<k_) 278 231 k_=data.columns(); 279 data_=&data; 232 data_ml_=&data; 233 data_mlw_=0; 280 234 target_=&target; 281 235 trained_=true; … … 292 246 if(data.columns()<k_) 293 247 k_=data.columns(); 294 data_=&data; 248 data_ml_=0; 249 data_mlw_=&data; 295 250 target_=&target; 296 251 trained_=true; … … 299 254 300 255 template <typename Distance, typename NeighborWeighting> 301 void KNN<Distance, NeighborWeighting>::predict(const DataLookup2D& test,256 void KNN<Distance, NeighborWeighting>::predict(const MatrixLookup& test, 302 257 utility::Matrix& prediction) const 303 258 { 304 utility::yat_assert<std::runtime_error>(data_->rows()==test.rows(),"KNN::predict different number of rows in training and test data"); 305 306 utility::Matrix* distances=calculate_distances(test); 307 259 // matrix with training samples as rows and test samples as columns 260 utility::Matrix* distances = 0; 261 // unweighted
training data 262 if(data_ml_ && !data_mlw_) { 263 utility::yat_assert<std::runtime_error> 264 (data_ml_->rows()==test.rows(), 265 "KNN::predict different number of rows in training and test data"); 266 distances=new utility::Matrix(data_ml_->columns(),test.columns()); 267 calculate_unweighted(*data_ml_,test,distances); 268 } 269 else if (data_mlw_ && !data_ml_) { 270 // weighted training data 271 utility::yat_assert<std::runtime_error> 272 (data_mlw_->rows()==test.rows(), 273 "KNN::predict different number of rows in training and test data"); 274 distances=new utility::Matrix(data_mlw_->columns(),test.columns()); 275 calculate_weighted(*data_mlw_,MatrixLookupWeighted(test), 276 distances); 277 } 278 else { 279 std::runtime_error("KNN::predict no training data"); 280 } 281 308 282 prediction.resize(target_->nof_classes(),test.columns(),0.0); 309 for(size_t sample=0;sample<distances->columns();sample++) { 283 predict_common(*distances,prediction); 284 if(distances) 285 delete distances; 286 } 287 288 template <typename Distance, typename NeighborWeighting> 289 void KNN<Distance, NeighborWeighting>::predict(const MatrixLookupWeighted& test, 290 utility::Matrix& prediction) const 291 { 292 // matrix with training samples as rows and test samples as columns 293 utility::Matrix* distances=0; 294 // unweighted training data 295 if(data_ml_ && !data_mlw_) { 296 utility::yat_assert<std::runtime_error> 297 (data_ml_->rows()==test.rows(), 298 "KNN::predict different number of rows in training and test data"); 299 distances=new utility::Matrix(data_ml_->columns(),test.columns()); 300 calculate_weighted(MatrixLookupWeighted(*data_ml_),test,distances); 301 } 302 // weighted training data 303 else if (data_mlw_ && !data_ml_) { 304 utility::yat_assert<std::runtime_error> 305 (data_mlw_->rows()==test.rows(), 306 "KNN::predict different number of rows in training and test data"); 307 distances=new utility::Matrix(data_mlw_->columns(),test.columns()); 308 
calculate_weighted(*data_mlw_,test,distances); 309 } 310 else { 311 std::runtime_error("KNN::predict no training data"); 312 } 313 314 prediction.resize(target_->nof_classes(),test.columns(),0.0); 315 predict_common(*distances,prediction); 316 317 if(distances) 318 delete distances; 319 } 320 321 template <typename Distance, typename NeighborWeighting> 322 void KNN<Distance, NeighborWeighting>::predict_common 323 (const utility::Matrix& distances, utility::Matrix& prediction) const 324 { 325 for(size_t sample=0;sample<distances.columns();sample++) { 310 326 std::vector<size_t> k_index; 311 utility::VectorConstView dist=distances ->column_const_view(sample);327 utility::VectorConstView dist=distances.column_const_view(sample); 312 328 utility::sort_smallest_index(k_index,k_,dist); 313 329 utility::VectorView pred=prediction.column_view(sample); 314 330 weighting_(dist,k_index,*target_,pred); 315 331 } 316 delete distances; 317 332 318 333 // classes for which there are no training samples should be set 319 334 // to nan in the predictions … … 324 339 } 325 340 341 326 342 }}} // of namespace classifier, yat, and theplu 327 343 -
trunk/yat/classifier/NBC.cc
r1157 r1160 24 24 25 25 #include "NBC.h" 26 #include "DataLookup2D.h"27 26 #include "MatrixLookup.h" 28 27 #include "MatrixLookupWeighted.h" 29 28 #include "Target.h" 29 #include "yat/statistics/Averager.h" 30 30 #include "yat/statistics/AveragerWeighted.h" 31 31 #include "yat/utility/Matrix.h" … … 120 120 121 121 122 void NBC::predict(const DataLookup2D& x,122 void NBC::predict(const MatrixLookup& ml, 123 123 utility::Matrix& prediction) const 124 124 { 125 assert(x.rows()==sigma2_.rows()); 126 assert(x.rows()==centroids_.rows()); 127 128 125 assert(ml.rows()==sigma2_.rows()); 126 assert(ml.rows()==centroids_.rows()); 129 127 // each row in prediction corresponds to a sample label (class) 130 prediction.resize(centroids_.columns(), x.columns(), 0); 131 // weighted calculation 132 if (const MatrixLookupWeighted* mlw = 133 dynamic_cast<const MatrixLookupWeighted*>(&x)) { 134 // first calculate -lnP = sum ln_sigma_i + (x_i-m_i)^2/2sigma_i^2 135 for (size_t label=0; label<centroids_.columns(); ++label) { 136 double sum_log_sigma = sum_logsigma(label); 137 for (size_t sample=0; sample<prediction.rows(); ++sample) { 138 prediction(label,sample) = sum_log_sigma; 139 for (size_t i=0; i<x.rows(); ++i) 140 // taking care of NaN and missing training features 141 if (mlw->weight(i, label) && !std::isnan(sigma2_(i, label))) { 142 prediction(label, sample) += mlw->weight(i, label)* 143 std::pow(mlw->data(i, label)-centroids_(i, label),2)/ 144 sigma2_(i, label); 145 } 128 prediction.resize(centroids_.columns(), ml.columns(), 0); 129 130 // first calculate -lnP = sum sigma_i + (x_i-m_i)^2/2sigma_i^2 131 for (size_t label=0; label<centroids_.columns(); ++label) { 132 double sum_log_sigma = sum_logsigma(label); 133 for (size_t sample=0; sample<prediction.rows(); ++sample) { 134 prediction(label,sample) = sum_log_sigma; 135 for (size_t i=0; i<ml.rows(); ++i) 136 // Ignoring missing features 137 if (!std::isnan(sigma2_(i, label))) 138 prediction(label, sample) += 139 std::pow(ml(i, 
label)-centroids_(i, label),2)/ 140 sigma2_(i, label); 141 } 142 } 143 standardize_lnP(prediction); 144 } 145 146 146 147 } 148 } 149 } 150 // no weights 151 else if (const MatrixLookup* ml = dynamic_cast<const MatrixLookup*>(&x)) { 152 // first calculate -lnP = sum sigma_i + (x_i-m_i)^2/2sigma_i^2 153 for (size_t label=0; label<centroids_.columns(); ++label) { 154 double sum_log_sigma = sum_logsigma(label); 155 for (size_t sample=0; sample<prediction.rows(); ++sample) { 156 prediction(label,sample) = sum_log_sigma; 157 for (size_t i=0; i<ml->rows(); ++i) 158 // Ignoring missing features 159 if (!std::isnan(sigma2_(i, label))) 160 prediction(label, sample) += 161 std::pow((*ml)(i, label)-centroids_(i, label),2)/ 162 sigma2_(i, label); 163 } 164 } 165 } 166 else { 167 std::string str = 168 "Error in NBC::predict: DataLookup2D of unexpected class."; 169 throw std::runtime_error(str); 170 } 171 172 147 void NBC::predict(const MatrixLookupWeighted& mlw, 148 utility::Matrix& prediction) const 149 { 150 assert(mlw.rows()==sigma2_.rows()); 151 assert(mlw.rows()==centroids_.rows()); 152 153 // each row in prediction corresponds to a sample label (class) 154 prediction.resize(centroids_.columns(), mlw.columns(), 0); 155 156 // first calculate -lnP = sum sigma_i + (x_i-m_i)^2/2sigma_i^2 157 for (size_t label=0; label<centroids_.columns(); ++label) { 158 double sum_log_sigma = sum_logsigma(label); 159 for (size_t sample=0; sample<prediction.rows(); ++sample) { 160 prediction(label,sample) = sum_log_sigma; 161 for (size_t i=0; i<mlw.rows(); ++i) 162 // taking care of NaN and missing training features 163 if (mlw.weight(i, label) && !std::isnan(sigma2_(i, label))) { 164 prediction(label, sample) += mlw.weight(i, label)* 165 std::pow(mlw.data(i, label)-centroids_(i, label),2)/ 166 sigma2_(i, label); 167 } 168 169 } 170 } 171 standardize_lnP(prediction); 172 } 173 174 void NBC::standardize_lnP(utility::Matrix& prediction) const 175 { 173 176 // -lnP might be a large number, in 
order to avoid out of bound 174 177 // problems when calculating P = exp(- -lnP), we centralize matrix … … 177 180 add(a, prediction.begin(), prediction.end()); 178 181 prediction -= a.mean(); 179 182 180 183 // exponentiate 181 184 for (size_t i=0; i<prediction.rows(); ++i) 182 185 for (size_t j=0; j<prediction.columns(); ++j) 183 186 prediction(i,j) = std::exp(prediction(i,j)); 184 187 185 188 // normalize each row (label) to sum up to unity (probability) 186 189 for (size_t i=0; i<prediction.rows(); ++i){ -
trunk/yat/classifier/NBC.h
r1157 r1160 33 33 namespace classifier { 34 34 35 class DataLookup1D;36 class DataLookup2D;37 35 class MatrixLookup; 38 36 class MatrixLookupWeighted; … … 100 98 equivalent to using all weight equal to unity. 101 99 */ 102 void predict(const DataLookup2D& data, utility::Matrix& res) const; 100 void predict(const MatrixLookup& data, utility::Matrix& res) const; 101 102 /** 103 @see above 104 */ 105 void predict(const MatrixLookupWeighted& data, utility::Matrix& res) const; 103 106 104 107 105 108 private: 109 void standardize_lnP(utility::Matrix& prediction) const; 110 106 111 utility::Matrix centroids_; 107 112 utility::Matrix sigma2_; -
trunk/yat/classifier/NCC.h
r1158 r1160 27 27 */ 28 28 29 #include "DataLookup1D.h"30 #include "DataLookup2D.h"31 #include "DataLookupWeighted1D.h"32 29 #include "MatrixLookup.h" 33 30 #include "MatrixLookupWeighted.h" … … 104 101 /// Calculate the distance to each centroid for test samples 105 102 /// 106 void predict(const DataLookup2D&, utility::Matrix&) const; 107 103 void predict(const MatrixLookup&, utility::Matrix&) const; 104 105 /// 106 /// Calculate the distance to each centroid for weighted test samples 107 /// 108 void predict(const MatrixLookupWeighted&, utility::Matrix&) const; 109 108 110 109 111 private: … … 203 205 204 206 template <typename Distance> 205 void NCC<Distance>::predict(const DataLookup2D& test,207 void NCC<Distance>::predict(const MatrixLookup& test, 206 208 utility::Matrix& prediction) const 207 209 { … … 214 216 prediction.resize(centroids_->columns(), test.columns()); 215 217 216 // unweighted test data 217 if (const MatrixLookup* test_unweighted = 218 dynamic_cast<const MatrixLookup*>(&test)) { 219 // If weighted training data has resulted in NaN in centroids: weighted calculations 220 if(centroids_nan_) { 221 predict_weighted(MatrixLookupWeighted(*test_unweighted),prediction); 222 } 223 // If unweighted training data: unweighted calculations 224 else { 225 predict_unweighted(*test_unweighted,prediction); 226 } 227 } 228 // weighted test data: weighted calculations 229 else if (const MatrixLookupWeighted* test_weighted = 230 dynamic_cast<const MatrixLookupWeighted*>(&test)) { 231 predict_weighted(*test_weighted,prediction); 232 } 218 // If weighted training data has resulted in NaN in centroids: weighted calculations 219 if(centroids_nan_) { 220 predict_weighted(MatrixLookupWeighted(test),prediction); 221 } 222 // If unweighted training data: unweighted calculations 233 223 else { 234 std::string str = 235 "Error in NCC<Distance>::predict: DataLookup2D of unexpected class."; 236 throw std::runtime_error(str); 237 } 238 } 224 
predict_unweighted(test,prediction); 225 } 226 } 227 228 template <typename Distance> 229 void NCC<Distance>::predict(const MatrixLookupWeighted& test, 230 utility::Matrix& prediction) const 231 { 232 utility::yat_assert<std::runtime_error> 233 (centroids_,"NCC::predict called for untrained classifier"); 234 utility::yat_assert<std::runtime_error> 235 (centroids_->rows()==test.rows(), 236 "NCC::predict test data with incorrect number of rows"); 237 238 prediction.resize(centroids_->columns(), test.columns()); 239 predict_weighted(test,prediction); 240 } 241 239 242 240 243 template <typename Distance> -
trunk/yat/classifier/SupervisedClassifier.h
r1157 r1160 38 38 namespace classifier { 39 39 40 class DataLookup2D;41 40 class MatrixLookup; 42 41 class MatrixLookupWeighted; … … 79 78 /// Generate output values for a data set 80 79 /// 81 virtual void predict(const DataLookup2D&, utility::Matrix&) const =0; 80 virtual void predict(const MatrixLookup&, utility::Matrix&) const =0; 81 82 /// 83 /// Generate output values for a weighted data set 84 /// 85 virtual void predict(const MatrixLookupWeighted&, utility::Matrix&) const =0; 82 86 83 87
Note: See TracChangeset
for help on using the changeset viewer.