Changeset 1160 for trunk/yat/classifier


Ignore:
Timestamp:
Feb 26, 2008, 4:29:50 PM (13 years ago)
Author:
Markus Ringnér
Message:

Fixes #333

Location:
trunk/yat/classifier
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/yat/classifier/KNN.h

    r1158 r1160  
    107107    /// class.
    108108    ///
    109     ///
    110     void predict(const DataLookup2D&, utility::Matrix&) const;
     109    void predict(const MatrixLookup&, utility::Matrix&) const;
     110
     111    ///
     112    /// For each sample, calculate the number of neighbors for each
     113    /// class.
     114    ///
     115    void predict(const MatrixLookupWeighted&, utility::Matrix&) const;
    111116
    112117
    113118  private:
    114119
    115     // data_ has to be of type DataLookup2D to accomodate both
    116     // MatrixLookup and MatrixLookupWeighted
    117     const DataLookup2D* data_;
     120    const MatrixLookup* data_ml_;
     121    const MatrixLookupWeighted* data_mlw_;
    118122    const Target* target_;
    119123
     
    124128
    125129    NeighborWeighting weighting_;
    126 
    127     ///
    128     /// Calculates the distances between a data set and the training
    129     /// data. The rows are training and the columns test samples,
    130     /// respectively. The returned distance matrix is dynamically
    131     /// generated and needs to be deleted by the caller.
    132     ///
    133     utility::Matrix* calculate_distances(const DataLookup2D&) const;
    134130
    135131    void calculate_unweighted(const MatrixLookup&,
     
    139135                            const MatrixLookupWeighted&,
    140136                            utility::Matrix*) const;
     137
     138    void predict_common(const utility::Matrix& distances,
     139                        utility::Matrix& prediction) const;
     140
    141141  };
    142142 
     
    146146  template <typename Distance, typename NeighborWeighting>
    147147  KNN<Distance, NeighborWeighting>::KNN()
    148     : SupervisedClassifier(),data_(0),target_(0),k_(3)
     148    : SupervisedClassifier(),data_ml_(0),data_mlw_(0),target_(0),k_(3)
    149149  {
    150150  }
     
    152152  template <typename Distance, typename NeighborWeighting>
    153153  KNN<Distance, NeighborWeighting>::KNN(const Distance& dist)
    154     : SupervisedClassifier(),data_(0),target_(0),k_(3), distance_(dist)
     154    : SupervisedClassifier(),data_ml_(0),data_mlw_(0),target_(0),k_(3), distance_(dist)
    155155  {
    156156  }
     
    162162  }
    163163 
    164   template <typename Distance, typename NeighborWeighting>
    165   utility::Matrix* KNN<Distance, NeighborWeighting>::calculate_distances
    166   (const DataLookup2D& test) const
    167   {
    168     // matrix with training samples as rows and test samples as columns
    169     utility::Matrix* distances =
    170       new utility::Matrix(data_->columns(),test.columns());
    171    
    172    
    173     // unweighted test data
    174     if(const MatrixLookup* test_unweighted =
    175        dynamic_cast<const MatrixLookup*>(&test)) {     
    176       // unweighted training data
    177       if(const MatrixLookup* training_unweighted =
    178          dynamic_cast<const MatrixLookup*>(data_))
    179         calculate_unweighted(*training_unweighted,*test_unweighted,distances);
    180       // weighted training data
    181       else if(const MatrixLookupWeighted* training_weighted =
    182               dynamic_cast<const MatrixLookupWeighted*>(data_))
    183         calculate_weighted(*training_weighted,MatrixLookupWeighted(*test_unweighted),
    184                            distances);             
    185       // Training data can not be of incorrect type
    186     }
    187     // weighted test data
    188     else if (const MatrixLookupWeighted* test_weighted =
    189              dynamic_cast<const MatrixLookupWeighted*>(&test)) {     
    190       // unweighted training data
    191       if(const MatrixLookup* training_unweighted =
    192          dynamic_cast<const MatrixLookup*>(data_)) {
    193         calculate_weighted(MatrixLookupWeighted(*training_unweighted),
    194                            *test_weighted,distances);
    195       }
    196       // weighted training data
    197       else if(const MatrixLookupWeighted* training_weighted =
    198               dynamic_cast<const MatrixLookupWeighted*>(data_))
    199         calculate_weighted(*training_weighted,*test_weighted,distances);             
    200       // Training data can not be of incorrect type
    201     }
    202     else {
    203       std::string str;
    204       str = "Error in KNN::calculate_distances: test data has to be either MatrixLookup or MatrixLookupWeighted";
    205       throw std::runtime_error(str);
    206     }
    207     return distances;
    208   }
    209164
    210165  template <typename Distance, typename NeighborWeighting>
     
    214169  {
    215170    for(size_t i=0; i<training.columns(); i++) {
    216       classifier::DataLookup1D training1(training,i,false);
    217171      for(size_t j=0; j<test.columns(); j++) {
    218         classifier::DataLookup1D test1(test,j,false);
    219         (*distances)(i,j) = distance_(training1.begin(), training1.end(), test1.begin());
     172        (*distances)(i,j) = distance_(training.begin_column(i), training.end_column(i),
     173                                      test.begin_column(j));
    220174        utility::yat_assert<std::runtime_error>(!std::isnan((*distances)(i,j)));
    221175      }
    222176    }
    223177  }
     178
    224179 
    225180  template <typename Distance, typename NeighborWeighting>
     
    229184   utility::Matrix* distances) const
    230185  {
    231     for(size_t i=0; i<training.columns(); i++) {
    232       classifier::DataLookupWeighted1D training1(training,i,false);
     186    for(size_t i=0; i<training.columns(); i++) {
    233187      for(size_t j=0; j<test.columns(); j++) {
    234         classifier::DataLookupWeighted1D test1(test,j,false);
    235         (*distances)(i,j) = distance_(training1.begin(), training1.end(),
    236                                       test1.begin());
     188        (*distances)(i,j) = distance_(training.begin_column(i), training.end_column(i),
     189                                      test.begin_column(j));
    237190        // If the distance is NaN (no common variables with non-zero weights),
    238191        // the distance is set to infinity to be sorted as a neighbor at the end
     
    277230    if(data.columns()<k_)
    278231      k_=data.columns();
    279     data_=&data;
     232    data_ml_=&data;
     233    data_mlw_=0;
    280234    target_=&target;
    281235    trained_=true;
     
    292246    if(data.columns()<k_)
    293247      k_=data.columns();
    294     data_=&data;
     248    data_ml_=0;
     249    data_mlw_=&data;
    295250    target_=&target;
    296251    trained_=true;
     
    299254
    300255  template <typename Distance, typename NeighborWeighting>
    301   void KNN<Distance, NeighborWeighting>::predict(const DataLookup2D& test,
     256  void KNN<Distance, NeighborWeighting>::predict(const MatrixLookup& test,
    302257                                                 utility::Matrix& prediction) const
    303258  {   
    304     utility::yat_assert<std::runtime_error>(data_->rows()==test.rows(),"KNN::predict different number of rows in training and test data");
    305 
    306     utility::Matrix* distances=calculate_distances(test);
    307    
     259    // matrix with training samples as rows and test samples as columns
     260    utility::Matrix* distances = 0;
     261    // unweighted training data
     262    if(data_ml_ && !data_mlw_) {
     263      utility::yat_assert<std::runtime_error>
     264        (data_ml_->rows()==test.rows(),
     265         "KNN::predict different number of rows in training and test data");     
     266      distances=new utility::Matrix(data_ml_->columns(),test.columns());
     267      calculate_unweighted(*data_ml_,test,distances);
     268    }
     269    else if (data_mlw_ && !data_ml_) {
     270      // weighted training data
     271      utility::yat_assert<std::runtime_error>
     272        (data_mlw_->rows()==test.rows(),
     273         "KNN::predict different number of rows in training and test data");           
     274      distances=new utility::Matrix(data_mlw_->columns(),test.columns());
     275      calculate_weighted(*data_mlw_,MatrixLookupWeighted(test),
     276                         distances);             
     277    }
     278    else {
     279      std::runtime_error("KNN::predict no training data");
     280    }
     281
    308282    prediction.resize(target_->nof_classes(),test.columns(),0.0);
    309     for(size_t sample=0;sample<distances->columns();sample++) {
     283    predict_common(*distances,prediction);
     284    if(distances)
     285      delete distances;
     286  }
     287
     288  template <typename Distance, typename NeighborWeighting>
     289  void KNN<Distance, NeighborWeighting>::predict(const MatrixLookupWeighted& test,
     290                                                 utility::Matrix& prediction) const
     291  {   
     292    // matrix with training samples as rows and test samples as columns
     293    utility::Matrix* distances=0;
     294    // unweighted training data
     295    if(data_ml_ && !data_mlw_) {
     296      utility::yat_assert<std::runtime_error>
     297        (data_ml_->rows()==test.rows(),
     298         "KNN::predict different number of rows in training and test data");   
     299      distances=new utility::Matrix(data_ml_->columns(),test.columns());
     300      calculate_weighted(MatrixLookupWeighted(*data_ml_),test,distances);   
     301    }
     302    // weighted training data
     303    else if (data_mlw_ && !data_ml_) {
     304      utility::yat_assert<std::runtime_error>
     305        (data_mlw_->rows()==test.rows(),
     306         "KNN::predict different number of rows in training and test data");   
     307      distances=new utility::Matrix(data_mlw_->columns(),test.columns());
     308      calculate_weighted(*data_mlw_,test,distances);             
     309    }
     310    else {
     311      std::runtime_error("KNN::predict no training data");
     312    }
     313
     314    prediction.resize(target_->nof_classes(),test.columns(),0.0);
     315    predict_common(*distances,prediction);
     316   
     317    if(distances)
     318      delete distances;
     319  }
     320 
     321  template <typename Distance, typename NeighborWeighting>
     322  void KNN<Distance, NeighborWeighting>::predict_common
     323  (const utility::Matrix& distances, utility::Matrix& prediction) const
     324  {   
     325    for(size_t sample=0;sample<distances.columns();sample++) {
    310326      std::vector<size_t> k_index;
    311       utility::VectorConstView dist=distances->column_const_view(sample);
     327      utility::VectorConstView dist=distances.column_const_view(sample);
    312328      utility::sort_smallest_index(k_index,k_,dist);
    313329      utility::VectorView pred=prediction.column_view(sample);
    314330      weighting_(dist,k_index,*target_,pred);
    315331    }
    316     delete distances;
    317 
     332   
    318333    // classes for which there are no training samples should be set
    319334    // to nan in the predictions
     
    324339  }
    325340
     341
    326342}}} // of namespace classifier, yat, and theplu
    327343
  • trunk/yat/classifier/NBC.cc

    r1157 r1160  
    2424
    2525#include "NBC.h"
    26 #include "DataLookup2D.h"
    2726#include "MatrixLookup.h"
    2827#include "MatrixLookupWeighted.h"
    2928#include "Target.h"
     29#include "yat/statistics/Averager.h"
    3030#include "yat/statistics/AveragerWeighted.h"
    3131#include "yat/utility/Matrix.h"
     
    120120
    121121
    122   void NBC::predict(const DataLookup2D& x,                   
     122  void NBC::predict(const MatrixLookup& ml,                     
    123123                    utility::Matrix& prediction) const
    124124  {   
    125     assert(x.rows()==sigma2_.rows());
    126     assert(x.rows()==centroids_.rows());
    127    
    128    
     125    assert(ml.rows()==sigma2_.rows());
     126    assert(ml.rows()==centroids_.rows());
    129127    // each row in prediction corresponds to a sample label (class)
    130     prediction.resize(centroids_.columns(), x.columns(), 0);
    131     // weighted calculation
    132     if (const MatrixLookupWeighted* mlw =
    133         dynamic_cast<const MatrixLookupWeighted*>(&x)) {
    134       // first calculate -lnP = sum ln_sigma_i + (x_i-m_i)^2/2sigma_i^2
    135       for (size_t label=0; label<centroids_.columns(); ++label) {
    136         double sum_log_sigma = sum_logsigma(label);
    137         for (size_t sample=0; sample<prediction.rows(); ++sample) {
    138           prediction(label,sample) = sum_log_sigma;
    139           for (size_t i=0; i<x.rows(); ++i)
    140             // taking care of NaN and missing training features
    141             if (mlw->weight(i, label) && !std::isnan(sigma2_(i, label))) {
    142               prediction(label, sample) += mlw->weight(i, label)*
    143                 std::pow(mlw->data(i, label)-centroids_(i, label),2)/
    144                 sigma2_(i, label);
    145             }
     128    prediction.resize(centroids_.columns(), ml.columns(), 0);
     129
     130    // first calculate -lnP = sum ln_sigma_i + (x_i-m_i)^2/2sigma_i^2
     131    for (size_t label=0; label<centroids_.columns(); ++label) {
     132      double sum_log_sigma = sum_logsigma(label);
     133      for (size_t sample=0; sample<prediction.rows(); ++sample) {
     134        prediction(label,sample) = sum_log_sigma;
     135        for (size_t i=0; i<ml.rows(); ++i)
     136          // Ignoring missing features
     137          if (!std::isnan(sigma2_(i, label)))
     138            prediction(label, sample) +=
     139              std::pow(ml(i, label)-centroids_(i, label),2)/
     140              sigma2_(i, label);
     141      }
     142    }
     143    standardize_lnP(prediction);
     144  }
     145
    146146     
    147         }
    148       }
    149     }
    150       // no weights
    151     else if (const MatrixLookup* ml = dynamic_cast<const MatrixLookup*>(&x)) {
    152       // first calculate -lnP = sum sigma_i + (x_i-m_i)^2/2sigma_i^2
    153       for (size_t label=0; label<centroids_.columns(); ++label) {
    154         double sum_log_sigma = sum_logsigma(label);
    155         for (size_t sample=0; sample<prediction.rows(); ++sample) {
    156           prediction(label,sample) = sum_log_sigma;
    157           for (size_t i=0; i<ml->rows(); ++i)
    158             // Ignoring missing features
    159             if (!std::isnan(sigma2_(i, label)))
    160               prediction(label, sample) +=
    161                 std::pow((*ml)(i, label)-centroids_(i, label),2)/
    162                 sigma2_(i, label);
    163         }
    164       }
    165     }
    166     else {
    167       std::string str =
    168         "Error in NBC::predict: DataLookup2D of unexpected class.";
    169       throw std::runtime_error(str);
    170     }
    171 
    172 
     147  void NBC::predict(const MatrixLookupWeighted& mlw,                   
     148                    utility::Matrix& prediction) const
     149  {   
     150    assert(mlw.rows()==sigma2_.rows());
     151    assert(mlw.rows()==centroids_.rows());
     152   
     153    // each row in prediction corresponds to a sample label (class)
     154    prediction.resize(centroids_.columns(), mlw.columns(), 0);
     155
     156    // first calculate -lnP = sum ln_sigma_i + (x_i-m_i)^2/2sigma_i^2
     157    for (size_t label=0; label<centroids_.columns(); ++label) {
     158      double sum_log_sigma = sum_logsigma(label);
     159      for (size_t sample=0; sample<prediction.rows(); ++sample) {
     160        prediction(label,sample) = sum_log_sigma;
     161        for (size_t i=0; i<mlw.rows(); ++i)
     162          // taking care of NaN and missing training features
     163          if (mlw.weight(i, label) && !std::isnan(sigma2_(i, label))) {
     164            prediction(label, sample) += mlw.weight(i, label)*
     165              std::pow(mlw.data(i, label)-centroids_(i, label),2)/
     166              sigma2_(i, label);
     167          }
     168       
     169      }
     170    }
     171    standardize_lnP(prediction);
     172  }
     173
     174  void NBC::standardize_lnP(utility::Matrix& prediction) const
     175  {
    173176    // -lnP might be a large number, in order to avoid out of bound
    174177    // problems when calculating P = exp(- -lnP), we centralize matrix
     
    177180    add(a, prediction.begin(), prediction.end());
    178181    prediction -= a.mean();
    179 
     182   
    180183    // exponentiate
    181184    for (size_t i=0; i<prediction.rows(); ++i)
    182185      for (size_t j=0; j<prediction.columns(); ++j)
    183186        prediction(i,j) = std::exp(prediction(i,j));
    184 
     187   
    185188    // normalize each row (label) to sum up to unity (probability)
    186189    for (size_t i=0; i<prediction.rows(); ++i){
  • trunk/yat/classifier/NBC.h

    r1157 r1160  
    3333namespace classifier { 
    3434
    35   class DataLookup1D;
    36   class DataLookup2D;
    3735  class MatrixLookup;
    3836  class MatrixLookupWeighted;
     
    10098       equivalent to using all weight equal to unity.
    10199    */
    102     void predict(const DataLookup2D& data, utility::Matrix& res) const;
     100    void predict(const MatrixLookup& data, utility::Matrix& res) const;
     101
     102    /**
     103       @see above
     104     */
     105    void predict(const MatrixLookupWeighted& data, utility::Matrix& res) const;
    103106
    104107
    105108  private:
     109    void standardize_lnP(utility::Matrix& prediction) const;
     110
    106111    utility::Matrix centroids_;
    107112    utility::Matrix sigma2_;
  • trunk/yat/classifier/NCC.h

    r1158 r1160  
    2727*/
    2828
    29 #include "DataLookup1D.h"
    30 #include "DataLookup2D.h"
    31 #include "DataLookupWeighted1D.h"
    3229#include "MatrixLookup.h"
    3330#include "MatrixLookupWeighted.h"
     
    104101    /// Calculate the distance to each centroid for test samples
    105102    ///
    106     void predict(const DataLookup2D&, utility::Matrix&) const;
    107    
     103    void predict(const MatrixLookup&, utility::Matrix&) const;
     104   
     105    ///
     106    /// Calculate the distance to each centroid for weighted test samples
     107    ///
     108    void predict(const MatrixLookupWeighted&, utility::Matrix&) const;
     109
    108110   
    109111  private:
     
    203205
    204206  template <typename Distance>
    205   void NCC<Distance>::predict(const DataLookup2D& test,                     
     207  void NCC<Distance>::predict(const MatrixLookup& test,                     
    206208                              utility::Matrix& prediction) const
    207209  {   
     
    214216    prediction.resize(centroids_->columns(), test.columns());
    215217
    216     // unweighted test data
    217     if (const MatrixLookup* test_unweighted =
    218         dynamic_cast<const MatrixLookup*>(&test)) {
    219       // If weighted training data has resulted in NaN in centroids: weighted calculations
    220       if(centroids_nan_) {
    221         predict_weighted(MatrixLookupWeighted(*test_unweighted),prediction);
    222       }
    223       // If unweighted training data: unweighted calculations
    224       else {
    225         predict_unweighted(*test_unweighted,prediction);
    226       }
    227     }
    228     // weighted test data: weighted calculations
    229     else if (const MatrixLookupWeighted* test_weighted =
    230              dynamic_cast<const MatrixLookupWeighted*>(&test)) {
    231       predict_weighted(*test_weighted,prediction);
    232     }
     218    // If weighted training data has resulted in NaN in centroids: weighted calculations
     219    if(centroids_nan_) {
     220      predict_weighted(MatrixLookupWeighted(test),prediction);
     221    }
     222    // If unweighted training data: unweighted calculations
    233223    else {
    234       std::string str =
    235         "Error in NCC<Distance>::predict: DataLookup2D of unexpected class.";
    236       throw std::runtime_error(str);
    237     }
    238   }
     224      predict_unweighted(test,prediction);
     225    }
     226  }
     227
     228  template <typename Distance>
     229  void NCC<Distance>::predict(const MatrixLookupWeighted& test,                     
     230                              utility::Matrix& prediction) const
     231  {   
     232    utility::yat_assert<std::runtime_error>
     233      (centroids_,"NCC::predict called for untrained classifier");
     234    utility::yat_assert<std::runtime_error>
     235      (centroids_->rows()==test.rows(),
     236       "NCC::predict test data with incorrect number of rows");
     237   
     238    prediction.resize(centroids_->columns(), test.columns());
     239    predict_weighted(test,prediction);
     240  }
     241
    239242 
    240243  template <typename Distance>
  • trunk/yat/classifier/SupervisedClassifier.h

    r1157 r1160  
    3838namespace classifier { 
    3939
    40   class DataLookup2D;
    4140  class MatrixLookup;
    4241  class MatrixLookupWeighted;
     
    7978    /// Generate output values for a data set
    8079    ///
    81     virtual void predict(const DataLookup2D&, utility::Matrix&) const =0;   
     80    virtual void predict(const MatrixLookup&, utility::Matrix&) const =0;   
     81
     82    ///
     83    /// Generate output values for a weighted data set
     84    ///
     85    virtual void predict(const MatrixLookupWeighted&, utility::Matrix&) const =0;   
    8286
    8387
Note: See TracChangeset for help on using the changeset viewer.