Changeset 526


Ignore:
Timestamp:
Mar 1, 2006, 9:49:48 AM (16 years ago)
Author:
Markus Ringnér
Message:

Fixed bug in tScore and in MatrixLookup. Added support for scoring inputs in SupervisedClassifier and for using this in training and prediction in NCC.

Location:
trunk/lib
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/classifier/MatrixLookup.cc

    r482 r526  
    8484                                const std::vector<size_t>& val) const
    8585  {
    86     return new MatrixLookup(*this,train);
     86    return new MatrixLookup(*this,val);
    8787  }
    8888
  • trunk/lib/classifier/NCC.cc

    r525 r526  
    55#include <c++_tools/classifier/DataLookup1D.h>
    66#include <c++_tools/classifier/DataLookup2D.h>
     7#include <c++_tools/classifier/MatrixLookup.h>
    78#include <c++_tools/classifier/InputRanker.h>
    89#include <c++_tools/classifier/Target.h>
     
    1920namespace classifier {
    2021
    21   NCC::NCC(const DataLookup2D& data, const Target& target,
     22  NCC::NCC(const MatrixLookup& data, const Target& target,
    2223           const statistics::Distance& distance)
    2324    : SupervisedClassifier(target), distance_(distance), matrix_(data)
     
    2526  }
    2627
    27   NCC::NCC(const DataLookup2D& data, const Target& target,
     28  NCC::NCC(const MatrixLookup& data, const Target& target,
    2829           const statistics::Distance& distance,
    2930           statistics::Score& score, size_t nof_inputs)
     
    4445                       const Target& target) const
    4546  {     
    46     NCC* ncc= new NCC(data,target,this->distance_);
     47    const MatrixLookup& tmp = dynamic_cast<const MatrixLookup&>(data);
     48
     49    NCC* ncc= new NCC(tmp,target,this->distance_);
    4750    ncc->score_=this->score_;
    4851    ncc->nof_inputs_=this->nof_inputs_;
     
    5356  bool NCC::train()
    5457  {
     58    // If score is set calculate centroids only for nof_inputs_ number
     59    // of top ranked inputs. Otherwise calculate centroids based on
     60    // all inputs ( = all rows in data matrix).
    5561    if(ranker_)
    5662      delete ranker_;
    57     if(score_)
    58       ranker_=new InputRanker(matrix_, target_, *score_);
    59     // Markus : ranker_ should be taken into account if used!!!
    60 
    61     // Calculate the centroids for each class
    62     centroids_=gslapi::matrix(matrix_.rows(),target_.nof_classes());
    63     gslapi::matrix nof_in_class(matrix_.rows(),target_.nof_classes());
    64     for(size_t i=0; i<matrix_.rows(); i++) {
     63    size_t rows=matrix_.rows();
     64    if(score_) {
     65      // Markus: missing values should not be handled here, but a weight matrix
     66      // should be supported throughout the classifier class structure.
     67      gslapi::matrix weight(matrix_.rows(),matrix_.columns(),0.0);
     68      for(size_t i=0; i<matrix_.rows(); i++)
     69        for(size_t j=0; j<matrix_.columns(); j++)
     70          if(!std::isnan(matrix_(i,j)))
     71            weight(i,j)=1.0;
     72      MatrixLookup weightview(weight);
     73      ranker_=new InputRanker(matrix_, target_, *score_, weightview);
     74      rows=nof_inputs_;
     75    }
     76    centroids_=gslapi::matrix(rows, target_.nof_classes());
     77    gslapi::matrix nof_in_class(rows, target_.nof_classes());
     78    for(size_t i=0; i<rows; i++) {
    6579      for(size_t j=0; j<matrix_.columns(); j++) {
    66         if(!std::isnan(matrix_(i,j))) {
    67           centroids_(i,target_(j)) += matrix_(i,j);
     80        double value=matrix_(i,j);
     81        if(score_)
     82          value=matrix_(ranker_->id(i),j);
     83        if(!std::isnan(value)) {
     84          centroids_(i,target_(j)) += value;
    6885          nof_in_class(i,target_(j))++;
    6986        }
     
    7592  }
    7693
     94
    7795  void NCC::predict(const DataLookup1D& input,
    7896                    gslapi::vector& prediction) const
    7997  {
    80     // Markus : ranker_ should be taken into account if used!!!
    81 
    8298    prediction=gslapi::vector(centroids_.columns());   
    83     gslapi::vector w(input.size(),0);
    84     for(size_t i=0; i<input.size(); i++)  // take care of missing values
    85       if(!std::isnan(input(i)))
     99    size_t size=input.size();
     100    if(ranker_)
     101      size=nof_inputs_;
     102    gslapi::vector w(size,0);
     103    gslapi::vector value(size,0);
     104    for(size_t i=0; i<size; i++)  { // take care of missing values
     105      value(i)=input(i);
     106      if(ranker_)
     107        value(i)=input(ranker_->id(i));
     108      if(!std::isnan(value(i)))
    86109        w(i)=1.0;
     110    }
    87111    for(size_t j=0; j<centroids_.columns(); j++)
    88       prediction(j)=distance_(gslapi::vector(input),
    89                              gslapi::vector(centroids_,j,false),w, w);   
     112      prediction(j)=distance_(value,gslapi::vector(centroids_,j,false),w, w);   
    90113  }
    91114
     
    94117                    gslapi::matrix& prediction) const
    95118  {
    96     // Markus : ranker_ should be taken into account if used!!!
    97    
    98119    prediction=gslapi::matrix(centroids_.columns(), input.columns());   
    99120    for(size_t j=0; j<input.columns();j++) {     
     
    108129  // additional operators
    109130
    110   std::ostream& operator<< (std::ostream& s, const NCC& ncc) {
     131//  std::ostream& operator<< (std::ostream& s, const NCC& ncc) {
    111132//    std::copy(ncc.classes().begin(), ncc.classes().end(),
    112133//              std::ostream_iterator<std::map<double, u_int>::value_type>
    113134//              (s, "\n"));
    114     s << "\n" << ncc.centroids() << "\n";
    115     return s;
    116   }
     135//    s << "\n" << ncc.centroids() << "\n";
     136//    return s;
     137//  }
    117138
    118139}} // of namespace classifier and namespace theplu
  • trunk/lib/classifier/NCC.h

    r525 r526  
    2222  class DataLookup1D;
    2323  class DataLookup2D;
     24  class MatrixLookup;
    2425
    2526  ///
    26   /// Class for Nearest Centroid Classification
     27  /// Class for Nearest Centroid Classification.
    2728  ///
    2829
     
    3536    /// the distance measure as input.
    3637    ///
    37     NCC(const DataLookup2D&, const Target&, const statistics::Distance&);
     38    NCC(const MatrixLookup&, const Target&, const statistics::Distance&);
    3839
    39     NCC(const DataLookup2D&, const Target&, const statistics::Distance&,
     40
     41    ///
     42    /// Constructor taking the training data, the target vector, the
     43    /// distance measure, the score used to rank data inputs, and the
     44    /// number of top ranked data inputs to use in the classification.
     45    ///
     46    NCC(const MatrixLookup&, const Target&, const statistics::Distance&,
    4047        statistics::Score&, const size_t);
    4148
    4249    virtual ~NCC();
    4350
     51    ///
     52    /// @return the centroids for each class as columns in a matrix.
     53    ///
    4454    const gslapi::matrix& centroids(void) const {return centroids_;}
    4555
     
    4757    make_classifier(const DataLookup2D&, const Target&) const;
    4858   
     59    ///
     60    /// Train the classifier using the training data. Centroids are
     61    /// calculated for each class.
     62    ///
     63    /// @return true if training succedeed.
     64    ///
    4965    bool train();
    5066
    5167
    5268    ///
    53     /// Calculate the scores to each centroid for a test sample
     69    /// Calculate the distance to each centroid for a test sample
    5470    ///
    5571    void predict(const DataLookup1D&, gslapi::vector&) const;
    5672   
    5773    ///
    58     /// Calculate the scores to each centroid for test samples
     74    /// Calculate the distance to each centroid for test samples
    5975    ///
    6076    void predict(const DataLookup2D&, gslapi::matrix&) const;
     
    6480    gslapi::matrix centroids_;
    6581    const statistics::Distance& distance_;                 
    66     const DataLookup2D& matrix_;
     82    const MatrixLookup& matrix_;
    6783
    6884  };
     
    7187  /// The output operator for the NCC class.
    7288  ///
    73   std::ostream& operator<< (std::ostream&, const NCC&);
     89  //  std::ostream& operator<< (std::ostream&, const NCC&);
    7490 
    7591 
  • trunk/lib/classifier/SupervisedClassifier.cc

    r525 r526  
    66namespace classifier {
    77
    8   SupervisedClassifier::SupervisedClassifier(const Target& target,
     8  SupervisedClassifier::SupervisedClassifier(const Target& target)
     9    : target_(target), score_(0), ranker_(0), nof_inputs_(0),
     10      trained_(false)
     11  {
     12  }
     13
     14    SupervisedClassifier::SupervisedClassifier(const Target& target,
    915                                             statistics::Score* score,
    1016                                             const size_t nof_inputs)
     
    1420  }
    1521
     22
    1623}}
  • trunk/lib/classifier/SupervisedClassifier.h

    r525 r526  
    3535    /// Constructor. Taking a vector of target values.
    3636    ///
    37     SupervisedClassifier(const Target&, statistics::Score* =0,
    38                          const size_t=0);
     37    SupervisedClassifier(const Target&);
    3938   
     39
     40    ///
     41    /// Constructor. Taking a vector of target values, a score used to
     42    /// rank data inputs, and the number of top ranked inputs to use
     43    /// in classification.
     44    ///
     45    SupervisedClassifier(const Target&, statistics::Score*, const size_t);
     46
    4047    ///
    4148    /// Destructor
  • trunk/lib/statistics/tScore.cc

    r514 r526  
    6161      (positive.n()+negative.n()-2);
    6262    t_=diff/sqrt(s2*(1.0/positive.sum_w()+1.0/negative.sum_w()));
    63     assert(0);
    6463    if (t_<0 && absolute_)
    6564      t_=-t_;
Note: See TracChangeset for help on using the changeset viewer.