Changeset 607


Ignore:
Timestamp:
Aug 29, 2006, 4:42:28 PM (15 years ago)
Author:
Peter
Message:

ref #96 included FeatureSelection in CrossSplitter. Also happened to add properties to files.

Location:
trunk/c++_tools/classifier
Files:
29 edited

Legend:

Unmodified
Added
Removed
  • trunk/c++_tools/classifier/ConsensusInputRanker.cc

    • Property svn:keywords changed from Author Date Id Revision to Id
  • trunk/c++_tools/classifier/ConsensusInputRanker.h

    • Property svn:keywords changed from Author Date Id Revision to Id
  • trunk/c++_tools/classifier/CrossSplitter.cc

    • Property svn:keywords changed from Author Date Id Revision to Id
    r558 r607  
    44#include <c++_tools/classifier/CrossSplitter.h>
    55#include <c++_tools/classifier/DataLookup2D.h>
     6#include <c++_tools/classifier/FeatureSelector.h>
    67#include <c++_tools/classifier/Target.h>
    78#include <c++_tools/random/random.h>
     
    4142      validation_target_.push_back(Target(target,validation_index_[i]));
    4243    }
     44
     45    // No feature selection, hence features same for all partitions
     46    // and can be stored in features_[0]
     47    features_.resize(1);
     48    features_[0].reserve(data.rows());
     49    for (size_t i=0; i<data.rows(); ++i)
     50      features_[0].push_back(i);
     51
    4352    assert(training_data_.size()==N);
    4453    assert(training_weight_.size()==N);
     
    7382      validation_target_.push_back(Target(target,validation_index_[i]));
    7483    }
     84    assert(training_data_.size()==N);
     85    assert(training_weight_.size()==N);
     86    assert(training_target_.size()==N);
     87    assert(validation_data_.size()==N);
     88    assert(validation_weight_.size()==N);
     89    assert(validation_target_.size()==N);
     90  }
     91
     92  CrossSplitter::CrossSplitter(const Target& target, const DataLookup2D& data,
     93                               const size_t N, const size_t k,
     94                               FeatureSelector& fs)
     95    : k_(k), state_(0), target_(target), weighted_(false), f_selector_(&fs)
     96  {
     97    assert(target.size()>1);
     98    assert(target.size()==data.columns());
     99
     100    build(target, N, k);
     101    features_.reserve(N);
     102    training_data_.reserve(N);
     103    training_weight_.reserve(N);
     104    validation_data_.reserve(N);
     105    validation_weight_.reserve(N);
     106     
     107    for (size_t i=0; i<N; i++){
     108     
     109      // training data with no feature selection
     110      const DataLookup2D* train_data_all_feat =
     111        data.training_data(training_index_[i]);
     112      // use these data to create feature selection
     113      f_selector_->update(*train_data_all_feat, training_target_[i]);
     114      // get features
     115      features_.push_back(f_selector_->features());
     116      delete train_data_all_feat;
     117
     118      // Dynamically allocated. Must be deleted in destructor.
     119      training_data_.push_back(data.training_data(features_[i],
     120                                                  training_index_[i]));
     121      training_weight_.push_back
     122        (new MatrixLookup(training_data_.back()->rows(),
     123                          training_data_.back()->columns(),1));
     124      validation_data_.push_back(data.validation_data(features_[i],
     125                                                      training_index_[i],
     126                                                      validation_index_[i]));
     127      validation_weight_.push_back
     128        (new MatrixLookup(validation_data_.back()->rows(),
     129                          validation_data_.back()->columns(),1));
     130
     131
     132      training_target_.push_back(Target(target,training_index_[i]));
     133      validation_target_.push_back(Target(target,validation_index_[i]));
     134    }
     135
     136    // No feature selection, hence features same for all partitions
     137    // and can be stored in features_[0]
     138    features_.resize(1);
     139    features_[0].reserve(data.rows());
     140    for (size_t i=0; i<data.rows(); ++i)
     141      features_[0].push_back(i);
     142
    75143    assert(training_data_.size()==N);
    76144    assert(training_weight_.size()==N);
  • trunk/c++_tools/classifier/CrossSplitter.h

    • Property svn:keywords changed from Author Date Id Revision to Id
    r560 r607  
    1414namespace classifier { 
    1515  class DataLookup2D;
    16 
     16  class FeatureSelector;
    1717
    1818  ///
     
    5050    /// @parameter k for k-fold crossvalidation
    5151    ///
     52    /// @todo This will most likely be removed.
    5253    CrossSplitter(const Target& target, const DataLookup2D& data,
    5354                  const MatrixLookup& weight,
    5455                  const size_t N, const size_t k);
    5556
     57    ///
     58    /// @brief Constructor
     59    /// 
     60    /// @parameter Target targets
     61    /// @parameter data data to split up in validation and training.
     62    /// @parameter N total number of partitions
     63    /// @parameter k for k-fold crossvalidation
     64    ///
     65    CrossSplitter(const Target& target, const DataLookup2D& data,
     66                  const size_t N, const size_t k, FeatureSelector&);
     67
    5668    ///
    5769    /// Destructor
     
    98110    /// @note if state is invalid the result is undefined
    99111    ///
     112    inline const std::vector<size_t>& training_features(void) const
     113    { assert(more()); return f_selector_ ? features_[state_] : features_[0]; }
     114
     115
     116    /// @return training index
     117    ///
     118    /// @note if state is invalid the result is undefined
     119    ///
    100120    inline const std::vector<size_t>& training_index(void) const
    101121    { assert(more()); return training_index_[state_]; }
    102 
    103122
    104123    ///
     
    172191    std::vector<std::vector<size_t> > validation_index_;
    173192    std::vector<Target> validation_target_;
     193
     194    FeatureSelector* f_selector_;
     195    std::vector<std::vector<size_t> > features_;
     196
    174197  };
    175198
  • trunk/c++_tools/classifier/DataLookup2D.cc

    • Property svn:eol-style set to native
    • Property svn:keywords set to Id
  • trunk/c++_tools/classifier/DataLookup2D.h

    • Property svn:eol-style set to native
    • Property svn:keywords set to Id
    r604 r607  
    8282
    8383    ///
    84     /// @return sub-Lookup of the DataLookup2D
    8584    ///
    86     /// @Note Returns a dynamically allocated DataLookup2D, which has
    87     /// to be deleted by the caller to avoid memory leaks.
    88     ///
    89     virtual const DataLookup2D*
    90     training_data(const std::vector<size_t>&) const=0;
    91 
    92     ///
    93     /// @return number of rows
    94     ///
    95     inline size_t rows(void) const { return row_index_.size(); }
    96 
    97     ///
    98     /// @todo doc
    9985    ///
    10086    virtual const DataLookup2D* selected(const std::vector< size_t > &) const=0;
     
    10793    ///
    10894    virtual const DataLookup2D*
     95    training_data(const std::vector<size_t>&) const=0;
     96
     97    ///
     98    /// @return sub-Lookup of the DataLookup2D
     99    ///
     100    /// @note Returns a dynamically allocated DataLookup2D, which has
     101    /// to be deleted by the caller to avoid memory leaks.
     102    ///
     103    virtual const DataLookup2D*
     104    training_data(const std::vector<size_t>& features,
     105                  const std::vector<size_t>& samples) const=0;
     106
     107    ///
     108    /// @return number of rows
     109    ///
     110    inline size_t rows(void) const { return row_index_.size(); }
     111
     112    ///
     113    /// @return sub-Lookup of the DataLookup2D
     114    ///
     115    /// @note Returns a dynamically allocated DataLookup2D, which has
     116    /// to be deleted by the caller to avoid memory leaks.
     117    ///
     118    virtual const DataLookup2D*
    109119    validation_data(const std::vector<size_t>& train,
     120                    const std::vector<size_t>& val) const=0;
     121
     122    ///
     123    /// @return sub-Lookup of the DataLookup2D
     124    ///
     125    /// @Note Returns a dynamically allocated DataLookup2D, which has
     126    /// to be deleted by the caller to avoid memory leaks.
     127    ///
     128    virtual const DataLookup2D*
     129    validation_data(const std::vector<size_t>& features,
     130                    const std::vector<size_t>& train,
    110131                    const std::vector<size_t>& val) const=0;
    111132
  • trunk/c++_tools/classifier/FeatureSelector.cc

    • Property svn:eol-style set to native
    • Property svn:keywords set to Id
  • trunk/c++_tools/classifier/FeatureSelector.h

    • Property svn:eol-style set to native
    • Property svn:keywords set to Id
    r604 r607  
    3636
    3737    ///
     38    ///
     39    /// @return the features currently held by the selector
     40    inline const std::vector<size_t> features(void) const { return features_; }
     41
     42    ///
    3843    /// Uses @a data to select features.
    3944    ///
  • trunk/c++_tools/classifier/FeatureSelectorIR.cc

    • Property svn:eol-style set to native
    • Property svn:keywords set to Id
  • trunk/c++_tools/classifier/FeatureSelectorIR.h

    • Property svn:eol-style set to native
    • Property svn:keywords set to Id
  • trunk/c++_tools/classifier/GaussianKernelFunction.cc

    • Property svn:keywords changed from Author Date Id Revision to Id
  • trunk/c++_tools/classifier/GaussianKernelFunction.h

    • Property svn:keywords changed from Author Date Id Revision to Id
  • trunk/c++_tools/classifier/InputRanker.cc

    • Property svn:keywords changed from Author Date Id Revision to Id
  • trunk/c++_tools/classifier/InputRanker.h

    • Property svn:keywords changed from Author Date Id Revision to Id
  • trunk/c++_tools/classifier/KernelFunction.h

    • Property svn:keywords changed from Author Date Id Revision to Id
  • trunk/c++_tools/classifier/KernelLookup.cc

    r568 r607  
    8888
    8989  const KernelLookup*
     90  KernelLookup::training_data(const std::vector<size_t>& features,
     91                              const std::vector<size_t>& train) const
     92  {
     93    const Kernel* kernel = kernel_->selected(features);
     94    return new KernelLookup(*kernel, train, train, true);
     95  }
     96
     97
     98  const KernelLookup*
    9099  KernelLookup::validation_data(const std::vector<size_t>& train,
    91100                                const std::vector<size_t>& validation) const
    92101  {
    93102    return new KernelLookup(*this,train,validation);
     103  }
     104
     105
     106  const KernelLookup*
     107  KernelLookup::validation_data(const std::vector<size_t>& features,
     108                                const std::vector<size_t>& train,
     109                                const std::vector<size_t>& validation) const
     110  {
     111    const Kernel* kernel = kernel_->selected(features);
     112    return new KernelLookup(*kernel, train, validation, true);
    94113  }
    95114
  • trunk/c++_tools/classifier/KernelLookup.h

    r597 r607  
    135135
    136136    ///
     137    /// @return a sub-kernel of kernel calculated using data defined by
     138    /// @a features. Each row and each column corresponds to a training
     139    /// sample defined by @a train.
     140    ///
     141    /// @return pointer to dynamically allocated sub-Lookup of the KernelLookup
     142    ///
     143    /// @note Returns a dynamically allocated DataLookup2D, which has
     144    /// to be deleted by the caller to avoid memory leaks.
     145    ///
     146    const KernelLookup* training_data(const std::vector<size_t>& features,
     147                                      const std::vector<size_t>& train) const;
     148
     149
     150    ///
    137151    /// In returned kernel each row corresponds to a training sample
    138152    /// and each column corresponds to a validation sample. The
     
    147161    const KernelLookup*
    148162    validation_data(const std::vector<size_t>& train,
     163                    const std::vector<size_t>& validation) const;
     164
     165
     166    ///
     167    /// In returned kernel each row corresponds to a training sample
     168    /// and each column corresponds to a validation sample. The kernel
     169    /// is based on the features defined by @a features.
     170    ///
     171    /// @note Returns a dynamically allocated DataLookup2D, which has
     172    /// to be deleted by the caller to avoid memory leaks.
     173    ///
     174    const KernelLookup*
     175    validation_data(const std::vector<size_t>& features,
     176                    const std::vector<size_t>& train,
    149177                    const std::vector<size_t>& validation) const;
    150178
  • trunk/c++_tools/classifier/Kernel_MEV.h

    • Property svn:keywords changed from Author Date Id Revision to Id
  • trunk/c++_tools/classifier/Makefile.am

    • Property svn:eol-style set to native
    • Property svn:keywords set to Id
    r604 r607  
    11## Process this file with automake to produce Makefile.in
    22##
    3 ## $Id: Makefile.am 281 2005-04-20 16:45:02Z peter $
     3## $Id$
    44
    55# Copyright (C) 2005, 2006 Jari Häkkinen, Peter Johansson, Markus Ringnèr
  • trunk/c++_tools/classifier/MatrixLookup.cc

    • Property svn:eol-style set to native
    • Property svn:keywords set to Id
    r604 r607  
    140140
    141141  const MatrixLookup*
     142  MatrixLookup::training_data(const std::vector<size_t>& features,
     143                              const std::vector<size_t>& samples) const
     144  {
     145    return new MatrixLookup(*this, features, samples);
     146  }
     147
     148
     149
     150  const MatrixLookup*
    142151  MatrixLookup::validation_data(const std::vector<size_t>& train,
    143152                                const std::vector<size_t>& val) const
    144153  {
    145154    return new MatrixLookup(*this,val, false);
     155  }
     156
     157
     158
     159  const MatrixLookup*
     160  MatrixLookup::validation_data(const std::vector<size_t>& features,
     161                                const std::vector<size_t>& train,
     162                                const std::vector<size_t>& val) const
     163  {
     164    return new MatrixLookup(*this,features, val);
    146165  }
    147166
  • trunk/c++_tools/classifier/MatrixLookup.h

    • Property svn:eol-style set to native
    • Property svn:keywords set to Id
    r604 r607  
    181181   
    182182    ///
     183    /// The created MatrixLookup corresponds to the rows defined by @a
     184    /// features and the columns defined by @a samples in the original
     185    /// MatrixLookup. The created MatrixLookup will fulfill: \f$
     186    /// novel_ml(i,j)=original(features[i],samples[j]) \f$.
     187    ///
     188    /// @return pointer to sub-Lookup of the MatrixLookup
     189    ///
     190    /// @note If underlying matrix goes out of scope or is deleted, the
     191    /// returned pointer becomes invalid and the result of further use is
     192    /// undefined.
     193    ///
     194    const MatrixLookup* training_data(const std::vector<size_t>& features,
     195                                      const std::vector<size_t>& samples) const;
     196   
     197    ///
    183198    /// The created MatrixLookup corresponds to all rows and the
    184199    /// columns defined by @a index in the original MatrixLookup. The
     
    194209    const MatrixLookup* validation_data(const std::vector<size_t>&,
    195210                                        const std::vector<size_t>&) const;
     211    ///
     212    /// The created MatrixLookup corresponds to rows defined by @a features and
     213    /// columns defined by @a val in the original MatrixLookup. The
     214    /// created MatrixLookup will fulfill:
     215    /// \f$ novel_ml(i,j)=original(features[i],val[j]) \f$.
     216    ///
     217    /// @return pointer to sub-Lookup of the MatrixLookup
     218    ///
     219    /// @note If underlying matrix goes out of scope or is deleted, the
     220    /// returned pointer becomes invalid and the result of further use is
     221    /// undefined.
     222    ///
     223    const MatrixLookup* validation_data(const std::vector<size_t>& features,
     224                                        const std::vector<size_t>& train,
     225                                        const std::vector<size_t>& val) const;
    196226    ///
    197227    /// Access operator
  • trunk/c++_tools/classifier/MatrixLookupWeighted.cc

    • Property svn:eol-style set to native
    • Property svn:keywords set to Id
    r604 r607  
    179179
    180180  const MatrixLookupWeighted*
     181  MatrixLookupWeighted::training_data(const std::vector<size_t>& features,
     182                                      const std::vector<size_t>& samples) const
     183  {
     184    return new MatrixLookupWeighted(*this,features, samples);
     185  }
     186
     187
     188
     189  const MatrixLookupWeighted*
    181190  MatrixLookupWeighted::validation_data(const std::vector<size_t>& train,
    182                                 const std::vector<size_t>& val) const
     191                                        const std::vector<size_t>& val) const
    183192  {
    184193    return new MatrixLookupWeighted(*this,val, false);
     194  }
     195
     196
     197
     198  const MatrixLookupWeighted*
     199  MatrixLookupWeighted::validation_data(const std::vector<size_t>& features,
     200                                        const std::vector<size_t>& training,
     201                                        const std::vector<size_t>& val) const
     202  {
     203    return new MatrixLookupWeighted(*this,features, val);
    185204  }
    186205
  • trunk/c++_tools/classifier/MatrixLookupWeighted.h

    • Property svn:eol-style set to native
    • Property svn:keywords set to Id
    r604 r607  
    179179    ///
    180180    /// @todo doc
    181     ///   
    182     const MatrixLookupWeighted*
    183     selected(const std::vector<size_t>& index) const;
     181    ///
     182    const MatrixLookupWeighted* selected(const std::vector<size_t>& i) const;
    184183
    185184    ///
     
    199198   
    200199    ///
     200    /// The created MatrixLookupWeighted corresponds to rows defined
     201    /// by @a features and columns defined by @a samples in the
     202    /// original MatrixLookupWeighted. The created
     203    /// MatrixLookupWeighted will fulfill:
     204    /// \f$ novel_ml(i,j)=original(features[i],samples[j]) \f$.
     205    ///
     206    /// @return pointer to sub-Lookup of the MatrixLookupWeighted
     207    ///
     208    /// @note If underlying matrix goes out of scope or is deleted, the
     209    /// returned pointer becomes invalid and the result of further use is
     210    /// undefined.
     211    ///
     212    const MatrixLookupWeighted*
     213    training_data(const std::vector<size_t>& features,
     214                  const std::vector<size_t>& samples) const;
     215   
     216    ///
    201217    /// The created MatrixLookupWeighted corresponds to all rows and the
    202218    /// columns defined by @a index in the original MatrixLookupWeighted. The
     
    212228    const MatrixLookupWeighted* validation_data(const std::vector<size_t>&,
    213229                                        const std::vector<size_t>&) const;
     230
     231    ///
     232    /// The created MatrixLookupWeighted corresponds to rows defined
     233    /// by @a features and columns defined by @a val in the original
     234    /// MatrixLookupWeighted. The created MatrixLookupWeighted will
     235    /// fulfill: \f$ novel_ml(i,j)=original(features[i],val[j]) \f$.
     236    ///
     237    /// @return pointer to sub-Lookup of the MatrixLookupWeighted
     238    ///
     239    /// @note If underlying matrix goes out of scope or is deleted, the
     240    /// returned pointer becomes invalid and the result of further use is
     241    /// undefined.
     242    ///
     243    const MatrixLookupWeighted*
     244    validation_data(const std::vector<size_t>& features,
     245                    const std::vector<size_t>& training,
     246                    const std::vector<size_t>& val) const;
    214247
    215248    ///
  • trunk/c++_tools/classifier/NCC.cc

    • Property svn:eol-style set to native
    • Property svn:keywords set to Id
  • trunk/c++_tools/classifier/NCC.h

    • Property svn:eol-style set to native
    • Property svn:keywords set to Id
  • trunk/c++_tools/classifier/PolynomialKernelFunction.cc

    • Property svn:keywords changed from Author Date Id Revision to Id
  • trunk/c++_tools/classifier/PolynomialKernelFunction.h

    • Property svn:keywords changed from Author Date Id Revision to Id
  • trunk/c++_tools/classifier/SVM.cc

    • Property svn:keywords changed from Author Date Id Revision to Id
  • trunk/c++_tools/classifier/SVM.h

    • Property svn:keywords changed from Author Date Id Revision to Id
Note: See TracChangeset for help on using the changeset viewer.