Changeset 1220 for trunk/yat/classifier


Ignore:
Timestamp:
Mar 11, 2008, 1:07:42 AM (16 years ago)
Author:
Peter
Message:

refs #341

Location:
trunk/yat/classifier
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/yat/classifier/Sampler.cc

    r1134 r1220  
    4343  }
    4444
    45   u_long Sampler::size(void) const
     45  size_t Sampler::size(void) const
    4646  {
    4747    return training_index_.size();
  • trunk/yat/classifier/Sampler.h

    r1219 r1220  
    5959    /// @return number of partitions
    6060    ///
    61     u_long size(void) const;
     61    size_t size(void) const;
    6262
    6363    ///
     
    7070    ///
    7171    const utility::Index&
    72     training_index(std::vector<size_t>::size_type i) const;
     72    training_index(size_t i) const;
    7373
    7474    ///
  • trunk/yat/classifier/SubsetGenerator.h

    r1206 r1220  
    66/*
    77  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér, Peter Johansson
    8   Copyright (C) 2007 Peter Johansson
     8  Copyright (C) 2007, 2008 Peter Johansson
    99
    1010  This file is part of the yat library, http://trac.thep.lu.se/yat
     
    3333#include "Sampler.h"
    3434#include "yat/utility/Index.h"
    35 #include "yat/utility/SmartPtr.h"
    3635#include "yat/utility/yat_assert.h"
    3736
     
    4544namespace yat {
    4645namespace classifier { 
    47 
    4846  ///
    49   /// @brief Class splitting a set into training set and validation set.
     47  /// @brief Class splitting Data into training and validation set.
    5048  ///
    51   template <typename T>
     49  /// A SubsetGenerator splits a Data into several training and
     50  /// validation data. A Sampler is used to select samples for a
     51  /// training Data set and a validation Data set, respectively. In
     52  /// addition a FeatureSelector can be used to select Features. For
     53  /// more details see constructors.
     54  ///
     55  /// \note Data must be one of MatrixLookup, MatrixLookupWeighted, or
     56  /// KernelLookup.
     57  ///
     58  template <typename Data>
    5259  class SubsetGenerator
    5360  {
    5461  public:
    5562    /**
    56        type of data that is stored in SubsetGenerator
     63       type of Data that is stored in SubsetGenerator
    5764     */
    58     typedef T value_type;
    59 
    60     ///
    61     /// @brief Constructor
     65    typedef Data value_type;
     66
     67    ///
     68    /// @brief Create SubDataSets
    6269    /// 
    63     /// @param sampler sampler
    64     /// @param data data to split up in validation and training.
    65     ///
    66     SubsetGenerator(const Sampler& sampler, const T& data);
    67 
    68     ///
    69     /// @brief Constructor
     70    /// Creates N training data sets and N validation data sets, where
     71    /// N equals the size of \a sampler. Data must be one of
     72    /// MatrixLookup, MatrixLookupWeighted, or KernelLookup.
     73    ///
     74    /// In case of MatrixLookup or MatrixLookupWeighted, each column
     75    /// corresponds to a sample and the \a sampler is used to select
     76    /// columns. Sampler::training_index(size_t) is used to select
     77    /// columns for the corresponding training_data, and
     78    /// Sampler::validation_index(size_t) is used to select columns
     79    /// for the corresponding validation_data.
     80    ///
     81    /// In case of a KernelLookup it is a bit different. A symmetric
     82    /// training kernel is created using
     83    /// Sampler::training_index(size_t) to select rows and
     84    /// columns. The validation kernel is typically not symmetric, but
     85    /// the columns correspond to a validation sample and each row
     86    /// corresponds to a training sample. Consequently
     87    /// Sampler::training_index(size_t) is used to select rows, and
     88    /// Sampler::validation_index(size_t) is used to select columns.
     89    ///
     90    /// @param sampler Sampler that is used to select samples.
     91    /// @param data Data to split up in validation and training.
     92    ///
     93    SubsetGenerator(const Sampler& sampler, const Data& data);
     94
     95    ///
     96    /// @brief Create SubDataSets with feature selection
    7097    /// 
     98    /// Creates N training data sets and N validation data sets, where
     99    /// N equals the size of \a sampler. The Sampler defines which
     100    /// samples are included in a subset. Likewise a FeatureSelector,
     101    /// \a fs, is used to select features. The selection is
     102    /// not based on the entire dataset but solely on the training
     103    /// dataset. Data must be one of MatrixLookup,
     104    /// MatrixLookupWeighted, or KernelLookup.
     105    ///
     106    /// In case of MatrixLookup or MatrixLookupWeighted, each column
     107    /// corresponds to a sample and the \a sampler is used to select
     108    /// columns. Sampler::training_index(size_t) is used to select
     109    /// columns for the corresponding training_data, and
     110    /// Sampler::validation_index(size_t) is used to select columns
     111    /// for the corresponding validation_data. The FeatureSelector is
     112    /// used to select features, i.e., to select rows to be included
     113    /// in the subsets.
     114    ///
     115    /// In case of a KernelLookup it is a bit different. A symmetric
     116    /// training kernel is created using
     117    /// Sampler::training_index(size_t) to select rows and
     118    /// columns. However, the created KernelLookup is not simply the
     119    /// subkernel of \a data, but each element is recalculated using
     120    /// the features selected by FeatureSelector \a fs. In the
     121    /// validation kernel each column corresponds to a validation
     122    /// sample and each row corresponds to a training
     123    /// sample. Consequently Sampler::training_index(size_t) is used
     124    /// to select rows, and Sampler::validation_index(size_t) is used
     125    /// to select columns. The same set of features is used to
     126    /// calculate the elements as for the training kernel, i.e.,
     127    /// feature selection is based on training data.
     128    ///
    71129    /// @param sampler taking care of partitioning dataset
    72130    /// @param data data to be split up in validation and training.
    73131    /// @param fs Object selecting features for each subset
    74132    ///
    75     SubsetGenerator(const Sampler& sampler, const T& data,
     133    SubsetGenerator(const Sampler& sampler, const Data& data,
    76134                    FeatureSelector& fs);
    77135
     
    84142    /// @return number of subsets
    85143    ///
    86     u_long size(void) const;
     144    size_t size(void) const;
    87145
    88146    ///
     
    92150
    93151    ///
    94     /// @return the sampler for the total set
    95     ///
    96     //    const Sampler& sampler(void) const;
    97 
    98     ///
    99     /// @return training data
    100     ///
    101     const T& training_data(size_t i) const;
    102 
     152    /// See constructors for details on how training data are
     153    /// generated.
     154    ///
     155    /// @return ith training data
     156    ///
     157    const Data& training_data(size_t i) const;
     158
     159    ///
     160    /// Features that are used to create ith training data and
     161    /// validation data.
    103162    ///
    104163    /// @return training features
     
    107166
    108167    ///
    109     /// @return training index
     168    /// @return Index of samples included in ith training samples.
    110169    ///
    111170    const utility::Index& training_index(size_t i) const;
    112171
    113172    ///
    114     /// @return training target
    115     ///
    116     const Target& training_target(std::vector<Target>::size_type i) const;
    117 
    118     ///
    119     /// @return validation data
    120     ///
    121     const T& validation_data(size_t i) const;
    122 
    123     ///
    124     /// @return validation index
    125     ///
    126     const utility::Index&
    127     validation_index(std::vector<size_t>::size_type i) const;
    128 
    129     ///
    130     /// @return validation target
    131     ///
    132     const Target& validation_target(std::vector<Target>::size_type i) const;
     173    /// @return Targets of ith set of training samples
     174    ///
     175    const Target& training_target(size_t i) const;
     176
     177    ///
     178    /// See constructors for details on how validation data are
     179    /// generated.
     180    ///
     181    /// @return ith validation data
     182    ///
     183    const Data& validation_data(size_t i) const;
     184
     185    ///
     186    /// @return Index of samples included in ith validation samples.
     187    ///
     188    const utility::Index& validation_index(size_t i) const;
     189
     190    ///
     191    /// @return Targets of ith set of validation samples
     192    ///
     193    const Target& validation_target(size_t i) const;
    133194
    134195  private:
     
    143204    std::vector<utility::Index > features_;
    144205    const Sampler& sampler_;
    145     std::vector<const T*> training_data_;
     206    std::vector<const Data*> training_data_;
    146207    std::vector<Target> training_target_;
    147     std::vector<const T*> validation_data_;
     208    std::vector<const Data*> validation_data_;
    148209    std::vector<Target> validation_target_;
    149210
     
    153214  // templates
    154215
    155   template<typename T>
    156   SubsetGenerator<T>::SubsetGenerator(const Sampler& sampler,
    157                                       const T& data)
     216  template<typename Data>
     217  SubsetGenerator<Data>::SubsetGenerator(const Sampler& sampler,
     218                                         const Data& data)
    158219    : f_selector_(NULL), sampler_(sampler)
    159220  {
     
    170231
    171232
    172   template<typename T>
    173   SubsetGenerator<T>::SubsetGenerator(const Sampler& sampler,
    174                                       const T& data,
     233  template<typename Data>
     234  SubsetGenerator<Data>::SubsetGenerator(const Sampler& sampler,
     235                                      const Data& data,
    175236                                      FeatureSelector& fs)
    176237    : f_selector_(&fs), sampler_(sampler)
     
    188249
    189250
    190   template<typename T>
    191   SubsetGenerator<T>::~SubsetGenerator()
     251  template<typename Data>
     252  SubsetGenerator<Data>::~SubsetGenerator()
    192253  {
    193254    utility::yat_assert<std::runtime_error>(training_data_.size()==validation_data_.size());
     
    199260
    200261
    201   template<typename T>
    202   void SubsetGenerator<T>::build(const MatrixLookup& ml)
     262  template<typename Data>
     263  void SubsetGenerator<Data>::build(const MatrixLookup& ml)
    203264  {
    204265    if (!f_selector_)// no feature selection
     
    231292
    232293
    233   template<typename T>
    234   void SubsetGenerator<T>::build(const MatrixLookupWeighted& ml)
     294  template<typename Data>
     295  void SubsetGenerator<Data>::build(const MatrixLookupWeighted& ml)
    235296  {
    236297    if (!f_selector_)// no feature selection
     
    260321  }
    261322
    262   template<typename T>
    263   void SubsetGenerator<T>::build(const KernelLookup& kernel)
     323  template<typename Data>
     324  void SubsetGenerator<Data>::build(const KernelLookup& kernel)
    264325  {
    265326    for (u_long k=0; k<size(); k++){
     
    304365
    305366
    306   template<typename T>
    307   u_long SubsetGenerator<T>::size(void) const
     367  template<typename Data>
     368  size_t SubsetGenerator<Data>::size(void) const
    308369  {
    309370    return sampler_.size();
     
    311372
    312373
    313   template<typename T>
    314   const Target& SubsetGenerator<T>::target(void) const
     374  template<typename Data>
     375  const Target& SubsetGenerator<Data>::target(void) const
    315376  {
    316377    return sampler_.target();
     
    318379
    319380
    320   template<typename T>
    321   const T&
    322   SubsetGenerator<T>::training_data(size_t i) const
     381  template<typename Data>
     382  const Data&
     383  SubsetGenerator<Data>::training_data(size_t i) const
    323384  {
    324385    return *(training_data_[i]);
     
    326387
    327388
    328   template<typename T>
     389  template<typename Data>
    329390  const utility::Index&
    330   SubsetGenerator<T>::training_features(size_t i) const
     391  SubsetGenerator<Data>::training_features(size_t i) const
    331392  {
    332393    utility::yat_assert<std::runtime_error>(features_.size(),
     
    336397
    337398
    338   template<typename T>
     399  template<typename Data>
    339400  const utility::Index&
    340   SubsetGenerator<T>::training_index(size_t i) const
     401  SubsetGenerator<Data>::training_index(size_t i) const
    341402  {
    342403    return sampler_.training_index(i);
     
    344405
    345406
    346   template<typename T>
     407  template<typename Data>
    347408  const Target&
    348   SubsetGenerator<T>::training_target(std::vector<Target>::size_type i) const
     409  SubsetGenerator<Data>::training_target(size_t i) const
    349410  {
    350411    return training_target_[i];
     
    352413
    353414
    354   template<typename T>
    355   const T&
    356   SubsetGenerator<T>::validation_data(size_t i) const
     415  template<typename Data>
     416  const Data&
     417  SubsetGenerator<Data>::validation_data(size_t i) const
    357418  {
    358419    return *(validation_data_[i]);
     
    360421
    361422
    362   template<typename T>
     423  template<typename Data>
    363424  const utility::Index&
    364   SubsetGenerator<T>::validation_index(std::vector<size_t>::size_type i) const
     425  SubsetGenerator<Data>::validation_index(size_t i) const
    365426  {
    366427    return sampler_.validation_index(i);
     
    368429
    369430
    370   template<typename T>
     431  template<typename Data>
    371432  const Target&
    372   SubsetGenerator<T>::validation_target(std::vector<Target>::size_type i) const
     433  SubsetGenerator<Data>::validation_target(size_t i) const
    373434  {
    374435    return validation_target_[i];
Note: See TracChangeset for help on using the changeset viewer.