source: trunk/yat/classifier/SubsetGenerator.h @ 1201

Last change on this file since 1201 was 1201, checked in by Peter, 14 years ago

Return SmartPtr rather than a conventional pointer when an object is dynamically allocated; also fixed a bug in SubsetGenerator for Kernel with feature selection.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 10.2 KB
Line 
1#ifndef _theplu_yat_classifier_subset_generator_
2#define _theplu_yat_classifier_subset_generator_
3
4// $Id: SubsetGenerator.h 1201 2008-03-05 02:56:13Z peter $
5
6/*
7  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér, Peter Johansson
8  Copyright (C) 2007 Peter Johansson
9
10  This file is part of the yat library, http://trac.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 2 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25  02111-1307, USA.
26*/
27
28#include "FeatureSelector.h"
29#include "KernelLookup.h"
30#include "MatrixLookup.h"
31#include "MatrixLookupWeighted.h"
32#include "Target.h"
33#include "Sampler.h"
34#include "yat/utility/Index.h"
35#include "yat/utility/SmartPtr.h"
36#include "yat/utility/yat_assert.h"
37
38#include <algorithm>
39#include <cassert>
40#include <utility>
41#include <typeinfo>
42#include <vector>
43
44namespace theplu {
45namespace yat {
46namespace classifier { 
47
48  ///
49  /// @brief Class splitting a set into training set and validation set.
50  ///
51  template <typename T> 
52  class SubsetGenerator
53  {
54  public:
55    /**
56       type of data that is stored in SubsetGenerator
57     */
58    typedef T value_type;
59
60    ///
61    /// @brief Constructor
62    /// 
63    /// @param sampler sampler
64    /// @param data data to split up in validation and training.
65    ///
66    SubsetGenerator(const Sampler& sampler, const T& data);
67
68    ///
69    /// @brief Constructor
70    /// 
71    /// @param sampler taking care of partioning dataset
72    /// @param data data to be split up in validation and training.
73    /// @param fs Object selecting features for each subset
74    ///
75    SubsetGenerator(const Sampler& sampler, const T& data, 
76                    FeatureSelector& fs);
77
78    ///
79    /// Destructor
80    ///
81    ~SubsetGenerator();
82 
83    ///
84    /// @return number of subsets
85    ///
86    u_long size(void) const;
87
88    ///
89    /// @return the target for the total set
90    ///
91    const Target& target(void) const;
92
93    ///
94    /// @return the sampler for the total set
95    ///
96    //    const Sampler& sampler(void) const;
97
98    ///
99    /// @return training data
100    ///
101    const T& training_data(size_t i) const;
102
103    ///
104    /// @return training features
105    ///
106    const utility::Index& training_features(size_t i) const;
107
108    ///
109    /// @return training index
110    ///
111    const utility::Index& training_index(size_t i) const;
112
113    ///
114    /// @return training target
115    ///
116    const Target& training_target(std::vector<Target>::size_type i) const;
117
118    ///
119    /// @return validation data
120    ///
121    const T& validation_data(size_t i) const;
122
123    ///
124    /// @return validation index
125    ///
126    const utility::Index&
127    validation_index(std::vector<size_t>::size_type i) const;
128
129    ///
130    /// @return validation target
131    ///
132    const Target& validation_target(std::vector<Target>::size_type i) const;
133
134  private:
135    void build(const MatrixLookup&);
136    void build(const MatrixLookupWeighted&);
137    void build(const KernelLookup&);
138
139    SubsetGenerator(const SubsetGenerator&);
140    const SubsetGenerator& operator=(const SubsetGenerator&) const;
141
142    FeatureSelector* f_selector_;
143    std::vector<utility::Index > features_;
144    const Sampler& sampler_;
145    std::vector<const T*> training_data_;
146    std::vector<Target> training_target_;
147    std::vector<const T*> validation_data_;
148    std::vector<Target> validation_target_;
149
150  };
151
152
153  // templates
154
155  template<typename T>
156  SubsetGenerator<T>::SubsetGenerator(const Sampler& sampler, 
157                                      const T& data)
158    : f_selector_(NULL), sampler_(sampler)
159  { 
160    utility::yat_assert<std::runtime_error>(target().size()==data.columns());
161
162    training_data_.reserve(sampler_.size());
163    validation_data_.reserve(sampler_.size());
164    build(data);
165    utility::yat_assert<std::runtime_error>(training_data_.size()==size());
166    utility::yat_assert<std::runtime_error>(training_target_.size()==size());
167    utility::yat_assert<std::runtime_error>(validation_data_.size()==size());
168    utility::yat_assert<std::runtime_error>(validation_target_.size()==size());
169  }
170
171
172  template<typename T>
173  SubsetGenerator<T>::SubsetGenerator(const Sampler& sampler, 
174                                      const T& data, 
175                                      FeatureSelector& fs)
176    : f_selector_(&fs), sampler_(sampler)
177  { 
178    utility::yat_assert<std::runtime_error>(target().size()==data.columns());
179    features_.reserve(size());
180    training_data_.reserve(size());
181    validation_data_.reserve(size());
182    build(data);
183    utility::yat_assert<std::runtime_error>(training_data_.size()==size());
184    utility::yat_assert<std::runtime_error>(training_target_.size()==size());
185    utility::yat_assert<std::runtime_error>(validation_data_.size()==size());
186    utility::yat_assert<std::runtime_error>(validation_target_.size()==size());
187  }
188
189
190  template<typename T>
191  SubsetGenerator<T>::~SubsetGenerator()
192  {
193    utility::yat_assert<std::runtime_error>(training_data_.size()==validation_data_.size());
194    for (size_t i=0; i<training_data_.size(); i++) 
195      delete training_data_[i];
196    for (size_t i=0; i<validation_data_.size(); i++) 
197      delete validation_data_[i];
198  }
199
200
201  template<typename T>
202  void SubsetGenerator<T>::build(const MatrixLookup& ml)
203  {
204    for (size_t k=0; k<size(); k++){
205      training_target_.push_back(Target(target(),training_index(k)));
206      validation_target_.push_back(Target(target(),validation_index(k)));
207      if (f_selector_){
208        // training data with no feature selection
209        const MatrixLookup* train_data_all_feat = 
210          new MatrixLookup(ml, training_index(k), false);
211        // use these data to create feature selection
212        utility::yat_assert<std::runtime_error>(train_data_all_feat);
213        f_selector_->update(*train_data_all_feat, training_target(k));
214        // get features
215        features_.push_back(f_selector_->features());
216        utility::yat_assert<std::runtime_error>(train_data_all_feat);
217        delete train_data_all_feat;
218      }
219      else // no feature selection
220        features_.push_back(utility::Index(ml.rows()));
221
222     
223      // Dynamically allocated. Must be deleted in destructor.
224      training_data_.push_back(new MatrixLookup(ml,features_.back(), 
225                                                training_index(k)));
226      validation_data_.push_back(new MatrixLookup(ml,features_.back(), 
227                                                  validation_index(k)));     
228    }
229
230  }
231
232
233  template<typename T>
234  void SubsetGenerator<T>::build(const MatrixLookupWeighted& ml)
235  {
236    for (u_long k=0; k<size(); k++){
237      training_target_.push_back(Target(target(),training_index(k)));
238      validation_target_.push_back(Target(target(),validation_index(k)));
239      if (f_selector_){
240        // training data with no feature selection
241        const MatrixLookupWeighted* train_data_all_feat = 
242          new MatrixLookupWeighted(ml, training_index(k), false);
243        // use these data to create feature selection
244        f_selector_->update(*train_data_all_feat, training_target(k));
245        // get features
246        features_.push_back(f_selector_->features());
247        delete train_data_all_feat;
248      }
249      else // no feature selection
250        features_.push_back(utility::Index(ml.rows()));
251
252
253      // Dynamically allocated. Must be deleted in destructor.
254      training_data_.push_back(new MatrixLookupWeighted(ml, features_.back(), 
255                                                        training_index(k)));
256      validation_data_.push_back(new MatrixLookupWeighted(ml, features_.back(), 
257                                                          validation_index(k)));
258    }
259  }
260
261  template<typename T>
262  void SubsetGenerator<T>::build(const KernelLookup& kernel)
263  {
264    for (u_long k=0; k<size(); k++){
265      training_target_.push_back(Target(target(),training_index(k)));
266      validation_target_.push_back(Target(target(),validation_index(k)));
267
268      if (f_selector_){
269        if (kernel.weighted()){
270          utility::SmartPtr<const MatrixLookupWeighted> ml=
271            kernel.data_weighted();
272          f_selector_->update(MatrixLookupWeighted(*ml,training_index(k),false), 
273                              training_target(k));
274        }
275        else {
276          utility::SmartPtr<const MatrixLookup> ml=kernel.data();
277          f_selector_->update(MatrixLookup(*ml,training_index(k), false), 
278                              training_target(k));
279        } 
280        features_.push_back(f_selector_->features());
281        utility::SmartPtr<const KernelLookup> kl = 
282          kernel.selected(features_.back());
283        // Dynamically allocated. Must be deleted in destructor.
284        training_data_.push_back(new KernelLookup(*kl,training_index(k),
285                                                  training_index(k)));
286        validation_data_.push_back(new KernelLookup(*kl, training_index(k), 
287                                                    validation_index(k)));
288      }
289      else {// no feature selection
290        training_data_.push_back(new KernelLookup(kernel, training_index(k),
291                                                  training_index(k)));
292        validation_data_.push_back(new KernelLookup(kernel, 
293                                                    training_index(k), 
294                                                    validation_index(k)));
295      }
296     
297    }
298  }
299
300
301  template<typename T>
302  u_long SubsetGenerator<T>::size(void) const
303  {
304    return sampler_.size();
305  }
306
307
308  template<typename T>
309  const Target& SubsetGenerator<T>::target(void) const
310  {
311    return sampler_.target();
312  }
313
314
315  template<typename T>
316  const T&
317  SubsetGenerator<T>::training_data(size_t i) const 
318  {
319    return *(training_data_[i]);
320  }
321
322
323  template<typename T>
324  const utility::Index&
325  SubsetGenerator<T>::training_features(size_t i) const
326  {
327    return f_selector_ ? features_[i] : features_[0];
328  }
329
330
331  template<typename T>
332  const utility::Index&
333  SubsetGenerator<T>::training_index(size_t i) const
334  {
335    return sampler_.training_index(i);
336  }
337
338
339  template<typename T>
340  const Target&
341  SubsetGenerator<T>::training_target(std::vector<Target>::size_type i) const
342  {
343    return training_target_[i];
344  }
345
346
347  template<typename T>
348  const T&
349  SubsetGenerator<T>::validation_data(size_t i) const
350  {
351    return *(validation_data_[i]);
352  }
353
354
355  template<typename T>
356  const utility::Index&
357  SubsetGenerator<T>::validation_index(std::vector<size_t>::size_type i) const
358  {
359    return sampler_.validation_index(i);
360  }
361
362
363  template<typename T>
364  const Target&
365  SubsetGenerator<T>::validation_target(std::vector<Target>::size_type i) const
366  {
367    return validation_target_[i];
368  }
369
370}}} // of namespace classifier, yat, and theplu
371
372#endif
373
Note: See TracBrowser for help on using the repository browser.