source: trunk/yat/classifier/SubsetGenerator.h @ 1186

Last change on this file since 1186 was 1186, checked in by Peter, 14 years ago

minor docs issues fixed

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 10.2 KB
Line 
1#ifndef _theplu_yat_classifier_subset_generator_
2#define _theplu_yat_classifier_subset_generator_
3
4// $Id: SubsetGenerator.h 1186 2008-02-28 21:00:32Z peter $
5
6/*
7  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér, Peter Johansson
8  Copyright (C) 2007 Peter Johansson
9
10  This file is part of the yat library, http://trac.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 2 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25  02111-1307, USA.
26*/
27
28#include "FeatureSelector.h"
29#include "KernelLookup.h"
30#include "MatrixLookup.h"
31#include "MatrixLookupWeighted.h"
32#include "Target.h"
33#include "Sampler.h"
34#include "yat/utility/Index.h"
35#include "yat/utility/yat_assert.h"
36
37#include <algorithm>
38#include <cassert>
39#include <utility>
40#include <typeinfo>
41#include <vector>
42
43namespace theplu {
44namespace yat {
45namespace classifier { 
46
47  ///
48  /// @brief Class splitting a set into training set and validation set.
49  ///
50  template <typename T> 
51  class SubsetGenerator
52  {
53  public:
54    /**
55       type of data that is stored in SubsetGenerator
56     */
57    typedef T value_type;
58
59    ///
60    /// @brief Constructor
61    /// 
62    /// @param sampler sampler
63    /// @param data data to split up in validation and training.
64    ///
65    SubsetGenerator(const Sampler& sampler, const T& data);
66
67    ///
68    /// @brief Constructor
69    /// 
70    /// @param sampler taking care of partioning dataset
71    /// @param data data to be split up in validation and training.
72    /// @param fs Object selecting features for each subset
73    ///
74    SubsetGenerator(const Sampler& sampler, const T& data, 
75                    FeatureSelector& fs);
76
77    ///
78    /// Destructor
79    ///
80    ~SubsetGenerator();
81 
82    ///
83    /// @return number of subsets
84    ///
85    u_long size(void) const;
86
87    ///
88    /// @return the target for the total set
89    ///
90    const Target& target(void) const;
91
92    ///
93    /// @return the sampler for the total set
94    ///
95    //    const Sampler& sampler(void) const;
96
97    ///
98    /// @return training data
99    ///
100    const T& training_data(size_t i) const;
101
102    ///
103    /// @return training features
104    ///
105    const utility::Index& training_features(size_t i) const;
106
107    ///
108    /// @return training index
109    ///
110    const utility::Index& training_index(size_t i) const;
111
112    ///
113    /// @return training target
114    ///
115    const Target& training_target(std::vector<Target>::size_type i) const;
116
117    ///
118    /// @return validation data
119    ///
120    const T& validation_data(size_t i) const;
121
122    ///
123    /// @return validation index
124    ///
125    const utility::Index&
126    validation_index(std::vector<size_t>::size_type i) const;
127
128    ///
129    /// @return validation target
130    ///
131    const Target& validation_target(std::vector<Target>::size_type i) const;
132
133  private:
134    void build(const MatrixLookup&);
135    void build(const MatrixLookupWeighted&);
136    void build(const KernelLookup&);
137
138    SubsetGenerator(const SubsetGenerator&);
139    const SubsetGenerator& operator=(const SubsetGenerator&) const;
140
141    FeatureSelector* f_selector_;
142    std::vector<utility::Index > features_;
143    const Sampler& sampler_;
144    std::vector<const T*> training_data_;
145    std::vector<Target> training_target_;
146    std::vector<const T*> validation_data_;
147    std::vector<Target> validation_target_;
148
149  };
150
151
152  // templates
153
154  template<typename T>
155  SubsetGenerator<T>::SubsetGenerator(const Sampler& sampler, 
156                                      const T& data)
157    : f_selector_(NULL), sampler_(sampler)
158  { 
159    utility::yat_assert<std::runtime_error>(target().size()==data.columns());
160
161    training_data_.reserve(sampler_.size());
162    validation_data_.reserve(sampler_.size());
163    build(data);
164    utility::yat_assert<std::runtime_error>(training_data_.size()==size());
165    utility::yat_assert<std::runtime_error>(training_target_.size()==size());
166    utility::yat_assert<std::runtime_error>(validation_data_.size()==size());
167    utility::yat_assert<std::runtime_error>(validation_target_.size()==size());
168  }
169
170
171  template<typename T>
172  SubsetGenerator<T>::SubsetGenerator(const Sampler& sampler, 
173                                      const T& data, 
174                                      FeatureSelector& fs)
175    : f_selector_(&fs), sampler_(sampler)
176  { 
177    utility::yat_assert<std::runtime_error>(target().size()==data.columns());
178    features_.reserve(size());
179    training_data_.reserve(size());
180    validation_data_.reserve(size());
181    build(data);
182    utility::yat_assert<std::runtime_error>(training_data_.size()==size());
183    utility::yat_assert<std::runtime_error>(training_target_.size()==size());
184    utility::yat_assert<std::runtime_error>(validation_data_.size()==size());
185    utility::yat_assert<std::runtime_error>(validation_target_.size()==size());
186  }
187
188
189  template<typename T>
190  SubsetGenerator<T>::~SubsetGenerator()
191  {
192    utility::yat_assert<std::runtime_error>(training_data_.size()==validation_data_.size());
193    for (size_t i=0; i<training_data_.size(); i++) 
194      delete training_data_[i];
195    for (size_t i=0; i<validation_data_.size(); i++) 
196      delete validation_data_[i];
197  }
198
199
200  template<typename T>
201  void SubsetGenerator<T>::build(const MatrixLookup& ml)
202  {
203    for (size_t k=0; k<size(); k++){
204      training_target_.push_back(Target(target(),training_index(k)));
205      validation_target_.push_back(Target(target(),validation_index(k)));
206      if (f_selector_){
207        // training data with no feature selection
208        const MatrixLookup* train_data_all_feat = 
209          new MatrixLookup(ml, training_index(k), false);
210        // use these data to create feature selection
211        utility::yat_assert<std::runtime_error>(train_data_all_feat);
212        f_selector_->update(*train_data_all_feat, training_target(k));
213        // get features
214        features_.push_back(f_selector_->features());
215        utility::yat_assert<std::runtime_error>(train_data_all_feat);
216        delete train_data_all_feat;
217      }
218      else // no feature selection
219        features_.push_back(utility::Index(ml.rows()));
220
221     
222      // Dynamically allocated. Must be deleted in destructor.
223      training_data_.push_back(new MatrixLookup(ml,features_.back(), 
224                                                training_index(k)));
225      validation_data_.push_back(new MatrixLookup(ml,features_.back(), 
226                                                  validation_index(k)));     
227    }
228
229  }
230
231
232  template<typename T>
233  void SubsetGenerator<T>::build(const MatrixLookupWeighted& ml)
234  {
235    for (u_long k=0; k<size(); k++){
236      training_target_.push_back(Target(target(),training_index(k)));
237      validation_target_.push_back(Target(target(),validation_index(k)));
238      if (f_selector_){
239        // training data with no feature selection
240        const MatrixLookupWeighted* train_data_all_feat = 
241          new MatrixLookupWeighted(ml, training_index(k), false);
242        // use these data to create feature selection
243        f_selector_->update(*train_data_all_feat, training_target(k));
244        // get features
245        features_.push_back(f_selector_->features());
246        delete train_data_all_feat;
247      }
248      else // no feature selection
249        features_.push_back(utility::Index(ml.rows()));
250
251
252      // Dynamically allocated. Must be deleted in destructor.
253      training_data_.push_back(new MatrixLookupWeighted(ml, features_.back(), 
254                                                        training_index(k)));
255      validation_data_.push_back(new MatrixLookupWeighted(ml, features_.back(), 
256                                                          validation_index(k)));
257    }
258  }
259
260  template<typename T>
261  void SubsetGenerator<T>::build(const KernelLookup& kernel)
262  {
263    for (u_long k=0; k<size(); k++){
264      training_target_.push_back(Target(target(),training_index(k)));
265      validation_target_.push_back(Target(target(),validation_index(k)));
266
267      if (f_selector_){
268        if (kernel.weighted()){
269          utility::SmartPtr<const MatrixLookupWeighted> ml=
270            kernel.data_weighted();
271          f_selector_->update(MatrixLookupWeighted(*ml,training_index(k),false), 
272                              training_target(k));
273        }
274        else {
275          utility::SmartPtr<const MatrixLookup> ml=kernel.data();
276          f_selector_->update(MatrixLookup(*ml,training_index(k), false), 
277                              training_target(k));
278        } 
279        features_.push_back(f_selector_->features());
280        const KernelLookup* kl = kernel.selected(features_.back());
281        // Dynamically allocated. Must be deleted in destructor.
282        training_data_.push_back(new KernelLookup(kernel,training_index(k),
283                                                  training_index(k)));
284        validation_data_.push_back(new KernelLookup(kernel, training_index(k), 
285                                                    validation_index(k)));
286        utility::yat_assert<std::runtime_error>(kl);
287        delete kl;
288      }
289      else {// no feature selection
290        training_data_.push_back(new KernelLookup(kernel, training_index(k),
291                                                  training_index(k)));
292        validation_data_.push_back(new KernelLookup(kernel, 
293                                                    training_index(k), 
294                                                    validation_index(k)));
295      }
296     
297    }
298  }
299
300
301  template<typename T>
302  u_long SubsetGenerator<T>::size(void) const
303  {
304    return sampler_.size();
305  }
306
307
308  template<typename T>
309  const Target& SubsetGenerator<T>::target(void) const
310  {
311    return sampler_.target();
312  }
313
314
315  template<typename T>
316  const T&
317  SubsetGenerator<T>::training_data(size_t i) const 
318  {
319    return *(training_data_[i]);
320  }
321
322
323  template<typename T>
324  const utility::Index&
325  SubsetGenerator<T>::training_features(size_t i) const
326  {
327    return f_selector_ ? features_[i] : features_[0];
328  }
329
330
331  template<typename T>
332  const utility::Index&
333  SubsetGenerator<T>::training_index(size_t i) const
334  {
335    return sampler_.training_index(i);
336  }
337
338
339  template<typename T>
340  const Target&
341  SubsetGenerator<T>::training_target(std::vector<Target>::size_type i) const
342  {
343    return training_target_[i];
344  }
345
346
347  template<typename T>
348  const T&
349  SubsetGenerator<T>::validation_data(size_t i) const
350  {
351    return *(validation_data_[i]);
352  }
353
354
355  template<typename T>
356  const utility::Index&
357  SubsetGenerator<T>::validation_index(std::vector<size_t>::size_type i) const
358  {
359    return sampler_.validation_index(i);
360  }
361
362
363  template<typename T>
364  const Target&
365  SubsetGenerator<T>::validation_target(std::vector<Target>::size_type i) const
366  {
367    return validation_target_[i];
368  }
369
370}}} // of namespace classifier, yat, and theplu
371
372#endif
373
Note: See TracBrowser for help on using the repository browser.