source: trunk/yat/classifier/SubsetGenerator.h @ 1167

Last change on this file since 1167 was 1167, checked in by Peter, 16 years ago

refs #342 in KernelLookup and DataLookup2D - nothing changed in MatrixLookups

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 10.2 KB
Line 
1#ifndef _theplu_yat_classifier_subset_generator_
2#define _theplu_yat_classifier_subset_generator_
3
4// $Id: SubsetGenerator.h 1167 2008-02-26 20:02:28Z peter $
5
6/*
7  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér, Peter Johansson
8  Copyright (C) 2007 Peter Johansson
9
10  This file is part of the yat library, http://trac.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 2 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25  02111-1307, USA.
26*/
27
28#include "FeatureSelector.h"
29#include "KernelLookup.h"
30#include "MatrixLookup.h"
31#include "MatrixLookupWeighted.h"
32#include "Target.h"
33#include "Sampler.h"
34#include "yat/utility/Index.h"
35#include "yat/utility/yat_assert.h"
36
37#include <algorithm>
38#include <cassert>
39#include <utility>
40#include <typeinfo>
41#include <vector>
42
43namespace theplu {
44namespace yat {
45namespace classifier { 
46
47  ///
48  /// @brief Class splitting a set into training set and validation set.
49  ///
50  template <typename T> 
51  class SubsetGenerator
52  {
53  public:
54    /**
55       type of data that is stored in SubsetGenerator
56     */
57    typedef T value_type;
58
59    ///
60    /// @brief Constructor
61    /// 
62    /// @param sampler sampler
63    /// @param data data to split up in validation and training.
64    ///
65    SubsetGenerator(const Sampler& sampler, const T& data);
66
67    ///
68    /// @brief Constructor
69    /// 
70    /// @param sampler taking care of partioning dataset
71    /// @param data data to be split up in validation and training.
72    /// @param fs Object selecting features for each subset
73    ///
74    SubsetGenerator(const Sampler& sampler, const T& data, 
75                    FeatureSelector& fs);
76
77    ///
78    /// Destructor
79    ///
80    ~SubsetGenerator();
81 
82    ///
83    /// @return number of subsets
84    ///
85    u_long size(void) const;
86
87    ///
88    /// @return the target for the total set
89    ///
90    const Target& target(void) const;
91
92    ///
93    /// @return the sampler for the total set
94    ///
95    //    const Sampler& sampler(void) const;
96
97    ///
98    /// @return training data
99    ///
100    const T& training_data(size_t i) const;
101
102    ///
103    /// @return training features
104    ///
105    const utility::Index&
106    training_features(std::vector<size_t>::size_type i) const;
107
108    ///
109    /// @return training index
110    ///
111    const utility::Index&
112    training_index(std::vector<size_t>::size_type i) const;
113
114    ///
115    /// @return training target
116    ///
117    const Target& training_target(std::vector<Target>::size_type i) const;
118
119    ///
120    /// @return validation data
121    ///
122    const T& validation_data(size_t i) const;
123
124    ///
125    /// @return validation index
126    ///
127    const utility::Index&
128    validation_index(std::vector<size_t>::size_type i) const;
129
130    ///
131    /// @return validation target
132    ///
133    const Target& validation_target(std::vector<Target>::size_type i) const;
134
135  private:
136    void build(const MatrixLookup&);
137    void build(const MatrixLookupWeighted&);
138    void build(const KernelLookup&);
139
140    SubsetGenerator(const SubsetGenerator&);
141    const SubsetGenerator& operator=(const SubsetGenerator&) const;
142
143    FeatureSelector* f_selector_;
144    std::vector<utility::Index > features_;
145    const Sampler& sampler_;
146    std::vector<const T*> training_data_;
147    std::vector<Target> training_target_;
148    std::vector<const T*> validation_data_;
149    std::vector<Target> validation_target_;
150
151  };
152
153
154  // templates
155
156  template<typename T>
157  SubsetGenerator<T>::SubsetGenerator(const Sampler& sampler, 
158                                      const T& data)
159    : f_selector_(NULL), sampler_(sampler)
160  { 
161    utility::yat_assert<std::runtime_error>(target().size()==data.columns());
162
163    training_data_.reserve(sampler_.size());
164    validation_data_.reserve(sampler_.size());
165    build(data);
166    utility::yat_assert<std::runtime_error>(training_data_.size()==size());
167    utility::yat_assert<std::runtime_error>(training_target_.size()==size());
168    utility::yat_assert<std::runtime_error>(validation_data_.size()==size());
169    utility::yat_assert<std::runtime_error>(validation_target_.size()==size());
170  }
171
172
173  template<typename T>
174  SubsetGenerator<T>::SubsetGenerator(const Sampler& sampler, 
175                                      const T& data, 
176                                      FeatureSelector& fs)
177    : f_selector_(&fs), sampler_(sampler)
178  { 
179    utility::yat_assert<std::runtime_error>(target().size()==data.columns());
180    features_.reserve(size());
181    training_data_.reserve(size());
182    validation_data_.reserve(size());
183    build(data);
184    utility::yat_assert<std::runtime_error>(training_data_.size()==size());
185    utility::yat_assert<std::runtime_error>(training_target_.size()==size());
186    utility::yat_assert<std::runtime_error>(validation_data_.size()==size());
187    utility::yat_assert<std::runtime_error>(validation_target_.size()==size());
188  }
189
190
191  template<typename T>
192  SubsetGenerator<T>::~SubsetGenerator()
193  {
194    utility::yat_assert<std::runtime_error>(training_data_.size()==validation_data_.size());
195    for (size_t i=0; i<training_data_.size(); i++) 
196      delete training_data_[i];
197    for (size_t i=0; i<validation_data_.size(); i++) 
198      delete validation_data_[i];
199  }
200
201
202  template<typename T>
203  void SubsetGenerator<T>::build(const MatrixLookup& ml)
204  {
205    for (size_t k=0; k<size(); k++){
206      training_target_.push_back(Target(target(),training_index(k)));
207      validation_target_.push_back(Target(target(),validation_index(k)));
208      if (f_selector_){
209        // training data with no feature selection
210        const MatrixLookup* train_data_all_feat = 
211          ml.training_data(training_index(k));
212        // use these data to create feature selection
213        utility::yat_assert<std::runtime_error>(train_data_all_feat);
214        f_selector_->update(*train_data_all_feat, training_target(k));
215        // get features
216        features_.push_back(f_selector_->features());
217        utility::yat_assert<std::runtime_error>(train_data_all_feat);
218        delete train_data_all_feat;
219      }
220      else // no feature selection
221        features_.push_back(utility::Index(ml.rows()));
222
223     
224      // Dynamically allocated. Must be deleted in destructor.
225      training_data_.push_back(new MatrixLookup(ml,features_.back(), 
226                                                training_index(k)));
227      validation_data_.push_back(new MatrixLookup(ml,features_.back(), 
228                                                  validation_index(k)));     
229    }
230
231  }
232
233
234  template<typename T>
235  void SubsetGenerator<T>::build(const MatrixLookupWeighted& ml)
236  {
237    for (u_long k=0; k<size(); k++){
238      training_target_.push_back(Target(target(),training_index(k)));
239      validation_target_.push_back(Target(target(),validation_index(k)));
240      if (f_selector_){
241        // training data with no feature selection
242        const MatrixLookupWeighted* train_data_all_feat = 
243          ml.training_data(training_index(k));
244        // use these data to create feature selection
245        f_selector_->update(*train_data_all_feat, training_target(k));
246        // get features
247        features_.push_back(f_selector_->features());
248        delete train_data_all_feat;
249      }
250      else // no feature selection
251        features_.push_back(utility::Index(ml.rows()));
252
253
254      // Dynamically allocated. Must be deleted in destructor.
255      training_data_.push_back(new MatrixLookupWeighted(ml, features_.back(), 
256                                                        training_index(k)));
257      validation_data_.push_back(new MatrixLookupWeighted(ml, features_.back(), 
258                                                          validation_index(k)));
259    }
260  }
261
262  template<typename T>
263  void SubsetGenerator<T>::build(const KernelLookup& kernel)
264  {
265    for (u_long k=0; k<size(); k++){
266      training_target_.push_back(Target(target(),training_index(k)));
267      validation_target_.push_back(Target(target(),validation_index(k)));
268
269      if (f_selector_){
270        if (kernel.weighted()){
271          utility::SmartPtr<const MatrixLookupWeighted> ml=
272            kernel.data_weighted();
273          f_selector_->update(MatrixLookupWeighted(*ml,training_index(k),false), 
274                              training_target(k));
275        }
276        else {
277          utility::SmartPtr<const MatrixLookup> ml=kernel.data();
278          f_selector_->update(MatrixLookup(*ml,training_index(k), false), 
279                              training_target(k));
280        } 
281        features_.push_back(f_selector_->features());
282        const KernelLookup* kl = kernel.selected(features_.back());
283        // Dynamically allocated. Must be deleted in destructor.
284        training_data_.push_back(new KernelLookup(kernel,training_index(k),
285                                                  training_index(k)));
286        validation_data_.push_back(new KernelLookup(kernel, training_index(k), 
287                                                    validation_index(k)));
288        utility::yat_assert<std::runtime_error>(kl);
289        delete kl;
290      }
291      else {// no feature selection
292        training_data_.push_back(new KernelLookup(kernel, training_index(k),
293                                                  training_index(k)));
294        validation_data_.push_back(new KernelLookup(kernel, 
295                                                    training_index(k), 
296                                                    validation_index(k)));
297      }
298     
299    }
300  }
301
302
303  template<typename T>
304  u_long SubsetGenerator<T>::size(void) const
305  {
306    return sampler_.size();
307  }
308
309
310  template<typename T>
311  const Target& SubsetGenerator<T>::target(void) const
312  {
313    return sampler_.target();
314  }
315
316
317  template<typename T>
318  const T&
319  SubsetGenerator<T>::training_data(size_t i) const 
320  {
321    return *(training_data_[i]);
322  }
323
324
325  template<typename T>
326  const utility::Index&
327  SubsetGenerator<T>::training_features(size_t i) const
328  {
329    return f_selector_ ? features_[i] : features_[0];
330  }
331
332
333  template<typename T>
334  const utility::Index&
335  SubsetGenerator<T>::training_index(size_t i) const
336  {
337    return sampler_.training_index(i);
338  }
339
340
341  template<typename T>
342  const Target&
343  SubsetGenerator<T>::training_target(std::vector<Target>::size_type i) const
344  {
345    return training_target_[i];
346  }
347
348
349  template<typename T>
350  const T&
351  SubsetGenerator<T>::validation_data(size_t i) const
352  {
353    return *(validation_data_[i]);
354  }
355
356
357  template<typename T>
358  const utility::Index&
359  SubsetGenerator<T>::validation_index(std::vector<size_t>::size_type i) const
360  {
361    return sampler_.validation_index(i);
362  }
363
364
365  template<typename T>
366  const Target&
367  SubsetGenerator<T>::validation_target(std::vector<Target>::size_type i) const
368  {
369    return validation_target_[i];
370  }
371
372}}} // of namespace classifier, yat, and theplu
373
374#endif
375
Note: See TracBrowser for help on using the repository browser.