source: trunk/yat/classifier/SubsetGenerator.h @ 1134

Last change on this file since 1134 was 1134, checked in by Peter, 14 years ago

using Index class instead of std::vector<size_t>

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 11.0 KB
Line 
1#ifndef _theplu_yat_classifier_subset_generator_
2#define _theplu_yat_classifier_subset_generator_
3
4// $Id: SubsetGenerator.h 1134 2008-02-23 22:52:43Z peter $
5
6/*
7  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér, Peter Johansson
8  Copyright (C) 2007 Peter Johansson
9
10  This file is part of the yat library, http://trac.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 2 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25  02111-1307, USA.
26*/
27
28#include "DataLookup2D.h"
29#include "FeatureSelector.h"
30#include "KernelLookup.h"
31#include "MatrixLookup.h"
32#include "MatrixLookupWeighted.h"
33#include "Target.h"
34#include "Sampler.h"
35#include "yat/utility/Index.h"
36#include "yat/utility/yat_assert.h"
37
38#include <algorithm>
39#include <cassert>
40#include <utility>
41#include <typeinfo>
42#include <vector>
43
44namespace theplu {
45namespace yat {
46namespace classifier { 
47
48  ///
49  /// @brief Class splitting a set into training set and validation set.
50  ///
51  template <typename T> 
52  class SubsetGenerator
53  {
54  public:
55    /**
56       type of data that is stored in SubsetGenerator
57     */
58    typedef T value_type;
59
60    ///
61    /// @brief Constructor
62    /// 
63    /// @param sampler sampler
64    /// @param data data to split up in validation and training.
65    ///
66    SubsetGenerator(const Sampler& sampler, const T& data);
67
68    ///
69    /// @brief Constructor
70    /// 
71    /// @param sampler taking care of partioning dataset
72    /// @param data data to be split up in validation and training.
73    /// @param fs Object selecting features for each subset
74    ///
75    SubsetGenerator(const Sampler& sampler, const T& data, 
76                    FeatureSelector& fs);
77
78    ///
79    /// Destructor
80    ///
81    ~SubsetGenerator();
82 
83    ///
84    /// @return number of subsets
85    ///
86    u_long size(void) const;
87
88    ///
89    /// @return the target for the total set
90    ///
91    const Target& target(void) const;
92
93    ///
94    /// @return the sampler for the total set
95    ///
96    //    const Sampler& sampler(void) const;
97
98    ///
99    /// @return training data
100    ///
101    const T& training_data(size_t i) const;
102
103    ///
104    /// @return training features
105    ///
106    const utility::Index&
107    training_features(std::vector<size_t>::size_type i) const;
108
109    ///
110    /// @return training index
111    ///
112    const utility::Index&
113    training_index(std::vector<size_t>::size_type i) const;
114
115    ///
116    /// @return training target
117    ///
118    const Target& training_target(std::vector<Target>::size_type i) const;
119
120    ///
121    /// @return validation data
122    ///
123    const T& validation_data(size_t i) const;
124
125    ///
126    /// @return validation index
127    ///
128    const utility::Index&
129    validation_index(std::vector<size_t>::size_type i) const;
130
131    ///
132    /// @return validation target
133    ///
134    const Target& validation_target(std::vector<Target>::size_type i) const;
135
136    ///
137    /// @return true if weighted
138    /// @todo remove this function
139    //bool weighted(void) const;
140
141  private:
142    void build(const MatrixLookup&);
143    void build(const MatrixLookupWeighted&);
144    void build(const KernelLookup&);
145
146    SubsetGenerator(const SubsetGenerator&);
147    const SubsetGenerator& operator=(const SubsetGenerator&) const;
148
149    FeatureSelector* f_selector_;
150    std::vector<utility::Index > features_;
151    const Sampler& sampler_;
152    std::vector<const T*> training_data_;
153    std::vector<Target> training_target_;
154    std::vector<const T*> validation_data_;
155    std::vector<Target> validation_target_;
156
157  };
158
159
160  // templates
161
162  template<typename T>
163  SubsetGenerator<T>::SubsetGenerator(const Sampler& sampler, 
164                                      const T& data)
165    : f_selector_(NULL), sampler_(sampler)
166  { 
167    utility::yat_assert<std::runtime_error>(target().size()==data.columns());
168
169    training_data_.reserve(sampler_.size());
170    validation_data_.reserve(sampler_.size());
171    for (size_t i=0; i<sampler_.size(); ++i){
172      // Dynamically allocated. Must be deleted in destructor.
173      training_data_.push_back(data.training_data(sampler.training_index(i)));
174      validation_data_.push_back(data.validation_data(sampler.training_index(i),
175                                                      sampler.validation_index(i)));
176
177      training_target_.push_back(Target(target(),sampler.training_index(i)));
178      validation_target_.push_back(Target(target(),
179                                          sampler.validation_index(i)));
180      utility::yat_assert<std::runtime_error>(training_data_.size()==i+1);
181      utility::yat_assert<std::runtime_error>(training_target_.size()==i+1);
182      utility::yat_assert<std::runtime_error>(validation_data_.size()==i+1);
183      utility::yat_assert<std::runtime_error>(validation_target_.size()==i+1);
184    }
185
186    // No feature selection, hence features same for all partitions
187    // and can be stored in features_[0]
188    features_.push_back(utility::Index(data.rows()));
189
190    utility::yat_assert<std::runtime_error>(training_data_.size()==size());
191    utility::yat_assert<std::runtime_error>(training_target_.size()==size());
192    utility::yat_assert<std::runtime_error>(validation_data_.size()==size());
193    utility::yat_assert<std::runtime_error>(validation_target_.size()==size());
194  }
195
196
197  template<typename T>
198  SubsetGenerator<T>::SubsetGenerator(const Sampler& sampler, 
199                                   const T& data, 
200                                   FeatureSelector& fs)
201    : f_selector_(&fs), sampler_(sampler)
202  { 
203    utility::yat_assert<std::runtime_error>(target().size()==data.columns());
204    features_.reserve(size());
205    training_data_.reserve(size());
206    validation_data_.reserve(size());
207    build(data);
208    utility::yat_assert<std::runtime_error>(training_data_.size()==size());
209    utility::yat_assert<std::runtime_error>(training_target_.size()==size());
210    utility::yat_assert<std::runtime_error>(validation_data_.size()==size());
211    utility::yat_assert<std::runtime_error>(validation_target_.size()==size());
212  }
213
214
215  template<typename T>
216  SubsetGenerator<T>::~SubsetGenerator()
217  {
218    utility::yat_assert<std::runtime_error>(training_data_.size()==validation_data_.size());
219    for (size_t i=0; i<training_data_.size(); i++) 
220      delete training_data_[i];
221    for (size_t i=0; i<validation_data_.size(); i++) 
222      delete validation_data_[i];
223  }
224
225
226  template<typename T>
227  void SubsetGenerator<T>::build(const MatrixLookup& ml)
228  {
229    for (size_t k=0; k<size(); k++){
230      training_target_.push_back(Target(target(),training_index(k)));
231      validation_target_.push_back(Target(target(),validation_index(k)));
232      // training data with no feature selection
233      const MatrixLookup* train_data_all_feat = 
234        ml.training_data(training_index(k));
235      // use these data to create feature selection
236      utility::yat_assert<std::runtime_error>(train_data_all_feat);
237      f_selector_->update(*train_data_all_feat, training_target(k));
238        // get features
239      features_.push_back(f_selector_->features());
240      utility::yat_assert<std::runtime_error>(train_data_all_feat);
241      delete train_data_all_feat;
242     
243      // Dynamically allocated. Must be deleted in destructor.
244      training_data_.push_back(new MatrixLookup(ml,features_.back(), 
245                                                training_index(k)));
246      validation_data_.push_back(new MatrixLookup(ml,features_.back(), 
247                                                  validation_index(k)));     
248    }
249
250  }
251
252
253  template<typename T>
254  void SubsetGenerator<T>::build(const MatrixLookupWeighted& ml)
255  {
256    for (u_long k=0; k<size(); k++){
257      training_target_.push_back(Target(target(),training_index(k)));
258      validation_target_.push_back(Target(target(),validation_index(k)));
259      // training data with no feature selection
260      const MatrixLookupWeighted* train_data_all_feat = 
261        ml.training_data(training_index(k));
262      // use these data to create feature selection
263      f_selector_->update(*train_data_all_feat, training_target(k));
264      // get features
265      features_.push_back(f_selector_->features());
266      delete train_data_all_feat;
267     
268      // Dynamically allocated. Must be deleted in destructor.
269      training_data_.push_back(new MatrixLookupWeighted(ml, features_.back(), 
270                                                        training_index(k)));
271      validation_data_.push_back(new MatrixLookupWeighted(ml, features_.back(), 
272                                                          validation_index(k)));
273    }
274  }
275
276  template<typename T>
277  void SubsetGenerator<T>::build(const KernelLookup& kernel)
278  {
279    for (u_long k=0; k<size(); k++){
280      training_target_.push_back(Target(target(),training_index(k)));
281      validation_target_.push_back(Target(target(),validation_index(k)));
282      const DataLookup2D* matrix = kernel.data();
283      // dynamically allocated must be deleted
284      const DataLookup2D* training_matrix = 
285        matrix->training_data(training_index(k));
286      if (matrix->weighted()){
287        const MatrixLookupWeighted& ml = 
288          dynamic_cast<const MatrixLookupWeighted&>(*matrix);
289        f_selector_->update(MatrixLookupWeighted(ml,training_index(k),false), 
290                            training_target(k));
291      }
292      else {
293        const MatrixLookup& ml = 
294          dynamic_cast<const MatrixLookup&>(*matrix);
295        f_selector_->update(MatrixLookup(ml,training_index(k), false), 
296                            training_target(k));
297      } 
298      utility::Index dummie=f_selector_->features();
299      features_.push_back(dummie);
300      //features_.push_back(f_selector_->features());
301      const KernelLookup* kl = kernel.selected(features_.back());
302      utility::yat_assert<std::runtime_error>(training_matrix);
303      delete training_matrix;
304     
305      // Dynamically allocated. Must be deleted in destructor.
306      training_data_.push_back(kl->training_data(training_index(k)));
307      validation_data_.push_back(kl->validation_data(training_index(k), 
308                                                     validation_index(k)));
309      utility::yat_assert<std::runtime_error>(kl);
310      delete kl;
311    }
312  }
313
314
315  template<typename T>
316  u_long SubsetGenerator<T>::size(void) const
317  {
318    return sampler_.size();
319  }
320
321
322  template<typename T>
323  const Target& SubsetGenerator<T>::target(void) const
324  {
325    return sampler_.target();
326  }
327
328
329  template<typename T>
330  const T&
331  SubsetGenerator<T>::training_data(size_t i) const 
332  {
333    return *(training_data_[i]);
334  }
335
336
337  template<typename T>
338  const utility::Index&
339  SubsetGenerator<T>::training_features(size_t i) const
340  {
341    return f_selector_ ? features_[i] : features_[0];
342  }
343
344
345  template<typename T>
346  const utility::Index&
347  SubsetGenerator<T>::training_index(size_t i) const
348  {
349    return sampler_.training_index(i);
350  }
351
352
353  template<typename T>
354  const Target&
355  SubsetGenerator<T>::training_target(std::vector<Target>::size_type i) const
356  {
357    return training_target_[i];
358  }
359
360
361  template<typename T>
362  const T&
363  SubsetGenerator<T>::validation_data(size_t i) const
364  {
365    return *(validation_data_[i]);
366  }
367
368
369  template<typename T>
370  const utility::Index&
371  SubsetGenerator<T>::validation_index(std::vector<size_t>::size_type i) const
372  {
373    return sampler_.validation_index(i);
374  }
375
376
377  template<typename T>
378  const Target&
379  SubsetGenerator<T>::validation_target(std::vector<Target>::size_type i) const
380  {
381    return validation_target_[i];
382  }
383
384}}} // of namespace classifier, yat, and theplu
385
386#endif
387
Note: See TracBrowser for help on using the repository browser.