source: trunk/yat/classifier/SubsetGenerator.h @ 1206

Last change on this file since 1206 was 1206, checked in by Peter, 14 years ago

fixes #345

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 10.4 KB
Line 
1#ifndef _theplu_yat_classifier_subset_generator_
2#define _theplu_yat_classifier_subset_generator_
3
4// $Id: SubsetGenerator.h 1206 2008-03-05 17:56:01Z peter $
5
6/*
7  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér, Peter Johansson
8  Copyright (C) 2007 Peter Johansson
9
10  This file is part of the yat library, http://trac.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 2 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25  02111-1307, USA.
26*/
27
28#include "FeatureSelector.h"
29#include "KernelLookup.h"
30#include "MatrixLookup.h"
31#include "MatrixLookupWeighted.h"
32#include "Target.h"
33#include "Sampler.h"
34#include "yat/utility/Index.h"
35#include "yat/utility/SmartPtr.h"
36#include "yat/utility/yat_assert.h"
37
38#include <algorithm>
39#include <cassert>
40#include <utility>
41#include <typeinfo>
42#include <vector>
43
44namespace theplu {
45namespace yat {
46namespace classifier { 
47
48  ///
49  /// @brief Class splitting a set into training set and validation set.
50  ///
51  template <typename T> 
52  class SubsetGenerator
53  {
54  public:
55    /**
56       type of data that is stored in SubsetGenerator
57     */
58    typedef T value_type;
59
60    ///
61    /// @brief Constructor
62    /// 
63    /// @param sampler sampler
64    /// @param data data to split up in validation and training.
65    ///
66    SubsetGenerator(const Sampler& sampler, const T& data);
67
68    ///
69    /// @brief Constructor
70    /// 
71    /// @param sampler taking care of partioning dataset
72    /// @param data data to be split up in validation and training.
73    /// @param fs Object selecting features for each subset
74    ///
75    SubsetGenerator(const Sampler& sampler, const T& data, 
76                    FeatureSelector& fs);
77
78    ///
79    /// Destructor
80    ///
81    ~SubsetGenerator();
82 
83    ///
84    /// @return number of subsets
85    ///
86    u_long size(void) const;
87
88    ///
89    /// @return the target for the total set
90    ///
91    const Target& target(void) const;
92
93    ///
94    /// @return the sampler for the total set
95    ///
96    //    const Sampler& sampler(void) const;
97
98    ///
99    /// @return training data
100    ///
101    const T& training_data(size_t i) const;
102
103    ///
104    /// @return training features
105    ///
106    const utility::Index& training_features(size_t i) const;
107
108    ///
109    /// @return training index
110    ///
111    const utility::Index& training_index(size_t i) const;
112
113    ///
114    /// @return training target
115    ///
116    const Target& training_target(std::vector<Target>::size_type i) const;
117
118    ///
119    /// @return validation data
120    ///
121    const T& validation_data(size_t i) const;
122
123    ///
124    /// @return validation index
125    ///
126    const utility::Index&
127    validation_index(std::vector<size_t>::size_type i) const;
128
129    ///
130    /// @return validation target
131    ///
132    const Target& validation_target(std::vector<Target>::size_type i) const;
133
134  private:
135    void build(const MatrixLookup&);
136    void build(const MatrixLookupWeighted&);
137    void build(const KernelLookup&);
138
139    SubsetGenerator(const SubsetGenerator&);
140    const SubsetGenerator& operator=(const SubsetGenerator&) const;
141
142    FeatureSelector* f_selector_;
143    std::vector<utility::Index > features_;
144    const Sampler& sampler_;
145    std::vector<const T*> training_data_;
146    std::vector<Target> training_target_;
147    std::vector<const T*> validation_data_;
148    std::vector<Target> validation_target_;
149
150  };
151
152
153  // templates
154
155  template<typename T>
156  SubsetGenerator<T>::SubsetGenerator(const Sampler& sampler, 
157                                      const T& data)
158    : f_selector_(NULL), sampler_(sampler)
159  { 
160    utility::yat_assert<std::runtime_error>(target().size()==data.columns());
161
162    training_data_.reserve(sampler_.size());
163    validation_data_.reserve(sampler_.size());
164    build(data);
165    utility::yat_assert<std::runtime_error>(training_data_.size()==size());
166    utility::yat_assert<std::runtime_error>(training_target_.size()==size());
167    utility::yat_assert<std::runtime_error>(validation_data_.size()==size());
168    utility::yat_assert<std::runtime_error>(validation_target_.size()==size());
169  }
170
171
172  template<typename T>
173  SubsetGenerator<T>::SubsetGenerator(const Sampler& sampler, 
174                                      const T& data, 
175                                      FeatureSelector& fs)
176    : f_selector_(&fs), sampler_(sampler)
177  { 
178    utility::yat_assert<std::runtime_error>(target().size()==data.columns());
179    features_.reserve(size());
180    training_data_.reserve(size());
181    validation_data_.reserve(size());
182    build(data);
183    utility::yat_assert<std::runtime_error>(training_data_.size()==size());
184    utility::yat_assert<std::runtime_error>(training_target_.size()==size());
185    utility::yat_assert<std::runtime_error>(validation_data_.size()==size());
186    utility::yat_assert<std::runtime_error>(validation_target_.size()==size());
187  }
188
189
190  template<typename T>
191  SubsetGenerator<T>::~SubsetGenerator()
192  {
193    utility::yat_assert<std::runtime_error>(training_data_.size()==validation_data_.size());
194    for (size_t i=0; i<training_data_.size(); i++) 
195      delete training_data_[i];
196    for (size_t i=0; i<validation_data_.size(); i++) 
197      delete validation_data_[i];
198  }
199
200
201  template<typename T>
202  void SubsetGenerator<T>::build(const MatrixLookup& ml)
203  {
204    if (!f_selector_)// no feature selection
205      features_.push_back(utility::Index(ml.rows()));
206
207    for (size_t k=0; k<size(); k++){
208      training_target_.push_back(Target(target(),training_index(k)));
209      validation_target_.push_back(Target(target(),validation_index(k)));
210      if (f_selector_){
211        // training data with no feature selection
212        const MatrixLookup* train_data_all_feat = 
213          new MatrixLookup(ml, training_index(k), false);
214        // use these data to create feature selection
215        utility::yat_assert<std::runtime_error>(train_data_all_feat);
216        f_selector_->update(*train_data_all_feat, training_target(k));
217        // get features
218        features_.push_back(f_selector_->features());
219        utility::yat_assert<std::runtime_error>(train_data_all_feat);
220        delete train_data_all_feat;
221      }
222     
223      // Dynamically allocated. Must be deleted in destructor.
224      training_data_.push_back(new MatrixLookup(ml,features_.back(), 
225                                                training_index(k)));
226      validation_data_.push_back(new MatrixLookup(ml,features_.back(), 
227                                                  validation_index(k)));     
228    }
229
230  }
231
232
233  template<typename T>
234  void SubsetGenerator<T>::build(const MatrixLookupWeighted& ml)
235  {
236    if (!f_selector_)// no feature selection
237      features_.push_back(utility::Index(ml.rows()));
238
239    for (u_long k=0; k<size(); k++){
240      training_target_.push_back(Target(target(),training_index(k)));
241      validation_target_.push_back(Target(target(),validation_index(k)));
242      if (f_selector_){
243        // training data with no feature selection
244        const MatrixLookupWeighted* train_data_all_feat = 
245          new MatrixLookupWeighted(ml, training_index(k), false);
246        // use these data to create feature selection
247        f_selector_->update(*train_data_all_feat, training_target(k));
248        // get features
249        features_.push_back(f_selector_->features());
250        delete train_data_all_feat;
251      }
252
253
254      // Dynamically allocated. Must be deleted in destructor.
255      training_data_.push_back(new MatrixLookupWeighted(ml, features_.back(), 
256                                                        training_index(k)));
257      validation_data_.push_back(new MatrixLookupWeighted(ml, features_.back(), 
258                                                          validation_index(k)));
259    }
260  }
261
262  template<typename T>
263  void SubsetGenerator<T>::build(const KernelLookup& kernel)
264  {
265    for (u_long k=0; k<size(); k++){
266      training_target_.push_back(Target(target(),training_index(k)));
267      validation_target_.push_back(Target(target(),validation_index(k)));
268
269      if (f_selector_){
270        if (kernel.weighted()){
271          MatrixLookupWeighted ml = kernel.data_weighted();
272          f_selector_->update(MatrixLookupWeighted(ml,training_index(k),false), 
273                              training_target(k));
274        }
275        else {
276          MatrixLookup ml=kernel.data();
277          f_selector_->update(MatrixLookup(ml,training_index(k), false), 
278                              training_target(k));
279        } 
280        features_.push_back(f_selector_->features());
281        KernelLookup kl = kernel.selected(features_.back());
282        // Dynamically allocated. Must be deleted in destructor.
283        training_data_.push_back(new KernelLookup(kl,training_index(k),
284                                                  training_index(k)));
285        validation_data_.push_back(new KernelLookup(kl, training_index(k), 
286                                                    validation_index(k)));
287      }
288      else {// no feature selection
289        training_data_.push_back(new KernelLookup(kernel, training_index(k),
290                                                  training_index(k)));
291        validation_data_.push_back(new KernelLookup(kernel, 
292                                                    training_index(k), 
293                                                    validation_index(k)));
294      }
295     
296    }
297    if (!f_selector_){
298      if (kernel.weighted())
299        features_.push_back(utility::Index(kernel.data_weighted().rows()));
300      else
301        features_.push_back(utility::Index(kernel.data().rows()));
302    }
303  }
304
305
306  template<typename T>
307  u_long SubsetGenerator<T>::size(void) const
308  {
309    return sampler_.size();
310  }
311
312
313  template<typename T>
314  const Target& SubsetGenerator<T>::target(void) const
315  {
316    return sampler_.target();
317  }
318
319
320  template<typename T>
321  const T&
322  SubsetGenerator<T>::training_data(size_t i) const 
323  {
324    return *(training_data_[i]);
325  }
326
327
328  template<typename T>
329  const utility::Index&
330  SubsetGenerator<T>::training_features(size_t i) const
331  {
332    utility::yat_assert<std::runtime_error>(features_.size(),
333                                           "SubsetGenerator::training_features");
334    return f_selector_ ? features_[i] : features_[0];
335  }
336
337
338  template<typename T>
339  const utility::Index&
340  SubsetGenerator<T>::training_index(size_t i) const
341  {
342    return sampler_.training_index(i);
343  }
344
345
346  template<typename T>
347  const Target&
348  SubsetGenerator<T>::training_target(std::vector<Target>::size_type i) const
349  {
350    return training_target_[i];
351  }
352
353
354  template<typename T>
355  const T&
356  SubsetGenerator<T>::validation_data(size_t i) const
357  {
358    return *(validation_data_[i]);
359  }
360
361
362  template<typename T>
363  const utility::Index&
364  SubsetGenerator<T>::validation_index(std::vector<size_t>::size_type i) const
365  {
366    return sampler_.validation_index(i);
367  }
368
369
370  template<typename T>
371  const Target&
372  SubsetGenerator<T>::validation_target(std::vector<Target>::size_type i) const
373  {
374    return validation_target_[i];
375  }
376
377}}} // of namespace classifier, yat, and theplu
378
379#endif
380
Note: See TracBrowser for help on using the repository browser.