source: trunk/yat/classifier/EnsembleBuilder.h @ 2226

Last change on this file since 2226 was 2226, checked in by Peter, 13 years ago

remove usage of deprecated constructors also in templates and tests. (refs #543)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 7.9 KB
Line 
1#ifndef _theplu_yat_classifier_ensemblebuilder_
2#define _theplu_yat_classifier_ensemblebuilder_
3
4// $Id$
5
6/*
7  Copyright (C) 2005 Markus Ringnér
8  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
9  Copyright (C) 2007 Jari Häkkinen, Peter Johansson
10  Copyright (C) 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér
11  Copyright (C) 2009 Jari Häkkinen, Peter Johansson
12  Copyright (C) 2010 Peter Johansson
13
14  This file is part of the yat library, http://dev.thep.lu.se/yat
15
16  The yat library is free software; you can redistribute it and/or
17  modify it under the terms of the GNU General Public License as
18  published by the Free Software Foundation; either version 3 of the
19  License, or (at your option) any later version.
20
21  The yat library is distributed in the hope that it will be useful,
22  but WITHOUT ANY WARRANTY; without even the implied warranty of
23  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24  General Public License for more details.
25
26  You should have received a copy of the GNU General Public License
27  along with yat. If not, see <http://www.gnu.org/licenses/>.
28*/
29
30#include "FeatureSelector.h"
31#include "Sampler.h"
32#include "SubsetGenerator.h"
33#include "yat/statistics/Averager.h"
34#include "yat/utility/Matrix.h"
35#include "yat/utility/yat_assert.h"
36
37#include <vector>
38
39namespace theplu {
40namespace yat {
41namespace classifier { 
42
43  ///
44  /// @brief Class for ensembles of supervised classifiers
45  ///
46  template <class Classifier, class Data>
47  class EnsembleBuilder
48  {
49  public:
50    /**
51       \brief Type of classifier that ensemble is built on.
52     */
53    typedef Classifier classifier_type;
54
55    /**
56       Type of container used for storing data. Must be MatrixLookup,
57       MatrixLookupWeighted, or KernelLookup
58     */
59    typedef Data data_type;
60
61    ///
62    /// Constructor.
63    ///
64    EnsembleBuilder(const Classifier&, const Data&, const Sampler&);
65
66    ///
67    /// Constructor.
68    ///
69    EnsembleBuilder(const Classifier&, const Data&, const Sampler&, 
70                    FeatureSelector&);
71
72    ///
73    /// Destructor.
74    ///
75    virtual ~EnsembleBuilder(void);
76
77    /**
78       \brief Generate ensemble.
79       
80       Function trains each member of the Ensemble.
81    */
82    void build(void);
83
84    ///
85    /// @return ith classifier
86    ///
87    const Classifier& classifier(size_t i) const;
88     
89    ///
90    /// @return Number of classifiers in ensemble. Prior build(void)
91    /// is issued size is zero.
92    ///
93    unsigned long size(void) const;
94
95    ///
96    /// @brief Generate validation data for ensemble
97    ///
98    /// validate()[i][j] return averager for class @a i for sample @a j
99    ///
100    const std::vector<std::vector<statistics::Averager> >& validate(void);
101   
102    /**
103       Predict a dataset using the ensemble.
104       
105       If @a data is a KernelLookup each column should correspond to a
106       test sample and each row should correspond to a training
107       sample. More exactly row \f$ i \f$ in @a data should correspond
108       to the same sample as row/column \f$ i \f$ in the training
109       kernel corresponds to.
110    */
111    void predict(const Data& data, 
112                 std::vector<std::vector<statistics::Averager> > &);
113
114  private:
115    // no copying
116    EnsembleBuilder(const EnsembleBuilder&);
117    const EnsembleBuilder& operator=(const EnsembleBuilder&);
118   
119
120    const Classifier& mother_;
121    SubsetGenerator<Data>* subset_;
122    std::vector<Classifier*> classifier_;
123    KernelLookup test_data(const KernelLookup&, size_t k);
124    MatrixLookup test_data(const MatrixLookup&, size_t k);
125    MatrixLookupWeighted test_data(const MatrixLookupWeighted&, size_t k);
126    std::vector<std::vector<statistics::Averager> > validation_result_;
127
128  };
129 
130
131  // implementation
132
133  template <class Classifier, class Data>
134  EnsembleBuilder<Classifier, Data>::EnsembleBuilder(const Classifier& sc,
135                                                     const Data& data,
136                                                     const Sampler& sampler)
137    : mother_(sc),subset_(new SubsetGenerator<Data>(sampler,data))
138  {
139  }
140
141
142  template <class Classifier, class Data>
143  EnsembleBuilder<Classifier, Data>::EnsembleBuilder(const Classifier& sc,
144                                                     const Data& data,
145                                                     const Sampler& sampler,
146                                                     FeatureSelector& fs)
147    : mother_(sc),
148      subset_(new SubsetGenerator<Data>(sampler,data,fs))
149  {
150  }
151
152
153  template <class Classifier, class Data>
154  EnsembleBuilder<Classifier, Data>::~EnsembleBuilder(void)
155  {
156    for(size_t i=0; i<classifier_.size(); i++)
157      delete classifier_[i];
158    delete subset_;
159  }
160
161
162  template <class Classifier, class Data>
163  void EnsembleBuilder<Classifier, Data>::build(void)
164  {
165    if (classifier_.empty()){
166      for(unsigned long i=0; i<subset_->size();++i) {
167        Classifier* classifier = mother_.make_classifier();
168        classifier->train(subset_->training_data(i), 
169                          subset_->training_target(i));
170        classifier_.push_back(classifier);
171      }   
172    }
173  }
174
175
176  template <class Classifier, class Data>
177  const Classifier& EnsembleBuilder<Classifier, Data>::classifier(size_t i) const
178  {
179    return *(classifier_[i]);
180  }
181
182
183  template <class Classifier, class Data>
184  void EnsembleBuilder<Classifier, Data>::predict
185  (const Data& data, std::vector<std::vector<statistics::Averager> >& result)
186  {
187    result = std::vector<std::vector<statistics::Averager> >
188      (subset_->target().nof_classes(), 
189       std::vector<statistics::Averager>(data.columns()));
190   
191    utility::Matrix prediction; 
192
193    for(unsigned long k=0;k<size();++k) {       
194      Data sub_data = test_data(data, k);
195      classifier(k).predict(sub_data,prediction);
196      for(size_t i=0; i<prediction.rows();i++) 
197        for(size_t j=0; j<prediction.columns();j++) 
198          result[i][j].add(prediction(i,j));   
199    }
200  }
201
202 
203  template <class Classifier, class Data>
204  unsigned long EnsembleBuilder<Classifier, Data>::size(void) const
205  {
206    return classifier_.size();
207  }
208
209
210  template <class Classifier, class Data>
211  MatrixLookup EnsembleBuilder<Classifier,
212                               Data>::test_data(const MatrixLookup& data,
213                                                size_t k)
214  {
215    return MatrixLookup(data, subset_->training_features(k), true);
216  }
217 
218
219  template <class Classifier, class Data>
220  MatrixLookupWeighted
221  EnsembleBuilder<Classifier, Data>::test_data(const MatrixLookupWeighted& data,
222                                               size_t k)
223  {
224    return MatrixLookupWeighted(data, subset_->training_features(k), 
225                                utility::Index(data.columns()));
226  }
227 
228
229  template <class Classifier, class Data>
230  KernelLookup
231  EnsembleBuilder<Classifier, Data>::test_data(const KernelLookup& kernel,
232                                               size_t k)
233  {
234    // weighted case
235    if (kernel.weighted()){
236      // no feature selection
237      if (kernel.data_weighted().rows()==subset_->training_features(k).size())
238        return KernelLookup(kernel, subset_->training_index(k), true);
239      MatrixLookupWeighted mlw = test_data(kernel.data_weighted(), k);
240      return subset_->training_data(k).test_kernel(mlw);
241
242    }
243    // unweighted case
244
245    // no feature selection
246    if (kernel.data().rows()==subset_->training_features(k).size())
247      return KernelLookup(kernel, subset_->training_index(k), true);
248   
249    // feature selection
250    MatrixLookup ml = test_data(kernel.data(),k);
251    return subset_->training_data(k).test_kernel(ml);
252  }
253 
254
255  template <class Classifier, class Data>
256  const std::vector<std::vector<statistics::Averager> >& 
257  EnsembleBuilder<Classifier, Data>::validate(void)
258  {
259    // Don't recalculate validation_result_
260    if (!validation_result_.empty())
261      return validation_result_;
262
263    validation_result_ = std::vector<std::vector<statistics::Averager> >
264      (subset_->target().nof_classes(), 
265       std::vector<statistics::Averager>(subset_->target().size()));
266
267    utility::Matrix prediction; 
268    for(unsigned long k=0;k<size();k++) {
269      classifier(k).predict(subset_->validation_data(k),prediction);
270     
271      // map results to indices of samples in training + validation data set
272      for(size_t i=0; i<prediction.rows();i++) 
273        for(size_t j=0; j<prediction.columns();j++) {
274          validation_result_[i][subset_->validation_index(k)[j]].
275            add(prediction(i,j));
276        }           
277    }
278    return validation_result_;
279  }
280
281}}} // of namespace classifier, yat, and theplu
282
283#endif
Note: See TracBrowser for help on using the repository browser.