source: trunk/yat/classifier/EnsembleBuilder.h @ 1706

Last change on this file since 1706 was 1487, checked in by Jari Häkkinen, 13 years ago

Addresses #436. GPL license copy reference should also be updated.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 7.3 KB
Line 
1#ifndef _theplu_yat_classifier_ensemblebuilder_
2#define _theplu_yat_classifier_ensemblebuilder_
3
4// $Id$
5
6/*
7  Copyright (C) 2005 Markus Ringnér
8  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
9  Copyright (C) 2007 Jari Häkkinen, Peter Johansson
10  Copyright (C) 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér
11
12  This file is part of the yat library, http://dev.thep.lu.se/yat
13
14  The yat library is free software; you can redistribute it and/or
15  modify it under the terms of the GNU General Public License as
16  published by the Free Software Foundation; either version 3 of the
17  License, or (at your option) any later version.
18
19  The yat library is distributed in the hope that it will be useful,
20  but WITHOUT ANY WARRANTY; without even the implied warranty of
21  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  General Public License for more details.
23
24  You should have received a copy of the GNU General Public License
25  along with yat. If not, see <http://www.gnu.org/licenses/>.
26*/
27
28#include "FeatureSelector.h"
29#include "Sampler.h"
30#include "SubsetGenerator.h"
31#include "yat/statistics/Averager.h"
32#include "yat/utility/Matrix.h"
33
34#include <vector>
35
36namespace theplu {
37namespace yat {
38namespace classifier { 
39
40  ///
41  /// @brief Class for ensembles of supervised classifiers
42  ///
43  template <class Classifier, class Data>
44  class EnsembleBuilder
45  {
46  public:
47    /**
48       \brief Type of classifier that ensemble is built on.
49     */
50    typedef Classifier classifier_type;
51
52    /**
53       Type of container used for storing data. Must be MatrixLookup,
54       MatrixLookupWeighted, or KernelLookup
55     */
56    typedef Data data_type;
57
58    ///
59    /// Constructor.
60    ///
61    EnsembleBuilder(const Classifier&, const Data&, const Sampler&);
62
63    ///
64    /// Constructor.
65    ///
66    EnsembleBuilder(const Classifier&, const Data&, const Sampler&, 
67                    FeatureSelector&);
68
69    ///
70    /// Destructor.
71    ///
72    virtual ~EnsembleBuilder(void);
73
74    /**
75       \brief Generate ensemble.
76       
77       Function trains each member of the Ensemble.
78    */
79    void build(void);
80
81    ///
82    /// @return ith classifier
83    ///
84    const Classifier& classifier(size_t i) const;
85     
86    ///
87    /// @return Number of classifiers in ensemble. Prior build(void)
88    /// is issued size is zero.
89    ///
90    unsigned long size(void) const;
91
92    ///
93    /// @brief Generate validation data for ensemble
94    ///
95    /// validate()[i][j] return averager for class @a i for sample @a j
96    ///
97    const std::vector<std::vector<statistics::Averager> >& validate(void);
98   
99    /**
100       Predict a dataset using the ensemble.
101       
102       If @a data is a KernelLookup each column should correspond to a
103       test sample and each row should correspond to a training
104       sample. More exactly row \f$ i \f$ in @a data should correspond
105       to the same sample as row/column \f$ i \f$ in the training
106       kernel corresponds to.
107    */
108    void predict(const Data& data, 
109                 std::vector<std::vector<statistics::Averager> > &);
110
111  private:
112    // no copying
113    EnsembleBuilder(const EnsembleBuilder&);
114    const EnsembleBuilder& operator=(const EnsembleBuilder&);
115   
116
117    const Classifier& mother_;
118    SubsetGenerator<Data>* subset_;
119    std::vector<Classifier*> classifier_;
120    KernelLookup test_data(const KernelLookup&, size_t k);
121    MatrixLookup test_data(const MatrixLookup&, size_t k);
122    MatrixLookupWeighted test_data(const MatrixLookupWeighted&, size_t k);
123    std::vector<std::vector<statistics::Averager> > validation_result_;
124
125  };
126 
127
128  // implementation
129
130  template <class C, class D> 
131  EnsembleBuilder<C,D>::EnsembleBuilder(const C& sc, const D& data,
132                                        const Sampler& sampler) 
133    : mother_(sc),subset_(new SubsetGenerator<D>(sampler,data))
134  {
135  }
136
137
138  template <class C, class D> 
139  EnsembleBuilder<C, D>::EnsembleBuilder(const C& sc, const D& data, 
140                                         const Sampler& sampler,
141                                         FeatureSelector& fs) 
142    : mother_(sc),
143      subset_(new SubsetGenerator<D>(sampler,data,fs))
144  {
145  }
146
147
148  template <class C, class D> 
149  EnsembleBuilder<C, D>::~EnsembleBuilder(void) 
150  {
151    for(size_t i=0; i<classifier_.size(); i++)
152      delete classifier_[i];
153    delete subset_;
154  }
155
156
157  template <class C, class D> 
158  void EnsembleBuilder<C, D>::build(void) 
159  {
160    if (classifier_.empty()){
161      for(unsigned long i=0; i<subset_->size();++i) {
162        C* classifier = mother_.make_classifier();
163        classifier->train(subset_->training_data(i), 
164                          subset_->training_target(i));
165        classifier_.push_back(classifier);
166      }   
167    }
168  }
169
170
171  template <class C, class D> 
172  const C& EnsembleBuilder<C, D>::classifier(size_t i) const
173  {
174    return *(classifier_[i]);
175  }
176
177
178  template <class C, class D> 
179  void EnsembleBuilder<C, D>::predict
180  (const D& data, std::vector<std::vector<statistics::Averager> >& result)
181  {
182    result = std::vector<std::vector<statistics::Averager> >
183      (subset_->target().nof_classes(), 
184       std::vector<statistics::Averager>(data.columns()));
185   
186    utility::Matrix prediction; 
187
188    for(unsigned long k=0;k<size();++k) {       
189      D sub_data =  test_data(data, k);
190      classifier(k).predict(sub_data,prediction);
191    }
192
193    for(size_t i=0; i<prediction.rows();i++) 
194      for(size_t j=0; j<prediction.columns();j++) 
195        result[i][j].add(prediction(i,j));   
196  }
197
198 
199  template <class C, class D> 
200  unsigned long EnsembleBuilder<C, D>::size(void) const
201  {
202    return classifier_.size();
203  }
204
205
206  template <class C, class D> 
207  MatrixLookup EnsembleBuilder<C, D>::test_data(const MatrixLookup& data, 
208                                                size_t k)
209  {
210    return MatrixLookup(data, subset_->training_features(k), true);
211  }
212 
213
214  template <class C, class D> 
215  MatrixLookupWeighted
216  EnsembleBuilder<C, D>::test_data(const MatrixLookupWeighted& data, size_t k)
217  {
218    return MatrixLookupWeighted(data, subset_->training_features(k), true);
219  }
220 
221
222  template <class C, class D> 
223  KernelLookup
224  EnsembleBuilder<C, D>::test_data(const KernelLookup& kernel, size_t k)
225  {
226    // weighted case
227    if (kernel.weighted()){
228      assert(false);
229      // no feature selection
230      if (kernel.data_weighted().rows()==subset_->training_features(k).size())
231        return KernelLookup(kernel, subset_->training_index(k), true);
232      MatrixLookupWeighted mlw = test_data(kernel.data_weighted(), k);
233      return subset_->training_data(k).test_kernel(mlw);
234
235    }
236    // unweighted case
237
238    // no feature selection
239    if (kernel.data().rows()==subset_->training_features(k).size())
240      return KernelLookup(kernel, subset_->training_index(k), true);
241   
242    // feature selection
243    return subset_->training_data(k).test_kernel(test_data(kernel.data(),k));
244  }
245 
246
247  template <class C, class D> 
248  const std::vector<std::vector<statistics::Averager> >& 
249  EnsembleBuilder<C, D>::validate(void)
250  {
251    // Don't recalculate validation_result_
252    if (!validation_result_.empty())
253      return validation_result_;
254
255    validation_result_ = std::vector<std::vector<statistics::Averager> >
256      (subset_->target().nof_classes(), 
257       std::vector<statistics::Averager>(subset_->target().size()));
258
259    utility::Matrix prediction; 
260    for(unsigned long k=0;k<size();k++) {
261      classifier(k).predict(subset_->validation_data(k),prediction);
262     
263      // map results to indices of samples in training + validation data set
264      for(size_t i=0; i<prediction.rows();i++) 
265        for(size_t j=0; j<prediction.columns();j++) {
266          validation_result_[i][subset_->validation_index(k)[j]].
267            add(prediction(i,j));
268        }           
269    }
270    return validation_result_;
271  }
272
273}}} // of namespace classifier, yat, and theplu
274
275#endif
Note: See TracBrowser for help on using the repository browser.