source: branches/0.4-stable/yat/classifier/EnsembleBuilder.h @ 1392

Last change on this file since 1392 was 1392, checked in by Peter, 15 years ago

trac has moved

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 7.4 KB
Line 
1#ifndef _theplu_yat_classifier_ensemblebuilder_
2#define _theplu_yat_classifier_ensemblebuilder_
3
4// $Id$
5
6/*
7  Copyright (C) 2005 Markus Ringnér
8  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
9  Copyright (C) 2007 Jari Häkkinen, Peter Johansson
10  Copyright (C) 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér
11
12  This file is part of the yat library, http://dev.thep.lu.se/yat
13
14  The yat library is free software; you can redistribute it and/or
15  modify it under the terms of the GNU General Public License as
16  published by the Free Software Foundation; either version 2 of the
17  License, or (at your option) any later version.
18
19  The yat library is distributed in the hope that it will be useful,
20  but WITHOUT ANY WARRANTY; without even the implied warranty of
21  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  General Public License for more details.
23
24  You should have received a copy of the GNU General Public License
25  along with this program; if not, write to the Free Software
26  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
27  02111-1307, USA.
28*/
29
30#include "FeatureSelector.h"
31#include "Sampler.h"
32#include "SubsetGenerator.h"
33#include "yat/statistics/Averager.h"
34#include "yat/utility/Matrix.h"
35
36#include <vector>
37
38namespace theplu {
39namespace yat {
40namespace classifier { 
41
42  ///
43  /// @brief Class for ensembles of supervised classifiers
44  ///
45  template <class Classifier, class Data>
46  class EnsembleBuilder
47  {
48  public:
49    /**
50       \brief Type of classifier that ensemble is built on.
51     */
52    typedef Classifier classifier_type;
53
54    /**
55       Type of container used for storing data. Must be MatrixLookup,
56       MatrixLookupWeighted, or KernelLookup
57     */
58    typedef Data data_type;
59
60    ///
61    /// Constructor.
62    ///
63    EnsembleBuilder(const Classifier&, const Data&, const Sampler&);
64
65    ///
66    /// Constructor.
67    ///
68    EnsembleBuilder(const Classifier&, const Data&, const Sampler&, 
69                    FeatureSelector&);
70
71    ///
72    /// Destructor.
73    ///
74    virtual ~EnsembleBuilder(void);
75
76    /**
77       \brief Generate ensemble.
78       
79       Function trains each member of the Ensemble.
80    */
81    void build(void);
82
83    ///
84    /// @return ith classifier
85    ///
86    const Classifier& classifier(size_t i) const;
87     
88    ///
89    /// @return Number of classifiers in ensemble. Prior build(void)
90    /// is issued size is zero.
91    ///
92    unsigned long size(void) const;
93
94    ///
95    /// @brief Generate validation data for ensemble
96    ///
97    /// validate()[i][j] return averager for class @a i for sample @a j
98    ///
99    const std::vector<std::vector<statistics::Averager> >& validate(void);
100   
101    /**
102       Predict a dataset using the ensemble.
103       
104       If @a data is a KernelLookup each column should correspond to a
105       test sample and each row should correspond to a training
106       sample. More exactly row \f$ i \f$ in @a data should correspond
107       to the same sample as row/column \f$ i \f$ in the training
108       kernel corresponds to.
109    */
110    void predict(const Data& data, 
111                 std::vector<std::vector<statistics::Averager> > &);
112
113  private:
114    // no copying
115    EnsembleBuilder(const EnsembleBuilder&);
116    const EnsembleBuilder& operator=(const EnsembleBuilder&);
117   
118
119    const Classifier& mother_;
120    SubsetGenerator<Data>* subset_;
121    std::vector<Classifier*> classifier_;
122    KernelLookup test_data(const KernelLookup&, size_t k);
123    MatrixLookup test_data(const MatrixLookup&, size_t k);
124    MatrixLookupWeighted test_data(const MatrixLookupWeighted&, size_t k);
125    std::vector<std::vector<statistics::Averager> > validation_result_;
126
127  };
128 
129
130  // implementation
131
132  template <class C, class D> 
133  EnsembleBuilder<C,D>::EnsembleBuilder(const C& sc, const D& data,
134                                        const Sampler& sampler) 
135    : mother_(sc),subset_(new SubsetGenerator<D>(sampler,data))
136  {
137  }
138
139
140  template <class C, class D> 
141  EnsembleBuilder<C, D>::EnsembleBuilder(const C& sc, const D& data, 
142                                         const Sampler& sampler,
143                                         FeatureSelector& fs) 
144    : mother_(sc),
145      subset_(new SubsetGenerator<D>(sampler,data,fs))
146  {
147  }
148
149
150  template <class C, class D> 
151  EnsembleBuilder<C, D>::~EnsembleBuilder(void) 
152  {
153    for(size_t i=0; i<classifier_.size(); i++)
154      delete classifier_[i];
155    delete subset_;
156  }
157
158
159  template <class C, class D> 
160  void EnsembleBuilder<C, D>::build(void) 
161  {
162    if (classifier_.empty()){
163      for(unsigned long i=0; i<subset_->size();++i) {
164        C* classifier = mother_.make_classifier();
165        classifier->train(subset_->training_data(i), 
166                          subset_->training_target(i));
167        classifier_.push_back(classifier);
168      }   
169    }
170  }
171
172
173  template <class C, class D> 
174  const C& EnsembleBuilder<C, D>::classifier(size_t i) const
175  {
176    return *(classifier_[i]);
177  }
178
179
180  template <class C, class D> 
181  void EnsembleBuilder<C, D>::predict
182  (const D& data, std::vector<std::vector<statistics::Averager> >& result)
183  {
184    result = std::vector<std::vector<statistics::Averager> >
185      (subset_->target().nof_classes(), 
186       std::vector<statistics::Averager>(data.columns()));
187   
188    utility::Matrix prediction; 
189
190    for(unsigned long k=0;k<size();++k) {       
191      D sub_data =  test_data(data, k);
192      classifier(k).predict(sub_data,prediction);
193    }
194
195    for(size_t i=0; i<prediction.rows();i++) 
196      for(size_t j=0; j<prediction.columns();j++) 
197        result[i][j].add(prediction(i,j));   
198  }
199
200 
201  template <class C, class D> 
202  unsigned long EnsembleBuilder<C, D>::size(void) const
203  {
204    return classifier_.size();
205  }
206
207
208  template <class C, class D> 
209  MatrixLookup EnsembleBuilder<C, D>::test_data(const MatrixLookup& data, 
210                                                size_t k)
211  {
212    return MatrixLookup(data, subset_->training_features(k), true);
213  }
214 
215
216  template <class C, class D> 
217  MatrixLookupWeighted
218  EnsembleBuilder<C, D>::test_data(const MatrixLookupWeighted& data, size_t k)
219  {
220    return MatrixLookupWeighted(data, subset_->training_features(k), true);
221  }
222 
223
224  template <class C, class D> 
225  KernelLookup
226  EnsembleBuilder<C, D>::test_data(const KernelLookup& kernel, size_t k)
227  {
228    // weighted case
229    if (kernel.weighted()){
230      assert(false);
231      // no feature selection
232      if (kernel.data_weighted().rows()==subset_->training_features(k).size())
233        return KernelLookup(kernel, subset_->training_index(k), true);
234      MatrixLookupWeighted mlw = test_data(kernel.data_weighted(), k);
235      return subset_->training_data(k).test_kernel(mlw);
236
237    }
238    // unweighted case
239
240    // no feature selection
241    if (kernel.data().rows()==subset_->training_features(k).size())
242      return KernelLookup(kernel, subset_->training_index(k), true);
243   
244    // feature selection
245    return subset_->training_data(k).test_kernel(test_data(kernel.data(),k));
246  }
247 
248
249  template <class C, class D> 
250  const std::vector<std::vector<statistics::Averager> >& 
251  EnsembleBuilder<C, D>::validate(void)
252  {
253    // Don't recalculate validation_result_
254    if (!validation_result_.empty())
255      return validation_result_;
256
257    validation_result_ = std::vector<std::vector<statistics::Averager> >
258      (subset_->target().nof_classes(), 
259       std::vector<statistics::Averager>(subset_->target().size()));
260
261    utility::Matrix prediction; 
262    for(unsigned long k=0;k<size();k++) {
263      classifier(k).predict(subset_->validation_data(k),prediction);
264     
265      // map results to indices of samples in training + validation data set
266      for(size_t i=0; i<prediction.rows();i++) 
267        for(size_t j=0; j<prediction.columns();j++) {
268          validation_result_[i][subset_->validation_index(k)[j]].
269            add(prediction(i,j));
270        }           
271    }
272    return validation_result_;
273  }
274
275}}} // of namespace classifier, yat, and theplu
276
277#endif
Note: See TracBrowser for help on using the repository browser.