source: trunk/yat/classifier/SubsetGenerator.cc @ 680

Last change on this file since 680 was 680, checked in by Jari Häkkinen, 16 years ago

Addresses #153. Introduced yat namespace. Removed alignment namespace. Clean up of code.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 7.3 KB
Line 
1// $Id$
2
3/*
4  Copyright (C) The authors contributing to this file.
5
6  This file is part of the yat library, http://lev.thep.lu.se/trac/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 2 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with this program; if not, write to the Free Software
20  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21  02111-1307, USA.
22*/
23
#include "SubsetGenerator.h"
#include "DataLookup2D.h"
#include "FeatureSelector.h"
#include "KernelLookup.h"
#include "MatrixLookup.h"
#include "MatrixLookupWeighted.h"
#include "Target.h"

#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <iostream>
#include <typeinfo>
#include <utility>
#include <vector>
37
38namespace theplu {
39namespace yat {
40namespace classifier { 
41
42  SubsetGenerator::SubsetGenerator(const Sampler& sampler, 
43                                   const DataLookup2D& data)
44    : f_selector_(NULL), sampler_(sampler), state_(0), weighted_(false)
45  { 
46    assert(target().size()==data.columns());
47
48    training_data_.reserve(sampler_.size());
49    validation_data_.reserve(sampler_.size());
50    for (size_t i=0; i<sampler_.size(); ++i){
51      // Dynamically allocated. Must be deleted in destructor.
52      training_data_.push_back(data.training_data(sampler.training_index(i)));
53      validation_data_.push_back(data.validation_data(sampler.training_index(i),
54                                                      sampler.validation_index(i)));
55
56      training_target_.push_back(Target(target(),sampler.training_index(i)));
57      validation_target_.push_back(Target(target(),
58                                          sampler.validation_index(i)));
59      assert(training_data_.size()==i+1);
60      assert(training_target_.size()==i+1);
61      assert(validation_data_.size()==i+1);
62      assert(validation_target_.size()==i+1);
63    }
64
65    // No feature selection, hence features same for all partitions
66    // and can be stored in features_[0]
67    features_.resize(1);
68    features_[0].reserve(data.rows());
69    for (size_t i=0; i<data.rows(); ++i)
70      features_[0].push_back(i);
71
72    assert(training_data_.size()==size());
73    assert(training_target_.size()==size());
74    assert(validation_data_.size()==size());
75    assert(validation_target_.size()==size());
76  }
77
78
79  SubsetGenerator::SubsetGenerator(const Sampler& sampler, 
80                                   const DataLookup2D& data, 
81                                   FeatureSelector& fs)
82    : f_selector_(&fs), sampler_(sampler), state_(0), weighted_(false)
83  { 
84    assert(target().size()==data.columns());
85
86    features_.reserve(size());
87    training_data_.reserve(size());
88    validation_data_.reserve(size());
89
90    // Taking care of three different case.
91    // We start with the case of MatrixLookup
92    const MatrixLookup* ml = dynamic_cast<const MatrixLookup*>(&data);
93    if (ml){
94      for (reset(); more(); next()){
95     
96        training_target_.push_back(Target(target(),training_index()));
97        validation_target_.push_back(Target(target(),validation_index()));
98        // training data with no feature selection
99        const MatrixLookup* train_data_all_feat = 
100          ml->training_data(training_index());
101        // use these data to create feature selection
102        assert(train_data_all_feat);
103        f_selector_->update(*train_data_all_feat, training_target());
104        // get features
105        features_.push_back(f_selector_->features());
106        assert(train_data_all_feat);
107        delete train_data_all_feat;
108       
109        // Dynamically allocated. Must be deleted in destructor.
110        training_data_.push_back(new MatrixLookup(*ml,features_.back(), 
111                                                  training_index()));
112        validation_data_.push_back(new MatrixLookup(*ml,features_.back(), 
113                                                    validation_index()));     
114      }
115    }
116    else {
117      // Second the case of MatrixLookupWeighted
118      const MatrixLookupWeighted* ml = 
119        dynamic_cast<const MatrixLookupWeighted*>(&data);
120      if (ml){
121        for (reset(); more(); next()){
122     
123          training_target_.push_back(Target(target(),training_index()));
124          validation_target_.push_back(Target(target(),validation_index()));
125          // training data with no feature selection
126          const MatrixLookupWeighted* train_data_all_feat = 
127            ml->training_data(training_index());
128          // use these data to create feature selection
129          f_selector_->update(*train_data_all_feat, training_target());
130          // get features
131          features_.push_back(f_selector_->features());
132          delete train_data_all_feat;
133         
134          // Dynamically allocated. Must be deleted in destructor.
135          training_data_.push_back(new MatrixLookupWeighted(*ml,
136                                                            features_.back(), 
137                                                            training_index()
138                                                            ));
139          validation_data_.push_back(new MatrixLookupWeighted(*ml,
140                                                              features_.back(), 
141                                                              validation_index()
142                                                              ));     
143        }
144      }
145      else {
146        // Third the case of MatrixLookupWeighted
147        const KernelLookup* kernel = dynamic_cast<const KernelLookup*>(&data);
148        if (kernel){
149          for (reset(); more(); next()){
150            training_target_.push_back(Target(target(),training_index()));
151            validation_target_.push_back(Target(target(),validation_index()));
152            const DataLookup2D* matrix = kernel->data();
153            // dynamically allocated must be deleted
154            const DataLookup2D* training_matrix = 
155              matrix->training_data(training_index());
156            if (matrix->weighted()){
157              const MatrixLookupWeighted& ml = 
158                dynamic_cast<const MatrixLookupWeighted&>(*matrix);
159              f_selector_->update(MatrixLookupWeighted(ml,training_index(),false), 
160                                  training_target());
161            }
162            else {
163              const MatrixLookup& ml = 
164                dynamic_cast<const MatrixLookup&>(*matrix);
165              f_selector_->update(MatrixLookup(ml,training_index(), false), 
166                                  training_target());
167            } 
168            std::vector<size_t> dummie=f_selector_->features();
169            features_.push_back(dummie);
170            //features_.push_back(f_selector_->features());
171            assert(kernel);
172            const KernelLookup* kl = kernel->selected(features_.back());
173            assert(training_matrix);
174            delete training_matrix;
175                     
176            // Dynamically allocated. Must be deleted in destructor.
177            training_data_.push_back(kl->training_data(training_index()));
178            validation_data_.push_back(kl->validation_data(training_index(), 
179                                                           validation_index()));
180            assert(kl);
181            delete kl;
182          }
183        }
184        else {
185        std::cerr << "Sorry, your type of DataLookup2D (" 
186                  << typeid(data).name() << ")\nis not supported in " 
187                  << "SubsetGenerator with\nFeatureSelection\n";
188        exit(-1);
189        }
190      }
191    }
192    assert(training_data_.size()==size());
193    assert(training_target_.size()==size());
194    assert(validation_data_.size()==size());
195    assert(validation_target_.size()==size());
196    reset();
197  }
198
199
200  SubsetGenerator::~SubsetGenerator()
201  {
202    assert(training_data_.size()==validation_data_.size());
203    for (size_t i=0; i<training_data_.size(); i++) 
204      delete training_data_[i];
205    for (size_t i=0; i<validation_data_.size(); i++) 
206      delete validation_data_[i];
207  }
208
209}}} // of namespace classifier, yat, and theplu
Note: See TracBrowser for help on using the repository browser.