source: trunk/c++_tools/classifier/SubsetGenerator.cc @ 615

Last change on this file since 615 was 615, checked in by Peter, 15 years ago

ref #60 NOTE: there is most likely a bug lurking here. I have removed the ensemble.build() test from ensemble_test so that the test passes. I will try to find and remove this bug as soon as possible.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 6.1 KB
Line 
1// $Id$
2
3
4#include <c++_tools/classifier/SubsetGenerator.h>
5#include <c++_tools/classifier/DataLookup2D.h>
6#include <c++_tools/classifier/FeatureSelector.h>
7#include <c++_tools/classifier/MatrixLookup.h>
8#include <c++_tools/classifier/Target.h>
9
10#include <algorithm>
11#include <cassert>
12#include <utility>
13#include <vector>
14
15namespace theplu {
16namespace classifier { 
17
18  SubsetGenerator::SubsetGenerator(const Sampler& sampler, 
19                                   const DataLookup2D& data)
20    : f_selector_(NULL), sampler_(sampler), state_(0), weighted_(false)
21  { 
22    assert(target().size()==data.columns());
23
24    training_data_.reserve(sampler_.size());
25    training_weight_.reserve(sampler_.size());
26    validation_data_.reserve(sampler_.size());
27    validation_weight_.reserve(sampler_.size());
28    for (size_t i=0; i<sampler_.size(); ++i){
29     
30      // Dynamically allocated. Must be deleted in destructor.
31      training_data_.push_back(data.training_data(sampler.training_index(i)));
32      training_weight_.push_back
33        (new MatrixLookup(training_data_.back()->rows(),
34                          training_data_.back()->columns(),1));
35      validation_data_.push_back(data.validation_data(sampler.training_index(i),
36                                                      sampler.validation_index(i)));
37      validation_weight_.push_back
38        (new MatrixLookup(validation_data_.back()->rows(),
39                          validation_data_.back()->columns(),1));
40
41
42      training_target_.push_back(Target(target(),sampler.training_index(1)));
43      validation_target_.push_back(Target(target(),
44                                          sampler.validation_index(i)));
45      assert(training_data_.size()==i+1);
46      assert(training_weight_.size()==i+1);
47      assert(training_target_.size()==i+1);
48      assert(validation_data_.size()==i+1);
49      assert(validation_weight_.size()==i+1);
50      assert(validation_target_.size()==i+1);
51    }
52
53    // No feature selection, hence features same for all partitions
54    // and can be stored in features_[0]
55    features_.resize(1);
56    features_[0].reserve(data.rows());
57    for (size_t i=0; i<data.rows(); ++i)
58      features_[0].push_back(i);
59
60    assert(training_data_.size()==size());
61    assert(training_weight_.size()==size());
62    assert(training_target_.size()==size());
63    assert(validation_data_.size()==size());
64    assert(validation_weight_.size()==size());
65    assert(validation_target_.size()==size());
66  }
67
68  SubsetGenerator::SubsetGenerator(const Sampler& sampler, 
69                                   const DataLookup2D& data,
70                                   const MatrixLookup& weight)
71    : sampler_(sampler), state_(0), weighted_(true)
72  { 
73    std::cout << "Creating SubsetGenerator" << this << std::endl;
74    assert(target().size()==data.columns());
75    training_data_.reserve(size());
76    training_weight_.reserve(size());
77    validation_data_.reserve(size());
78    validation_weight_.reserve(size());
79    for (reset(); more(); next()){
80     
81      // Dynamically allocated. Must be deleted in destructor.
82      training_data_.push_back(data.training_data(training_index()));
83      validation_data_.push_back(data.validation_data(training_index(), 
84                                                    validation_index()));
85      training_weight_.push_back(weight.training_data(training_index()));
86      validation_weight_.push_back(weight.validation_data(training_index(), 
87                                                          validation_index()));
88
89
90      training_target_.push_back(Target(target(),training_index()));
91      validation_target_.push_back(Target(target(),validation_index()));
92    }
93    // No feature selection, hence features same for all partitions
94    // and can be stored in features_[0]
95    features_.resize(1);
96    features_[0].reserve(data.rows());
97    for (size_t i=0; i<data.rows(); ++i)
98      features_[0].push_back(i);
99
100    assert(training_data_.size()==size());
101    assert(training_weight_.size()==size());
102    assert(training_target_.size()==size());
103    assert(validation_data_.size()==size());
104    assert(validation_weight_.size()==size());
105    assert(validation_target_.size()==size());
106    reset();
107  }
108
109
110  SubsetGenerator::SubsetGenerator(const Sampler& sampler, 
111                                   const DataLookup2D& data, 
112                                   FeatureSelector& fs)
113    : f_selector_(&fs), sampler_(sampler), state_(0), weighted_(false)
114  { 
115    std::cout << "Creating SubsetGenerator" << this << std::endl;
116    assert(target().size()==data.columns());
117
118    features_.reserve(size());
119    training_data_.reserve(size());
120    training_weight_.reserve(size());
121    validation_data_.reserve(size());
122    validation_weight_.reserve(size());
123
124    for (reset(); more(); next()){
125     
126      // training data with no feature selection
127      const DataLookup2D* train_data_all_feat = 
128        data.training_data(training_index());
129      // use these data to create feature selection
130      f_selector_->update(*train_data_all_feat, training_target());
131      // get features
132      features_.push_back(f_selector_->features());
133      delete train_data_all_feat;
134
135      // Dynamically allocated. Must be deleted in destructor.
136      training_data_.push_back(data.training_data(features_.back(), 
137                                                  training_index()));
138      training_weight_.push_back
139        (new MatrixLookup(training_data_.back()->rows(),
140                          training_data_.back()->columns(),1));
141      validation_data_.push_back(data.validation_data(features_.back(),
142                                                      training_index(), 
143                                                      validation_index()));
144      validation_weight_.push_back
145        (new MatrixLookup(validation_data_.back()->rows(),
146                          validation_data_.back()->columns(),1));
147
148
149      training_target_.push_back(Target(target(),training_index()));
150      validation_target_.push_back(Target(target(),validation_index()));
151    }
152
153    assert(training_data_.size()==size());
154    assert(training_weight_.size()==size());
155    assert(training_target_.size()==size());
156    assert(validation_data_.size()==size());
157    assert(validation_weight_.size()==size());
158    assert(validation_target_.size()==size());
159    reset();
160  }
161
162
163  SubsetGenerator::~SubsetGenerator()
164  {
165    assert(training_data_.size()==validation_data_.size());
166    for (size_t i=0; i<training_data_.size(); i++) 
167      delete training_data_[i];
168    for (size_t i=0; i<validation_data_.size(); i++) 
169      delete validation_data_[i];
170    for (size_t i=0; i<training_weight_.size(); i++) 
171      delete training_weight_[i];
172    for (size_t i=0; i<validation_weight_.size(); i++) 
173      delete validation_weight_[i];
174  }
175
176}} // of namespace classifier and namespace theplu
Note: See TracBrowser for help on using the repository browser.