source: trunk/c++_tools/classifier/SubsetGenerator.cc @ 619

Last change on this file since 619 was 619, checked in by Peter, 16 years ago

Fixes #107 and refs #105: fixed a bug in EnsembleBuilder and removed some asserts in header files.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 6.0 KB
Line 
1// $Id$
2
3
4#include <c++_tools/classifier/SubsetGenerator.h>
5#include <c++_tools/classifier/DataLookup2D.h>
6#include <c++_tools/classifier/FeatureSelector.h>
7#include <c++_tools/classifier/MatrixLookup.h>
8#include <c++_tools/classifier/Target.h>
9
10#include <algorithm>
11#include <cassert>
12#include <utility>
13#include <vector>
14
15namespace theplu {
16namespace classifier { 
17
18  SubsetGenerator::SubsetGenerator(const Sampler& sampler, 
19                                   const DataLookup2D& data)
20    : f_selector_(NULL), sampler_(sampler), state_(0), weighted_(false)
21  { 
22    assert(target().size()==data.columns());
23
24    training_data_.reserve(sampler_.size());
25    training_weight_.reserve(sampler_.size());
26    validation_data_.reserve(sampler_.size());
27    validation_weight_.reserve(sampler_.size());
28    for (size_t i=0; i<sampler_.size(); ++i){
29      // Dynamically allocated. Must be deleted in destructor.
30      training_data_.push_back(data.training_data(sampler.training_index(i)));
31      training_weight_.push_back
32        (new MatrixLookup(training_data_.back()->rows(),
33                          training_data_.back()->columns(),1));
34      validation_data_.push_back(data.validation_data(sampler.training_index(i),
35                                                      sampler.validation_index(i)));
36      validation_weight_.push_back
37        (new MatrixLookup(validation_data_.back()->rows(),
38                          validation_data_.back()->columns(),1));
39
40
41      training_target_.push_back(Target(target(),sampler.training_index(i)));
42      validation_target_.push_back(Target(target(),
43                                          sampler.validation_index(i)));
44      assert(training_data_.size()==i+1);
45      assert(training_weight_.size()==i+1);
46      assert(training_target_.size()==i+1);
47      assert(validation_data_.size()==i+1);
48      assert(validation_weight_.size()==i+1);
49      assert(validation_target_.size()==i+1);
50    }
51
52    // No feature selection, hence features same for all partitions
53    // and can be stored in features_[0]
54    features_.resize(1);
55    features_[0].reserve(data.rows());
56    for (size_t i=0; i<data.rows(); ++i)
57      features_[0].push_back(i);
58
59    assert(training_data_.size()==size());
60    assert(training_weight_.size()==size());
61    assert(training_target_.size()==size());
62    assert(validation_data_.size()==size());
63    assert(validation_weight_.size()==size());
64    assert(validation_target_.size()==size());
65  }
66
67  SubsetGenerator::SubsetGenerator(const Sampler& sampler, 
68                                   const DataLookup2D& data,
69                                   const MatrixLookup& weight)
70    : sampler_(sampler), state_(0), weighted_(true)
71  { 
72    assert(target().size()==data.columns());
73    training_data_.reserve(size());
74    training_weight_.reserve(size());
75    validation_data_.reserve(size());
76    validation_weight_.reserve(size());
77    for (reset(); more(); next()){
78     
79      // Dynamically allocated. Must be deleted in destructor.
80      training_data_.push_back(data.training_data(training_index()));
81      validation_data_.push_back(data.validation_data(training_index(), 
82                                                    validation_index()));
83      training_weight_.push_back(weight.training_data(training_index()));
84      validation_weight_.push_back(weight.validation_data(training_index(), 
85                                                          validation_index()));
86
87
88      training_target_.push_back(Target(target(),training_index()));
89      validation_target_.push_back(Target(target(),validation_index()));
90    }
91    // No feature selection, hence features same for all partitions
92    // and can be stored in features_[0]
93    features_.resize(1);
94    features_[0].reserve(data.rows());
95    for (size_t i=0; i<data.rows(); ++i)
96      features_[0].push_back(i);
97
98    assert(training_data_.size()==size());
99    assert(training_weight_.size()==size());
100    assert(training_target_.size()==size());
101    assert(validation_data_.size()==size());
102    assert(validation_weight_.size()==size());
103    assert(validation_target_.size()==size());
104    reset();
105  }
106
107
108  SubsetGenerator::SubsetGenerator(const Sampler& sampler, 
109                                   const DataLookup2D& data, 
110                                   FeatureSelector& fs)
111    : f_selector_(&fs), sampler_(sampler), state_(0), weighted_(false)
112  { 
113    assert(target().size()==data.columns());
114
115    features_.reserve(size());
116    training_data_.reserve(size());
117    training_weight_.reserve(size());
118    validation_data_.reserve(size());
119    validation_weight_.reserve(size());
120
121    for (reset(); more(); next()){
122     
123      // training data with no feature selection
124      const DataLookup2D* train_data_all_feat = 
125        data.training_data(training_index());
126      // use these data to create feature selection
127      f_selector_->update(*train_data_all_feat, training_target());
128      // get features
129      features_.push_back(f_selector_->features());
130      delete train_data_all_feat;
131
132      // Dynamically allocated. Must be deleted in destructor.
133      training_data_.push_back(data.training_data(features_.back(), 
134                                                  training_index()));
135      training_weight_.push_back
136        (new MatrixLookup(training_data_.back()->rows(),
137                          training_data_.back()->columns(),1));
138      validation_data_.push_back(data.validation_data(features_.back(),
139                                                      training_index(), 
140                                                      validation_index()));
141      validation_weight_.push_back
142        (new MatrixLookup(validation_data_.back()->rows(),
143                          validation_data_.back()->columns(),1));
144
145
146      training_target_.push_back(Target(target(),training_index()));
147      validation_target_.push_back(Target(target(),validation_index()));
148    }
149
150    assert(training_data_.size()==size());
151    assert(training_weight_.size()==size());
152    assert(training_target_.size()==size());
153    assert(validation_data_.size()==size());
154    assert(validation_weight_.size()==size());
155    assert(validation_target_.size()==size());
156    reset();
157  }
158
159
160  SubsetGenerator::~SubsetGenerator()
161  {
162    assert(training_data_.size()==validation_data_.size());
163    for (size_t i=0; i<training_data_.size(); i++) 
164      delete training_data_[i];
165    for (size_t i=0; i<validation_data_.size(); i++) 
166      delete validation_data_[i];
167    for (size_t i=0; i<training_weight_.size(); i++) 
168      delete training_weight_[i];
169    for (size_t i=0; i<validation_weight_.size(); i++) 
170      delete validation_weight_[i];
171  }
172
173}} // of namespace classifier and namespace theplu
Note: See TracBrowser for help on using the repository browser.