source: trunk/c++_tools/classifier/CrossValidationSampler.cc @ 615

Last change on this file since 615 was 615, checked in by Peter, 16 years ago

ref #60 NOTE: there is most likely a bug around here. I have removed the ensemble.build() test in ensemble_test to get the test to go through. I will try to find and remove this bug asap.

File size: 3.0 KB
Line 
1// $Id$
2
3/*
4  Copyright (C) 2006 Peter Johansson
5
6  This file is part of the thep c++ tools library,
7                                http://lev.thep.lu.se/trac/c++_tools
8
9  The c++ tools library is free software; you can redistribute it
10  and/or modify it under the terms of the GNU General Public License
11  as published by the Free Software Foundation; either version 2 of
12  the License, or (at your option) any later version.
13
14  The c++ tools library is distributed in the hope that it will be
15  useful, but WITHOUT ANY WARRANTY; without even the implied warranty
16  of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  General Public License for more details.
18
19  You should have received a copy of the GNU General Public License
20  along with this program; if not, write to the Free Software
21  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22  02111-1307, USA.
23*/
24
25#include <c++_tools/classifier/CrossValidationSampler.h>
26#include <c++_tools/classifier/Target.h>
27#include <c++_tools/random/random.h>
28
29#include <algorithm>
30#include <cassert>
31#include <utility>
32#include <vector>
33
34namespace theplu {
35namespace classifier { 
36
37  CrossValidationSampler::CrossValidationSampler(const Target& target, 
38                                                 const size_t N, 
39                                                 const size_t k)
40    : Sampler(target), k_(k)
41  { 
42    assert(target.size()>1);
43    build(target, N, k);
44  }
45
46  CrossValidationSampler::~CrossValidationSampler()
47  {
48  }
49
50  void CrossValidationSampler::build(const Target& target, size_t N, size_t k)
51  {
52    std::vector<std::pair<size_t,size_t> > v;
53    for (size_t i=0; i<target.size(); i++)
54      v.push_back(std::make_pair(target(i),i));
55    // sorting with respect to class
56    std::sort(v.begin(),v.end());
57   
58    // my_begin[i] is index of first sample of class i
59    std::vector<size_t> my_begin;
60    my_begin.reserve(target.nof_classes());
61    my_begin.push_back(0);
62    for (size_t i=1; i<target.size(); i++)
63      while (v[i].first > my_begin.size()-1)
64        my_begin.push_back(i);
65    my_begin.push_back(target.size());
66
67    random::DiscreteUniform rnd;
68
69    for (size_t i=0; i<N; ) {
70      // shuffle indices within class each class
71      for (size_t j=0; j<target.nof_classes(); j++)
72        random_shuffle(v.begin()+my_begin[j],v.begin()+my_begin[j+1],rnd);
73     
74      for (size_t part=0; part<k && i<N; i++, part++) {
75        std::vector<size_t> training_index;
76        std::vector<size_t> validation_index;
77        for (size_t j=0; j<v.size(); j++) {
78          if (j%k==part)
79            validation_index.push_back(v[j].second);
80          else
81            training_index.push_back(v[j].second);
82        }
83
84        training_index_.push_back(training_index);
85        validation_index_.push_back(validation_index);
86      }
87    }
88    assert(training_index_.size()==N);
89    assert(validation_index_.size()==N);
90   
91    for (size_t i=0; i<N; ++i){
92      training_target_.push_back(Target(target,training_index_[i]));
93      validation_target_.push_back(Target(target,validation_index_[i]));
94    }
95    assert(training_target_.size()==N);
96    assert(validation_target_.size()==N);
97  }
98
99}} // of namespace classifier and namespace theplu
Note: See TracBrowser for help on using the repository browser.