source: trunk/yat/classifier/SVM.h @ 1090

Last change on this file since 1090 was 1087, checked in by Peter, 13 years ago

fixes #312 and added passing of data in EB constructors

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 7.5 KB
#ifndef _theplu_yat_classifier_svm_
#define _theplu_yat_classifier_svm_

// $Id$

/*
  Copyright (C) 2004, 2005 Jari Häkkinen, Peter Johansson
  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér, Peter Johansson
  Copyright (C) 2007 Peter Johansson

  This file is part of the yat library, http://trac.thep.lu.se/yat

  The yat library is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  The yat library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  02111-1307, USA.
*/

#include "KernelLookup.h"
#include "SVindex.h"
#include "Target.h"
#include "yat/utility/vector.h"

#include <utility>
#include <vector>

namespace theplu {
namespace yat {
namespace classifier {

  class DataLookup2D;
  class Target;
  ///
  /// @brief Support Vector Machine
  ///
  /// Class for SVM using Keerthi's second modification of Platt's
  /// Sequential Minimal Optimization (SMO). The SVM uses all data
  /// given for training. If validation or testing is wanted, this
  /// should be taken care of outside (in the kernel).
  ///
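  /// A minimal usage sketch (assumes a KernelLookup @a kernel and a
  /// Target @a target constructed elsewhere; the values passed to
  /// set_C() and max_epochs() below are arbitrary):
  /// \code
  /// SVM svm(kernel, target);   // kernel and target must outlive svm
  /// svm.set_C(100);            // finite C gives a soft margin
  /// svm.max_epochs(10000);     // cap on training epochs
  /// svm.train();
  /// const utility::vector& a = svm.alpha();  // Lagrange multipliers
  /// \endcode
  ///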
  class SVM
  {

  public:
    ///
    /// Constructor taking the kernel and the target vector as
    /// input.
    ///
    /// @note If the @a target or @a kernel is destroyed, the
    /// behaviour is undefined.
    ///
    SVM(const KernelLookup& kernel, const Target& target);

    ///
    /// Destructor
    ///
    virtual ~SVM();

    const DataLookup2D& data(void) const;

    ///
    /// If the DataLookup2D is not a KernelLookup, a bad_cast
    /// exception is thrown.
    ///
    SVM* make_classifier(const DataLookup2D&, const Target&) const;

    ///
    /// @return \f$ \alpha \f$
    ///
    const utility::vector& alpha(void) const;

    ///
    /// The C-parameter is the balance term (see train()). A very
    /// large C means the training will focus on getting samples
    /// correctly classified, with a risk of overfitting and poor
    /// generalisation. A too small C results in a training in which
    /// misclassifications are not penalized. C is weighted with
    /// respect to group size, so \f$ n_+C_+ = n_-C_- \f$, meaning a
    /// misclassification of the smaller group is penalized harder;
    /// for example, with \f$ n_-=90 \f$ and \f$ n_+=10 \f$ samples,
    /// \f$ C_+ = 9C_- \f$. This balance is equivalent to the one
    /// occurring in regression with regularisation, or in ANN
    /// training with a weight-decay term. Default is C set to
    /// infinity.
    ///
    /// @returns mean of vector \f$ C_i \f$
    ///
    double C(void) const;

    ///
    /// Default is max_epochs set to 100,000.
    ///
    /// @return maximal number of epochs
    ///
    long int max_epochs(void) const;

    /**
       \brief set maximal number of epochs in training
    */
    void max_epochs(long int);

    /**
        The output is calculated as \f$ o_i = \sum_j \alpha_j t_j
        K_{ij} + bias \f$, where \f$ t \f$ is the target.

        @return output
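
        A minimal sketch of reading the output (assumes a trained SVM
        @a svm; element access via operator() as in
        yat::utility::vector):
        \code
        const utility::vector& o = svm.output();
        double o0 = o(0);  // output for the first training sample
        \endcode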
    */
    const theplu::yat::utility::vector& output(void) const;

    /**
       Generate prediction @a predict from @a input. The prediction
       is calculated as the output times the margin, i.e., the
       geometric distance from the decision hyperplane: \f$ \frac{
       \sum \alpha_j t_j K_{ij} + bias}{w} \f$. The output has two
       rows. The first row is for binary target true, and the second
       is for binary target false. The second row is superfluous, as
       it is the first row negated; it exists only to be aligned with
       multi-class SupervisedClassifiers. Each column in @a input and
       @a predict corresponds to a sample to predict. Each row in @a
       input corresponds to a training sample; more exactly, row i in
       @a input should correspond to row i in the KernelLookup that
       was used for training.
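
       A minimal sketch (assumes a trained SVM @a svm; the matrix
       constructor, input.columns(), and element access are assumed
       to follow the usual yat interfaces):
       \code
       utility::matrix result(2, input.columns());
       svm.predict(input, result);
       // result(0, j) is the prediction for sample j (target true)
       \endcode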
    */
    void predict(const DataLookup2D& input, utility::matrix& predict) const;

    ///
    /// @return output times margin (i.e. geometric distance from the
    /// decision hyperplane) for data @a input
    ///
    double predict(const DataLookup1D& input) const;

    ///
    /// @return output times margin for data @a input with
    /// corresponding @a weight
    ///
    double predict(const DataLookupWeighted1D& input) const;

    ///
    /// @brief Function sets \f$ \alpha=0 \f$ and makes the SVM untrained.
    ///
    void reset(void);

    ///
    /// @brief sets the C-parameter
    ///
    void set_C(const double);

    /**
       Train the SVM following Platt's SMO, with Keerthi's
       modification. Minimizing \f$ \frac{1}{2}\sum
       y_iy_j\alpha_i\alpha_j(K_{ij}+\frac{1}{C_i}\delta_{ij}) - \sum
       \alpha_i \f$, which corresponds to minimizing \f$ \sum
       w_i^2+\sum C_i\xi_i^2 \f$.

       @note If the training problem is not linearly separable and C
       is set to infinity, the minimum is located at infinity, and
       thus will not be reached within the maximal number of
       epochs. More exactly, when the problem is not linearly
       separable, there exists an eigenvector of \f$
       H_{ij}=y_iy_jK_{ij} \f$ within the space defined by the
       conditions \f$ \alpha_i>0 \f$ and \f$ \sum \alpha_i y_i = 0
       \f$. As the eigenvalue is zero in this direction, the
       quadratic term does not contribute to the objective; the
       objective consists of the linear term only, and hence there is
       no minimum. This problem only occurs when \f$ C \f$ is set to
       infinity, because for a finite \f$ C \f$ all eigenvalues are
       finite. However, for a large \f$ C \f$ (and a training problem
       that is not linearly separable) there exists an eigenvector
       corresponding to a small eigenvalue, which means the minimum
       has moved from infinity to "very far away". In practice this
       also results in the minimum not being reached within the
       maximal number of epochs, and the value of \f$ C \f$ should be
       decreased.

       \throw if the maximal number of epochs is reached.
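
       Since train() throws when the maximal number of epochs is
       reached, a caller may want to guard the call. A minimal sketch
       (the concrete exception type is not specified here, so the
       generic std::exception handler is an assumption):
       \code
       try {
         svm.train();
       }
       catch (const std::exception& e) {
         // not converged within max_epochs(); decrease C and retry
       }
       \endcode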
    */
    void train();

  private:
    ///
    /// Copy constructor. (not implemented)
    ///
    SVM(const SVM&);

    ///
    /// Calculates bounds for alpha2
    ///
    void bounds(double&, double&) const;

    ///
    /// @brief calculates the bias term
    ///
    void calculate_bias(void);

    ///
    /// Calculate the margin, which is the inverse of w
    ///
    void calculate_margin(void);

    ///
    /// Private function choosing which two elements should be
    /// updated. First check for the biggest violation (output -
    /// target = 0) among support vectors (alpha != 0). If no
    /// violation is found, check sequentially among the other
    /// samples. If no violation is found there either, training is
    /// completed.
    ///
    /// @return true if a pair of samples that violate the conditions
    /// can be found
    ///
    bool choose(const theplu::yat::utility::vector&);

    ///
    /// @return kernel modified with diagonal term (soft margin),
    /// i.e. \f$ K_{ij} + \frac{\delta_{ij}}{C_i} \f$ (cf. train())
    ///
    double kernel_mod(const size_t i, const size_t j) const;

    ///
    /// @return 1 if i belongs to binary target true, else -1
    ///
    int target(size_t i) const;

    utility::vector alpha_;
    double bias_;
    double C_inverse_;
    const KernelLookup* kernel_;
    double margin_;
    unsigned long int max_epochs_;
    utility::vector output_;
    bool owner_;
    SVindex sample_;
    Target target_;
    bool trained_;
    double tolerance_;

  };

}}} // of namespace classifier, yat, and theplu

#endif