#ifndef _theplu_yat_classifier_svm_
#define _theplu_yat_classifier_svm_

// $Id$

/*
  Copyright (C) 2004, 2005 Jari Häkkinen, Peter Johansson
  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér, Peter Johansson
  Copyright (C) 2007 Peter Johansson

  This file is part of the yat library, http://trac.thep.lu.se/yat

  The yat library is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  The yat library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  02111-1307, USA.
*/

#include "SVindex.h"
#include "Target.h"
#include "yat/utility/Vector.h"

#include <utility>
#include <vector>

namespace theplu {
namespace yat {
namespace utility {
  class Matrix;
}

namespace classifier {

  class DataLookup1D;
  class DataLookupWeighted1D;
  class KernelLookup;

  ///
  /// @brief Support Vector Machine
  ///
  /// Class for SVM using Keerthi's second modification of Platt's
  /// Sequential Minimal Optimization. The SVM uses all data given
  /// for training; if validation or testing is wanted, it should be
  /// handled outside the class (in the kernel).
  ///
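  /// A minimal usage sketch (\c kernel and \c target are assumed to
  /// be a KernelLookup and a Target built elsewhere from the
  /// training data):
  ///
  /// \code
  /// theplu::yat::classifier::SVM svm;
  /// svm.train(kernel, target);  // optimize alpha and bias
  /// const theplu::yat::utility::Vector& out = svm.output();
  /// \endcode
  ///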
  class SVM
  {

  public:
    ///
    /// \brief Constructor
    ///
    SVM(void);

    /**
       Copy constructor.
    */
    SVM(const SVM&);

    ///
    /// Destructor
    ///
    virtual ~SVM();

    ///
    /// Same as the copy constructor.
    ///
    SVM* make_classifier(void) const;

    ///
    /// @return \f$ \alpha \f$
    ///
    const utility::Vector& alpha(void) const;

    ///
    /// The C-parameter is the balance term (see train()). A very
    /// large C means the training will focus on getting samples
    /// correctly classified, with risk for overfitting and poor
    /// generalisation. A too small C will result in a training where
    /// misclassifications are not penalized. C is weighted with
    /// respect to group size, so \f$ n_+C_+ = n_-C_- \f$, meaning a
    /// misclassification of the smaller group is penalized
    /// harder. For example, with 10 samples in the positive group
    /// and 40 in the negative, \f$ C_+ = 4C_- \f$. This balance is
    /// equivalent to the one occurring for regression with
    /// regularisation, or ANN training with a weight-decay term.
    /// Default is C set to infinity.
    ///
    /// @return mean of vector \f$ C_i \f$
    ///
    double C(void) const;

    ///
    /// Default is max_epochs set to 100,000.
    ///
    /// @return maximal number of epochs
    ///
    long int max_epochs(void) const;

    /**
       \brief set maximal number of epochs in training
    */
    void max_epochs(long int);

    /**
       The output is calculated as \f$ o_i = \sum \alpha_j t_j K_{ij}
       + bias \f$, where \f$ t \f$ is the target.

       @return output
    */
    const theplu::yat::utility::Vector& output(void) const;

    /**
       Generate prediction @a predict from @a input. The prediction
       is calculated as the output times the margin, i.e., the
       geometric distance from the decision hyperplane: \f$ \frac{
       \sum \alpha_j t_j K_{ij} + bias}{w} \f$. The output has 2
       rows: the first row is for binary target true, and the second
       for binary target false. The second row is superfluous, as it
       is the first row negated; it exists only to be aligned with
       multi-class SupervisedClassifiers. Each column in @a input and
       @a predict corresponds to a sample to predict. Each row in @a
       input corresponds to a training sample; more exactly, row i in
       @a input should correspond to row i in the KernelLookup that
       was used for training.
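
       A minimal usage sketch (hedged: the trained \c svm and \c n,
       the number of samples to predict, are assumptions of this
       example, which sizes the result matrix up front):

       \code
       theplu::yat::utility::Matrix result(2, n);
       svm.predict(input, result);
       // result(0, j): decision value for sample j
       // result(1, j): the same value negated
       \endcode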
    */
    void predict(const KernelLookup& input, utility::Matrix& predict) const;

    ///
    /// @return output times margin (i.e. geometric distance from
    /// decision hyperplane) from data @a input
    ///
    double predict(const DataLookup1D& input) const;

    ///
    /// @return output times margin from data @a input with
    /// corresponding @a weight
    ///
    double predict(const DataLookupWeighted1D& input) const;

    ///
    /// @brief sets the C-parameter
    ///
    void set_C(const double);

    /**
       Training the SVM following Platt's SMO, with Keerthi's
       modification. Minimizing \f$ \frac{1}{2}\sum
       y_iy_j\alpha_i\alpha_j(K_{ij}+\frac{1}{C_i}\delta_{ij}) - \sum
       \alpha_i \f$, which corresponds to minimizing \f$ \sum
       w_i^2+\sum C_i\xi_i^2 \f$.

       @note If the training problem is not linearly separable and C
       is set to infinity, the minimum is located at infinity, and
       thus will not be reached within the maximal number of
       epochs. More exactly, when the problem is not linearly
       separable, there exists an eigenvector of \f$
       H_{ij}=y_iy_jK_{ij} \f$ within the space defined by the
       conditions \f$ \alpha_i>0 \f$ and \f$ \sum \alpha_i y_i = 0
       \f$. As the eigenvalue is zero in this direction, the
       quadratic term does not contribute to the objective; the
       objective consists only of the linear term and hence has no
       minimum. This problem only occurs when \f$ C \f$ is set to
       infinity, because for a finite \f$ C \f$ all eigenvalues are
       finite. However, for a large \f$ C \f$ (and a training problem
       that is not linearly separable) there exists an eigenvector
       corresponding to a small eigenvalue, which means the minimum
       has moved from infinity to "very far away". In practice this
       also means the minimum is not reached within the maximal
       number of epochs, and the value of \f$ C \f$ should be
       decreased.
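
       A hedged sketch of guarding against non-convergence (\c svm,
       \c kernel, and \c target are assumed to exist; the type of
       the exception thrown when max_epochs is reached is not
       specified here, so a catch-all handler is used):

       \code
       svm.set_C(1e3);          // a finite C keeps the minimum finite
       svm.max_epochs(100000);
       try {
         svm.train(kernel, target);
       }
       catch (...) {
         // not converged within max_epochs; decrease C and retry
       }
       \endcode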

       \throw if maximal number of epochs is reached
    */
    void train(const KernelLookup& kernel, const Target& target);

  private:
    ///
    /// Calculates bounds for alpha2
    ///
    void bounds(double&, double&) const;

    ///
    /// @brief calculates the bias term
    ///
    void calculate_bias(void);

    ///
    /// Calculates the margin, i.e., the inverse of w
    ///
    void calculate_margin(void);

    ///
    /// Private function choosing which two elements should be
    /// updated. First checks for the biggest violation of (output -
    /// target = 0) among the support vectors (alpha != 0). If no
    /// violation is found there, checks sequentially among the other
    /// samples. If no violation is found there either, training is
    /// complete.
    ///
    /// @return true if a pair of samples violating the conditions
    /// can be found
    ///
    bool choose(const theplu::yat::utility::Vector&);

    ///
    /// @return kernel modified with diagonal term (soft margin),
    /// i.e. \f$ K_{ij}+\frac{1}{C_i}\delta_{ij} \f$
    ///
    double kernel_mod(const size_t i, const size_t j) const;

    ///
    /// @return 1 if i belongs to binary target true, else -1
    ///
    int target(size_t i) const;

    utility::Vector alpha_;
    double bias_;
    double C_inverse_;
    const KernelLookup* kernel_;
    double margin_;
    unsigned long int max_epochs_;
    utility::Vector output_;
    SVindex sample_;
    Target target_;
    double tolerance_;
    bool trained_;

  };

}}} // of namespace classifier, yat, and theplu

#endif