source: trunk/yat/classifier/SVM.h @ 1124

Last change on this file since 1124 was 1124, checked in by Peter, 14 years ago

train returns nothing, removed docs saying else

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 7.1 KB
Line 
1#ifndef _theplu_yat_classifier_svm_
2#define _theplu_yat_classifier_svm_
3
4// $Id$
5
6/*
7  Copyright (C) 2004, 2005 Jari Häkkinen, Peter Johansson
8  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér, Peter Johansson
9  Copyright (C) 2007 Peter Johansson
10
11  This file is part of the yat library, http://trac.thep.lu.se/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 2 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with this program; if not, write to the Free Software
25  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26  02111-1307, USA.
27*/
28
29#include "SVindex.h"
30#include "Target.h"
31#include "yat/utility/Vector.h"
32
33#include <utility>
34#include <vector>
35
36namespace theplu {
37namespace yat {
38namespace utility{
39  class Matrix;
40}
41
42namespace classifier { 
43
44  class DataLookup1D;
45  class DataLookupWeighted1D;
46  class KernelLookup;
47
  ///
  /// @brief Support Vector Machine
  ///
  /// Class for SVM using Keerthi's second modification of Platt's
  /// Sequential Minimal Optimization (SMO). The SVM uses all data
  /// given for training. If validation or testing is wanted this
  /// should be taken care of outside (in the kernel).
  ///
  class SVM
  {

  public:
    ///
    /// \brief Constructor
    ///
    SVM(void);

    /**
       Copy constructor.
    */
    SVM(const SVM&);

    ///
    /// @brief Destructor
    ///
    virtual ~SVM();

    //const DataLookup2D& data(void) const;

    ///
    /// @return pointer to a dynamically allocated SVM
    ///
    /// NOTE(review): the caller presumably takes ownership of the
    /// returned object -- confirm against the implementation.
    ///
    SVM* make_classifier(void) const;

    ///
    /// @return \f$ \alpha \f$, the vector of Lagrange multipliers
    ///
    const utility::Vector& alpha(void) const;

    ///
    /// The C-parameter is the balance term (see train()). A very
    /// large C means the training will be focused on getting samples
    /// correctly classified, with risk for overfitting and poor
    /// generalisation. A too small C will result in a training in which
    /// misclassifications are not penalized. C is weighted with
    /// respect to the size, so \f$ n_+C_+ = n_-C_- \f$, meaning a
    /// misclassification of the smaller group is penalized
    /// harder. This balance is equivalent to the one occurring for
    /// regression with regularisation, or ANN-training with a
    /// weight-decay term. Default is C set to infinity.
    ///
    /// @return mean of vector \f$ C_i \f$
    ///
    double C(void) const;

    ///
    /// Default is max_epochs set to 100,000.
    ///
    /// @return number of maximal epochs
    ///
    // NOTE(review): accessor returns (signed) long int while the
    // member max_epochs_ is unsigned long int -- verify values large
    // enough to overflow the signed type cannot occur.
    long int max_epochs(void) const;

    /**
      \brief set maximal number of epochs in training
    */
    void max_epochs(long int);

    /**
        The output is calculated as \f$ o_i = \sum \alpha_j t_j K_{ij}
        + bias \f$, where \f$ t \f$ is the target.

        @return output
    */
    const theplu::yat::utility::Vector& output(void) const;

    /**
       Generate prediction @a predict from @a input. The prediction
       is calculated as the output times the margin, i.e., geometric
       distance from decision hyperplane: \f$ \frac{ \sum \alpha_j
       t_j K_{ij} + bias}{w} \f$ The output has 2 rows. The first row
       is for binary target true, and the second is for binary target
       false. The second row is superfluous as it is the first row
       negated. It exists just to be aligned with multi-class
       SupervisedClassifiers. Each column in @a input and @a predict
       corresponds to a sample to predict. Each row in @a input
       corresponds to a training sample, and more exactly row i in @a
       input should correspond to row i in KernelLookup that was used
       for training.
    */
    void predict(const KernelLookup& input, utility::Matrix& predict) const;

    ///
    /// @return output times margin (i.e. geometric distance from
    /// decision hyperplane) from data @a input
    ///
    double predict(const DataLookup1D& input) const;

    ///
    /// @return output times margin from data @a input with
    /// corresponding @a weight
    ///
    double predict(const DataLookupWeighted1D& input) const;

    ///
    /// @brief sets the C-Parameter
    ///
    void set_C(const double);

    /**
       Training the SVM following Platt's SMO, with Keerthi's
       modification. Minimizing \f$ \frac{1}{2}\sum
       y_iy_j\alpha_i\alpha_j(K_{ij}+\frac{1}{C_i}\delta_{ij}) - \sum
       \alpha_i\f$ , which corresponds to minimizing \f$ \sum
       w_i^2+\sum C_i\xi_i^2 \f$.

       @note If the training problem is not linearly separable and C
       is set to infinity, the minima will be located in the infinity,
       and thus the minimum will not be reached within the maximal
       number of epochs. More exactly, when the problem is not
       linearly separable, there exists an eigenvector to \f$
       H_{ij}=y_iy_jK_{ij} \f$ within the space defined by the
       conditions: \f$ \alpha_i>0 \f$ and \f$ \sum \alpha_i y_i = 0
       \f$. As the eigenvalue is zero in this direction the quadratic
       term does not contribute to the objective, but the objective
       only consists of the linear term and hence there is no
       minimum. This problem only occurs when \f$ C \f$ is set to
       infinity because for a finite \f$ C \f$ all eigenvalues are
       finite. However, for a large \f$ C \f$ (and training problem is
       non-linearly separable) there exists an eigenvector
       corresponding to a small eigenvalue, which means the minima has
       moved from infinity to "very far away". In practice this will
       also result in that the minimum is not reached within the
       maximal number of epochs and the value of \f$ C \f$ should be
       decreased.

       \throw if the maximal number of epochs is reached.
    */
    void train(const KernelLookup& kernel, const Target& target);



  private:
    ///
    /// Calculates bounds for alpha2
    ///
    void bounds(double&, double&) const;

    ///
    /// @brief calculates the bias term
    ///
    void calculate_bias(void);

    ///
    /// Calculate margin, i.e., the inverse of w
    ///
    void calculate_margin(void);

    ///
    ///   Private function choosing which two elements that should be
    ///   updated. First checking for the biggest violation (output - target =
    ///   0) among support vectors (alpha!=0). If no violation was found check
    ///   sequentially among the other samples. If no violation there as
    ///   well training is completed
    ///
    ///  @return true if a pair of samples that violate the conditions
    ///  can be found
    ///
    bool choose(const theplu::yat::utility::Vector&);

    ///
    /// @return kernel modified with diagonal term (soft margin)
    ///
    double kernel_mod(const size_t i, const size_t j) const;

    ///
    /// @return 1 if i belong to binary target true else -1
    ///
    int target(size_t i) const;

    utility::Vector alpha_;        // Lagrange multipliers, one per training sample
    double bias_;                  // bias term of the decision function
    double C_inverse_;             // presumably 1/C (soft-margin diagonal term) -- confirm in SVM.cc
    const KernelLookup* kernel_;   // training kernel; raw pointer, presumably not owned -- confirm
    double margin_;                // inverse of w, set by calculate_margin()
    unsigned long int max_epochs_; // maximal number of training epochs
    utility::Vector output_;       // output o_i for each training sample
    SVindex sample_;               // presumably bookkeeping of support-vector indices -- confirm
    Target target_;                // binary targets used in training
    double tolerance_;             // presumably convergence tolerance for SMO -- confirm in SVM.cc
    bool trained_;                 // presumably set by train() on completion -- confirm

  };
244
245}}} // of namespace classifier, yat, and theplu
246
247#endif
Note: See TracBrowser for help on using the repository browser.