source: trunk/yat/classifier/SVM.h @ 1615

#ifndef _theplu_yat_classifier_svm_
#define _theplu_yat_classifier_svm_

// $Id$

/*
  Copyright (C) 2004, 2005 Jari Häkkinen, Peter Johansson
  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
  Copyright (C) 2007 Jari Häkkinen, Peter Johansson
  Copyright (C) 2008 Peter Johansson

  This file is part of the yat library, http://dev.thep.lu.se/yat

  The yat library is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 3 of the
  License, or (at your option) any later version.

  The yat library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with yat. If not, see <http://www.gnu.org/licenses/>.
*/

#include "SVindex.h"
#include "Target.h"
#include "yat/utility/Vector.h"

#include <utility>
#include <vector>

namespace theplu {
namespace yat {
namespace utility {
  class Matrix;
}

namespace classifier {

  class DataLookup1D;
  class DataLookupWeighted1D;
  class KernelLookup;

  /**
     \brief Support Vector Machine
  */
  class SVM
  {

  public:
    ///
    /// \brief Constructor
    ///
    SVM(void);

    /**
       \brief Copy constructor.
    */
    SVM(const SVM&);

    ///
    /// \brief Destructor
    ///
    virtual ~SVM();

    /**
       \brief Create an untrained copy of SVM.

       \returns A dynamically allocated SVM, which has to be deleted
       by the caller to avoid memory leaks.
    */
    SVM* make_classifier(void) const;
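
    // Ownership sketch (illustrative, not part of the original
    // header): the returned object is owned by the caller.
    //
    //   SVM* copy = svm.make_classifier();
    //   // ... use copy ...
    //   delete copy;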

    ///
    /// @return alpha parameters
    ///
    const utility::Vector& alpha(void) const;

    ///
    /// The C-parameter is the balance term (see train()). A very
    /// large C means the training will focus on getting samples
    /// correctly classified, with a risk for overfitting and poor
    /// generalisation. A too small C will result in a training in
    /// which misclassifications are not penalized. C is weighted with
    /// respect to class size such that \f$ n_+C_+ = n_-C_- \f$,
    /// meaning a misclassification of the smaller group is penalized
    /// harder. This balance is equivalent to the one occurring in
    /// %regression with regularisation, or in ANN-training with a
    /// weight-decay term. By default, C is set to infinity.
    ///
    /// @returns mean of vector \f$ C_i \f$
    ///
    double C(void) const;
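
    // Worked example of the class weighting (illustrative numbers,
    // not from the original header): with n_+ = 20 and n_- = 80
    // samples, n_+C_+ = n_-C_- gives C_+ = 4 C_-, so misclassifying
    // a sample from the smaller class is penalized four times harder.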

    ///
    /// Default is max_epochs set to 100,000.
    ///
    /// @return maximal number of epochs
    ///
    long int max_epochs(void) const;

    /**
       \brief Set maximal number of epochs in training
    */
    void max_epochs(long int);

    /**
       The output is calculated as \f$ o_i = \sum \alpha_j t_j K_{ij}
       + bias \f$, where \f$ t \f$ is the target.

       @return output of training samples
    */
    const theplu::yat::utility::Vector& output(void) const;
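
    // Sketch of the formula above (illustrative only; alpha, t and K
    // stand for the alpha parameters, targets, and kernel values, and
    // are not members with these names):
    //
    //   for (size_t i = 0; i < n; ++i) {
    //     double o = bias;
    //     for (size_t j = 0; j < n; ++j)
    //       o += alpha(j) * t(j) * K(i, j);   // o_i
    //   }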

    /**
       Generate prediction @a predict from @a input. The prediction
       is calculated as the output times the margin, i.e., the
       geometric distance from the decision hyperplane: \f$ \frac{\sum
       \alpha_j t_j K_{ij} + bias}{|w|} \f$. The output has 2 rows.
       The first row is for binary target true, and the second is for
       binary target false. The second row is superfluous as it is the
       first row negated. It exists only to be aligned with
       multi-class SupervisedClassifiers. Each column in @a input and
       @a predict corresponds to a sample to predict. Each row in @a
       input corresponds to a training sample; more exactly, row i in
       @a input should correspond to row i in the KernelLookup that
       was used for training.
    */
    void predict(const KernelLookup& input, utility::Matrix& predict) const;
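
    // Usage sketch (illustrative; assumes `svm` is a trained SVM,
    // `kernel` is a KernelLookup aligned with the training kernel,
    // and that its columns() gives the number of samples):
    //
    //   utility::Matrix prediction(2, kernel.columns());
    //   svm.predict(kernel, prediction);
    //   // prediction(0, j): margin-scaled output for sample j
    //   // prediction(1, j): its negation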

    /*
    ///
    /// @return output times margin (i.e. geometric distance from
    /// decision hyperplane) from data @a input
    ///
    double predict(const DataLookup1D& input) const;

    ///
    /// @return output times margin from data @a input with
    /// corresponding @a weight
    ///
    double predict(const DataLookupWeighted1D& input) const;
    */

    ///
    /// @brief Sets the C-parameter
    ///
    void set_C(const double);

    /**
       Trains the SVM following Platt's SMO with Keerthi's second
       modification of the Sequential Minimal Optimization. The SVM
       uses all data given for training. Training minimizes \f$
       \frac{1}{2}\sum y_iy_j\alpha_i\alpha_j(K_{ij} +
       \frac{1}{C_i}\delta_{ij}) - \sum \alpha_i \f$, which
       corresponds to minimizing \f$ \sum w_i^2 + \sum C_i\xi_i^2 \f$.

       @note If the training problem is not linearly separable and C
       is set to infinity, the minimum is located at infinity, and
       thus the minimum will not be reached within the maximal number
       of epochs. More exactly, when the problem is not linearly
       separable, there exists an eigenvector of \f$
       H_{ij}=y_iy_jK_{ij} \f$ within the space defined by the
       conditions \f$ \alpha_i>0 \f$ and \f$ \sum \alpha_i y_i = 0
       \f$. As the eigenvalue is zero in this direction, the quadratic
       term does not contribute to the objective; the objective then
       consists of the linear term only and hence has no minimum. This
       problem only occurs when \f$ C \f$ is set to infinity, because
       for a finite \f$ C \f$ all eigenvalues are finite. However, for
       a large \f$ C \f$ (and a training problem that is not linearly
       separable) there exists an eigenvector corresponding to a small
       eigenvalue, which means the minimum has moved from infinity to
       "very far away". In practice this also results in the minimum
       not being reached within the maximal number of epochs, and the
       value of \f$ C \f$ should be decreased.

       \throw std::runtime_error if the maximal number of epochs is
       reached.
    */
    void train(const KernelLookup& kernel, const Target& target);
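
    // Usage sketch (illustrative; `kernel` and `target` are assumed
    // to be a KernelLookup and a Target built elsewhere):
    //
    //   SVM svm;
    //   svm.set_C(100);              // finite C gives a soft margin
    //   try {
    //     svm.train(kernel, target);
    //   }
    //   catch (std::runtime_error& e) {
    //     // maximal number of epochs reached; decrease C or
    //     // raise max_epochs
    //   }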
  private:
    ///
    /// Calculates bounds for alpha2
    ///
    void bounds(double&, double&) const;

    ///
    /// @brief Calculates the bias term
    ///
    void calculate_bias(void);

    ///
    /// Calculates the margin, which is the inverse of \f$ |w| \f$
    ///
    void calculate_margin(void);

    ///
    /// Private function choosing which two elements should be
    /// updated. First checks for the largest violation of the
    /// condition (output - target = 0) among support vectors
    /// (alpha != 0). If no violation is found, it checks sequentially
    /// among the other samples. If there is no violation there
    /// either, training is completed.
    ///
    /// @return true if a pair of samples violating the conditions
    /// can be found
    ///
    bool choose(const theplu::yat::utility::Vector&);

    ///
    /// @return kernel modified with diagonal term (soft margin),
    /// i.e., \f$ K_{ij} + \frac{1}{C_i}\delta_{ij} \f$ (see train())
    ///
    double kernel_mod(const size_t i, const size_t j) const;

    ///
    /// @return 1 if sample i belongs to binary target true, else -1
    ///
    int target(size_t i) const;

    utility::Vector alpha_;
    double bias_;
    double C_inverse_;
    // not owned by SVM
    const KernelLookup* kernel_;
    double margin_;
    unsigned long int max_epochs_;
    utility::Vector output_;
    SVindex sample_;
    Target target_;
    double tolerance_;
    bool trained_;

  };

}}} // of namespace classifier, yat, and theplu

#endif