source: trunk/yat/classifier/NBC.h @ 1121

Last change on this file since 1121 was 1121, checked in by Peter, 14 years ago

fixes #308

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 3.3 KB
Line 
1#ifndef _theplu_yat_classifier_nbc_
2#define _theplu_yat_classifier_nbc_
3
4// $Id: NBC.h 1121 2008-02-22 15:29:56Z peter $
5
6/*
7  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér, Peter Johansson
8  Copyright (C) 2007 Peter Johansson
9
10  This file is part of the yat library, http://trac.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 2 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25  02111-1307, USA.
26*/
27
28#include "SupervisedClassifier.h"
29#include "yat/utility/Matrix.h"
30
31namespace theplu {
32namespace yat {
33namespace classifier { 
34
35  class DataLookup1D;         // forward declaration; not referenced in the visible part of this header
36  class DataLookup2D;         // forward declaration; used only by const reference below
37  class MatrixLookup;         // forward declaration; unweighted training data (constructor argument)
38  class MatrixLookupWeighted; // forward declaration; weighted training data (constructor argument)
39  class Target;               // forward declaration; class labels, passed by const reference below
40
41  /**
42     @brief Naive Bayesian Classification.
43 
44     Each class is modelled as a multinormal distribution with
45     features being independent: \f$ p(x|c) = \prod_i
46     \frac{1}{\sqrt{2\pi\sigma_i^2}} \exp \left(
47     -\frac{(x_i-m_i)^2}{2\sigma_i^2} \right)\f$
48  */
49  class NBC : public SupervisedClassifier
50  {
51 
52  public:
53    ///
54    /// Constructor taking the training data and the target vector
55    /// as input.
56    ///
57    NBC(const MatrixLookup&, const Target&);
58   
59    ///
60    /// Constructor taking the training data with weights and the
61    /// target vector as input. No separate weight matrix is passed;
62    /// the only data argument is the MatrixLookupWeighted.
63    ///
64    NBC(const MatrixLookupWeighted&, const Target&);
65    /// Virtual destructor.
66    virtual ~NBC();
67    /// NOTE(review): presumably returns the training data held in data_ -- confirm in NBC.cc.
68    const DataLookup2D& data(void) const;
69    /// NOTE(review): presumably builds a new classifier from the given data and
70    /// target; ownership of the returned pointer is not shown here -- confirm.
71    SupervisedClassifier* make_classifier(const DataLookup2D&, 
72                                          const Target&) const;
73   
74    ///
75    /// Train the classifier using the training data.
76    ///
77    /// For each class, mean and variance are estimated for each
78    /// feature (see Averager and AveragerWeighted for details).
79    ///
80    /// If the variance cannot be estimated (too few data points or all
81    /// points identical) for a feature and label, then that feature
82    /// is ignored for that specific label.
83    ///
84    /// The function is declared void, so no success flag is returned.
85    ///
86    void train();
87
88   
89    /**
90       Each sample (column) in \a data is predicted and predictions
91       are returned in the corresponding column in passed \a res. Each
92       row in \a res corresponds to a class. The prediction is the
93       estimated probability that the sample belongs to class \f$ j \f$:
94       \f$ P_j = \frac{1}{Z}\prod_i{\frac{1}{\sigma_i}}
95       \exp(-\frac{w_i(x_i-\mu_i)^2}{2\sigma_i^2})\f$, where \f$ \mu_i
96       \f$ and \f$ \sigma_i^2 \f$ are the estimated mean and variance,
97       respectively. If \a data is a MatrixLookup, this is equivalent
98       to using all weights equal to unity. NOTE(review): the sign and
99       factor 2 follow the class-level density; confirm against NBC.cc.
100    */
101    void predict(const DataLookup2D& data, utility::Matrix& res) const;
102
103
104  private:
105    utility::Matrix centroids_; // NOTE(review): presumably the estimated means -- confirm
106    utility::Matrix sigma2_;    // NOTE(review): presumably the estimated variances -- confirm
107    const DataLookup2D& data_;  // non-owning reference; the referenced object must outlive this NBC
108    // NOTE(review): presumably a sum of log(sigma) terms selected by index i -- confirm in NBC.cc.
109    double sum_logsigma(size_t i) const;
110
111
112  };
113 
114}}} // of namespace classifier, yat, and theplu
115
116#endif
Note: See TracBrowser for help on using the repository browser.