source: trunk/yat/classifier/NBC.h @ 1152

Last change on this file since 1152 was 1152, checked in by Peter, 14 years ago

some docs for NBC, note docs that not yet match implementation refs #335

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 3.4 KB
Line 
1#ifndef _theplu_yat_classifier_nbc_
2#define _theplu_yat_classifier_nbc_
3
4// $Id: NBC.h 1152 2008-02-25 23:31:46Z peter $
5
6/*
7  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér, Peter Johansson
8  Copyright (C) 2007 Peter Johansson
9
10  This file is part of the yat library, http://trac.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 2 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25  02111-1307, USA.
26*/
27
28#include "SupervisedClassifier.h"
29#include "yat/utility/Matrix.h"
30
31namespace theplu {
32namespace yat {
33namespace classifier { 
34
35  class DataLookup1D;
36  class DataLookup2D;
37  class MatrixLookup;
38  class MatrixLookupWeighted;
39  class Target;
40
41  /**
42     @brief Naive Bayesian Classifier.
43 
44     Each class is modelled as a multivariate normal distribution
45     with independent features: \f$ p(x|c) = \prod_i
46     \frac{1}{\sqrt{2\pi\sigma_i^2}} \exp \left(
47     -\frac{(x_i-m_i)^2}{2\sigma_i^2} \right)\f$
48  */
49  class NBC : public SupervisedClassifier
50  {
51 
52  public:
53    ///
54    /// Constructor taking the training data and the target vector.
55    ///
56    NBC(const MatrixLookup&, const Target&);
57   
58    ///
59    /// Constructor taking the weighted training data and the target
60    /// vector. The weights are carried by the MatrixLookupWeighted
61    /// itself; no separate weight matrix is passed.
62    ///
63    NBC(const MatrixLookupWeighted&, const Target&);
64
      ///
      /// Destructor.
      ///
65    virtual ~NBC();
66
      ///
      /// @return const reference to a DataLookup2D; presumably the
      /// training data held in data_ -- TODO confirm in NBC.cc.
      ///
67    const DataLookup2D& data(void) const;
68
69
      ///
      /// Presumably creates a new NBC for the passed data and target
      /// -- TODO confirm in NBC.cc.
      ///
      /// @return pointer to an NBC. NOTE(review): ownership of the
      /// returned pointer is not visible in this header; verify
      /// whether the caller is responsible for deleting it.
      ///
70    NBC* make_classifier(const DataLookup2D&, 
71                         const Target&) const;
72   
73    ///
74    /// Train the classifier using the training data.
75    ///
76    /// For each class mean and variance are estimated for each
77    /// feature (see Averager and AveragerWeighted for details).
78    ///
79    /// If variance cannot be estimated (only one valid data point)
80    /// for a feature and label, then that feature is ignored for that
81    /// specific label.
82    ///
83    void train();
84
85   
86    /**
87       Each sample (column) in \a data is predicted and predictions
88       are returned in the corresponding column in passed \a res. Each
89       row in \a res corresponds to a class. The prediction is the
90       estimated probability that the sample belongs to class \f$ j \f$
91
92       \f$ P_j = \frac{1}{Z}\prod_i{\frac{1}{\sqrt{2\pi\sigma_i^2}}}
93       \exp\left(-\frac{w_i(x_i-\mu_i)^2}{2\sigma_i^2}\right)\f$, where
94       \f$ \mu_i \f$ and \f$ \sigma_i^2 \f$ are the estimated mean and
95       variance, respectively. If a \f$ \sigma_i \f$ could not be
96       estimated during training, the corresponding factor is set to
97       unity, in other words, that feature is ignored for the
98       prediction of that particular class. Z is chosen such that the
99       total probability, \f$ \sum P_j \f$, equals unity. If \a data is
100       a MatrixLookup this is equivalent to using all weights equal to
          unity.

          NOTE(review): the sign and the factor 2 in the exponent were
          corrected here to agree with the normal density; the
          implementation is not visible from this header, so verify the
          formula (including how \f$ w_i \f$ enters) against NBC.cc.
101    */
102    void predict(const DataLookup2D& data, utility::Matrix& res) const;
103
104
105  private:
       // Presumably the per-class, per-feature means estimated by
       // train() -- TODO confirm in NBC.cc.
106    utility::Matrix centroids_;
       // Presumably the per-class, per-feature variances estimated by
       // train() -- TODO confirm in NBC.cc.
107    utility::Matrix sigma2_;
       // Held by reference, not by value: the referenced DataLookup2D
       // must outlive this object.
108    const DataLookup2D& data_;
109
       // NOTE(review): judging by the name, a sum of log(sigma) terms
       // selected by index i; the exact meaning of i is not visible
       // here -- confirm in NBC.cc.
110    double sum_logsigma(size_t i) const;
111
112
113  };
114 
115}}} // of namespace classifier, yat, and theplu
116
117#endif
Note: See TracBrowser for help on using the repository browser.