source: trunk/yat/classifier/NBC.h

#ifndef _theplu_yat_classifier_nbc_
#define _theplu_yat_classifier_nbc_

// $Id: NBC.h 4207 2022-08-26 04:36:28Z peter $

/*
  Copyright (C) 2006 Jari Häkkinen, Peter Johansson, Markus Ringnér
  Copyright (C) 2007 Jari Häkkinen, Peter Johansson
  Copyright (C) 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér
  Copyright (C) 2022 Peter Johansson

  This file is part of the yat library, http://dev.thep.lu.se/yat

  The yat library is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 3 of the
  License, or (at your option) any later version.

  The yat library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with yat. If not, see <http://www.gnu.org/licenses/>.
*/

#include "SupervisedClassifier.h"
#include "yat/utility/Matrix.h"

namespace theplu {
namespace yat {
namespace classifier {

  class MatrixLookup;
  class MatrixLookupWeighted;
  class Target;

  /**
     @brief Naive Bayesian Classifier.

     Each class is modelled as a multinormal distribution with
     independent features: \f$ P(x|c) \propto \prod_i
     \frac{1}{\sqrt{2\pi\sigma_i^2}} \exp \left(
     -\frac{(x_i-\mu_i)^2}{2\sigma_i^2} \right)\f$. A sketch of how
     this log-density can be evaluated is given in a comment after
     the class declaration below.
  */
  class NBC : public SupervisedClassifier
  {

  public:
    ///
    /// @brief Constructor
    ///
    NBC(void);


    ///
    /// @brief Destructor
    ///
    virtual ~NBC();


    ///
    /// \see SupervisedClassifier::make_classifier
    ///
    NBC* make_classifier(void) const;

    ///
    /// \brief Train the NBC using training data and targets.
    ///
    /// For each class, the mean and variance of each feature are
    /// estimated (see statistics::Averager for details).
    ///
    /// If a class contains zero or one samples, its parameters cannot
    /// be estimated; the parameters for that class are then set to
    /// NaN.
    ///
    void train(const MatrixLookup&, const Target&);
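    // -----------------------------------------------------------------
    // Usage sketch (illustrative only, not part of the original API
    // documentation): training on unweighted data.  The constructor
    // signatures assumed below for utility::Matrix, MatrixLookup and
    // Target, as well as the include paths, are assumptions based on
    // typical yat usage; consult their own headers for the exact
    // interfaces.
    //
    //   #include "yat/classifier/MatrixLookup.h"   // assumed paths
    //   #include "yat/classifier/Target.h"
    //   #include <string>
    //   #include <vector>
    //
    //   theplu::yat::utility::Matrix raw(3, 6);    // 3 features x 6 samples
    //   // ... fill raw(i, j) with measurements ...
    //   theplu::yat::classifier::MatrixLookup data(raw);
    //   std::vector<std::string> labels(6);        // one label per sample
    //   // ... fill labels, e.g. "case" / "control" ...
    //   theplu::yat::classifier::Target target(labels);
    //   theplu::yat::classifier::NBC nbc;
    //   nbc.train(data, target);   // estimates per-class mean and variance
    // -----------------------------------------------------------------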

    ///
    /// \brief Train the NBC using weighted training data and
    /// targets.
    ///
    /// For each class, the mean and variance of each feature are
    /// estimated (see statistics::AveragerWeighted for details).
    ///
    /// To estimate the parameters of a class, each feature of that
    /// class must have at least two data points with non-zero
    /// weight. Otherwise the parameters are set to NaN and any
    /// prediction will result in NaN for that particular class.
    ///
    void train(const MatrixLookupWeighted&, const Target&);
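    // -----------------------------------------------------------------
    // Usage sketch (illustrative only): weighted training.  It assumes
    // MatrixLookupWeighted can be constructed from a data matrix and a
    // weight matrix of the same dimensions; see its documentation for
    // the exact constructors.
    //
    //   theplu::yat::utility::Matrix raw(3, 6);      // data, features x samples
    //   theplu::yat::utility::Matrix weights(3, 6);  // 0 = missing, 1 = present
    //   // ... fill raw and weights ...
    //   theplu::yat::classifier::MatrixLookupWeighted data(raw, weights);
    //   theplu::yat::classifier::NBC nbc;
    //   nbc.train(data, target);   // target as in the unweighted sketch above
    // -----------------------------------------------------------------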

    /**
       \brief Predict samples using unweighted data

       Each sample (column) in \a data is predicted, and the
       predictions are returned in the corresponding column of \a
       result. Each row in \a result corresponds to a class. The
       prediction is the estimated probability that the sample belongs
       to class \f$ j \f$:

       \f$ P_j = \frac{1}{Z}\prod_i\frac{1}{\sqrt{2\pi\sigma_i^2}}
       \exp\left(-\frac{(x_i-\mu_i)^2}{2\sigma_i^2}\right)\f$, where
       \f$ \mu_i \f$ and \f$ \sigma_i^2 \f$ are the estimated mean and
       variance, respectively, and \f$ Z \f$ is chosen such that the
       total probability equals unity, \f$ \sum_j P_j = 1 \f$.

       \note If the parameters of a class could not be estimated
       during training, due to lack of sufficient data points, the
       output for that class is NaN and the class is not included in
       the calculation of the normalization factor \f$ Z \f$.
    */
    void predict(const MatrixLookup& data, utility::Matrix& result) const;
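    // -----------------------------------------------------------------
    // Usage sketch (illustrative only): prediction on unweighted data,
    // reusing nbc from the training sketch above; test_matrix is a
    // hypothetical utility::Matrix of test data.  Whether predict()
    // resizes \a result or expects it to be pre-sized is an assumption
    // here; the documented layout is one row per class and one column
    // per sample.
    //
    //   theplu::yat::classifier::MatrixLookup test(test_matrix);
    //   theplu::yat::utility::Matrix result;
    //   nbc.predict(test, result);
    //   // result(j, k) is the estimated probability that sample k
    //   // belongs to class j; each column sums to 1, except that
    //   // classes whose parameters are NaN contribute NaN entries.
    // -----------------------------------------------------------------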

    /**
       \brief Predict samples using weighted data

       Each sample (column) in \a data is predicted, and the
       predictions are returned in the corresponding column of \a
       result. Each row in \a result corresponds to a class. The
       prediction is the estimated probability that the sample belongs
       to class \f$ j \f$:

       \f$ P_j = \frac{1}{Z} \exp\left(-N\frac{\sum_i
       w_i(x_i-\mu_i)^2/(2\sigma_i^2)}{\sum_i w_i}\right)
       \prod_i\frac{1}{\sqrt{2\pi\sigma_i^2}}\f$, where \f$ \mu_i \f$
       and \f$ \sigma_i^2 \f$ are the estimated mean and variance,
       respectively, and \f$ Z \f$ is chosen such that the total
       probability equals unity, \f$ \sum_j P_j = 1 \f$.

       \note If the parameters of a class could not be estimated
       during training, due to lack of sufficient data points, the
       output for that class is NaN and the class is not included in
       the calculation of the normalization factor \f$ Z \f$.
     */
    void predict(const MatrixLookupWeighted& data, utility::Matrix& result) const;
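    // -----------------------------------------------------------------
    // Illustrative sketch (not the library's implementation): up to
    // the constant ln Z, the weighted formula above corresponds to the
    // per-class log-score below.  N is taken here to be the number of
    // features (an assumption; the documentation only names N), and
    // x, w, mu, sigma2 are per-feature vectors for one sample and one
    // class.  Requires <cmath> and <vector>.
    //
    //   double log_score(const std::vector<double>& x,
    //                    const std::vector<double>& w,
    //                    const std::vector<double>& mu,
    //                    const std::vector<double>& sigma2)
    //   {
    //     const double pi = 3.141592653589793;
    //     double wsum = 0, wssq = 0, logdet = 0;
    //     for (size_t i = 0; i < x.size(); ++i) {
    //       wsum   += w[i];
    //       wssq   += w[i] * (x[i] - mu[i]) * (x[i] - mu[i]) / (2 * sigma2[i]);
    //       logdet += 0.5 * std::log(2 * pi * sigma2[i]);
    //     }
    //     const double N = static_cast<double>(x.size());
    //     return -N * wssq / wsum - logdet;   // ln P_j + ln Z
    //   }
    // -----------------------------------------------------------------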


  private:
    void standardize_lnP(utility::Matrix& prediction) const;
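    // -----------------------------------------------------------------
    // Sketch of one way log-probabilities in a column (one entry per
    // class) can be normalized so that the probabilities sum to one: a
    // log-sum-exp that skips NaN classes, as described in the notes on
    // predict() above.  This only illustrates the normalization; it is
    // not necessarily how standardize_lnP is implemented.  Requires
    // <cmath> and <limits>.
    //
    //   double max_lnp = -std::numeric_limits<double>::infinity();
    //   for (double lnp : column)
    //     if (!std::isnan(lnp) && lnp > max_lnp)
    //       max_lnp = lnp;
    //   double z = 0;
    //   for (double lnp : column)
    //     if (!std::isnan(lnp))
    //       z += std::exp(lnp - max_lnp);
    //   for (double& lnp : column)
    //     lnp = std::exp(lnp - max_lnp) / z;   // NaN stays NaN
    // -----------------------------------------------------------------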

    utility::Matrix centroids_;
    utility::Matrix sigma2_;

    double sum_logsigma(size_t i) const;


  };
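  // -------------------------------------------------------------------
  // Sketch referred to from the class description above (illustration
  // only, not the library's implementation): for unweighted data the
  // class-conditional log-density of a sample x under one class is the
  // sum of independent Gaussian terms.  Requires <cmath> and <vector>.
  //
  //   double log_density(const std::vector<double>& x,
  //                      const std::vector<double>& mu,
  //                      const std::vector<double>& sigma2)
  //   {
  //     const double pi = 3.141592653589793;
  //     double lnp = 0;
  //     for (size_t i = 0; i < x.size(); ++i)
  //       lnp += -0.5 * std::log(2 * pi * sigma2[i])
  //              - (x[i] - mu[i]) * (x[i] - mu[i]) / (2 * sigma2[i]);
  //     return lnp;   // ln P(x|c) up to the normalization constant
  //   }
  //
  // Exponentiating these values and normalizing over the classes gives
  // the probabilities returned by predict().
  // -------------------------------------------------------------------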

}}} // of namespace classifier, yat, and theplu

#endif