source: trunk/c++_tools/utility/NNI.h @ 649

Last change on this file since 649 was 649, checked in by Jari Häkkinen, 17 years ago

Fixed typo in doxygen comment.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 4.8 KB
Line 
1#ifndef _theplu_utility_nni_
2#define _theplu_utility_nni_
3
4// $Id: NNI.h 649 2006-09-15 14:03:31Z jari $
5
6/*
7  Copyright (C) 2004 Jari Häkkinen
8  Copyright (C) 2005 Jari Häkkinen, Peter Johansson
9  Copyright (C) 2006 Jari Häkkinen
10
11  This file is part of the thep c++ tools library,
12                                http://lev.thep.lu.se/trac/c++_tools
13
14  The c++ tools library is free software; you can redistribute it
15  and/or modify it under the terms of the GNU General Public License
16  as published by the Free Software Foundation; either version 2 of
17  the License, or (at your option) any later version.
18
19  The c++ tools library is distributed in the hope that it will be
20  useful, but WITHOUT ANY WARRANTY; without even the implied warranty
21  of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  General Public License for more details.
23
24  You should have received a copy of the GNU General Public License
25  along with this program; if not, write to the Free Software
26  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
27  02111-1307, USA.
28*/
29
30#include <iostream>
31#include <utility>
32#include <vector>
33
34#include <c++_tools/utility/matrix.h>
35
36namespace theplu {
37namespace utility {
38
39  ///
40  /// NNI is an abstract base class defining the interface for nearest
41  /// neighbour imputation (NNI) algorithms.
42  ///
43  /// NNI algorithms implemented here is discussed in documents
44  /// created in the WeNNI project. This document will be released for
45  /// public access, and the necessary information for retrieving that
46  /// document will be provided here.
47  ///
48  /// Short introduction to NNI is that one may want to improve
49  /// (correct) uncertain data. Here, the data to be imputed is stored in a
50  /// matrix where rows similar to each other are used to adjust
51  /// uncertain data. The data matrix is accompanied by a weight
52  /// (uncertainty) matrix defining what data is to be considered as
53  /// 'certain' and what data is uncertain. The weight matrix can be
54  /// binary with 1's indicating that the data does not need
55  /// corrections, whereas a 0 means that the data should be replaced
56  /// by an imputed value. Naturally, the weight matrix can also be
57  /// continuous where values between 0 and 1 defines how certain a
58  /// data element is.
59  ///
60  /// The imputation depends on how similarity of rows of data is
61  /// defined and on the number of closest neighbours (here; rows) to
62  /// use in the imputation can be set.
63  ///
64  /// Implementation issues
65  ///
66  /// The current implementation treats rows where all data are tagged
67  /// are completely uncertain, i.e. all weights are zero, by
68  /// ignoring these lines in nearest neighbourhood
69  /// calculations. Importantly, this type of data are not changed
70  /// (imputed) either since there is no close neighbourhood defined
71  /// for this data.
72  ///
73  /// Rows that is completely identical in an imputation algorithm
74  /// sense will give problems since the distance between will usually
75  /// become zero. This is solved by setting zero distance to a small
76  /// number. Identical rows in this context are basically a
77  /// comparison between elements with non-zero uncertainty weights
78  /// only, and all these elements are equal. Zero weight elements are
79  /// not used in the comparison since these are considered as
80  /// non/sense values.
81  ///
82  class NNI
83  {
84  public:
85
86    ///
87    /// Base constructor for the nearest neighbour imputation
88    /// algorithms.
89    ///
90    NNI(const utility::matrix& matrix,const utility::matrix& weight,
91        const u_int neighbours);
92
93    virtual ~NNI(void) {};
94
95    ///
96    /// Function doing the imputation.
97    ///
98    /// @return number of rows not imputed
99    ///
100    virtual u_int estimate(void)=0;
101
102    ///
103    /// @return A const reference to the modified data.
104    ///
105    const utility::matrix& imputed_data(void) const { return imputed_data_; }
106
107    ///
108    /// @return indices of rows in data matrix not imputed
109    ///
110    inline std::vector<size_t> not_imputed(void) const { return not_imputed_; }
111
112  protected:
113    /**
114       \f$ d_{ij}^2=\frac {\sum_{k=1}^C w_{ik} w_{jk} (x_{ik}-x_{jk})^2
115       }{\sum_{k=l}^C w_{ik} w_{jk} } \f$ where C is the number of columns
116    */
117    std::vector<std::pair<u_int,double> > calculate_distances(const u_int) const;
118    /// Contributing nearest neighbours are added up to the user set
119    /// number, and neighbours are disqualified if their element
120    /// (column) weight is zero
121    std::vector<u_int> nearest_neighbours(const u_int,
122                             const std::vector<std::pair<u_int,double> >&) const;
123    ///
124    /// original data matrix
125    ///
126    const utility::matrix& data_;
127
128    ///
129    /// data after imputation
130    ///
131    utility::matrix imputed_data_;
132
133    ///
134    /// number of neighbor to use
135    ///
136    u_int neighbours_;
137
138    ///
139    /// which rows are not imputed due to lack of data
140    ///
141    std::vector<size_t> not_imputed_;
142
143    ///
144    /// weight matrix
145    ///
146    const utility::matrix& weight_;
147  };
148
149}} // of namespace utility and namespace theplu
150
151#endif
Note: See TracBrowser for help on using the repository browser.