source: trunk/c++_tools/utility/NNI.h @ 675

Last change on this file since 675 was 675, checked in by Jari Häkkinen, 15 years ago

References #83. Changing project name to yat. Compilation will fail in this revision.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 4.7 KB
Line 
1#ifndef _theplu_utility_nni_
2#define _theplu_utility_nni_
3
4// $Id: NNI.h 675 2006-10-10 12:08:45Z jari $
5
6/*
7  Copyright (C) 2004 Jari Häkkinen
8  Copyright (C) 2005 Jari Häkkinen, Peter Johansson
9  Copyright (C) 2006 Jari Häkkinen
10
11  This file is part of the yat library, http://lev.thep.lu.se/trac/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 2 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with this program; if not, write to the Free Software
25  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26  02111-1307, USA.
27*/
28
29#include "yat/utility/matrix.h"
30
31#include <iostream>
32#include <utility>
33#include <vector>
34
35namespace theplu {
36namespace utility {
37
38  ///
39  /// NNI is an abstract base class defining the interface for nearest
40  /// neighbour imputation (NNI) algorithms.
41  ///
42  /// NNI algorithms implemented here is discussed in documents
43  /// created in the WeNNI project. This document will be released for
44  /// public access, and the necessary information for retrieving that
45  /// document will be provided here.
46  ///
47  /// Short introduction to NNI is that one may want to improve
48  /// (correct) uncertain data. Here, the data to be imputed is stored in a
49  /// matrix where rows similar to each other are used to adjust
50  /// uncertain data. The data matrix is accompanied by a weight
51  /// (uncertainty) matrix defining what data is to be considered as
52  /// 'certain' and what data is uncertain. The weight matrix can be
53  /// binary with 1's indicating that the data does not need
54  /// corrections, whereas a 0 means that the data should be replaced
55  /// by an imputed value. Naturally, the weight matrix can also be
56  /// continuous where values between 0 and 1 defines how certain a
57  /// data element is.
58  ///
59  /// The imputation depends on how similarity of rows of data is
60  /// defined and on the number of closest neighbours (here; rows) to
61  /// use in the imputation can be set.
62  ///
63  /// Implementation issues
64  ///
65  /// The current implementation treats rows where all data are tagged
66  /// are completely uncertain, i.e. all weights are zero, by
67  /// ignoring these lines in nearest neighbourhood
68  /// calculations. Importantly, this type of data are not changed
69  /// (imputed) either since there is no close neighbourhood defined
70  /// for this data.
71  ///
72  /// Rows that is completely identical in an imputation algorithm
73  /// sense will give problems since the distance between will usually
74  /// become zero. This is solved by setting zero distance to a small
75  /// number. Identical rows in this context are basically a
76  /// comparison between elements with non-zero uncertainty weights
77  /// only, and all these elements are equal. Zero weight elements are
78  /// not used in the comparison since these are considered as
79  /// non/sense values.
80  ///
81  class NNI
82  {
83  public:
84
85    ///
86    /// Base constructor for the nearest neighbour imputation
87    /// algorithms.
88    ///
89    NNI(const utility::matrix& matrix,const utility::matrix& weight,
90        const u_int neighbours);
91
92    virtual ~NNI(void) {};
93
94    ///
95    /// Function doing the imputation.
96    ///
97    /// @return number of rows not imputed
98    ///
99    virtual u_int estimate(void)=0;
100
101    ///
102    /// @return A const reference to the modified data.
103    ///
104    const utility::matrix& imputed_data(void) const { return imputed_data_; }
105
106    ///
107    /// @return indices of rows in data matrix not imputed
108    ///
109    inline std::vector<size_t> not_imputed(void) const { return not_imputed_; }
110
111  protected:
112    /**
113       \f$ d_{ij}^2=\frac {\sum_{k=1}^C w_{ik} w_{jk} (x_{ik}-x_{jk})^2
114       }{\sum_{k=l}^C w_{ik} w_{jk} } \f$ where C is the number of columns
115    */
116    std::vector<std::pair<u_int,double> > calculate_distances(const u_int) const;
117    /// Contributing nearest neighbours are added up to the user set
118    /// number, and neighbours are disqualified if their element
119    /// (column) weight is zero
120    std::vector<u_int> nearest_neighbours(const u_int,
121                             const std::vector<std::pair<u_int,double> >&) const;
122    ///
123    /// original data matrix
124    ///
125    const utility::matrix& data_;
126
127    ///
128    /// data after imputation
129    ///
130    utility::matrix imputed_data_;
131
132    ///
133    /// number of neighbor to use
134    ///
135    u_int neighbours_;
136
137    ///
138    /// which rows are not imputed due to lack of data
139    ///
140    std::vector<size_t> not_imputed_;
141
142    ///
143    /// weight matrix
144    ///
145    const utility::matrix& weight_;
146  };
147
148}} // of namespace utility and namespace theplu
149
150#endif
Note: See TracBrowser for help on using the repository browser.