source: trunk/yat/utility/NNI.h @ 1566

Last change on this file since 1566 was 1487, checked in by Jari Häkkinen, 13 years ago

Addresses #436. GPL license copy reference should also be updated.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 4.6 KB
Line 
1#ifndef _theplu_yat_utility_nni_
2#define _theplu_yat_utility_nni_
3
4// $Id: NNI.h 1487 2008-09-10 08:41:36Z jari $
5
6/*
7  Copyright (C) 2004 Jari Häkkinen
8  Copyright (C) 2005, 2006, 2007, 2008 Jari Häkkinen, Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 3 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with yat. If not, see <http://www.gnu.org/licenses/>.
24*/
25
26#include "Matrix.h"
27
28#include <iostream>
29#include <utility>
30#include <vector>
31
32namespace theplu {
33namespace yat {
34namespace utility {
35
36  ///
37  /// @brief Interface class for nearest
38  /// neighbour imputation (NNI) algorithms.
39  ///
40  /// NNI algorithms implemented here is discussed in documents
41  /// created in the WeNNI project. This document will be released for
42  /// public access, and the necessary information for retrieving that
43  /// document will be provided here.
44  ///
45  /// Short introduction to NNI is that one may want to improve
46  /// (correct) uncertain data. Here, the data to be imputed is stored in a
47  /// matrix where rows similar to each other are used to adjust
48  /// uncertain data. The data matrix is accompanied by a weight
49  /// (uncertainty) matrix defining what data is to be considered as
50  /// 'certain' and what data is uncertain. The weight matrix can be
51  /// binary with 1's indicating that the data does not need
52  /// corrections, whereas a 0 means that the data should be replaced
53  /// by an imputed value. Naturally, the weight matrix can also be
54  /// continuous where values between 0 and 1 defines how certain a
55  /// data element is.
56  ///
57  /// The imputation depends on how similarity of rows of data is
58  /// defined and on the number of closest neighbours (here; rows) to
59  /// use in the imputation can be set.
60  ///
61  /// Implementation issues
62  ///
63  /// The current implementation treats rows where all data are tagged
64  /// are completely uncertain, i.e. all weights are zero, by
65  /// ignoring these lines in nearest neighbourhood
66  /// calculations. Importantly, this type of data are not changed
67  /// (imputed) either since there is no close neighbourhood defined
68  /// for this data.
69  ///
70  /// Rows that is completely identical in an imputation algorithm
71  /// sense will give problems since the distance between will usually
72  /// become zero. This is solved by setting zero distance to a small
73  /// number. Identical rows in this context are basically a
74  /// comparison between elements with non-zero uncertainty weights
75  /// only, and all these elements are equal. Zero weight elements are
76  /// not used in the comparison since these are considered as
77  /// non/sense values.
78  ///
79  class NNI
80  {
81  public:
82
83    ///
84    /// Base constructor for the nearest neighbour imputation
85    /// algorithms.
86    ///
87    NNI(const utility::Matrix& matrix,const utility::Matrix& weight,
88        const unsigned int neighbours);
89
90    virtual ~NNI(void) {};
91
92    ///
93    /// Function doing the imputation.
94    ///
95    /// @return number of rows not imputed
96    ///
97    virtual unsigned int estimate(void)=0;
98
99    ///
100    /// @return A const reference to the modified data.
101    ///
102    const utility::Matrix& imputed_data(void) const;
103
104    ///
105    /// @return indices of rows in data matrix not imputed
106    ///
107    const std::vector<size_t>& not_imputed(void) const;
108
109  protected:
110    /**
111       \f$ d_{ij}^2=\frac {\sum_{k=1}^C w_{ik} w_{jk} (x_{ik}-x_{jk})^2
112       }{\sum_{k=l}^C w_{ik} w_{jk} } \f$ where C is the number of columns
113    */
114    std::vector<std::pair<size_t,double> > 
115    calculate_distances(const size_t) const;
116    /// Contributing nearest neighbours are added up to the user set
117    /// number, and neighbours are disqualified if their element
118    /// (column) weight is zero
119    std::vector<size_t> 
120    nearest_neighbours(const size_t,
121                       const std::vector<std::pair<size_t,double> >&) const;
122    ///
123    /// original data matrix
124    ///
125    const utility::Matrix& data_;
126
127    ///
128    /// data after imputation
129    ///
130    utility::Matrix imputed_data_;
131
132    ///
133    /// number of neighbor to use
134    ///
135    unsigned int neighbours_;
136
137    ///
138    /// which rows are not imputed due to lack of data
139    ///
140    std::vector<size_t> not_imputed_;
141
142    ///
143    /// weight matrix
144    ///
145    const utility::Matrix& weight_;
146  };
147
148}}} // of namespace utility, yat, and theplu
149
150#endif
Note: See TracBrowser for help on using the repository browser.