source: trunk/yat/utility/NNI.h @ 1437

Last change on this file since 1437 was 1437, checked in by Peter, 13 years ago

merge patch release 0.4.2 to trunk. Delta 0.4.2-0.4.1

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 4.6 KB
Line 
1#ifndef _theplu_yat_utility_nni_
2#define _theplu_yat_utility_nni_
3
4// $Id: NNI.h 1437 2008-08-25 17:55:00Z peter $
5
6/*
7  Copyright (C) 2004 Jari Häkkinen
8  Copyright (C) 2005, 2006, 2007, 2008 Jari Häkkinen, Peter Johansson
9
10  This file is part of the yat library, http://dev.thep.lu.se/yat
11
12  The yat library is free software; you can redistribute it and/or
13  modify it under the terms of the GNU General Public License as
14  published by the Free Software Foundation; either version 2 of the
15  License, or (at your option) any later version.
16
17  The yat library is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  General Public License for more details.
21
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25  02111-1307, USA.
26*/
27
28#include "Matrix.h"
29
30#include <iostream>
31#include <utility>
32#include <vector>
33
34namespace theplu {
35namespace yat {
36namespace utility {
37
38  ///
39  /// @brief Interface class for nearest
40  /// neighbour imputation (NNI) algorithms.
41  ///
42  /// The NNI algorithms implemented here are discussed in a document
43  /// created in the WeNNI project. This document will be released for
44  /// public access, and the necessary information for retrieving that
45  /// document will be provided here.
46  ///
47  /// In short, NNI is used when one wants to improve (correct)
48  /// uncertain data. Here, the data to be imputed is stored in a
49  /// matrix where rows similar to each other are used to adjust
50  /// uncertain data. The data matrix is accompanied by a weight
51  /// (uncertainty) matrix defining what data is to be considered as
52  /// 'certain' and what data is uncertain. The weight matrix can be
53  /// binary with 1's indicating that the data does not need
54  /// corrections, whereas a 0 means that the data should be replaced
55  /// by an imputed value. Naturally, the weight matrix can also be
56  /// continuous where values between 0 and 1 define how certain a
57  /// data element is.
58  ///
59  /// The imputation depends on how similarity between rows of data
60  /// is defined and on the number of closest neighbours (here: rows)
61  /// used in the imputation; the latter can be set by the user.
62  ///
63  /// Implementation issues
64  ///
65  /// The current implementation treats rows where all data are tagged
66  /// as completely uncertain, i.e. all weights are zero, by
67  /// ignoring these rows in nearest neighbourhood
68  /// calculations. Importantly, such rows are not changed
69  /// (imputed) either since there is no close neighbourhood defined
70  /// for this data.
71  ///
72  /// Rows that are completely identical in an imputation algorithm
73  /// sense cause problems since the distance between them will usually
74  /// become zero. This is solved by setting zero distance to a small
75  /// number. Rows are identical in this context when a comparison
76  /// restricted to elements with non-zero uncertainty weights finds
77  /// all these elements equal. Zero weight elements are
78  /// not used in the comparison since these are considered as
79  /// nonsense values.
80  ///
81  class NNI
82  {
83  public:
84
85    ///
86    /// Base constructor for the nearest neighbour imputation
87    /// algorithms.
88    ///
89    NNI(const utility::Matrix& matrix,const utility::Matrix& weight,
90        const unsigned int neighbours);
91
92    virtual ~NNI(void) {};
93
94    ///
95    /// Function doing the imputation.
96    ///
97    /// @return number of rows not imputed
98    ///
99    virtual unsigned int estimate(void)=0;
100
101    ///
102    /// @return A const reference to the modified data.
103    ///
104    const utility::Matrix& imputed_data(void) const;
105
106    ///
107    /// @return indices of rows in data matrix not imputed
108    ///
109    const std::vector<size_t>& not_imputed(void) const;
110
111  protected:
112    /**
113       \f$ d_{ij}^2=\frac {\sum_{k=1}^C w_{ik} w_{jk} (x_{ik}-x_{jk})^2
114       }{\sum_{k=l}^C w_{ik} w_{jk} } \f$ where C is the number of columns
115    */
116    std::vector<std::pair<size_t,double> > 
117    calculate_distances(const size_t) const;
118    /// Contributing nearest neighbours are added up to the user set
119    /// number, and neighbours are disqualified if their element
120    /// (column) weight is zero
121    std::vector<size_t> 
122    nearest_neighbours(const size_t,
123                       const std::vector<std::pair<size_t,double> >&) const;
124    ///
125    /// original data matrix
126    ///
127    const utility::Matrix& data_;
128
129    ///
130    /// data after imputation
131    ///
132    utility::Matrix imputed_data_;
133
134    ///
135    /// number of neighbor to use
136    ///
137    unsigned int neighbours_;
138
139    ///
140    /// which rows are not imputed due to lack of data
141    ///
142    std::vector<size_t> not_imputed_;
143
144    ///
145    /// weight matrix
146    ///
147    const utility::Matrix& weight_;
148  };
149
150}}} // of namespace utility, yat, and theplu
151
152#endif
Note: See TracBrowser for help on using the repository browser.