source: trunk/yat/utility/NNI.h @ 1271

Last change on this file since 1271 was 1271, checked in by Peter, 13 years ago

replaced u_int with unsigned int or size_t

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 4.7 KB
Line 
1#ifndef _theplu_yat_utility_nni_
2#define _theplu_yat_utility_nni_
3
4// $Id: NNI.h 1271 2008-04-09 16:11:07Z peter $
5
6/*
7  Copyright (C) 2004 Jari Häkkinen
8  Copyright (C) 2005, 2006 Jari Häkkinen, Peter Johansson
9  Copyright (C) 2007 Peter Johansson
10  Copyright (C) 2008 Jari Häkkinen, Peter Johansson
11
12  This file is part of the yat library, http://trac.thep.lu.se/yat
13
14  The yat library is free software; you can redistribute it and/or
15  modify it under the terms of the GNU General Public License as
16  published by the Free Software Foundation; either version 2 of the
17  License, or (at your option) any later version.
18
19  The yat library is distributed in the hope that it will be useful,
20  but WITHOUT ANY WARRANTY; without even the implied warranty of
21  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  General Public License for more details.
23
24  You should have received a copy of the GNU General Public License
25  along with this program; if not, write to the Free Software
26  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
27  02111-1307, USA.
28*/
29
30#include "Matrix.h"
31
32#include <iostream>
33#include <utility>
34#include <vector>
35
36namespace theplu {
37namespace yat {
38namespace utility {
39
40  ///
41  /// @brief Interface class for nearest
42  /// neighbour imputation (NNI) algorithms.
43  ///
44  /// NNI algorithms implemented here is discussed in documents
45  /// created in the WeNNI project. This document will be released for
46  /// public access, and the necessary information for retrieving that
47  /// document will be provided here.
48  ///
49  /// Short introduction to NNI is that one may want to improve
50  /// (correct) uncertain data. Here, the data to be imputed is stored in a
51  /// matrix where rows similar to each other are used to adjust
52  /// uncertain data. The data matrix is accompanied by a weight
53  /// (uncertainty) matrix defining what data is to be considered as
54  /// 'certain' and what data is uncertain. The weight matrix can be
55  /// binary with 1's indicating that the data does not need
56  /// corrections, whereas a 0 means that the data should be replaced
57  /// by an imputed value. Naturally, the weight matrix can also be
58  /// continuous where values between 0 and 1 defines how certain a
59  /// data element is.
60  ///
61  /// The imputation depends on how similarity of rows of data is
62  /// defined and on the number of closest neighbours (here; rows) to
63  /// use in the imputation can be set.
64  ///
65  /// Implementation issues
66  ///
67  /// The current implementation treats rows where all data are tagged
68  /// are completely uncertain, i.e. all weights are zero, by
69  /// ignoring these lines in nearest neighbourhood
70  /// calculations. Importantly, this type of data are not changed
71  /// (imputed) either since there is no close neighbourhood defined
72  /// for this data.
73  ///
74  /// Rows that is completely identical in an imputation algorithm
75  /// sense will give problems since the distance between will usually
76  /// become zero. This is solved by setting zero distance to a small
77  /// number. Identical rows in this context are basically a
78  /// comparison between elements with non-zero uncertainty weights
79  /// only, and all these elements are equal. Zero weight elements are
80  /// not used in the comparison since these are considered as
81  /// non/sense values.
82  ///
83  class NNI
84  {
85  public:
86
87    ///
88    /// Base constructor for the nearest neighbour imputation
89    /// algorithms.
90    ///
91    NNI(const utility::Matrix& matrix,const utility::Matrix& weight,
92        const unsigned int neighbours);
93
94    virtual ~NNI(void) {};
95
96    ///
97    /// Function doing the imputation.
98    ///
99    /// @return number of rows not imputed
100    ///
101    virtual unsigned int estimate(void)=0;
102
103    ///
104    /// @return A const reference to the modified data.
105    ///
106    const utility::Matrix& imputed_data(void) const;
107
108    ///
109    /// @return indices of rows in data matrix not imputed
110    ///
111    const std::vector<size_t>& not_imputed(void) const;
112
113  protected:
114    /**
115       \f$ d_{ij}^2=\frac {\sum_{k=1}^C w_{ik} w_{jk} (x_{ik}-x_{jk})^2
116       }{\sum_{k=l}^C w_{ik} w_{jk} } \f$ where C is the number of columns
117    */
118    std::vector<std::pair<size_t,double> > 
119    calculate_distances(const size_t) const;
120    /// Contributing nearest neighbours are added up to the user set
121    /// number, and neighbours are disqualified if their element
122    /// (column) weight is zero
123    std::vector<size_t> 
124    nearest_neighbours(const size_t,
125                       const std::vector<std::pair<size_t,double> >&) const;
126    ///
127    /// original data matrix
128    ///
129    const utility::Matrix& data_;
130
131    ///
132    /// data after imputation
133    ///
134    utility::Matrix imputed_data_;
135
136    ///
137    /// number of neighbor to use
138    ///
139    unsigned int neighbours_;
140
141    ///
142    /// which rows are not imputed due to lack of data
143    ///
144    std::vector<size_t> not_imputed_;
145
146    ///
147    /// weight matrix
148    ///
149    const utility::Matrix& weight_;
150  };
151
152}}} // of namespace utility, yat, and theplu
153
154#endif
Note: See TracBrowser for help on using the repository browser.