source: trunk/yat/utility/NNI.h @ 1121

Last change on this file since 1121 was 1121, checked in by Peter, 14 years ago

fixes #308

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 4.6 KB
Line 
1#ifndef _theplu_yat_utility_nni_
2#define _theplu_yat_utility_nni_
3
4// $Id: NNI.h 1121 2008-02-22 15:29:56Z peter $
5
6/*
7  Copyright (C) 2004 Jari Häkkinen
8  Copyright (C) 2005, 2006 Jari Häkkinen, Peter Johansson
9  Copyright (C) 2007 Peter Johansson
10
11  This file is part of the yat library, http://trac.thep.lu.se/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 2 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with this program; if not, write to the Free Software
25  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26  02111-1307, USA.
27*/
28
29#include "Matrix.h"
30
31#include <iostream>
32#include <utility>
33#include <vector>
34
35namespace theplu {
36namespace yat {
37namespace utility {
38
39  ///
40  /// @brief Interface class for nearest
41  /// neighbour imputation (NNI) algorithms.
42  ///
43  /// NNI algorithms implemented here is discussed in documents
44  /// created in the WeNNI project. This document will be released for
45  /// public access, and the necessary information for retrieving that
46  /// document will be provided here.
47  ///
48  /// Short introduction to NNI is that one may want to improve
49  /// (correct) uncertain data. Here, the data to be imputed is stored in a
50  /// matrix where rows similar to each other are used to adjust
51  /// uncertain data. The data matrix is accompanied by a weight
52  /// (uncertainty) matrix defining what data is to be considered as
53  /// 'certain' and what data is uncertain. The weight matrix can be
54  /// binary with 1's indicating that the data does not need
55  /// corrections, whereas a 0 means that the data should be replaced
56  /// by an imputed value. Naturally, the weight matrix can also be
57  /// continuous where values between 0 and 1 defines how certain a
58  /// data element is.
59  ///
60  /// The imputation depends on how similarity of rows of data is
61  /// defined and on the number of closest neighbours (here; rows) to
62  /// use in the imputation can be set.
63  ///
64  /// Implementation issues
65  ///
66  /// The current implementation treats rows where all data are tagged
67  /// are completely uncertain, i.e. all weights are zero, by
68  /// ignoring these lines in nearest neighbourhood
69  /// calculations. Importantly, this type of data are not changed
70  /// (imputed) either since there is no close neighbourhood defined
71  /// for this data.
72  ///
73  /// Rows that is completely identical in an imputation algorithm
74  /// sense will give problems since the distance between will usually
75  /// become zero. This is solved by setting zero distance to a small
76  /// number. Identical rows in this context are basically a
77  /// comparison between elements with non-zero uncertainty weights
78  /// only, and all these elements are equal. Zero weight elements are
79  /// not used in the comparison since these are considered as
80  /// non/sense values.
81  ///
82  class NNI
83  {
84  public:
85
86    ///
87    /// Base constructor for the nearest neighbour imputation
88    /// algorithms.
89    ///
90    NNI(const utility::Matrix& matrix,const utility::Matrix& weight,
91        const u_int neighbours);
92
93    virtual ~NNI(void) {};
94
95    ///
96    /// Function doing the imputation.
97    ///
98    /// @return number of rows not imputed
99    ///
100    virtual u_int estimate(void)=0;
101
102    ///
103    /// @return A const reference to the modified data.
104    ///
105    const utility::Matrix& imputed_data(void) const;
106
107    ///
108    /// @return indices of rows in data matrix not imputed
109    ///
110    const std::vector<size_t>& not_imputed(void) const;
111
112  protected:
113    /**
114       \f$ d_{ij}^2=\frac {\sum_{k=1}^C w_{ik} w_{jk} (x_{ik}-x_{jk})^2
115       }{\sum_{k=l}^C w_{ik} w_{jk} } \f$ where C is the number of columns
116    */
117    std::vector<std::pair<u_int,double> > calculate_distances(const u_int) const;
118    /// Contributing nearest neighbours are added up to the user set
119    /// number, and neighbours are disqualified if their element
120    /// (column) weight is zero
121    std::vector<u_int> nearest_neighbours(const u_int,
122                             const std::vector<std::pair<u_int,double> >&) const;
123    ///
124    /// original data matrix
125    ///
126    const utility::Matrix& data_;
127
128    ///
129    /// data after imputation
130    ///
131    utility::Matrix imputed_data_;
132
133    ///
134    /// number of neighbor to use
135    ///
136    u_int neighbours_;
137
138    ///
139    /// which rows are not imputed due to lack of data
140    ///
141    std::vector<size_t> not_imputed_;
142
143    ///
144    /// weight matrix
145    ///
146    const utility::Matrix& weight_;
147  };
148
149}}} // of namespace utility, yat, and theplu
150
151#endif
Note: See TracBrowser for help on using the repository browser.