source: trunk/yat/utility/NNI.h @ 1260

Last change on this file since 1260 was 1260, checked in by Jari Häkkinen, 13 years ago

Made the project to compile on my Intel Mac running Leopard.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 4.7 KB
Line 
1#ifndef _theplu_yat_utility_nni_
2#define _theplu_yat_utility_nni_
3
4// $Id: NNI.h 1260 2008-04-08 06:16:53Z jari $
5
6/*
7  Copyright (C) 2004 Jari Häkkinen
8  Copyright (C) 2005, 2006 Jari Häkkinen, Peter Johansson
9  Copyright (C) 2007 Peter Johansson
10  Copyright (C) 2008 Jari Häkkinen, Peter Johansson
11
12  This file is part of the yat library, http://trac.thep.lu.se/yat
13
14  The yat library is free software; you can redistribute it and/or
15  modify it under the terms of the GNU General Public License as
16  published by the Free Software Foundation; either version 2 of the
17  License, or (at your option) any later version.
18
19  The yat library is distributed in the hope that it will be useful,
20  but WITHOUT ANY WARRANTY; without even the implied warranty of
21  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  General Public License for more details.
23
24  You should have received a copy of the GNU General Public License
25  along with this program; if not, write to the Free Software
26  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
27  02111-1307, USA.
28*/
29
30#include "Matrix.h"
31
32#include <iostream>
33#include <utility>
34#include <vector>
35
36#include <sys/types.h>
37
38namespace theplu {
39namespace yat {
40namespace utility {
41
42  ///
43  /// @brief Interface class for nearest
44  /// neighbour imputation (NNI) algorithms.
45  ///
46  /// The NNI algorithms implemented here are discussed in a document
47  /// created in the WeNNI project. This document will be released for
48  /// public access, and the necessary information for retrieving that
49  /// document will be provided here.
50  ///
51  /// Short introduction to NNI is that one may want to improve
52  /// (correct) uncertain data. Here, the data to be imputed is stored in a
53  /// matrix where rows similar to each other are used to adjust
54  /// uncertain data. The data matrix is accompanied by a weight
55  /// (uncertainty) matrix defining what data is to be considered as
56  /// 'certain' and what data is uncertain. The weight matrix can be
57  /// binary with 1's indicating that the data does not need
58  /// corrections, whereas a 0 means that the data should be replaced
59  /// by an imputed value. Naturally, the weight matrix can also be
60  /// continuous, where values between 0 and 1 define how certain a
61  /// data element is.
62  ///
63  /// The imputation depends on how similarity of rows of data is
64  /// defined and on the number of closest neighbours (here, rows)
65  /// used; the number of neighbours to use in the imputation can be set.
66  ///
67  /// Implementation issues
68  ///
69  /// The current implementation treats rows where all data are tagged
70  /// as completely uncertain, i.e. all weights are zero, by
71  /// ignoring these rows in nearest neighbourhood
72  /// calculations. Importantly, this type of data is not changed
73  /// (imputed) either since there is no close neighbourhood defined
74  /// for this data.
75  ///
76  /// Rows that are completely identical in an imputation algorithm
77  /// sense will give problems since the distance between them will
78  /// usually become zero. This is solved by setting zero distance to a
79  /// small number. Identical rows in this context are basically a
80  /// comparison between elements with non-zero uncertainty weights
81  /// only, and all these elements are equal. Zero weight elements are
82  /// not used in the comparison since these are considered as
83  /// nonsense values.
84  ///
85  class NNI
86  {
87  public:
88
89    /// Base constructor for the nearest neighbour imputation algorithms.
90    /// @param matrix data matrix to be imputed
91    /// @param weight weight (uncertainty) matrix accompanying \a matrix
92    /// @param neighbours number of nearest neighbours to use
93    NNI(const utility::Matrix& matrix,const utility::Matrix& weight,
94        const u_int neighbours);
95    /// Virtual destructor; NNI is an abstract base class (see estimate()).
96    virtual ~NNI(void) {};
97
98    ///
99    /// Function doing the imputation; pure virtual, to be implemented
100    /// by the concrete NNI algorithms.
101    /// @return number of rows not imputed
102    ///
103    virtual u_int estimate(void)=0;
104
105    ///
106    /// @return A const reference to the modified data.
107    ///
108    const utility::Matrix& imputed_data(void) const;
109
110    ///
111    /// @return indices of rows in data matrix not imputed
112    ///
113    const std::vector<size_t>& not_imputed(void) const;
114
115  protected:
116    /** Weighted squared distance between rows i and j:
117       \f$ d_{ij}^2=\frac {\sum_{k=1}^C w_{ik} w_{jk} (x_{ik}-x_{jk})^2
118       }{\sum_{k=1}^C w_{ik} w_{jk} } \f$ where C is the number of columns
119       NOTE(review): presumably returns (row index, distance) pairs for the given row -- confirm. */
120    std::vector<std::pair<u_int,double> > calculate_distances(const u_int) const;
121    /// Contributing nearest neighbours are added up to the user-set
122    /// number, and neighbours are disqualified if their element
123    /// (column) weight is zero.
124    std::vector<u_int> nearest_neighbours(const u_int,
125                             const std::vector<std::pair<u_int,double> >&) const;
126    ///
127    /// original data matrix; held by const reference (not copied), so the
128    /// referred matrix must stay alive for as long as this object is used
129    const utility::Matrix& data_;
130
131    ///
132    /// data after imputation
133    ///
134    utility::Matrix imputed_data_;
135
136    ///
137    /// number of neighbours to use
138    ///
139    u_int neighbours_;
140
141    ///
142    /// which rows are not imputed due to lack of data
143    ///
144    std::vector<size_t> not_imputed_;
145
146    ///
147    /// weight matrix; held by const reference (not copied), so the
148    /// referred matrix must stay alive for as long as this object is used
149    const utility::Matrix& weight_;
150  };
151
152}}} // of namespace utility, yat, and theplu
153
154#endif
Note: See TracBrowser for help on using the repository browser.