source: trunk/src/kNNI.cc @ 228

Last change on this file since 228 was 228, checked in by Peter, 18 years ago

Moved estimation out of the constructor; added a function telling which rows were not imputed (due to too many missing values).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 1.8 KB
Line 
1// $Id: kNNI.cc 228 2005-02-01 14:06:51Z peter $
2
3#include "kNNI.h"
4
5#include <algorithm>
6#include <cmath>
7#include <fstream>
8#include <vector>
9
10#include "stl_utility.h"
11
12namespace theplu {
13namespace cpptools {
14
15  kNNI::kNNI(const gslapi::matrix& matrix,const gslapi::matrix& flag,
16             const u_int neighbours)
17    : NNI(matrix,flag,neighbours)
18  {
19    for (unsigned int i=0; i<weight_.rows(); i++)
20      for (unsigned int j=0; j<weight_.columns(); j++)
21        if (!weight_(i,j)) {
22          mv_rows_.push_back(i);
23          break;
24        }
25    //estimate();
26  }
27
28
29
30  // \hat{x_{ij}}=\frac{ \sum_{k=1,N} \frac{x_{kj}}{d_{ki}} }
31  //                   { \sum_{k=1,N} \frac{1     }{d_{ki}} },
32  // where N is defined in the paper cited in the NNI class definition
33  // documentation.
34  u_int kNNI::estimate(void)
35  {
36    using namespace std;
37    for (unsigned int i=0; i<mv_rows_.size(); i++) {
38      // Jari, avoid copying in next line
39      vector<pair<u_int,double> > distance=calculate_distances(mv_rows_[i]);
40      sort(distance.begin(),distance.end(),
41                pair_value_compare<u_int,double>());
42      for (unsigned int j=0; j<data_.columns(); j++)
43        if (!weight_(mv_rows_[i],j)) {
44          vector<u_int> knn=nearest_neighbours(j,distance);
45          double new_value=0.0;
46          double norm=0.0;
47          for (vector<u_int>::const_iterator k=knn.begin(); k!=knn.end(); k++){
48            // Jari, a small number needed here, use something standardized.
49            // Avoid division with zero (perfect match vectors)
50            double d=(distance[*k].second ? distance[*k].second : 1e-10);
51            new_value+=data_(distance[*k].first,j)/d;
52            norm+=1.0/d;
53          }
54          // No impute if no contributions from neighbours.
55          if (norm)
56            imputed_data_(mv_rows_[i],j)=new_value/norm;
57          else
58            not_imputed_.push_back(i);
59        }
60    }
61    return not_imputed_.size();
62  }
63
64
65}} // of namespace cpptools and namespace theplu
Note: See TracBrowser for help on using the repository browser.