source: trunk/yat/classifier/IGP.h @ 3320

Last change on this file since 3320 was 3320, checked in by Peter, 9 years ago

speedup. assume Distance is symmetric. closes #814

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 4.1 KB
Line 
1#ifndef _theplu_yat_classifier_igp_
2#define _theplu_yat_classifier_igp_
3
4// $Id$
5
6/*
7  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér
8  Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér
9  Copyright (C) 2009, 2010, 2014 Peter Johansson
10
11  This file is part of the yat library, http://dev.thep.lu.se/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 3 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with yat. If not, see <http://www.gnu.org/licenses/>.
25*/
26
27#include "MatrixLookup.h"
28#include "Target.h"
29#include "yat/utility/concept_check.h"
30#include "yat/utility/Matrix.h"
31#include "yat/utility/Vector.h"
32#include "yat/utility/yat_assert.h"
33
34#include <boost/concept_check.hpp>
35
36#include <cmath>
37#include <limits>
38#include <stdexcept>
39
40namespace theplu {
41namespace yat {
42namespace classifier {
43
44  class Target;
45  class MatrixLookup;
46
47  /**
48     \brief Class for In Group Proportions (IGP)
49
50     IGP is defined to be the proportion of samples in a group whose
51     nearest neighbours are also in the same group.
52
53     \see <a HREF="
54     http://biostatistics.oxfordjournals.org/cgi/content/abstract/kxj029v1">
55     Kapp and Tibshirani, Biostatistics (2006)</a>.
56
57     \note Distance must be symmetric, i.e., Distance(x,y) == Distance(y,x).
58  */
59  template <typename Distance>
60  class IGP
61  {
62
63  public:
64    ///
65    /// Constructor taking the training data and the target vector and
66    /// as input.
67    ///
68    IGP(const MatrixLookup&, const Target&);
69
70
71    ///
72    /// Constructor taking the training data, the target vector and
73    /// the distance measure as input.
74    ///
75    IGP(const MatrixLookup&, const Target&, const Distance&);
76
77    ///
78    /// Destrucutor
79    ///
80    virtual ~IGP();
81
82    ///
83    /// @return the IGP score for each class as elements in a vector.
84    ///
85    const utility::Vector& score(void) const;
86
87
88  private:
89    void calculate();
90
91    utility::Vector igp_;
92    Distance distance_;
93
94    const MatrixLookup& matrix_;
95    const Target& target_;
96  };
97
98
99  // templates
100
101  template <typename Distance>
102  IGP<Distance>::IGP(const MatrixLookup& data, const Target& target)
103    : matrix_(data), target_(target)
104  {
105    BOOST_CONCEPT_ASSERT((utility::DistanceConcept<Distance>));
106    calculate();
107  }
108
109  template <typename Distance>
110  IGP<Distance>::IGP(const MatrixLookup& data, const Target& target,
111                     const Distance& dist)
112    : matrix_(data), target_(target), distance_(dist)
113  {
114    BOOST_CONCEPT_ASSERT((utility::DistanceConcept<Distance>));
115    calculate();
116  }
117
118
119  template <typename Distance>
120  IGP<Distance>::~IGP()
121  {
122  }
123
124
125  template <typename Distance>
126  void IGP<Distance>::calculate()
127  {
128    YAT_ASSERT(target_.size()==matrix_.columns());
129
130    // Calculate IGP for each class
131    igp_.resize(target_.nof_classes(), 0.0);
132
133    // calculate distances
134    utility::Matrix dist(matrix_.columns(), matrix_.columns());
135    for (size_t i=0; i<dist.rows(); ++i)
136      for (size_t j=i+1; j<dist.rows(); ++j) {
137        dist(i,j) = dist(j,i) = distance_(matrix_.begin_column(i),
138                                          matrix_.end_column(i),
139                                          matrix_.begin_column(j));
140      }
141
142    // find nearest neigbour for each sample
143    for(size_t i=0; i<target_.size(); i++) {
144      size_t neighbor=i;
145      double mindist=std::numeric_limits<double>::max();
146      for(size_t j=0; j<target_.size(); j++) {
147        if (i==j) // avoid self-self comparison
148          continue;
149        if(dist(i,j)<mindist) {
150          mindist=dist(i,j);
151          neighbor=j;
152        }
153      }
154      if(target_(i)==target_(neighbor))
155        igp_(target_(i))++;
156
157    }
158    for(size_t i=0; i<target_.nof_classes(); i++) {
159      igp_(i)/=static_cast<double>(target_.size(i));
160    }
161  }
162
163
164  template <typename Distance>
165  const utility::Vector& IGP<Distance>::score(void) const
166  {
167    return igp_;
168  }
169
170}}} // of namespace classifier, yat, and theplu
171
172#endif
Note: See TracBrowser for help on using the repository browser.