source: trunk/yat/classifier/IGP.h

Last change on this file was 3562, checked in by Peter, 5 years ago

update copyright years

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date ID
File size: 4.2 KB
Line 
1#ifndef _theplu_yat_classifier_igp_
2#define _theplu_yat_classifier_igp_
3
4// $Id$
5
6/*
7  Copyright (C) 2006 Jari Häkkinen, Markus Ringnér
8  Copyright (C) 2007, 2008 Jari Häkkinen, Peter Johansson, Markus Ringnér
9  Copyright (C) 2009, 2010, 2014, 2017 Peter Johansson
10
11  This file is part of the yat library, http://dev.thep.lu.se/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 3 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with yat. If not, see <http://www.gnu.org/licenses/>.
25*/
26
27#include "MatrixLookup.h"
28#include "Target.h"
29#include "yat/utility/concept_check.h"
30#include "yat/utility/Matrix.h"
31#include "yat/utility/Vector.h"
32#include "yat/utility/yat_assert.h"
33
34#include <boost/concept_check.hpp>
35
36#include <cmath>
37#include <limits>
38#include <stdexcept>
39
40namespace theplu {
41namespace yat {
42namespace classifier {
43
44  class Target;
45  class MatrixLookup;
46
47  /**
48     \brief Class for In Group Proportions (IGP)
49
50     IGP is defined to be the proportion of samples in a group whose
51     nearest neighbours are also in the same group.
52
53     \see <a HREF="
54     http://biostatistics.oxfordjournals.org/cgi/content/abstract/kxj029v1">
55     Kapp and Tibshirani, Biostatistics (2006)</a>.
56
57     Distance should model concept \ref concept_distance (but support
58     for weighted iterators is not required).
59
60     \note Distance must be symmetric, i.e., Distance(x,y) == Distance(y,x).
61  */
62  template <typename Distance>
63  class IGP
64  {
65
66  public:
67    ///
68    /// Constructor taking the training data and the target vector and
69    /// as input.
70    ///
71    IGP(const MatrixLookup&, const Target&);
72
73
74    ///
75    /// Constructor taking the training data, the target vector and
76    /// the distance measure as input.
77    ///
78    IGP(const MatrixLookup&, const Target&, const Distance&);
79
80    ///
81    /// Destrucutor
82    ///
83    virtual ~IGP();
84
85    ///
86    /// @return the IGP score for each class as elements in a vector.
87    ///
88    const utility::Vector& score(void) const;
89
90
91  private:
92    void calculate();
93
94    utility::Vector igp_;
95    Distance distance_;
96
97    const MatrixLookup& matrix_;
98    const Target& target_;
99  };
100
101
102  // templates
103
104  template <typename Distance>
105  IGP<Distance>::IGP(const MatrixLookup& data, const Target& target)
106    : matrix_(data), target_(target)
107  {
108    BOOST_CONCEPT_ASSERT((utility::DistanceConcept<Distance>));
109    calculate();
110  }
111
112  template <typename Distance>
113  IGP<Distance>::IGP(const MatrixLookup& data, const Target& target,
114                     const Distance& dist)
115    : matrix_(data), target_(target), distance_(dist)
116  {
117    BOOST_CONCEPT_ASSERT((utility::DistanceConcept<Distance>));
118    calculate();
119  }
120
121
122  template <typename Distance>
123  IGP<Distance>::~IGP()
124  {
125  }
126
127
128  template <typename Distance>
129  void IGP<Distance>::calculate()
130  {
131    YAT_ASSERT(target_.size()==matrix_.columns());
132
133    // Calculate IGP for each class
134    igp_.resize(target_.nof_classes(), 0.0);
135
136    // calculate distances
137    utility::Matrix dist(matrix_.columns(), matrix_.columns());
138    for (size_t i=0; i<dist.rows(); ++i)
139      for (size_t j=i+1; j<dist.rows(); ++j) {
140        dist(i,j) = dist(j,i) = distance_(matrix_.begin_column(i),
141                                          matrix_.end_column(i),
142                                          matrix_.begin_column(j));
143      }
144
145    // find nearest neigbour for each sample
146    for(size_t i=0; i<target_.size(); i++) {
147      size_t neighbor=i;
148      double mindist=std::numeric_limits<double>::max();
149      for(size_t j=0; j<target_.size(); j++) {
150        if (i==j) // avoid self-self comparison
151          continue;
152        if(dist(i,j)<mindist) {
153          mindist=dist(i,j);
154          neighbor=j;
155        }
156      }
157      if(target_(i)==target_(neighbor))
158        igp_(target_(i))++;
159
160    }
161    for(size_t i=0; i<target_.nof_classes(); i++) {
162      igp_(i)/=static_cast<double>(target_.size(i));
163    }
164  }
165
166
167  template <typename Distance>
168  const utility::Vector& IGP<Distance>::score(void) const
169  {
170    return igp_;
171  }
172
173}}} // of namespace classifier, yat, and theplu
174
175#endif
Note: See TracBrowser for help on using the repository browser.