Changeset 1112


Ignore:
Timestamp:
Feb 21, 2008, 3:59:30 PM (16 years ago)
Author:
Markus Ringnér
Message:

Mostly related to #295 and #182

Location:
trunk
Files:
6 added
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/doc/concepts.doxygen

    r1094 r1112  
    55// This file is part of the yat library, http://trac.thep.lu.se/yat
    66//
    7 // The yat library is free software; you can redistribute it and/or
     7// The yat library is free software; you can redistribute it and/or
    88// modify it under the terms of the GNU General Public License as
    99// published by the Free Software Foundation; either version 2 of the
     
    2222
    2323/**
    24  \page Concepts Concepts
     24\page Concepts Concepts
    2525
    26  This page lists all the C++ concepts in the yat project.
     26This page lists all the C++ concepts in the yat project.
    2727
    28  - \subpage concept_distance
     28- \subpage concept_distance
     29- \subpage concept_neighbor_weighting
    2930*/
    3031
    3132
    3233/**
    33  \page concept_distance Distance
    34  \section Description
    35  Distance is a concept ...
    36  \section Requirements
    37  Classes implementing the concept Distance should fulfill ...
    38  Examples include theplu::yat::statistics::PearsonDistance and theplu::yat::statistics::EuclideanDistance.
     34\page concept_distance Distance
     35
     36\section Description
     37
     38\ref concept_distance is a concept ..
     39
     40\section Requirements
     41
     42Classes modelling the concept \ref concept_distance should implement ... 
     43
     44Examples of classes modelling the concept \ref concept_distance
     45include theplu::yat::statistics::PearsonDistance and
     46theplu::yat::statistics::EuclideanDistance.
     47
    3948*/
     49
     50/**
     51\page concept_neighbor_weighting Neighbor Weighting Method
     52
     53\section Description
     54
     55\ref concept_neighbor_weighting is a concept used in connection with
     56theplu::yat::classifier::KNN - classes used as the template argument
     57NeighborWeighting should implement this concept.
     58
     59\section Requirements
     60
     61Classes modelling the concept \ref concept_neighbor_weighting should
     62implement the following function:
     63 
     64\verbatim   
     65void operator()(const utility::VectorBase& distance, const std::vector<size_t> k_sorted,
     66                const Target& target, utility::VectorMutable& prediction) const
     67\endverbatim
     68
     69For a test sample, this function should calculate a total vote
     70(i.e. based on all k nearest neighbors) for each class. The vector \a
     71distance contains the distances from a test sample to all training
     72samples. The vector \a k_sorted contains the indices (for both \a
     73distance and \a target) to the k training samples with the smallest
     74distances to the test sample. The class for each training sample is
     75given by \a target, which is sorted in the same sample order as \a
     76distance. For each class the function calculates a total vote based on
     77the nearest neighbors of the test sample that belong to the
     78class. The total vote for each class is stored in the vector \a prediction.
     79
     80Examples of classes modelling the concept \ref
     81concept_neighbor_weighting include
     82theplu::yat::classifier::KNN_Uniform,
     83theplu::yat::classifier::KNN_ReciprocalDistance and
     84theplu::yat::classifier::KNN_ReciprocalRank.
     85
     86*/
  • trunk/test/knn_test.cc

    r1107 r1112  
    2323
    2424#include "yat/classifier/KNN.h"
     25#include "yat/classifier/KNN_ReciprocalDistance.h"
     26#include "yat/classifier/KNN_ReciprocalRank.h"
    2527#include "yat/classifier/MatrixLookup.h"
    2628#include "yat/classifier/MatrixLookupWeighted.h"
     
    8991  double slack_bound=2e-7;
    9092  utility::matrix result1(2,4);
    91   result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=2.0/3.0;
    92   result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=1.0/3.0;
     93  result1(0,0)=result1(0,1)=result1(1,2)=result1(1,3)=2.0;
     94  result1(0,2)=result1(0,3)=result1(1,0)=result1(1,1)=1.0;
    9395  double slack = deviation(prediction1,result1);
    9496  if (slack > slack_bound || std::isnan(slack)){
     
    108110  classifier::MatrixLookupWeighted mlw1(data1,weights1);
    109111  knn1.predict(mlw1,prediction1);
    110   result1(0,0)=1.0/3.0;
    111   result1(1,0)=2.0/3.0;
     112  result1(0,0)=1.0;
     113  result1(1,0)=2.0;
    112114  slack = deviation(prediction1,result1);
    113115  if (slack > slack_bound || std::isnan(slack)){
     
    119121
    120122  ////////////////////////////////////////////////////////////////
    121   // A test of training and test both weighted
     123  // A test of training and test both weighted 
    122124  ////////////////////////////////////////////////////////////////
    123125  *error << "test of predictions using weighted training and test data\n";
     
    130132  knn2.train();
    131133  knn2.predict(mlw1,prediction1);
    132   result1(0,1)=1.0/3.0;
    133   result1(1,1)=2.0/3.0;
     134  result1(0,1)=1.0;
     135  result1(1,1)=2.0;
    134136  slack = deviation(prediction1,result1);
    135137  if (slack > slack_bound || std::isnan(slack)){
     
    140142  }
    141143
     144
     145  ////////////////////////////////////////////////////////////////
     146  // A test of reciprocal ranks weighting with training and test both weighted
     147  ////////////////////////////////////////////////////////////////
     148  utility::matrix data2(data1);
     149  data2(1,3)=7;
     150  classifier::MatrixLookupWeighted mlw3(data2,weights2);
     151  classifier::KNN<statistics::EuclideanDistance,classifier::KNN_ReciprocalRank>
     152    knn3(mlw2,target1);
     153  knn3.k(3);
     154  knn3.train();
     155  knn3.predict(mlw3,prediction1);
     156  result1(0,0)=result1(1,3)=1.0;
     157  result1(0,3)=result1(1,0)=5.0/6.0;
     158  result1(0,2)=result1(1,1)=1.0/2.0;
     159  result1(0,1)=result1(1,2)=4.0/3.0;
     160  slack = deviation(prediction1,result1);
     161  if (slack > slack_bound || std::isnan(slack)){
     162    *error << "Difference to expected prediction too large\n";
     163    *error << "slack: " << slack << std::endl;
     164    *error << "expected less than " << slack_bound << std::endl;
     165    ok = false;
     166  }
     167
     168
     169  ////////////////////////////////////////////////////////////////
     170  // A test of reciprocal distance weighting with training and test both weighted
     171  ////////////////////////////////////////////////////////////////
     172  classifier::KNN<statistics::EuclideanDistance,classifier::KNN_ReciprocalDistance>
     173    knn4(mlw2,target1);
     174  knn4.k(3);
     175  knn4.train();
     176  knn4.predict(mlw3,prediction1);
     177  slack = deviation(prediction1,result1);
     178  if (!std::isinf(prediction1(0,0)) && !std::isinf(prediction1(0,1)) &&
     179      !std::isinf(prediction1(1,2)) &&
     180      fabs(prediction1(1,3)-(1.0/3.67423461417))>slack_bound &&
     181      fabs(prediction1(1,0)-(1.0/2.82842712475+1.0/2.44948974278))>slack_bound){
     182    *error << "Difference to expected prediction too large\n";
     183    ok = false;
     184  }
    142185
    143186  if(!ok) {
  • trunk/yat/classifier/KNN.h

    r1107 r1112  
    2727#include "DataLookup1D.h"
    2828#include "DataLookupWeighted1D.h"
     29#include "KNN_Uniform.h"
    2930#include "MatrixLookup.h"
    3031#include "MatrixLookupWeighted.h"
     
    4344
    4445  ///
    45   /// @brief Class for Nearest Centroid Classification.
    46   ///
    47  
    48  
    49   template <typename Distance>
     46  /// @brief Class for Nearest Neighbor Classification.
     47  ///
     48  /// The template argument Distance should be a class implementing
     49  /// the concept \ref concept_distance.
     50  /// The template argument NeighborWeighting should be a class implementing
     51  /// the concept \ref concept_neighbor_weighting.
     52
     53  template <typename Distance, typename NeighborWeighting=KNN_Uniform>
    5054  class KNN : public SupervisedClassifier
    5155  {
     
    7478
    7579    ///
    76     /// Default number of neighbours (k) is set to 3.
    77     ///
    78     /// @return the number of neighbours
     80    /// Default number of neighbors (k) is set to 3.
     81    ///
     82    /// @return the number of neighbors
    7983    ///
    8084    u_int k() const;
    8185
    8286    ///
    83     /// @brief sets the number of neighbours, k.
     87    /// @brief sets the number of neighbors, k.
    8488    ///
    8589    void k(u_int);
     
    99103   
    100104    ///
    101     /// For each sample, calculate the number of neighbours for each
     105    /// For each sample, calculate the number of neighbors for each
    102106    /// class.
    103107    ///
     
    112116    const DataLookup2D& data_;
    113117
    114     // The number of neighbours
     118    // The number of neighbors
    115119    u_int k_;
    116120
    117121    Distance distance_;
     122
     123    NeighborWeighting weighting_;
     124
    118125    ///
    119126    /// Calculates the distances between a data set and the training
     
    123130    ///
    124131    utility::matrix* calculate_distances(const DataLookup2D&) const;
     132
    125133    void calculate_unweighted(const MatrixLookup&,
    126134                              const MatrixLookup&,
     
    134142  // templates
    135143 
    136   template <typename Distance>
    137   KNN<Distance>::KNN(const MatrixLookup& data, const Target& target)
     144  template <typename Distance, typename NeighborWeighting>
     145  KNN<Distance, NeighborWeighting>::KNN(const MatrixLookup& data, const Target& target)
    138146    : SupervisedClassifier(target), data_(data),k_(3)
    139147  {
     
    141149
    142150
    143   template <typename Distance>
    144   KNN<Distance>::KNN(const MatrixLookupWeighted& data, const Target& target)
     151  template <typename Distance, typename NeighborWeighting>
     152  KNN<Distance, NeighborWeighting>::KNN
     153  (const MatrixLookupWeighted& data, const Target& target)
    145154    : SupervisedClassifier(target), data_(data),k_(3)
    146155  {
    147156  }
    148157 
    149   template <typename Distance>
    150   KNN<Distance>::~KNN()   
    151   {
    152   }
    153  
    154   template <typename Distance>
    155   utility::matrix* KNN<Distance>::calculate_distances(const DataLookup2D& test) const
     158  template <typename Distance, typename NeighborWeighting>
     159  KNN<Distance, NeighborWeighting>::~KNN()   
     160  {
     161  }
     162 
     163  template <typename Distance, typename NeighborWeighting>
     164  utility::matrix* KNN<Distance, NeighborWeighting>::calculate_distances
     165  (const DataLookup2D& test) const
    156166  {
    157167    // matrix with training samples as rows and test samples as columns
     
    197207  }
    198208
    199   template <typename Distance>
    200   void  KNN<Distance>:: calculate_unweighted(const MatrixLookup& training,
    201                                             const MatrixLookup& test,
    202                                             utility::matrix* distances) const
     209  template <typename Distance, typename NeighborWeighting>
     210  void  KNN<Distance, NeighborWeighting>::calculate_unweighted
     211  (const MatrixLookup& training, const MatrixLookup& test,
     212  utility::matrix* distances) const
    203213  {
    204214    for(size_t i=0; i<training.columns(); i++) {
     
    212222  }
    213223 
    214   template <typename Distance>
    215   void  KNN<Distance>:: calculate_weighted(const MatrixLookupWeighted& training,
    216                                            const MatrixLookupWeighted& test,
    217                                            utility::matrix* distances) const
     224  template <typename Distance, typename NeighborWeighting>
     225  void 
     226  KNN<Distance, NeighborWeighting>::calculate_weighted
     227  (const MatrixLookupWeighted& training, const MatrixLookupWeighted& test,
     228   utility::matrix* distances) const
    218229  {
    219230    for(size_t i=0; i<training.columns(); i++) {
     
    221232      for(size_t j=0; j<test.columns(); j++) {
    222233        classifier::DataLookupWeighted1D test1(test,j,false);
    223         (*distances)(i,j) = distance_(training1.begin(), training1.end(), test1.begin());
     234        (*distances)(i,j) = distance_(training1.begin(), training1.end(),
     235                                      test1.begin());
    224236        utility::yat_assert<std::runtime_error>(!std::isnan((*distances)(i,j)));
    225237      }
     
    228240
    229241 
    230   template <typename Distance>
    231   const DataLookup2D& KNN<Distance>::data(void) const
     242  template <typename Distance, typename NeighborWeighting>
     243  const DataLookup2D& KNN<Distance, NeighborWeighting>::data(void) const
    232244  {
    233245    return data_;
     
    235247 
    236248 
    237   template <typename Distance>
    238   u_int KNN<Distance>::k() const
     249  template <typename Distance, typename NeighborWeighting>
     250  u_int KNN<Distance, NeighborWeighting>::k() const
    239251  {
    240252    return k_;
    241253  }
    242254
    243   template <typename Distance>
    244   void KNN<Distance>::k(u_int k)
     255  template <typename Distance, typename NeighborWeighting>
     256  void KNN<Distance, NeighborWeighting>::k(u_int k)
    245257  {
    246258    k_=k;
     
    248260
    249261
    250   template <typename Distance>
     262  template <typename Distance, typename NeighborWeighting>
    251263  SupervisedClassifier*
    252   KNN<Distance>::make_classifier(const DataLookup2D& data, const Target& target) const
     264  KNN<Distance, NeighborWeighting>::make_classifier(const DataLookup2D& data,
     265                                                    const Target& target) const
    253266  {     
    254267    KNN* knn=0;
    255268    try {
    256269      if(data.weighted()) {
    257         knn=new KNN<Distance>(dynamic_cast<const MatrixLookupWeighted&>(data),
    258                               target);
     270        knn=new KNN<Distance, NeighborWeighting>
     271          (dynamic_cast<const MatrixLookupWeighted&>(data),target);
    259272      } 
    260273      else {
    261         knn=new KNN<Distance>(dynamic_cast<const MatrixLookup&>(data),
    262                               target);
     274        knn=new KNN<Distance, NeighborWeighting>
     275          (dynamic_cast<const MatrixLookup&>(data),target);
    263276      }
    264277      knn->k(this->k());
    265278    }
    266279    catch (std::bad_cast) {
    267       std::string str = "Error in KNN<Distance>::make_classifier: DataLookup2D of unexpected class.";
     280      std::string str = "Error in KNN<Distance, NeighborWeighting>";
     281      str += "::make_classifier: DataLookup2D of unexpected class.";
    268282      throw std::runtime_error(str);
    269283    }
     
    272286 
    273287 
    274   template <typename Distance>
    275   void KNN<Distance>::train()
     288  template <typename Distance, typename NeighborWeighting>
     289  void KNN<Distance, NeighborWeighting>::train()
    276290  {   
    277291    trained_=true;
     
    279293
    280294
    281   template <typename Distance>
    282   void KNN<Distance>::predict(const DataLookup2D& test,                     
    283                               utility::matrix& prediction) const
     295  template <typename Distance, typename NeighborWeighting>
     296  void KNN<Distance, NeighborWeighting>::predict(const DataLookup2D& test,
     297                                                 utility::matrix& prediction) const
    284298  {   
    285299    utility::yat_assert<std::runtime_error>(data_.rows()==test.rows());
     
    287301    utility::matrix* distances=calculate_distances(test);
    288302   
    289     // for each test sample (column in distances) find the closest
    290     // training samples
    291303    prediction.resize(target_.nof_classes(),test.columns(),0.0);
    292304    for(size_t sample=0;sample<distances->columns();sample++) {
    293305      std::vector<size_t> k_index;
    294       utility::sort_smallest_index(k_index,k_,
    295                                    distances->column_const_view(sample));
    296       for(size_t j=0;j<k_index.size();j++) {
    297         prediction(target_(k_index[j]),sample)++;
    298       }
    299     }
    300     prediction*=(1.0/k_);
     306      utility::VectorConstView dist=distances->column_const_view(sample);
     307      utility::sort_smallest_index(k_index,k_,dist);
     308      utility::VectorView pred=prediction.column_view(sample);
     309      weighting_(dist,k_index,target_,pred);
     310    }
    301311    delete distances;
    302312  }
  • trunk/yat/classifier/Makefile.am

    r1079 r1112  
    4242  Kernel_SEV.cc \
    4343  KernelLookup.cc \
     44  KNN_Uniform.cc \
     45  KNN_ReciprocalDistance.cc \
     46  KNN_ReciprocalRank.cc \
    4447  MatrixLookup.cc \
    4548  MatrixLookupWeighted.cc \
     
    7780  Kernel_SEV.h \
    7881  KNN.h \
     82  KNN_Uniform.h \
    7983  MatrixLookup.h \
    8084  MatrixLookupWeighted.h \
  • trunk/yat/classifier/NCC.h

    r1098 r1112  
    5656  /// @brief Class for Nearest Centroid Classification.
    5757  ///
    58 
     58  /// The template argument Distance should be a class implementing
     59  /// the concept \ref concept_distance.
     60  ///
    5961  template <typename Distance>
    6062  class NCC : public SupervisedClassifier
Note: See TracChangeset for help on using the changeset viewer.