Changeset 824 for trunk/test


Ignore:
Timestamp:
Mar 19, 2007, 1:12:04 PM (16 years ago)
Author:
Peter
Message:

Moved Ensemble stuff from subset_generator_test to ensemble_test. Also merged crossvalidation and subsetgenerator into one test, and added bootstrap_test into this test. Refs #175

Location:
trunk/test
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/test/Makefile.am

    r822 r824  
    2727
    2828TESTS = alignment_test averager_test                  \
    29   consensus_inputranker_test crossvalidation_test data_lookup_1d_test \
     29  consensus_inputranker_test data_lookup_1d_test  \
    3030  distance_test \
    3131  ensemble_test feature_selection_test fileutil_test inputranker_test \
     
    4949averager_test_SOURCES = averager_test.cc
    5050consensus_inputranker_test_SOURCES = consensus_inputranker_test.cc
    51 crossvalidation_test_SOURCES = crossvalidation_test.cc
    5251data_lookup_1d_test_SOURCES = data_lookup_1d_test.cc
    5352distance_test_SOURCES = distance_test.cc
  • trunk/test/crossvalidation_test.cc

    r781 r824  
    2323
    2424#include "yat/classifier/CrossValidationSampler.h"
    25 #include "yat/classifier/SubsetGenerator.h"
    2625#include "yat/classifier/MatrixLookup.h"
    2726#include "yat/classifier/Target.h"
     
    5453  *error << "testing crosssplitter" << std::endl;
    5554  bool ok = true;
    56 
    57   std::vector<std::string> label(10,"default");
    58   label[2]=label[7]="white";
    59   label[4]=label[5]="black";
    60   label[6]=label[3]="green";
    61   label[8]=label[9]="red";
    62                  
    63   classifier::Target target(label);
    64   utility::matrix raw_data(10,10);
    65   classifier::MatrixLookup data(raw_data);
    66   classifier::CrossValidationSampler cv(target,3,3);
    67  
    68   std::vector<size_t> sample_count(10,0);
    69   for (size_t j=0; j<cv.size(); ++j){
    70     std::vector<size_t> class_count(5,0);
    71     assert(j<cv.size());
    72     if (cv.training_index(j).size()+cv.validation_index(j).size()!=
    73         target.size()){
    74       ok = false;
    75       *error << "ERROR: size of training samples plus "
    76              << "size of validation samples is invalid." << std::endl;
    77     }
    78     if (cv.validation_index(j).size()!=3 && cv.validation_index(j).size()!=4){
    79       ok = false;
    80       *error << "ERROR: size of validation samples is invalid."
    81              << "expected size to be 3 or 4" << std::endl;
    82     }
    83     for (size_t i=0; i<cv.validation_index(j).size(); i++) {
    84       assert(cv.validation_index(j)[i]<sample_count.size());
    85       sample_count[cv.validation_index(j)[i]]++;
    86     }
    87     for (size_t i=0; i<cv.training_index(j).size(); i++) {
    88       class_count[target(cv.training_index(j)[i])]++;
    89     }
    90     class_count_test(class_count,error,ok);
    91   }
    92   sample_count_test(sample_count,error,ok);
    93  
    94   //
    95   // Test two nested CrossSplitters
    96   //
    97 
    98   *error << "\ntesting two nested crossplitters" << std::endl;
    99   label.resize(9);
    100   label[0]=label[1]=label[2]="0";
    101   label[3]=label[4]=label[5]="1";
    102   label[6]=label[7]=label[8]="2";
    103                  
    104   target=classifier::Target(label);
    105   utility::matrix raw_data2(2,9);
    106   for(size_t i=0;i<raw_data2.rows();i++)
    107     for(size_t j=0;j<raw_data2.columns();j++)
    108       raw_data2(i,j)=i*10+10+j+1;
    109    
    110   classifier::MatrixLookup data2(raw_data2);
    111   classifier::CrossValidationSampler cv2(target,3,3);
    112   classifier::SubsetGenerator cv_test(cv2,data2);
    113 
    114   std::vector<size_t> test_sample_count(9,0);
    115   std::vector<size_t> test_class_count(3,0);
    116   std::vector<double> test_value1(4,0);
    117   std::vector<double> test_value2(4,0);
    118   std::vector<double> t_value(4,0);
    119   std::vector<double> v_value(4,0);
    120   for(u_long k=0;k<cv_test.size();k++) {
    121    
    122     const classifier::DataLookup2D& tv_view=cv_test.training_data(k);
    123     const classifier::Target& tv_target=cv_test.training_target(k);
    124     const std::vector<size_t>& tv_index=cv_test.training_index(k);
    125     const classifier::DataLookup2D& test_view=cv_test.validation_data(k);
    126     const classifier::Target& test_target=cv_test.validation_target(k);
    127     const std::vector<size_t>& test_index=cv_test.validation_index(k);
    128 
    129     for (size_t i=0; i<test_index.size(); i++) {
    130       assert(test_index[i]<sample_count.size());
    131       test_sample_count[test_index[i]]++;
    132       test_class_count[target(test_index[i])]++;
    133       test_value1[0]+=test_view(0,i);
    134       test_value2[0]+=test_view(1,i);
    135       test_value1[test_target(i)+1]+=test_view(0,i);
    136       test_value2[test_target(i)+1]+=test_view(1,i);
    137       if(test_target(i)!=target(test_index[i])) {
    138         ok=false;
    139         *error << "ERROR: incorrect mapping of test indices" << std:: endl;
    140       }       
    141     }
    142    
    143     classifier::CrossValidationSampler sampler_training(tv_target,2,2);
    144     classifier::SubsetGenerator cv_training(sampler_training,tv_view);
    145     std::vector<size_t> v_sample_count(6,0);
    146     std::vector<size_t> t_sample_count(6,0);
    147     std::vector<size_t> v_class_count(3,0);
    148     std::vector<size_t> t_class_count(3,0);
    149     std::vector<size_t> t_class_count2(3,0);
    150     for(u_long l=0;l<cv_training.size();l++) {
    151       const classifier::DataLookup2D& t_view=cv_training.training_data(l);
    152       const classifier::Target& t_target=cv_training.training_target(l);
    153       const std::vector<size_t>& t_index=cv_training.training_index(l);
    154       const classifier::DataLookup2D& v_view=cv_training.validation_data(l);
    155       const classifier::Target& v_target=cv_training.validation_target(l);
    156       const std::vector<size_t>& v_index=cv_training.validation_index(l);
    157      
    158       if (test_index.size()+tv_index.size()!=target.size()
    159           || t_index.size()+v_index.size() != tv_target.size()
    160           || test_index.size()+v_index.size()+t_index.size() !=  target.size()){
    161         ok = false;
    162         *error << "ERROR: size of training samples, validation samples "
    163                << "and test samples in is invalid."
    164                << std::endl;
    165       }
    166       if (test_index.size()!=3 || tv_index.size()!=6 || t_index.size()!=3 ||
    167           v_index.size()!=3){
    168         ok = false;
    169         *error << "ERROR: size of training, validation, and test samples"
    170                << " is invalid."
    171                << " Expected sizes to be 3" << std::endl;
    172       }     
    173 
    174       std::vector<size_t> tv_sample_count(6,0);
    175       for (size_t i=0; i<t_index.size(); i++) {
    176         assert(t_index[i]<t_sample_count.size());
    177         tv_sample_count[t_index[i]]++;
    178         t_sample_count[t_index[i]]++;
    179         t_class_count[t_target(i)]++;
    180         t_class_count2[tv_target(t_index[i])]++;
    181         t_value[0]+=t_view(0,i);
    182         t_value[t_target(i)+1]+=t_view(0,i);       
    183       }
    184       for (size_t i=0; i<v_index.size(); i++) {
    185         assert(v_index[i]<v_sample_count.size());
    186         tv_sample_count[v_index[i]]++;
    187         v_sample_count[v_index[i]]++;
    188         v_class_count[v_target(i)]++;
    189         v_value[0]+=v_view(0,i);
    190         v_value[v_target(i)+1]+=v_view(0,i);
    191       }
    192  
    193       sample_count_test(tv_sample_count,error,ok);     
    194 
    195     }
    196     sample_count_test(v_sample_count,error,ok);
    197     sample_count_test(t_sample_count,error,ok);
    198    
    199     class_count_test(t_class_count,error,ok);
    200     class_count_test(t_class_count2,error,ok);
    201     class_count_test(v_class_count,error,ok);
    202 
    203 
    204   }
    205   sample_count_test(test_sample_count,error,ok);
    206   class_count_test(test_class_count,error,ok);
    207  
    208   if(test_value1[0]!=135 || test_value1[1]!=36 || test_value1[2]!=45 ||
    209      test_value1[3]!=54) {
    210     ok=false;
    211     *error << "ERROR: incorrect sums of test values in row 1"
    212            << " found: " << test_value1[0] << ", "  << test_value1[1]
    213            << ", "  << test_value1[2] << " and "  << test_value1[3]
    214            << std::endl;
    215   }
    216 
    217  
    218   if(test_value2[0]!=225 || test_value2[1]!=66 || test_value2[2]!=75 ||
    219      test_value2[3]!=84) {
    220     ok=false;
    221     *error << "ERROR: incorrect sums of test values in row 2"
    222            << " found: " << test_value2[0] << ", "  << test_value2[1]
    223            << ", "  << test_value2[2] << " and "  << test_value2[3]
    224            << std::endl;
    225   }
    226 
    227   if(t_value[0]!=270 || t_value[1]!=72 || t_value[2]!=90 || t_value[3]!=108)  {
    228     ok=false;
    229     *error << "ERROR: incorrect sums of training values in row 1"
    230            << " found: " << t_value[0] << ", "  << t_value[1]
    231            << ", "  << t_value[2] << " and "  << t_value[3]
    232            << std::endl;   
    233   }
    234 
    235   if(v_value[0]!=270 || v_value[1]!=72 || v_value[2]!=90 || v_value[3]!=108)  {
    236     ok=false;
    237     *error << "ERROR: incorrect sums of validation values in row 1"
    238            << " found: " << v_value[0] << ", "  << v_value[1]
    239            << ", "  << v_value[2] << " and "  << v_value[3]
    240            << std::endl;   
    241   }
    242 
    243 
    24455
    24556  if (error!=&std::cerr)
  • trunk/test/ensemble_test.cc

    r722 r824  
    3535#include "yat/classifier/PolynomialKernelFunction.h"
    3636#include "yat/classifier/SVM.h"
     37#include "yat/statistics/AUC.h"
    3738
    3839#include <cassert>
     
    8889  ensemble.build();
    8990 
     91  utility::vector out(target.size(),0);
     92  for (size_t i = 0; i<out.size(); ++i)
     93    out(i)=ensemble.validate()[0][i].mean();
     94  statistics::AUC roc;
     95  *error << roc.score(target,out) << std::endl;
     96
    9097  delete kf;
    9198
  • trunk/test/subset_generator_test.cc

    r820 r824  
    2222*/
    2323
     24#include "yat/classifier/BootstrapSampler.h"
    2425#include "yat/classifier/CrossValidationSampler.h"
    25 #include "yat/classifier/EnsembleBuilder.h"
    2626#include "yat/classifier/FeatureSelectorIR.h"
    2727#include "yat/classifier/Kernel_SEV.h"
     
    4343using namespace theplu::yat;
    4444
     45bool class_count_test(const std::vector<size_t>&, std::ostream*);
     46bool sample_count_test(const std::vector<size_t>&, std::ostream*);
     47bool test_nested(std::ostream* error);
     48bool test_cv(std::ostream*);
     49bool test_creation(std::ostream* error);
     50bool test_bootstrap(std::ostream* error);
     51
     52
    4553int main(const int argc,const char* argv[])
    4654
     
    5159    error = new std::ofstream("/dev/null");
    5260    if (argc>1)
    53       std::cout << "feature_selection -v : for printing extra information\n";
    54   }
    55   *error << "testing ferature_selection" << std::endl;
     61      std::cout << "subset_generator -v : for printing extra information\n";
     62  }
     63  *error << "testing subset_generator" << std::endl;
    5664  bool ok = true;
    5765
    58 
     66  ok = ok && test_creation(error);
     67  ok = ok && test_nested(error);
     68  ok = ok && test_cv(error);
     69
     70  if (ok)
     71    return 0;
     72  return -1;
     73}
     74
     75
     76bool test_creation(std::ostream* error)
     77{
     78  bool ok=true;
    5979  std::ifstream is("data/nm_target_bin.txt");
    6080  *error << "loading target " << std::endl;
     
    84104  classifier::SubsetGenerator subset_data(sampler, data, fs);
    85105  classifier::SubsetGenerator subset_kernel(sampler, kernel, fs);
    86 
    87   classifier::SVM svm(kernel,target);
    88   statistics::PearsonDistance distance;
    89   classifier::NCC ncc(data,target,distance);
    90   *error << "building Ensemble" << std::endl;
    91   //  classifier::EnsembleBuilder ensemble_ncc(ncc,subset_data);
    92   //ensemble_ncc.build();
    93   classifier::EnsembleBuilder ensemble_svm(svm,sampler);
    94   ensemble_svm.build();
    95  
    96   utility::vector out(target.size(),0);
    97   for (size_t i = 0; i<out.size(); ++i)
    98     out(i)=ensemble_svm.validate()[0][i].mean();
    99   statistics::AUC roc;
    100   *error << roc.score(target,out) << std::endl;
    101 
    102   if (ok)
    103     return 0;
    104   return -1;
    105 }
     106  return ok;
     107}
     108
     109bool test_nested(std::ostream* error)
     110{
     111  bool ok=true;
     112  //
     113  // Test two nested CrossSplitters
     114  //
     115
     116  *error << "\ntesting two nested crossplitters" << std::endl;
     117  std::vector<std::string> label(9);
     118  label[0]=label[1]=label[2]="0";
     119  label[3]=label[4]=label[5]="1";
     120  label[6]=label[7]=label[8]="2";
     121                 
     122  classifier::Target target(label);
     123  utility::matrix raw_data2(2,9);
     124  for(size_t i=0;i<raw_data2.rows();i++)
     125    for(size_t j=0;j<raw_data2.columns();j++)
     126      raw_data2(i,j)=i*10+10+j+1;
     127   
     128  classifier::MatrixLookup data2(raw_data2);
     129  classifier::CrossValidationSampler cv2(target,3,3);
     130  classifier::SubsetGenerator cv_test(cv2,data2);
     131
     132  std::vector<size_t> sample_count(10,0);
     133  std::vector<size_t> test_sample_count(9,0);
     134  std::vector<size_t> test_class_count(3,0);
     135  std::vector<double> test_value1(4,0);
     136  std::vector<double> test_value2(4,0);
     137  std::vector<double> t_value(4,0);
     138  std::vector<double> v_value(4,0);
     139  for(u_long k=0;k<cv_test.size();k++) {
     140   
     141    const classifier::DataLookup2D& tv_view=cv_test.training_data(k);
     142    const classifier::Target& tv_target=cv_test.training_target(k);
     143    const std::vector<size_t>& tv_index=cv_test.training_index(k);
     144    const classifier::DataLookup2D& test_view=cv_test.validation_data(k);
     145    const classifier::Target& test_target=cv_test.validation_target(k);
     146    const std::vector<size_t>& test_index=cv_test.validation_index(k);
     147
     148    for (size_t i=0; i<test_index.size(); i++) {
     149      assert(test_index[i]<sample_count.size());
     150      test_sample_count[test_index[i]]++;
     151      test_class_count[target(test_index[i])]++;
     152      test_value1[0]+=test_view(0,i);
     153      test_value2[0]+=test_view(1,i);
     154      test_value1[test_target(i)+1]+=test_view(0,i);
     155      test_value2[test_target(i)+1]+=test_view(1,i);
     156      if(test_target(i)!=target(test_index[i])) {
     157        ok=false;
     158        *error << "ERROR: incorrect mapping of test indices" << std:: endl;
     159      }       
     160    }
     161   
     162    classifier::CrossValidationSampler sampler_training(tv_target,2,2);
     163    classifier::SubsetGenerator cv_training(sampler_training,tv_view);
     164    std::vector<size_t> v_sample_count(6,0);
     165    std::vector<size_t> t_sample_count(6,0);
     166    std::vector<size_t> v_class_count(3,0);
     167    std::vector<size_t> t_class_count(3,0);
     168    std::vector<size_t> t_class_count2(3,0);
     169    for(u_long l=0;l<cv_training.size();l++) {
     170      const classifier::DataLookup2D& t_view=cv_training.training_data(l);
     171      const classifier::Target& t_target=cv_training.training_target(l);
     172      const std::vector<size_t>& t_index=cv_training.training_index(l);
     173      const classifier::DataLookup2D& v_view=cv_training.validation_data(l);
     174      const classifier::Target& v_target=cv_training.validation_target(l);
     175      const std::vector<size_t>& v_index=cv_training.validation_index(l);
     176     
     177      if (test_index.size()+tv_index.size()!=target.size()
     178          || t_index.size()+v_index.size() != tv_target.size()
     179          || test_index.size()+v_index.size()+t_index.size() !=  target.size()){
     180        ok = false;
     181        *error << "ERROR: size of training samples, validation samples "
     182               << "and test samples in is invalid."
     183               << std::endl;
     184      }
     185      if (test_index.size()!=3 || tv_index.size()!=6 || t_index.size()!=3 ||
     186          v_index.size()!=3){
     187        ok = false;
     188        *error << "ERROR: size of training, validation, and test samples"
     189               << " is invalid."
     190               << " Expected sizes to be 3" << std::endl;
     191      }     
     192
     193      std::vector<size_t> tv_sample_count(6,0);
     194      for (size_t i=0; i<t_index.size(); i++) {
     195        assert(t_index[i]<t_sample_count.size());
     196        tv_sample_count[t_index[i]]++;
     197        t_sample_count[t_index[i]]++;
     198        t_class_count[t_target(i)]++;
     199        t_class_count2[tv_target(t_index[i])]++;
     200        t_value[0]+=t_view(0,i);
     201        t_value[t_target(i)+1]+=t_view(0,i);       
     202      }
     203      for (size_t i=0; i<v_index.size(); i++) {
     204        assert(v_index[i]<v_sample_count.size());
     205        tv_sample_count[v_index[i]]++;
     206        v_sample_count[v_index[i]]++;
     207        v_class_count[v_target(i)]++;
     208        v_value[0]+=v_view(0,i);
     209        v_value[v_target(i)+1]+=v_view(0,i);
     210      }
     211 
     212      ok = ok && sample_count_test(tv_sample_count,error);     
     213
     214    }
     215    ok = ok && sample_count_test(v_sample_count,error);
     216    ok = ok && sample_count_test(t_sample_count,error);
     217   
     218    ok = ok && class_count_test(t_class_count,error);
     219    ok = ok && class_count_test(t_class_count2,error);
     220    ok = ok && class_count_test(v_class_count,error);
     221
     222
     223  }
     224  ok = ok && sample_count_test(test_sample_count,error);
     225  ok = ok && class_count_test(test_class_count,error);
     226 
     227  if(test_value1[0]!=135 || test_value1[1]!=36 || test_value1[2]!=45 ||
     228     test_value1[3]!=54) {
     229    ok=false;
     230    *error << "ERROR: incorrect sums of test values in row 1"
     231           << " found: " << test_value1[0] << ", "  << test_value1[1]
     232           << ", "  << test_value1[2] << " and "  << test_value1[3]
     233           << std::endl;
     234  }
     235
     236 
     237  if(test_value2[0]!=225 || test_value2[1]!=66 || test_value2[2]!=75 ||
     238     test_value2[3]!=84) {
     239    ok=false;
     240    *error << "ERROR: incorrect sums of test values in row 2"
     241           << " found: " << test_value2[0] << ", "  << test_value2[1]
     242           << ", "  << test_value2[2] << " and "  << test_value2[3]
     243           << std::endl;
     244  }
     245
     246  if(t_value[0]!=270 || t_value[1]!=72 || t_value[2]!=90 || t_value[3]!=108)  {
     247    ok=false;
     248    *error << "ERROR: incorrect sums of training values in row 1"
     249           << " found: " << t_value[0] << ", "  << t_value[1]
     250           << ", "  << t_value[2] << " and "  << t_value[3]
     251           << std::endl;   
     252  }
     253
     254  if(v_value[0]!=270 || v_value[1]!=72 || v_value[2]!=90 || v_value[3]!=108)  {
     255    ok=false;
     256    *error << "ERROR: incorrect sums of validation values in row 1"
     257           << " found: " << v_value[0] << ", "  << v_value[1]
     258           << ", "  << v_value[2] << " and "  << v_value[3]
     259           << std::endl;   
     260  }
     261  return ok;
     262}
     263
     264bool class_count_test(const std::vector<size_t>& class_count,
     265                      std::ostream* error) 
     266{
     267  bool ok=true;
     268  for (size_t i=0; i<class_count.size(); i++)
     269    if (class_count[i]==0){
     270      ok = false;
     271      *error << "ERROR: class " << i << " was not in set."
     272             << " Expected at least one sample from each class."
     273             << std::endl;
     274    }
     275  return ok;
     276}
     277
     278bool sample_count_test(const std::vector<size_t>& sample_count,
     279                       std::ostream* error) 
     280{
     281  bool ok=true;
     282  for (size_t i=0; i<sample_count.size(); i++){
     283    if (sample_count[i]!=1){
     284      ok = false;
     285      *error << "ERROR: sample " << i << " was in a group " << sample_count[i]
     286             << " times." << " Expected to be 1 time" << std::endl;
     287    }
     288  }
     289  return ok;
     290}
     291
     292
     293bool test_bootstrap(std::ostream* error)
     294{
     295  bool ok=true;
     296  std::vector<std::string> label(10,"default");
     297  label[2]=label[7]="white";
     298  label[4]=label[5]="black";
     299  label[6]=label[3]="green";
     300  label[8]=label[9]="red";
     301                 
     302  classifier::Target target(label);
     303  utility::matrix raw_data(10,10);
     304  classifier::MatrixLookup data(raw_data);
     305  classifier::BootstrapSampler cv(target,3);
     306  return ok;
     307}
     308
     309
     310bool test_cv(std::ostream* error)
     311{
     312  bool ok=true;
     313  std::vector<std::string> label(10,"default");
     314  label[2]=label[7]="white";
     315  label[4]=label[5]="black";
     316  label[6]=label[3]="green";
     317  label[8]=label[9]="red";
     318                 
     319  classifier::Target target(label);
     320  utility::matrix raw_data(10,10);
     321  classifier::MatrixLookup data(raw_data);
     322  classifier::CrossValidationSampler cv(target,3,3);
     323 
     324  std::vector<size_t> sample_count(10,0);
     325  for (size_t j=0; j<cv.size(); ++j){
     326    std::vector<size_t> class_count(5,0);
     327    assert(j<cv.size());
     328    if (cv.training_index(j).size()+cv.validation_index(j).size()!=
     329        target.size()){
     330      ok = false;
     331      *error << "ERROR: size of training samples plus "
     332             << "size of validation samples is invalid." << std::endl;
     333    }
     334    if (cv.validation_index(j).size()!=3 && cv.validation_index(j).size()!=4){
     335      ok = false;
     336      *error << "ERROR: size of validation samples is invalid."
     337             << "expected size to be 3 or 4" << std::endl;
     338    }
     339    for (size_t i=0; i<cv.validation_index(j).size(); i++) {
     340      assert(cv.validation_index(j)[i]<sample_count.size());
     341      sample_count[cv.validation_index(j)[i]]++;
     342    }
     343    for (size_t i=0; i<cv.training_index(j).size(); i++) {
     344      class_count[target(cv.training_index(j)[i])]++;
     345    }
     346    ok = ok && class_count_test(class_count,error);
     347  }
     348  ok = ok && sample_count_test(sample_count,error);
     349 
     350  return ok;
     351}
Note: See TracChangeset for help on using the changeset viewer.