Changeset 514


Ignore:
Timestamp: Feb 20, 2006, 10:45:34 AM (16 years ago)
Author: Peter
Message:

generalised binary functionality in Target

Location: trunk
Files: 15 edited

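Legend:

Unmodified (line shows both the old r509 and the new r514 line number)
Added (line shows only the new r514 line number)
Removed (line shows only the old r509 line number)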
  • trunk/lib/classifier/CrossSplitter.cc

    r509 r514  
    4444     
    4545      for (size_t part=0; part<k && i<N; i++, part++) {
    46        
    4746        std::vector<size_t> training_index;
    4847        std::vector<size_t> validation_index;
    49 
    5048        for (size_t j=0; j<v.size(); j++) {
    5149          if (j%k==part)
  • trunk/lib/classifier/CrossSplitter.h

    r509 r514  
    2121  /// to the proportions in the whole dataset. In the first \a k
    2222  /// rounds each sample is returned k-1 times, for next round the
    23   /// samples are shuffled and...
     23  /// samples are shuffled and... In total there are N partitions, in
     24  /// other words, each sample is in validation roughly N/k
    2425  ///   
    2526
     
    2930  public:
    3031    ///
    31     /// Constructor taking \a target and \a k for k-fold cross
    32     /// validation
     32    /// @brief Constructor
     33    /// 
     34    /// @parameter Target targets
     35    /// @parameter data data to split up in validation and training.
     36    /// @parameter N total number of partitions
     37    /// @parameter k for k-fold crossvalidation
    3338    ///
    34 
    35     CrossSplitter(const Target& target, const DataLookup2D&,
    36                    const size_t N, const size_t k);
     39    CrossSplitter(const Target& target, const DataLookup2D& data,
     40                  const size_t N, const size_t k);
    3741
    3842    ///
     
    122126   
    123127    std::vector<const DataLookup2D*> training_data_;
     128    std::vector<std::vector<size_t> > training_index_;
    124129    std::vector<Target> training_target_;
    125     std::vector<std::vector<size_t> > training_index_;
    126130
    127131    std::vector<const DataLookup2D*> validation_data_;
     132    std::vector<std::vector<size_t> > validation_index_;
    128133    std::vector<Target> validation_target_;
    129     std::vector<std::vector<size_t> > validation_index_;     
    130134  };
    131135
  • trunk/lib/classifier/SVM.cc

    r509 r514  
    250250  bool SVM::choose(const theplu::gslapi::vector& E)
    251251  {
    252     //std::cout << "e choose\n"  ;
    253252    // First check for violation among SVs
    254253    // E should be the same for all SVs
     
    257256    sample_.update_first(0);
    258257    if (sample_.nof_sv()>1){
    259       //std::cout << "there is SVs\n";
     258
    260259      double max = E(sample_(0));
    261260      double min = max;
     
    280279     
    281280      // If no violation check among non-support vectors
    282       //std::cout << "no violation SVs\n";
    283281
    284282      sample_.shuffle();
    285       //std::cout << "randomized\n";
    286283     
    287284      for (size_t i=sample_.nof_sv(); i<sample_.n();i++){
    288285        //std::cout << "nr: " << i << std::endl;
    289         if (target_.one(sample_(i))){
     286        if (target_.binary(sample_(i))){
    290287          if(E(sample_(i)) < E(sample_.value_first()) - 2*tolerance_){
    291288            sample_.update_second(i);
     
    305302    else{
    306303      for (size_t i=0; i<sample_.n(); i++) {
    307         if (target_.one(sample_(i))){
     304        if (target_.binary(sample_(i))){
    308305          for (size_t j=0; j<sample_.n(); j++) {
    309             if ( !target_.one(sample_(j)) &&
     306            if ( !target_.binary(sample_(j)) &&
    310307                 E(sample_(i)) < E(sample_(j))+2*tolerance_ ){
    311308              sample_.update_first(i);
  • trunk/lib/classifier/SVM.h

    r509 r514  
    189189
    190190    ///
    191     /// @todo Function calculating bias
     191    /// @brief calculates the bias term
    192192    ///
    193193    /// @return true if successful
     
    214214   
    215215    /// @return 1 if i belong to class one and -1 if i belong to rest
    216     inline int target(size_t i) const { return target_.one(i) ? 1 : -1; }
     216    inline int target(size_t i) const { return target_.binary(i) ? 1 : -1; }
    217217
    218218    gslapi::vector alpha_;
     
    233233  /// @todo The output operator for the SVM class.
    234234  ///
    235   std::ostream& operator<< (std::ostream& s, const SVM&);
     235  //std::ostream& operator<< (std::ostream& s, const SVM&);
    236236 
    237237 
  • trunk/lib/classifier/Target.cc

    r509 r514  
    1717
    1818  Target::Target(const std::vector<std::string>& label)
    19     : one_(0)
    2019  {
    2120    init(label);
     
    2423  Target::Target(const Target& t,
    2524                 const std::vector<size_t>& index)
    26     : class_map_(t.class_map_), one_(t.one_)
     25    : class_map_(t.class_map_)
    2726  {
     27    binary_.resize(index.size());
    2828    classes_.resize(index.size());
    2929    for (size_t i=0; i<index.size(); i++) {
    3030      assert(index[i]<t.size());
    3131      classes_[i]=t.classes_[index[i]];
     32      binary_[i]=t.binary_[index[i]];
    3233    }
    3334    labels_ = t.labels_;
     
    3536
    3637
    37   // Peter to Markus, align with gslapi
    3838  Target::Target(std::istream& is, char sep)
    3939    throw (utility::IO_error,std::exception)
    40     : one_(0)
    4140  {
    4241    std::vector<std::string> vec;
     
    7271    class_map_.clear();
    7372    labels_.clear();
    74 
    7573   
    7674    for (size_t i=0; i<label.size(); i++) {
    77       std::map<std::string,size_t>::const_iterator iter =
     75      std::map<std::string,size_t>::iterator iter =
    7876        class_map_.lower_bound(label[i]);
    7977
     
    8785      else{
    8886        classes_.push_back(class_map_.size());
    89         // Peter, should use iter to hint on position
    90         class_map_.insert(std::make_pair(label[i],classes_[i]));
     87        class_map_.insert(iter, std::make_pair(label[i],classes_[i]));
    9188        labels_.push_back(label[i]);
    9289      }
    9390    }
     91    // setting binary to false for every class
     92    binary_=std::vector<short>(label.size(),0);
     93   
     94    set_binary(0,true);
    9495
    9596  }
     
    102103  //  return *this;
    103104  //}
     105
     106  void Target::set_binary(const size_t target, const bool b)
     107  {
     108    for (size_t i=0; i<classes_.size(); i++)
     109      if (classes_[i]==target)
     110        binary_[i]=b;
     111  }
     112
    104113
    105114  std::ostream& operator<<(std::ostream& s, const Target& a)
  • trunk/lib/classifier/Target.h

    r509 r514  
    6969
    7070    ///
    71     /// @return true if class of sample @a i is equal to variable one
     71    /// Default binary is set to false for all classes except class 0
    7272    ///
    73     /// @see set_one(const size_t one)
     73    /// @return true if class of sample @a i is set to true
    7474    ///
    75     inline bool one(const size_t i) const
    76     { assert(i<size()); return classes_[i]==one_; }
     75    /// @see set_binary
     76    ///
     77    inline bool binary(const size_t i) const
     78    { assert(i<size()); return binary_[i]; }
    7779   
    7880    ///
    79     /// Function to set variable one. This variable is used in 2-class
    80     /// algorithm in rder to use the Targer object in a one-versus-all
    81     /// manner.
     81    /// Class @a target is set to @a b. Default is binary set to false
     82    /// for each class.
    8283    ///
    83     /// @see one(const size_t i)
    84     ///
    85     inline void set_one(const size_t one) { assert(one<size()); one_=one; }
     84    void set_binary(const size_t target, const bool b);
    8685
    8786    ///
     
    9594    const size_t size(const std::string& label) const;
    9695
    97     //
    98     //@brief sort
    99     //
    100     //inline void sort(void) { std::sort(classes_.begin(), classes_.end()); }
    101 
    10296    ///
    10397    /// @return the class of @a sample
     
    106100    { assert(sample<size()); return classes_[sample]; }
    107101
    108     ///
    109     /// @brief assignment operator
    110     ///
    111     //const Target& operator=(const Target& other);
    112102
    113103  private:
     104    std::vector<short> binary_; // avoid using vector<bool>
    114105    std::vector<size_t> classes_; // class of sample i
    115106    std::map<std::string,size_t> class_map_; 
    116107    std::vector<std::string> labels_; // label of class i
    117     size_t one_;
    118108   
    119109    void init(const std::vector<std::string>&);
  • trunk/lib/statistics/Fisher.cc

    r509 r514  
    116116    a_=b_=c_=d_=0;
    117117    for (size_t i=0; i<target.size(); i++)
    118       if (target.one(i))
     118      if (target.binary(i))
    119119        if (value(i)>value_cutoff_)
    120120          a_++;
     
    143143    a_=b_=c_=d_=0;
    144144    for (size_t i=0; i<target.size(); i++)
    145       if (target.one(i))
     145      if (target.binary(i))
    146146        if (value(i)>value_cutoff_)
    147147          a_+=weight(i);
  • trunk/lib/statistics/FoldChange.cc

    r509 r514  
    3939
    4040    for (size_t i=0; i<value.size(); i++)
    41       if (target.one(i))
     41      if (target.binary(i))
    4242        pos.add(value(i));
    4343      else
     
    5858
    5959    for (size_t i=0; i<value.size(); i++)
    60       if (target.one(i))
     60      if (target.binary(i))
    6161        pos.add(value(i),weight(i));
    6262      else
  • trunk/lib/statistics/Pearson.cc

    r509 r514  
    4646    AveragerPair ap;
    4747    for (size_t i=0; i<target.size(); i++){
    48       if (target.one(i))
     48      if (target.binary(i))
    4949        ap.add(1, value(i));
    5050      else
     
    6666    AveragerPairWeighted ap;
    6767    for (size_t i=0; i<target.size(); i++){
    68       if (target.one(i))
     68      if (target.binary(i))
    6969        ap.add(1, value(i),1,weight(i));
    7070      else
  • trunk/lib/statistics/ROC.cc

    r509 r514  
    7575    vec_pair_.reserve(target.size());
    7676    for (size_t i=0; i<target.size(); i++)
    77       vec_pair_.push_back(std::make_pair(target.one(i),value(i)));
     77      vec_pair_.push_back(std::make_pair(target.binary(i),value(i)));
    7878
    7979    std::sort(vec_pair_.begin(),vec_pair_.end(),
     
    110110    for (unsigned int i=0; i<target.size(); i++)
    111111      if (weight(i))
    112         vec_pair_.push_back(std::make_pair(target.one(i),value(i)));
     112        vec_pair_.push_back(std::make_pair(target.binary(i),value(i)));
    113113
    114114    std::sort(vec_pair_.begin(),vec_pair_.end(),
     
    120120
    121121    for (size_t i=0; i<n(); i++)
    122       if (target.one(i))
     122      if (target.binary(i))
    123123        for (size_t j=0; j<n(); j++)
    124           if (!target.one(j)){
     124          if (!target.binary(j)){
    125125            if (value(i)>value(j))
    126126              area_+=weight(i)*weight(j);
  • trunk/lib/statistics/tScore.cc

    r509 r514  
    2727    dof_=target.size()-2;
    2828    for(size_t i=0; i<target.size(); i++){
    29       if (target.one(i))
     29      if (target.binary(i))
    3030        positive.add(value(i));
    3131      else
     
    5252    dof_=target.size()-2;
    5353    for(size_t i=0; i<target.size(); i++){
    54       if (target.one(i))
     54      if (target.binary(i))
    5555        positive.add(value(i),weight(i));
    5656      else
  • trunk/test/crossvalidation_test.cc

    r509 r514  
    2727  bool ok = true;
    2828
    29  
    3029  std::vector<std::string> label(10,"default");
    3130  label[2]=label[7]="white";
  • trunk/test/score_test.cc

    r509 r514  
    4545    ok = false;
    4646  }
    47   target.set_one(1);
     47  target.set_binary(0,false);
     48  target.set_binary(1,true);
    4849  area = roc.score(target, value);
    4950  if (area!=1.0){
  • trunk/test/svm_test.cc

    r509 r514  
    5959  double tmp=0;
    6060  for (size_t i=0; i<target2.size(); i++)
    61     if (target2.one(i))
     61    if (target2.binary(i))
    6262      tmp += classifier2.alpha()(i);
    6363    else
     
    121121  for (unsigned int i=0; i<target.size(); i++){
    122122    if (output(i)*target(i) < 1){
    123       if (target.one(i))
     123      if (target.binary(i))
    124124        slack += 1 - output(i);
    125125      else
  • trunk/test/target_test.cc

    r509 r514  
    6060    *error << "Error: target(20)!=2" << std::endl;
    6161  }
    62   if (!target.one(0)){
     62  if (!target.binary(0)){
    6363    ok=false;
    64     *error << "Error: target.one(0) not true" << std::endl;
     64    *error << "Error: target.binary(0) not true" << std::endl;
    6565  }   
    66   if (target.one(20)){
     66  if (target.binary(20)){
    6767    ok=false;
    68     *error << "Error: target.one(20) not false" << std::endl;
     68    *error << "Error: target.binary(20) not false" << std::endl;
    6969  }   
    70   target.set_one(2);
    71   if (!target.one(20)){
     70  target.set_binary(2,true);
     71  if (!target.binary(20)){
    7272    ok=false;
    73     *error << "Error: target.one(20) not true" << std::endl;
     73    *error << "Error: target.binary(20) not true" << std::endl;
    7474  }   
    7575  std::ifstream is("data/rank_target.txt");
Note: See TracChangeset for help on using the changeset viewer.