Changeset 1519


Ignore:
Timestamp:
Sep 21, 2008, 6:35:39 AM (13 years ago)
Author:
Peter
Message:

Speed up QuantileNormalizer by avoiding some copying and by sorting each column only once instead of twice as before.

Location:
trunk/yat
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/yat/normalizer/QuantileNormalizer.cc

    r1518 r1519  
    41 41    assert(data.columns()==result.columns());
    42 42
    43     // create a tmp copy
    44     utility::Matrix data_copy(data);
    45    
    46     // sort columns in copy
    47     for (size_t column=0; column<data_copy.columns(); ++column){
    48       std::sort(data_copy.begin_column(column), data_copy.end_column(column));
    49     }
    50 
    51     // calculate average of each row
    52     std::vector<yat::statistics::Averager> averager(data_copy.rows());
    53     for (size_t row=0; row<result.rows(); ++row){
    54       add(averager[row], data_copy.begin_row(row), data_copy.end_row(row));
    55     }
    56 
    57     for (size_t column=0; column<result.columns(); ++column){
    58       std::vector<size_t> index;
    59       utility::sort_index(index, data.column_const_view(column));
    60                                
    61       for (size_t row=0; row<data.rows(); ++row)
    62         result(index[row], column) = averager[row].mean();
     43    std::vector<std::vector<size_t> > index(data.rows());
     44    for (size_t column=0; column<data.columns(); ++column)
     45      utility::sort_index(index[column], data.column_const_view(column));
     46     
     47    for (size_t rank=0; rank<data.rows(); ++rank) {
     48      statistics::Averager a;
     49      for (size_t column=0; column<data.columns(); ++column)
     50        a.add(data(index[column][rank], column));
     51      double mean = a.mean();
     52      for (size_t column=0; column<data.columns(); ++column)
     53        result(index[column][rank], column) = mean;
    63 54    }
    64 55  }
  • trunk/yat/utility/VectorBase.cc

    r1487 r1519  
    182 182      throw utility::GSL_error(std::string("sort_index(vector&,const VectorBase&)",status));     
    183 183    }
    184     sort_index=std::vector<size_t>(p->data,p->data+p->size);
     184    std::vector<size_t> tmp(p->data,p->data+p->size);
     185    sort_index.swap(tmp);
    185 186    gsl_permutation_free(p);
    186 187  }
Note: See TracChangeset for help on using the changeset viewer.