Changeset 3205


Ignore:
Timestamp:
May 4, 2014, 4:50:50 AM (9 years ago)
Author:
Peter
Message:

implement new class SmithWaterman?

Location:
trunk
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/test/Makefile.am

    r3200 r3205  
    7676  test/roc.test \
    7777  test/score.test test/segment.test test/smart_ptr.test   \
     78  test/smith_waterman.test \
    7879  test/smoother.test test/spearman.test \
    7980  test/split.test test/statistics.test test/stream_redirect.test \
  • trunk/yat/utility/Aligner.cc

    r3202 r3205  
    220220  void Aligner::Cigar::push_back(uint8_t op, uint32_t len)
    221221  {
     222    if (len==0)
     223      return;
    222224    if (cigar_.empty() || this->op(size()-1)!=op)
    223225      cigar_.push_back(bam_cigar_gen(len, op));
     
    229231  void Aligner::Cigar::push_front(uint8_t op, uint32_t len)
    230232  {
     233    if (len==0)
     234      return;
    231235    if (cigar_.empty() || this->op(0)!=op)
    232236      cigar_.push_front(bam_cigar_gen(len, op));
  • trunk/yat/utility/Aligner.h

    r3203 r3205  
    120120       gap, and gap + open_gap otherwise.
    121121
     122       To get the CIGAR to behave as expected, the each row in \a dot
     123       corresponds to an element in reference sequence and each row in
     124       \a dot corresponds to an element in query reference. If that is
     125       transposed, insertions and deletions are swapped in CIGAR.
    122126     */
    123127    void operator()(const Matrix& dot, Matrix& score);
  • trunk/yat/utility/SmithWaterman.cc

    r3204 r3205  
    2323
    2424#include "SmithWaterman.h"
     25#include "yat/omic/BamRead.h" // for cigar defines
    2526
    2627#include <cassert>
     
    3031namespace utility {
    3132
     33  SmithWaterman::SmithWaterman(double gap, double open_gap)
     34    : aligner_(gap, open_gap) {}
     35
     36
     37  SmithWaterman::SmithWaterman(double vertical_gap, double open_vertical_gap,
     38                               double horizon_gap, double open_horizon_gap)
     39    : aligner_(vertical_gap, open_vertical_gap, horizon_gap, open_horizon_gap)
     40  {}
     41
     42
     43  const Aligner::Cigar& SmithWaterman::cigar(void) const
     44  {
     45    return cigar_;
     46  }
     47
     48
     49  size_t SmithWaterman::position(void) const
     50  {
     51    return position_;
     52  }
     53
     54
     55  const Matrix& SmithWaterman::score(void) const
     56  {
     57    return score_;
     58  }
     59
     60
     61  double SmithWaterman::operator()(const Matrix& dot)
     62  {
     63    score_.resize(dot.rows()+1, dot.columns()+1);
     64    aligner_(dot, score_);
     65    cigar_.clear();
     66    size_t row = 0;
     67    size_t column = 0;
     68    for (size_t i=0; i<score_.rows(); ++i)
     69      for (size_t j=0; j<score_.columns(); ++j)
     70        if (score_(i, j) > score_(row, column)) {
     71          row = i;
     72          column = j;
     73        }
     74
     75    cigar_.push_front(BAM_CSOFT_CLIP, score_.columns()-column-1);
     76
     77    size_t end_row(row);
     78    size_t end_column(column);
     79
     80    while (aligner_.alignment(row, column)!=Aligner::none) {
     81      switch (aligner_.alignment(row, column)) {
     82      case(Aligner::diagonal):
     83        assert(row);
     84        --row;
     85        assert(column);
     86        --column;
     87        if (dot(row, column)>0)
     88          cigar_.push_front(BAM_CEQUAL);
     89        else
     90          cigar_.push_front(BAM_CDIFF);
     91        break;
     92      case (Aligner::right):
     93        assert(column);
     94        --column;
     95        cigar_.push_front(BAM_CINS);
     96        break;
     97      case (Aligner::down):
     98        assert(row);
     99        --row;
     100        cigar_.push_front(BAM_CDEL);
     101        break;
     102      default:;
     103      }
     104    }
     105
     106    position_ = row;
     107    cigar_.push_front(BAM_CSOFT_CLIP, column);
     108
     109    return score_(end_row, end_column);
     110  }
     111
    32112}}} // of namespace utility, yat, and theplu
  • trunk/yat/utility/SmithWaterman.h

    r3204 r3205  
    4343       Same as SmithWaterman(gap, open_gap, gap, open_gap)
    4444     */
    45     SmithWaterman(double gap=0, double open_gap=0);
     45    explicit SmithWaterman(double gap=0, double open_gap=0);
    4646
    4747    /**
    48        \brief constructor
     48       \brief Constructor
    4949
    5050       \param vertical_gap Penalty for extending a vertical gap. A
    51        vertical gap means alignment consumes an element in second
    52        element and not, in the first element, i.e., an element has
    53        been inserted to the second element or equivalently an element
    54        has been deleted in first sequence.
     51       vertical gap means consuming a reference element i.e. a
     52       deletion in query sequence.
    5553
    56        \param open_vertical_gap Penalty for open a vertical gap. Total
    57        cost for a vertical gap is open_vertical_gap + N *
     54       \param open_vertical_gap Open a deletion. Total
     55       cost for a deletion is open_vertical_gap + N *
    5856       vertical_gap.
    5957
    60        \param horizon_gap Penalty for extending a horizontal gap
     58       \param horizon_gap Penalty for extending a insertion.
    6159
    62        \param open_horizon_gap Penalty for open a horizontal
    63        gap. Total penalty for a horizontal gap is open_horizon_gap + N
    64        * horizon_gap.
     60       \param open_horizon_gap Penalty for open an insertion. Total
     61       penalty for a insertion is open_horizon_gap + N * horizon_gap.
    6562     */
    6663    SmithWaterman(double vertical_gap, double open_vertical_gap,
     
    6865
    6966    /**
     67       The CIGAR here is slightly different compared to CIGAR in
     68       Aligner as it uses BAM_CEQUAL and BAM_CDIFF rather than
     69       BAM_CMATCH, and it also uses BAM_CSOFT_CLIP and ends of CIGAR
     70       to reflect if whole query was aligned.
     71
    7072       \return CIGAR reflecting latest performed alignment
    7173     */
     
    110112  };
    111113
     114  // template implementaion
     115
     116  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
     117  double SmithWaterman::operator()(RandomAccessIterator1 reference_begin,
     118                                   RandomAccessIterator1 reference_end,
     119                                   RandomAccessIterator2 query_begin,
     120                                   RandomAccessIterator2 query_end,
     121                                   double mismatch)
     122  {
     123    Matrix dot(reference_end-reference_begin, query_end-query_begin, -mismatch);
     124    for (size_t i=0; i<dot.rows(); ++i)
     125      for (size_t j=0; j<dot.columns(); ++j)
     126        if (reference_begin[i] == query_begin[j])
     127          dot(i, j) = 1;
     128    return this->operator()(dot);
     129  }
     130
    112131}}} // of namespace utility, yat, and theplu
    113132
Note: See TracChangeset for help on using the changeset viewer.