Changeset 3395


Ignore:
Timestamp:
Mar 23, 2015, 6:31:15 AM (8 years ago)
Author:
Peter
Message:

closes #830. New function that calculates alignment_score of a bam alignment. Also a new function that returns bams one char in bam's query sequence. Using this function in sequence function. This might slow down the code slightly when building against old libbam, but speed is not a priority in that case.

Location:
trunk
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/test/bam.cc

    r3384 r3395  
    4545#ifdef YAT_HAVE_LIBBAM
    4646using namespace omic;
     47
     48void test_alignment_score(test::Suite& suite, const BamRead& b,
     49                          const std::string& ref, double correct)
     50{
     51  int score = alignment_score(b, ref.begin(), 3, 2, 5);
     52  if (!suite.equal(score, correct)) {
     53    suite.add(false);
     54    suite.err() << "error: alignment_score: " << score
     55                << " expected " << correct << "\n";
     56    suite.err() << "query: " << b.sequence() << "\n";
     57    suite.err() << "cigar: " << b.cigar_str() << "\n";
     58    suite.err() << "ref:   " << ref << "\n";
     59  }
     60}
     61
     62
     63void test_alignment_score(test::Suite& suite, const BamRead& b)
     64{
     65  std::string reference = b.sequence();
     66  test_alignment_score(suite, b, reference, 100);
     67  reference[48] = 'N';
     68  test_alignment_score(suite, b, reference, 96);
     69
     70  utility::Aligner::Cigar cig;
     71  cig.push_back(BAM_CEQUAL, 48);
     72  cig.push_back(BAM_CDIFF, 1);
     73  cig.push_back(BAM_CEQUAL, 51);
     74  BamRead bam(b);
     75  bam.cigar(cig);
     76  test_alignment_score(suite, bam, reference, 96);
     77
     78  cig.clear();
     79  cig.push_back(BAM_CSOFT_CLIP, 10);
     80  cig.push_back(BAM_CEQUAL, 38);
     81  cig.push_back(BAM_CDIFF, 1);
     82  cig.push_back(BAM_CEQUAL, 51);
     83  bam.cigar(cig);
     84  test_alignment_score(suite, bam, reference, 86);
     85
     86  cig.clear();
     87  cig.push_back(BAM_CSOFT_CLIP, 10);
     88  cig.push_back(BAM_CEQUAL, 38);
     89  cig.push_back(BAM_CDEL, 2);
     90  cig.push_back(BAM_CEQUAL, 10);
     91  cig.push_back(BAM_CINS, 2);
     92  cig.push_back(BAM_CEQUAL, 10);
     93  cig.push_back(BAM_CSOFT_CLIP, 2);
     94  bam.cigar(cig);
     95  test_alignment_score(suite, bam, reference, 58 - (5+2*2) - (5+2*2));
     96}
     97
    4798
    4899void test_aux(test::Suite& suite, const BamRead& b)
     
    310361                << " expected " << new_q << "\n";
    311362  }
     363  test_alignment_score(suite, bam);
    312364  test_cigar(suite, bam, in.header());
    313365  test_aux(suite, bam);
  • trunk/yat/omic/BamRead.cc

    r3384 r3395  
    340340  {
    341341    std::string result(sequence_length(), ' ');
    342 #if YAT_HAVE_HTSLIB
    343342    for (size_t i=0; i<result.size(); ++i)
    344       result[i] = seq_nt16_str[sequence(i)];
     343      result[i] = sequence_str(i);
     344    return result;
     345  }
     346
     347
     348  char BamRead::sequence_str(size_t i) const
     349  {
     350#if YAT_HAVE_HTSLIB
     351    return seq_nt16_str[sequence(i)];
    345352#elif HAVE_BAM_NT16_REV_TABLE
    346     for (size_t i=0; i<result.size(); ++i)
    347       result[i] = bam_nt16_rev_table[sequence(i)];
     353    return bam_nt16_rev_table[sequence(i)];
    348354#else
    349355    std::string table = "=ACMGRSVTWYHKDBN";
    350     for (size_t i=0; i<result.size(); ++i)
    351       result[i] = table[sequence(i)];
    352 #endif
    353     return result;
     356    return table[sequence(i)];
     357#endif
    354358  }
    355359
  • trunk/yat/omic/BamRead.h

    r3384 r3395  
    266266     */
    267267    uint8_t sequence(size_t index) const;
     268
     269    /**
     270       \return A, C, G, T, or N.
     271
     272       \since New in yat 0.13
     273     */
     274    char sequence_str(size_t index) const;
    268275
    269276    /**
     
    439446  };
    440447
     448
     449  /**
     450     Calculate alignment score based on \a bam's cigar and sequence
     451     and the reference as in [\a ref, ref + bam.end()-bam.pos()).
     452
     453     For indels the score is calculated as open_gap + len * gap.  For
     454     mismatches the score is calculated -mismatch. For matches the
     455     score is unity.
     456
     457     For cigar element BAM_CMATCH the sequence and reference is used
     458     to differentiate match from mismatch.
     459
     460     \since New in yat 0.13
     461   */
     462  template<typename RandomAccessIterator>
     463  double alignment_score(const BamRead& bam, RandomAccessIterator ref,
     464                         double mismatch, double gap=0.0, double open_gap=0.0)
     465  {
     466    double res = 0;
     467    size_t q = 0;
     468
     469    for (uint32_t i = 0; i<bam.core().n_cigar; ++i) {
     470      switch (bam.cigar_op(i)) {
     471      case (BAM_CMATCH): {
     472        RandomAccessIterator end = ref + bam.cigar_oplen(i);
     473        for (; ref!=end; ++ref) {
     474          if (bam.sequence_str(q) == *ref)
     475            res += 1.0;
     476          else
     477            res -= mismatch;
     478          ++q;
     479        }
     480        break;
     481      }
     482      case (BAM_CEQUAL): {
     483        q += bam.cigar_oplen(i);
     484        ref += bam.cigar_oplen(i);
     485        res += bam.cigar_oplen(i);
     486        break;
     487      }
     488      case (BAM_CDIFF): {
     489        q += bam.cigar_oplen(i);
     490        ref += bam.cigar_oplen(i);
     491        res -= bam.cigar_oplen(i)*mismatch;
     492        break;
     493      }
     494      case (BAM_CSOFT_CLIP): {
     495        q += bam.cigar_oplen(i);
     496        break;
     497      }
     498      case (BAM_CINS): {
     499        q += bam.cigar_oplen(i);
     500        res -= open_gap + bam.cigar_oplen(i) * gap;
     501        break;
     502      }
     503      case (BAM_CDEL): {
     504        ref += bam.cigar_oplen(i);
     505        res -= open_gap + bam.cigar_oplen(i) * gap;
     506        break;
     507      }
     508      } // end switch
     509    }
     510    return res;
     511  }
     512
    441513}}}
    442514#endif
Note: See TracChangeset for help on using the changeset viewer.