source: trunk/yat/omic/BamRead.cc @ 2989

Last change on this file since 2989 was 2989, checked in by Peter, 9 years ago

speedup BamRead::cigar for case when size is not modified.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.9 KB
Line 
1// $Id: BamRead.cc 2989 2013-02-23 05:35:33Z peter $
2//
3// Copyright (C) 2012, 2013 Peter Johansson
4//
5// This program is free software; you can redistribute it and/or modify
6// it under the terms of the GNU General Public License as published by
7// the Free Software Foundation; either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful, but
11// WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13// General Public License for more details.
14//
15// You should have received a copy of the GNU General Public License
16// along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18#include <config.h>
19
20#include "BamRead.h"
21#include "config_bam.h"
22
23#include YAT_BAM_HEADER
24
25#include <algorithm>
26#include <cassert>
27#include <sstream>
28
29// bam1_seq_seti is not defined in 0.1.18 (or older), so
30// backport. Code is taken bam.h in version 0.1.19-dev.
31#ifndef bam1_seq_seti
32#define bam1_seq_seti(s, i, c) ( (s)[(i)>>1] = \
33  ((s)[(i)>>1] & 0xf<<(((i)&1)<<2)) | (c)<<((~(i)&1)<<2) )
34#endif
35
36namespace theplu {
37namespace yat {
38namespace omic {
39
40  BamRead::BamRead(void)
41    : bam_(bam_init1())
42  {}
43
44
45  BamRead::BamRead(const BamRead& other)
46    : bam_(bam_dup1(other.bam_))
47  {
48  }
49
50
51  BamRead::~BamRead(void)
52  {
53    bam_destroy1(bam_);
54  }
55
56
57  const BamRead& BamRead::operator=(const BamRead& rhs)
58  {
59    if (bam_!=rhs.bam_)
60      bam_copy1(bam_, rhs.bam_);
61    return *this;
62  }
63
64
65  const uint8_t* BamRead::aux(void) const
66  {
67    return bam1_aux(bam_);
68  }
69
70
71  const uint32_t* BamRead::cigar(void) const
72  {
73    return bam1_cigar(bam_);
74  }
75
76
77  uint32_t BamRead::cigar(size_t i) const
78  {
79    assert(i<core().n_cigar);
80    return bam1_cigar(bam_)[i];
81  }
82
83
84  uint32_t BamRead::cigar_op(size_t i) const
85  {
86    assert(i<core().n_cigar);
87    return bam_cigar_op(cigar(i));
88  }
89
90
91  uint32_t BamRead::cigar_oplen(size_t i) const
92  {
93    assert(i<core().n_cigar);
94    return bam_cigar_oplen(cigar(i));
95  }
96
97
98  std::string BamRead::cigar_str(void) const
99  {
100    std::ostringstream os;
101    std::string str = BAM_CIGAR_STR;
102    for (uint32_t i=0; i<core().n_cigar; ++i) {
103      os << bam_cigar_oplen(bam1_cigar(bam_)[i])
104         << str[bam_cigar_op(bam1_cigar(bam_)[i])];
105    }
106    return os.str();
107  }
108
109
110  void BamRead::cigar(const std::vector<uint32_t>& c)
111  {
112    // special case when cigar size is not changed
113    if (c.size() == core().n_cigar) {
114      if (c.size())
115        memcpy(bam1_cigar(bam_), &c[0], 4*c.size());
116      return;
117    }
118
119    int m_data = bam_->m_data;
120    int data_len = bam_->data_len + 4*c.size() - 4*core().n_cigar;
121    // double capacity if needed
122    if (m_data < data_len) {
123      m_data = data_len;
124      kroundup32(m_data);
125    }
126    uint8_t* data = (uint8_t*)calloc(m_data, 1);
127    /*
128      data comes as qname, cigar, rest
129     */
130    // first copy qname
131    memcpy(data, bam_->data, core().l_qname);
132    // copy new cigar
133    if (c.size())
134      // each cigar element is 4 bytes
135      memcpy(data + core().l_qname, &c[0], 4*c.size());
136    // copy remaining data
137    memcpy(data + core().l_qname + 4*c.size(), bam1_seq(bam_),
138           data_len - 4*c.size() - core().l_qname);
139    bam_->m_data = m_data;
140    bam_->data_len = data_len;
141    core().n_cigar = c.size();
142    free(bam_->data);
143    bam_->data = data;
144  }
145
146
147  const bam1_core_t& BamRead::core(void) const
148  { return bam_->core; }
149
150
151  bam1_core_t& BamRead::core(void)
152  { return bam_->core; }
153
154
155  const char* BamRead::name(void) const
156  { return bam1_qname(bam_); }
157
158
159  uint8_t BamRead::qual(size_t i) const
160  { return *(bam1_qual(bam_)+i); }
161
162
163  std::string BamRead::sequence(void) const
164  {
165    std::string result(sequence_length(), ' ');
166#ifdef HAVE_BAM_NT16_REV_TABLE
167    for (size_t i=0; i<result.size(); ++i)
168      result[i] = bam_nt16_rev_table[sequence(i)];
169#else
170    std::string table = "=ACMGRSVTWYHKDBN";
171    for (size_t i=0; i<result.size(); ++i)
172      result[i] = table[sequence(i)];
173#endif
174    return result;
175  }
176
177
178  void BamRead::sequence(size_t i, uint8_t seq)
179  {
180    assert(bam_);
181    assert(seq < 16);
182    bam1_seq_seti(bam1_seq(bam_), i, seq);
183  }
184
185
186  uint8_t BamRead::sequence(size_t index) const
187  {
188    assert(bam_);
189    return bam1_seqi(bam1_seq(bam_),index);
190  }
191
192
193  uint32_t BamRead::sequence_length(void) const
194  {
195    assert(bam_);
196    return bam_->core.l_qseq;
197  }
198
199
200  int32_t BamRead::pos(void) const
201  {
202    assert(bam_);
203    return bam_->core.pos;
204  }
205
206
207  int32_t BamRead::tid(void) const
208  {
209    assert(bam_);
210    return bam_->core.tid;
211  }
212
213
214  int32_t BamRead::mtid(void) const
215  {
216    assert(bam_);
217    return bam_->core.mtid;
218  }
219
220
221  int32_t BamRead::mpos(void) const
222  {
223    assert(bam_);
224    return bam_->core.mpos;
225  }
226
227
228  int32_t BamRead::end(void) const
229  {
230    assert(bam_);
231    return bam_calend(&core(), bam1_cigar(bam_));
232  }
233
234
235  void BamRead::swap(BamRead& other)
236  {
237    std::swap(bam_, other.bam_);
238  }
239
240
241  void swap(BamRead& lhs, BamRead& rhs)
242  {
243    lhs.swap(rhs);
244  }
245
246  bool same_query_name(const BamRead& lhs, const BamRead& rhs)
247  {
248    if (lhs.core().l_qname != rhs.core().l_qname)
249      return false;
250
251    return std::equal(lhs.name(),
252                      lhs.name() + lhs.core().l_qname,
253                      rhs.name());
254  }
255
256
257  bool soft_clipped(const BamRead& bam)
258  {
259    return left_soft_clipped(bam) || right_soft_clipped(bam);
260  }
261
262
263  uint32_t left_soft_clipped(const BamRead& bam)
264  {
265    for (size_t i=0; i<bam.core().n_cigar; ++i) {
266      if (bam.cigar_op(i) == BAM_CSOFT_CLIP)
267        return bam.cigar_oplen(i);
268      if (bam_cigar_type(bam.cigar_op(i))&1)
269        return 0;
270    }
271    return 0;
272  }
273
274
275  uint32_t right_soft_clipped(const BamRead& bam)
276  {
277    if (bam.core().n_cigar==0)
278      return 0;
279    // right clip can't be at first (most left cigar element)
280    for (size_t i=bam.core().n_cigar-1; i>0; --i) {
281      if (bam.cigar_op(i) == BAM_CSOFT_CLIP)
282        return bam.cigar_oplen(i);
283      if (bam_cigar_type(bam.cigar_op(i))&1)
284        return 0;
285    }
286    return 0;
287  }
288
289
290  bool BamLessPos::operator()(const BamRead& lhs, const BamRead& rhs) const
291  {
292    return lhs.tid() < rhs.tid() ||
293      (lhs.tid() == rhs.tid() && lhs.pos() < rhs.pos());
294  }
295
296
297  bool BamLessEnd::operator()(const BamRead& lhs, const BamRead& rhs) const
298  {
299    return lhs.tid() < rhs.tid() ||
300      (lhs.tid() == rhs.tid() && lhs.end() < rhs.end());
301  }
302
303}}}
Note: See TracBrowser for help on using the repository browser.