source: trunk/yat/omic/BamRead.cc @ 2993

Last change on this file since 2993 was 2993, checked in by Peter, 10 years ago

conform GPL header to same style as in other yat files

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 6.0 KB
Line 
1
2// $Id: BamRead.cc 2993 2013-03-03 07:38:22Z peter $
3
4/*
5  Copyright (C) 2012, 2013 Peter Johansson
6
7  This file is part of the yat library, http://dev.thep.lu.se/yat
8
9  The yat library is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 3 of the
12  License, or (at your option) any later version.
13
14  The yat library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  General Public License for more details.
18
19  You should have received a copy of the GNU General Public License
20  along with this program. If not, see <http://www.gnu.org/licenses/>.
21*/
22
23#include <config.h>
24
25#include "BamRead.h"
26#include "config_bam.h"
27
28#include YAT_BAM_HEADER
29
30#include <algorithm>
31#include <cassert>
32#include <sstream>
33
34// bam1_seq_seti is not defined in 0.1.18 (or older), so
35// backport. Code is taken bam.h in version 0.1.19-dev.
36#ifndef bam1_seq_seti
37#define bam1_seq_seti(s, i, c) ( (s)[(i)>>1] = \
38  ((s)[(i)>>1] & 0xf<<(((i)&1)<<2)) | (c)<<((~(i)&1)<<2) )
39#endif
40
41namespace theplu {
42namespace yat {
43namespace omic {
44
45  BamRead::BamRead(void)
46    : bam_(bam_init1())
47  {}
48
49
50  BamRead::BamRead(const BamRead& other)
51    : bam_(bam_dup1(other.bam_))
52  {
53  }
54
55
56  BamRead::~BamRead(void)
57  {
58    bam_destroy1(bam_);
59  }
60
61
62  const BamRead& BamRead::operator=(const BamRead& rhs)
63  {
64    if (bam_!=rhs.bam_)
65      bam_copy1(bam_, rhs.bam_);
66    return *this;
67  }
68
69
70  const uint8_t* BamRead::aux(void) const
71  {
72    return bam1_aux(bam_);
73  }
74
75
76  const uint32_t* BamRead::cigar(void) const
77  {
78    return bam1_cigar(bam_);
79  }
80
81
82  uint32_t BamRead::cigar(size_t i) const
83  {
84    assert(i<core().n_cigar);
85    return bam1_cigar(bam_)[i];
86  }
87
88
89  uint32_t BamRead::cigar_op(size_t i) const
90  {
91    assert(i<core().n_cigar);
92    return bam_cigar_op(cigar(i));
93  }
94
95
96  uint32_t BamRead::cigar_oplen(size_t i) const
97  {
98    assert(i<core().n_cigar);
99    return bam_cigar_oplen(cigar(i));
100  }
101
102
103  std::string BamRead::cigar_str(void) const
104  {
105    std::ostringstream os;
106    std::string str = BAM_CIGAR_STR;
107    for (uint32_t i=0; i<core().n_cigar; ++i) {
108      os << bam_cigar_oplen(bam1_cigar(bam_)[i])
109         << str[bam_cigar_op(bam1_cigar(bam_)[i])];
110    }
111    return os.str();
112  }
113
114
115  void BamRead::cigar(const std::vector<uint32_t>& c)
116  {
117    // special case when cigar size is not changed
118    if (c.size() == core().n_cigar) {
119      if (c.size())
120        memcpy(bam1_cigar(bam_), &c[0], 4*c.size());
121      return;
122    }
123
124    int m_data = bam_->m_data;
125    int data_len = bam_->data_len + 4*c.size() - 4*core().n_cigar;
126    // double capacity if needed
127    if (m_data < data_len) {
128      m_data = data_len;
129      kroundup32(m_data);
130    }
131    uint8_t* data = (uint8_t*)calloc(m_data, 1);
132    /*
133      data comes as qname, cigar, rest
134     */
135    // first copy qname
136    memcpy(data, bam_->data, core().l_qname);
137    // copy new cigar
138    if (c.size())
139      // each cigar element is 4 bytes
140      memcpy(data + core().l_qname, &c[0], 4*c.size());
141    // copy remaining data
142    memcpy(data + core().l_qname + 4*c.size(), bam1_seq(bam_),
143           data_len - 4*c.size() - core().l_qname);
144    bam_->m_data = m_data;
145    bam_->data_len = data_len;
146    core().n_cigar = c.size();
147    free(bam_->data);
148    bam_->data = data;
149  }
150
151
152  const bam1_core_t& BamRead::core(void) const
153  { return bam_->core; }
154
155
156  bam1_core_t& BamRead::core(void)
157  { return bam_->core; }
158
159
160  const char* BamRead::name(void) const
161  { return bam1_qname(bam_); }
162
163
164  uint8_t BamRead::qual(size_t i) const
165  { return *(bam1_qual(bam_)+i); }
166
167
168  std::string BamRead::sequence(void) const
169  {
170    std::string result(sequence_length(), ' ');
171#ifdef HAVE_BAM_NT16_REV_TABLE
172    for (size_t i=0; i<result.size(); ++i)
173      result[i] = bam_nt16_rev_table[sequence(i)];
174#else
175    std::string table = "=ACMGRSVTWYHKDBN";
176    for (size_t i=0; i<result.size(); ++i)
177      result[i] = table[sequence(i)];
178#endif
179    return result;
180  }
181
182
183  void BamRead::sequence(size_t i, uint8_t seq)
184  {
185    assert(bam_);
186    assert(seq < 16);
187    bam1_seq_seti(bam1_seq(bam_), i, seq);
188  }
189
190
191  uint8_t BamRead::sequence(size_t index) const
192  {
193    assert(bam_);
194    return bam1_seqi(bam1_seq(bam_),index);
195  }
196
197
198  uint32_t BamRead::sequence_length(void) const
199  {
200    assert(bam_);
201    return bam_->core.l_qseq;
202  }
203
204
205  int32_t BamRead::pos(void) const
206  {
207    assert(bam_);
208    return bam_->core.pos;
209  }
210
211
212  int32_t BamRead::tid(void) const
213  {
214    assert(bam_);
215    return bam_->core.tid;
216  }
217
218
219  int32_t BamRead::mtid(void) const
220  {
221    assert(bam_);
222    return bam_->core.mtid;
223  }
224
225
226  int32_t BamRead::mpos(void) const
227  {
228    assert(bam_);
229    return bam_->core.mpos;
230  }
231
232
233  int32_t BamRead::end(void) const
234  {
235    assert(bam_);
236    return bam_calend(&core(), bam1_cigar(bam_));
237  }
238
239
240  void BamRead::swap(BamRead& other)
241  {
242    std::swap(bam_, other.bam_);
243  }
244
245
246  void swap(BamRead& lhs, BamRead& rhs)
247  {
248    lhs.swap(rhs);
249  }
250
251  bool same_query_name(const BamRead& lhs, const BamRead& rhs)
252  {
253    if (lhs.core().l_qname != rhs.core().l_qname)
254      return false;
255
256    return std::equal(lhs.name(),
257                      lhs.name() + lhs.core().l_qname,
258                      rhs.name());
259  }
260
261
262  bool soft_clipped(const BamRead& bam)
263  {
264    return left_soft_clipped(bam) || right_soft_clipped(bam);
265  }
266
267
268  uint32_t left_soft_clipped(const BamRead& bam)
269  {
270    for (size_t i=0; i<bam.core().n_cigar; ++i) {
271      if (bam.cigar_op(i) == BAM_CSOFT_CLIP)
272        return bam.cigar_oplen(i);
273      if (bam_cigar_type(bam.cigar_op(i))&1)
274        return 0;
275    }
276    return 0;
277  }
278
279
280  uint32_t right_soft_clipped(const BamRead& bam)
281  {
282    if (bam.core().n_cigar==0)
283      return 0;
284    // right clip can't be at first (most left cigar element)
285    for (size_t i=bam.core().n_cigar-1; i>0; --i) {
286      if (bam.cigar_op(i) == BAM_CSOFT_CLIP)
287        return bam.cigar_oplen(i);
288      if (bam_cigar_type(bam.cigar_op(i))&1)
289        return 0;
290    }
291    return 0;
292  }
293
294
295  bool BamLessPos::operator()(const BamRead& lhs, const BamRead& rhs) const
296  {
297    return lhs.tid() < rhs.tid() ||
298      (lhs.tid() == rhs.tid() && lhs.pos() < rhs.pos());
299  }
300
301
302  bool BamLessEnd::operator()(const BamRead& lhs, const BamRead& rhs) const
303  {
304    return lhs.tid() < rhs.tid() ||
305      (lhs.tid() == rhs.tid() && lhs.end() < rhs.end());
306  }
307
308}}}
Note: See TracBrowser for help on using the repository browser.