source: trunk/yat/omic/BamFile.cc @ 3484

Last change on this file since 3484 was 3484, checked in by Peter, 6 years ago

implement InBamFile? functions mapped, unmapped, and n_no_coordinate for libbam case. The code is hackish but see comment in BamFile?.cc for rationale. closes #852

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.2 KB
Line 
1// $Id: BamFile.cc 3484 2016-03-23 00:37:29Z peter $
2
3/*
4  Copyright (C) 2012, 2013, 2014, 2015, 2016 Peter Johansson
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include <iostream>
23
24#include <config.h>
25
26#include "BamFile.h"
27#include "BamHeader.h"
28#include "BamReadIterator.h"
29#include "config_bam.h"
30
31#include "yat/utility/Exception.h"
32
33#include YAT_SAM_HEADER
34
35#include <cassert>
36#include <stdexcept>
37#include <string>
38#include <sstream>
39
40// This is an ugly hack to allow us access members in private struct
41// bam_index_t. This is needed to implement faster than linear
42// versions of functions 'get_idx_stat' and
43// 'n_no_coordinate'. Stealing declarations like this is obviously
44// ugly and usually a no-no as it would break if the declaration
45// changes in future versions of libbam. Here, we waive that concern
46// as the code is only used against libbam and libbam is a project
47// with no momentum and no future versions.
48//
49// Declarations taken from bam_index.c in samtools-0.1.19
50// Copyright (c) 2008-2009 Genome Research Ltd.
51#ifndef YAT_HAVE_HTSLIB
52#if defined YAT_HAVE_BAM_H
53#  include <khash.h>
54#  include <ksort.h>
55#elif defined YAT_HAVE_BAM_BAM_H
56#  include <bam/khash.h>
57#  include <bam/ksort.h>
58#elif defined YAT_HAVE_SAMTOOLS_BAM_H
59#  include <samtools/khash.h>
60#  include <samtools/ksort.h>
61#endif
62
63#define BAM_MAX_BIN 37450 // =(8^6-1)/7+1
64
65typedef struct {
66  uint64_t u, v;
67} pair64_t;
68
69#define pair64_lt(a,b) ((a).u < (b).u)
70KSORT_INIT(off, pair64_t, pair64_lt)
71
72typedef struct {
73  uint32_t m, n;
74  pair64_t *list;
75} bam_binlist_t;
76
77typedef struct {
78  int32_t n, m;
79  uint64_t *offset;
80} bam_lidx_t;
81
82KHASH_MAP_INIT_INT(i, bam_binlist_t)
83
84struct __bam_index_t {
85  int32_t n;
86  uint64_t n_no_coor; // unmapped reads without coordinate
87  khash_t(i) **index;
88  bam_lidx_t *index2;
89};
90#endif // end samtools specific declarations
91
92
93namespace theplu {
94namespace yat {
95namespace omic {
96
97  InBamFile::InBamFile(void)
98    : super_t(), index_(NULL)
99  {}
100
101
102  InBamFile::InBamFile(const std::string& fn)
103    : super_t(), index_(NULL)
104  {
105    open(fn);
106  }
107
108
109  InBamFile::~InBamFile(void)
110  {
111#if YAT_HAVE_HTSLIB
112    hts_idx_destroy(index_);
113#else
114    bam_index_destroy(index_);
115#endif
116  }
117
118
119  uint64_t InBamFile::get_idx_stat(int tid, bool return_mapped) const
120  {
121    uint64_t mapped;
122    uint64_t unmapped;
123#if YAT_HAVE_HTSLIB
124    if (hts_idx_get_stat(index(), tid, &mapped, &unmapped)) {
125      throw utility::runtime_error("InBamFile::get_idx_stat");
126    }
127#else
128    index(); // ensure index_ is loaded
129    assert(index_);
130    khint_t k;
131    int i = tid;
132    khash_t(i)* h = index_->index[i];
133    k = kh_get(i, h, BAM_MAX_BIN);
134    if (k != kh_end(h)) {
135      mapped = kh_val(h, k).list[1].u;
136      unmapped = kh_val(h, k).list[1].v;
137    }
138    else {
139      mapped = 0;
140      unmapped = 0;
141    }
142#endif
143    if (return_mapped)
144      return mapped;
145    return unmapped;
146  }
147
148
149  const BamHeader& InBamFile::header(void) const
150  {
151    return header_;
152  }
153
154
155  const InBamFile::index_type* InBamFile::index(void) const
156  {
157    if (!index_) {
158#if YAT_HAVE_HTSLIB
159      index_ = hts_idx_load(filename().c_str(), HTS_FMT_BAI);
160#else
161      index_ = bam_index_load(filename().c_str());
162#endif
163      if (!index_) {
164        std::ostringstream ss;
165        ss << "cannot load bam index '" << filename() << ".bai'";
166        throw utility::runtime_error(ss.str());
167      }
168    }
169    return index_;
170  }
171
172
173  uint64_t InBamFile::n_mapped(int tid) const
174  {
175    return get_idx_stat(tid, true);
176  }
177
178
179  uint64_t InBamFile::n_unmapped(int tid) const
180  {
181    return get_idx_stat(tid, false);
182  }
183
184
185  uint64_t InBamFile::n_no_coordinate(void) const
186  {
187#if YAT_HAVE_HTSLIB
188    return hts_idx_get_n_no_coor(index());
189#else
190    index(); // ensure index_ is loaded
191    assert(index_);
192    return index_->n_no_coor;
193#endif
194  }
195
196
197  void InBamFile::open(const std::string& fn)
198  {
199#if YAT_HAVE_HTSLIB
200    open_base(fn, "rb", NULL);
201    header_.header_ = sam_hdr_read(sf_);
202#else
203    open_base(fn, "rb", NULL);
204    header_.header_ = detail::bam_hdr_dup(sf_->header);
205#endif
206    if (header_.header_ == NULL) {
207      std::ostringstream ss;
208      ss << "failed to read header from '" << filename() << "'";
209      throw utility::runtime_error(ss.str());
210    }
211  }
212
213
214  bool InBamFile::read(BamRead& read)
215  {
216#if YAT_HAVE_HTSLIB
217    int result_ = sam_read1(sf_, header_.header_, read.bam_);
218#else
219    int result_ = samread(sf_, read.bam_);
220#endif
221    if (result_<-1) {
222      std::ostringstream ss;
223      ss << "InBamFile::read(1): invalid bam file '" << filename() << "'";
224      if (result_ == -2)
225        ss << " truncated file";
226      else
227        ss << " error: " << result_;
228      throw utility::runtime_error(ss.str());
229    }
230    return result_>=0;
231  }
232
233
234#if YAT_HAVE_HTSLIB
235  bool InBamFile::read(BamRead& read, hts_itr_t* iter)
236  {
237    int result = sam_itr_next(sf_, iter, read.bam_);
238#else
239  bool InBamFile::read(BamRead& read, bam_iter_t iter)
240  {
241    assert(sf_->type & 1); // no random access from sam files
242    int result = bam_iter_read(sf_->x.bam, iter, read.bam_);
243#endif
244    if (result<-1) {
245      std::ostringstream ss;
246      ss << "read(2): invalid bam file '" << filename() << "'";
247      if (result == -2)
248        ss << " truncated file";
249      else
250        ss << " error: " << result;
251      throw utility::runtime_error(ss.str());
252    }
253    return result>=0;
254  }
255
256
257  OutBamFile::OutBamFile(void)
258    : super_t() {}
259
260
261  OutBamFile::OutBamFile(const std::string& fn, const BamHeader& header)
262  {
263    open(fn, header);
264  }
265
266
267  OutBamFile::OutBamFile(const std::string& fn, const BamHeader& header,
268                         unsigned int c)
269  {
270    open(fn, header, c);
271  }
272
273
274  void OutBamFile::open(const std::string& fn, const BamHeader& h)
275  {
276    if (!h.header_)
277      throw utility::runtime_error("OutBamFile::open with null header");
278#if YAT_HAVE_HTSLIB
279    open_base(fn, "wb", NULL);
280    sam_hdr_write(sf_, h.header_);
281#else
282    open_base(fn, "wb", h.header_);
283#endif
284  }
285
286
287  void OutBamFile::open(const std::string& fn, const BamHeader& h,
288                        unsigned int compression)
289  {
290    std::string mode("wb");
291    if (compression > 9) {
292      std::stringstream oss;
293      oss << "OutBamFile::open( " << fn << ", <header>, " << compression
294          << "): incorrect compression level\n";
295      throw std::invalid_argument(oss.str());
296    }
297#if YAT_HAVE_HTSLIB
298    if (compression)
299      mode.push_back('0'+compression);
300    else
301      mode = "wu";
302    open_base(fn, mode, NULL);
303    sam_hdr_write(sf_, h.header_);
304#else
305    mode.push_back('0'+compression);
306    open_base(fn, mode, h.header_);
307#endif
308  }
309
310
311  void OutBamFile::write(const BamRead& read)
312  {
313#if YAT_HAVE_HTSLIB
314    assert(read.bam_);
315    if (is_open() && bam_write1(sf_->fp.bgzf, read.bam_)>0)
316      return;
317#else
318    if (samwrite(sf_, read.bam_)>0)
319      return;
320#endif
321    throw OutBamFile::error(read);
322  }
323
324
325  // OutBamFile::error
326
327  OutBamFile::error::error(const BamRead& r)
328    : IO_error(""), read_(r) {}
329
330
331  OutBamFile::error::~error(void) throw() {}
332
333
334  const BamRead& OutBamFile::error::read(void) const
335  { return read_; }
336
337}}}
Note: See TracBrowser for help on using the repository browser.