source: trunk/yat/omic/BamFile.cc @ 3579

Last change on this file since 3579 was 3579, checked in by Peter, 5 years ago

merge release 0.14 into trunk

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.2 KB
Line 
1// $Id: BamFile.cc 3579 2017-01-16 03:54:43Z peter $
2
3/*
4  Copyright (C) 2012, 2013, 2014, 2015, 2016 Peter Johansson
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include <config.h>
23
24#include "BamFile.h"
25#include "BamHeader.h"
26#include "BamReadIterator.h"
27#include "config_bam.h"
28
29#include "yat/utility/Exception.h"
30
31#include YAT_SAM_HEADER
32
33#include <cassert>
34#include <stdexcept>
35#include <string>
36#include <sstream>
37
38// This is an ugly hack to allow us access members in private struct
39// bam_index_t. This is needed to implement faster than linear
40// versions of functions 'get_idx_stat' and
41// 'n_no_coordinate'. Stealing declarations like this is obviously
42// ugly and usually a no-no as it would break if the declaration
43// changes in future versions of libbam. Here, we waive that concern
44// as the code is only used against libbam and libbam is a project
45// with no momentum and no future versions.
46//
47// Declarations taken from bam_index.c in samtools-0.1.19
48// Copyright (c) 2008-2009 Genome Research Ltd.
49#ifndef YAT_HAVE_HTSLIB
50#if defined YAT_HAVE_BAM_H
51#  include <khash.h>
52#  include <ksort.h>
53#elif defined YAT_HAVE_BAM_BAM_H
54#  include <bam/khash.h>
55#  include <bam/ksort.h>
56#elif defined YAT_HAVE_SAMTOOLS_BAM_H
57#  include <samtools/khash.h>
58#  include <samtools/ksort.h>
59#endif
60
61#define BAM_MAX_BIN 37450 // =(8^6-1)/7+1
62
63typedef struct {
64  uint64_t u, v;
65} pair64_t;
66
67#define pair64_lt(a,b) ((a).u < (b).u)
68KSORT_INIT(off, pair64_t, pair64_lt)
69
70typedef struct {
71  uint32_t m, n;
72  pair64_t *list;
73} bam_binlist_t;
74
75typedef struct {
76  int32_t n, m;
77  uint64_t *offset;
78} bam_lidx_t;
79
80KHASH_MAP_INIT_INT(i, bam_binlist_t)
81
82struct __bam_index_t {
83  int32_t n;
84  uint64_t n_no_coor; // unmapped reads without coordinate
85  khash_t(i) **index;
86  bam_lidx_t *index2;
87};
88#endif // end samtools specific declarations
89
90
91namespace theplu {
92namespace yat {
93namespace omic {
94
95  InBamFile::InBamFile(void)
96    : super_t(), index_(NULL)
97  {}
98
99
100  InBamFile::InBamFile(const std::string& fn)
101    : super_t(), index_(NULL)
102  {
103    open(fn);
104  }
105
106
107  InBamFile::~InBamFile(void)
108  {
109#if YAT_HAVE_HTSLIB
110    hts_idx_destroy(index_);
111#else
112    bam_index_destroy(index_);
113#endif
114  }
115
116
117  uint64_t InBamFile::get_idx_stat(int tid, bool return_mapped) const
118  {
119    uint64_t mapped;
120    uint64_t unmapped;
121#if YAT_HAVE_HTSLIB
122    if (hts_idx_get_stat(index(), tid, &mapped, &unmapped)) {
123      throw utility::runtime_error("InBamFile::get_idx_stat");
124    }
125#else
126    index(); // ensure index_ is loaded
127    assert(index_);
128    khint_t k;
129    int i = tid;
130    khash_t(i)* h = index_->index[i];
131    k = kh_get(i, h, BAM_MAX_BIN);
132    if (k != kh_end(h)) {
133      mapped = kh_val(h, k).list[1].u;
134      unmapped = kh_val(h, k).list[1].v;
135    }
136    else {
137      mapped = 0;
138      unmapped = 0;
139    }
140#endif
141    if (return_mapped)
142      return mapped;
143    return unmapped;
144  }
145
146
147  const BamHeader& InBamFile::header(void) const
148  {
149    return header_;
150  }
151
152
153  const InBamFile::index_type* InBamFile::index(void) const
154  {
155    if (!index_) {
156#if YAT_HAVE_HTSLIB
157      index_ = hts_idx_load(filename().c_str(), HTS_FMT_BAI);
158#else
159      index_ = bam_index_load(filename().c_str());
160#endif
161      if (!index_) {
162        std::ostringstream ss;
163        ss << "cannot load bam index '" << filename() << ".bai'";
164        throw utility::runtime_error(ss.str());
165      }
166    }
167    return index_;
168  }
169
170
171  uint64_t InBamFile::n_mapped(int tid) const
172  {
173    return get_idx_stat(tid, true);
174  }
175
176
177  uint64_t InBamFile::n_unmapped(int tid) const
178  {
179    return get_idx_stat(tid, false);
180  }
181
182
183  uint64_t InBamFile::n_no_coordinate(void) const
184  {
185#if YAT_HAVE_HTSLIB
186    return hts_idx_get_n_no_coor(index());
187#else
188    index(); // ensure index_ is loaded
189    assert(index_);
190    return index_->n_no_coor;
191#endif
192  }
193
194
195  void InBamFile::open(const std::string& fn)
196  {
197#if YAT_HAVE_HTSLIB
198    open_base(fn, "rb", NULL);
199    header_.header_ = sam_hdr_read(sf_);
200#else
201    open_base(fn, "rb", NULL);
202    header_.header_ = detail::bam_hdr_dup(sf_->header);
203#endif
204    if (header_.header_ == NULL) {
205      std::ostringstream ss;
206      ss << "failed to read header from '" << filename() << "'";
207      throw utility::runtime_error(ss.str());
208    }
209  }
210
211
212  bool InBamFile::read(BamRead& read)
213  {
214#if YAT_HAVE_HTSLIB
215    int result_ = sam_read1(sf_, header_.header_, read.bam_);
216#else
217    int result_ = samread(sf_, read.bam_);
218#endif
219    if (result_<-1) {
220      std::ostringstream ss;
221      ss << "InBamFile::read(1): invalid bam file '" << filename() << "'";
222      if (result_ == -2)
223        ss << " truncated file";
224      else
225        ss << " error: " << result_;
226      throw utility::runtime_error(ss.str());
227    }
228    return result_>=0;
229  }
230
231
232#if YAT_HAVE_HTSLIB
233  bool InBamFile::read(BamRead& read, hts_itr_t* iter)
234  {
235    int result = sam_itr_next(sf_, iter, read.bam_);
236#else
237  bool InBamFile::read(BamRead& read, bam_iter_t iter)
238  {
239    assert(sf_->type & 1); // no random access from sam files
240    int result = bam_iter_read(sf_->x.bam, iter, read.bam_);
241#endif
242    if (result<-1) {
243      std::ostringstream ss;
244      ss << "read(2): invalid bam file '" << filename() << "'";
245      if (result == -2)
246        ss << " truncated file";
247      else
248        ss << " error: " << result;
249      throw utility::runtime_error(ss.str());
250    }
251    return result>=0;
252  }
253
254
255  OutBamFile::OutBamFile(void)
256    : super_t() {}
257
258
259  OutBamFile::OutBamFile(const std::string& fn, const BamHeader& header)
260  {
261    open(fn, header);
262  }
263
264
265  OutBamFile::OutBamFile(const std::string& fn, const BamHeader& header,
266                         unsigned int c)
267  {
268    open(fn, header, c);
269  }
270
271
272  void OutBamFile::open(const std::string& fn, const BamHeader& h)
273  {
274    if (!h.header_)
275      throw utility::runtime_error("OutBamFile::open with null header");
276#if YAT_HAVE_HTSLIB
277    open_base(fn, "wb", NULL);
278    sam_hdr_write(sf_, h.header_);
279#else
280    open_base(fn, "wb", h.header_);
281#endif
282  }
283
284
285  void OutBamFile::open(const std::string& fn, const BamHeader& h,
286                        unsigned int compression)
287  {
288    std::string mode("wb");
289    if (compression > 9) {
290      std::stringstream oss;
291      oss << "OutBamFile::open( " << fn << ", <header>, " << compression
292          << "): incorrect compression level\n";
293      throw std::invalid_argument(oss.str());
294    }
295#if YAT_HAVE_HTSLIB
296    if (compression)
297      mode.push_back('0'+compression);
298    else
299      mode = "wu";
300    open_base(fn, mode, NULL);
301    sam_hdr_write(sf_, h.header_);
302#else
303    mode.push_back('0'+compression);
304    open_base(fn, mode, h.header_);
305#endif
306  }
307
308
309  void OutBamFile::write(const BamRead& read)
310  {
311#if YAT_HAVE_HTSLIB
312    assert(read.bam_);
313    if (is_open() && bam_write1(sf_->fp.bgzf, read.bam_)>0)
314      return;
315#else
316    if (samwrite(sf_, read.bam_)>0)
317      return;
318#endif
319    throw OutBamFile::error(read);
320  }
321
322
323  // OutBamFile::error
324
325  OutBamFile::error::error(const BamRead& r)
326    : IO_error(""), read_(r) {}
327
328
329  OutBamFile::error::~error(void) throw() {}
330
331
332  const BamRead& OutBamFile::error::read(void) const
333  { return read_; }
334
335}}}
Note: See TracBrowser for help on using the repository browser.