source: trunk/yat/omic/BamFile.cc

Last change on this file was 3792, checked in by Peter, 5 months ago

update copyright years

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.7 KB
Line 
1// $Id: BamFile.cc 3792 2019-04-12 07:15:09Z peter $
2
3/*
4  Copyright (C) 2012, 2013, 2014, 2015, 2016, 2017, 2018 Peter Johansson
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with yat. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include <config.h>
23
24#include "BamFile.h"
25#include "BamHeader.h"
26#include "BamReadIterator.h"
27#include "config_bam.h"
28
29#include "yat/utility/Exception.h"
30
31#include YAT_SAM_HEADER
32
33#include <cassert>
34#include <stdexcept>
35#include <string>
36#include <sstream>
37
38// This is an ugly hack to allow us access members in private struct
39// bam_index_t. This is needed to implement faster than linear
40// versions of functions 'get_idx_stat' and
41// 'n_no_coordinate'. Stealing declarations like this is obviously
42// ugly and usually a no-no as it would break if the declaration
43// changes in future versions of libbam. Here, we waive that concern
44// as the code is only used against libbam and libbam is a project
45// with no momentum and no future versions.
46//
47// Declarations taken from bam_index.c in samtools-0.1.19
48// Copyright (c) 2008-2009 Genome Research Ltd.
49#ifndef YAT_HAVE_HTSLIB
50#if defined YAT_HAVE_BAM_H
51#  include <khash.h>
52#  include <ksort.h>
53#elif defined YAT_HAVE_BAM_BAM_H
54#  include <bam/khash.h>
55#  include <bam/ksort.h>
56#elif defined YAT_HAVE_SAMTOOLS_BAM_H
57#  include <samtools/khash.h>
58#  include <samtools/ksort.h>
59#endif
60
61#define BAM_MAX_BIN 37450 // =(8^6-1)/7+1
62
63typedef struct {
64  uint64_t u, v;
65} pair64_t;
66
67#define pair64_lt(a,b) ((a).u < (b).u)
68KSORT_INIT(off, pair64_t, pair64_lt)
69
70typedef struct {
71  uint32_t m, n;
72  pair64_t *list;
73} bam_binlist_t;
74
75typedef struct {
76  int32_t n, m;
77  uint64_t *offset;
78} bam_lidx_t;
79
80KHASH_MAP_INIT_INT(i, bam_binlist_t)
81
82struct __bam_index_t {
83  int32_t n;
84  uint64_t n_no_coor; // unmapped reads without coordinate
85  khash_t(i) **index;
86  bam_lidx_t *index2;
87};
88#endif // end samtools specific declarations
89
90
91namespace theplu {
92namespace yat {
93namespace omic {
94
95  InBamFile::InBamFile(void)
96    : super_t(), index_(NULL)
97  {}
98
99
100  InBamFile::InBamFile(const std::string& fn)
101    : super_t(), index_(NULL)
102  {
103    open(fn);
104  }
105
106
107  InBamFile::~InBamFile(void)
108  {
109#if YAT_HAVE_HTSLIB
110    hts_idx_destroy(index_);
111#else
112    bam_index_destroy(index_);
113#endif
114  }
115
116
117  void InBamFile::build_index(void) const
118  {
119    build_index_base();
120  }
121
122
123  uint64_t InBamFile::get_idx_stat(int tid, bool return_mapped) const
124  {
125    uint64_t mapped;
126    uint64_t unmapped;
127#if YAT_HAVE_HTSLIB
128    if (tid >= header().n_targets())
129      throw utility::runtime_error("InBamFile::get_idx_stat");
130    hts_idx_get_stat(index(), tid, &mapped, &unmapped);
131#else
132    index(); // ensure index_ is loaded
133    assert(index_);
134    khint_t k;
135    int i = tid;
136    khash_t(i)* h = index_->index[i];
137    k = kh_get(i, h, BAM_MAX_BIN);
138    if (k != kh_end(h)) {
139      mapped = kh_val(h, k).list[1].u;
140      unmapped = kh_val(h, k).list[1].v;
141    }
142    else {
143      mapped = 0;
144      unmapped = 0;
145    }
146#endif
147    if (return_mapped)
148      return mapped;
149    return unmapped;
150  }
151
152
153  const BamHeader& InBamFile::header(void) const
154  {
155    return header_;
156  }
157
158
159  const InBamFile::index_type* InBamFile::index(void) const
160  {
161    if (!index_) {
162#if YAT_HAVE_HTSLIB
163      index_ = hts_idx_load(filename().c_str(), HTS_FMT_BAI);
164#else
165      index_ = bam_index_load(filename().c_str());
166#endif
167      if (!index_) {
168        std::ostringstream ss;
169        ss << "cannot load bam index '" << filename() << ".bai'";
170        throw utility::runtime_error(ss.str());
171      }
172    }
173    return index_;
174  }
175
176
177  uint64_t InBamFile::n_mapped(int tid) const
178  {
179    return get_idx_stat(tid, true);
180  }
181
182
183  uint64_t InBamFile::n_unmapped(int tid) const
184  {
185    return get_idx_stat(tid, false);
186  }
187
188
189  uint64_t InBamFile::n_no_coordinate(void) const
190  {
191#if YAT_HAVE_HTSLIB
192    return hts_idx_get_n_no_coor(index());
193#else
194    index(); // ensure index_ is loaded
195    assert(index_);
196    return index_->n_no_coor;
197#endif
198  }
199
200
201  void InBamFile::open(const std::string& fn)
202  {
203#if YAT_HAVE_HTSLIB
204    open_base(fn, "rb", NULL);
205    header_.header_ = sam_hdr_read(sf_);
206#else
207    open_base(fn, "rb", NULL);
208    header_.header_ = detail::bam_hdr_dup(sf_->header);
209#endif
210    if (header_.header_ == NULL) {
211      std::ostringstream ss;
212      ss << "failed to read header from '" << filename() << "'";
213      throw utility::runtime_error(ss.str());
214    }
215  }
216
217
218  bool InBamFile::read(BamRead& read)
219  {
220#if YAT_HAVE_HTSLIB
221    int result_ = sam_read1(sf_, header_.header_, read.bam_);
222#else
223    int result_ = samread(sf_, read.bam_);
224#endif
225    if (result_<-1) {
226      std::ostringstream ss;
227      ss << "InBamFile::read(1): invalid bam file '" << filename() << "'";
228      if (result_ == -2)
229        ss << " truncated file";
230      else
231        ss << " error: " << result_;
232      throw utility::runtime_error(ss.str());
233    }
234    return result_>=0;
235  }
236
237
238#if YAT_HAVE_HTSLIB
239  bool InBamFile::read(BamRead& read, hts_itr_t* iter)
240  {
241    int result = sam_itr_next(sf_, iter, read.bam_);
242#else
243  bool InBamFile::read(BamRead& read, bam_iter_t iter)
244  {
245    assert(sf_->type & 1); // no random access from sam files
246    int result = bam_iter_read(sf_->x.bam, iter, read.bam_);
247#endif
248    if (result<-1) {
249      std::ostringstream ss;
250      ss << "BamRead::read(2): invalid bam file '" << filename() << "'";
251      if (result == -2)
252        ss << " truncated file";
253      else
254        ss << " error: " << result;
255      throw utility::runtime_error(ss.str());
256    }
257    return result>=0;
258  }
259
260
261  OutBamFile::OutBamFile(void)
262    : super_t() {}
263
264
265  OutBamFile::OutBamFile(const std::string& fn, const BamHeader& header)
266  {
267    open(fn, header);
268  }
269
270
271  OutBamFile::OutBamFile(const std::string& fn, const BamHeader& header,
272                         unsigned int c)
273  {
274    open(fn, header, c);
275  }
276
277
278  void OutBamFile::build_index(void) const
279  {
280    if (is_open()) {
281      std::ostringstream msg;
282      msg << "failed building index for '" << filename() << "': file is open";
283      throw utility::runtime_error(msg.str());
284    }
285    build_index_base();
286  }
287
288
289  void OutBamFile::open(const std::string& fn, const BamHeader& h)
290  {
291    if (!h.header_)
292      throw utility::runtime_error("OutBamFile::open with null header");
293    open(fn, std::string("wb"), h);
294  }
295
296
297  void OutBamFile::open(const std::string& fn, const BamHeader& h,
298                        unsigned int compression)
299  {
300    std::string mode("wb");
301    if (compression > 9) {
302      std::stringstream oss;
303      oss << "OutBamFile::open( " << fn << ", <header>, " << compression
304          << "): incorrect compression level\n";
305      throw std::invalid_argument(oss.str());
306    }
307#if YAT_HAVE_HTSLIB
308    if (compression)
309      mode.push_back('0'+compression);
310    else
311      mode = "wu";
312#else
313    mode.push_back('0'+compression);
314#endif
315    open(fn, mode, h);
316  }
317
318
319  void OutBamFile::open(const std::string& fn, const std::string& mode,
320                        const BamHeader& h)
321  {
322#if YAT_HAVE_HTSLIB
323    open_base(fn, mode, NULL);
324    if (sam_hdr_write(sf_, h.header_)) {
325      std::ostringstream ss;
326      ss << "failed open '" << fn << "'";
327      throw utility::runtime_error(ss.str());
328    }
329#else
330    open_base(fn, mode, h.header_);
331#endif
332  }
333
334
335  void OutBamFile::write(const BamRead& read)
336  {
337#if YAT_HAVE_HTSLIB
338    assert(read.bam_);
339    if (is_open() && bam_write1(sf_->fp.bgzf, read.bam_)>0)
340      return;
341#else
342    if (samwrite(sf_, read.bam_)>0)
343      return;
344#endif
345    throw OutBamFile::error(read);
346  }
347
348
349  // OutBamFile::error
350
351  OutBamFile::error::error(const BamRead& r)
352    : IO_error("OutBamFile::write failed"), read_(r) {}
353
354
355  OutBamFile::error::~error(void) throw() {}
356
357
358  const BamRead& OutBamFile::error::read(void) const
359  { return read_; }
360
361}}}
Note: See TracBrowser for help on using the repository browser.