source: trunk/yat/omic/BamHeader.cc @ 3412

Last change on this file since 3412 was 3412, checked in by Peter, 4 years ago

throw exception rather than returnining empty string when ID or KEY is missing in header. refs #817

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.1 KB
Line 
1// $Id: BamHeader.cc 3412 2015-04-22 23:00:30Z peter $
2
3/*
4  Copyright (C) 2012, 2013, 2014 Peter Johansson
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include <config.h>
23
24#include "BamHeader.h"
25
26#include "yat/utility/Exception.h"
27#include "yat/utility/split.h"
28#include "yat/utility/stl_utility.h"
29
30// we need to include 'sam_header.h' when compiling against libbam
31#ifndef YAT_HAVE_HTSLIB
32  #if YAT_HAVE_SAM_H
33  #include <sam_header.h>
34  #elif YAT_HAVE_BAM_BAM_H
35  #include <bam/sam_header.h>
36  #elif YAT_HAVE_SAMTOOLS_SAM_H
37  #include <samtools/sam_header.h>
38  #else
39  #error cannot end up here
40  #endif
41#endif
42
43#include <cassert>
44#include <cstdlib>
45#include <cstring>
46#include <string>
47#include <sstream>
48#include <utility>
49
50namespace theplu {
51namespace yat {
52namespace omic {
53
54  BamHeader::BamHeader(void)
55    : header_(NULL)
56  {
57  }
58
59
60  BamHeader::~BamHeader(void)
61  {
62#if YAT_HAVE_HTSLIB
63    bam_hdr_destroy(header_);
64#else
65    bam_header_destroy(header_);
66#endif
67  }
68
69
70  BamHeader::BamHeader(const BamHeader& other)
71    : read_group_(other.read_group_), program_group_(other.program_group_)
72  {
73#ifndef YAT_HAVE_HTSLIB
74    using namespace detail; // for bam_hdr_dup in YAT_HAVE_LIBBAM mode
75#endif
76    header_ = bam_hdr_dup(other.header_);
77  }
78
79
80  const std::string& BamHeader::group(strMap2& table,const std::string& type,
81                                      const std::string& id,
82                                      const std::string& key) const
83  {
84    assert(type.size()==2);
85    if (table.empty()) { // parse header
86      std::istringstream ss(text());
87      std::string line;
88      while (getline(ss, line)) {
89        if (line.substr(1,2)!=type)
90          continue;
91        // find ID value
92        size_t pos = line.find("\tID:", 3);
93        size_t end = line.find('\t', pos+4);
94        std::string id_val = line.substr(pos+4, end-(pos+4));
95        std::vector<std::string> vec;
96        utility::split(vec, line, '\t');
97        strMap& m = table[id_val];
98        for (size_t i=1; i<vec.size(); ++i) {
99          m[vec[i].substr(0,2)] = vec[i].substr(3);
100        }
101      }
102    }
103    strMap2::const_iterator line_table = table.find(id);
104    if (line_table == table.end()) {
105      std::string msg = "@" + type + ": unknown ID: '" + id + "'";
106      throw utility::runtime_error(msg);
107    }
108    strMap::const_iterator it = line_table->second.find(key);
109    if (it == line_table->second.end()) {
110      std::string msg = "@" + type + ": unknown KEY: '" + key + "'";
111      throw utility::runtime_error(msg);
112    }
113    return it->second;
114  }
115
116
117  void BamHeader::parse_region(const std::string& reg, int& id, int& begin,
118                               int& end) const
119  {
120    assert(header_);
121#if YAT_HAVE_HTSLIB
122    const char* b = reg.c_str();
123    const char* e = hts_parse_reg(b, &begin, &end);
124    // If begin > end suggests something is wrong and that is also how
125    // bam_parse_region (below in samtools impl) used to detect error
126    // and return non-zero.
127    if (begin<=end) {
128      std::string chr = reg.substr(0, e-b);
129      try {
130        id = tid(chr);
131        return;
132      }
133      catch (utility::runtime_error& e) {
134        ; // throw below instead
135      }
136    }
137#else
138    if (!bam_parse_region(header_, reg.c_str(), &id, &begin, &end))
139      return;
140#endif
141    std::ostringstream ss;
142    ss << "invalid region: '" << reg << "'";
143    throw utility::runtime_error(ss.str());
144  }
145
146
147  const std::string& BamHeader::program_group(const std::string& id,
148                                              const std::string& key) const
149  {
150    return group(program_group_, "PG", id, key);
151  }
152
153
154  const std::string& BamHeader::read_group(const std::string& id,
155                                           const std::string& key) const
156  {
157    return group(read_group_, "RG", id, key);
158  }
159
160
161  void BamHeader::swap(BamHeader& other)
162  {
163    std::swap(header_, other.header_);
164    std::swap(read_group_, other.read_group_);
165    std::swap(program_group_, other.program_group_);
166  }
167
168
169  uint32_t BamHeader::target_length(size_t tid) const
170  {
171    assert(tid < static_cast<size_t>(n_targets()));
172    return header_->target_len[tid];
173  }
174
175
176  const char* BamHeader::target_name(size_t tid) const
177  {
178    assert(tid < static_cast<size_t>(n_targets()));
179    return header_->target_name[tid];
180  }
181
182
183  std::string BamHeader::text(void) const
184  {
185    assert(header_);
186    return std::string(header_->text, header_->l_text);
187  }
188
189
190  void BamHeader::text(const std::string& txt)
191  {
192    read_group_.clear();
193    program_group_.clear();
194#ifdef YAT_HAVE_HTSLIB
195    // destroy the old
196    bam_hdr_destroy(header_);
197    // and create header pointer
198    header_ = sam_hdr_parse(txt.size(), txt.c_str());
199#endif
200    header_->l_text = txt.size();
201    header_->text = (char*)realloc(header_->text, txt.size()+1);
202    memcpy(header_->text, txt.c_str(), txt.size()+1);
203#ifndef YAT_HAVE_HTSLIB
204    // destroy dict to ensure that things are updated during parsing
205    if (header_->dict) {
206      sam_header_free(header_->dict);
207      header_->dict = 0;
208    }
209    sam_header_parse(header_);
210#endif
211  }
212
213
214  int32_t BamHeader::tid(const std::string& name) const
215  {
216#if YAT_HAVE_HTSLIB
217    int res = bam_name2id(header_, name.c_str());
218    if (res==-1) {
219      std::string msg("invalid segment name:");
220      msg += name;
221      throw utility::runtime_error(msg);
222    }
223    return res;
224#else
225    if (!header_->hash) {
226      // We would like to call something like bam_init_header_hash,
227      // but unfortunately that function is not defined in any header
228      // (only in two source files). Instead we call bam_parse_region
229      // because a documented side-effect of that function is that
230      // hash get populated.
231      int tid, b, e;
232      bam_parse_region(header_, name.c_str(), &tid, &b, &e);
233      assert(header_->hash);
234      assert(tid == bam_get_tid(header_, name.c_str()));
235      return tid;
236    }
237    return bam_get_tid(header_, name.c_str());
238#endif
239  }
240
241
242  int32_t BamHeader::n_targets(void) const
243  {
244    assert(header_);
245    return header_->n_targets;
246  }
247
248
249  void swap(BamHeader& lhs, BamHeader& rhs)
250  {
251    lhs.swap(rhs);
252  }
253
254
255  BamHeader& BamHeader::operator=(const BamHeader& rhs)
256  {
257    BamHeader tmp(rhs);
258    swap(tmp);
259    return *this;
260  }
261
262#ifndef YAT_HAVE_HTSLIB
263  namespace detail {
264    bam_header_t * bam_hdr_dup(const bam_header_t* other)
265    {
266      // taken from htslib 1.2.1 and adjusted to fit old struct bam_header_t
267      if (!other)
268        return NULL;
269      bam_header_t* h = bam_header_init();
270      if (!h)
271        return NULL;
272      // copy stuff
273      h->n_targets = other->n_targets;
274      h->l_text = other->l_text;
275      h->n_text = other->n_text;
276      h->dict = NULL;
277      h->hash = NULL;
278      h->rg2lib = NULL;
279
280      h->text = (char*)calloc(h->l_text + 1, 1);
281      memcpy(h->text, other->text, h->l_text);
282      h->target_len = (uint32_t*)calloc(h->n_targets, sizeof(uint32_t));
283      h->target_name = (char**)calloc(h->n_targets, sizeof(char*));
284      for (int i = 0; i < h->n_targets; ++i) {
285        h->target_len[i] = other->target_len[i];
286        h->target_name[i] = strdup(other->target_name[i]);
287      }
288      return h;
289    }
290  }
291#endif
292
293}}}
Note: See TracBrowser for help on using the repository browser.