source: trunk/yat/omic/BamHeader.cc @ 3410

Last change on this file since 3410 was 3410, checked in by Peter, 8 years ago

add two new functions read_group and program_group to access data in BamHeader?. closes #817

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 6.8 KB
Line 
1// $Id: BamHeader.cc 3410 2015-04-21 23:05:43Z peter $
2
3/*
4  Copyright (C) 2012, 2013, 2014 Peter Johansson
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include <config.h>
23
24#include "BamHeader.h"
25
26#include "yat/utility/Exception.h"
27#include "yat/utility/split.h"
28
29// we need to include 'sam_header.h' when compiling against libbam
30#ifndef YAT_HAVE_HTSLIB
31  #if YAT_HAVE_SAM_H
32  #include <sam_header.h>
33  #elif YAT_HAVE_BAM_BAM_H
34  #include <bam/sam_header.h>
35  #elif YAT_HAVE_SAMTOOLS_SAM_H
36  #include <samtools/sam_header.h>
37  #else
38  #error cannot end up here
39  #endif
40#endif
41
42#include <cassert>
43#include <cstdlib>
44#include <cstring>
45#include <string>
46#include <sstream>
47#include <utility>
48
49namespace theplu {
50namespace yat {
51namespace omic {
52
53  BamHeader::BamHeader(void)
54    : header_(NULL)
55  {
56  }
57
58
59  BamHeader::~BamHeader(void)
60  {
61#if YAT_HAVE_HTSLIB
62    bam_hdr_destroy(header_);
63#else
64    bam_header_destroy(header_);
65#endif
66  }
67
68
69  BamHeader::BamHeader(const BamHeader& other)
70    : read_group_(other.read_group_), program_group_(other.program_group_)
71  {
72#ifndef YAT_HAVE_HTSLIB
73    using namespace detail; // for bam_hdr_dup in YAT_HAVE_LIBBAM mode
74#endif
75    header_ = bam_hdr_dup(other.header_);
76  }
77
78
79  const std::string& BamHeader::group(strMap2& table,const std::string& type,
80                                      const std::string& id,
81                                      const std::string& key) const
82  {
83    assert(type.size()==2);
84    if (table.empty()) { // parse header
85      std::istringstream ss(text());
86      std::string line;
87      while (getline(ss, line)) {
88        if (line.substr(1,2)!=type)
89          continue;
90        // find ID value
91        size_t pos = line.find("\tID:", 3);
92        size_t end = line.find('\t', pos+4);
93        std::string id_val = line.substr(pos+4, end-(pos+4));
94        std::vector<std::string> vec;
95        utility::split(vec, line, '\t');
96        strMap& m = table[id_val];
97        for (size_t i=1; i<vec.size(); ++i) {
98          m[vec[i].substr(0,2)] = vec[i].substr(3);
99        }
100      }
101      if (table.empty())
102        table[""][""]; // to avoid re-parsing
103    }
104    return table[id][key];
105  }
106
107
108  void BamHeader::parse_region(const std::string& reg, int& id, int& begin,
109                               int& end) const
110  {
111    assert(header_);
112#if YAT_HAVE_HTSLIB
113    const char* b = reg.c_str();
114    const char* e = hts_parse_reg(b, &begin, &end);
115    // If begin > end suggests something is wrong and that is also how
116    // bam_parse_region (below in samtools impl) used to detect error
117    // and return non-zero.
118    if (begin<=end) {
119      std::string chr = reg.substr(0, e-b);
120      try {
121        id = tid(chr);
122        return;
123      }
124      catch (utility::runtime_error& e) {
125        ; // throw below instead
126      }
127    }
128#else
129    if (!bam_parse_region(header_, reg.c_str(), &id, &begin, &end))
130      return;
131#endif
132    std::ostringstream ss;
133    ss << "invalid region: '" << reg << "'";
134    throw utility::runtime_error(ss.str());
135  }
136
137
138  const std::string& BamHeader::program_group(const std::string& id,
139                                              const std::string& key) const
140  {
141    return group(program_group_, "PG", id, key);
142  }
143
144
145  const std::string& BamHeader::read_group(const std::string& id,
146                                           const std::string& key) const
147  {
148    return group(read_group_, "RG", id, key);
149  }
150
151
152  void BamHeader::swap(BamHeader& other)
153  {
154    std::swap(header_, other.header_);
155    std::swap(read_group_, other.read_group_);
156    std::swap(program_group_, other.program_group_);
157  }
158
159
160  uint32_t BamHeader::target_length(size_t tid) const
161  {
162    assert(tid < static_cast<size_t>(n_targets()));
163    return header_->target_len[tid];
164  }
165
166
167  const char* BamHeader::target_name(size_t tid) const
168  {
169    assert(tid < static_cast<size_t>(n_targets()));
170    return header_->target_name[tid];
171  }
172
173
174  std::string BamHeader::text(void) const
175  {
176    assert(header_);
177    return std::string(header_->text, header_->l_text);
178  }
179
180
181  void BamHeader::text(const std::string& txt)
182  {
183    read_group_.clear();
184    program_group_.clear();
185#ifdef YAT_HAVE_HTSLIB
186    // destroy the old
187    bam_hdr_destroy(header_);
188    // and create header pointer
189    header_ = sam_hdr_parse(txt.size(), txt.c_str());
190#endif
191    header_->l_text = txt.size();
192    header_->text = (char*)realloc(header_->text, txt.size()+1);
193    memcpy(header_->text, txt.c_str(), txt.size()+1);
194#ifndef YAT_HAVE_HTSLIB
195    // destroy dict to ensure that things are updated during parsing
196    if (header_->dict) {
197      sam_header_free(header_->dict);
198      header_->dict = 0;
199    }
200    sam_header_parse(header_);
201#endif
202  }
203
204
205  int32_t BamHeader::tid(const std::string& name) const
206  {
207#if YAT_HAVE_HTSLIB
208    int res = bam_name2id(header_, name.c_str());
209    if (res==-1) {
210      std::string msg("invalid segment name:");
211      msg += name;
212      throw utility::runtime_error(msg);
213    }
214    return res;
215#else
216    if (!header_->hash) {
217      // We would like to call something like bam_init_header_hash,
218      // but unfortunately that function is not defined in any header
219      // (only in two source files). Instead we call bam_parse_region
220      // because a documented side-effect of that function is that
221      // hash get populated.
222      int tid, b, e;
223      bam_parse_region(header_, name.c_str(), &tid, &b, &e);
224      assert(header_->hash);
225      assert(tid == bam_get_tid(header_, name.c_str()));
226      return tid;
227    }
228    return bam_get_tid(header_, name.c_str());
229#endif
230  }
231
232
233  int32_t BamHeader::n_targets(void) const
234  {
235    assert(header_);
236    return header_->n_targets;
237  }
238
239
240  void swap(BamHeader& lhs, BamHeader& rhs)
241  {
242    lhs.swap(rhs);
243  }
244
245
246  BamHeader& BamHeader::operator=(const BamHeader& rhs)
247  {
248    BamHeader tmp(rhs);
249    swap(tmp);
250    return *this;
251  }
252
253#ifndef YAT_HAVE_HTSLIB
254  namespace detail {
255    bam_header_t * bam_hdr_dup(const bam_header_t* other)
256    {
257      // taken from htslib 1.2.1 and adjusted to fit old struct bam_header_t
258      if (!other)
259        return NULL;
260      bam_header_t* h = bam_header_init();
261      if (!h)
262        return NULL;
263      // copy stuff
264      h->n_targets = other->n_targets;
265      h->l_text = other->l_text;
266      h->n_text = other->n_text;
267      h->dict = NULL;
268      h->hash = NULL;
269      h->rg2lib = NULL;
270
271      h->text = (char*)calloc(h->l_text + 1, 1);
272      memcpy(h->text, other->text, h->l_text);
273      h->target_len = (uint32_t*)calloc(h->n_targets, sizeof(uint32_t));
274      h->target_name = (char**)calloc(h->n_targets, sizeof(char*));
275      for (int i = 0; i < h->n_targets; ++i) {
276        h->target_len[i] = other->target_len[i];
277        h->target_name[i] = strdup(other->target_name[i]);
278      }
279      return h;
280    }
281  }
282#endif
283
284}}}
Note: See TracBrowser for help on using the repository browser.