source: trunk/yat/omic/Pileup.h @ 3332

Last change on this file since 3332 was 3332, checked in by Peter, 7 years ago

In Pileup store reads in a std::list (rather than std::deque) to allow
easy erase as soon as we are one passed last base. Erasing reads
immediately implies we don't need a filter_iterator, but can use
list::const_iterator directly.

Fix a bug in Pileup::Entry, which held a BamRead? and CigarIterator? and
the latter is essentially a uint32_t* that is returned from
BamRead::cigar(). This caused problems in copy/assignment since the
BamRead? was copied by value and the CigarIterator? didn't point to
BamRead? in this class but to BamRead? in rhs. The problem was solved by
replacing the BamRead? by a shared_ptr<BamRead?>.

refs #806

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.4 KB
Line 
1#ifndef theplu_yat_omic_pileup
2#define theplu_yat_omic_pileup
3
4// $Id: Pileup.h 3332 2014-10-23 11:18:15Z peter $
5
6/*
7  Copyright (C) 2014 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "BamRead.h"
26#include "CigarIterator.h"
27
28#include "yat/utility/yat_assert.h"
29
30#include <boost/shared_ptr.hpp>
31
32#include <algorithm>
33#include <list>
34
35namespace theplu {
36namespace yat {
37namespace omic {
38
39  /**
40     \since new in yat 0.13
41   */
42  template<typename Iterator>
43  class Pileup
44  {
45  public:
46    /**
47       Essentially a BamRead but with additional information regarding
48       base we currently pointing to
49     */
50    class Entry
51    {
52    public:
53      /**
54         \brief default constructor
55       */
56      Entry(void) {}
57
58      /**
59         Create an Entry with read \a b
60       */
61      Entry(const BamRead& b);
62
63      /**
64         \return const reference to BamRead
65       */
66      const BamRead& bam(void) const
67      {
68        YAT_ASSERT(bam_.get());
69        return *bam_;
70      }
71
72      /**
73         \return cigar operation at current position
74       */
75      uint8_t cigar_op(void) const { return *cigar_; }
76
77
78      /**
79         \return length of current cigar element
80       */
81      uint32_t cigar_oplen(void) const
82      {
83        YAT_ASSERT(bam_.get());
84        YAT_ASSERT(cigar_.base() >= bam_->cigar());
85        return bam_cigar_oplen(*cigar_.base());
86      }
87
88
89      /**
90         \return base quality of current position
91       */
92      unsigned short qual(void) const
93      {
94        YAT_ASSERT(bam_.get());
95        YAT_ASSERT(qpos_ < bam_->sequence_length());
96        return bam_->qual(qpos_);
97      }
98
99      /**
100         \return base of current position
101       */
102      uint8_t sequence(void) const;
103
104      /**
105         \brief iterate to next position with coverage
106
107         If any read has an insertion here, next position will be a
108         skip_ref locus; otherwise we iterate to next locus on
109         reference that has coverage.
110       */
111      // FIXME should this be public?
112      void increment(void);
113
114      /**
115         \return true if this position was deleted in this query
116       */
117      bool is_del(void) const
118      {
119        return cigar_type()==2;
120      }
121
122      /**
123         \return index of base at this position
124       */
125      size_t qpos(void) const { return qpos_; }
126    private:
127      boost::shared_ptr<BamRead> bam_;
128      // index of base pointed to
129      size_t qpos_;
130      CigarIterator cigar_;
131      size_t skip_;
132      // return 0, 1, 2, or 3 see bam_cigar_type in yat/utility/Cigar.h
133      uint8_t cigar_type(void) const { return bam_cigar_type(cigar_op()); }
134      friend class Pileup;
135    };
136
137  public:
138    /**
139       Const iterator that can be used to iterate over reads that
140       overlap with current position (as deined by tid() and pos()).
141     */
142    typedef typename std::list<Entry>::const_iterator const_iterator;
143
144    /**
145       Create an empty Pileup
146     */
147    Pileup(void) {}
148
149    /**
150       Create a Pileup that will use reads in range [first, last)
151     */
152    Pileup(Iterator first, Iterator last);
153
154    /**
155       \return an iterator that points to the first Entry in Pileup.
156
157       Iteration is done in same order as range provided in
158       constructor. Reads that do not overlap with current position
159       (tid:pos) are ignored.
160     */
161    const_iterator begin(void) const;
162
163    /**
164       \return an iterator that points one past the last element in
165       the Pileup.
166     */
167    const_iterator end(void) const;
168
169    /**
170       \return true if there are more positions to increment into
171     */
172    bool good(void) const { return !(bam_ == bam_end_ && data_.empty()); }
173
174    /**
175       \brief step to next position with coverage
176     */
177    void increment(void);
178
179    /**
180       \return template id as defined in bam file header
181     */
182    int32_t tid(void) const { return tid_; }
183
184    /**
185       \return position
186     */
187    int32_t pos(void) const { return pos_; }
188
189    /**
190       \return true if position does not exist in reference, i.e.,
191       inserted region.
192     */
193    bool skip_ref(void) const;
194  private:
195    typedef typename std::list<Entry>::iterator iterator;
196
197    Iterator bam_;
198    Iterator bam_end_;
199    int32_t pos_;
200    int32_t tid_;
201    std::list<Entry> data_;
202    uint32_t skip_ref_;
203  };
204
205
206  /**
207     \relates Pileup
208
209     \since new in yat 0.13
210   */
211  template<typename Iterator>
212  Pileup<Iterator> make_pileup(Iterator first, Iterator last)
213  {
214    return Pileup<Iterator>(first, last);
215  }
216
217  // implementations
218
219  template<typename Iterator>
220  Pileup<Iterator>::Pileup(Iterator first, Iterator last)
221    : bam_(first), bam_end_(last), pos_(0), tid_(0), skip_ref_(0)
222  {
223    increment();
224  }
225
226
227  template<typename Iterator>
228  typename Pileup<Iterator>::const_iterator Pileup<Iterator>::begin(void) const
229  {
230    return data_.begin();
231  }
232
233
234  template<typename Iterator>
235  typename Pileup<Iterator>::const_iterator Pileup<Iterator>::end(void) const
236  {
237    return data_.end();
238  }
239
240
241  template<typename Iterator>
242  void Pileup<Iterator>::increment(void)
243  {
244    if (data_.empty()) {
245      if (bam_==bam_end_)
246        return;
247      tid_ = bam_->core().tid;
248      pos_ = bam_->core().pos;
249    }
250    else {
251
252      // in insertion
253      if (skip_ref()) {
254        for (iterator it = data_.begin(); it!=data_.end(); ++it) {
255          if (it->cigar_type() == 1) {
256            it->increment();
257          }
258          --it->skip_;
259        }
260        --skip_ref_;
261      }
262      // walk one base forward on reference
263      else {
264        ++pos_;
265        typename std::list<Entry>::iterator read = data_.begin();
266        while (read != data_.end()) {
267          read->increment();
268          // remove entry if read is entirely left of pos
269          if (read->bam().end() <= pos_) { // FIXME speedup possible
270            data_.erase(read++);
271            continue;
272          }
273          // check if we're stepping into insertion
274          if (read->cigar_op() == BAM_CINS) {
275            skip_ref_ = std::max(skip_ref_, read->cigar_oplen());
276          }
277          ++read;
278        }
279
280        // stepped into an insertion
281        if (skip_ref_)
282          for (iterator iter=data_.begin(); iter!=data_.end(); ++iter)
283            iter->skip_ = skip_ref_;
284      }
285    }
286
287    // read data
288    while (bam_!=bam_end_ && bam_->core().tid==tid_ && bam_->pos()<=pos_) {
289      // skip unmapped reads
290      if (!(bam_->core().flag & BAM_FUNMAP)) {
291        data_.push_back(Entry(*bam_));
292      }
293      ++bam_;
294    }
295  }
296
297
298  template<typename Iterator>
299  bool Pileup<Iterator>::skip_ref(void) const
300  {
301    return skip_ref_;
302  }
303
304
305  template<typename Iterator>
306  Pileup<Iterator>::Entry::Entry(const BamRead& b)
307    : bam_(new BamRead(b)), qpos_(0), cigar_(bam_->cigar()), skip_(0)
308  {
309    YAT_ASSERT(cigar_.base() == bam_->cigar());
310    if (bam_->core().n_cigar==0)
311      return;
312    // iterate cigar to first match
313    while (qpos_ < bam_->sequence_length() && cigar_type() != 3) {
314      // type is either 0 (hardclipped) or 1 (softclipped)
315      YAT_ASSERT(cigar_type()!=2);
316      if (cigar_type() == 1)
317        ++qpos_;
318      ++cigar_;
319    }
320  }
321
322
323  template<typename Iterator>
324  void Pileup<Iterator>::Entry::increment(void)
325  {
326    YAT_ASSERT(cigar_.base() >= bam_->cigar());
327    if (cigar_type() & 1)
328      ++qpos_;
329    ++cigar_;
330    YAT_ASSERT(cigar_.base() >= bam_->cigar());
331  }
332
333
334  template<typename Iterator>
335  uint8_t Pileup<Iterator>::Entry::sequence(void) const
336  {
337    if (skip_==0) {
338      if (cigar_type() & 1)
339        return bam_->sequence(qpos_);
340      return 0;
341    }
342    if (cigar_op()!=BAM_CINS)
343      return 0;
344    return bam_->sequence(qpos_);
345  }
346
347}}}
348#endif
Note: See TracBrowser for help on using the repository browser.