source: trunk/yat/omic/Pileup.h @ 3327

Last change on this file since 3327 was 3327, checked in by Peter, 7 years ago

redefine Pileup::const_iterator so it avoids reads that do not overlap current position. refs #806

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 8.4 KB
Line 
1#ifndef theplu_yat_omic_pileup
2#define theplu_yat_omic_pileup
3
4// $Id: Pileup.h 3327 2014-10-10 08:30:20Z peter $
5
6/*
7  Copyright (C) 2012, 2013, 2014 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "BamRead.h"
26
27#include "yat/utility/yat_assert.h"
28
29#include <boost/iterator/filter_iterator.hpp>
30
31#include <algorithm>
32#include <deque>
33
34namespace theplu {
35namespace yat {
36namespace omic {
37
38  /**
39     \since new in yat 0.13
40   */
41  template<typename Iterator>
42  class Pileup
43  {
44  public:
45    /**
46       Essentially a BamRead but with additional information regarding
47       base we currently pointing to
48     */
49    class Entry
50    {
51    public:
52      /**
53         Create an Entry with first read \a b
54       */
55      Entry(const yat::omic::BamRead& b);
56
57      /**
58         \return const ref to BamRead
59       */
60      const yat::omic::BamRead& bam(void) const { return bam_; }
61
62      /**
63         \return cigar element at current position
64       */
65      uint32_t cigar(void) const
66      {
67        YAT_ASSERT(c_index_ < bam_.core().n_cigar);
68        return bam_.cigar(c_index_);
69      }
70
71      /**
72         \return base quality of current position
73       */
74      unsigned short qual(void) const
75      {
76        YAT_ASSERT(qpos_ < bam_.sequence_length());
77        return bam_.qual(qpos_);
78      }
79
80      /**
81         \return base of current position
82       */
83      uint8_t sequence(void) const;
84
85      /**
86         \brief iterate to next position with coverage
87
88         If any read has an insertion here, next position will be a
89         skip_ref locus; otherwise we iterate to next locus on
90         reference that has coverage.
91       */
92      // FIXME should this be public?
93      void increment(void);
94
95      /**
96         \return true if beyond last (aligned) base, i.e., we don't
97         count clipped bases
98       */
99      bool end_of_sequence(void) const
100      {
101        return qpos_ >= bam_.sequence_length();
102      }
103
104      /**
105         \return true if this position was deleted in this query
106       */
107      bool is_del(void) const
108      {
109        YAT_ASSERT(c_index_ < bam_.core().n_cigar);
110        return bam_cigar_type(cigar())==2;
111      }
112
113      /**
114         \return index of base at this position
115       */
116      size_t qpos(void) const { return qpos_; }
117    private:
118      yat::omic::BamRead bam_;
119      // index of base pointed to
120      size_t qpos_;
121      size_t c_index_;
122      size_t c_index2_;
123      size_t skip_;
124      void step_cigar(void);
125      uint32_t cigar_op(void) const
126      {
127        return bam_cigar_op(cigar());
128      }
129      friend class Pileup;
130    };
131
132  private:
133    class Filter
134    {
135    public:
136      Filter(void) {}
137      Filter(int32_t tid, int32_t pos) : tid_(tid), pos_(pos) {}
138      // return true if Entry read overlaps with tid:pos
139      bool operator()(const Entry& read) const
140      {
141        return read.bam().pos() <= pos_ && pos_ < read.bam().end()
142          && read.bam().tid() == tid_;
143      }
144    private:
145      int32_t tid_;
146      int32_t pos_;
147    };
148
149  public:
150    /**
151       Const iterator that can be used to iterate over reads that
152       overlap with current position (as deined by tid() and pos()).
153     */
154    typedef boost::filter_iterator<
155    Filter
156    , typename std::deque<Entry>::const_iterator> const_iterator;
157
158    /**
159       Create an empty Pileup
160     */
161    Pileup(void) {}
162
163    /**
164       Create a Pileup that will use reads in range [first, last)
165     */
166    Pileup(Iterator first, Iterator last);
167
168    /**
169       \return an iterator that points to the first Entry in Pileup.
170
171       Iteration is done in same order as range provided in
172       constructor. Reads that do not overlap with current position
173       (tid:pos) are ignored.
174     */
175    const_iterator begin(void) const;
176
177    /**
178       \return an iterator that points one past the last element in
179       the Pileup.
180     */
181    const_iterator end(void) const;
182
183    /**
184       \return true if there are more positions to increment into
185     */
186    bool good(void) const { return !(bam_ == bam_end_ && data_.empty()); }
187
188    /**
189       \brief step to next position with coverage
190     */
191    void increment(void);
192
193    /**
194       \return template id as defined bam file header
195     */
196    int32_t tid(void) const { return tid_; }
197
198    /**
199       \return position
200     */
201    int32_t pos(void) const { return pos_; }
202
203    /**
204       \return true if position does not exist in reference, i.e.,
205       inserted region.
206     */
207    bool skip_ref(void) const;
208  private:
209    void step_cigar(void);
210
211    Iterator bam_;
212    Iterator bam_end_;
213    int32_t pos_;
214    int32_t tid_;
215    std::deque<Entry> data_;
216    uint32_t skip_ref_;
217  };
218
219
220  /**
221     \relates Pileup
222
223     \since new in yat 0.13
224   */
225  template<typename Iterator>
226  Pileup<Iterator> make_pileup(Iterator first, Iterator last)
227  {
228    return Pileup<Iterator>(first, last);
229  }
230
231  // implementations
232
233  template<typename Iterator>
234  Pileup<Iterator>::Pileup(Iterator first, Iterator last)
235    : bam_(first), bam_end_(last), skip_ref_(0)
236  {
237    tid_ = 0;
238    pos_ = 0;
239    increment();
240  }
241
242
243  template<typename Iterator>
244  typename Pileup<Iterator>::const_iterator Pileup<Iterator>::begin(void) const
245  {
246    Filter filter(tid(), pos());
247    typename std::deque<Entry>::const_iterator iter = data_.begin();
248    typename std::deque<Entry>::const_iterator end = data_.end();
249    return boost::make_filter_iterator(filter, iter, end);
250  }
251
252
253  template<typename Iterator>
254  typename Pileup<Iterator>::const_iterator Pileup<Iterator>::end(void) const
255  {
256    Filter filter(tid(), pos());
257    typename std::deque<Entry>::const_iterator end = data_.end();
258    return boost::make_filter_iterator(filter, end, end);
259  }
260
261
262  template<typename Iterator>
263  void Pileup<Iterator>::increment(void)
264  {
265    if (data_.empty()) {
266      if (bam_==bam_end_)
267        return;
268      tid_ = bam_->core().tid;
269      pos_ = bam_->core().pos;
270    }
271    else {
272
273      // in insertion
274      if (skip_ref()) {
275        for (size_t i=0; i<data_.size(); ++i) {
276          // ignore reads left of current position
277          if (data_[i].end_of_sequence())
278            continue;
279          if (bam_cigar_type(data_[i].cigar())==1) {
280            data_[i].increment();
281          }
282          --data_[i].skip_;
283        }
284        --skip_ref_;
285      }
286      // walk one base forward on reference
287      else {
288        ++pos_;
289        for (size_t i=0; i<data_.size(); ++i) {
290          if (data_[i].end_of_sequence())
291            continue;
292          data_[i].increment();
293          // check if we're stepping into insertion
294          if (!data_[i].end_of_sequence() && (data_[i].cigar_op() == BAM_CINS))
295            skip_ref_ = std::max(skip_ref_, bam_cigar_oplen(data_[i].cigar()));
296        }
297        // stepped into an insertion
298        if (skip_ref_) {
299          for (size_t i=0; i<data_.size(); ++i)
300            data_[i].skip_ = skip_ref_;
301        }
302      }
303    }
304
305    // read data
306    while (bam_!=bam_end_ && bam_->core().tid==tid_ && bam_->pos()<=pos_) {
307      // skip unmapped reads
308      if (!(bam_->core().flag & BAM_FUNMAP)) {
309        data_.push_back(Entry(*bam_));
310      }
311      ++bam_;
312    }
313
314    // pop reads that are end of sequence
315    while (data_.size() && data_[0].end_of_sequence()) {
316      data_.pop_front();
317    }
318  }
319
320
321  template<typename Iterator>
322  bool Pileup<Iterator>::skip_ref(void) const
323  {
324    return skip_ref_;
325  }
326
327
328  template<typename Iterator>
329  Pileup<Iterator>::Entry::Entry(const yat::omic::BamRead& b)
330    : bam_(b), qpos_(0), c_index_(0), c_index2_(0), skip_(0)
331  {
332    if (bam_.core().n_cigar==0)
333      return;
334    // iterate cigar to first match
335    while (bam_cigar_type(bam_.cigar(c_index_)) != 3) {
336      YAT_ASSERT(bam_cigar_type(bam_.cigar(c_index_))!=2);
337      if (bam_cigar_type(bam_.cigar(c_index_)) == 1) {
338        qpos_ += b.cigar_oplen(c_index_);
339      }
340      ++c_index_;
341      if (c_index_ >= bam_.core().n_cigar) {
342        qpos_ = bam_.sequence_length();
343        return;
344      }
345      YAT_ASSERT(c_index_ < bam_.core().n_cigar);
346    }
347  }
348
349
350  template<typename Iterator>
351  void Pileup<Iterator>::Entry::increment(void)
352  {
353    if (bam_cigar_type(cigar()) & 1)
354      ++qpos_;
355    step_cigar();
356  }
357
358
359  template<typename Iterator>
360  uint8_t Pileup<Iterator>::Entry::sequence(void) const
361  {
362    if (skip_==0) {
363      if (bam_cigar_type(cigar()) & 1)
364        return bam_.sequence(qpos_);
365      else
366        return 0;
367    }
368    if (cigar_op()!=BAM_CINS)
369      return 0;
370    return bam_.sequence(qpos_);
371  }
372
373
374  template<typename Iterator>
375  void Pileup<Iterator>::Entry::step_cigar(void)
376  {
377    ++c_index2_;
378    YAT_ASSERT(c_index_<bam_.core().n_cigar);
379    if (c_index2_ >= bam_.cigar_oplen(c_index_)) {
380      c_index2_ = 0;
381      ++c_index_;
382    }
383  }
384
385}}}
386#endif
Note: See TracBrowser for help on using the repository browser.