source: trunk/yat/omic/Pileup.h @ 3334

Last change on this file since 3334 was 3334, checked in by Peter, 7 years ago

Avoid stopping at position one passed a pile, instead fast forward to
next locus with coverage.

refs #806

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.7 KB
Line 
1#ifndef theplu_yat_omic_pileup
2#define theplu_yat_omic_pileup
3
4// $Id: Pileup.h 3334 2014-10-23 13:26:35Z peter $
5
6/*
7  Copyright (C) 2014 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "BamRead.h"
26#include "CigarIterator.h"
27
28#include "yat/utility/yat_assert.h"
29
30#include <boost/shared_ptr.hpp>
31
32#include <algorithm>
33#include <list>
34
35namespace theplu {
36namespace yat {
37namespace omic {
38
39  /**
40     \since new in yat 0.13
41   */
42  template<typename Iterator>
43  class Pileup
44  {
45  public:
46    /**
47       Essentially a BamRead but with additional information regarding
48       base we currently pointing to
49     */
50    class Entry
51    {
52    public:
53      /**
54         \brief default constructor
55       */
56      Entry(void) {}
57
58      /**
59         Create an Entry with read \a b. Entry is initialized to point
60         to first aligned base (b.pos()).
61       */
62      Entry(const BamRead& b);
63
64      /**
65         \return const reference to BamRead
66       */
67      const BamRead& bam(void) const
68      {
69        YAT_ASSERT(bam_.get());
70        return *bam_;
71      }
72
73      /**
74         \return cigar operation at current position
75       */
76      uint8_t cigar_op(void) const { return *cigar_; }
77
78
79      /**
80         \return length of current cigar element
81       */
82      uint32_t cigar_oplen(void) const
83      {
84        YAT_ASSERT(bam_.get());
85        YAT_ASSERT(cigar_.base() >= bam_->cigar());
86        return bam_cigar_oplen(*cigar_.base());
87      }
88
89
90      /**
91         \return base quality of current position
92       */
93      unsigned short qual(void) const
94      {
95        YAT_ASSERT(bam_.get());
96        YAT_ASSERT(qpos_ < bam_->sequence_length());
97        return bam_->qual(qpos_);
98      }
99
100      /**
101         \return base of current position
102       */
103      uint8_t sequence(void) const;
104
105      /**
106         Increment CIGAR. If current CIGAR element is consuming query
107         (not deletion), increment to next base on query.
108       */
109      void increment(void);
110
111      /**
112         \return true if this position was deleted in this query
113       */
114      bool is_del(void) const
115      {
116        return cigar_type()==2;
117      }
118
119      /**
120         \return index of base at this position
121       */
122      size_t qpos(void) const { return qpos_; }
123    private:
124      boost::shared_ptr<BamRead> bam_;
125      // index of base pointed to
126      size_t qpos_;
127      CigarIterator cigar_;
128      size_t skip_;
129      // return 0, 1, 2, or 3 see bam_cigar_type in yat/utility/Cigar.h
130      uint8_t cigar_type(void) const { return bam_cigar_type(cigar_op()); }
131      friend class Pileup;
132    };
133
134  public:
135    /**
136       Const iterator that can be used to iterate over reads that
137       overlap with current position (as deined by tid() and pos()).
138     */
139    typedef typename std::list<Entry>::const_iterator const_iterator;
140
141    /**
142       Create an empty Pileup
143     */
144    Pileup(void) {}
145
146    /**
147       Create a Pileup that will use reads in range [first, last). The
148       range must be sorted as defined by BamLessPos.
149
150       Iterator is a \readable_iterator.
151     */
152    Pileup(Iterator first, Iterator last);
153
154    /**
155       \return an iterator that points to the first Entry in Pileup.
156
157       Iteration is done in same order as range provided in
158       constructor. Reads that do not overlap with current position
159       (tid:pos) are ignored.
160     */
161    const_iterator begin(void) const;
162
163    /**
164       \return an iterator that points one past the last element in
165       the Pileup.
166     */
167    const_iterator end(void) const;
168
169    /**
170       \return true if there are more positions to increment into
171     */
172    bool good(void) const { return !(bam_ == bam_end_ && data_.empty()); }
173
174    /**
175       \brief step to next position.
176
177       If any Entry has a insertion at current position, increment
178       each Entry that has an insertion. Otherwise increment to next
179       position and update each Entry accordingly.
180     */
181    void increment(void);
182
183    /**
184       \return template id as defined in bam file header
185     */
186    int32_t tid(void) const { return tid_; }
187
188    /**
189       \return position
190     */
191    int32_t pos(void) const { return pos_; }
192
193    /**
194       \return true if position does not exist in reference, i.e.,
195       inserted region.
196     */
197    bool skip_ref(void) const;
198  private:
199    typedef typename std::list<Entry>::iterator iterator;
200
201    Iterator bam_;
202    Iterator bam_end_;
203    int32_t pos_;
204    int32_t tid_;
205    std::list<Entry> data_;
206    uint32_t skip_ref_;
207  };
208
209
210  /**
211     \relates Pileup
212
213     \since new in yat 0.13
214   */
215  template<typename Iterator>
216  Pileup<Iterator> make_pileup(Iterator first, Iterator last)
217  {
218    return Pileup<Iterator>(first, last);
219  }
220
221  // implementations
222
223  template<typename Iterator>
224  Pileup<Iterator>::Pileup(Iterator first, Iterator last)
225    : bam_(first), bam_end_(last), pos_(0), tid_(0), skip_ref_(0)
226  {
227    increment();
228  }
229
230
231  template<typename Iterator>
232  typename Pileup<Iterator>::const_iterator Pileup<Iterator>::begin(void) const
233  {
234    return data_.begin();
235  }
236
237
238  template<typename Iterator>
239  typename Pileup<Iterator>::const_iterator Pileup<Iterator>::end(void) const
240  {
241    return data_.end();
242  }
243
244
245  template<typename Iterator>
246  void Pileup<Iterator>::increment(void)
247  {
248    if (!data_.empty()) {
249      // in insertion
250      if (skip_ref()) {
251        for (iterator it = data_.begin(); it!=data_.end(); ++it) {
252          if (it->cigar_type() == 1) {
253            it->increment();
254          }
255          --it->skip_;
256        }
257        --skip_ref_;
258      }
259      // walk one base forward on reference
260      else {
261        ++pos_;
262        typename std::list<Entry>::iterator read = data_.begin();
263        while (read != data_.end()) {
264          read->increment();
265          // remove entry if read is entirely left of pos
266          if (read->bam().end() <= pos_) { // FIXME speedup possible
267            data_.erase(read++);
268            continue;
269          }
270          // check if we're stepping into insertion
271          if (read->cigar_op() == BAM_CINS) {
272            skip_ref_ = std::max(skip_ref_, read->cigar_oplen());
273          }
274          ++read;
275        }
276
277        // stepped into an insertion
278        if (skip_ref_)
279          for (iterator iter=data_.begin(); iter!=data_.end(); ++iter)
280            iter->skip_ = skip_ref_;
281      }
282    }
283
284
285    // fast forward to next covered locus
286    if (data_.empty()) {
287      if (bam_==bam_end_)
288        return;
289      tid_ = bam_->core().tid;
290      pos_ = bam_->core().pos;
291    }
292
293    // read data
294    while (bam_!=bam_end_ && bam_->core().tid==tid_ && bam_->pos()<=pos_) {
295      // skip unmapped reads
296      if (!(bam_->core().flag & BAM_FUNMAP)) {
297        data_.push_back(Entry(*bam_));
298      }
299      ++bam_;
300    }
301  }
302
303
304  template<typename Iterator>
305  bool Pileup<Iterator>::skip_ref(void) const
306  {
307    return skip_ref_;
308  }
309
310
311  template<typename Iterator>
312  Pileup<Iterator>::Entry::Entry(const BamRead& b)
313    : bam_(new BamRead(b)), qpos_(0), cigar_(bam_->cigar()), skip_(0)
314  {
315    YAT_ASSERT(cigar_.base() == bam_->cigar());
316    if (bam_->core().n_cigar==0)
317      return;
318    // iterate cigar to first match
319    while (qpos_ < bam_->sequence_length() && cigar_type() != 3) {
320      // type is either 0 (hardclipped) or 1 (softclipped)
321      YAT_ASSERT(cigar_type()!=2);
322      if (cigar_type() == 1)
323        ++qpos_;
324      ++cigar_;
325    }
326  }
327
328
329  template<typename Iterator>
330  void Pileup<Iterator>::Entry::increment(void)
331  {
332    YAT_ASSERT(cigar_.base() >= bam_->cigar());
333    if (cigar_type() & 1)
334      ++qpos_;
335    ++cigar_;
336    YAT_ASSERT(cigar_.base() >= bam_->cigar());
337  }
338
339
340  template<typename Iterator>
341  uint8_t Pileup<Iterator>::Entry::sequence(void) const
342  {
343    if (skip_==0) {
344      if (cigar_type() & 1)
345        return bam_->sequence(qpos_);
346      return 0;
347    }
348    if (cigar_op()!=BAM_CINS)
349      return 0;
350    return bam_->sequence(qpos_);
351  }
352
353}}}
354#endif
Note: See TracBrowser for help on using the repository browser.