source: trunk/yat/omic/Pileup.h @ 3333

Last change on this file since 3333 was 3333, checked in by Peter, 7 years ago

correct docs. refs #806

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.3 KB
Line 
1#ifndef theplu_yat_omic_pileup
2#define theplu_yat_omic_pileup
3
4// $Id: Pileup.h 3333 2014-10-23 12:26:50Z peter $
5
6/*
7  Copyright (C) 2014 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "BamRead.h"
26#include "CigarIterator.h"
27
28#include "yat/utility/yat_assert.h"
29
30#include <boost/shared_ptr.hpp>
31
32#include <algorithm>
33#include <list>
34
35namespace theplu {
36namespace yat {
37namespace omic {
38
39  /**
40     \since new in yat 0.13
41   */
42  template<typename Iterator>
43  class Pileup
44  {
45  public:
46    /**
47       Essentially a BamRead but with additional information regarding
48       base we currently pointing to
49     */
50    class Entry
51    {
52    public:
53      /**
54         \brief default constructor
55       */
56      Entry(void) {}
57
58      /**
59         Create an Entry with read \a b
60       */
61      Entry(const BamRead& b);
62
63      /**
64         \return const reference to BamRead
65       */
66      const BamRead& bam(void) const
67      {
68        YAT_ASSERT(bam_.get());
69        return *bam_;
70      }
71
72      /**
73         \return cigar operation at current position
74       */
75      uint8_t cigar_op(void) const { return *cigar_; }
76
77
78      /**
79         \return length of current cigar element
80       */
81      uint32_t cigar_oplen(void) const
82      {
83        YAT_ASSERT(bam_.get());
84        YAT_ASSERT(cigar_.base() >= bam_->cigar());
85        return bam_cigar_oplen(*cigar_.base());
86      }
87
88
89      /**
90         \return base quality of current position
91       */
92      unsigned short qual(void) const
93      {
94        YAT_ASSERT(bam_.get());
95        YAT_ASSERT(qpos_ < bam_->sequence_length());
96        return bam_->qual(qpos_);
97      }
98
99      /**
100         \return base of current position
101       */
102      uint8_t sequence(void) const;
103
104      /**
105         Increment CIGAR. If current CIGAR element is consuming query
106         (not deletion), increment to next base on query.
107       */
108      void increment(void);
109
110      /**
111         \return true if this position was deleted in this query
112       */
113      bool is_del(void) const
114      {
115        return cigar_type()==2;
116      }
117
118      /**
119         \return index of base at this position
120       */
121      size_t qpos(void) const { return qpos_; }
122    private:
123      boost::shared_ptr<BamRead> bam_;
124      // index of base pointed to
125      size_t qpos_;
126      CigarIterator cigar_;
127      size_t skip_;
128      // return 0, 1, 2, or 3 see bam_cigar_type in yat/utility/Cigar.h
129      uint8_t cigar_type(void) const { return bam_cigar_type(cigar_op()); }
130      friend class Pileup;
131    };
132
133  public:
134    /**
135       Const iterator that can be used to iterate over reads that
136       overlap with current position (as deined by tid() and pos()).
137     */
138    typedef typename std::list<Entry>::const_iterator const_iterator;
139
140    /**
141       Create an empty Pileup
142     */
143    Pileup(void) {}
144
145    /**
146       Create a Pileup that will use reads in range [first, last)
147     */
148    Pileup(Iterator first, Iterator last);
149
150    /**
151       \return an iterator that points to the first Entry in Pileup.
152
153       Iteration is done in same order as range provided in
154       constructor. Reads that do not overlap with current position
155       (tid:pos) are ignored.
156     */
157    const_iterator begin(void) const;
158
159    /**
160       \return an iterator that points one past the last element in
161       the Pileup.
162     */
163    const_iterator end(void) const;
164
165    /**
166       \return true if there are more positions to increment into
167     */
168    bool good(void) const { return !(bam_ == bam_end_ && data_.empty()); }
169
170    /**
171       \brief step to next position with coverage
172     */
173    void increment(void);
174
175    /**
176       \return template id as defined in bam file header
177     */
178    int32_t tid(void) const { return tid_; }
179
180    /**
181       \return position
182     */
183    int32_t pos(void) const { return pos_; }
184
185    /**
186       \return true if position does not exist in reference, i.e.,
187       inserted region.
188     */
189    bool skip_ref(void) const;
190  private:
191    typedef typename std::list<Entry>::iterator iterator;
192
193    Iterator bam_;
194    Iterator bam_end_;
195    int32_t pos_;
196    int32_t tid_;
197    std::list<Entry> data_;
198    uint32_t skip_ref_;
199  };
200
201
202  /**
203     \relates Pileup
204
205     \since new in yat 0.13
206   */
207  template<typename Iterator>
208  Pileup<Iterator> make_pileup(Iterator first, Iterator last)
209  {
210    return Pileup<Iterator>(first, last);
211  }
212
213  // implementations
214
215  template<typename Iterator>
216  Pileup<Iterator>::Pileup(Iterator first, Iterator last)
217    : bam_(first), bam_end_(last), pos_(0), tid_(0), skip_ref_(0)
218  {
219    increment();
220  }
221
222
223  template<typename Iterator>
224  typename Pileup<Iterator>::const_iterator Pileup<Iterator>::begin(void) const
225  {
226    return data_.begin();
227  }
228
229
230  template<typename Iterator>
231  typename Pileup<Iterator>::const_iterator Pileup<Iterator>::end(void) const
232  {
233    return data_.end();
234  }
235
236
237  template<typename Iterator>
238  void Pileup<Iterator>::increment(void)
239  {
240    if (data_.empty()) {
241      if (bam_==bam_end_)
242        return;
243      tid_ = bam_->core().tid;
244      pos_ = bam_->core().pos;
245    }
246    else {
247
248      // in insertion
249      if (skip_ref()) {
250        for (iterator it = data_.begin(); it!=data_.end(); ++it) {
251          if (it->cigar_type() == 1) {
252            it->increment();
253          }
254          --it->skip_;
255        }
256        --skip_ref_;
257      }
258      // walk one base forward on reference
259      else {
260        ++pos_;
261        typename std::list<Entry>::iterator read = data_.begin();
262        while (read != data_.end()) {
263          read->increment();
264          // remove entry if read is entirely left of pos
265          if (read->bam().end() <= pos_) { // FIXME speedup possible
266            data_.erase(read++);
267            continue;
268          }
269          // check if we're stepping into insertion
270          if (read->cigar_op() == BAM_CINS) {
271            skip_ref_ = std::max(skip_ref_, read->cigar_oplen());
272          }
273          ++read;
274        }
275
276        // stepped into an insertion
277        if (skip_ref_)
278          for (iterator iter=data_.begin(); iter!=data_.end(); ++iter)
279            iter->skip_ = skip_ref_;
280      }
281    }
282
283    // read data
284    while (bam_!=bam_end_ && bam_->core().tid==tid_ && bam_->pos()<=pos_) {
285      // skip unmapped reads
286      if (!(bam_->core().flag & BAM_FUNMAP)) {
287        data_.push_back(Entry(*bam_));
288      }
289      ++bam_;
290    }
291  }
292
293
294  template<typename Iterator>
295  bool Pileup<Iterator>::skip_ref(void) const
296  {
297    return skip_ref_;
298  }
299
300
301  template<typename Iterator>
302  Pileup<Iterator>::Entry::Entry(const BamRead& b)
303    : bam_(new BamRead(b)), qpos_(0), cigar_(bam_->cigar()), skip_(0)
304  {
305    YAT_ASSERT(cigar_.base() == bam_->cigar());
306    if (bam_->core().n_cigar==0)
307      return;
308    // iterate cigar to first match
309    while (qpos_ < bam_->sequence_length() && cigar_type() != 3) {
310      // type is either 0 (hardclipped) or 1 (softclipped)
311      YAT_ASSERT(cigar_type()!=2);
312      if (cigar_type() == 1)
313        ++qpos_;
314      ++cigar_;
315    }
316  }
317
318
319  template<typename Iterator>
320  void Pileup<Iterator>::Entry::increment(void)
321  {
322    YAT_ASSERT(cigar_.base() >= bam_->cigar());
323    if (cigar_type() & 1)
324      ++qpos_;
325    ++cigar_;
326    YAT_ASSERT(cigar_.base() >= bam_->cigar());
327  }
328
329
330  template<typename Iterator>
331  uint8_t Pileup<Iterator>::Entry::sequence(void) const
332  {
333    if (skip_==0) {
334      if (cigar_type() & 1)
335        return bam_->sequence(qpos_);
336      return 0;
337    }
338    if (cigar_op()!=BAM_CINS)
339      return 0;
340    return bam_->sequence(qpos_);
341  }
342
343}}}
344#endif
Note: See TracBrowser for help on using the repository browser.