source: trunk/yat/omic/Pileup.h @ 3792

Last change on this file since 3792 was 3792, checked in by Peter, 4 years ago

update copyright years

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 8.8 KB
Line 
1#ifndef theplu_yat_omic_pileup
2#define theplu_yat_omic_pileup
3
4// $Id: Pileup.h 3792 2019-04-12 07:15:09Z peter $
5
6/*
7  Copyright (C) 2014, 2016, 2017, 2018 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with yat. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "BamRead.h"
26
27#include "yat/utility/CigarIterator.h"
28#include "yat/utility/yat_assert.h"
29
30#include <boost/iterator/iterator_traits.hpp>
31#include <boost/iterator/iterator_concepts.hpp>
32#include <boost/concept/assert.hpp>
33#include <boost/shared_ptr.hpp>
34
35#include <algorithm>
36#include <list>
37
38namespace theplu {
39namespace yat {
40namespace omic {
41
42  /**
43     Pileup takes a range of BamReads and pile them up on the current
44     position. User can investigate the pile of reads at current
45     (locus) position via begin() and end(), which provide access to
46     reads at current position. The position can be incremented and in
47     that way the user can investigate each position of the genome.
48
49     \since new in yat 0.13
50
51     Type Requirments:
52     - \c Iterator is a \readable_iterator
53     - \c Iterator is a \single_pass_iterator
54     - \c value_type is convertible to BamRead
55   */
56  template<typename Iterator>
57  class Pileup
58  {
59  public:
60    /**
61       Essentially a BamRead but with additional information regarding
62       base we currently pointing to
63     */
64    class Entry
65    {
66    public:
67      /**
68         \brief default constructor
69       */
70      Entry(void) {}
71
72      /**
73         Create an Entry with read \a b. Entry is initialized to point
74         to first aligned base (b.pos()).
75       */
76      Entry(const BamRead& b);
77
78      /**
79         \return const reference to BamRead
80       */
81      const BamRead& bam(void) const
82      {
83        YAT_ASSERT(bam_.get());
84        return *bam_;
85      }
86
87      /**
88         \return cigar operation at current position
89       */
90      uint8_t cigar_op(void) const { return *cigar_; }
91
92
93      /**
94         \return length of current cigar element
95       */
96      uint32_t cigar_oplen(void) const
97      {
98        YAT_ASSERT(bam_.get());
99        YAT_ASSERT(cigar_.base() >= bam_->cigar());
100        return bam_cigar_oplen(*cigar_.base());
101      }
102
103
104      /**
105         \return base quality of current position
106       */
107      unsigned short qual(void) const
108      {
109        YAT_ASSERT(bam_.get());
110        YAT_ASSERT(qpos_ < bam_->sequence_length());
111        return bam_->qual(qpos_);
112      }
113
114      /**
115         Returned base is encoded in 4 bits and can be converted to
116         char using nt16_str(uint8_t).
117
118         \return base of current position
119       */
120      uint8_t sequence(void) const;
121
122      /**
123         Increment CIGAR. If current CIGAR element is consuming query
124         (not deletion), increment to next base on query.
125       */
126      void increment(void);
127
128      /**
129         \return true if this position was deleted in this query
130       */
131      bool is_del(void) const
132      {
133        return cigar_type()==2;
134      }
135
136      /**
137         \return index of base at this position
138       */
139      size_t qpos(void) const { return qpos_; }
140    private:
141      boost::shared_ptr<BamRead> bam_;
142      // index of base pointed to
143      size_t qpos_;
144      yat::utility::CigarIterator<const uint32_t*> cigar_;
145      size_t skip_;
146      // return 0, 1, 2, or 3 see bam_cigar_type in yat/utility/Cigar.h
147      uint8_t cigar_type(void) const { return bam_cigar_type(cigar_op()); }
148      friend class Pileup;
149    };
150
151  public:
152    /**
153       Const iterator that can be used to iterate over reads that
154       overlap with current position (as defined by tid() and pos()).
155     */
156    typedef typename std::list<Entry>::const_iterator const_iterator;
157
158    /**
159       Create an empty Pileup
160     */
161    Pileup(void) {}
162
163    /**
164       Create a Pileup that will use reads in range [first, last). The
165       range must be sorted as defined by BamLessPos.
166
167       Iterator is a \readable_iterator.
168     */
169    Pileup(Iterator first, Iterator last);
170
171    /**
172       \return an iterator that points to the first Entry in Pileup.
173
174       Iteration is done in same order as range provided in
175       constructor. Reads that do not overlap with current position
176       (tid:pos) are ignored.
177     */
178    const_iterator begin(void) const;
179
180    /**
181       \return an iterator that points one past the last element in
182       the Pileup.
183     */
184    const_iterator end(void) const;
185
186    /**
187       \return true if there are more positions to increment into
188     */
189    bool good(void) const { return !(bam_ == bam_end_ && data_.empty()); }
190
191    /**
192       \brief step to next position.
193
194       If any Entry has a insertion at current position, increment
195       each Entry that has an insertion. Otherwise increment to next
196       position and update each Entry accordingly.
197
198       If the position is not covered by any read, the Pileup
199       fastforward to next covered locus.
200     */
201    void increment(void);
202
203    /**
204       \return template id as defined in bam file header
205     */
206    int32_t tid(void) const { return tid_; }
207
208    /**
209       \return position
210     */
211    int32_t pos(void) const { return pos_; }
212
213    /**
214       \return true if position does not exist in reference, i.e.,
215       inserted region.
216     */
217    bool skip_ref(void) const;
218  private:
219    typedef typename std::list<Entry>::iterator iterator;
220
221    Iterator bam_;
222    Iterator bam_end_;
223    int32_t pos_;
224    int32_t tid_;
225    std::list<Entry> data_;
226    uint32_t skip_ref_;
227  };
228
229
230  /**
231     \relates Pileup
232
233     \since new in yat 0.13
234   */
235  template<typename Iterator>
236  Pileup<Iterator> make_pileup(Iterator first, Iterator last)
237  {
238    return Pileup<Iterator>(first, last);
239  }
240
241  // implementations
242
243  template<typename Iterator>
244  Pileup<Iterator>::Pileup(Iterator first, Iterator last)
245    : bam_(first), bam_end_(last), pos_(0), tid_(0), skip_ref_(0)
246  {
247    BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<Iterator>));
248    BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<Iterator>));
249    using boost::Convertible;
250    typedef typename boost::iterator_value<Iterator>::type value_type;
251    BOOST_CONCEPT_ASSERT((Convertible<value_type, BamRead>));
252    increment();
253  }
254
255
256  template<typename Iterator>
257  typename Pileup<Iterator>::const_iterator Pileup<Iterator>::begin(void) const
258  {
259    return data_.begin();
260  }
261
262
263  template<typename Iterator>
264  typename Pileup<Iterator>::const_iterator Pileup<Iterator>::end(void) const
265  {
266    return data_.end();
267  }
268
269
270  template<typename Iterator>
271  void Pileup<Iterator>::increment(void)
272  {
273    if (!data_.empty()) {
274      // in insertion
275      if (skip_ref()) {
276        for (iterator it = data_.begin(); it!=data_.end(); ++it) {
277          if (it->cigar_type() == 1) {
278            it->increment();
279          }
280          --it->skip_;
281        }
282        --skip_ref_;
283      }
284      // walk one base forward on reference
285      else {
286        ++pos_;
287        typename std::list<Entry>::iterator read = data_.begin();
288        while (read != data_.end()) {
289          read->increment();
290          // remove entry if read is entirely left of pos
291          if (read->bam().end() <= pos_) { // FIXME speedup possible
292            data_.erase(read++);
293            continue;
294          }
295          // check if we're stepping into insertion
296          if (read->cigar_op() == BAM_CINS) {
297            skip_ref_ = std::max(skip_ref_, read->cigar_oplen());
298          }
299          ++read;
300        }
301
302        // stepped into an insertion
303        if (skip_ref_)
304          for (iterator iter=data_.begin(); iter!=data_.end(); ++iter)
305            iter->skip_ = skip_ref_;
306      }
307    }
308
309
310    // fast forward to next covered locus
311    if (data_.empty()) {
312      if (bam_==bam_end_)
313        return;
314      tid_ = bam_->core().tid;
315      pos_ = bam_->core().pos;
316    }
317
318    // read data
319    while (bam_!=bam_end_ && bam_->core().tid==tid_ && bam_->pos()<=pos_) {
320      // skip unmapped reads
321      if (!(bam_->core().flag & BAM_FUNMAP)) {
322        data_.push_back(Entry(*bam_));
323      }
324      ++bam_;
325    }
326  }
327
328
329  template<typename Iterator>
330  bool Pileup<Iterator>::skip_ref(void) const
331  {
332    return skip_ref_;
333  }
334
335
336  template<typename Iterator>
337  Pileup<Iterator>::Entry::Entry(const BamRead& b)
338    : bam_(new BamRead(b)), qpos_(0), cigar_(bam_->cigar()), skip_(0)
339  {
340    YAT_ASSERT(cigar_.base() == bam_->cigar());
341    if (bam_->core().n_cigar==0)
342      return;
343    // iterate cigar to first match
344    while (qpos_ < bam_->sequence_length() && cigar_type() != 3) {
345      // type is either 0 (hardclipped) or 1 (softclipped)
346      YAT_ASSERT(cigar_type()!=2);
347      if (cigar_type() == 1)
348        ++qpos_;
349      ++cigar_;
350    }
351  }
352
353
354  template<typename Iterator>
355  void Pileup<Iterator>::Entry::increment(void)
356  {
357    YAT_ASSERT(cigar_.base() >= bam_->cigar());
358    if (cigar_type() & 1)
359      ++qpos_;
360    ++cigar_;
361    YAT_ASSERT(cigar_.base() >= bam_->cigar());
362  }
363
364
365  template<typename Iterator>
366  uint8_t Pileup<Iterator>::Entry::sequence(void) const
367  {
368    if (skip_==0) {
369      if (cigar_type() & 1)
370        return bam_->sequence(qpos_);
371      return 0;
372    }
373    if (cigar_op()!=BAM_CINS)
374      return 0;
375    return bam_->sequence(qpos_);
376  }
377
378}}}
379#endif
Note: See TracBrowser for help on using the repository browser.