source: trunk/yat/omic/Pileup.h @ 3336

Last change on this file since 3336 was 3336, checked in by Peter, 7 years ago

closes #816

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 8.5 KB
Line 
1#ifndef theplu_yat_omic_pileup
2#define theplu_yat_omic_pileup
3
4// $Id: Pileup.h 3336 2014-10-24 23:27:45Z peter $
5
6/*
7  Copyright (C) 2014 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "BamRead.h"
26
27#include "yat/utility/CigarIterator.h"
28#include "yat/utility/yat_assert.h"
29
30#include <boost/iterator/iterator_traits.hpp>
31#include <boost/concept/assert.hpp>
32#include <boost/shared_ptr.hpp>
33
34#include <algorithm>
35#include <list>
36
37namespace theplu {
38namespace yat {
39namespace omic {
40
41  /**
42     Pileup takes a range of BamReads and pile them up on the current
43     position. User can investigate the pile of reads at current
44     (locus) position via begin() and end(), which provide access to
45     reads at current position. The position can be incremented and in
46     that way the user can investigate each position of the genome.
47
48     \since new in yat 0.13
49
50     Type Requirments:
51     - \c Iterator is a \input_iterator
52     - \c value_type is convertible to BamRead
53   */
54  template<typename Iterator>
55  class Pileup
56  {
57  public:
58    /**
59       Essentially a BamRead but with additional information regarding
60       base we currently pointing to
61     */
62    class Entry
63    {
64    public:
65      /**
66         \brief default constructor
67       */
68      Entry(void) {}
69
70      /**
71         Create an Entry with read \a b. Entry is initialized to point
72         to first aligned base (b.pos()).
73       */
74      Entry(const BamRead& b);
75
76      /**
77         \return const reference to BamRead
78       */
79      const BamRead& bam(void) const
80      {
81        YAT_ASSERT(bam_.get());
82        return *bam_;
83      }
84
85      /**
86         \return cigar operation at current position
87       */
88      uint8_t cigar_op(void) const { return *cigar_; }
89
90
91      /**
92         \return length of current cigar element
93       */
94      uint32_t cigar_oplen(void) const
95      {
96        YAT_ASSERT(bam_.get());
97        YAT_ASSERT(cigar_.base() >= bam_->cigar());
98        return bam_cigar_oplen(*cigar_.base());
99      }
100
101
102      /**
103         \return base quality of current position
104       */
105      unsigned short qual(void) const
106      {
107        YAT_ASSERT(bam_.get());
108        YAT_ASSERT(qpos_ < bam_->sequence_length());
109        return bam_->qual(qpos_);
110      }
111
112      /**
113         \return base of current position
114       */
115      uint8_t sequence(void) const;
116
117      /**
118         Increment CIGAR. If current CIGAR element is consuming query
119         (not deletion), increment to next base on query.
120       */
121      void increment(void);
122
123      /**
124         \return true if this position was deleted in this query
125       */
126      bool is_del(void) const
127      {
128        return cigar_type()==2;
129      }
130
131      /**
132         \return index of base at this position
133       */
134      size_t qpos(void) const { return qpos_; }
135    private:
136      boost::shared_ptr<BamRead> bam_;
137      // index of base pointed to
138      size_t qpos_;
139      yat::utility::CigarIterator<const uint32_t*> cigar_;
140      size_t skip_;
141      // return 0, 1, 2, or 3 see bam_cigar_type in yat/utility/Cigar.h
142      uint8_t cigar_type(void) const { return bam_cigar_type(cigar_op()); }
143      friend class Pileup;
144    };
145
146  public:
147    /**
148       Const iterator that can be used to iterate over reads that
149       overlap with current position (as deined by tid() and pos()).
150     */
151    typedef typename std::list<Entry>::const_iterator const_iterator;
152
153    /**
154       Create an empty Pileup
155     */
156    Pileup(void) {}
157
158    /**
159       Create a Pileup that will use reads in range [first, last). The
160       range must be sorted as defined by BamLessPos.
161
162       Iterator is a \readable_iterator.
163     */
164    Pileup(Iterator first, Iterator last);
165
166    /**
167       \return an iterator that points to the first Entry in Pileup.
168
169       Iteration is done in same order as range provided in
170       constructor. Reads that do not overlap with current position
171       (tid:pos) are ignored.
172     */
173    const_iterator begin(void) const;
174
175    /**
176       \return an iterator that points one past the last element in
177       the Pileup.
178     */
179    const_iterator end(void) const;
180
181    /**
182       \return true if there are more positions to increment into
183     */
184    bool good(void) const { return !(bam_ == bam_end_ && data_.empty()); }
185
186    /**
187       \brief step to next position.
188
189       If any Entry has a insertion at current position, increment
190       each Entry that has an insertion. Otherwise increment to next
191       position and update each Entry accordingly.
192
193       If the position is not covered by any read, the Pileup
194       fastforward to next covered locus.
195     */
196    void increment(void);
197
198    /**
199       \return template id as defined in bam file header
200     */
201    int32_t tid(void) const { return tid_; }
202
203    /**
204       \return position
205     */
206    int32_t pos(void) const { return pos_; }
207
208    /**
209       \return true if position does not exist in reference, i.e.,
210       inserted region.
211     */
212    bool skip_ref(void) const;
213  private:
214    typedef typename std::list<Entry>::iterator iterator;
215
216    Iterator bam_;
217    Iterator bam_end_;
218    int32_t pos_;
219    int32_t tid_;
220    std::list<Entry> data_;
221    uint32_t skip_ref_;
222  };
223
224
225  /**
226     \relates Pileup
227
228     \since new in yat 0.13
229   */
230  template<typename Iterator>
231  Pileup<Iterator> make_pileup(Iterator first, Iterator last)
232  {
233    return Pileup<Iterator>(first, last);
234  }
235
236  // implementations
237
238  template<typename Iterator>
239  Pileup<Iterator>::Pileup(Iterator first, Iterator last)
240    : bam_(first), bam_end_(last), pos_(0), tid_(0), skip_ref_(0)
241  {
242    BOOST_CONCEPT_ASSERT((boost::InputIterator<Iterator>));
243    using boost::Convertible;
244    typedef typename boost::iterator_value<Iterator>::type value_type;
245    BOOST_CONCEPT_ASSERT((Convertible<value_type, BamRead>));
246    increment();
247  }
248
249
250  template<typename Iterator>
251  typename Pileup<Iterator>::const_iterator Pileup<Iterator>::begin(void) const
252  {
253    return data_.begin();
254  }
255
256
257  template<typename Iterator>
258  typename Pileup<Iterator>::const_iterator Pileup<Iterator>::end(void) const
259  {
260    return data_.end();
261  }
262
263
264  template<typename Iterator>
265  void Pileup<Iterator>::increment(void)
266  {
267    if (!data_.empty()) {
268      // in insertion
269      if (skip_ref()) {
270        for (iterator it = data_.begin(); it!=data_.end(); ++it) {
271          if (it->cigar_type() == 1) {
272            it->increment();
273          }
274          --it->skip_;
275        }
276        --skip_ref_;
277      }
278      // walk one base forward on reference
279      else {
280        ++pos_;
281        typename std::list<Entry>::iterator read = data_.begin();
282        while (read != data_.end()) {
283          read->increment();
284          // remove entry if read is entirely left of pos
285          if (read->bam().end() <= pos_) { // FIXME speedup possible
286            data_.erase(read++);
287            continue;
288          }
289          // check if we're stepping into insertion
290          if (read->cigar_op() == BAM_CINS) {
291            skip_ref_ = std::max(skip_ref_, read->cigar_oplen());
292          }
293          ++read;
294        }
295
296        // stepped into an insertion
297        if (skip_ref_)
298          for (iterator iter=data_.begin(); iter!=data_.end(); ++iter)
299            iter->skip_ = skip_ref_;
300      }
301    }
302
303
304    // fast forward to next covered locus
305    if (data_.empty()) {
306      if (bam_==bam_end_)
307        return;
308      tid_ = bam_->core().tid;
309      pos_ = bam_->core().pos;
310    }
311
312    // read data
313    while (bam_!=bam_end_ && bam_->core().tid==tid_ && bam_->pos()<=pos_) {
314      // skip unmapped reads
315      if (!(bam_->core().flag & BAM_FUNMAP)) {
316        data_.push_back(Entry(*bam_));
317      }
318      ++bam_;
319    }
320  }
321
322
323  template<typename Iterator>
324  bool Pileup<Iterator>::skip_ref(void) const
325  {
326    return skip_ref_;
327  }
328
329
330  template<typename Iterator>
331  Pileup<Iterator>::Entry::Entry(const BamRead& b)
332    : bam_(new BamRead(b)), qpos_(0), cigar_(bam_->cigar()), skip_(0)
333  {
334    YAT_ASSERT(cigar_.base() == bam_->cigar());
335    if (bam_->core().n_cigar==0)
336      return;
337    // iterate cigar to first match
338    while (qpos_ < bam_->sequence_length() && cigar_type() != 3) {
339      // type is either 0 (hardclipped) or 1 (softclipped)
340      YAT_ASSERT(cigar_type()!=2);
341      if (cigar_type() == 1)
342        ++qpos_;
343      ++cigar_;
344    }
345  }
346
347
348  template<typename Iterator>
349  void Pileup<Iterator>::Entry::increment(void)
350  {
351    YAT_ASSERT(cigar_.base() >= bam_->cigar());
352    if (cigar_type() & 1)
353      ++qpos_;
354    ++cigar_;
355    YAT_ASSERT(cigar_.base() >= bam_->cigar());
356  }
357
358
359  template<typename Iterator>
360  uint8_t Pileup<Iterator>::Entry::sequence(void) const
361  {
362    if (skip_==0) {
363      if (cigar_type() & 1)
364        return bam_->sequence(qpos_);
365      return 0;
366    }
367    if (cigar_op()!=BAM_CINS)
368      return 0;
369    return bam_->sequence(qpos_);
370  }
371
372}}}
373#endif
Note: See TracBrowser for help on using the repository browser.