source: trunk/yat/omic/Pileup.h @ 3328

Last change on this file since 3328 was 3328, checked in by Peter, 7 years ago

avoid temporary variables; prefer initialization. refs #806

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 8.3 KB
Line 
1#ifndef theplu_yat_omic_pileup
2#define theplu_yat_omic_pileup
3
4// $Id: Pileup.h 3328 2014-10-10 11:41:24Z peter $
5
6/*
7  Copyright (C) 2012, 2013, 2014 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "BamRead.h"
26
27#include "yat/utility/yat_assert.h"
28
29#include <boost/iterator/filter_iterator.hpp>
30
31#include <algorithm>
32#include <deque>
33
34namespace theplu {
35namespace yat {
36namespace omic {
37
38  /**
39     \since new in yat 0.13
40   */
41  template<typename Iterator>
42  class Pileup
43  {
44  public:
45    /**
46       Essentially a BamRead but with additional information regarding
47       base we currently pointing to
48     */
49    class Entry
50    {
51    public:
52      /**
53         Create an Entry with read \a b
54       */
55      Entry(const yat::omic::BamRead& b);
56
57      /**
58         \return const reference to BamRead
59       */
60      const yat::omic::BamRead& bam(void) const { return bam_; }
61
62      /**
63         \return cigar element at current position
64       */
65      uint32_t cigar(void) const
66      {
67        YAT_ASSERT(c_index_ < bam_.core().n_cigar);
68        return bam_.cigar(c_index_);
69      }
70
71      /**
72         \return base quality of current position
73       */
74      unsigned short qual(void) const
75      {
76        YAT_ASSERT(qpos_ < bam_.sequence_length());
77        return bam_.qual(qpos_);
78      }
79
80      /**
81         \return base of current position
82       */
83      uint8_t sequence(void) const;
84
85      /**
86         \brief iterate to next position with coverage
87
88         If any read has an insertion here, next position will be a
89         skip_ref locus; otherwise we iterate to next locus on
90         reference that has coverage.
91       */
92      // FIXME should this be public?
93      void increment(void);
94
95      /**
96         \return true if beyond last (aligned) base, i.e., we don't
97         count clipped bases
98       */
99      bool end_of_sequence(void) const
100      {
101        return qpos_ >= bam_.sequence_length();
102      }
103
104      /**
105         \return true if this position was deleted in this query
106       */
107      bool is_del(void) const
108      {
109        YAT_ASSERT(c_index_ < bam_.core().n_cigar);
110        return bam_cigar_type(cigar())==2;
111      }
112
113      /**
114         \return index of base at this position
115       */
116      size_t qpos(void) const { return qpos_; }
117    private:
118      yat::omic::BamRead bam_;
119      // index of base pointed to
120      size_t qpos_;
121      size_t c_index_;
122      size_t c_index2_;
123      size_t skip_;
124      void step_cigar(void);
125      uint32_t cigar_op(void) const
126      {
127        return bam_cigar_op(cigar());
128      }
129      friend class Pileup;
130    };
131
132  private:
133
134    /**
135       Private class used in const_iterator.
136     */
137    class Filter
138    {
139    public:
140      Filter(void) {}
141      Filter(int32_t tid, int32_t pos) : tid_(tid), pos_(pos) {}
142      // return true if Entry read overlaps with tid:pos
143      bool operator()(const Entry& read) const
144      {
145        return read.bam().pos() <= pos_ && pos_ < read.bam().end()
146          && read.bam().tid() == tid_;
147      }
148    private:
149      int32_t tid_;
150      int32_t pos_;
151    };
152
153  public:
154    /**
155       Const iterator that can be used to iterate over reads that
156       overlap with current position (as deined by tid() and pos()).
157     */
158    typedef boost::filter_iterator<
159    Filter
160    , typename std::deque<Entry>::const_iterator> const_iterator;
161
162    /**
163       Create an empty Pileup
164     */
165    Pileup(void) {}
166
167    /**
168       Create a Pileup that will use reads in range [first, last)
169     */
170    Pileup(Iterator first, Iterator last);
171
172    /**
173       \return an iterator that points to the first Entry in Pileup.
174
175       Iteration is done in same order as range provided in
176       constructor. Reads that do not overlap with current position
177       (tid:pos) are ignored.
178     */
179    const_iterator begin(void) const;
180
181    /**
182       \return an iterator that points one past the last element in
183       the Pileup.
184     */
185    const_iterator end(void) const;
186
187    /**
188       \return true if there are more positions to increment into
189     */
190    bool good(void) const { return !(bam_ == bam_end_ && data_.empty()); }
191
192    /**
193       \brief step to next position with coverage
194     */
195    void increment(void);
196
197    /**
198       \return template id as defined in bam file header
199     */
200    int32_t tid(void) const { return tid_; }
201
202    /**
203       \return position
204     */
205    int32_t pos(void) const { return pos_; }
206
207    /**
208       \return true if position does not exist in reference, i.e.,
209       inserted region.
210     */
211    bool skip_ref(void) const;
212  private:
213    void step_cigar(void);
214
215    Iterator bam_;
216    Iterator bam_end_;
217    int32_t pos_;
218    int32_t tid_;
219    std::deque<Entry> data_;
220    uint32_t skip_ref_;
221  };
222
223
224  /**
225     \relates Pileup
226
227     \since new in yat 0.13
228   */
229  template<typename Iterator>
230  Pileup<Iterator> make_pileup(Iterator first, Iterator last)
231  {
232    return Pileup<Iterator>(first, last);
233  }
234
235  // implementations
236
237  template<typename Iterator>
238  Pileup<Iterator>::Pileup(Iterator first, Iterator last)
239    : bam_(first), bam_end_(last), pos_(0), tid_(0), skip_ref_(0)
240  {
241    increment();
242  }
243
244
245  template<typename Iterator>
246  typename Pileup<Iterator>::const_iterator Pileup<Iterator>::begin(void) const
247  {
248    return boost::make_filter_iterator(Filter(tid(), pos()),
249                                       data_.begin(), data_.end());
250  }
251
252
253  template<typename Iterator>
254  typename Pileup<Iterator>::const_iterator Pileup<Iterator>::end(void) const
255  {
256    return boost::make_filter_iterator(Filter(),
257                                       data_.end(), data_.end());
258  }
259
260
261  template<typename Iterator>
262  void Pileup<Iterator>::increment(void)
263  {
264    if (data_.empty()) {
265      if (bam_==bam_end_)
266        return;
267      tid_ = bam_->core().tid;
268      pos_ = bam_->core().pos;
269    }
270    else {
271
272      // in insertion
273      if (skip_ref()) {
274        for (size_t i=0; i<data_.size(); ++i) {
275          // ignore reads left of current position
276          if (data_[i].end_of_sequence())
277            continue;
278          if (bam_cigar_type(data_[i].cigar())==1) {
279            data_[i].increment();
280          }
281          --data_[i].skip_;
282        }
283        --skip_ref_;
284      }
285      // walk one base forward on reference
286      else {
287        ++pos_;
288        for (size_t i=0; i<data_.size(); ++i) {
289          if (data_[i].end_of_sequence())
290            continue;
291          data_[i].increment();
292          // check if we're stepping into insertion
293          if (!data_[i].end_of_sequence() && (data_[i].cigar_op() == BAM_CINS))
294            skip_ref_ = std::max(skip_ref_, bam_cigar_oplen(data_[i].cigar()));
295        }
296        // stepped into an insertion
297        if (skip_ref_) {
298          for (size_t i=0; i<data_.size(); ++i)
299            data_[i].skip_ = skip_ref_;
300        }
301      }
302    }
303
304    // read data
305    while (bam_!=bam_end_ && bam_->core().tid==tid_ && bam_->pos()<=pos_) {
306      // skip unmapped reads
307      if (!(bam_->core().flag & BAM_FUNMAP)) {
308        data_.push_back(Entry(*bam_));
309      }
310      ++bam_;
311    }
312
313    // pop reads that are end of sequence
314    while (data_.size() && data_[0].end_of_sequence()) {
315      data_.pop_front();
316    }
317  }
318
319
320  template<typename Iterator>
321  bool Pileup<Iterator>::skip_ref(void) const
322  {
323    return skip_ref_;
324  }
325
326
327  template<typename Iterator>
328  Pileup<Iterator>::Entry::Entry(const yat::omic::BamRead& b)
329    : bam_(b), qpos_(0), c_index_(0), c_index2_(0), skip_(0)
330  {
331    if (bam_.core().n_cigar==0)
332      return;
333    // iterate cigar to first match
334    while (bam_cigar_type(bam_.cigar(c_index_)) != 3) {
335      YAT_ASSERT(bam_cigar_type(bam_.cigar(c_index_))!=2);
336      if (bam_cigar_type(bam_.cigar(c_index_)) == 1) {
337        qpos_ += b.cigar_oplen(c_index_);
338      }
339      ++c_index_;
340      if (c_index_ >= bam_.core().n_cigar) {
341        qpos_ = bam_.sequence_length();
342        return;
343      }
344      YAT_ASSERT(c_index_ < bam_.core().n_cigar);
345    }
346  }
347
348
349  template<typename Iterator>
350  void Pileup<Iterator>::Entry::increment(void)
351  {
352    if (bam_cigar_type(cigar()) & 1)
353      ++qpos_;
354    step_cigar();
355  }
356
357
358  template<typename Iterator>
359  uint8_t Pileup<Iterator>::Entry::sequence(void) const
360  {
361    if (skip_==0) {
362      if (bam_cigar_type(cigar()) & 1)
363        return bam_.sequence(qpos_);
364      else
365        return 0;
366    }
367    if (cigar_op()!=BAM_CINS)
368      return 0;
369    return bam_.sequence(qpos_);
370  }
371
372
373  template<typename Iterator>
374  void Pileup<Iterator>::Entry::step_cigar(void)
375  {
376    ++c_index2_;
377    YAT_ASSERT(c_index_<bam_.core().n_cigar);
378    if (c_index2_ >= bam_.cigar_oplen(c_index_)) {
379      c_index2_ = 0;
380      ++c_index_;
381    }
382  }
383
384}}}
385#endif
Note: See TracBrowser for help on using the repository browser.