source: trunk/yat/omic/BamPairIterator.h @ 3174

Last change on this file since 3174 was 3174, checked in by Peter, 8 years ago

#refs 784

Speed up copy and assignment by keeping pointers of maps rather than
hard copies. Incrementing input iterators is expected to [possibly]
modify underlying resource. Take istream_iterator as an example, which
obviously modifies the istream it points to when it's incremented and
thus effects other iterators that point to the same istream. This is
described in concept description as that input iterators are only
guaranteed to be a single pass iterator. Given that users do not
expect input iterators to be independent, we can use that to speed up
copy constructor by only copiding a [smart] pointer rather than the
whole map, which might get quite heavy if there are a lot of pairs
mapped far away from each other.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.9 KB
Line 
1#ifndef theplu_yat_omic_bam_pair_iterator
2#define theplu_yat_omic_bam_pair_iterator
3
4// $Id: BamPairIterator.h 3174 2014-03-08 14:51:20Z peter $
5
6/*
7  Copyright (C) 2014 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include <iostream>
26
27#include "BamRead.h"
28
29#include <boost/concept/assert.hpp>
30#include <boost/iterator/iterator_facade.hpp>
31#include <boost/shared_ptr.hpp>
32
33#include <iterator>
34#include <map>
35#include <utility>
36
37namespace theplu {
38namespace yat {
39namespace omic {
40
41  /**
42     \since New in yat 0.12
43
44     Type Requirements:
45     - \c Iterator must be an \input_iterator
46     - \c Iterator 's \c reference type must be convertible to BamRead
47
48     This iterator works on a sorted range [begin, end) and provies a
49     covenient way to access the pairs rather than the reads
50     individually. When BamPairIterator is incremented, rather than
51     just step to next element in [begin, end), BamPairIterator keeps
52     stepping until it finds a read whose mate it has already seen, and
53     operator* return the pair of BamReads.
54
55     Note that BamPairIterator is a single-pass iterator, i.e., once it
56     is incremented the behaviour of its copies is undefined.
57   */
58  template<typename Base>
59  class BamPairIterator
60    : public boost::iterator_facade<
61    BamPairIterator<Base>, const std::pair<BamRead, BamRead>,
62    std::input_iterator_tag
63    >
64  {
65  public:
66    /**
67       \brief default constructor
68     */
69    BamPairIterator(void);
70
71    /**
72       Creates an iterator that will work on [begin, end).
73
74       \see bam_pair_iterator
75
76       \note Input range \c [\a first, \a last \c ) must be sorted or
77       behaviour is undefined.
78     */
79    explicit BamPairIterator(Base begin, Base end);
80  private:
81    Base iter_;
82    Base end_;
83    std::pair<BamRead, BamRead> x_;
84    boost::shared_ptr<std::map<std::string, BamRead> > siam_reads_;
85    typedef std::pair<int32_t, int32_t> Position;
86    boost::shared_ptr<std::multimap<Position, BamRead> > reads_;
87    friend class boost::iterator_core_access;
88
89    const std::pair<BamRead, BamRead>& dereference(void) const;
90    bool equal(const BamPairIterator& other) const;
91    void increment(void);
92    void find_next(void);
93  };
94
95
96  /**
97     Convenient function to create BamPairIterator
98
99     \relates BamPairIterator
100   */
101  template<typename Base>
102  BamPairIterator<Base> bam_pair_iterator(Base base, Base end)
103  { return BamPairIterator<Base>(base, end); }
104
105
106  /**
107     Convenient function to create BamPairIterator that works on empty
108     range. Equivalent to bam_pair_iterator(end, end).
109
110     \relates BamPairIterator
111  */
112  template<typename Base>
113  BamPairIterator<Base> bam_pair_iterator(Base end)
114  { return BamPairIterator<Base>(end, end); }
115
116
117  //             template implementations
118  ///////////////////////////////////////////////////////////////
119
120  template<typename Base>
121  BamPairIterator<Base>::BamPairIterator(Base base, Base end)
122    : iter_(base), end_(end),
123      siam_reads_(new std::map<std::string, BamRead>),
124      reads_(new std::multimap<Position, BamRead>)
125  {
126    BOOST_CONCEPT_ASSERT((boost::InputIterator<Base>));
127    using boost::Convertible;
128    typedef typename std::iterator_traits<Base>::reference reference_type;
129    BOOST_CONCEPT_ASSERT((Convertible<reference_type, BamRead>));
130    find_next();
131  }
132
133
134  template<typename Base>
135  BamPairIterator<Base>::BamPairIterator(void)
136  {
137    BOOST_CONCEPT_ASSERT((boost::InputIterator<Base>));
138    using boost::Convertible;
139    typedef typename std::iterator_traits<Base>::reference reference_type;
140    BOOST_CONCEPT_ASSERT((Convertible<reference_type, BamRead>));
141  }
142
143
144  template<typename Base>
145  const std::pair<BamRead, BamRead>&
146  BamPairIterator<Base>::dereference(void) const
147  {
148    return x_;
149  }
150
151
152  template<typename Base>
153  bool BamPairIterator<Base>::equal(const BamPairIterator& other) const
154  {
155    return iter_ == other.iter_;
156  }
157
158
159  template<typename Base>
160  void BamPairIterator<Base>::increment(void)
161  {
162    ++iter_;
163    find_next();
164  }
165
166
167  template<typename Base>
168  void BamPairIterator<Base>::find_next(void)
169  {
170    BamLessPos less;
171    for (; iter_!=end_; ++iter_) {
172      Position position(iter_->tid(), iter_->pos());
173      Position mate_position(iter_->mtid(), iter_->mpos());
174      // clear siam reads if iter is more advanced than siam reads
175      if (siam_reads_->size() && less(siam_reads_->begin()->second, *iter_))
176        siam_reads_->clear();
177
178      // have not seen mate yet
179      if (mate_position > position)
180        reads_->insert(std::make_pair(mate_position, *iter_));
181      else if (position > mate_position) {
182        std::multimap<Position, BamRead>::iterator
183          lower = reads_->lower_bound(position);
184        // erase all reads with mate position less than current
185        // position (assuming input range is sorted)
186        reads_->erase(reads_->begin(), lower);
187
188        // find mate and assign pair
189        for (; lower!=reads_->end() && lower->first == position; ++lower)
190          if (same_query_name(lower->second, *iter_)) {
191            x_.first = lower->second;
192            x_.second = *iter_;
193            return;
194          }
195      }
196      else { // siam read, i.e., iter_ and mate have same position
197        // check if we have already seen mate and stored it in map
198        std::map<std::string, BamRead>::iterator
199          mate = siam_reads_->lower_bound(iter_->name());
200        if (mate!=siam_reads_->end() && same_query_name(mate->second, *iter_)) {
201          x_.first = mate->second;
202          x_.second = *iter_;
203          return;
204        }
205        else
206          // insert with hint
207          siam_reads_->insert(mate, std::make_pair(iter_->name(), *iter_));
208      }
209    }
210  }
211
212}}}
213#endif
Note: See TracBrowser for help on using the repository browser.