Changeset 3332 for trunk/yat/omic/Pileup.h
- Timestamp:
- Oct 23, 2014, 1:18:15 PM (8 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/yat/omic/Pileup.h
r3330 r3332 24 24 25 25 #include "BamRead.h" 26 #include "CigarIterator.h" 26 27 27 28 #include "yat/utility/yat_assert.h" 28 29 29 #include <boost/ iterator/filter_iterator.hpp>30 #include <boost/shared_ptr.hpp> 30 31 31 32 #include <algorithm> 32 #include < deque>33 #include <list> 33 34 34 35 namespace theplu { … … 51 52 public: 52 53 /** 54 \brief default constructor 55 */ 56 Entry(void) {} 57 58 /** 53 59 Create an Entry with read \a b 54 60 */ 55 Entry(const yat::omic::BamRead& b);61 Entry(const BamRead& b); 56 62 57 63 /** 58 64 \return const reference to BamRead 59 65 */ 60 const yat::omic::BamRead& bam(void) const { return bam_; } 61 62 /** 63 \return cigar element at current position 64 */ 65 uint32_t cigar(void) const 66 const BamRead& bam(void) const 66 67 { 67 YAT_ASSERT(c_index_ < bam_.core().n_cigar); 68 return bam_.cigar(c_index_); 69 } 68 YAT_ASSERT(bam_.get()); 69 return *bam_; 70 } 71 72 /** 73 \return cigar operation at current position 74 */ 75 uint8_t cigar_op(void) const { return *cigar_; } 76 77 78 /** 79 \return length of current cigar element 80 */ 81 uint32_t cigar_oplen(void) const 82 { 83 YAT_ASSERT(bam_.get()); 84 YAT_ASSERT(cigar_.base() >= bam_->cigar()); 85 return bam_cigar_oplen(*cigar_.base()); 86 } 87 70 88 71 89 /** … … 74 92 unsigned short qual(void) const 75 93 { 76 YAT_ASSERT(qpos_ < bam_.sequence_length()); 77 return bam_.qual(qpos_); 94 YAT_ASSERT(bam_.get()); 95 YAT_ASSERT(qpos_ < bam_->sequence_length()); 96 return bam_->qual(qpos_); 78 97 } 79 98 … … 94 113 95 114 /** 96 \return true if beyond last (aligned) base, i.e., we don't97 count clipped bases98 */99 bool end_of_sequence(void) const100 {101 return qpos_ >= bam_.sequence_length();102 }103 104 /**105 115 \return true if this position was deleted in this query 106 116 */ 107 117 bool is_del(void) const 108 118 { 109 YAT_ASSERT(c_index_ < bam_.core().n_cigar); 110 return bam_cigar_type(cigar())==2; 119 return cigar_type()==2; 111 120 } 112 121 … … 116 125 size_t qpos(void) const { return qpos_; } 117 126 private: 118 yat::omic::BamReadbam_;127 boost::shared_ptr<BamRead> bam_; 119 128 // index of base pointed to 120 129 size_t qpos_; 121 size_t c_index_; 122 size_t c_index2_; 130 CigarIterator cigar_; 123 131 size_t skip_; 124 void step_cigar(void); 125 uint32_t cigar_op(void) const 126 { 127 return bam_cigar_op(cigar()); 128 } 132 // return 0, 1, 2, or 3 see bam_cigar_type in yat/utility/Cigar.h 133 uint8_t cigar_type(void) const { return bam_cigar_type(cigar_op()); } 129 134 friend class Pileup; 130 135 }; 131 136 132 private:133 134 /**135 Private class used in const_iterator.136 */137 class Filter138 {139 public:140 Filter(void) {}141 Filter(int32_t tid, int32_t pos) : tid_(tid), pos_(pos) {}142 // return true if Entry read overlaps with tid:pos143 bool operator()(const Entry& read) const144 {145 return read.bam().pos() <= pos_ && pos_ < read.bam().end()146 && read.bam().tid() == tid_;147 }148 private:149 int32_t tid_;150 int32_t pos_;151 };152 153 137 public: 154 138 /** … … 156 140 overlap with current position (as deined by tid() and pos()). 157 141 */ 158 typedef boost::filter_iterator< 159 Filter 160 , typename std::deque<Entry>::const_iterator> const_iterator; 142 typedef typename std::list<Entry>::const_iterator const_iterator; 161 143 162 144 /** … … 211 193 bool skip_ref(void) const; 212 194 private: 213 void step_cigar(void);195 typedef typename std::list<Entry>::iterator iterator; 214 196 215 197 Iterator bam_; … … 217 199 int32_t pos_; 218 200 int32_t tid_; 219 std:: deque<Entry> data_;201 std::list<Entry> data_; 220 202 uint32_t skip_ref_; 221 203 }; … … 246 228 typename Pileup<Iterator>::const_iterator Pileup<Iterator>::begin(void) const 247 229 { 248 return boost::make_filter_iterator(Filter(tid(), pos()), 249 data_.begin(), data_.end()); 230 return data_.begin(); 250 231 } 251 232 … … 254 235 typename Pileup<Iterator>::const_iterator Pileup<Iterator>::end(void) const 255 236 { 256 return boost::make_filter_iterator(Filter(), 257 data_.end(), data_.end()); 237 return data_.end(); 258 238 } 259 239 … … 272 252 // in insertion 273 253 if (skip_ref()) { 274 for (size_t i=0; i<data_.size(); ++i) { 275 // ignore reads left of current position 276 if (data_[i].end_of_sequence()) 277 continue; 278 if (bam_cigar_type(data_[i].cigar())==1) { 279 data_[i].increment(); 254 for (iterator it = data_.begin(); it!=data_.end(); ++it) { 255 if (it->cigar_type() == 1) { 256 it->increment(); 280 257 } 281 -- data_[i].skip_;258 --it->skip_; 282 259 } 283 260 --skip_ref_; … … 286 263 else { 287 264 ++pos_; 288 for (size_t i=0; i<data_.size(); ++i) { 289 if (data_[i].end_of_sequence()) 265 typename std::list<Entry>::iterator read = data_.begin(); 266 while (read != data_.end()) { 267 read->increment(); 268 // remove entry if read is entirely left of pos 269 if (read->bam().end() <= pos_) { // FIXME speedup possible 270 data_.erase(read++); 290 271 continue; 291 data_[i].increment();272 } 292 273 // check if we're stepping into insertion 293 if (!data_[i].end_of_sequence() && (data_[i].cigar_op() == BAM_CINS)) 294 skip_ref_ = std::max(skip_ref_, bam_cigar_oplen(data_[i].cigar())); 274 if (read->cigar_op() == BAM_CINS) { 275 skip_ref_ = std::max(skip_ref_, read->cigar_oplen()); 276 } 277 ++read; 295 278 } 279 296 280 // stepped into an insertion 297 if (skip_ref_) { 298 for (size_t i=0; i<data_.size(); ++i) 299 data_[i].skip_ = skip_ref_; 300 } 281 if (skip_ref_) 282 for (iterator iter=data_.begin(); iter!=data_.end(); ++iter) 283 iter->skip_ = skip_ref_; 301 284 } 302 285 } … … 310 293 ++bam_; 311 294 } 312 313 // pop reads that are end of sequence314 while (data_.size() && data_[0].end_of_sequence()) {315 data_.pop_front();316 }317 295 } 318 296 … … 326 304 327 305 template<typename Iterator> 328 Pileup<Iterator>::Entry::Entry(const yat::omic::BamRead& b) 329 : bam_(b), qpos_(0), c_index_(0), c_index2_(0), skip_(0) 330 { 331 if (bam_.core().n_cigar==0) 306 Pileup<Iterator>::Entry::Entry(const BamRead& b) 307 : bam_(new BamRead(b)), qpos_(0), cigar_(bam_->cigar()), skip_(0) 308 { 309 YAT_ASSERT(cigar_.base() == bam_->cigar()); 310 if (bam_->core().n_cigar==0) 332 311 return; 333 312 // iterate cigar to first match 334 while (bam_cigar_type(bam_.cigar(c_index_)) != 3) { 335 YAT_ASSERT(bam_cigar_type(bam_.cigar(c_index_))!=2); 336 if (bam_cigar_type(bam_.cigar(c_index_)) == 1) { 337 qpos_ += b.cigar_oplen(c_index_); 338 } 339 ++c_index_; 340 if (c_index_ >= bam_.core().n_cigar) { 341 qpos_ = bam_.sequence_length(); 342 return; 343 } 344 YAT_ASSERT(c_index_ < bam_.core().n_cigar); 313 while (qpos_ < bam_->sequence_length() && cigar_type() != 3) { 314 // type is either 0 (hardclipped) or 1 (softclipped) 315 YAT_ASSERT(cigar_type()!=2); 316 if (cigar_type() == 1) 317 ++qpos_; 318 ++cigar_; 345 319 } 346 320 } … … 350 324 void Pileup<Iterator>::Entry::increment(void) 351 325 { 352 if (bam_cigar_type(cigar()) & 1) 326 YAT_ASSERT(cigar_.base() >= bam_->cigar()); 327 if (cigar_type() & 1) 353 328 ++qpos_; 354 step_cigar(); 329 ++cigar_; 330 YAT_ASSERT(cigar_.base() >= bam_->cigar()); 355 331 } 356 332 … … 360 336 { 361 337 if (skip_==0) { 362 if (bam_cigar_type(cigar()) & 1) 363 return bam_.sequence(qpos_); 364 else 365 return 0; 338 if (cigar_type() & 1) 339 return bam_->sequence(qpos_); 340 return 0; 366 341 } 367 342 if (cigar_op()!=BAM_CINS) 368 343 return 0; 369 return bam_.sequence(qpos_); 370 } 371 372 373 template<typename Iterator> 374 void Pileup<Iterator>::Entry::step_cigar(void) 375 { 376 ++c_index2_; 377 YAT_ASSERT(c_index_<bam_.core().n_cigar); 378 if (c_index2_ >= bam_.cigar_oplen(c_index_)) { 379 c_index2_ = 0; 380 ++c_index_; 381 } 344 return bam_->sequence(qpos_); 382 345 } 383 346
Note: See TracChangeset
for help on using the changeset viewer.