source: trunk/test/pileup.cc @ 3327

Last change on this file since 3327 was 3327, checked in by Peter, 7 years ago

redefine Pileup::const_iterator so it avoids reads that do not overlap current position. refs #806

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 8.6 KB
Line 
1// $Id: pileup.cc 3327 2014-10-10 08:30:20Z peter $
2
3/*
4  Copyright (C) 2014 Peter Johansson
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with yat. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include <config.h>
23
24#include "Suite.h"
25
26#include <yat/omic/BamFile.h>
27#include <yat/omic/BamReadIterator.h>
28#include "yat/omic/Pileup.h"
29
30#include <yat/utility/Aligner.h>
31
32#include <algorithm>
33#include <cassert>
34#include <string>
35#include <vector>
36
37using namespace theplu::yat;
38
39// The simplest of genotypers; reporting the first and second "thirdiles"
40std::string genotype(std::string str)
41{
42  std::string res("  ");
43  std::sort(str.begin(), str.end());
44  res[0] = str[str.size()/3];
45  res[1] = str[2*str.size()/3];
46  return res;
47}
48
49
50#ifdef YAT_HAVE_LIBBAM
51void test1(test::Suite& suite, const std::string& fn)
52{
53  omic::InBamFile in;
54  in.open(fn);
55
56  using omic::BamReadIterator;
57  BamReadIterator first(in);
58  BamReadIterator last;
59
60  size_t count = 0;
61  std::vector<std::string> correct_gt;
62  correct_gt.push_back("AA");
63  correct_gt.push_back("CC");
64  correct_gt.push_back("CC");
65  correct_gt.push_back("TT");
66  correct_gt.push_back("=A");
67  correct_gt.push_back("AA");
68  correct_gt.push_back("AA");
69  correct_gt.push_back("GG");
70  correct_gt.push_back("AA");
71  correct_gt.push_back("AA");
72  correct_gt.push_back("AA");
73
74  correct_gt.push_back("GG");
75  correct_gt.push_back("GG");
76  correct_gt.push_back("CC");
77  correct_gt.push_back("CC");
78  correct_gt.push_back("CC");
79
80  correct_gt.push_back("TT");
81  correct_gt.push_back("TT");
82  correct_gt.push_back("=G");
83  correct_gt.push_back("=C");
84
85  using omic::Pileup;
86  Pileup<BamReadIterator> pileup(first, last);
87  while (pileup.good()) {
88    if (pileup.pos()+1 < 24341) {
89      pileup.increment();
90      continue;
91    }
92    Pileup<BamReadIterator>::const_iterator iter = pileup.begin();
93    std::string str;
94    for (; iter!=pileup.end(); ++iter) {
95      if (iter->bam().end() <= pileup.pos()) {
96        suite.err() << "error: " << pileup.pos() << " "
97                    << iter->bam().pos() << " "
98                    << iter->bam().end() << " "
99                    << iter->bam().cigar_str()
100                    << "\n";
101        suite.add(false);
102        continue;
103      }
104      str += bam_nt16_rev_table[iter->sequence()];
105    }
106
107    if (pileup.pos()+1 == 24341 && count!=0) {
108      suite.err() << "incorrect count\n";
109      suite.add(false);
110    }
111
112    if (count < correct_gt.size())
113      if (genotype(str) != correct_gt[count]) {
114        suite.err() << "incorrect genotype: " << genotype(str) << "\n";
115        suite.err() << "expected: " << correct_gt[count] << "\n";
116        suite.add(false);
117      }
118
119    if (count <= 3) {
120      if (pileup.pos()+1 != static_cast<int>(count+24341)) {
121        suite.err() << "incorrect pos or count\n";
122        suite.err() << "pos: " << pileup.pos()+1 << "\n";
123        suite.err() << "count: " << count << "\n";
124        suite.add(false);
125      }
126    }
127    else if (count == 4 && !pileup.skip_ref() ) {
128      suite.out() << "expected skip ref\n";
129      suite.add(false);
130    }
131    else if (count==5) {
132      if (pileup.pos()+1 != static_cast<int>(count+24340)) {
133        suite.err() << "incorrect pos or count\n";
134        suite.err() << "pos: " << pileup.pos()+1 << "\n";
135        suite.err() << "count: " << count << "\n";
136        suite.add(false);
137      }
138    }
139    pileup.increment();
140    ++count;
141  }
142}
143
144
145void test2(test::Suite& suite, const std::string& fn)
146{
147  std::ostringstream error;
148  theplu::yat::omic::InBamFile in(fn);
149  using omic::BamRead;
150  using omic::BamReadIterator;
151  using omic::Pileup;
152  BamReadIterator first(in, 1, 24150, 25000);
153  BamReadIterator last;
154
155  std::vector<BamRead> reads(2);
156  reads[0] = *first;
157  ++first;
158  reads[1] = *first;
159  in.close();
160  std::string ref = reads[0].sequence();
161
162  suite.out() << "no modifications\n";
163  // no modification
164  typedef std::vector<BamRead>::iterator iterator;
165  typedef Pileup<iterator>::const_iterator plp_iterator;
166  for (Pileup<iterator> plp(reads.begin(), reads.end());plp.pos()<24070;
167       plp.increment()) {
168    for (plp_iterator i = plp.begin(); i!=plp.end(); ++i) {
169      if (i->bam().pos()!=reads[1].pos())
170        continue;
171      char nt = bam_nt16_rev_table[i->sequence()];
172      if (nt != ref[plp.pos()-reads[0].pos()]) {
173        error << "'" << nt << "' not equal '"
174              << ref[plp.pos()-reads[0].pos()]
175              << "' at pos " << reads[0].pos() << " for bam with start pos: "
176              << i->bam().pos();
177        throw std::runtime_error(error.str());
178      }
179    }
180  }
181
182  // insertion
183  suite.out() << "insertion\n";
184  assert(reads[1].sequence_length()==100);
185  utility::Aligner::Cigar cig;
186  cig.push_back(BAM_CMATCH, 10);
187  cig.push_back(BAM_CINS, 1);
188  cig.push_back(BAM_CMATCH, 89);
189  reads[1].cigar(cig);
190  int count = 0;
191  for (Pileup<iterator> plp(reads.begin(), reads.end());plp.good();
192       plp.increment()) {
193    for (plp_iterator i = plp.begin(); i!=plp.end(); ++i) {
194      if (!same_query_name(i->bam(), reads[1]))
195        continue;
196      ++count;
197      suite.err() << count << "\n";
198      char nt = bam_nt16_rev_table[i->sequence()];
199      if (nt != reads[1].sequence()[count-1])
200        error << "nt: " << nt << " not as expected. count: " << count << "\n";
201      // first 10 matches
202      if (count <= 10) {
203        if (plp.skip_ref())
204          error << count << " unexpected skip_ref\n";
205        if (plp.pos() != reads[1].pos()+count-1)
206          error << "count: " << count << " with pos: " << plp.pos() << "\n";
207      }
208      else if (count==11) { // insertion
209        if (!plp.skip_ref())
210          error << count << " unexpected skip_ref\n";
211      }
212      else {
213        if (plp.skip_ref())
214          error << count << " unexpected skip_ref\n";
215        if (plp.pos() != reads[1].pos()+count-2)
216          error << "count: " << count << " with pos: " << plp.pos() << "\n";
217      }
218
219      if (error.str().size())
220        throw std::runtime_error(error.str());
221    }
222  }
223
224
225  // deletion
226  suite.out() << "deletion\n";
227  assert(reads[1].sequence_length()==100);
228  cig.clear();
229  cig.push_back(BAM_CMATCH, 10);
230  cig.push_back(BAM_CDEL, 1);
231  cig.push_back(BAM_CMATCH, 90);
232  assert(cig.size()==3);
233  reads[1].cigar(cig);
234  count = 0;
235  for (Pileup<iterator> plp(reads.begin(), reads.end()); plp.good();
236       plp.increment()) {
237    for (plp_iterator i = plp.begin(); i!=plp.end(); ++i) {
238      if (!same_query_name(i->bam(), reads[1]))
239        continue;
240      ++count;
241      char nt = bam_nt16_rev_table[i->sequence()];
242      if (plp.pos() != reads[1].pos()+count-1)
243        error << "count: " << count << " with pos: " << plp.pos() << "\n";
244      // first 10 matches
245      if (count <= 10) {
246        if (plp.skip_ref())
247          error << count << " unexpected skip_ref\n";
248        if (nt != reads[1].sequence()[count-1])
249          error << "nt: " << nt << " not as expected\n";
250      }
251      else if (count==11) {
252        if (plp.skip_ref())
253          error << count << " unexpected skip_ref\n";
254        if (!i->is_del())
255          error << count << " expected deletion\n";
256      }
257      else {
258        if (plp.skip_ref())
259          error << count << " unexpected skip_ref\n";
260        if (nt != reads[1].sequence()[count-2])
261          error << "nt: " << nt << " not as expected\n";
262      }
263
264      if (error.str().size())
265        throw std::runtime_error(error.str());
266    }
267  }
268
269  // soft clipped
270  suite.out() << "soft clipped\n";
271  assert(reads[1].sequence_length()==100);
272  cig.clear();
273  cig.push_back(BAM_CSOFT_CLIP, 10);
274  cig.push_back(BAM_CMATCH, 90);
275  reads[1].cigar(cig);
276  count = 0;
277  for (Pileup<iterator> plp(reads.begin(), reads.end()); plp.good();
278       plp.increment()) {
279    for (plp_iterator i = plp.begin(); i!=plp.end(); ++i) {
280      if (!same_query_name(i->bam(), reads[1]))
281        continue;
282      ++count;
283      char nt = bam_nt16_rev_table[i->sequence()];
284      if (plp.skip_ref())
285        error << count << " unexpected skip_ref\n";
286      if (plp.pos() != reads[1].pos()+count-1)
287        error << "count: " << count << " with pos: " << plp.pos() << "\n";
288      if (nt != reads[1].sequence()[count+9])
289        error << "count: " << count << " nt: " << nt << " not as expected\n";
290
291      if (error.str().size())
292        throw std::runtime_error(error.str());
293    }
294  }
295}
296#else
297// tests if libbam is not available, should never be run
298void test1(test::Suite& suite, const std::string& fn) {assert(0);}
299void test2(test::Suite& suite, const std::string& fn) {assert(0);}
300#endif
301
302int main(int argc, char* argv[])
303{
304  test::Suite suite(argc, argv, true);
305  std::string fn = "../../data/foo.sorted.bam";
306  try {
307    test1(suite, fn);
308    test2(suite, fn);
309  }
310  catch (std::runtime_error& e) {
311    suite.err() << "test failed: " << e.what() << "\n";
312    suite.add(false);
313  }
314  return suite.return_value();
315}
Note: See TracBrowser for help on using the repository browser.