source: trunk/test/pileup.cc @ 3356

Last change on this file since 3356 was 3356, checked in by Peter, 9 years ago

refs #794. make test/pileup.cc compile with htslib

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 8.7 KB
Line 
1// $Id: pileup.cc 3356 2014-11-22 13:34:04Z peter $
2
3/*
4  Copyright (C) 2014 Peter Johansson
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with yat. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include <config.h>
23
24#include "Suite.h"
25
26#include <yat/omic/BamFile.h>
27#include <yat/omic/BamReadIterator.h>
28#include "yat/omic/Pileup.h"
29
30#include <yat/utility/Aligner.h>
31
32#include <algorithm>
33#include <cassert>
34#include <string>
35#include <vector>
36
37using namespace theplu::yat;
38
39// The simplest of genotypers; reporting the first and second "thirdiles"
40std::string genotype(std::string str)
41{
42  std::string res("  ");
43  std::sort(str.begin(), str.end());
44  res[0] = str[str.size()/3];
45  res[1] = str[2*str.size()/3];
46  return res;
47}
48
49
50#ifdef YAT_HAVE_LIBBAM
51char seq_nt16(size_t x)
52{
53#if YAT_HAVE_HTSLIB
54  return seq_nt16_str[x];
55#elif HAVE_BAM_NT16_REV_TABLE
56  return bam_nt16_rev_table[x];
57#else
58  std::string table = "=ACMGRSVTWYHKDBN";
59  return table[c];
60#endif
61}
62
63void test1(test::Suite& suite, const std::string& fn)
64{
65  omic::InBamFile in;
66  in.open(fn);
67
68  using omic::BamReadIterator;
69  BamReadIterator first(in);
70  BamReadIterator last;
71
72  size_t count = 0;
73  std::vector<std::string> correct_gt;
74  correct_gt.push_back("AA");
75  correct_gt.push_back("CC");
76  correct_gt.push_back("CC");
77  correct_gt.push_back("TT");
78  correct_gt.push_back("=A");
79  correct_gt.push_back("AA");
80  correct_gt.push_back("AA");
81  correct_gt.push_back("GG");
82  correct_gt.push_back("AA");
83  correct_gt.push_back("AA");
84  correct_gt.push_back("AA");
85
86  correct_gt.push_back("GG");
87  correct_gt.push_back("GG");
88  correct_gt.push_back("CC");
89  correct_gt.push_back("CC");
90  correct_gt.push_back("CC");
91
92  correct_gt.push_back("TT");
93  correct_gt.push_back("TT");
94  correct_gt.push_back("=G");
95  correct_gt.push_back("=C");
96
97  using omic::Pileup;
98  Pileup<BamReadIterator> pileup(first, last);
99  while (pileup.good()) {
100    if (pileup.pos()+1 < 24341) {
101      pileup.increment();
102      continue;
103    }
104    Pileup<BamReadIterator>::const_iterator iter = pileup.begin();
105    std::string str;
106    for (; iter!=pileup.end(); ++iter) {
107      if (iter->bam().end() <= pileup.pos()) {
108        suite.err() << "error: " << pileup.pos() << " "
109                    << iter->bam().pos() << " "
110                    << iter->bam().end() << " "
111                    << iter->bam().cigar_str()
112                    << "\n";
113        suite.add(false);
114        continue;
115      }
116      str += seq_nt16(iter->sequence());
117    }
118
119    if (pileup.pos()+1 == 24341 && count!=0) {
120      suite.err() << "incorrect count\n";
121      suite.add(false);
122    }
123
124    if (count < correct_gt.size())
125      if (genotype(str) != correct_gt[count]) {
126        suite.err() << "incorrect genotype: " << genotype(str) << "\n";
127        suite.err() << "expected: " << correct_gt[count] << "\n";
128        suite.add(false);
129      }
130
131    if (count <= 3) {
132      if (pileup.pos()+1 != static_cast<int>(count+24341)) {
133        suite.err() << "incorrect pos or count\n";
134        suite.err() << "pos: " << pileup.pos()+1 << "\n";
135        suite.err() << "count: " << count << "\n";
136        suite.add(false);
137      }
138    }
139    else if (count == 4 && !pileup.skip_ref() ) {
140      suite.out() << "expected skip ref\n";
141      suite.add(false);
142    }
143    else if (count==5) {
144      if (pileup.pos()+1 != static_cast<int>(count+24340)) {
145        suite.err() << "incorrect pos or count\n";
146        suite.err() << "pos: " << pileup.pos()+1 << "\n";
147        suite.err() << "count: " << count << "\n";
148        suite.add(false);
149      }
150    }
151    pileup.increment();
152    ++count;
153  }
154}
155
156
157void test2(test::Suite& suite, const std::string& fn)
158{
159  std::ostringstream error;
160  theplu::yat::omic::InBamFile in(fn);
161  using omic::BamRead;
162  using omic::BamReadIterator;
163  using omic::Pileup;
164  BamReadIterator first(in, 1, 24150, 25000);
165  BamReadIterator last;
166
167  std::vector<BamRead> reads(2);
168  reads[0] = *first;
169  ++first;
170  reads[1] = *first;
171  in.close();
172  std::string ref = reads[0].sequence();
173
174  suite.out() << "no modifications\n";
175  // no modification
176  typedef std::vector<BamRead>::iterator iterator;
177  typedef Pileup<iterator>::const_iterator plp_iterator;
178  for (Pileup<iterator> plp(reads.begin(), reads.end());plp.pos()<24070;
179       plp.increment()) {
180    for (plp_iterator i = plp.begin(); i!=plp.end(); ++i) {
181      if (i->bam().pos()!=reads[1].pos())
182        continue;
183      char nt = seq_nt16(i->sequence());
184      if (nt != ref[plp.pos()-reads[0].pos()]) {
185        error << "'" << nt << "' not equal '"
186              << ref[plp.pos()-reads[0].pos()]
187              << "' at pos " << reads[0].pos() << " for bam with start pos: "
188              << i->bam().pos();
189        throw std::runtime_error(error.str());
190      }
191    }
192  }
193
194  // insertion
195  suite.out() << "insertion\n";
196  assert(reads[1].sequence_length()==100);
197  utility::Aligner::Cigar cig;
198  cig.push_back(BAM_CMATCH, 10);
199  cig.push_back(BAM_CINS, 1);
200  cig.push_back(BAM_CMATCH, 89);
201  reads[1].cigar(cig);
202  int count = 0;
203  for (Pileup<iterator> plp(reads.begin(), reads.end());plp.good();
204       plp.increment()) {
205    for (plp_iterator i = plp.begin(); i!=plp.end(); ++i) {
206      if (!same_query_name(i->bam(), reads[1]))
207        continue;
208      ++count;
209      suite.err() << count << "\n";
210      char nt = seq_nt16(i->sequence());
211      if (nt != reads[1].sequence()[count-1])
212        error << "nt: " << nt << " not as expected. count: " << count << "\n";
213      // first 10 matches
214      if (count <= 10) {
215        if (plp.skip_ref())
216          error << count << " unexpected skip_ref\n";
217        if (plp.pos() != reads[1].pos()+count-1)
218          error << "count: " << count << " with pos: " << plp.pos() << "\n";
219      }
220      else if (count==11) { // insertion
221        if (!plp.skip_ref())
222          error << count << " unexpected skip_ref\n";
223      }
224      else {
225        if (plp.skip_ref())
226          error << count << " unexpected skip_ref\n";
227        if (plp.pos() != reads[1].pos()+count-2)
228          error << "count: " << count << " with pos: " << plp.pos() << "\n";
229      }
230
231      if (error.str().size())
232        throw std::runtime_error(error.str());
233    }
234  }
235
236
237  // deletion
238  suite.out() << "deletion\n";
239  assert(reads[1].sequence_length()==100);
240  cig.clear();
241  cig.push_back(BAM_CMATCH, 10);
242  cig.push_back(BAM_CDEL, 1);
243  cig.push_back(BAM_CMATCH, 90);
244  assert(cig.size()==3);
245  reads[1].cigar(cig);
246  count = 0;
247  for (Pileup<iterator> plp(reads.begin(), reads.end()); plp.good();
248       plp.increment()) {
249    for (plp_iterator i = plp.begin(); i!=plp.end(); ++i) {
250      if (!same_query_name(i->bam(), reads[1]))
251        continue;
252      ++count;
253      char nt = seq_nt16(i->sequence());
254      if (plp.pos() != reads[1].pos()+count-1)
255        error << "count: " << count << " with pos: " << plp.pos() << "\n";
256      // first 10 matches
257      if (count <= 10) {
258        if (plp.skip_ref())
259          error << count << " unexpected skip_ref\n";
260        if (nt != reads[1].sequence()[count-1])
261          error << "nt: " << nt << " not as expected\n";
262      }
263      else if (count==11) {
264        if (plp.skip_ref())
265          error << count << " unexpected skip_ref\n";
266        if (!i->is_del())
267          error << count << " expected deletion\n";
268      }
269      else {
270        if (plp.skip_ref())
271          error << count << " unexpected skip_ref\n";
272        if (nt != reads[1].sequence()[count-2])
273          error << "nt: " << nt << " not as expected\n";
274      }
275
276      if (error.str().size())
277        throw std::runtime_error(error.str());
278    }
279  }
280
281  // soft clipped
282  suite.out() << "soft clipped\n";
283  assert(reads[1].sequence_length()==100);
284  cig.clear();
285  cig.push_back(BAM_CSOFT_CLIP, 10);
286  cig.push_back(BAM_CMATCH, 90);
287  reads[1].cigar(cig);
288  count = 0;
289  for (Pileup<iterator> plp(reads.begin(), reads.end()); plp.good();
290       plp.increment()) {
291    for (plp_iterator i = plp.begin(); i!=plp.end(); ++i) {
292      if (!same_query_name(i->bam(), reads[1]))
293        continue;
294      ++count;
295      char nt = seq_nt16(i->sequence());
296      if (plp.skip_ref())
297        error << count << " unexpected skip_ref\n";
298      if (plp.pos() != reads[1].pos()+count-1)
299        error << "count: " << count << " with pos: " << plp.pos() << "\n";
300      if (nt != reads[1].sequence()[count+9])
301        error << "count: " << count << " nt: " << nt << " not as expected\n";
302
303      if (error.str().size())
304        throw std::runtime_error(error.str());
305    }
306  }
307}
308#else
309// tests if libbam is not available, should never be run
310void test1(test::Suite& suite, const std::string& fn) {assert(0);}
311void test2(test::Suite& suite, const std::string& fn) {assert(0);}
312#endif
313
314int main(int argc, char* argv[])
315{
316  test::Suite suite(argc, argv, true);
317  std::string fn = "../../data/foo.sorted.bam";
318  try {
319    test1(suite, fn);
320    test2(suite, fn);
321  }
322  catch (std::runtime_error& e) {
323    suite.err() << "test failed: " << e.what() << "\n";
324    suite.add(false);
325  }
326  return suite.return_value();
327}
Note: See TracBrowser for help on using the repository browser.