source: trunk/test/pileup.cc @ 3371

Last change on this file since 3371 was 3371, checked in by Peter, 4 years ago

fix typo

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 8.8 KB
Line 
1// $Id: pileup.cc 3371 2015-02-10 07:21:57Z peter $
2
3/*
4  Copyright (C) 2014 Peter Johansson
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with yat. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include <config.h>
23
24#include "Suite.h"
25
26#ifdef YAT_HAVE_LIBBAM
27#include <yat/omic/BamFile.h>
28#include <yat/omic/BamReadIterator.h>
29#include "yat/omic/Pileup.h"
30#endif
31
32#include <yat/utility/Aligner.h>
33
34#include <algorithm>
35#include <cassert>
36#include <string>
37#include <vector>
38
39using namespace theplu::yat;
40
41// The simplest of genotypers; reporting the first and second "thirdiles"
42std::string genotype(std::string str)
43{
44  std::string res("  ");
45  std::sort(str.begin(), str.end());
46  res[0] = str[str.size()/3];
47  res[1] = str[2*str.size()/3];
48  return res;
49}
50
51
52#ifdef YAT_HAVE_LIBBAM
53char seq_nt16(size_t x)
54{
55#if YAT_HAVE_HTSLIB
56  return seq_nt16_str[x];
57#elif HAVE_BAM_NT16_REV_TABLE
58  return bam_nt16_rev_table[x];
59#else
60  std::string table = "=ACMGRSVTWYHKDBN";
61  return table[c];
62#endif
63}
64
65void test1(test::Suite& suite, const std::string& fn)
66{
67  omic::InBamFile in;
68  in.open(fn);
69
70  using omic::BamReadIterator;
71  BamReadIterator first(in);
72  BamReadIterator last;
73
74  size_t count = 0;
75  std::vector<std::string> correct_gt;
76  correct_gt.push_back("AA");
77  correct_gt.push_back("CC");
78  correct_gt.push_back("CC");
79  correct_gt.push_back("TT");
80  correct_gt.push_back("=A");
81  correct_gt.push_back("AA");
82  correct_gt.push_back("AA");
83  correct_gt.push_back("GG");
84  correct_gt.push_back("AA");
85  correct_gt.push_back("AA");
86  correct_gt.push_back("AA");
87
88  correct_gt.push_back("GG");
89  correct_gt.push_back("GG");
90  correct_gt.push_back("CC");
91  correct_gt.push_back("CC");
92  correct_gt.push_back("CC");
93
94  correct_gt.push_back("TT");
95  correct_gt.push_back("TT");
96  correct_gt.push_back("=G");
97  correct_gt.push_back("=C");
98
99  using omic::Pileup;
100  Pileup<BamReadIterator> pileup(first, last);
101  while (pileup.good()) {
102    if (pileup.pos()+1 < 24341) {
103      pileup.increment();
104      continue;
105    }
106    Pileup<BamReadIterator>::const_iterator iter = pileup.begin();
107    std::string str;
108    for (; iter!=pileup.end(); ++iter) {
109      if (iter->bam().end() <= pileup.pos()) {
110        suite.err() << "error: " << pileup.pos() << " "
111                    << iter->bam().pos() << " "
112                    << iter->bam().end() << " "
113                    << iter->bam().cigar_str()
114                    << "\n";
115        suite.add(false);
116        continue;
117      }
118      str += seq_nt16(iter->sequence());
119    }
120
121    if (pileup.pos()+1 == 24341 && count!=0) {
122      suite.err() << "incorrect count\n";
123      suite.add(false);
124    }
125
126    if (count < correct_gt.size())
127      if (genotype(str) != correct_gt[count]) {
128        suite.err() << "incorrect genotype: " << genotype(str) << "\n";
129        suite.err() << "expected: " << correct_gt[count] << "\n";
130        suite.add(false);
131      }
132
133    if (count <= 3) {
134      if (pileup.pos()+1 != static_cast<int>(count+24341)) {
135        suite.err() << "incorrect pos or count\n";
136        suite.err() << "pos: " << pileup.pos()+1 << "\n";
137        suite.err() << "count: " << count << "\n";
138        suite.add(false);
139      }
140    }
141    else if (count == 4 && !pileup.skip_ref() ) {
142      suite.out() << "expected skip ref\n";
143      suite.add(false);
144    }
145    else if (count==5) {
146      if (pileup.pos()+1 != static_cast<int>(count+24340)) {
147        suite.err() << "incorrect pos or count\n";
148        suite.err() << "pos: " << pileup.pos()+1 << "\n";
149        suite.err() << "count: " << count << "\n";
150        suite.add(false);
151      }
152    }
153    pileup.increment();
154    ++count;
155  }
156}
157
158
159void test2(test::Suite& suite, const std::string& fn)
160{
161  std::ostringstream error;
162  theplu::yat::omic::InBamFile in(fn);
163  using omic::BamRead;
164  using omic::BamReadIterator;
165  using omic::Pileup;
166  BamReadIterator first(in, 1, 24150, 25000);
167  BamReadIterator last;
168
169  std::vector<BamRead> reads(2);
170  reads[0] = *first;
171  ++first;
172  reads[1] = *first;
173  in.close();
174  std::string ref = reads[0].sequence();
175
176  suite.out() << "no modifications\n";
177  // no modification
178  typedef std::vector<BamRead>::iterator iterator;
179  typedef Pileup<iterator>::const_iterator plp_iterator;
180  for (Pileup<iterator> plp(reads.begin(), reads.end());plp.pos()<24070;
181       plp.increment()) {
182    for (plp_iterator i = plp.begin(); i!=plp.end(); ++i) {
183      if (i->bam().pos()!=reads[1].pos())
184        continue;
185      char nt = seq_nt16(i->sequence());
186      if (nt != ref[plp.pos()-reads[0].pos()]) {
187        error << "'" << nt << "' not equal '"
188              << ref[plp.pos()-reads[0].pos()]
189              << "' at pos " << reads[0].pos() << " for bam with start pos: "
190              << i->bam().pos();
191        throw std::runtime_error(error.str());
192      }
193    }
194  }
195
196  // insertion
197  suite.out() << "insertion\n";
198  assert(reads[1].sequence_length()==100);
199  utility::Aligner::Cigar cig;
200  cig.push_back(BAM_CMATCH, 10);
201  cig.push_back(BAM_CINS, 1);
202  cig.push_back(BAM_CMATCH, 89);
203  reads[1].cigar(cig);
204  int count = 0;
205  for (Pileup<iterator> plp(reads.begin(), reads.end());plp.good();
206       plp.increment()) {
207    for (plp_iterator i = plp.begin(); i!=plp.end(); ++i) {
208      if (!same_query_name(i->bam(), reads[1]))
209        continue;
210      ++count;
211      suite.err() << count << "\n";
212      char nt = seq_nt16(i->sequence());
213      if (nt != reads[1].sequence()[count-1])
214        error << "nt: " << nt << " not as expected. count: " << count << "\n";
215      // first 10 matches
216      if (count <= 10) {
217        if (plp.skip_ref())
218          error << count << " unexpected skip_ref\n";
219        if (plp.pos() != reads[1].pos()+count-1)
220          error << "count: " << count << " with pos: " << plp.pos() << "\n";
221      }
222      else if (count==11) { // insertion
223        if (!plp.skip_ref())
224          error << count << " unexpected skip_ref\n";
225      }
226      else {
227        if (plp.skip_ref())
228          error << count << " unexpected skip_ref\n";
229        if (plp.pos() != reads[1].pos()+count-2)
230          error << "count: " << count << " with pos: " << plp.pos() << "\n";
231      }
232
233      if (error.str().size())
234        throw std::runtime_error(error.str());
235    }
236  }
237
238
239  // deletion
240  suite.out() << "deletion\n";
241  assert(reads[1].sequence_length()==100);
242  cig.clear();
243  cig.push_back(BAM_CMATCH, 10);
244  cig.push_back(BAM_CDEL, 1);
245  cig.push_back(BAM_CMATCH, 90);
246  assert(cig.size()==3);
247  reads[1].cigar(cig);
248  count = 0;
249  for (Pileup<iterator> plp(reads.begin(), reads.end()); plp.good();
250       plp.increment()) {
251    for (plp_iterator i = plp.begin(); i!=plp.end(); ++i) {
252      if (!same_query_name(i->bam(), reads[1]))
253        continue;
254      ++count;
255      char nt = seq_nt16(i->sequence());
256      if (plp.pos() != reads[1].pos()+count-1)
257        error << "count: " << count << " with pos: " << plp.pos() << "\n";
258      // first 10 matches
259      if (count <= 10) {
260        if (plp.skip_ref())
261          error << count << " unexpected skip_ref\n";
262        if (nt != reads[1].sequence()[count-1])
263          error << "nt: " << nt << " not as expected\n";
264      }
265      else if (count==11) {
266        if (plp.skip_ref())
267          error << count << " unexpected skip_ref\n";
268        if (!i->is_del())
269          error << count << " expected deletion\n";
270      }
271      else {
272        if (plp.skip_ref())
273          error << count << " unexpected skip_ref\n";
274        if (nt != reads[1].sequence()[count-2])
275          error << "nt: " << nt << " not as expected\n";
276      }
277
278      if (error.str().size())
279        throw std::runtime_error(error.str());
280    }
281  }
282
283  // soft clipped
284  suite.out() << "soft clipped\n";
285  assert(reads[1].sequence_length()==100);
286  cig.clear();
287  cig.push_back(BAM_CSOFT_CLIP, 10);
288  cig.push_back(BAM_CMATCH, 90);
289  reads[1].cigar(cig);
290  count = 0;
291  for (Pileup<iterator> plp(reads.begin(), reads.end()); plp.good();
292       plp.increment()) {
293    for (plp_iterator i = plp.begin(); i!=plp.end(); ++i) {
294      if (!same_query_name(i->bam(), reads[1]))
295        continue;
296      ++count;
297      char nt = seq_nt16(i->sequence());
298      if (plp.skip_ref())
299        error << count << " unexpected skip_ref\n";
300      if (plp.pos() != reads[1].pos()+count-1)
301        error << "count: " << count << " with pos: " << plp.pos() << "\n";
302      if (nt != reads[1].sequence()[count+9])
303        error << "count: " << count << " nt: " << nt << " not as expected\n";
304
305      if (error.str().size())
306        throw std::runtime_error(error.str());
307    }
308  }
309}
310#else
311// tests if libbam is not available, should never be run
312void test1(test::Suite& suite, const std::string& fn) {assert(0);}
313void test2(test::Suite& suite, const std::string& fn) {assert(0);}
314#endif
315
316int main(int argc, char* argv[])
317{
318  test::Suite suite(argc, argv, true);
319  std::string fn = "../../data/foo.sorted.bam";
320  try {
321    test1(suite, fn);
322    test2(suite, fn);
323  }
324  catch (std::runtime_error& e) {
325    suite.err() << "test failed: " << e.what() << "\n";
326    suite.add(false);
327  }
328  return suite.return_value();
329}
Note: See TracBrowser for help on using the repository browser.