source: trunk/test/pileup.cc @ 3802

Last change on this file since 3802 was 3802, checked in by Peter, 2 months ago

closes #918. implement function Pileup::size

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 8.9 KB
Line 
1// $Id: pileup.cc 3802 2019-05-12 04:31:06Z peter $
2
3/*
4  Copyright (C) 2014, 2015 Peter Johansson
5
6  This file is part of the yat library, http://dev.thep.lu.se/yat
7
8  The yat library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 3 of the
11  License, or (at your option) any later version.
12
13  The yat library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with yat. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#include <config.h>
23
24#include "Suite.h"
25
26#include <yat/utility/config_public.h>
27
28#ifdef YAT_HAVE_LIBBAM
29#include <yat/omic/BamFile.h>
30#include <yat/omic/BamReadIterator.h>
31#include "yat/omic/Pileup.h"
32#endif
33
34#include <yat/utility/Aligner.h>
35
36#include <algorithm>
37#include <cassert>
38#include <string>
39#include <vector>
40
41using namespace theplu::yat;
42
43// The simplest of genotypers; reporting the first and second "thirdiles"
44std::string genotype(std::string str)
45{
46  std::string res("  ");
47  std::sort(str.begin(), str.end());
48  res[0] = str[str.size()/3];
49  res[1] = str[2*str.size()/3];
50  return res;
51}
52
53
54char seq_nt16(size_t x)
55{
56#if YAT_HAVE_HTSLIB
57  return seq_nt16_str[x];
58#elif HAVE_BAM_NT16_REV_TABLE
59  return bam_nt16_rev_table[x];
60#else
61  std::string table = "=ACMGRSVTWYHKDBN";
62  return table[x];
63#endif
64}
65
66#ifdef YAT_HAVE_LIBBAM
67void test1(test::Suite& suite, const std::string& fn)
68{
69  omic::InBamFile in;
70  in.open(fn);
71
72  using omic::BamReadIterator;
73  BamReadIterator first(in);
74  BamReadIterator last;
75
76  size_t count = 0;
77  std::vector<std::string> correct_gt;
78  correct_gt.push_back("AA");
79  correct_gt.push_back("CC");
80  correct_gt.push_back("CC");
81  correct_gt.push_back("TT");
82  correct_gt.push_back("=A");
83  correct_gt.push_back("AA");
84  correct_gt.push_back("AA");
85  correct_gt.push_back("GG");
86  correct_gt.push_back("AA");
87  correct_gt.push_back("AA");
88  correct_gt.push_back("AA");
89
90  correct_gt.push_back("GG");
91  correct_gt.push_back("GG");
92  correct_gt.push_back("CC");
93  correct_gt.push_back("CC");
94  correct_gt.push_back("CC");
95
96  correct_gt.push_back("TT");
97  correct_gt.push_back("TT");
98  correct_gt.push_back("=G");
99  correct_gt.push_back("=C");
100
101  using omic::Pileup;
102  Pileup<BamReadIterator> pileup(first, last);
103  while (pileup.good()) {
104    if (pileup.pos()+1 < 24341) {
105      pileup.increment();
106      continue;
107    }
108    Pileup<BamReadIterator>::const_iterator iter = pileup.begin();
109    std::string str;
110    for (; iter!=pileup.end(); ++iter) {
111      if (iter->bam().end() <= pileup.pos()) {
112        suite.err() << "error: " << pileup.pos() << " "
113                    << iter->bam().pos() << " "
114                    << iter->bam().end() << " "
115                    << iter->bam().cigar_str()
116                    << "\n";
117        suite.add(false);
118        continue;
119      }
120      str += seq_nt16(iter->sequence());
121    }
122
123    if (pileup.pos()+1 == 24341 && count!=0) {
124      suite.err() << "incorrect count\n";
125      suite.add(false);
126    }
127
128    if (count < correct_gt.size())
129      if (genotype(str) != correct_gt[count]) {
130        suite.err() << "incorrect genotype: " << genotype(str) << "\n";
131        suite.err() << "expected: " << correct_gt[count] << "\n";
132        suite.add(false);
133      }
134
135    if (count <= 3) {
136      if (pileup.pos()+1 != static_cast<int>(count+24341)) {
137        suite.err() << "incorrect pos or count\n";
138        suite.err() << "pos: " << pileup.pos()+1 << "\n";
139        suite.err() << "count: " << count << "\n";
140        suite.add(false);
141      }
142    }
143    else if (count == 4 && !pileup.skip_ref() ) {
144      suite.out() << "expected skip ref\n";
145      suite.add(false);
146    }
147    else if (count==5) {
148      if (pileup.pos()+1 != static_cast<int>(count+24340)) {
149        suite.err() << "incorrect pos or count\n";
150        suite.err() << "pos: " << pileup.pos()+1 << "\n";
151        suite.err() << "count: " << count << "\n";
152        suite.add(false);
153      }
154    }
155    pileup.increment();
156    ++count;
157  }
158  Pileup<BamReadIterator>::const_reverse_iterator rend = pileup.rend();
159  std::distance(pileup.rbegin(), rend);
160  pileup.size();
161}
162
163
164void test2(test::Suite& suite, const std::string& fn)
165{
166  std::ostringstream error;
167  theplu::yat::omic::InBamFile in(fn);
168  using omic::BamRead;
169  using omic::BamReadIterator;
170  using omic::Pileup;
171  BamReadIterator first(in, 1, 24150, 25000);
172  BamReadIterator last;
173
174  std::vector<BamRead> reads(2);
175  reads[0] = *first;
176  ++first;
177  reads[1] = *first;
178  in.close();
179  std::string ref = reads[0].sequence();
180
181  suite.out() << "no modifications\n";
182  // no modification
183  typedef std::vector<BamRead>::iterator iterator;
184  typedef Pileup<iterator>::const_iterator plp_iterator;
185  for (Pileup<iterator> plp(reads.begin(), reads.end());plp.pos()<24070;
186       plp.increment()) {
187    for (plp_iterator i = plp.begin(); i!=plp.end(); ++i) {
188      if (i->bam().pos()!=reads[1].pos())
189        continue;
190      char nt = seq_nt16(i->sequence());
191      if (nt != ref[plp.pos()-reads[0].pos()]) {
192        error << "'" << nt << "' not equal '"
193              << ref[plp.pos()-reads[0].pos()]
194              << "' at pos " << reads[0].pos() << " for bam with start pos: "
195              << i->bam().pos();
196        throw std::runtime_error(error.str());
197      }
198    }
199  }
200
201  // insertion
202  suite.out() << "insertion\n";
203  assert(reads[1].sequence_length()==100);
204  utility::Aligner::Cigar cig;
205  cig.push_back(BAM_CMATCH, 10);
206  cig.push_back(BAM_CINS, 1);
207  cig.push_back(BAM_CMATCH, 89);
208  reads[1].cigar(cig);
209  int count = 0;
210  for (Pileup<iterator> plp(reads.begin(), reads.end());plp.good();
211       plp.increment()) {
212    for (plp_iterator i = plp.begin(); i!=plp.end(); ++i) {
213      if (!same_query_name(i->bam(), reads[1]))
214        continue;
215      ++count;
216      suite.err() << count << "\n";
217      char nt = seq_nt16(i->sequence());
218      if (nt != reads[1].sequence()[count-1])
219        error << "nt: " << nt << " not as expected. count: " << count << "\n";
220      // first 10 matches
221      if (count <= 10) {
222        if (plp.skip_ref())
223          error << count << " unexpected skip_ref\n";
224        if (plp.pos() != reads[1].pos()+count-1)
225          error << "count: " << count << " with pos: " << plp.pos() << "\n";
226      }
227      else if (count==11) { // insertion
228        if (!plp.skip_ref())
229          error << count << " unexpected skip_ref\n";
230      }
231      else {
232        if (plp.skip_ref())
233          error << count << " unexpected skip_ref\n";
234        if (plp.pos() != reads[1].pos()+count-2)
235          error << "count: " << count << " with pos: " << plp.pos() << "\n";
236      }
237
238      if (error.str().size())
239        throw std::runtime_error(error.str());
240    }
241  }
242
243
244  // deletion
245  suite.out() << "deletion\n";
246  assert(reads[1].sequence_length()==100);
247  cig.clear();
248  cig.push_back(BAM_CMATCH, 10);
249  cig.push_back(BAM_CDEL, 1);
250  cig.push_back(BAM_CMATCH, 90);
251  assert(cig.size()==3);
252  reads[1].cigar(cig);
253  count = 0;
254  for (Pileup<iterator> plp(reads.begin(), reads.end()); plp.good();
255       plp.increment()) {
256    for (plp_iterator i = plp.begin(); i!=plp.end(); ++i) {
257      if (!same_query_name(i->bam(), reads[1]))
258        continue;
259      ++count;
260      char nt = seq_nt16(i->sequence());
261      if (plp.pos() != reads[1].pos()+count-1)
262        error << "count: " << count << " with pos: " << plp.pos() << "\n";
263      // first 10 matches
264      if (count <= 10) {
265        if (plp.skip_ref())
266          error << count << " unexpected skip_ref\n";
267        if (nt != reads[1].sequence()[count-1])
268          error << "nt: " << nt << " not as expected\n";
269      }
270      else if (count==11) {
271        if (plp.skip_ref())
272          error << count << " unexpected skip_ref\n";
273        if (!i->is_del())
274          error << count << " expected deletion\n";
275      }
276      else {
277        if (plp.skip_ref())
278          error << count << " unexpected skip_ref\n";
279        if (nt != reads[1].sequence()[count-2])
280          error << "nt: " << nt << " not as expected\n";
281      }
282
283      if (error.str().size())
284        throw std::runtime_error(error.str());
285    }
286  }
287
288  // soft clipped
289  suite.out() << "soft clipped\n";
290  assert(reads[1].sequence_length()==100);
291  cig.clear();
292  cig.push_back(BAM_CSOFT_CLIP, 10);
293  cig.push_back(BAM_CMATCH, 90);
294  reads[1].cigar(cig);
295  count = 0;
296  for (Pileup<iterator> plp(reads.begin(), reads.end()); plp.good();
297       plp.increment()) {
298    for (plp_iterator i = plp.begin(); i!=plp.end(); ++i) {
299      if (!same_query_name(i->bam(), reads[1]))
300        continue;
301      ++count;
302      char nt = seq_nt16(i->sequence());
303      if (plp.skip_ref())
304        error << count << " unexpected skip_ref\n";
305      if (plp.pos() != reads[1].pos()+count-1)
306        error << "count: " << count << " with pos: " << plp.pos() << "\n";
307      if (nt != reads[1].sequence()[count+9])
308        error << "count: " << count << " nt: " << nt << " not as expected\n";
309
310      if (error.str().size())
311        throw std::runtime_error(error.str());
312    }
313  }
314}
315#else
316// tests if libbam is not available, should never be run
317void test1(test::Suite& suite, const std::string& fn) {assert(0);}
318void test2(test::Suite& suite, const std::string& fn) {assert(0);}
319#endif
320
321int main(int argc, char* argv[])
322{
323  test::Suite suite(argc, argv, true);
324  std::string fn = "../../data/foo.sorted.bam";
325  try {
326    test1(suite, fn);
327    test2(suite, fn);
328  }
329  catch (std::runtime_error& e) {
330    suite.err() << "test failed: " << e.what() << "\n";
331    suite.add(false);
332  }
333  return suite.return_value();
334}
Note: See TracBrowser for help on using the repository browser.