1 | // $Id: bam.cc 3306 2014-08-21 04:37:21Z peter $ |
---|
2 | // |
---|
3 | // Copyright (C) 2013, 2014 Peter Johansson |
---|
4 | // |
---|
5 | // This program is free software; you can redistribute it and/or modify |
---|
6 | // it under the terms of the GNU General Public License as published by |
---|
7 | // the Free Software Foundation; either version 3 of the License, or |
---|
8 | // (at your option) any later version. |
---|
9 | // |
---|
10 | // This program is distributed in the hope that it will be useful, but |
---|
11 | // WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
13 | // General Public License for more details. |
---|
14 | // |
---|
15 | // You should have received a copy of the GNU General Public License |
---|
16 | // along with this program. If not, see <http://www.gnu.org/licenses/>. |
---|
17 | |
---|
18 | #include <config.h> |
---|
19 | |
---|
20 | #include "Suite.h" |
---|
21 | |
---|
22 | #ifdef YAT_HAVE_LIBBAM |
---|
23 | #include "yat/omic/BamFile.h" |
---|
24 | #include "yat/omic/BamRead.h" |
---|
25 | #endif |
---|
26 | |
---|
27 | #include "yat/utility/Aligner.h" |
---|
28 | |
---|
29 | #include <cassert> |
---|
30 | #include <string> |
---|
31 | |
---|
32 | using namespace theplu::yat; |
---|
33 | |
---|
34 | void test1(test::Suite& suite); |
---|
35 | |
---|
36 | int main(int argc, char* argv[]) |
---|
37 | { |
---|
38 | test::Suite suite(argc, argv, true); |
---|
39 | #ifdef YAT_HAVE_LIBBAM |
---|
40 | test1(suite); |
---|
41 | #endif |
---|
42 | return suite.return_value(); |
---|
43 | } |
---|
44 | |
---|
45 | #ifdef YAT_HAVE_LIBBAM |
---|
46 | using namespace omic; |
---|
47 | |
---|
48 | void test_aux(test::Suite& suite, const BamRead& b) |
---|
49 | { |
---|
50 | suite.out() << "test aux\n"; |
---|
51 | BamRead bam(b); |
---|
52 | |
---|
53 | char c = 'h'; |
---|
54 | int size = bam.aux_size(); |
---|
55 | bam.aux_append("ZZ", 'A', 1, (uint8_t*)(&c)); |
---|
56 | suite.out() << "size: " << size << " " << bam.aux_size() << "\n"; |
---|
57 | suite.add(bam.aux_size() == size+4); |
---|
58 | bam.aux(); |
---|
59 | bam.aux("ZZ"); |
---|
60 | char c1 = bam_aux2A(bam.aux("ZZ")); |
---|
61 | suite.out() << c << "==" << c1 << "\n"; |
---|
62 | suite.add(c==c1); |
---|
63 | bam.aux_del("ZZ"); |
---|
64 | if (!suite.add(bam.aux_size() == size)) |
---|
65 | suite.err() << "error: incorrect size: " << bam.aux_size() << "\n"; |
---|
66 | } |
---|
67 | |
---|
68 | |
---|
69 | void test_cigar(test::Suite& suite, const BamRead& b, const BamHeader& hdr) |
---|
70 | { |
---|
71 | BamRead bam(b); |
---|
72 | |
---|
73 | bam.cigar(); |
---|
74 | bam.cigar(0); |
---|
75 | bam.cigar_op(0); |
---|
76 | bam.cigar_oplen(0); |
---|
77 | |
---|
78 | suite.out() << "test cigar:\n"; |
---|
79 | suite.out() << bam.cigar_str() << "\n"; |
---|
80 | OutBamFile os("cigar_test.bam", hdr); |
---|
81 | utility::Aligner::Cigar cig; |
---|
82 | |
---|
83 | bam.cigar(cig); |
---|
84 | suite.out() << bam.cigar_str() << "\n"; |
---|
85 | suite.add(bam.cigar_str()==""); |
---|
86 | os.write(bam); |
---|
87 | |
---|
88 | cig.push_back(BAM_CMATCH, bam.sequence_length()); |
---|
89 | bam.cigar(cig); |
---|
90 | suite.out() << bam.cigar_str() << "\n"; |
---|
91 | suite.add(bam.cigar_str()=="100M"); |
---|
92 | os.write(bam); |
---|
93 | |
---|
94 | cig.clear(); |
---|
95 | cig.push_back(BAM_CMATCH, 50); |
---|
96 | cig.push_back(BAM_CDEL, 2); |
---|
97 | cig.push_back(BAM_CMATCH, 50); |
---|
98 | bam.cigar(cig); |
---|
99 | suite.out() << bam.cigar_str() << "\n"; |
---|
100 | suite.add(bam.cigar_str()=="50M2D50M"); |
---|
101 | os.write(bam); |
---|
102 | |
---|
103 | // check that other elements in data* is preserved |
---|
104 | if (!suite.add(same_query_name(b, bam))) |
---|
105 | suite.err() << "error: \n'" << b.name() << "'\n'" << bam.name() << "'\n"; |
---|
106 | |
---|
107 | if (!suite.add(b.sequence()==bam.sequence())) |
---|
108 | suite.err() << "error: \n'" << b.sequence() << "'\n'" |
---|
109 | << bam.sequence() << "'\n"; |
---|
110 | |
---|
111 | for (int32_t i=0; i<b.core().l_qseq; ++i) |
---|
112 | if (bam.qual(i) != b.qual(i)) { |
---|
113 | suite.err() << "error: qual: " << i << " " << bam.qual(i) |
---|
114 | << " not euqal " << b.qual(i) << "\n"; |
---|
115 | suite.add(false); |
---|
116 | } |
---|
117 | |
---|
118 | os.close(); |
---|
119 | } |
---|
120 | |
---|
121 | |
---|
122 | void test_end(test::Suite& suite, BamRead bam) |
---|
123 | { |
---|
124 | suite.out() << "test end:\n"; |
---|
125 | suite.out() << bam.pos() << "-" << bam.end() << " " << bam.cigar_str() |
---|
126 | << "\n"; |
---|
127 | uint32_t end = bam.end(); |
---|
128 | if (!suite.add(end==24093)) { |
---|
129 | suite.err() << "error: incorrect end: " << end << "\n"; |
---|
130 | } |
---|
131 | |
---|
132 | // change the cigar to include a DIFF and EQUAL |
---|
133 | utility::Aligner::Cigar cig; |
---|
134 | cig.push_back(BAM_CEQUAL, 55); |
---|
135 | cig.push_back(BAM_CDIFF, 1); |
---|
136 | cig.push_back(BAM_CEQUAL, 44); |
---|
137 | bam.cigar(cig); |
---|
138 | suite.out() << bam.pos() << "-" << bam.end() << " " << bam.cigar_str() |
---|
139 | << "\n"; |
---|
140 | end = bam.end(); |
---|
141 | if (!suite.add(end==24093)) { |
---|
142 | suite.err() << "error: incorrect end: " << end << "\n"; |
---|
143 | } |
---|
144 | } |
---|
145 | |
---|
146 | |
---|
147 | void test_sequence(test::Suite& suite, const BamRead& b) |
---|
148 | { |
---|
149 | suite.out() << "test sequence\n"; |
---|
150 | BamRead bam(b); |
---|
151 | std::string seq = b.sequence(); |
---|
152 | std::vector<uint8_t> qual; |
---|
153 | qual.reserve(seq.size()); |
---|
154 | for (size_t i=0; i<seq.size(); ++i) |
---|
155 | qual.push_back(bam.qual(i)); |
---|
156 | assert(seq.size()==100); |
---|
157 | bam.sequence(seq, qual); |
---|
158 | |
---|
159 | if (bam.sequence()!=seq) { |
---|
160 | suite.add(false); |
---|
161 | suite.err() << "incorrect sequence:" << bam.sequence() << "\n"; |
---|
162 | suite.err() << "expected: " << seq << "\n"; |
---|
163 | } |
---|
164 | |
---|
165 | // just for consistency |
---|
166 | utility::Aligner::Cigar cig; |
---|
167 | bam.cigar(cig); |
---|
168 | bam.core().flag &= ~BAM_FUNMAP; |
---|
169 | |
---|
170 | // trim off one base |
---|
171 | seq.resize(seq.size()-1); |
---|
172 | qual.resize(qual.size()-1); |
---|
173 | bam.sequence(seq, qual); |
---|
174 | |
---|
175 | if (bam.sequence()!=seq) { |
---|
176 | suite.add(false); |
---|
177 | suite.err() << "incorrect sequence:" << bam.sequence() << "\n"; |
---|
178 | suite.err() << "expected: " << seq << "\n"; |
---|
179 | } |
---|
180 | |
---|
181 | // extend sequence |
---|
182 | seq.resize(120, 'A'); |
---|
183 | qual.resize(120, 0); |
---|
184 | bam.sequence(seq, qual); |
---|
185 | if (bam.sequence()!=seq) { |
---|
186 | suite.add(false); |
---|
187 | suite.err() << "incorrect sequence:" << bam.sequence() << "\n"; |
---|
188 | suite.err() << "expected: " << seq << "\n"; |
---|
189 | } |
---|
190 | } |
---|
191 | |
---|
192 | |
---|
193 | void test_name(test::Suite& suite, const BamRead& b) |
---|
194 | { |
---|
195 | suite.out() << "test name\n"; |
---|
196 | BamRead bam(b); |
---|
197 | std::string name = "my-new-name"; |
---|
198 | bam.name(name); |
---|
199 | if (bam.name() != name) { |
---|
200 | suite.add(false); |
---|
201 | suite.err() << "error: name: '" << bam.name() |
---|
202 | << "' expected: '" << name << "'\n"; |
---|
203 | } |
---|
204 | // check that other fields are not changed |
---|
205 | if (bam.cigar_str()!=b.cigar_str()) { |
---|
206 | suite.add(false); |
---|
207 | suite.err() << "error: cigar str:" << bam.cigar_str() << " != " |
---|
208 | << b.cigar_str() << "\n"; |
---|
209 | } |
---|
210 | if (bam.sequence()!=b.sequence()) { |
---|
211 | suite.add(false); |
---|
212 | suite.err() << "error: sequence:" << bam.sequence() << " != " |
---|
213 | << b.sequence() << "\n"; |
---|
214 | } |
---|
215 | } |
---|
216 | |
---|
217 | |
---|
218 | void test_open3(test::Suite& suite, const BamHeader& header) |
---|
219 | { |
---|
220 | OutBamFile out("yaya.bam", header, 0); |
---|
221 | out.close(); |
---|
222 | out.open("yaya.bam", header, 9); |
---|
223 | out.close(); |
---|
224 | try { |
---|
225 | out.open("yaya.bam", header, 10); |
---|
226 | suite.add(false); |
---|
227 | suite.err() << "open(\"yaya.bam\", header, 10): did not throw\n"; |
---|
228 | } |
---|
229 | catch (std::invalid_argument& e) { |
---|
230 | suite.err() << "expected exception: " << e.what() << "\n"; |
---|
231 | } |
---|
232 | out.close(); |
---|
233 | } |
---|
234 | |
---|
235 | |
---|
236 | void test_write(test::Suite& suite, const BamHeader& header) |
---|
237 | { |
---|
238 | OutBamFile out; |
---|
239 | BamRead read; |
---|
240 | try { |
---|
241 | // trying to write to a file not open |
---|
242 | out.write(read); |
---|
243 | suite.add(false); |
---|
244 | suite.err() << "write(BamRead): did not throw\n"; |
---|
245 | } |
---|
246 | catch (utility::runtime_error& e) { |
---|
247 | suite.err() << "expected exception: " << e.what() << "\n"; |
---|
248 | } |
---|
249 | } |
---|
250 | |
---|
251 | |
---|
252 | void test1(test::Suite& suite) |
---|
253 | { |
---|
254 | std::string file = "../../data/foo.sorted.bam"; |
---|
255 | |
---|
256 | InBamFile in; |
---|
257 | in.open(file); |
---|
258 | BamRead bam; |
---|
259 | in.read(bam); |
---|
260 | // test that functions are implemented |
---|
261 | test_end(suite, bam); |
---|
262 | bam.flag(); |
---|
263 | bam.mpos(); |
---|
264 | bam.mtid(); |
---|
265 | bam.pos(); |
---|
266 | bam.tid(); |
---|
267 | bam.swap(bam); |
---|
268 | std::string str = "AGCTTTGTTATTATTTGAGGGTGTGGTTCAGTTGTAAACAGTGTATG" |
---|
269 | "TTTTAGAATTTGTGTTATTGTGATGGCGATGACCAAGTACAACATATTTCCCA"; |
---|
270 | std::string seq = bam.sequence(); |
---|
271 | if (str!=seq) { |
---|
272 | suite.err() << "incorrect sequence: '" << bam.sequence() << "'\n"; |
---|
273 | suite.err() << "expected: '" << str << "'\n"; |
---|
274 | suite.add(false); |
---|
275 | } |
---|
276 | uint8_t c = bam.sequence(24); |
---|
277 | if (c != 4) { |
---|
278 | suite.add(false); |
---|
279 | suite.err() << "error: sequence: " << static_cast<int>(c) |
---|
280 | << " expected " << 4 << "\n"; |
---|
281 | } |
---|
282 | bam.sequence(24, 8); |
---|
283 | c = bam.sequence(24); |
---|
284 | if (c != 8) { |
---|
285 | suite.add(false); |
---|
286 | suite.err() << "error: sequence: " << static_cast<int>(c) |
---|
287 | << " expected " << 8 << "\n"; |
---|
288 | } |
---|
289 | uint8_t q = bam.qual(24); |
---|
290 | if (q != 72) { |
---|
291 | suite.add(false); |
---|
292 | suite.err() << "error: quality: " << static_cast<int>(q) |
---|
293 | << " expected " << 72 << "\n"; |
---|
294 | } |
---|
295 | uint8_t new_q = 60; |
---|
296 | bam.qual(24, new_q); |
---|
297 | q = bam.qual(24); |
---|
298 | if (q != new_q) { |
---|
299 | suite.add(false); |
---|
300 | suite.err() << "error: quality: " << static_cast<int>(q) |
---|
301 | << " expected " << new_q << "\n"; |
---|
302 | } |
---|
303 | test_cigar(suite, bam, in.header()); |
---|
304 | test_aux(suite, bam); |
---|
305 | test_sequence(suite, bam); |
---|
306 | test_name(suite, bam); |
---|
307 | test_open3(suite, in.header()); |
---|
308 | test_write(suite, in.header()); |
---|
309 | } |
---|
310 | #endif |
---|