1 | // $Id: bam.cc 3182 2014-03-24 22:28:02Z peter $ |
---|
2 | // |
---|
3 | // Copyright (C) 2013, 2014 Peter Johansson |
---|
4 | // |
---|
5 | // This program is free software; you can redistribute it and/or modify |
---|
6 | // it under the terms of the GNU General Public License as published by |
---|
7 | // the Free Software Foundation; either version 3 of the License, or |
---|
8 | // (at your option) any later version. |
---|
9 | // |
---|
10 | // This program is distributed in the hope that it will be useful, but |
---|
11 | // WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
13 | // General Public License for more details. |
---|
14 | // |
---|
15 | // You should have received a copy of the GNU General Public License |
---|
16 | // along with this program. If not, see <http://www.gnu.org/licenses/>. |
---|
17 | |
---|
18 | #include <config.h> |
---|
19 | |
---|
20 | #include "Suite.h" |
---|
21 | |
---|
22 | #ifdef HAVE_LIBBAM |
---|
23 | #include "yat/omic/BamFile.h" |
---|
24 | #include "yat/omic/BamRead.h" |
---|
25 | #endif |
---|
26 | |
---|
27 | #include <cassert> |
---|
28 | #include <string> |
---|
29 | |
---|
30 | using namespace theplu::yat; |
---|
31 | |
---|
32 | void test1(test::Suite& suite); |
---|
33 | |
---|
34 | int main(int argc, char* argv[]) |
---|
35 | { |
---|
36 | test::Suite suite(argc, argv); |
---|
37 | #ifndef HAVE_LIBBAM |
---|
38 | suite.out() << "no libbam\n"; |
---|
39 | return EXIT_SKIP; |
---|
40 | #endif |
---|
41 | #ifndef HAVE_SAMTOOLS |
---|
42 | suite.out() << "no samtools\n"; |
---|
43 | return EXIT_SKIP; |
---|
44 | #endif |
---|
45 | #ifdef HAVE_LIBBAM |
---|
46 | test1(suite); |
---|
47 | #endif |
---|
48 | return suite.return_value(); |
---|
49 | } |
---|
50 | |
---|
51 | #ifdef HAVE_LIBBAM |
---|
52 | using namespace omic; |
---|
53 | |
---|
54 | void test_aux(test::Suite& suite, const BamRead& b) |
---|
55 | { |
---|
56 | suite.out() << "test aux\n"; |
---|
57 | BamRead bam(b); |
---|
58 | |
---|
59 | char c = 'h'; |
---|
60 | int size = bam.aux_size(); |
---|
61 | bam.aux_append("ZZ", 'A', 1, (uint8_t*)(&c)); |
---|
62 | suite.out() << "size: " << size << " " << bam.aux_size() << "\n"; |
---|
63 | suite.add(bam.aux_size() == size+4); |
---|
64 | bam.aux("ZZ"); |
---|
65 | char c1 = bam_aux2A(bam.aux("ZZ")); |
---|
66 | suite.out() << c << "==" << c1 << "\n"; |
---|
67 | suite.add(c==c1); |
---|
68 | bam.aux_del("ZZ"); |
---|
69 | if (!suite.add(bam.aux_size() == size)) |
---|
70 | suite.err() << "error: incorrect size: " << bam.aux_size() << "\n"; |
---|
71 | } |
---|
72 | |
---|
73 | |
---|
74 | void test_cigar(test::Suite& suite, const BamRead& b, const BamHeader& hdr) |
---|
75 | { |
---|
76 | BamRead bam(b); |
---|
77 | |
---|
78 | suite.out() << "test cigar:\n"; |
---|
79 | suite.out() << bam.cigar_str() << "\n"; |
---|
80 | OutBamFile os("cigar_test.bam", hdr); |
---|
81 | std::vector<uint32_t> cig; |
---|
82 | |
---|
83 | bam.cigar(cig); |
---|
84 | suite.out() << bam.cigar_str() << "\n"; |
---|
85 | suite.add(bam.cigar_str()==""); |
---|
86 | os.write(bam); |
---|
87 | |
---|
88 | cig.resize(1); |
---|
89 | cig[0] = bam_cigar_gen(bam.sequence_length(), BAM_CMATCH); |
---|
90 | bam.cigar(cig); |
---|
91 | suite.out() << bam.cigar_str() << "\n"; |
---|
92 | suite.add(bam.cigar_str()=="100M"); |
---|
93 | os.write(bam); |
---|
94 | |
---|
95 | cig.resize(3); |
---|
96 | cig[0] = bam_cigar_gen(50, BAM_CMATCH); |
---|
97 | cig[1] = bam_cigar_gen(2, BAM_CDEL); |
---|
98 | cig[2] = bam_cigar_gen(50, BAM_CMATCH); |
---|
99 | bam.cigar(cig); |
---|
100 | suite.out() << bam.cigar_str() << "\n"; |
---|
101 | suite.add(bam.cigar_str()=="50M2D50M"); |
---|
102 | os.write(bam); |
---|
103 | |
---|
104 | // check that other elements in data* is preserved |
---|
105 | if (!suite.add(same_query_name(b, bam))) |
---|
106 | suite.err() << "error: \n'" << b.name() << "'\n'" << bam.name() << "'\n"; |
---|
107 | |
---|
108 | if (!suite.add(b.sequence()==bam.sequence())) |
---|
109 | suite.err() << "error: \n'" << b.sequence() << "'\n'" |
---|
110 | << bam.sequence() << "'\n"; |
---|
111 | |
---|
112 | for (int32_t i=0; i<b.core().l_qseq; ++i) |
---|
113 | if (bam.qual(i) != b.qual(i)) { |
---|
114 | suite.err() << "error: qual: " << i << " " << bam.qual(i) |
---|
115 | << " not euqal " << b.qual(i) << "\n"; |
---|
116 | suite.add(false); |
---|
117 | } |
---|
118 | |
---|
119 | os.close(); |
---|
120 | } |
---|
121 | |
---|
122 | |
---|
123 | void test_sequence(test::Suite& suite, const BamRead& b) |
---|
124 | { |
---|
125 | suite.out() << "test sequence\n"; |
---|
126 | BamRead bam(b); |
---|
127 | std::string seq = b.sequence(); |
---|
128 | std::vector<uint8_t> qual; |
---|
129 | qual.reserve(seq.size()); |
---|
130 | for (size_t i=0; i<seq.size(); ++i) |
---|
131 | qual.push_back(bam.qual(i)); |
---|
132 | assert(seq.size()==100); |
---|
133 | bam.sequence(seq, qual); |
---|
134 | |
---|
135 | if (bam.sequence()!=seq) { |
---|
136 | suite.add(false); |
---|
137 | suite.err() << "incorrect sequence:" << bam.sequence() << "\n"; |
---|
138 | suite.err() << "expected: " << seq << "\n"; |
---|
139 | } |
---|
140 | |
---|
141 | // just for consistency |
---|
142 | std::vector<uint32_t> cig; |
---|
143 | bam.cigar(cig); |
---|
144 | bam.core().flag &= ~BAM_FUNMAP; |
---|
145 | |
---|
146 | // trim off one base |
---|
147 | seq.resize(seq.size()-1); |
---|
148 | qual.resize(qual.size()-1); |
---|
149 | bam.sequence(seq, qual); |
---|
150 | |
---|
151 | if (bam.sequence()!=seq) { |
---|
152 | suite.add(false); |
---|
153 | suite.err() << "incorrect sequence:" << bam.sequence() << "\n"; |
---|
154 | suite.err() << "expected: " << seq << "\n"; |
---|
155 | } |
---|
156 | |
---|
157 | // extend sequence |
---|
158 | seq.resize(120, 'A'); |
---|
159 | qual.resize(120, 0); |
---|
160 | bam.sequence(seq, qual); |
---|
161 | if (bam.sequence()!=seq) { |
---|
162 | suite.add(false); |
---|
163 | suite.err() << "incorrect sequence:" << bam.sequence() << "\n"; |
---|
164 | suite.err() << "expected: " << seq << "\n"; |
---|
165 | } |
---|
166 | } |
---|
167 | |
---|
168 | |
---|
169 | void test_name(test::Suite& suite, const BamRead& b) |
---|
170 | { |
---|
171 | suite.out() << "test name\n"; |
---|
172 | BamRead bam(b); |
---|
173 | std::string name = "my-new-name"; |
---|
174 | bam.name(name); |
---|
175 | if (bam.name() != name) { |
---|
176 | suite.add(false); |
---|
177 | suite.err() << "error: name: '" << bam.name() |
---|
178 | << "' expected: '" << name << "'\n"; |
---|
179 | } |
---|
180 | // check that other fields are not changed |
---|
181 | if (bam.cigar_str()!=b.cigar_str()) { |
---|
182 | suite.add(false); |
---|
183 | suite.err() << "error: cigar str:" << bam.cigar_str() << " != " |
---|
184 | << b.cigar_str() << "\n"; |
---|
185 | } |
---|
186 | if (bam.sequence()!=b.sequence()) { |
---|
187 | suite.add(false); |
---|
188 | suite.err() << "error: sequence:" << bam.sequence() << " != " |
---|
189 | << b.sequence() << "\n"; |
---|
190 | } |
---|
191 | } |
---|
192 | |
---|
193 | |
---|
194 | void test_write(test::Suite& suite, const BamHeader& header) |
---|
195 | { |
---|
196 | OutBamFile out; |
---|
197 | BamRead read; |
---|
198 | try { |
---|
199 | // trying to write to a file not open |
---|
200 | out.write(read); |
---|
201 | suite.add(false); |
---|
202 | suite.err() << "write(BamRead): did not throw\n"; |
---|
203 | } |
---|
204 | catch (utility::runtime_error& e) { |
---|
205 | suite.err() << "expected exception: " << e.what() << "\n"; |
---|
206 | } |
---|
207 | } |
---|
208 | |
---|
209 | |
---|
210 | void test1(test::Suite& suite) |
---|
211 | { |
---|
212 | std::string file = "../../data/foo.sorted.bam"; |
---|
213 | |
---|
214 | InBamFile in; |
---|
215 | in.open(file); |
---|
216 | BamRead bam; |
---|
217 | in.read(bam); |
---|
218 | std::string str = "AGCTTTGTTATTATTTGAGGGTGTGGTTCAGTTGTAAACAGTGTATG" |
---|
219 | "TTTTAGAATTTGTGTTATTGTGATGGCGATGACCAAGTACAACATATTTCCCA"; |
---|
220 | std::string seq = bam.sequence(); |
---|
221 | if (str!=seq) { |
---|
222 | suite.err() << "incorrect sequence: '" << bam.sequence() << "'\n"; |
---|
223 | suite.err() << "expected: '" << str << "'\n"; |
---|
224 | suite.add(false); |
---|
225 | } |
---|
226 | uint8_t c = bam.sequence(24); |
---|
227 | if (c != 4) { |
---|
228 | suite.add(false); |
---|
229 | suite.err() << "error: sequence: " << static_cast<int>(c) |
---|
230 | << " expected " << 4 << "\n"; |
---|
231 | } |
---|
232 | bam.sequence(24, 8); |
---|
233 | c = bam.sequence(24); |
---|
234 | if (c != 8) { |
---|
235 | suite.add(false); |
---|
236 | suite.err() << "error: sequence: " << static_cast<int>(c) |
---|
237 | << " expected " << 8 << "\n"; |
---|
238 | } |
---|
239 | uint8_t q = bam.qual(24); |
---|
240 | if (q != 72) { |
---|
241 | suite.add(false); |
---|
242 | suite.err() << "error: quality: " << static_cast<int>(q) |
---|
243 | << " expected " << 72 << "\n"; |
---|
244 | } |
---|
245 | uint8_t new_q = 60; |
---|
246 | bam.qual(24, new_q); |
---|
247 | q = bam.qual(24); |
---|
248 | if (q != new_q) { |
---|
249 | suite.add(false); |
---|
250 | suite.err() << "error: quality: " << static_cast<int>(q) |
---|
251 | << " expected " << new_q << "\n"; |
---|
252 | } |
---|
253 | test_cigar(suite, bam, in.header()); |
---|
254 | test_aux(suite, bam); |
---|
255 | test_sequence(suite, bam); |
---|
256 | test_name(suite, bam); |
---|
257 | test_write(suite, in.header()); |
---|
258 | } |
---|
259 | #endif |
---|