source: trunk/yat/omic/BamRead.h @ 3306

Last change on this file since 3306 was 3306, checked in by Peter, 9 years ago

merge patch release 0.12.1 into trunk

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.9 KB
Line 
1#ifndef theplu_yat_omic_bam_read
2#define theplu_yat_omic_bam_read
3
4// $Id: BamRead.h 3306 2014-08-21 04:37:21Z peter $
5
6/*
7  Copyright (C) 2012, 2013, 2014 Peter Johansson
8
9  This file is part of the yat library, http://dev.thep.lu.se/yat
10
11  The yat library is free software; you can redistribute it and/or
12  modify it under the terms of the GNU General Public License as
13  published by the Free Software Foundation; either version 3 of the
14  License, or (at your option) any later version.
15
16  The yat library is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  General Public License for more details.
20
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23*/
24
25#include "config_bam.h"
26#include YAT_BAM_HEADER
27#include YAT_SAM_HEADER
28
29#include "yat/utility/Aligner.h"
30// This file has to be included to keep compatibility with yat 0.11
31#include "yat/utility/Cigar.h"
32#include "yat/utility/deprecate.h"
33
34#include <functional>
35#include <string>
36#include <vector>
37
38namespace theplu {
39namespace yat {
40namespace omic {
41
42  /**
43     \brief Class holding a bam query
44
45     This is a wrapper around bam1_t and most of its information is
46     held in the core struct. A BamRead is typically created from an
47     InBamFile.
48
49     \see samtools
50
51     \since New in yat 0.10
52   */
53  class BamRead
54  {
55  public:
56    /**
57       \brief default constructor
58
59       Constructed object contains no data and most operations are not
60       defined
61     */
62    BamRead(void);
63
64    /**
65       \brief Copy constructor
66     */
67    BamRead(const BamRead& other);
68
69    /**
70       \brief Destructor
71     */
72    virtual ~BamRead(void);
73
74    /**
75       \brief assignment operator
       \return const reference (presumably to \c *this -- confirm in
       the implementation)
76     */
77    const BamRead& operator=(const BamRead& rhs);
78
79    /**
80       \return pointer to auxiliary data
81
82       \see aux_size(void)
83     */
84    const uint8_t* aux(void) const;
85
86    /**
87       Use bam_aux2? functions (in samtools C api) to convert returned
88       pointer to corresponding type.
89
90       \return pointer to field associated with \a tag, NULL if \a tag
91       doesn't exist.
92
93       \see bam_aux_get
94
95       \since New in yat 0.11
96     */
97    const uint8_t* aux(const char tag[2]) const;
98
99    /**
100       \brief append a new tag to aux field
101
102       \param tag two-character tag to append
103       \param type type code of the data; one of the characters in 'iIsScCdfAZH'
104       \param len length of data
105       \param data pointer to data
106
107       \since New in yat 0.11
108
109       \see SAM specification
110     */
111    void aux_append(const char tag[2], char type, int len, uint8_t* data);
112
113    /**
114       \brief remove a tag in aux field
115
116       \throw utility::runtime_error if \a tag is not present in read
117
118       \since New in yat 0.11
119     */
120    void aux_del(const char tag[2]);
121
122    /**
123       \return length of aux field
124
125       \since New in yat 0.11
126     */
127    int aux_size(void) const;
128
129    /**
130       \brief access core data struct
131
132       \see samtools C api documentation
133     */
134    const bam1_core_t& core(void) const;
135
136    /**
137       \brief access core data struct
138
139       \see samtools C api documentation
140     */
141    bam1_core_t& core(void);
142
143    /**
144       \brief access CIGAR array
145
146       In each element the lower 4 bits give a CIGAR operation and
147       the upper 28 bits keep the length.
148     */
149    const uint32_t* cigar(void) const;
150
151    /**
152       \return \a i th element of CIGAR array
153     */
154    uint32_t cigar(size_t i) const;
155
156    /**
157       \return \a i th CIGAR operation
158     */
159    uint32_t cigar_op(size_t i) const;
160
161    /**
162       \return length of \a i th CIGAR element
163     */
164    uint32_t cigar_oplen(size_t i) const;
165
166    /**
167       Translate CIGAR array to a string such as '72M3S'
168     */
169    std::string cigar_str(void) const;
170
171    /**
172       \brief set CIGAR
173
174       \param c new cigar
175
176       \deprecated Provided for backward compatibility with 0.11
177       API. Use cigar(const utility::Aligner::Cigar&) instead.
178    */
179    void cigar(const std::vector<uint32_t>& c) YAT_DEPRECATE;
180
181    /**
182       \brief set CIGAR
183
184       \param cigar new cigar
185
186       \since new in yat 0.12
187    */
188    void cigar(const utility::Aligner::Cigar& cigar);
189
190    /**
191       \brief rightmost coordinate
192
193       Coordinate is 0-based, i.e., the end is one past the last
194       matching position.
195
196       \see bam_calend
197     */
198    int32_t end(void) const;
199
200    /**
201       \brief bitwise flag
202
203       \see Preprocessor defines BAM_F*
204
205       \since implemented since yat 0.12
206     */
207    uint16_t flag(void) const;
208
209    /**
210       \brief leftmost position for mate
211     */
212    int32_t mpos(void) const;
213
214    /**
215       \brief Chromosome ID for mate
216     */
217    int32_t mtid(void) const;
218
219    /**
220       \return query name
221
222       Length of array is described by core().l_qname
223     */
224    const char* name(void) const;
225
226    /**
227       \brief modify name
228
229       \since New in yat 0.11
230    */
231    void name(const std::string& n);
232
233    /**
234       \brief 0-based leftmost coordinate
235     */
236    int32_t pos(void) const;
237
238    /**
239       \return Quality of base \a i
240     */
241    uint8_t qual(size_t i) const;
242
243    /**
244       \brief set quality of a base
245
246       \param i base to modify
247       \param q new quality
248
249       \since New in yat 0.11
250     */
251    void qual(size_t i, uint8_t q);
252
253    /**
254       Each character in returned sequence is either A, C, G, T, or N.
255
256       \return sequence
257     */
258    std::string sequence(void) const;
259
260    /**
261       4-bit integer describing base \a index
262
263       \see bam_nt16_rev_table
264     */
265    uint8_t sequence(size_t index) const;
266
267    /**
268       \brief modify a base in sequence
269
270       Set i-th base in sequence to \a x, where \a x is a 4-bit integer.
271
272       \see bam_nt16_table
273
274       \since New in yat 0.11
275     */
276    void sequence(size_t i, uint8_t x);
277
278    /**
279       \brief set sequence and quality
280
281       \since New in yat 0.11
282     */
283    void sequence(const std::string& seq, const std::vector<uint8_t>& qual);
284
285    /**
286       \see core().l_qseq
287     */
288    uint32_t sequence_length(void) const;
289
290    /**
291       Exchange this read with \a other.
292
293       \see swap(BamRead&, BamRead&)
294     */
295    void swap(BamRead& other);
296
297    /**
298       \brief chromosome ID
299     */
300    int32_t tid(void) const;
301
302  private:
303    // ensure capacity of data pointer is (at least) n
304    void reserve(int n);
305
306    bam1_t* bam_; // samtools record wrapped by this class
307
308    friend class InBamFile; // friends listed here may access bam_ and the private helpers
309    friend class OutBamFile;
310    friend class BamReadIterator;
311    uint32_t calend(const bam1_core_t *c, const uint32_t *cigar) const; // NOTE(review): presumably the end-coordinate helper behind end() (cf. bam_calend) -- confirm in implementation
312  };
313
314  /**
315     Swap overload for BamRead that is faster than generic
316     std::swap as it just swaps a pair of pointers.
317
       \param lhs read to exchange with \a rhs
       \param rhs read to exchange with \a lhs

318     \since New in yat 0.10
319
320     \relates BamRead
321   */
322  void swap(BamRead& lhs, BamRead& rhs);
323
324  /**
325     \return \c true if read is soft clipped on either side, see
326     left_soft_clipped() and right_soft_clipped().
327
328     \since New in yat 0.10
329
330     \relates BamRead
331   */
332  bool soft_clipped(const BamRead& bam);
333
334  /**
335     If read is soft clipped on left side, return how many bases are
336     clipped, otherwise return 0.
337
       \param bam read to inspect

       \return number of soft-clipped bases on the left side, 0 if none

338     \since New in yat 0.10
339
340     \relates BamRead
341   */
342  uint32_t left_soft_clipped(const BamRead& bam);
343
344  /**
345     If read is soft clipped on right side, return how many bases are
346     clipped, otherwise return 0.
347
       \param bam read to inspect

       \return number of soft-clipped bases on the right side, 0 if none

348     \since New in yat 0.10
349
350     \relates BamRead
351   */
352  uint32_t right_soft_clipped(const BamRead& bam);
353
354  /**
355     \return \c true if query names of \a lhs and \a rhs are equal
356
357     \see BamRead::name()
358
359     \since New in yat 0.10
360
361     \relates BamRead
362   */
363  bool same_query_name(const BamRead& lhs, const BamRead& rhs);
364
365  /**
366     Functor to compare two reads with respect to their leftmost
367     coordinate.
368
       The nested argument and result types are declared explicitly
       rather than inherited from std::binary_function, which is
       deprecated since C++11 and removed in C++17.

369     \see BamRead
370
371     \since New in yat 0.10
372   */
373  struct BamLessPos
375  {
    /** \brief type of first argument to operator() */
    typedef const BamRead& first_argument_type;

    /** \brief type of second argument to operator() */
    typedef const BamRead& second_argument_type;

    /** \brief type returned by operator() */
    typedef bool result_type;

376    /**
377       \return true if lhs tid is less than rhs tid; or tid
378       are equal and lhs pos is smaller than rhs pos.
379
380       \see BamRead::tid() and BamRead::pos()
381     */
382    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
383  };
384
385
386  /**
387     Functor to compare two reads with respect to their rightmost
388     coordinate.
389
       The nested argument and result types are declared explicitly
       rather than inherited from std::binary_function, which is
       deprecated since C++11 and removed in C++17.

390     \see BamRead
391
392     \since New in yat 0.10
393   */
394  struct BamLessEnd
396  {
    /** \brief type of first argument to operator() */
    typedef const BamRead& first_argument_type;

    /** \brief type of second argument to operator() */
    typedef const BamRead& second_argument_type;

    /** \brief type returned by operator() */
    typedef bool result_type;

397    /**
398       \return true if lhs tid is less than rhs tid; or tid
399       are equal and lhs end is smaller than rhs end.
400
401       \see BamRead::tid() and BamRead::end()
402     */
403    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
404  };
405
406}}}
407#endif
Note: See TracBrowser for help on using the repository browser.