source: branches/0.10-stable/yat/omic/BamRead.h @ 2965

Last change on this file since 2965 was 2965, checked in by Peter, 9 years ago

add doxygen tag relates for BamRead?

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 6.3 KB
Line 
1#ifndef theplu_yat_omic_bam_read
2#define theplu_yat_omic_bam_read
3
4// $Id: BamRead.h 2965 2013-01-23 04:58:35Z peter $
5//
6// Copyright (C) 2012 Peter Johansson
7//
8// This program is free software; you can redistribute it and/or modify
9// it under the terms of the GNU General Public License as published by
10// the Free Software Foundation; either version 3 of the License, or
11// (at your option) any later version.
12//
13// This program is distributed in the hope that it will be useful, but
14// WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16// General Public License for more details.
17//
18// You should have received a copy of the GNU General Public License
19// along with this program. If not, see <http://www.gnu.org/licenses/>.
20
21#include "config_bam.h"
22
23#include YAT_BAM_HEADER
24#include YAT_SAM_HEADER
25
26#include <functional>
27#include <string>
28#include <vector>
29
30// backport #defines from samtools 0.1.19
31#ifndef BAM_CIGAR_STR
32#define BAM_CIGAR_STR "MIDNSHP=XB"
33#define BAM_CIGAR_TYPE 0x3C1A7
34
35#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK)
36#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT)
37#define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)])
38#define bam_cigar_gen(l, o) ((l)<<BAM_CIGAR_SHIFT|(o))
39#define bam_cigar_type(o) (BAM_CIGAR_TYPE>>((o)<<1)&3)
40#endif
41
42/*
43  CIGAR
440 M 1 1 match
451 I 0 1 insert
462 D 1 0 deletion
473 N 1 0 spliced (rna)
484 S 0 1 soft clipped
495 H 0 0 hard clipped
506 P 0 0 padded
517 = 1 1 equal
528 X 1 1 mismatch
539 B 0 0
54
553rd column: consumes reference sequence
564th column: consumes query sequence
57 */
58
59namespace theplu {
60namespace yat {
61namespace omic {
62
63  /**
64     \brief Class holding a bam query
65
66     This is a wrapper around bam1_t and most of its information is
67     held in the core struct. A BamRead is typically created from a
68     InBamFile.
69
70     \see samtools
71
72     \since New in yat 0.10
73   */
74  class BamRead
75  {
76  public:
77    /**
78       \brief default constructor
79
80       Constructed object contains no data and most operations are not
81       defined
82     */
83    BamRead(void);
84
85    /**
86       \brief Copy constructor
87     */
88    BamRead(const BamRead& other);
89
90    /**
91       \brief Destructor
92     */
93    virtual ~BamRead(void);
94
95    /**
96       \brief assignment operator
97     */
98    const BamRead& operator=(const BamRead& rhs);
99
100    /**
101       \return pointer to auxiliary data
102     */
103    const uint8_t* aux(void) const;
104
105    /**
106       \brief access core data struct
107
108       \see samtools C api documentaion
109     */
110    const bam1_core_t& core(void) const;
111
112    /**
113       \brief access core data struct
114
115       \see samtools C api documentaion
116     */
117    bam1_core_t& core(void);
118
119    /**
120       \brief access CIGAR array
121
122       In each element the lower 4 bits gives a CIGAR operation and
123       the upper 28 bits keep the length.
124     */
125    const uint32_t* cigar(void) const;
126
127    /**
128       \return \a i th element of CIGAR array
129     */
130    uint32_t cigar(size_t i) const;
131
132    /**
133       \return \a i th CIGAR operation
134     */
135    uint32_t cigar_op(size_t i) const;
136
137    /**
138       \return length of \a i th CIGAR element
139     */
140    uint32_t cigar_oplen(size_t i) const;
141
142    /**
143       Translate CIGAR array to a string such as '72M3S'
144     */
145    std::string cigar_str(void) const;
146
147    /**
148       \brief set CIGAR
149
150       \param c new cigar
151    */
152    void cigar(const std::vector<uint32_t>& c);
153
154    /**
155       \return Quality of base \a i
156     */
157    uint8_t qual(size_t i) const;
158
159    /**
160       \return query name
161
162       Length of array is described by core().l_qname
163     */
164    const char* name(void) const;
165
166    /**
167       \brief chromosome ID
168     */
169    int32_t tid(void) const;
170
171    /**
172       \brief 0-based laftmost coordinate
173     */
174    int32_t pos(void) const;
175
176    /**
177       \brief rightmost coordinate
178
179       Coordinate is 0-based, i.e., the end is one passed the last
180       matching position.
181
182       \see bam_calend
183     */
184    int32_t end(void) const;
185
186    /**
187       \brief Chromosome ID for mate
188     */
189    int32_t mtid(void) const;
190
191    /**
192       \brief leftmost position for mate
193     */
194    int32_t mpos(void) const;
195
196    /**
197       Each character in returned sequence is either A, C, G, T, or N.
198
199       \return sequence
200     */
201    std::string sequence(void) const;
202
203    /**
204       4-bit integer describing base \a index
205
206       \see bam_nt16_rev_table
207     */
208    uint8_t sequence(size_t index) const;
209
210    /**
211       \see core().l_qseq
212     */
213    uint32_t sequence_length(void) const;
214
215    /**
216       \brief bitwise flag
217
218       \see Preprocessor defines BAM_F*
219     */
220    uint16_t flag(void) const;
221
222    /**
223       Exchanging this read with \a other.
224     */
225    void swap(BamRead& other);
226
227  private:
228    bam1_t* bam_;
229
230    friend class InBamFile;
231    friend class OutBamFile;
232    friend class BamReadIterator;
233  };
234
235  /**
236     Swap specialization for BamRead that is faster than generic
237     std::swap as it just swap a pointers.
238
239     \since New in yat 0.10
240
241     \relates BamRead
242   */
243  void swap(BamRead& lhs, BamRead& rhs);
244
245  /**
246     \return \c true if read is soft clipped, either left_soft_clipped
247     or right_soft_clipped.
248
249     \since New in yat 0.10
250
251     \relates BamRead
252   */
253  bool soft_clipped(const BamRead& bam);
254
255  /**
256     If read is soft clipped on left side, return how many bases are
257     clipped, otherwise return 0.
258
259     \since New in yat 0.10
260   */
261  uint32_t left_soft_clipped(const BamRead& bam);
262
263  /**
264     If read is soft clipped on right side, return how many bases are
265     clipped, otherwise return 0.
266
267     \since New in yat 0.10
268
269     \relates BamRead
270   */
271  uint32_t right_soft_clipped(const BamRead& bam);
272
273  /**
274     return \c true if query names are equal
275
276     \see BamRead::name()
277
278     \since New in yat 0.10
279
280     \relates BamRead
281   */
282  bool same_query_name(const BamRead& lhs, const BamRead& rhs);
283
284  /**
285     Functor to compare two reads with respect to their leftmost
286     coordinate.
287
288     \see BamRead
289
290     \since New in yat 0.10
291   */
292  struct BamLessPos
293    : public std::binary_function<const BamRead&, const BamRead&, bool>
294  {
295    /**
296       \return true if lhs tid is less than rhs tid; or tid
297       are equal and lhs pos is smaller than rhs pos.
298
299       \see BamRead::tid() and BamRead::pos()
300     */
301    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
302  };
303
304
305  /**
306     Functor to compare two reads with respect to their rightmost
307     coordinate.
308
309     \see BamRead
310
311     \since New in yat 0.10
312   */
313  struct BamLessEnd
314    : public std::binary_function<const BamRead&, const BamRead&, bool>
315  {
316    /**
317       \return true if lhs tid is less than rhs tid; or tid
318       are equal and lhs end is smaller than rhs end.
319
320       \see BamRead::tid() and BamRead::end()
321     */
322    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
323  };
324
325}}}
326#endif
Note: See TracBrowser for help on using the repository browser.