source: trunk/yat/omic/BamRead.h @ 2910

Last change on this file since 2910 was 2910, checked in by Peter, 10 years ago

improve docs. closes #729

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 6.2 KB
Line 
1#ifndef theplu_yat_omic_bam_read
2#define theplu_yat_omic_bam_read
3
4// $Id: BamRead.h 2910 2012-12-17 02:31:13Z peter $
5//
6// Copyright (C) 2012 Peter Johansson
7//
8// This program is free software; you can redistribute it and/or modify
9// it under the terms of the GNU General Public License as published by
10// the Free Software Foundation; either version 3 of the License, or
11// (at your option) any later version.
12//
13// This program is distributed in the hope that it will be useful, but
14// WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16// General Public License for more details.
17//
18// You should have received a copy of the GNU General Public License
19// along with this program. If not, see <http://www.gnu.org/licenses/>.
20
21#include <sam.h>
22#include <bam.h>
23
24#include <boost/move/move.hpp>
25
26#include <functional>
27#include <string>
28#include <vector>
29
30// backport #defines from samtools 0.1.19
31#ifndef BAM_CIGAR_STR
32#define BAM_CIGAR_STR "MIDNSHP=XB"
33#define BAM_CIGAR_TYPE 0x3C1A7
34
35#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK)
36#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT)
37#define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)])
38#define bam_cigar_gen(l, o) ((l)<<BAM_CIGAR_SHIFT|(o))
39#define bam_cigar_type(o) (BAM_CIGAR_TYPE>>((o)<<1)&3)
40#endif
41
42/*
43  CIGAR
440 M 1 1 match
451 I 0 1 insert
462 D 1 0 deletion
473 N 1 0 spliced (rna)
484 S 0 1 soft clipped
495 H 0 0 hard clipped
506 P 0 0 padded
517 = 1 1 equal
528 X 1 1 mismatch
539 B 0 0
54
553rd column: consumes reference sequence
564th column: consumes query sequence
57 */
58
59namespace theplu {
60namespace yat {
61namespace omic {
62
63  /**
64     \brief Class holding a bam query
65
66     This is a wrapper around bam1_t and most of its information is
67     held in the core struct. A BamRead is typically created from a
68     InBamFile.
69
70     \see samtools
71
72     \since New in yat 0.10
73   */
74  class BamRead
75  {
76  public:
77    /**
78       \brief default constructor
79
80       Constructed object contains no data and most operations are not
81       defined
82     */
83    BamRead(void);
84
85    /**
86       \brief Copy constructor
87     */
88    BamRead(const BamRead& other);
89
90    /**
91       \brief Destructor
92     */
93    virtual ~BamRead(void);
94
95    /**
96       \brief assignment operator
97     */
98    const BamRead& operator=(const BamRead& rhs);
99
100    /**
101       \return pointer to auxiliary data
102     */
103    const uint8_t* aux(void) const;
104
105    /**
106       \brief access core data struct
107
108       \see samtools C api documentaion
109     */
110    const bam1_core_t& core(void) const;
111
112    /**
113       \brief access core data struct
114
115       \see samtools C api documentaion
116     */
117    bam1_core_t& core(void);
118
119    /**
120       \brief access CIGAR array
121
122       In each element the lower 4 bits gives a CIGAR operation and
123       the upper 28 bits keep the length.
124     */
125    const uint32_t* cigar(void) const;
126
127    /**
128       \return \a i th element of CIGAR array
129     */
130    uint32_t cigar(size_t i) const;
131
132    /**
133       \return \a i th CIGAR operation
134     */
135    uint32_t cigar_op(size_t i) const;
136
137    /**
138       \return length of \a i th CIGAR element
139     */
140    uint32_t cigar_oplen(size_t i) const;
141
142    /**
143       Translate CIGAR array to a string such as '72M3S'
144     */
145    std::string cigar_str(void) const;
146
147    /**
148       \brief set CIGAR
149
150       \param c new cigar
151    */
152    void cigar(const std::vector<uint32_t>& c);
153
154    /**
155       \return Quality of base \a i
156     */
157    uint8_t qual(size_t i) const;
158
159    /**
160       \return query name
161
162       Length of array is described by core().l_qname
163     */
164    const char* name(void) const;
165
166    /**
167       \brief chromosome ID
168     */
169    int32_t tid(void) const;
170
171    /**
172       \brief 0-based laftmost coordinate
173     */
174    int32_t pos(void) const;
175
176    /**
177       \brief rightmost coordinate
178
179       Coordinate is 0-based, i.e., the end is one passed the last
180       matching position.
181
182       \see bam_calend
183     */
184    int32_t end(void) const;
185
186    /**
187       \brief Chromosome ID for mate
188     */
189    int32_t mtid(void) const;
190
191    /**
192       \brief leftmost position for mate
193     */
194    int32_t mpos(void) const;
195
196    /**
197       Each character in returned sequence is either A, C, G, T, or N.
198
199       \return sequence
200     */
201    std::string sequence(void) const;
202
203    /**
204       4-bit integer describing base \a index
205
206       \see bam_nt16_rev_table
207     */
208    uint8_t sequence(size_t index) const;
209
210    /**
211       \see core().l_qseq
212     */
213    uint32_t sequence_length(void) const;
214
215    /**
216       \brief bitwise flag
217
218       \see Preprocessor defines BAM_F*
219     */
220    uint16_t flag(void) const;
221
222    /**
223       Exchanging this read with \a other.
224     */
225    void swap(BamRead& other);
226
227  private:
228    bam1_t* bam_;
229
230    friend class InBamFile;
231    friend class OutBamFile;
232    friend class BamReadIterator;
233  };
234
235  /**
236     Swap specialization for BamRead that is faster than generic
237     std::swap as it just swap a pointers.
238
239     \since New in yat 0.10
240   */
241  void swap(BamRead& lhs, BamRead& rhs);
242
243  /**
244     \return \c true if read is soft clipped, either left_soft_clipped
245     or right_soft_clipped.
246
247     \since New in yat 0.10
248   */
249  bool soft_clipped(const BamRead& bam);
250
251  /**
252     If read is soft clipped on left side, return how many bases are
253     clipped, otherwise return 0.
254
255     \since New in yat 0.10
256   */
257  uint32_t left_soft_clipped(const BamRead& bam);
258
259  /**
260     If read is soft clipped on right side, return how many bases are
261     clipped, otherwise return 0.
262
263     \since New in yat 0.10
264   */
265  uint32_t right_soft_clipped(const BamRead& bam);
266
267  /**
268     return \c true if query names are equal
269
270     \see BamRead::name()
271
272     \since New in yat 0.10
273   */
274  bool same_query_name(const BamRead& lhs, const BamRead& rhs);
275
276  /**
277     Functor to compare two reads with respect to their leftmost
278     coordinate.
279
280     \see BamRead
281
282     \since New in yat 0.10
283   */
284  struct BamLessPos
285    : public std::binary_function<const BamRead&, const BamRead&, bool>
286  {
287    /**
288       \return true if lhs tid is less than rhs tid; or tid
289       are equal and lhs pos is smaller than rhs pos.
290
291       \see BamRead::tid() and BamRead::pos()
292     */
293    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
294  };
295
296
297  /**
298     Functor to compare two reads with respect to their rightmost
299     coordinate.
300
301     \see BamRead
302
303     \since New in yat 0.10
304   */
305  struct BamLessEnd
306    : public std::binary_function<const BamRead&, const BamRead&, bool>
307  {
308    /**
309       \return true if lhs tid is less than rhs tid; or tid
310       are equal and lhs end is smaller than rhs end.
311
312       \see BamRead::tid() and BamRead::end()
313     */
314    bool operator()(const BamRead& lhs, const BamRead& rhs) const;
315  };
316
317}}}
318#endif
Note: See TracBrowser for help on using the repository browser.