source: trunk/yat/utility/Aligner.h @ 3203

Last change on this file since 3203 was 3203, checked in by Peter, 8 years ago

improve docs of CIGAR. closes #785

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 6.2 KB
Line 
1#ifndef _theplu_yat_utility_aligner_
2#define _theplu_yat_utility_aligner_
3
4// $Id: Aligner.h 3203 2014-05-03 14:15:32Z peter $
5
6/*
7  Copyright (C) 2012 Peter Johansson
8  Copyright (C) 2013 Jari Häkkinen, Peter Johansson
9  Copyright (C) 2014 Peter Johansson
10
11  This file is part of the yat library, http://dev.thep.lu.se/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 3 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with yat. If not, see <http://www.gnu.org/licenses/>.
25*/
26
27#include <boost/cstdint.hpp>
28
29#include <deque>
30#include <cstddef>
31#include <iosfwd>
32#include <vector>
33
34namespace theplu {
35namespace yat {
36namespace utility {
37
38  class Matrix;
39
40  /**
41     \brief Aligning two sequences
42
43     General class aligning two sequences described in a dot-matrix,
44     D, in which \f$ D(i,j) \f$ describes how similar element \c i in
45     the first sequence is with element \c j in the second sequence. A
46     path through this matrix corresponds to an alignment of the two
47     sequences, in which a diagonal step correspoinds to a match
48     between corresponding sequence elements, and a vertical or
49     horizontal step correspond correspond to inserting a gap into one
50     of the sequences.
51
52     \since New in yat 0.9
53   */
54  class Aligner
55  {
56  public:
57    /**
58       Enum used to describe alignment.
59
60       \see alignment
61     */
62    enum direction { none, right, down, diagonal };
63
64    /**
65       \brief Constructor
66
67       Same as Aligner(gap, open_gap, gap, open_gap)
68     */
69    Aligner(double gap=0, double open_gap=0);
70
71    /**
72       \brief constructor
73
74       \param vertical_gap Penalty for extending a vertical gap. A
75       vertical gap means alignment consumes an element in second
76       element and not, in the first element, i.e., an element has
77       been inserted to the second element or equivalently an element
78       has been deleted in first sequence.
79
80       \param open_vertical_gap Penalty for open a vertical gap. Total
81       cost for a vertical gap is open_vertical_gap + N *
82       vertical_gap.
83
84       \param horizon_gap Penalty for extending a horizontal gap
85
86       \param open_horizon_gap Penalty for open a horizontal
87       gap. Total penalty for a horizontal gap is open_horizon_gap + N
88       * horizon_gap.
89     */
90    Aligner(double vertical_gap, double open_vertical_gap,
91            double horizon_gap, double open_horizon_gap);
92
93    /**
94       Initialize a score matrix with \f$ S_{k,0} = -
95       \textrm{open\_vertical\_gap} - k * \textrm{vertical\_gap} \f$
96       and \f$ S_{0,k} = - \textrm{open\_horizon\_gap} - k *
97       \textrm{horizon\_gap} \f$ and align using operator().
98
99       \return most lower right element of score matrix
100     */
101    double needleman_wunsch(const Matrix& d);
102
103    /**
104       Initialize a score matrix with all elements equal to zero,
105       align using operator().
106
107       \return max element in score matrix.
108     */
109    double smith_waterman(const Matrix& d);
110
111    /**
112       \brief calculate score matrix based on the \a dot matrix
113
114       The algorithm calculates a maximum \a score matrix as
115
116       \f$ S_{i,j} = \textrm{max} \{ S_{ij}; S_{i,j-1} - F(\textrm{gap}_V);
117       S_{i-1,j-1} + D_{i-1,j-1}; S_{i-1,j} - F(\textrm{gap}_H) \} \f$
118
119       where \f$ F(\textrm{gap}) \f$ is gap if there was already a
120       gap, and gap + open_gap otherwise.
121
122     */
123    void operator()(const Matrix& dot, Matrix& score);
124
125    /**
126       If, for example, alignment(i,j) returns Aligner::diagonal,
127       implies that score(i,j) was maximized as score(i-1,j-1) +
128       dot(i-1, j-1), in other words, element i-1 in first sequence
129       was aligned with element j-1 in second sequence.
130    */
131    const direction& alignment(size_t i, size_t j) const;
132
133    /**
134       \brief Compact Idiosyncratic Gapped Alignment Report
135
136       A CIGAR is a representation of an alignment, an array of
137       operation/length where the operation and length is packed into
138       a uint32_t. So a CIGAR of e.g. '3M1D5M' means alignment is
139       three matches, one deletion, and five matches, which is
140       represented by an array of length 3.
141
142       \since new in yat 0.12
143     */
144    class Cigar
145    {
146    public:
147      /**
148         Erases all elements.
149       */
150      void clear(void);
151
152      /**
153         See defines BAM_CIGAR_*
154
155         \return operation part of element \a i
156       */
157      uint8_t op(size_t i) const;
158
159      /**
160         Translate the op(i) to a descriptive character of one of the
161         following "MIDNSHP=XB"
162
163         \return character describing operation
164         i
165       */
166      char opchr(size_t i) const;
167
168      /**
169         \return length of element i
170       */
171      uint32_t oplen(size_t i) const;
172
173      /**
174         Erase last \a len operation.
175       */
176      void pop_back(uint32_t len=1);
177
178      /**
179         Erase first \a len operation.
180       */
181      void pop_front(uint32_t len=1);
182
183      /**
184         Add an operation \a op of length \a len at back of cigar. If
185         last operation equals \a op, length of that element is
186         changed and the size() is not modified.
187       */
188      void push_back(uint8_t op, uint32_t len=1);
189
190      /**
191         Add an operation \a op of length \a len at front of cigar.
192       */
193      void push_front(uint8_t op, uint32_t len=1);
194
195      /**
196         \return cigar element \a i
197       */
198      uint32_t operator[](size_t i) const;
199
200      /**
201         \return number of elements in cigar
202       */
203      size_t size(void) const;
204    private:
205      std::deque<uint32_t> cigar_;
206
207      // using compiler generated copy
208      // Cigar(const Cigar& other);
209      // Cigar& operator=(const Cigar&);
210    };
211
212    /**
213       \since new in yat 0.12
214     */
215    const Cigar cigar(size_t i, size_t j) const;
216
217  private:
218    direction& directions(size_t i, size_t j);
219
220    size_t columns_;
221    double vertical_gap_;
222    double open_vertical_gap_;
223    double horizon_gap_;
224    double open_horizon_gap_;
225    std::vector<direction> alignment_;
226  };
227
228
229  /**
230     Output e.g. '5S44M5I98M' if the cigar has four elements. Each
231     element is output as the length followed by the character
232     descibing the operation (see opchr).
233
234     \relates Aligner::Cigar
235   */
236  std::ostream& operator<<(std::ostream& os, const Aligner::Cigar& cigar);
237
238}}} // of namespace utility, yat, and theplu
239
240#endif
Note: See TracBrowser for help on using the repository browser.