source: trunk/yat/utility/Aligner.h @ 3200

Last change on this file since 3200 was 3200, checked in by Peter, 8 years ago

implement first version of CIGAR (refs #785). Note, this revision does not compile without samtools (should be fixed shortly).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 5.6 KB
Line 
1#ifndef _theplu_yat_utility_aligner_
2#define _theplu_yat_utility_aligner_
3
4// $Id: Aligner.h 3200 2014-05-03 11:36:31Z peter $
5
6/*
7  Copyright (C) 2012 Peter Johansson
8  Copyright (C) 2013 Jari Häkkinen, Peter Johansson
9  Copyright (C) 2014 Peter Johansson
10
11  This file is part of the yat library, http://dev.thep.lu.se/yat
12
13  The yat library is free software; you can redistribute it and/or
14  modify it under the terms of the GNU General Public License as
15  published by the Free Software Foundation; either version 3 of the
16  License, or (at your option) any later version.
17
18  The yat library is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  General Public License for more details.
22
23  You should have received a copy of the GNU General Public License
24  along with yat. If not, see <http://www.gnu.org/licenses/>.
25*/
26
27#include <boost/cstdint.hpp>
28
29#include <deque>
30#include <cstddef>
31#include <iosfwd>
32#include <vector>
33
34namespace theplu {
35namespace yat {
36namespace utility {
37
38  class Matrix;
39
40  /**
41     \brief Aligning two sequences
42
43     General class aligning two sequences described in a dot-matrix,
44     D, in which \f$ D(i,j) \f$ describes how similar element \c i in
45     the first sequence is with element \c j in the second sequence. A
46     path through this matrix corresponds to an alignment of the two
47     sequences, in which a diagonal step correspoinds to a match
48     between corresponding sequence elements, and a vertical or
49     horizontal step correspond correspond to inserting a gap into one
50     of the sequences.
51
52     \since New in yat 0.9
53   */
54  class Aligner
55  {
56  public:
57    /**
58       Enum used to describe alignment.
59
60       \see alignment
61     */
62    enum direction { none, right, down, diagonal };
63
64    /**
65       \brief Constructor
66
67       Same as Aligner(gap, open_gap, gap, open_gap)
68     */
69    Aligner(double gap=0, double open_gap=0);
70
71    /**
72       \brief constructor
73
74       \param vertical_gap Penalty for extending a vertical gap. A
75       vertical gap means alignment consumes an element in second
76       element and not, in the first element, i.e., an element has
77       been inserted to the second element or equivalently an element
78       has been deleted in first sequence.
79
80       \param open_vertical_gap Penalty for open a vertical gap. Total
81       cost for a vertical gap is open_vertical_gap + N *
82       vertical_gap.
83
84       \param horizon_gap Penalty for extending a horizontal gap
85
86       \param open_horizon_gap Penalty for open a horizontal
87       gap. Total penalty for a horizontal gap is open_horizon_gap + N
88       * horizon_gap.
89     */
90    Aligner(double vertical_gap, double open_vertical_gap,
91            double horizon_gap, double open_horizon_gap);
92
93    /**
94       Initialize a score matrix with \f$ S_{k,0} = -
95       \textrm{open\_vertical\_gap} - k * \textrm{vertical\_gap} \f$
96       and \f$ S_{0,k} = - \textrm{open\_horizon\_gap} - k *
97       \textrm{horizon\_gap} \f$ and align using operator().
98
99       \return most lower right element of score matrix
100     */
101    double needleman_wunsch(const Matrix& d);
102
103    /**
104       Initialize a score matrix with all elements equal to zero,
105       align using operator().
106
107       \return max element in score matrix.
108     */
109    double smith_waterman(const Matrix& d);
110
111    /**
112       \brief calculate score matrix based on the \a dot matrix
113
114       The algorithm calculates a maximum \a score matrix as
115
116       \f$ S_{i,j} = \textrm{max} \{ S_{ij}; S_{i,j-1} - F(\textrm{gap}_V);
117       S_{i-1,j-1} + D_{i-1,j-1}; S_{i-1,j} - F(\textrm{gap}_H) \} \f$
118
119       where \f$ F(\textrm{gap}) \f$ is gap if there was already a
120       gap, and gap + open_gap otherwise.
121
122     */
123    void operator()(const Matrix& dot, Matrix& score);
124
125    /**
126       If, for example, alignment(i,j) returns Aligner::diagonal,
127       implies that score(i,j) was maximized as score(i-1,j-1) +
128       dot(i-1, j-1), in other words, element i-1 in first sequence
129       was aligned with element j-1 in second sequence.
130    */
131    const direction& alignment(size_t i, size_t j) const;
132
133    /**
134       \since new in yat 0.12
135     */
136    class Cigar
137    {
138    public:
139      /**
140         Erases all elements.
141       */
142      void clear(void);
143
144      /**
145         \return operation i
146       */
147      uint8_t op(size_t i) const;
148
149      /**
150         \return character describing operation i
151       */
152      char opchr(size_t i) const;
153
154      /**
155         \return length of element i
156       */
157      uint32_t oplen(size_t i) const;
158
159      /**
160         Erase last operation.
161       */
162      void pop_back(uint32_t len=1);
163
164      /**
165         Erase first operation.
166       */
167      void pop_front(uint32_t len=1);
168
169      /**
170         Add an operation \a op of length \a len at back of cigar.
171       */
172      void push_back(uint8_t op, uint32_t len=1);
173
174      /**
175         Add an operation \a op of length \a len at front of cigar.
176       */
177      void push_front(uint8_t op, uint32_t len=1);
178
179      /**
180         \return cigar element i
181       */
182      uint32_t operator[](size_t) const;
183
184      /**
185         \return number of elements in cigar
186       */
187      size_t size(void) const;
188    private:
189      std::deque<uint32_t> cigar_;
190
191      // using compiler generated copy
192      // Cigar(const Cigar& other);
193      // Cigar& operator=(const Cigar&);
194    };
195
196    /**
197       \since new in yat 0.12
198     */
199    const Cigar cigar(size_t i, size_t j) const;
200
201  private:
202    direction& directions(size_t i, size_t j);
203
204    size_t columns_;
205    double vertical_gap_;
206    double open_vertical_gap_;
207    double horizon_gap_;
208    double open_horizon_gap_;
209    std::vector<direction> alignment_;
210  };
211
212
213  /**
214     Output e.g. '5S44M5I98M' if the cigar has four elements. Each
215     element is output as the length followed by the character
216     descibing the operation (see opchr).
217
218     \relates Aligner::Cigar
219   */
220  std::ostream& operator<<(std::ostream& os, const Aligner::Cigar& cigar);
221
222}}} // of namespace utility, yat, and theplu
223
224#endif
Note: See TracBrowser for help on using the repository browser.