File: score_matrix.h

package info (click to toggle)
seqan2 2.5.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 228,748 kB
  • sloc: cpp: 257,602; ansic: 91,967; python: 8,326; sh: 1,056; xml: 570; makefile: 229; awk: 51; javascript: 21
file content (225 lines) | stat: -rw-r--r-- 9,298 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
// ==========================================================================
//                 SeqAn - The Library for Sequence Analysis
// ==========================================================================
// Copyright (c) 2006-2026, Knut Reinert, FU Berlin
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above copyright
//       notice, this list of conditions and the following disclaimer in the
//       documentation and/or other materials provided with the distribution.
//     * Neither the name of Knut Reinert or the FU Berlin nor the names of
//       its contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
// DAMAGE.
//
// ==========================================================================
// Author: Andreas Gogol-Doering <andreas.doering@mdc-berlin.de>
// ==========================================================================
// Code for score matrices with data from files or built-in data.
// ==========================================================================

#ifndef SEQAN_SSCORE_MATRIX_H_
#define SEQAN_SSCORE_MATRIX_H_

// TODO(holtgrew): If the complex type conversions are necessary, a static_cast<> is more C++ and explicit.

namespace seqan2 {

// Forward declaration of the provider of built-in matrix tables; it is only
// declared here, not defined.  Within this header it is used by
// setDefaultScoreMatrix(), which calls the static member function getData()
// and treats the result as a TValue const * to the beginning of a table with
// TAB_SIZE entries.
template <typename TValue, typename TSequenceValue, typename TSpec>
struct ScoringMatrixData_;


// Specialization tag that selects the matrix-based Score specialization
// defined in this header.  TSequenceValue is the alphabet type (AminoAcid if
// omitted); TSpec is passed to setDefaultScoreMatrix() by the Score
// constructors that do not take a file name (Default if omitted).
template <typename TSequenceValue = AminoAcid, typename TSpec = Default>
struct ScoreMatrix;

/*!
 * @class MatrixScore
 * @headerfile <seqan/score.h>
 * @extends Score
 * @brief A general scoring matrix.
 *
 * @signature template <typename TValue, typename TSeqValue, typename TSpec>
 *            class Score<TValue, ScoreMatrix<[TSeqValue[, TSpec]]> >;
 *
 * @tparam TValue    The score value.
 * @tparam TSeqValue The alphabet type, defaults to AminoAcid.
 * @tparam TSpec     Further specialization, defaults to Default.
 *
 * The TSpec argument can be used to obtain a predefined matrix.
 * Specify one of the following tags:
 *
 * ScoreSpecBlosum30, ScoreSpecBlosum45, ScoreSpecBlosum62, ScoreSpecBlosum80,
 * ScoreSpecPam40, ScoreSpecPam120, ScoreSpecPam200, ScoreSpecPam250, ScoreSpecVtml200.
 *
 * This will internally call @link MatrixScore#setDefaultScoreMatrix setDefaultScoreMatrix@endlink.
 *
 * To provide more user-friendly access to the predefined scoring matrices, the following typedefs exist:
 * @link Blosum30 @endlink, @link Blosum45 @endlink,  @link Blosum62 @endlink,
 * @link Blosum80 @endlink, @link Pam40 @endlink,     @link Pam120 @endlink,
 * @link Pam200 @endlink,   @link Pam250 @endlink and @link Vtml200 @endlink.
 *
 * @fn MatrixScore::Score
 * @brief Constructor
 *
 * @signature MatrixScore::Score(gapExtend[, gapOpen]);
 * @signature MatrixScore::Score(fileName, gapExtend[, gapOpen]);
 *
 * @param[in] fileName  Path to load the matrix from, type is <tt>char const *</tt>.
 * @param[in] gapExtend Gap extension score, type is TValue.
 * @param[in] gapOpen   Gap open score, defaults to gapExtend, type is TValue.
 */

template <typename TValue, typename TSequenceValue, typename TSpec>
class Score<TValue, ScoreMatrix<TSequenceValue, TSpec> >
{
public:
    // Compile-time dimensions of the table: VALUE_SIZE is taken from
    // ValueSize<TSequenceValue>::VALUE, TAB_SIZE is its square.
    enum
    {
        VALUE_SIZE = ValueSize<TSequenceValue>::VALUE,
        TAB_SIZE = VALUE_SIZE * VALUE_SIZE
    };

    // Flat substitution table; score() and setScore() address the entry for
    // the pair (i, j) as data_tab[i * VALUE_SIZE + j].
    TValue data_tab[TAB_SIZE];

    // Score charged for extending a gap.
    TValue data_gap_extend;

    // Score charged for opening a gap.
    TValue data_gap_open;

    // Built-in matrix (selected through TSpec); the gap open score is set to
    // the gap extension score.
    explicit Score(TValue _gap_extend = -1)
        : Score(_gap_extend, _gap_extend)
    {}

    // Built-in matrix (selected through TSpec) with separate gap scores.
    Score(TValue _gap_extend, TValue _gap_open)
        : data_gap_extend(_gap_extend),
          data_gap_open(_gap_open)
    {
        setDefaultScoreMatrix(*this, TSpec());
    }

    // Matrix loaded from the file at `filename`; the gap open score is set to
    // the gap extension score.
    explicit Score(char const * filename, TValue _gap_extend = -1)
        : Score(filename, _gap_extend, _gap_extend)
    {}

    // Matrix loaded from the file at `filename` with separate gap scores.
    Score(char const * filename, TValue _gap_extend, TValue _gap_open)
        : data_gap_extend(_gap_extend),
          data_gap_open(_gap_open)
    {
        loadScoreMatrix(*this, filename);
    }

    // Copying, moving and destruction use the compiler-generated member-wise
    // implementations.
    Score(Score const &) = default;
    Score(Score &&) = default;
    Score & operator=(Score const &) = default;
    Score & operator=(Score &&) = default;
    ~Score() = default;
};


// ----------------------------------------------------------------------------
// Metafunction IsScoreMatrix_
// ----------------------------------------------------------------------------

// Primary template: any type not matched by a specialization below derives
// from False.
template <typename T>
struct IsScoreMatrix_ : False {};

// The ScoreMatrix specialization tag itself derives from True.
template <typename TSequenceValue, typename TMatrixSpec>
struct IsScoreMatrix_<ScoreMatrix<TSequenceValue, TMatrixSpec> > : True {};

// A Score type is classified by forwarding to its specialization tag.
template <typename TScoreValue, typename TScoreSpec>
struct IsScoreMatrix_<Score<TScoreValue, TScoreSpec> > : IsScoreMatrix_<TScoreSpec> {};

// ----------------------------------------------------------------------------
// Function score()
// ----------------------------------------------------------------------------

// TODO(holtgrew): Does it make sense to document each Score specialization?  Should dddoc show a list of all specializations of a class?
// Look up the substitution score stored for a pair of values.
//
// sc   - matrix score whose table data_tab is read.
// val1 - first value; after conversion to TSequenceValue it selects the row.
// val2 - second value; after conversion to TSequenceValue it selects the column.
//
// Returns the table entry data_tab[i * VALUE_SIZE + j], where i and j are the
// integral values obtained from val1 and val2.  The computed index is not
// range-checked.
template <typename TValue, typename TSequenceValue, typename TSpec, typename TVal1, typename TVal2>
inline TValue
score(Score<TValue, ScoreMatrix<TSequenceValue, TSpec> > const & sc, TVal1 val1, TVal2 val2)
{
    typedef Score<TValue, ScoreMatrix<TSequenceValue, TSpec> > TScore;

    // Conversion chain TVal{1,2} => TSequenceValue => integral.  The first
    // step is an explicit static_cast (instead of a C-style cast, which could
    // silently fall back to a reinterpret/const cast); the second step is the
    // implicit conversion of TSequenceValue to unsigned int.
    unsigned int i = static_cast<TSequenceValue>(val1);
    unsigned int j = static_cast<TSequenceValue>(val2);

    return sc.data_tab[i * TScore::VALUE_SIZE + j];
}

/*!
 * @fn MatrixScore#setScore
 * @brief Set the substitution score between two values.
 *
 * @signature void setScore(score, x, y, v);
 *
 * @param[in,out] score The MatrixScore to set the value for.
 * @param[in]     x     The substituted alphabet value.
 * @param[in]     y     The alphabet value to substitute x for.
 * @param[in]     v     The score value to set.
 */

template <typename TValue, typename TSequenceValue, typename TSpec, typename TVal1, typename TVal2, typename T>
inline void
setScore(Score<TValue, ScoreMatrix<TSequenceValue, TSpec> > & sc, TVal1 val1, TVal2 val2, T score)
{
    typedef Score<TValue, ScoreMatrix<TSequenceValue, TSpec> > TScore;

    // Conversion chain TVal{1,2} => TSequenceValue => integral.  The first
    // step is an explicit static_cast (instead of a C-style cast, which could
    // silently fall back to a reinterpret/const cast); the second step is the
    // implicit conversion of TSequenceValue to unsigned int.
    unsigned int i = static_cast<TSequenceValue>(val1);
    unsigned int j = static_cast<TSequenceValue>(val2);

    // Overwrite the entry for (val1, val2); `score` is implicitly converted to
    // TValue by the assignment.  The computed index is not range-checked.
    sc.data_tab[i * TScore::VALUE_SIZE + j] = score;
}

/*!
 * @fn MatrixScore#setDefaultScoreMatrix
 * @brief Set the score matrix of a Score to one of the default matrixes.
 *
 * @signature void setDefaultScoreMatrix(score, tag);
 *
 * @param[in,out] score The MatrixScore to update.
 * @param[in]     tag   The tag to select the default matrix, see description below.
 *
 * @section Remarks
 *
 * The tag must be one of the following:
 * Default, ScoreSpecBlosum30, ScoreSpecBlosum45, ScoreSpecBlosum62, ScoreSpecBlosum80,
 * ScoreSpecPam40, ScoreSpecPam120, ScoreSpecPam200, ScoreSpecPam250, ScoreSpecVtml200.
 *
 * If Default is used for tag then the matrix will be filled with default-constructed TValue values.
 */

// Overload for an arbitrary tag: copies TAB_SIZE entries from the table
// returned by ScoringMatrixData_<TValue, TSequenceValue, TTag>::getData()
// into sc.data_tab.
template <typename TValue, typename TSequenceValue, typename TSpec, typename TTag>
inline void
setDefaultScoreMatrix(Score<TValue, ScoreMatrix<TSequenceValue, TSpec> > & sc, TTag)
{
    typedef ScoringMatrixData_<TValue, TSequenceValue, TTag> TBuiltinData;

    TValue const * srcBegin = TBuiltinData::getData();
    TValue const * srcEnd = srcBegin + Score<TValue, ScoreMatrix<TSequenceValue, TSpec> >::TAB_SIZE;

    arrayCopy(srcBegin, srcEnd, sc.data_tab);
}

// Overload for the Default tag: every one of the TAB_SIZE entries of
// sc.data_tab is set to a value-initialized TValue.
template <typename TValue, typename TSequenceValue, typename TSpec>
inline void
setDefaultScoreMatrix(Score<TValue, ScoreMatrix<TSequenceValue, TSpec> > & sc, Default)
{
    TValue * tabBegin = sc.data_tab;
    TValue * tabEnd = tabBegin + Score<TValue, ScoreMatrix<TSequenceValue, TSpec> >::TAB_SIZE;

    arrayFill(tabBegin, tabEnd, TValue());
}

}  // namespace seqan2

#endif  // SEQAN_SSCORE_MATRIX_H_