// File: SalmonStringUtils.hpp
// Package: salmon 1.10.3+ds1-3 (area: main; suites: forky, sid)
// Package size: 35,148 kB; sloc: cpp 200,707; ansic 171,082; sh 859;
// python 792; makefile 238
// File content: 204 lines, 11,318 bytes (-rw-r--r--)
#ifndef SALMON_STRING_UTILS
#define SALMON_STRING_UTILS

#include <cstddef>
#include <cstdint>
#include <vector>

namespace salmon {

namespace stringtools {

constexpr double phredToLogProb[] = {-1000.0,
                                     -1.58147375341,
                                     -0.996843044008,
                                     -0.695524471332,
                                     -0.507675873697,
                                     -0.380130408066,
                                     -0.289268187202,
                                     -0.222551515973,
                                     -0.172556572914,
                                     -0.134551960289,
                                     -0.105360515658,
                                     -0.0827653026692,
                                     -0.0651741731994,
                                     -0.051418274158,
                                     -0.0406248442216,
                                     -0.0321335740231,
                                     -0.0254397275342,
                                     -0.0201543647612,
                                     -0.0159758692467,
                                     -0.0126691702086,
                                     -0.0100503358535,
                                     -0.00797499827851,
                                     -0.00632956293111,
                                     -0.00502447389099,
                                     -0.00398901726641,
                                     -0.00316728822616,
                                     -0.00251504651118,
                                     -0.00199725550255,
                                     -0.00158615046428,
                                     -0.00125971852411,
                                     -0.00100050033358,
                                     -0.000794643880558,
                                     -0.000631156481835,
                                     -0.000501312869929,
                                     -0.000398186436251,
                                     -0.00031627777656,
                                     -0.000251220196302,
                                     -0.000199546139504,
                                     -0.000158501880005,
                                     -0.000125900466311,
                                     -0.000100005000333,
                                     -7.94359784262e-05,
                                     -6.30977250677e-05,
                                     -5.01199793479e-05,
                                     -3.9811509523e-05,
                                     -3.16232766123e-05,
                                     -2.51191797991e-05,
                                     -1.99528222059e-05,
                                     -1.58490575203e-05,
                                     -1.25893333633e-05,
                                     -1.00000500003e-05,
                                     -7.94331389525e-06,
                                     -6.30959335027e-06,
                                     -5.01188489573e-06,
                                     -3.98107963e-06,
                                     -3.16228266018e-06,
                                     -2.51188958631e-06,
                                     -1.99526430546e-06,
                                     -1.58489444841e-06,
                                     -1.25892620425e-06,
                                     -1.00000050003e-06,
                                     -7.94328550222e-07,
                                     -6.30957543537e-07,
                                     -5.01187359198e-07,
                                     -3.98107249807e-07,
                                     -3.16227815973e-07,
                                     -2.51188674656e-07,
                                     -1.99526251442e-07,
                                     -1.58489331784e-07,
                                     -1.25892549094e-07,
                                     -1.00000004947e-07,
                                     -7.94328265847e-08,
                                     -6.30957364055e-08,
                                     -5.01187246496e-08,
                                     -3.98107178487e-08,
                                     -3.16227771195e-08,
                                     -2.51188646374e-08,
                                     -1.99526233083e-08,
                                     -1.58489320702e-08,
                                     -1.25892541629e-08,
                                     -1.00000001002e-08,
                                     -7.94328239674e-09,
                                     -6.3095734392e-09,
                                     -5.01187237413e-09,
                                     -3.98107170245e-09,
                                     -3.16227766695e-09,
                                     -2.51188647975e-09,
                                     -1.99526229071e-09,
                                     -1.58489321792e-09,
                                     -1.25892540916e-09,
                                     -9.99999972218e-10,
                                     -7.94328270142e-10,
                                     -6.3095739764e-10,
                                     -5.01187202976e-10};
constexpr uint8_t samToTwoBit[] = {
    0, /*A*/ 0, /*C*/ 1, 0, /*G*/ 2, 0, 0, 0, /*T*/ 3, 0, 0, 0, 0, 0, 0, 0};

constexpr char twoBitToChar[] = {'A', 'C', 'G', 'T'};

constexpr uint8_t charToSamEncode[] = {
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 0,  15, 15, 15, 1,  14, 2,  13, 15, 15, 4,  11, 15, 15, 12,
    15, 3,  15, 15, 15, 15, 5,  6,  8,  15, 7,  9,  15, 10, 15, 15, 15, 15, 15,
    15, 15, 1,  15, 2,  15, 15, 15, 4,  15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 8,  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15};

/*
constexpr uint8_t charToSamEncode[] = {
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, 15, 1, 14, 2, 13,
    15, 15, 4, 11, 15, 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9,
    15, 10, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15};
    */

std::vector<uint8_t> encodeSequenceInSAM(const char* src, size_t len);

/**
   Incomplete: currently only rev for 'ATCG'
 */
constexpr uint8_t encodedRevComp[] = {15, 8,  4,  15, 2,  15, 15, 15,
                                      1,  15, 15, 15, 15, 15, 15, 15};

constexpr char samCodeToChar[] = {
    '=', 'A', 'C', 'M', 'G', 'R', 'S', 'V', 'T', 'W', 'Y', 'H', 'K', 'D', 'B',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N'};

constexpr char charCanon[] = {
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'A', 'N', 'C', 'N', 'N', 'N', 'G', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'T', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'A', 'N', 'C', 'N', 'N', 'N', 'G', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'T', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N'};

constexpr char charRC[] = {
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'T', 'N', 'G', 'N', 'N', 'N', 'C', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'A', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'T', 'N', 'G', 'N', 'N', 'N', 'C', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'A', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N'};

enum class strand { forward, reverse };
} // namespace stringtools
} // namespace salmon

#endif // SALMON_STRING_UTILS