File: Tables.cpp

package info (click to toggle)
snap-aligner 2.0.3%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 6,652 kB
  • sloc: cpp: 41,051; ansic: 5,239; python: 227; makefile: 85; sh: 28
file content (109 lines) | stat: -rw-r--r-- 3,520 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#include "stdafx.h"
#include "Tables.h"


static const Tables tables;

const char *COMPLEMENT = tables.getComplement();
const char *IS_N = tables.getIsN();
const int  *BASE_VALUE = tables.getBaseValue();
const int  *BASE_VALUE_NO_N = tables.getBaseValueNoN();
const char *VALUE_BASE = tables.getValueBase();
const unsigned char *VALUE4_RC = tables.getValue4RC();
const char *PACKED_BASE_VALUE = tables.getPackedBaseValue();
const char *PACKED_QUALITY_MASK = tables.getPackedQualityMask();
const char *PACKED_VALUE_BASE = tables.getPackedValueBase();
const unsigned *IS_LOWER_CASE_OR_DOT = tables.getIsLowerCaseOrDot();
const char *TO_UPPER_CASE_DOT_TO_N = tables.getToUpperCaseDotToN();
const char *PACKED_VALUE_BASE_RC = tables.getPackedValueBaseRC();
const char *CIGAR_QUAL_TO_SAM = tables.getCigarQualToSam();

//
// Builds every lookup table held by this object.  All tables are indexed either by a character
// code (0..255), by a small base value (0..4), or by a packed byte (0..255).
//
Tables::Tables()
{
    //
    // Complement table.  Anything without an explicit entry below complements to 'N'.
    //
    memset(complement, 'N', sizeof(complement));

    //
    // {base, complement} pairs.  The first row is the ordinary bases plus N/n.  The remaining rows
    // are complements for the unusual codes, generated by hand from the Wikipedia article
    // https://en.wikipedia.org/wiki/FASTA_format.  The aligner treats these all as N, but putting
    // them here gets them right in the output file.
    //
    const char complementPairs[][2] = {
        {'A', 'T'}, {'C', 'G'}, {'G', 'C'}, {'T', 'A'}, {'N', 'N'}, {'n', 'n'},
        {'U', 'A'},
        {'R', 'Y'}, {'Y', 'R'},
        {'K', 'M'}, {'M', 'K'},
        {'S', 'W'}, {'W', 'S'},
        {'B', 'V'}, {'V', 'B'},
        {'D', 'H'}, {'H', 'D'}
    };
    const unsigned nComplementPairs = sizeof(complementPairs) / sizeof(complementPairs[0]);
    for (unsigned k = 0; k < nComplementPairs; k++) {
        complement[(unsigned char) complementPairs[k][0]] = complementPairs[k][1];
    }

    //
    // isN is set only for 'N' and 'n'.
    //
    memset(isN, 0, sizeof(isN));
    isN['n'] = 1;
    isN['N'] = 1;

    //
    // Base <-> value tables.  The values (A=0, G=1, C=2, T=3) are chosen so that complementary
    // bases are bitwise opposites.  baseValue maps everything else to 4 (N); baseValueNoN maps
    // everything else to 0 instead.  valueBase is the inverse of baseValue.
    //
    for (unsigned ch = 0; ch < 256; ch++) {
        baseValue[ch] = 4;
    }
    memset(baseValueNoN, 0, sizeof(baseValueNoN));

    const char basesInValueOrder[] = "AGCT";
    for (int value = 0; value < 4; value++) {
        const unsigned char base = (unsigned char) basesInValueOrder[value];
        baseValue[base] = value;
        baseValueNoN[base] = value;
        valueBase[value] = basesInValueOrder[value];
    }
    valueBase[4] = 'N';

    //
    // Reverse complement of a byte of 4x2-bit values: reverse the order of the four 2-bit fields,
    // then flip every bit.
    //
    for (int packed = 0; packed < 256; packed++) {
        const int fieldsReversed =
            ((packed & 0x03) << 6) |
            ((packed & 0x0c) << 2) |
            ((packed & 0x30) >> 2) |
            ((packed & 0xc0) >> 6);
        value4RC[packed] = 0xff ^ fieldsReversed;
    }

    //
    // Packed byte -> base (and -> complemented base).  Packed bytes below 4 decode to 'N';
    // otherwise the top two bits select the base.
    //
    for (int packed = 0; packed < 256; packed++) {
        if (packed < 4) {
            packedValueBase[packed] = 'N';
            packedValueBaseRC[packed] = 'N';
        } else {
            const int topTwoBits = packed >> 6;
            packedValueBase[packed] = "AGCT"[topTwoBits];
            packedValueBaseRC[packed] = "TCGA"[topTwoBits];
        }
    }

    //
    // Base character (either case) -> its value in the top two bits of a packed byte.
    // Every other character maps to 0.
    //
    memset(packedBaseValue, 0, sizeof(packedBaseValue));
    const char lowerBasesInValueOrder[] = "agct";
    for (int value = 0; value < 4; value++) {
        const char topBits = (char) (value << 6);   // 0x00, 0x40, 0x80, 0xc0
        packedBaseValue[(unsigned char) lowerBasesInValueOrder[value]] = topBits;
        packedBaseValue[(unsigned char) basesInValueOrder[value]] = topBits;
    }

    //
    // Quality mask: 0 for the first four entries, 0x3f (the low six bits) for all the others.
    //
    memset(packedQualityMask, 0x3f, sizeof(packedQualityMask));
    memset(packedQualityMask, 0, 4);

    //
    // Case tables.  Codes 0x61..0x7a ('a'..'z' in ASCII) are flagged and mapped to the code 0x20
    // lower (their upper-case form); every other character maps to itself and is not flagged.
    //
    for (unsigned ch = 0; ch < 256; ch++) {
        if (ch >= 0x61 && ch <= 0x7a) {
            isLowerCaseOrDot[ch] = 1;
            toUpperCaseDotToN[ch] = ch - 0x20;
        } else {
            isLowerCaseOrDot[ch] = 0;
            toUpperCaseDotToN[ch] = ch;
        }
    }

    // '.' is flagged as well, and is turned into 'N'.
    isLowerCaseOrDot['.'] = 1;
    toUpperCaseDotToN['.'] = 'N';

    //
    // Quality value -> SAM quality character: offset from '!', with anything that would land
    // past '~' mapped back to '!'.
    //
    const unsigned largestOffset = '~' - '!';
    for (unsigned qual = 0; qual < 256; qual++) {
        if (qual > largestOffset) {
            cigarQualToSam[qual] = '!';
        } else {
            cigarQualToSam[qual] = '!' + qual;
        }
    }
}