File: BaseAsciiMap.cpp

package info (click to toggle)
libstatgen 1.0.15-8
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,588 kB
  • sloc: cpp: 49,624; ansic: 1,408; makefile: 320; sh: 60
file content (145 lines) | stat: -rw-r--r-- 7,413 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
/*
 *  Copyright (C) 2010  Regents of the University of Michigan
 *
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "BaseAsciiMap.h"

//
// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
// both base and color space.
//  class 0 -> 'A' (Adenine - 0x41 and 0x61)
//  class 1 -> 'C' (Cytosine - 0x43 and 0x63)
//  class 2 -> 'G' (Guanine - 0x47 and 0x67)
//  class 3 -> 'T' (Thymine - 0x54 and 0x74)
//  class 4 -> 'N' (Unknown - read error or incomplete data - 0x4E and 0x6E)
//  class 5 -> not a valid DNA base pair character
//
// Note: The +1 array size is for the terminating NUL character
//
// NB: This table also maps 0, 1, 2, and 3 to the corresponding integers,
// and '.' to class 4.  This allows ABI SOLiD reads to be converted
// to integers via ReadIndexer::Word2Integer.
//
unsigned char BaseAsciiMap::baseColor2int[256+1] =
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x00-0x0F
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x10-0x1F
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\004\005"  // 0x20-0x2F
    "\000\001\002\003\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x30-0x3F
    "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005"  // 0x40-0x4F
    "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005"  // 0x50-0x5F
    "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005"  // 0x60-0x6F
    "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005"  // 0x70-0x7F
// not used, but included for completeness:
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x80-0x8F
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x90-0x9F
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xA0-0xAF
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xB0-0xBF
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xC0-0xCF
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xD0-0xDF
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xE0-0xEF
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xF0-0xFF
    ;

// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
// just base space (ACTGNactgn).
unsigned char BaseAsciiMap::base2int[256+1] =
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x00-0x0F
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x10-0x1F
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x20-0x2F
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x30-0x3F
    "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005"  // 0x40-0x4F
    "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005"  // 0x50-0x5F
    "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005"  // 0x60-0x6F
    "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005"  // 0x70-0x7F
// not used, but included for completeness:
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x80-0x8F
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x90-0x9F
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xA0-0xAF
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xB0-0xBF
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xC0-0xCF
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xD0-0xDF
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xE0-0xEF
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xF0-0xFF
    ;

// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
// just color space (0123).
unsigned char BaseAsciiMap::color2int[256+1] =
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x00-0x0F
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x10-0x1F
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\004\005"  // 0x20-0x2F
    "\000\001\002\003\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x30-0x3F
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x40-0x4F
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x50-0x5F
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x60-0x6F
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x70-0x7F
// not used, but included for completeness:
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x80-0x8F
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x90-0x9F
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xA0-0xAF
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xB0-0xBF
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xC0-0xCF
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xD0-0xDF
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xE0-0xEF
    "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xF0-0xFF
    ;


//
// This is obviously for base space use only:
//
const char BaseAsciiMap::int2base[] = "ACGTNMXXXXXXXXXX";
//
// convert int to color space value
//
const char BaseAsciiMap::int2colorSpace[] = "0123NXXXXXXXXXXX";

/// This table maps 5' base space to the 3' complement base space
/// values, as well as 5' color space values to the corresponding
/// 3' complement color space values.
///
/// In both cases, invalids are mapped to 'N', which isn't accurate
/// for ABI SOLiD, but internally it shouldn't matter (on output it
/// will).
unsigned char BaseAsciiMap::base2complement[256+1 /* for NUL char */] =
    "NNNNNNNNNNNNNNNN"  // 0x00-0x0F
    "NNNNNNNNNNNNNNNN"  // 0x10-0x1F
    "NNNNNNNNNNNNNNNN"  // 0x20-0x2F
    "0123NNNNNNNNNNNN"  // 0x30-0x3F
    "NTNGNNNCNNNNNNNN"  // 0x40-0x4F
    "NNNNANNNNNNNNNNN"  // 0x50-0x5F
    "NTNGNNNCNNNNNNNN"  // 0x60-0x6F
    "NNNNANNNNNNNNNNN"  // 0x70-0x7F
// not used, but included for completeness:
    "NNNNNNNNNNNNNNNN"  // 0x80-0x8F
    "NNNNNNNNNNNNNNNN"  // 0x90-0x9F
    "NNNNNNNNNNNNNNNN"  // 0xA0-0xAF
    "NNNNNNNNNNNNNNNN"  // 0xB0-0xBF
    "NNNNNNNNNNNNNNNN"  // 0xC0-0xCF
    "NNNNNNNNNNNNNNNN"  // 0xD0-0xDF
    "NNNNNNNNNNNNNNNN"  // 0xE0-0xEF
    "NNNNNNNNNNNNNNNN"  // 0xF0-0xFF
    ;

BaseAsciiMap::BaseAsciiMap()
        : myNumPrimerBases(1)
{
    myBase2IntMapPtr = NULL;
}

BaseAsciiMap::~BaseAsciiMap()
{
}