File: HSP.h

package info (click to toggle)
soapaligner 2.20-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 768 kB
  • sloc: ansic: 10,051; makefile: 236
file content (126 lines) | stat: -rw-r--r-- 3,719 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
/*

   HSP.h		BWTBlastn functions

   This module contains miscellaneous BWTBlastn functions.

   Copyright (C) 2004, Wong Chi Kwong.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

*/

#ifndef __HSP_H__
#define __HSP_H__

#include "TypeNLimit.h"
#include "MemManager.h"
#include "TextConverter.h"

/*
 * Text-packing constants.
 *
 * The values are mutually consistent with a 2-bit-per-character encoding:
 * 2^2 = 4 symbols, 128 / 2 = 64 and 8 / 2 = 4 characters per unit.
 * NOTE(review): CHAR_PER_WORD == 16 implies a "word" is 32 bits - confirm.
 */
#define ALPHABET_SIZE         4
#define BIT_PER_CHAR          2
#define CHAR_PER_128          64
#define CHAR_PER_WORD         16
#define CHAR_PER_BYTE         4

/* 131072 == 2^17.  NOTE(review): unit is presumably characters - confirm. */
#define MAX_ALIGNMENT_LENGTH  131072
/* NOTE(review): purpose is not evident from this header; check the users. */
#define SHORTEST              70

/*
 * One block belonging to a chromosome/sequence.
 *
 * NOTE(review): field meanings below are inferred from the names only;
 * confirm against the code that fills these records.
 */
typedef struct _ChrBlock {
	int          chrID;       /* presumably the id of the owning chromosome */
	unsigned int blockStart;  /* presumably the first position of the block */
	unsigned int blockEnd;    /* presumably the last position of the block  */
	unsigned int ori;         /* presumably an original-coordinate offset - TODO confirm */
} ChrBlock;

/*
 * Annotation record for one named sequence together with its block list.
 *
 * NOTE(review): MAX_SEQ_NAME_LENGTH is #defined further down in this header,
 * after this struct; an earlier include presumably provides it too - confirm.
 * NOTE(review): field meanings are inferred from the names only.
 */
typedef struct _NewAnnotation {
	char         chrName[MAX_SEQ_NAME_LENGTH];  /* sequence name buffer */
	int          nameLen;                       /* presumably the used length of chrName */
	unsigned int chrStart;
	unsigned int chrEnd;
	int          blockNum;                      /* presumably the element count of blockInChr */
	ChrBlock    *blockInChr;                    /* ownership not visible here - TODO confirm */
} NewAnnotation;

/*
 * Sequence annotation: a numeric id plus a text field.
 *
 * The text buffer holds MAX_SEQ_NAME_LENGTH + 1 chars.
 * NOTE(review): the extra byte is presumably for a terminating NUL - confirm.
 * NOTE(review): MAX_SEQ_NAME_LENGTH is #defined further down in this header,
 * after this struct; an earlier include presumably provides it too - confirm.
 */
typedef struct Annotation {
	int  gi;
	char text[MAX_SEQ_NAME_LENGTH + 1];
} Annotation;

/*
 * Top-level record tying together the packed DNA text, the chromosome
 * names and the block list.
 *
 * NOTE(review): field meanings and pointer ownership are inferred from the
 * names only; confirm against HSPLoad() / HSPFree().
 */
typedef struct HSP {
	unsigned int  *packedDNA;   /* presumably the packed DNA words */
	int            chrNum;      /* presumably the element count of chrName */
	char         **chrName;
	int            numOfBlock;  /* presumably the element count of blockList */
	ChrBlock      *blockList;
	unsigned int   dnaLength;   /* NOTE(review): unit (characters vs. words) - confirm */
} HSP;

/* Size used for the name/text buffers of the annotation structs in this header. */
#define MAX_SEQ_NAME_LENGTH       256

/* NOTE(review): purpose is not evident from this header; check the users. */
#define MAX_HISTO_SIZE            256

/* Same value as nonMatchDnaCharIndex in this header. */
#define INVALID_CHAR_INDEX        15

/* Alignment operation codes; the four values 0..3 fit in ALIGN_BIT bits. */
#define ALIGN_MATCH               0
#define ALIGN_MISMATCH_AMBIGUITY  1
#define ALIGN_INSERT              2
#define ALIGN_DELETE              3

/* 16 codes of 2 bits each.  NOTE(review): implies a 32-bit word - confirm. */
#define ALIGN_PER_WORD            16
#define ALIGN_BIT                 2

/* 8 entries of 4 bits each.  NOTE(review): implies a 32-bit word - confirm. */
#define AUX_TEXT_PER_WORD         8
#define AUX_TEXT_BIT              4

/*
 * DNA character tables, all indexed by the same character code 0..15.
 *
 * Codes 0..3 are the plain bases A, C, G, T; codes 4..14 are ambiguity
 * letters; code 15 ('L') has no matching base at all.
 */

/* BLAST seems to treat masked (lowercase) characters as 'N', which still has a 1/4 chance of matching. */
static const char lowercaseDnaCharIndex = 14;
static const char nonMatchDnaCharIndex  = 15;

/* Letter shown for each character code. */
static const char dnaChar[16] = {
	'A', 'C', 'G', 'T',
	'M', 'R', 'S', 'V',
	'W', 'Y', 'H', 'K',
	'D', 'B', 'N', 'L'
};

/* Letter of the complement for each character code (same order as dnaChar). */
static const char dnaComplement[16] = {
	'T', 'G', 'C', 'A',
	'K', 'Y', 'S', 'B',
	'W', 'R', 'D', 'M',
	'H', 'V', 'N', 'L'
};

/* Number of plain bases (codes 0..3) each character code can stand for. */
static const char ambiguityCount[16] = {
	1, 1, 1, 1,
	2, 2, 2, 3,
	2, 2, 3, 2,
	3, 3, 4, 0
};

/*
 * Plain-base codes each character code can stand for.  Only the first
 * ambiguityCount[code] entries of a row carry information; the remaining
 * entries are zero padding.
 */
static const char ambiguityMatch[16][4] = {
	{0, 0, 0, 0},	/*  0 'A' */
	{1, 0, 0, 0},	/*  1 'C' */
	{2, 0, 0, 0},	/*  2 'G' */
	{3, 0, 0, 0},	/*  3 'T' */
	{0, 1, 0, 0},	/*  4 'M' */
	{0, 2, 0, 0},	/*  5 'R' */
	{1, 2, 0, 0},	/*  6 'S' */
	{0, 1, 2, 0},	/*  7 'V' */
	{0, 3, 0, 0},	/*  8 'W' */
	{1, 3, 0, 0},	/*  9 'Y' */
	{0, 1, 3, 0},	/* 10 'H' */
	{2, 3, 0, 0},	/* 11 'K' */
	{0, 2, 3, 0},	/* 12 'D' */
	{1, 2, 3, 0},	/* 13 'B' */
	{0, 1, 2, 3},	/* 14 'N' */
	{0, 0, 0, 0}	/* 15 'L' */
};

/*
 * Map-filling helpers (defined elsewhere).
 *
 * The map passed in must be allocated by the caller as char[256].
 * NOTE(review): presumably one entry per possible byte value, filled with
 * the character code / complement code for that byte - confirm in the
 * definitions.
 */
void HSPFillCharMap(
	unsigned char *charMap);
void HSPFillComplementMap(
	unsigned char *complementMap);

/*
 * HSP construction and release (defined elsewhere).
 *
 * HSPLoad and HSPConvertFromText both return an HSP pointer; HSPFree takes
 * one back together with the memory pool.
 * NOTE(review): presumably an HSP must be released with HSPFree using the
 * same mmPool it was created with, and HSPLoad reads the two named files -
 * confirm in the definitions; failure behaviour is not visible here.
 */
HSP *HSPLoad(
	MMPool *mmPool,
	const char *PackedDNAFileName,
	const char *AnnotationFileName);

HSP *HSPConvertFromText(
	MMPool *mmPool,
	const unsigned char *text,
	const unsigned int textLength,
	const unsigned int FASTARandomSeed,
	const int maskLowerCase,
	const int gi,
	const char *seqName);

void HSPFree(
	MMPool *mmPool,
	HSP *hsp);

/*
 * Conversion between FASTA and the packed representation (defined elsewhere).
 *
 * Both functions take file names for the FASTA, annotation, packed-DNA and
 * ambiguity files and return an unsigned int.
 * NOTE(review): which files are inputs and which are outputs, and the
 * meaning of the return value, are not visible in this header - confirm in
 * the definitions.
 */
unsigned int HSPParseFASTAToPacked(
	const char *FASTAFileName,
	const char *annotationFileName,
	const char *packedDNAFileName,
	const char *ambiguityFileName,
	const unsigned int FASTARandomSeed,
	const int maskLowerCase);

unsigned int HSPPackedToFASTA(
	const char *FASTAFileName,
	const char *annotationFileName,
	const char *packedDNAFileName,
	const char *ambiguityFileName);


#endif