File: gblast.h

package info (click to toggle)
glam2 1064-9
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid
  • size: 956 kB
  • sloc: ansic: 6,925; xml: 757; asm: 74; makefile: 54; sh: 11
file content (86 lines) | stat: -rw-r--r-- 2,778 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
/* gblast.h - generic blast program. */
#if !defined (GBLAST)
#define GBLAST
#include <time.h>
#include "afnio.h"
#include "random.h"
#include "residues.h"
#include "alphabet.h"
#include "dheap.h"
#include "sequence.h"
#include "mlist.h"

/*********************** Finite State Machine ************************
FSM = (Q,q0,A,Sigma,d)
	Q = States
	q0 in Q = start state
	A subset Q = accepting states
	Sigma = input alphabet
	d = function from Q x Sigma -> Q (transition function)

if in an accepting state then execute appropriate action.
	- go to positions on list and try extending alignment

input text T[1..n];  pattern to be found P[1..m]
 n = input string length; m = pattern length 

	(note: lex is a FSM)

  input tokens = A-Y and '-' 'x' '*' '\0'

 if q = a then  go to query sequence:
   pos[q][1..n][A...Y] = list of positions matching pattern in accepting 
	state = NULL if not an accepting state.

  blast method:
	1. compile list of high scoring words and make fsm.
	2. scan database for hits.
	3. extend hits.
(for purge, extend only until the score is found to be >= cutoff.)

	QWL
	..:  -> S = R(Q,N) + R(W,Y) + R(L,L).
	NYL	
		if(S > Threshold) then extend hit to find MSP.

		need drop-score.
 *********************************************************************/

/*************************** generic gblast type **************************/
/*
 * State for one generic-blast matcher: the finite state machine described
 * in the comment at the top of this header plus the data it was built for.
 * gb_typ is the pointer-to-object handle used by the functions declared in
 * this header.
 */
typedef struct {
	long	nQ;	/* number of states in the machine */
	long	**d;	/* transition table: d[q][r] maps Q x A -> Q */
	ml_type	*pos;	/* lists used on accept */
	long	*tmp;	/* temporary list */
	long	T;	/* NOTE(review): presumably the threshold given to
			 * MakeGBlast(T, ...) -- confirm in the definition */
	a_type	A;	/* alphabet */
	e_type	E;	/* query sequence */
} gblast_type, *gb_typ;
/*********************************************************************/
/******************************* private *******************************/
/*
 * NOTE(review): these two routines are labelled private, yet they are
 * declared here without `static`, so every file including this header sees
 * them. Only the signatures are visible; the descriptions below are
 * inferred from the names and from the "blast method" outline at the top of
 * this header -- confirm against the definitions.
 */
/*
 * FastExtendGBlast: takes two sequences with a position in each (E1/i1 and
 * E2/i2), a two-level int table R (presumably residue-pair scores) and a
 * score, and returns a Boolean. Presumably reports whether extending the
 * hit reaches `score`, matching the "extend only until score >= cutoff"
 * note above -- TODO confirm.
 */
Boolean FastExtendGBlast(e_type E1, long i1, e_type E2, long i2,
        register int **R, long score);
/*
 * gblast_error: takes a message string s and returns long. Presumably the
 * module's error reporter; whether it returns to the caller cannot be told
 * from this declaration.
 */
long     gblast_error(char *s);

/******************************* PUBLIC *******************************/
/*
 * NOTE(review): only the signatures are visible in this header. The
 * descriptions below are inferred from the names and from the "blast
 * method" outline at the top of this header -- confirm against the
 * definitions before relying on them.
 */
/* Presumably builds a matcher for query sequence E over alphabet A using
 * threshold T (step 1 of the outline: compile words and make the fsm). */
gb_typ	MakeGBlast(long T, e_type E, a_type A) ;
/* Takes a sequence E, a matcher B and an out-pointer os; returns long.
 * Presumably a scan that also reports an offset through *os -- confirm. */
long     MatcherGBlastOffset(e_type E, gb_typ B, long *os);
/* Presumably releases the matcher B ("Nil" prefix) -- confirm ownership of
 * the sequence and alphabet it references. */
void    NilGBlast(gb_typ B);
/* Takes a stdio stream, a sequence E and a matcher B; returns long.
 * Whether fptr is read or written is not visible here. FILE requires
 * <stdio.h>, which this header does not include directly (presumably it
 * arrives via one of the project headers included above). */
long     MatcherGBlast(FILE *fptr, e_type E, gb_typ B);
/* Boolean-returning variant taking a score; presumably stops as soon as a
 * hit reaching `score` is found (see the purge note above) -- confirm. */
Boolean FastMatcherGBlast(e_type E, gb_typ B, long score);
/* Takes two sequences with a position in each (E1/i1, E2/i2) and alphabet
 * A; returns long. Presumably the score from extending a hit (step 3 of
 * the outline) -- confirm. */
long     ExtendGBlast(e_type E1, long i1, e_type E2, long i2, a_type A);
/*********************************************************************/

/* CODES */

/* CONSTANTS */
/*
 * Usage/help text listing the -l, -f, -F and -e options.
 * Written as adjacent string literals (one per output line) so the leading
 * whitespace of each line is explicit; the resulting string is unchanged.
 * The name suggests callers may append further text -- not visible here.
 */
#define USAGE2_START \
	"USAGE: fsm seq1 seq2 [options]\n" \
	"   options:\n" \
	"     [-l]\t      - eliminate low complexity sequences\n" \
	"     [-f]\t      - create output file for sequence with repeats\n" \
	"     [-F]\t      - create output file for sequence without repeats\n" \
	"     [-e<float>]      - E value cutoff for output of sequences\n"

#endif