File: finddiagsn.cpp

package info (click to toggle)
muscle 3.60-1
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 1,384 kB
  • ctags: 2,079
  • sloc: cpp: 26,452; xml: 185; makefile: 101
file content (152 lines) | stat: -rw-r--r-- 3,457 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#include "muscle.h"
#include "profile.h"
#include "diaglist.h"

#define TRACE	0

#define pow4(i)	(1 << (2*i))	// 4^i = 2^(2*i)
const unsigned K = 7;
const unsigned KTUPS = pow4(K);
static unsigned TuplePos[KTUPS];

static char *TupleToStr(int t)
	{
	static char s[K];

	for (int i = 0; i < K; ++i)
		{
		unsigned Letter = (t/(pow4(i)))%4;
		assert(Letter >= 0 && Letter < 4);
		s[K-i-1] = LetterToChar(Letter);
		}

	return s;
	}

static unsigned GetTuple(const ProfPos *PP, unsigned uPos)
	{
	unsigned t = 0;

	for (unsigned i = 0; i < K; ++i)
		{
		const unsigned uLetter = PP[uPos+i].m_uResidueGroup;
		if (RESIDUE_GROUP_MULTIPLE == uLetter)
			return EMPTY;
		t = t*4 + uLetter;
		}

	return t;
	}

void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
  unsigned uLengthY, DiagList &DL)
	{
	if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)
		Quit("FindDiagsNuc: requires nucleo alphabet");

	DL.Clear();

// 16 is arbitrary slop, no principled reason for this.
	if (uLengthX < K + 16 || uLengthY < K + 16)
		return;

// Set A to shorter profile, B to longer
	const ProfPos *PA;
	const ProfPos *PB;
	unsigned uLengthA;
	unsigned uLengthB;
	bool bSwap;
	if (uLengthX < uLengthY)
		{
		bSwap = false;
		PA = PX;
		PB = PY;
		uLengthA = uLengthX;
		uLengthB = uLengthY;
		}
	else
		{
		bSwap = true;
		PA = PY;
		PB = PX;
		uLengthA = uLengthY;
		uLengthB = uLengthX;
		}

#if	TRACE
	Log("FindDiagsNuc(LengthA=%d LengthB=%d\n", uLengthA, uLengthB);
#endif

// Build tuple map for the longer profile, B
	if (uLengthB < K)
		Quit("FindDiags: profile too short");

	memset(TuplePos, EMPTY, sizeof(TuplePos));

	for (unsigned uPos = 0; uPos < uLengthB - K; ++uPos)
		{
		const unsigned uTuple = GetTuple(PB, uPos);
		if (EMPTY == uTuple)
			continue;
		TuplePos[uTuple] = uPos;
		}

// Find matches
	for (unsigned uPosA = 0; uPosA < uLengthA - K; ++uPosA)
		{
		const unsigned uTuple = GetTuple(PA, uPosA);
		if (EMPTY == uTuple)
			continue;
		const unsigned uPosB = TuplePos[uTuple];
		if (EMPTY == uPosB)
			continue;

	// This tuple is found in both profiles
		unsigned uStartPosA = uPosA;
		unsigned uStartPosB = uPosB;

	// Try to extend the match forwards
		unsigned uEndPosA = uPosA + K - 1;
		unsigned uEndPosB = uPosB + K - 1;
		for (;;)
			{
			if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)
				break;
			const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;
			if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)
				break;
			const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;
			if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)
				break;
			if (uAAGroupA != uAAGroupB)
				break;
			++uEndPosA;
			++uEndPosB;
			}
		uPosA = uEndPosA;

#if	TRACE
		{
		Log("Match: A %4u-%4u   ", uStartPosA, uEndPosA);
		for (unsigned n = uStartPosA; n <= uEndPosA; ++n)
			Log("%c", LetterToChar(PA[n].m_uResidueGroup));
		Log("\n");
		Log("       B %4u-%4u   ", uStartPosB, uEndPosB);
		for (unsigned n = uStartPosB; n <= uEndPosB; ++n)
			Log("%c", LetterToChar(PB[n].m_uResidueGroup));
		Log("\n");
		}
#endif

		const unsigned uLength = uEndPosA - uStartPosA + 1;
		assert(uEndPosB - uStartPosB + 1 == uLength);

		if (uLength >= g_uMinDiagLength)
			{
			if (bSwap)
				DL.Add(uStartPosB, uStartPosA, uLength);
			else
				DL.Add(uStartPosA, uStartPosB, uLength);
			}
		}
	}