File: diffobjscore.cpp

package info (click to toggle)
muscle 3.60-1
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 1,384 kB
  • ctags: 2,079
  • sloc: cpp: 26,452; xml: 185; makefile: 101
file content (162 lines) | stat: -rw-r--r-- 4,461 bytes parent folder | download | duplicates (13)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#include "muscle.h"
#include "msa.h"
#include "objscore.h"
#include "profile.h"

#define TRACE				0
#define COMPARE_3_52		0
#define BRUTE_LETTERS		0

static SCORE ScoreColLetters(const MSA &msa, unsigned uColIndex)
	{
	SCOREMATRIX &Mx = *g_ptrScoreMatrix;
	const unsigned uSeqCount = msa.GetSeqCount();

#if	BRUTE_LETTERS
	SCORE BruteScore = 0;
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		unsigned uLetter1 = msa.GetLetterEx(uSeqIndex1, uColIndex);
		if (uLetter1 >= g_AlphaSize)
			continue;
		WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
			{
			unsigned uLetter2 = msa.GetLetterEx(uSeqIndex2, uColIndex);
			if (uLetter2 >= g_AlphaSize)
				continue;
			WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
			BruteScore += w1*w2*Mx[uLetter1][uLetter2];
			}
		}
#endif
	
	double N = 0;
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		WEIGHT w = msa.GetSeqWeight(uSeqIndex1);
		N += w;
		}
	if (N <= 0)
		return 0;

	FCOUNT Freqs[20];
	memset(Freqs, 0, sizeof(Freqs));
	SCORE Score = 0;
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		unsigned uLetter = msa.GetLetterEx(uSeqIndex1, uColIndex);
		if (uLetter >= g_AlphaSize)
			continue;
		WEIGHT w = msa.GetSeqWeight(uSeqIndex1);
		Freqs[uLetter] += w;
		Score -= w*w*Mx[uLetter][uLetter];
		}

	for (unsigned uLetter1 = 0; uLetter1 < g_AlphaSize; ++uLetter1)
		{
		const FCOUNT f1 = Freqs[uLetter1];
		Score += f1*f1*Mx[uLetter1][uLetter1];
		for (unsigned uLetter2 = uLetter1 + 1; uLetter2 < g_AlphaSize; ++uLetter2)
			{
			const FCOUNT f2 = Freqs[uLetter2];
			Score += 2*f1*f2*Mx[uLetter1][uLetter2];
			}
		}
	Score /= 2;
#if	BRUTE_LETTERS
	assert(BTEq(BruteScore, Score));
#endif
	return Score;
	}

static SCORE ScoreLetters(const MSA &msa, const unsigned Edges[],
  unsigned uEdgeCount)
	{
	const unsigned uSeqCount = msa.GetSeqCount();
	const unsigned uColCount = msa.GetColCount();

// Letters
	SCORE Score = 0;
	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const unsigned uColIndex = Edges[uEdgeIndex];
		assert(uColIndex < uColCount);
		Score += ScoreColLetters(msa, uColIndex);
		}
	return Score;
	}

void GetLetterScores(const MSA &msa, SCORE Scores[])
	{
	const unsigned uColCount = msa.GetColCount();
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		Scores[uColIndex] = ScoreColLetters(msa, uColIndex);
	}

SCORE DiffObjScore(
  const MSA &msa1, const PWPath &Path1, const unsigned Edges1[], unsigned uEdgeCount1, 
  const MSA &msa2, const PWPath &Path2, const unsigned Edges2[], unsigned uEdgeCount2)
	{
#if	TRACE
	{
	Log("============DiffObjScore===========\n");
	Log("msa1:\n");
	msa1.LogMe();
	Log("\n");
	Log("Cols1: ");
	for (unsigned i = 0; i < uEdgeCount1; ++i)
		Log(" %u", Edges1[i]);
	Log("\n\n");
	Log("msa2:\n");
	msa2.LogMe();
	Log("Cols2: ");
	for (unsigned i = 0; i < uEdgeCount2; ++i)
		Log(" %u", Edges2[i]);
	Log("\n\n");
	}
#endif

#if	COMPARE_3_52
	extern SCORE g_SPScoreLetters;
	extern SCORE g_SPScoreGaps;
	SCORE SP1 = ObjScoreSP(msa1);
	SCORE SPLetters1 = g_SPScoreLetters;
	SCORE SPGaps1 = g_SPScoreGaps;

	SCORE SP2 = ObjScoreSP(msa2);
	SCORE SPLetters2 = g_SPScoreLetters;
	SCORE SPGaps2 = g_SPScoreGaps;
	SCORE SPDiffLetters = SPLetters2 - SPLetters1;
	SCORE SPDiffGaps = SPGaps2 - SPGaps1;
	SCORE SPDiff = SPDiffLetters + SPDiffGaps;
#endif

	SCORE Letters1 = ScoreLetters(msa1, Edges1, uEdgeCount1);
	SCORE Letters2 = ScoreLetters(msa2, Edges2, uEdgeCount2);

	SCORE Gaps1 = ScoreGaps(msa1, Edges1, uEdgeCount1);
	SCORE Gaps2 = ScoreGaps(msa2, Edges2, uEdgeCount2);

	SCORE DiffLetters = Letters2 - Letters1;
	SCORE DiffGaps = Gaps2 - Gaps1;
	SCORE Diff = DiffLetters + DiffGaps;

#if	COMPARE_3_52
	Log("ObjScoreSP    Letters1=%.4g  Letters2=%.4g  DiffLetters=%.4g\n",
	  SPLetters1, SPLetters2, SPDiffLetters);

	Log("DiffObjScore  Letters1=%.4g  Letters2=%.4g  DiffLetters=%.4g\n",
	  Letters1, Letters2, DiffLetters);

	Log("ObjScoreSP    Gaps1=%.4g  Gaps2=%.4g  DiffGaps=%.4g\n",
	  SPGaps1, SPGaps2, SPDiffGaps);

	Log("DiffObjScore  Gaps1=%.4g  Gaps2=%.4g  DiffGaps=%.4g\n",
	  Gaps1, Gaps2, DiffGaps);

	Log("SP diff=%.4g DiffObjScore Diff=%.4g\n", SPDiff, Diff);
#endif

	return Diff;
	}