File: select_pattern.c

package info (click to toggle)
garlic 1.6-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 4,516 kB
  • sloc: ansic: 52,465; makefile: 2,254
file content (274 lines) | stat: -rw-r--r-- 8,549 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
/* Copyright (C) 2003 Damir Zucic */

/*=============================================================================

				select_pattern.c

Purpose:
	Select sequence pattern. Some residues are similar; pattern allows
	the specification of a number of residues at a given place of some
	sequence fragment.  Example: GLU GLN ASP ASN ARG LYS / ALA / TYR ;
	this pattern  represents a fragment of  three residues.  The first
	one may be GLU, GLN, ASP, ASN, ARG or LYS while the second and the
	third must be  ALA and TYR.  The maximal number of residues in the
	given sequence pattern  is defined in  defines.h  (MAXPATTLENGTH).

Input:
	(1) Pointer to MolComplexS structure, with macromol. complexes.
	(2) Number of macromolecular complexes.
	(3) Pointer to RuntimeS structure (the pattern buffer is there).
	(4) Selection mode index  (0 = overwrite, 1 = restrict, 2 = expand
	    previous selection).

Output:
	(1) The flag  selectedF  will be set to one  for selected atoms in
	    every caught macromolecular complex.
	(2) Return value.

Return value:
	(1) The number of selected atoms (zero or positive value).

Notes:
	(1) Indentation is exceptionally 4 spaces.

========includes:============================================================*/

#include <stdio.h>

#include <string.h>

#include <X11/Xlib.h>
#include <X11/Xutil.h>
#include <X11/Xos.h>
#include <X11/Xatom.h>

#include "defines.h"
#include "typedefs.h"

/*======function prototypes:=================================================*/


/*======select sequence pattern:=============================================*/

/* Select all residues which belong to sequence fragments matching the */
/* pattern stored in the RuntimeS structure.                           */
/*                                                                     */
/* Arguments:                                                          */
/*   mol_complexSP    array of macromolecular complexes;               */
/*   mol_complexesN   number of elements in this array;                */
/*   runtimeSP        provides pattern_length, pattern_tolerance, the  */
/*                    name buffer patternP and the set sizes namesNA;  */
/*   selection_modeI  0 = overwrite, 1 = restrict, 2 = expand.         */
/*                                                                     */
/* Return value:                                                       */
/*   The number of atoms selected by this call,  summed over all caught */
/*   complexes.  Each atom is counted  at most once,  even if it belongs */
/*   to a number of overlapping windows which match the pattern. In the */
/*   restrict mode,  only atoms which remain selected after combining   */
/*   with the previous selection are counted.                           */

long SelectPattern_ (MolComplexS *mol_complexSP, int mol_complexesN,
		     RuntimeS *runtimeSP, int selection_modeI)
{
long		selected_atomsN = 0;
long		complex_atomsN;
int		max_length;
int		window_width, windowI;
int		mol_complexI;
MolComplexS	*curr_mol_complexSP;
int		atomsN, atomI;
AtomS		*curr_atomSP;
int		residuesN, max_residueI, residueI;
int		combinedI, first_newI, end_residueI;
ResidueS	*curr_residueSP;
AtomS		*first_atomSP;
char		*checked_nameP, *acceptable_nameP;
int		name_offset;
int		score;
int		nameI;
int		first_atomI, last_atomI;

/* The maximal residue name length;  this is also the distance */
/* (in characters) between two adjacent names in the pattern:  */
max_length = RESNAMESIZE - 1;

/* Copy and check the number of residues in the sequence pattern: */
window_width = runtimeSP->pattern_length;
if (window_width == 0) return (long) 0;

/* Scan all caught macromol. complexes and search the specified pattern: */
for (mol_complexI = 0; mol_complexI < mol_complexesN; mol_complexI++)
    {
    /* Pointer to the current macromolecular complex: */
    curr_mol_complexSP = mol_complexSP + mol_complexI;

    /* Check whether the current macromolecular complex is caught: */
    if (curr_mol_complexSP->catchF == 0) continue;

    /* Number of atoms in a macromolecular complex: */
    atomsN = curr_mol_complexSP->atomsN;
    if (atomsN == 0) continue;

    /* Copy and check the number of residues: */
    residuesN = curr_mol_complexSP->residuesN;
    if (residuesN == 0) continue;

    /* The maximal index of the residue  at which a window may start. */
    /* If the complex is shorter than the pattern,  this value will be */
    /* negative and the scan below will not find any matching window. */
    max_residueI = residuesN - window_width;

    /* Backup the current selection if selection mode is restrict: */
    if (selection_modeI == 1)
	{
	for (atomI = 0; atomI < atomsN; atomI++)
	    {
	    /* Pointer to the current atom: */
	    curr_atomSP = curr_mol_complexSP->atomSP + atomI;

	    /* Copy the selection flag: */
	    curr_atomSP->previous_selectedF = curr_atomSP->selectedF;
	    }
	}

    /* Unselect everything if selection mode is overwrite or restrict: */
    if ((selection_modeI == 0) || (selection_modeI == 1))
	{
	for (atomI = 0; atomI < atomsN; atomI++)
	    {
	    /* Pointer to the current atom: */
	    curr_atomSP = curr_mol_complexSP->atomSP + atomI;

	    /* Unselect the current atom: */
	    curr_atomSP->selectedF = 0;
	    }
	}

    /* The number of atoms selected in the current complex.  A separate */
    /* counter is used  for each complex,  so that the total from other */
    /* complexes is never discarded (important for the restrict mode).  */
    complex_atomsN = 0;

    /* The index of the first residue which is not covered by any of */
    /* matching windows  found so far  in the current complex.  Win- */
    /* dows are visited in ascending order,  so everything below this */
    /* index  which belongs to  a matching window  is selected  and */
    /* counted already. This prevents counting the same atom twice. */
    first_newI = 0;

    /* Scan the residues of the current macromolecular complex: */
    for (residueI = 0; residueI <= max_residueI; residueI++)
	{
	/* Reset the score: */
	score = 0;

	/* Reset  the name offset.  The name offset  helps  to find  the */
	/* first name from the specified set. When the value is equal to */
	/* zero,  it is the offset of the first name from the first set. */
	name_offset = 0;

	/* Scan the given number of residues, */
	/* starting from the current residue: */
	for (windowI = 0; windowI < window_width; windowI++)
	    {
	    /* Prepare and check the combined index: */
	    combinedI = residueI + windowI;
	    if (combinedI >= residuesN) break;

	    /* Residue associated with the combined index: */
	    curr_residueSP = curr_mol_complexSP->residueSP + combinedI;

	    /* Pointer to the first atom of current residue: */
	    first_atomSP = curr_mol_complexSP->atomSP +
			   curr_residueSP->residue_startI;

	    /* Pointer to the name of  the current */
	    /* residue from the current structure: */
	    checked_nameP = first_atomSP->raw_atomS.pure_residue_nameA;

	    /* Scan the set of names.  Name sets are building blocks of */
	    /* the pattern.  The pattern length  is equal to  the total */
	    /* number of sets. The window width is equal to the pattern */
	    /* length.  The current name set is defined by the windowI. */
	    for (nameI = 0; nameI < runtimeSP->namesNA[windowI]; nameI++)
		{
		/* Pointer to the name from the set of names: */
		acceptable_nameP = runtimeSP->patternP +
				   max_length * (name_offset + nameI);

		/* Wildcard matches any residue name.  If there is no */
		/* wildcard,  compare  the current residue name  with */
		/* the given name from the current set. In both cases */
		/* one match is enough: increment the score and break. */
		if ((*acceptable_nameP == '*') ||
		    (strncmp (acceptable_nameP,
			      checked_nameP, max_length) == 0))
		    {
		    score++;
		    break;
		    }

		/* End of nameI loop: */
		}

	    /* Update the name offset.  This is the offset of  the first */
	    /* name from the next set. It will be used in the next step. */
	    name_offset += runtimeSP->namesNA[windowI];

	    /* End of windowI loop: */
	    }

	/* Check the total score.  If the sum of  the total score and */
	/* the tolerance is equal to the window width or even larger, */
	/* the current  sequence fragment  matches the given pattern. */
	/* If not, take the next window: */
	if (score + runtimeSP->pattern_tolerance < window_width) continue;

	/* If this point is reached,  the window matches.  Prepare the */
	/* index of the first residue after this window  (do not allow */
	/* this index to exceed the total number of residues):         */
	end_residueI = residueI + window_width;
	if (end_residueI > residuesN) end_residueI = residuesN;

	/* Residues which were selected through  some previous window */
	/* should be skipped.  It is assumed  that different residues */
	/* do not share atoms.                                        */
	combinedI = residueI;
	if (combinedI < first_newI) combinedI = first_newI;

	/* Select all atoms from the remaining part of this window: */
	for ( ; combinedI < end_residueI; combinedI++)
	    {
	    /* Residue associated with the combined index: */
	    curr_residueSP = curr_mol_complexSP->residueSP + combinedI;

	    /* The indices of the first  and the */
	    /* last atom of the current residue: */
	    first_atomI = curr_residueSP->residue_startI;
	    last_atomI  = curr_residueSP->residue_endI;

	    /* Select all atoms of the current residue: */
	    for (atomI = first_atomI; atomI <= last_atomI; atomI++)
		{
		/* Pointer to the current atom: */
		curr_atomSP = curr_mol_complexSP->atomSP + atomI;

		/* Select the current atom: */
		curr_atomSP->selectedF = 1;

		/* Update the counter of selected atoms: */
		complex_atomsN++;
		}
	    }

	/* Everything up to the end of this window is selected now: */
	first_newI = end_residueI;

	/* End of residueI loop: */
	}

    /* Combine  the current selection  with the */
    /* previous, if selection mode is restrict: */
    if (selection_modeI == 1)
	{
	/* Reset the counter for the current complex;  only atoms */
	/* which survive the combination should be counted.  Note */
	/* that the total for other complexes is left untouched.  */
	complex_atomsN = 0;

	/* Combine the old and the new selection flag: */
	for (atomI = 0; atomI < atomsN; atomI++)
	    {
	    /* Pointer to the current atom: */
	    curr_atomSP = curr_mol_complexSP->atomSP + atomI;

	    /* Combine selection flags: */
	    curr_atomSP->selectedF *= curr_atomSP->previous_selectedF;

	    /* Check the selection flag;  increase */
	    /* the count of  selected atoms if the */
	    /* selection flag is not equal to zero: */
	    if (curr_atomSP->selectedF) complex_atomsN++;
	    }
	}

    /* Add the contribution of the current complex to the total: */
    selected_atomsN += complex_atomsN;

    /* Update the position_changedF (some atoms may have bad color): */
    curr_mol_complexSP->position_changedF = 1;

    /* End of mol_complexI loop: */
    }

/* Return the number of selected atoms: */
return selected_atomsN;
}

/*===========================================================================*/