File: extract_sequence.c

package info (click to toggle)
garlic 1.4-1
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 4,192 kB
  • ctags: 1,368
  • sloc: ansic: 49,603; makefile: 1,079
file content (154 lines) | stat: -rw-r--r-- 4,772 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
/* Copyright (C) 2000, 2001 Damir Zucic */

/*=============================================================================

				extract_sequence.c

Purpose:
	Extract  sequence  information: use pure residue names, i.e. ignore
	whitespace. Up to three characters are available in the PDB version
	2.1 file format. In addition, add residue array index to each atom.

Input:
	(1) Pointer to MolComplexS structure, with macromolecular data.

Output:
	(1) An array of ResidueS structures allocated and initialized.
	(2) Return value.

Return value:
	(1) Positive on success.
	(2) Zero, if complex contains no atoms.
	(3) Negative on failure.

========includes:============================================================*/

#include <stdio.h>

#include <stdlib.h>

#include <X11/Xlib.h>
#include <X11/Xutil.h>
#include <X11/Xos.h>
#include <X11/Xatom.h>

#include "defines.h"
#include "typedefs.h"

/*======function prototypes:=================================================*/

/* Residue counter, not defined in this file. ExtractSequence_ uses the */
/* returned value as the number of residues; zero means none was found. */
size_t		CountResidues_ (MolComplexS *);
/* Error reporting routine, not defined in this file; takes seven strings. */
/* NOTE(review): the role of each argument is not visible here -- confirm  */
/* against the definition before changing any call.                        */
void		ErrorMessage_ (char *, char *, char *,
			       char *, char *, char *, char *);

/*======extract sequence information:========================================*/

/*
 * Build the residue table of a macromolecular complex.
 *
 * Atoms are scanned in array order. A new residue starts whenever the
 * residue sequence number or the residue insertion code differs from
 * that of the preceding atom. Every atom gets the index of its residue
 * (residue_arrayI), and every residue gets the indices of its first and
 * last atom (residue_startI, residue_endI).
 *
 * On return:
 *   mol_complexSP->residueSP  points to the newly allocated array
 *                             (NULL if the allocation failed);
 *   mol_complexSP->residuesN  is the number of residues, or zero if the
 *                             function did not succeed.
 *
 * Return value:
 *    1  success;
 *    0  the complex contains no atoms;
 *   -1  CountResidues_ reported zero residues;
 *   -2  memory allocation failed.
 *
 * NOTE(review): the array holds residuesN + 100 elements; the scan below
 * assumes CountResidues_ splits residues by the same rule used here, so
 * that the residue index never runs past the array -- confirm.
 */
int ExtractSequence_ (MolComplexS *mol_complexSP)
{
size_t		atomsN, atomI;
size_t		residuesN, residue_arrayI = 0;
size_t		elementsN;
AtomS		*curr_atomSP;
int		previous_residueI, current_residueI;
int		previous_insertion_code, current_insertion_code;
ResidueS	*current_residueSP;

/* Until the residue array is ready, the complex has no residues: */
mol_complexSP->residuesN = 0;

/* Return zero if there are no atoms: */
atomsN = mol_complexSP->atomsN;
if (atomsN == 0) return 0;

/* Count residues; if there is nothing, return negative value: */
residuesN = CountResidues_ (mol_complexSP);
if (residuesN == 0) return -1;

/* Allocate zero-filled storage for the residue array: */
elementsN = residuesN + 100;
mol_complexSP->residueSP = calloc (elementsN, sizeof (ResidueS));
if (mol_complexSP->residueSP == NULL)
	{
	/* residuesN is still zero here, so the complex is left in a */
	/* consistent state: no array and no residues.               */
	ErrorMessage_ ("garlic", "ExtractSequence_", "",
		       "Failed to allocate memory for ResidueS array!\n",
		       "", "", "");
	return -2;
	}

/* Publish the number of residues only now, when the array exists: */
mol_complexSP->residuesN = residuesN;

/* The first atom always opens residue zero. The start index of this */
/* residue is already zero, because calloc zero-fills the array:     */
curr_atomSP = mol_complexSP->atomSP;
curr_atomSP->residue_arrayI = 0;
previous_residueI = curr_atomSP->raw_atomS.residue_sequenceI;
previous_insertion_code = curr_atomSP->raw_atomS.residue_insertion_code;

/* Scan the remaining atoms: */
for (atomI = 1; atomI < atomsN; atomI++)
	{
	curr_atomSP = mol_complexSP->atomSP + atomI;

	/* Residue sequence number and insertion code of this atom: */
	current_residueI = curr_atomSP->raw_atomS.residue_sequenceI;
	current_insertion_code = curr_atomSP->raw_atomS.residue_insertion_code;

	/* A change in either value marks the first atom of a new residue: */
	if ((current_residueI != previous_residueI) ||
	    (current_insertion_code != previous_insertion_code))
		{
		residue_arrayI++;
		current_residueSP = mol_complexSP->residueSP + residue_arrayI;

		/* The new residue starts here; the end index */
		/* is provisional until the residue is closed: */
		current_residueSP->residue_startI = atomI;
		current_residueSP->residue_endI = atomI;

		/* Close the previous residue; residue_arrayI is at */
		/* least one here, so the previous element exists:  */
		(current_residueSP - 1)->residue_endI = atomI - 1;

		/* Remember the identity of the current residue: */
		previous_residueI = current_residueI;
		previous_insertion_code = current_insertion_code;
		}

	/* Tag the atom with the index of its residue: */
	curr_atomSP->residue_arrayI = residue_arrayI;
	}

/* The last residue ends at the last atom (atomsN is not zero here): */
current_residueSP = mol_complexSP->residueSP + residue_arrayI;
current_residueSP->residue_endI = atomsN - 1;

/* If this point is reached, return positive value (success indicator): */
return 1;
}

/*===========================================================================*/