File: pdb_atom.c

package info (click to toggle)
garlic 1.6-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 4,516 kB
  • sloc: ansic: 52,465; makefile: 2,254
file content (308 lines) | stat: -rw-r--r-- 10,939 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
/* Copyright (C) 2000-2003 Damir Zucic */

/*=============================================================================

				pdb_atom.c

Purpose:
	Parse the line  (string) with  atomic  coordinates;  store data to
	AtomS structure.  ATOM or  HETATM line is  expected as input. This
	function  contains detailed format  specification  for lines  with
	atomic data.  If updating garlic program for  the newer version of
	PDB  format, modify  this  function.  Check  AtomS  definition  in
	typedefs.h file and all functions which use atomic data.  Add some
	data if necessary but  try to avoid  removal or change of original
	data.
	
Input:
	(1) Pointer to AtomS structure,  where atomic data will be stored.
	(2) Input line (string), read from PDB file.

Output:
	(1) Data stored to AtomS structure.
	(2) Return value.

Return value:
	(1) Positive on success.
	(2) Negative on failure.

Notes:
	(1) The first  character in  PDB line has  index  1  (one).  The C
	    programming language  has  0  (zero) as the index of the first
	    array element.  Therefore each position listed  in  this  file
	    is shifted by one compared to indices from PDB specification:

	    C_language_index = PDB_column_index - 1

	(2) PDB format has certain  drawbacks:  the width of the field for
	    atom serial  number is too small, so the format may be changed
	    soon (Damir Zucic, Feb 18, 1999).

	(3) Some structures  deposited at PDB (http://www.pdb.bnl.gov) are
	    not compliant with the format specification used here!

	(4) The input line  is taken into account if it is  long enough to
	    contain all fields preceding occupancy.  If missing, occupancy
	    and temperature factor are set to zero.

========includes:============================================================*/

#include <stdio.h>

#include <string.h>
#include <ctype.h>

#include <X11/Xlib.h>
#include <X11/Xutil.h>
#include <X11/Xos.h>
#include <X11/Xatom.h>

#include "defines.h"
#include "typedefs.h"

/*======function prototypes:=================================================*/

/* Helpers defined in other source files of the project. */

/* Error reporting routine; takes seven string arguments. */
void		ErrorMessage_ (char *, char *, char *,
			       char *, char *, char *, char *);
/* Called in this file as (destination, first index, last index, line); */
/* presumably copies the inclusive, zero-based range - confirm in the   */
/* file where it is defined.                                             */
int		ExtractField_ (char *, int, int, char *);
/* Called in this file as (destination, source); judging by its usage   */
/* it produces a copy with spaces removed - confirm in its definition.  */
void		CopyPurified_ (char *, char *);

/*======parse PDB ATOM or HETATM line:=======================================*/

/*
 * Parse a single PDB ATOM or HETATM line and store the data to AtomS.
 *
 * Arguments:
 *	atomSP	Pointer to AtomS structure which receives the parsed data.
 *	lineP	The input line.  It is modified in place:  the trailing
 *		newline, if present, is replaced by the terminating zero.
 *
 * Return value:
 *	-1	The line is too short to contain all three coordinates;
 *		the AtomS structure is not touched in this case.
 *	-2	Failed to extract x coordinate.
 *	-3	Failed to extract y coordinate.
 *	-4	Failed to extract z coordinate.
 *	 1	Coordinates parsed, the line ends before the end of the
 *		occupancy field.
 *	 2	The line ends before the end of temperature factor field.
 *	 3	The line ends before the end of segment identifier field.
 *	 4	The line ends before the end of element symbol field.
 *	 5	The line ends before the end of charge field.
 *	 6	All fields were available.
 *
 * Fields which are missing  keep the initial values:  zero for numbers,
 * space for single characters and empty string for strings.
 *
 * Note: error messages are counted across calls;  after MAXMESSAGES
 * messages were printed, further failures are reported only through
 * the return value.
 */
int ParsePDBAtomicData_ (AtomS * atomSP, char *lineP)
{
/*=================================================================*/
/* PDB ATOM and HETATM tokens:           ANSI C range:  PDB range: */
/*=================================================================*/
static const int	atom_serial_start        =  6;	/*  7 ~ 11 */
static const int	atom_serial_end          = 10;
static const int	atom_name_start          = 12;	/* 13 ~ 16 */
static const int	atom_name_end            = 15;
static const int	chemical_symbol_start    = 12;	/* 13 ~ 14 */
static const int	chemical_symbol_end      = 13;
static const int	remoteness_ind_start     = 14;	/* 15 ~ 15 */
static const int	remoteness_ind_end       = 14;
static const int	branch_desig_start       = 15;	/* 16 ~ 16 */
static const int	branch_desig_end         = 15;
static const int	alt_location_start       = 16;	/* 17 ~ 17 */
static const int	alt_location_end         = 16;
static const int	residue_name_start       = 17;	/* 18 ~ 20 */
static const int	residue_name_end         = 19;
static const int	chainID_start            = 21;	/* 22 ~ 22 */
static const int	chainID_end              = 21;
static const int	residue_sequence_start   = 22;	/* 23 ~ 26 */
static const int	residue_sequence_end     = 25;
static const int	insertion_code_start     = 26;	/* 27 ~ 27 */
static const int	insertion_code_end       = 26;
static const int	x_start                  = 30;	/* 31 ~ 38 */
static const int	x_end                    = 37;
static const int	y_start                  = 38;	/* 39 ~ 46 */
static const int	y_end                    = 45;
static const int	z_start                  = 46;	/* 47 ~ 54 */
static const int	z_end                    = 53;
static const int	occupancy_start          = 54;	/* 55 ~ 60 */
static const int	occupancy_end            = 59;
static const int	temperature_factor_start = 60;	/* 61 ~ 66 */
static const int	temperature_factor_end   = 65;
static const int	segmentID_start          = 72;	/* 73 ~ 76 */
static const int	segmentID_end            = 75;
static const int	element_symbol_start     = 76;	/* 77 ~ 78 */
static const int	element_symbol_end       = 77;
static const int	charge_start             = 78;	/* 79 ~ 80 */
static const int	charge_end               = 79;

/* The number of error messages printed so far.  It has to be static, */
/* otherwise it is reset on each call and the limit is never reached. */
static int	error_messagesN = 0;

/* Other auxiliary variables: */
int		line_length;
char		substringA[STRINGSIZE];
int		int_value;
double		double_value;

/* Store the line length to avoid repeated calls of strlen: */
line_length = (int) strlen (lineP);

/* Remove the trailing newline, if present. The length is checked */
/* first, to avoid reading the byte before the empty input string: */
if ((line_length > 0) && (lineP[line_length - 1] == '\n'))
	{
	lineP[line_length - 1] = '\0';
	line_length--;
	}

/* Check the line length; if too short, return: */
/* Note: the line which  does not contain */
/* three coordinates  is not a good line! */
if (line_length < z_end + 1) return -1;

/* Some entries may be missing; initialize all data. */
/* Only the first element  of x and z is set here.   */
atomSP->raw_atomS.serialI                = 0;
*atomSP->raw_atomS.atom_nameA            = '\0';
*atomSP->raw_atomS.pure_atom_nameA       = '\0';
*atomSP->raw_atomS.chemical_symbolA      = '\0';
atomSP->raw_atomS.remoteness_indicator   = ' ';
atomSP->raw_atomS.branch_designator      = ' ';
atomSP->raw_atomS.alt_location           = ' ';
*atomSP->raw_atomS.residue_nameA         = '\0';
*atomSP->raw_atomS.pure_residue_nameA    = '\0';
atomSP->raw_atomS.chainID                = ' ';
atomSP->raw_atomS.residue_sequenceI      = 0;
atomSP->raw_atomS.residue_insertion_code = ' ';
atomSP->raw_atomS.x[0]                   = 0.0;
atomSP->raw_atomS.y                      = 0.0;
atomSP->raw_atomS.z[0]                   = 0.0;
/* Missing occupancy is zero,  the same value */
/* which is used if the field is not a number: */
atomSP->raw_atomS.occupancy              = 0.0;
atomSP->raw_atomS.temperature_factor     = 0.0;
*atomSP->raw_atomS.segmentA              = '\0';
*atomSP->raw_atomS.elementA              = '\0';
*atomSP->raw_atomS.pure_elementA         = '\0';
*atomSP->raw_atomS.chargeA               = '\0';

/* Atom serial number (zero if not a number): */
ExtractField_ (substringA, atom_serial_start, atom_serial_end, lineP);
if (sscanf (substringA, "%d", &int_value) != 1)
	{
	int_value = 0;
	}
atomSP->raw_atomS.serialI = int_value;

/* Atom name (including spaces): */
ExtractField_ (atomSP->raw_atomS.atom_nameA,
	       atom_name_start, atom_name_end, lineP);

/* Purified atom name (spaces removed): */
CopyPurified_ (atomSP->raw_atomS.pure_atom_nameA,
	       atomSP->raw_atomS.atom_nameA);

/* Chemical symbol: */
ExtractField_ (atomSP->raw_atomS.chemical_symbolA,
	       chemical_symbol_start, chemical_symbol_end, lineP);

/* Chemical symbol should be clean and right justified. However, */
/* the symbol for hydrogen (and perhaps some other atoms) may be */
/* preceded by a single digit.  Check the first character of the */
/* chemical symbol.  If a digit is found,  replace it  by space. */
/* The cast is required: isdigit is not defined for negative int */
/* values, which plain char may produce for bytes above 127.     */
if (isdigit ((unsigned char) atomSP->raw_atomS.chemical_symbolA[0]))
	{
	atomSP->raw_atomS.chemical_symbolA[0] = ' ';
	}

/* Remoteness indicator (transliterated Greek letter): */
ExtractField_ (substringA, remoteness_ind_start, remoteness_ind_end, lineP);
atomSP->raw_atomS.remoteness_indicator = *substringA;

/* Branch designator: */
ExtractField_ (substringA, branch_desig_start, branch_desig_end, lineP);
atomSP->raw_atomS.branch_designator = *substringA;

/* Alternate location indicator: */
ExtractField_ (substringA, alt_location_start, alt_location_end, lineP);
atomSP->raw_atomS.alt_location = *substringA;

/* Residue name (with spaces): */
ExtractField_ (atomSP->raw_atomS.residue_nameA,
	       residue_name_start, residue_name_end, lineP);

/* Purified residue name (spaces removed): */
CopyPurified_ (atomSP->raw_atomS.pure_residue_nameA,
	       atomSP->raw_atomS.residue_nameA);

/* Chain identifier: */
ExtractField_ (substringA, chainID_start, chainID_end, lineP);
atomSP->raw_atomS.chainID = *substringA;

/* Residue sequence number (zero if not a number): */
ExtractField_ (substringA,
	       residue_sequence_start, residue_sequence_end, lineP);
if (sscanf (substringA, "%d", &int_value) != 1)
	{
	int_value = 0;
	}
atomSP->raw_atomS.residue_sequenceI = int_value;

/* Insertion code: */
ExtractField_ (substringA, insertion_code_start, insertion_code_end, lineP);
atomSP->raw_atomS.residue_insertion_code = *substringA;

/* Atom x coordinate: */
ExtractField_ (substringA, x_start, x_end, lineP);
if (sscanf (substringA, "%lf", &double_value) != 1)
	{
	/* The counter is not incremented above */
	/* the limit, so it  is never overflown: */
	if (error_messagesN >= MAXMESSAGES) return -2;
	error_messagesN++;
	ErrorMessage_ ("garlic", "ParsePDBAtomicData_", "",
		"Failed to extract x coordinate!\n", "", "", "");
	return -2;
	}
atomSP->raw_atomS.x[0] = double_value;

/* Atom y coordinate: */
ExtractField_ (substringA, y_start, y_end, lineP);
if (sscanf (substringA, "%lf", &double_value) != 1)
	{
	if (error_messagesN >= MAXMESSAGES) return -3;
	error_messagesN++;
	ErrorMessage_ ("garlic", "ParsePDBAtomicData_", "",
		"Failed to extract y coordinate!\n", "", "", "");
	return -3;
	}
atomSP->raw_atomS.y = double_value;

/* Atom z coordinate: */
ExtractField_ (substringA, z_start, z_end, lineP);
if (sscanf (substringA, "%lf", &double_value) != 1)
	{
	if (error_messagesN >= MAXMESSAGES) return -4;
	error_messagesN++;
	ErrorMessage_ ("garlic", "ParsePDBAtomicData_", "",
		"Failed to extract z coordinate!\n", "", "", "");
	return -4;
	}
atomSP->raw_atomS.z[0] = double_value;

/*------the following fields may be missing in some files:-------------------*/

/* Occupancy (zero if not a number): */
if (line_length < occupancy_end + 1) return 1;
ExtractField_ (substringA, occupancy_start, occupancy_end, lineP);
if (sscanf (substringA, "%lf", &double_value) != 1)
	{
	double_value = 0.0;
	}
atomSP->raw_atomS.occupancy = double_value;

/* Isotropic temperature factor (zero if not a number): */
if (line_length < temperature_factor_end + 1) return 2;
ExtractField_ (substringA,
	       temperature_factor_start, temperature_factor_end, lineP);
if (sscanf (substringA, "%lf", &double_value) != 1)
	{
	double_value = 0.0;
	}
atomSP->raw_atomS.temperature_factor = double_value;

/* Segment identifier: */
if (line_length < segmentID_end + 1) return 3;
ExtractField_ (atomSP->raw_atomS.segmentA,
	       segmentID_start, segmentID_end, lineP);

/* Element symbol: */
if (line_length < element_symbol_end + 1) return 4;
ExtractField_ (atomSP->raw_atomS.elementA,
	       element_symbol_start, element_symbol_end, lineP);

/* Purified element symbol (spaces removed): */
CopyPurified_ (atomSP->raw_atomS.pure_elementA,
	       atomSP->raw_atomS.elementA);

/* Charge: */
if (line_length < charge_end + 1) return 5;
ExtractField_ (atomSP->raw_atomS.chargeA,
	       charge_start, charge_end, lineP);

return 6;
}

/*===========================================================================*/