File: read_pdb.c

package info (click to toggle)
garlic 1.6-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 4,516 kB
  • sloc: ansic: 52,465; makefile: 2,254
file content (265 lines) | stat: -rw-r--r-- 7,989 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
/* Copyright (C) 2000-2003 Damir Zucic */

/*=============================================================================

				read_pdb.c

Purpose:
	Read  PDB  file  and store  atomic  data.  The most important  data
	(atomic coordinates) are extracted from  ATOM  and  HETATM records.
	The following mandatory records are read:  HEADER,  TITLE,  COMPND,
	SOURCE,  EXPDTA and  AUTHOR.  Other mandatory  and optional records
	are ignored in the current version of the program.  This version of
	garlic can read  PDB format version 2.1;  see  pdb_atom.c  for details
	about the format.  The PDB documentation says that the order of records
	is important, but garlic does not depend on it.

Input:
	(1) Pointer to  MolComplexS  structure (macromolecular complex). It
	    should point to the first free MolComplexS structure.
	(2) Input file name (pointer).
	(3) Pointer to ConfigS structure, with configuration data.

Output:
	(1) Allocate memory,  read file and store data to allocated memory.
	(2) Return value.

Return value:
	(1) Positive on success.
	(2) Negative on failure.

Notes:
	(1) This function first checks whether the current line contains
	    atomic coordinates, even though atomic data are expected after
	    the header data. There are many more lines with atomic coordinates,
	    so the reversed order of checking ensures faster reading.

	(2) Memory  reallocation  was tested  by reading a single  PDB file
	    containing 32 MB of valid PDB records.

	(3) Many older PDB files have some other data after the temperature
	    factor instead of data specified by the format used here.

	(4) Some  flags are set and  default  colors assigned to  each atom
	    immediately after parsing atomic data.

========includes:============================================================*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <X11/Xlib.h>
#include <X11/Xutil.h>
#include <X11/Xos.h>
#include <X11/Xatom.h>

#include "defines.h"
#include "typedefs.h"

/*======function prototypes:=================================================*/

/* Error reporting helper. In this file it is called with the program */
/* name, the function name, the file name, a message and three unused */
/* (empty) strings; the definition is elsewhere.                      */
void		ErrorMessage_ (char *, char *, char *,
			       char *, char *, char *, char *);
/* Opens the named file; this file treats a NULL result as failure. */
FILE		*OpenFileForReading_ (char *);
/* Called once on the header structure after its text buffer has been */
/* allocated (presumably resets the buffer and offsets -- confirm).   */
void		InitializeHeader_ (HeaderS *);
/* Parses one ATOM/HETATM line into the given atom; this file treats */
/* a negative return value as "line could not be parsed".            */
int		ParsePDBAtomicData_ (AtomS *, char *);
/* Called after each stored atom with the address of the array pointer, */
/* the address of the current-atom pointer, the address of the capacity */
/* and the current number of atoms; a negative return value is treated  */
/* as a fatal (re)allocation failure.                                   */
int		ReallocPDBMemory_ (AtomS **, AtomS **, size_t *, size_t);
/* Called for every line which is not ATOM, HETATM or MODEL, with the  */
/* line and its first character; the return value is not used in this */
/* file.                                                               */
int		CopyHeaderLine_ (HeaderS *, char *, int);
/* Looks for the PDB identifier in the given line; this file treats */
/* a positive return value as "identifier found".                   */
int		PDBIdentifier_ (MolComplexS *, char *);

/*======read PDB file:=======================================================*/

/*
 * ReadPDBFile_: read the PDB file  file_nameP  line by line and store
 * header text and atomic data into the structure  mol_complexSP.
 *
 * Arguments:
 *	mol_complexSP	complex which receives header text, the atom array,
 *			the number of atoms and the PDB identifier.
 *	file_nameP	name of the input file.
 *	configSP	configuration data; supplies the default atom style,
 *			the default number of color surfaces and the default
 *			colors which are copied to each atom.
 *
 * Return value:
 *	 1	success.
 *	-1	the file could not be opened.
 *	-2	memory for header text could not be allocated.
 *	-3	the initial atom array could not be allocated.
 *	-4	ReallocPDBMemory_ failed while the atom array was growing.
 *
 * On the failures -3 and -4 the header text buffer allocated here is
 * released and  headerS.dataP  is reset to NULL, so that a failed load
 * does not leak it.
 */
int ReadPDBFile_ (MolComplexS *mol_complexSP,
		  char *file_nameP, ConfigS *configSP)
{
FILE		*fileP;
size_t		rgb_struct_size;
size_t		memory_size;
AtomS		*curr_atomSP;
char		lineA[STRINGSIZE];
int		line_size;
int		rec_typeI = 0, first_char;
int		n;
int		PDB_ID_foundF = 0;
int		model_serialI = 1;
int		model_serial_start = 10, model_serial_end = 13;
char		*P;

/* Open file for reading: */
if ((fileP = OpenFileForReading_ (file_nameP)) == NULL)
	{
	ErrorMessage_ ("garlic", "ReadPDBFile_", file_nameP,
		"Failed to open file!\n", "", "", "");
	return -1;
	}

/* Allocate memory for header strings: */
mol_complexSP->headerS.total_linesN = MAXHEADERLINES + MAXTITLELINES +
				      MAXCOMPNDLINES + MAXSOURCELINES +
				      MAXEXPDTALINES + MAXAUTHORLINES;
memory_size = mol_complexSP->headerS.total_linesN * HEADERLINESIZE + 100;
if ((mol_complexSP->headerS.dataP = (char *) malloc (memory_size)) == NULL)
	{
	ErrorMessage_ ("garlic", "ReadPDBFile_", "",
		"Failed to allocate memory for header data!\n", "", "", "");
	fclose (fileP);
	return -2;
	}

/* Initialize the unique PDB identifier: */
strcpy (mol_complexSP->unique_PDB_codeA, "XXXX");

/* Initialize the memory reserved for text and initialize offsets: */
InitializeHeader_ (&mol_complexSP->headerS);

/* Allocate the initial amount of memory for atomic coordinates: */
mol_complexSP->atomsN = 0;    /* It was zero before, this is just a reminder */
mol_complexSP->max_atomsN = ATOMS_IN_CHUNK;
memory_size = mol_complexSP->max_atomsN * sizeof (AtomS);
if ((mol_complexSP->atomSP = (AtomS *) malloc (memory_size)) == NULL)
	{
	ErrorMessage_ ("garlic", "ReadPDBFile_", "",
		"Failed to allocate memory for atomic data!\n", "", "", "");

	/* Do not leak the header buffer allocated above: */
	free (mol_complexSP->headerS.dataP);
	mol_complexSP->headerS.dataP = NULL;

	fclose (fileP);
	return -3;
	}
curr_atomSP = mol_complexSP->atomSP;

/* Prepare the size of the RGBS structure: */
rgb_struct_size = sizeof (RGBS);

/* Read the entire file, line by line: */
line_size = sizeof (lineA);
while (fgets (lineA, line_size, fileP))
	{
	/* Prepare the first character, it will be used */
	/* to speed up the recognition of  record type: */
	first_char = *lineA;

	/* Check is the input line  ATOM (1) or  HETATM (2) record. */
	/* Only the beginning of the line is compared, there is no  */
	/* need to search the whole line for the keyword:           */
	rec_typeI = 0;
	switch (first_char)
		{
		case 'A':
			if (strncmp (lineA, "ATOM", 4) == 0) rec_typeI = 1;
			break;
		case 'H':
			if (strncmp (lineA, "HETATM", 6) == 0) rec_typeI = 2;
			break;
		default:
			;
		}

	/* Extract data if input line contains ATOM or HETATM record: */
	if (rec_typeI > 0)
		{
		/** Try to parse the line; lines which can not **/
		/** be parsed  are silently skipped:           **/
		if (ParsePDBAtomicData_ (curr_atomSP, lineA) < 0) continue;

		/** If this point is reached, parsing was successful! **/

		/** The initial drawing style for atoms: **/
		curr_atomSP->raw_atomS.atom_styleI =
						configSP->default_atom_styleI;

		/** Copy the model serial number: **/
		curr_atomSP->raw_atomS.model_serialI = model_serialI;

		/** By default, after loading the atom is selected: **/
		curr_atomSP->selectedF = 1;

		/** By default, the atom is not hidden: **/
		curr_atomSP->hiddenF = 0;

		/** By default, the atom is inside the slab: **/
		curr_atomSP->inside_slabF = 1;

		/** By default, atomic label is hidden: **/
		curr_atomSP->labelF = 0;

		/** Set flag which distinguishes ATOM and HETATM data: **/
		if (rec_typeI == 1) curr_atomSP->raw_atomS.heteroF = 0;
		else curr_atomSP->raw_atomS.heteroF = 1;

		/** Set the number of color fading surfaces: **/
		curr_atomSP->surfacesN = configSP->default_surfacesN;

		/** Copy default basic colors: **/
		for (n = 0; n < MAXCOLORSURFACES; n++)
			{
			memcpy (curr_atomSP->left_rgbSA + n,
				configSP->left_rgbSA + n,
				rgb_struct_size);
			memcpy (curr_atomSP->middle_rgbSA + n,
				configSP->middle_rgbSA + n,
				rgb_struct_size);
			memcpy (curr_atomSP->right_rgbSA + n,
				configSP->right_rgbSA + n,
				rgb_struct_size);
			}

		/** Increase the number of atoms, update pointer: **/
		mol_complexSP->atomsN++;
		curr_atomSP++;

		/** Check is there enough memory left; reallocate if not. **/
		/** Both the array pointer and the current atom pointer   **/
		/** are passed by address, so they may be updated:        **/
		n = ReallocPDBMemory_ (&mol_complexSP->atomSP,
				       &curr_atomSP,
				       &mol_complexSP->max_atomsN,
				       mol_complexSP->atomsN);
		/*** If reallocation fails, close file and return: ***/
		if (n < 0)
			{
			mol_complexSP->atomsN = 0;

			/*** Do not leak the header buffer.  The atom  ***/
			/*** array is left untouched here.             ***/
			/*** NOTE(review): the state of  atomSP  after ***/
			/*** ReallocPDBMemory_  fails  is not  visible ***/
			/*** from this file - confirm who releases it. ***/
			free (mol_complexSP->headerS.dataP);
			mol_complexSP->headerS.dataP = NULL;

			fclose (fileP);
			return -4;
			}

		/** Take the next line: **/
		continue;
		}

	/* Check does this line start with the */
	/* keyword  MODEL  (NMR structures):   */
	if (strncmp (lineA, "MODEL", 5) == 0)
		{
		/* Check the line length  (do not */
		/* forget that newline is there): */
		if ((int) strlen (lineA) < model_serial_end + 2) continue;

		/* Try to read the model serial number: */
		P = lineA + model_serial_start;
		if (sscanf (P, "%d", &n) != 1) continue;

		/* Update the model serial number, it will */
		/* be assigned to all atoms  which follow: */
		model_serialI = n;

		/** Take the next line: **/
		continue;
		}

	/* Check for records  which are mandatory */
	/* but do not contain atomic coordinates: */
	/* (ATOM and HETATM  are not  mandatory!) */
	CopyHeaderLine_ (&mol_complexSP->headerS, lineA, first_char);

	/* Check for the unique PDB identifier, if not found before: */
	if (!PDB_ID_foundF)
		{
		n = PDBIdentifier_ (mol_complexSP, lineA);
		if (n > 0) PDB_ID_foundF = 1;
		}
	}

/* Close file: */
fclose (fileP);

/* Set the flag which forces projection of coordinates: */
mol_complexSP->position_changedF = 1;

/* Return positive value on success: */
return 1;
}

/*===========================================================================*/