File: is_pdb.c

package info (click to toggle)
garlic 1.1-2
  • links: PTS
  • area: main
  • in suites: woody
  • size: 2,492 kB
  • ctags: 1,013
  • sloc: ansic: 29,925; makefile: 753
file content (134 lines) | stat: -rw-r--r-- 3,733 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
/* Copyright (C) 2000 Damir Zucic */

/*=============================================================================

				is_pdb.c

Purpose:
	Check whether the input file is in PDB format. Up to 100 lines are
	read: if 80% or more of the counted lines start with a PDB keyword,
	the file is recognized as a PDB file. If 10 or fewer lines were
	counted, a single line starting with a PDB keyword is sufficient.
	Lines which contain fewer than six characters are not counted.

Input:
	(1) File name.

Output:
	(1) Return value.

Return value:
	(1) PDB_FORMAT identifier on success (see defines.h for values).
	(2) Negative on failure.

Notes:
	(1) Note that END record contains only three characters. The lines
	    with END record do not contain any data.

	(2) Read openfile_r.c, fopen () is there.

========includes:============================================================*/

#include <stdio.h>
#include <string.h>

#include "defines.h"

/*======function prototypes:=================================================*/

/* Error reporting routine; takes seven strings.  IsPDB_ calls it with the */
/* program name, the function name, the file name and the message text and */
/* passes empty strings for the remaining three arguments.  The definition */
/* is not in this file:                                                    */
void		ErrorMessage_ (char *, char *, char *,
			       char *, char *, char *, char *);
/* Opens the named file for reading;  IsPDB_ treats a NULL return value as */
/* failure.  The header notes of this file point to openfile_r.c  for  the */
/* underlying fopen () call:                                               */
FILE		*OpenFileForReading_ (char *);

/*======check whether file is in PDB format:================================*/

/*
 * IsPDB_ - guess whether the named file is in PDB format.
 *
 * Up to 100 lines are read from the file.  A line is counted only if it
 * holds at least six characters once the line terminator is removed; the
 * first six characters of each counted line are compared with the table
 * of PDB record names.
 *
 * Parameter:
 *	file_nameP	name of the file to examine.
 *
 * Return value:
 *	PDB_FORMAT	if at least 80% of the counted lines start with a
 *			PDB record name, or if no more than 10 lines were
 *			counted and at least one of them does;
 *	OPEN_FAILURE	if the file could not be opened;
 *	-1		if no line was counted at all;
 *	-2		if the file does not look like a PDB file.
 */
int IsPDB_ (char *file_nameP)
{
FILE		*fileP;
int		lineI = 0;
char		lineA[STRINGSIZE];
int		line_size, i, line_length, c;
int		total_linesN = 0;
int		good_linesN = 0;
double		percentage;

/* Record names,  blank-padded to six characters.  A bare "END" line is */
/* shorter than six characters and  is skipped before the table lookup; */
/* END is stored padded so that an END record  which is padded to  full */
/* width is recognized like every other record:                         */
static const char PDBKeywordAA[PDB_KEYWORDS][7] =
			{"HEADER", "OBSLTE", "TITLE ", "CAVEAT", "COMPND",
			 "SOURCE", "KEYWDS", "EXPDTA", "AUTHOR", "REVDAT",
			 "SPRSDE", "JRNL  ", "REMARK", "DBREF ", "SEQADV",
			 "SEQRES", "MODRES", "HET   ", "HETNAM", "HETSYN",
			 "FORMUL", "HELIX ", "SHEET ", "TURN  ", "SSBOND",
			 "LINK  ", "HYDBND", "SLTBRG", "CISPEP", "SITE  ",
			 "CRYST1", "ORIGX1", "ORIGX2", "ORIGX3", "SCALE1",
			 "SCALE2", "SCALE3", "MTRIX1", "MTRIX2", "MTRIX3",
			 "TVECT ", "MODEL ", "ATOM  ", "SIGATM", "ANISOU",
			 "SIGUIJ", "TER   ", "HETATM", "ENDMDL", "CONECT",
			 "MASTER", "END   "};    /* PDB cares for the order! */

/* Open file: */
fileP = OpenFileForReading_ (file_nameP);
if (fileP == NULL)
	{
	ErrorMessage_ ("garlic", "IsPDB_", file_nameP,
		"Failed to open file!\n", "", "", "");
	return OPEN_FAILURE;
	}

/* Read up to 100 lines: */
line_size = (int) sizeof (lineA);
while (lineI++ < 100)
	{
	/* Read next line: */
	if (fgets (lineA, line_size, fileP) == NULL) break;

	line_length = (int) strlen (lineA);
	if (line_length == 0) continue;

	if (lineA[line_length - 1] == '\n')
		{
		/* Remove the trailing newline and do not count it: */
		lineA[--line_length] = '\0';
		}
	else if (line_length == line_size - 1)
		{
		/* The buffer was filled before the end of line was */
		/* reached.  Discard the rest,  so that the tail is */
		/* not mistaken for a separate line on next pass:   */
		while ((c = fgetc (fileP)) != EOF && c != '\n')
			{
			/* Nothing to do, just skip the character. */
			}
		}

	/* Files with CR+LF line ends leave a carriage return behind: */
	if ((line_length > 0) && (lineA[line_length - 1] == '\r'))
		{
		lineA[--line_length] = '\0';
		}

	/* Lines shorter than 6 characters are not counted: */
	if (line_length < 6) continue;

	/* Truncate line to six characters: */
	lineA[6] = '\0';

	/* Increase count of lines taken into account: */
	total_linesN++;

	/* Look for PDB keywords; record names are unique, */
	/* so the search may stop at  the first match:     */
	for (i = 0; i < PDB_KEYWORDS; i++)
		{
		if (strcmp (lineA, PDBKeywordAA[i]) == 0)
			{
			good_linesN++;
			break;
			}
		}
	}

/* Close file: */
fclose (fileP);

/* Check how many lines were taken into account: */
if (total_linesN == 0) return -1;

/* Return PDB_FORMAT identifier if at least 80% of PDB lines were found: */
percentage = (double) good_linesN / (double) total_linesN;
if (percentage >= 0.80) return PDB_FORMAT;            /* See defines.h ! */

/* If there are only 10 lines or less, at least */
/* one line should contain a valid PDB keyword: */
if ((total_linesN <= 10) && (good_linesN >= 1)) return PDB_FORMAT;

/* If this point is reached, file format is not PDB: */
return -2;
}

/*===========================================================================*/