File: load_sequence.c

package info (click to toggle)
garlic 1.6-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 4,516 kB
  • sloc: ansic: 52,465; makefile: 2,254
file content (206 lines) | stat: -rw-r--r-- 5,272 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
/* Copyright (C) 2000-2003 Damir Zucic */

/*=============================================================================

				load_sequence.c

Purpose:
	Load sequence from file.

Input:
	(1) Pointer to RuntimeS structure.

Output:
	(1) Sequence stored to the sequence buffer.
	(2) Return value.

Return value:
	(1) Positive on success.
	(2) Negative on failure.

Notes:
	(1) The sequence is expected in three-letter code or in FASTA
	    format (one-letter code, with '>' as the first character of
	    the title line). If the file is not in FASTA format,
	    one-letter code may be misinterpreted as valid three-letter
	    code. The file format is free, but the length of an input
	    line should not exceed STRINGSIZE.

	(2) Space, comma, tab, semicolon and newline are interpreted as
	    separators. Lines beginning with # (number sign) are treated
	    as comments. Empty lines are ignored. This applies both to
	    three-letter code and to FASTA format.

	(3) The original  command string  is used  because the copy was
	    converted to uppercase.

========includes:============================================================*/

#include <stdio.h>

#include <string.h>
#include <ctype.h>

#include <X11/Xlib.h>
#include <X11/Xutil.h>
#include <X11/Xos.h>
#include <X11/Xatom.h>

#include "defines.h"
#include "typedefs.h"

/*======function prototypes:=================================================*/

/* Helpers defined in other source files. The notes below describe only */
/* how each one is used in this file; confirm details at the definition. */

/* Called as (token buffer, buffer size, input string, delimiter set). */
/* The returned pointer is fed back as the next input string and a NULL */
/* return is treated as "no more tokens": */
char		*ExtractToken_ (char *, int, char *, char *);

/* Called with the runtime structure and a file name; a positive return */
/* value is treated as "sequence was loaded from a FASTA file": */
int		ReadFasta_ (RuntimeS *, char *);

/* Called with a file name; a NULL return is treated as failure to open: */
FILE		*OpenFileForReading_ (char *);

/* Called once after the sequence is stored; presumably initializes the */
/* hydrophobicity values for the loaded sequence - verify at definition: */
void		InitHyphob_ (RuntimeS *);

/*======load sequence from file:=============================================*/

/*
 * LoadSequence_: load a residue sequence from the file whose name is the
 * third token of the current command string (runtimeSP->curr_commandA).
 *
 * The file is first offered to ReadFasta_; if that does not report success
 * the file is parsed as a free-format list of residue names.  Each accepted
 * name is written into runtimeSP->sequenceP in a slot (RESNAMESIZE - 1)
 * bytes wide.  On success the residue count, the serial numbers and the
 * disulfide flags are initialized and InitHyphob_ is called.
 *
 * Return value:
 *	 1  the sequence was read by ReadFasta_
 *	 2  the sequence was read as a list of residue names
 *	-1, -2  the command string has fewer than two tokens
 *	-3  the file name is missing
 *	-4  the file could not be opened
 *	-5  a token is longer than RESNAMESIZE - 1 characters
 *	-6  the sequence does not fit into the sequence buffer
 *
 * For -3 ... -6 a message is stored to runtimeSP->messageA and its length
 * to runtimeSP->message_length.
 */
int LoadSequence_ (RuntimeS *runtimeSP)
{
int		max_length, i;
char		lineA[STRINGSIZE];
char		*remainderP;
char		tokenA[STRINGSIZE];
FILE		*fileP;
char		*P;
size_t		residueI = 0;
int		token_length;
int		numberF;
size_t		offset;

/* The maximal residue name length: */
max_length = RESNAMESIZE - 1;

/* Zero initialize the sequence buffer: */
runtimeSP->residuesN = 0;
memset (runtimeSP->sequenceP, '\0', runtimeSP->sequence_buffer_size);

/* Copy the original command string (the original is used */
/* because the file name must keep its letter case): */
strncpy (lineA, runtimeSP->curr_commandA, STRINGSIZE - 1);
lineA[STRINGSIZE - 1] = '\0';

/* Skip two tokens: */
remainderP = ExtractToken_ (tokenA, STRINGSIZE, lineA, " \t\n");
if (!remainderP) return -1;
remainderP = ExtractToken_ (tokenA, STRINGSIZE, remainderP, " \t\n");
if (!remainderP) return -2;

/* The third token should contain the file name: */
remainderP = ExtractToken_ (tokenA, STRINGSIZE, remainderP, " \t\n");
if (!remainderP)
	{
	strcpy (runtimeSP->messageA, "File name missing!");
	runtimeSP->message_length = strlen (runtimeSP->messageA);
	return -3;
	}

/* Try to interpret file as FASTA (one letter code): */
if (ReadFasta_ (runtimeSP, tokenA) > 0) return 1;

/* If this point is reached, the input file was not in FASTA format. */

/* Try to open file: */
fileP = OpenFileForReading_ (tokenA);
if (fileP == NULL)
	{
	strcpy (runtimeSP->messageA, "Failed to open file!");
	runtimeSP->message_length = strlen (runtimeSP->messageA);
	return -4;
	}

/* Read file, line by line: */
while (fgets (lineA, STRINGSIZE, fileP))
	{
	/* Lines beginning with # are treated as comments: */
	if (lineA[0] == '#') continue;

	/* Convert to uppercase. The cast to unsigned char is required */
	/* because  passing a negative char value to toupper  has */
	/* undefined behavior (bytes above 0x7F where char is signed): */
	for (P = lineA; *P != '\0'; P++)
		{
		*P = (char) toupper ((unsigned char) *P);
		}

	/* Parse line: */
	remainderP = lineA;
	while ((remainderP = ExtractToken_ (tokenA, STRINGSIZE,
					    remainderP, " ,;\t\n")) != NULL)
		{
		/* Check  the token length - it should */
		/* not contain more than max_length characters: */
		token_length = (int) strlen (tokenA);
		if (token_length > max_length)
			{
			/* The token may be almost STRINGSIZE characters */
			/* long, so the output is bounded  to avoid */
			/* overflowing the message buffer. */
			/* NOTE(review): sizeof assumes that messageA is */
			/* an array member  (as the "A" suffix used in */
			/* this file suggests) - confirm in typedefs.h. */
			snprintf (runtimeSP->messageA,
				  sizeof (runtimeSP->messageA),
				  "Bad residue name: %s", tokenA);
			runtimeSP->message_length =
						strlen (runtimeSP->messageA);
			fclose (fileP);
			return -5;
			}

		/* If this token contains nothing but digits */
		/* and sign characters (+ and -), ignore it: */
		numberF = 1;
		for (i = 0; i < token_length; i++)
			{
			if ((isdigit ((unsigned char) tokenA[i]) == 0) &&
			    (tokenA[i] != '-') && (tokenA[i] != '+'))
				{
				numberF = 0;
				break;
				}
			}
		if (numberF) continue;

		/* Check is there enough space left in the buffer. */
		/* A margin of ten residue slots is kept free. The */
		/* margin is added to the offset  (instead of being */
		/* subtracted from the buffer size)  so that a small */
		/* buffer cannot cause an unsigned wrap-around: */
		offset = max_length * residueI;
		if (offset + 10 * (size_t) max_length >
					runtimeSP->sequence_buffer_size)
			{
			strcpy (runtimeSP->messageA, "Sequence too long!");
			runtimeSP->message_length =
						strlen (runtimeSP->messageA);
			fclose (fileP);
			return -6;
			}

		/* Copy the residue name to the sequence buffer. */
		/* The slot is max_length bytes wide;  a name of */
		/* exactly max_length characters fills the slot */
		/* completely, without terminating zero: */
		P = runtimeSP->sequenceP + offset;
		strncpy (P, tokenA, max_length);

		/* Update the residue index: */
		residueI++;
		}
	}

/* Close file: */
fclose (fileP);

/* Store the number of residues: */
runtimeSP->residuesN = residueI;

/* Initialize serial numbers (counted from one): */
for (residueI = 0; residueI < runtimeSP->residuesN; residueI++)
	{
	*(runtimeSP->serialIP + residueI) = residueI + 1;
	}

/* Initialize disulfide flags: */
for (residueI = 0; residueI < runtimeSP->residuesN; residueI++)
	{
	*(runtimeSP->disulfideFP + residueI) = 0;
	}

/* Initialize hydrophobicity values: */
InitHyphob_ (runtimeSP);

/* Return positive value on success: */
return 2;
}

/*===========================================================================*/