File: extract_sequence.c

package info (click to toggle)
garlic 1.4-1
links: PTS
area: main
in suites: sarge
size: 4,192 kB
ctags: 1,368
sloc: ansic: 49,603; makefile: 1,079
file content (154 lines) | stat: -rw-r--r-- 4,772 bytes
parent folder | download | duplicates (5)
/* Copyright (C) 2000, 2001 Damir Zucic */

/*=============================================================================

				extract_sequence.c

Purpose:
	Extract  sequence  information: use pure residue names, i.e. ignore
	whitespace. Up to three characters are available in the PDB version
	2.1 file format. In addition, add residue array index to each atom.

Input:
	(1) Pointer to MolComplexS structure, with macromolecular data.

Output:
	(1) An array of ResidueS structures allocated and initialized.
	(2) Return value.

Return value:
	(1) Positive on success.
	(2) Zero, if complex contains no atoms.
	(3) Negative on failure.

========includes:============================================================*/

#include <stdio.h>

#include <stdlib.h>

#include <X11/Xlib.h>
#include <X11/Xutil.h>
#include <X11/Xos.h>
#include <X11/Xatom.h>

#include "defines.h"
#include "typedefs.h"

/*======function prototypes:=================================================*/

/* Both routines are defined in other source files of the project. */

/* Used below to obtain the number of residues in a complex. */
extern size_t	CountResidues_ (MolComplexS *);

/* Error reporting routine; takes seven string arguments. */
extern void	ErrorMessage_ (char *, char *, char *, char *,
			       char *, char *, char *);

/*======extract sequence information:========================================*/

/*
 * Build the residue table for a macromolecular complex.
 *
 * Atoms are scanned in array order.  Consecutive atoms sharing both the
 * residue sequence number and the residue insertion code are treated as
 * one residue.  Each atom receives the index of its residue in the residue
 * array (residue_arrayI) and each residue receives the indices of its
 * first and last atom (residue_startI, residue_endI).
 *
 * On entry mol_complexSP->residuesN is reset to zero;  it is set to the
 * value returned by CountResidues_ only after the residue array has been
 * allocated, so a failed call never leaves a non-zero count paired with
 * a NULL residueSP pointer.
 *
 * Return value:
 *	 1	success
 *	 0	the complex contains no atoms
 *	-1	CountResidues_ reported zero residues
 *	-2	memory allocation failed (error message is issued)
 */
int ExtractSequence_ (MolComplexS *mol_complexSP)
{
size_t		atomsN, atomI;
size_t		residuesN, residue_arrayI = 0;
size_t		elementsN;
AtomS		*curr_atomSP;
int		previous_residueI, current_residueI;
int		previous_insertion_code, current_insertion_code;
ResidueS	*residueSP;

/* Until the residue array is ready, no residues are available: */
mol_complexSP->residuesN = 0;

/* Nothing to do if there are no atoms: */
atomsN = mol_complexSP->atomsN;
if (atomsN == 0) return 0;

/* Count residues; if there is nothing, return negative value: */
residuesN = CountResidues_ (mol_complexSP);
if (residuesN == 0) return -1;

/* Allocate zero-filled storage; 100 spare elements are reserved */
/* in addition to the counted number of residues:                */
elementsN = residuesN + 100;
mol_complexSP->residueSP = (ResidueS *) calloc (elementsN, sizeof (ResidueS));
if (mol_complexSP->residueSP == NULL)
	{
	ErrorMessage_ ("garlic", "ExtractSequence_", "",
		       "Failed to allocate memory for ResidueS array!\n",
		       "", "", "");
	return -2;
	}

/* Store the number of residues only now, when the array exists: */
mol_complexSP->residuesN = residuesN;

residueSP = mol_complexSP->residueSP;

/* The first atom always opens the residue with array index zero. */
/* The start index of that residue is left at the zero written by */
/* calloc;  its end index is fixed when the next residue begins,  */
/* or after the loop if there is only one residue.                */
curr_atomSP = mol_complexSP->atomSP;
curr_atomSP->residue_arrayI = 0;
previous_residueI = curr_atomSP->raw_atomS.residue_sequenceI;
previous_insertion_code = curr_atomSP->raw_atomS.residue_insertion_code;

/* NOTE(review): residue_arrayI is not checked against elementsN; */
/* this relies on CountResidues_ detecting residue boundaries the */
/* same way as the loop below - confirm against its source.       */

/* Scan the remaining atoms: */
for (atomI = 1; atomI < atomsN; atomI++)
	{
	curr_atomSP = mol_complexSP->atomSP + atomI;

	/* Residue sequence number and insertion code of this atom: */
	current_residueI = curr_atomSP->raw_atomS.residue_sequenceI;
	current_insertion_code = curr_atomSP->raw_atomS.residue_insertion_code;

	/* Same residue as the previous atom: just tag the atom. */
	if ((current_residueI == previous_residueI) &&
	    (current_insertion_code == previous_insertion_code))
		{
		curr_atomSP->residue_arrayI = residue_arrayI;
		continue;
		}

	/* A new residue starts at this atom: */
	residue_arrayI++;
	curr_atomSP->residue_arrayI = residue_arrayI;

	/* Close the previous residue at the preceding atom.  Both */
	/* residue_arrayI and atomI are at least one at this point */
	/* so neither subtraction can wrap around:                 */
	residueSP[residue_arrayI - 1].residue_endI = atomI - 1;

	/* Open the new residue; the end index is provisional and */
	/* is overwritten when the residue is closed:             */
	residueSP[residue_arrayI].residue_startI = atomI;
	residueSP[residue_arrayI].residue_endI = atomI;

	/* Remember the identity of the residue now being filled: */
	previous_residueI = current_residueI;
	previous_insertion_code = current_insertion_code;
	}

/* The last residue ends at the last atom (atomsN is at least one): */
residueSP[residue_arrayI].residue_endI = atomsN - 1;

/* If this point is reached, return positive value (success indicator): */
return 1;
}

/*===========================================================================*/