1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
|
#include "muscle.h"
#include <stdio.h>
#include <errno.h>
// Line-ending characters recognised while parsing.
const int CR = '\r';
const int NL = '\n';

// Number of bytes by which the ADD macro enlarges its buffer each time
// the buffer fills up (16 KiB).
const int BUFFER_BYTES = 16 * 1024;
// ADD(c): append the character c to a growable char array.
//
// The macro operates on three variables that must be in scope at the point
// of use:
//   char *Buffer           - start of the array (0 while nothing is allocated)
//   unsigned BufferLength  - number of bytes currently allocated
//   unsigned Pos           - index of the next free byte
//
// When the array is full it is reallocated with new[], BUFFER_BYTES larger,
// the old contents are copied across and the old array is released.
//
// The expansion is a braced block, not a do/while(0), because existing
// call sites write ADD(x) without a trailing semicolon.
// The argument is evaluated exactly once.
#define ADD(c) \
{ \
if (Pos >= BufferLength) \
	{ \
	const unsigned NewBufferLength = BufferLength + BUFFER_BYTES; \
	char *NewBuffer = new char[NewBufferLength]; \
	/* Buffer is null on the first growth; memcpy from null is undefined even for 0 bytes */ \
	if (0 != Buffer) \
		memcpy(NewBuffer, Buffer, BufferLength); \
	delete[] Buffer; \
	Buffer = NewBuffer; \
	BufferLength = NewBufferLength; \
	} \
Buffer[Pos++] = (char) (c); \
}
// Get next sequence from file.
//
// Reads one FASTA record from f: a label line introduced by '>' followed by
// sequence data that runs until end-of-file or until a '>' that directly
// follows a newline (that '>' is pushed back with ungetc so the next call
// starts on it).
//
// Parameters:
//   f             Input stream, positioned at the '>' of a record or at EOF.
//   ptrSeqLength  Receives the number of characters in the returned sequence.
//   ptrLabel      Receives a nul-terminated label (the text after '>' up to
//                 the newline, with CR characters dropped).
//   DeleteGaps    If true, characters for which IsGapChar() is true are
//                 dropped; otherwise they are kept in the sequence.
//
// Returns the sequence characters, or 0 when end-of-file is reached before a
// record starts. The returned sequence is NOT nul-terminated; use
// *ptrSeqLength. Letters are converted to upper case, white space is skipped,
// and any other character is reported through Warning() and ignored.
//
// Ownership: the sequence and the label are allocated by the ADD macro
// (new[]); the caller releases both with delete[].
//
// Records that contain no sequence characters are skipped. The label of a
// skipped record is released here, so if only empty records remain the
// function returns 0 with *ptrLabel set to 0. When EOF is hit on the very
// first read, *ptrLabel and *ptrSeqLength are left untouched.
//
// Malformed input and read errors are reported through Quit().
// NOTE(review): this code relies on Quit() not returning - confirm.
char *GetFastaSeq(FILE *f, unsigned *ptrSeqLength, char **ptrLabel, bool DeleteGaps)
{
	// Loop (rather than recurse) so that any number of consecutive empty
	// records can be skipped without growing the stack.
	for (;;)
	{
		// The ADD macro requires these exact variable names.
		unsigned BufferLength = 0;
		unsigned Pos = 0;
		char *Buffer = 0;

		int c = fgetc(f);
		if (EOF == c)
			return 0;
		if ('>' != c)
			Quit("Invalid file format, expected '>' to start FASTA label");

		// ---- Label: everything up to the newline ----
		for (;;)
		{
			c = fgetc(f);
			if (EOF == c)
				Quit("End-of-file or input error in FASTA label");

			// Ignore CR (discard, do not include in label)
			if (CR == c)
				continue;

			// NL terminates label
			if (NL == c)
				break;

			// All other characters added to label
			ADD(c)
		}

		// Nul-terminate label
		ADD(0)
		*ptrLabel = Buffer;

		// ---- Sequence data: start a fresh buffer ----
		BufferLength = 0;
		Pos = 0;
		Buffer = 0;

		// Last character that was not rejected with a warning; used to
		// check that '>' appears only at the start of a line.
		int PreviousChar = NL;
		for (;;)
		{
			c = fgetc(f);
			if (EOF == c)
			{
				if (feof(f))
					break;
				else if (ferror(f))
					Quit("Error reading FASTA file, ferror=TRUE feof=FALSE errno=%d %s",
					  errno, strerror(errno));
				else
					Quit("Error reading FASTA file, fgetc=EOF feof=FALSE ferror=FALSE errno=%d %s",
					  errno, strerror(errno));
			}

			if ('>' == c)
			{
				if (NL == PreviousChar)
				{
					// Start of the next record: leave it for the next read.
					ungetc(c, f);
					break;
				}
				else
					Quit("Unexpected '>' in FASTA sequence data");
			}
			else if (isspace(c))
			{
				// White space is skipped but still recorded in PreviousChar.
			}
			else if (IsGapChar(c))
			{
				if (!DeleteGaps)
				{
					ADD(c)
				}
			}
			else if (isalpha(c))
			{
				c = toupper(c);
				ADD(c)
			}
			else if (isprint(c))
			{
				Warning("Invalid character '%c' in FASTA sequence data, ignored", c);
				continue;
			}
			else
			{
				Warning("Invalid byte hex %02x in FASTA sequence data, ignored", (unsigned char) c);
				continue;
			}
			PreviousChar = c;
		}

		if (0 != Pos)
		{
			*ptrSeqLength = Pos;
			return Buffer;
		}

		// Empty record: nothing was appended, so Buffer is still 0 and only
		// the label needs releasing. Clear *ptrLabel so the caller never sees
		// a dangling pointer if EOF follows, then try the next record.
		delete[] *ptrLabel;
		*ptrLabel = 0;
	}
}
|