File: gff.cpp

package info (click to toggle)
piler 0~20140707-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye
  • size: 360 kB
  • sloc: cpp: 5,369; makefile: 39
file content (114 lines) | stat: -rwxr-xr-x 3,190 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#include "piler2.h"

// GFF fields are:
// <seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
//     0         1         2        3      4      5        6       7         8           9

// Number of the GFF line most recently read. GetNextGFFRecord increments it
// before every read (comment lines included), so the first line is 1.
// It is reported in the error messages of GetFields and GetNextGFFRecord and
// is not reset anywhere in the visible code.
int GFFLineNr;

// Splits Line in place into at most MaxFields fields separated by FS.
//
// Destructive read -- every separator that ends a stored field is overwritten
// with a nul, so each Fields[i] points at a nul-terminated substring inside
// Line. Line must therefore outlive any use of Fields.
//
//   Line       nul-terminated buffer to split (modified).
//   Fields     receives a pointer to the start of each field; must have room
//              for MaxFields entries.
//   MaxFields  maximum number of fields to store.
//   FS         field separator character.
//
// Returns the number of fields stored (MaxFields when the limit is reached).
// When Line holds more than MaxFields fields, the last stored field is cut at
// its separator and the rest of the line is ignored.
// Calls Quit() if any field is longer than MAX_GFF_FEATURE_LENGTH.
int GetFields(char *Line, char **Fields, int MaxFields, char FS)
	{
	char *p = Line;
	for (int FieldIndex = 0; FieldIndex < MaxFields; ++FieldIndex)
		{
		Fields[FieldIndex] = p;
		char *Sep = strchr(p, FS);
	// The last field has no separator; it runs to the terminating nul.
		const char *End = (0 != Sep) ? Sep : strchr(p, '\0');
		size_t FieldLength = (size_t) (End - p);
		if (FieldLength > MAX_GFF_FEATURE_LENGTH)
	// FieldLength is a size_t; convert it so the argument matches %d.
			Quit("Max GFF field length exceeded, field is %d chars, max=%d, line %d",
			  (int) FieldLength, MAX_GFF_FEATURE_LENGTH, GFFLineNr);
		if (0 == Sep)
			return FieldIndex + 1;
		*Sep = 0;
		p = Sep + 1;
		}
	return MaxFields;
	}

bool GetNextGFFRecord(FILE *f, GFFRecord &Rec)
	{
	for (;;)
		{
		++GFFLineNr;
		const char TAB = '\t';
		char Line[MAX_GFF_LINE+1];
		char *Ok = fgets(Line, sizeof(Line), f);
		if (NULL == Ok)
			{
			if (feof(f))
				return false;
			Quit("Error reading GFF file, line=%d feof=%d ftell=%d ferror=%d errno=%d",
			  GFFLineNr, feof(f), ftell(f), ferror(f), errno);
			}
		if ('#' == Line[0])
			continue;
		size_t n = strlen(Line);
		if (0 == n)
			Quit("fgets returned zero-length line");
		if (Line[n-1] != '\n' && !feof(f))
			Quit("Max line length in GFF file exceeded, line %d is %d chars long, max=%d",
			  GFFLineNr, n - 1, MAX_GFF_LINE);
		Line[n-1] = 0;	// delete newline

		char *Fields[9];
		int FieldCount = GetFields(Line, Fields, 9, '\t');
		if (FieldCount < 8)
			Quit("GFF record has < 8 fields, line %d", GFFLineNr);

		const char *SeqName = Fields[0];
		const char *Source = Fields[1];
		const char *Feature = Fields[2];
		const char *Start = Fields[3];
		const char *End = Fields[4];
		const char *Score = Fields[5];
		const char *Strand = Fields[6];
		const char *Frame = Fields[7];
		const char *Attrs = "";
		if (FieldCount > 8)
			{
	// Truncate attrs if comment found
			Attrs = Fields[8];
	//		char *Pound = strchr(Attrs, '#');
	//		if (0 != Pound)
	//			*Pound = 0;
			}

		strcpy(Rec.SeqName, SeqName);
		strcpy(Rec.Source, Source);
		strcpy(Rec.Feature, Feature);
		Rec.Start = atoi(Start);
		Rec.End = atoi(End);
		Rec.Score = (float) atof(Score);
		Rec.Strand = Strand[0];
		Rec.Frame = Frame[0] == '.' ? -1 : atoi(Frame);
		strcpy(Rec.Attrs, Attrs);
		return true;
		}
	}

// Writes Rec to f as a single tab-separated GFF line, columns in this order:
// <seqname> <source> <feature> <start> <end> <score> <strand> <frame> <attributes>
//     0         1         2        3      4      5        6       7         8
// The score is printed with %.3g and a frame of -1 is written as ".".
// The attributes column is always emitted, even when Rec.Attrs is empty.
// The results of the fprintf calls are not checked.
void WriteGFFRecord(FILE *f, const GFFRecord &Rec)
	{
	// Columns 0 through 6.
	fprintf(f, "%s\t%s\t%s\t%d\t%d\t%.3g\t%c",
	  Rec.SeqName, Rec.Source, Rec.Feature,
	  Rec.Start, Rec.End,
	  Rec.Score, Rec.Strand);

	// Column 7: -1 is the in-memory marker for "no frame".
	const bool FrameKnown = (Rec.Frame != -1);
	if (FrameKnown)
		fprintf(f, "\t%d", Rec.Frame);
	else
		fprintf(f, "\t.");

	// Column 8 and end of record.
	fprintf(f, "\t%s\n", Rec.Attrs);
	}