1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
|
#include "piler2.h"
// Destructive: pokes Rec.Attrs
static char *GetTarget(GFFRecord &Rec, int *ptrStart, int *ptrEnd)
{
char *Attrs = Rec.Attrs;
char *SemiColon = strchr(Attrs, ';');
if (0 != SemiColon)
*SemiColon = 0;
char *Space = strchr(Attrs, ' ');
if (0 == Space)
Quit("GFF hit record line %d, missing space in attrs", GFFLineNr);
*Space = 0;
if (0 != strcmp(Attrs, "Target"))
Quit("GFF hit record line %d, expected Target as first attr", GFFLineNr);
char *Label = Space + 1;
Space = strchr(Label, ' ');
if (0 == Space)
Quit("GFF hit record line %d, missing space following Target label", GFFLineNr);
*Space = 0;
char *Start = Space + 1;
Space = strchr(Start, ' ');
if (0 == Space)
Quit("GFF hit record line %d, missing space following Target start", GFFLineNr);
*Space = 0;
char *End = Space + 1;
*ptrStart = atoi(Start);
*ptrEnd = atoi(End);
return Label;
}
static int ReadHitsPass1(FILE *f)
{
GFFLineNr = 0;
int HitCount = 0;
GFFRecord Rec;
while (GetNextGFFRecord(f, Rec))
{
if (0 != strcmp(Rec.Feature, "hit"))
continue;
if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End)
Warning("GFF line %d: invalid start %d / end %d",
GFFLineNr, Rec.Start, Rec.End);
int TargetStart;
int TargetEnd;
const char *TargetLabel = GetTarget(Rec, &TargetStart, &TargetEnd);
if (TargetStart <= 0 || TargetEnd <= 0 || TargetStart > TargetEnd)
Warning("GFF line %d: invalid target start %d / end %d",
GFFLineNr, Rec.Start, Rec.End);
++HitCount;
AddContigPos(Rec.SeqName, Rec.End - 1);
AddContigPos(TargetLabel, TargetEnd - 1);
}
return HitCount;
}
static int ReadHitsPass2(FILE *f, HitData *Hits)
{
rewind(f);
GFFLineNr = 0;
int HitCount = 0;
GFFRecord Rec;
while (GetNextGFFRecord(f, Rec))
{
if (0 != strcmp(Rec.Feature, "hit"))
continue;
if (Rec.Start <= 0 || Rec.End <= 0 || Rec.Start > Rec.End)
Warning("GFF line %d: invalid start %d / end %d",
GFFLineNr, Rec.Start, Rec.End);
int TargetStart;
int TargetEnd;
const char *TargetLabel = GetTarget(Rec, &TargetStart, &TargetEnd);
if (TargetStart <= 0 || TargetEnd <= 0 || TargetStart > TargetEnd)
Warning("GFF line %d: invalid target start %d / end %d",
GFFLineNr, Rec.Start, Rec.End);
HitData &Hit = Hits[HitCount];
const int QueryFrom = ContigToGlobal(Rec.Start, Rec.SeqName);
const int TargetFrom = ContigToGlobal(TargetStart, TargetLabel);
const int QueryLength = Rec.End - Rec.Start + 1;
const int TargetLength = TargetEnd - TargetStart + 1;
Hit.QueryFrom = QueryFrom;
Hit.QueryTo = QueryFrom + QueryLength - 1;
Hit.TargetFrom = TargetFrom;
Hit.TargetTo = TargetFrom + TargetLength - 1;
if (Rec.Strand == '+')
Hit.Rev = false;
else if (Rec.Strand == '-')
Hit.Rev = true;
else
Quit("GFF line %d, Invalid strand %c", GFFLineNr, Rec.Strand);
++HitCount;
}
return HitCount;
}
HitData *ReadHits(const char *FileName, int *ptrHitCount, int *ptrSeqLength)
{
FILE *f = OpenStdioFile(FileName);
int HitCount = ReadHitsPass1(f);
int SeqLength = GlobalizeContigs();
MakeContigMap();
HitData *Hits = all(HitData, HitCount);
ReadHitsPass2(f, Hits);
fclose(f);
*ptrSeqLength = SeqLength;
*ptrHitCount = HitCount;
return Hits;
}
|