1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
|
#!/bin/awk -f
# Usage: glim-diff.awk <a-pred> <b-pred>
# Read gene predictions in <a-pred> and <b-pred>
# and output them side by side. Both must be
# in sorted order by stop codon and the format for
# each must be:
# <id> <start> <stop> [additional columns irrelevant]
# Also print summary info at end.
# Main driver.  Merges the two prediction lists (each sorted by the value
# in its third column) and prints them side by side:
#   "<"  record present only in A
#   ">"  record present only in B
#   "="  same end and same start in both
#   "|"  same end, different start
# A summary of the counts is printed after the listing.
BEGIN {
    if (ARGC < 3)
        Usage_Exit();

    # Remove both file names from ARGV so awk does not also treat them
    # as main-input files; they are read explicitly with getline.
    afp = ARGV [1];
    delete ARGV [1];
    bfp = ARGV [2];
    delete ARGV [2];

    # Prime the merge with the first record from each file.
    Read_A();
    Read_B();

    # Multiplying by 1 forces the field values to be compared as numbers.
    while (! (adone || bdone))
    {
        if (1 * aend < 1 * bend)
        {
            printf "%-8s %7d %7d <\n", aid, astart, aend;
            aonly ++;
            Read_A();
        }
        else if (1 * bend < 1 * aend)
        {
            printf "%24s > %-8s %7d %7d\n", "", bid, bstart, bend;
            bonly ++;
            Read_B();
        }
        else
        {
            # Same end coordinate: report the signed start difference.
            # The sign convention flips when A's start is greater than
            # its end (presumably a reverse-strand gene -- confirm).
            if (1 * astart < 1 * aend)
                diff = bstart - astart;
            else
                diff = astart - bstart;

            if (diff == 0)
            {
                ch = "=";
                exact_ct ++;
            }
            else
                ch = "|";

            printf "%-8s %7d %7d %s %-8s %7d %7d\n",
                   aid, astart, aend, ch, bid, bstart, bend;
            match_ct ++;
            diff_sum += diff;
            Read_A();
            Read_B();
        }
    }

    # At most one of the two files still has records; flush them.
    while (! adone)
    {
        printf "%-8s %7d %7d <\n", aid, astart, aend;
        aonly ++;
        Read_A();
    }
    while (! bdone)
    {
        printf "%24s > %-8s %7d %7d\n", "", bid, bstart, bend;
        bonly ++;
        Read_B();
    }

    # With no matches there is nothing to average; report 0 instead of
    # dividing by zero, which would abort before the counts are printed.
    if (match_ct > 0)
        avg_diff = diff_sum / match_ct;
    else
        avg_diff = 0;

    print "";
    printf " A only: %6d %5.1f%%\n", aonly, Percent(aonly, acount);
    printf " B only: %6d %5.1f%%\n", bonly, Percent(bonly, bcount);
    printf "Matches: %6d %5.1f%% %5.1f%%\n", match_ct,
           Percent(match_ct, acount), Percent(match_ct, bcount);
    printf " Exact: %6d %5.1f%% %5.1f%%\n", exact_ct,
           Percent(exact_ct, match_ct), Percent(exact_ct, acount);
    printf "AvgDiff: %8.1f\n", avg_diff;
    printf "A count: %6d\n", acount;
    printf "B count: %6d\n", bcount;
}
# Return x expressed as a percentage of y.
# A zero denominator yields 0.0 rather than a division error.
function Percent (x, y)
{
    return (y == 0) ? 0.0 : (100.0 * x) / y;
}
# Read the next record from the file named by the global afp.
# On success the first three fields are stored in the globals aid,
# astart and aend, and acount is incremented.  At end of file adone is
# set to 1.  If the file cannot be read, an error is written to stderr
# and the program exits with status 1 instead of silently treating the
# file as empty.
# "status" is a local variable (extra-parameter idiom); callers pass
# no arguments.
function Read_A (    status)
{
    status = (getline < afp);
    if (status > 0)
    {
        aid = $1;
        astart = $2;
        aend = $3;
        acount ++;
    }
    else if (status < 0)
    {
        # getline returns a negative value when the file cannot be opened or read.
        printf "ERROR: cannot read file '%s'\n", afp > "/dev/stderr";
        exit 1;
    }
    else
        adone = 1;
}
# Read the next record from the file named by the global bfp.
# On success the first three fields are stored in the globals bid,
# bstart and bend, and bcount is incremented.  At end of file bdone is
# set to 1.  If the file cannot be read, an error is written to stderr
# and the program exits with status 1 instead of silently treating the
# file as empty.
# "status" is a local variable (extra-parameter idiom); callers pass
# no arguments.
function Read_B (    status)
{
    status = (getline < bfp);
    if (status > 0)
    {
        bid = $1;
        bstart = $2;
        bend = $3;
        bcount ++;
    }
    else if (status < 0)
    {
        # getline returns a negative value when the file cannot be opened or read.
        printf "ERROR: cannot read file '%s'\n", bfp > "/dev/stderr";
        exit 1;
    }
    else
        bdone = 1;
}
# Print the usage message and terminate the program.
# Exits with status 1 so that calling scripts can detect the bad
# invocation; the message text itself is unchanged.
function Usage_Exit ()
{
    print "# Usage: glim-diff.awk <a-pred> <b-pred>";
    print "# Read gene predictions in <a-pred> and <b-pred>";
    print "# and output them side by side. Both must be";
    print "# in sorted order by stop codon and the format for";
    print "# each must be:";
    print "# <id> <start> <stop> [additional columns irrelevant]";
    print "# Also print summary info at end.";
    exit 1;
}
|