1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
|
#!/usr/bin/perl
#
# Convert a polyA polyT position table to tts hints
# This is for the polyA/T tables from Chun Liang
#
# Mario Stanke, 11.7.2009
use strict;
use Getopt::Long;
use warnings;

# Help text. It is printed when the script is started with fewer than two
# arguments and appended to the error message for an invalid command line.
my $usage = "$0 -- polyA/polyT table to tts hints for AUGUSTUS\n";
$usage .= "\n";
$usage .= "Usage: $0 --in=polyA.txt --out=hintsfile\n";
$usage .= " options:\n";
$usage .= " hintradius=n a 3' transcript end at position p gives rise to a tts hint from p-n to p+n (default 10)\n";
$usage .= " format=new|newer new: input file in newer format with 12 columns\n";
$usage .= " newer: input file in format with these 13 columns\n";
$usage .= " SeqName EstDir Chromsome ChrStrand ChrStart ChrEnd EstStrand EstStart EstEnd EstMapLen ChrSite Type PolyLen PolyStr\n";
$usage .= " swapstrand swap strand, currently use this for polyT files\n";

# Settings and their defaults; the first five can be set on the command line.
my $infilename;
my $hintsfilename;
my $hintradius = 10;         # half-width of the hint interval around the site
my $format     = "old";
my $swapstrand = 0;
my $source     = "E";        # written to the src= attribute of every hint
my $priority   = 4;          # written to the pri= attribute of every hint
my $prgsrc     = "polyA";    # written to the second output column

# Per input format: the minimum number of tab-separated columns a line must
# have, and the zero-based columns holding the target sequence name, the
# strand and the transcript end position.
my %layout_for = (
    old   => { min_columns => 26, target => 3, strand => 2, site => 18 },
    new   => { min_columns => 12, target => 1, strand => 2, site => 9 },
    newer => { min_columns => 13, target => 2, strand => 3, site => 10 },
);

if ($#ARGV < 1) {
    print "$usage";
    exit;
}

GetOptions(
    'in=s'         => \$infilename,
    'out=s'        => \$hintsfilename,
    'format=s'     => \$format,
    'swapstrand!'  => \$swapstrand,
    'hintradius:i' => \$hintradius,
) or die "Invalid command line.\n\n$usage";

# Fail early with a clear message instead of a confusing open() error or a
# silent fallback to the "old" column layout.
die "Both --in and --out are required.\n\n$usage"
    unless defined $infilename && defined $hintsfilename;
die "Unknown format '$format' (expected old, new or newer).\n\n$usage"
    unless exists $layout_for{$format};
die "hintradius must not be negative (got $hintradius).\n"
    if $hintradius < 0;

# Three-argument open keeps special characters in a filename from being
# interpreted as part of the open mode.  The former two-argument form treated
# "-" as STDIN/STDOUT; that is kept explicitly for backward compatibility.
my ($in_fh, $out_fh);
if ($infilename eq '-') {
    $in_fh = \*STDIN;
}
else {
    open($in_fh, '<', $infilename)
        or die "Couldn't open $infilename: $!\n";
}
if ($hintsfilename eq '-') {
    $out_fh = \*STDOUT;
}
else {
    open($out_fh, '>', $hintsfilename)
        or die "Could not open $hintsfilename: $!\n";
}

my $layout = $layout_for{$format};

LINE:
while (my $line = <$in_fh>) {
    # Skip the header line.  The usage text above spells the column
    # "Chromsome", so both spellings are accepted.
    next LINE if $line =~ /Chromo?some/ && $line =~ /SeqName/;

    $line =~ s/\r?\n\z//;

    # Limit -1 keeps trailing empty fields so they still count as columns.
    my @f = split /\t/, $line, -1;
    if (@f < $layout->{min_columns}) {
        warn "Not poly(A) format (input line $.)\n";
        next LINE;
    }

    my $qname      = $f[0];
    my $targetname = $f[ $layout->{target} ];
    my $strand     = $f[ $layout->{strand} ];

    # A non-numeric site (e.g. an unrecognised header line) would otherwise be
    # treated as position 0 and produce a bogus hint.
    my ($ttspos) = $f[ $layout->{site} ] =~ /\A\s*(\d+)\s*\z/;
    if (!defined $ttspos) {
        warn "Skipping input line $.: transcript end position is not a non-negative integer\n";
        next LINE;
    }

    my $start = $ttspos - $hintradius;
    my $end   = $ttspos + $hintradius;
    $start = 1 if $start < 1;    # GFF coordinates are 1-based

    if ($swapstrand) {
        # Anything other than "+" becomes "+", as before.
        $strand = $strand eq "+" ? "-" : "+";
    }

    print {$out_fh} join("\t",
        $targetname, $prgsrc, 'tts', $start, $end, '.', $strand, '.',
        "pri=$priority;src=$source;est=$qname"), "\n";
}

close($in_fh);

# Buffered write errors (e.g. a full disk) only surface when closing.
close($out_fh) or die "Error while writing $hintsfilename: $!\n";
|