1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
|
#!/usr/bin/perl
# convert a utr.gff from makeUtrTrainingSet.pl to a gbrowse file
# Mario Stanke, 9.4.2008
#
# example input:
#
# chromosome1 makeUtr 5'-UTR 43947 44004 128 - . transcript_id "g6.t1"; gene_id "g6";
# chromosome1 makeUtr 5'-UTR 44317 44476 128 - . transcript_id "g6.t1"; gene_id "g6";
# chromosome1 makeUtr 3'-UTR 43022 43173 46 - . transcript_id "g7.t1"; gene_id "g7";
# chromosome1 makeUtr 3'-UTR 43436 43609 46 - . transcript_id "g7.t1"; gene_id "g7";
# chromosome1 makeUtr 3'-UTR 43837 43995 46 - . transcript_id "g7.t1"; gene_id "g7";
#
# example output:
#
# chromosome1 makeUtr 5UTREX 43947 44004 128 - . 5UTR g6.t1
# chromosome1 makeUtr 5UTREX 44317 44476 128 - . 5UTR g6.t1
# chromosome1 makeUtr 5UTR 43947 44476 128 - . 5UTR g6.t1
# chromosome1 makeUtr 3UTREX 43022 43173 46 - . 3UTR g7.t1
# chromosome1 makeUtr 3UTREX 43436 43609 46 - . 3UTR g7.t1
# chromosome1 makeUtr 3UTREX 43837 43995 46 - . 3UTR g7.t1
# chromosome1 makeUtr 3UTR 43022 43995 46 - . 3UTR g7.t1
my ($gene, $oldgene);
my ($type, $newtype);
my $start = -1;
my $end = -1;
my $score = -1;
my $seqname;
my $source;
my $strand;
while(<>){
@f = split /\t/;
next unless @f>7;
if ($f[8] =~ /transcript_id "(\S+)"/) {
$newgene = $1;
if ($f[2] =~ /^5/) {
$newtype = 5;
} else {
$newtype = 3;
}
if ((defined($gene) && defined($type)) && ($gene ne $newgene || $type ne $newtype)) {
if ($type == 5) {
print "$seqname\t$source\t5UTR\t$start\t$end\t$score\t$strand\t.\t5UTR $gene\n";
} else {
print "$seqname\t$source\t3UTR\t$start\t$end\t$score\t$strand\t.\t3UTR $gene\n";
}
$start = $end = -1;
}
if ($newtype == 5) {
print "$f[0]\t$f[1]\t5UTREX\t$f[3]\t$f[4]\t$f[5]\t$f[6]\t$f[7]\t5UTR $newgene\n";
} else {
print "$f[0]\t$f[1]\t3UTREX\t$f[3]\t$f[4]\t$f[5]\t$f[6]\t$f[7]\t3UTR $newgene\n";
}
$seqname = $f[0];
$source = $f[1];
$start = $f[3] unless ($start != -1 && $start < $f[3]);
$end = $f[4] unless ($end != -1 && $end > $f[4]);
$score = $f[5];
$strand = $f[6];
$gene = $newgene;
$type = $newtype;
}
}
|