1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
|
#!/usr/bin/env perl
use strict;
use warnings;
use FindBin;
use lib ("$FindBin::RealBin/../../PerlLib");
use Gene_obj;
my $usage = "usage: $0 file.gmap (BED|GTF)\n\n";
my $gmap_file = $ARGV[0] or die $usage;
my $out_format = $ARGV[1] or die $usage;
unless ($out_format =~ /^(BED|GTF)$/) {
die $usage;
}
my %data;
open (my $fh, $gmap_file) or die $!;
while (<$fh>) {
if (/^>(\S+)/) {
if (%data) {
&format_output(%data);
}
%data = ();
my $transcript_acc = $1;
%data = (acc => $transcript_acc,
segments => {}, # end5 => end3
);
}
elsif (/\s*([\+\-])([^\:]+):(\d+)-(\d+)\s+\((\d+)-(\d+)\)\s+(\d[^\%]+\%)/) {
#print " $transcript_acc\t$_";
my $orient = $1;
my $genome_acc = $2;
my $genome_end5 = $3;
my $genome_end3 = $4;
my $transcript_end5 = $5;
my $transcript_end3 = $6;
my $percent_identity = $7;
$data{segments}->{$genome_end5} = $genome_end3;
$data{scaff} = $genome_acc;
}
}
if (%data) {
&format_output(%data);
}
exit(0);
####
sub format_output {
my %data = @_;
my $gene_obj = new Gene_obj();
my $coords_href = $data{segments};
my $acc = $data{acc};
my $genome_scaff = $data{scaff};
if (%$coords_href) {
$gene_obj->populate_gene_object($coords_href, $coords_href);
$gene_obj->{com_name} = $acc;
$gene_obj->{asmbl_id} = $genome_scaff;
$gene_obj->{TU_feat_name} = "g|$acc";
$gene_obj->{Model_feat_name} = "t|$acc";
if ($out_format eq "BED") {
print $gene_obj->to_BED_format();
}
elsif ($out_format eq "GTF") {
print $gene_obj->to_transcript_GTF_format() . "\n";
}
else {
die "Error, cannot process format $out_format";
# shouldn't get here if proper option values processed at top.
}
}
return;
}
|