1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
|
#!/usr/bin/env perl
use strict;
use warnings;
use FindBin;
use lib ("$FindBin::Bin/../PerlLib");
use Gene_obj;
use Gene_obj_indexer;
use GFF3_utils;
use Carp;
my $usage = "\n\nusage: $0 gff3_file\n\n";
my $gff3_file = $ARGV[0] or die $usage;
main: {
my $gene_obj_indexer_href = {};
my $asmbl_id_to_gene_list_href = &GFF3_utils::index_GFF3_gene_objs($gff3_file, $gene_obj_indexer_href);
my %seen;
foreach my $asmbl_id (sort keys %$asmbl_id_to_gene_list_href) {
my @gene_ids = @{$asmbl_id_to_gene_list_href->{$asmbl_id}};
#print "ASMBL: $asmbl_id, gene_ids: @gene_ids\n";
my @gene_entries;
foreach my $gene_id (@gene_ids) {
my $gene_obj = $gene_obj_indexer_href->{$gene_id};
my $cds_token = &get_CDS_token($gene_obj);
if (! $seen{$cds_token}) {
print $gene_obj->to_GFF3_format(source => "transdecoder") . "\n";
$seen{$cds_token} = 1;
}
else {
print STDERR "-ignoring entry $cds_token, already represented by another transcript\n";
}
}
}
exit(0);
}
####
sub get_CDS_token {
my ($gene_obj) = @_;
my $cds_text = $gene_obj->{asmbl_id};
my @exons = $gene_obj->get_exons();
foreach my $exon (@exons) {
if (my $cds_obj = $exon->get_CDS_obj()) {
my ($cds_end5, $cds_end3) = $cds_obj->get_coords();
$cds_text .= ":$cds_end5-$cds_end3";
}
}
return($cds_text);
}
|