File: extract_FL_subset.pl

package info (click to toggle)
transdecoder 3.0.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 11,356 kB
  • ctags: 274
  • sloc: perl: 5,454; sh: 81; makefile: 51
file content (134 lines) | stat: -rwxr-xr-x 2,967 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/env perl

=pod

=head1 NAME

extract_FL_subset.pl

=head1 USAGE

Required:

 --transcripts, -t <string>                            transcripts.fasta

 --gff3, -g <string>                            transcripts.fasta.transdecoder.gff3

=cut


use strict;
use warnings;
use FindBin;
use Pod::Usage;
use Getopt::Long qw(:config posix_default no_ignore_case bundling pass_through);
use Data::Dumper;
use File::Basename;

use lib ("$FindBin::RealBin/../PerlLib");
use Gene_obj;


my $UTIL_DIR = "$FindBin::RealBin";

my $help;

my $transcripts_file;
my $gff3_file;

&GetOptions( 'transcripts|t=s' => \$transcripts_file,
             'gff3|g=s' => \$gff3_file,

             'h' => \$help,
    );



pod2usage(-verbose => 2, -output => \*STDERR) if ($help || ! ($transcripts_file && $gff3_file));

if (@ARGV) {
    die "Error, don't understand options: @ARGV";
}


main: {
    
    my $output_prefix = basename($transcripts_file);

    # get the FL entries:
    my $FL_accs_file = "$output_prefix.complete_only";
    my $cmd = "$UTIL_DIR/get_FL_accs.pl $gff3_file > $FL_accs_file";
    &process_cmd($cmd);

    # index the current gff file:
    $cmd = "$UTIL_DIR/index_gff3_files_by_isoform.pl $gff3_file";
    &process_cmd($cmd);
        
    # retrieve the best entries:
    my $complete_gff3_filename = $FL_accs_file . ".gff3";
    $cmd = "$UTIL_DIR/gene_list_to_gff.pl $FL_accs_file $gff3_file.inx > $FL_accs_file.gff3";
    &process_cmd($cmd);
    
    
    ##############################
    ## Generate the final outputs.
    ##############################
    
    {
                        
        ## write final outputs:
        $gff3_file = "$FL_accs_file.gff3";
        

        ## make a BED file for viewing in IGV
        my $bed_file = $gff3_file;
        $bed_file =~ s/\.gff3$/\.bed/;
        $cmd = "$UTIL_DIR/gff3_file_to_bed.pl $gff3_file > $bed_file";
        &process_cmd($cmd);
        
    
        # make a peptide file:
        my $best_pep_file = $gff3_file;
        $best_pep_file =~ s/\.gff3$/\.pep/;
        $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file > $best_pep_file";
        &process_cmd($cmd);
        
        

        # make a CDS file:
        my $best_cds_file = $best_pep_file;
        $best_cds_file =~ s/\.pep$/\.cds/;
        $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file CDS > $best_cds_file";
        &process_cmd($cmd);
        
        # make a CDS file:
        my $best_cdna_file = $best_pep_file;
        $best_cdna_file =~ s/\.pep$/\.mRNA/;
        $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file cDNA > $best_cdna_file";
        &process_cmd($cmd);
    
    }
    
    print STDERR "Done. See output files $FL_accs_file.\*\n\n\n";
    
    
    
    exit(0);
}


####
sub process_cmd {
	my ($cmd) = @_;

	print "CMD: $cmd\n";
	my $ret = system($cmd);

	if ($ret) {
		die "Error, cmd: $cmd died with ret $ret";
	}
	
	return;

}