File: run_read_simulator_per_gene.pl

package info (click to toggle)
trinityrnaseq 2.2.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 212,452 kB
  • ctags: 5,067
  • sloc: perl: 45,552; cpp: 19,678; java: 11,865; sh: 1,485; makefile: 613; ansic: 427; python: 313; xml: 83
file content (102 lines) | stat: -rwxr-xr-x 2,187 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env perl

use strict;
use warnings;

use lib ($ENV{EUK_MODULES});
use Fasta_reader;
use FindBin;


# Command-line interface.
#   file.fasta : FASTA file to read (required; the script dies with the usage
#                text when it is missing).
#   max_genes  : optional cap on the number of genes processed by the main
#                block; omitted, empty or 0 means no limit.
my $usage = "usage: $0 file.fasta [max_genes]\n\n";

my $fasta_file = $ARGV[0] or die $usage;
my $max_genes = $ARGV[1];

# A non-numeric or negative limit would otherwise be compared numerically as
# 0 (or less) and silently stop the run after the first gene, so reject it.
if (defined $max_genes && length $max_genes && $max_genes !~ /\A\d+\z/) {
    die "Error, max_genes must be a non-negative integer, got: $max_genes\n\n$usage";
}


# Script body: group the FASTA entries by gene and run the read simulator
# once per gene that has more than one transcript.
#
# Accessions must look like "trans;gene". For every gene with at least two
# entries, a directory sim_AS_data/<gene>/ is created (non-word characters in
# the gene name replaced by '_') containing:
#   <gene>.template.fa : the gene's entries, written as ">acc\nsequence\n"
#   <gene>.reads.fa    : stdout of simulate_illuminaPE_from_transcripts.pl
# Stops once $max_genes genes have been simulated, when that limit is set.
# Genes are visited in hash order, so which genes fall under the limit is
# not fixed between runs.
main: {

    my $sim_out_dir = "sim_AS_data";
    unless (-d $sim_out_dir) {
        mkdir $sim_out_dir or die "Error, cannot mkdir $sim_out_dir: $!";
    }

    my $fasta_reader = Fasta_reader->new($fasta_file);

    # gene name => list of { acc => full accession, seq => sequence }
    my %gene_to_seqs;

    while (my $seq_obj = $fasta_reader->next()) {

        my $acc = $seq_obj->get_accession();

        # Only the gene part is needed here; the full accession is kept as-is.
        my (undef, $gene) = split(/;/, $acc);

        # Test for presence rather than truth so a gene named "0" is accepted.
        unless (defined $gene && length $gene) {
            die "Error, need trans;gene  format for accession: $acc";
        }

        my $sequence = $seq_obj->get_sequence();

        push (@{$gene_to_seqs{$gene}}, { acc => $acc,
                                         seq => $sequence, });

    }


    my $gene_counter = 0;

    # sanitized directory name => gene that claimed it during this run
    my %gene_for_outdir;

    ## only including those entries that are alt-spliced
    foreach my $gene (keys %gene_to_seqs) {

        my @trans = @{$gene_to_seqs{$gene}};

        if (scalar @trans == 1) {
            next;
        }


        my $outdir = $gene;
        $outdir =~ s/\W/_/g;

        # Distinct gene names can collapse to the same directory name once
        # sanitized; refuse to let one gene overwrite another's files.
        if (exists $gene_for_outdir{$outdir}) {
            die "Error, genes $gene_for_outdir{$outdir} and $gene both map to output directory $outdir";
        }
        $gene_for_outdir{$outdir} = $gene;

        # Tolerate a directory left over from an earlier run, matching how
        # the top-level output directory is handled.
        my $gene_dir = "$sim_out_dir/$outdir";
        unless (-d $gene_dir) {
            mkdir $gene_dir or die "Error, cannot mkdir $gene_dir: $!";
        }

        my $template_file = "$gene_dir/$outdir.template.fa";
        open (my $ofh, '>', $template_file) or die "Error, cannot write to $template_file: $!";
        foreach my $entry (@trans) {
            my ($acc, $sequence) = ($entry->{acc}, $entry->{seq});
            print $ofh ">$acc\n$sequence\n";
        }
        # Buffered write errors only surface at close time.
        close $ofh or die "Error, failed closing $template_file: $!";

        my $outfile = "$gene_dir/$outdir.reads.fa";

        # $outdir contains only word characters at this point, so the paths
        # interpolated into the shell command need no further quoting.
        my $cmd = "$FindBin::RealBin/../simulate_illuminaPE_from_transcripts.pl --transcripts $template_file --SS > $outfile";
        process_cmd($cmd);

        $gene_counter++;

        if ($max_genes && $gene_counter >= $max_genes) {
            last;
        }


    }

    exit(0);

}

####
# Run a command through system(), echoing it to STDERR first, and die unless
# it succeeds.
#
# Args:    $cmd - command line passed to system() as a single string
# Returns: nothing on success
# Dies:    when the command cannot be started, is terminated by a signal, or
#          exits non-zero. The message keeps the raw system() return value
#          and appends the decoded reason.
sub process_cmd {
    my ($cmd) = @_;

    print STDERR "CMD: $cmd\n";

    my $ret = system($cmd);
    if ($ret == 0) {
        return;
    }

    # system() returns the raw wait status: -1 means the command never
    # started (reason in $!), the low 7 bits hold a terminating signal, and
    # the exit code sits in the high byte.
    my $reason;
    if ($ret == -1) {
        $reason = "failed to execute: $!";
    }
    elsif ($ret & 127) {
        $reason = sprintf("killed by signal %d", $ret & 127);
    }
    else {
        $reason = sprintf("exit code %d", $ret >> 8);
    }

    die "Error, cmd: $cmd died with ret $ret ($reason)";
}