File: blat_to_sam.pl

package info (click to toggle)
trinityrnaseq 2.2.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 212,452 kB
  • ctags: 5,067
  • sloc: perl: 45,552; cpp: 19,678; java: 11,865; sh: 1,485; makefile: 613; ansic: 427; python: 313; xml: 83
file content (125 lines) | stat: -rwxr-xr-x 2,608 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env perl

use strict;
use warnings;

use FindBin;
use Cwd;

use Getopt::Long qw(:config no_ignore_case bundling);


$ENV{LC_ALL} = 'C';

my $usage = <<_EOUSAGE_;

################################################################################################
#
# Required:
#
# --genome      genome in fasta format
# --reads       reads in fasta format
#
# Optional:
#
# --blat_params  quote-delimited params to pass to blat, eg. "-q=rna -t=dna -maxIntron=1000"  (default: "-q=rna -t=dna")
#
# --top         number of top hits (default: 10)
# --min_per_ID  minimum percent identity (default: 95)
#
##############################################################################################


_EOUSAGE_

	;


my ($genome_fa, $reads_fa);

my $blat_params = "-q=rna -t=dna";
my $top_hits = 10;
my $min_per_ID = 95;



&GetOptions( 'genome=s' => \$genome_fa,
			 'reads=s' => \$reads_fa,
			 
			 'blat_params=s' => \$blat_params,
			 
			 'top=i' => \$top_hits,
			 'min_per_ID=i' => \$min_per_ID,
	);

unless ($genome_fa && $reads_fa) {
	die $usage;
}


{
	my @required_progs = qw (blat psl2sam.pl);

	foreach my $prog (@required_progs) {
		my $path = `which $prog`;
		unless ($path =~ /^\//) {
			die "Error, cannot locate required program: $prog";
		}
	}
}



main: {

	my $util_dir = "$FindBin::RealBin/../util";
	
	my $cmd = "$util_dir/fasta_to_tab.pl $reads_fa > $reads_fa.tab";
	&process_cmd($cmd) unless (-s "$reads_fa.tab");

	$cmd = "sort -T . -S 2G -k 1,1 $reads_fa.tab > $reads_fa.tab.sort";
	&process_cmd($cmd) unless (-s "$reads_fa.tab.sort");

	# run blat
	$cmd = "blat $blat_params $genome_fa $reads_fa $reads_fa.psl";
	&process_cmd($cmd);

	# convert to sam
	$cmd = "psl2sam.pl -q 0 -r 0 $reads_fa.psl | sort -T . -S 2G -k 1,1 > $reads_fa.psl.sam";
	&process_cmd($cmd);


	# add the reads
	$cmd = "$util_dir/blat_sam_add_reads2.pl $reads_fa.psl.sam $reads_fa.tab.sort > $reads_fa.psl.sam.wReads";
	&process_cmd($cmd);

	## prune output to top matches:
	$cmd = "$util_dir/top_blat_sam_extractor.pl $reads_fa.psl.sam.wReads $top_hits $min_per_ID > $reads_fa.psl.sam.wReads.top";
	&process_cmd($cmd);

	$cmd = "$FindBin::RealBin/cigar_tweaker $reads_fa.psl.sam.wReads.top $genome_fa > $reads_fa.psl.sam.wReads.top.tweaked";
	&process_cmd($cmd);

	$cmd = "sort -T . -S 2G -k 3,3 -k 4,4n $reads_fa.psl.sam.wReads.top.tweaked > $reads_fa.psl.sam.wReads.top.tweaked.coordSorted.sam";
	&process_cmd($cmd);

	
	
	exit(0);
}

####
sub process_cmd {
	my ($cmd) = @_;

	print "CMD: $cmd\n";
	my $ret = system($cmd);

	if ($ret) {
		die "Error, cmd: $cmd died with ret $ret";
	}

	return;
}