File: infer_fraglen.pl

package info (click to toggle)
bowtie 1.3.1-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 15,816 kB
  • sloc: cpp: 37,094; perl: 5,806; ansic: 1,474; sh: 1,197; python: 462; makefile: 419
file content (102 lines) | stat: -rwxr-xr-x 3,109 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/perl -w

##
# infer_fraglen.pl: Infer fragment length by looking separately for
# unique alignments for the mates, then piecing those together and
# building a distribution.
#

use strict;
use warnings;
use Getopt::Long;

my $m1 = "";
my $m2 = "";
my $index = "";
my $bowtie_args = "";
my $bowtie = "bowtie";
my $debug = 0;
my $binsz = 10;

sub dieusage {
	my $msg = shift;
	my $exitlevel = shift;
	$exitlevel = $exitlevel || 1;
	print STDERR "$msg\n";
	exit $exitlevel;
}

##
# Given a basename, return true iff all index files exist.
#
sub checkIndex($) {
	my $idx = shift;
	return -f "$idx.1.ebwt" &&
	       -f "$idx.2.ebwt" &&
	       -f "$idx.3.ebwt" &&
	       -f "$idx.4.ebwt" &&
	       -f "$idx.rev.1.ebwt" &&
	       -f "$idx.rev.2.ebwt";
}

GetOptions (
	"bowtie=s"      => \$bowtie,
	"index=s"       => \$index,
	"m1=s"          => \$m1,
	"m2=s"          => \$m2,
	"debug"         => \$debug,
	"bowtie-args=s" => \$bowtie_args) || dieusage("Bad option", 1);

die "Must specify --m1" if $m1 eq "";
die "Must specify --m2" if $m2 eq "";
die "Must specify --index" if $index eq "";
$m1 =~ s/^~/$ENV{HOME}/;
$m2 =~ s/^~/$ENV{HOME}/;
$index =~ s/^~/$ENV{HOME}/;
die "Bad bowtie path: $bowtie" if system("$bowtie --version >/dev/null 2>/dev/null") != 0;
die "Bad index: $index" if !checkIndex($index);

# Hash holding all the observed fragment orientations and lengths
my %fragments = ();
my $m1cmd = ($m1 =~ /\.gz$/) ? "gzip -dc $m1" : "cat $m1";
my $m2cmd = ($m2 =~ /\.gz$/) ? "gzip -dc $m2" : "cat $m2";
my $cmd1 = "$m1cmd | $bowtie $bowtie_args -m 1 -S --sam-nohead $index - > .infer_fraglen.tmp";
my $cmd2 = "$m2cmd | $bowtie $bowtie_args -m 1 -S --sam-nohead $index - |";
system($cmd1) == 0 || die "Error running '$cmd1'";
open (M1, ".infer_fraglen.tmp") || die "Could not open '.infer_fraglen.tmp'";
open (M2, $cmd2) || die "Could not open '$cmd2'";
while(<M1>) {
	my $lm1 = $_;
	my $lm2 = <M2>;
	chomp($lm1); chomp($lm2);
	my @lms1 = split(/\t/, $lm1);
	my @lms2 = split(/\t/, $lm2);
	my ($name1, $flags1, $chr1, $off1, $slen1) = ($lms1[0], $lms1[1], $lms1[2], $lms1[3], length($lms1[9]));
	my ($name2, $flags2, $chr2, $off2, $slen2) = ($lms2[0], $lms2[1], $lms2[2], $lms2[3], length($lms2[9]));
	# One or both mates didn't align uniquely?
	next if $chr1 eq "*" || $chr2 eq "*";
	# Mates aligned to different chromosomes?
	next if $chr1 ne $chr2;
	# This pairing can be used as an observation of fragment orientation and length
	my $fw1 = (($flags1 & 16) == 0) ? "F" : "R";
	my $fw2 = (($flags2 & 16) == 0) ? "F" : "R";
	my $frag = $off2 - $off1;
	# This can overestimate if one mate is entirely subsumed in the other
	if($frag > 0) { $frag += $slen2; }
	else          { $frag -= $slen1; }
	# Install into bin
	$frag = int(($frag + ($binsz/2))/$binsz); # Round to nearest bin
	$fragments{"$fw1$fw2"}{$frag}++;
}
close(M1);
close(M2);
unlink(".infer_fraglen.tmp"); # ditch temporary file

# Print out the bins
for my $k (keys %fragments) {
	for my $k2 (sort {$a <=> $b} keys %{$fragments{$k}}) {
		print "$k, ".($k2*$binsz).", ".$fragments{$k}{$k2}."\n";
	}
}

print STDERR "DONE\n";