1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
|
#!/usr/bin/env perl
#
# Author: petr.danecek@sanger
#
use strict;
use warnings;
use Carp;
my $opts = parse_params();
create_test($opts);
exit;
#--------------------------------
sub error
{
my (@msg) = @_;
if ( scalar @msg ) { confess @msg; }
print
"Usage: create-bam-test [OPTIONS]\n",
"Options:\n",
" -b, --bam FILE \n",
" -f, --fa-ref FILE \n",
" -k, --keep-samples \n",
" -o, --output-prefix PATH \n",
" -O, --offset INT \n",
" -r, --region REGION \n",
" -h, -?, --help This help message.\n",
"\n";
exit -1;
}
sub parse_params
{
my $opts = {};
if ( -t STDIN && !@ARGV ) { error(); }
while (defined(my $arg=shift(@ARGV)))
{
if ( $arg eq '-b' || $arg eq '--bam' ) { $$opts{bam}=shift(@ARGV); next }
if ( $arg eq '-f' || $arg eq '--fa-ref' ) { $$opts{ref}=shift(@ARGV); next }
if ( $arg eq '-k' || $arg eq '--keep-samples' ) { $$opts{keep_samples}=1; next }
if ( $arg eq '-o' || $arg eq '--output-prefix' ) { $$opts{out}=shift(@ARGV); next }
if ( $arg eq '-O' || $arg eq '--offset' ) { $$opts{offset}=shift(@ARGV); next }
if ( $arg eq '-r' || $arg eq '--region' ) { $$opts{region}=shift(@ARGV); next }
if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
error("Unknown parameter \"$arg\". Run -h for help.\n");
}
if ( !exists($$opts{bam}) ) { error("Missing the -b option.\n") }
if ( !exists($$opts{ref}) ) { error("Missing the -f option.\n") }
if ( !exists($$opts{out}) ) { error("Missing the -o option.\n") }
if ( !exists($$opts{region}) ) { error("Missing the -r option.\n") }
return $opts;
}
sub create_test
{
my ($opts) = @_;
if ( !($$opts{region}=~/^([^:]+):(\d+)-(\d+)$/) ) { error("Could not parse the region: $$opts{region}"); }
my %readnames = ();
my $chr = $1;
my $beg = $2;
my $end = $3;
my $ori_beg = $beg;
my $ori_end = $end;
my @reads = ();
my $cmd = "samtools view -h $$opts{bam} $$opts{region}";
open(my $out,'>',"$$opts{out}.sam") or error("$$opts{out}.sam: $!");
open(my $fh,"$cmd |") or error("$cmd: $!");
while (my $line=<$fh>)
{
my @vals = split(/\t/,$line);
if ( $vals[0] eq '@HD' ) { print $out $line; next; }
if ( $$opts{keep_samples} && $vals[0] eq '@RG' ) { print $out $line; next; }
if ( $vals[0] =~ /^\@/ ) { next; }
if ( $vals[3] < $beg ) { $beg = $vals[3]; }
if ( $vals[3] + length($vals[9]) > $end ) { $end = $vals[3] + length($vals[9]); }
my $mate_mapped = 1;
if ( $vals[6] ne $chr && $vals[6] ne '=' ) { $vals[6] = '='; $mate_mapped = 0; }
if ( $vals[7] < $beg or $vals[7] > $end ) { $mate_mapped = 0; }
if ( !$mate_mapped ) { $vals[1] &= ~0x8; $vals[6] = '*'; $vals[7] = $vals[3]; }
chomp($vals[-1]);
if ( !$$opts{keep_samples} )
{
my $i;
for ($i=0; $i<@vals; $i++) { if ($vals[$i]=~/^RG:Z:/) { last; } }
if ( $i!=@vals ) { splice(@vals,$i,1); }
push @vals,"RG:Z:rg";
}
push @reads, \@vals;
}
close($fh) or error("close failed: $cmd");
print $out "\@SQ\tSN:$chr\tLN:".($end-$beg+1)."\n";
if ( !$$opts{keep_samples} ) { print $out "\@RG\tID:rg\tSM:sample\n"; }
if ( exists($$opts{offset}) ) { $beg = abs($$opts{offset}) + 1; }
for my $read (@reads)
{
$$read[3] -= $beg - 1;
$$read[7] -= $beg - 1;
print $out join("\t",@$read)."\n";
}
close($out) or error("close failed: $$opts{out}.sam");
`samtools view -b $$opts{out}.sam -o $$opts{out}.bam`;
`samtools index $$opts{out}.bam`;
`samtools faidx $$opts{ref} $chr:$beg-$end | sed 's,:, ,' > $$opts{out}.fa`;
`samtools faidx $$opts{out}.fa`;
print "offset\t-".($beg-1)."\n";
}
|