File: create-bam-test

package info (click to toggle)
bcftools 1.9-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 12,820 kB
  • sloc: ansic: 44,107; perl: 4,644; python: 471; sh: 333; makefile: 226
file content (99 lines) | stat: -rwxr-xr-x 3,203 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env perl
#
# Author: petr.danecek@sanger
#

use strict;
use warnings;
use Carp;

my $opts = parse_params();
create_test($opts);

exit;

#--------------------------------

sub error
{
    my (@msg) = @_;
    if ( scalar @msg ) { confess @msg; }
    print 
        "Usage: create-bam-test [OPTIONS]\n",
        "Options:\n",
        "   -b, --bam <file.bam>            \n",
        "   -f, --fa-ref <ref.fa>           \n",
        "   -o, --output-prefix <string>    \n",
        "   -r, --region <region>           \n",
        "   -h, -?, --help                  This help message.\n",
        "\n";
    exit -1;
}
sub parse_params
{
    my $opts = {};
    while (defined(my $arg=shift(@ARGV)))
    {
        if ( $arg eq '-b' || $arg eq '--bam' ) { $$opts{bam}=shift(@ARGV); next }
        if ( $arg eq '-f' || $arg eq '--fa-ref' ) { $$opts{ref}=shift(@ARGV); next }
        if ( $arg eq '-o' || $arg eq '--output-prefix' ) { $$opts{out}=shift(@ARGV); next }
        if ( $arg eq '-r' || $arg eq '--region' ) { $$opts{region}=shift(@ARGV); next }
        if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
        error("Unknown parameter \"$arg\". Run -h for help.\n");
    }
    if ( !exists($$opts{bam}) ) { error("Missing the -b option.\n") }
    if ( !exists($$opts{ref}) ) { error("Missing the -f option.\n") }
    if ( !exists($$opts{out}) ) { error("Missing the -o option.\n") }
    if ( !exists($$opts{region}) ) { error("Missing the -r option.\n") }
    return $opts;
}
sub create_test
{
    my ($opts) = @_;
    if ( !($$opts{region}=~/^([^:]+):(\d+)-(\d+)$/) ) { error("Could not parse the region: $$opts{region}"); }
    my $chr = $1;
    my $beg = $2;
    my $end = $3;
    my $ori_beg = $beg;
    my $ori_end = $end;
    my @reads  = ();
    my $cmd = "samtools view -h $$opts{bam} $$opts{region}";
    open(my $out,'>',"$$opts{out}.sam") or error("$$opts{out}.sam: $!");
    open(my $fh,"$cmd |") or error("$cmd: $!");
    while (my $line=<$fh>)
    {
        my @vals = split(/\t/,$line);
        if ( $vals[0] eq '@HD' ) { print $out $line; next; }
        if ( $vals[0] =~ /^\@/ ) { next; }
        if ( $vals[3] < $beg ) { $beg = $vals[3]; }
        if ( $vals[7] < $beg ) { $beg = $vals[7]; }
        if ( $vals[3] + length($vals[9]) > $end ) { $end = $vals[3] + length($vals[9]); }
        my $i;
        for ($i=0; $i<@vals; $i++) { if ($vals[$i]=~/^RG:Z:/) { last; } }
        if ( $i!=@vals ) { splice(@vals,$i,1); }
        chomp($vals[-1]);
        push @reads, \@vals;
    }
    close($fh) or error("close failed: $cmd");

    print $out "\@SQ\tSN:$chr\tLN:".($end-$beg+1)."\n";
    print $out "\@RG\tID:rg\tSM:sample\n";

    for my $read (@reads)
    {
        $$read[3] -= $beg - 1;
        $$read[7] -= $beg - 1;
        print $out join("\t",@$read,"RG:Z:rg")."\n";
    }
    close($out) or error("close failed: $$opts{out}.sam");

    `samtools view -b $$opts{out}.sam -o $$opts{out}.bam`;
    `samtools index $$opts{out}.bam`;
    `samtools faidx $$opts{ref} $chr:$beg-$end | sed 's,:, ,' > $$opts{out}.fa`;
    `samtools faidx $$opts{out}.fa`;

    $ori_beg -= $beg - 1;
    $ori_end -= $beg - 1;
    print "$chr:$ori_beg-$ori_end\n";
}