File: create-bam-test

package info (click to toggle)
bcftools 1.16-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 20,252 kB
  • sloc: ansic: 60,589; perl: 5,818; python: 587; sh: 333; makefile: 284
file content (114 lines) | stat: -rwxr-xr-x 4,002 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env perl
#
# Author: petr.danecek@sanger
#

use strict;
use warnings;
use Carp;

my $opts = parse_params();
create_test($opts);

exit;

#--------------------------------

sub error
{
    my (@msg) = @_;
    if ( scalar @msg ) { confess @msg; }
    print 
        "Usage: create-bam-test [OPTIONS]\n",
        "Options:\n",
        "   -b, --bam FILE                  \n",
        "   -f, --fa-ref FILE               \n",
        "   -k, --keep-samples              \n",
        "   -o, --output-prefix PATH        \n",
        "   -O, --offset INT                \n",
        "   -r, --region REGION             \n",
        "   -h, -?, --help                  This help message.\n",
        "\n";
    exit -1;
}
sub parse_params
{
    my $opts = {};
    if ( -t STDIN && !@ARGV ) { error(); }
    while (defined(my $arg=shift(@ARGV)))
    {
        if ( $arg eq '-b' || $arg eq '--bam' ) { $$opts{bam}=shift(@ARGV); next }
        if ( $arg eq '-f' || $arg eq '--fa-ref' ) { $$opts{ref}=shift(@ARGV); next }
        if ( $arg eq '-k' || $arg eq '--keep-samples' ) { $$opts{keep_samples}=1; next }
        if ( $arg eq '-o' || $arg eq '--output-prefix' ) { $$opts{out}=shift(@ARGV); next }
        if ( $arg eq '-O' || $arg eq '--offset' ) { $$opts{offset}=shift(@ARGV); next }
        if ( $arg eq '-r' || $arg eq '--region' ) { $$opts{region}=shift(@ARGV); next }
        if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); }
        error("Unknown parameter \"$arg\". Run -h for help.\n");
    }
    if ( !exists($$opts{bam}) ) { error("Missing the -b option.\n") }
    if ( !exists($$opts{ref}) ) { error("Missing the -f option.\n") }
    if ( !exists($$opts{out}) ) { error("Missing the -o option.\n") }
    if ( !exists($$opts{region}) ) { error("Missing the -r option.\n") }
    return $opts;
}
sub create_test
{
    my ($opts) = @_;
    if ( !($$opts{region}=~/^([^:]+):(\d+)-(\d+)$/) ) { error("Could not parse the region: $$opts{region}"); }
    my %readnames = ();
    my $chr = $1;
    my $beg = $2;
    my $end = $3;
    my $ori_beg = $beg;
    my $ori_end = $end;
    my @reads  = ();
    my $cmd = "samtools view -h $$opts{bam} $$opts{region}";
    open(my $out,'>',"$$opts{out}.sam") or error("$$opts{out}.sam: $!");
    open(my $fh,"$cmd |") or error("$cmd: $!");
    while (my $line=<$fh>)
    {
        my @vals = split(/\t/,$line);
        if ( $vals[0] eq '@HD' ) { print $out $line; next; }
        if ( $$opts{keep_samples} && $vals[0] eq '@RG' ) { print $out $line; next; }
        if ( $vals[0] =~ /^\@/ ) { next; }
        if ( $vals[3] < $beg ) { $beg = $vals[3]; }
        if ( $vals[3] + length($vals[9]) > $end ) { $end = $vals[3] + length($vals[9]); }
        my $mate_mapped = 1;
        if ( $vals[6] ne $chr && $vals[6] ne '=' ) { $vals[6] = '='; $mate_mapped = 0; }
        if ( $vals[7] < $beg or $vals[7] > $end ) { $mate_mapped = 0; }
        if ( !$mate_mapped ) { $vals[1] &= ~0x8; $vals[6] = '*'; $vals[7] = $vals[3];  }

        chomp($vals[-1]);
        if ( !$$opts{keep_samples} )
        {
            my $i;
            for ($i=0; $i<@vals; $i++) { if ($vals[$i]=~/^RG:Z:/) { last; } }
            if ( $i!=@vals ) { splice(@vals,$i,1); }
            push @vals,"RG:Z:rg";
        }
        push @reads, \@vals;
    }
    close($fh) or error("close failed: $cmd");

    print $out "\@SQ\tSN:$chr\tLN:".($end-$beg+1)."\n";
    if ( !$$opts{keep_samples} ) { print $out "\@RG\tID:rg\tSM:sample\n"; }

    if ( exists($$opts{offset}) ) { $beg = abs($$opts{offset}) + 1; }

    for my $read (@reads)
    {
        $$read[3] -= $beg - 1;
        $$read[7] -= $beg - 1;
        print $out join("\t",@$read)."\n";
    }
    close($out) or error("close failed: $$opts{out}.sam");

    `samtools view -b $$opts{out}.sam -o $$opts{out}.bam`;
    `samtools index $$opts{out}.bam`;
    `samtools faidx $$opts{ref} $chr:$beg-$end | sed 's,:, ,' > $$opts{out}.fa`;
    `samtools faidx $$opts{out}.fa`;

    print "offset\t-".($beg-1)."\n";
}