File: par-svtyper.sh

package info (click to toggle)
lumpy-sv 0.3.1%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 296,072 kB
  • sloc: cpp: 9,908; python: 1,768; sh: 1,384; makefile: 365; ansic: 322; perl: 58
file content (87 lines) | stat: -rw-r--r-- 2,116 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
set -euo pipefail

export vcf=$1
export bams=$2

export LINES=250

svtpar() {
    set -euo pipefail
    subvcf=$1
    lib=$TMPDIR/tmp.$mypid.$(echo $bams | md5sum | awk '{ print $1 }').lib
    svtyper -B $bams -i $subvcf -l $lib
    # vcf is a temporary file so clean it up.
    # but for the first time, it's and fd from the header so the rm will fail.
    set +e
    tail -1 $subvcf 2>/dev/null | awk '{ print "svtyped through:" $1":"$2 }' > /dev/stderr
    rm -f $subvcf >/dev/null 2>&1
}
export -f svtpar

genvcfs() {
    set -euo pipefail
    bams=$1
    nth=0
    tmpvcf=$TMPDIR/tmp.$$.$nth.vcf
    echo -e "$header" > $tmpvcf
    k=0
    # run this once to force the calculation of the bam stats.
    # this avoids a race condition.
    svtpar <(echo -e "$header")
    while read -r line || [[ -n "$line" ]]; do
        k=$((k + 1))
        echo -e "$line" >> $tmpvcf
        if [[ "$k" -eq "$LINES" ]]; then
            # echo the bam and the vcf so they can be sent to svtyper.
            echo "$tmpvcf"
            nth=$((nth + 1))
            tmpvcf=$TMPDIR/tmp.$$.$nth.vcf
            echo -e "$header" > $tmpvcf
            k=0;
        fi 
    done < "$vcf"
    echo "$tmpvcf"
}

vcfsort() {
     awk 'BEGIN{x=0;} $0 ~/^#/{ if(x==0) {print;} if($1 == "#CHROM"){x=1}; next;} {x=1; print $0 | "LC_ALL=C sort --buffer-size 2G -k1,1 -k2,2n" }'
}


export -f genvcfs

if [[ -z ${TMPDIR+x} ]]; then
       export TMPDIR=/tmp/
fi
export mypid=$$

cleanup() {
   rm -f $TMPDIR/tmp.$mypid.*.vcf
   rm -f $TMPDIR/tmp.$mypid.*.lib
}

trap cleanup EXIT

# add gargs to path
export PATH=$PATH:.

# get gargs if it doesn't exist
type gargs >/dev/null 2>&1 || {
    if [ "$(uname)" == "Darwin" ]; then
        wget -O gargs https://github.com/brentp/gargs/releases/download/v0.3.5/gargs_darwin
    else
        wget -O gargs https://github.com/brentp/gargs/releases/download/v0.3.5/gargs_linux
    fi
    chmod +x ./gargs
}

psvtyper() {
    set -euo pipefail
    export header=$(head -2000 $vcf | grep ^#)
    genvcfs $bams | gargs -p $THREADS "svtpar {}" | vcfsort
}

mkdir -p $TMPDIR

export -f psvtyper
psvtyper