File: vcfregionreduce_and_cut

package info (click to toggle)
libvcflib 1.0.12%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 70,520 kB
  • sloc: cpp: 39,837; python: 532; perl: 474; ansic: 317; ruby: 295; sh: 254; lisp: 148; makefile: 123; javascript: 94
file content (33 lines) | stat: -rwxr-xr-x 1,015 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/bin/bash
# Reduce, gzip, and tabix
#
# Concatenates per-region VCFs ([directory]/[region].vcf.gz) in the order the
# regions appear in the region file, and writes two bgzip-compressed,
# tabix-indexed outputs into the current directory:
#   <basename directory>.vcf.gz        full records
#   <basename directory>.sites.vcf.gz  first 8 columns only (no genotypes)
#
# The header is taken from the first region's file only; adjacent duplicate
# lines in the concatenated stream are collapsed by uniq.
#
# Requires: gzip, awk, grep, uniq, cut, pee (moreutils), bgzip, tabix.

# Print the usage text to stderr.
usage() {
    cat >&2 <<EOF
Usage: $0 [region file] [directory]

Generates \`basename directory\`.vcf.gz and \`basename directory\`.sites.vcf.gz
which are the concatenation of files in the directory named [directory]/[region1].vcf.gz,
[directory]/[region2].vcf.gz, etc. in the order in which they occur in the region file.

Tabix indexes are simultaneously generated.
EOF
}

# Print an error message to stderr and abort.
die() {
    printf '%s: %s\n' "${0##*/}" "$*" >&2
    exit 1
}

if [[ $# -ne 2 ]]; then
    usage
    exit 1
fi

regionfile=$1
mergedir=$2
mergename=$(basename -- "$mergedir")

# Exported so the sh -c commands spawned by pee can reference the output
# names as quoted variables instead of having them spliced into the command
# text (which breaks on spaces and shell metacharacters).
export vcfgenotypes="$mergename.vcf.gz"
export vcfsites="$mergename.sites.vcf.gz"

for tool in gzip awk grep uniq cut pee bgzip tabix; do
    command -v "$tool" >/dev/null || die "required tool not found: $tool"
done

# Keep accepting "-" for stdin, as the original "cat $regionfile" did.
if [[ $regionfile == - ]]; then
    regionfile=/dev/stdin
fi
[[ -r "$regionfile" ]] || die "cannot read region file: $regionfile"

# Split the region file on whitespace into an array. Unlike an unquoted
# $(cat ...), this performs no pathname expansion on the entries.
# read returns non-zero on EOF without a NUL delimiter; that is expected.
regions=()
read -r -d '' -a regions <"$regionfile" || true
(( ${#regions[@]} > 0 )) || die "no regions listed in $regionfile"

# Resolve and verify every input before writing anything, so a missing
# region cannot silently yield a truncated but validly indexed output.
files=()
for region in "${regions[@]}"; do
    file=$mergedir/$region.vcf.gz
    [[ -r "$file" ]] || die "missing or unreadable input: $file"
    files+=("$file")
done

# The variables inside the single-quoted pee commands are expanded by the
# child sh from the exported environment, on purpose.
# shellcheck disable=SC2016
{
    # Emit the complete leading header block of the first file, however
    # long it is, and stop reading at the first data line.
    gzip -dc -- "${files[0]}" | awk '/^#/ { print; next } { exit }'
    for file in "${files[@]}"; do
        gzip -dc -- "$file" | grep -v '^#'
    done
} | uniq | pee \
    'bgzip >"$vcfgenotypes" && tabix -p vcf "$vcfgenotypes"' \
    'cut -f -8 | bgzip >"$vcfsites" && tabix -p vcf "$vcfsites"'