File: stacks-count-reads-per-sample-per-locus

package info (click to toggle)
stacks 2.55%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 2,420 kB
  • sloc: cpp: 38,596; perl: 1,337; sh: 539; python: 497; makefile: 144
file content (35 lines) | stat: -rwxr-xr-x 1,040 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/env bash
set -e -o pipefail

out_path=reads_per_sample_per_locus.tsv
usage="\
usage: $(basename "${BASH_SOURCE[0]}") STACKS_DENOVO_DIR

Reads all samples' \"matches.bam\" files to tally the number of samples present
and number of reads present at all loci in the catalog. Then, runs
stacks-hist2d-loci-samples-coverage.

Outputs to '$out_path' and '$out_path.pdf'.
"

if (( $# != 1 )) || [[ "$1" == -h ]] || [[ "$1" == --help ]] ;then
    echo -n "$usage" >&2
    exit 1
fi
cd "$1" || exit
command -v samtools >/dev/null || { samtools; exit; }
command -v Rscript >/dev/null || { Rscript; exit; }

ls ./*.matches.bam >/dev/null || exit
for f in *.matches.bam ;do
    sample=${f%.matches.bam}
    samtools view -f0x40 $f |
        cut -f3 |
        sed $'s/$/\t'"$sample"'/'
done | Rscript -e "\
    t = table(read.table('stdin'));\
    rownames(t) = paste0('c',rownames(t));\
    write.table(t,sep='\t',quote=F,file='$out_path')"

# Plot the table.
"$(dirname "${BASH_SOURCE[0]}")/stacks-hist2d-loci-samples-coverage" "$out_path"