File: example_pipeline.sh

package info (click to toggle)
pairtools 1.1.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,284 kB
  • sloc: python: 8,792; sh: 123; makefile: 68; ansic: 31
file content (70 lines) | stat: -rw-r--r-- 2,321 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env bash
if [ $# -le 3 ] ; then
    echo "Usage: bash example_pipeline.sh BWA_INDEX FASTQ_1 FASTQ_2 OUTPUT_PREFIX"
    echo ""
    echo "A example of a bash pipeline to align the sequencing data from a "
    echo "single Hi-C experiment."
    echo ""
    echo "positional arguments:"
    echo ""
    echo "BWA_INDEX       The path to a bwa index of the reference genome."
    echo "CHROM_SIZES     The path to a file with chromosome sizes."
    echo "FASTQ_1         The path to a fastq file with the sequences of "
    echo "                the first side of Hi-C molecules."
    echo "FASTQ_2         The path to a fastq file with the sequences of "
    echo "                the second side of Hi-C molecules."
    echo "OUTPUT_PREFIX   The prefix to the paths of generated outputs. "
    echo ""
    echo ""

    exit 0
fi

set -o errexit
set -o nounset
set -o pipefail

INDEX=$1
CHROM_SIZES=$2
FASTQ1=$3
FASTQ2=$4
OUTPREFIX=$5

N_THREADS=8

UNMAPPED_SAM_PATH=${OUTPREFIX}.unmapped.bam
UNMAPPED_PAIRS_PATH=${OUTPREFIX}.unmapped.pairs.gz
NODUPS_SAM_PATH=${OUTPREFIX}.nodups.bam
NODUPS_PAIRS_PATH=${OUTPREFIX}.nodups.pairs.gz
DUPS_SAM_PATH=${OUTPREFIX}.dups.bam
DUPS_PAIRS_PATH=${OUTPREFIX}.dups.pairs.gz

bwa mem -SP -t "${N_THREADS}" "${INDEX}" "${FASTQ1}" "${FASTQ2}" | {
    # Classify Hi-C molecules as unmapped/single-sided/multimapped/chimeric/etc
    # and output one line per read, containing the following, separated by \\v:
    #  * triu-flipped pairs
    #  * read id
    #  * type of a Hi-C molecule
    #  * corresponding sam entries
    pairtools parse --chroms-path "{CHROM_SIZES}"
} | {
    # Block-sort pairs together with SAM entries
    pairtools sort --nproc 4
} | {
    # Remove duplicates, separate mapped and unmapped reads 
    pairtools dedup \
        --output \
            >( pairtools split \
                --output-pairs ${NODUPS_PAIRS_PATH} \
                --output-sam ${NODUPS_SAM_PATH} ) \
        --output-dups \
            >( pairtools markasdup \
                | pairtools split \
                    --output-pairs ${DUPS_PAIRS_PATH} \
                    --output-sam ${DUPS_SAM_PATH} ) \                    
        --output-unmapped >( pairtools split \
            --output-pairs ${UNMAPPED_PAIRS_PATH} \
            --output-sam ${UNMAPPED_SAM_PATH} ) 

}