File: Snakefile

package info (click to toggle)
pizzly 0.37.3+ds-9
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, trixie
  • size: 476 kB
  • sloc: cpp: 1,458; python: 66; sh: 29; makefile: 11
file content (83 lines) | stat: -rw-r--r-- 1,963 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import os

# Base name shared by the reference FASTA, the GTF annotation and the
# kallisto index derived from them.
PRE = "transcripts"
FASTA = PRE + ".fasta.gz"
GTF = PRE + ".gtf.gz"
INDEX = PRE + ".kidx"

# Value passed as -k to `kallisto index` and to pizzly.
K = 31

# Command used to decompress to stdout: `gzcat` on Darwin, `zcat` elsewhere.
if os.uname().sysname == 'Darwin':
    ZCAT = 'gzcat'
else:
    ZCAT = 'zcat'

# Default target: requests the first kallisto quantification, the pizzly
# fusion calls, and the re-quantification against the fusion-augmented index.
rule all:
    input:
        quant="kallisto_out/abundance.h5",
        fusions="pizzly_out/output.json",
        requant="pizzly_post/abundance.h5"

# Build the kallisto index INDEX from the reference FASTA using -k K.
rule index:
    input:
        fasta=FASTA
    output:
        idx=INDEX
    shell:
        "kallisto index "
        "-k {K} "
        "-i {output.idx} "
        "{input.fasta}"

# Run `kallisto quant --fusion` on the paired reads against INDEX.
#
# Inputs:  the two read files and the kallisto index.
# Outputs: the files this workflow expects inside kallisto_out/;
#          fusion.txt is consumed by the pizzly rule.
#
# The output directory is handed to kallisto through `params` instead of
# being declared as an output: it is the parent of the declared output files,
# and an unflagged directory output next to files inside it is not a valid
# output specification for Snakemake. Snakemake creates the parent directory
# of declared outputs before the job runs.
rule kallisto_quant:
    input:
        r1="reads_1.fastq.gz",
        r2="reads_2.fastq.gz",
        index=INDEX
    output:
        "kallisto_out/abundance.h5",
        "kallisto_out/abundance.tsv",
        "kallisto_out/run_info.json",
        "kallisto_out/fusion.txt"
    params:
        outdir="kallisto_out"
    shell:
        "kallisto quant "
        "-i {input.index} "
        "-o {params.outdir} "
        "--fusion "
        "{input.r1} {input.r2}"

# Run pizzly on the fusion candidates written by kallisto_quant.
#
# Inputs:  reference FASTA, kallisto's fusion.txt, and the GTF annotation.
#          The GTF is declared as an input because the command reads it;
#          without the declaration Snakemake would neither check that it
#          exists nor re-run the rule when it changes.
# Outputs: <prefix>.json and <prefix>.fusions.fasta, where <prefix> is the
#          value passed to --output.
#
# NOTE(review): the command also passes `--cache cache.txt` in the working
# directory; that file is not tracked by Snakemake.
rule pizzly:
    input:
        fasta=FASTA,
        fusion="kallisto_out/fusion.txt",
        gtf=GTF
    output:
        "pizzly_out/output.json",
        "pizzly_out/output.fusions.fasta"
    params:
        # Common prefix of the two declared outputs.
        prefix="pizzly_out/output"
    shell:
        "../build/pizzly "
        "-k {K} "
        "--gtf {input.gtf} "
        "--cache cache.txt "
        "--align-score 2 "
        "--insert-size 400 "
        "--fasta {input.fasta} "
        "--output {params.prefix} "
        "{input.fusion}"

# Concatenate the decompressed reference FASTA with pizzly's fusion FASTA,
# gzip the result, then build a kallisto index (-k K) from the combined file.
# The command uses `<( ... )` process substitution to feed the decompressed
# reference to cat.
rule append_index:
    input:
        fasta=FASTA,
        fusions="pizzly_out/output.fusions.fasta"
    output:
        fasta="pizzly_post/transcripts_with_fusions.fasta.gz",
        idx="pizzly_post/transcripts_with_fusions.kidx"
    shell:
        "cat <({ZCAT} {input.fasta}) {input.fusions} | gzip - > {output.fasta} && "
        "kallisto index -k {K} -i {output.idx} {output.fasta}"

# Re-run `kallisto quant` on the same reads against the index that includes
# the fusion transcripts produced by append_index.
#
# Inputs:  the two read files and the fusion-augmented kallisto index.
# Output:  pizzly_post/abundance.h5 (the target requested by rule `all`).
#
# The output directory is passed via `params` rather than declared as an
# output: pizzly_post/ also holds append_index's outputs and the file declared
# below, so declaring the bare directory would make it an output that
# contains other outputs.
rule requant_kallisto:
    input:
        r1="reads_1.fastq.gz",
        r2="reads_2.fastq.gz",
        index="pizzly_post/transcripts_with_fusions.kidx"
    output:
        "pizzly_post/abundance.h5"
    params:
        outdir="pizzly_post"
    shell:
        "kallisto quant "
        "-i {input.index} "
        "-o {params.outdir} "
        "{input.r1} {input.r2}"