File: fastqc.snakefile

package info (click to toggle)
qcumber 2.3.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid
  • size: 2,276 kB
  • sloc: python: 3,097; sh: 153; makefile: 18
file content (70 lines) | stat: -rwxr-xr-x 2,832 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import sys
#--------------------------------------------< RULES >-----------------------------------------------------------------#

# FastQC report for the raw (untrimmed) reads of one sample.
rule fastqc_raw:
    input:
        lambda wildcards: sample_dict[wildcards.sample]
    output:
        qc_summary = temp(fastqc_path + "/raw/{sample}_fastqc/fastqc_data.txt"),
        zip = fastqc_path + "/raw/{sample}_fastqc.zip",
        html = temp(fastqc_path + "/raw/{sample}_fastqc.html"),
        folder = temp(fastqc_path + "/raw/{sample}_fastqc"),
    log:
        log_path + "/logfile.fastqc.txt"
    threads:
        max_threads
    message:
        "Run FastQC on raw data."
    run:
        # Run FastQC with extraction enabled; results go to <fastqc_path>/raw/
        # and all tool output is appended to the shared log file.
        shell("fastqc {input} -o {path}/raw/ --extract -t {threads} >>  {log} 2>&1 ",
              path=fastqc_path)

        # The code below expects FastQC's results under a name derived from
        # the input file via get_name(), not from the {sample} wildcard.
        read_name = get_name(str(input))
        produced_dir = "{path}/raw/{name}_fastqc".format(name=read_name,
                                                         path=fastqc_path)

        # Relocate the results only when the input-derived name differs from
        # the location declared in `output`.
        if produced_dir != str(output.folder):
            relocation_cmds = (
                # contents of the extracted report directory
                "rsync -r --remove-source-files"
                " {path}/raw/{name}_fastqc/* {output.folder}",
                # HTML report
                "mv -f {path}/raw/{name}_fastqc.html {output.html}",
                # zipped report
                "mv -f {path}/raw/{name}_fastqc.zip {output.zip}",
            )
            for cmd in relocation_cmds:
                shell(cmd, name=read_name, path=fastqc_path)


# FastQC report for the trimmed reads of one sample.
rule fastqc_trimmed:
    input:
        trimming_path + "/{sample}.fastq.gz"
    output:
        fastqc_path + "/trimmed/{sample}_fastqc/fastqc_data.txt",
        fastqc_path + "/trimmed/{sample}_fastqc.zip",
        temp(fastqc_path + "/trimmed/{sample}_fastqc.html"),
        folder = temp(fastqc_path + "/trimmed/{sample}_fastqc"),
    log:
        log_path + "/logfile.trimmed.fastqc.txt"
    threads:
        max_threads
    message:
        "Run FastQC for trimmed data."
    run:
        # Test: does the first line of the decompressed input contain any bytes?
        empty_input_test = "if [ `zcat '{input}' | head -n 1 | wc -c ` -eq 0 ];"

        # Empty input: skip FastQC and just touch every declared output.
        touch_outputs = "then touch {output};"

        # Otherwise run FastQC into the directory that contains the declared
        # report folder, appending all tool output to the shared log file.
        run_fastqc = ("else fastqc {input} -o $(dirname {output.folder})"
                      " --extract -t {threads} >>  {log} 2>&1;  fi ")

        shell(empty_input_test + touch_outputs + run_fastqc)


# Placeholder rule: declares a per-read CSV statistics file as output, but the
# run block below is a no-op, so nothing is ever written to that path.
# NOTE(review): if this rule is ever scheduled, Snakemake will presumably fail
# with a missing-output error — confirm whether the rule is still needed.
rule trimmomatic_stats_2_csv:
    input:
        # NOTE(review): none of the FastQC rules in this section produce a path
        # of this form (they write "<sample>_fastqc/fastqc_data.txt") — verify
        # where this input is expected to come from.
        fastqc_path + "/raw/{sample}_{read}_fastqc_data.txt"
    output:
        temp(fastqc_path + "/raw/{sample}_{read}_fastqc_stat.csv")
    run:
        # Intentionally empty stub; no conversion is implemented here.
        pass