File: mapping.snakefile

package info (click to toggle)
qcumber 2.3.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid
  • size: 2,276 kB
  • sloc: python: 3,097; sh: 153; makefile: 18
file content (105 lines) | stat: -rwxr-xr-x 4,858 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#################
#   MAPPING     #
#################

from utils import calculate_insert_length

def get_bowtie2_result(filename):
    mapping_dict = OrderedDict()
    mapping_dict["Bowtie2_log"] = ""
    mapping_dict["#AlignedReads"] = 0
    with open(filename, "r") as logfile:
        for line in logfile.readlines():
            pattern1 = re.match(
                ".* (?P<aligned_exact>\d+) \((?P<percent_aligned_exact>\d+.\d+)%\) aligned exactly 1 time", line)
            pattern2 = re.match(
                ".* (?P<aligned_more_than_one>\d+) \((?P<percent_aligned_more_than_one>\d+.\d+)%\) aligned >1 times",
                line)
            pattern3 = re.match(
                "(?P<overall>\d+\.\d+)\% overall alignment rate",
                line)
            if pattern1:
                mapping_dict["#AlignedReads"] += int(pattern1.group("aligned_exact"))
                #mapping_dict["%AlignedReads"] = float(pattern1.group("percent_aligned_exact").replace(",", "."))
            elif pattern2:
                mapping_dict["#AlignedReads"] += int(pattern2.group("aligned_more_than_one"))
                #mapping_dict["%AlignedReads"] = float(pattern2.group("percent_aligned_more_than_one"))
            elif pattern3:
                mapping_dict["%AlignedReads"] = float(pattern3.group("overall"))
            mapping_dict["Bowtie2_log"] += line

    return mapping_dict

if not config["nomapping"]:
    if config["index"]:
        index_file = config["index"]
    else:
        index_file = mapping_path + "/%s.bowtie2" %basename(config["reference"])

        rule bowtie_index:
            input:
                config["reference"]
            output:
                temp(mapping_path  +"/{ref,.*bowtie2}.1.bt2"),
                temp(mapping_path  +"/{ref,.*bowtie2}.2.bt2"),
                temp(mapping_path  +"/{ref,.*bowtie2}.3.bt2"),
                temp(mapping_path  +"/{ref,.*bowtie2}.4.bt2"),
                temp(mapping_path  +"/{ref,.*bowtie2}.rev.1.bt2"),
                temp(mapping_path  +"/{ref,.*bowtie2}.rev.2.bt2")
            log:
                log_path  +"/logfile.mapping.log"
            message:
                "Building bt2 index for mapping."
            shell:
                "bowtie2-build {input} %s/{wildcards.ref}  1>&2>> {log}" % mapping_path

    if config["save_mapping"]:
        samfile = mapping_path  + "/{sample}.sam"
    else:
        samfile = temp(mapping_path  + "/{sample}.sam")

    rule bowtie_mapping:
        input:
            trimmed_fastq = get_all_reads,
            check_index = expand(index_file + ".{index}.bt2" , index = [1,2,3,4,"rev.1","rev.2"])
        output:
            samfile = samfile,
            logfile = temp(log_path  + "/{sample}.bowtie2.log"),
            statsfile = mapping_path + "/{sample}_fragmentsize.txt",
            imagefile = mapping_path + "/{sample}_fragmentsize.png",
            insertsizefile = mapping_path + "/{sample}_insertsizes.txt"
        log:
            log_path  +"/{sample}.bowtie2.log"
        params:
            notrimming = config["notrimming"]
        threads:
            max_threads
        run:
            # Check for empty files
            fastq_files = [x for x in list(input.trimmed_fastq) if getsize(x) != 0]
            if len(fastq_files) == 0:
                shell("touch {output.samfile}")
                f = open(output.statsfile, "w")
                f.write("Average\tMinimum\tMaximum\n0\t0\t0")
            else:
                if params.notrimming:
                    paired_1 = [f for f in fastq_files
                                if "_R1_" in f]
                    paired_2 = [f for f in fastq_files
                                if "_R2_" in f]
                else:
                    paired_1 = [x for x in fastq_files
                                if x.find(".1P.fastq") != -1]
                    paired_2 = [x for x in fastq_files
                                if x.find(".2P.fastq") != -1]
                unpaired = [x for x in fastq_files if x.find("U.fastq") != -1]
                paired_1.sort()
                paired_2.sort()
                unpaired_command = ""
                paired_command = ""
                if len(paired_1) != 0:
                    paired_command = " -1 " + ",".join(paired_1) + " -2 " + ",".join(paired_2)
                if len(unpaired) != 0:
                    unpaired_command = " -U " + ",".join(unpaired)
                shell("bowtie2 -x {ref} {unpaired_command} {paired_command} -S {output.samfile} --threads {threads} 2> {log}", ref = index_file, unpaired_command = unpaired_command, paired_command = paired_command)
            calculate_insert_length(output.samfile, output.statsfile, output.imagefile, output.insertsizefile)