1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
|
#################
# MAPPING #
#################
from utils import calculate_insert_length
def get_bowtie2_result(filename):
mapping_dict = OrderedDict()
mapping_dict["Bowtie2_log"] = ""
mapping_dict["#AlignedReads"] = 0
with open(filename, "r") as logfile:
for line in logfile.readlines():
pattern1 = re.match(
".* (?P<aligned_exact>\d+) \((?P<percent_aligned_exact>\d+.\d+)%\) aligned exactly 1 time", line)
pattern2 = re.match(
".* (?P<aligned_more_than_one>\d+) \((?P<percent_aligned_more_than_one>\d+.\d+)%\) aligned >1 times",
line)
pattern3 = re.match(
"(?P<overall>\d+\.\d+)\% overall alignment rate",
line)
if pattern1:
mapping_dict["#AlignedReads"] += int(pattern1.group("aligned_exact"))
#mapping_dict["%AlignedReads"] = float(pattern1.group("percent_aligned_exact").replace(",", "."))
elif pattern2:
mapping_dict["#AlignedReads"] += int(pattern2.group("aligned_more_than_one"))
#mapping_dict["%AlignedReads"] = float(pattern2.group("percent_aligned_more_than_one"))
elif pattern3:
mapping_dict["%AlignedReads"] = float(pattern3.group("overall"))
mapping_dict["Bowtie2_log"] += line
return mapping_dict
if not config["nomapping"]:
if config["index"]:
index_file = config["index"]
else:
index_file = mapping_path + "/%s.bowtie2" %basename(config["reference"])
rule bowtie_index:
input:
config["reference"]
output:
temp(mapping_path +"/{ref,.*bowtie2}.1.bt2"),
temp(mapping_path +"/{ref,.*bowtie2}.2.bt2"),
temp(mapping_path +"/{ref,.*bowtie2}.3.bt2"),
temp(mapping_path +"/{ref,.*bowtie2}.4.bt2"),
temp(mapping_path +"/{ref,.*bowtie2}.rev.1.bt2"),
temp(mapping_path +"/{ref,.*bowtie2}.rev.2.bt2")
log:
log_path +"/logfile.mapping.log"
message:
"Building bt2 index for mapping."
shell:
"bowtie2-build {input} %s/{wildcards.ref} 1>&2>> {log}" % mapping_path
if config["save_mapping"]:
samfile = mapping_path + "/{sample}.sam"
else:
samfile = temp(mapping_path + "/{sample}.sam")
rule bowtie_mapping:
input:
trimmed_fastq = get_all_reads,
check_index = expand(index_file + ".{index}.bt2" , index = [1,2,3,4,"rev.1","rev.2"])
output:
samfile = samfile,
logfile = temp(log_path + "/{sample}.bowtie2.log"),
statsfile = mapping_path + "/{sample}_fragmentsize.txt",
imagefile = mapping_path + "/{sample}_fragmentsize.png",
insertsizefile = mapping_path + "/{sample}_insertsizes.txt"
log:
log_path +"/{sample}.bowtie2.log"
params:
notrimming = config["notrimming"]
threads:
max_threads
run:
# Check for empty files
fastq_files = [x for x in list(input.trimmed_fastq) if getsize(x) != 0]
if len(fastq_files) == 0:
shell("touch {output.samfile}")
f = open(output.statsfile, "w")
f.write("Average\tMinimum\tMaximum\n0\t0\t0")
else:
if params.notrimming:
paired_1 = [f for f in fastq_files
if "_R1_" in f]
paired_2 = [f for f in fastq_files
if "_R2_" in f]
else:
paired_1 = [x for x in fastq_files
if x.find(".1P.fastq") != -1]
paired_2 = [x for x in fastq_files
if x.find(".2P.fastq") != -1]
unpaired = [x for x in fastq_files if x.find("U.fastq") != -1]
paired_1.sort()
paired_2.sort()
unpaired_command = ""
paired_command = ""
if len(paired_1) != 0:
paired_command = " -1 " + ",".join(paired_1) + " -2 " + ",".join(paired_2)
if len(unpaired) != 0:
unpaired_command = " -U " + ",".join(unpaired)
shell("bowtie2 -x {ref} {unpaired_command} {paired_command} -S {output.samfile} --threads {threads} 2> {log}", ref = index_file, unpaired_command = unpaired_command, paired_command = paired_command)
calculate_insert_length(output.samfile, output.statsfile, output.imagefile, output.insertsizefile)
|