Package: qcumber / 1.0.14+dfsg-1

check_fasta.patch Patch series | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
Author: Andreas Tille <tille@debian.org>
Last-Update: Tue, 14 Mar 2017 12:40:48 +0100
Description: Check that the reference file is properly formatted FASTA

--- a/QCumber.py
+++ b/QCumber.py
@@ -32,6 +32,24 @@ global r2_pattern
 global sep_pattern
 global lane_pattern
 
+from sys import stderr
+from Bio import SeqIO
+
+def check_fasta(fastafile, filetype='File'):
+	"""Return True if fastafile exists and yields at least one FASTA record; else report on stderr and return False."""
+	try:
+		# Open the file here so a missing file is caught no matter when SeqIO.parse would open it.
+		with open(fastafile) as handle:
+			first = next(iter(SeqIO.parse(handle, "fasta")), None) # only the first record is needed
+	except FileNotFoundError:
+		print("%s %s does not exist" % (filetype, fastafile), file=stderr)
+		return False
+	if first is None:
+		# The parser yielded no record at all: the file is empty or holds no parsable FASTA entry.
+		print('%s %s does not contain valid fasta data' % (filetype, fastafile), file=stderr)
+		return False
+	return True
+
 def get_illumina_reads(tmp):
 	readsets = []
 	if not all([re.search(lane_pattern, x) for x in arguments["r1"]]):
@@ -255,8 +273,8 @@ def check_input_validity():
 		if not arguments["reference"]:
 			sys.exit("Mapping needs a reference.")
 		else:
-			if not os.path.exists(arguments["reference"]):
-				sys.exit("Reference does not exist.")
+			if not check_fasta(arguments["reference"],"Reference"):
+				sys.exit(1)
 			try:
 				from Bio import SeqIO
 				seq_record = SeqIO.parse(arguments["reference"], "fasta")