Package: qcumber / 2.3.0-2

check_fasta.patch Patch series | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
--- a/QCumber-2
+++ b/QCumber-2
@@ -17,6 +17,8 @@
 import datetime
 import yaml
 import input_utils
+from sys import stderr
+from Bio import SeqIO
 # Set paths
 
 ADAPTER_PATH = ""   # Adapter path from trimmomatic.
@@ -657,13 +659,28 @@
     return type, sample_dict, join_lanes, name_dict, join_reads
 
 
-def check_input_validity(arguments):
+def check_fasta(fastafile, filetype='File'):
+    """Return True if fastafile yields at least one FASTA record, else print why to stderr and return False."""
+    try:
+        # Parse inside the try (parse() itself may open the file) and read only the first record.
+        first = next(iter(SeqIO.parse(fastafile, "fasta")), None)
+    except FileNotFoundError:
+        print("%s %s does not exist" % (filetype, fastafile), file=stderr)
+        return False
+    except ValueError:  # malformed or undecodable content (UnicodeDecodeError is a ValueError)
+        first = None
+    if first is None:  # no record at all, e.g. an empty file
+        print('%s %s does not contain valid fasta data' % (filetype, fastafile), file=stderr)
+        return False
+    return True
+
 
+def check_input_validity(arguments):
     if arguments["reference"]:
         ref_file = arguments["reference"]
         seq_record = ""
-        if not os.path.exists(arguments["reference"]):
-            sys.exit("Reference does not exist.")
+        if not check_fasta(arguments["reference"],"Reference"):
+            sys.exit(1)
         try:
             if ref_file[-2:] == "gz":
                 with subprocess.Popen(["gzip", "-cd", ref_file],