1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
|
Author: Andreas Tille <tille@debian.org>
Last-Update: Tue, 14 Mar 2017 12:40:48 +0100
Description: Check that the reference file is properly formatted FASTA
--- a/QCumber.py
+++ b/QCumber.py
@@ -32,6 +32,24 @@ global r2_pattern
global sep_pattern
global lane_pattern
+from sys import stderr
+from Bio import SeqIO
+
+def check_fasta(fastafile, filetype='File'):
+    """Return True if fastafile yields at least one FASTA record; else report on stderr, return False."""
+    try:
+        # Keep parse() inside the try: it may open the file immediately or
+        # only when the first record is requested (Biopython-version dependent).
+        first = next(iter(SeqIO.parse(fastafile, "fasta")), None)
+    except FileNotFoundError:
+        print("%s %s does not exist" % (filetype, fastafile), file=stderr)
+        return False
+    except ValueError:  # includes UnicodeDecodeError raised for undecodable input
+        first = None  # reported as invalid data below
+    if first is None:  # no record could be read (e.g. empty file)
+        print('%s %s does not contain valid fasta data' % (filetype, fastafile), file=stderr)
+    return first is not None
+
def get_illumina_reads(tmp):
readsets = []
if not all([re.search(lane_pattern, x) for x in arguments["r1"]]):
@@ -255,8 +273,8 @@ def check_input_validity():
if not arguments["reference"]:
sys.exit("Mapping needs a reference.")
else:
- if not os.path.exists(arguments["reference"]):
- sys.exit("Reference does not exist.")
+ if not check_fasta(arguments["reference"],"Reference"):
+ sys.exit(1)
try:
from Bio import SeqIO
seq_record = SeqIO.parse(arguments["reference"], "fasta")
|