1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
|
"""
Support for "biological sequence" files.
:Author: Bob Harris (rsharris@bx.psu.edu)
See seq.py for more information
"""
import struct
from . import (
fasta,
nib,
qdna,
)
# DNA reverse complement table: a 256-entry string indexed by character
# ordinal, so it can be handed directly to ``str.translate``.  The nucleotide
# codes listed below (in both cases) and the gap character "-" map to their
# complements; every other ordinal maps to a blank.  The table is built
# programmatically so its length and alignment cannot drift the way a
# hand-padded literal can.
_DNA_CODES = "ABCDGHKMNRSTVWXY"
_DNA_CODE_COMPLEMENTS = "TVGHCDMKNYSABWXR"
_DNA_COMP_PAIRS = {"-": "-"}
_DNA_COMP_PAIRS.update(zip(_DNA_CODES, _DNA_CODE_COMPLEMENTS))
_DNA_COMP_PAIRS.update(zip(_DNA_CODES.lower(), _DNA_CODE_COMPLEMENTS.lower()))
DNA_COMP = "".join(_DNA_COMP_PAIRS.get(chr(code), " ") for code in range(256))
def reverse_complement(text):
    """Return *text* complemented through ``DNA_COMP`` and then reversed."""
    complemented = text.translate(DNA_COMP)
    # Slice-reversal keeps the result the same type that translate() returned.
    return complemented[::-1]
def seq_file(file, format=None, revcomp=False, name="", gap=None, contig=None):
    """Return a sequence-file object for *file* in the requested format.

    Parameters:
        file: Open file-like object holding the sequence data.
        format: One of "fasta", "nib" or "qdna".  When None, the format is
            inferred from the file contents via ``infer_format``.
        revcomp, name, gap: Forwarded unchanged to the format-specific class.
        contig: Forwarded to ``fasta.FastaFile`` only; other formats reject
            it.

    Returns:
        A ``fasta.FastaFile``, ``nib.NibFile`` or ``qdna.QdnaFile`` instance.

    Raises:
        ValueError: If *contig* is given for a non-fasta format, or if the
            format is unknown / could not be inferred.
    """
    if format is None:
        format = infer_format(file)
    if contig is not None and format not in ("fasta", None):
        raise ValueError(f"Contigs are not supported for format {format}")

    if format == "fasta":
        return fasta.FastaFile(file, revcomp=revcomp, name=name, gap=gap, contig=contig)
    if format == "nib":
        return nib.NibFile(file, revcomp=revcomp, name=name, gap=gap)
    if format == "qdna":
        return qdna.QdnaFile(file, revcomp=revcomp, name=name, gap=gap)

    # Unknown (or un-inferable) format.  Not every file-like object carries a
    # ``name`` attribute (e.g. in-memory buffers), so only mention it when it
    # exists; otherwise an AttributeError would mask the real problem.
    label = "" if format is None else " " + format
    location = f" in {file.name}" if hasattr(file, "name") else ""
    raise ValueError(f"Unknown sequence format{label}{location}")
def seq_reader(file, format=None, revcomp=False, name="", gap=None):
    """Return a format-specific sequence reader for *file*.

    When *format* is None it is inferred from the file contents with
    ``infer_format``.  ``revcomp``, ``name`` and ``gap`` are forwarded
    unchanged to the reader class.  Raises ValueError for any format other
    than "fasta", "nib" or "qdna".
    """
    kind = infer_format(file) if format is None else format
    options = {"revcomp": revcomp, "name": name, "gap": gap}

    if kind == "fasta":
        return fasta.FastaReader(file, **options)
    if kind == "nib":
        return nib.NibReader(file, **options)
    if kind == "qdna":
        return qdna.QdnaReader(file, **options)
    raise ValueError(f"Unknown sequence format {kind}")
def seq_writer(outfile, format=None, name=""):
    """Return a format-specific sequence writer wrapping *outfile*.

    *format* must be "fasta", "nib" or "qdna"; anything else (including the
    default None) raises ValueError.  *name* is accepted but not used here.
    """
    if format == "fasta":
        return fasta.FastaWriter(outfile)
    if format == "nib":
        return nib.NibWriter(outfile)
    if format == "qdna":
        return qdna.QdnaWriter(outfile)
    raise ValueError(f"Unknown sequence format {format}")
def infer_format(file):
    """Guess the sequence format of *file* from its leading bytes.

    Reads up to four bytes from the current position, compares them (as a
    big-endian unsigned 32-bit value) with the nib and qdna magic numbers in
    either byte order, and otherwise checks for a leading ">" as the fasta
    marker.  The file is rewound to offset 0 before returning.

    Parameters:
        file: Seekable file-like object opened in binary mode.

    Returns:
        "nib", "qdna", "fasta", or None when the format is not recognised.
        Inputs shorter than four bytes are handled gracefully instead of
        raising ``struct.error``.
    """
    format = None
    header = file.read(4)

    # Only a full 4-byte header can be a magic number; unpacking a shorter
    # read would raise struct.error on empty or very small files.
    if len(header) == 4:
        magic = struct.unpack(">L", header)[0]
        if (magic == nib.NIB_MAGIC_NUMBER) or (magic == nib.NIB_MAGIC_NUMBER_SWAP):
            format = "nib"
        elif (magic == qdna.qdnaMagic) or (magic == qdna.qdnaMagicSwap):
            format = "qdna"

    # The first byte of the header is exactly what a re-read from offset 0
    # would return, so no extra seek/read is needed for the fasta check.
    if format is None and header[:1] == b">":
        format = "fasta"

    file.seek(0)
    return format
|