1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
|
# Not clear on the distinction, if any, between 'embl' and 'embl/65'. This
# code might apply to either or both.
# See 'http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html' for a
# definition of this file format.
# This code only makes a best effort--the output may not be strictly valid.
# So, for example, the EMBL ID is supposed to be alphanumeric, starting with a
# letter, but we don't check for this, etc.
# Example:
# ID AA03518 standard; DNA; FUN; 237 BP.
# XX
# AC U03518;
# XX
# DE Aspergillus awamori internal transcribed spacer 1 (ITS1) and 18S
# DE rRNA and 5.8S rRNA genes, partial sequence.
# XX
# SQ Sequence 237 BP; 41 A; 77 C; 67 G; 52 T; 0 other;
# aacctgcgga aggatcatta ccgagtgcgg gtcctttggg cccaacctcc catccgtgtc 60
# tattgtaccc tgttgcttcg gcgggcccgc cgcttgtcgg ccgccggggg ggcgcctctg 120
# ccccccgggc ccgtgcccgc cggagacccc aacacgaaca ctgtctgaaa gcgtgcagtc 180
# tgagttgatt gaatgcaatc agttaaaact ttcaacaatg gatctcttgg ttccggc 237
# //
import textwrap
from Bio import Alphabet
from Bio import Writer
class WriteEmbl(Writer.Writer):
    """Write sequence records in (approximately) EMBL flat-file format.

    For each record this emits an ID line, zero or more DE lines, an SQ
    summary line, the sequence in rows of 60 letters split into blocks of 10,
    and a terminating ``//`` line. This is a best effort only: neither the
    record id nor the sequence letters are validated.
    """

    def __init__(self, outfile):
        """Initialise the base writer with *outfile*.

        ``write`` sends its output through ``self.outfile``; the base class
        is assumed to store the handle under that name.
        """
        Writer.Writer.__init__(self, outfile)

    def write(self, record):
        """Write one record to ``self.outfile``.

        *record* must provide ``id``, ``description`` and ``seq``; ``seq``
        must provide ``data`` (a string) and ``alphabet`` (with a ``size``
        attribute). A missing (``None``) or empty description produces no
        DE lines.

        Raises AssertionError if ``seq.alphabet.size`` is not 1.
        """
        seq = record.seq
        size = seq.alphabet.size
        if size != 1:
            # Raised explicitly instead of via ``assert`` so the check is not
            # stripped under ``python -O``. ``%s`` is used so a non-integer
            # size is reported instead of triggering a TypeError while the
            # message is being built.
            raise AssertionError("cannot handle alphabet of size %s" % (size,))

        data = seq.data
        upperdata = data.upper()
        total = len(data)

        # It'd be nice if the alphabet was usefully set, but for many
        # interesting cases (e.g., reading from FASTA files), it's not.
        # Anything that is not explicitly an RNA alphabet is written as DNA.
        if isinstance(seq.alphabet, Alphabet.RNAAlphabet):
            molecule = 'mRNA'
            letters = ['A', 'C', 'G', 'U']
        else:
            molecule = 'DNA'
            letters = ['A', 'C', 'G', 'T']
        division = 'UNC'  # unknown

        self.outfile.write("ID %s standard; %s; %s; %d BP.\n"
                           % (record.id, molecule, division, total))

        # Wrap the description at 74 columns, one DE line per wrapped line.
        # ``or ""`` keeps a None description from crashing textwrap.
        for desc_line in textwrap.wrap(record.description or "", 74):
            self.outfile.write("DE %s\n" % desc_line)

        # Per-letter totals are case-insensitive; everything that is not one
        # of the four expected letters is reported as "other".
        counts = [upperdata.count(letter) for letter in letters]
        othercount = len(upperdata) - sum(counts)
        countstring = ''.join([" %d %s;" % pair
                               for pair in zip(counts, letters)])
        self.outfile.write("SQ Sequence %s BP;%s %d other;\n"
                           % (total, countstring, othercount))

        rowlength = 60
        blocklength = 10
        indent = " " * 5
        # ``range`` rather than ``xrange`` so the loop runs on Python 2 and 3.
        for start in range(0, total, rowlength):
            row = data[start:start + rowlength]
            # Every block is left-justified to blocklength + 1 columns, so a
            # short final row is padded and the running count stays aligned.
            blocks = ["%-*s" % (blocklength + 1, row[b:b + blocklength])
                      for b in range(0, rowlength, blocklength)]
            # The trailing number is the position of the row's last letter.
            self.outfile.write("%s%s%9d\n" % (indent, ''.join(blocks),
                                              min(start + rowlength, total)))
        self.outfile.write("//\n")
# Module-level alias for the writer class. Presumably this is the factory name
# that the code loading this writer module looks up -- confirm against the
# caller.
make_writer = WriteEmbl
|