1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
|
# Copyright 2014 Joe Cora.
# All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Provides objects to represent NEXUS standard data type matrix coding.
"""
from __future__ import print_function
class NexusError(Exception):
    """Exception raised when NEXUS coding data cannot be processed."""
class StandardData(object):
    """Create a StandardData iterable object.

    The coding string is split into one entry per character position. Each
    entry is a dict with two keys:

    - ``'t'`` [type]: ``'std'`` [standard], ``'multi'`` [multistate, written
      as ``(...)``] or ``'uncer'`` [uncertain, written as ``{...}``]
    - ``'d'`` [data]: list of the single-character states at that position

    The object keeps one internal cursor for iteration, so it is its own
    iterator; the cursor is reset to the start once iteration is exhausted.
    """

    def __init__(self, data):
        """Parse the coding string ``data`` into per-position codings.

        Raises NexusError if ``data`` is not a string, or if a closing
        ``)`` or ``}`` appears without a matching opening character.
        A group that is opened but never closed is not stored.
        """
        self._data = []
        self._current_pos = 0

        # Enforce string data requirement. ``basestring`` only exists on
        # Python 2 (where it also covers ``unicode``); fall back to ``str``.
        try:
            string_types = basestring
        except NameError:
            string_types = str
        if not isinstance(data, string_types):
            raise NexusError("The coding data given to a StandardData object should be a string")

        # Transfer each coding to a position within a sequence
        multi_coding = False
        uncertain_coding = False
        coding_list = {'t': 'std', 'd': []}

        for pos, coding in enumerate(data):
            # Check if in a multiple coded or uncertain character
            if multi_coding:
                # End multicoding if close parenthesis
                if coding == ')':
                    multi_coding = False
                else:
                    # Add current coding to list and advance to next coding
                    coding_list['d'].append(coding)
                    continue
            elif uncertain_coding:
                # End uncertain coding if close brace
                if coding == '}':
                    uncertain_coding = False
                else:
                    # Add current coding to list and advance to next coding
                    coding_list['d'].append(coding)
                    continue
            else:
                # Check if a multiple coded or uncertain character is starting
                if coding == '(':
                    multi_coding = True
                    coding_list['t'] = 'multi'
                    continue
                elif coding == '{':
                    uncertain_coding = True
                    coding_list['t'] = 'uncer'
                    continue
                elif coding in (')', '}'):
                    # ``pos`` is an int, so it must be formatted rather than
                    # concatenated onto the message.
                    raise NexusError('Improper character "%s" at position %d'
                                     ' of a coding sequence.' % (coding, pos))
                else:
                    coding_list['d'].append(coding)

            # Reached only when a position is complete (a standard character
            # or the closing character of a group): store it and start a
            # fresh coding for the next position.
            self._data.append(coding_list.copy())
            coding_list = {'t': 'std', 'd': []}

    def __len__(self):
        """Returns the length of the coding, use len(my_coding)."""
        return len(self._data)

    def __getitem__(self, arg):
        """Returns the coding(s) at index or slice ``arg``."""
        return self._data[arg]

    def __iter__(self):
        """Returns the object itself, which acts as its own iterator."""
        return self

    def next(self):
        """Returns the next coding, or raises StopIteration at the end.

        The cursor is reset to the start when the end is reached, so the
        object can be iterated again afterwards.
        """
        try:
            return_coding = self._data[self._current_pos]
        except IndexError:
            self._current_pos = 0
            raise StopIteration
        else:
            self._current_pos += 1
            return return_coding

    # Python 3 looks up ``__next__``; keep ``next`` for Python 2 callers.
    __next__ = next

    def raw(self):
        """Returns the full coding as a python list."""
        return self._data

    def __str__(self):
        """Returns the full coding as a python string, use str(my_coding)."""
        parts = []
        for coding in self._data:
            if coding['t'] == 'multi':
                parts.append('(' + ''.join(coding['d']) + ')')
            elif coding['t'] == 'uncer':
                parts.append('{' + ''.join(coding['d']) + '}')
            else:
                # A standard coding always holds exactly one state
                parts.append(coding['d'][0])
        return ''.join(parts)
|