File: StandardData.py

package info (click to toggle)
python-biopython 1.73%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 57,852 kB
  • sloc: python: 169,977; xml: 97,539; ansic: 15,653; sql: 1,208; makefile: 159; sh: 63
file content (118 lines) | stat: -rw-r--r-- 4,031 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# Copyright 2014 Joe Cora.
# Revisions copyright 2017 Peter Cock.
# All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""Objects to represent NEXUS standard data type matrix coding."""
from __future__ import print_function

import sys

from Bio._py3k import basestring


class NexusError(Exception):
    """Error raised when NEXUS standard data coding input is invalid."""


class StandardData(object):
    """Create a StandardData iterable object.

    The coding string is split into one entry per character position. Each
    entry is a dict with two keys:

    - ``t`` [type]: ``'std'`` [standard, a single symbol], ``'multi'``
      [multistate, written ``(...)``] or ``'uncer'`` [uncertain, written
      ``{...}``]
    - ``d`` [data]: list of the single-character symbols at that position

    A NexusError is raised if the input is not a string, or if a closing
    ``)`` or ``}`` is found outside of a group. An opening ``(`` or ``{``
    that is never closed is not reported; the unfinished group is dropped.
    """

    # Opening delimiter -> (coding type, matching closing delimiter).
    _GROUP_OPENERS = {'(': ('multi', ')'), '{': ('uncer', '}')}

    def __init__(self, data):
        """Initialize the class by parsing the coding string ``data``."""
        self._data = []
        # Cursor used only by the legacy __next__ / next methods.
        self._current_pos = 0

        # Enforce string data requirement
        if not isinstance(data, basestring):
            raise NexusError("The coding data given to a StandardData object should be a string")

        # Closing delimiter of the group being read, or None outside a group
        closer = None
        coding_list = {'t': 'std', 'd': []}

        for pos, coding in enumerate(data):
            if closer is None:
                if coding in self._GROUP_OPENERS:
                    # A multistate or uncertain group is starting
                    coding_list['t'], closer = self._GROUP_OPENERS[coding]
                    continue
                if coding in (')', '}'):
                    # pos is an int, so it must be formatted rather than
                    # concatenated onto the message.
                    raise NexusError('Improper character "%s" at position %i'
                                     ' of a coding sequence.' % (coding, pos))
                coding_list['d'].append(coding)
            elif coding != closer:
                # Inside a group every other character (including the other
                # kind of bracket) is stored as a symbol of that group.
                coding_list['d'].append(coding)
                continue
            else:
                # Matching close delimiter: the group is complete
                closer = None

            # Add character coding to data and start a fresh one
            self._data.append(coding_list)
            coding_list = {'t': 'std', 'd': []}

    def __len__(self):
        """Return the length of the coding, use len(my_coding)."""
        return len(self._data)

    def __getitem__(self, arg):
        """Return the coding entry (or list of entries for a slice) at ``arg``."""
        return self._data[arg]

    def __iter__(self):
        """Return a new iterator over the coding entries, starting at the first.

        Each call gives an independent iterator, so a loop that exits early
        does not affect later loops and nested loops do not interfere.
        """
        return iter(self._data)

    def __next__(self):
        """Return the next entry using the cursor stored on the object.

        Kept for code that calls next(my_coding) directly. When the end is
        reached the cursor is reset to the start and StopIteration is raised.
        """
        try:
            return_coding = self._data[self._current_pos]
        except IndexError:
            self._current_pos = 0
            raise StopIteration
        else:
            self._current_pos += 1
            return return_coding

    if sys.version_info[0] < 3:
        def next(self):
            """Return next item, deprecated Python 2 style alias for Python 3 style __next__ method."""
            return self.__next__()

    def raw(self):
        """Return the full coding as a python list."""
        return self._data

    def __str__(self):
        """Return the full coding as a python string, use str(my_coding)."""
        pieces = []
        for coding in self._data:
            if coding['t'] == 'multi':
                pieces.append('(' + ''.join(coding['d']) + ')')
            elif coding['t'] == 'uncer':
                pieces.append('{' + ''.join(coding['d']) + '}')
            else:
                pieces.append(coding['d'][0])
        return ''.join(pieces)