File: psw.py

package info (click to toggle)
python-biopython 1.42-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 17,584 kB
  • ctags: 12,272
  • sloc: python: 80,461; xml: 13,834; ansic: 7,902; cpp: 1,855; sql: 1,144; makefile: 203
file content (130 lines) | stat: -rwxr-xr-x 3,267 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/env python2.3

# Module revision string. NOTE(review): the "$Revision: ... $" form looks like
# a version-control keyword expansion -- confirm before editing it by hand.
__version__ = "$Revision: 1.3 $"

import exceptions
import os
import re
import sys

from Bio import Wise

# Base psw invocation. align() copies this list and appends options to the copy.
# NOTE(review): the meaning of "-l" and "-F" is not visible in this file --
# confirm against the psw documentation.
_CMDLINE_PSW = ["psw", "-l", "-F"]
# Option flags align() puts in front of its gap_start, gap_extension and
# scores arguments respectively.
_OPTION_GAP_START = "-g"
_OPTION_GAP_EXTENSION = "-e"
_OPTION_SCORES = "-m"

class AlignmentColumnFullException(Exception):
    """Raised by AlignmentColumn.append when the column already has both values.

    Alignment.append catches it and starts a new column.
    """
    # Derives from the built-in Exception rather than exceptions.Exception:
    # they are the same class on Python 2, and the `exceptions` module does
    # not exist on Python 3.

class Alignment(list):
    """List of alignment columns, built one column unit at a time."""

    def append(self, column_unit):
        """Add column_unit to the last column, or open a new column for it.

        A new AlignmentColumn is started when the alignment is still empty
        (IndexError from self[-1]) or when the last column reports that it
        is already full (AlignmentColumnFullException).
        """
        try:
            last_column = self[-1]
            last_column.append(column_unit)
        except (AlignmentColumnFullException, IndexError):
            # Bypass this override and use the plain list append.
            list.append(self, AlignmentColumn(column_unit))

class AlignmentColumn(list):
    """Two-element list: [column of unit 0, column of unit 1].

    The second element is None until append() fills it in. The `kind`
    attribute starts as the kind of the unit-0 entry and is replaced by the
    kind of the unit-1 entry only if it was "SEQUENCE".
    """

    def __init__(self, column_unit):
        """Start a column from a unit-0 column unit."""
        assert column_unit.unit == 0
        list.__init__(self, [column_unit.column, None])
        self.kind = column_unit.kind

    def _set_kind(self, column_unit):
        """Take over column_unit's kind unless a non-"SEQUENCE" kind is set."""
        if self.kind != "SEQUENCE":
            return
        self.kind = column_unit.kind

    def __repr__(self):
        first, second = self[0], self[1]
        return "%s(%s, %s)" % (self.kind, first, second)

    def append(self, column_unit):
        """Fill the second element from a unit-1 column unit.

        Raises AlignmentColumnFullException if the second element has
        already been set.
        """
        already_filled = self[1] is not None
        if already_filled:
            raise AlignmentColumnFullException

        assert column_unit.unit == 1

        self._set_kind(column_unit)
        self[1] = column_unit.column
        
class ColumnUnit(object):
    """Plain record holding the unit, column and kind of one parsed line."""

    def __init__(self, unit, column, kind):
        self.unit, self.column, self.kind = unit, column, kind

    def __repr__(self):
        fields = (self.unit, self.column, self.kind)
        return "ColumnUnit(unit=%s, column=%s, %s)" % fields

    # str() and repr() give the same text.
    __str__ = __repr__

_re_unit = re.compile(r"^Unit +([01])- \[ *(-?\d+)- *(-?\d+)\] \[(\w+)\]$")
def parse_line(line):
    """
    Turn one "Unit" line into a ColumnUnit; return None for any other line.

    Trailing whitespace is ignored. Of the two bracketed numbers only the
    second one is kept, as the column.

    >>> print parse_line("Column 0:")
    None
    >>> parse_line("Unit  0- [  -1-   0] [SEQUENCE]")
    ColumnUnit(unit=0, column=0, SEQUENCE)
    >>> parse_line("Unit  1- [  85-  86] [SEQUENCE]")
    ColumnUnit(unit=1, column=86, SEQUENCE)
    """
    found = _re_unit.match(line.rstrip())
    if found is None:
        return None

    unit, _unused_start, column, kind = found.groups()
    return ColumnUnit(int(unit), int(column), kind)
    
def parse(iterable):
    """
    Build an Alignment from an iterable of psw output lines.

    format

    Column 0:
    Unit  0- [  -1-   0] [SEQUENCE]
    Unit  1- [  85-  86] [SEQUENCE]

    means that seq1[0] == seq2[86] (0-based)

    Lines that parse_line() does not recognise are skipped. When the
    WISE_PY_DEBUG environment variable is set to a non-empty value, every
    input line is echoed verbatim to standard output.
    """
    # Read the debug switch once, outside the loop. A mapping lookup with a
    # default also avoids a try/except that would hide a KeyError raised
    # while writing the echo.
    debug = os.environ.get("WISE_PY_DEBUG")

    alignment = Alignment()
    for line in iterable:
        if debug:
            # Lines normally carry their own newline, so write them as-is.
            sys.stdout.write(line)

        column_unit = parse_line(line)
        if column_unit is not None:
            alignment.append(column_unit)

    return alignment

def align(pair,
          scores=None,
          gap_start=None,
          gap_extension=None,
          *args, **keywds):
    """Run psw through Wise.align and return the parsed Alignment.

    pair          -- handed to Wise.align unchanged (presumably the two
                     sequences to align; confirm against Bio.Wise)
    scores        -- value for the "-m" option; omitted when empty or None
    gap_start     -- value for the "-g" option; omitted only when None
    gap_extension -- value for the "-e" option; omitted only when None
    *args, **keywds -- forwarded to Wise.align

    The gap values are converted with str() before being put on the
    command line.
    """
    cmdline = _CMDLINE_PSW[:]  # copy, so the module-level list is not mutated
    if scores:
        cmdline.extend((_OPTION_SCORES, scores))
    # Compare with None rather than testing truthiness: an explicit penalty
    # of 0 must reach psw instead of silently falling back to its default.
    if gap_start is not None:
        cmdline.extend((_OPTION_GAP_START, str(gap_start)))
    if gap_extension is not None:
        cmdline.extend((_OPTION_GAP_EXTENSION, str(gap_extension)))
    temp_file = Wise.align(cmdline, pair, *args, **keywds)
    return parse(temp_file)

def main():
    print align(sys.argv[1:3])

def _test(*args, **keywds):
    import doctest, sys
    doctest.testmod(sys.modules[__name__], *args, **keywds)

# Script entry point: run the doctests first, then the command-line driver.
if __name__ == "__main__":
    # __debug__ is false when Python runs with -O, which skips the doctests.
    if __debug__:
        _test()
    main()