File: __init__.py

package info (click to toggle)
python-biopython 1.68%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 46,860 kB
  • ctags: 13,237
  • sloc: python: 160,306; xml: 93,216; ansic: 9,118; sql: 1,208; makefile: 155; sh: 63
file content (148 lines) | stat: -rwxr-xr-x 4,774 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env python
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
#
"""
Bio.Wise contains modules for running and processing the output of
some of the models in the Wise2 package by Ewan Birney available from:
ftp://ftp.ebi.ac.uk/pub/software/unix/wise2/
http://www.ebi.ac.uk/Wise2/

Bio.Wise.psw is for protein Smith-Waterman alignments
Bio.Wise.dnal is for Smith-Waterman DNA alignments
"""

from __future__ import print_function

import os
import sys
import tempfile

from Bio import SeqIO


def _build_align_cmdline(cmdline, pair, output_filename, kbyte=None, force_type=None, quiet=False):
    """Helper function to build a command line string (PRIVATE).

    >>> os.environ["WISE_KBYTE"]="300000"
    >>> if os.isatty(sys.stderr.fileno()):
    ...    c = _build_align_cmdline(["dnal"], ("seq1.fna", "seq2.fna"),
    ...                             "/tmp/output", kbyte=100000)
    ...    assert c == 'dnal -kbyte 100000 seq1.fna seq2.fna > /tmp/output', c
    ...    c = _build_align_cmdline(["psw"], ("seq1.faa", "seq2.faa"),
    ...                             "/tmp/output_aa")
    ...    assert c == 'psw -kbyte 300000 seq1.faa seq2.faa > /tmp/output_aa', c
    ... else:
    ...    c = _build_align_cmdline(["dnal"], ("seq1.fna", "seq2.fna"),
    ...                             "/tmp/output", kbyte=100000)
    ...    assert c == 'dnal -kbyte 100000 -quiet seq1.fna seq2.fna > /tmp/output', c
    ...    c = _build_align_cmdline(["psw"], ("seq1.faa", "seq2.faa"),
    ...                             "/tmp/output_aa")
    ...    assert c == 'psw -kbyte 300000 -quiet seq1.faa seq2.faa > /tmp/output_aa', c

    """
    cmdline = cmdline[:]

    # XXX: force_type ignored

    if kbyte is None:
        try:
            cmdline.extend(("-kbyte", os.environ["WISE_KBYTE"]))
        except KeyError:
            pass
    else:
        cmdline.extend(("-kbyte", str(kbyte)))

    if not os.isatty(sys.stderr.fileno()):
        cmdline.append("-quiet")

    cmdline.extend(pair)
    cmdline.extend((">", output_filename))
    if quiet:
        cmdline.extend(("2>", "/dev/null"))
    cmdline_str = ' '.join(cmdline)

    return cmdline_str


def align(cmdline, pair, kbyte=None, force_type=None, dry_run=False, quiet=False, debug=False):
    """
    Returns a filehandle
    """
    if not pair or len(pair) != 2:
        raise ValueError("Expected pair of filename, not %s" % repr(pair))

    output_file = tempfile.NamedTemporaryFile(mode='r')
    input_files = tempfile.NamedTemporaryFile(mode="w"), tempfile.NamedTemporaryFile(mode="w")

    if dry_run:
        print(_build_align_cmdline(cmdline,
                                   pair,
                                   output_file.name,
                                   kbyte,
                                   force_type,
                                   quiet))
        return

    for filename, input_file in zip(pair, input_files):
        # Pipe the file through Biopython's Fasta parser/writer
        # to make sure it conforms to the Fasta standard (in particular,
        # Wise2 may choke on long lines in the Fasta file)
        records = SeqIO.parse(open(filename), 'fasta')
        SeqIO.write(records, input_file, 'fasta')
        input_file.flush()

    input_file_names = [input_file.name for input_file in input_files]

    cmdline_str = _build_align_cmdline(cmdline,
                                       input_file_names,
                                       output_file.name,
                                       kbyte,
                                       force_type,
                                       quiet)

    if debug:
        sys.stderr.write("%s\n" % cmdline_str)

    status = os.system(cmdline_str) >> 8

    if status > 1:
        if kbyte != 0:  # possible memory problem; could be None
            sys.stderr.write("INFO trying again with the linear model\n")
            return align(cmdline, pair, 0, force_type, dry_run, quiet, debug)
        else:
            raise OSError("%s returned %s" % (" ".join(cmdline), status))

    return output_file


def all_pairs(singles):
    """
    Generate pairs list for all-against-all alignments

    >>> all_pairs(range(4))
    [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
    """
    pairs = []

    singles = list(singles)
    while singles:
        suitor = singles.pop(0)  # if sorted, stay sorted
        pairs.extend((suitor, single) for single in singles)

    return pairs


def main():
    pass


def _test(*args, **keywds):
    import doctest
    doctest.testmod(sys.modules[__name__], *args, **keywds)

if __name__ == "__main__":
    if __debug__:
        _test()
    main()