File: PSEA.py

package info (click to toggle)
python-biopython 1.64%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 44,416 kB
  • ctags: 12,472
  • sloc: python: 153,759; xml: 67,286; ansic: 9,003; sql: 1,488; makefile: 144; sh: 59
file content (112 lines) | stat: -rw-r--r-- 2,769 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Copyright (C) 2006, Thomas Hamelryck (thamelry@binf.ku.dk)
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Wrappers for PSEA, a program for secondary structure assignment.

See this citation for P-SEA, PMID: 9183534

Labesse G, Colloc'h N, Pothier J, Mornon J-P:  P-SEA: a new efficient
assignment of secondary structure from C_alpha.
Comput Appl Biosci 1997 , 13:291-295

ftp://ftp.lmcp.jussieu.fr/pub/sincris/software/protein/p-sea/
"""

import os

from Bio.PDB.Polypeptide import is_aa


def run_psea(fname):
    """Run PSEA and return output filename.

    Note that this assumes the P-SEA binary is called "psea" and that it is
    on the path.

    Note that P-SEA will write an output file in the current directory using
    the input filename with extension ".sea".

    Note that P-SEA will write output to the terminal while run.
    """
    os.system("psea "+fname)
    last=fname.split("/")[-1]
    base=last.split(".")[0]
    return base+".sea"


def psea(pname):
    """Parse PSEA output file."""
    fname=run_psea(pname)
    start=0
    ss=""
    with open(fname, 'r') as fp:
        for l in fp.readlines():
            if l[0:6]==">p-sea":
                start=1
                continue
            if not start:
                continue
            if l[0]=="\n":
                break
            ss=ss+l[0:-1]
    return ss


def psea2HEC(pseq):
    """Translate PSEA secondary structure string into HEC."""
    seq=[]
    for ss in pseq:
        if ss=="a":
            n="H"
        elif ss=="b":
            n="E"
        elif ss=="c":
            n="C"
        seq.append(n)
    return seq


def annotate(m, ss_seq):
    """Apply seconardary structure information to residues in model."""
    c=m.get_list()[0]
    all=c.get_list()
    residues=[]
    # Now remove HOH etc.
    for res in all:
        if is_aa(res):
            residues.append(res)
    L=len(residues)
    if not (L==len(ss_seq)):
        raise ValueError("Length mismatch %i %i" % (L, len(ss_seq)))
    for i in range(0, L):
        residues[i].xtra["SS_PSEA"]=ss_seq[i]
    #os.system("rm "+fname)


class PSEA(object):
    def __init__(self, model, filename):
        ss_seq=psea(filename)
        ss_seq=psea2HEC(ss_seq)
        annotate(model, ss_seq)
        self.ss_seq=ss_seq

    def get_seq(self):
        """
        Return secondary structure string.
        """
        return self.ss_seq


if __name__=="__main__":

    import sys
    from Bio.PDB import PDBParser

    # Parse PDB file
    p=PDBParser()
    s=p.get_structure('X', sys.argv[1])

    # Annotate structure with PSEA sceondary structure info
    PSEA(s[0], sys.argv[1])