File: promix.py

package info (click to toggle)
python-csb 1.2.3%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 8,708 kB
  • ctags: 4,889
  • sloc: python: 24,180; xml: 812; makefile: 23; sh: 6
file content (110 lines) | stat: -rw-r--r-- 3,456 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
"""
ProMix: Take a protein structure ensemble and find a mixture of rigid
segments or a mixture of conformers. Writes K copies of the ensemble
(for segments) or K subsets of the ensemble (for conformers) as PDB
files, each superposed on different components.

Reference: Hirsch M, Habeck M. - Bioinformatics. 2008 Oct 1;24(19):2184-92
"""

import numpy

import csb.apps
import csb.bio.structure

from csb.bio.io.wwpdb import LegacyStructureParser
from csb.statistics import mixtures


class ExitCodes(csb.apps.ExitCodes):
    IO_ERROR = 2

class AppRunner(csb.apps.AppRunner):

    @property
    def target(self):
        return ProMixApp

    def command_line(self):
        cmd = csb.apps.ArgHandler(self.program, __doc__)

        cmd.add_scalar_option('components', 'K', int, 'Number of components', -1)
        cmd.add_scalar_option('type', 't', str, 'Type of mixture', 'segments', ('segments', 'conformers'))
        cmd.add_positional_argument('infile', str, 'input PDB file')

        return cmd

    def initapp(self, args):
        app = self.target
        return app(args)

class ProMixApp(csb.apps.Application):

    def main(self):
        try:
            parser = LegacyStructureParser(self.args.infile)
            models = parser.models()
        except:
            self.exit('PDB file parsing failed', ExitCodes.IO_ERROR)

        if len(models) < 2:
            self.exit('PDB file contains only one model', ExitCodes.USAGE_ERROR)

        ensemble = parser.parse_models(models)
        X = numpy.array([model.get_coordinates(['CA'], True) for model in ensemble])

        if self.args.type == 'segments':
            self.main_segments(ensemble, X)
        elif self.args.type == 'conformers':
            self.main_conformers(ensemble, X)
        else:
            raise ValueError('type must be "segments" or "conformers"')

    def main_segments(self, ensemble, X):

        mixture = mixtures.SegmentMixture.new(X, self.args.components)
        self.log('Number of segments: {0}'.format(mixture.K))

        for k,(sigma,w) in enumerate(zip(mixture.sigma, mixture.w)):
            outfile = 'promix_segment_{0}.pdb'.format(k+1)
            self.log('  {0}: sigma = {1:6.3f}, w = {2:.3f}, file = {3}'.format(k+1, sigma, w, outfile))

            for model, R, t in zip(ensemble, mixture.R, mixture.t):
                if k > 0:
                    model.transform(R[k-1], t[k-1])
                R = R[k].T
                t = -numpy.dot(R, t[k])
                model.transform(R, t)

            ensemble.to_pdb(outfile)

    def main_conformers(self, ensemble, X):

        mixture = mixtures.ConformerMixture.new(X, self.args.components)
        self.log('Number of conformers: {0}'.format(mixture.K))

        membership = mixture.membership

        for k,(sigma,w) in enumerate(zip(mixture.sigma, mixture.w)):
            outfile = 'promix_conformer_{0}.pdb'.format(k+1)
            self.log('  {0}: sigma = {1:6.3f}, w = {2:.3f}, file = {3}'.format(k+1, sigma, w, outfile))

            ek = csb.bio.structure.Ensemble()

            for model, R, t, mk in zip(ensemble, mixture.R, mixture.t, membership):
                if mk != k:
                    continue
                R = R[k].T
                t = -numpy.dot(R, t[k])
                model.transform(R, t)
                ek.models.append(model)

            ek.to_pdb(outfile)


def main():
    AppRunner().run()
    
    
if __name__ == '__main__':
    main()