File: _raw_model.py

package info (click to toggle)
promod3 3.4.2%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 966,596 kB
  • sloc: cpp: 55,820; python: 18,058; makefile: 85; sh: 51
file content (155 lines) | stat: -rw-r--r-- 7,702 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and
#                          Biozentrum - University of Basel
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#   http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# internal
from ._modelling import *
from ._alignment_fiddling import *
# external
import ost

def BuildRawModel(aln, chain_names = None, include_ligands = False,
                  spdbv_style = False, aln_preprocessing='default'):
    '''Build a raw (pseudo) model from one or several alignments.

    *aln* is either a single alignment handle or a list of alignment handles.
    Every item of such a list ends up as its own chain in the raw model.

    Each alignment handle must hold exactly two sequences. The second one is
    taken as the template sequence and must have a
    :class:`~ost.mol.EntityView` attached.

    Prior to extracting coordinates, the alignments are pre-processed as
    requested by *aln_preprocessing*.

    The underlying procedure is a basic protein core modelling algorithm:
    backbone coordinates are copied according to the sequence alignment and,
    for matching residues, side chain coordinates are copied too. Gaps are
    ignored. Hydrogen and deuterium atoms are not copied into the model.

    As much as possible is reused from the template. Modified residues are
    handled like this:

      - Selenium methionine residues are converted to methionine

      - Side chains which contain all atoms of the parent amino acid, e.g.
        phosphoserine, are copied as a whole with the modifications stripped
        off.

    Residues with missing backbone atoms and D-peptides are generally skipped
    and treated as gaps. A missing Cbeta atom is tolerated and reconstructed.
    If every residue is skipped (e.g. Calpha traces), an error is reported and
    an empty model is returned.

    Residue numbers are assigned such that residues missing in gaps are
    honoured, so subsequent loop modelling can insert new residues without
    renumbering. **The numbering of residues starts for every chain with the
    value 1**.

    The returned :class:`ModellingHandle` stores the raw model together with
    information on insertions and deletions in its gaps list.

    :param aln: Single alignment handle for a raw model with one chain, or a
                list of alignment handles for a raw model with several chains.
    :type aln:  :class:`~ost.seq.AlignmentHandle` / :class:`~ost.seq.AlignmentList`

    :param include_ligands: True, if ligands should be included in the model.
                            Ligands are searched in all OST handles of the
                            views attached to the alignments. They are
                            identified by the `ligand` property in the handle
                            (set by OST based on HET records) or by the chain
                            name '_' (as set in SMTL). All ligands are added to
                            a new chain named '_'.
    :type include_ligands:  :class:`bool`

    :param chain_names: If set, this overrides the default chain naming
                        (chains are consecutively named according to
                        characters in
                        'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz').
                        If *aln* is of type :class:`ost.seq.AlignmentHandle`,
                        *chain_names* is expected to be a :class:`str`.
                        If *aln* is of type :class:`ost.seq.AlignmentList`,
                        *chain_names* is expected to be a :class:`list` of
                        :class:`str` of same size as *aln* or a :class:`str`.
                        In the latter case, chains are consecutively named
                        according to the characters in *chain_names*.

    :type chain_names:  :class:`str` / :class:`list`

    :param spdbv_style: True, if a model in the old SPDBV style is needed.
    :type spdbv_style:  :class:`bool`

    :param aln_preprocessing: If set to 'default',
                              :meth:`promod3.modelling.PullTerminalDeletions`
                              is called on every alignment. Any other value
                              (e.g. False) disables the pre-processing.

    :return: Raw (pseudo) model from the alignment.
    :rtype:  :class:`ModellingHandle`

    :raises: A :exc:`RuntimeError` when:

             - *aln* or *chain_names* are of an unsupported type, or the
               number of chain names does not fit the number of alignments
             - the alignments do not have two sequences
             - the second sequence does not have an attached structure
             - the residues of the template structure do not match with the
               alignment sequence (note that you can set an "offset" (see
               :meth:`~ost.seq.AlignmentHandle.SetSequenceOffset`) for the
               template sequence (but not for the target))
             - the target sequence has a non-zero offset (cannot be honored as
               the resulting model will always start its residue numbering
               at 1)
    '''

    # Reject unsupported input types up front; everything below can then rely
    # on aln being either a single handle or a list of handles.
    is_single = isinstance(aln, ost.seq.AlignmentHandle)
    if not is_single and not isinstance(aln, ost.seq.AlignmentList):
        raise RuntimeError('aln must be of type ost.seq.AlignmentHandle or '
                           'ost.seq.AlignmentList')

    if is_single:
        # Wrap the lone handle so the remainder only deals with lists.
        alignments = ost.seq.AlignmentList()
        alignments.append(aln)
        if chain_names is None:
            names = ['A']
        elif isinstance(chain_names, str):
            names = [chain_names]
        else:
            raise RuntimeError('Expect chain_names to be of type str if aln is'
                               ' of type ost.seq.AlignmentHandle')
    else:
        alignments = aln
        num_alns = len(alignments)
        if chain_names is None:
            # Default naming: one character per chain, in this fixed order.
            alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopq' \
                       'rstuvwxyz'
            if num_alns > len(alphabet):
                raise RuntimeError('Max of ' + str(len(alphabet)) +
                                   ' alns if no chain_names provided')
            names = list(alphabet[:num_alns])
        elif isinstance(chain_names, list):
            if len(chain_names) != num_alns:
                raise RuntimeError('Number of alns and chain_names must be '
                                   'consistent')
            # The caller's list is handed on as is (no copy).
            names = chain_names
        elif isinstance(chain_names, str):
            if len(chain_names) < num_alns:
                raise RuntimeError('If you provide a string as chain_names, '
                                   'it must be at least as long as the '
                                   'AlignmentList in aln.')
            # One character per chain; surplus characters are ignored.
            names = list(chain_names[:num_alns])
        else:
            raise RuntimeError('chain_names must be list of str or str if aln '
                               'is of type ost.seq.AlignmentList')

    # Only the exact value 'default' triggers the pre-processing; the results
    # are collected in a fresh list instead of replacing items in place.
    if aln_preprocessing == 'default':
        processed = ost.seq.AlignmentList()
        for handle in alignments:
            processed.append(PullTerminalDeletions(handle))
        alignments = processed

    return MHandleFromAln(alignments, names, include_ligands, spdbv_style)