1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
|
# Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and
# Biozentrum - University of Basel
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# internal
from ._modelling import *
from ._alignment_fiddling import *
# external
import ost
def BuildRawModel(aln, chain_names = None, include_ligands = False,
                  spdbv_style = False, aln_preprocessing='default'):
    '''Builds a raw (pseudo) model from the alignment. Accepts either a single
    alignment handle or a list of alignment handles. Each item of such a list
    ends up as one chain of the final raw model.

    Every alignment handle must hold exactly two sequences. The second sequence
    is considered the template sequence and must have a
    :class:`~ost.mol.EntityView` attached.

    Before the coordinates are extracted, the alignments are pre-processed
    according to *aln_preprocessing*.

    This is a basic protein core modelling algorithm that copies backbone
    coordinates based on the sequence alignment. For matching residues, the
    side chain coordinates are copied as well. Gaps are ignored. Hydrogen and
    deuterium atoms are not copied into the model.

    As much as possible is reused from the template. Modified residues are
    treated as follows:

    - Selenium methionine residues are converted to methionine

    - Side chains which contain all atoms of the parent amino acid, e.g.
      phosphoserine, are copied as a whole with the modifications stripped
      off.

    Residues with missing backbone atoms and D-peptides are generally skipped
    and treated as gaps. Missing Cbeta atoms in the backbone are fine and get
    reconstructed. If all residues are skipped (e.g. Calpha traces), an error
    is reported and an empty model is returned.

    Residue numbers are set such that missing residues in gaps are honoured
    and subsequent loop modelling can insert new residues without having to
    renumber. **The numbering of residues starts for every chain with the
    value 1**.

    The returned :class:`ModellingHandle` stores the obtained raw model as
    well as information about insertions and deletions in the gaps list.

    :param aln: Single alignment handle for raw model with single chain or
                list of alignment handles for raw model with multiple chains.
    :type aln:  :class:`~ost.seq.AlignmentHandle` /
                :class:`~ost.seq.AlignmentList`

    :param include_ligands: True, if we wish to include ligands in the model.
                            This searches for ligands in all OST handles of
                            the views attached to the alignments. Ligands are
                            identified with the `ligand` property in the
                            handle (set by OST based on HET records) or by
                            the chain name '_' (as set in SMTL). All ligands
                            are added to a new chain named '_'.
    :type include_ligands:  :class:`bool`

    :param chain_names: If set, this overrides the default chain naming
                        (chains are consecutively named according to
                        characters in
                        'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz').
                        If *aln* is of type :class:`ost.seq.AlignmentHandle`,
                        *chain_names* is expected to be a :class:`str`.
                        If *aln* is of type :class:`ost.seq.AlignmentList`,
                        *chain_names* is expected to be a :class:`list` of
                        :class:`str` of same size as *aln* or a :class:`str`.
                        For the latter case, chains will be consecutively
                        named according to characters in *chain_names*.
    :type chain_names:  :class:`str` / :class:`list`

    :param spdbv_style: True, if we need a model in the old SPDBV style.
    :type spdbv_style:  :class:`bool`

    :param aln_preprocessing: Calls
                              :meth:`promod3.modelling.PullTerminalDeletions`
                              if set to 'default'. Can be disabled when set
                              to False.

    :return: Raw (pseudo) model from the alignment.
    :rtype:  :class:`ModellingHandle`

    :raises: A :exc:`RuntimeError` when:

             - *aln* or *chain_names* are of an unexpected type, or the
               number of chain names does not fit the number of alignments
             - the alignments do not have two sequences
             - the second sequence does not have an attached structure
             - the residues of the template structure do not match with the
               alignment sequence (note that you can set an "offset" (see
               :meth:`~ost.seq.AlignmentHandle.SetSequenceOffset`) for the
               template sequence (but not for the target))
             - the target sequence has a non-zero offset (cannot be honored
               as the resulting model will always start its residue
               numbering at 1)
    '''
    # Reject unsupported input right away; everything below can then rely on
    # aln being either a single handle or a list of handles.
    is_single = isinstance(aln, ost.seq.AlignmentHandle)
    if not (is_single or isinstance(aln, ost.seq.AlignmentList)):
        raise RuntimeError('aln must be of type ost.seq.AlignmentHandle or '
                           'ost.seq.AlignmentList')

    if is_single:
        # Wrap the lone handle so that the rest works on a list in any case.
        alignments = ost.seq.AlignmentList()
        alignments.append(aln)
        if chain_names is None:
            names = ['A']
        elif isinstance(chain_names, str):
            names = [chain_names]
        else:
            raise RuntimeError('Expect chain_names to be of type str if aln '
                               'is of type ost.seq.AlignmentHandle')
    else:
        alignments = aln
        num_chains = len(alignments)
        if chain_names is None:
            # Default naming: upper case letters, digits, lower case letters.
            fallback = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                        '0123456789'
                        'abcdefghijklmnopqrstuvwxyz')
            if num_chains > len(fallback):
                raise RuntimeError('Max of ' + str(len(fallback)) +
                                   ' alns if no chain_names provided')
            names = list(fallback[:num_chains])
        elif isinstance(chain_names, list):
            if len(chain_names) != num_chains:
                raise RuntimeError('Number of alns and chain_names must be '
                                   'consistent')
            # The user supplied list is handed on as is (no copy).
            names = chain_names
        elif isinstance(chain_names, str):
            if len(chain_names) < num_chains:
                raise RuntimeError('If you provide a string as chain_names, '
                                   'it must be at least as long as the '
                                   'AlignmentList in aln.')
            # One character per chain; surplus characters are ignored.
            names = list(chain_names[:num_chains])
        else:
            raise RuntimeError('chain_names must be list of str or str if '
                               'aln is of type ost.seq.AlignmentList')

    # Only the exact value 'default' triggers the pre-processing. The results
    # are collected in a fresh list, the input list itself is not modified.
    if aln_preprocessing == 'default':
        pulled = ost.seq.AlignmentList()
        for handle in alignments:
            pulled.append(PullTerminalDeletions(handle))
        alignments = pulled

    return MHandleFromAln(alignments, names, include_ligands, spdbv_style)
|