1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
|
from ost import conop, mol
def Cleanup(entity, strip_water=True, canonicalize=True, remove_ligands=True):
    """
    Return a cleaned-up (simplified) version of a protein structure.

    All edits are applied to the object returned by ``entity.Copy()``; that
    object is what gets returned. The enabled steps always run in the same
    order: water removal, canonicalization, ligand removal.

    :param entity: The structure to clean up
    :param strip_water: Whether to remove water from the structure
    :param canonicalize: Whether to strip off modifications of amino acids and
      map them back to their parent standard amino acid, e.g. selenium
      methionine to methionine. For more complex amino acids, where the
      relation between the modified and the standard parent amino acid is not
      known, sidechain atoms are removed. D-peptide-linking residues are
      completely removed as well.
    :param remove_ligands: Whether to remove ligands from the structure
    :return: a cleaned version of the entity
    :raises RuntimeError: if ``conop.GetDefaultLib()`` yields no compound
      library (checked up front, whichever steps are enabled)
    """
    compound_lib = conop.GetDefaultLib()
    if not compound_lib:
        raise RuntimeError("Cleanup requires a compound library.")

    # Work on a copy; a single editor is shared by all cleaning steps.
    cleaned = entity.Copy()
    editor = cleaned.EditXCS()

    if strip_water:
        _StripWater(cleaned, editor)

    # Modified residues are converted before ligands are removed, to avoid
    # removing MSE and others.
    if canonicalize:
        _CanonicalizeResidues(cleaned, editor, compound_lib)

    # Remove all hetatoms that are not water.
    if remove_ligands:
        _RemoveLigands(cleaned, editor)

    return cleaned
def _StripWater(clean_entity, ed):
    """
    Delete the water residues of *clean_entity* through the editor *ed*.

    A residue counts as water when its ``chem_class`` equals ``mol.WATER``.
    ``ed.UpdateICS()`` is called once after all residues have been visited.

    :param clean_entity: The structure whose residues are scanned
    :param ed: The editor used to delete the residues
    """
    for residue in clean_entity.residues:
        # Only valid residue handles are inspected.
        if residue.IsValid() and residue.chem_class == mol.WATER:
            ed.DeleteResidue(residue.handle)
    ed.UpdateICS()
def _RemoveLigands(clean_entity, ed):
    """
    Delete the ligand residues of *clean_entity* through the editor *ed*.

    A residue is treated as a ligand when it is not peptide-linking, its
    first atom is flagged as hetatom and its chemical class is not water.
    ``ed.UpdateICS()`` is called once after all residues have been visited.

    :param clean_entity: The structure whose residues are scanned
    :param ed: The editor used to delete the residues
    """
    for residue in clean_entity.residues:
        if not residue.IsValid():
            continue
        # WHEN mmCIF WILL BE USED, CHANGE IsPeptideLinking() TO IsProtein()
        # NOTE(review): atoms[0] presumes a non-peptide residue always has at
        # least one atom -- confirm.
        is_ligand = (
            not residue.IsPeptideLinking()
            and residue.atoms[0].is_hetatom
            and residue.chem_class != mol.WATER
        )
        if is_ligand:
            ed.DeleteResidue(residue.handle)
    ed.UpdateICS()
def _CanonicalizeResidues(clean_entity, ed, compound_lib):
    """
    Strip off modifications of amino acids and map them back to their parent
    standard amino acid, e.g. selenium methionine to methionine. For more
    complex amino acids, where the relation between the modified and the
    standard parent amino acid is not known, sidechain atoms are removed.
    D-peptide-linking residues are completely removed as well.

    Only valid, peptide-linking residues are considered. Heavy-atom names
    (everything except elements "H" and "D") of the residue are compared with
    those of the parent compound looked up in *compound_lib*, and the residue
    is handed to one of the helpers ``_Replacement``, ``_Deletion``,
    ``_Addition`` or ``_Unchanged``. ``ed.UpdateICS()`` is called once after
    all residues have been processed.

    :param clean_entity: The structure whose residues are canonicalized
    :param ed: The editor used to rename residues and delete atoms
    :param compound_lib: The compound library used to find parent compounds
    """
    hydrogen_elements = ("H", "D")

    for res in clean_entity.residues:
        if not (res.IsValid() and res.IsPeptideLinking()):
            continue

        parent_olc = res.one_letter_code
        if parent_olc == "X":
            # No standard parent is known: strip the sidechain and treat the
            # remaining atoms as regular (non-hetatom) atoms.
            _DeleteSidechain(res, ed)
            for atom in res.atoms:
                atom.is_hetatom = False
            continue

        parent_tlc = conop.OneLetterCodeToResidueName(parent_olc)
        parent_res = compound_lib.FindCompound(parent_tlc)
        if not parent_res:
            _DeleteSidechain(res, ed)
            for atom in res.atoms:
                atom.is_hetatom = False
            print("Removing sidechain of %s, because it has not been found "
                  "in the compound library" % parent_tlc)
            continue

        # Collect the heavy-atom names of the residue.
        modif_atom_names = {atom.name for atom in res.atoms
                            if atom.element not in hydrogen_elements}
        # If the residue carries an OXT atom, take all the atoms of the
        # parent compound; otherwise leave out the parent's leaving atoms.
        keep_leaving_atoms = res.FindAtom("OXT").IsValid()
        parent_atom_names = {
            spec.name for spec in parent_res.atom_specs
            if spec.element not in hydrogen_elements
            and (keep_leaving_atoms or not spec.is_leaving)
        }
        additional_parent_atoms = parent_atom_names - modif_atom_names
        additional_modif_atoms = modif_atom_names - parent_atom_names

        # WHEN mmCIF WILL BE USED, CHANGE IsPeptideLinking() TO IsProtein(),
        # TO EXCLUDE LIGANDS FROM CANONICALISATION
        if not res.atoms[0].is_hetatom:
            # The residue is a peptide but not a ligand (is a protein res):
            # only strip the sidechain if it is incomplete.
            if additional_parent_atoms:
                _DeleteSidechain(res, ed)
            continue

        old_name = res.name
        ed.RenameResidue(res, parent_tlc)
        if additional_parent_atoms and additional_modif_atoms:
            # replacement: atoms are both missing and extra
            _Replacement(res, ed, old_name)
        elif additional_parent_atoms:
            # deletion: parent atoms are missing
            _Deletion(res, ed)
        elif additional_modif_atoms:
            # addition: the residue carries extra atoms
            _Addition(res, ed, additional_modif_atoms)
        else:
            # unchanged, later check stereochemistry or H atoms
            _Unchanged(res, ed)

    ed.UpdateICS()
def _Replacement(res, ed, old_name):
    """
    Handle a modified residue that both lacks parent atoms and carries
    additional atoms.

    Only selenomethionine (``old_name == "MSE"``) is converted specifically:
    its "SE" atom is replaced by an "SD" sulfur atom with the same position,
    occupancy and b-factor. In every other case, including an MSE residue
    without a valid "SE" atom, the sidechain is removed with
    ``_DeleteSidechain``.

    On every path the hetatom flag of the remaining atoms is cleared, in line
    with what ``_Deletion``, ``_Addition`` and ``_Unchanged`` do.

    :param res: The residue to process
    :param ed: The editor used to insert and delete atoms
    :param old_name: The name of the residue before it was renamed
    """
    # TEMP ONLY MSE
    selenium = res.FindAtom("SE") if old_name == "MSE" else None
    if selenium is not None and selenium.IsValid():
        # S radius=~1; SE=~1.2
        ed.InsertAtom(res, "SD", selenium.pos, "S",
                      selenium.occupancy, selenium.b_factor)
        ed.DeleteAtom(selenium)
    else:
        _DeleteSidechain(res, ed)

    # The residue is now treated as its standard parent, so none of its atoms
    # should stay flagged as hetatom.
    for atom in res.atoms:
        atom.is_hetatom = False
def _Deletion(res, ed):
    """
    Strip the sidechain of *res* and clear the hetatom flag of what is left.

    :param res: The residue to process
    :param ed: The editor passed on to ``_DeleteSidechain``
    """
    _DeleteSidechain(res, ed)
    for remaining_atom in res.atoms:
        remaining_atom.is_hetatom = False
def _Addition(res, ed, additional_modif_atoms):
    """
    Delete the named additional atoms from *res* and clear the hetatom flag
    of the atoms that remain.

    :param res: The residue to process
    :param ed: The editor used to delete the atoms
    :param additional_modif_atoms: Names of the atoms to delete; names for
      which ``res.FindAtom`` returns an invalid handle are skipped
    """
    # Lazy lookup: each atom is searched for right before it is deleted.
    extra_atoms = (res.FindAtom(name) for name in additional_modif_atoms)
    for extra_atom in extra_atoms:
        if extra_atom.IsValid():
            ed.DeleteAtom(extra_atom)

    for remaining_atom in res.atoms:
        remaining_atom.is_hetatom = False
def _Unchanged(res, ed):
    """
    Handle a modified residue whose heavy-atom names match its parent.

    A residue of chemical class ``mol.D_PEPTIDE_LINKING`` is deleted
    entirely. Any other residue loses its sidechain and has the hetatom flag
    of its remaining atoms cleared.

    :param res: The residue to process
    :param ed: The editor used to delete the residue or its atoms
    """
    if res.chem_class == mol.D_PEPTIDE_LINKING:
        ed.DeleteResidue(res)
        return

    _DeleteSidechain(res, ed)
    for remaining_atom in res.atoms:
        remaining_atom.is_hetatom = False
def _DeleteSidechain(res, ed):
    """
    Delete every atom of *res* that is not named CA, CB, C, N or O.

    :param res: The residue whose atoms are filtered
    :param ed: The editor used to delete the atoms
    """
    kept_atom_names = ('CA', 'CB', 'C', 'N', 'O')
    # Iterate over a snapshot so that deleting atoms can never disturb the
    # iteration (whether res.atoms is already a copy is not visible here).
    for atom in list(res.atoms):
        if atom.name not in kept_atom_names:
            ed.DeleteAtom(atom)
# Visible functions. Entries of __all__ must be the *names* (strings) of the
# public objects; a non-string entry makes `from <module> import *` raise a
# TypeError.
__all__ = ["Cleanup"]
|