File: hydrogen_rules.py

package info (click to toggle)
promod3 3.4.2%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 966,596 kB
  • sloc: cpp: 55,820; python: 18,058; makefile: 85; sh: 51
file content (125 lines) | stat: -rw-r--r-- 4,377 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and
#                          Biozentrum - University of Basel
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#   http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""Code generation of code snippets for hydrogen_constructor."""

from promod3 import loop
from ost import conop
from ost.mol import mm

########################################################################
# HELPER
def NameIndex(aa_tlc, name):
  if name in ["N", "CA", "C", "O", "CB"]:
    return "BB_%s_INDEX" % name
  else:
    return "%s_%s_INDEX" % (aa_tlc, name)

def GetHeavyAtomName(aa_tlc, ff_name):
  # only exc. is ILE-CD (ff_name) -> CD1 (pdb_name) for both ff
  if aa_tlc == "ILE" and ff_name == "CD":
    return "CD1"
  else:
    return ff_name

def GetFfResName(ff, aa_tlc):
  # fix residues to have fully protanated variants
  # -> ignored for ASP (ASPH) and GLU (GLUH) as it never happens
  if aa_tlc == "HIS":
    return ff.GetResidueRenamingMain("HISH")
  else:
    return ff.GetResidueRenamingMain(aa_tlc)

def GetHydrogenRules(ff, aa_tlc):
  # rule-format: (number, method, names, anchors)
  hc = ff.GetHydrogenConstructor(GetFfResName(ff, aa_tlc))
  return hc.GetHydrogenRules()
########################################################################

# HC list of polar hydrogens (def PDB naming)
# -> note HN is also polar but not considered in rules
polar_hydrogens = {
  "ARG": ["HE", "HH11", "HH12", "HH21", "HH22"],
  "ASN": ["HD21", "HD22"],
  "GLN": ["HE21", "HE22"],
  "LYS": ["HZ1", "HZ2", "HZ3"],
  "SER": ["HG"],
  "TRP": ["HE1"],
  "TYR": ["HH"],
  "THR": ["HG1"],
  "HIS": ["HD1", "HE2"]
}

# get mapping to CHARMM for AA lookup
ff = mm.LoadCHARMMForcefield()
atom_mapping_ff2pdb = dict()
for i in range(conop.XXX):
  aa = conop.AminoAcid(i)
  aa_tlc = conop.AminoAcidToResidueName(aa)
  atom_mapping_ff2pdb[aa_tlc] = dict()
  for j in range(loop.AminoAcidLookup.GetNumHydrogens(aa)):
    aah = loop.AminoAcidLookup.GetAAH(aa, j)
    aname_charmm = loop.AminoAcidLookup.GetAtomNameCharmm(aah)
    aname = loop.AminoAcidLookup.GetAtomName(aah)
    atom_mapping_ff2pdb[aa_tlc][aname_charmm] = aname

# get hydrogen construction rules and generate code
for i in range(conop.XXX):
  aa_tlc = conop.AminoAcidToResidueName(conop.AminoAcid(i))
  print("\n  // " + aa_tlc)
  for rule in GetHydrogenRules(ff, aa_tlc):
    # get data
    rule_number = rule[0]
    rule_rule = rule[1]
    rule_names = rule[2]
    rule_anchors = rule[3]
    # skip HN
    if "-C" in rule_anchors:
      continue
    # check consistency
    assert(rule_number == len(rule_names))
    assert(len(rule_names) in [1,2,3])
    assert(len(rule_anchors) in [3,4])
    # translate atom names
    hydrogens = [atom_mapping_ff2pdb[aa_tlc][h_name] for h_name in rule_names]
    anchors = [GetHeavyAtomName(aa_tlc, a_name) for a_name in rule_anchors]
    # check polarity
    is_polar = False
    if aa_tlc in polar_hydrogens:
      p_check = [hn in polar_hydrogens[aa_tlc] for hn in hydrogens]
      is_polar = all(p_check)
      # check if our assumption is safe (all polar or none)
      if not is_polar:
        assert(not any(p_check))
    # check HisProtonationState (HD1 for HISD, HE2 for HISE)
    prot_state = None
    if aa_tlc == "HIS" and hydrogens[0] == "HD1":
      assert(len(hydrogens) == 1)
      prot_state = "PROT_STATE_HISD"
    if aa_tlc == "HIS" and hydrogens[0] == "HE2":
      assert(len(hydrogens) == 1)
      prot_state = "PROT_STATE_HISE"

    # write code
    args = [aa_tlc, str(rule_rule), str(is_polar).lower()]
    if prot_state is not None:
      args += [prot_state]
    print("  AddRule_(" + ", ".join(args) + ");")
    args = [aa_tlc] + [NameIndex(aa_tlc, an) for an in anchors]
    print("  SetAnchors_(" + ", ".join(args) + ");")
    args = [aa_tlc] + ["%s_%s_INDEX" % (aa_tlc, hn) for hn in hydrogens]
    print("  SetHydrogens_(" + ", ".join(args) + ");")