File: ideal_bb_param.py

package info (click to toggle)
promod3 3.2.1%2Bds-6
links: PTS, VCS
area: main
in suites: bookworm
size: 1,033,844 kB
sloc: cpp: 55,507; python: 17,487; makefile: 84; sh: 51
file content (220 lines) | stat: -rw-r--r-- 5,171 bytes
# Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and
#                          Biozentrum - University of Basel
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#   http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from ost import conop
import numpy as np

# Read the whole topology file into memory as a list of lines. The path is
# relative, so it is resolved against the current working directory. The
# context manager closes the file handle as soon as the lines have been read.
with open("top_all36_prot.rtf", 'r') as rtf_file:
  file_content = rtf_file.readlines()

# Collect the residue names for the amino acid indices 0..19 as reported by
# conop. "HIS" is replaced by "HSE": this is a hack so that the name matches
# the entry that has to be looked up in the rtf file.
aa_names = []
for aa_idx in range(20):
  res_name = conop.AminoAcidToResidueName(conop.AminoAcid(aa_idx))
  aa_names.append("HSE" if res_name == "HIS" else res_name)

# One (initially empty) list of IC records per residue name.
ic_data = {res_name: [] for res_name in aa_names}


# Walk through the file and collect the whitespace-split "IC" lines of every
# residue listed in aa_names.
#   current_aa:             name of the last "RESI" entry found in aa_names
#   in_interesting_section: True while the most recent "RESI" line named one
#                           of the residues in aa_names
# NOTE(review): only "RESI" lines switch the flag; any other record type that
# might start a new entry would not reset it -- confirm this is fine for the
# input file.
current_aa = None
in_interesting_section = False

for raw_line in file_content:
  tokens = raw_line.split()
  if not tokens:
    # blank line
    continue

  keyword = tokens[0]
  if keyword == "RESI":
    in_interesting_section = tokens[1] in aa_names
    if in_interesting_section:
      current_aa = tokens[1]

  if in_interesting_section and keyword == "IC":
    ic_data[current_aa].append(tokens)

# Some tokens of the collected IC records still carry '*' characters (e.g. in
# front of atom names). Remove them from every token, updating each record
# list in place.
for res_name in aa_names:
  for ic_record in ic_data[res_name]:
    ic_record[:] = [token.replace('*','') for token in ic_record]


# Backbone parameters per residue name, filled from the collected IC records.
# Bonds are stored as read from the file, angles are converted from degrees
# to radians.
n_ca_bonds = dict()
ca_c_bonds = dict()
c_n_bonds = dict()

c_n_ca_angles = dict()
n_ca_c_angles = dict()
ca_c_n_angles = dict()


def _find_ic_value(ic_entries, atoms, first_atom_idx, value_idx):
  """Return a numeric column of the first IC record matching *atoms*.

  A record matches if its tokens starting at *first_atom_idx* are equal to
  the list *atoms*. The token at *value_idx* of the first matching record is
  returned as float; None is returned if no record matches.
  """
  end_idx = first_atom_idx + len(atoms)
  for entry in ic_entries:
    if entry[first_atom_idx:end_idx] == atoms:
      return float(entry[value_idx])
  return None


# Each query: (target dict, label used in the error message, atom names,
#              index of the first atom token, index of the value token).
# NOTE(review): the indices assume records of the form
#   IC a1 a2 a3 a4 bond(a1,a2) angle(a1,a2,a3) dihedral angle(a2,a3,a4) bond(a3,a4)
# and "-C"/"+N" presumably refer to atoms of the neighbouring residues --
# confirm against the rtf format description.
_bond_queries = (
  (n_ca_bonds, "N-CA", ["N","CA"], 1, 5),
  (ca_c_bonds, "CA-C", ["CA","C"], 3, 9),
  (c_n_bonds, "C-N", ["C","+N"], 3, 9),
)
_angle_queries = (
  (c_n_ca_angles, "C-N-CA", ["-C","N","CA"], 1, 6),
  (n_ca_c_angles, "N-CA-C", ["N","CA","C"], 2, 8),
  (ca_c_n_angles, "CA-C-N", ["CA","C","+N"], 2, 8),
)

for aa_name in aa_names:

  aa_ic_data = ic_data[aa_name]

  # Every parameter gets its own presence check, so a missing record is
  # reported right here with the name of the parameter that is missing.
  for target, label, atoms, atom_idx, value_idx in _bond_queries:
    value = _find_ic_value(aa_ic_data, atoms, atom_idx, value_idx)
    if value is None:
      raise RuntimeError("Could not find %s bond for %s"%(label, aa_name))
    target[aa_name] = value

  for target, label, atoms, atom_idx, value_idx in _angle_queries:
    value = _find_ic_value(aa_ic_data, atoms, atom_idx, value_idx)
    if value is None:
      raise RuntimeError("Could not find %s angle for %s"%(label, aa_name))
    # degrees -> radians
    target[aa_name] = value/180*np.pi



# Code generation: print a C++ function BBTraceParam to stdout that assigns
# the extracted backbone parameters in a switch over the one letter code.
for signature_line in (
  "void BBTraceParam(char olc, Real& n_ca_bond, Real& ca_c_bond,",
  "                  Real& c_n_bond, Real& c_n_ca_angle,",
  "                  Real& n_ca_c_angle, Real& ca_c_n_angle){",
  "  switch(olc){",
):
  print(signature_line)

# One assignment per parameter: (format of the C++ line, source dict).
assignments = (
  ("      n_ca_bond = %f;", n_ca_bonds),
  ("      ca_c_bond = %f;", ca_c_bonds),
  ("      c_n_bond = %f;", c_n_bonds),
  ("      c_n_ca_angle = %f;", c_n_ca_angles),
  ("      n_ca_c_angle = %f;", n_ca_c_angles),
  ("      ca_c_n_angle = %f;", ca_c_n_angles),
)

for aa_name in aa_names:

  # "HSE" is the name used for the lookup in the rtf file, the generated
  # case label for it is 'H'. All other names are translated by conop.
  if aa_name == "HSE":
    olc = 'H'
  else:
    olc = conop.ResidueNameToOneLetterCode(aa_name)

  print("    case \'%s\':{"%olc)
  for line_format, table in assignments:
    print(line_format%table[aa_name])
  print("      break;")
  print("    }")

# Any other character ends up in the default branch, which throws.
for closing_line in (
  "    default:{",
  "      throw promod3::Error(\"Invalid OneLetterCode observed!\");",
  "    }",
  "  }",
  "}",
):
  print(closing_line)


# Reference values for the unit tests: print the average of every parameter
# over all residues. The sum is divided by the fixed count of 20, which is
# the number of amino acids this script iterates over.
averaged_groups = (
  ("bond avg values:", (
    ("n_ca_bonds", n_ca_bonds),
    ("ca_c_bonds", ca_c_bonds),
    ("c_n_bonds", c_n_bonds),
  )),
  ("angle avg values:", (
    ("c_n_ca_angles", c_n_ca_angles),
    ("n_ca_c_angles", n_ca_c_angles),
    ("ca_c_n_angles", ca_c_n_angles),
  )),
)

for group_title, tables in averaged_groups:
  print(group_title)
  for table_label, table in tables:
    # plain left-to-right accumulation starting at 0.0
    total = 0.0
    for value in table.values():
      total += value
    print(table_label, total / 20)