1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
|
# Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk)
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""mmCIF parser"""
from __future__ import print_function
from string import ascii_letters
import numpy
from Bio._py3k import range
from Bio.PDB.MMCIF2Dict import MMCIF2Dict
from Bio.PDB.StructureBuilder import StructureBuilder
from Bio.PDB.PDBExceptions import PDBConstructionException
class MMCIFParser(object):
def get_structure(self, structure_id, filename):
self._mmcif_dict=MMCIF2Dict(filename)
self._structure_builder=StructureBuilder()
self._build_structure(structure_id)
return self._structure_builder.get_structure()
def _build_structure(self, structure_id):
mmcif_dict=self._mmcif_dict
atom_id_list=mmcif_dict["_atom_site.label_atom_id"]
residue_id_list=mmcif_dict["_atom_site.label_comp_id"]
try:
element_list = mmcif_dict["_atom_site.type_symbol"]
except KeyError:
element_list = None
seq_id_list=mmcif_dict["_atom_site.label_seq_id"]
chain_id_list=mmcif_dict["_atom_site.label_asym_id"]
x_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_x"]]
y_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_y"]]
z_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_z"]]
alt_list=mmcif_dict["_atom_site.label_alt_id"]
b_factor_list=mmcif_dict["_atom_site.B_iso_or_equiv"]
occupancy_list=mmcif_dict["_atom_site.occupancy"]
fieldname_list=mmcif_dict["_atom_site.group_PDB"]
try:
serial_list = [int(n) for n in mmcif_dict["_atom_site.pdbx_PDB_model_num"]]
except KeyError:
# No model number column
serial_list = None
except ValueError:
# Invalid model number (malformed file)
raise PDBConstructionException("Invalid model number")
try:
aniso_u11=mmcif_dict["_atom_site.aniso_U[1][1]"]
aniso_u12=mmcif_dict["_atom_site.aniso_U[1][2]"]
aniso_u13=mmcif_dict["_atom_site.aniso_U[1][3]"]
aniso_u22=mmcif_dict["_atom_site.aniso_U[2][2]"]
aniso_u23=mmcif_dict["_atom_site.aniso_U[2][3]"]
aniso_u33=mmcif_dict["_atom_site.aniso_U[3][3]"]
aniso_flag=1
except KeyError:
# no anisotropic B factors
aniso_flag=0
# if auth_seq_id is present, we use this.
# Otherwise label_seq_id is used.
if "_atom_site.auth_seq_id" in mmcif_dict:
seq_id_list=mmcif_dict["_atom_site.auth_seq_id"]
else:
seq_id_list=mmcif_dict["_atom_site.label_seq_id"]
# Now loop over atoms and build the structure
current_chain_id=None
current_residue_id=None
structure_builder=self._structure_builder
structure_builder.init_structure(structure_id)
structure_builder.init_seg(" ")
# Historically, Biopython PDB parser uses model_id to mean array index
# so serial_id means the Model ID specified in the file
current_model_id = 0
current_serial_id = 0
for i in range(0, len(atom_id_list)):
x=x_list[i]
y=y_list[i]
z=z_list[i]
resname=residue_id_list[i]
chainid=chain_id_list[i]
altloc=alt_list[i]
if altloc==".":
altloc=" "
resseq=seq_id_list[i]
name=atom_id_list[i]
# occupancy & B factor
try:
tempfactor=float(b_factor_list[i])
except ValueError:
raise PDBConstructionException("Invalid or missing B factor")
try:
occupancy=float(occupancy_list[i])
except ValueError:
raise PDBConstructionException("Invalid or missing occupancy")
fieldname=fieldname_list[i]
if fieldname=="HETATM":
hetatm_flag="H"
else:
hetatm_flag=" "
if serial_list is not None:
# model column exists; use it
serial_id = serial_list[i]
if current_serial_id != serial_id:
# if serial changes, update it and start new model
current_serial_id = serial_id
structure_builder.init_model(current_model_id, current_serial_id)
current_model_id += 1
else:
# no explicit model column; initialize single model
structure_builder.init_model(current_model_id)
if current_chain_id!=chainid:
current_chain_id=chainid
structure_builder.init_chain(current_chain_id)
current_residue_id=resseq
icode, int_resseq=self._get_icode(resseq)
structure_builder.init_residue(resname, hetatm_flag, int_resseq,
icode)
elif current_residue_id!=resseq:
current_residue_id=resseq
icode, int_resseq=self._get_icode(resseq)
structure_builder.init_residue(resname, hetatm_flag, int_resseq,
icode)
coord=numpy.array((x, y, z), 'f')
element = element_list[i] if element_list else None
structure_builder.init_atom(name, coord, tempfactor, occupancy, altloc,
name, element=element)
if aniso_flag==1:
u=(aniso_u11[i], aniso_u12[i], aniso_u13[i],
aniso_u22[i], aniso_u23[i], aniso_u33[i])
mapped_anisou = [float(x) for x in u]
anisou_array=numpy.array(mapped_anisou, 'f')
structure_builder.set_anisou(anisou_array)
# Now try to set the cell
try:
a=float(mmcif_dict["_cell.length_a"])
b=float(mmcif_dict["_cell.length_b"])
c=float(mmcif_dict["_cell.length_c"])
alpha=float(mmcif_dict["_cell.angle_alpha"])
beta=float(mmcif_dict["_cell.angle_beta"])
gamma=float(mmcif_dict["_cell.angle_gamma"])
cell=numpy.array((a, b, c, alpha, beta, gamma), 'f')
spacegroup=mmcif_dict["_symmetry.space_group_name_H-M"]
spacegroup=spacegroup[1:-1] # get rid of quotes!!
if spacegroup is None:
raise Exception
structure_builder.set_symmetry(spacegroup, cell)
except:
pass # no cell found, so just ignore
def _get_icode(self, resseq):
"""Tries to return the icode. In MMCIF files this is just part of
resseq! In PDB files, it's a separate field."""
last_resseq_char=resseq[-1]
if last_resseq_char in ascii_letters:
icode=last_resseq_char
int_resseq=int(resseq[0:-1])
else:
icode=" "
int_resseq=int(resseq)
return icode, int_resseq
if __name__=="__main__":
import sys
if len(sys.argv) != 2:
print("Usage: python MMCIFparser.py filename")
raise SystemExit
filename=sys.argv[1]
p=MMCIFParser()
structure=p.get_structure("test", filename)
for model in structure.get_list():
print(model)
for chain in model.get_list():
print(chain)
print("Found %d residues." % len(chain.get_list()))
|