1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
|
#!/usr/bin/env python
# Check column presence and order in the _atom_site category.
# In mmCIF v5 the esd columns (_atom_site.*_esd) were removed.
import sys
from collections import Counter

from gemmi import cif, CifWalk
# Optional esd column names, spliced into USUAL_ORDER right after
# B_iso_or_equiv.  Left empty here; the commented-out value below lists the
# esd columns for checking files that still carry them.
#ESD = 'Cartn_x_esd Cartn_y_esd Cartn_z_esd occupancy_esd B_iso_or_equiv_esd '
ESD = ''

# Expected _atom_site column names (category prefix stripped), in order,
# joined with single spaces so the whole order compares as one string.
USUAL_ORDER = ('group_PDB id type_symbol label_atom_id label_alt_id '
               'label_comp_id label_asym_id label_entity_id label_seq_id '
               'pdbx_PDB_ins_code Cartn_x Cartn_y Cartn_z occupancy '
               'B_iso_or_equiv ' + ESD + 'pdbx_formal_charge '
               'auth_seq_id auth_comp_id auth_asym_id auth_atom_id '
               'pdbx_PDB_model_num')

# Category prefix that every tag of the atom-site loop must carry.
ATOM_SITE_PREFIX = '_atom_site.'


def atom_site_column_order(loop_tags):
    """Return the column names of an _atom_site loop as one string.

    Args:
        loop_tags: iterable of full tag names such as '_atom_site.id'.

    Returns:
        The tag names with the '_atom_site.' prefix removed, joined with
        single spaces, in the order given.

    Raises:
        ValueError: if any tag does not start with '_atom_site.'.
    """
    names = []
    for tag in loop_tags:
        # An explicit check instead of `assert`, so it still runs under
        # `python -O` and a foreign tag is never silently truncated.
        if not tag.startswith(ATOM_SITE_PREFIX):
            raise ValueError('unexpected tag in the _atom_site loop: ' + tag)
        names.append(tag[len(ATOM_SITE_PREFIX):])
    return ' '.join(names)


def count_column_orders(args):
    """Tally the _atom_site column orders found in the given inputs.

    Each argument is passed to CifWalk; for every path it yields, the file's
    sole block is read and the tags of the loop containing _atom_site.id are
    collected.  Orders that differ from USUAL_ORDER are reported on stdout
    as they are encountered.

    Args:
        args: iterable of arguments accepted by CifWalk.

    Returns:
        A Counter mapping each column-order string to the number of files
        in which it was seen (in first-seen order).
    """
    counts = Counter()
    for arg in args:
        for path in CifWalk(arg):
            block = cif.read(path).sole_block()
            loop_tags = block.find_loop("_atom_site.id").get_loop().tags
            order = atom_site_column_order(loop_tags)
            if order != USUAL_ORDER:
                # Show the deviating order next to the expected one, then
                # identify the block it came from.
                print(order)
                print(USUAL_ORDER)
                print(block.name, order)
            counts[order] += 1
    return counts


def main():
    """Check the files named on the command line and print a summary."""
    for key, value in count_column_orders(sys.argv[1:]).items():
        print(value, key)


if __name__ == '__main__':
    main()
# Results: in v4 a few EM structures (5A9Z 5AA0 5FKI 4UDF)
# had different order, with ATOM/HETATM in the middle.
|