1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
|
import numpy as np
from ase.atoms import Atom, Atoms
from ase.parallel import paropen
from ase.geometry import cellpar_to_cell
"""Module to read and write atoms in PDB file format"""
def read_proteindatabank(fileobj, index=-1):
"""Read PDB files.
The format is assumed to follow the description given in
http://www.wwpdb.org/documentation/format32/sect8.html and
http://www.wwpdb.org/documentation/format32/sect9.html."""
if isinstance(fileobj, str):
fileobj = open(fileobj)
images = []
orig = np.identity(3)
trans = np.zeros(3)
atoms = Atoms()
for line in fileobj.readlines():
if line.startswith('CRYST1'):
cellpar = [float(word) for word in line[6:54].split()]
atoms.set_cell(cellpar_to_cell(cellpar))
for c in range(3):
if line.startswith('ORIGX' + '123'[c]):
pars = [float(word) for word in line[10:55].split()]
orig[c] = pars[:3]
trans[c] = pars[3]
if line.startswith('ATOM') or line.startswith('HETATM'):
try:
# Atom name is arbitrary and does not necessarily
# contain the element symbol. The specification
# requires the element symbol to be in columns 77+78.
symbol = line[76:78].strip().lower().capitalize()
words = line[30:55].split()
position = np.array([float(words[0]),
float(words[1]),
float(words[2])])
position = np.dot(orig, position) + trans
atoms.append(Atom(symbol, position))
except:
pass
if line.startswith('ENDMDL'):
images.append(atoms)
atoms = Atoms()
if len(images) == 0:
images.append(atoms)
return images[index]
def write_proteindatabank(fileobj, images):
"""Write images to PDB-file.
The format is assumed to follow the description given in
http://www.wwpdb.org/documentation/format32/sect9.html."""
if isinstance(fileobj, str):
fileobj = paropen(fileobj, 'w')
if not isinstance(images, (list, tuple)):
images = [images]
if images[0].get_pbc().any():
from ase.geometry import cell_to_cellpar
cellpar = cell_to_cellpar(images[0].get_cell())
# ignoring Z-value, using P1 since we have all atoms defined explicitly
format = 'CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1\n'
fileobj.write(format % (cellpar[0], cellpar[1], cellpar[2],
cellpar[3], cellpar[4], cellpar[5]))
# 1234567 123 6789012345678901 89 67 456789012345678901234567 890
format = ('ATOM %5d %4s MOL 1 %8.3f%8.3f%8.3f 1.00 0.00'
' %2s \n')
# RasMol complains if the atom index exceeds 100000. There might
# be a limit of 5 digit numbers in this field.
MAXNUM = 100000
symbols = images[0].get_chemical_symbols()
natoms = len(symbols)
for n, atoms in enumerate(images):
fileobj.write('MODEL ' + str(n + 1) + '\n')
p = atoms.get_positions()
for a in range(natoms):
x, y, z = p[a]
fileobj.write(format % (a % MAXNUM, symbols[a],
x, y, z, symbols[a].rjust(2)))
fileobj.write('ENDMDL\n')
|