File: proteindatabank.py

package info (click to toggle)
python-ase 3.12.0-2
links: PTS, VCS
area: main
in suites: stretch
size: 14,192 kB
ctags: 8,112
sloc: python: 93,375; sh: 99; makefile: 94
file content (92 lines) | stat: -rw-r--r-- 3,416 bytes
import numpy as np

from ase.atoms import Atom, Atoms
from ase.parallel import paropen
from ase.geometry import cellpar_to_cell

"""Module to read and write atoms in PDB file format"""


def read_proteindatabank(fileobj, index=-1):
    """Read PDB files.

    The format is assumed to follow the description given in
    http://www.wwpdb.org/documentation/format32/sect8.html and
    http://www.wwpdb.org/documentation/format32/sect9.html."""
    if isinstance(fileobj, str):
        fileobj = open(fileobj)

    images = []
    orig = np.identity(3)
    trans = np.zeros(3)
    atoms = Atoms()
    for line in fileobj.readlines():
        if line.startswith('CRYST1'):
            cellpar = [float(word) for word in line[6:54].split()]
            atoms.set_cell(cellpar_to_cell(cellpar))
        for c in range(3):
            if line.startswith('ORIGX' + '123'[c]):
                pars = [float(word) for word in line[10:55].split()]
                orig[c] = pars[:3]
                trans[c] = pars[3]

        if line.startswith('ATOM') or line.startswith('HETATM'):
            try:
                # Atom name is arbitrary and does not necessarily
                # contain the element symbol.  The specification
                # requires the element symbol to be in columns 77+78.
                symbol = line[76:78].strip().lower().capitalize()
                words = line[30:55].split()
                position = np.array([float(words[0]),
                                     float(words[1]),
                                     float(words[2])])
                position = np.dot(orig, position) + trans
                atoms.append(Atom(symbol, position))
            except:
                pass
        if line.startswith('ENDMDL'):
            images.append(atoms)
            atoms = Atoms()
    if len(images) == 0:
        images.append(atoms)
    return images[index]


def write_proteindatabank(fileobj, images):
    """Write images to PDB-file.

    The format is assumed to follow the description given in
    http://www.wwpdb.org/documentation/format32/sect9.html."""
    if isinstance(fileobj, str):
        fileobj = paropen(fileobj, 'w')

    if not isinstance(images, (list, tuple)):
        images = [images]

    if images[0].get_pbc().any():
        from ase.geometry import cell_to_cellpar
        cellpar = cell_to_cellpar(images[0].get_cell())
        # ignoring Z-value, using P1 since we have all atoms defined explicitly
        format = 'CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1\n'
        fileobj.write(format % (cellpar[0], cellpar[1], cellpar[2],
                                cellpar[3], cellpar[4], cellpar[5]))

    #     1234567 123 6789012345678901   89   67   456789012345678901234567 890
    format = ('ATOM  %5d %4s MOL     1    %8.3f%8.3f%8.3f  1.00  0.00'
              '          %2s  \n')

    # RasMol complains if the atom index exceeds 100000. There might
    # be a limit of 5 digit numbers in this field.
    MAXNUM = 100000

    symbols = images[0].get_chemical_symbols()
    natoms = len(symbols)

    for n, atoms in enumerate(images):
        fileobj.write('MODEL     ' + str(n + 1) + '\n')
        p = atoms.get_positions()
        for a in range(natoms):
            x, y, z = p[a]
            fileobj.write(format % (a % MAXNUM, symbols[a],
                                    x, y, z, symbols[a].rjust(2)))
        fileobj.write('ENDMDL\n')