File: sdf.py

package info (click to toggle)
python-ase 3.26.0-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 15,484 kB
  • sloc: python: 148,112; xml: 2,728; makefile: 110; javascript: 47
file content (39 lines) | stat: -rw-r--r-- 1,191 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# fmt: off

"""Reads chemical data in SDF format (wraps the molfile format).

See https://en.wikipedia.org/wiki/Chemical_table_file#SDF
"""
from typing import TextIO

from ase.atoms import Atoms
from ase.utils import reader


def get_num_atoms_sdf_v2000(first_line: str) -> int:
    """Parse the first line extracting the number of atoms.

    The V2000 dialect uses a fixed field length of 3, which means there
    won't be space between the numbers if there are 100+ atoms, and
    the format doesn't support 1000+ atoms at all.

    http://biotech.fyicenter.com/1000024_SDF_File_Format_Specification.html
    """
    return int(first_line[0:3])  # first three characters


@reader
def read_sdf(file_obj: TextIO) -> Atoms:
    """Read the sdf data and compose the corresponding Atoms object."""
    lines = file_obj.readlines()
    # first three lines header
    del lines[:3]

    num_atoms = get_num_atoms_sdf_v2000(lines.pop(0))
    positions = []
    symbols = []
    for line in lines[:num_atoms]:
        x, y, z, symbol = line.split()[:4]
        symbols.append(symbol)
        positions.append((float(x), float(y), float(z)))
    return Atoms(symbols=symbols, positions=positions)