File: optimade.py

package info (click to toggle)
python-emmet-core 0.84.2-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 77,220 kB
  • sloc: python: 16,355; makefile: 30
file content (132 lines) | stat: -rw-r--r-- 4,673 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import string
from datetime import datetime

from monty.fractions import gcd
from optimade.models import Species, StructureResourceAttributes
from pymatgen.core.composition import Composition, formula_double_format
from pymatgen.core.structure import Structure

from emmet.core.base import EmmetBaseModel
from emmet.core.mpid import MPID

# All 26 uppercase ASCII letters, in alphabetical order.
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"


def optimade_form(comp: Composition):
    """
    Build a compact formula string with the symbols in alphabetical order.

    Each symbol is followed by its amount truncated to an integer, except that
    an amount equal to 1 is never written, and no amounts are written at all
    when the composition has at most one distinct non-zero amount.

    :param comp: mapping from element/species to amount; it is indexed here by
        the string form of its keys.
    :return: the concatenated formula string.
    """
    ordered_symbols = sorted(str(key) for key in comp.keys())
    distinct_amounts = {comp[sym] for sym in ordered_symbols if comp[sym]}
    # With a single distinct amount every element shares it, so the amounts
    # are dropped entirely.
    show_amounts = len(distinct_amounts) > 1

    pieces = []
    for sym in ordered_symbols:
        amount = comp[sym]
        if show_amounts and amount != 1:
            pieces.extend((sym, str(int(amount))))
        else:
            pieces.append(sym)

    return "".join(pieces)


def optimade_anonymous_form(comp: Composition):
    """
    Build an anonymous formula in which elements are replaced by A, B, C, ...

    Labels are assigned in order of descending amount. When every amount in
    ``comp`` is integer-valued, the element composition is first divided by
    the greatest common divisor of those amounts.

    :param comp: composition to anonymise.
    :return: the anonymous formula string; an amount equal to 1 is not written.
    """

    def _amount_text(amount):
        # 1 is implicit; near-integers are printed without a decimal part.
        if amount == 1:
            return ""
        if abs(amount % 1) < 1e-8:
            return str(int(amount))
        return str(amount)

    reduced = comp.element_composition
    if all(value == int(value) for value in comp.values()):
        reduced /= gcd(*(int(value) for value in comp.values()))

    descending = sorted(reduced.values(), reverse=True)
    # zip stops at the shorter input, so at most 26 labels are produced.
    return "".join(
        f"{label}{_amount_text(amount)}"
        for label, amount in zip(string.ascii_uppercase, descending)
    )


def hill_formula(comp: Composition) -> str:
    """
    Return the formula of ``comp`` written in Hill notation.

    In the Hill system carbon is listed first, hydrogen second, and all
    remaining elements follow in alphabetical order of their symbols. If the
    formula contains no carbon, every element (hydrogen included) is listed
    alphabetically.

    :param comp: composition to format; its element composition is used.
    :return: the Hill formula, with no separator between elements and with
        amounts equal to 1 left out.
    """
    elem_comp = comp.element_composition
    symbols = sorted(el.symbol for el in elem_comp.keys())

    if "C" in symbols:
        # Hydrogen is promoted to second place only when carbon is present.
        ordered = ["C"]
        if "H" in symbols:
            ordered.append("H")
        ordered += [sym for sym in symbols if sym not in ("C", "H")]
    else:
        ordered = symbols

    parts = []
    for sym in ordered:
        amount = elem_comp[sym]
        if amount == 1:
            parts.append(f"{sym}")
        else:
            parts.append(f"{sym}{formula_double_format(amount)}")
    return "".join(parts)


class OptimadeMaterialsDoc(StructureResourceAttributes, EmmetBaseModel):
    """
    Optimade Structure resource with a few extra MP specific fields for materials

    Thermo calculations are stored as a nested dict, with keys corresponding to the functional
    used to perform stability calc, i.e., R2SCAN, GGA_GGA+U_R2SCAN, or GGA_GGA+U
    """

    # Identifier of the material this document describes.
    material_id: MPID
    # Chemical system string taken from the structure's composition.
    chemical_system: str
    # Thermo calculation data, stored as passed in via ``thermo_calcs``.
    stability: dict

    @classmethod
    def from_structure(
        cls,
        material_id: MPID,
        structure: Structure,
        last_updated_structure: datetime,
        thermo_calcs: dict,
        **kwargs,
    ) -> StructureResourceAttributes:
        """
        Build an OPTIMADE document from a pymatgen structure.

        :param material_id: identifier stored as ``material_id``.
        :param structure: source structure. It is copied before oxidation
            states are stripped, so the caller's object is left untouched.
        :param last_updated_structure: timestamp stored as ``last_modified``.
        :param thermo_calcs: thermo data stored as ``stability``.
        :param kwargs: extra fields forwarded unchanged to the constructor.
        :return: a new instance of ``cls``.
        """
        # Work on a copy: remove_oxidation_states() modifies the structure in
        # place and the caller may still need the decorated version.
        structure = structure.copy()
        structure.remove_oxidation_states()

        composition = structure.composition
        elements = sorted({el.symbol for el in composition.elements})
        fractions = composition.fractional_composition

        # One Species entry per distinct species string; later sites with the
        # same string simply overwrite an identical entry.
        species_by_name = {
            site.species_string: Species(
                chemical_symbols=[site.species_string],
                concentration=[1.0],
                name=site.species_string,
            )
            for site in structure
        }

        return cls(
            material_id=material_id,
            chemical_system=composition.chemical_system,
            stability=thermo_calcs,
            elements=elements,
            nelements=len(composition.elements),
            # Ratios are looked up per sorted symbol so that entry i of
            # elements_ratios refers to entry i of elements.
            elements_ratios=[fractions[symbol] for symbol in elements],
            chemical_formula_descriptive=optimade_form(composition),
            chemical_formula_reduced=optimade_form(
                composition.get_reduced_composition_and_factor()[0]
            ),
            chemical_formula_anonymous=optimade_anonymous_form(composition),
            chemical_formula_hill=hill_formula(composition),
            dimension_types=[1, 1, 1],
            nperiodic_dimensions=3,
            lattice_vectors=structure.lattice.matrix.tolist(),
            cartesian_site_positions=[site.coords.tolist() for site in structure],
            nsites=len(structure),
            species=list(species_by_name.values()),
            species_at_sites=[site.species_string for site in structure],
            last_modified=last_updated_structure,
            structure_features=[],
            **kwargs,
        )