File: thermo.py

package info (click to toggle)
python-mp-api 0.45.3-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 6,988 kB
  • sloc: python: 6,712; makefile: 14
file content (190 lines) | stat: -rw-r--r-- 8,057 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
from __future__ import annotations

from collections import defaultdict

import numpy as np
from emmet.core.thermo import ThermoDoc, ThermoType
from monty.json import MontyDecoder
from pymatgen.analysis.phase_diagram import PhaseDiagram
from pymatgen.core import Element

from mp_api.client.core import BaseRester
from mp_api.client.core.utils import validate_ids


class ThermoRester(BaseRester[ThermoDoc]):
    """Client for the ``materials/thermo`` endpoint, yielding ``ThermoDoc`` documents."""

    suffix = "materials/thermo"
    document_model = ThermoDoc  # type: ignore
    supports_versions = True
    primary_key = "thermo_id"

    @staticmethod
    def _validate_thermo_types(thermo_types) -> set:
        """Normalize an iterable of thermo types and reject unknown ones.

        Arguments:
            thermo_types: Iterable of ``ThermoType`` members and/or strings.

        Returns:
            (set) The thermo types, with non-string entries replaced by their ``.value``.

        Raises:
            ValueError: If any entry is not the string form of a ``ThermoType`` member.
        """
        t_types = {t if isinstance(t, str) else t.value for t in thermo_types}
        valid_types = {*map(str, ThermoType.__members__.values())}
        if invalid_types := t_types - valid_types:
            raise ValueError(
                f"Invalid thermo type(s) passed: {invalid_types}, valid types are: {valid_types}"
            )
        return t_types

    def search(
        self,
        material_ids: str | list[str] | None = None,
        chemsys: str | list[str] | None = None,
        energy_above_hull: tuple[float, float] | None = None,
        equilibrium_reaction_energy: tuple[float, float] | None = None,
        formation_energy: tuple[float, float] | None = None,
        formula: str | list[str] | None = None,
        is_stable: bool | None = None,
        num_elements: int | tuple[int, int] | None = None,
        thermo_ids: str | list[str] | None = None,
        thermo_types: ThermoType | str | list[ThermoType | str] | None = None,
        total_energy: tuple[float, float] | None = None,
        uncorrected_energy: tuple[float, float] | None = None,
        num_chunks: int | None = None,
        chunk_size: int = 1000,
        all_fields: bool = True,
        fields: list[str] | None = None,
    ) -> list[ThermoDoc] | list[dict]:
        """Query core thermo docs using a variety of search criteria.

        Arguments:
            material_ids (str, List[str]): A single Material ID string or list of strings
                (e.g., mp-149, [mp-149, mp-13]).
            chemsys (str, List[str]): A chemical system or list of chemical systems
                (e.g., Li-Fe-O, Si-*, [Si-O, Li-Fe-P]).
            energy_above_hull (Tuple[float,float]): Minimum and maximum energy above the hull in eV/atom to consider.
            equilibrium_reaction_energy (Tuple[float,float]): Minimum and maximum equilibrium reaction energy
                in eV/atom to consider.
            formation_energy (Tuple[float,float]): Minimum and maximum formation energy in eV/atom to consider.
            formula (str, List[str]): A formula including anonymized formula
                or wild cards (e.g., Fe2O3, ABO3, Si*). A list of chemical formulas can also be passed
                (e.g., [Fe2O3, ABO3]).
            is_stable (bool): Whether the material is stable.
            num_elements (int, Tuple[int,int]): Minimum and maximum number of elements in the material
                to consider. A single int is used as both the minimum and the maximum.
            thermo_ids (str, List[str]): A single thermo ID or list of thermo IDs to return data for.
                This is a combination of the Materials Project ID and thermo type (e.g. mp-149_GGA_GGA+U).
            thermo_types (ThermoType, str, List[ThermoType]): A single thermo type or list of thermo
                types to return data for (e.g. ThermoType.GGA_GGA_U).
            total_energy (Tuple[float,float]): Minimum and maximum corrected total energy in eV/atom to consider.
            uncorrected_energy (Tuple[float,float]): Minimum and maximum uncorrected total
                energy in eV/atom to consider.
            num_chunks (int): Maximum number of chunks of data to yield. None will yield all possible.
            chunk_size (int): Number of data entries per chunk.
            all_fields (bool): Whether to return all fields in the document. Defaults to True.
            fields (List[str]): List of fields in ThermoDoc to return data for.
                Default is material_id and last_updated if all_fields is False.

        Returns:
            ([ThermoDoc], [dict]) List of thermo documents or dictionaries.

        Raises:
            ValueError: If an entry of ``thermo_types`` is not a valid thermo type.
        """
        query_params: dict = {}

        if formula:
            if isinstance(formula, str):
                formula = [formula]
            query_params["formula"] = ",".join(formula)

        if chemsys:
            if isinstance(chemsys, str):
                chemsys = [chemsys]
            query_params["chemsys"] = ",".join(chemsys)

        if material_ids:
            if isinstance(material_ids, str):
                material_ids = [material_ids]
            query_params["material_ids"] = ",".join(validate_ids(material_ids))

        if thermo_ids:
            # A bare string would otherwise be validated character by character.
            if isinstance(thermo_ids, str):
                thermo_ids = [thermo_ids]
            query_params["thermo_ids"] = ",".join(validate_ids(thermo_ids))

        if thermo_types:
            # Accept a single thermo type as well as a collection of them.
            if isinstance(thermo_types, (str, ThermoType)):
                thermo_types = [thermo_types]
            # Sort so the generated query string does not depend on set ordering.
            query_params["thermo_types"] = ",".join(
                sorted(self._validate_thermo_types(thermo_types))
            )

        if num_elements:
            if isinstance(num_elements, int):
                num_elements = (num_elements, num_elements)
            query_params["nelements_min"] = num_elements[0]
            query_params["nelements_max"] = num_elements[1]

        if is_stable is not None:
            query_params["is_stable"] = is_stable

        # Explicit mapping from API field name to the (min, max) argument, rather
        # than scanning locals() for names containing "energy".
        energy_ranges = {
            "energy_above_hull": energy_above_hull,
            "equilibrium_reaction_energy_per_atom": equilibrium_reaction_energy,
            "formation_energy_per_atom": formation_energy,
            "energy_per_atom": total_energy,
            "uncorrected_energy_per_atom": uncorrected_energy,
        }
        for field_name, bounds in energy_ranges.items():
            if bounds:
                query_params[f"{field_name}_min"] = bounds[0]
                query_params[f"{field_name}_max"] = bounds[1]

        # Drop entries whose value is None (e.g. an open-ended range bound).
        query_params = {
            key: value for key, value in query_params.items() if value is not None
        }

        return super()._search(
            num_chunks=num_chunks,
            chunk_size=chunk_size,
            all_fields=all_fields,
            fields=fields,
            **query_params,
        )

    def get_phase_diagram_from_chemsys(
        self, chemsys: str, thermo_type: ThermoType | str
    ) -> PhaseDiagram:
        """Get a pre-computed phase diagram for a given chemsys.

        Arguments:
            chemsys (str): A chemical system (e.g. Li-Fe-O)
            thermo_type (ThermoType, str): The thermo type for the phase diagram
                (e.g. ThermoType.GGA_GGA_U). This argument is required.

        Returns:
            (PhaseDiagram): Pymatgen phase diagram object.

        Raises:
            ValueError: If ``thermo_type`` is not a valid thermo type.
        """
        t_type = thermo_type if isinstance(thermo_type, str) else thermo_type.value
        self._validate_thermo_types([t_type])

        # Elements are sorted so the object key does not depend on input order.
        sorted_chemsys = "-".join(sorted(chemsys.split("-")))
        phdiag_id = f"thermo_type={t_type}/chemsys={sorted_chemsys}"
        version = self.db_version.replace(".", "-")
        obj_key = f"objects/{version}/phase-diagrams/{phdiag_id}.jsonl.gz"
        phase_diagram = self._query_open_data(
            bucket="materialsproject-build",
            key=obj_key,
            decoder=MontyDecoder().decode,
        )[0][0].get("phase_diagram")

        # Ensure el_ref keys are Element objects for PDPlotter.
        # Ensure qhull_data is a numpy array
        # This should be fixed in pymatgen
        if phase_diagram:
            el_refs = phase_diagram.el_refs
            for key in list(el_refs):
                # Skip keys that are already converted instead of stopping at the
                # first one, so a mixed-key mapping is still fully normalized.
                if not isinstance(key, str):
                    continue
                # Pop before inserting so the entry cannot be lost if the new key
                # were ever to compare equal to the old string key.
                el_refs[Element(str(key))] = el_refs.pop(key)

            if isinstance(phase_diagram.qhull_data, list):
                phase_diagram.qhull_data = np.array(phase_diagram.qhull_data)

        return phase_diagram  # type: ignore