1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
|
from __future__ import annotations
from collections import defaultdict
import numpy as np
from emmet.core.thermo import ThermoDoc, ThermoType
from monty.json import MontyDecoder
from pymatgen.analysis.phase_diagram import PhaseDiagram
from pymatgen.core import Element
from mp_api.client.core import BaseRester
from mp_api.client.core.utils import validate_ids
class ThermoRester(BaseRester[ThermoDoc]):
    """Rester for the ``materials/thermo`` route; documents are ``ThermoDoc`` objects."""

    suffix = "materials/thermo"
    document_model = ThermoDoc  # type: ignore
    supports_versions = True
    primary_key = "thermo_id"

    def search(
        self,
        material_ids: str | list[str] | None = None,
        chemsys: str | list[str] | None = None,
        energy_above_hull: tuple[float, float] | None = None,
        equilibrium_reaction_energy: tuple[float, float] | None = None,
        formation_energy: tuple[float, float] | None = None,
        formula: str | list[str] | None = None,
        is_stable: bool | None = None,
        num_elements: int | tuple[int, int] | None = None,
        thermo_ids: str | list[str] | None = None,
        thermo_types: list[ThermoType | str] | None = None,
        total_energy: tuple[float, float] | None = None,
        uncorrected_energy: tuple[float, float] | None = None,
        num_chunks: int | None = None,
        chunk_size: int = 1000,
        all_fields: bool = True,
        fields: list[str] | None = None,
    ) -> list[ThermoDoc] | list[dict]:
        """Query core thermo docs using a variety of search criteria.

        Arguments:
            material_ids (str, List[str]): A single Material ID string or list of strings
                (e.g., mp-149, [mp-149, mp-13]).
            chemsys (str, List[str]): A chemical system or list of chemical systems
                (e.g., Li-Fe-O, Si-*, [Si-O, Li-Fe-P]).
            energy_above_hull (Tuple[float,float]): Minimum and maximum energy above the hull in eV/atom to consider.
            equilibrium_reaction_energy (Tuple[float,float]): Minimum and maximum equilibrium reaction energy
                in eV/atom to consider.
            formation_energy (Tuple[float,float]): Minimum and maximum formation energy in eV/atom to consider.
            formula (str, List[str]): A formula including anonymized formula
                or wild cards (e.g., Fe2O3, ABO3, Si*). A list of chemical formulas can also be passed
                (e.g., [Fe2O3, ABO3]).
            is_stable (bool): Whether the material is stable.
            num_elements (int, Tuple[int,int]): Minimum and maximum number of elements in the material
                to consider. A single int is used as both the minimum and the maximum.
            thermo_ids (str, List[str]): A single thermo ID or list of thermo IDs to return data for.
                This is a combination of the Materials Project ID and thermo type (e.g. mp-149_GGA_GGA+U).
            thermo_types (List[ThermoType]): List of thermo types to return data for (e.g. ThermoType.GGA_GGA_U).
            total_energy (Tuple[float,float]): Minimum and maximum corrected total energy in eV/atom to consider.
            uncorrected_energy (Tuple[float,float]): Minimum and maximum uncorrected total
                energy in eV/atom to consider.
            num_chunks (int): Maximum number of chunks of data to yield. None will yield all possible.
            chunk_size (int): Number of data entries per chunk.
            all_fields (bool): Whether to return all fields in the document. Defaults to True.
            fields (List[str]): List of fields in ThermoDoc to return data for.
                Default is material_id and last_updated if all_fields is False.

        Returns:
            ([ThermoDoc], [dict]) List of thermo documents or dictionaries.

        Raises:
            ValueError: If any entry of ``thermo_types`` is not a valid thermo type.
        """
        query_params: dict = {}

        if formula:
            if isinstance(formula, str):
                formula = [formula]
            query_params["formula"] = ",".join(formula)

        if chemsys:
            if isinstance(chemsys, str):
                chemsys = [chemsys]
            query_params["chemsys"] = ",".join(chemsys)

        if material_ids:
            if isinstance(material_ids, str):
                material_ids = [material_ids]
            query_params["material_ids"] = ",".join(validate_ids(material_ids))

        if thermo_ids:
            # Accept a bare string like the other ID/formula arguments do; otherwise
            # the string would be iterated character by character.
            if isinstance(thermo_ids, str):
                thermo_ids = [thermo_ids]
            query_params["thermo_ids"] = ",".join(validate_ids(thermo_ids))

        if thermo_types:
            t_types = {t if isinstance(t, str) else t.value for t in thermo_types}
            valid_types = {*map(str, ThermoType.__members__.values())}
            if invalid_types := t_types - valid_types:
                raise ValueError(
                    f"Invalid thermo type(s) passed: {invalid_types}, valid types are: {valid_types}"
                )
            # Sort so the generated query string does not depend on set iteration order.
            query_params["thermo_types"] = ",".join(sorted(t_types))

        if num_elements:
            if isinstance(num_elements, int):
                num_elements = (num_elements, num_elements)
            query_params["nelements_min"] = num_elements[0]
            query_params["nelements_max"] = num_elements[1]

        if is_stable is not None:
            query_params["is_stable"] = is_stable

        # Explicit mapping from API field prefix to the (min, max) range argument.
        # This replaces a scan over ``locals()`` for names containing "energy",
        # which would break as soon as an unrelated local matched that pattern.
        energy_ranges = {
            "energy_above_hull": energy_above_hull,
            "equilibrium_reaction_energy_per_atom": equilibrium_reaction_energy,
            "formation_energy_per_atom": formation_energy,
            "energy_per_atom": total_energy,
            "uncorrected_energy_per_atom": uncorrected_energy,
        }
        for api_field, bounds in energy_ranges.items():
            if bounds:
                query_params[f"{api_field}_min"] = bounds[0]
                query_params[f"{api_field}_max"] = bounds[1]

        # Drop criteria whose value is None (e.g. an open-ended range bound).
        query_params = {
            entry: value for entry, value in query_params.items() if value is not None
        }

        return super()._search(
            num_chunks=num_chunks,
            chunk_size=chunk_size,
            all_fields=all_fields,
            fields=fields,
            **query_params,
        )

    def get_phase_diagram_from_chemsys(
        self, chemsys: str, thermo_type: ThermoType | str
    ) -> PhaseDiagram:
        """Get a pre-computed phase diagram for a given chemsys.

        Arguments:
            chemsys (str): A chemical system (e.g. Li-Fe-O)
            thermo_type (ThermoType, str): The thermo type for the phase diagram
                (e.g. ThermoType.GGA_GGA_U). This argument is required.

        Returns:
            (PhaseDiagram): Pymatgen phase diagram object.

        Raises:
            ValueError: If ``thermo_type`` is not a valid thermo type.
        """
        t_type = thermo_type if isinstance(thermo_type, str) else thermo_type.value
        valid_types = {*map(str, ThermoType.__members__.values())}
        if invalid_types := {t_type} - valid_types:
            raise ValueError(
                f"Invalid thermo type(s) passed: {invalid_types}, valid types are: {valid_types}"
            )

        # Elements are sorted so the object key does not depend on the order
        # in which the caller wrote the chemical system.
        sorted_chemsys = "-".join(sorted(chemsys.split("-")))
        phdiag_id = f"thermo_type={t_type}/chemsys={sorted_chemsys}"
        version = self.db_version.replace(".", "-")
        obj_key = f"objects/{version}/phase-diagrams/{phdiag_id}.jsonl.gz"

        pd = self._query_open_data(
            bucket="materialsproject-build",
            key=obj_key,
            decoder=MontyDecoder().decode,
        )[0][0].get("phase_diagram")

        # Ensure el_ref keys are Element objects for PDPlotter.
        # Ensure qhull_data is a numpy array
        # This should be fixed in pymatgen
        if pd:
            # Iterate over a snapshot because el_refs is mutated inside the loop.
            for key, entry in list(pd.el_refs.items()):
                if not isinstance(key, str):
                    # Already a non-string key: skip it, but keep converting the rest.
                    continue
                pd.el_refs[Element(str(key))] = entry
                pd.el_refs.pop(key)

            if isinstance(pd.qhull_data, list):
                pd.qhull_data = np.array(pd.qhull_data)

        return pd  # type: ignore
|