1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193
|
""" Core definition of a Materials Document """
from __future__ import annotations
from datetime import datetime
from typing import List, Mapping, Type, TypeVar, Union, Optional
from pydantic import BaseModel, Field, field_validator
from pymatgen.core import Structure
from pymatgen.core.structure import Molecule
from emmet.core.mpid import MPID, MPculeID
from emmet.core.structure import MoleculeMetadata, StructureMetadata
from emmet.core.vasp.validation import DeprecationMessage
from emmet.core.common import convert_datetime
class PropertyOrigin(BaseModel):
    """
    Provenance record describing which calculation a property of a material
    document originates from.
    """

    name: str = Field(default=..., description="The property name")
    task_id: Union[MPID, MPculeID] = Field(
        default=...,
        description="The calculation ID this property comes from",
    )
    last_updated: datetime = Field(  # type: ignore
        default_factory=datetime.utcnow,
        description="The timestamp when this calculation was last updated",
    )

    @field_validator("last_updated", mode="before")
    @classmethod
    def handle_datetime(cls, v):
        """
        Hand the raw ``last_updated`` input to ``convert_datetime``; registered
        in "before" mode, so it runs ahead of the field's own validation.
        """
        normalized = convert_datetime(cls, v)
        return normalized
# Type variables for the alternate constructors defined in this module:
# ``S`` annotates ``CoreMoleculeDoc.from_molecule`` and ``T`` annotates
# ``MaterialsDoc.from_structure``. The bounds are given as strings because
# both classes are defined further down in the file.
S = TypeVar("S", bound="CoreMoleculeDoc")
T = TypeVar("T", bound="MaterialsDoc")
class MaterialsDoc(StructureMetadata):
    """
    Definition for a core Materials Document.

    Extends ``StructureMetadata`` with the material identifier, its structure,
    deprecation state, the calculations it was built from, timestamps,
    provenance records and warnings.
    """

    # material_id is the universal key for every document. Note that
    # ``structure`` below is also declared without a default.
    material_id: MPID = Field(
        ...,
        # Trailing space keeps the two implicitly concatenated sentences apart.
        description="The Materials Project ID of the material, used as a universal reference across property documents. "
        "This comes in the form: mp-******.",
    )

    structure: Structure = Field(
        ...,
        description="The structure of this material.",
    )

    # NOTE(review): the default is True, i.e. a document counts as deprecated
    # unless the caller explicitly sets this to False - confirm this is intended.
    deprecated: bool = Field(
        True,
        description="Whether this materials document is deprecated.",
    )

    deprecation_reasons: Optional[List[Union[DeprecationMessage, str]]] = Field(
        None,
        description="List of deprecation tags detailing why this materials document isn't valid.",
    )

    initial_structures: List[Structure] = Field(
        [],
        description="Initial structures used in the DFT optimizations corresponding to this material.",
    )

    task_ids: List[MPID] = Field(
        [],
        description="List of Calculations IDs used to make this Materials Document.",
    )

    deprecated_tasks: List[str] = Field([], title="Deprecated Tasks")

    calc_types: Optional[Mapping[str, str]] = Field(
        None,
        description="Calculation types for all the calculations that make up this material.",
    )

    last_updated: datetime = Field(
        description="Timestamp for when this document was last updated.",
        default_factory=datetime.utcnow,
    )

    created_at: datetime = Field(
        description="Timestamp for when this material document was first created.",
        default_factory=datetime.utcnow,
    )

    # NOTE(review): the description says "Dictionary" but the annotation is an
    # optional list of PropertyOrigin - confirm which wording is intended.
    origins: Optional[List[PropertyOrigin]] = Field(
        None, description="Dictionary for tracking the provenance of properties."
    )

    warnings: List[str] = Field(
        [], description="Any warnings related to this material."
    )

    @classmethod
    def from_structure(
        cls: Type[T], structure: Structure, material_id: MPID, **kwargs
    ) -> T:  # type: ignore[override]
        """
        Builds a materials document using the minimal amount of information.

        Args:
            structure: Structure stored on the document; it is also handed to
                the parent ``from_structure`` as ``meta_structure``.
            material_id: Identifier assigned to the new document.
            **kwargs: Extra keyword arguments forwarded unchanged to the
                parent ``from_structure``.

        Returns:
            The result of the parent class's ``from_structure`` call,
            annotated as an instance of ``cls``.
        """
        return super().from_structure(  # type: ignore
            meta_structure=structure,
            material_id=material_id,
            structure=structure,
            **kwargs,
        )
class CoreMoleculeDoc(MoleculeMetadata):
    """
    Definition for a core Molecule Document.

    Extends ``MoleculeMetadata`` with the molecule identifier, its molecule
    object, deprecation state, the calculations it was built from, timestamps,
    provenance records and warnings.
    """

    # molecule_id is the universal key for every document. Note that
    # ``molecule`` below is also declared without a default.
    molecule_id: MPculeID = Field(
        ...,
        # Trailing space keeps the two implicitly concatenated sentences apart.
        description="The ID of this molecule, used as a universal reference across property documents. "
        "This comes in the form of an MPID (or int) or MPculeID (or str)",
    )

    molecule: Molecule = Field(
        ...,
        description="The best (typically meaning lowest in energy) structure for this molecule",
    )

    # NOTE(review): the default is True, i.e. a document counts as deprecated
    # unless the caller explicitly sets this to False - confirm this is intended.
    deprecated: bool = Field(
        True,
        description="Whether this molecule document is deprecated.",
    )

    # TODO: Why might a molecule be deprecated?
    deprecation_reasons: Optional[List[str]] = Field(
        None,
        description="List of deprecation tags detailing why this molecule document isn't valid",
    )

    initial_molecules: List[Molecule] = Field(
        [],
        description="Initial molecules used in the DFT geometry optimizations corresponding to this molecule",
    )

    task_ids: List[MPID] = Field(
        [],
        title="Calculation IDs",
        description="List of Calculations IDs used to make this Molecule Document",
    )

    # TODO: Should this be MPID?
    deprecated_tasks: List[str] = Field([], title="Deprecated Tasks")

    calc_types: Optional[Mapping[str, str]] = Field(
        None,
        description="Calculation types for all the tasks that make up this molecule",
    )

    last_updated: datetime = Field(
        description="Timestamp for when this document was last updated",
        default_factory=datetime.utcnow,
    )

    created_at: datetime = Field(
        description="Timestamp for when this document was first created",
        default_factory=datetime.utcnow,
    )

    # NOTE(review): the description says "Dictionary" but the annotation is an
    # optional list of PropertyOrigin - confirm which wording is intended.
    origins: Optional[List[PropertyOrigin]] = Field(
        None, description="Dictionary for tracking the provenance of properties"
    )

    warnings: List[str] = Field([], description="Any warnings related to this molecule")

    @classmethod
    def from_molecule(
        cls: Type[S], molecule: Molecule, molecule_id: MPculeID, **kwargs
    ) -> S:  # type: ignore[override]
        """
        Builds a molecule document using the minimal amount of information.

        Args:
            molecule: Molecule stored on the document; it is also handed to
                the parent ``from_molecule`` as ``meta_molecule``.
            molecule_id: Identifier assigned to the new document.
            **kwargs: Extra keyword arguments forwarded unchanged to the
                parent ``from_molecule``.

        Returns:
            The result of the parent class's ``from_molecule`` call,
            annotated as an instance of ``cls``.
        """
        return super().from_molecule(  # type: ignore
            meta_molecule=molecule, molecule_id=molecule_id, molecule=molecule, **kwargs
        )
|