File: material.py

package info (click to toggle)
python-emmet-core 0.84.2-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 77,220 kB
  • sloc: python: 16,355; makefile: 30
file content (291 lines) | stat: -rw-r--r-- 10,761 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
""" Core definition of a Materials Document """

from typing import Dict, List, Mapping, Optional

from pydantic import BaseModel, Field
from pymatgen.analysis.structure_analyzer import SpacegroupAnalyzer
from pymatgen.analysis.structure_matcher import StructureMatcher
from pymatgen.entries.computed_entries import ComputedStructureEntry

from emmet.core.base import EmmetMeta
from emmet.core.material import MaterialsDoc as CoreMaterialsDoc
from emmet.core.material import PropertyOrigin
from emmet.core.settings import EmmetSettings
from emmet.core.structure import StructureMetadata
from emmet.core.tasks import TaskDoc
from emmet.core.vasp.calc_types import CalcType, RunType, TaskType

# Module-level settings instance. Its VASP_STRUCTURE_QUALITY_SCORES and
# VASP_USE_STATICS attributes are read once, at class-definition time, as the
# default argument values of MaterialsDoc.from_tasks below.
SETTINGS = EmmetSettings()


class BlessedCalcs(BaseModel):
    # Container with one optional ComputedStructureEntry slot per named field.
    # Every slot defaults to None, so any subset of them may be populated.
    GGA: Optional[ComputedStructureEntry] = Field(default=None)
    # "+" is not legal in a Python identifier, hence the "GGA+U" alias.
    GGA_U: Optional[ComputedStructureEntry] = Field(default=None, alias="GGA+U")
    PBESol: Optional[ComputedStructureEntry] = Field(default=None)
    SCAN: Optional[ComputedStructureEntry] = Field(default=None)
    R2SCAN: Optional[ComputedStructureEntry] = Field(default=None)
    HSE: Optional[ComputedStructureEntry] = Field(default=None)


class MaterialsDoc(CoreMaterialsDoc, StructureMetadata):
    # Material-level document assembled from a group of task documents.
    # The three *_types mappings below are keyed by task ID when populated by
    # from_tasks() / construct_deprecated_material().

    calc_types: Optional[Mapping[str, CalcType]] = Field(  # type: ignore
        None,
        description="Calculation types for all the calculations that make up this material",
    )
    task_types: Optional[Mapping[str, TaskType]] = Field(
        None,
        description="Task types for all the calculations that make up this material",
    )
    run_types: Optional[Mapping[str, RunType]] = Field(
        None,
        description="Run types for all the calculations that make up this material",
    )

    origins: Optional[List[PropertyOrigin]] = Field(
        None, description="Dictionary for tracking the provenance of properties"
    )

    entries: Optional[BlessedCalcs] = Field(
        None, description="Dictionary for tracking entries for VASP calculations"
    )

    @classmethod
    def from_tasks(
        cls,
        task_group: List[TaskDoc],
        structure_quality_scores: Dict[
            str, int
        ] = SETTINGS.VASP_STRUCTURE_QUALITY_SCORES,
        use_statics: bool = SETTINGS.VASP_USE_STATICS,
        commercial_license: bool = True,
    ) -> "MaterialsDoc":
        """
        Converts a group of tasks into one material

        Args:
            task_group: List of task documents
            structure_quality_scores: quality scores for various calculation types,
                looked up by run type value (missing run types score 0)
            use_statics: Use statics (in addition to structure optimizations)
                to define a material
            commercial_license: Whether the data should be licensed with BY-C (otherwise BY-NC).

        Returns:
            The document produced by ``cls.from_structure`` for the best
            structure calculation in the group.

        Raises:
            Exception: if ``task_group`` is empty.
            ValueError: if the group has no valid structure-defining task, or
                if no GGA / GGA+U entry could be selected.
        """
        if not task_group:
            raise Exception("Must have at least one task in the group.")

        # Metadata
        last_updated = max(task.last_updated for task in task_group)
        created_at = min(task.completed_at for task in task_group)
        task_ids = list({task.task_id for task in task_group})

        deprecated_tasks = {task.task_id for task in task_group if not task.is_valid}
        run_types = {task.task_id: task.run_type for task in task_group}
        task_types = {task.task_id: task.task_type for task in task_group}
        calc_types = {task.task_id: task.calc_type for task in task_group}

        structure_optimizations = [
            task for task in task_group if task.task_type == TaskType.Structure_Optimization  # type: ignore
        ]
        statics = [task for task in task_group if task.task_type == TaskType.Static]  # type: ignore
        structure_calcs = (
            structure_optimizations + statics
            if use_statics
            else structure_optimizations
        )

        if not any(doc.is_valid for doc in structure_calcs):
            raise ValueError("Group must contain at least one valid task")

        # Material ID: the smallest task ID among the structure-defining calcs
        # (structure_calcs holds exactly the optimizations, plus the statics
        # when use_statics is set).
        material_id = min(task.task_id for task in structure_calcs)

        # Always prefer a static over a structure opt
        task_type_quality_scores = {"Structure Optimization": 1, "Static": 2}
        special_tag_names = ("LASPH", "ISPIN")

        def _special_tag_score(task: TaskDoc):
            """
            Sum the LASPH and ISPIN values found in the task's input
            parameters; 0 when the task has no input parameters.
            """
            parameters = task.input.parameters
            if not parameters:
                return 0
            return sum(parameters.get(tag, False) for tag in special_tag_names)

        def _structure_eval(task: TaskDoc):
            """
            Sort key (smaller is better) ordering structure optimization and
            static calcs by
            - Validity (valid tasks first)
            - Functional Type (structure_quality_scores of the run type)
            - Task Type (static preferred over structure optimization)
            - Special Tags
            - Energy per atom
            """
            return (
                -1 * int(task.is_valid),
                -1 * structure_quality_scores.get(task.run_type.value, 0),
                -1 * task_type_quality_scores.get(task.task_type.value, 0),
                -1 * _special_tag_score(task),
                task.output.energy_per_atom,
            )

        # min() returns the first minimal element, i.e. the same task a stable
        # sort would place at index 0.
        best_structure_calc = min(structure_calcs, key=_structure_eval)
        structure = best_structure_calc.output.structure

        # Initial Structures: keep one representative per group of matching
        # input structures.
        sm = StructureMatcher(
            ltol=0.1, stol=0.1, angle_tol=0.1, scale=False, attempt_supercell=False
        )
        initial_structures = [
            group[0]
            for group in sm.group_structures(
                [task.input.structure for task in task_group]
            )
        ]

        # Deprecated
        deprecated = all(task.task_id in deprecated_tasks for task in structure_calcs)
        deprecated = deprecated or best_structure_calc.task_id in deprecated_tasks

        # Origins
        origins = [
            PropertyOrigin(
                name="structure",
                task_id=best_structure_calc.task_id,
                last_updated=best_structure_calc.last_updated,
            )
        ]

        def _entry_eval(task: TaskDoc):
            """
            Sort key (smaller is better) ordering candidate entry calcs by
            - Validity (valid tasks first)
            - Task Type (static preferred over structure optimization)
            - Special Tags
            - Energy per atom
            """
            return (
                -1 * int(task.is_valid),
                -1 * task_type_quality_scores.get(task.task_type.value, 0),
                -1 * _special_tag_score(task),
                task.output.energy_per_atom,
            )

        # Entries
        # **current materials docs must contain at least one GGA or GGA+U entry
        entries = {}

        for rt in set(run_types.values()):
            best_task_doc = min(
                (doc for doc in structure_calcs if doc.run_type == rt and doc.is_valid),
                key=_entry_eval,
                default=None,
            )
            if best_task_doc is None:
                # No valid structure calc was run with this run type
                continue

            entry = best_task_doc.structure_entry
            # Keep the originating task ID before the entry ID is rewritten
            entry.data["task_id"] = entry.entry_id
            entry.data["material_id"] = material_id
            entry.entry_id = f"{material_id}-{rt.value}"
            entry.parameters["is_hubbard"] = best_task_doc.input.is_hubbard
            entry.parameters["hubbards"] = best_task_doc.input.hubbards
            entries[rt] = entry

        if RunType.GGA not in entries and RunType.GGA_U not in entries:
            raise ValueError(
                "Individual material entry must contain at least one GGA or GGA+U calculation"
            )

        # Builder meta and license
        builder_meta = EmmetMeta(license="BY-C" if commercial_license else "BY-NC")

        return cls.from_structure(
            structure=structure,
            material_id=material_id,
            last_updated=last_updated,
            created_at=created_at,
            task_ids=task_ids,
            calc_types=calc_types,
            run_types=run_types,
            task_types=task_types,
            initial_structures=initial_structures,
            deprecated=deprecated,
            deprecated_tasks=deprecated_tasks,
            origins=origins,
            entries=entries,
            builder_meta=builder_meta,
        )

    @classmethod
    def construct_deprecated_material(
        cls,
        task_group: List[TaskDoc],
        commercial_license: bool = True,
    ) -> "MaterialsDoc":
        """
        Converts a group of tasks into a deprecated material

        Every task in the group is listed as deprecated and the resulting
        document is flagged ``deprecated=True``.

        Args:
            task_group: List of task documents
            commercial_license: Whether the data should be licensed with BY-C (otherwise BY-NC).

        Returns:
            The document produced by ``cls.from_structure``.

        Raises:
            Exception: if ``task_group`` is empty.
        """
        if not task_group:
            raise Exception("Must have at least one task in the group.")

        # Metadata
        last_updated = max(task.last_updated for task in task_group)
        created_at = min(task.completed_at for task in task_group)
        task_ids = list({task.task_id for task in task_group})

        deprecated_tasks = {task.task_id for task in task_group}
        run_types = {task.task_id: task.run_type for task in task_group}
        task_types = {task.task_id: task.task_type for task in task_group}
        calc_types = {task.task_id: task.calc_type for task in task_group}

        # Material ID
        material_id = min(task.task_id for task in task_group)

        # Any structure will do for metadata: take the first task's output
        # structure and convert it with get_conventional_standard_structure().
        structure = SpacegroupAnalyzer(
            task_group[0].output.structure, symprec=0.1
        ).get_conventional_standard_structure()

        # Deprecated
        deprecated = True

        # Builder meta and license
        builder_meta = EmmetMeta(license="BY-C" if commercial_license else "BY-NC")

        return cls.from_structure(
            structure=structure,
            material_id=material_id,
            last_updated=last_updated,
            created_at=created_at,
            task_ids=task_ids,
            calc_types=calc_types,
            run_types=run_types,
            task_types=task_types,
            deprecated=deprecated,
            deprecated_tasks=deprecated_tasks,
            builder_meta=builder_meta,
        )