1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870
|
#!/usr/bin/env python
"""
This module provides functions and objects to validate netcdf files written in the ETSF-IO file format.
For a quick reference to the etsf specs see: http://esl.cecam.org/mediawiki/index.php/ETSF_File_Format_Specifications
"""
from __future__ import print_function, division, unicode_literals, absolute_import
import os
import re
import logging
logger = logging.getLogger(__name__)
from .termcolor import cprint
try:
import numpy as np
import netCDF4
except ImportError as exc:
errmsg = str(exc) + "\nCannot import numpy or netCDF4. Use `anaconda or pip install netcdf`\n"
logger.warning(errmsg)
raise ImportError(errmsg)
def all_subclasses(cls):
    """
    Return the list of all the classes derived from `cls`.

    The direct subclasses come first, followed by the descendants of each
    direct subclass, collected recursively in the same order.
    """
    direct = cls.__subclasses__()
    found = list(direct)
    for child in direct:
        found.extend(all_subclasses(child))
    return found
class EtsfObject(object):
    """
    Common ancestor of the objects describing netcdf Dimensions, Variables and Attributes.

    Concrete subclasses are expected to set a `name` attribute and to provide
    a `validate` method that takes a nc dataset and returns a list of error strings.
    """

    def __str__(self):
        # Report the concrete class so that subclasses are distinguishable in the output.
        cls_name = type(self).__name__
        return "<%s: %s>" % (cls_name, self.name)
class EtsfDimension(EtsfObject):
    """A dimension has a name, a type and, optionally, a list of allowed values."""

    def __init__(self, name, xtype, allowed=None):
        """
        Args:
            name: Name of the dimension in the netcdf file.
            xtype: Type declared in the specifications (stored, not checked by `validate`).
            allowed: Optional collection with the allowed lengths. None means "any length".
        """
        self.name = name
        self.xtype = xtype
        self.allowed = allowed

    def validate(self, ncdata):
        """Validate the content in the nc dataset, return list of errors."""
        errors = []

        # Dimension should be present.
        if self.name not in ncdata.dimensions:
            errors.append("<%s> Not present in ncdata" % self.name)
            return errors

        # Check if value is allowed.
        if self.allowed is not None:
            n = len(ncdata.dimensions[self.name])
            if n not in self.allowed:
                # The original code called `.append` on the bound method `errors.append`,
                # which raised AttributeError instead of recording the error.
                errors.append("Value %s not in allowed %s" % (n, self.allowed))

        return errors
class EtsfAttribute(EtsfObject):
    """An attribute has a name, a type and, optionally, a shape and a list of allowed values."""

    def __init__(self, name, xtype, shape=None, allowed=None):
        """
        Args:
            name: Name of the attribute in the netcdf file.
            xtype: Type declared in the specifications (stored, not checked by `validate`).
            shape: Optional shape declared in the specifications (stored, not checked by `validate`).
            allowed: Optional collection with the allowed values. None means "any value".
        """
        self.name = name
        self.xtype = xtype
        self.shape = shape
        self.allowed = allowed

    def validate(self, ncdata):
        """
        Validate the content in the nc dataset, return list of errors.

        `ncdata` must provide `ncattrs()` (list of attribute names) and
        `getncattr(name)` (value of the attribute).
        """
        errors = []

        # Attribute should be present.
        if self.name not in ncdata.ncattrs():
            errors.append("<%s> not present in ncdata" % self.name)
            # Nothing else can be checked if the attribute is missing.
            return errors

        # Check if value is allowed.
        if self.allowed is not None:
            # ncattrs() only returns the names: the value must be read with getncattr.
            value = ncdata.getncattr(self.name)
            if value not in self.allowed:
                errors.append("Value %s is not allowed" % str(value))

        return errors
class EtsfVariable(EtsfObject):
    """
    A variable has a name, a type and a list of dimensions.
    The list of allowed values and the attributes that must be specified are optional.
    """
    # Stores all the instances we are gonna create.
    all_variables = []

    def __init__(self, name, xtype, dimensions, allowed=None, reqattrs=None):
        """
        Args:
            name: Name of the variable in the netcdf file.
            xtype: Type declared in the specifications (stored, not checked by `validate`).
            dimensions: List of objects with a `name` attribute giving the dimensions
                of the variable, in order. Empty list for scalars.
            allowed: Optional collection with the allowed values (scalar variables only).
            reqattrs: Optional list of objects with a `name` attribute giving the
                attributes that must be attached to the variable.
        """
        self.name = name
        self.xtype = xtype
        self.dimensions = dimensions
        self.allowed = allowed
        self.reqattrs = reqattrs
        # TODO dtype
        self.ndim = len(self.dimensions)

        # Store the instance in cls.all_variables
        self.__class__.all_variables.append(self)

    def validate(self, ncdata):
        """Validate the content in the nc dataset, return list of errors."""
        errors = []
        eapp = errors.append

        # Variable should be present.
        if self.name not in ncdata.variables:
            eapp("<%s> not present in ncdata" % self.name)
            return errors

        # If variable is present, check that dimension names agree with the specifications.
        ncvar = ncdata.variables[self.name]

        # Number of dimensions
        if self.ndim != ncvar.ndim:
            eapp("Has different ndim:\n\tFile: %s\n\tSpecs: %s" % (ncvar.ndim, self.ndim))
            return errors

        # The shape expected by the specs is built from the dimensions stored in the file,
        # hence all of them must be available.
        missing = [dim.name for dim in self.dimensions if dim.name not in ncdata.dimensions]
        if missing:
            eapp("Dimension(s) not present in ncdata: %s" % ", ".join(missing))
            return errors

        # Test shape
        shape = np.array([len(ncdata.dimensions[dim.name]) for dim in self.dimensions])
        if np.any(shape != ncvar.shape):
            eapp("Has different shape:\n\tFile: %s\n\tSpecs: %s" % (
                tuple(ncvar.shape), tuple(shape.tolist())))
            return errors

        # Test dimension names (same length at this point because ndim has been checked).
        dim_names = [dim.name for dim in self.dimensions]
        ncdim_names = list(ncvar.dimensions)
        if dim_names != ncdim_names:
            eapp("Wrong dimension names.\n\tFile: %s\n\tSpecs: %s" % (ncdim_names, dim_names))

        # Test allowed values.
        if self.allowed is not None:
            # Allowed values are supported only for scalar variables.
            assert not ncvar.shape
            value = ncvar.getValue()
            try:
                # getValue may return a one-element array instead of a bare scalar.
                value = value[0]
            except IndexError:
                pass

            if value not in self.allowed:
                eapp("Value %s not in allowed %s" % (value, self.allowed))

        # Test attributes.
        if self.reqattrs is not None:
            nc_attrs = ncvar.ncattrs()
            for attr in self.reqattrs:
                aname = attr.name
                if aname not in nc_attrs:
                    eapp("Attribute %s is missing for variable: %s" % (aname, self.name))

        return errors
class VariableWithUnits(EtsfVariable):
    """A Variable that requires the specification of units."""

    def __init__(self, name, xtype, dimensions, allowed=None, reqattrs=None):
        """
        Same arguments as `EtsfVariable`. The module-level `units` attribute is
        added to `reqattrs` if an attribute with the same name is not already there.
        """
        # Work on a copy so that the list passed by the caller is never modified.
        reqattrs = [] if reqattrs is None else list(reqattrs)

        # Callers often pass reqattrs=[units] explicitly: avoid adding it twice,
        # otherwise the same "missing attribute" error is reported two times.
        if all(attr.name != units.name for attr in reqattrs):
            reqattrs.append(units)

        super(VariableWithUnits, self).__init__(name, xtype, dimensions, allowed=allowed, reqattrs=reqattrs)

    def validate(self, ncdata):
        """Validate the content in the nc dataset, return list of errors."""
        errors = super(VariableWithUnits, self).validate(ncdata)

        if self.name not in ncdata.variables:
            # The base class has already reported the missing variable.
            assert errors
            return errors

        ncvar = ncdata.variables[self.name]
        nc_attrs = ncvar.ncattrs()

        if "scale_to_atomic_units" not in nc_attrs:
            errors.append("scale_to_atomic_units attribute must be specified")

        return errors
# AND NOW, LADIES & GENTLEMEN, THE ETSF-IO SPECIFICATIONS

######################################################################################
# Mandatory attributes
# Attributes that are required in any ETSF NetCDF file.
######################################################################################
file_format = EtsfAttribute(name="file_format", xtype="char", shape=[80])
file_format_version = EtsfAttribute(name="file_format_version", xtype="real")
Conventions = EtsfAttribute(name="Conventions", xtype="char", shape=[80])

mandatory_attributes = [
    file_format,
    file_format_version,
    Conventions,
]

#################################################################
# Optional attributes
# Attributes that may be present in ETSF NetCDF files.
#################################################################
history = EtsfAttribute(name="history", xtype="char", shape=[1024])
title = EtsfAttribute(name="title", xtype="char", shape=[80])

# Attribute of number_of_states, flag-type, see Flag-like attributes.
k_dependent = EtsfAttribute(name="k_dependent", xtype="char", shape=[80], allowed=["yes", "no"])

# flag-type attribute, see Flag-like attributes.
symmorphic = EtsfAttribute(name="symmorphic", xtype="char", shape=[80], allowed=["yes", "no"])

##################################################################
# Generic attributes of variables
# Attributes that may apply to a large number of variables and
# that might be mandatory for selected variables in ETSF NetCDF files.
##################################################################
units = EtsfAttribute(name="units", xtype="char", shape=[80])
scale_to_atomic_units = EtsfAttribute(name="scale_to_atomic_units", xtype="double")
##############################################################################
# Dimensions that cannot be split
# Dimensions that are not supposed to lead to a splitting.
##############################################################################
character_string_length = EtsfDimension(
    name="character_string_length", xtype="integer", allowed=[80])
real_or_complex_coefficients = EtsfDimension(
    name="real_or_complex_coefficients", xtype="integer", allowed=[1, 2])
real_or_complex_density = EtsfDimension(
    name="real_or_complex_density", xtype="integer", allowed=[1, 2])
real_or_complex_gw_corrections = EtsfDimension(
    name="real_or_complex_gw_corrections", xtype="integer", allowed=[1, 2])
real_or_complex_potential = EtsfDimension(
    name="real_or_complex_potential", xtype="integer", allowed=[1, 2])
real_or_complex_wavefunctions = EtsfDimension(
    name="real_or_complex_wavefunctions", xtype="integer", allowed=[1, 2])
number_of_cartesian_directions = EtsfDimension(
    name="number_of_cartesian_directions", xtype="integer", allowed=[3])
number_of_reduced_dimensions = EtsfDimension(
    name="number_of_reduced_dimensions", xtype="integer", allowed=[3])
number_of_vectors = EtsfDimension(
    name="number_of_vectors", xtype="integer", allowed=[3])
number_of_symmetry_operations = EtsfDimension(
    name="number_of_symmetry_operations", xtype="integer")
number_of_atoms = EtsfDimension(
    name="number_of_atoms", xtype="integer")
number_of_atom_species = EtsfDimension(
    name="number_of_atom_species", xtype="integer")
symbol_length = EtsfDimension(
    name="symbol_length", xtype="integer", allowed=[2])

##############################################################################
# Dimensions that can be split
# Dimensions that might be used to define a splitting
# (e.g. in case of parallelism).
##############################################################################
max_number_of_states = EtsfDimension(
    name="max_number_of_states", xtype="integer")
number_of_kpoints = EtsfDimension(
    name="number_of_kpoints", xtype="integer")
number_of_spins = EtsfDimension(
    name="number_of_spins", xtype="integer", allowed=[1, 2])
number_of_spinor_components = EtsfDimension(
    name="number_of_spinor_components", xtype="integer", allowed=[1, 2])
number_of_components = EtsfDimension(
    name="number_of_components", xtype="integer", allowed=[1, 2, 4])
max_number_of_coefficients = EtsfDimension(
    name="max_number_of_coefficients", xtype="integer")
number_of_grid_points_vector1 = EtsfDimension(
    name="number_of_grid_points_vector1", xtype="integer")
number_of_grid_points_vector2 = EtsfDimension(
    name="number_of_grid_points_vector2", xtype="integer")
number_of_grid_points_vector3 = EtsfDimension(
    name="number_of_grid_points_vector3", xtype="integer")
max_number_of_basis_grid_points = EtsfDimension(
    name="max_number_of_basis_grid_points", xtype="integer")
# NOTE(review): spelled "localisation" here while the WavefunctionGroup docstring
# uses "number_of_localization_regions" -- confirm the spelling against the specs.
number_of_localisation_regions = EtsfDimension(
    name="number_of_localisation_regions", xtype="integer", allowed=[1])
####################
# Atomic information
####################
valence_charges = EtsfVariable(
    "valence_charges", "double",
    dimensions=[number_of_atom_species],
)
pseudopotential_types = EtsfVariable(
    "pseudopotential_types", "char",
    dimensions=[number_of_atom_species, character_string_length],
)

######################
# Electronic structure
######################
number_of_electrons = EtsfVariable(
    "number_of_electrons", "integer",
    dimensions=[],
)
exchange_functional = EtsfVariable(
    "exchange_functional", "char",
    dimensions=[character_string_length],
)
correlation_functional = EtsfVariable(
    "correlation_functional", "char",
    dimensions=[character_string_length],
)
# Units attribute required. The attribute scale to atomic units might also be mandatory
fermi_energy = VariableWithUnits(
    "fermi_energy", "double",
    dimensions=[],
    reqattrs=[units],
)
smearing_scheme = EtsfVariable(
    "smearing_scheme", "char",
    dimensions=[character_string_length],
)
# Units attribute required. The attribute scale to atomic units might also be mandatory
smearing_width = VariableWithUnits(
    "smearing_width", "double",
    dimensions=[],
    reqattrs=[units],
)

##################
# Reciprocal space
##################
# Units attribute required. The attribute scale to atomic units might also be mandatory
kinetic_energy_cutoff = VariableWithUnits(
    "kinetic_energy_cutoff", "double",
    dimensions=[],
    reqattrs=[units],
)
kpoint_grid_shift = EtsfVariable(
    "kpoint_grid_shift", "double",
    dimensions=[number_of_reduced_dimensions],
)
kpoint_grid_vectors = EtsfVariable(
    "kpoint_grid_vectors", "double",
    dimensions=[number_of_vectors, number_of_reduced_dimensions],
)
monkhorst_pack_folding = EtsfVariable(
    "monkhorst_pack_folding", "integer",
    dimensions=[number_of_vectors],
)
###################################################################################
# Atomic structure and symmetry operations
# Variables and attributes to specify the atomic structure and symmetry operations.
###################################################################################
primitive_vectors = EtsfVariable("primitive_vectors", "double", [number_of_vectors, number_of_cartesian_directions])

# The "symmorphic" attribute is needed.
reduced_symmetry_matrices = EtsfVariable(
    "reduced_symmetry_matrices", "integer",
    [number_of_symmetry_operations, number_of_reduced_dimensions, number_of_reduced_dimensions],
    reqattrs=[symmorphic])

# The "symmorphic" attribute is needed.
# NOTE(review): the attribute is not enforced here (no reqattrs) -- confirm whether this is intentional.
reduced_symmetry_translations = EtsfVariable(
    "reduced_symmetry_translations", "double",
    [number_of_symmetry_operations, number_of_reduced_dimensions])

# In principle: allowed=range(1, 233)) but I usually use 0 when the space_group is not available
space_group = EtsfVariable("space_group", "integer", [], allowed=range(0, 233))

# Between 1 and number_of_atom_species.
atom_species = EtsfVariable("atom_species", "integer", [number_of_atoms])

reduced_atom_positions = EtsfVariable("reduced_atom_positions", "double", [number_of_atoms, number_of_reduced_dimensions])

atomic_numbers = EtsfVariable("atomic_numbers", "double", [number_of_atom_species])

atom_species_names = EtsfVariable("atom_species_names", "char", [number_of_atom_species, character_string_length])

# The netcdf variable is named "chemical_symbols" (plural) in the ETSF specifications.
# The previous name "chemical_symbol" could never match a conforming file.
chemical_symbols = EtsfVariable("chemical_symbols", "char", [number_of_atom_species, symbol_length])
##########
# K-points
##########
reduced_coordinates_of_kpoints = EtsfVariable(
    "reduced_coordinates_of_kpoints", "double",
    dimensions=[number_of_kpoints, number_of_reduced_dimensions],
)
kpoint_weights = EtsfVariable(
    "kpoint_weights", "double",
    dimensions=[number_of_kpoints],
)

########
# States
########
# The attribute "k_dependent" must be defined.
number_of_states = EtsfVariable(
    "number_of_states", "integer",
    dimensions=[number_of_spins, number_of_kpoints],
    reqattrs=[k_dependent],
)
eigenvalues = VariableWithUnits(
    "eigenvalues", "double",
    dimensions=[number_of_spins, number_of_kpoints, max_number_of_states],
)
occupations = EtsfVariable(
    "occupations", "double",
    dimensions=[number_of_spins, number_of_kpoints, max_number_of_states],
)

# Density
# A density in such a format (represented on a 3D homogeneous grid) is suited for the representation
# of smooth densities, as obtained naturally from pseudopotential calculations using plane waves.
# This specification for a density can also accommodate the response densities of
# Density-Functional Perturbation Theory.
# By default, the density is given in atomic units, that is, number of electrons per Bohr3.
# The "units" attribute is required. The attribute "scale_to_atomic_units" might also be mandatory
density = VariableWithUnits(
    "density", "double",
    dimensions=[
        number_of_components,
        number_of_grid_points_vector3,
        number_of_grid_points_vector2,
        number_of_grid_points_vector1,
        real_or_complex_density,
    ],
    reqattrs=[units],
)

# Exchange and correlation
# Units attribute required. The attribute scale to atomic units might also be mandatory
correlation_potential = VariableWithUnits(
    "correlation_potential", "double",
    dimensions=[
        number_of_components,
        number_of_grid_points_vector3,
        number_of_grid_points_vector2,
        number_of_grid_points_vector1,
        real_or_complex_potential,
    ],
    reqattrs=[units],
)

# Units attribute required. The attribute scale to atomic units might also be mandatory
exchange_potential = VariableWithUnits(
    "exchange_potential", "double",
    dimensions=[
        number_of_components,
        number_of_grid_points_vector3,
        number_of_grid_points_vector2,
        number_of_grid_points_vector1,
        real_or_complex_potential,
    ],
    reqattrs=[units],
)

# Units attribute required. The attribute "scale to atomic units" might also be mandatory
exchange_correlation_potential = VariableWithUnits(
    "exchange_correlation_potential", "double",
    dimensions=[
        number_of_components,
        number_of_grid_points_vector3,
        number_of_grid_points_vector2,
        number_of_grid_points_vector1,
        real_or_complex_potential,
    ],
    reqattrs=[units],
)
class EtsfGroup(object):
    """
    This object is essentially a container of variables
    A Group can contain other subgroups.

    Subclasses customize the validation by overriding the class attributes below.
    Every entry of `attributes`, `dimensions` and `variables` must provide a
    `validate(ncdata)` method returning a list of error strings.
    """
    # Objects that must be present and valid in the file.
    attributes = []
    dimensions = []
    variables = []
    # At least one of these variables (objects with a `name`) must be present in the file.
    at_least_one_variable_in = []
    # Other EtsfGroup classes that must be valid as well.
    subgroups = []

    @classmethod
    def validate_file(cls, path):
        """Open the netcdf file `path`, validate it and return the list of errors."""
        ncdata = netCDF4.Dataset(path, mode="r")
        try:
            return cls.validate(ncdata)
        finally:
            # Release the file even if the validation raises.
            ncdata.close()

    @classmethod
    def validate(cls, ncdata):
        """Validate the content of the nc dataset, return list of errors (strings)."""
        errors = []

        # Test attributes.
        for attr in cls.attributes:
            errors += attr.validate(ncdata)

        # Test dimensions
        for dim in cls.dimensions:
            errors += dim.validate(ncdata)

        # Test variables
        for var in cls.variables:
            errors += var.validate(ncdata)

        # Test alternative variables: errors are reported only if none of them is present.
        candidates = cls.at_least_one_variable_in
        if not any(var.name in ncdata.variables for var in candidates):
            errors.extend("<%s> not present in ncdata" % var.name for var in candidates)

        # Test subgroups.
        for group in cls.subgroups:
            errors += group.validate(ncdata)

        return errors
class CrystalGroup(EtsfGroup):
    """
    Mandatory content of a ETSF NetCDF file for crystallographic data:

        - The three attributes defined in Mandatory attributes.

        - These dimensions from Dimensions that cannot be split:

            number_of_cartesian_directions
            number_of_vectors
            number_of_atoms
            number_of_atom_species
            number_of_symmetry_operations

        - These variables from Atomic structure and symmetry operations:

            primitive_vectors
            reduced_symmetry_matrices
            reduced_symmetry_translations
            space_group
            atom_species
            reduced_atom_positions

        - At least one of these variables from Atomic structure and symmetry
          operations, used to specify the kind of atoms:

            atomic_numbers
            atom_species_names
            chemical_symbols

    atomic_numbers is the preferred choice. When it is not available,
    atom_species_names takes precedence over chemical_symbols. A reading program
    should follow the same order of preference if more than one of these
    variables is found in a file.
    """
    attributes = mandatory_attributes

    dimensions = [number_of_cartesian_directions, number_of_vectors, number_of_atoms,
                  number_of_atom_species, number_of_symmetry_operations]

    variables = [primitive_vectors, reduced_symmetry_matrices, reduced_symmetry_translations,
                 space_group, atom_species, reduced_atom_positions]

    at_least_one_variable_in = [atomic_numbers, atom_species_names, chemical_symbols]
class KpointsGroup(EtsfGroup):
    """Group requiring the variables with the reduced coordinates and the weights of the k-points."""
    variables = [reduced_coordinates_of_kpoints, kpoint_weights]
class StatesGroup(EtsfGroup):
    """Group requiring the number of states, the eigenvalues and the occupations."""
    variables = [number_of_states, eigenvalues, occupations]
class DenPotGroup(EtsfGroup):
    """
    Mandatory content of a ETSF NetCDF file for a density:

        - The three attributes defined in Mandatory attributes.

        - These dimensions from Dimensions that cannot be split:

            number_of_cartesian_directions
            number_of_vectors
            real_or_complex_density and/or real_or_complex_potential

        - These dimensions from Dimensions that can be split:

            number_of_components
            number_of_grid_points_vector1
            number_of_grid_points_vector2
            number_of_grid_points_vector3

        - The primitive vectors of the cell, as defined in Atomic structure
          and symmetry operations.

        - The density or potential, as defined in Density or Exchange and correlation.
          This variable must be the last, in order not to be limited to 4 GB.

    As mentioned in General considerations and General specifications, such file
    might contain additional information agreed within ETSF, such as any of the
    variables specified in General specifications. It might even contain enough
    information to be declared a ETSF NetCDF file "containing crystallographic data"
    or "containing the wavefunctions", or both. Such file might also contain
    additional information specific to the software that generated the file.
    It is not expected that this other software-specific information be used
    by another software.

    A ETSF NetCDF exchange, correlation, or exchange-correlation potential file
    should contain at least one variable among the three presented in Exchange
    and correlation in replacement of the specification of the density.
    The type and size of such variables are similar to the one of the density.
    The other variables required for a density are also required for a potential
    file. Additional ETSF or software-specific information might be added,
    as described previously.

    The information might be distributed among different files, thanks to the use
    of splitting of data for variables:

        number_of_components
        number_of_grid_points_vector1
        number_of_grid_points_vector2
        number_of_grid_points_vector3

    In case the splitting related to one of these variables is activated, then
    the corresponding variables in Auxiliary variables for splitting must be
    defined. Accordingly, the dimensions of the variables in Density and/or
    Exchange and correlation will be changed, to accommodate only the segment
    of data effectively contained in the file.
    """
    attributes = mandatory_attributes

    # real_or_complex_density and/or real_or_complex_potential are not tested here.
    dimensions = [number_of_cartesian_directions, number_of_vectors, number_of_components,
                  number_of_grid_points_vector1, number_of_grid_points_vector2,
                  number_of_grid_points_vector3]

    variables = [density]

    # Atomic structure and symmetry operations
    subgroups = [CrystalGroup]
class WavefunctionGroup(EtsfGroup):
    """
    Specification for files containing the wavefunctions.

    A ETSF NetCDF file "containing the wavefunctions" should contain at least the
    information needed to build the density from this file. Also, since the
    eigenvalues are intimately linked to eigenfunctions, it is expected that such
    a file contain eigenvalues. Of course, files might contain less information
    than the one required, but still follow the naming convention of ETSF.
    It might also contain more information, of the kind specified in other tables
    of the specifications.

    Mandatory content:

        - The three attributes defined in Mandatory attributes.

        - These dimensions from Dimensions that cannot be split:

            character_string_length
            number_of_cartesian_directions
            number_of_vectors
            real_or_complex_coefficients and/or real_or_complex_wavefunctions
            number_of_symmetry_operations
            number_of_reduced_dimensions

        - These dimensions from Dimensions that can be split:

            max_number_of_states
            number_of_kpoints
            number_of_spins
            number_of_spinor_components

        - In case of a real-space wavefunctions, these dimensions from
          Dimensions that can be split:

            number_of_grid_points_vector1
            number_of_grid_points_vector2
            number_of_grid_points_vector3

        - In case of a wavefunction given in terms of a basis set, this dimension
          from Dimensions that can be split:

            max_number_of_coefficients

        - In case of a wavefunction given in terms of a Daubechies wavelet basis
          set, these dimensions from Dimensions that can be split:

            max_number_of_basis_grid_points
            number_of_localization_regions

        - The primitive vectors of the cell, as defined in Atomic structure and
          symmetry operations (variable primitive_vectors).

        - The symmetry operations, as defined in Atomic structure and symmetry
          operations (given by the variables reduced_symmetry_translations and
          reduced_symmetry_matrices).

        - The information related to each kpoint, as defined in K-points.

        - The information related to each state (including eigenenergies and
          occupation numbers), as defined in States.

        - In case of basis set representation, the information related to the
          basis set, and the variable coefficients_of_wavefunctions, as defined
          in Wavefunctions.

            For basis_set equal to "plane_waves", this variable is required
            from Wavefunctions:

                reduced_coordinates_of_plane_waves

            For basis_set equal to "daubechies_wavelets", these variables are
            required from Wavefunctions:

                coordinates_of_basis_grid_points
                number_of_coefficients_per_grid_point

        - In case of real-space representation, this variable from Wavefunctions:

            real_space_wavefunctions

    As mentioned in General considerations and General specifications, such a file
    might contain additional information agreed on within ETSF, such as any of the
    variables specified in General specifications. It might even contain enough
    information to be declared a ETSF NetCDF file "containing crystallographic data"
    or "containing the density", or both. Such a file might also contain additional
    information specific to the software that generated the file. It is not expected
    that this other software-specific information be used by another software.

    The information might be distributed among different files, thanks to the use
    of splitting of data for variables:

        max_number_of_states
        number_of_kpoints
        number_of_spins

    And, either

        number_of_grid_points_vector1
        number_of_grid_points_vector2
        number_of_grid_points_vector3

    or

        max_number_of_coefficients

    In case the splitting related to one of these variables is activated, then the
    corresponding variables in Split wavefunctions must be defined. Accordingly,
    the dimensions of the variables in K-points, States, Wavefunctions, and BSE/GW
    might have to be changed, to accommodate only the segment of data effectively
    contained in the file.
    """
    dimensions = [
        # Dimensions that cannot be split.
        # (real_or_complex_coefficients and/or real_or_complex_wavefunctions are not tested)
        character_string_length, number_of_cartesian_directions, number_of_vectors,
        number_of_symmetry_operations, number_of_reduced_dimensions,
        # Dimensions that can be split.
        max_number_of_states, number_of_kpoints, number_of_spins, number_of_spinor_components,
        # Real-space grid.
        # (coefficients_of_wavefunctions, reduced_coordinates_of_plane_waves are not tested)
        number_of_grid_points_vector1, number_of_grid_points_vector2, number_of_grid_points_vector3,
    ]

    subgroups = [CrystalGroup, KpointsGroup, StatesGroup]
class DensityGroup(DenPotGroup):
    """Group for files containing a density. Same requirements as `DenPotGroup`."""
class PotentialGroup(DenPotGroup):
    """
    Group for files containing a potential. Same requirements as `DenPotGroup`.

    NOTE(review): the `density` variable required by `DenPotGroup` is inherited as is,
    so a potential file is validated against the density variable -- confirm that
    this is the intended behavior.
    """
def validate_vars(path):
    """
    Validate the etsf variables declared in file `path`.

    Only the variables registered in `EtsfVariable.all_variables` that are found
    in the file are tested. A report is printed to the terminal if errors are found.

    Return:
        List with one entry per invalid variable. Each entry is the list of
        error messages (strings) for that variable. Empty list if no error.
    """
    ncdata = netCDF4.Dataset(path, mode="r")
    evars, all_errors = [], []

    try:
        for var in EtsfVariable.all_variables:
            if var.name not in ncdata.variables:
                continue
            elist = var.validate(ncdata)
            if elist:
                all_errors.append(elist)
                evars.append(var)
    finally:
        # Release the file even if one of the validators raises.
        ncdata.close()

    if not all_errors:
        return all_errors

    # Print errors
    try:
        title = "=== ERRORS IN FILE %s ===" % os.path.relpath(path)
    except Exception as exc:
        # I've seen this kind of error.
        logger.warning("relpath returned %s", exc)
        title = "=== ERRORS IN FILE %s ===" % path

    bar = "=" * len(title)
    cprint(bar, "magenta")
    cprint(title, "magenta")
    cprint(bar, "magenta")

    for var, elist in zip(evars, all_errors):
        cprint("%s: Found [%d] error(s):" % (var, len(elist)), "red")
        for i, e in enumerate(elist):
            print("[%d] " % i, e)

    return all_errors
def find_groups(path):
    """
    Find the etsf groups present in path and validate them.

    Every subclass of `EtsfGroup` is tested against the file and a line with
    the outcome is printed for each of them.

    Return:
        List with the group classes whose validation produced no error.
    """
    ncdata = netCDF4.Dataset(path, mode="r")
    groups = []

    try:
        for g in all_subclasses(EtsfGroup):
            # validate returns the list of errors: an empty list means success.
            if not g.validate(ncdata):
                groups.append(g)
                print("group:", g, "present and validated")
            else:
                print("group:", g, "present but not ok")
    finally:
        # Release the file even if one of the validators raises.
        ncdata.close()

    return groups
def validate_groups(path, groups):
    """
    Validate the presence and the consistency of a list of groups.

    Args:
        path: Path of the netcdf file.
        groups: List of `EtsfGroup` classes to be validated.

    Return:
        List with one entry per invalid group. Each entry is the list of
        error messages (strings) for that group. Empty list if no error.
        A report is printed to the terminal if errors are found.
    """
    ncdata = netCDF4.Dataset(path, mode="r")
    egroups, errors = [], []

    try:
        wrong_datamodel = ""
        # TODO: Activate this test.
        #if ncdata.data_model != "NETCDF4":
        #    wrong_datamodel = "Found data_model %s while it should be NETCDF4" % ncdata.data_model

        for group in groups:
            elist = group.validate(ncdata)
            if elist:
                egroups.append(group)
                errors.append(elist)
    finally:
        # Release the file even if one of the validators raises.
        ncdata.close()

    if wrong_datamodel or errors:
        # Print errors
        try:
            title = "=== ERRORS IN FILE %s ===" % os.path.relpath(path)
        except Exception as exc:
            # I've seen this kind of error.
            logger.warning("relpath returned %s", exc)
            title = "=== ERRORS IN FILE %s ===" % path

        bar = "=" * len(title)
        cprint(bar, "magenta")
        cprint(title, "magenta")
        cprint(bar, "magenta")

        for group, elist in zip(egroups, errors):
            print(group)
            for i, e in enumerate(elist):
                print("[%d]" % i, e)

        if wrong_datamodel:
            errors.append(wrong_datamodel)
            print(wrong_datamodel)

    return errors
def validate_ncfile(path):
    """
    This function validates the netcdf files produced by Abinit.

    The groups to validate are selected from the name of the file: first with the
    regular expressions used for DFPT files, then with the extension, i.e. the text
    that follows the last underscore of the basename.

    Return:
        List of errors as returned by `validate_groups`. Empty list for files that
        are explicitly ignored. A list with one error message (string) if the
        extension of the file is unknown.
    """
    # Every netcdf file produced by Abinit must be listed in this dictionary.
    # that maps the file extension to the list of Groups contained in the file.
    # Legacy files that do not have etsfio groups are tagged with None and ignored.
    # New netcdf files introduced in Abinit should try to use the etsfio specs as much
    # as possible and extend the format to treat new cases.
    # Remember that gmatteo is watching you so don't try to hack this code.
    ext2groups = {
        # GS files.
        "GSR.nc": [CrystalGroup, KpointsGroup, StatesGroup],
        "DEN.nc": [DensityGroup],
        "EIG.nc": None,
        "OUT.nc": None,
        "WFK.nc": [WavefunctionGroup],
        #"POT.nc": [PotentialGroup],
        #"VH.nc": [PotentialGroup],
        #"VXC.nc": [PotentialGroup],
        "VHXC.nc": [PotentialGroup],
        #"KDEN.nc": [DensityGroup],
        "HIST.nc": None,
        #"PAWDEN.nc": [DensityGroup],
        #"ELF.nc": [DensityGroup],
        #"ELF_DOWN.nc": [DensityGroup],
        #"ELF_UP.nc": [DensityGroup],
        "PSPS.nc": None,
        #"STM.nc": [],
        #"GDEN1.nc":
        #"GDEN2.nc":
        #"GDEN3.nc":
        #"KDEN.nc":
        #"LDEN.nc":
        # DFPT files.
        "DDB.nc": [CrystalGroup],
        "WFQ.nc": [WavefunctionGroup],
        #"DEN1.nc": [DensityGroup],
        #"POT1.nc": [PotentialGroup],
        #"EIGR2D.nc":
        #"EIGI2D.nc":
        # Anaddb files
        "anaddb.nc": [CrystalGroup],
        "EC.nc": [CrystalGroup],
        "PHBST.nc": [CrystalGroup],
        # GW files.
        #"KSS.nc": [WavefunctionGroup],
        "SCR.nc": [CrystalGroup, KpointsGroup, StatesGroup],
        "SUS.nc": [CrystalGroup, KpointsGroup, StatesGroup],
        "SIGRES.nc": [CrystalGroup, KpointsGroup, StatesGroup],
        #"QP_DEN.nc": [DensityGroup],
        # BSE files.
        "MDF.nc": [CrystalGroup, KpointsGroup, StatesGroup],
    }

    groups = None
    fname = os.path.basename(path)

    # DFPT files require regular expressions e.g.
    # _DS2_1WF6.nc, _DS2_DEN6, _DEN_POSITRON
    # Raw strings are needed: "\w", "\d" and "\." are invalid escapes in normal literals.
    re_1wf = re.compile(r"(\w+_)1WF(\d+)(\.nc)$")
    re_1den = re.compile(r"(\w+_)DEN(\d+)(\.nc)$")
    #re_1pot = re.compile(r"(\w+_)POT(\d+)(\.nc)$")

    if re_1wf.match(fname):
        groups = [WavefunctionGroup]
    if re_1den.match(fname):
        groups = [DensityGroup]
    if groups is not None:
        return validate_groups(path, groups)

    # The extension is the text after the last underscore
    # (the full basename if there is no underscore).
    ext = fname.split("_")[-1]
    try:
        groups = ext2groups[ext]
    except KeyError:
        errors = ["Unknown file extension in file %s" % fname]
        print(errors)
        return errors

    # Legacy files without etsfio groups are ignored.
    if groups is None:
        return []

    return validate_groups(path, groups)
|