diff --git a/pyproject.toml b/pyproject.toml index 061de2a..c7d4ef1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,7 +108,7 @@ package-dir = { "" = "src" } where = ["src"] [project.entry-points.'nomad.plugin'] -xml_parser = "nomad_parser_vasp.parsers:xml_entry_point" +vasp_parser = "nomad_parser_vasp.parsers:vasp_parser_entry_point" [tool.cruft] diff --git a/src/nomad_parser_vasp/parsers/__init__.py b/src/nomad_parser_vasp/parsers/__init__.py index 90e6ff2..0194233 100644 --- a/src/nomad_parser_vasp/parsers/__init__.py +++ b/src/nomad_parser_vasp/parsers/__init__.py @@ -2,24 +2,32 @@ from pydantic import Field -class VasprunXMLEntryPoint(ParserEntryPoint): +class VASPParserEntryPoint(ParserEntryPoint): parameter: int = Field(0, description='Custom configuration parameter') def load(self): from nomad.parsing import MatchingParserInterface return MatchingParserInterface( - parser_class_name='nomad_parser_vasp.parsers.xml_parser.VasprunXMLParser', + parser_class_name='nomad_parser_vasp.parsers.parser.VASPParser', **self.dict(), ) -xml_entry_point = VasprunXMLEntryPoint( +vasp_parser_entry_point = VASPParserEntryPoint( name='nomad-parser-vasp', python_package='nomad_parser_vasp', - code_name='VASP XML', + code_name='VASP', code_category='ab initio', entry_point_type='parser', - description='Parser for VASP output in XML format.', - mainfile_name_re='.*vasprun\.xml.*', + description='Parser for VASP XML and OUTCAR outputs', + mainfile_contents_re=( + r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*?\s*?\s*?\s*\s*vasp\s*?|^\svasp[\.\d]+.+?(?:\(build|complex)[\s\S]+?executed ' + r'on' + ), + mainfile_mime_re='(application/.*)|(text/.*)', + mainfile_name_re='.*[^/]*xml[^/]*', + mainfile_alternative=True, + supported_compressions=['gz', 'bz2', 'xz'], ) diff --git a/src/nomad_parser_vasp/parsers/myparser.py b/src/nomad_parser_vasp/parsers/myparser.py deleted file mode 100644 index 050aa41..0000000 --- a/src/nomad_parser_vasp/parsers/myparser.py +++ /dev/null @@ -1,29 +0,0 @@ -from typing import ( - TYPE_CHECKING, -) - -if TYPE_CHECKING: - from nomad.datamodel.datamodel import ( - EntryArchive, - ) - from structlog.stdlib import ( - BoundLogger, - ) - -from nomad.config import config -from nomad.datamodel.results import Material, Results -from nomad.parsing.parser import MatchingParser - -configuration = config.get_plugin_entry_point('nomad_parser_vasp.parsers:myparser') - - -class MyParser(MatchingParser): - def parse( - self, - mainfile: str, - archive: 'EntryArchive', - logger: 'BoundLogger', - child_archives: dict[str, 'EntryArchive'] = None, - ) -> None: - logger.info('MyParser.parse', parameter=configuration.parameter) - archive.results = Results(material=Material(elements=['H', 'O'])) diff --git a/src/nomad_parser_vasp/parsers/outcar_parser.py b/src/nomad_parser_vasp/parsers/outcar_parser.py new file mode 100644 index 0000000..af8c88a --- /dev/null +++ b/src/nomad_parser_vasp/parsers/outcar_parser.py @@ -0,0 +1,479 @@ +from typing import TYPE_CHECKING, Dict, List, Any + +if TYPE_CHECKING: + from nomad.datamodel.datamodel import ( + EntryArchive, + ) + from structlog.stdlib import ( + BoundLogger, + ) + +import numpy as np +import re + +from nomad.datamodel import EntryArchive +from nomad.parsing.file_parser import TextParser, Quantity +from nomad.parsing.file_parser.mapping_parser import ( + MetainfoParser, + TextParser as MappingTextParser, + Path, +) + +from nomad_parser_vasp.schema_packages.vasp_package import Simulation + + +RE_N = r'[\n\r]' + + +def get_key_values(val_in): + val = [v for v in val_in.split('\n') if '=' in v] + data = {} + pattern = re.compile(r'([A-Z_]+)\s*=\s*(\.?[a-zA-Z]*[\d\-\.\+\sE]*\.?)') + + def convert(v): + if isinstance(v, list): + v = [convert(vi) for vi in v] + elif isinstance(v, str): + try: + v = float(v) if '.' in v else int(v) + except Exception: + pass + else: + pass + return v + + for v in val: + res = pattern.findall(v) + for resi in res: + vi = resi[1].split() + vi = vi[0] if len(vi) == 1 else vi + if isinstance(vi, str): + vi = vi.strip() + vi_upper = vi.upper() + if vi_upper in ['T', '.TRUE.', 'TRUE']: + vi = True + elif vi_upper in ['F', '.FALSE.', 'FALSE']: + vi = False + data[resi[0]] = convert(vi) + return data + + +class OutcarTextParser(TextParser): + def __init__(self): + self._chemical_symbols = None + + super().__init__(None) + + def init_quantities(self): + def str_to_array(val_in): + val = [ + re.findall(r'(\-?\d+\.[\dEe]+)', v) + for v in val_in.strip().split('\n') + if '--' not in v + ] + return [ + np.array([v[0:3] for v in val], float), + np.array([v[3:6] for v in val], float), + ] + + def str_to_stress(val_in): + val = [float(v) for v in val_in.strip().split()] + stress = np.zeros((3, 3)) + stress[0][0] = val[0] + stress[1][1] = val[1] + stress[2][2] = val[2] + stress[0][1] = stress[1][0] = val[3] + stress[1][2] = stress[2][1] = val[4] + stress[0][2] = stress[2][0] = val[5] + return stress + + def str_to_header(val_in): + ( + version, + build_date, + build_type, + platform, + date, + time, + parallel, + ) = val_in.split() + parallel = 'parallel' if parallel == 'running' else parallel + subversion = '%s %s %s' % (build_date, build_type, parallel) + date = date.replace('.', ' ') + return dict( + version=version, + subversion=subversion, + platform=platform, + date=date, + time=time, + ) + + def str_to_positions(val_in): + re_position = re.compile( + r'\d*\s*(\-*\d+\.\d+)\s*(\-*\d+\.\d+)\s*(\-*\d+\.\d+)' + ) + positions = [] + for val in val_in.strip().split('\n'): + position = re_position.search(val) + if position: + positions.append(position.groups()) + return np.array(positions, dtype=float) + + def str_to_eigenvalues(val_in): + val = [] + for line in val_in.strip().splitlines(): + val.extend(['nan' if '*' in v else v for v in line.split()]) + return np.array(val, np.float64) + + scf_iteration = [ + Quantity( + 'energy_total', + r'free energy\s*TOTEN\s*=\s*([\d\.\-]+)\s*eV', + repeats=False, + dtype=float, + ), + Quantity( + 'energy_entropy0', + r'energy without entropy\s*=\s*([\d\.\-]+)', + repeats=False, + dtype=float, + ), + Quantity( + 'energy_T0', + r'energy\(sigma\->0\)\s*=\s*([\d\.\-]+)', + repeats=False, + dtype=float, + ), + Quantity( + 'energy_components', + r'Free energy of the ion-electron system \(eV\)\s*\-+([\s\S]+?)\-{10}', + str_operation=get_key_values, + convert=False, + ), + Quantity( + 'time', + r'LOOP\: +cpu time +([\d\.]+)\: +real time +([\d\.]+)', + dtype=np.dtype(np.float64), + ), + ] + + calculation_quantities = [ + Quantity( + 'scf_iteration', + r'Iteration\s*\d+\(\s*\d+\s*\)([\s\S]+?energy\(sigma\->0\)\s*=\s*.+)', + repeats=True, + sub_parser=TextParser(quantities=scf_iteration), + ), + Quantity( + 'energies', + r'FREE ENERGIE OF THE ION-ELECTRON SYSTEM \(eV\)\s*\-+\s*([\s\S]+?)\-{10}', + sub_parser=TextParser( + quantities=[ + Quantity( + 'energy_total', + r'free\s*energy\s*TOTEN\s*=\s*([\-\d\.]+)', + repeats=False, + dtype=float, + ), + Quantity( + 'energy_entropy0', + r'energy\s*without\s*entropy\s*=\s*([\-\d\.]+)', + repeats=False, + dtype=float, + ), + Quantity( + 'energy_T0', + r'energy\(sigma\->0\)\s*=\s*([\-\d\.]+)', + repeats=False, + dtype=float, + ), + ] + ), + ), + Quantity( + 'stress', + r'in kB\s*(\-?\d+\.\d+)\s*(\-?\d+\.\d+)\s*(\-?\d+\.\d+)\s*' + r'(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)', + str_operation=str_to_stress, + convert=False, + ), + Quantity( + 'positions_forces', + r'POSITION\s*TOTAL\-FORCE \(eV/Angst\)\s*\-+\s*([\d\.\s\-E]+)', + str_operation=str_to_array, + convert=False, + ), + Quantity( + 'lattice_vectors', + r'direct lattice vectors\s*reciprocal lattice vectors\s*' + r'(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)' + r'(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)' + r'(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)', + str_operation=str_to_array, + convert=False, + ), + Quantity( + 'converged', + r'aborting loop because (EDIFF is reached)', + repeats=False, + dtype=str, + convert=False, + ), + Quantity( + 'fermi_energy', r'E\-fermi :\s*([\d\.]+)', dtype=str, repeats=False + ), + Quantity( + 'eigenvalues', + r'band No\.\s*band energies\s*occupation\s*([\d\.\s\-\*]+?)(?:k\-point|spin|\-{10})', + repeats=True, + dtype=float, + str_operation=str_to_eigenvalues, + ), + Quantity('convergence', r'(aborting loop because EDIFF is reached)'), + Quantity( + 'time', + r'LOOP\+\: +cpu time +([\d\.]+)\: +real time +([\d\.]+)', + dtype=np.dtype(np.float64), + ), + ] + + self._quantities = [ + Quantity( + 'calculation', + r'(\-\-\s*Iteration\s*\d+\(\s*1\s*\)\s*[\s\S]+?)' + r'((?:FREE ENERGIE OF THE ION\-ELECTRON SYSTEM \(eV\)[\s\S]+?LOOP\+.+)|\Z)', + repeats=True, + sub_parser=TextParser(quantities=calculation_quantities), + ), + Quantity( + 'header', + r'vasp\.([\d\.]+)\s*(\w+)\s*[\s\S]+?\)\s*(\w+)\s*' + r'executed on\s*(\w+)\s*date\s*([\d\.]+)\s*([\d\:]+)\s*(\w+)', + repeats=False, + str_operation=str_to_header, + convert=False, + ), + Quantity( + 'parameters', + r'Startparameter for this run:([\s\S]+?)\-{100}', + str_operation=get_key_values, + repeats=False, + convert=False, + ), + Quantity( + 'ions_per_type', r'ions per type =\s*([ \d]+)', dtype=int, repeats=False + ), + Quantity( + 'species', + r'(\w+) +([A-Z][a-z]*).+?:\s*energy of atom +\d+', + dtype=str, + repeats=True, + ), # TODO: deprecate + Quantity( + 'kpoints', + r'Following reciprocal coordinates:[\s\S]+?\n([\d\.\s\-]+)', + repeats=False, + dtype=float, + ), + Quantity('nbands', r'NBANDS\s*=\s*(\d+)', dtype=int, repeats=False), + Quantity( + 'lattice_vectors', + r'direct lattice vectors\s*reciprocal lattice vectors\s*' + r'(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)' + r'(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)' + r'(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)(\-?\d+\.\d+\s*)', + str_operation=str_to_array, + convert=False, + ), + Quantity( + 'positions', + r'ion\s*position\s*nearest neighbor table([\s\S]+?)LATTYP', + str_operation=str_to_positions, + convert=False, + ), + # alternative format + Quantity( + 'positions', + r'position of ions in cartesian coordinates\s*\(Angst\):([\s\S]+?)\n *\n', + str_operation=str_to_positions, + convert=False, + ), + Quantity( + 'response_functions', + r'\s*Response functions by sum over occupied states\:([\s\S]+?)(?:\-\-\-\-\-\-)', + repeats=False, + sub_parser=TextParser( + quantities=[ + Quantity( + 'input_parameters', + rf'{RE_N}* *(\w+) *\= *([\w\.\-]+) *.*', + repeats=True, + ) + ] + ), + ), + ] + + +class OutcarParser(MappingTextParser): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.xc_functional_mapping = { + '--': ['GGA_X_PBE', 'GGA_C_PBE'], + 'HL': ['LDA_C_HL'], + 'WI': ['LDA_C_WIGNER'], + 'PZ': ['LDA_C_PZ'], + '91': ['GGA_X_PW91', 'GGA_C_PW91'], + 'PE': ['GGA_X_PBE', 'GGA_C_PBE'], + 'PBE': ['GGA_X_PBE', 'GGA_C_PBE'], + 'RE': ['GGA_X_PBE_R'], + 'VW': ['LDA_C_VWN'], + 'RP': ['GGA_X_RPBE', 'GGA_C_PBE'], + 'PS': ['GGA_C_PBE_SOL', 'GGA_X_PBE_SOL'], + 'AM': ['GGA_X_AM05', 'GGA_C_AM05'], + 'B3': ['HYB_GGA_XC_B3LYP3'], + 'B5': ['HYB_GGA_XC_B3LYP5'], + 'BF': ['GGA_X_BEEFVDW', 'GGA_XC_BEEFVDW'], + 'CO': [], # TODO check if this is ever used + 'OR': ['GGA_X_OPTPBE_VDW'], + 'BO': ['GGA_X_OPTB88_VDW'], + 'MK': ['GGA_X_OPTB86B_VDW'], + 'ML': ['VDW_XC_DF2'], + 'CX': ['VDW_XC_DF_CX'], + 'TPSS': ['MGGA_X_TPSS', 'MGGA_C_TPSS'], + 'RTPSS': ['MGGA_X_RTPSS'], + 'M06L': ['MGGA_C_M06_L'], + 'MS0': ['MGGA_X_MS0'], + 'MS1': ['MGGA_X_MS1'], + 'MS2': ['MGGA_X_MS2'], + 'SCAN': ['MGGA_X_SCAN'], + 'RSCAN': ['MGGA_X_RSCAN', 'MGGA_C_RSCAN'], + 'R2SCAN': ['MGGA_X_R2SCAN', 'MGGA_C_R2SCAN'], + 'SCANL': ['MGGA_X_SCANL', 'MGGA_C_SCANL'], + 'RSCANL': [], # not in LibXC, nor any paper, just deorbitalized SCANL + 'R2SCANL': ['MGGA_X_R2SCANL', 'MGGA_C_R2SCANL'], + 'OFR2': [], + 'MBJ': ['MGGA_X_BJ06'], + 'LBMJ': [], # TODO ask Miguel Marquez + 'HLE17': ['MGGA_XC_HLE17'], # TODO check if this is ever used + 'RA': ['LDA_C_PW_RPA'], # TODO check if this is ever used + } + + def get_version(self, source: Dict[str, Any]) -> str: + return ' '.join( + [ + source[key] + for key in ['version', 'subversion', 'platform'] + if source.get(key) + ] + ) + + def get_data(self, source: Any, **kwargs) -> Any: + if isinstance(source, dict) and source.get('value') is not None: + return source['value'] + path = kwargs.get('path') + if path is None: + return + parser = Path(path=path) + return parser.get_data(source) + + def get_energy_contributions( + self, source: Dict[str, Any], **kwargs + ) -> List[Dict[str, Any]]: + exclude = kwargs.get('exclude', []) + return [ + {'name': key, 'value': val} + for key, val in source.items() + if key not in exclude + ] + + def get_eigenvalues( + self, eigenvalues: np.ndarray, parameters: Dict[str, Any] + ) -> List[Dict[str, Any]]: + ispin = parameters.get('ISPIN', 1) + n_kpts = len(eigenvalues) // ispin + n_bands = len(eigenvalues[0]) // 3 + eigenvalues = np.reshape(eigenvalues, (ispin, n_kpts, n_bands, 3)) + data = [] + for nspin in range(ispin): + eigs, occs = eigenvalues[nspin].T[1:3] + data.append(dict(eigenvalues=eigs.T, occupations=occs.T, n_bands=n_bands)) + return data + + def get_xc_functionals(self, parameters: Dict[str, Any]) -> List[Dict[str, Any]]: + xc_functionals = [] + if parameters.get('LHFCALC', False): + xc_functional = {} + gga = parameters.get('GGA', 'PE') + aexx = parameters.get('AEXX', 0.0) + aggax = parameters.get('AGGAX', 1.0) + aggac = parameters.get('AGGAC', 1.0) + aldac = parameters.get('ALDAC', 1.0) + hfscreen = parameters.get('HFSCREEN', 0.0) + + if hfscreen == 0.2: + xc_functional['name'] = 'HYB_GGA_XC_HSE06' + elif hfscreen == 0.3: + xc_functional['name'] = 'HYB_GGA_XC_HSE03' + elif ( + gga == 'B3' + and aexx == 0.2 + and aggax == 0.72 + and aggac == 0.81 + and aldac == 0.19 + ): + xc_functional['name'] = 'HYB_GGA_XC_B3LYP3' + elif aexx == 1.0 and aldac == 0.0 and aggac == 0.0: + xc_functional['name'] = 'HF_X' + elif gga == 'PE': + xc_functional['name'] = 'HYB_GGA_XC_PBEH' + else: + xc_functional['name'] = f'HYB_GGA_XC_{gga}' + xc_functionals.append(xc_functional) + else: + metagga = parameters.get('METAGGA') + if metagga: + functionals = self.xc_functional_mapping.get( + metagga, [metagga] + ) + else: + functionals = self.xc_functional_mapping.get( + parameters.get('GGA'), [] + ) + for functional in functionals: + xc_functionals.append({'name': functional}) + return xc_functionals + + +class VASPOutcarParser: + def parse( + self, + mainfile: 'str', + archive: 'EntryArchive', + logger: 'BoundLogger', + child_archives: 'dict[str, EntryArchive]' = None, + ) -> None: + # set up archive parser + archive_data_parser = MetainfoParser() + archive_data = Simulation() + archive_data_parser.data_object = archive_data + archive_data_parser.annotation_key = 'outcar' + + # set up outcar parser + source_parser = OutcarParser() + source_parser.text_parser = OutcarTextParser() + source_parser.filepath = mainfile + + # TODO remove this for debug only + self.archive_data_parser = archive_data_parser + self.source_parser = source_parser + + # convert + source_parser.convert(archive_data_parser) + + # assign simulation section to archive data + archive.data = archive_data_parser.data_object + + # close file handles + # archive_data_parser.close() + # source_parser.close() diff --git a/src/nomad_parser_vasp/parsers/parser.py b/src/nomad_parser_vasp/parsers/parser.py new file mode 100644 index 0000000..61b34a9 --- /dev/null +++ b/src/nomad_parser_vasp/parsers/parser.py @@ -0,0 +1,32 @@ +from typing import TYPE_CHECKING, Dict, List, Any + +if TYPE_CHECKING: + from nomad.datamodel.datamodel import ( + EntryArchive, + ) + from structlog.stdlib import ( + BoundLogger, + ) + +from nomad_parser_vasp.parsers.xml_parser import VASPXMLParser +from nomad_parser_vasp.parsers.outcar_parser import VASPOutcarParser + + +class VASPParser: + def parse( + self, + mainfile: str, + archive: 'EntryArchive', + logger: 'BoundLogger', + child_archives: dict[str, 'EntryArchive'] = None, + ) -> None: + is_outcar = 'outcar' in mainfile.lower() + + if is_outcar: + parser = VASPOutcarParser() + + else: + parser = VASPXMLParser() + # TODO remove this for debug + self.parser = parser + parser.parse(mainfile, archive, logger, child_archives) diff --git a/src/nomad_parser_vasp/parsers/xml_parser.py b/src/nomad_parser_vasp/parsers/xml_parser.py index 44523c5..6933df4 100644 --- a/src/nomad_parser_vasp/parsers/xml_parser.py +++ b/src/nomad_parser_vasp/parsers/xml_parser.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Dict, List, Any import numpy as np @@ -11,26 +11,43 @@ ) from nomad.config import config -from nomad.parsing.file_parser.mapping_parser import MetainfoParser, XMLParser +from nomad.parsing.file_parser.mapping_parser import MetainfoParser, XMLParser, Path from nomad_parser_vasp.schema_packages.vasp_package import Simulation configuration = config.get_plugin_entry_point( - 'nomad_parser_vasp.parsers:xml_entry_point' + 'nomad_parser_vasp.parsers:vasp_parser_entry_point' ) class RunXMLParser(XMLParser): - @staticmethod - def mix_alpha(mix: float, cond: bool) -> float: + def mix_alpha(self, mix: float, cond: bool) -> float: return mix if cond else 0 - @staticmethod - def get_eigenvalues(array: list) -> np.ndarray: + def get_eigenvalues(self, array: list) -> np.ndarray: return np.array(array).T[1] if array is not None else np.array([]) + def get_energy_contributions( + self, source: Dict[str, Any], **kwargs + ) -> List[Dict[str, Any]]: + return [ + c + for c in source + if c.get(f'{self.attribute_prefix}name') not in kwargs.get('exclude', []) + ] -class VasprunXMLParser: + def get_data(self, source: Dict[str, Any], **kwargs) -> float: + if source.get(self.value_key): + return source[self.value_key] + path = kwargs.get('path') + if path is None: + return + + parser = Path(path=path) + return parser.get_data(source) + + +class VASPXMLParser: def parse( self, mainfile: 'str', @@ -44,14 +61,20 @@ def parse( data_parser.annotation_key = 'xml' data = Simulation() - xml = RunXMLParser(filepath=mainfile) + xml_parser = VasprunParser(filepath=mainfile) data_parser.data_object = data - xml.convert(data_parser) + xml_parser.convert(data_parser) data_parser = MetainfoParser() data_parser.annotation_key = 'xml2' data_parser.data_object = data - xml.convert(data_parser) + xml_parser.convert(data_parser) archive.data = data + + # close file objects + # data_parser.close() + # xml_parser.close() + self.data_parser = data_parser + self.xml_parser = xml_parser diff --git a/src/nomad_parser_vasp/schema_packages/vasp_package.py b/src/nomad_parser_vasp/schema_packages/vasp_package.py index 926ec84..f48e326 100644 --- a/src/nomad_parser_vasp/schema_packages/vasp_package.py +++ b/src/nomad_parser_vasp/schema_packages/vasp_package.py @@ -11,66 +11,87 @@ model_system, numerical_settings, outputs, + properties, ) m_package = SchemaPackage() +OUTCAR_ANNOTATION_KEY = 'outcar' +XML_ANNOTATION_KEY = 'xml' +XML2_ANNOTATION_KEY = 'xml2' + class Program(general.Program): - general.Program.name.m_annotations['xml'] = MappingAnnotationModel( - path='.i[?"@name"==\'program\'] | [0].__value', + general.Program.name.m_annotations[XML_ANNOTATION_KEY] = MappingAnnotationModel( + mapper='.i[?"@name"==\'program\'] | [0].__value', + ) + + general.Program.version.m_annotations[XML_ANNOTATION_KEY] = MappingAnnotationModel( + mapper='.i[?"@name"==\'version\'] | [0].__value', ) - general.Program.version.m_annotations['xml'] = MappingAnnotationModel( - path='.i[?"@name"==\'version\'] | [0].__value', + general.Program.version.m_annotations[OUTCAR_ANNOTATION_KEY] = ( + MappingAnnotationModel( + mapper=('get_version', ['.@']), + ) ) # Apply similar logic here - general.Program.compilation_host.m_annotations['xml'] = MappingAnnotationModel( - path='.i[?"@name"==\'platform\'] | [0].__value' + general.Program.compilation_host.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.i[?"@name"==\'platform\'] | [0].__value') ) class XCFunctional(model_method.XCFunctional): - model_method.XCFunctional.libxc_name.m_annotations = dict( - xml=MappingAnnotationModel( - path='.i[?"@name"==\'GGA\'] | [0].__value' # TODO add LDA & mGGA, convert_xc - ) + model_method.XCFunctional.libxc_name.m_annotations[XML_ANNOTATION_KEY] = MappingAnnotationModel( + mapper='.i[?"@name"==\'GGA\'] | [0].__value' # TODO add LDA & mGGA, convert_xc + ) + model_method.XCFunctional.libxc_name.m_annotations[OUTCAR_ANNOTATION_KEY] = MappingAnnotationModel( + mapper='.name' ) class KMesh(numerical_settings.KMesh): - numerical_settings.KMesh.grid.m_annotations['xml'] = MappingAnnotationModel( - path='.generation.v[?"@name"==\'divisions\'] | [0].__value' + numerical_settings.KMesh.grid.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel( + mapper='.generation.v[?"@name"==\'divisions\'] | [0].__value' + ) ) - numerical_settings.KMesh.offset.m_annotations['xml'] = MappingAnnotationModel( - path='.generation.v[?"@name"==\'shift\'] | [0].__value' + numerical_settings.KMesh.offset.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel( + mapper='.generation.v[?"@name"==\'shift\'] | [0].__value' + ) ) - numerical_settings.KMesh.points.m_annotations['xml'] = MappingAnnotationModel( - path='.varray[?"@name"==\'kpointlist\'].v | [0]' + numerical_settings.KMesh.points.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.varray[?"@name"==\'kpointlist\'].v | [0]') ) - numerical_settings.KMesh.weights.m_annotations['xml'] = MappingAnnotationModel( - path='.varray[?"@name"==\'weights\'].v | [0]' + numerical_settings.KMesh.weights.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.varray[?"@name"==\'weights\'].v | [0]') ) class KSpace(numerical_settings.KSpace): - numerical_settings.KSpace.k_mesh.m_annotations['xml'] = MappingAnnotationModel( - path='.@' + numerical_settings.KSpace.k_mesh.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.@') ) class DFT(model_method.DFT): - model_method.DFT.xc_functionals.m_annotations['xml'] = MappingAnnotationModel( - path='.separator[?"@name"==\'electronic exchange-correlation\']' + model_method.DFT.xc_functionals.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel( + mapper='.separator[?"@name"==\'electronic exchange-correlation\']' + ) + ) + model_method.DFT.xc_functionals.m_annotations[OUTCAR_ANNOTATION_KEY] = MappingAnnotationModel( + mapper=('get_xc_functionals', ['.@']) ) - model_method.DFT.exact_exchange_mixing_factor.m_annotations['xml'] = ( + model_method.DFT.exact_exchange_mixing_factor.m_annotations[XML_ANNOTATION_KEY] = ( MappingAnnotationModel( - operator=( + mapper=( 'mix_alpha', [ '.i[?"@name"==\'HFALPHA\'] | [0].__value', @@ -80,80 +101,188 @@ class DFT(model_method.DFT): ) ) # TODO convert vasp bool - numerical_settings.KSpace.m_def.m_annotations['xml'] = MappingAnnotationModel( - path='modeling.kpoints' + numerical_settings.KSpace.m_def.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='modeling.kpoints') ) class AtomicCell(model_system.AtomicCell): - model_system.AtomicCell.positions.m_annotations = dict( - xml=MappingAnnotationModel(path='.varray.v', unit='angstrom') + model_system.AtomicCell.positions.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.varray.v', unit='angstrom') + ) + model_system.AtomicCell.positions.m_annotations[OUTCAR_ANNOTATION_KEY] = ( + MappingAnnotationModel( + mapper='.positions_forces', unit='angstrom', search='@ | [0]' + ) ) - model_system.AtomicCell.lattice_vectors.m_annotations['xml'] = ( + model_system.AtomicCell.lattice_vectors.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel( + mapper='.crystal.varray[?"@name"==\'basis\'] | [0].v', unit='angstrom' + ) + ) + model_system.AtomicCell.lattice_vectors.m_annotations[OUTCAR_ANNOTATION_KEY] = ( MappingAnnotationModel( - path='.crystal.varray[?"@name"==\'basis\'] | [0].v', unit='angstrom' + mapper='.lattice_vectors', unit='angstrom', search='@ | [0]' ) ) class ModelSystem(general.ModelSystem): - model_system.AtomicCell.m_def.m_annotations['xml'] = MappingAnnotationModel( - path='.structure' + model_system.AtomicCell.m_def.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.structure') + ) + model_system.AtomicCell.m_def.m_annotations[OUTCAR_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.@') + ) + + +class EnergyContribution(properties.energies.EnergyContribution): + properties.energies.EnergyContribution.name.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='."@name"') + ) + properties.energies.EnergyContribution.name.m_annotations[OUTCAR_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.name') + ) + # value is already defined in TotalEnergy since they use the same def + # get_energy function should be able to handle extraction from both sources + + +class TotalEnergy(properties.energies.TotalEnergy): + properties.energies.TotalEnergy.value.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel( + mapper=( + 'get_data', + ['.@'], + dict(path='.i[?"@name"==\'e_fr_energy\'] | [0].__value'), + ), + unit='eV', + ) + ) + properties.energies.TotalEnergy.value.m_annotations[OUTCAR_ANNOTATION_KEY] = ( + MappingAnnotationModel( + mapper=('get_data', ['.@'], dict(path='.energy_total')), unit='eV' + ) + ) + + properties.energies.TotalEnergy.contributions.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel( + mapper=('get_energy_contributions', ['.i'], dict(exclude=['e_fr_energy'])) + ) + ) + properties.energies.TotalEnergy.contributions.m_annotations[ + OUTCAR_ANNOTATION_KEY + ] = MappingAnnotationModel( + mapper=('get_energy_contributions', ['.@'], dict(exclude=['energy_total'])) ) -class TotalEnergy(outputs.TotalEnergy): - outputs.TotalEnergy.value.m_annotations['xml'] = MappingAnnotationModel( - path='.energy.i[?"@name"==\'e_fr_energy\'] | [0].__value', unit='eV' +class TotalForce(properties.forces.TotalForce): + properties.forces.TotalForce.value.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel( + mapper=('get_data', ['.@'], dict(path='.varray.v')), unit='eV/angstrom' + ) + ) + properties.forces.TotalForce.value.m_annotations[OUTCAR_ANNOTATION_KEY] = ( + MappingAnnotationModel( + mapper=('get_data', ['.@'], dict(path='.positions_forces | [1]')), + unit='eV/angstrom', + ) ) class ElectronicEigenvalues(outputs.ElectronicEigenvalues): - outputs.ElectronicEigenvalues.n_bands.m_annotations['xml'] = MappingAnnotationModel( - path='length(.array.set.set.set[0].r)' + outputs.ElectronicEigenvalues.n_bands.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='length(.array.set.set.set[0].r)') ) - outputs.ElectronicEigenvalues.n_bands.m_annotations['xml2'] = ( - MappingAnnotationModel(path='length(.array.set.set.set[0].r)') + outputs.ElectronicEigenvalues.n_bands.m_annotations[XML2_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='length(.array.set.set.set[0].r)') ) + outputs.ElectronicEigenvalues.n_bands.m_annotations[OUTCAR_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.n_bands') + ) + # TODO This only works for non-spin pol - outputs.ElectronicEigenvalues.occupation.m_annotations['xml2'] = ( - MappingAnnotationModel(operator=('get_eigenvalues', ['.array.set.set.set[].r'])) + outputs.ElectronicEigenvalues.occupation.m_annotations[XML2_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper=('get_eigenvalues', ['.array.set.set.set[].r'])) + ) + outputs.ElectronicEigenvalues.occupation.m_annotations[OUTCAR_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.occupations') + ) + outputs.ElectronicEigenvalues.value.m_annotations[OUTCAR_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.eigenvalues') ) class Ouputs(outputs.Outputs): - outputs.Outputs.total_energies.m_annotations['xml'] = MappingAnnotationModel( - path='.@' + outputs.Outputs.total_energies.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.energy') ) - outputs.Outputs.electronic_eigenvalues.m_annotations = dict( - xml=MappingAnnotationModel(path='.eigenvalues'), - xml2=MappingAnnotationModel(path='.eigenvalues'), + outputs.Outputs.total_energies.m_annotations[OUTCAR_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.energies') + ) + outputs.Outputs.total_forces.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.@') + ) + outputs.Outputs.total_forces.m_annotations[OUTCAR_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.@') + ) + outputs.Outputs.electronic_eigenvalues.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.eigenvalues') + ) + outputs.Outputs.electronic_eigenvalues.m_annotations[XML2_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.eigenvalues') + ) + outputs.Outputs.electronic_eigenvalues.m_annotations[OUTCAR_ANNOTATION_KEY] = ( + MappingAnnotationModel( + mapper=('get_eigenvalues', ['.eigenvalues', 'parameters']) + ) ) class Simulation(general.Simulation): - general.Simulation.program.m_annotations['xml'] = MappingAnnotationModel( - path='.generator' + general.Simulation.program.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.generator') + ) + general.Simulation.program.m_annotations[OUTCAR_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.header') ) - model_method.DFT.m_def.m_annotations['xml'] = MappingAnnotationModel( - path='.parameters.separator[?"@name"==\'electronic\']' + model_method.DFT.m_def.m_annotations[XML_ANNOTATION_KEY] = MappingAnnotationModel( + mapper='.parameters.separator[?"@name"==\'electronic\']' + ) + model_method.DFT.m_def.m_annotations[OUTCAR_ANNOTATION_KEY] = MappingAnnotationModel( + mapper='parameters' ) - general.Simulation.model_system.m_annotations['xml'] = MappingAnnotationModel( - path='.calculation' + general.Simulation.model_system.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.calculation') + ) + general.Simulation.model_system.m_annotations[OUTCAR_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.calculation') ) - general.Simulation.outputs.m_annotations = dict( - xml=MappingAnnotationModel(path='.calculation'), - xml2=MappingAnnotationModel(path='.calculation'), + general.Simulation.outputs.m_annotations[XML_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.calculation') + ) + general.Simulation.outputs.m_annotations[XML2_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.calculation') + ) + general.Simulation.outputs.m_annotations[OUTCAR_ANNOTATION_KEY] = ( + MappingAnnotationModel(mapper='.calculation') ) # note: vasprun.xml has many meta fields, explaining field semantics -Simulation.m_def.m_annotations['xml'] = MappingAnnotationModel(path='modeling') -Simulation.m_def.m_annotations['xml2'] = MappingAnnotationModel(path='modeling') +Simulation.m_def.m_annotations[XML_ANNOTATION_KEY] = MappingAnnotationModel( + mapper='modeling' +) +Simulation.m_def.m_annotations[XML2_ANNOTATION_KEY] = MappingAnnotationModel( + mapper='modeling' +) +Simulation.m_def.m_annotations[OUTCAR_ANNOTATION_KEY] = MappingAnnotationModel( + mapper='@' +) """ @@ -163,49 +292,49 @@ class Simulation(general.Simulation): """ """ -...forces.m_annotations['xml'] = MappingAnnotationModel( - path='varray[?"@name"==\'forces\'] | [0].__value' +...forces.m_annotations[XML_ANNOTATION_KEY] = MappingAnnotationModel( + mapper='varray[?"@name"==\'forces\'] | [0].__value' ) -...stress.m_annotations['xml'] = MappingAnnotationModel( - path='varray[?"@name"==\'stress\'] | [0].__value' +...stress.m_annotations[XML_ANNOTATION_KEY] = MappingAnnotationModel( + mapper='varray[?"@name"==\'stress\'] | [0].__value' ) -cell_volume.m_annotations['xml'] = MappingAnnotationModel( - path='crystal.i[?"@name"==\'volume\'] | [0].__value' +cell_volume.m_annotations[XML_ANNOTATION_KEY] = MappingAnnotationModel( + mapper='crystal.i[?"@name"==\'volume\'] | [0].__value' ) -reciprocal_lattice_vectors.m_annotations['xml'] = MappingAnnotationModel( - path='crystal.varray[?"@name"==\'rec_basis\'] | [0].__value' +reciprocal_lattice_vectors.m_annotations[XML_ANNOTATION_KEY] = MappingAnnotationModel( + mapper='crystal.varray[?"@name"==\'rec_basis\'] | [0].__value' ) -total_free_energy.m_annotations['xml'] = MappingAnnotationModel( - path='calculation.energy.i[?"@name"==\'e_fr_energy\'] | [0].__value' +total_free_energy.m_annotations[XML_ANNOTATION_KEY] = MappingAnnotationModel( + mapper='calculation.energy.i[?"@name"==\'e_fr_energy\'] | [0].__value' ) -total_internal_energy.m_annotations['xml'] = MappingAnnotationModel( - path='calculation.energy.i[?"@name"==\'e_0_energy\'] | [0].__value' +total_internal_energy.m_annotations[XML_ANNOTATION_KEY] = MappingAnnotationModel( + mapper='calculation.energy.i[?"@name"==\'e_0_energy\'] | [0].__value' ) -...eigenvalues.m_annotations['xml'] = MappingAnnotationModel( - path='.eigenvalues.array' +...eigenvalues.m_annotations[XML_ANNOTATION_KEY] = MappingAnnotationModel( + mapper='.eigenvalues.array' ) -ElectronicEigenvalues.spin_channel.m_annotations['xml'] = MappingAnnotationModel( - path='.eigenvalues.set.set[?comment==\'spin 1\'] | [0].__value' +ElectronicEigenvalues.spin_channel.m_annotations[XML_ANNOTATION_KEY] = MappingAnnotationModel( + mapper='.eigenvalues.set.set[?comment==\'spin 1\'] | [0].__value' ) -ElectronicEigenvalues.reciprocal_cell.m_annotations['xml'] = MappingAnnotationModel( - path=ElectronicEigenvalues.spin_channel.m_annotations['xml'] +ElectronicEigenvalues.reciprocal_cell.m_annotations[XML_ANNOTATION_KEY] = MappingAnnotationModel( + mapper=ElectronicEigenvalues.spin_channel.m_annotations[XML_ANNOTATION_KEY] + '.set[?comment==\'kpoint 1\'] | [0].__value' ) -ElectronicEigenvalues.occupation.m_annotations['xml'] = MappingAnnotationModel( - path=ElectronicEigenvalues.reciprocal_cell.m_annotations['xml'] + '.r[0] | [0].__value' +ElectronicEigenvalues.occupation.m_annotations[XML_ANNOTATION_KEY] = MappingAnnotationModel( + mapper=ElectronicEigenvalues.reciprocal_cell.m_annotations[XML_ANNOTATION_KEY] + '.r[0] | [0].__value' ) -ElectronicEigenvalues.value.m_annotations['xml'] = MappingAnnotationModel( - path=ElectronicEigenvalues.reciprocal_cell.m_annotations['xml'] + '.r[1] | [0].__value' +ElectronicEigenvalues.value.m_annotations[XML_ANNOTATION_KEY] = MappingAnnotationModel( + mapper=ElectronicEigenvalues.reciprocal_cell.m_annotations[XML_ANNOTATION_KEY] + '.r[1] | [0].__value' ) """