Skip to content

Commit

Permalink
Refactor basic parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
mbercx committed May 6, 2023
1 parent b21076c commit 3edacd7
Show file tree
Hide file tree
Showing 30 changed files with 442 additions and 795 deletions.
4 changes: 0 additions & 4 deletions src/aiida_quantumespresso/calculations/dos.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@ def define(cls, spec):
spec.output('output_parameters', valid_type=orm.Dict)
spec.output('output_dos', valid_type=orm.XyData)
spec.default_output_node = 'output_parameters'
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(330, 'ERROR_READING_DOS_FILE',
message='The dos file could not be read from the retrieved folder.')
# yapf: enable
4 changes: 0 additions & 4 deletions src/aiida_quantumespresso/calculations/matdyn.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,6 @@ def define(cls, spec):
spec.output('output_phonon_bands', valid_type=orm.BandsData)
spec.default_output_node = 'output_parameters'

spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(330, 'ERROR_OUTPUT_FREQUENCIES',
message='The output frequencies file could not be read from the retrieved folder.')
spec.exit_code(410, 'ERROR_OUTPUT_KPOINTS_MISSING',
Expand Down
8 changes: 8 additions & 0 deletions src/aiida_quantumespresso/calculations/namelists.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,14 @@ def define(cls, spec):
help='Use an additional node for special settings')
spec.input('parent_folder', valid_type=(RemoteData, FolderData, SinglefileData), required=False,
help='Use a local or remote folder as parent folder (for restarts and similar)')
spec.exit_code(302, 'ERROR_OUTPUT_STDOUT_MISSING',
message='The retrieved folder did not contain the required stdout output file.')
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(311, 'ERROR_OUTPUT_STDOUT_PARSE',
message='The stdout output file could not be parsed.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
# yapf: enable

@classmethod
Expand Down
4 changes: 0 additions & 4 deletions src/aiida_quantumespresso/calculations/projwfc.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,6 @@ def define(cls, spec):
message='The retrieved temporary folder could not be accessed.')
spec.exit_code(303, 'ERROR_OUTPUT_XML_MISSING',
message='The retrieved folder did not contain the required XML file.')
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(320, 'ERROR_OUTPUT_XML_READ',
message='The XML output file could not be read.')
spec.exit_code(321, 'ERROR_OUTPUT_XML_PARSE',
Expand Down
8 changes: 0 additions & 8 deletions src/aiida_quantumespresso/calculations/pw2gw.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,8 @@ def define(cls, spec):
spec.output('eps', valid_type=orm.ArrayData,
help='The `eps` output node containing 5 arrays `energy`, `epsX`, `epsY`, `epsZ`, `epsTOT`')

spec.exit_code(302, 'ERROR_OUTPUT_STDOUT_MISSING',
message='The retrieved folder did not contain the required stdout output file.')
spec.exit_code(305, 'ERROR_OUTPUT_FILES',
message='The eps*.dat output files could not be read or parsed.')
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(311, 'ERROR_OUTPUT_STDOUT_PARSE',
message='The stdout output file could not be parsed.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(330, 'ERROR_OUTPUT_FILES_INVALID_FORMAT',
message='The eps*.dat output files do not have the expected shape (N, 2).')
spec.exit_code(331, 'ERROR_OUTPUT_FILES_ENERGY_MISMATCH',
Expand Down
4 changes: 0 additions & 4 deletions src/aiida_quantumespresso/calculations/pw2wannier90.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@ def define(cls, spec):
help='The output folder of a pw.x calculation')
spec.output('output_parameters', valid_type=Dict)
spec.default_output_node = 'output_parameters'
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(340, 'ERROR_GENERIC_QE_ERROR',
message='Encountered a generic error message')
spec.exit_code(350, 'ERROR_UNEXPECTED_PARSER_EXCEPTION',
Expand Down
4 changes: 0 additions & 4 deletions src/aiida_quantumespresso/calculations/q2r.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,6 @@ def define(cls, spec):
super().define(spec)
spec.input('parent_folder', valid_type=(orm.RemoteData, orm.FolderData), required=True)
spec.output('force_constants', valid_type=ForceConstantsData)
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(330, 'ERROR_READING_FORCE_CONSTANTS_FILE',
message='The force constants file could not be read.')
# yapf: enable
132 changes: 126 additions & 6 deletions src/aiida_quantumespresso/parsers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,135 @@
All `Parser` implementations in `aiida-quantumespresso` must use this base class, not `aiida.parsers.Parser`.
"""
from aiida.parsers import Parser as _BaseParser
import abc
import re
import typing

__all__ = ('Parser',)
from aiida.common import AttributeDict
from aiida.engine import ExitCode
from aiida.parsers import Parser

from aiida_quantumespresso.parsers.parse_raw.base import convert_qe_time_to_sec
from aiida_quantumespresso.utils.mapping import get_logging_container

class Parser(_BaseParser): # pylint: disable=abstract-method
"""Custom `Parser` class for `aiida-quantumespresso` parser implementations."""
__all__ = ('BaseParser',)

def emit_logs(self, logging_dictionaries, ignore=None):

class BaseParser(Parser, metaclass=abc.ABCMeta):
"""Custom ``Parser`` class for ``aiida-quantumespresso`` parser implementations."""

# Parser-specific maps, empty in this class. ``get_error_map`` and ``get_warning_map`` merge them on top of the
# base maps below, so an entry defined here wins when the same marker is present in both.
class_error_map = {}
class_warning_map = {}

# Marker searched for in the stdout -> label that is appended to the error logs when the marker is found.
base_error_map = {
'Maximum CPU time exceeded': 'ERROR_OUT_OF_WALLTIME',
}
# Marker searched for in the stdout -> warning message to log. With ``None`` as the value, the stdout line that
# matched the marker is logged instead of a fixed message.
base_warning_map = {
'Warning:': None,
'DEPRECATED:': None,
}

@classmethod
def get_error_map(cls):
    """Return the merged error map of the parser class.

    A new dictionary is built from ``base_error_map`` and ``class_error_map``; when both define the same marker, the
    entry of ``class_error_map`` takes precedence. Neither class attribute is modified.
    """
    return {**cls.base_error_map, **cls.class_error_map}

@classmethod
def get_warning_map(cls):
    """Return the merged warning map of the parser class.

    A new dictionary is built from ``base_warning_map`` and ``class_warning_map``; when both define the same marker,
    the entry of ``class_warning_map`` takes precedence. Neither class attribute is modified.
    """
    return {**cls.base_warning_map, **cls.class_warning_map}

def _parse_stdout_from_retrieved(self, **kwargs) -> typing.Tuple[dict, AttributeDict]:
    """Retrieve and parse the ``stdout`` content of a Quantum ESPRESSO calculation.

    The name of the stdout file is read from the ``output_filename`` option of the calculation node. Problems are not
    raised: they are recorded as labels in the ``error`` list of the returned log container, so the caller decides
    which exit code to return.

    :param kwargs: keyword arguments that are passed on to ``parse_stdout``.
    :returns: size 2 tuple with the parsed data and the log messages. The parsed data is an empty dictionary when the
        stdout file is missing, cannot be read or cannot be parsed.
    """
    logs = get_logging_container()

    filename_stdout = self.node.get_option('output_filename')
    repository = self.retrieved.base.repository

    if filename_stdout not in repository.list_object_names():
        logs.error.append('ERROR_OUTPUT_STDOUT_MISSING')
        return {}, logs

    try:
        # Go through the same repository interface that was used for the existence check above.
        with repository.open(filename_stdout, 'r') as handle:
            stdout = handle.read()
    except OSError:
        logs.error.append('ERROR_OUTPUT_STDOUT_READ')
        return {}, logs

    try:
        parsed_data, stdout_logs = self.parse_stdout(stdout, **kwargs)
    except Exception as exception:  # pylint: disable=broad-except
        # Deliberately broad: any failure of the (possibly overridden) ``parse_stdout`` is reported as a parsing
        # error instead of crashing the parser. The exception is stored as text because every other entry of the
        # log container is a string.
        logs.error.append('ERROR_OUTPUT_STDOUT_PARSE')
        logs.error.append(f'{type(exception).__name__}: {exception}')
        return {}, logs

    for log_level, log_items in stdout_logs.items():
        logs[log_level].extend(log_items)

    return parsed_data, logs

@classmethod
def parse_stdout(cls, stdout: str) -> typing.Tuple[dict, AttributeDict]:
    """Parse the ``stdout`` content of a Quantum ESPRESSO calculation.

    This function only checks for basic content: the ``JOB DONE`` marker, the code version, the wall time, error
    messages enclosed in ``%%%%%`` lines, and the markers of the error and warning maps of the class.

    :param stdout: the stdout content as a string.
    :returns: tuple of two dictionaries, with the parsed data and log messages, respectively. The parsed data can
        contain the keys ``code_version``, ``wall_time`` and ``wall_time_seconds``.
    """
    logs = get_logging_container()
    parsed_data = {}
    # Build the merged maps once instead of once per inspected message.
    error_map = cls.get_error_map()
    warning_map = cls.get_warning_map()

    if not re.search(r'JOB DONE', stdout):
        logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE')

    code_match = re.search(r'Program\s(?P<code_name>[A-Z|\_|\d]+)\s(?P<code_version>v\.[\d\.|a-z|A-Z]+)\s', stdout)

    if code_match:
        code_name = code_match.group('code_name')
        parsed_data['code_version'] = code_match.group('code_version')

        # The code name is literal text taken from the stdout, so escape it before using it inside a pattern.
        wall_match = re.search(
            fr'{re.escape(code_name)}\s+:[\s\S]+\s+(?P<wall_time>[.\d|s|m|d|h]+)\sWALL', stdout
        )

        if wall_match:
            wall_time = wall_match.group('wall_time')
            parsed_data['wall_time'] = wall_time

            try:
                parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(wall_time)
            except ValueError:
                # The container key is ``warning`` (singular), as used for all other warnings below.
                logs.warning.append('Unable to convert wall time from `stdout` to seconds.')

    # Look for typical Quantum ESPRESSO error messages between %%%%%-lines that are not in our error map
    if re.search(r'\%\%\%\%\%', stdout):  # Note: using e.g. `\%{5}` is significantly slower
        # ``dict.fromkeys`` removes duplicates while keeping the order of appearance, so the logs are reproducible.
        for error_message in dict.fromkeys(re.split(r'\%\%\%\%\%\n', stdout)[1::2]):
            if not any(error_marker in error_message for error_marker in error_map):
                logs.error.append(error_message.rstrip('\n%'))

    # Look for error messages in general
    for error_marker, error in error_map.items():
        if re.search(error_marker, stdout):
            logs.error.append(error)

    # Look for lines with warnings from the `warning_map`
    for warning_marker, warning in warning_map.items():
        for warning_message in dict.fromkeys(re.findall(fr'({warning_marker}.+)\n', stdout)):
            # A ``None`` value in the map means: log the matched line itself.
            logs.warning.append(warning_message if warning is None else warning)

    return parsed_data, logs

def _emit_logs(self, logging_dictionaries: AttributeDict, ignore: list = None) -> None:
"""Emit the messages in one or multiple "log dictionaries" through the logger of the parser.
A log dictionary is expected to have the following structure: each key must correspond to a log level of the
Expand Down Expand Up @@ -50,7 +170,7 @@ def emit_logs(self, logging_dictionaries, ignore=None):
except AttributeError:
pass

def exit(self, exit_code):
def _exit(self, exit_code: ExitCode) -> ExitCode:
"""Log the exit message of the give exit code with level `ERROR` and return the exit code.
This is a utility function if one wants to return from the parse method and automatically add the exit message
Expand Down
10 changes: 5 additions & 5 deletions src/aiida_quantumespresso/parsers/cp.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
from packaging.version import Version
from qe_tools import CONSTANTS

from .base import Parser
from .base import BaseParser
from .parse_raw.cp import parse_cp_raw_output, parse_cp_traj_stanzas


class CpParser(Parser):
class CpParser(BaseParser):
"""This class is the implementation of the Parser class for Cp."""

def parse(self, **kwargs):
Expand All @@ -25,14 +25,14 @@ def parse(self, **kwargs):
stdout_filename = self.node.base.attributes.get('output_filename')
# at least the stdout should exist
if stdout_filename not in list_of_files:
return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)
return self._exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)

# This should match 1 file
xml_files = [xml_file for xml_file in self.node.process_class.xml_filenames if xml_file in list_of_files]
if not xml_files:
return self.exit(self.exit_codes.ERROR_MISSING_XML_FILE)
return self._exit(self.exit_codes.ERROR_MISSING_XML_FILE)
elif len(xml_files) > 1:
return self.exit(self.exit_codes.ERROR_OUTPUT_XML_MULTIPLE)
return self._exit(self.exit_codes.ERROR_OUTPUT_XML_MULTIPLE)

# cp.x can produce, depending on the particular version of the code, a file called `print_counter.xml` or
# `print_counter`, which is a plain text file with the number of the last timestep written in the trajectory
Expand Down
50 changes: 16 additions & 34 deletions src/aiida_quantumespresso/parsers/dos.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,34 @@
# -*- coding: utf-8 -*-
from aiida.common import AttributeDict
from aiida.orm import Dict, XyData
import numpy as np

from aiida_quantumespresso.parsers import QEOutputParsingError
from aiida_quantumespresso.parsers.parse_raw.base import parse_output_base

from .base import Parser
from .base import BaseParser, Parser


class DosParser(Parser):
"""This class is the implementation of the Parser class for Dos."""
class DosParser(BaseParser):
"""``Parser`` implementation for the ``DosCalculation`` calculation job class."""

def parse(self, **kwargs):
"""Parses the datafolder, stores results.
"""Parse the retrieved files of a ``DosCalculation`` into output nodes."""
parsed_stdout, logs_stdout = self._parse_stdout_from_retrieved()
self._emit_logs(logs_stdout)

Retrieves dos output, and some basic information from the out_file, such as warnings and wall_time
"""
retrieved = self.retrieved
self.out('output_parameters', Dict(parsed_stdout))

# Read standard out
try:
filename_stdout = self.node.get_option('output_filename') # or get_attribute(), but this is clearer
with retrieved.base.repository.open(filename_stdout, 'r') as fil:
out_file = fil.readlines()
except OSError:
return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)

job_done = False
for i in range(len(out_file)):
line = out_file[-i]
if 'JOB DONE' in line:
job_done = True
break
if not job_done:
return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE)

# check that the dos file is present, if it is, read it
for exit_code in ['ERROR_OUTPUT_STDOUT_MISSING', 'ERROR_OUTPUT_STDOUT_READ', 'ERROR_OUTPUT_STDOUT_INCOMPLETE']:
if exit_code in logs_stdout.error:
return self._exit(self.exit_codes.get(exit_code))

# Parse the DOS
try:
with retrieved.base.repository.open(self.node.process_class._DOS_FILENAME, 'r') as fil:
dos_file = fil.readlines()
with self.retrieved.base.repository.open(self.node.process_class._DOS_FILENAME, 'r') as handle:
dos_file = handle.readlines()
except OSError:
return self.exit(self.exit_codes.ERROR_READING_DOS_FILE)

# end of initial checks
return self._exit(self.exit_codes.ERROR_READING_DOS_FILE)

array_names = [[], []]
array_units = [[], []]
Expand Down Expand Up @@ -79,11 +65,7 @@ def parse(self, **kwargs):
y_units += ['states/eV']
xy_data.set_y(y_arrays, y_names, y_units)

parsed_data, logs = parse_output_base(out_file, 'DOS')
self.emit_logs(logs)

self.out('output_dos', xy_data)
self.out('output_parameters', Dict(parsed_data))


def parse_raw_dos(dos_file, array_names, array_units):
Expand Down
Loading

0 comments on commit 3edacd7

Please sign in to comment.