Skip to content

Commit

Permalink
Refactor base parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
mbercx committed Jan 28, 2022
1 parent 93e3499 commit 0522e9e
Show file tree
Hide file tree
Showing 32 changed files with 422 additions and 760 deletions.
4 changes: 0 additions & 4 deletions aiida_quantumespresso/calculations/dos.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,6 @@ def define(cls, spec):
spec.output('output_parameters', valid_type=orm.Dict)
spec.output('output_dos', valid_type=orm.XyData)
spec.default_output_node = 'output_parameters'
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(330, 'ERROR_READING_DOS_FILE',
message='The dos file could not be read from the retrieved folder.')
# yapf: enable
4 changes: 0 additions & 4 deletions aiida_quantumespresso/calculations/matdyn.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,6 @@ def define(cls, spec):
spec.output('output_parameters', valid_type=orm.Dict)
spec.output('output_phonon_bands', valid_type=orm.BandsData)
spec.default_output_node = 'output_parameters'
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(330, 'ERROR_OUTPUT_FREQUENCIES',
message='The output frequencies file could not be read from the retrieved folder.')
spec.exit_code(410, 'ERROR_OUTPUT_KPOINTS_MISSING',
Expand Down
6 changes: 6 additions & 0 deletions aiida_quantumespresso/calculations/namelists.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ def define(cls, spec):
help='Use an additional node for special settings')
spec.input('parent_folder', valid_type=(RemoteData, FolderData, SinglefileData), required=False,
help='Use a local or remote folder as parent folder (for restarts and similar)')
spec.exit_code(302, 'ERROR_OUTPUT_STDOUT_MISSING',
message='The retrieved folder did not contain the required stdout output file.')
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
# yapf: enable

def _get_following_text(self):
Expand Down
4 changes: 0 additions & 4 deletions aiida_quantumespresso/calculations/projwfc.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,6 @@ def define(cls, spec):
message='The retrieved temporary folder could not be accessed.')
spec.exit_code(303, 'ERROR_OUTPUT_XML_MISSING',
message='The retrieved folder did not contain the required XML file.')
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(320, 'ERROR_OUTPUT_XML_READ',
message='The XML output file could not be read.')
spec.exit_code(321, 'ERROR_OUTPUT_XML_PARSE',
Expand Down
6 changes: 0 additions & 6 deletions aiida_quantumespresso/calculations/pw2gw.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,10 @@ def define(cls, spec):
spec.output('eps', valid_type=orm.ArrayData,
help='The `eps` output node containing 5 arrays `energy`, `epsX`, `epsY`, `epsZ`, `epsTOT`')

spec.exit_code(302, 'ERROR_OUTPUT_STDOUT_MISSING',
message='The retrieved folder did not contain the required stdout output file.')
spec.exit_code(305, 'ERROR_OUTPUT_FILES',
message='The eps*.dat output files could not be read or parsed.')
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(311, 'ERROR_OUTPUT_STDOUT_PARSE',
message='The stdout output file could not be parsed.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(330, 'ERROR_OUTPUT_FILES_INVALID_FORMAT',
message='The eps*.dat output files do not have the expected shape (N, 2).')
spec.exit_code(331, 'ERROR_OUTPUT_FILES_ENERGY_MISMATCH',
Expand Down
4 changes: 0 additions & 4 deletions aiida_quantumespresso/calculations/pw2wannier90.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@ def define(cls, spec):
help='The output folder of a pw.x calculation')
spec.output('output_parameters', valid_type=Dict)
spec.default_output_node = 'output_parameters'
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(340, 'ERROR_GENERIC_QE_ERROR',
message='Encountered a generic error message')
spec.exit_code(350, 'ERROR_UNEXPECTED_PARSER_EXCEPTION',
Expand Down
4 changes: 0 additions & 4 deletions aiida_quantumespresso/calculations/q2r.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@ def define(cls, spec):
super().define(spec)
spec.input('parent_folder', valid_type=(orm.RemoteData, orm.FolderData), required=True)
spec.output('force_constants', valid_type=ForceConstantsData)
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(330, 'ERROR_READING_FORCE_CONSTANTS_FILE',
message='The force constants file could not be read.')
# yapf: enable
125 changes: 119 additions & 6 deletions aiida_quantumespresso/parsers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,128 @@
All `Parser` implementations in `aiida-quantumespresso` must use this base class, not `aiida.parsers.Parser`.
"""
from aiida.parsers import Parser as _BaseParser
import re
import typing

from aiida.parsers import Parser
from aiida.common import AttributeDict
from aiida.engine import ExitCode
from aiida_quantumespresso.utils.mapping import get_logging_container
from aiida_quantumespresso.parsers.parse_raw.base import convert_qe_time_to_sec

__all__ = ('BaseParser',)


class BaseParser(Parser): # pylint: disable=abstract-method
"""Custom ``Parser`` class for ``aiida-quantumespresso`` parser implementations."""

class_error_map = {}
class_warning_map = {}

base_error_map = {
'Maximum CPU time exceeded': 'ERROR_OUT_OF_WALLTIME',
}
base_warning_map = {
'Warning:': None,
'DEPRECATED:': None,
}

@classmethod
def get_error_map(cls):
    """Return the merged error map of the parser class.

    A new dictionary is built from ``base_error_map`` and then overlaid with ``class_error_map``, so an entry in
    ``class_error_map`` replaces the ``base_error_map`` entry with the same key. Neither class attribute is modified.
    """
    return {**cls.base_error_map, **cls.class_error_map}

@classmethod
def get_warning_map(cls):
    """Return the merged warning map of the parser class.

    A new dictionary is built from ``base_warning_map`` and then updated with ``class_warning_map``, so an entry in
    ``class_warning_map`` replaces the ``base_warning_map`` entry with the same key. Neither class attribute is
    modified.
    """
    merged = dict(cls.base_warning_map)
    merged.update(cls.class_warning_map)
    return merged

def _retrieve_parse_stdout(self, **kwargs) -> typing.Tuple[dict, AttributeDict]:
    """Retrieve and parse the ``stdout`` content of a Quantum ESPRESSO calculation.

    The name of the stdout file is read from the ``output_filename`` option of the node and the file is looked up in
    the retrieved folder. Its content is then handed to ``parse_stdout``.

    :param kwargs: keyword arguments that are forwarded unchanged to ``parse_stdout``.
    :returns: size 2 tuple with the parsed data and the log messages. If the stdout file is not in the retrieved
        folder or cannot be opened, the parsed data is an empty dictionary and ``logs.error`` contains
        ``ERROR_OUTPUT_STDOUT_MISSING`` or ``ERROR_OUTPUT_STDOUT_READ``, respectively.
    """
    logs = get_logging_container()

    filename_stdout = self.node.get_option('output_filename')

    if filename_stdout not in self.retrieved.list_object_names():
        logs.error.append('ERROR_OUTPUT_STDOUT_MISSING')
        return {}, logs

    try:
        with self.retrieved.open(filename_stdout, 'r') as handle:
            stdout = handle.read()
    except OSError:
        logs.error.append('ERROR_OUTPUT_STDOUT_READ')
        return {}, logs

    parsed_data, stdout_logs = self.parse_stdout(stdout, **kwargs)

    # Merge the messages produced while parsing into the container created here, level by level.
    for log_type, log_items in stdout_logs.items():
        logs[log_type].extend(log_items)

    return parsed_data, logs

@classmethod
def parse_stdout(cls, stdout: str) -> typing.Tuple[dict, AttributeDict]:
    """Parse the ``stdout`` content of a Quantum ESPRESSO calculation.

    This function only checks for basic content like JOB DONE, errors with %%%%% etc.

    The parsed data can contain the keys ``code_version``, ``wall_time`` and ``wall_time_seconds``, each of which is
    only set when the corresponding pattern is found in ``stdout``.

    :param stdout: the stdout content as a string.
    :returns: tuple of two dictionaries, with the parsed data and log messages, respectively.
    """
    logs = get_logging_container()
    parsed_data = {}

    if not re.search(r'JOB DONE', stdout):
        logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE')

    code_match = re.search(r'Program\s(?P<code_name>[A-Z|\_|\d]+)\sv\.(?P<code_version>[\d\.|a-z|A-Z]+)\s', stdout)

    if code_match:

        code_name = code_match.groupdict()['code_name']
        parsed_data['code_version'] = code_match.groupdict()['code_version']

        # The wall time is searched for after the code name that was found in the `Program ... v.` header.
        wall_match = re.search(fr'{code_name}\s+:[\s\S]+\s+(?P<wall_time>[.\d|s|m|d|h]+)\sWALL', stdout)

        if wall_match:
            wall_time = wall_match.groupdict()['wall_time']
            parsed_data['wall_time'] = wall_time

            try:
                parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(wall_time)
            except ValueError:
                # Use the `warning` level, the same key the warning-map loop at the end of this method writes to.
                logs.warning.append('Unable to convert wall time from `stdout` to seconds.')

    # The error map does not change while parsing, so build it once instead of once per error block.
    error_map = cls.get_error_map()

    # Look for typical Quantum ESPRESSO error messages between %%%%%-lines that are not in our error map
    if re.search(r'\%\%\%\%\%', stdout):  # Note: using e.g. `\%{5}` is significantly slower
        # Splitting on the end of each %%%%%-line puts the text that follows an opening delimiter, up to the next
        # delimiter, at the odd indices. The `set` removes messages that are repeated verbatim.
        for error_message in set(re.split(r'\%\%\%\%\%\n', stdout)[1::2]):

            if not any(error_marker in error_message for error_marker in error_map):
                logs.error.append(error_message.rstrip('\n%'))

    # Look for error messages in general
    for error_marker, error in error_map.items():
        if re.search(fr'{error_marker}', stdout):
            logs.error.append(error)

    # Look for lines with warnings from the `warning_map`
    for warning_marker, warning in cls.get_warning_map().items():
        for warning_message in set(re.findall(fr'({warning_marker}.+)\n', stdout)):
            # A mapped value of `None` means that the matched line itself is logged.
            if warning is not None:
                logs.warning.append(warning)
            else:
                logs.warning.append(warning_message)

    return parsed_data, logs

def emit_logs(self, logging_dictionaries, ignore=None):
def _emit_logs(self, logging_dictionaries: AttributeDict, ignore: list = None) -> None:
"""Emit the messages in one or multiple "log dictionaries" through the logger of the parser.
A log dictionary is expected to have the following structure: each key must correspond to a log level of the
Expand Down Expand Up @@ -50,7 +163,7 @@ def emit_logs(self, logging_dictionaries, ignore=None):
except AttributeError:
pass

def exit(self, exit_code):
def _exit(self, exit_code: ExitCode) -> ExitCode:
"""Log the exit message of the given exit code with level `ERROR` and return the exit code.
This is a utility function if one wants to return from the parse method and automatically add the exit message
Expand Down
10 changes: 5 additions & 5 deletions aiida_quantumespresso/parsers/cp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
from aiida.orm import Dict, TrajectoryData
from qe_tools import CONSTANTS

from .base import Parser
from .base import BaseParser
from .parse_raw.cp import parse_cp_raw_output, parse_cp_traj_stanzas


class CpParser(Parser):
class CpParser(BaseParser):
"""This class is the implementation of the Parser class for Cp."""

def parse(self, **kwargs):
Expand All @@ -26,14 +26,14 @@ def parse(self, **kwargs):
stdout_filename = self.node.get_attribute('output_filename')
# at least the stdout should exist
if stdout_filename not in list_of_files:
return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)
return self._exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)

# This should match 1 file
xml_files = [xml_file for xml_file in self.node.process_class.xml_filenames if xml_file in list_of_files]
if not xml_files:
return self.exit(self.exit_codes.ERROR_MISSING_XML_FILE)
return self._exit(self.exit_codes.ERROR_MISSING_XML_FILE)
elif len(xml_files) > 1:
return self.exit(self.exit_codes.ERROR_OUTPUT_XML_MULTIPLE)
return self._exit(self.exit_codes.ERROR_OUTPUT_XML_MULTIPLE)

# cp.x can produce, depending on the particular version of the code, a file called `print_counter.xml` or
# `print_counter`, which is a plain text file with the number of the last timestep written in the trajectory
Expand Down
50 changes: 15 additions & 35 deletions aiida_quantumespresso/parsers/dos.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,47 +2,31 @@
import numpy as np

from aiida.orm import Dict, XyData
from aiida.common import AttributeDict

from aiida_quantumespresso.parsers import QEOutputParsingError
from aiida_quantumespresso.parsers.parse_raw.base import parse_output_base
from .base import Parser
from .base import BaseParser


class DosParser(Parser):
"""This class is the implementation of the Parser class for Dos."""
class DosParser(BaseParser):
"""``Parser`` implementation for the ``DosCalculation`` calculation job class."""

def parse(self, **kwargs):
"""Parses the datafolder, stores results.
"""Parse the retrieved files of a ``DosCalculation`` into output nodes."""
parsed_stdout, logs_stdout = self._retrieve_parse_stdout()

Retrieves dos output, and some basic information from the out_file, such as warnings and wall_time
"""
retrieved = self.retrieved
for exit_code in ['ERROR_OUTPUT_STDOUT_MISSING', 'ERROR_OUTPUT_STDOUT_READ', 'ERROR_OUTPUT_STDOUT_INCOMPLETE']:
if exit_code in logs_stdout.error:
return self._exit(self.exit_codes.get(exit_code))

# Read standard out
try:
filename_stdout = self.node.get_option('output_filename') # or get_attribute(), but this is clearer
with retrieved.open(filename_stdout, 'r') as fil:
out_file = fil.readlines()
except OSError:
return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)

job_done = False
for i in range(len(out_file)):
line = out_file[-i]
if 'JOB DONE' in line:
job_done = True
break
if not job_done:
return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE)

# check that the dos file is present, if it is, read it
self.out('output_parameters', Dict(dict=parsed_stdout))

# Parse the DOS
try:
with retrieved.open(self.node.process_class._DOS_FILENAME, 'r') as fil:
dos_file = fil.readlines()
with self.retrieved.open(self.node.process_class._DOS_FILENAME, 'r') as handle:
dos_file = handle.readlines()
except OSError:
return self.exit(self.exit_codes.ERROR_READING_DOS_FILE)

# end of initial checks
return self._exit(self.exit_codes.ERROR_READING_DOS_FILE)

array_names = [[], []]
array_units = [[], []]
Expand Down Expand Up @@ -79,11 +63,7 @@ def parse(self, **kwargs):
y_units += ['states/eV']
xy_data.set_y(y_arrays, y_names, y_units)

parsed_data, logs = parse_output_base(out_file, 'DOS')
self.emit_logs(logs)

self.out('output_dos', xy_data)
self.out('output_parameters', Dict(dict=parsed_data))


def parse_raw_dos(dos_file, array_names, array_units):
Expand Down
Loading

0 comments on commit 0522e9e

Please sign in to comment.