Refactor basic parsing

aiidateam · May 6, 2023 · 3edacd7 · 3edacd7
1 parent b21076c
commit 3edacd7
Show file tree

Hide file tree

Showing 30 changed files with 442 additions and 795 deletions.
diff --git a/src/aiida_quantumespresso/calculations/dos.py b/src/aiida_quantumespresso/calculations/dos.py
@@ -27,10 +27,6 @@ def define(cls, spec):
         spec.output('output_parameters', valid_type=orm.Dict)
         spec.output('output_dos', valid_type=orm.XyData)
         spec.default_output_node = 'output_parameters'
-        spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
-            message='The stdout output file could not be read.')
-        spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
-            message='The stdout output file was incomplete probably because the calculation got interrupted.')
         spec.exit_code(330, 'ERROR_READING_DOS_FILE',
             message='The dos file could not be read from the retrieved folder.')
         # yapf: enable
diff --git a/src/aiida_quantumespresso/calculations/matdyn.py b/src/aiida_quantumespresso/calculations/matdyn.py
@@ -37,10 +37,6 @@ def define(cls, spec):
         spec.output('output_phonon_bands', valid_type=orm.BandsData)
         spec.default_output_node = 'output_parameters'
 
-        spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
-            message='The stdout output file could not be read.')
-        spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
-            message='The stdout output file was incomplete probably because the calculation got interrupted.')
         spec.exit_code(330, 'ERROR_OUTPUT_FREQUENCIES',
             message='The output frequencies file could not be read from the retrieved folder.')
         spec.exit_code(410, 'ERROR_OUTPUT_KPOINTS_MISSING',

diff --git a/src/aiida_quantumespresso/calculations/namelists.py b/src/aiida_quantumespresso/calculations/namelists.py
@@ -58,6 +58,14 @@ def define(cls, spec):
             help='Use an additional node for special settings')
         spec.input('parent_folder', valid_type=(RemoteData, FolderData, SinglefileData), required=False,
             help='Use a local or remote folder as parent folder (for restarts and similar)')
+        spec.exit_code(302, 'ERROR_OUTPUT_STDOUT_MISSING',
+            message='The retrieved folder did not contain the required stdout output file.')
+        spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
+            message='The stdout output file could not be read.')
+        spec.exit_code(311, 'ERROR_OUTPUT_STDOUT_PARSE',
+            message='The stdout output file could not be parsed.')
+        spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
+            message='The stdout output file was incomplete probably because the calculation got interrupted.')
         # yapf: enable
 
     @classmethod

diff --git a/src/aiida_quantumespresso/calculations/projwfc.py b/src/aiida_quantumespresso/calculations/projwfc.py
@@ -62,10 +62,6 @@ def define(cls, spec):
             message='The retrieved temporary folder could not be accessed.')
         spec.exit_code(303, 'ERROR_OUTPUT_XML_MISSING',
             message='The retrieved folder did not contain the required XML file.')
-        spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
-            message='The stdout output file could not be read.')
-        spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
-            message='The stdout output file was incomplete probably because the calculation got interrupted.')
         spec.exit_code(320, 'ERROR_OUTPUT_XML_READ',
             message='The XML output file could not be read.')
         spec.exit_code(321, 'ERROR_OUTPUT_XML_PARSE',

diff --git a/src/aiida_quantumespresso/calculations/pw2gw.py b/src/aiida_quantumespresso/calculations/pw2gw.py
@@ -38,16 +38,8 @@ def define(cls, spec):
         spec.output('eps', valid_type=orm.ArrayData,
             help='The `eps` output node containing 5 arrays `energy`, `epsX`, `epsY`, `epsZ`, `epsTOT`')
 
-        spec.exit_code(302, 'ERROR_OUTPUT_STDOUT_MISSING',
-            message='The retrieved folder did not contain the required stdout output file.')
         spec.exit_code(305, 'ERROR_OUTPUT_FILES',
             message='The eps*.dat output files could not be read or parsed.')
-        spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
-            message='The stdout output file could not be read.')
-        spec.exit_code(311, 'ERROR_OUTPUT_STDOUT_PARSE',
-            message='The stdout output file could not be parsed.')
-        spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
-            message='The stdout output file was incomplete probably because the calculation got interrupted.')
         spec.exit_code(330, 'ERROR_OUTPUT_FILES_INVALID_FORMAT',
             message='The eps*.dat output files do not have the expected shape (N, 2).')
         spec.exit_code(331, 'ERROR_OUTPUT_FILES_ENERGY_MISMATCH',

diff --git a/src/aiida_quantumespresso/calculations/pw2wannier90.py b/src/aiida_quantumespresso/calculations/pw2wannier90.py
@@ -31,10 +31,6 @@ def define(cls, spec):
                    help='The output folder of a pw.x calculation')
         spec.output('output_parameters', valid_type=Dict)
         spec.default_output_node = 'output_parameters'
-        spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
-            message='The stdout output file could not be read.')
-        spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
-            message='The stdout output file was incomplete probably because the calculation got interrupted.')
         spec.exit_code(340, 'ERROR_GENERIC_QE_ERROR',
             message='Encountered a generic error message')
         spec.exit_code(350, 'ERROR_UNEXPECTED_PARSER_EXCEPTION',

diff --git a/src/aiida_quantumespresso/calculations/q2r.py b/src/aiida_quantumespresso/calculations/q2r.py
@@ -32,10 +32,6 @@ def define(cls, spec):
         super().define(spec)
         spec.input('parent_folder', valid_type=(orm.RemoteData, orm.FolderData), required=True)
         spec.output('force_constants', valid_type=ForceConstantsData)
-        spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
-            message='The stdout output file could not be read.')
-        spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
-            message='The stdout output file was incomplete probably because the calculation got interrupted.')
         spec.exit_code(330, 'ERROR_READING_FORCE_CONSTANTS_FILE',
             message='The force constants file could not be read.')
         # yapf: enable
diff --git a/src/aiida_quantumespresso/parsers/base.py b/src/aiida_quantumespresso/parsers/base.py
@@ -3,15 +3,135 @@
 
 All `Parser` implementations in `aiida-quantumespresso` must use this base class, not `aiida.parsers.Parser`.
 """
-from aiida.parsers import Parser as _BaseParser
+import abc
+import re
+import typing
 
-__all__ = ('Parser',)
+from aiida.common import AttributeDict
+from aiida.engine import ExitCode
+from aiida.parsers import Parser
 
+from aiida_quantumespresso.parsers.parse_raw.base import convert_qe_time_to_sec
+from aiida_quantumespresso.utils.mapping import get_logging_container
 
-class Parser(_BaseParser):  # pylint: disable=abstract-method
-    """Custom `Parser` class for `aiida-quantumespresso` parser implementations."""
+__all__ = ('BaseParser',)
 
-    def emit_logs(self, logging_dictionaries, ignore=None):
+
+class BaseParser(Parser, metaclass=abc.ABCMeta):
+    """Custom ``Parser`` class for ``aiida-quantumespresso`` parser implementations."""
+
+    # Class-specific markers. ``get_error_map`` and ``get_warning_map`` merge these on top of the base maps below, so
+    # an entry here wins over a base entry with the same key. Presumably meant to be overridden by subclasses.
+    class_error_map = {}
+    class_warning_map = {}
+
+    # Marker searched for in the stdout -> label that ``parse_stdout`` appends to the error logs when it is found.
+    base_error_map = {
+        'Maximum CPU time exceeded': 'ERROR_OUT_OF_WALLTIME',
+    }
+    # Marker searched for in the stdout -> message that ``parse_stdout`` appends to the warning logs. With ``None``
+    # the matched text itself (from the marker up to the end of the line) is logged instead.
+    base_warning_map = {
+        'Warning:': None,
+        'DEPRECATED:': None,
+    }
+
+    @classmethod
+    def get_error_map(cls):
+        """Return the error map that applies to this parser class.
+
+        The result is a new dictionary built from ``base_error_map`` and overlaid with ``class_error_map``, so a
+        class-specific entry takes precedence over a base entry with the same marker.
+        """
+        return {**cls.base_error_map, **cls.class_error_map}
+
+    @classmethod
+    def get_warning_map(cls):
+        """Return the warning map that applies to this parser class.
+
+        The result is a new dictionary built from ``base_warning_map`` and overlaid with ``class_warning_map``, so a
+        class-specific entry takes precedence over a base entry with the same marker.
+        """
+        return {**cls.base_warning_map, **cls.class_warning_map}
+
+    def _parse_stdout_from_retrieved(self, **kwargs) -> typing.Tuple[dict, AttributeDict]:
+        """Retrieve and parse the ``stdout`` content of a Quantum ESPRESSO calculation.
+
+        The name of the stdout file is read from the ``output_filename`` option of the calculation node. Problems are
+        not raised but reported as exit code labels in the ``error`` list of the returned logs:
+
+        * ``ERROR_OUTPUT_STDOUT_MISSING``: the file is not listed in the retrieved folder.
+        * ``ERROR_OUTPUT_STDOUT_READ``: opening or reading the file raised an ``OSError``.
+        * ``ERROR_OUTPUT_STDOUT_PARSE``: ``parse_stdout`` raised; the exception message is logged right after it.
+
+        :param kwargs: keyword arguments that are forwarded as is to ``parse_stdout``.
+        :returns: size 2 tuple with the parsed data and the log messages. The parsed data is an empty dictionary when
+            the file is missing, cannot be read or cannot be parsed.
+        """
+        logs = get_logging_container()
+
+        filename_stdout = self.node.get_option('output_filename')
+        repository = self.retrieved.base.repository
+
+        if filename_stdout not in repository.list_object_names():
+            logs.error.append('ERROR_OUTPUT_STDOUT_MISSING')
+            return {}, logs
+
+        try:
+            # Go through the same repository interface that was used for the listing above.
+            with repository.open(filename_stdout, 'r') as handle:
+                stdout = handle.read()
+        except OSError:
+            logs.error.append('ERROR_OUTPUT_STDOUT_READ')
+            return {}, logs
+
+        try:
+            parsed_data, stdout_logs = self.parse_stdout(stdout, **kwargs)
+        except Exception as exception:  # pylint: disable=broad-except
+            # Any failure while parsing is reported through the logs instead of being propagated to the caller.
+            logs.error.append('ERROR_OUTPUT_STDOUT_PARSE')
+            # Store the message as a string, like every other entry that is added to the log container here.
+            logs.error.append(str(exception))
+            return {}, logs
+
+        for log_level, log_items in stdout_logs.items():
+            logs[log_level].extend(log_items)
+
+        return parsed_data, logs
+
+    @classmethod
+    def parse_stdout(cls, stdout: str) -> typing.Tuple[dict, AttributeDict]:
+        """Parse the ``stdout`` content of a Quantum ESPRESSO calculation.
+
+        This function only checks for basic content like JOB DONE, errors with %%%%% etc. The parsed data can contain
+        the keys ``code_version``, ``wall_time`` and ``wall_time_seconds``, each of which is only set when the
+        corresponding information is found in the output.
+
+        :param stdout: the stdout content as a string.
+        :returns: tuple of two dictionaries, with the parsed data and log messages, respectively.
+        """
+        logs = get_logging_container()
+        parsed_data = {}
+        error_map = cls.get_error_map()
+
+        if not re.search(r'JOB DONE', stdout):
+            logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE')
+
+        code_match = re.search(r'Program\s(?P<code_name>[A-Z|\_|\d]+)\s(?P<code_version>v\.[\d\.|a-z|A-Z]+)\s', stdout)
+
+        if code_match:
+
+            code_name = code_match.group('code_name')
+            parsed_data['code_version'] = code_match.group('code_version')
+
+            # The wall time is searched for after a `<code_name> :` label, so it is only looked for if the name is known
+            wall_match = re.search(fr'{code_name}\s+:[\s\S]+\s+(?P<wall_time>[.\d|s|m|d|h]+)\sWALL', stdout)
+
+            if wall_match:
+                wall_time = wall_match.group('wall_time')
+                parsed_data['wall_time'] = wall_time
+
+                try:
+                    parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(wall_time)
+                except ValueError:
+                    # The raw `wall_time` string is kept; only the conversion to seconds is skipped
+                    logs.warning.append('Unable to convert wall time from `stdout` to seconds.')
+
+        # Look for typical Quantum ESPRESSO error messages between %%%%%-lines that are not in our error map
+        if re.search(r'\%\%\%\%\%', stdout):  # Note: using e.g. `\%{5}` is significantly slower
+            # Every second piece of the split is taken as a message; `dict.fromkeys` removes duplicates while keeping
+            # the order in which the messages appear in the output.
+            for error_message in dict.fromkeys(re.split(r'\%\%\%\%\%\n', stdout)[1::2]):
+
+                if not any(error_marker in error_message for error_marker in error_map):
+                    logs.error.append(error_message.rstrip('\n%'))
+
+        # Look for error messages in general. Note that the markers are used as regular expression patterns as is.
+        for error_marker, error in error_map.items():
+            if re.search(fr'{error_marker}', stdout):
+                logs.error.append(error)
+
+        # Look for lines with warnings from the `warning_map`
+        for warning_marker, warning in cls.get_warning_map().items():
+            # One entry is logged per distinct match, in order of appearance in the output
+            for warning_message in dict.fromkeys(re.findall(fr'({warning_marker}.+)\n', stdout)):
+                if warning is not None:
+                    logs.warning.append(warning)
+                else:
+                    logs.warning.append(warning_message)
+
+        return parsed_data, logs
+
+    def _emit_logs(self, logging_dictionaries: AttributeDict, ignore: list = None) -> None:
         """Emit the messages in one or multiple "log dictionaries" through the logger of the parser.
 
         A log dictionary is expected to have the following structure: each key must correspond to a log level of the
@@ -50,7 +170,7 @@ def emit_logs(self, logging_dictionaries, ignore=None):
                     except AttributeError:
                         pass
 
-    def exit(self, exit_code):
+    def _exit(self, exit_code: ExitCode) -> ExitCode:
         """Log the exit message of the give exit code with level `ERROR` and return the exit code.
 
         This is a utility function if one wants to return from the parse method and automically add the exit message

diff --git a/src/aiida_quantumespresso/parsers/cp.py b/src/aiida_quantumespresso/parsers/cp.py
@@ -4,11 +4,11 @@
 from packaging.version import Version
 from qe_tools import CONSTANTS
 
-from .base import Parser
+from .base import BaseParser
 from .parse_raw.cp import parse_cp_raw_output, parse_cp_traj_stanzas
 
 
-class CpParser(Parser):
+class CpParser(BaseParser):
     """This class is the implementation of the Parser class for Cp."""
 
     def parse(self, **kwargs):
@@ -25,14 +25,14 @@ def parse(self, **kwargs):
         stdout_filename = self.node.base.attributes.get('output_filename')
         # at least the stdout should exist
         if stdout_filename not in list_of_files:
-            return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)
+            return self._exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)
 
         # This should match 1 file
         xml_files = [xml_file for xml_file in self.node.process_class.xml_filenames if xml_file in list_of_files]
         if not xml_files:
-            return self.exit(self.exit_codes.ERROR_MISSING_XML_FILE)
+            return self._exit(self.exit_codes.ERROR_MISSING_XML_FILE)
         elif len(xml_files) > 1:
-            return self.exit(self.exit_codes.ERROR_OUTPUT_XML_MULTIPLE)
+            return self._exit(self.exit_codes.ERROR_OUTPUT_XML_MULTIPLE)
 
         # cp.x can produce, depending on the particular version of the code, a file called `print_counter.xml` or
         # `print_counter`, which is a plain text file with the number of the last timestep written in the trajectory

diff --git a/src/aiida_quantumespresso/parsers/dos.py b/src/aiida_quantumespresso/parsers/dos.py
@@ -1,48 +1,34 @@
 # -*- coding: utf-8 -*-
+from aiida.common import AttributeDict
 from aiida.orm import Dict, XyData
 import numpy as np
 
 from aiida_quantumespresso.parsers import QEOutputParsingError
 from aiida_quantumespresso.parsers.parse_raw.base import parse_output_base
 
-from .base import Parser
+from .base import BaseParser, Parser
 
 
-class DosParser(Parser):
-    """This class is the implementation of the Parser class for Dos."""
+class DosParser(BaseParser):
+    """``Parser`` implementation for the ``DosCalculation`` calculation job class."""
 
     def parse(self, **kwargs):
-        """Parses the datafolder, stores results.
+        """Parse the retrieved files of a ``DosCalculation`` into output nodes."""
+        parsed_stdout, logs_stdout = self._parse_stdout_from_retrieved()
+        self._emit_logs(logs_stdout)
 
-        Retrieves dos output, and some basic information from the out_file, such as warnings and wall_time
-        """
-        retrieved = self.retrieved
+        self.out('output_parameters', Dict(parsed_stdout))
 
-        # Read standard out
-        try:
-            filename_stdout = self.node.get_option('output_filename')  # or get_attribute(), but this is clearer
-            with retrieved.base.repository.open(filename_stdout, 'r') as fil:
-                out_file = fil.readlines()
-        except OSError:
-            return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)
-
-        job_done = False
-        for i in range(len(out_file)):
-            line = out_file[-i]
-            if 'JOB DONE' in line:
-                job_done = True
-                break
-        if not job_done:
-            return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE)
-
-        # check that the dos file is present, if it is, read it
+        for exit_code in ['ERROR_OUTPUT_STDOUT_MISSING', 'ERROR_OUTPUT_STDOUT_READ', 'ERROR_OUTPUT_STDOUT_INCOMPLETE']:
+            if exit_code in logs_stdout.error:
+                return self._exit(self.exit_codes.get(exit_code))
+
+        # Parse the DOS
         try:
-            with retrieved.base.repository.open(self.node.process_class._DOS_FILENAME, 'r') as fil:
-                dos_file = fil.readlines()
+            with self.retrieved.base.repository.open(self.node.process_class._DOS_FILENAME, 'r') as handle:
+                dos_file = handle.readlines()
         except OSError:
-            return self.exit(self.exit_codes.ERROR_READING_DOS_FILE)
-
-        # end of initial checks
+            return self._exit(self.exit_codes.ERROR_READING_DOS_FILE)
 
         array_names = [[], []]
         array_units = [[], []]
@@ -79,11 +65,7 @@ def parse(self, **kwargs):
             y_units += ['states/eV']
         xy_data.set_y(y_arrays, y_names, y_units)
 
-        parsed_data, logs = parse_output_base(out_file, 'DOS')
-        self.emit_logs(logs)
-
         self.out('output_dos', xy_data)
-        self.out('output_parameters', Dict(parsed_data))
 
 
 def parse_raw_dos(dos_file, array_names, array_units):