diff --git a/src/aiida_quantumespresso/calculations/pp.py b/src/aiida_quantumespresso/calculations/pp.py index e0c1c0ca9..31cf09955 100644 --- a/src/aiida_quantumespresso/calculations/pp.py +++ b/src/aiida_quantumespresso/calculations/pp.py @@ -1,9 +1,11 @@ # -*- coding: utf-8 -*- """`CalcJob` implementation for the pp.x code of Quantum ESPRESSO.""" import os +import warnings from aiida import orm from aiida.common import datastructures, exceptions +from aiida.common.warnings import AiidaDeprecationWarning from aiida_quantumespresso.calculations import _lowercase_dict, _uppercase_dict from aiida_quantumespresso.utils.convert import convert_input_to_namelist_entry @@ -82,7 +84,9 @@ def define(cls, spec): spec.input('metadata.options.output_filename', valid_type=str, default=cls._DEFAULT_OUTPUT_FILE) spec.input('metadata.options.parser_name', valid_type=str, default='quantumespresso.pp') spec.input('metadata.options.withmpi', valid_type=bool, default=True) - spec.input('metadata.options.keep_plot_file', valid_type=bool, default=False) + spec.input('metadata.options.keep_plot_file', valid_type=bool, required=False) + spec.input('metadata.options.keep_data_files', valid_type=bool, default=False) + spec.input('metadata.options.parse_data_files', valid_type=bool, default=True) spec.output('output_parameters', valid_type=orm.Dict) spec.output('output_data', valid_type=orm.ArrayData) @@ -218,10 +222,16 @@ def prepare_for_submission(self, folder): # pylint: disable=too-many-branches,t # distinguish them from one another. The `fileout` filename will be the full data filename with the `fileout` # value as a suffix. 
retrieve_tuples = [self._FILEOUT, (f'{self._FILPLOT}_*{self._FILEOUT}', '.', 0)] - - if self.inputs.metadata.options.keep_plot_file: + if 'keep_plot_file' in self.inputs.metadata.options: + self.inputs.metadata.options.keep_data_files = self.inputs.metadata.options.keep_plot_file + warnings.warn( + "The input parameter 'keep_plot_file' is deprecated and will be removed in version 5.0.0. " + "Please use 'keep_data_files' instead.", AiidaDeprecationWarning + ) + if self.inputs.metadata.options.keep_data_files: calcinfo.retrieve_list.extend(retrieve_tuples) - else: + # If we do not want to parse the retrieved files, temporary retrieval is meaningless + elif self.inputs.metadata.options.parse_data_files: calcinfo.retrieve_temporary_list.extend(retrieve_tuples) return calcinfo diff --git a/src/aiida_quantumespresso/parsers/pp.py b/src/aiida_quantumespresso/parsers/pp.py index 6132eee27..d4e41881b 100644 --- a/src/aiida_quantumespresso/parsers/pp.py +++ b/src/aiida_quantumespresso/parsers/pp.py @@ -117,35 +117,35 @@ def get_key_from_filename(filename): matches = re.search(pattern, filename) return matches.group(1) - for filename in filenames: - # Directly parse the retrieved files after reading them to memory (`data_raw`). The raw data - # of each file is released from memory after parsing, to improve memory usage. - if filename.endswith(filename_suffix): - # Read the file to memory - try: - with file_opener(filename) as handle: - data_raw = handle.read() - except OSError: - return self.exit_codes.ERROR_OUTPUT_DATAFILE_READ.format(filename=filename) - # Parse the file - try: - key = get_key_from_filename(filename) - data_parsed.append((key, parsers[iflag](data_raw, self.units_dict[parsed_data['plot_num']]))) - del data_raw - except Exception as exception: # pylint: disable=broad-except - return self.exit_codes.ERROR_OUTPUT_DATAFILE_PARSE.format(filename=filename, exception=exception) - - # If we don't have any parsed files, we exit. 
Note that this will not catch the case where there should be more - # than one file, but the engine did not retrieve all of them. Since often we anyway don't know how many files - # should be retrieved there really is no way to check this explicitly. - if not data_parsed: - return self.exit_codes.ERROR_OUTPUT_DATAFILE_MISSING.format(filename=filename_prefix) - - # Create output nodes - if len(data_parsed) == 1: - self.out('output_data', data_parsed[0][1]) - else: - self.out('output_data_multiple', dict(data_parsed)) + if self.node.base.attributes.get('parse_data_files'): + for filename in filenames: + # Directly parse the retrieved files after reading them to memory (`data_raw`). The raw data + # of each file is released from memory after parsing, to improve memory usage. + if filename.endswith(filename_suffix): + # Read the file to memory + try: + with file_opener(filename) as handle: + data_raw = handle.read() + except OSError: + return self.exit_codes.ERROR_OUTPUT_DATAFILE_READ.format(filename=filename) + # Parse the file + try: + key = get_key_from_filename(filename) + data_parsed.append((key, parsers[iflag](data_raw, self.units_dict[parsed_data['plot_num']]))) + del data_raw + except Exception as exception: # pylint: disable=broad-except + return self.exit_codes.ERROR_OUTPUT_DATAFILE_PARSE.format(filename=filename, exception=exception) + + # If we don't have any parsed files, we exit. Note that this will not catch the case where there should be more + # than one file, but the engine did not retrieve all of them. Since often we anyway don't know how many files + # should be retrieved there really is no way to check this explicitly. 
+ if not data_parsed: + return self.exit_codes.ERROR_OUTPUT_DATAFILE_MISSING.format(filename=filename_prefix) + + if len(data_parsed) == 1: + self.out('output_data', data_parsed[0][1]) + else: + self.out('output_data_multiple', dict(data_parsed)) return self.exit(logs=logs) diff --git a/tests/calculations/test_pp.py b/tests/calculations/test_pp.py index a837127d3..2f86b206f 100644 --- a/tests/calculations/test_pp.py +++ b/tests/calculations/test_pp.py @@ -60,11 +60,11 @@ def test_pp_default(fixture_sandbox, generate_calc_job, generate_inputs, file_re file_regression.check(input_written, encoding='utf-8', extension='.in') -def test_pp_keep_plot_file(fixture_sandbox, generate_calc_job, generate_inputs): +def test_pp_keep_data_files(fixture_sandbox, generate_calc_job, generate_inputs): """Test a `PpCalculation` where we want to retrieve the plot file.""" entry_point_name = 'quantumespresso.pp' inputs = generate_inputs() - inputs.metadata.options.keep_plot_file = True + inputs.metadata.options.keep_data_files = True calc_info = generate_calc_job(fixture_sandbox, entry_point_name, inputs) retrieve_list = ['aiida.out', 'aiida.fileout', ('aiida.filplot_*aiida.fileout', '.', 0)] @@ -80,6 +80,26 @@ def test_pp_keep_plot_file(fixture_sandbox, generate_calc_job, generate_inputs): assert element in calc_info.retrieve_list +def test_pp_parse_data_files(fixture_sandbox, generate_calc_job, generate_inputs): + """Test a `PpCalculation` where `parse_data_files=False` so the data files are not retrieved.""" + entry_point_name = 'quantumespresso.pp' + inputs = generate_inputs() + inputs.metadata.options.parse_data_files = False + + calc_info = generate_calc_job(fixture_sandbox, entry_point_name, inputs) + retrieve_list = ['aiida.out'] + retrieve_temporary_list = [] + local_copy_list = [] + + # When both `keep_data_files` (default) and `parse_data_files` are set to False, the data files won't be pulled. 
+ assert isinstance(calc_info, datastructures.CalcInfo) + assert sorted(calc_info.local_copy_list) == sorted(local_copy_list) + assert sorted(calc_info.retrieve_temporary_list) == sorted(retrieve_temporary_list) + assert len(calc_info.retrieve_list) == 1 + for element in retrieve_list: + assert element in calc_info.retrieve_list + + def test_pp_cmdline_setting(fixture_sandbox, generate_calc_job, generate_inputs): """Test a `PpCalculation` with user-defined cmdline settings.""" entry_point_name = 'quantumespresso.pp' diff --git a/tests/parsers/test_pp.py b/tests/parsers/test_pp.py index 4211651fd..44b5557c3 100644 --- a/tests/parsers/test_pp.py +++ b/tests/parsers/test_pp.py @@ -125,7 +125,11 @@ def test_pp_default_1d( entry_point_calc_job = 'quantumespresso.pp' entry_point_parser = 'quantumespresso.pp' - node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, 'default_1d', generate_inputs_1d) + attributes = {'keep_data_files': False, 'parse_data_files': True} + + node = generate_calc_job_node( + entry_point_calc_job, fixture_localhost, 'default_1d', generate_inputs_1d, attributes=attributes + ) parser = generate_parser(entry_point_parser) results, calcfunction = parser.parse_from_node(node, store_provenance=False) @@ -157,9 +161,13 @@ def test_pp_default_1d_spherical( """Test a default `pp.x` calculation producing a 1D data set with spherical averaging.""" entry_point_calc_job = 'quantumespresso.pp' entry_point_parser = 'quantumespresso.pp' - + attributes = {'keep_data_files': False, 'parse_data_files': True} node = generate_calc_job_node( - entry_point_calc_job, fixture_localhost, 'default_1d_spherical', generate_inputs_1d_spherical + entry_point_calc_job, + fixture_localhost, + 'default_1d_spherical', + generate_inputs_1d_spherical, + attributes=attributes ) parser = generate_parser(entry_point_parser) results, calcfunction = parser.parse_from_node(node, store_provenance=False) @@ -200,8 +208,11 @@ def test_pp_default_2d( """Test a default `pp.x` 
calculation producing a 2D data set.""" entry_point_calc_job = 'quantumespresso.pp' entry_point_parser = 'quantumespresso.pp' + attributes = {'keep_data_files': False, 'parse_data_files': True} - node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, 'default_2d', generate_inputs_2d) + node = generate_calc_job_node( + entry_point_calc_job, fixture_localhost, 'default_2d', generate_inputs_2d, attributes=attributes + ) parser = generate_parser(entry_point_parser) results, calcfunction = parser.parse_from_node(node, store_provenance=False) @@ -237,8 +248,11 @@ def test_pp_default_polar( """Test a default `pp.x` calculation producing a polar coordinates data set.""" entry_point_calc_job = 'quantumespresso.pp' entry_point_parser = 'quantumespresso.pp' + attributes = {'keep_data_files': False, 'parse_data_files': True} - node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, 'default_polar', generate_inputs_polar) + node = generate_calc_job_node( + entry_point_calc_job, fixture_localhost, 'default_polar', generate_inputs_polar, attributes=attributes + ) parser = generate_parser(entry_point_parser) results, calcfunction = parser.parse_from_node(node, store_provenance=False) @@ -267,8 +281,11 @@ def test_pp_default_3d( """Test a default `pp.x` calculation producing a 3D data set.""" entry_point_calc_job = 'quantumespresso.pp' entry_point_parser = 'quantumespresso.pp' + attributes = {'keep_data_files': False, 'parse_data_files': True} - node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, 'default_3d', generate_inputs_3d) + node = generate_calc_job_node( + entry_point_calc_job, fixture_localhost, 'default_3d', generate_inputs_3d, attributes=attributes + ) parser = generate_parser(entry_point_parser) results, calcfunction = parser.parse_from_node(node, store_provenance=False) @@ -297,12 +314,16 @@ def test_pp_default_3d( }) -def test_pp_default_3d_keep_plot_file(generate_calc_job_node, generate_parser, generate_inputs_3d, 
tmpdir): - """Test a `pp.x` calculation where `keep_plot_file=False` meaning files will be parsed from temporary directory.""" +def test_pp_default_3d_keep_data_files(generate_calc_job_node, generate_parser, generate_inputs_3d, tmpdir): + """Test a `pp.x` calculation where `keep_data_files=False` meaning files will be parsed from temporary directory.""" entry_point_calc_job = 'quantumespresso.pp' entry_point_parser = 'quantumespresso.pp' - attributes = {'options': {'keep_plot_file': False}, 'retrieve_temporary_list': ['aiida.fileout']} + attributes = { + 'keep_data_files': False, + 'parse_data_files': True, + 'retrieve_temporary_list': ['aiida.fileout'], + } node = generate_calc_job_node( entry_point_calc_job, test_name='default_3d', @@ -320,12 +341,36 @@ def test_pp_default_3d_keep_plot_file(generate_calc_job_node, generate_parser, g assert len(results['output_data'].get_arraynames()) == 4 +def test_pp_default_3d_parse_data_files(generate_calc_job_node, generate_parser, generate_inputs_3d, tmpdir): + """Test a `pp.x` calculation where `parse_data_files=False`, so data files won't be parsed.""" + entry_point_calc_job = 'quantumespresso.pp' + entry_point_parser = 'quantumespresso.pp' + + attributes = {'keep_data_files': False, 'parse_data_files': False} + node = generate_calc_job_node( + entry_point_calc_job, + test_name='default_3d', + inputs=generate_inputs_3d, + attributes=attributes, + ) + parser = generate_parser(entry_point_parser) + results, calcfunction = parser.parse_from_node(node, store_provenance=False, retrieved_temporary_folder=tmpdir) + + assert calcfunction.is_finished, calcfunction.exception + assert calcfunction.is_finished_ok, calcfunction.exit_message + assert 'output_parameters' in results + assert 'output_data' not in results + + def test_pp_default_3d_multiple(generate_calc_job_node, generate_parser, generate_inputs_3d): """Test a default `pp.x` calculation producing multiple files in 3D format.""" entry_point_calc_job = 'quantumespresso.pp' 
entry_point_parser = 'quantumespresso.pp' + attributes = {'keep_data_files': False, 'parse_data_files': True} - node = generate_calc_job_node(entry_point_calc_job, test_name='default_3d_multiple', inputs=generate_inputs_3d) + node = generate_calc_job_node( + entry_point_calc_job, test_name='default_3d_multiple', inputs=generate_inputs_3d, attributes=attributes + ) parser = generate_parser(entry_point_parser) results, calcfunction = parser.parse_from_node(node, store_provenance=False) @@ -364,9 +409,14 @@ def test_pp_default_3d_failed_missing_data( """Test a default `pp.x` calculation where the aiida.fileout file is missing.""" entry_point_calc_job = 'quantumespresso.pp' entry_point_parser = 'quantumespresso.pp' + attributes = {'keep_data_files': False, 'parse_data_files': True} node = generate_calc_job_node( - entry_point_calc_job, fixture_localhost, 'default_3d_failed_missing_data', generate_inputs_3d + entry_point_calc_job, + fixture_localhost, + 'default_3d_failed_missing_data', + generate_inputs_3d, + attributes=attributes ) parser = generate_parser(entry_point_parser) _, calcfunction = parser.parse_from_node(node, store_provenance=False) @@ -398,9 +448,10 @@ def test_pp_default_3d_failed_format(fixture_localhost, generate_calc_job_node, """Test a default `pp.x` calculation where an unsupported output file format is used.""" entry_point_calc_job = 'quantumespresso.pp' entry_point_parser = 'quantumespresso.pp' + attributes = {'keep_data_files': False, 'parse_data_files': True} node = generate_calc_job_node( - entry_point_calc_job, fixture_localhost, 'default_3d_failed_format', generate_inputs_3d + entry_point_calc_job, fixture_localhost, 'default_3d_failed_format', generate_inputs_3d, attributes=attributes ) parser = generate_parser(entry_point_parser) _, calcfunction = parser.parse_from_node(node, store_provenance=False)