diff --git a/.github/workflows/CI-test.yaml b/.github/workflows/CI-test.yaml index 1a38313..e7e4d32 100644 --- a/.github/workflows/CI-test.yaml +++ b/.github/workflows/CI-test.yaml @@ -31,7 +31,7 @@ jobs: if ${{ matrix.python-version == '3.12' }}; then pip install --upgrade setuptools; fi - name: Install package run: | - python -m pip install .[dev] + python -m pip install .[dev,fesom] - name: Test if data will work (Meta-Test) run: | export HDF5_DEBUG=1 diff --git a/doc/index.rst b/doc/index.rst index a934ebf..e197a52 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -22,6 +22,7 @@ Contents including_custom_steps including_subcommand_plugins pymorize_configuration + pymorize_aux_files developer_guide developer_setup API diff --git a/doc/pymorize_aux_files.rst b/doc/pymorize_aux_files.rst new file mode 100644 index 0000000..c7bdec4 --- /dev/null +++ b/doc/pymorize_aux_files.rst @@ -0,0 +1,45 @@ +================================== +``pymorize`` Using auxiliary files +================================== + +At times, your post-processing will require additional files beyond the actual data. +For example, say you are analyzing FESOM output, and need to know the computational mesh +in order to calculate transport across a particular edge. In Python, the common way to do this +is to use the ``pyfesom2`` library to load the mesh. For a ``Rule`` to be aware of the mesh, you +can use auxiliary files. + + +You can add additional files to your ``Rule`` objects by specifying them in the +``aux`` element of the rule. These files are loaded when the ``Rule`` object is +initialized, and can be accessed in your steps. 
class AuxiliaryFile:
    """
    An auxiliary (non-data) file that can be attached to a Rule.

    Attributes
    ----------
    name : str
        The name under which the loaded content is made available.
    path : str
        The path to the file.
    loader : str or callable, optional
        What to use for loading the file. A callable is used as-is; any
        other value (e.g. the dotted path
        ``"pyfesom2.load_mesh_data.load_mesh"``) is handed to
        ``get_callable`` to be resolved. If omitted, the file is read as
        text.
    loader_args : list
        Extra positional arguments passed to the loader after ``path``.
    loader_kwargs : dict
        Keyword arguments passed to the loader.

    Methods
    -------
    load():
        Loads the file using the specified loader or reads the file content.
    from_dict(d):
        Creates an AuxiliaryFile instance from a dictionary.
    """

    def __init__(self, name, path, loader=None, loader_args=None, loader_kwargs=None):
        """
        Store the description of an auxiliary file; nothing is read yet.

        Parameters
        ----------
        name : str
            The name of the file.
        path : str
            The path to the file.
        loader : str or callable, optional
            A callable, or a specification resolvable to one, used to load
            the file.
        loader_args : list, optional
            Arguments to pass to the loader. Defaults to an empty list.
        loader_kwargs : dict, optional
            Keyword arguments to pass to the loader. Defaults to an empty
            dict.
        """
        self.name = name
        self.path = path
        self.loader = loader
        # ``None`` sentinels avoid sharing one mutable default between instances.
        self.loader_args = [] if loader_args is None else loader_args
        self.loader_kwargs = {} if loader_kwargs is None else loader_kwargs

    def __repr__(self):
        return (
            f"{type(self).__name__}(name={self.name!r}, path={self.path!r}, "
            f"loader={self.loader!r})"
        )

    def load(self):
        """
        Loads the file using the specified loader or reads the file content.

        Returns
        -------
        str
            The content of the file if no loader is specified.
        object
            Whatever ``loader(path, *loader_args, **loader_kwargs)`` returns
            if a loader is specified.
        """
        if self.loader is None:
            with open(self.path, "r") as f:
                return f.read()

        loader = self.loader
        if not callable(loader):
            # Only non-callables need resolving; an actual callable (as the
            # class documentation allows) is used directly.
            # NOTE(review): get_callable presumably resolves a dotted import
            # path to the object it names -- confirm in pymorize.utils.
            from .utils import get_callable

            loader = get_callable(loader)
        return loader(self.path, *self.loader_args, **self.loader_kwargs)

    @classmethod
    def from_dict(cls, d):
        """
        Creates an AuxiliaryFile instance from a dictionary.

        Parameters
        ----------
        d : dict
            A dictionary containing the attributes of the AuxiliaryFile.
            ``name`` and ``path`` are required; ``loader``, ``loader_args``
            and ``loader_kwargs`` are optional.

        Returns
        -------
        AuxiliaryFile
            An instance of AuxiliaryFile.

        Raises
        ------
        KeyError
            If ``name`` or ``path`` is missing from ``d``.
        """
        return cls(
            d["name"],
            d["path"],
            d.get("loader"),
            d.get("loader_args"),
            d.get("loader_kwargs"),
        )


# NOTE(PG): Think about this...maybe it should be a method of Rule...
def attach_files_to_rule(rule):
    """
    Attaches extra files to the rule

    Every entry of the rule's ``aux`` specification is turned into an
    ``AuxiliaryFile`` and loaded; the results are stored on the rule as a
    dictionary keyed by each file's ``name``. If two entries share a name,
    the later one wins.

    Parameters
    ----------
    rule :
        An object offering ``get(key, default)`` and accepting assignment
        to its ``aux`` attribute.

    Mutates
    -------
    rule :
        The Rule object is modified to include the loaded auxiliary files;
        ``rule.aux`` is replaced by the ``{name: loaded content}`` mapping.
    """
    # ``aux`` may be absent (default applies) or present but empty, e.g. a
    # bare ``aux:`` key in YAML yields None. Both mean "no auxiliary files".
    aux_specs = rule.get("aux", []) or []
    loaded_aux = {}
    for aux_file_spec in aux_specs:
        aux_file = AuxiliaryFile.from_dict(aux_file_spec)
        loaded_aux[aux_file.name] = aux_file.load()
    rule.aux = loaded_aux
"""Unit tests for attaching auxiliary files to a Rule."""

import pytest

from pymorize.aux_files import attach_files_to_rule


def test_aux_files_attach_without_aux(pi_uxarray_temp_rule):
    """A rule without any ``aux`` specification ends up with an empty mapping."""
    rule = pi_uxarray_temp_rule
    attach_files_to_rule(rule)
    assert rule.aux == {}


def test_aux_files_attach_simple_file(pi_uxarray_temp_rule, tmp_path):
    """Without a loader, the auxiliary file is read in as plain text."""
    # Create a temporary file
    temp_file = tmp_path / "temp_file.txt"
    temp_file.write_text("Hello, pytest!")

    rule = pi_uxarray_temp_rule
    rule.aux = [
        {
            "name": "aux1",
            "path": str(temp_file),
        },
    ]
    attach_files_to_rule(rule)
    assert rule.aux == {"aux1": "Hello, pytest!"}


def test_aux_files_attach_fesom_mesh(
    fesom_2p6_esmtools_temp_rule, fesom_2p6_pimesh_esm_tools_data
):
    """A dotted-path loader hands back the already loaded mesh object."""
    # pyfesom2 is only installed with the optional "fesom" extra; skip this
    # test (rather than breaking collection of the whole module) without it.
    load_mesh_data = pytest.importorskip("pyfesom2.load_mesh_data")

    mesh = fesom_2p6_pimesh_esm_tools_data / "input/fesom/mesh/pi"
    rule = fesom_2p6_esmtools_temp_rule
    rule.aux = [
        {
            "name": "mesh",
            "path": str(mesh),
            "loader": "pyfesom2.load_mesh_data.load_mesh",
        },
    ]
    attach_files_to_rule(rule)
    assert isinstance(rule.aux["mesh"], load_mesh_data.fesom_mesh)