Skip to content

Commit

Permalink
Add metadata attribute to kedro.io datasets (#2537)
Browse files Browse the repository at this point in the history
* Add metadata attribute to kedro.io datasets

Signed-off-by: Ahdra Merali <[email protected]>
  • Loading branch information
AhdraMeraliQB authored May 22, 2023
1 parent 38e030a commit dc14f24
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 1 deletion.
1 change: 1 addition & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

## Major features and improvements
* `kedro run --params` now updates interpolated parameters correctly when using `OmegaConfigLoader`.
* Added `metadata` attribute to `kedro.io` datasets. This is ignored by Kedro, but may be consumed by users or external plugins.

## Bug fixes and other changes
* `OmegaConfigLoader` will return a `dict` instead of `DictConfig`.
Expand Down
4 changes: 4 additions & 0 deletions kedro/io/cached_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def __init__(
dataset: AbstractDataSet | dict,
version: Version = None,
copy_mode: str = None,
metadata: dict[str, Any] = None,
):
"""Creates a new instance of ``CachedDataSet`` pointing to the
provided Python object.
Expand All @@ -52,6 +53,8 @@ def __init__(
copy_mode: The copy mode used to copy the data. Possible
values are: "deepcopy", "copy" and "assign". If not
provided, it is inferred based on the data type.
metadata: Any arbitrary metadata.
This is ignored by Kedro, but may be consumed by users or external plugins.
Raises:
ValueError: If the provided dataset is not a valid dict/YAML
Expand All @@ -67,6 +70,7 @@ def __init__(
"representation of the dataset, or the actual dataset object."
)
self._cache = MemoryDataSet(copy_mode=copy_mode)
self.metadata = metadata

def _release(self) -> None:
self._cache.release()
Expand Down
5 changes: 5 additions & 0 deletions kedro/io/lambda_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,14 @@ def _release(self) -> None:
else:
self.__release()

# pylint: disable=too-many-arguments
def __init__(
self,
load: Callable[[], Any] | None,
save: Callable[[Any], None] | None,
exists: Callable[[], bool] = None,
release: Callable[[], None] = None,
metadata: dict[str, Any] = None,
):
"""Creates a new instance of ``LambdaDataSet`` with references to the
required input/output data set methods.
Expand All @@ -91,6 +93,8 @@ def __init__(
save: Method to save data to a data set.
exists: Method to check whether output data already exists.
release: Method to release any cached information.
metadata: Any arbitrary metadata.
This is ignored by Kedro, but may be consumed by users or external plugins.
Raises:
DataSetError: If a method is specified, but is not a Callable.
Expand All @@ -113,3 +117,4 @@ def __init__(
self.__save = save
self.__exists = exists
self.__release = release
self.metadata = metadata
7 changes: 6 additions & 1 deletion kedro/io/memory_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ class MemoryDataSet(AbstractDataSet):
"""

def __init__(self, data: Any = _EMPTY, copy_mode: str = None):
def __init__(
self, data: Any = _EMPTY, copy_mode: str = None, metadata: dict[str, Any] = None
):
"""Creates a new instance of ``MemoryDataSet`` pointing to the
provided Python object.
Expand All @@ -43,9 +45,12 @@ def __init__(self, data: Any = _EMPTY, copy_mode: str = None):
copy_mode: The copy mode used to copy the data. Possible
values are: "deepcopy", "copy" and "assign". If not
provided, it is inferred based on the data type.
metadata: Any arbitrary metadata.
This is ignored by Kedro, but may be consumed by users or external plugins.
"""
self._data = _EMPTY
self._copy_mode = copy_mode
self.metadata = metadata
if data is not _EMPTY:
self._save(data)

Expand Down
8 changes: 8 additions & 0 deletions kedro/io/partitioned_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ def __init__( # pylint: disable=too-many-arguments
load_args: dict[str, Any] = None,
fs_args: dict[str, Any] = None,
overwrite: bool = False,
metadata: dict[str, Any] = None,
):
"""Creates a new instance of ``PartitionedDataSet``.
Expand Down Expand Up @@ -179,6 +180,8 @@ def __init__( # pylint: disable=too-many-arguments
fs_args: Extra arguments to pass into underlying filesystem class constructor
(e.g. `{"project": "my-project"}` for ``GCSFileSystem``)
overwrite: If True, any existing partitions will be removed.
metadata: Any arbitrary metadata.
This is ignored by Kedro, but may be consumed by users or external plugins.
Raises:
DataSetError: If versioning is enabled for the underlying dataset.
Expand All @@ -193,6 +196,7 @@ def __init__( # pylint: disable=too-many-arguments
self._overwrite = overwrite
self._protocol = infer_storage_options(self._path)["protocol"]
self._partition_cache: Cache = Cache(maxsize=1)
self.metadata = metadata

dataset = dataset if isinstance(dataset, dict) else {"type": dataset}
self._dataset_type, self._dataset_config = parse_dataset_definition(dataset)
Expand Down Expand Up @@ -383,6 +387,7 @@ def __init__(
credentials: dict[str, Any] = None,
load_args: dict[str, Any] = None,
fs_args: dict[str, Any] = None,
metadata: dict[str, Any] = None,
):

"""Creates a new instance of ``IncrementalDataSet``.
Expand Down Expand Up @@ -429,6 +434,8 @@ def __init__(
the filesystem implementation.
fs_args: Extra arguments to pass into underlying filesystem class constructor
(e.g. `{"project": "my-project"}` for ``GCSFileSystem``).
metadata: Any arbitrary metadata.
This is ignored by Kedro, but may be consumed by users or external plugins.
Raises:
DataSetError: If versioning is enabled for the underlying dataset.
Expand All @@ -446,6 +453,7 @@ def __init__(

self._checkpoint_config = self._parse_checkpoint_config(checkpoint)
self._force_checkpoint = self._checkpoint_config.pop("force_checkpoint", None)
self.metadata = metadata

comparison_func = self._checkpoint_config.pop("comparison_func", operator.gt)
if isinstance(comparison_func, str):
Expand Down

0 comments on commit dc14f24

Please sign in to comment.