From a92b911fe69016a30090f502af44cd8a5053cc2a Mon Sep 17 00:00:00 2001 From: Greg Vaslowski <7269272+Vaslo@users.noreply.github.com> Date: Thu, 10 Oct 2024 06:03:44 -0400 Subject: [PATCH 01/19] Update index.md (#4221) Fixed an erroneous link to the Get started with Kedro - Create your first data pipeline with Kedro video. It was accidentally linked to the previous video. Signed-off-by: Greg Vaslowski <7269272+Vaslo@users.noreply.github.com> Signed-off-by: Ankita Katiyar --- docs/source/course/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/course/index.md b/docs/source/course/index.md index 8b3f49c14f..7d9b9100e3 100644 --- a/docs/source/course/index.md +++ b/docs/source/course/index.md @@ -62,7 +62,7 @@ You don't need to register for the course and you can skip around the sections t 1. [Set up the Kedro Data Catalog](https://www.youtube.com/watch?v=rl2cncGxyts) 1. [Explore the spaceflights data](https://www.youtube.com/watch?v=bZD8N0yv3Fs) 1. [Refactor your data processing code into functions](https://www.youtube.com/watch?v=VFcrvnnNas4) -1. [Create your first data pipeline with Kedro](https://www.youtube.com/watch?v=VFcrvnnNas4) +1. [Create your first data pipeline with Kedro](https://www.youtube.com/watch?v=3YeE_gvDCvw) 1. [Assemble your nodes into a Kedro pipeline](https://www.youtube.com/watch?v=P__gFG1TmMo) 1. [Run your Kedro pipeline](https://www.youtube.com/watch?v=sll_LhZE-p8) 1. [Visualise your data pipeline with Kedro-Viz](https://www.youtube.com/watch?v=KWqSzbHgNW4) From e863f1690e1ddfee6f959774affa933a69b08042 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:49:16 +0100 Subject: [PATCH 02/19] Bump kedro-sphinx-theme from 2024.4.0 to 2024.10.0 (#4216) * Bump kedro-sphinx-theme from 2024.4.0 to 2024.10.0 Bumps [kedro-sphinx-theme](https://github.com/kedro-org/kedro-sphinx-theme) from 2024.4.0 to 2024.10.0. - [Release notes](https://github.com/kedro-org/kedro-sphinx-theme/releases) - [Commits](https://github.com/kedro-org/kedro-sphinx-theme/compare/v2024.4.0...v2024.10.0) --- updated-dependencies: - dependency-name: kedro-sphinx-theme dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * updated to 2024.10.2 * trigger_run * trigger_run --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: L. R. Couto <57910428+lrcouto@users.noreply.github.com> Co-authored-by: rashidakanchwala Co-authored-by: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> Signed-off-by: Ankita Katiyar --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 81eb4e301a..6f8e44f7ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,7 +83,7 @@ test = [ docs = [ "ipykernel>=5.3, <7.0", "Jinja2<3.2.0", - "kedro-sphinx-theme==2024.4.0", + "kedro-sphinx-theme==2024.10.2", "sphinx-notfound-page!=1.0.3", # Required by kedro-sphinx-theme. 1.0.3 results in `AttributeError: 'tuple' object has no attribute 'default'`. 
] jupyter = [ From 2ccba38e20009dd1055fd2a4fc56f3c7366d9e8e Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Thu, 10 Oct 2024 07:05:20 -0600 Subject: [PATCH 03/19] Replace all instances of "data set" with "dataset" (#4211) Signed-off-by: Deepyaman Datta Signed-off-by: Ankita Katiyar --- docs/source/data/data_catalog.md | 2 +- docs/source/integrations/mlflow.md | 2 +- .../nodes_and_pipelines/run_a_pipeline.md | 6 +- .../tutorial/spaceflights_tutorial_faqs.md | 6 +- .../conf/base/catalog.yml | 6 +- .../conf/local/credentials.yml | 2 +- .../pipelines/data_engineering/nodes.py | 2 +- kedro/io/__init__.py | 2 +- kedro/io/catalog_config_resolver.py | 2 +- kedro/io/core.py | 54 ++++++------ kedro/io/data_catalog.py | 88 +++++++++---------- kedro/io/lambda_dataset.py | 14 +-- kedro/io/memory_dataset.py | 2 +- kedro/pipeline/node.py | 4 +- kedro/pipeline/pipeline.py | 8 +- kedro/runner/parallel_runner.py | 12 +-- kedro/runner/sequential_runner.py | 2 +- .../conf/base/catalog.yml | 2 +- .../conf/local/credentials.yml | 2 +- tests/io/test_core.py | 8 +- tests/io/test_data_catalog.py | 52 +++++------ tests/io/test_kedro_data_catalog.py | 6 +- tests/io/test_lambda_dataset.py | 6 +- tests/pipeline/test_pipeline_from_missing.py | 2 +- 24 files changed, 146 insertions(+), 146 deletions(-) diff --git a/docs/source/data/data_catalog.md b/docs/source/data/data_catalog.md index 568e66ee4f..3edb94632a 100644 --- a/docs/source/data/data_catalog.md +++ b/docs/source/data/data_catalog.md @@ -200,7 +200,7 @@ cars: In this example, `filepath` is used as the basis of a folder that stores versions of the `cars` dataset. Each time a new version is created by a pipeline run it is stored within `data/01_raw/company/cars.csv//cars.csv`, where `` corresponds to a version string formatted as `YYYY-MM-DDThh.mm.ss.sssZ`. -By default, `kedro run` loads the latest version of the dataset. However, you can also specify a particular versioned data set with `--load-version` flag as follows: +By default, `kedro run` loads the latest version of the dataset. However, you can also specify a particular versioned dataset with `--load-version` flag as follows: ```bash kedro run --load-versions=cars:YYYY-MM-DDThh.mm.ss.sssZ diff --git a/docs/source/integrations/mlflow.md b/docs/source/integrations/mlflow.md index e2d06a0295..78d3df6c69 100644 --- a/docs/source/integrations/mlflow.md +++ b/docs/source/integrations/mlflow.md @@ -134,7 +134,7 @@ and you would be able to preview it in the MLflow web UI: ``` :::{warning} -If you get a `Failed while saving data to data set MlflowMatplotlibWriter` error, +If you get a `Failed while saving data to dataset MlflowMatplotlibWriter` error, it's probably because you had already executed `kedro run` while the dataset was marked as `versioned: true`. The solution is to cleanup the old `data/08_reporting/dummy_confusion_matrix.png` directory. ::: diff --git a/docs/source/nodes_and_pipelines/run_a_pipeline.md b/docs/source/nodes_and_pipelines/run_a_pipeline.md index 4eaa06c296..2bf1a99383 100644 --- a/docs/source/nodes_and_pipelines/run_a_pipeline.md +++ b/docs/source/nodes_and_pipelines/run_a_pipeline.md @@ -70,13 +70,13 @@ class DryRunner(AbstractRunner): """ def create_default_dataset(self, ds_name: str) -> AbstractDataset: - """Factory method for creating the default data set for the runner. + """Factory method for creating the default dataset for the runner. 
Args: - ds_name: Name of the missing data set + ds_name: Name of the missing dataset Returns: An instance of an implementation of AbstractDataset to be used - for all unregistered data sets. + for all unregistered datasets. """ return MemoryDataset() diff --git a/docs/source/tutorial/spaceflights_tutorial_faqs.md b/docs/source/tutorial/spaceflights_tutorial_faqs.md index ff09d0ae91..ab6d7b8020 100644 --- a/docs/source/tutorial/spaceflights_tutorial_faqs.md +++ b/docs/source/tutorial/spaceflights_tutorial_faqs.md @@ -7,11 +7,11 @@ If you can't find the answer you need here, [ask the Kedro community for help](h ## How do I resolve these common errors? ### Dataset errors -#### DatasetError: Failed while loading data from data set +#### DatasetError: Failed while loading data from dataset You're [testing whether Kedro can load the raw test data](./set_up_data.md#test-that-kedro-can-load-the-data) and see the following: ```python -DatasetError: Failed while loading data from data set +DatasetError: Failed while loading data from dataset CSVDataset(filepath=...). [Errno 2] No such file or directory: '.../companies.csv' ``` @@ -71,6 +71,6 @@ The above exception was the direct cause of the following exception: Traceback (most recent call last): ... raise DatasetError(message) from exc -kedro.io.core.DatasetError: Failed while loading data from data set CSVDataset(filepath=data/03_primary/model_input_table.csv, save_args={'index': False}). +kedro.io.core.DatasetError: Failed while loading data from dataset CSVDataset(filepath=data/03_primary/model_input_table.csv, save_args={'index': False}). [Errno 2] File b'data/03_primary/model_input_table.csv' does not exist: b'data/03_primary/model_input_table.csv' ``` diff --git a/features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/base/catalog.yml b/features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/base/catalog.yml index 62280524bd..32da2376b3 100644 --- a/features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/base/catalog.yml +++ b/features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/base/catalog.yml @@ -1,11 +1,11 @@ -# Here you can define all your data sets by using simple YAML syntax. +# Here you can define all your datasets by using simple YAML syntax. # # Documentation for this file format can be found in "The Data Catalog" # Link: https://docs.kedro.org/en/stable/data/data_catalog.html # # We support interacting with a variety of data stores including local file systems, cloud, network and HDFS # -# An example data set definition can look as follows: +# An example dataset definition can look as follows: # #bikes: # type: pandas.CSVDataset @@ -39,7 +39,7 @@ # (transcoding), templating and a way to reuse arguments that are frequently repeated. See more here: # https://docs.kedro.org/en/stable/data/data_catalog.html # -# This is a data set used by the "Hello World" example pipeline provided with the project +# This is a dataset used by the "Hello World" example pipeline provided with the project # template. Please feel free to remove it once you remove the example pipeline. 
example_iris_data: diff --git a/features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/local/credentials.yml b/features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/local/credentials.yml index 7fce832f6a..753fe237ed 100644 --- a/features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/local/credentials.yml +++ b/features/steps/test_starter/{{ cookiecutter.repo_name }}/conf/local/credentials.yml @@ -1,4 +1,4 @@ -# Here you can define credentials for different data sets and environment. +# Here you can define credentials for different datasets and environment. # # # Example: diff --git a/features/steps/test_starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_engineering/nodes.py b/features/steps/test_starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_engineering/nodes.py index 024ea394ed..c492614c33 100644 --- a/features/steps/test_starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_engineering/nodes.py +++ b/features/steps/test_starter/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_engineering/nodes.py @@ -11,7 +11,7 @@ def split_data(data: pd.DataFrame, example_test_data_ratio: float) -> dict[str, Any]: - """Node for splitting the classical Iris data set into training and test + """Node for splitting the classical Iris dataset into training and test sets, each split into features and labels. The split ratio parameter is taken from conf/project/parameters.yml. The data and the parameters will be loaded and provided to your function diff --git a/kedro/io/__init__.py b/kedro/io/__init__.py index 9697e1bd35..6384fd6138 100644 --- a/kedro/io/__init__.py +++ b/kedro/io/__init__.py @@ -1,5 +1,5 @@ """``kedro.io`` provides functionality to read and write to a -number of data sets. At the core of the library is the ``AbstractDataset`` class. +number of datasets. At the core of the library is the ``AbstractDataset`` class. """ from __future__ import annotations diff --git a/kedro/io/catalog_config_resolver.py b/kedro/io/catalog_config_resolver.py index dc55d18b3c..f722bedb6e 100644 --- a/kedro/io/catalog_config_resolver.py +++ b/kedro/io/catalog_config_resolver.py @@ -90,7 +90,7 @@ def _fetch_credentials(credentials_name: str, credentials: dict[str, Any]) -> An The set of requested credentials. Raises: - KeyError: When a data set with the given name has not yet been + KeyError: When a dataset with the given name has not yet been registered. """ diff --git a/kedro/io/core.py b/kedro/io/core.py index 53b660835c..981e81ccd7 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -71,7 +71,7 @@ class DatasetError(Exception): class DatasetNotFoundError(DatasetError): """``DatasetNotFoundError`` raised by ``DataCatalog`` class in case of - trying to use a non-existing data set. + trying to use a non-existing dataset. """ pass @@ -79,7 +79,7 @@ class DatasetNotFoundError(DatasetError): class DatasetAlreadyExistsError(DatasetError): """``DatasetAlreadyExistsError`` raised by ``DataCatalog`` class in case - of trying to add a data set which already exists in the ``DataCatalog``. + of trying to add a dataset which already exists in the ``DataCatalog``. """ pass @@ -87,7 +87,7 @@ class DatasetAlreadyExistsError(DatasetError): class VersionNotFoundError(DatasetError): """``VersionNotFoundError`` raised by ``AbstractVersionedDataset`` implementations - in case of no load versions available for the data set. 
+ in case of no load versions available for the dataset. """ pass @@ -98,9 +98,9 @@ class VersionNotFoundError(DatasetError): class AbstractDataset(abc.ABC, Generic[_DI, _DO]): - """``AbstractDataset`` is the base class for all data set implementations. + """``AbstractDataset`` is the base class for all dataset implementations. - All data set implementations should extend this abstract class + All dataset implementations should extend this abstract class and implement the methods marked as abstract. If a specific dataset implementation cannot be used in conjunction with the ``ParallelRunner``, such user-defined dataset should have the @@ -156,23 +156,23 @@ def from_config( load_version: str | None = None, save_version: str | None = None, ) -> AbstractDataset: - """Create a data set instance using the configuration provided. + """Create a dataset instance using the configuration provided. Args: name: Data set name. config: Data set config dictionary. load_version: Version string to be used for ``load`` operation if - the data set is versioned. Has no effect on the data set + the dataset is versioned. Has no effect on the dataset if versioning was not enabled. save_version: Version string to be used for ``save`` operation if - the data set is versioned. Has no effect on the data set + the dataset is versioned. Has no effect on the dataset if versioning was not enabled. Returns: An instance of an ``AbstractDataset`` subclass. Raises: - DatasetError: When the function fails to create the data set + DatasetError: When the function fails to create the dataset from its config. """ @@ -245,9 +245,9 @@ def load(self: Self) -> _DO: except DatasetError: raise except Exception as exc: - # This exception handling is by design as the composed data sets + # This exception handling is by design as the composed datasets # can throw any type of exception. - message = f"Failed while loading data from data set {self!s}.\n{exc!s}" + message = f"Failed while loading data from dataset {self!s}.\n{exc!s}" raise DatasetError(message) from exc load.__annotations__["return"] = load_func.__annotations__.get("return") @@ -271,7 +271,7 @@ def save(self: Self, data: _DI) -> None: except (DatasetError, FileNotFoundError, NotADirectoryError): raise except Exception as exc: - message = f"Failed while saving data to data set {self!s}.\n{exc!s}" + message = f"Failed while saving data to dataset {self!s}.\n{exc!s}" raise DatasetError(message) from exc save.__annotations__["data"] = save_func.__annotations__.get("data", Any) @@ -377,7 +377,7 @@ def _describe(self) -> dict[str, Any]: ) def exists(self) -> bool: - """Checks whether a data set's output already exists by calling + """Checks whether a dataset's output already exists by calling the provided _exists() method. 
Returns: @@ -391,7 +391,7 @@ def exists(self) -> bool: self._logger.debug("Checking whether target of %s exists", str(self)) return self._exists() except Exception as exc: - message = f"Failed during exists check for data set {self!s}.\n{exc!s}" + message = f"Failed during exists check for dataset {self!s}.\n{exc!s}" raise DatasetError(message) from exc def _exists(self) -> bool: @@ -412,7 +412,7 @@ def release(self) -> None: self._logger.debug("Releasing %s", str(self)) self._release() except Exception as exc: - message = f"Failed during release for data set {self!s}.\n{exc!s}" + message = f"Failed during release for dataset {self!s}.\n{exc!s}" raise DatasetError(message) from exc def _release(self) -> None: @@ -438,7 +438,7 @@ def generate_timestamp() -> str: class Version(namedtuple("Version", ["load", "save"])): """This namedtuple is used to provide load and save versions for versioned - data sets. If ``Version.load`` is None, then the latest available version + datasets. If ``Version.load`` is None, then the latest available version is loaded. If ``Version.save`` is None, then save version is formatted as YYYY-MM-DDThh.mm.ss.sssZ of the current timestamp. """ @@ -450,7 +450,7 @@ class Version(namedtuple("Version", ["load", "save"])): "Save version '{}' did not match load version '{}' for {}. This is strongly " "discouraged due to inconsistencies it may cause between 'save' and " "'load' operations. Please refrain from setting exact load version for " - "intermediate data sets where possible to avoid this warning." + "intermediate datasets where possible to avoid this warning." ) _DEFAULT_PACKAGES = ["kedro.io.", "kedro_datasets.", ""] @@ -467,10 +467,10 @@ def parse_dataset_definition( config: Data set config dictionary. It *must* contain the `type` key with fully qualified class name or the class object. load_version: Version string to be used for ``load`` operation if - the data set is versioned. Has no effect on the data set + the dataset is versioned. Has no effect on the dataset if versioning was not enabled. save_version: Version string to be used for ``save`` operation if - the data set is versioned. Has no effect on the data set + the dataset is versioned. Has no effect on the dataset if versioning was not enabled. Raises: @@ -522,14 +522,14 @@ def parse_dataset_definition( if not issubclass(class_obj, AbstractDataset): raise DatasetError( f"Dataset type '{class_obj.__module__}.{class_obj.__qualname__}' " - f"is invalid: all data set types must extend 'AbstractDataset'." + f"is invalid: all dataset types must extend 'AbstractDataset'." ) if VERSION_KEY in config: # remove "version" key so that it's not passed - # to the "unversioned" data set constructor + # to the "unversioned" dataset constructor message = ( - "'%s' attribute removed from data set configuration since it is a " + "'%s' attribute removed from dataset configuration since it is a " "reserved word and cannot be directly specified" ) logging.getLogger(__name__).warning(message, VERSION_KEY) @@ -579,10 +579,10 @@ def _local_exists(local_filepath: str) -> bool: # SKIP_IF_NO_SPARK class AbstractVersionedDataset(AbstractDataset[_DI, _DO], abc.ABC): """ - ``AbstractVersionedDataset`` is the base class for all versioned data set + ``AbstractVersionedDataset`` is the base class for all versioned dataset implementations. - All data sets that implement versioning should extend this + All datasets that implement versioning should extend this abstract class and implement the methods marked as abstract. 
Example: @@ -764,7 +764,7 @@ def save(self: Self, data: _DI) -> None: return save def exists(self) -> bool: - """Checks whether a data set's output already exists by calling + """Checks whether a dataset's output already exists by calling the provided _exists() method. Returns: @@ -780,7 +780,7 @@ def exists(self) -> bool: except VersionNotFoundError: return False except Exception as exc: # SKIP_IF_NO_SPARK - message = f"Failed during exists check for data set {self!s}.\n{exc!s}" + message = f"Failed during exists check for dataset {self!s}.\n{exc!s}" raise DatasetError(message) from exc def _release(self) -> None: @@ -938,7 +938,7 @@ def add_feed_dict(self, datasets: dict[str, Any], replace: bool = False) -> None ... def exists(self, name: str) -> bool: - """Checks whether registered data set exists by calling its `exists()` method.""" + """Checks whether registered dataset exists by calling its `exists()` method.""" ... def release(self, name: str) -> None: diff --git a/kedro/io/data_catalog.py b/kedro/io/data_catalog.py index a010f3e852..6f9a678272 100644 --- a/kedro/io/data_catalog.py +++ b/kedro/io/data_catalog.py @@ -2,7 +2,7 @@ provide ``load`` and ``save`` capabilities from anywhere in the program. To use a ``DataCatalog``, you need to instantiate it with a dictionary of data sets. Then it will act as a single point of reference for your calls, -relaying load and save functions to the underlying data sets. +relaying load and save functions to the underlying datasets. """ from __future__ import annotations @@ -35,10 +35,10 @@ def _sub_nonword_chars(dataset_name: str) -> str: - """Replace non-word characters in data set names since Kedro 0.16.2. + """Replace non-word characters in dataset names since Kedro 0.16.2. Args: - dataset_name: The data set name registered in the data catalog. + dataset_name: The dataset name registered in the data catalog. Returns: The name used in `DataCatalog.datasets`. @@ -102,9 +102,9 @@ class DataCatalog: """``DataCatalog`` stores instances of ``AbstractDataset`` implementations to provide ``load`` and ``save`` capabilities from anywhere in the program. To use a ``DataCatalog``, you need to instantiate it with - a dictionary of data sets. Then it will act as a single point of reference + a dictionary of datasets. Then it will act as a single point of reference for your calls, relaying load and save functions - to the underlying data sets. + to the underlying datasets. """ def __init__( # noqa: PLR0913 @@ -120,15 +120,15 @@ def __init__( # noqa: PLR0913 """``DataCatalog`` stores instances of ``AbstractDataset`` implementations to provide ``load`` and ``save`` capabilities from anywhere in the program. To use a ``DataCatalog``, you need to - instantiate it with a dictionary of data sets. Then it will act as a + instantiate it with a dictionary of datasets. Then it will act as a single point of reference for your calls, relaying load and save - functions to the underlying data sets. + functions to the underlying datasets. Args: - datasets: A dictionary of data set names and data set instances. + datasets: A dictionary of dataset names and dataset instances. feed_dict: A feed dict with data to be added in memory. - dataset_patterns: A dictionary of data set factory patterns - and corresponding data set configuration. When fetched from catalog configuration + dataset_patterns: A dictionary of dataset factory patterns + and corresponding dataset configuration. When fetched from catalog configuration these patterns will be sorted by: 1. 
Decreasing specificity (number of characters outside the curly brackets) 2. Decreasing number of placeholders (number of curly bracket pairs) @@ -137,10 +137,10 @@ def __init__( # noqa: PLR0913 pattern provided through the runners if it comes before "default" in the alphabet. Such an overwriting pattern will emit a warning. The `"{default}"` name will not emit a warning. - load_versions: A mapping between data set names and versions - to load. Has no effect on data sets without enabled versioning. + load_versions: A mapping between dataset names and versions + to load. Has no effect on datasets without enabled versioning. save_version: Version string to be used for ``save`` operations - by all data sets with enabled versioning. It must: a) be a + by all datasets with enabled versioning. It must: a) be a case-insensitive string that conforms with operating system filename limitations, b) always return the latest version when sorted in lexicographical order. @@ -216,28 +216,28 @@ def from_config( ``DataCatalog`` with configuration parsed from configuration files. Args: - catalog: A dictionary whose keys are the data set names and + catalog: A dictionary whose keys are the dataset names and the values are dictionaries with the constructor arguments - for classes implementing ``AbstractDataset``. The data set + for classes implementing ``AbstractDataset``. The dataset class to be loaded is specified with the key ``type`` and their - fully qualified class name. All ``kedro.io`` data set can be + fully qualified class name. All ``kedro.io`` dataset can be specified by their class name only, i.e. their module name can be omitted. credentials: A dictionary containing credentials for different - data sets. Use the ``credentials`` key in a ``AbstractDataset`` + datasets. Use the ``credentials`` key in a ``AbstractDataset`` to refer to the appropriate credentials as shown in the example below. load_versions: A mapping between dataset names and versions - to load. Has no effect on data sets without enabled versioning. + to load. Has no effect on datasets without enabled versioning. save_version: Version string to be used for ``save`` operations - by all data sets with enabled versioning. It must: a) be a + by all datasets with enabled versioning. It must: a) be a case-insensitive string that conforms with operating system filename limitations, b) always return the latest version when sorted in lexicographical order. Returns: An instantiated ``DataCatalog`` containing all specified - data sets, created and ready to use. + datasets, created and ready to use. Raises: DatasetError: When the method fails to create any of the data @@ -356,10 +356,10 @@ def _get_dataset( return dataset def load(self, name: str, version: str | None = None) -> Any: - """Loads a registered data set. + """Loads a registered dataset. Args: - name: A data set to be loaded. + name: A dataset to be loaded. version: Optional argument for concrete data version to be loaded. Works only with versioned datasets. @@ -367,7 +367,7 @@ def load(self, name: str, version: str | None = None) -> Any: The loaded data as configured. Raises: - DatasetNotFoundError: When a data set with the given name + DatasetNotFoundError: When a dataset with the given name has not yet been registered. Example: @@ -398,15 +398,15 @@ def load(self, name: str, version: str | None = None) -> Any: return result def save(self, name: str, data: Any) -> None: - """Save data to a registered data set. + """Save data to a registered dataset. 
Args: - name: A data set to be saved to. + name: A dataset to be saved to. data: A data object to be saved as configured in the registered - data set. + dataset. Raises: - DatasetNotFoundError: When a data set with the given name + DatasetNotFoundError: When a dataset with the given name has not yet been registered. Example: @@ -438,15 +438,15 @@ def save(self, name: str, data: Any) -> None: dataset.save(data) def exists(self, name: str) -> bool: - """Checks whether registered data set exists by calling its `exists()` + """Checks whether registered dataset exists by calling its `exists()` method. Raises a warning and returns False if `exists()` is not implemented. Args: - name: A data set to be checked. + name: A dataset to be checked. Returns: - Whether the data set output exists. + Whether the dataset output exists. """ try: @@ -456,13 +456,13 @@ def exists(self, name: str) -> bool: return dataset.exists() def release(self, name: str) -> None: - """Release any cached data associated with a data set + """Release any cached data associated with a dataset Args: - name: A data set to be checked. + name: A dataset to be checked. Raises: - DatasetNotFoundError: When a data set with the given name + DatasetNotFoundError: When a dataset with the given name has not yet been registered. """ dataset = self._get_dataset(name) @@ -477,15 +477,15 @@ def add( """Adds a new ``AbstractDataset`` object to the ``DataCatalog``. Args: - dataset_name: A unique data set name which has not been + dataset_name: A unique dataset name which has not been registered yet. - dataset: A data set object to be associated with the given data + dataset: A dataset object to be associated with the given data set name. replace: Specifies whether to replace an existing dataset with the same name is allowed. Raises: - DatasetAlreadyExistsError: When a data set with the same name + DatasetAlreadyExistsError: When a dataset with the same name has already been registered. Example: @@ -514,7 +514,7 @@ def add_all( datasets: dict[str, AbstractDataset], replace: bool = False, ) -> None: - """Adds a group of new data sets to the ``DataCatalog``. + """Adds a group of new datasets to the ``DataCatalog``. Args: datasets: A dictionary of dataset names and dataset @@ -523,7 +523,7 @@ def add_all( with the same name is allowed. Raises: - DatasetAlreadyExistsError: When a data set with the same name + DatasetAlreadyExistsError: When a dataset with the same name has already been registered. Example: @@ -597,10 +597,10 @@ def list(self, regex_search: str | None = None) -> list[str]: Args: regex_search: An optional regular expression which can be provided - to limit the data sets returned by a particular pattern. + to limit the datasets returned by a particular pattern. Returns: A list of dataset names available which match the - `regex_search` criteria (if provided). All data set names are returned + `regex_search` criteria (if provided). All dataset names are returned by default. 
Raises: @@ -610,11 +610,11 @@ def list(self, regex_search: str | None = None) -> list[str]: :: >>> catalog = DataCatalog() - >>> # get data sets where the substring 'raw' is present + >>> # get datasets where the substring 'raw' is present >>> raw_data = catalog.list(regex_search='raw') - >>> # get data sets which start with 'prm' or 'feat' + >>> # get datasets which start with 'prm' or 'feat' >>> feat_eng_data = catalog.list(regex_search='^(prm|feat)') - >>> # get data sets which end with 'time_series' + >>> # get datasets which end with 'time_series' >>> models = catalog.list(regex_search='.+time_series$') """ @@ -622,7 +622,7 @@ def list(self, regex_search: str | None = None) -> list[str]: return list(self._datasets.keys()) if not regex_search.strip(): - self._logger.warning("The empty string will not match any data sets") + self._logger.warning("The empty string will not match any datasets") return [] try: diff --git a/kedro/io/lambda_dataset.py b/kedro/io/lambda_dataset.py index 043bb67737..d120f74ed2 100644 --- a/kedro/io/lambda_dataset.py +++ b/kedro/io/lambda_dataset.py @@ -11,11 +11,11 @@ class LambdaDataset(AbstractDataset): - """``LambdaDataset`` loads and saves data to a data set. + """``LambdaDataset`` loads and saves data to a dataset. It relies on delegating to specific implementation such as csv, sql, etc. ``LambdaDataset`` class captures Exceptions while performing operations on - composed ``Dataset`` implementations. The composed data set is + composed ``Dataset`` implementations. The composed dataset is responsible for providing information on how to resolve the issue when possible. This information should be available through str(error). @@ -53,7 +53,7 @@ def _to_str(func: Any) -> str | None: def _load(self) -> Any: if not self.__load: raise DatasetError( - "Cannot load data set. No 'load' function " + "Cannot load dataset. No 'load' function " "provided when LambdaDataset was created." ) return self.__load() @@ -61,7 +61,7 @@ def _load(self) -> Any: def _save(self, data: Any) -> None: if not self.__save: raise DatasetError( - "Cannot save to data set. No 'save' function " + "Cannot save to dataset. No 'save' function " "provided when LambdaDataset was created." ) self.__save(data) @@ -86,11 +86,11 @@ def __init__( metadata: dict[str, Any] | None = None, ): """Creates a new instance of ``LambdaDataset`` with references to the - required input/output data set methods. + required input/output dataset methods. Args: - load: Method to load data from a data set. - save: Method to save data to a data set. + load: Method to load data from a dataset. + save: Method to save data to a dataset. exists: Method to check whether output data already exists. release: Method to release any cached information. metadata: Any arbitrary metadata. diff --git a/kedro/io/memory_dataset.py b/kedro/io/memory_dataset.py index 1b4bb8a371..1e8eef8452 100644 --- a/kedro/io/memory_dataset.py +++ b/kedro/io/memory_dataset.py @@ -1,4 +1,4 @@ -"""``MemoryDataset`` is a data set implementation which handles in-memory data.""" +"""``MemoryDataset`` is a dataset implementation which handles in-memory data.""" from __future__ import annotations diff --git a/kedro/pipeline/node.py b/kedro/pipeline/node.py index b382bee8cf..a303546279 100644 --- a/kedro/pipeline/node.py +++ b/kedro/pipeline/node.py @@ -59,7 +59,7 @@ def __init__( # noqa: PLR0913 contain only letters, digits, hyphens, underscores and/or fullstops. confirms: Optional name or the list of the names of the datasets that should be confirmed. 
This will result in calling - ``confirm()`` method of the corresponding data set instance. + ``confirm()`` method of the corresponding dataset instance. Specified dataset names do not necessarily need to be present in the node ``inputs`` or ``outputs``. namespace: Optional node namespace. @@ -601,7 +601,7 @@ def node( # noqa: PLR0913 tags: Optional set of tags to be applied to the node. confirms: Optional name or the list of the names of the datasets that should be confirmed. This will result in calling ``confirm()`` - method of the corresponding data set instance. Specified dataset + method of the corresponding dataset instance. Specified dataset names do not necessarily need to be present in the node ``inputs`` or ``outputs``. namespace: Optional node namespace. diff --git a/kedro/pipeline/pipeline.py b/kedro/pipeline/pipeline.py index ab7365a154..749eea8548 100644 --- a/kedro/pipeline/pipeline.py +++ b/kedro/pipeline/pipeline.py @@ -93,8 +93,8 @@ def __init__( >>> from kedro.pipeline import node >>> >>> # In the following scenario first_ds and second_ds - >>> # are data sets provided by io. Pipeline will pass these - >>> # data sets to first_node function and provides the result + >>> # are datasets provided by io. Pipeline will pass these + >>> # datasets to first_node function and provides the result >>> # to the second_node as input. >>> >>> def first_node(first_ds, second_ds): @@ -247,11 +247,11 @@ def outputs(self) -> set[str]: return self._remove_intermediates(self.all_outputs()) def datasets(self) -> set[str]: - """The names of all data sets used by the ``Pipeline``, + """The names of all datasets used by the ``Pipeline``, including inputs and outputs. Returns: - The set of all pipeline data sets. + The set of all pipeline datasets. """ return self.all_outputs() | self.all_inputs() diff --git a/kedro/runner/parallel_runner.py b/kedro/runner/parallel_runner.py index 7626bf8679..4bbcdc9ec5 100644 --- a/kedro/runner/parallel_runner.py +++ b/kedro/runner/parallel_runner.py @@ -43,7 +43,7 @@ class ParallelRunnerManager(SyncManager): """``ParallelRunnerManager`` is used to create shared ``MemoryDataset`` - objects as default data sets in a pipeline. + objects as default datasets in a pipeline. """ @@ -171,8 +171,8 @@ def _validate_nodes(cls, nodes: Iterable[Node]) -> None: @classmethod def _validate_catalog(cls, catalog: CatalogProtocol, pipeline: Pipeline) -> None: - """Ensure that all data sets are serialisable and that we do not have - any non proxied memory data sets being used as outputs as their content + """Ensure that all datasets are serialisable and that we do not have + any non proxied memory datasets being used as outputs as their content will not be synchronized across threads. """ @@ -190,9 +190,9 @@ def _validate_catalog(cls, catalog: CatalogProtocol, pipeline: Pipeline) -> None if unserialisable: raise AttributeError( - f"The following data sets cannot be used with multiprocessing: " + f"The following datasets cannot be used with multiprocessing: " f"{sorted(unserialisable)}\nIn order to utilize multiprocessing you " - f"need to make sure all data sets are serialisable, i.e. data sets " + f"need to make sure all datasets are serialisable, i.e. datasets " f"should not make use of lambda functions, nested functions, closures " f"etc.\nIf you are using custom decorators ensure they are correctly " f"decorated using functools.wraps()." 
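The `functools.wraps()` advice in the error message above can be made concrete with a minimal sketch (the `log_calls` decorator and `preprocess` function below are hypothetical illustrations, not part of this changeset). Without `@functools.wraps`, the wrapper's `__qualname__` would be `log_calls.<locals>.wrapper`, which pickle cannot resolve at module level, so `ParallelRunner` would reject the node function as unserialisable:

```python
import functools

def log_calls(func):
    @functools.wraps(func)  # copies __name__/__qualname__/__module__ from func
    def wrapper(*args, **kwargs):
        print(f"Calling {func.__name__}")
        return func(*args, **kwargs)
    return wrapper

@log_calls
def preprocess(data):
    # A module-level node function; because @functools.wraps restores its
    # original qualified name, pickle can look "preprocess" up at module
    # level and serialisation for multiprocessing succeeds.
    return data
```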
@@ -209,7 +209,7 @@ def _validate_catalog(cls, catalog: CatalogProtocol, pipeline: Pipeline) -> None if memory_datasets: raise AttributeError( - f"The following data sets are memory data sets: " + f"The following datasets are memory datasets: " f"{sorted(memory_datasets)}\n" f"ParallelRunner does not support output to externally created " f"MemoryDatasets" diff --git a/kedro/runner/sequential_runner.py b/kedro/runner/sequential_runner.py index c888e737cf..57a7aef17f 100644 --- a/kedro/runner/sequential_runner.py +++ b/kedro/runner/sequential_runner.py @@ -81,7 +81,7 @@ def _run( self._suggest_resume_scenario(pipeline, done_nodes, catalog) raise - # decrement load counts and release any data sets we've finished with + # decrement load counts and release any datasets we've finished with for dataset in node.inputs: load_counts[dataset] -= 1 if load_counts[dataset] < 1 and dataset not in pipeline.inputs(): diff --git a/kedro/templates/project/{{ cookiecutter.repo_name }}/conf/base/catalog.yml b/kedro/templates/project/{{ cookiecutter.repo_name }}/conf/base/catalog.yml index be73adae2a..789fc96fd1 100644 --- a/kedro/templates/project/{{ cookiecutter.repo_name }}/conf/base/catalog.yml +++ b/kedro/templates/project/{{ cookiecutter.repo_name }}/conf/base/catalog.yml @@ -1,4 +1,4 @@ -# Here you can define all your data sets by using simple YAML syntax. +# Here you can define all your datasets by using simple YAML syntax. # # Documentation for this file format can be found in "The Data Catalog" # Link: https://docs.kedro.org/en/stable/data/data_catalog.html diff --git a/kedro/templates/project/{{ cookiecutter.repo_name }}/conf/local/credentials.yml b/kedro/templates/project/{{ cookiecutter.repo_name }}/conf/local/credentials.yml index b2db154dbc..b9a9cea667 100644 --- a/kedro/templates/project/{{ cookiecutter.repo_name }}/conf/local/credentials.yml +++ b/kedro/templates/project/{{ cookiecutter.repo_name }}/conf/local/credentials.yml @@ -1,4 +1,4 @@ -# Here you can define credentials for different data sets and environment. +# Here you can define credentials for different datasets and environment. 
# # # Example: diff --git a/tests/io/test_core.py b/tests/io/test_core.py index 4128ad6da2..286a7142fd 100644 --- a/tests/io/test_core.py +++ b/tests/io/test_core.py @@ -359,7 +359,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, my_versioned_dataset, dummy_data): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" my_versioned_dataset.save(dummy_data) reloaded = my_versioned_dataset.load() assert dummy_data == reloaded @@ -398,14 +398,14 @@ def test_exists_general_exception(self): my_other_versioned_dataset.exists() def test_exists(self, my_versioned_dataset, dummy_data): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not my_versioned_dataset.exists() my_versioned_dataset.save(dummy_data) assert my_versioned_dataset.exists() shutil.rmtree(my_versioned_dataset._filepath) def test_prevent_overwrite(self, my_versioned_dataset, dummy_data): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding json file for a given save version already exists.""" my_versioned_dataset.save(dummy_data) pattern = ( @@ -550,7 +550,7 @@ def test_saving_none(self, my_legacy_dataset): my_legacy_dataset.save(None) def test_saving_invalid_data(self, my_legacy_dataset, dummy_data): - pattern = r"Failed while saving data to data set" + pattern = r"Failed while saving data to dataset" with pytest.raises(DatasetError, match=pattern): my_legacy_dataset.save(pd.DataFrame()) diff --git a/tests/io/test_data_catalog.py b/tests/io/test_data_catalog.py index 54cbdf340d..bbaf6e8c6b 100644 --- a/tests/io/test_data_catalog.py +++ b/tests/io/test_data_catalog.py @@ -168,14 +168,14 @@ def data_catalog_from_config(correct_config): class TestDataCatalog: def test_save_and_load(self, data_catalog, dummy_dataframe): - """Test saving and reloading the data set""" + """Test saving and reloading the dataset""" data_catalog.save("test", dummy_dataframe) reloaded_df = data_catalog.load("test") assert_frame_equal(reloaded_df, dummy_dataframe) def test_add_save_and_load(self, dataset, dummy_dataframe): - """Test adding and then saving and reloading the data set""" + """Test adding and then saving and reloading the dataset""" catalog = DataCatalog(datasets={}) catalog.add("test", dataset) catalog.save("test", dummy_dataframe) @@ -185,7 +185,7 @@ def test_add_save_and_load(self, dataset, dummy_dataframe): def test_add_all_save_and_load(self, dataset, dummy_dataframe): """Test adding all to the data catalog and then saving and reloading - the data set""" + the dataset""" catalog = DataCatalog(datasets={}) catalog.add_all({"test": dataset}) catalog.save("test", dummy_dataframe) @@ -194,34 +194,34 @@ def test_add_all_save_and_load(self, dataset, dummy_dataframe): assert_frame_equal(reloaded_df, dummy_dataframe) def test_load_error(self, data_catalog): - """Check the error when attempting to load a data set + """Check the error when attempting to load a dataset from nonexistent source""" - pattern = r"Failed while loading data from data set CSVDataset" + pattern = r"Failed while loading data from dataset CSVDataset" with pytest.raises(DatasetError, match=pattern): data_catalog.load("test") def test_add_dataset_twice(self, data_catalog, dataset): - """Check the error when attempting to add the data set twice""" + """Check the error when attempting to add 
the dataset twice""" pattern = r"Dataset 'test' has already been registered" with pytest.raises(DatasetAlreadyExistsError, match=pattern): data_catalog.add("test", dataset) def test_load_from_unregistered(self): - """Check the error when attempting to load unregistered data set""" + """Check the error when attempting to load unregistered dataset""" catalog = DataCatalog(datasets={}) pattern = r"Dataset 'test' not found in the catalog" with pytest.raises(DatasetNotFoundError, match=pattern): catalog.load("test") def test_save_to_unregistered(self, dummy_dataframe): - """Check the error when attempting to save to unregistered data set""" + """Check the error when attempting to save to unregistered dataset""" catalog = DataCatalog(datasets={}) pattern = r"Dataset 'test' not found in the catalog" with pytest.raises(DatasetNotFoundError, match=pattern): catalog.save("test", dummy_dataframe) def test_feed_dict(self, memory_catalog, conflicting_feed_dict): - """Test feed dict overriding some of the data sets""" + """Test feed dict overriding some of the datasets""" memory_catalog.add_feed_dict(conflicting_feed_dict, replace=True) assert "data" in memory_catalog.load("ds1") assert memory_catalog.load("ds1")["data"] == 0 @@ -235,7 +235,7 @@ def test_exists(self, data_catalog, dummy_dataframe): assert data_catalog.exists("test") def test_exists_not_implemented(self, caplog): - """Test calling `exists` on the data set, which didn't implement it""" + """Test calling `exists` on the dataset, which didn't implement it""" catalog = DataCatalog(datasets={"test": LambdaDataset(None, None)}) result = catalog.exists("test") @@ -248,18 +248,18 @@ def test_exists_not_implemented(self, caplog): assert result is False def test_exists_invalid(self, data_catalog): - """Check the error when calling `exists` on invalid data set""" + """Check the error when calling `exists` on invalid dataset""" assert not data_catalog.exists("wrong_key") def test_release_unregistered(self, data_catalog): - """Check the error when calling `release` on unregistered data set""" + """Check the error when calling `release` on unregistered dataset""" pattern = r"Dataset \'wrong_key\' not found in the catalog" with pytest.raises(DatasetNotFoundError, match=pattern) as e: data_catalog.release("wrong_key") assert "did you mean" not in str(e.value) def test_release_unregistered_typo(self, data_catalog): - """Check the error when calling `release` on mistyped data set""" + """Check the error when calling `release` on mistyped dataset""" pattern = ( "Dataset 'text' not found in the catalog" " - did you mean one of these instead: test" @@ -268,7 +268,7 @@ def test_release_unregistered_typo(self, data_catalog): data_catalog.release("text") def test_multi_catalog_list(self, multi_catalog): - """Test data catalog which contains multiple data sets""" + """Test data catalog which contains multiple datasets""" entries = multi_catalog.list() assert "abc" in entries assert "xyz" in entries @@ -284,7 +284,7 @@ def test_multi_catalog_list(self, multi_catalog): ], ) def test_multi_catalog_list_regex(self, multi_catalog, pattern, expected): - """Test that regex patterns filter data sets accordingly""" + """Test that regex patterns filter datasets accordingly""" assert multi_catalog.list(regex_search=pattern) == expected def test_multi_catalog_list_bad_regex(self, multi_catalog): @@ -404,7 +404,7 @@ def test_from_correct_config(self, data_catalog_from_config, dummy_dataframe): assert_frame_equal(reloaded_df, dummy_dataframe) def 
test_config_missing_type(self, correct_config): - """Check the error if type attribute is missing for some data set(s) + """Check the error if type attribute is missing for some dataset(s) in the config""" del correct_config["catalog"]["boats"]["type"] pattern = ( @@ -468,13 +468,13 @@ def test_config_invalid_dataset(self, correct_config): pattern = ( "An exception occurred when parsing config for dataset 'boats':\n" "Dataset type 'kedro.io.data_catalog.DataCatalog' is invalid: " - "all data set types must extend 'AbstractDataset'" + "all dataset types must extend 'AbstractDataset'" ) with pytest.raises(DatasetError, match=re.escape(pattern)): DataCatalog.from_config(**correct_config) def test_config_invalid_arguments(self, correct_config): - """Check the error if the data set config contains invalid arguments""" + """Check the error if the dataset config contains invalid arguments""" correct_config["catalog"]["boats"]["save_and_load_args"] = False pattern = ( r"Dataset 'boats' must only contain arguments valid for " @@ -504,7 +504,7 @@ def test_missing_credentials(self, correct_config): DataCatalog.from_config(**correct_config) def test_link_credentials(self, correct_config, mocker): - """Test credentials being linked to the relevant data set""" + """Test credentials being linked to the relevant dataset""" mock_client = mocker.patch("kedro_datasets.pandas.csv_dataset.fsspec") config = deepcopy(correct_config) del config["catalog"]["boats"] @@ -560,7 +560,7 @@ def test_idempotent_catalog(self, correct_config): assert catalog def test_error_dataset_init(self, bad_config): - """Check the error when trying to instantiate erroneous data set""" + """Check the error when trying to instantiate erroneous dataset""" pattern = r"Failed to instantiate dataset \'bad\' of type '.*BadDataset'" with pytest.raises(DatasetError, match=pattern): DataCatalog.from_config(bad_config, None) @@ -606,7 +606,7 @@ def test_bad_confirm(self, correct_config, dataset_name, pattern): class TestDataCatalogVersioned: def test_from_correct_config_versioned(self, correct_config, dummy_dataframe): - """Test load and save of versioned data sets from config""" + """Test load and save of versioned datasets from config""" correct_config["catalog"]["boats"]["versioned"] = True # Decompose `generate_timestamp` to keep `current_ts` reference. 
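The versioned-catalog behaviour exercised by the test above is the same one described in `data_catalog.md` earlier in this series: each save lands in a timestamped folder, and a specific version can be pinned at load time. A minimal sketch, assuming an illustrative `cars` entry (the dataset name, filepath and version string are hypothetical):

```python
from kedro.io import DataCatalog

# Hypothetical catalog config with versioning enabled.
config = {
    "cars": {
        "type": "pandas.CSVDataset",
        "filepath": "data/01_raw/company/cars.csv",
        "versioned": True,
    }
}

catalog = DataCatalog.from_config(config)
latest = catalog.load("cars")  # loads the latest available version
pinned = catalog.load("cars", version="2024-09-01T18.53.11.571Z")  # pins one version
```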
@@ -649,13 +649,13 @@ def test_from_correct_config_versioned_warn( self, caplog, correct_config, versioned ): """Check the warning if `version` attribute was added - to the data set config""" + to the dataset config""" correct_config["catalog"]["boats"]["versioned"] = versioned correct_config["catalog"]["boats"]["version"] = True DataCatalog.from_config(**correct_config) log_record = caplog.records[0] expected_log_message = ( - "'version' attribute removed from data set configuration since it " + "'version' attribute removed from dataset configuration since it " "is a reserved word and cannot be directly specified" ) assert log_record.levelname == "WARNING" @@ -672,7 +672,7 @@ def test_from_correct_config_load_versions_warn(self, correct_config): def test_compare_tracking_and_other_dataset_versioned( self, correct_config_with_tracking_ds, dummy_dataframe ): - """Test saving of tracking data sets from config results in the same + """Test saving of tracking datasets from config results in the same save version as other versioned datasets.""" catalog = DataCatalog.from_config(**correct_config_with_tracking_ds) @@ -694,7 +694,7 @@ def test_compare_tracking_and_other_dataset_versioned( assert tracking_timestamp == csv_timestamp def test_load_version(self, correct_config, dummy_dataframe, mocker): - """Test load versioned data sets from config""" + """Test load versioned datasets from config""" new_dataframe = pd.DataFrame({"col1": [0, 0], "col2": [0, 0], "col3": [0, 0]}) correct_config["catalog"]["boats"]["versioned"] = True mocker.patch( @@ -938,7 +938,7 @@ def test_unmatched_key_error_when_parsing_config( def test_factory_config_versioned( self, config_with_dataset_factories, filepath, dummy_dataframe ): - """Test load and save of versioned data sets from config""" + """Test load and save of versioned datasets from config""" config_with_dataset_factories["catalog"]["{brand}_cars"]["versioned"] = True config_with_dataset_factories["catalog"]["{brand}_cars"]["filepath"] = filepath diff --git a/tests/io/test_kedro_data_catalog.py b/tests/io/test_kedro_data_catalog.py index 5e0c463e7d..efa993bb0e 100644 --- a/tests/io/test_kedro_data_catalog.py +++ b/tests/io/test_kedro_data_catalog.py @@ -74,7 +74,7 @@ def test_add_save_and_load(self, dataset, dummy_dataframe): def test_load_error(self, data_catalog): """Check the error when attempting to load a dataset from nonexistent source""" - pattern = r"Failed while loading data from data set CSVDataset" + pattern = r"Failed while loading data from dataset CSVDataset" with pytest.raises(DatasetError, match=pattern): data_catalog.load("test") @@ -352,7 +352,7 @@ def test_config_invalid_dataset(self, correct_config): pattern = ( "An exception occurred when parsing config for dataset 'boats':\n" "Dataset type 'kedro.io.kedro_data_catalog.KedroDataCatalog' is invalid: " - "all data set types must extend 'AbstractDataset'" + "all dataset types must extend 'AbstractDataset'" ) with pytest.raises(DatasetError, match=re.escape(pattern)): KedroDataCatalog.from_config(**correct_config) @@ -553,7 +553,7 @@ def test_from_correct_config_versioned_warn( KedroDataCatalog.from_config(**correct_config) log_record = caplog.records[0] expected_log_message = ( - "'version' attribute removed from data set configuration since it " + "'version' attribute removed from dataset configuration since it " "is a reserved word and cannot be directly specified" ) assert log_record.levelname == "WARNING" diff --git a/tests/io/test_lambda_dataset.py b/tests/io/test_lambda_dataset.py index 
a3072af451..eac9709d04 100644 --- a/tests/io/test_lambda_dataset.py +++ b/tests/io/test_lambda_dataset.py @@ -104,7 +104,7 @@ def internal_load(): def test_load_undefined(self): """Check the error if `LambdaDataset.__load` is None""" - with pytest.raises(DatasetError, match="Cannot load data set"): + with pytest.raises(DatasetError, match="Cannot load dataset"): LambdaDataset(None, None).load() def test_load_not_callable(self): @@ -128,7 +128,7 @@ def test_save_raises_error(self, mocked_save, mocked_dataset): mocked_save.side_effect = FileExistsError(error_message) pattern = ( - r"Failed while saving data to data set LambdaDataset\(.+\)\.\n" + r"Failed while saving data to dataset LambdaDataset\(.+\)\.\n" + error_message ) with pytest.raises(DatasetError, match=pattern): @@ -137,7 +137,7 @@ def test_save_raises_error(self, mocked_save, mocked_dataset): def test_save_undefined(self): """Check the error if `LambdaDataset.__save` is None""" - with pytest.raises(DatasetError, match="Cannot save to data set"): + with pytest.raises(DatasetError, match="Cannot save to dataset"): LambdaDataset(None, None).save(42) def test_save_none(self, mocked_save, mocked_dataset): diff --git a/tests/pipeline/test_pipeline_from_missing.py b/tests/pipeline/test_pipeline_from_missing.py index f399e70c06..4e40638d83 100644 --- a/tests/pipeline/test_pipeline_from_missing.py +++ b/tests/pipeline/test_pipeline_from_missing.py @@ -210,7 +210,7 @@ def test_partial_propagation(self, branched_pipeline, hook_manager): assert _pipeline_contains(new_pipeline, ["split", "right_out"]) def test_partial_non_existent_propagation(self, branched_pipeline, hook_manager): - """A non existent data set whose node has one unregistered input + """A non existent dataset whose node has one unregistered input and one existent input should be recalculated correctly. """ catalog = _make_catalog(existent=["A", "C", "E", "F"], non_existent=["D"]) From e071640ae1c9fbbb5419291412ecc2a55b65de0f Mon Sep 17 00:00:00 2001 From: Dmitry Sorokin <40151847+DimedS@users.noreply.github.com> Date: Thu, 10 Oct 2024 19:33:42 +0100 Subject: [PATCH 04/19] Manually created sitemap.xml for improved control over indexed docs pages (#4145) * Load manually created sitemap Signed-off-by: Dmitry Sorokin <129520297+DmitrySorokinQB@users.noreply.github.com> * Add projects remove lastmod for latest Signed-off-by: Dmitry Sorokin <129520297+DmitrySorokinQB@users.noreply.github.com> * Add latest for projects Signed-off-by: Dmitry Sorokin <129520297+DmitrySorokinQB@users.noreply.github.com> --------- Signed-off-by: Dmitry Sorokin <129520297+DmitrySorokinQB@users.noreply.github.com> Co-authored-by: Dmitry Sorokin <129520297+DmitrySorokinQB@users.noreply.github.com> Co-authored-by: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com> Co-authored-by: L. R. 
Couto <57910428+lrcouto@users.noreply.github.com> Signed-off-by: Ankita Katiyar --- docs/source/sitemap.xml | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 docs/source/sitemap.xml diff --git a/docs/source/sitemap.xml b/docs/source/sitemap.xml new file mode 100644 index 0000000000..059f1ac1c6 --- /dev/null +++ b/docs/source/sitemap.xml @@ -0,0 +1,35 @@ +<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> + <url> + <loc>https://docs.kedro.org/en/stable/</loc> + <lastmod>2024-09-01T18:53:11.571849+00:00</lastmod> + <changefreq>monthly</changefreq> + <priority>1.0</priority> + </url> + <url> + <loc>https://docs.kedro.org/en/latest/</loc> + <changefreq>daily</changefreq> + <priority>0.5</priority> + </url> + <url> + <loc>https://docs.kedro.org/projects/kedro-viz/en/stable/</loc> + <lastmod>2024-09-01T18:53:11.571849+00:00</lastmod> + <changefreq>monthly</changefreq> + <priority>1.0</priority> + </url> + <url> + <loc>https://docs.kedro.org/projects/kedro-viz/en/latest/</loc> + <changefreq>daily</changefreq> + <priority>0.5</priority> + </url> + <url> + <loc>https://docs.kedro.org/projects/kedro-datasets/en/stable/</loc> + <lastmod>2024-09-01T18:53:11.571849+00:00</lastmod> + <changefreq>monthly</changefreq> + <priority>1.0</priority> + </url> + <url> + <loc>https://docs.kedro.org/projects/kedro-datasets/en/latest/</loc> + <changefreq>daily</changefreq> + <priority>0.5</priority> + </url> +</urlset> From 8cb24c40ee0d0a8ea24d54a497219ff7603cfd98 Mon Sep 17 00:00:00 2001 From: "L. R. Couto" <57910428+lrcouto@users.noreply.github.com> Date: Thu, 10 Oct 2024 15:57:08 -0300 Subject: [PATCH 05/19] Bump up version to 0.19.9 (#4219) * Bump up version to 0.19.9 Signed-off-by: Laura Couto * Add placeholders to release.md Signed-off-by: Laura Couto * Update citation.cff release date Signed-off-by: Laura Couto --------- Signed-off-by: Laura Couto Signed-off-by: L. R. Couto <57910428+lrcouto@users.noreply.github.com> Signed-off-by: Ankita Katiyar --- CITATION.cff | 4 ++-- RELEASE.md | 9 +++++++++ kedro/__init__.py | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 3f57feb252..371e42a1b0 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -44,6 +44,6 @@ authors: - family-names: Brugman given-names: Simon title: Kedro -version: 0.19.8 -date-released: 2024-08-20 +version: 0.19.9 +date-released: 2024-10-10 url: https://github.com/kedro-org/kedro diff --git a/RELEASE.md b/RELEASE.md index 5447340938..59cace8a36 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,5 +1,14 @@ # Upcoming Release +## Major features and improvements +## Bug fixes and other changes +## Breaking changes to the API +## Documentation changes +## Community contributions + + +# Release 0.19.9 + ## Major features and improvements * Dropped Python 3.8 support.
* Implemented `KedroDataCatalog` repeating `DataCatalog` functionality with a few API enhancements: diff --git a/kedro/__init__.py b/kedro/__init__.py index b49d498fc9..00ebebc5a7 100644 --- a/kedro/__init__.py +++ b/kedro/__init__.py @@ -6,7 +6,7 @@ import sys import warnings -__version__ = "0.19.8" +__version__ = "0.19.9" class KedroDeprecationWarning(DeprecationWarning): From 3b2878e0bf04a25e362114b4940a23481cbd27d7 Mon Sep 17 00:00:00 2001 From: Ankita Katiyar Date: Fri, 11 Oct 2024 13:38:09 +0100 Subject: [PATCH 06/19] first pass doesn't work yet Signed-off-by: Ankita Katiyar --- .../91468e64-virtualenv-py3.11.json | 1 + .asv/results/M-WFLM6NH6G5/machine.json | 9 +++ .asv/results/benchmarks.json | 78 +++++++++++++++++++ benchmarks/OmegaConfigLoader/__init__.py | 0 benchmarks/OmegaConfigLoader/benchmark_ocl.py | 60 ++++++++++++++ .../OmegaConfigLoader/conf/base/catalog.yml | 0 .../OmegaConfigLoader/conf/base/globals.yml | 0 .../conf/base/parameters.yml | 0 .../OmegaConfigLoader/conf/local/catalog.yml | 0 .../OmegaConfigLoader/conf/local/globals.yml | 0 .../conf/local/parameters.yml | 0 11 files changed, 148 insertions(+) create mode 100644 .asv/results/M-WFLM6NH6G5/91468e64-virtualenv-py3.11.json create mode 100644 .asv/results/M-WFLM6NH6G5/machine.json create mode 100644 .asv/results/benchmarks.json create mode 100644 benchmarks/OmegaConfigLoader/__init__.py create mode 100644 benchmarks/OmegaConfigLoader/benchmark_ocl.py create mode 100644 benchmarks/OmegaConfigLoader/conf/base/catalog.yml create mode 100644 benchmarks/OmegaConfigLoader/conf/base/globals.yml create mode 100644 benchmarks/OmegaConfigLoader/conf/base/parameters.yml create mode 100644 benchmarks/OmegaConfigLoader/conf/local/catalog.yml create mode 100644 benchmarks/OmegaConfigLoader/conf/local/globals.yml create mode 100644 benchmarks/OmegaConfigLoader/conf/local/parameters.yml diff --git a/.asv/results/M-WFLM6NH6G5/91468e64-virtualenv-py3.11.json b/.asv/results/M-WFLM6NH6G5/91468e64-virtualenv-py3.11.json new file mode 100644 index 0000000000..f25d79f0bb --- /dev/null +++ b/.asv/results/M-WFLM6NH6G5/91468e64-virtualenv-py3.11.json @@ -0,0 +1 @@ +{"commit_hash": "91468e64ea6f1fc4d51fe6313d738476189dd74a", "env_name": "virtualenv-py3.11", "date": 1728586628000, "params": {"arch": "arm64", "cpu": "Apple M1 Max", "machine": "M-WFLM6NH6G5", "num_cpu": "10", "os": "Darwin 23.6.0", "ram": "34359738368", "python": "3.11"}, "python": "3.11", "requirements": {}, "env_vars": {}, "result_columns": ["result", "params", "version", "started_at", "duration", "stats_ci_99_a", "stats_ci_99_b", "stats_q_25", "stats_q_75", "stats_number", "stats_repeat", "samples", "profile"], "results": {"benchmark_dummy.TimeSuite.time_keys": [[2.541248086432182e-06], [], "86e015a3c40c52da31e4185fff7c7176c38c5e1e1e4aba71912db0b388225191", 1728644924899, 0.63448, [2.5262e-06], [2.6412e-06], [2.533e-06], [2.5861e-06], [4180], [10]], "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_loading_base_config": [[4.0286439130507145e-05], [], "d40454765b26efac921c78a6fef4a045a8e533266f53becd29e5b3d960de881a", 1728644924470, 0.90424, [3.9613e-05], [4.1723e-05], [3.9864e-05], [4.1201e-05], [271], [10]], "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_loading_env_config": [null, [], "a0fd9c77896289880cc8bbde02dc4ef9b7eb6bcba23d6c91a23ccb202d391b9f", 1728644923139, 0.3623], "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_merge_destructive_strategy": [null, [], "bc851f1a9364e2a0de2d111f30652d404e1b61da33d1377799c163c00b2690f8", 
1728644923502, 0.31744], "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_merge_soft_strategy": [null, [], "08a283d77079d9fcf30a4863dfe0bafbd85d27b7e84b13eca9a9a7267eaf5b16", 1728644923819, 0.33724]}, "durations": {}, "version": 2} \ No newline at end of file diff --git a/.asv/results/M-WFLM6NH6G5/machine.json b/.asv/results/M-WFLM6NH6G5/machine.json new file mode 100644 index 0000000000..3fe4186a75 --- /dev/null +++ b/.asv/results/M-WFLM6NH6G5/machine.json @@ -0,0 +1,9 @@ +{ + "arch": "arm64", + "cpu": "Apple M1 Max", + "machine": "M-WFLM6NH6G5", + "num_cpu": "10", + "os": "Darwin 23.6.0", + "ram": "34359738368", + "version": 1 +} \ No newline at end of file diff --git a/.asv/results/benchmarks.json b/.asv/results/benchmarks.json new file mode 100644 index 0000000000..1153a1f2fd --- /dev/null +++ b/.asv/results/benchmarks.json @@ -0,0 +1,78 @@ +{ + "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_loading_base_config": { + "code": "class TimeOmegaConfigLoader:\n def time_loading_base_config(self):\n \"\"\"Benchmark the time to load the base configuration\"\"\"\n config = self.loader[\"globals\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n self.env = \"local\"\n \n # Create sample config files in the temp directory\n self._create_config_file(\"base\", \"globals.yml\", {\"global_param\": \"value\"})\n self._create_config_file(\"base\", \"catalog.yml\", {\"dataset\": {\"type\": \"pandas.CSVDataSet\"}})\n self._create_config_file(\"local\", \"catalog.yml\", {\"dataset\": {\"filepath\": \"data.csv\"}})\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source.as_posix(), env=self.env)", + "min_run_count": 2, + "name": "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_loading_base_config", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "d40454765b26efac921c78a6fef4a045a8e533266f53becd29e5b3d960de881a", + "warmup_time": -1 + }, + "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_loading_env_config": { + "code": "class TimeOmegaConfigLoader:\n def time_loading_env_config(self):\n \"\"\"Benchmark the time to load environment-specific configuration\"\"\"\n config = self.loader[\"catalog\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n self.env = \"local\"\n \n # Create sample config files in the temp directory\n self._create_config_file(\"base\", \"globals.yml\", {\"global_param\": \"value\"})\n self._create_config_file(\"base\", \"catalog.yml\", {\"dataset\": {\"type\": \"pandas.CSVDataSet\"}})\n self._create_config_file(\"local\", \"catalog.yml\", {\"dataset\": {\"filepath\": \"data.csv\"}})\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source.as_posix(), env=self.env)", + "min_run_count": 2, + "name": "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_loading_env_config", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "a0fd9c77896289880cc8bbde02dc4ef9b7eb6bcba23d6c91a23ccb202d391b9f", + "warmup_time": -1 + }, + 
"OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_merge_destructive_strategy": { + "code": "class TimeOmegaConfigLoader:\n def time_merge_destructive_strategy(self):\n \"\"\"Benchmark the time to load and destructively merge configurations\"\"\"\n self.loader.merge_strategy = {\"catalog\": \"destructive\"}\n config = self.loader[\"catalog\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n self.env = \"local\"\n \n # Create sample config files in the temp directory\n self._create_config_file(\"base\", \"globals.yml\", {\"global_param\": \"value\"})\n self._create_config_file(\"base\", \"catalog.yml\", {\"dataset\": {\"type\": \"pandas.CSVDataSet\"}})\n self._create_config_file(\"local\", \"catalog.yml\", {\"dataset\": {\"filepath\": \"data.csv\"}})\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source.as_posix(), env=self.env)", + "min_run_count": 2, + "name": "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_merge_destructive_strategy", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "bc851f1a9364e2a0de2d111f30652d404e1b61da33d1377799c163c00b2690f8", + "warmup_time": -1 + }, + "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_merge_soft_strategy": { + "code": "class TimeOmegaConfigLoader:\n def time_merge_soft_strategy(self):\n \"\"\"Benchmark the time to load and soft-merge configurations\"\"\"\n self.loader.merge_strategy = {\"catalog\": \"soft\"}\n config = self.loader[\"catalog\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n self.env = \"local\"\n \n # Create sample config files in the temp directory\n self._create_config_file(\"base\", \"globals.yml\", {\"global_param\": \"value\"})\n self._create_config_file(\"base\", \"catalog.yml\", {\"dataset\": {\"type\": \"pandas.CSVDataSet\"}})\n self._create_config_file(\"local\", \"catalog.yml\", {\"dataset\": {\"filepath\": \"data.csv\"}})\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source.as_posix(), env=self.env)", + "min_run_count": 2, + "name": "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_merge_soft_strategy", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "08a283d77079d9fcf30a4863dfe0bafbd85d27b7e84b13eca9a9a7267eaf5b16", + "warmup_time": -1 + }, + "benchmark_dummy.TimeSuite.time_keys": { + "code": "class TimeSuite:\n def time_keys(self):\n for key in self.d.keys():\n pass\n\n def setup(self):\n self.d = {}\n for x in range(500):\n self.d[x] = None", + "min_run_count": 2, + "name": "benchmark_dummy.TimeSuite.time_keys", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "86e015a3c40c52da31e4185fff7c7176c38c5e1e1e4aba71912db0b388225191", + "warmup_time": -1 + }, + "version": 2 +} \ No newline at end of file diff --git a/benchmarks/OmegaConfigLoader/__init__.py b/benchmarks/OmegaConfigLoader/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/benchmarks/OmegaConfigLoader/benchmark_ocl.py b/benchmarks/OmegaConfigLoader/benchmark_ocl.py new file mode 100644 index 0000000000..c9d1badeda --- /dev/null +++ b/benchmarks/OmegaConfigLoader/benchmark_ocl.py @@ -0,0 +1,60 @@ +from pathlib import Path +import os +import tempfile +from kedro.config import OmegaConfigLoader + +class TimeOmegaConfigLoader: + + def setup(self): + # Setup temporary configuration directory with sample config files + self.temp_dir = tempfile.TemporaryDirectory() + self.conf_source = Path(self.temp_dir.name) + self.env = "local" + + # Create sample config files in the temp directory + self._create_config_file("base", "globals.yml", {"global_param": "value"}) + self._create_config_file("base", "catalog.yml", {"dataset": {"type": "pandas.CSVDataSet"}}) + self._create_config_file("local", "catalog.yml", {"dataset": {"filepath": "data.csv"}}) + + # Instantiate the OmegaConfigLoader + self.loader = OmegaConfigLoader(conf_source=self.conf_source.as_posix(), env=self.env) + + def teardown(self): + # Cleanup temporary directory + self.temp_dir.cleanup() + + def _create_config_file(self, env, file_name, data): + env_path = self.conf_source / env + env_path.mkdir(parents=True, exist_ok=True) + file_path = env_path / file_name + + import yaml + with open(file_path, "w") as f: + yaml.dump(data, f) + + def time_loading_base_config(self): + """Benchmark the time to load the base configuration""" + config = self.loader["globals"] + + def time_loading_env_config(self): + """Benchmark the time to load environment-specific configuration""" + config = self.loader["catalog"] + + def time_merge_soft_strategy(self): + """Benchmark the time to load and soft-merge configurations""" + self.loader.merge_strategy = {"catalog": "soft"} + config = self.loader["catalog"] + + def time_merge_destructive_strategy(self): + """Benchmark the time to load and destructively merge configurations""" + self.loader.merge_strategy = {"catalog": "destructive"} + config = self.loader["catalog"] + + def peak_memory_loading_config(self): + """Benchmark peak memory usage during config loading""" + config = self.loader["catalog"] + + def peak_memory_soft_merge(self): + """Benchmark peak memory usage during soft merge""" + self.loader.merge_strategy = {"catalog": "soft"} + config = self.loader["catalog"] diff --git a/benchmarks/OmegaConfigLoader/conf/base/catalog.yml b/benchmarks/OmegaConfigLoader/conf/base/catalog.yml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/benchmarks/OmegaConfigLoader/conf/base/globals.yml b/benchmarks/OmegaConfigLoader/conf/base/globals.yml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/benchmarks/OmegaConfigLoader/conf/base/parameters.yml b/benchmarks/OmegaConfigLoader/conf/base/parameters.yml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/benchmarks/OmegaConfigLoader/conf/local/catalog.yml b/benchmarks/OmegaConfigLoader/conf/local/catalog.yml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/benchmarks/OmegaConfigLoader/conf/local/globals.yml b/benchmarks/OmegaConfigLoader/conf/local/globals.yml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/benchmarks/OmegaConfigLoader/conf/local/parameters.yml b/benchmarks/OmegaConfigLoader/conf/local/parameters.yml new file mode 100644 index 0000000000..e69de29bb2 From f2f177089b0354b07d9cbaaaa4171dae8f85e522 Mon Sep 17 00:00:00 2001 From: Ankita Katiyar Date: Mon, 14 Oct 2024 11:47:45 +0100 Subject: [PATCH 07/19] Update ocl tests Signed-off-by: Ankita Katiyar --- 
.../91468e64-virtualenv-py3.11.json | 1 - .asv/results/M-WFLM6NH6G5/machine.json | 9 - .asv/results/benchmarks.json | 78 --------- benchmarks/OmegaConfigLoader/__init__.py | 0 benchmarks/OmegaConfigLoader/benchmark_ocl.py | 60 ------- .../OmegaConfigLoader/conf/base/catalog.yml | 0 .../OmegaConfigLoader/conf/base/globals.yml | 0 .../conf/base/parameters.yml | 0 .../OmegaConfigLoader/conf/local/catalog.yml | 0 .../OmegaConfigLoader/conf/local/globals.yml | 0 .../conf/local/parameters.yml | 0 benchmarks/benchmark_ocl.py | 161 ++++++++++++++++++ pyproject.toml | 6 +- 13 files changed, 164 insertions(+), 151 deletions(-) delete mode 100644 .asv/results/M-WFLM6NH6G5/91468e64-virtualenv-py3.11.json delete mode 100644 .asv/results/M-WFLM6NH6G5/machine.json delete mode 100644 .asv/results/benchmarks.json delete mode 100644 benchmarks/OmegaConfigLoader/__init__.py delete mode 100644 benchmarks/OmegaConfigLoader/benchmark_ocl.py delete mode 100644 benchmarks/OmegaConfigLoader/conf/base/catalog.yml delete mode 100644 benchmarks/OmegaConfigLoader/conf/base/globals.yml delete mode 100644 benchmarks/OmegaConfigLoader/conf/base/parameters.yml delete mode 100644 benchmarks/OmegaConfigLoader/conf/local/catalog.yml delete mode 100644 benchmarks/OmegaConfigLoader/conf/local/globals.yml delete mode 100644 benchmarks/OmegaConfigLoader/conf/local/parameters.yml create mode 100644 benchmarks/benchmark_ocl.py diff --git a/.asv/results/M-WFLM6NH6G5/91468e64-virtualenv-py3.11.json b/.asv/results/M-WFLM6NH6G5/91468e64-virtualenv-py3.11.json deleted file mode 100644 index f25d79f0bb..0000000000 --- a/.asv/results/M-WFLM6NH6G5/91468e64-virtualenv-py3.11.json +++ /dev/null @@ -1 +0,0 @@ -{"commit_hash": "91468e64ea6f1fc4d51fe6313d738476189dd74a", "env_name": "virtualenv-py3.11", "date": 1728586628000, "params": {"arch": "arm64", "cpu": "Apple M1 Max", "machine": "M-WFLM6NH6G5", "num_cpu": "10", "os": "Darwin 23.6.0", "ram": "34359738368", "python": "3.11"}, "python": "3.11", "requirements": {}, "env_vars": {}, "result_columns": ["result", "params", "version", "started_at", "duration", "stats_ci_99_a", "stats_ci_99_b", "stats_q_25", "stats_q_75", "stats_number", "stats_repeat", "samples", "profile"], "results": {"benchmark_dummy.TimeSuite.time_keys": [[2.541248086432182e-06], [], "86e015a3c40c52da31e4185fff7c7176c38c5e1e1e4aba71912db0b388225191", 1728644924899, 0.63448, [2.5262e-06], [2.6412e-06], [2.533e-06], [2.5861e-06], [4180], [10]], "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_loading_base_config": [[4.0286439130507145e-05], [], "d40454765b26efac921c78a6fef4a045a8e533266f53becd29e5b3d960de881a", 1728644924470, 0.90424, [3.9613e-05], [4.1723e-05], [3.9864e-05], [4.1201e-05], [271], [10]], "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_loading_env_config": [null, [], "a0fd9c77896289880cc8bbde02dc4ef9b7eb6bcba23d6c91a23ccb202d391b9f", 1728644923139, 0.3623], "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_merge_destructive_strategy": [null, [], "bc851f1a9364e2a0de2d111f30652d404e1b61da33d1377799c163c00b2690f8", 1728644923502, 0.31744], "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_merge_soft_strategy": [null, [], "08a283d77079d9fcf30a4863dfe0bafbd85d27b7e84b13eca9a9a7267eaf5b16", 1728644923819, 0.33724]}, "durations": {}, "version": 2} \ No newline at end of file diff --git a/.asv/results/M-WFLM6NH6G5/machine.json b/.asv/results/M-WFLM6NH6G5/machine.json deleted file mode 100644 index 3fe4186a75..0000000000 --- a/.asv/results/M-WFLM6NH6G5/machine.json +++ 
/dev/null @@ -1,9 +0,0 @@ -{ - "arch": "arm64", - "cpu": "Apple M1 Max", - "machine": "M-WFLM6NH6G5", - "num_cpu": "10", - "os": "Darwin 23.6.0", - "ram": "34359738368", - "version": 1 -} \ No newline at end of file diff --git a/.asv/results/benchmarks.json b/.asv/results/benchmarks.json deleted file mode 100644 index 1153a1f2fd..0000000000 --- a/.asv/results/benchmarks.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_loading_base_config": { - "code": "class TimeOmegaConfigLoader:\n def time_loading_base_config(self):\n \"\"\"Benchmark the time to load the base configuration\"\"\"\n config = self.loader[\"globals\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n self.env = \"local\"\n \n # Create sample config files in the temp directory\n self._create_config_file(\"base\", \"globals.yml\", {\"global_param\": \"value\"})\n self._create_config_file(\"base\", \"catalog.yml\", {\"dataset\": {\"type\": \"pandas.CSVDataSet\"}})\n self._create_config_file(\"local\", \"catalog.yml\", {\"dataset\": {\"filepath\": \"data.csv\"}})\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source.as_posix(), env=self.env)", - "min_run_count": 2, - "name": "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_loading_base_config", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "d40454765b26efac921c78a6fef4a045a8e533266f53becd29e5b3d960de881a", - "warmup_time": -1 - }, - "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_loading_env_config": { - "code": "class TimeOmegaConfigLoader:\n def time_loading_env_config(self):\n \"\"\"Benchmark the time to load environment-specific configuration\"\"\"\n config = self.loader[\"catalog\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n self.env = \"local\"\n \n # Create sample config files in the temp directory\n self._create_config_file(\"base\", \"globals.yml\", {\"global_param\": \"value\"})\n self._create_config_file(\"base\", \"catalog.yml\", {\"dataset\": {\"type\": \"pandas.CSVDataSet\"}})\n self._create_config_file(\"local\", \"catalog.yml\", {\"dataset\": {\"filepath\": \"data.csv\"}})\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source.as_posix(), env=self.env)", - "min_run_count": 2, - "name": "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_loading_env_config", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "a0fd9c77896289880cc8bbde02dc4ef9b7eb6bcba23d6c91a23ccb202d391b9f", - "warmup_time": -1 - }, - "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_merge_destructive_strategy": { - "code": "class TimeOmegaConfigLoader:\n def time_merge_destructive_strategy(self):\n \"\"\"Benchmark the time to load and destructively merge configurations\"\"\"\n self.loader.merge_strategy = {\"catalog\": \"destructive\"}\n config = self.loader[\"catalog\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n 
self.conf_source = Path(self.temp_dir.name)\n self.env = \"local\"\n \n # Create sample config files in the temp directory\n self._create_config_file(\"base\", \"globals.yml\", {\"global_param\": \"value\"})\n self._create_config_file(\"base\", \"catalog.yml\", {\"dataset\": {\"type\": \"pandas.CSVDataSet\"}})\n self._create_config_file(\"local\", \"catalog.yml\", {\"dataset\": {\"filepath\": \"data.csv\"}})\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source.as_posix(), env=self.env)", - "min_run_count": 2, - "name": "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_merge_destructive_strategy", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "bc851f1a9364e2a0de2d111f30652d404e1b61da33d1377799c163c00b2690f8", - "warmup_time": -1 - }, - "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_merge_soft_strategy": { - "code": "class TimeOmegaConfigLoader:\n def time_merge_soft_strategy(self):\n \"\"\"Benchmark the time to load and soft-merge configurations\"\"\"\n self.loader.merge_strategy = {\"catalog\": \"soft\"}\n config = self.loader[\"catalog\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n self.env = \"local\"\n \n # Create sample config files in the temp directory\n self._create_config_file(\"base\", \"globals.yml\", {\"global_param\": \"value\"})\n self._create_config_file(\"base\", \"catalog.yml\", {\"dataset\": {\"type\": \"pandas.CSVDataSet\"}})\n self._create_config_file(\"local\", \"catalog.yml\", {\"dataset\": {\"filepath\": \"data.csv\"}})\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source.as_posix(), env=self.env)", - "min_run_count": 2, - "name": "OmegaConfigLoader.benchmark_ocl.TimeOmegaConfigLoader.time_merge_soft_strategy", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "08a283d77079d9fcf30a4863dfe0bafbd85d27b7e84b13eca9a9a7267eaf5b16", - "warmup_time": -1 - }, - "benchmark_dummy.TimeSuite.time_keys": { - "code": "class TimeSuite:\n def time_keys(self):\n for key in self.d.keys():\n pass\n\n def setup(self):\n self.d = {}\n for x in range(500):\n self.d[x] = None", - "min_run_count": 2, - "name": "benchmark_dummy.TimeSuite.time_keys", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "86e015a3c40c52da31e4185fff7c7176c38c5e1e1e4aba71912db0b388225191", - "warmup_time": -1 - }, - "version": 2 -} \ No newline at end of file diff --git a/benchmarks/OmegaConfigLoader/__init__.py b/benchmarks/OmegaConfigLoader/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/benchmarks/OmegaConfigLoader/benchmark_ocl.py b/benchmarks/OmegaConfigLoader/benchmark_ocl.py deleted file mode 100644 index c9d1badeda..0000000000 --- a/benchmarks/OmegaConfigLoader/benchmark_ocl.py +++ /dev/null @@ -1,60 +0,0 @@ -from pathlib import Path -import os -import tempfile -from kedro.config import OmegaConfigLoader - -class TimeOmegaConfigLoader: - - def setup(self): - # Setup temporary configuration directory with sample config files - self.temp_dir = tempfile.TemporaryDirectory() - self.conf_source = 
Path(self.temp_dir.name) - self.env = "local" - - # Create sample config files in the temp directory - self._create_config_file("base", "globals.yml", {"global_param": "value"}) - self._create_config_file("base", "catalog.yml", {"dataset": {"type": "pandas.CSVDataSet"}}) - self._create_config_file("local", "catalog.yml", {"dataset": {"filepath": "data.csv"}}) - - # Instantiate the OmegaConfigLoader - self.loader = OmegaConfigLoader(conf_source=self.conf_source.as_posix(), env=self.env) - - def teardown(self): - # Cleanup temporary directory - self.temp_dir.cleanup() - - def _create_config_file(self, env, file_name, data): - env_path = self.conf_source / env - env_path.mkdir(parents=True, exist_ok=True) - file_path = env_path / file_name - - import yaml - with open(file_path, "w") as f: - yaml.dump(data, f) - - def time_loading_base_config(self): - """Benchmark the time to load the base configuration""" - config = self.loader["globals"] - - def time_loading_env_config(self): - """Benchmark the time to load environment-specific configuration""" - config = self.loader["catalog"] - - def time_merge_soft_strategy(self): - """Benchmark the time to load and soft-merge configurations""" - self.loader.merge_strategy = {"catalog": "soft"} - config = self.loader["catalog"] - - def time_merge_destructive_strategy(self): - """Benchmark the time to load and destructively merge configurations""" - self.loader.merge_strategy = {"catalog": "destructive"} - config = self.loader["catalog"] - - def peak_memory_loading_config(self): - """Benchmark peak memory usage during config loading""" - config = self.loader["catalog"] - - def peak_memory_soft_merge(self): - """Benchmark peak memory usage during soft merge""" - self.loader.merge_strategy = {"catalog": "soft"} - config = self.loader["catalog"] diff --git a/benchmarks/OmegaConfigLoader/conf/base/catalog.yml b/benchmarks/OmegaConfigLoader/conf/base/catalog.yml deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/benchmarks/OmegaConfigLoader/conf/base/globals.yml b/benchmarks/OmegaConfigLoader/conf/base/globals.yml deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/benchmarks/OmegaConfigLoader/conf/base/parameters.yml b/benchmarks/OmegaConfigLoader/conf/base/parameters.yml deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/benchmarks/OmegaConfigLoader/conf/local/catalog.yml b/benchmarks/OmegaConfigLoader/conf/local/catalog.yml deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/benchmarks/OmegaConfigLoader/conf/local/globals.yml b/benchmarks/OmegaConfigLoader/conf/local/globals.yml deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/benchmarks/OmegaConfigLoader/conf/local/parameters.yml b/benchmarks/OmegaConfigLoader/conf/local/parameters.yml deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/benchmarks/benchmark_ocl.py b/benchmarks/benchmark_ocl.py new file mode 100644 index 0000000000..a4b7dda5ed --- /dev/null +++ b/benchmarks/benchmark_ocl.py @@ -0,0 +1,161 @@ +import tempfile +from pathlib import Path + +from kedro.config import OmegaConfigLoader + +base_catalog = { + "dataset_1": { + "type": "pandas.CSVDataset", + "filepath": "data1.csv" + }, + "dataset_2": { + "type": "pandas.CSVDataset", + "filepath": "data2.csv" + }, + "dataset_3": { + "type": "pandas.CSVDataset", + "filepath": "data3.csv" + }, + "dataset_4": { + "type": "pandas.CSVDataset", + "filepath": "data4.csv", + "versioned": True, + }, +} +local_catalog = { + "dataset_4" : { + "filepath": 
"data4_local.csv", + "type": "pandas.CSVDataset", + }, + "dataset_5" : { + "filepath": "data5_local.csv", + "type": "pandas.CSVDataset", + }, +} +base_params = { + "param_1": "value_1", + "param_2": "value_2", + "param_3": "value_3", + "param_4": "value_4", +} +local_params = { + "param_4": "value_4_local", + "param_5": "value_5_local", +} +base_globals = { + "global1": "value1", + "global2": "value2", + "global3": "value3", + "global4": "value4", +} +local_globals = { + "global4": "value4_local", + "global5": "value5_local", +} + +def _create_config_file(self, env, file_name, data): + env_path = self.conf_source / env + env_path.mkdir(parents=True, exist_ok=True) + file_path = env_path / file_name + + import yaml + with open(file_path, "w") as f: + yaml.dump(data, f) + + +class TimeOmegaConfigLoader: + + def setup(self): + # Setup temporary configuration directory with sample config files + self.temp_dir = tempfile.TemporaryDirectory() + self.conf_source = Path(self.temp_dir.name) + + # Create sample config files in the temp directory + _create_config_file("base", "catalog.yml", base_catalog) + _create_config_file("local", "catalog.yml", local_catalog) + _create_config_file("base", "parameters.yml", base_params) + _create_config_file("local", "parameters.yml", local_params) + _create_config_file("base", "globals.yml", base_globals) + _create_config_file("local", "globals.yml", local_globals) + + # Instantiate the OmegaConfigLoader + self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local') + + def teardown(self): + # Cleanup temporary directory + self.temp_dir.cleanup() + + def time_loading_catalog(self): + """Benchmark the time to load the catalog""" + self.loader["catalog"] + + def time_loading_parameters(self): + """Benchmark the time to load environment-specific configuration""" + self.loader["parameters"] + + def time_loading_parameters_runtime(self): + """Benchmark the time to load parameters with runtime configuration""" + self.loader.runtime_params = {"param_6": "value_6", "param_7": "value_7"} + self.loader["parameters"] + + def time_loading_globals(self): + """Benchmark the time to load global configuration""" + self.loader["globals"] + + def time_merge_soft_strategy(self): + """Benchmark the time to load and soft-merge configurations""" + self.loader.merge_strategy = {"catalog": "soft"} + self.loader["catalog"] + +base_catalog_resolvers = { + "dataset_4": { + "type": "pandas.CSVDataset", + "filepath": "${_basepath}/data4.csv", + "versioned": True, + }, + "_basepath": "folder", +} +base_params_resolvers = { + "param_2": "${globals:global4}", + "param_3": "${my_custom_resolver:custom_resolver}", +} +def custom_resolver(value): + return f"custom_{value}" + +class TimeOmegaConfigLoaderAdvanced: + + def setup(self): + # Setup temporary configuration directory with sample config files + self.temp_dir = tempfile.TemporaryDirectory() + self.conf_source = Path(self.temp_dir.name) + custom_resolvers = {"my_custom_resolver": custom_resolver} + + base_catalog.update(base_catalog_resolvers) + base_params.update(base_params_resolvers) + + # Create sample config files in the temp directory + _create_config_file("base", "catalog.yml", base_catalog) + _create_config_file("local", "catalog.yml", local_catalog) + _create_config_file("base", "parameters.yml", base_params) + _create_config_file("local", "parameters.yml", local_params) + _create_config_file("base", "globals.yml", base_globals) + _create_config_file("local", "globals.yml", local_globals) 
+ + # Instantiate the OmegaConfigLoader + self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local', custom_resolvers=custom_resolvers) + + def teardown(self): + # Cleanup temporary directory + self.temp_dir.cleanup() + + def time_loading_catalog(self): + """Benchmark the time to load the catalog""" + self.loader["catalog"] + + def time_loading_parameters(self): + """Benchmark the time to load environment-specific configuration""" + self.loader["parameters"] + + def time_loading_globals(self): + """Benchmark the time to load global configuration""" + self.loader["globals"] diff --git a/pyproject.toml b/pyproject.toml index 6f8e44f7ff..e9ffa3abbc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,9 +114,9 @@ kedro = ["py.typed"] readme = {file = "README.md", content-type = "text/markdown"} version = {attr = "kedro.__version__"} -[tool.ruff.format] -exclude = ["**/templates", "features/steps/test_starter"] -docstring-code-format = true +#[tool.ruff.format] +#exclude = ["**/templates", "features/steps/test_starter"] +#docstring-code-format = true [tool.coverage.report] fail_under = 100 From 7618ac5c865f36be532889373df198564c1ecc26 Mon Sep 17 00:00:00 2001 From: Ankita Katiyar Date: Mon, 14 Oct 2024 11:48:33 +0100 Subject: [PATCH 08/19] revert some changes Signed-off-by: Ankita Katiyar --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e9ffa3abbc..6f8e44f7ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,9 +114,9 @@ kedro = ["py.typed"] readme = {file = "README.md", content-type = "text/markdown"} version = {attr = "kedro.__version__"} -#[tool.ruff.format] -#exclude = ["**/templates", "features/steps/test_starter"] -#docstring-code-format = true +[tool.ruff.format] +exclude = ["**/templates", "features/steps/test_starter"] +docstring-code-format = true [tool.coverage.report] fail_under = 100 From 95628a3d54e93db6b9cde03ec6ccdbd6f3b55a9f Mon Sep 17 00:00:00 2001 From: Ankita Katiyar Date: Mon, 14 Oct 2024 15:24:35 +0100 Subject: [PATCH 09/19] Update to use larger config Signed-off-by: Ankita Katiyar --- benchmarks/benchmark_ocl.py | 155 +++++++++++++++--------------------- 1 file changed, 65 insertions(+), 90 deletions(-) diff --git a/benchmarks/benchmark_ocl.py b/benchmarks/benchmark_ocl.py index a4b7dda5ed..f4bc2733b2 100644 --- a/benchmarks/benchmark_ocl.py +++ b/benchmarks/benchmark_ocl.py @@ -3,58 +3,37 @@ from kedro.config import OmegaConfigLoader -base_catalog = { - "dataset_1": { - "type": "pandas.CSVDataset", - "filepath": "data1.csv" - }, - "dataset_2": { - "type": "pandas.CSVDataset", - "filepath": "data2.csv" - }, - "dataset_3": { - "type": "pandas.CSVDataset", - "filepath": "data3.csv" - }, - "dataset_4": { - "type": "pandas.CSVDataset", - "filepath": "data4.csv", - "versioned": True, - }, -} -local_catalog = { - "dataset_4" : { - "filepath": "data4_local.csv", - "type": "pandas.CSVDataset", - }, - "dataset_5" : { - "filepath": "data5_local.csv", - "type": "pandas.CSVDataset", - }, -} -base_params = { - "param_1": "value_1", - "param_2": "value_2", - "param_3": "value_3", - "param_4": "value_4", -} -local_params = { - "param_4": "value_4_local", - "param_5": "value_5_local", -} -base_globals = { - "global1": "value1", - "global2": "value2", - "global3": "value3", - "global4": "value4", -} -local_globals = { - "global4": "value4_local", - "global5": "value5_local", -} - -def _create_config_file(self, env, file_name, data): - env_path = 
self.conf_source / env + +# Helper functions to generate sample configuration data +def generate_catalog(start_range, end_range, is_local=False, is_versioned=False, add_interpolation=False): + catalog = {} + for i in range(start_range, end_range + 1): + catalog[f"dataset_{i}"] = { + "type": "pandas.CSVDataset", + "filepath": f"data{i}{'_local' if is_local else ''}.csv" + } + if is_versioned: + catalog[f"dataset_{i}"]["versioned"] = True + if add_interpolation: + catalog[f"dataset_{i}"]["filepath"] = "${_basepath}" + catalog[f"dataset_{i}"]["filepath"] + return catalog + +def generate_params(start_range, end_range, is_local=False, add_globals=False): + if add_globals: + # Generate params with "${globals:global{i}}" + params = {f"param_{i}": f"${{globals:global_{i}}}" for i in range(start_range, end_range + 1)} + else: + # Generate params with "value_{i}" or "value_{i}_local" + params = {f"param_{i}": f"value_{i}{'_local' if is_local else ''}" for i in range(start_range, end_range + 1)} + + return params + +def generate_globals(start_range, end_range, is_local=False): + globals_dict = {f"global_{i}": f"value{i}{'_local' if is_local else ''}" for i in range(start_range, end_range + 1)} + return globals_dict + +def _create_config_file(conf_source, env, file_name, data): + env_path = conf_source / env env_path.mkdir(parents=True, exist_ok=True) file_path = env_path / file_name @@ -62,21 +41,35 @@ def _create_config_file(self, env, file_name, data): with open(file_path, "w") as f: yaml.dump(data, f) +base_catalog = generate_catalog(1, 1000, is_versioned=True) +local_catalog = generate_catalog(501, 1500, is_local=True) +base_params = generate_params(1, 1000) +local_params = generate_params(501, 1500, is_local=True) +base_globals = generate_globals(1, 1000) +local_globals = generate_globals(501, 1500, is_local=True) -class TimeOmegaConfigLoader: +base_catalog_with_interpolations = generate_catalog(1, 1000, is_versioned=True, add_interpolation=True) +base_catalog_with_interpolations.update({"_basepath": "/path/to/data"}) +local_catalog_with_interpolations = generate_catalog(501, 1500, is_local=True, add_interpolation=True) +local_catalog_with_interpolations.update({"_basepath": "/path/to/data"}) + +base_params_with_globals = generate_params(1, 100, add_globals=True) +# local_params_with_globals = generate_params(501, 1000, is_local=True, add_globals=True) + +class TimeOmegaConfigLoader: def setup(self): # Setup temporary configuration directory with sample config files self.temp_dir = tempfile.TemporaryDirectory() self.conf_source = Path(self.temp_dir.name) # Create sample config files in the temp directory - _create_config_file("base", "catalog.yml", base_catalog) - _create_config_file("local", "catalog.yml", local_catalog) - _create_config_file("base", "parameters.yml", base_params) - _create_config_file("local", "parameters.yml", local_params) - _create_config_file("base", "globals.yml", base_globals) - _create_config_file("local", "globals.yml", local_globals) + _create_config_file(self.conf_source, "base", "catalog.yml", base_catalog) + _create_config_file(self.conf_source, "local", "catalog.yml", local_catalog) + _create_config_file(self.conf_source, "base", "parameters.yml", base_params) + _create_config_file(self.conf_source, "local", "parameters.yml", local_params) + _create_config_file(self.conf_source, "base", "globals.yml", base_globals) + _create_config_file(self.conf_source, "local", "globals.yml", local_globals) # Instantiate the OmegaConfigLoader self.loader = 
OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local') @@ -93,56 +86,36 @@ def time_loading_parameters(self): """Benchmark the time to load environment-specific configuration""" self.loader["parameters"] - def time_loading_parameters_runtime(self): - """Benchmark the time to load parameters with runtime configuration""" - self.loader.runtime_params = {"param_6": "value_6", "param_7": "value_7"} - self.loader["parameters"] - def time_loading_globals(self): """Benchmark the time to load global configuration""" self.loader["globals"] + def time_loading_parameters_runtime(self): + """Benchmark the time to load parameters with runtime configuration""" + self.loader.runtime_params = generate_params(2001, 2002) + self.loader["parameters"] + def time_merge_soft_strategy(self): """Benchmark the time to load and soft-merge configurations""" self.loader.merge_strategy = {"catalog": "soft"} self.loader["catalog"] -base_catalog_resolvers = { - "dataset_4": { - "type": "pandas.CSVDataset", - "filepath": "${_basepath}/data4.csv", - "versioned": True, - }, - "_basepath": "folder", -} -base_params_resolvers = { - "param_2": "${globals:global4}", - "param_3": "${my_custom_resolver:custom_resolver}", -} -def custom_resolver(value): - return f"custom_{value}" class TimeOmegaConfigLoaderAdvanced: - def setup(self): # Setup temporary configuration directory with sample config files self.temp_dir = tempfile.TemporaryDirectory() self.conf_source = Path(self.temp_dir.name) - custom_resolvers = {"my_custom_resolver": custom_resolver} - - base_catalog.update(base_catalog_resolvers) - base_params.update(base_params_resolvers) # Create sample config files in the temp directory - _create_config_file("base", "catalog.yml", base_catalog) - _create_config_file("local", "catalog.yml", local_catalog) - _create_config_file("base", "parameters.yml", base_params) - _create_config_file("local", "parameters.yml", local_params) - _create_config_file("base", "globals.yml", base_globals) - _create_config_file("local", "globals.yml", local_globals) + _create_config_file(self.conf_source, "base", "catalog.yml", base_catalog_with_interpolations) + _create_config_file(self.conf_source, "local", "catalog.yml", local_catalog_with_interpolations) + _create_config_file(self.conf_source, "base", "parameters.yml", base_params_with_globals) + _create_config_file(self.conf_source, "base", "globals.yml", base_globals) + _create_config_file(self.conf_source, "local", "globals.yml", local_globals) # Instantiate the OmegaConfigLoader - self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local', custom_resolvers=custom_resolvers) + self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local') def teardown(self): # Cleanup temporary directory @@ -159,3 +132,5 @@ def time_loading_parameters(self): def time_loading_globals(self): """Benchmark the time to load global configuration""" self.loader["globals"] + + From b5b9bf523164926b883cb9227a7702f1e19afcaa Mon Sep 17 00:00:00 2001 From: Ankita Katiyar Date: Tue, 15 Oct 2024 11:18:20 +0100 Subject: [PATCH 10/19] Update functions and docstrings Signed-off-by: Ankita Katiyar --- benchmarks/benchmark_ocl.py | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/benchmarks/benchmark_ocl.py b/benchmarks/benchmark_ocl.py index f4bc2733b2..4b866f9be1 100644 --- a/benchmarks/benchmark_ocl.py +++ b/benchmarks/benchmark_ocl.py @@ -5,7 +5,7 @@ # 
Helper functions to generate sample configuration data -def generate_catalog(start_range, end_range, is_local=False, is_versioned=False, add_interpolation=False): +def _generate_catalog(start_range, end_range, is_local=False, is_versioned=False, add_interpolation=False): catalog = {} for i in range(start_range, end_range + 1): catalog[f"dataset_{i}"] = { @@ -18,7 +18,7 @@ def generate_catalog(start_range, end_range, is_local=False, is_versioned=False, catalog[f"dataset_{i}"]["filepath"] = "${_basepath}" + catalog[f"dataset_{i}"]["filepath"] return catalog -def generate_params(start_range, end_range, is_local=False, add_globals=False): +def _generate_params(start_range, end_range, is_local=False, add_globals=False): if add_globals: # Generate params with "${globals:global{i}}" params = {f"param_{i}": f"${{globals:global_{i}}}" for i in range(start_range, end_range + 1)} @@ -28,7 +28,7 @@ def generate_params(start_range, end_range, is_local=False, add_globals=False): return params -def generate_globals(start_range, end_range, is_local=False): +def _generate_globals(start_range, end_range, is_local=False): globals_dict = {f"global_{i}": f"value{i}{'_local' if is_local else ''}" for i in range(start_range, end_range + 1)} return globals_dict @@ -41,20 +41,19 @@ def _create_config_file(conf_source, env, file_name, data): with open(file_path, "w") as f: yaml.dump(data, f) -base_catalog = generate_catalog(1, 1000, is_versioned=True) -local_catalog = generate_catalog(501, 1500, is_local=True) -base_params = generate_params(1, 1000) -local_params = generate_params(501, 1500, is_local=True) -base_globals = generate_globals(1, 1000) -local_globals = generate_globals(501, 1500, is_local=True) +base_catalog = _generate_catalog(1, 1000, is_versioned=True) +local_catalog = _generate_catalog(501, 1500, is_local=True) +base_params = _generate_params(1, 1000) +local_params = _generate_params(501, 1500, is_local=True) +base_globals = _generate_globals(1, 1000) +local_globals = _generate_globals(501, 1500, is_local=True) -base_catalog_with_interpolations = generate_catalog(1, 1000, is_versioned=True, add_interpolation=True) +base_catalog_with_interpolations = _generate_catalog(1, 1000, is_versioned=True, add_interpolation=True) base_catalog_with_interpolations.update({"_basepath": "/path/to/data"}) -local_catalog_with_interpolations = generate_catalog(501, 1500, is_local=True, add_interpolation=True) +local_catalog_with_interpolations = _generate_catalog(501, 1500, is_local=True, add_interpolation=True) local_catalog_with_interpolations.update({"_basepath": "/path/to/data"}) -base_params_with_globals = generate_params(1, 100, add_globals=True) -# local_params_with_globals = generate_params(501, 1000, is_local=True, add_globals=True) +base_params_with_globals = _generate_params(1, 100, add_globals=True) class TimeOmegaConfigLoader: @@ -83,7 +82,7 @@ def time_loading_catalog(self): self.loader["catalog"] def time_loading_parameters(self): - """Benchmark the time to load environment-specific configuration""" + """Benchmark the time to load the parameters""" self.loader["parameters"] def time_loading_globals(self): @@ -92,7 +91,7 @@ def time_loading_globals(self): def time_loading_parameters_runtime(self): """Benchmark the time to load parameters with runtime configuration""" - self.loader.runtime_params = generate_params(2001, 2002) + self.loader.runtime_params = _generate_params(2001, 2002) self.loader["parameters"] def time_merge_soft_strategy(self): @@ -112,7 +111,6 @@ def setup(self): 
_create_config_file(self.conf_source, "local", "catalog.yml", local_catalog_with_interpolations) _create_config_file(self.conf_source, "base", "parameters.yml", base_params_with_globals) _create_config_file(self.conf_source, "base", "globals.yml", base_globals) - _create_config_file(self.conf_source, "local", "globals.yml", local_globals) # Instantiate the OmegaConfigLoader self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local') @@ -126,11 +124,7 @@ def time_loading_catalog(self): self.loader["catalog"] def time_loading_parameters(self): - """Benchmark the time to load environment-specific configuration""" + """Benchmark the time to load parameters with global interpolation""" self.loader["parameters"] - def time_loading_globals(self): - """Benchmark the time to load global configuration""" - self.loader["globals"] - From 9737847954892c5cd4822982d1cdcf5844fb2b15 Mon Sep 17 00:00:00 2001 From: Ankita Katiyar Date: Tue, 15 Oct 2024 15:52:24 +0100 Subject: [PATCH 11/19] Add performance tests for DataCatalog Signed-off-by: Ankita Katiyar --- asv.conf.json | 8 ++- benchmarks/benchmark_datacatalog.py | 79 +++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 benchmarks/benchmark_datacatalog.py diff --git a/asv.conf.json b/asv.conf.json index 2cfcd3a057..b61a6c58a3 100644 --- a/asv.conf.json +++ b/asv.conf.json @@ -8,5 +8,11 @@ "environment_type": "virtualenv", "show_commit_url": "http://github.com/kedro-org/kedro/commit/", "results_dir": ".asv/results", - "html_dir": ".asv/html" + "html_dir": ".asv/html", + "matrix": { + "req": { + "kedro-datasets": [], + "pandas": [] + } + } } diff --git a/benchmarks/benchmark_datacatalog.py b/benchmarks/benchmark_datacatalog.py new file mode 100644 index 0000000000..d3b12e44ea --- /dev/null +++ b/benchmarks/benchmark_datacatalog.py @@ -0,0 +1,79 @@ +import pandas as pd +from kedro_datasets.pandas import CSVDataset + +from kedro.io import DataCatalog + +base_catalog = { + f"dataset_{i}": { + "type": "pandas.CSVDataset", + "filepath": f"data_{i}.csv", + } for i in range(1, 1001) +} +# Add datasets with the same filepath for loading +base_catalog.update({ + f"dataset_load_{i}": { + "type": "pandas.CSVDataset", + "filepath": "data.csv", + } for i in range(1, 1001) +}) +# Add a factory pattern +base_catalog.update({ + "dataset_factory_{placeholder}": { + "type": "pandas.CSVDataset", + "filepath": "data_{placeholder}.csv", + } +}) + +class TimeDataCatalog: + def setup(self): + self.catalog = DataCatalog.from_config(base_catalog) + self.dataframe = pd.DataFrame({"column": [1, 2, 3]}) + self.dataframe.to_csv("data.csv", index=False) + self.datasets = { + f"dataset_new_{i}": CSVDataset(filepath="data.csv") for i in range(1, 1001) + } + self.feed_dict = { + f"param_{i}": i for i in range(1, 1001) + } + + + def time_save(self): + """Benchmark the time to save datasets""" + for i in range(1,1001): + self.catalog.save(f"dataset_{i}", self.dataframe) + + def time_load(self): + """Benchmark the time to load datasets""" + for i in range(1,1001): + self.catalog.load(f"dataset_load_{i}") + + def time_exists(self): + """Benchmark the time to check if datasets exist""" + for i in range(1,1001): + self.catalog.exists(f"dataset_{i}") + + def time_release(self): + """Benchmark the time to release datasets""" + for i in range(1,1001): + self.catalog.release(f"dataset_{i}") + + def time_add_all(self): + """Benchmark the time to add all datasets""" + self.catalog.add_all(self.datasets) + + def 
time_feed_dict(self): + """Benchmark the time to add a feed dict""" + self.catalog.add_feed_dict(self.feed_dict) + + def time_list(self): + """Benchmark the time to list all datasets""" + self.catalog.list() + + def time_shallow_copy(self): + """Benchmark the time to shallow copy the catalog""" + self.catalog.shallow_copy() + + def time_resolve_factory(self): + """Benchmark the time to resolve a dataset factory""" + for i in range(1,1001): + self.catalog._get_dataset(f"dataset_factory_{i}")
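For context, asv (airspeed velocity) discovers these suites by convention: it instantiates the class, calls `setup()`, times each `time_*` method, and calls `teardown()` if one is defined. Before wiring a new suite into `asv run`, it can help to smoke-test it by hand; the sketch below is hypothetical, and assumes it is run from the `benchmarks/` directory with `kedro`, `kedro-datasets` and `pandas` installed.

```python
# Hypothetical smoke test for the suite above; asv drives the same
# setup() -> time_*() sequence (with timing and repetition) during a run.
import os

from benchmark_datacatalog import TimeDataCatalog

suite = TimeDataCatalog()
suite.setup()          # builds the catalog, writes data.csv, prepares the feed dict
suite.time_list()      # each time_* method should complete without raising
suite.time_exists()
os.remove("data.csv")  # clean up the CSV written by setup()
```

The actual measurements then come from `asv run`, which also builds the environments described by the dependency `matrix` added to `asv.conf.json` above.

From 3d1cad4a44713a2941e50ca16c8511349df7b3ec Mon Sep 17 00:00:00 2001 From: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> Date: Tue, 15 Oct 2024 11:13:22 +0100 Subject: [PATCH 12/19] Update mypy ignore messages (#4228) Signed-off-by: Ankita Katiyar --- kedro/framework/cli/utils.py | 2 +- kedro/framework/context/context.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kedro/framework/cli/utils.py b/kedro/framework/cli/utils.py index 1b50408cc5..ca2acfab31 100644 --- a/kedro/framework/cli/utils.py +++ b/kedro/framework/cli/utils.py @@ -422,7 +422,7 @@ def find_run_command(package_name: str) -> Callable: # use run command from `kedro.framework.cli.project` from kedro.framework.cli.project import run - return run # type: ignore[no-any-return] + return run # type: ignore[return-value] # fail badly if cli.py exists, but has no `cli` in it if not hasattr(project_cli, "cli"): raise KedroCliError(f"Cannot load commands from {package_name}.cli") diff --git a/kedro/framework/context/context.py b/kedro/framework/context/context.py index 5c14cbae38..0b44056374 100644 --- a/kedro/framework/context/context.py +++ b/kedro/framework/context/context.py @@ -207,7 +207,7 @@ def params(self) -> dict[str, Any]: # Merge nested structures params = OmegaConf.merge(params, self._extra_params) - return OmegaConf.to_container(params) if OmegaConf.is_config(params) else params # type: ignore[no-any-return] + return OmegaConf.to_container(params) if OmegaConf.is_config(params) else params # type: ignore[return-value] def _get_catalog( self, From 9fc67118060075d2d52c4f3596cc5e36f7810dd9 Mon Sep 17 00:00:00 2001 From: Dmitry Sorokin <40151847+DimedS@users.noreply.github.com> Date: Tue, 15 Oct 2024 13:48:45 +0100 Subject: [PATCH 13/19] Revise Kedro project structure docs (#4208) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update project structure docs --------- Signed-off-by: Dmitry Sorokin Signed-off-by: Dmitry Sorokin <40151847+DimedS@users.noreply.github.com> Co-authored-by: Juan Luis Cano Rodríguez Signed-off-by: Ankita Katiyar --- docs/source/get_started/kedro_concepts.md | 67 ++++++++++++++++++----- 1 file changed, 52 insertions(+), 15 deletions(-) diff --git a/docs/source/get_started/kedro_concepts.md b/docs/source/get_started/kedro_concepts.md index ffe602a7e2..44f54ac4d8 100644 --- a/docs/source/get_started/kedro_concepts.md +++ b/docs/source/get_started/kedro_concepts.md @@ -63,20 +63,53 @@ The Kedro Data Catalog is the registry of all data sources that the project can One of the main advantages of working with Kedro projects is that they follow a default template that makes collaboration straightforward. Kedro uses semantic naming to set up a default project with specific folders to store datasets, notebooks, configuration and source code. We advise you to retain the default Kedro project structure to make it easy to share your projects with other Kedro users, although you can adapt the folder structure if you need to.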
-The default Kedro project structure is as follows: +Starting from Kedro 0.19, when you create a new project with `kedro new`, you can customise the structure by selecting which tools to include. Depending on your choices, the resulting structure may vary. Below, we outline the default project structure when all tools are selected and give an example with no tools selected. + +### Default Kedro project structure (all tools selected) + +If you select all tools during project creation, your project structure will look like this: + +``` +project-dir # Parent directory of the template +├── conf # Project configuration files +├── data # Local project data (not committed to version control) +├── docs # Project documentation +├── notebooks # Project-related Jupyter notebooks (can be used for experimental code before moving the code to src) +├── src # Project source code +├── tests # Folder containing unit and integration tests +├── .gitignore # Hidden file that prevents staging of unnecessary files to `git` +├── pyproject.toml # Identifies the project root and contains configuration information +├── README.md # Project README +└── requirements.txt # Project dependencies file +``` + +### Example Kedro project structure (no tools selected) + +If you select no tools, the resulting structure will be simpler: ``` -project-dir # Parent directory of the template -├── .gitignore # Hidden file that prevents staging of unnecessary files to `git` -├── conf # Project configuration files -├── data # Local project data (not committed to version control) -├── docs # Project documentation -├── notebooks # Project-related Jupyter notebooks (can be used for experimental code before moving the code to src) -├── pyproject.toml # Identifies the project root and contains configuration information -├── README.md # Project README -└── src # Project source code +project-dir # Parent directory of the template +├── conf # Project configuration files +├── notebooks # Project-related Jupyter notebooks (can be used for experimental code before moving the code to src) +├── src # Project source code +├── .gitignore # Hidden file that prevents staging of unnecessary files to `git` +├── pyproject.toml # Identifies the project root and contains configuration information +├── README.md # Project README +└── requirements.txt # Project dependencies file ``` + +### Tool selection and resulting structure + +During `kedro new`, you can select which [tools to include in your project](../starters/new_project_tools.md). Each tool adds specific files or folders to the project structure: + +- **Lint (Ruff)**: Modifies the `pyproject.toml` file to include Ruff configuration settings for linting. It sets up `ruff` under `[tool.ruff]`, defines options like line length, selected rules, and ignored rules, and includes `ruff` as an optional `dev` dependency. +- **Test (Pytest)**: Adds a `tests` folder for storing unit and integration tests, helping to maintain code quality and ensuring that changes in the codebase do not introduce bugs. For more information about testing in Kedro, visit the [Automated Testing Guide](../development/automated_testing.md). +- **Log**: Enables custom logging configuration by including a `logging.yml` file inside the `conf` folder. For more information about logging customisation in Kedro, visit the [Logging Customisation Guide](../logging/index.md). +- **Docs (Sphinx)**: Adds a `docs` folder with a Sphinx documentation setup. This folder is typically used to generate technical documentation for the project.
+- **Data Folder**: Adds a `data` folder structure for managing project data, organised into multiple subfolders. We recommend you put raw data into `raw` and move processed data to other subfolders, as outlined [in this data engineering article](https://towardsdatascience.com/the-importance-of-layered-thinking-in-data-engineering-a09f685edc71). +- **PySpark**: Adds PySpark-specific configuration files. +- **Kedro-Viz**: Adds Kedro's native visualisation tool with an [experiment tracking setup](https://docs.kedro.org/projects/kedro-viz/en/stable/experiment_tracking.html). + ### `conf` The `conf` folder contains two subfolders for storing configuration information: `base` and `local`. @@ -88,7 +121,7 @@ Use the `base` subfolder for project-specific settings to share across different The folder contains three files for the example, but you can add others as you require: - `catalog.yml` - [Configures the Data Catalog](../data/data_catalog.md#use-the-data-catalog-within-kedro-configuration) with the file paths and load/save configuration needed for different datasets -- `logging.yml` - Uses Python's default [`logging`](https://docs.python.org/3/library/logging.html) library to set up logging +- `logging.yml` - Uses Python's default [`logging`](https://docs.python.org/3/library/logging.html) library to set up logging (only added if the Log tool is selected). - `parameters.yml` - Allows you to define parameters for machine learning experiments, for example, train/test split and the number of iterations #### `conf/local` @@ -99,10 +132,14 @@ Use the `local` subfolder for **settings that should not be shared**, such as ac By default, Kedro creates one file, `credentials.yml`, in `conf/local`. -### `data` - -The `data` folder contains multiple subfolders to store project data. We recommend you put raw data into `raw` and move processed data to other subfolders according to the [commonly accepted data engineering convention](https://towardsdatascience.com/the-importance-of-layered-thinking-in-data-engineering-a09f685edc71). - ### `src` This subfolder contains the project's source code. + +### Customising your project structure + +While the default Kedro structure is recommended for collaboration and standardisation, it is possible to adapt the folder structure if necessary. This flexibility allows you to tailor the project to your needs while maintaining a consistent and recognisable structure. + +The only technical requirement when organising code is that the `pipeline_registry.py` and `settings.py` files must remain in the `/src/` directory, where they are created by default. + +The `pipeline_registry.py` file must include a `register_pipelines()` function that returns a `dict[str, Pipeline]`, which maps pipeline names to their corresponding `Pipeline` objects, as shown in the sketch below.
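For illustration, a minimal `pipeline_registry.py` might look like the following. This mirrors the default project template's behaviour, where `find_pipelines()` auto-discovers pipelines and the `__default__` entry combines them; treat it as a sketch rather than required content.

```python
# Minimal pipeline_registry.py sketch, in the spirit of the default template.
from kedro.framework.project import find_pipelines
from kedro.pipeline import Pipeline


def register_pipelines() -> dict[str, Pipeline]:
    """Register the project's pipelines.

    Returns:
        A mapping from pipeline names to ``Pipeline`` objects.
    """
    pipelines = find_pipelines()  # discovers create_pipeline() under <package>/pipelines/
    pipelines["__default__"] = sum(pipelines.values())  # pipeline run by a bare `kedro run`
    return pipelines
```

Because `Pipeline` objects support addition, `sum(pipelines.values())` composes every discovered pipeline into the single pipeline that `kedro run` executes by default.

From b6587e0b0dde727fc1542530c133541c7cae879f Mon Sep 17 00:00:00 2001 From: Hyewon Choi <76198373+hyew0nChoi@users.noreply.github.com> Date: Tue, 15 Oct 2024 23:20:42 +0900 Subject: [PATCH 14/19] Update CLI autocompletion docs with new Click syntax (#4213) * Update CLI autocompletion docs with new Click syntax Updated the autocompletion setup instructions for Bash, Zsh, and Fish shells to reflect the latest Click 8.1 syntax. Changed Fish shell completion script path to ~/.config/fish/completions/kedro.fish for correct placement.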
Signed-off-by: hyew0nChoi Signed-off-by: Ankita Katiyar --- RELEASE.md | 4 ++++ docs/source/development/commands_reference.md | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 59cace8a36..a5e34a6ba8 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -4,7 +4,10 @@ ## Bug fixes and other changes ## Breaking changes to the API ## Documentation changes +* Updated CLI autocompletion docs with new Click syntax. + ## Community contributions +* [Hyewon Choi](https://github.com/hyew0nChoi) # Release 0.19.9 @@ -38,6 +41,7 @@ * Fix logo on PyPI page. * Minor language/styling updates. + ## Community contributions * [Puneet](https://github.com/puneeter) * [ethanknights](https://github.com/ethanknights) diff --git a/docs/source/development/commands_reference.md b/docs/source/development/commands_reference.md index 12a90371f7..d66d4ffcc6 100644 --- a/docs/source/development/commands_reference.md +++ b/docs/source/development/commands_reference.md @@ -16,7 +16,7 @@ echo $0 Add the following to your ~/.bashrc (or just run it on the command line): ```bash -eval "$(_KEDRO_COMPLETE=source kedro)" +eval "$(_KEDRO_COMPLETE=bash_source kedro)" ``` @@ -26,7 +26,7 @@ eval "$(_KEDRO_COMPLETE=source kedro)" Add the following to ~/.zshrc: ```bash -eval "$(_KEDRO_COMPLETE=source_zsh kedro)" +eval "$(_KEDRO_COMPLETE=zsh_source kedro)" ``` @@ -36,7 +36,7 @@ eval "$(_KEDRO_COMPLETE=source_zsh kedro)" Add the following to ~/.config/fish/completions/foo-bar.fish: ```bash -eval (env _KEDRO_COMPLETE=source_fish kedro) +eval (env _KEDRO_COMPLETE=fish_source kedro) ``` From 062aba3534631a83b1b3fa83c8845c8cfd47fe78 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 15 Oct 2024 15:51:05 +0000 Subject: [PATCH 15/19] Bump import-linter from 2.0 to 2.1 (#4226) Bumps [import-linter](https://github.com/seddonym/import-linter) from 2.0 to 2.1. - [Changelog](https://github.com/seddonym/import-linter/blob/master/CHANGELOG.rst) - [Commits](https://github.com/seddonym/import-linter/compare/v2.0...v2.1) --- updated-dependencies: - dependency-name: import-linter dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Ankita Katiyar --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6f8e44f7ff..97124a5813 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,7 @@ dynamic = ["readme", "version"] test = [ "behave==1.2.6", "coverage[toml]", - "import-linter==2.0", + "import-linter==2.1", "ipylab>=1.0.0", "ipython~=8.10", "jupyterlab_server>=2.11.1", From 56aefae77e13dfd93e0b8c1fddb6fde0812fe1cb Mon Sep 17 00:00:00 2001 From: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:10:47 +0100 Subject: [PATCH 16/19] Performance test for `OmegaConfigLoader` (#4225) * first pass doesn't work yet Signed-off-by: Ankita Katiyar * Update ocl tests Signed-off-by: Ankita Katiyar * revert some changes Signed-off-by: Ankita Katiyar * Update to use larger config Signed-off-by: Ankita Katiyar * Update functions and docstrings Signed-off-by: Ankita Katiyar * lint Signed-off-by: Ankita Katiyar --------- Signed-off-by: Ankita Katiyar --- benchmarks/benchmark_ocl.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/benchmarks/benchmark_ocl.py b/benchmarks/benchmark_ocl.py index 4b866f9be1..5c38b61901 100644 --- a/benchmarks/benchmark_ocl.py +++ b/benchmarks/benchmark_ocl.py @@ -126,5 +126,3 @@ def time_loading_catalog(self): def time_loading_parameters(self): """Benchmark the time to load parameters with global interpolation""" self.loader["parameters"] - - From f981b9bbe4969af30226dcce9c8e6c43907d227a Mon Sep 17 00:00:00 2001 From: Ankita Katiyar Date: Wed, 16 Oct 2024 17:40:49 +0100 Subject: [PATCH 17/19] Add a test for init and fix indent Signed-off-by: Ankita Katiyar --- ...rtualenv-py3.11-kedro-datasets-pandas.json | 1 + .asv/results/M-WFLM6NH6G5/machine.json | 9 + .asv/results/benchmarks.json | 273 ++++++++++++++++++ benchmarks/benchmark_datacatalog.py | 3 + benchmarks/benchmark_ocl.py | 12 +- 5 files changed, 292 insertions(+), 6 deletions(-) create mode 100644 .asv/results/M-WFLM6NH6G5/f594c8bc-virtualenv-py3.11-kedro-datasets-pandas.json create mode 100644 .asv/results/M-WFLM6NH6G5/machine.json create mode 100644 .asv/results/benchmarks.json diff --git a/.asv/results/M-WFLM6NH6G5/f594c8bc-virtualenv-py3.11-kedro-datasets-pandas.json b/.asv/results/M-WFLM6NH6G5/f594c8bc-virtualenv-py3.11-kedro-datasets-pandas.json new file mode 100644 index 0000000000..b7dff2c438 --- /dev/null +++ b/.asv/results/M-WFLM6NH6G5/f594c8bc-virtualenv-py3.11-kedro-datasets-pandas.json @@ -0,0 +1 @@ +{"commit_hash": "f594c8bcd43100b216ac104d1e670ca4d5783096", "env_name": "virtualenv-py3.11-kedro-datasets-pandas", "date": 1729087847000, "params": {"arch": "arm64", "cpu": "Apple M1 Max", "machine": "M-WFLM6NH6G5", "num_cpu": "10", "os": "Darwin 23.6.0", "ram": "34359738368", "python": "3.11", "kedro-datasets": "", "pandas": ""}, "python": "3.11", "requirements": {"kedro-datasets": "", "pandas": ""}, "env_vars": {}, "result_columns": ["result", "params", "version", "started_at", "duration", "stats_ci_99_a", "stats_ci_99_b", "stats_q_25", "stats_q_75", "stats_number", "stats_repeat", "samples", "profile"], "results": {"benchmark_datacatalog.TimeDataCatalog.time_add_all": [[0.04391904201474972], [], "f470854de9a319e47c6bab999cdb3c7662341b063447a6866500b2e70b2c5ed8", 1729096807842, 1.0691, [-Infinity], [Infinity], [0.043919], [0.043919], [1], [1]], "benchmark_datacatalog.TimeDataCatalog.time_exists": 
[[0.026259125006617978], [], "9bbce85f01a2cfbb5569bc2ba076dd22b662d5d17db4901cd5269d14dbce3ea6", 1729096808911, 0.93221, [-Infinity], [Infinity], [0.026259], [0.026259], [1], [1]], "benchmark_datacatalog.TimeDataCatalog.time_feed_dict": [[0.034821834007743746], [], "0101ab38b04d7b27eda18738a82f5f06e48604c6e91d0e10bae33327bb588f69", 1729096809843, 0.8628, [-Infinity], [Infinity], [0.034822], [0.034822], [1], [1]], "benchmark_datacatalog.TimeDataCatalog.time_initialise": [[0.0948639580165036], [], "9e460ed25ea64f63f905b3f3a01a817e5daa00c81390b0cdfc25fbad0ae85ea6", 1729096810706, 1.0807, [-Infinity], [Infinity], [0.094864], [0.094864], [1], [1]], "benchmark_datacatalog.TimeDataCatalog.time_list": [[1.4999997802078724e-05], [], "4a7ae456f2349941bdbc595b3919284633da1da166cf1394660a0399ec618687", 1729096811787, 0.82309, [-Infinity], [Infinity], [1.5e-05], [1.5e-05], [1], [1]], "benchmark_datacatalog.TimeDataCatalog.time_load": [[0.20476879199850373], [], "96bd6914ea6ed937ae958177afd17861ccf3ab1481a5d4d2ed8002dbc5d1131e", 1729096812610, 0.86483, [-Infinity], [Infinity], [0.20477], [0.20477], [1], [1]], "benchmark_datacatalog.TimeDataCatalog.time_release": [[0.00888583398773335], [], "fa49ed3249b0319f92a7d6309f2a58ed8595c86141b16768cd575326a2d28d77", 1729096813475, 0.76429, [-Infinity], [Infinity], [0.0088858], [0.0088858], [1], [1]], "benchmark_datacatalog.TimeDataCatalog.time_resolve_factory": [[0.09763062497950159], [], "c051d31d513ef455328bb051eafacb1cc06d9f84dd613ee2e0bee3440bbab467", 1729096814240, 0.83693, [-Infinity], [Infinity], [0.097631], [0.097631], [1], [1]], "benchmark_datacatalog.TimeDataCatalog.time_save": [[0.5207992079958785], [], "4ea897eb28bd91fc7cf8da6e2679bf608b909c8db9ebdffd97f3bf19b275a809", 1729096815077, 1.6094, [-Infinity], [Infinity], [0.5208], [0.5208], [1], [1]], "benchmark_datacatalog.TimeDataCatalog.time_shallow_copy": [[0.031129041977692395], [], "64ead39024c492d18b91a21c23a9d3f1323533cd37bd53934d54701ecb259762", 1729096816687, 0.68384, [-Infinity], [Infinity], [0.031129], [0.031129], [1], [1]], "benchmark_dummy.TimeSuite.time_keys": [[3.2910029403865337e-06], [], "86e015a3c40c52da31e4185fff7c7176c38c5e1e1e4aba71912db0b388225191", 1729096817371, 0.18749, [-Infinity], [Infinity], [3.291e-06], [3.291e-06], [1], [1]], "benchmark_ocl.TimeOmegaConfigLoader.time_loading_catalog": [[0.4770808340108488], [], "3ccff2348faeaf3038548994686b45deeaa3c7c46df2270a8a1f697e7401ae5a", 1729096817558, 1.0252, [-Infinity], [Infinity], [0.47708], [0.47708], [1], [1]], "benchmark_ocl.TimeOmegaConfigLoader.time_loading_globals": [[0.11477499999455176], [], "d42dda2b001097642dc790de01ab15e3f1f11426f6bfc5affbc1c658248f32be", 1729096818583, 0.75575, [-Infinity], [Infinity], [0.11477], [0.11477], [1], [1]], "benchmark_ocl.TimeOmegaConfigLoader.time_loading_parameters": [[0.12353595800232142], [], "3187d47ad3445bdf83439512e124e3cde01f0503a3ffa7db9ca7a02e6bc2f7f2", 1729096819339, 0.78861, [-Infinity], [Infinity], [0.12354], [0.12354], [1], [1]], "benchmark_ocl.TimeOmegaConfigLoader.time_loading_parameters_runtime": [[0.13527949998388067], [], "153be6afe75261c83d15bbc165c10b98af15d3489c722c0f7f8e5c0ce3ca2d59", 1729096820128, 0.77955, [-Infinity], [Infinity], [0.13528], [0.13528], [1], [1]], "benchmark_ocl.TimeOmegaConfigLoader.time_merge_soft_strategy": [[0.7280506670067552], [], "317897f43311426ea9b688e3019361eb5bb1f61f60eca4f763d7a8ec38265ea2", 1729096820908, 1.4309, [-Infinity], [Infinity], [0.72805], [0.72805], [1], [1]], "benchmark_ocl.TimeOmegaConfigLoaderAdvanced.time_loading_catalog": 
[[0.6754177919938229], [], "5499c39a6750c5d527f1a3e8a747fdd5b3128af31640d9d7ee9c72be261e344a", 1729096822339, 1.3692, [-Infinity], [Infinity], [0.67542], [0.67542], [1], [1]], "benchmark_ocl.TimeOmegaConfigLoaderAdvanced.time_loading_parameters": [[1.9011982079828158], [], "f74ef4eead7a35df856006dbf9e1b72b61ba36b34767525f55bf8c5eabb343f1", 1729096823708, 2.5663, [-Infinity], [Infinity], [1.9012], [1.9012], [1], [1]]}, "durations": {}, "version": 2} \ No newline at end of file diff --git a/.asv/results/M-WFLM6NH6G5/machine.json b/.asv/results/M-WFLM6NH6G5/machine.json new file mode 100644 index 0000000000..3fe4186a75 --- /dev/null +++ b/.asv/results/M-WFLM6NH6G5/machine.json @@ -0,0 +1,9 @@ +{ + "arch": "arm64", + "cpu": "Apple M1 Max", + "machine": "M-WFLM6NH6G5", + "num_cpu": "10", + "os": "Darwin 23.6.0", + "ram": "34359738368", + "version": 1 +} \ No newline at end of file diff --git a/.asv/results/benchmarks.json b/.asv/results/benchmarks.json new file mode 100644 index 0000000000..347fbe8942 --- /dev/null +++ b/.asv/results/benchmarks.json @@ -0,0 +1,273 @@ +{ + "benchmark_datacatalog.TimeDataCatalog.time_add_all": { + "code": "class TimeDataCatalog:\n def time_add_all(self):\n \"\"\"Benchmark the time to add all datasets\"\"\"\n self.catalog.add_all(self.datasets)\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", + "min_run_count": 2, + "name": "benchmark_datacatalog.TimeDataCatalog.time_add_all", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "f470854de9a319e47c6bab999cdb3c7662341b063447a6866500b2e70b2c5ed8", + "warmup_time": -1 + }, + "benchmark_datacatalog.TimeDataCatalog.time_exists": { + "code": "class TimeDataCatalog:\n def time_exists(self):\n \"\"\"Benchmark the time to check if datasets exist\"\"\"\n for i in range(1,1001):\n self.catalog.exists(f\"dataset_{i}\")\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", + "min_run_count": 2, + "name": "benchmark_datacatalog.TimeDataCatalog.time_exists", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "9bbce85f01a2cfbb5569bc2ba076dd22b662d5d17db4901cd5269d14dbce3ea6", + "warmup_time": -1 + }, + "benchmark_datacatalog.TimeDataCatalog.time_feed_dict": { + "code": "class TimeDataCatalog:\n def time_feed_dict(self):\n \"\"\"Benchmark the time to add feed dict\"\"\"\n self.catalog.add_feed_dict(self.feed_dict)\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", + "min_run_count": 2, + "name": 
"benchmark_datacatalog.TimeDataCatalog.time_feed_dict", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "0101ab38b04d7b27eda18738a82f5f06e48604c6e91d0e10bae33327bb588f69", + "warmup_time": -1 + }, + "benchmark_datacatalog.TimeDataCatalog.time_initialise": { + "code": "class TimeDataCatalog:\n def time_initialise(self):\n \"\"\"Benchmark the time to initialise the catalog\"\"\"\n DataCatalog.from_config(base_catalog)\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", + "min_run_count": 2, + "name": "benchmark_datacatalog.TimeDataCatalog.time_initialise", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "9e460ed25ea64f63f905b3f3a01a817e5daa00c81390b0cdfc25fbad0ae85ea6", + "warmup_time": -1 + }, + "benchmark_datacatalog.TimeDataCatalog.time_list": { + "code": "class TimeDataCatalog:\n def time_list(self):\n \"\"\"Benchmark the time to list all datasets\"\"\"\n self.catalog.list()\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", + "min_run_count": 2, + "name": "benchmark_datacatalog.TimeDataCatalog.time_list", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "4a7ae456f2349941bdbc595b3919284633da1da166cf1394660a0399ec618687", + "warmup_time": -1 + }, + "benchmark_datacatalog.TimeDataCatalog.time_load": { + "code": "class TimeDataCatalog:\n def time_load(self):\n \"\"\"Benchmark the time to load datasets\"\"\"\n for i in range(1,1001):\n self.catalog.load(f\"dataset_load_{i}\")\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", + "min_run_count": 2, + "name": "benchmark_datacatalog.TimeDataCatalog.time_load", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "96bd6914ea6ed937ae958177afd17861ccf3ab1481a5d4d2ed8002dbc5d1131e", + "warmup_time": -1 + }, + "benchmark_datacatalog.TimeDataCatalog.time_release": { + "code": "class TimeDataCatalog:\n def time_release(self):\n \"\"\"Benchmark the time to release datasets\"\"\"\n for i in range(1,1001):\n self.catalog.release(f\"dataset_{i}\")\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 
1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", + "min_run_count": 2, + "name": "benchmark_datacatalog.TimeDataCatalog.time_release", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "fa49ed3249b0319f92a7d6309f2a58ed8595c86141b16768cd575326a2d28d77", + "warmup_time": -1 + }, + "benchmark_datacatalog.TimeDataCatalog.time_resolve_factory": { + "code": "class TimeDataCatalog:\n def time_resolve_factory(self):\n \"\"\"Benchmark the time to resolve factory\"\"\"\n for i in range(1,1001):\n self.catalog._get_dataset(f\"dataset_factory_{i}\")\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", + "min_run_count": 2, + "name": "benchmark_datacatalog.TimeDataCatalog.time_resolve_factory", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "c051d31d513ef455328bb051eafacb1cc06d9f84dd613ee2e0bee3440bbab467", + "warmup_time": -1 + }, + "benchmark_datacatalog.TimeDataCatalog.time_save": { + "code": "class TimeDataCatalog:\n def time_save(self):\n \"\"\"Benchmark the time to save datasets\"\"\"\n for i in range(1,1001):\n self.catalog.save(f\"dataset_{i}\", self.dataframe)\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", + "min_run_count": 2, + "name": "benchmark_datacatalog.TimeDataCatalog.time_save", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "4ea897eb28bd91fc7cf8da6e2679bf608b909c8db9ebdffd97f3bf19b275a809", + "warmup_time": -1 + }, + "benchmark_datacatalog.TimeDataCatalog.time_shallow_copy": { + "code": "class TimeDataCatalog:\n def time_shallow_copy(self):\n \"\"\"Benchmark the time to shallow copy the catalog\"\"\"\n self.catalog.shallow_copy()\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", + "min_run_count": 2, + "name": "benchmark_datacatalog.TimeDataCatalog.time_shallow_copy", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "64ead39024c492d18b91a21c23a9d3f1323533cd37bd53934d54701ecb259762", + "warmup_time": -1 + }, + "benchmark_dummy.TimeSuite.time_keys": { + "code": "class TimeSuite:\n def time_keys(self):\n for key in self.d.keys():\n pass\n\n def setup(self):\n self.d = {}\n for x in range(500):\n self.d[x] = None", + "min_run_count": 2, + "name": "benchmark_dummy.TimeSuite.time_keys", + "number": 0, + 
"param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "86e015a3c40c52da31e4185fff7c7176c38c5e1e1e4aba71912db0b388225191", + "warmup_time": -1 + }, + "benchmark_ocl.TimeOmegaConfigLoader.time_loading_catalog": { + "code": "class TimeOmegaConfigLoader:\n def time_loading_catalog(self):\n \"\"\"Benchmark the time to load the catalog\"\"\"\n self.loader[\"catalog\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n \n # Create sample config files in the temp directory\n _create_config_file(self.conf_source, \"base\", \"catalog.yml\", base_catalog)\n _create_config_file(self.conf_source, \"local\", \"catalog.yml\", local_catalog)\n _create_config_file(self.conf_source, \"base\", \"parameters.yml\", base_params)\n _create_config_file(self.conf_source, \"local\", \"parameters.yml\", local_params)\n _create_config_file(self.conf_source, \"base\", \"globals.yml\", base_globals)\n _create_config_file(self.conf_source, \"local\", \"globals.yml\", local_globals)\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local')", + "min_run_count": 2, + "name": "benchmark_ocl.TimeOmegaConfigLoader.time_loading_catalog", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "3ccff2348faeaf3038548994686b45deeaa3c7c46df2270a8a1f697e7401ae5a", + "warmup_time": -1 + }, + "benchmark_ocl.TimeOmegaConfigLoader.time_loading_globals": { + "code": "class TimeOmegaConfigLoader:\n def time_loading_globals(self):\n \"\"\"Benchmark the time to load global configuration\"\"\"\n self.loader[\"globals\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n \n # Create sample config files in the temp directory\n _create_config_file(self.conf_source, \"base\", \"catalog.yml\", base_catalog)\n _create_config_file(self.conf_source, \"local\", \"catalog.yml\", local_catalog)\n _create_config_file(self.conf_source, \"base\", \"parameters.yml\", base_params)\n _create_config_file(self.conf_source, \"local\", \"parameters.yml\", local_params)\n _create_config_file(self.conf_source, \"base\", \"globals.yml\", base_globals)\n _create_config_file(self.conf_source, \"local\", \"globals.yml\", local_globals)\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local')", + "min_run_count": 2, + "name": "benchmark_ocl.TimeOmegaConfigLoader.time_loading_globals", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "d42dda2b001097642dc790de01ab15e3f1f11426f6bfc5affbc1c658248f32be", + "warmup_time": -1 + }, + "benchmark_ocl.TimeOmegaConfigLoader.time_loading_parameters": { + "code": "class TimeOmegaConfigLoader:\n def time_loading_parameters(self):\n \"\"\"Benchmark the time to load the parameters\"\"\"\n self.loader[\"parameters\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n \n # 
Create sample config files in the temp directory\n _create_config_file(self.conf_source, \"base\", \"catalog.yml\", base_catalog)\n _create_config_file(self.conf_source, \"local\", \"catalog.yml\", local_catalog)\n _create_config_file(self.conf_source, \"base\", \"parameters.yml\", base_params)\n _create_config_file(self.conf_source, \"local\", \"parameters.yml\", local_params)\n _create_config_file(self.conf_source, \"base\", \"globals.yml\", base_globals)\n _create_config_file(self.conf_source, \"local\", \"globals.yml\", local_globals)\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local')", + "min_run_count": 2, + "name": "benchmark_ocl.TimeOmegaConfigLoader.time_loading_parameters", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "3187d47ad3445bdf83439512e124e3cde01f0503a3ffa7db9ca7a02e6bc2f7f2", + "warmup_time": -1 + }, + "benchmark_ocl.TimeOmegaConfigLoader.time_loading_parameters_runtime": { + "code": "class TimeOmegaConfigLoader:\n def time_loading_parameters_runtime(self):\n \"\"\"Benchmark the time to load parameters with runtime configuration\"\"\"\n self.loader.runtime_params = _generate_params(2001, 2002)\n self.loader[\"parameters\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n \n # Create sample config files in the temp directory\n _create_config_file(self.conf_source, \"base\", \"catalog.yml\", base_catalog)\n _create_config_file(self.conf_source, \"local\", \"catalog.yml\", local_catalog)\n _create_config_file(self.conf_source, \"base\", \"parameters.yml\", base_params)\n _create_config_file(self.conf_source, \"local\", \"parameters.yml\", local_params)\n _create_config_file(self.conf_source, \"base\", \"globals.yml\", base_globals)\n _create_config_file(self.conf_source, \"local\", \"globals.yml\", local_globals)\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local')", + "min_run_count": 2, + "name": "benchmark_ocl.TimeOmegaConfigLoader.time_loading_parameters_runtime", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "153be6afe75261c83d15bbc165c10b98af15d3489c722c0f7f8e5c0ce3ca2d59", + "warmup_time": -1 + }, + "benchmark_ocl.TimeOmegaConfigLoader.time_merge_soft_strategy": { + "code": "class TimeOmegaConfigLoader:\n def time_merge_soft_strategy(self):\n \"\"\"Benchmark the time to load and soft-merge configurations\"\"\"\n self.loader.merge_strategy = {\"catalog\": \"soft\"}\n self.loader[\"catalog\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n \n # Create sample config files in the temp directory\n _create_config_file(self.conf_source, \"base\", \"catalog.yml\", base_catalog)\n _create_config_file(self.conf_source, \"local\", \"catalog.yml\", local_catalog)\n _create_config_file(self.conf_source, \"base\", \"parameters.yml\", base_params)\n _create_config_file(self.conf_source, \"local\", \"parameters.yml\", local_params)\n _create_config_file(self.conf_source, \"base\", \"globals.yml\", 
base_globals)\n _create_config_file(self.conf_source, \"local\", \"globals.yml\", local_globals)\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local')", + "min_run_count": 2, + "name": "benchmark_ocl.TimeOmegaConfigLoader.time_merge_soft_strategy", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "317897f43311426ea9b688e3019361eb5bb1f61f60eca4f763d7a8ec38265ea2", + "warmup_time": -1 + }, + "benchmark_ocl.TimeOmegaConfigLoaderAdvanced.time_loading_catalog": { + "code": "class TimeOmegaConfigLoaderAdvanced:\n def time_loading_catalog(self):\n \"\"\"Benchmark the time to load the catalog\"\"\"\n self.loader[\"catalog\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n \n # Create sample config files in the temp directory\n _create_config_file(self.conf_source, \"base\", \"catalog.yml\", base_catalog_with_interpolations)\n _create_config_file(self.conf_source, \"local\", \"catalog.yml\", local_catalog_with_interpolations)\n _create_config_file(self.conf_source, \"base\", \"parameters.yml\", base_params_with_globals)\n _create_config_file(self.conf_source, \"base\", \"globals.yml\", base_globals)\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local')", + "min_run_count": 2, + "name": "benchmark_ocl.TimeOmegaConfigLoaderAdvanced.time_loading_catalog", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "5499c39a6750c5d527f1a3e8a747fdd5b3128af31640d9d7ee9c72be261e344a", + "warmup_time": -1 + }, + "benchmark_ocl.TimeOmegaConfigLoaderAdvanced.time_loading_parameters": { + "code": "class TimeOmegaConfigLoaderAdvanced:\n def time_loading_parameters(self):\n \"\"\"Benchmark the time to load parameters with global interpolation\"\"\"\n self.loader[\"parameters\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n \n # Create sample config files in the temp directory\n _create_config_file(self.conf_source, \"base\", \"catalog.yml\", base_catalog_with_interpolations)\n _create_config_file(self.conf_source, \"local\", \"catalog.yml\", local_catalog_with_interpolations)\n _create_config_file(self.conf_source, \"base\", \"parameters.yml\", base_params_with_globals)\n _create_config_file(self.conf_source, \"base\", \"globals.yml\", base_globals)\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local')", + "min_run_count": 2, + "name": "benchmark_ocl.TimeOmegaConfigLoaderAdvanced.time_loading_parameters", + "number": 0, + "param_names": [], + "params": [], + "repeat": 0, + "rounds": 2, + "sample_time": 0.01, + "type": "time", + "unit": "seconds", + "version": "f74ef4eead7a35df856006dbf9e1b72b61ba36b34767525f55bf8c5eabb343f1", + "warmup_time": -1 + }, + "version": 2 +} \ No newline at end of file diff --git a/benchmarks/benchmark_datacatalog.py b/benchmarks/benchmark_datacatalog.py index d3b12e44ea..7c1a73b6a9 100644 --- a/benchmarks/benchmark_datacatalog.py +++ 
b/benchmarks/benchmark_datacatalog.py @@ -36,6 +36,9 @@ def setup(self): f"param_{i}": i for i in range(1, 1001) } + def time_initialise(self): + """Benchmark the time to initialise the catalog""" + DataCatalog.from_config(base_catalog) def time_save(self): """Benchmark the time to save datasets""" diff --git a/benchmarks/benchmark_ocl.py b/benchmarks/benchmark_ocl.py index 5c38b61901..2dfd970a2e 100644 --- a/benchmarks/benchmark_ocl.py +++ b/benchmarks/benchmark_ocl.py @@ -33,13 +33,13 @@ def _generate_globals(start_range, end_range, is_local=False): return globals_dict def _create_config_file(conf_source, env, file_name, data): - env_path = conf_source / env - env_path.mkdir(parents=True, exist_ok=True) - file_path = env_path / file_name + env_path = conf_source / env + env_path.mkdir(parents=True, exist_ok=True) + file_path = env_path / file_name - import yaml - with open(file_path, "w") as f: - yaml.dump(data, f) + import yaml + with open(file_path, "w") as f: + yaml.dump(data, f) base_catalog = _generate_catalog(1, 1000, is_versioned=True) local_catalog = _generate_catalog(501, 1500, is_local=True) From b8e4203abb98f4cd6bdbdfa3ba139f8cc57bfaef Mon Sep 17 00:00:00 2001 From: Ankita Katiyar Date: Wed, 16 Oct 2024 17:41:15 +0100 Subject: [PATCH 18/19] Revert "Add a test for init and fix indent" This reverts commit 0dbe3c79a1aeba027fb0493ca28b75c49763cfbd. Signed-off-by: Ankita Katiyar --- ...rtualenv-py3.11-kedro-datasets-pandas.json | 1 - .asv/results/M-WFLM6NH6G5/machine.json | 9 - .asv/results/benchmarks.json | 273 ------------------ benchmarks/benchmark_datacatalog.py | 3 - benchmarks/benchmark_ocl.py | 12 +- 5 files changed, 6 insertions(+), 292 deletions(-) delete mode 100644 .asv/results/M-WFLM6NH6G5/f594c8bc-virtualenv-py3.11-kedro-datasets-pandas.json delete mode 100644 .asv/results/M-WFLM6NH6G5/machine.json delete mode 100644 .asv/results/benchmarks.json diff --git a/.asv/results/M-WFLM6NH6G5/f594c8bc-virtualenv-py3.11-kedro-datasets-pandas.json b/.asv/results/M-WFLM6NH6G5/f594c8bc-virtualenv-py3.11-kedro-datasets-pandas.json deleted file mode 100644 index b7dff2c438..0000000000 --- a/.asv/results/M-WFLM6NH6G5/f594c8bc-virtualenv-py3.11-kedro-datasets-pandas.json +++ /dev/null @@ -1 +0,0 @@ -{"commit_hash": "f594c8bcd43100b216ac104d1e670ca4d5783096", "env_name": "virtualenv-py3.11-kedro-datasets-pandas", "date": 1729087847000, "params": {"arch": "arm64", "cpu": "Apple M1 Max", "machine": "M-WFLM6NH6G5", "num_cpu": "10", "os": "Darwin 23.6.0", "ram": "34359738368", "python": "3.11", "kedro-datasets": "", "pandas": ""}, "python": "3.11", "requirements": {"kedro-datasets": "", "pandas": ""}, "env_vars": {}, "result_columns": ["result", "params", "version", "started_at", "duration", "stats_ci_99_a", "stats_ci_99_b", "stats_q_25", "stats_q_75", "stats_number", "stats_repeat", "samples", "profile"], "results": {"benchmark_datacatalog.TimeDataCatalog.time_add_all": [[0.04391904201474972], [], "f470854de9a319e47c6bab999cdb3c7662341b063447a6866500b2e70b2c5ed8", 1729096807842, 1.0691, [-Infinity], [Infinity], [0.043919], [0.043919], [1], [1]], "benchmark_datacatalog.TimeDataCatalog.time_exists": [[0.026259125006617978], [], "9bbce85f01a2cfbb5569bc2ba076dd22b662d5d17db4901cd5269d14dbce3ea6", 1729096808911, 0.93221, [-Infinity], [Infinity], [0.026259], [0.026259], [1], [1]], "benchmark_datacatalog.TimeDataCatalog.time_feed_dict": [[0.034821834007743746], [], "0101ab38b04d7b27eda18738a82f5f06e48604c6e91d0e10bae33327bb588f69", 1729096809843, 0.8628, [-Infinity], [Infinity], [0.034822], 
[0.034822], [1], [1]], "benchmark_datacatalog.TimeDataCatalog.time_initialise": [[0.0948639580165036], [], "9e460ed25ea64f63f905b3f3a01a817e5daa00c81390b0cdfc25fbad0ae85ea6", 1729096810706, 1.0807, [-Infinity], [Infinity], [0.094864], [0.094864], [1], [1]], "benchmark_datacatalog.TimeDataCatalog.time_list": [[1.4999997802078724e-05], [], "4a7ae456f2349941bdbc595b3919284633da1da166cf1394660a0399ec618687", 1729096811787, 0.82309, [-Infinity], [Infinity], [1.5e-05], [1.5e-05], [1], [1]], "benchmark_datacatalog.TimeDataCatalog.time_load": [[0.20476879199850373], [], "96bd6914ea6ed937ae958177afd17861ccf3ab1481a5d4d2ed8002dbc5d1131e", 1729096812610, 0.86483, [-Infinity], [Infinity], [0.20477], [0.20477], [1], [1]], "benchmark_datacatalog.TimeDataCatalog.time_release": [[0.00888583398773335], [], "fa49ed3249b0319f92a7d6309f2a58ed8595c86141b16768cd575326a2d28d77", 1729096813475, 0.76429, [-Infinity], [Infinity], [0.0088858], [0.0088858], [1], [1]], "benchmark_datacatalog.TimeDataCatalog.time_resolve_factory": [[0.09763062497950159], [], "c051d31d513ef455328bb051eafacb1cc06d9f84dd613ee2e0bee3440bbab467", 1729096814240, 0.83693, [-Infinity], [Infinity], [0.097631], [0.097631], [1], [1]], "benchmark_datacatalog.TimeDataCatalog.time_save": [[0.5207992079958785], [], "4ea897eb28bd91fc7cf8da6e2679bf608b909c8db9ebdffd97f3bf19b275a809", 1729096815077, 1.6094, [-Infinity], [Infinity], [0.5208], [0.5208], [1], [1]], "benchmark_datacatalog.TimeDataCatalog.time_shallow_copy": [[0.031129041977692395], [], "64ead39024c492d18b91a21c23a9d3f1323533cd37bd53934d54701ecb259762", 1729096816687, 0.68384, [-Infinity], [Infinity], [0.031129], [0.031129], [1], [1]], "benchmark_dummy.TimeSuite.time_keys": [[3.2910029403865337e-06], [], "86e015a3c40c52da31e4185fff7c7176c38c5e1e1e4aba71912db0b388225191", 1729096817371, 0.18749, [-Infinity], [Infinity], [3.291e-06], [3.291e-06], [1], [1]], "benchmark_ocl.TimeOmegaConfigLoader.time_loading_catalog": [[0.4770808340108488], [], "3ccff2348faeaf3038548994686b45deeaa3c7c46df2270a8a1f697e7401ae5a", 1729096817558, 1.0252, [-Infinity], [Infinity], [0.47708], [0.47708], [1], [1]], "benchmark_ocl.TimeOmegaConfigLoader.time_loading_globals": [[0.11477499999455176], [], "d42dda2b001097642dc790de01ab15e3f1f11426f6bfc5affbc1c658248f32be", 1729096818583, 0.75575, [-Infinity], [Infinity], [0.11477], [0.11477], [1], [1]], "benchmark_ocl.TimeOmegaConfigLoader.time_loading_parameters": [[0.12353595800232142], [], "3187d47ad3445bdf83439512e124e3cde01f0503a3ffa7db9ca7a02e6bc2f7f2", 1729096819339, 0.78861, [-Infinity], [Infinity], [0.12354], [0.12354], [1], [1]], "benchmark_ocl.TimeOmegaConfigLoader.time_loading_parameters_runtime": [[0.13527949998388067], [], "153be6afe75261c83d15bbc165c10b98af15d3489c722c0f7f8e5c0ce3ca2d59", 1729096820128, 0.77955, [-Infinity], [Infinity], [0.13528], [0.13528], [1], [1]], "benchmark_ocl.TimeOmegaConfigLoader.time_merge_soft_strategy": [[0.7280506670067552], [], "317897f43311426ea9b688e3019361eb5bb1f61f60eca4f763d7a8ec38265ea2", 1729096820908, 1.4309, [-Infinity], [Infinity], [0.72805], [0.72805], [1], [1]], "benchmark_ocl.TimeOmegaConfigLoaderAdvanced.time_loading_catalog": [[0.6754177919938229], [], "5499c39a6750c5d527f1a3e8a747fdd5b3128af31640d9d7ee9c72be261e344a", 1729096822339, 1.3692, [-Infinity], [Infinity], [0.67542], [0.67542], [1], [1]], "benchmark_ocl.TimeOmegaConfigLoaderAdvanced.time_loading_parameters": [[1.9011982079828158], [], "f74ef4eead7a35df856006dbf9e1b72b61ba36b34767525f55bf8c5eabb343f1", 1729096823708, 2.5663, [-Infinity], [Infinity], 
[1.9012], [1.9012], [1], [1]]}, "durations": {}, "version": 2} \ No newline at end of file diff --git a/.asv/results/M-WFLM6NH6G5/machine.json b/.asv/results/M-WFLM6NH6G5/machine.json deleted file mode 100644 index 3fe4186a75..0000000000 --- a/.asv/results/M-WFLM6NH6G5/machine.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "arch": "arm64", - "cpu": "Apple M1 Max", - "machine": "M-WFLM6NH6G5", - "num_cpu": "10", - "os": "Darwin 23.6.0", - "ram": "34359738368", - "version": 1 -} \ No newline at end of file diff --git a/.asv/results/benchmarks.json b/.asv/results/benchmarks.json deleted file mode 100644 index 347fbe8942..0000000000 --- a/.asv/results/benchmarks.json +++ /dev/null @@ -1,273 +0,0 @@ -{ - "benchmark_datacatalog.TimeDataCatalog.time_add_all": { - "code": "class TimeDataCatalog:\n def time_add_all(self):\n \"\"\"Benchmark the time to add all datasets\"\"\"\n self.catalog.add_all(self.datasets)\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", - "min_run_count": 2, - "name": "benchmark_datacatalog.TimeDataCatalog.time_add_all", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "f470854de9a319e47c6bab999cdb3c7662341b063447a6866500b2e70b2c5ed8", - "warmup_time": -1 - }, - "benchmark_datacatalog.TimeDataCatalog.time_exists": { - "code": "class TimeDataCatalog:\n def time_exists(self):\n \"\"\"Benchmark the time to check if datasets exist\"\"\"\n for i in range(1,1001):\n self.catalog.exists(f\"dataset_{i}\")\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", - "min_run_count": 2, - "name": "benchmark_datacatalog.TimeDataCatalog.time_exists", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "9bbce85f01a2cfbb5569bc2ba076dd22b662d5d17db4901cd5269d14dbce3ea6", - "warmup_time": -1 - }, - "benchmark_datacatalog.TimeDataCatalog.time_feed_dict": { - "code": "class TimeDataCatalog:\n def time_feed_dict(self):\n \"\"\"Benchmark the time to add feed dict\"\"\"\n self.catalog.add_feed_dict(self.feed_dict)\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", - "min_run_count": 2, - "name": "benchmark_datacatalog.TimeDataCatalog.time_feed_dict", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "0101ab38b04d7b27eda18738a82f5f06e48604c6e91d0e10bae33327bb588f69", - "warmup_time": -1 - }, - "benchmark_datacatalog.TimeDataCatalog.time_initialise": { - "code": "class 
TimeDataCatalog:\n def time_initialise(self):\n \"\"\"Benchmark the time to initialise the catalog\"\"\"\n DataCatalog.from_config(base_catalog)\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", - "min_run_count": 2, - "name": "benchmark_datacatalog.TimeDataCatalog.time_initialise", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "9e460ed25ea64f63f905b3f3a01a817e5daa00c81390b0cdfc25fbad0ae85ea6", - "warmup_time": -1 - }, - "benchmark_datacatalog.TimeDataCatalog.time_list": { - "code": "class TimeDataCatalog:\n def time_list(self):\n \"\"\"Benchmark the time to list all datasets\"\"\"\n self.catalog.list()\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", - "min_run_count": 2, - "name": "benchmark_datacatalog.TimeDataCatalog.time_list", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "4a7ae456f2349941bdbc595b3919284633da1da166cf1394660a0399ec618687", - "warmup_time": -1 - }, - "benchmark_datacatalog.TimeDataCatalog.time_load": { - "code": "class TimeDataCatalog:\n def time_load(self):\n \"\"\"Benchmark the time to load datasets\"\"\"\n for i in range(1,1001):\n self.catalog.load(f\"dataset_load_{i}\")\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", - "min_run_count": 2, - "name": "benchmark_datacatalog.TimeDataCatalog.time_load", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "96bd6914ea6ed937ae958177afd17861ccf3ab1481a5d4d2ed8002dbc5d1131e", - "warmup_time": -1 - }, - "benchmark_datacatalog.TimeDataCatalog.time_release": { - "code": "class TimeDataCatalog:\n def time_release(self):\n \"\"\"Benchmark the time to release datasets\"\"\"\n for i in range(1,1001):\n self.catalog.release(f\"dataset_{i}\")\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", - "min_run_count": 2, - "name": "benchmark_datacatalog.TimeDataCatalog.time_release", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": 
"fa49ed3249b0319f92a7d6309f2a58ed8595c86141b16768cd575326a2d28d77", - "warmup_time": -1 - }, - "benchmark_datacatalog.TimeDataCatalog.time_resolve_factory": { - "code": "class TimeDataCatalog:\n def time_resolve_factory(self):\n \"\"\"Benchmark the time to resolve factory\"\"\"\n for i in range(1,1001):\n self.catalog._get_dataset(f\"dataset_factory_{i}\")\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", - "min_run_count": 2, - "name": "benchmark_datacatalog.TimeDataCatalog.time_resolve_factory", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "c051d31d513ef455328bb051eafacb1cc06d9f84dd613ee2e0bee3440bbab467", - "warmup_time": -1 - }, - "benchmark_datacatalog.TimeDataCatalog.time_save": { - "code": "class TimeDataCatalog:\n def time_save(self):\n \"\"\"Benchmark the time to save datasets\"\"\"\n for i in range(1,1001):\n self.catalog.save(f\"dataset_{i}\", self.dataframe)\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", - "min_run_count": 2, - "name": "benchmark_datacatalog.TimeDataCatalog.time_save", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "4ea897eb28bd91fc7cf8da6e2679bf608b909c8db9ebdffd97f3bf19b275a809", - "warmup_time": -1 - }, - "benchmark_datacatalog.TimeDataCatalog.time_shallow_copy": { - "code": "class TimeDataCatalog:\n def time_shallow_copy(self):\n \"\"\"Benchmark the time to shallow copy the catalog\"\"\"\n self.catalog.shallow_copy()\n\n def setup(self):\n self.catalog = DataCatalog.from_config(base_catalog)\n self.dataframe = pd.DataFrame({\"column\": [1, 2, 3]})\n self.dataframe.to_csv(\"data.csv\", index=False)\n self.datasets = {\n f\"dataset_new_{i}\": CSVDataset(filepath=\"data.csv\") for i in range(1, 1001)\n }\n self.feed_dict = {\n f\"param_{i}\": i for i in range(1, 1001)\n }", - "min_run_count": 2, - "name": "benchmark_datacatalog.TimeDataCatalog.time_shallow_copy", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "64ead39024c492d18b91a21c23a9d3f1323533cd37bd53934d54701ecb259762", - "warmup_time": -1 - }, - "benchmark_dummy.TimeSuite.time_keys": { - "code": "class TimeSuite:\n def time_keys(self):\n for key in self.d.keys():\n pass\n\n def setup(self):\n self.d = {}\n for x in range(500):\n self.d[x] = None", - "min_run_count": 2, - "name": "benchmark_dummy.TimeSuite.time_keys", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "86e015a3c40c52da31e4185fff7c7176c38c5e1e1e4aba71912db0b388225191", - "warmup_time": -1 - }, - "benchmark_ocl.TimeOmegaConfigLoader.time_loading_catalog": { - "code": "class 
TimeOmegaConfigLoader:\n def time_loading_catalog(self):\n \"\"\"Benchmark the time to load the catalog\"\"\"\n self.loader[\"catalog\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n \n # Create sample config files in the temp directory\n _create_config_file(self.conf_source, \"base\", \"catalog.yml\", base_catalog)\n _create_config_file(self.conf_source, \"local\", \"catalog.yml\", local_catalog)\n _create_config_file(self.conf_source, \"base\", \"parameters.yml\", base_params)\n _create_config_file(self.conf_source, \"local\", \"parameters.yml\", local_params)\n _create_config_file(self.conf_source, \"base\", \"globals.yml\", base_globals)\n _create_config_file(self.conf_source, \"local\", \"globals.yml\", local_globals)\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local')", - "min_run_count": 2, - "name": "benchmark_ocl.TimeOmegaConfigLoader.time_loading_catalog", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "3ccff2348faeaf3038548994686b45deeaa3c7c46df2270a8a1f697e7401ae5a", - "warmup_time": -1 - }, - "benchmark_ocl.TimeOmegaConfigLoader.time_loading_globals": { - "code": "class TimeOmegaConfigLoader:\n def time_loading_globals(self):\n \"\"\"Benchmark the time to load global configuration\"\"\"\n self.loader[\"globals\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n \n # Create sample config files in the temp directory\n _create_config_file(self.conf_source, \"base\", \"catalog.yml\", base_catalog)\n _create_config_file(self.conf_source, \"local\", \"catalog.yml\", local_catalog)\n _create_config_file(self.conf_source, \"base\", \"parameters.yml\", base_params)\n _create_config_file(self.conf_source, \"local\", \"parameters.yml\", local_params)\n _create_config_file(self.conf_source, \"base\", \"globals.yml\", base_globals)\n _create_config_file(self.conf_source, \"local\", \"globals.yml\", local_globals)\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local')", - "min_run_count": 2, - "name": "benchmark_ocl.TimeOmegaConfigLoader.time_loading_globals", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "d42dda2b001097642dc790de01ab15e3f1f11426f6bfc5affbc1c658248f32be", - "warmup_time": -1 - }, - "benchmark_ocl.TimeOmegaConfigLoader.time_loading_parameters": { - "code": "class TimeOmegaConfigLoader:\n def time_loading_parameters(self):\n \"\"\"Benchmark the time to load the parameters\"\"\"\n self.loader[\"parameters\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n \n # Create sample config files in the temp directory\n _create_config_file(self.conf_source, \"base\", \"catalog.yml\", base_catalog)\n _create_config_file(self.conf_source, \"local\", \"catalog.yml\", local_catalog)\n _create_config_file(self.conf_source, \"base\", \"parameters.yml\", base_params)\n 
_create_config_file(self.conf_source, \"local\", \"parameters.yml\", local_params)\n _create_config_file(self.conf_source, \"base\", \"globals.yml\", base_globals)\n _create_config_file(self.conf_source, \"local\", \"globals.yml\", local_globals)\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local')", - "min_run_count": 2, - "name": "benchmark_ocl.TimeOmegaConfigLoader.time_loading_parameters", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "3187d47ad3445bdf83439512e124e3cde01f0503a3ffa7db9ca7a02e6bc2f7f2", - "warmup_time": -1 - }, - "benchmark_ocl.TimeOmegaConfigLoader.time_loading_parameters_runtime": { - "code": "class TimeOmegaConfigLoader:\n def time_loading_parameters_runtime(self):\n \"\"\"Benchmark the time to load parameters with runtime configuration\"\"\"\n self.loader.runtime_params = _generate_params(2001, 2002)\n self.loader[\"parameters\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n \n # Create sample config files in the temp directory\n _create_config_file(self.conf_source, \"base\", \"catalog.yml\", base_catalog)\n _create_config_file(self.conf_source, \"local\", \"catalog.yml\", local_catalog)\n _create_config_file(self.conf_source, \"base\", \"parameters.yml\", base_params)\n _create_config_file(self.conf_source, \"local\", \"parameters.yml\", local_params)\n _create_config_file(self.conf_source, \"base\", \"globals.yml\", base_globals)\n _create_config_file(self.conf_source, \"local\", \"globals.yml\", local_globals)\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local')", - "min_run_count": 2, - "name": "benchmark_ocl.TimeOmegaConfigLoader.time_loading_parameters_runtime", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "153be6afe75261c83d15bbc165c10b98af15d3489c722c0f7f8e5c0ce3ca2d59", - "warmup_time": -1 - }, - "benchmark_ocl.TimeOmegaConfigLoader.time_merge_soft_strategy": { - "code": "class TimeOmegaConfigLoader:\n def time_merge_soft_strategy(self):\n \"\"\"Benchmark the time to load and soft-merge configurations\"\"\"\n self.loader.merge_strategy = {\"catalog\": \"soft\"}\n self.loader[\"catalog\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n \n # Create sample config files in the temp directory\n _create_config_file(self.conf_source, \"base\", \"catalog.yml\", base_catalog)\n _create_config_file(self.conf_source, \"local\", \"catalog.yml\", local_catalog)\n _create_config_file(self.conf_source, \"base\", \"parameters.yml\", base_params)\n _create_config_file(self.conf_source, \"local\", \"parameters.yml\", local_params)\n _create_config_file(self.conf_source, \"base\", \"globals.yml\", base_globals)\n _create_config_file(self.conf_source, \"local\", \"globals.yml\", local_globals)\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local')", - "min_run_count": 2, - "name": 
"benchmark_ocl.TimeOmegaConfigLoader.time_merge_soft_strategy", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "317897f43311426ea9b688e3019361eb5bb1f61f60eca4f763d7a8ec38265ea2", - "warmup_time": -1 - }, - "benchmark_ocl.TimeOmegaConfigLoaderAdvanced.time_loading_catalog": { - "code": "class TimeOmegaConfigLoaderAdvanced:\n def time_loading_catalog(self):\n \"\"\"Benchmark the time to load the catalog\"\"\"\n self.loader[\"catalog\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n \n # Create sample config files in the temp directory\n _create_config_file(self.conf_source, \"base\", \"catalog.yml\", base_catalog_with_interpolations)\n _create_config_file(self.conf_source, \"local\", \"catalog.yml\", local_catalog_with_interpolations)\n _create_config_file(self.conf_source, \"base\", \"parameters.yml\", base_params_with_globals)\n _create_config_file(self.conf_source, \"base\", \"globals.yml\", base_globals)\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local')", - "min_run_count": 2, - "name": "benchmark_ocl.TimeOmegaConfigLoaderAdvanced.time_loading_catalog", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "5499c39a6750c5d527f1a3e8a747fdd5b3128af31640d9d7ee9c72be261e344a", - "warmup_time": -1 - }, - "benchmark_ocl.TimeOmegaConfigLoaderAdvanced.time_loading_parameters": { - "code": "class TimeOmegaConfigLoaderAdvanced:\n def time_loading_parameters(self):\n \"\"\"Benchmark the time to load parameters with global interpolation\"\"\"\n self.loader[\"parameters\"]\n\n def setup(self):\n # Setup temporary configuration directory with sample config files\n self.temp_dir = tempfile.TemporaryDirectory()\n self.conf_source = Path(self.temp_dir.name)\n \n # Create sample config files in the temp directory\n _create_config_file(self.conf_source, \"base\", \"catalog.yml\", base_catalog_with_interpolations)\n _create_config_file(self.conf_source, \"local\", \"catalog.yml\", local_catalog_with_interpolations)\n _create_config_file(self.conf_source, \"base\", \"parameters.yml\", base_params_with_globals)\n _create_config_file(self.conf_source, \"base\", \"globals.yml\", base_globals)\n \n # Instantiate the OmegaConfigLoader\n self.loader = OmegaConfigLoader(conf_source=self.conf_source, base_env='base', default_run_env='local')", - "min_run_count": 2, - "name": "benchmark_ocl.TimeOmegaConfigLoaderAdvanced.time_loading_parameters", - "number": 0, - "param_names": [], - "params": [], - "repeat": 0, - "rounds": 2, - "sample_time": 0.01, - "type": "time", - "unit": "seconds", - "version": "f74ef4eead7a35df856006dbf9e1b72b61ba36b34767525f55bf8c5eabb343f1", - "warmup_time": -1 - }, - "version": 2 -} \ No newline at end of file diff --git a/benchmarks/benchmark_datacatalog.py b/benchmarks/benchmark_datacatalog.py index 7c1a73b6a9..d3b12e44ea 100644 --- a/benchmarks/benchmark_datacatalog.py +++ b/benchmarks/benchmark_datacatalog.py @@ -36,9 +36,6 @@ def setup(self): f"param_{i}": i for i in range(1, 1001) } - def time_initialise(self): - """Benchmark the time to initialise the catalog""" - DataCatalog.from_config(base_catalog) def time_save(self): """Benchmark the time 
to save datasets""" diff --git a/benchmarks/benchmark_ocl.py b/benchmarks/benchmark_ocl.py index 2dfd970a2e..5c38b61901 100644 --- a/benchmarks/benchmark_ocl.py +++ b/benchmarks/benchmark_ocl.py @@ -33,13 +33,13 @@ def _generate_globals(start_range, end_range, is_local=False): return globals_dict def _create_config_file(conf_source, env, file_name, data): - env_path = conf_source / env - env_path.mkdir(parents=True, exist_ok=True) - file_path = env_path / file_name + env_path = conf_source / env + env_path.mkdir(parents=True, exist_ok=True) + file_path = env_path / file_name - import yaml - with open(file_path, "w") as f: - yaml.dump(data, f) + import yaml + with open(file_path, "w") as f: + yaml.dump(data, f) base_catalog = _generate_catalog(1, 1000, is_versioned=True) local_catalog = _generate_catalog(501, 1500, is_local=True) From 821401deea8664b0ec91ae0e378268962b844b78 Mon Sep 17 00:00:00 2001 From: Ankita Katiyar Date: Wed, 16 Oct 2024 17:44:16 +0100 Subject: [PATCH 19/19] Add a test for init and fix indent Signed-off-by: Ankita Katiyar --- benchmarks/benchmark_datacatalog.py | 3 +++ benchmarks/benchmark_ocl.py | 12 ++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/benchmarks/benchmark_datacatalog.py b/benchmarks/benchmark_datacatalog.py index d3b12e44ea..15de4ef310 100644 --- a/benchmarks/benchmark_datacatalog.py +++ b/benchmarks/benchmark_datacatalog.py @@ -36,6 +36,9 @@ def setup(self): f"param_{i}": i for i in range(1, 1001) } + def time_init(self): + """Benchmark the time to initialize the catalog""" + DataCatalog.from_config(base_catalog) def time_save(self): """Benchmark the time to save datasets""" diff --git a/benchmarks/benchmark_ocl.py b/benchmarks/benchmark_ocl.py index 5c38b61901..2dfd970a2e 100644 --- a/benchmarks/benchmark_ocl.py +++ b/benchmarks/benchmark_ocl.py @@ -33,13 +33,13 @@ def _generate_globals(start_range, end_range, is_local=False): return globals_dict def _create_config_file(conf_source, env, file_name, data): - env_path = conf_source / env - env_path.mkdir(parents=True, exist_ok=True) - file_path = env_path / file_name + env_path = conf_source / env + env_path.mkdir(parents=True, exist_ok=True) + file_path = env_path / file_name - import yaml - with open(file_path, "w") as f: - yaml.dump(data, f) + import yaml + with open(file_path, "w") as f: + yaml.dump(data, f) base_catalog = _generate_catalog(1, 1000, is_versioned=True) local_catalog = _generate_catalog(501, 1500, is_local=True)
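A closing note on the benchmark layout that the last three patches adjust: asv (airspeed velocity) collects classes from the `benchmarks/` directory, calls `setup()` before each measurement, and times any method whose name begins with `time_`. The sketch below is a minimal self-contained suite following that convention; it is illustrative and not one of the project's actual benchmark classes:

```python
import tempfile
from pathlib import Path


class TimeExampleSuite:
    """Minimal asv suite: asv times every method whose name starts with ``time_``."""

    def setup(self):
        # Runs before each timed method: create files to benchmark against.
        self.temp_dir = tempfile.TemporaryDirectory()
        self.paths = [Path(self.temp_dir.name) / f"file_{i}.txt" for i in range(100)]
        for path in self.paths:
            path.write_text("payload")

    def teardown(self):
        # Runs after each timed method: remove the temporary files.
        self.temp_dir.cleanup()

    def time_read_all(self):
        """Benchmark the time to read all files"""
        for path in self.paths:
            path.read_text()
```

Suites are executed locally with `asv run`. As the add-then-revert sequence in patches 17 and 18 shows, the machine-specific result files asv writes under `.asv/results` are generated artefacts; only the changes under `benchmarks/` were meant to be committed, which is what patch 19 reapplies.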