From d027ac590853f7213ce384683f8cef028521eb70 Mon Sep 17 00:00:00 2001 From: Ivan Danov Date: Thu, 16 May 2019 16:54:11 +0100 Subject: [PATCH 01/44] Merge pull request #19 from quantumblacklabs/release/0.14.0 Release 0.14.0 --- kedro/io/__init__.py | 55 ++++++ kedro/io/core.py | 450 +++++++++++++++++++++++++++++++++++++++++++ kedro/utils.py | 59 ++++++ 3 files changed, 564 insertions(+) create mode 100644 kedro/io/__init__.py create mode 100644 kedro/io/core.py create mode 100644 kedro/utils.py diff --git a/kedro/io/__init__.py b/kedro/io/__init__.py new file mode 100644 index 000000000..a426a6374 --- /dev/null +++ b/kedro/io/__init__.py @@ -0,0 +1,55 @@ +# Copyright 2018-2019 QuantumBlack Visual Analytics Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND +# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS +# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# The QuantumBlack Visual Analytics Limited (“QuantumBlack”) name and logo +# (either separately or in combination, “QuantumBlack Trademarks”) are +# trademarks of QuantumBlack. The License does not grant you any right or +# license to the QuantumBlack Trademarks. 
You may not use the QuantumBlack +# Trademarks or any confusingly similar mark as a trademark for your product, +# or use the QuantumBlack Trademarks in any other manner that might cause +# confusion in the marketplace, including but not limited to in advertising, +# on websites, or on software. +# +# See the License for the specific language governing permissions and +# limitations under the License. + +"""``kedro.io`` provides functionality to read and write to a +number of data sets. At core of the library is ``AbstractDataSet`` +which allows implementation of various ``AbstractDataSet``s. +""" + +from .core import AbstractDataSet # NOQA +from .core import DataSetAlreadyExistsError # NOQA +from .core import DataSetError # NOQA +from .core import DataSetNotFoundError # NOQA +from .core import ExistsMixin # NOQA +from .core import FilepathVersionMixIn # NOQA +from .core import S3PathVersionMixIn # NOQA +from .core import Version # NOQA +from .csv_local import CSVLocalDataSet # NOQA +from .csv_s3 import CSVS3DataSet # NOQA +from .data_catalog import DataCatalog # NOQA +from .excel_local import ExcelLocalDataSet # NOQA +from .hdf_local import HDFLocalDataSet # NOQA +from .json_local import JSONLocalDataSet # NOQA +from .lambda_data_set import LambdaDataSet # NOQA +from .memory_data_set import MemoryDataSet # NOQA +from .parquet_local import ParquetLocalDataSet # NOQA +from .pickle_local import PickleLocalDataSet # NOQA +from .pickle_s3 import PickleS3DataSet # NOQA +from .sql import SQLQueryDataSet # NOQA +from .sql import SQLTableDataSet # NOQA +from .text_local import TextLocalDataSet # NOQA diff --git a/kedro/io/core.py b/kedro/io/core.py new file mode 100644 index 000000000..59a355b5b --- /dev/null +++ b/kedro/io/core.py @@ -0,0 +1,450 @@ +# Copyright 2018-2019 QuantumBlack Visual Analytics Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND +# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS +# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# The QuantumBlack Visual Analytics Limited (“QuantumBlack”) name and logo +# (either separately or in combination, “QuantumBlack Trademarks”) are +# trademarks of QuantumBlack. The License does not grant you any right or +# license to the QuantumBlack Trademarks. You may not use the QuantumBlack +# Trademarks or any confusingly similar mark as a trademark for your product, +# or use the QuantumBlack Trademarks in any other manner that might cause +# confusion in the marketplace, including but not limited to in advertising, +# on websites, or on software. +# +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module provides a set of classes which underpin the data loading and +saving functionality provided by ``kedro.io``. +""" + +import abc +import copy +import logging +from collections import namedtuple +from datetime import datetime, timezone +from glob import iglob +from pathlib import Path, PurePosixPath +from typing import Any, Dict, Type +from warnings import warn + +from kedro.utils import load_obj + +MAX_DESCRIPTION_LENGTH = 70 +VERSIONED_FLAG_KEY = "versioned" +VERSION_KEY = "version" + + +class DataSetError(Exception): + """``DataSetError`` raised by ``AbstractDataSet`` implementations + in case of failure of input/output methods. 
+ + ``AbstractDataSet`` implementations should provide instructive + information in case of failure. + """ + + pass + + +class DataSetNotFoundError(DataSetError): + """``DataSetNotFoundError`` raised by ``DataCatalog`` class in case of + trying to use a non-existing data set. + """ + + pass + + +class DataSetAlreadyExistsError(DataSetError): + """``DataSetAlreadyExistsError`` raised by ``DataCatalog`` class in case + of trying to add a data set which already exists in the ``DataCatalog``. + """ + + pass + + +class AbstractDataSet(abc.ABC): + """``AbstractDataSet`` is the base class for all data set implementations. + All data set implementations should extend this abstract class + and implement the methods marked as abstract. + + Example: + :: + + >>> from kedro.io import AbstractDataSet + >>> import pandas as pd + >>> + >>> class MyOwnDataSet(AbstractDataSet): + >>> def __init__(self, param1, param2): + >>> self._param1 = param1 + >>> self._param2 = param2 + >>> + >>> def _load(self) -> pd.DataFrame: + >>> print("Dummy load: {}".format(self._param1)) + >>> return pd.DataFrame() + >>> + >>> def _save(self, df: pd.DataFrame) -> None: + >>> print("Dummy save: {}".format(self._param2)) + >>> + >>> def _describe(self): + >>> return dict(param1=self._param1, param2=self._param2) + """ + + @classmethod + def from_config( + cls: Type, + name: str, + config: Dict[str, Any], + load_version: str = None, + save_version: str = None, + ) -> "AbstractDataSet": + """Create a data set instance using the configuration provided. + + Args: + name: Data set name. + config: Data set config dictionary. + load_version: Version string to be used for ``load`` operation if + the data set is versioned. Has no effect on the data set + if versioning was not enabled. + save_version: Version string to be used for ``save`` operation if + the data set is versioned. Has no effect on the data set + if versioning was not enabled. + + Returns: + An instance of an ``AbstractDataSet`` subclass. 
+ + Raises: + DataSetError: When the function fails to create the data set + from its config. + + """ + config = copy.deepcopy(config) + save_version = save_version or generate_current_version() + + if VERSION_KEY in config: + # remove "version" key so that it's not passed + # to the 'unversioned' data set constructor + message = ( + "`%s` attribute removed from `%s` data set " + "configuration since it is a reserved word and cannot " + "be directly specified", + VERSION_KEY, + name, + ) + logging.getLogger(__name__).warning(*message) + del config[VERSION_KEY] + if config.pop(VERSIONED_FLAG_KEY, False): # data set is versioned + config[VERSION_KEY] = Version(load_version, save_version) + + dataset_class_path = config.pop("type") + try: + class_obj = load_obj(dataset_class_path, "kedro.io") + except ImportError: + raise DataSetError( + "Cannot import module when trying to load type " + "`{}` for DataSet `{}`.".format(dataset_class_path, name) + ) + except AttributeError: + raise DataSetError( + "Class `{}` for DataSet `{}` not found.".format( + dataset_class_path, name + ) + ) + + if not issubclass(class_obj, AbstractDataSet): + raise DataSetError( + "DataSet '{}' type `{}.{}` is invalid: " + "all data set types must extend " + "`AbstractDataSet`.".format( + name, class_obj.__module__, class_obj.__qualname__ + ) + ) + try: + data_set = class_obj(**config) + except TypeError as err: + raise DataSetError( + "\n{}.\nDataSet '{}' must only contain " + "arguments valid for the constructor " + "of `{}.{}`.".format( + str(err), name, class_obj.__module__, class_obj.__qualname__ + ) + ) + except Exception as err: + raise DataSetError( + "\n{}.\nFailed to instantiate DataSet " + "'{}' of type `{}.{}`.".format( + str(err), name, class_obj.__module__, class_obj.__qualname__ + ) + ) + return data_set + + def load(self) -> Any: + """Loads data by delegation to the provided load method. + + Returns: + Data returned by the provided load method. 
+ + Raises: + DataSetError: When underlying load method raises error. + + """ + + try: + logging.getLogger(__name__).debug("Loading %s", str(self)) + return self._load() + except DataSetError: + raise + except Exception as exc: + # This exception handling is by design as the composed data sets + # can throw any type of exception. + message = "Failed while loading data from data set {}.\n{}".format( + str(self), str(exc) + ) + raise DataSetError(message) from exc + + def save(self, data: Any) -> None: + """Saves data by delegation to the provided save method. + + Args: + data: the value to be saved by provided save method. + + Raises: + DataSetError: when underlying save method raises error. + + """ + + if data is None: + raise DataSetError("Saving `None` to a `DataSet` is not allowed") + + try: + logging.getLogger(__name__).debug("Saving %s", str(self)) + self._save(data) + except DataSetError: + raise + except Exception as exc: + message = "Failed while saving data to data set {}.\n{}".format( + str(self), str(exc) + ) + raise DataSetError(message) from exc + + def __str__(self): + def _to_str(obj, is_root=False): + """Returns a string representation where + 1. The root level (i.e. the DataSet.__init__ arguments) are + formatted like DataSet(key=value). + 2. Dictionaries have the keys alphabetically sorted recursively. + 3. Empty dictionaries and None values are not shown. + 4. String representations of dictionary values are + capped to MAX_DESCRIPTION_LENGTH. + """ + + fmt = "{}={}" if is_root else "'{}': {}" # 1 + + if isinstance(obj, dict): + sorted_dict = sorted(obj.items(), key=lambda pair: str(pair[0])) # 2 + + text = ", ".join( + fmt.format(key, _to_str(value)) # 2 + for key, value in sorted_dict + if value or isinstance(value, bool) + ) # 3 + + return text if is_root else "{" + text + "}" # 1 + + # not a dictionary + value = str(obj) + suffix = "" if len(value) <= MAX_DESCRIPTION_LENGTH else "..." 
+ return value[:MAX_DESCRIPTION_LENGTH] + suffix # 4 + + return "{}({})".format(type(self).__name__, _to_str(self._describe(), True)) + + @abc.abstractmethod + def _load(self) -> Any: + raise NotImplementedError( + "`{}` is a subclass of AbstractDataSet and" + "it must implement the `_load` method".format(self.__class__.__name__) + ) + + @abc.abstractmethod + def _save(self, data: Any) -> None: + raise NotImplementedError( + "`{}` is a subclass of AbstractDataSet and" + "it must implement the `_save` method".format(self.__class__.__name__) + ) + + @abc.abstractmethod + def _describe(self) -> Dict[str, Any]: + raise NotImplementedError( + "`{}` is a subclass of AbstractDataSet and" + "it must implement the `_describe` method".format(self.__class__.__name__) + ) + + +class ExistsMixin(abc.ABC): + """Mixin class which provides an exists() method.""" + + def exists(self) -> bool: + """Checks whether a data set's output already exists by calling + the provided _exists() method. + + Returns: + Flag indicating whether the output already exists. + + Raises: + DataSetError: when underlying exists method raises error. + + """ + try: + logging.getLogger(__name__).debug( + "Checking whether target of %s exists", str(self) + ) + return self._exists() + except Exception as exc: + message = "Failed during exists check for data set {}.\n{}".format( + str(self), str(exc) + ) + raise DataSetError(message) from exc + + @abc.abstractmethod + def _exists(self) -> bool: + raise NotImplementedError( + "`{}` inherits from ExistsMixin and " + "it must implement the `_exists` method".format(self.__class__.__name__) + ) + + +def generate_current_version() -> str: + """Generate the current version to be used by versioned data sets. + + Returns: + String representation of the current version. 
+ + """ + current_ts = datetime.now(tz=timezone.utc) + fmt = ( + "{d.year:04d}-{d.month:02d}-{d.day:02d}T{d.hour:02d}" + ".{d.minute:02d}.{d.second:02d}.{ms:03d}Z" + ) + return fmt.format(d=current_ts, ms=current_ts.microsecond // 1000) + + +class Version(namedtuple("Version", ["load", "save"])): + """This namedtuple is used to provide load and save versions for versioned + data sets. If ``Version.load`` is None, then the latest available version + is loaded. If ``Version.save`` is None, then save version is formatted as + YYYY-MM-DDThh.mm.ss.sssZ of the current timestamp. + """ + + __slots__ = () + + +_PATH_CONSISTENCY_WARNING = ( + "Save path `{}` did not match load path `{}` for {}. This is strongly " + "discouraged due to inconsistencies it may cause between `save` and " + "`load` operations. Please refrain from setting exact load version for " + "intermediate data sets where possible to avoid this warning." +) + + +# pylint: disable=too-few-public-methods +class FilepathVersionMixIn: + """Mixin class which helps to version filepath-like data sets.""" + + def _get_load_path(self, filepath: str, version: Version = None) -> str: + if not version: + return filepath + if version.load: + return self._get_versioned_path(filepath, version.load) + pattern = self._get_versioned_path(filepath, "*") + paths = [f for f in iglob(pattern) if Path(f).exists()] + if not paths: + message = "Did not find any versions for {}".format(str(self)) + raise DataSetError(message) + return sorted(paths, reverse=True)[0] + + def _get_save_path(self, filepath: str, version: Version = None) -> str: + if not version: + return filepath + save_version = version.save or generate_current_version() + versioned_path = self._get_versioned_path(filepath, save_version) + if Path(versioned_path).exists(): + message = ( + "Save path `{}` for {} must not exist if versioning " + "is enabled.".format(versioned_path, str(self)) + ) + raise DataSetError(message) + return versioned_path + + @staticmethod + 
def _get_versioned_path(filepath: str, version: str) -> str: + filepath = Path(filepath) + return str(filepath / version / filepath.name) + + def _check_paths_consistency(self, load_path: str, save_path: str): + if load_path != save_path: + warn(_PATH_CONSISTENCY_WARNING.format(save_path, load_path, str(self))) + + +# pylint: disable=too-few-public-methods +class S3PathVersionMixIn: + """Mixin class which helps to version S3 data sets.""" + + def _get_load_path( + self, client: Any, bucket: str, filepath: str, version: Version = None + ) -> str: + if not version: + return filepath + if version.load: + return self._get_versioned_path(filepath, version.load) + prefix = filepath if filepath.endswith("/") else filepath + "/" + keys = list(self._list_objects(client, bucket, prefix)) + if not keys: + message = "Did not find any versions for {}".format(str(self)) + raise DataSetError(message) + return sorted(keys, reverse=True)[0] + + def _get_save_path( + self, client: Any, bucket: str, filepath: str, version: Version = None + ) -> str: + if not version: + return filepath + save_version = version.save or generate_current_version() + versioned_path = self._get_versioned_path(filepath, save_version) + if versioned_path in self._list_objects(client, bucket, versioned_path): + message = ( + "Save path `{}` for {} must not exist if versioning " + "is enabled.".format(versioned_path, str(self)) + ) + raise DataSetError(message) + return versioned_path + + def _check_paths_consistency(self, load_path: str, save_path: str): + if load_path != save_path: + warn(_PATH_CONSISTENCY_WARNING.format(save_path, load_path, str(self))) + + @staticmethod + def _get_versioned_path(filepath: str, version: str) -> str: + filepath = PurePosixPath(filepath) + return str(filepath / version / filepath.name) + + @staticmethod + def _list_objects(client: Any, bucket: str, prefix: str): + paginator = client.get_paginator("list_objects_v2") + page_iterator = paginator.paginate(Bucket=bucket, 
Prefix=prefix) + for page in page_iterator: + yield from ( + obj["Key"] + for obj in page.get("Contents", []) + if not obj["Key"].endswith("/") + ) diff --git a/kedro/utils.py b/kedro/utils.py new file mode 100644 index 000000000..63a0b8362 --- /dev/null +++ b/kedro/utils.py @@ -0,0 +1,59 @@ +# Copyright 2018-2019 QuantumBlack Visual Analytics Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND +# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS +# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# The QuantumBlack Visual Analytics Limited (“QuantumBlack”) name and logo +# (either separately or in combination, “QuantumBlack Trademarks”) are +# trademarks of QuantumBlack. The License does not grant you any right or +# license to the QuantumBlack Trademarks. You may not use the QuantumBlack +# Trademarks or any confusingly similar mark as a trademark for your product, +# or use the QuantumBlack Trademarks in any other manner that might cause +# confusion in the marketplace, including but not limited to in advertising, +# on websites, or on software. +# +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module provides a set of helper functions being used across different components +of kedro package. 
+""" + +import importlib +from typing import Any + + +def load_obj(obj_path: str, default_obj_path: str) -> Any: + """Extract an object from a given path. + + Args: + obj_path: Path to an object to be extracted, including the object name. + default_obj_path: Default object path. + + Returns: + Extracted object. + + Raises: + AttributeError: When the object does not have the given named attribute. + + """ + obj_path_list = obj_path.rsplit(".", 1) + obj_path = obj_path_list.pop(0) if len(obj_path_list) > 1 else default_obj_path + obj_name = obj_path_list[0] + module_obj = importlib.import_module(obj_path) + if not hasattr(module_obj, obj_name): + raise AttributeError( + "Object `{}` cannot be loaded from `{}`.".format(obj_name, obj_path) + ) + return getattr(module_obj, obj_name) From c947de919383735b2a411fe8d52aee9ad31fe24c Mon Sep 17 00:00:00 2001 From: Nasef Khan Date: Fri, 31 May 2019 11:39:15 +0100 Subject: [PATCH 02/44] Merge pull request #51 from quantumblacklabs/release/0.14.1 Release 0.14.1 --- kedro/io/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kedro/io/__init__.py b/kedro/io/__init__.py index a426a6374..119f08bcb 100644 --- a/kedro/io/__init__.py +++ b/kedro/io/__init__.py @@ -44,6 +44,7 @@ from .data_catalog import DataCatalog # NOQA from .excel_local import ExcelLocalDataSet # NOQA from .hdf_local import HDFLocalDataSet # NOQA +from .hdf_s3 import HDFS3DataSet # NOQA from .json_local import JSONLocalDataSet # NOQA from .lambda_data_set import LambdaDataSet # NOQA from .memory_data_set import MemoryDataSet # NOQA From 8b218eceadd95c98858ace5beff5fda28a696bb7 Mon Sep 17 00:00:00 2001 From: Nasef Khan Date: Tue, 11 Jun 2019 15:14:58 +0100 Subject: [PATCH 03/44] Merge pull request #81 from quantumblacklabs/release/0.14.2 Release 0.14.2 --- kedro/io/__init__.py | 2 +- kedro/io/core.py | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/kedro/io/__init__.py b/kedro/io/__init__.py index 119f08bcb..2c9436ab2 100644 
--- a/kedro/io/__init__.py +++ b/kedro/io/__init__.py @@ -35,7 +35,6 @@ from .core import DataSetAlreadyExistsError # NOQA from .core import DataSetError # NOQA from .core import DataSetNotFoundError # NOQA -from .core import ExistsMixin # NOQA from .core import FilepathVersionMixIn # NOQA from .core import S3PathVersionMixIn # NOQA from .core import Version # NOQA @@ -54,3 +53,4 @@ from .sql import SQLQueryDataSet # NOQA from .sql import SQLTableDataSet # NOQA from .text_local import TextLocalDataSet # NOQA +from .transformers import AbstractTransformer # NOQA diff --git a/kedro/io/core.py b/kedro/io/core.py index 59a355b5b..45ec91597 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -290,10 +290,6 @@ def _describe(self) -> Dict[str, Any]: "it must implement the `_describe` method".format(self.__class__.__name__) ) - -class ExistsMixin(abc.ABC): - """Mixin class which provides an exists() method.""" - def exists(self) -> bool: """Checks whether a data set's output already exists by calling the provided _exists() method. @@ -316,12 +312,12 @@ def exists(self) -> bool: ) raise DataSetError(message) from exc - @abc.abstractmethod def _exists(self) -> bool: - raise NotImplementedError( - "`{}` inherits from ExistsMixin and " - "it must implement the `_exists` method".format(self.__class__.__name__) + logging.getLogger(__name__).warning( + "`exists()` not implemented for `%s`. 
" "Assuming output does not exist.", + self.__class__.__name__, ) + return False def generate_current_version() -> str: From c9ca3693cc029fcb9ae30f51da41469a2a8d35bb Mon Sep 17 00:00:00 2001 From: Nasef Khan Date: Wed, 26 Jun 2019 11:19:37 +0100 Subject: [PATCH 04/44] Merge pull request #105 from quantumblacklabs/release/0.14.3 Release 0.14.3 --- kedro/io/__init__.py | 4 ++-- kedro/io/core.py | 23 +++++++++++++++++++++-- kedro/utils.py | 4 ++-- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/kedro/io/__init__.py b/kedro/io/__init__.py index 2c9436ab2..0707691f9 100644 --- a/kedro/io/__init__.py +++ b/kedro/io/__init__.py @@ -14,8 +14,8 @@ # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # -# The QuantumBlack Visual Analytics Limited (“QuantumBlack”) name and logo -# (either separately or in combination, “QuantumBlack Trademarks”) are +# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo +# (either separately or in combination, "QuantumBlack Trademarks") are # trademarks of QuantumBlack. The License does not grant you any right or # license to the QuantumBlack Trademarks. You may not use the QuantumBlack # Trademarks or any confusingly similar mark as a trademark for your product, diff --git a/kedro/io/core.py b/kedro/io/core.py index 45ec91597..b0a347b77 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -14,8 +14,8 @@ # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # -# The QuantumBlack Visual Analytics Limited (“QuantumBlack”) name and logo -# (either separately or in combination, “QuantumBlack Trademarks”) are +# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo +# (either separately or in combination, "QuantumBlack Trademarks") are # trademarks of QuantumBlack. 
The License does not grant you any right or # license to the QuantumBlack Trademarks. You may not use the QuantumBlack # Trademarks or any confusingly similar mark as a trademark for your product, @@ -319,6 +319,25 @@ def _exists(self) -> bool: ) return False + def release(self) -> bool: + """Release any cached data. + + Raises: + DataSetError: when underlying exists method raises error. + + """ + try: + logging.getLogger(__name__).debug("Releasing %s", str(self)) + self._release() + except Exception as exc: + message = "Failed during release for data set {}.\n{}".format( + str(self), str(exc) + ) + raise DataSetError(message) from exc + + def _release(self) -> None: + pass + def generate_current_version() -> str: """Generate the current version to be used by versioned data sets. diff --git a/kedro/utils.py b/kedro/utils.py index 63a0b8362..c126cb916 100644 --- a/kedro/utils.py +++ b/kedro/utils.py @@ -14,8 +14,8 @@ # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # -# The QuantumBlack Visual Analytics Limited (“QuantumBlack”) name and logo -# (either separately or in combination, “QuantumBlack Trademarks”) are +# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo +# (either separately or in combination, "QuantumBlack Trademarks") are # trademarks of QuantumBlack. The License does not grant you any right or # license to the QuantumBlack Trademarks. 
You may not use the QuantumBlack # Trademarks or any confusingly similar mark as a trademark for your product, From f06a603bde910b6ada105c5ba80216989bd3c863 Mon Sep 17 00:00:00 2001 From: "Kiyohito Kunii (Kiyo)" <8097799+921kiyo@users.noreply.github.com> Date: Tue, 13 Aug 2019 15:00:59 +0100 Subject: [PATCH 05/44] Merge pull request #184 from quantumblacklabs/release/0.15.0 Release 0.15.0 --- kedro/io/__init__.py | 4 +- kedro/io/core.py | 184 ++++++++++++++++++++++++------------------- kedro/utils.py | 2 +- 3 files changed, 105 insertions(+), 85 deletions(-) diff --git a/kedro/io/__init__.py b/kedro/io/__init__.py index 0707691f9..ba16c0651 100644 --- a/kedro/io/__init__.py +++ b/kedro/io/__init__.py @@ -32,12 +32,12 @@ """ from .core import AbstractDataSet # NOQA +from .core import AbstractVersionedDataSet # NOQA from .core import DataSetAlreadyExistsError # NOQA from .core import DataSetError # NOQA from .core import DataSetNotFoundError # NOQA -from .core import FilepathVersionMixIn # NOQA -from .core import S3PathVersionMixIn # NOQA from .core import Version # NOQA +from .csv_http import CSVHTTPDataSet # NOQA from .csv_local import CSVLocalDataSet # NOQA from .csv_s3 import CSVS3DataSet # NOQA from .data_catalog import DataCatalog # NOQA diff --git a/kedro/io/core.py b/kedro/io/core.py index b0a347b77..029bb87da 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -33,11 +33,13 @@ import abc import copy import logging +import os from collections import namedtuple from datetime import datetime, timezone from glob import iglob -from pathlib import Path, PurePosixPath -from typing import Any, Dict, Type +from pathlib import Path, PurePath +from typing import Any, Callable, Dict, List, Optional, Type +from urllib.parse import urlparse from warnings import warn from kedro.utils import load_obj @@ -314,12 +316,12 @@ def exists(self) -> bool: def _exists(self) -> bool: logging.getLogger(__name__).warning( - "`exists()` not implemented for `%s`. 
" "Assuming output does not exist.", + "`exists()` not implemented for `%s`. Assuming output does not exist.", self.__class__.__name__, ) return False - def release(self) -> bool: + def release(self) -> None: """Release any cached data. Raises: @@ -372,94 +374,112 @@ class Version(namedtuple("Version", ["load", "save"])): ) -# pylint: disable=too-few-public-methods -class FilepathVersionMixIn: - """Mixin class which helps to version filepath-like data sets.""" +def _local_exists(filepath: str) -> bool: + return Path(filepath).exists() + + +def is_remote_path(filepath: str) -> bool: + """ + Check if the given path looks like a remote URL (has scheme). + """ + # Get rid of Windows-specific "C:\" start, + # which is treated as a URL scheme. + _, filepath = os.path.splitdrive(filepath) + return bool(urlparse(filepath).scheme) + + +class AbstractVersionedDataSet(AbstractDataSet): + """ + ``AbstractVersionedDataSet`` is the base class for all versioned data set implementations. + All data sets that implement versioning should extend this abstract class + and implement the methods marked as abstract. 
+ + Example: + :: + + >>> from kedro.io import AbstractVersionedDataSet + >>> import pandas as pd + >>> + >>> class MyOwnDataSet(AbstractVersionedDataSet): + >>> def __init__(self, param1, param2, filepath, version): + >>> super().__init__(filepath, version) + >>> self._param1 = param1 + >>> self._param2 = param2 + >>> + >>> def _load(self) -> pd.DataFrame: + >>> load_path = self._get_load_path() + >>> return pd.read_csv(load_path) + >>> + >>> def _save(self, df: pd.DataFrame) -> None: + >>> save_path = self._get_save_path() + >>> df.to_csv(save_path) + >>> + >>> def _describe(self): + >>> return dict(version=self._version, param1=self._param1, param2=self._param2) + """ + + # pylint: disable=abstract-method + + def __init__( + self, + filepath: PurePath, + version: Optional[Version], + exists_function: Callable[[str], bool] = None, + glob_function: Callable[[str], List[str]] = None, + ): + """Creates a new instance of ``AbstractVersionedDataSet``. + + Args: + filepath: Path to file. + version: If specified, should be an instance of + ``kedro.io.core.Version``. If its ``load`` attribute is + None, the latest version will be loaded. If its ``save`` + attribute is None, save version will be autogenerated. + exists_function: Function that is used for determining whether + a path exists in a filesystem. + glob_function: Function that is used for finding all paths + in a filesystem, which match a given pattern. 
+ """ + self._filepath = filepath + self._version = version + self._exists_function = exists_function or _local_exists + self._glob_function = glob_function or iglob + + def _get_load_path(self) -> PurePath: + if not self._version: + return self._filepath + if self._version.load: + return self._get_versioned_path(self._version.load) + + pattern = str(self._get_versioned_path("*")) + paths = [ + path for path in self._glob_function(pattern) if self._exists_function(path) + ] - def _get_load_path(self, filepath: str, version: Version = None) -> str: - if not version: - return filepath - if version.load: - return self._get_versioned_path(filepath, version.load) - pattern = self._get_versioned_path(filepath, "*") - paths = [f for f in iglob(pattern) if Path(f).exists()] if not paths: - message = "Did not find any versions for {}".format(str(self)) - raise DataSetError(message) - return sorted(paths, reverse=True)[0] - - def _get_save_path(self, filepath: str, version: Version = None) -> str: - if not version: - return filepath - save_version = version.save or generate_current_version() - versioned_path = self._get_versioned_path(filepath, save_version) - if Path(versioned_path).exists(): - message = ( - "Save path `{}` for {} must not exist if versioning " - "is enabled.".format(versioned_path, str(self)) - ) - raise DataSetError(message) - return versioned_path + raise DataSetError("Did not find any versions for {}".format(str(self))) - @staticmethod - def _get_versioned_path(filepath: str, version: str) -> str: - filepath = Path(filepath) - return str(filepath / version / filepath.name) + most_recent = sorted(paths, reverse=True)[0] + return PurePath(most_recent) - def _check_paths_consistency(self, load_path: str, save_path: str): - if load_path != save_path: - warn(_PATH_CONSISTENCY_WARNING.format(save_path, load_path, str(self))) + def _get_save_path(self) -> PurePath: + if not self._version: + return self._filepath + save_version = self._version.save or 
generate_current_version() + versioned_path = self._get_versioned_path(save_version) -# pylint: disable=too-few-public-methods -class S3PathVersionMixIn: - """Mixin class which helps to version S3 data sets.""" - - def _get_load_path( - self, client: Any, bucket: str, filepath: str, version: Version = None - ) -> str: - if not version: - return filepath - if version.load: - return self._get_versioned_path(filepath, version.load) - prefix = filepath if filepath.endswith("/") else filepath + "/" - keys = list(self._list_objects(client, bucket, prefix)) - if not keys: - message = "Did not find any versions for {}".format(str(self)) - raise DataSetError(message) - return sorted(keys, reverse=True)[0] - - def _get_save_path( - self, client: Any, bucket: str, filepath: str, version: Version = None - ) -> str: - if not version: - return filepath - save_version = version.save or generate_current_version() - versioned_path = self._get_versioned_path(filepath, save_version) - if versioned_path in self._list_objects(client, bucket, versioned_path): - message = ( + if self._exists_function(str(versioned_path)): + raise DataSetError( "Save path `{}` for {} must not exist if versioning " "is enabled.".format(versioned_path, str(self)) ) - raise DataSetError(message) + return versioned_path - def _check_paths_consistency(self, load_path: str, save_path: str): + def _get_versioned_path(self, version: str) -> PurePath: + return self._filepath / version / self._filepath.name + + def _check_paths_consistency(self, load_path: PurePath, save_path: PurePath): if load_path != save_path: warn(_PATH_CONSISTENCY_WARNING.format(save_path, load_path, str(self))) - - @staticmethod - def _get_versioned_path(filepath: str, version: str) -> str: - filepath = PurePosixPath(filepath) - return str(filepath / version / filepath.name) - - @staticmethod - def _list_objects(client: Any, bucket: str, prefix: str): - paginator = client.get_paginator("list_objects_v2") - page_iterator = 
paginator.paginate(Bucket=bucket, Prefix=prefix) - for page in page_iterator: - yield from ( - obj["Key"] - for obj in page.get("Contents", []) - if not obj["Key"].endswith("/") - ) diff --git a/kedro/utils.py b/kedro/utils.py index c126cb916..9a9621f96 100644 --- a/kedro/utils.py +++ b/kedro/utils.py @@ -34,7 +34,7 @@ from typing import Any -def load_obj(obj_path: str, default_obj_path: str) -> Any: +def load_obj(obj_path: str, default_obj_path: str = "") -> Any: """Extract an object from a given path. Args: From f69bd54c8b9de8c18ac76ff78f4f3072b61b55ba Mon Sep 17 00:00:00 2001 From: Anton Kirilenko Date: Thu, 12 Sep 2019 16:41:03 +0100 Subject: [PATCH 06/44] Merge pull request #232 from quantumblacklabs/release/0.15.1 [KED-1003] Release 0.15.1 --- kedro/io/core.py | 48 ++++++++++++++++++++++++++++++++++++++++-------- kedro/utils.py | 1 - 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index 029bb87da..ba5f053c6 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -191,6 +191,12 @@ def from_config( ) return data_set + def get_last_load_version(self) -> Optional[str]: + """Versioned datasets should override this property to return last loaded + version""" + # pylint: disable=no-self-use + return None # pragma: no cover + def load(self) -> Any: """Loads data by delegation to the provided load method. @@ -215,6 +221,12 @@ def load(self) -> Any: ) raise DataSetError(message) from exc + def get_last_save_version(self) -> Optional[str]: + """Versioned datasets should override this property to return last saved + version.""" + # pylint: disable=no-self-use + return None # pragma: no cover + def save(self, data: Any) -> None: """Saves data by delegation to the provided save method. 
@@ -444,31 +456,51 @@ def __init__( self._version = version self._exists_function = exists_function or _local_exists self._glob_function = glob_function or iglob + self._last_load_version = None # type: Optional[str] + self._last_save_version = None # type: Optional[str] + + def get_last_load_version(self) -> Optional[str]: + return self._last_load_version def _get_load_path(self) -> PurePath: if not self._version: + # When versioning is disabled, load from provided filepath + self._last_load_version = None return self._filepath + if self._version.load: + # When load version is pinned, get versioned path + self._last_load_version = self._version.load return self._get_versioned_path(self._version.load) + # When load version is unpinned, fetch the most recent existing + # version from the given path pattern = str(self._get_versioned_path("*")) - paths = [ - path for path in self._glob_function(pattern) if self._exists_function(path) - ] + version_paths = sorted(self._glob_function(pattern), reverse=True) + most_recent = next( + (path for path in version_paths if self._exists_function(path)), None + ) - if not paths: + if not most_recent: raise DataSetError("Did not find any versions for {}".format(str(self))) - most_recent = sorted(paths, reverse=True)[0] - return PurePath(most_recent) + versioned_path = PurePath(most_recent) + self._last_load_version = versioned_path.parent.name + + return versioned_path + + def get_last_save_version(self) -> Optional[str]: + return self._last_save_version def _get_save_path(self) -> PurePath: if not self._version: + # When versioning is disabled, return given filepath + self._last_save_version = None return self._filepath - save_version = self._version.save or generate_current_version() - versioned_path = self._get_versioned_path(save_version) + self._last_save_version = self._version.save or generate_current_version() + versioned_path = self._get_versioned_path(self._last_save_version) if 
self._exists_function(str(versioned_path)): raise DataSetError( "Save path `{}` for {} must not exist if versioning " diff --git a/kedro/utils.py b/kedro/utils.py index 9a9621f96..a4e0d07a3 100644 --- a/kedro/utils.py +++ b/kedro/utils.py @@ -29,7 +29,6 @@ """This module provides a set of helper functions being used across different components of kedro package. """ - import importlib from typing import Any From f4e6ea88946f739cc9519b1adc834e4111bcc799 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lorena=20B=C4=83lan?= Date: Tue, 8 Oct 2019 17:01:46 +0100 Subject: [PATCH 07/44] Release 0.15.2 --- kedro/io/core.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index ba5f053c6..4885add2a 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -132,7 +132,7 @@ def from_config( """ config = copy.deepcopy(config) - save_version = save_version or generate_current_version() + save_version = save_version or generate_timestamp() if VERSION_KEY in config: # remove "version" key so that it's not passed @@ -191,6 +191,10 @@ def from_config( ) return data_set + @property + def _logger(self) -> logging.Logger: + return logging.getLogger(__name__) + def get_last_load_version(self) -> Optional[str]: """Versioned datasets should override this property to return last loaded version""" @@ -209,7 +213,7 @@ def load(self) -> Any: """ try: - logging.getLogger(__name__).debug("Loading %s", str(self)) + self._logger.debug("Loading %s", str(self)) return self._load() except DataSetError: raise @@ -242,7 +246,7 @@ def save(self, data: Any) -> None: raise DataSetError("Saving `None` to a `DataSet` is not allowed") try: - logging.getLogger(__name__).debug("Saving %s", str(self)) + self._logger.debug("Saving %s", str(self)) self._save(data) except DataSetError: raise @@ -316,9 +320,7 @@ def exists(self) -> bool: """ try: - logging.getLogger(__name__).debug( - "Checking whether target of %s exists", 
str(self) - ) + self._logger.debug("Checking whether target of %s exists", str(self)) return self._exists() except Exception as exc: message = "Failed during exists check for data set {}.\n{}".format( @@ -327,7 +329,7 @@ def exists(self) -> bool: raise DataSetError(message) from exc def _exists(self) -> bool: - logging.getLogger(__name__).warning( + self._logger.warning( "`exists()` not implemented for `%s`. Assuming output does not exist.", self.__class__.__name__, ) @@ -341,7 +343,7 @@ def release(self) -> None: """ try: - logging.getLogger(__name__).debug("Releasing %s", str(self)) + self._logger.debug("Releasing %s", str(self)) self._release() except Exception as exc: message = "Failed during release for data set {}.\n{}".format( @@ -353,11 +355,11 @@ def _release(self) -> None: pass -def generate_current_version() -> str: - """Generate the current version to be used by versioned data sets. +def generate_timestamp() -> str: + """Generate the timestamp to be used by versioning. Returns: - String representation of the current version. + String representation of the current timestamp. """ current_ts = datetime.now(tz=timezone.utc) @@ -387,7 +389,8 @@ class Version(namedtuple("Version", ["load", "save"])): def _local_exists(filepath: str) -> bool: - return Path(filepath).exists() + filepath = Path(filepath) + return filepath.exists() or any(par.is_file() for par in filepath.parents) def is_remote_path(filepath: str) -> bool: @@ -400,11 +403,11 @@ def is_remote_path(filepath: str) -> bool: return bool(urlparse(filepath).scheme) -class AbstractVersionedDataSet(AbstractDataSet): +class AbstractVersionedDataSet(AbstractDataSet, abc.ABC): """ - ``AbstractVersionedDataSet`` is the base class for all versioned data set implementations. - All data sets that implement versioning should extend this abstract class - and implement the methods marked as abstract. + ``AbstractVersionedDataSet`` is the base class for all versioned data set + implementations. 
All data sets that implement versioning should extend this + abstract class and implement the methods marked as abstract. Example: :: @@ -498,7 +501,7 @@ def _get_save_path(self) -> PurePath: self._last_save_version = None return self._filepath - self._last_save_version = self._version.save or generate_current_version() + self._last_save_version = self._version.save or generate_timestamp() versioned_path = self._get_versioned_path(self._last_save_version) if self._exists_function(str(versioned_path)): From fc68c721b2a560a792a16d07a5087977f7ce66c9 Mon Sep 17 00:00:00 2001 From: Dmitrii Deriabin <44967953+DmitriiDeriabinQB@users.noreply.github.com> Date: Wed, 30 Oct 2019 16:50:41 +0000 Subject: [PATCH 08/44] Merge pull request #307 from quantumblacklabs/release/0.15.4 Release 0.15.4 --- kedro/io/core.py | 108 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 78 insertions(+), 30 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index 4885add2a..996945d0a 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -44,7 +44,6 @@ from kedro.utils import load_obj -MAX_DESCRIPTION_LENGTH = 70 VERSIONED_FLAG_KEY = "versioned" VERSION_KEY = "version" @@ -76,6 +75,14 @@ class DataSetAlreadyExistsError(DataSetError): pass +class VersionNotFoundError(DataSetError): + """``VersionNotFoundError`` raised by ``AbstractVersionedDataSet`` implementations + in case of no load versions available for the data set. + """ + + pass + + class AbstractDataSet(abc.ABC): """``AbstractDataSet`` is the base class for all data set implementations. All data set implementations should extend this abstract class @@ -212,8 +219,9 @@ def load(self) -> Any: """ + self._logger.debug("Loading %s", str(self)) + try: - self._logger.debug("Loading %s", str(self)) return self._load() except DataSetError: raise @@ -263,8 +271,6 @@ def _to_str(obj, is_root=False): formatted like DataSet(key=value). 2. Dictionaries have the keys alphabetically sorted recursively. 3. 
Empty dictionaries and None values are not shown. - 4. String representations of dictionary values are - capped to MAX_DESCRIPTION_LENGTH. """ fmt = "{}={}" if is_root else "'{}': {}" # 1 @@ -281,9 +287,7 @@ def _to_str(obj, is_root=False): return text if is_root else "{" + text + "}" # 1 # not a dictionary - value = str(obj) - suffix = "" if len(value) <= MAX_DESCRIPTION_LENGTH else "..." - return value[:MAX_DESCRIPTION_LENGTH] + suffix # 4 + return str(obj) return "{}({})".format(type(self).__name__, _to_str(self._describe(), True)) @@ -380,8 +384,8 @@ class Version(namedtuple("Version", ["load", "save"])): __slots__ = () -_PATH_CONSISTENCY_WARNING = ( - "Save path `{}` did not match load path `{}` for {}. This is strongly " +CONSISTENCY_WARNING = ( + "Save version `{}` did not match load version `{}` for {}. This is strongly " "discouraged due to inconsistencies it may cause between `save` and " "`load` operations. Please refrain from setting exact load version for " "intermediate data sets where possible to avoid this warning." 
@@ -415,6 +419,7 @@ class AbstractVersionedDataSet(AbstractDataSet, abc.ABC): >>> from kedro.io import AbstractVersionedDataSet >>> import pandas as pd >>> + >>> >>> class MyOwnDataSet(AbstractVersionedDataSet): >>> def __init__(self, param1, param2, filepath, version): >>> super().__init__(filepath, version) @@ -427,7 +432,11 @@ class AbstractVersionedDataSet(AbstractDataSet, abc.ABC): >>> >>> def _save(self, df: pd.DataFrame) -> None: >>> save_path = self._get_save_path() - >>> df.to_csv(save_path) + >>> df.to_csv(str(save_path)) + >>> + >>> def _exists(self) -> bool: + >>> path = self._get_load_path() + >>> return path.is_file() >>> >>> def _describe(self): >>> return dict(version=self._version, param1=self._param1, param2=self._param2) @@ -465,16 +474,11 @@ def __init__( def get_last_load_version(self) -> Optional[str]: return self._last_load_version - def _get_load_path(self) -> PurePath: + def _lookup_load_version(self) -> Optional[str]: if not self._version: - # When versioning is disabled, load from provided filepath - self._last_load_version = None - return self._filepath - + return None if self._version.load: - # When load version is pinned, get versioned path - self._last_load_version = self._version.load - return self._get_versioned_path(self._version.load) + return self._version.load # When load version is unpinned, fetch the most recent existing # version from the given path @@ -485,25 +489,35 @@ def _get_load_path(self) -> PurePath: ) if not most_recent: - raise DataSetError("Did not find any versions for {}".format(str(self))) + raise VersionNotFoundError( + "Did not find any versions for {}".format(str(self)) + ) - versioned_path = PurePath(most_recent) - self._last_load_version = versioned_path.parent.name + return PurePath(most_recent).parent.name - return versioned_path + def _get_load_path(self) -> PurePath: + if not self._version: + # When versioning is disabled, load from original filepath + return self._filepath + + load_version = 
self._last_load_version or self._lookup_load_version() + return self._get_versioned_path(load_version) # type: ignore def get_last_save_version(self) -> Optional[str]: return self._last_save_version + def _lookup_save_version(self) -> Optional[str]: + if not self._version: + return None + return self._version.save or generate_timestamp() + def _get_save_path(self) -> PurePath: if not self._version: - # When versioning is disabled, return given filepath - self._last_save_version = None + # When versioning is disabled, return original filepath return self._filepath - self._last_save_version = self._version.save or generate_timestamp() - - versioned_path = self._get_versioned_path(self._last_save_version) + save_version = self._last_save_version or self._lookup_save_version() + versioned_path = self._get_versioned_path(save_version) # type: ignore if self._exists_function(str(versioned_path)): raise DataSetError( "Save path `{}` for {} must not exist if versioning " @@ -515,6 +529,40 @@ def _get_save_path(self) -> PurePath: def _get_versioned_path(self, version: str) -> PurePath: return self._filepath / version / self._filepath.name - def _check_paths_consistency(self, load_path: PurePath, save_path: PurePath): - if load_path != save_path: - warn(_PATH_CONSISTENCY_WARNING.format(save_path, load_path, str(self))) + def load(self) -> Any: + self._last_load_version = self._lookup_load_version() + return super().load() + + def save(self, data: Any) -> None: + self._last_save_version = self._lookup_save_version() + super().save(data) + + load_version = self._lookup_load_version() + if load_version != self._last_save_version: + warn( + CONSISTENCY_WARNING.format( + self._last_save_version, load_version, str(self) + ) + ) + + def exists(self) -> bool: + """Checks whether a data set's output already exists by calling + the provided _exists() method. + + Returns: + Flag indicating whether the output already exists. 
+ + Raises: + DataSetError: when underlying exists method raises error. + + """ + self._logger.debug("Checking whether target of %s exists", str(self)) + try: + return self._exists() + except VersionNotFoundError: + return False + except Exception as exc: + message = "Failed during exists check for data set {}.\n{}".format( + str(self), str(exc) + ) + raise DataSetError(message) from exc From e3cf344308653d4c9316b6bd4d7bb85ddae7c859 Mon Sep 17 00:00:00 2001 From: andrii-ivaniuk Date: Thu, 12 Dec 2019 15:16:26 +0200 Subject: [PATCH 09/44] Merge pull request #352 from quantumblacklabs/release/0.15.5 Release 0.15.5 --- kedro/io/__init__.py | 2 + kedro/io/core.py | 108 +++++++++++++++++++++++++++---------------- 2 files changed, 69 insertions(+), 41 deletions(-) diff --git a/kedro/io/__init__.py b/kedro/io/__init__.py index ba16c0651..0a4a76b70 100644 --- a/kedro/io/__init__.py +++ b/kedro/io/__init__.py @@ -44,10 +44,12 @@ from .excel_local import ExcelLocalDataSet # NOQA from .hdf_local import HDFLocalDataSet # NOQA from .hdf_s3 import HDFS3DataSet # NOQA +from .json_dataset import JSONDataSet # NOQA from .json_local import JSONLocalDataSet # NOQA from .lambda_data_set import LambdaDataSet # NOQA from .memory_data_set import MemoryDataSet # NOQA from .parquet_local import ParquetLocalDataSet # NOQA +from .partitioned_data_set import PartitionedDataSet # NOQA from .pickle_local import PickleLocalDataSet # NOQA from .pickle_s3 import PickleS3DataSet # NOQA from .sql import SQLQueryDataSet # NOQA diff --git a/kedro/io/core.py b/kedro/io/core.py index 996945d0a..f71d65e94 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -38,7 +38,7 @@ from datetime import datetime, timezone from glob import iglob from pathlib import Path, PurePath -from typing import Any, Callable, Dict, List, Optional, Type +from typing import Any, Callable, Dict, List, Optional, Tuple, Type from urllib.parse import urlparse from warnings import warn @@ -138,49 +138,18 @@ def from_config( from 
its config. """ - config = copy.deepcopy(config) - save_version = save_version or generate_timestamp() - - if VERSION_KEY in config: - # remove "version" key so that it's not passed - # to the 'unversioned' data set constructor - message = ( - "`%s` attribute removed from `%s` data set " - "configuration since it is a reserved word and cannot " - "be directly specified", - VERSION_KEY, - name, - ) - logging.getLogger(__name__).warning(*message) - del config[VERSION_KEY] - if config.pop(VERSIONED_FLAG_KEY, False): # data set is versioned - config[VERSION_KEY] = Version(load_version, save_version) - - dataset_class_path = config.pop("type") try: - class_obj = load_obj(dataset_class_path, "kedro.io") - except ImportError: - raise DataSetError( - "Cannot import module when trying to load type " - "`{}` for DataSet `{}`.".format(dataset_class_path, name) + class_obj, config = parse_dataset_definition( + config, load_version, save_version ) - except AttributeError: + except Exception as ex: raise DataSetError( - "Class `{}` for DataSet `{}` not found.".format( - dataset_class_path, name - ) + "An exception occurred when parsing config " + "for DataSet `{}`:\n{}".format(name, str(ex)) ) - if not issubclass(class_obj, AbstractDataSet): - raise DataSetError( - "DataSet '{}' type `{}.{}` is invalid: " - "all data set types must extend " - "`AbstractDataSet`.".format( - name, class_obj.__module__, class_obj.__qualname__ - ) - ) try: - data_set = class_obj(**config) + data_set = class_obj(**config) # type: ignore except TypeError as err: raise DataSetError( "\n{}.\nDataSet '{}' must only contain " @@ -392,15 +361,72 @@ class Version(namedtuple("Version", ["load", "save"])): ) +def parse_dataset_definition( + config: Dict[str, Any], load_version: str = None, save_version: str = None +) -> Tuple[Type[AbstractDataSet], Dict]: + """Parse and instantiate a dataset class using the configuration provided. + + Args: + config: Data set config dictionary. 
It *must* contain the `type` key + with fully qualified class name. + load_version: Version string to be used for ``load`` operation if + the data set is versioned. Has no effect on the data set + if versioning was not enabled. + save_version: Version string to be used for ``save`` operation if + the data set is versioned. Has no effect on the data set + if versioning was not enabled. + + Raises: + DataSetError: If the function fails to parse the configuration provided. + + Returns: + 2-tuple: (Dataset class object, configuration dictionary) + """ + save_version = save_version or generate_timestamp() + config = copy.deepcopy(config) + + if "type" not in config: + raise DataSetError("`type` is missing from DataSet catalog configuration") + + class_obj = config.pop("type") + + if isinstance(class_obj, str): + try: + class_obj = load_obj(class_obj, "kedro.io") + except ImportError: + raise DataSetError( + "Cannot import module when trying to load type `{}`.".format(class_obj) + ) + except AttributeError: + raise DataSetError("Class `{}` not found.".format(class_obj)) + if not issubclass(class_obj, AbstractDataSet): + raise DataSetError( + "DataSet type `{}.{}` is invalid: all data set types must extend " + "`AbstractDataSet`.".format(class_obj.__module__, class_obj.__qualname__) + ) + + if VERSION_KEY in config: + # remove "version" key so that it's not passed + # to the "unversioned" data set constructor + message = ( + "`%s` attribute removed from data set configuration since it is a " + "reserved word and cannot be directly specified" + ) + logging.getLogger(__name__).warning(message, VERSION_KEY) + del config[VERSION_KEY] + if config.pop(VERSIONED_FLAG_KEY, False): # data set is versioned + config[VERSION_KEY] = Version(load_version, save_version) + + return class_obj, config + + def _local_exists(filepath: str) -> bool: filepath = Path(filepath) return filepath.exists() or any(par.is_file() for par in filepath.parents) def is_remote_path(filepath: str) -> bool: - 
""" - Check if the given path looks like a remote URL (has scheme). - """ + """Check if the given path looks like a remote URL (has scheme).""" # Get rid of Windows-specific "C:\" start, # which is treated as a URL scheme. _, filepath = os.path.splitdrive(filepath) From 5d2984083b331c94f5bad0b9c774a193afded763 Mon Sep 17 00:00:00 2001 From: Lim H Date: Wed, 26 Feb 2020 11:43:52 +0000 Subject: [PATCH 10/44] Merge pull request #455 from quantumblacklabs/release/0.15.6 Release/0.15.6 --- kedro/io/__init__.py | 5 +- kedro/io/core.py | 113 ++++++++++++++++++++++++++++++++++++++----- kedro/utils.py | 2 +- 3 files changed, 106 insertions(+), 14 deletions(-) diff --git a/kedro/io/__init__.py b/kedro/io/__init__.py index 0a4a76b70..9a3a61f90 100644 --- a/kedro/io/__init__.py +++ b/kedro/io/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2018-2019 QuantumBlack Visual Analytics Limited +# Copyright 2020 QuantumBlack Visual Analytics Limited # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -31,6 +31,7 @@ which allows implementation of various ``AbstractDataSet``s. 
""" +from .cached_dataset import CachedDataSet # NOQA from .core import AbstractDataSet # NOQA from .core import AbstractVersionedDataSet # NOQA from .core import DataSetAlreadyExistsError # NOQA @@ -41,6 +42,7 @@ from .csv_local import CSVLocalDataSet # NOQA from .csv_s3 import CSVS3DataSet # NOQA from .data_catalog import DataCatalog # NOQA +from .data_catalog_with_default import DataCatalogWithDefault # NOQA from .excel_local import ExcelLocalDataSet # NOQA from .hdf_local import HDFLocalDataSet # NOQA from .hdf_s3 import HDFS3DataSet # NOQA @@ -49,6 +51,7 @@ from .lambda_data_set import LambdaDataSet # NOQA from .memory_data_set import MemoryDataSet # NOQA from .parquet_local import ParquetLocalDataSet # NOQA +from .partitioned_data_set import IncrementalDataSet # NOQA from .partitioned_data_set import PartitionedDataSet # NOQA from .pickle_local import PickleLocalDataSet # NOQA from .pickle_s3 import PickleS3DataSet # NOQA diff --git a/kedro/io/core.py b/kedro/io/core.py index f71d65e94..5b9c0fe7b 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -1,4 +1,4 @@ -# Copyright 2018-2019 QuantumBlack Visual Analytics Limited +# Copyright 2020 QuantumBlack Visual Analytics Limited # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -34,18 +34,24 @@ import copy import logging import os +import warnings from collections import namedtuple from datetime import datetime, timezone from glob import iglob from pathlib import Path, PurePath from typing import Any, Callable, Dict, List, Optional, Tuple, Type from urllib.parse import urlparse -from warnings import warn + +from fsspec.utils import infer_storage_options from kedro.utils import load_obj +warnings.simplefilter("default", DeprecationWarning) + VERSIONED_FLAG_KEY = "versioned" VERSION_KEY = "version" +HTTP_PROTOCOLS = ("http", "https") +PROTOCOL_DELIMITER = "://" class DataSetError(Exception): @@ -327,6 +333,12 @@ def release(self) -> None: def _release(self) -> None: pass + def _copy(self, **overwrite_params) -> "AbstractDataSet": + dataset_copy = copy.deepcopy(self) + for name, value in overwrite_params.items(): + setattr(dataset_copy, name, value) + return dataset_copy + def generate_timestamp() -> str: """Generate the timestamp to be used by versioning. @@ -353,17 +365,19 @@ class Version(namedtuple("Version", ["load", "save"])): __slots__ = () -CONSISTENCY_WARNING = ( +_CONSISTENCY_WARNING = ( "Save version `{}` did not match load version `{}` for {}. This is strongly " "discouraged due to inconsistencies it may cause between `save` and " "`load` operations. Please refrain from setting exact load version for " "intermediate data sets where possible to avoid this warning." ) +_DEFAULT_PACKAGES = ["kedro.io.", "kedro.extras.datasets.", ""] + def parse_dataset_definition( config: Dict[str, Any], load_version: str = None, save_version: str = None -) -> Tuple[Type[AbstractDataSet], Dict]: +) -> Tuple[Type[AbstractDataSet], Dict[str, Any]]: """Parse and instantiate a dataset class using the configuration provided. 
Args: @@ -389,16 +403,20 @@ def parse_dataset_definition( raise DataSetError("`type` is missing from DataSet catalog configuration") class_obj = config.pop("type") - if isinstance(class_obj, str): - try: - class_obj = load_obj(class_obj, "kedro.io") - except ImportError: + if len(class_obj.strip(".")) != len(class_obj): raise DataSetError( - "Cannot import module when trying to load type `{}`.".format(class_obj) + "`type` class path does not support relative " + "paths or paths ending with a dot." ) - except AttributeError: + + class_paths = (prefix + class_obj for prefix in _DEFAULT_PACKAGES) + trials = (_load_obj(class_path) for class_path in class_paths) + try: + class_obj = next(obj for obj in trials if obj is not None) + except StopIteration: raise DataSetError("Class `{}` not found.".format(class_obj)) + if not issubclass(class_obj, AbstractDataSet): raise DataSetError( "DataSet type `{}.{}` is invalid: all data set types must extend " @@ -420,6 +438,14 @@ def parse_dataset_definition( return class_obj, config +def _load_obj(class_path: str) -> Optional[object]: + try: + class_obj = load_obj(class_path) + except (ImportError, AttributeError, ValueError): + return None + return class_obj + + def _local_exists(filepath: str) -> bool: filepath = Path(filepath) return filepath.exists() or any(par.is_file() for par in filepath.parents) @@ -565,8 +591,8 @@ def save(self, data: Any) -> None: load_version = self._lookup_load_version() if load_version != self._last_save_version: - warn( - CONSISTENCY_WARNING.format( + warnings.warn( + _CONSISTENCY_WARNING.format( self._last_save_version, load_version, str(self) ) ) @@ -592,3 +618,66 @@ def exists(self) -> bool: str(self), str(exc) ) raise DataSetError(message) from exc + + +def get_protocol_and_path(filepath: str, version: Version = None) -> Tuple[str, str]: + """Parses filepath on protocol and path. + + Args: + filepath: raw filepath e.g.: `gcs://bucket/test.json`. 
+ version: instance of ``kedro.io.core.Version`` or None. + + Returns: + Protocol and path. + + Raises: + DataSetError: when protocol is http(s) and version is not None. + Note: HTTP(s) dataset doesn't support versioning. + """ + options_dict = infer_storage_options(filepath) + path = options_dict["path"] + protocol = options_dict["protocol"] + + if protocol in HTTP_PROTOCOLS: + if version: + raise DataSetError( + "HTTP(s) DataSet doesn't support versioning. " + "Please remove version flag from the dataset configuration." + ) + path = path.split(PROTOCOL_DELIMITER, 1)[-1] + + return protocol, path + + +def get_filepath_str(path: PurePath, protocol: str) -> str: + """Returns filepath. Returns full filepath (with protocol) if protocol is HTTP(s). + + Args: + path: filepath without protocol. + protocol: protocol. + + Returns: + Filepath string. + """ + path = str(path) + if protocol in HTTP_PROTOCOLS: + path = "".join((protocol, PROTOCOL_DELIMITER, path)) + return path + + +def validate_on_forbidden_chars(**kwargs): + """Validate that string values do not include white-spaces or ;""" + for key, value in kwargs.items(): + if " " in value or ";" in value: + raise DataSetError( + "Neither white-space nor semicolon are allowed in `{}`.".format(key) + ) + + +def deprecation_warning(class_name): + """Log deprecation warning.""" + warnings.warn( + "{} will be deprecated in future releases. Please refer " + "to replacement datasets in kedro.extras.datasets.".format(class_name), + DeprecationWarning, + ) diff --git a/kedro/utils.py b/kedro/utils.py index a4e0d07a3..832818fba 100644 --- a/kedro/utils.py +++ b/kedro/utils.py @@ -1,4 +1,4 @@ -# Copyright 2018-2019 QuantumBlack Visual Analytics Limited +# Copyright 2020 QuantumBlack Visual Analytics Limited # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
From 9ff268e99d021427fdfed6fa8e428d6a4ab7fbf2 Mon Sep 17 00:00:00 2001 From: "Kiyohito Kunii (Kiyo)" <8097799+921kiyo@users.noreply.github.com> Date: Thu, 5 Mar 2020 09:57:04 +0000 Subject: [PATCH 11/44] Merge pull request #477 from quantumblacklabs/hotfix/0.15.8 Hotfix release 0.15.8 --- kedro/io/core.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index 5b9c0fe7b..9f38bab3d 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -411,6 +411,7 @@ def parse_dataset_definition( ) class_paths = (prefix + class_obj for prefix in _DEFAULT_PACKAGES) + trials = (_load_obj(class_path) for class_path in class_paths) try: class_obj = next(obj for obj in trials if obj is not None) @@ -441,8 +442,14 @@ def parse_dataset_definition( def _load_obj(class_path: str) -> Optional[object]: try: class_obj = load_obj(class_path) - except (ImportError, AttributeError, ValueError): + except ImportError as error: + if error.name in class_path: + return None + # class_obj was successfully loaded, but some dependencies are missing. + raise DataSetError("{} for {}".format(error, class_path)) + except (AttributeError, ValueError): return None + return class_obj From 0c9afa063841830bb44f5cc8a367714c8256e166 Mon Sep 17 00:00:00 2001 From: Lim H Date: Wed, 20 May 2020 11:50:48 +0100 Subject: [PATCH 12/44] Merge pull request #607 from quantumblacklabs/release/0.16.0 Bump version to 0.16.0 --- kedro/io/__init__.py | 18 +--- kedro/io/core.py | 245 +++++++++++++++++++++++++++---------------- kedro/utils.py | 4 +- 3 files changed, 156 insertions(+), 111 deletions(-) diff --git a/kedro/io/__init__.py b/kedro/io/__init__.py index 9a3a61f90..4219a158f 100644 --- a/kedro/io/__init__.py +++ b/kedro/io/__init__.py @@ -4,7 +4,7 @@ # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES @@ -19,7 +19,7 @@ # trademarks of QuantumBlack. The License does not grant you any right or # license to the QuantumBlack Trademarks. You may not use the QuantumBlack # Trademarks or any confusingly similar mark as a trademark for your product, -# or use the QuantumBlack Trademarks in any other manner that might cause +# or use the QuantumBlack Trademarks in any other manner that might cause # confusion in the marketplace, including but not limited to in advertising, # on websites, or on software. # @@ -38,24 +38,10 @@ from .core import DataSetError # NOQA from .core import DataSetNotFoundError # NOQA from .core import Version # NOQA -from .csv_http import CSVHTTPDataSet # NOQA -from .csv_local import CSVLocalDataSet # NOQA -from .csv_s3 import CSVS3DataSet # NOQA from .data_catalog import DataCatalog # NOQA from .data_catalog_with_default import DataCatalogWithDefault # NOQA -from .excel_local import ExcelLocalDataSet # NOQA -from .hdf_local import HDFLocalDataSet # NOQA -from .hdf_s3 import HDFS3DataSet # NOQA -from .json_dataset import JSONDataSet # NOQA -from .json_local import JSONLocalDataSet # NOQA from .lambda_data_set import LambdaDataSet # NOQA from .memory_data_set import MemoryDataSet # NOQA -from .parquet_local import ParquetLocalDataSet # NOQA from .partitioned_data_set import IncrementalDataSet # NOQA from .partitioned_data_set import PartitionedDataSet # NOQA -from .pickle_local import PickleLocalDataSet # NOQA -from .pickle_s3 import PickleS3DataSet # NOQA -from .sql import SQLQueryDataSet # NOQA -from .sql import SQLTableDataSet # NOQA -from .text_local import TextLocalDataSet # NOQA from .transformers import AbstractTransformer # NOQA diff --git a/kedro/io/core.py 
b/kedro/io/core.py index 9f38bab3d..f03f70f7c 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -4,7 +4,7 @@ # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES @@ -19,7 +19,7 @@ # trademarks of QuantumBlack. The License does not grant you any right or # license to the QuantumBlack Trademarks. You may not use the QuantumBlack # Trademarks or any confusingly similar mark as a trademark for your product, -# or use the QuantumBlack Trademarks in any other manner that might cause +# or use the QuantumBlack Trademarks in any other manner that might cause # confusion in the marketplace, including but not limited to in advertising, # on websites, or on software. # @@ -33,25 +33,30 @@ import abc import copy import logging -import os +import re import warnings from collections import namedtuple from datetime import datetime, timezone +from functools import partial from glob import iglob +from operator import attrgetter from pathlib import Path, PurePath from typing import Any, Callable, Dict, List, Optional, Tuple, Type -from urllib.parse import urlparse +from urllib.parse import urlsplit -from fsspec.utils import infer_storage_options +from cachetools import Cache, cachedmethod +from cachetools.keys import hashkey from kedro.utils import load_obj warnings.simplefilter("default", DeprecationWarning) +VERSION_FORMAT = "%Y-%m-%dT%H.%M.%S.%fZ" VERSIONED_FLAG_KEY = "versioned" VERSION_KEY = "version" HTTP_PROTOCOLS = ("http", "https") PROTOCOL_DELIMITER = "://" +CLOUD_PROTOCOLS = ("s3", "gcs", "gs", "adl", "abfs") class DataSetError(Exception): @@ -97,23 +102,37 @@ class AbstractDataSet(abc.ABC): Example: :: - >>> from kedro.io import AbstractDataSet + >>> from pathlib import Path, 
PurePosixPath >>> import pandas as pd + >>> from kedro.io import AbstractDataSet + >>> >>> >>> class MyOwnDataSet(AbstractDataSet): - >>> def __init__(self, param1, param2): + >>> def __init__(self, filepath, param1, param2=True): + >>> self._filepath = PurePosixPath(filepath) >>> self._param1 = param1 >>> self._param2 = param2 >>> >>> def _load(self) -> pd.DataFrame: - >>> print("Dummy load: {}".format(self._param1)) - >>> return pd.DataFrame() + >>> return pd.read_csv(self._filepath) >>> >>> def _save(self, df: pd.DataFrame) -> None: - >>> print("Dummy save: {}".format(self._param2)) + >>> df.to_csv(str(self._filepath)) + >>> + >>> def _exists(self) -> bool: + >>> return Path(self._filepath).exists() >>> >>> def _describe(self): >>> return dict(param1=self._param1, param2=self._param2) + + Example catalog.yml specification: + :: + + my_dataset: + type: .MyOwnDataSet + filepath: data/01_raw/my_data.csv + param1: # param1 is a required argument + # param2 will be True by default """ @classmethod @@ -177,12 +196,6 @@ def from_config( def _logger(self) -> logging.Logger: return logging.getLogger(__name__) - def get_last_load_version(self) -> Optional[str]: - """Versioned datasets should override this property to return last loaded - version""" - # pylint: disable=no-self-use - return None # pragma: no cover - def load(self) -> Any: """Loads data by delegation to the provided load method. @@ -208,12 +221,6 @@ def load(self) -> Any: ) raise DataSetError(message) from exc - def get_last_save_version(self) -> Optional[str]: - """Versioned datasets should override this property to return last saved - version.""" - # pylint: disable=no-self-use - return None # pragma: no cover - def save(self, data: Any) -> None: """Saves data by delegation to the provided save method. @@ -318,7 +325,7 @@ def release(self) -> None: """Release any cached data. Raises: - DataSetError: when underlying exists method raises error. + DataSetError: when underlying release method raises error. 
""" try: @@ -347,12 +354,8 @@ def generate_timestamp() -> str: String representation of the current timestamp. """ - current_ts = datetime.now(tz=timezone.utc) - fmt = ( - "{d.year:04d}-{d.month:02d}-{d.day:02d}T{d.hour:02d}" - ".{d.minute:02d}.{d.second:02d}.{ms:03d}Z" - ) - return fmt.format(d=current_ts, ms=current_ts.microsecond // 1000) + current_ts = datetime.now(tz=timezone.utc).strftime(VERSION_FORMAT) + return current_ts[:-4] + current_ts[-1:] # Don't keep microseconds class Version(namedtuple("Version", ["load", "save"])): @@ -440,32 +443,38 @@ def parse_dataset_definition( def _load_obj(class_path: str) -> Optional[object]: + mod_path, _, class_name = class_path.rpartition(".") + try: + available_classes = load_obj(f"{mod_path}.__all__") + # ModuleNotFoundError: When `load_obj` can't find `mod_path` (e.g `kedro.io.pandas`) + # this is because we try a combination of all prefixes. + # AttributeError: When `load_obj` manages to load `mod_path` but it doesn't have an + # `__all__` attribute -- either because it's a custom or a kedro.io dataset + except (ModuleNotFoundError, AttributeError, ValueError): + available_classes = None + try: class_obj = load_obj(class_path) - except ImportError as error: - if error.name in class_path: - return None - # class_obj was successfully loaded, but some dependencies are missing. 
- raise DataSetError("{} for {}".format(error, class_path)) - except (AttributeError, ValueError): + except (ModuleNotFoundError, ValueError): + return None + except AttributeError as error: + if available_classes and class_name in available_classes: + raise DataSetError( + f"{error} Please see the documentation on how to " + f"install relevant dependencies for {class_path}:\n" + f"https://kedro.readthedocs.io/en/stable/02_getting_started/" + f"02_install.html#optional-dependencies" + ) return None return class_obj -def _local_exists(filepath: str) -> bool: +def _local_exists(filepath: str) -> bool: # SKIP_IF_NO_SPARK filepath = Path(filepath) return filepath.exists() or any(par.is_file() for par in filepath.parents) -def is_remote_path(filepath: str) -> bool: - """Check if the given path looks like a remote URL (has scheme).""" - # Get rid of Windows-specific "C:\" start, - # which is treated as a URL scheme. - _, filepath = os.path.splitdrive(filepath) - return bool(urlparse(filepath).scheme) - - class AbstractVersionedDataSet(AbstractDataSet, abc.ABC): """ ``AbstractVersionedDataSet`` is the base class for all versioned data set @@ -475,13 +484,14 @@ class AbstractVersionedDataSet(AbstractDataSet, abc.ABC): Example: :: - >>> from kedro.io import AbstractVersionedDataSet + >>> from pathlib import Path, PurePosixPath >>> import pandas as pd + >>> from kedro.io import AbstractVersionedDataSet >>> >>> >>> class MyOwnDataSet(AbstractVersionedDataSet): - >>> def __init__(self, param1, param2, filepath, version): - >>> super().__init__(filepath, version) + >>> def __init__(self, filepath, version, param1, param2=True): + >>> super().__init__(PurePosixPath(filepath), version) >>> self._param1 = param1 >>> self._param2 = param2 >>> @@ -495,13 +505,21 @@ class AbstractVersionedDataSet(AbstractDataSet, abc.ABC): >>> >>> def _exists(self) -> bool: >>> path = self._get_load_path() - >>> return path.is_file() + >>> return Path(path).exists() >>> >>> def _describe(self): >>> 
return dict(version=self._version, param1=self._param1, param2=self._param2) - """ - # pylint: disable=abstract-method + Example catalog.yml specification: + :: + + my_dataset: + type: .MyOwnDataSet + filepath: data/01_raw/my_data.csv + versioned: true + param1: # param1 is a required argument + # param2 will be True by default + """ def __init__( self, @@ -527,20 +545,15 @@ def __init__( self._version = version self._exists_function = exists_function or _local_exists self._glob_function = glob_function or iglob - self._last_load_version = None # type: Optional[str] - self._last_save_version = None # type: Optional[str] - - def get_last_load_version(self) -> Optional[str]: - return self._last_load_version - - def _lookup_load_version(self) -> Optional[str]: - if not self._version: - return None - if self._version.load: - return self._version.load + # 1 entry for load version, 1 for save version + self._version_cache = Cache(maxsize=2) + # 'key' is set to prevent cache key overlapping for load and save: + # https://cachetools.readthedocs.io/en/stable/#cachetools.cachedmethod + @cachedmethod(cache=attrgetter("_version_cache"), key=partial(hashkey, "load")) + def _fetch_latest_load_version(self) -> str: # When load version is unpinned, fetch the most recent existing - # version from the given path + # version from the given path. 
pattern = str(self._get_versioned_path("*")) version_paths = sorted(self._glob_function(pattern), reverse=True) most_recent = next( @@ -548,35 +561,49 @@ def _lookup_load_version(self) -> Optional[str]: ) if not most_recent: - raise VersionNotFoundError( - "Did not find any versions for {}".format(str(self)) - ) + raise VersionNotFoundError(f"Did not find any versions for {self}") return PurePath(most_recent).parent.name + # 'key' is set to prevent cache key overlapping for load and save: + # https://cachetools.readthedocs.io/en/stable/#cachetools.cachedmethod + @cachedmethod(cache=attrgetter("_version_cache"), key=partial(hashkey, "save")) + def _fetch_latest_save_version(self) -> str: # pylint: disable=no-self-use + """Generate and cache the current save version""" + return generate_timestamp() + + def resolve_load_version(self) -> Optional[str]: + """Compute the version the dataset should be loaded with.""" + if not self._version: + return None + if self._version.load: + return self._version.load + return self._fetch_latest_load_version() + def _get_load_path(self) -> PurePath: if not self._version: # When versioning is disabled, load from original filepath return self._filepath - load_version = self._last_load_version or self._lookup_load_version() + load_version = self.resolve_load_version() return self._get_versioned_path(load_version) # type: ignore - def get_last_save_version(self) -> Optional[str]: - return self._last_save_version - - def _lookup_save_version(self) -> Optional[str]: + def resolve_save_version(self) -> Optional[str]: + """Compute the version the dataset should be saved with.""" if not self._version: return None - return self._version.save or generate_timestamp() + if self._version.save: + return self._version.save + return self._fetch_latest_save_version() def _get_save_path(self) -> PurePath: if not self._version: # When versioning is disabled, return original filepath return self._filepath - save_version = self._last_save_version or 
self._lookup_save_version() + save_version = self.resolve_save_version() versioned_path = self._get_versioned_path(save_version) # type: ignore + if self._exists_function(str(versioned_path)): raise DataSetError( "Save path `{}` for {} must not exist if versioning " @@ -589,19 +616,18 @@ def _get_versioned_path(self, version: str) -> PurePath: return self._filepath / version / self._filepath.name def load(self) -> Any: - self._last_load_version = self._lookup_load_version() + self.resolve_load_version() # Make sure last load version is set return super().load() def save(self, data: Any) -> None: - self._last_save_version = self._lookup_save_version() + self._version_cache.clear() + save_version = self.resolve_save_version() # Make sure last save version is set super().save(data) - load_version = self._lookup_load_version() - if load_version != self._last_save_version: + load_version = self.resolve_load_version() + if load_version != save_version: warnings.warn( - _CONSISTENCY_WARNING.format( - self._last_save_version, load_version, str(self) - ) + _CONSISTENCY_WARNING.format(save_version, load_version, str(self)) ) def exists(self) -> bool: @@ -620,12 +646,54 @@ def exists(self) -> bool: return self._exists() except VersionNotFoundError: return False - except Exception as exc: + except Exception as exc: # SKIP_IF_NO_SPARK message = "Failed during exists check for data set {}.\n{}".format( str(self), str(exc) ) raise DataSetError(message) from exc + def _release(self) -> None: + super()._release() + self._version_cache.clear() + + +def _parse_filepath(filepath: str) -> Dict[str, str]: + """Split filepath on protocol and path. Based on `fsspec.utils.infer_storage_options`. + + Args: + filepath: Either local absolute file path or URL (s3://bucket/file.csv) + + Returns: + Parsed filepath. 
+ """ + if ( + re.match(r"^[a-zA-Z]:[\\/]", filepath) + or re.match(r"^[a-zA-Z0-9]+://", filepath) is None + ): + return {"protocol": "file", "path": filepath} + + parsed_path = urlsplit(filepath) + protocol = parsed_path.scheme or "file" + + if protocol in HTTP_PROTOCOLS: + return {"protocol": protocol, "path": filepath} + + path = parsed_path.path + if protocol == "file": + windows_path = re.match(r"^/([a-zA-Z])[:|]([\\/].*)$", path) + if windows_path: + path = "{}:{}".format(*windows_path.groups()) + + options = {"protocol": protocol, "path": path} + + if parsed_path.netloc: + if protocol in CLOUD_PROTOCOLS: + host_with_port = parsed_path.netloc.rsplit("@", 1)[-1] + host = host_with_port.rsplit(":", 1)[0] + options["path"] = host + options["path"] + + return options + def get_protocol_and_path(filepath: str, version: Version = None) -> Tuple[str, str]: """Parses filepath on protocol and path. @@ -635,13 +703,13 @@ def get_protocol_and_path(filepath: str, version: Version = None) -> Tuple[str, version: instance of ``kedro.io.core.Version`` or None. Returns: - Protocol and path. + Protocol and path. Raises: - DataSetError: when protocol is http(s) and version is not None. - Note: HTTP(s) dataset doesn't support versioning. + DataSetError: when protocol is http(s) and version is not None. + Note: HTTP(s) dataset doesn't support versioning. """ - options_dict = infer_storage_options(filepath) + options_dict = _parse_filepath(filepath) path = options_dict["path"] protocol = options_dict["protocol"] @@ -679,12 +747,3 @@ def validate_on_forbidden_chars(**kwargs): raise DataSetError( "Neither white-space nor semicolon are allowed in `{}`.".format(key) ) - - -def deprecation_warning(class_name): - """Log deprecation warning.""" - warnings.warn( - "{} will be deprecated in future releases. 
Please refer " - "to replacement datasets in kedro.extras.datasets.".format(class_name), - DeprecationWarning, - ) diff --git a/kedro/utils.py b/kedro/utils.py index 832818fba..ed449ac25 100644 --- a/kedro/utils.py +++ b/kedro/utils.py @@ -4,7 +4,7 @@ # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES @@ -19,7 +19,7 @@ # trademarks of QuantumBlack. The License does not grant you any right or # license to the QuantumBlack Trademarks. You may not use the QuantumBlack # Trademarks or any confusingly similar mark as a trademark for your product, -# or use the QuantumBlack Trademarks in any other manner that might cause +# or use the QuantumBlack Trademarks in any other manner that might cause # confusion in the marketplace, including but not limited to in advertising, # on websites, or on software. # From 7c9e434e116e83f23a1cb468c53d7ba08419bb6e Mon Sep 17 00:00:00 2001 From: Dmitrii Deriabin <44967953+DmitriiDeriabinQB@users.noreply.github.com> Date: Tue, 23 Jun 2020 19:19:10 +0100 Subject: [PATCH 13/44] Fix DataSet string representation for falsy values (#418) --- kedro/io/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index f03f70f7c..c5da014ca 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -252,7 +252,7 @@ def _to_str(obj, is_root=False): 1. The root level (i.e. the DataSet.__init__ arguments) are formatted like DataSet(key=value). 2. Dictionaries have the keys alphabetically sorted recursively. - 3. Empty dictionaries and None values are not shown. + 3. None values are not shown. 
""" fmt = "{}={}" if is_root else "'{}': {}" # 1 @@ -263,8 +263,8 @@ def _to_str(obj, is_root=False): text = ", ".join( fmt.format(key, _to_str(value)) # 2 for key, value in sorted_dict - if value or isinstance(value, bool) - ) # 3 + if value is not None # 3 + ) return text if is_root else "{" + text + "}" # 1 From 5eca5129e2be55a318d00f76edaf28efe56f61e8 Mon Sep 17 00:00:00 2001 From: Andrii Ivaniuk Date: Fri, 26 Jun 2020 15:10:29 +0300 Subject: [PATCH 14/44] Fixed versioning on Windows (#673) --- kedro/io/core.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index c5da014ca..6a9f3a7ff 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -40,7 +40,7 @@ from functools import partial from glob import iglob from operator import attrgetter -from pathlib import Path, PurePath +from pathlib import Path, PurePath, PurePosixPath from typing import Any, Callable, Dict, List, Optional, Tuple, Type from urllib.parse import urlsplit @@ -120,7 +120,7 @@ class AbstractDataSet(abc.ABC): >>> df.to_csv(str(self._filepath)) >>> >>> def _exists(self) -> bool: - >>> return Path(self._filepath).exists() + >>> return Path(self._filepath.as_posix()).exists() >>> >>> def _describe(self): >>> return dict(param1=self._param1, param2=self._param2) @@ -505,7 +505,7 @@ class AbstractVersionedDataSet(AbstractDataSet, abc.ABC): >>> >>> def _exists(self) -> bool: >>> path = self._get_load_path() - >>> return Path(path).exists() + >>> return Path(path.as_posix()).exists() >>> >>> def _describe(self): >>> return dict(version=self._version, param1=self._param1, param2=self._param2) @@ -523,7 +523,7 @@ class AbstractVersionedDataSet(AbstractDataSet, abc.ABC): def __init__( self, - filepath: PurePath, + filepath: PurePosixPath, version: Optional[Version], exists_function: Callable[[str], bool] = None, glob_function: Callable[[str], List[str]] = None, @@ -531,7 +531,7 @@ def __init__( """Creates a new instance of 
``AbstractVersionedDataSet``. Args: - filepath: Path to file. + filepath: Filepath in POSIX format to a file. version: If specified, should be an instance of ``kedro.io.core.Version``. If its ``load`` attribute is None, the latest version will be loaded. If its ``save`` @@ -580,7 +580,7 @@ def resolve_load_version(self) -> Optional[str]: return self._version.load return self._fetch_latest_load_version() - def _get_load_path(self) -> PurePath: + def _get_load_path(self) -> PurePosixPath: if not self._version: # When versioning is disabled, load from original filepath return self._filepath @@ -596,7 +596,7 @@ def resolve_save_version(self) -> Optional[str]: return self._version.save return self._fetch_latest_save_version() - def _get_save_path(self) -> PurePath: + def _get_save_path(self) -> PurePosixPath: if not self._version: # When versioning is disabled, return original filepath return self._filepath @@ -612,7 +612,7 @@ def _get_save_path(self) -> PurePath: return versioned_path - def _get_versioned_path(self, version: str) -> PurePath: + def _get_versioned_path(self, version: str) -> PurePosixPath: return self._filepath / version / self._filepath.name def load(self) -> Any: @@ -734,7 +734,7 @@ def get_filepath_str(path: PurePath, protocol: str) -> str: Returns: Filepath string. 
""" - path = str(path) + path = path.as_posix() if protocol in HTTP_PROTOCOLS: path = "".join((protocol, PROTOCOL_DELIMITER, path)) return path From ee95b11f55842b87ea8eacbfaef5d4665e2602c4 Mon Sep 17 00:00:00 2001 From: Jesaja Everling Date: Mon, 3 Aug 2020 10:35:39 +0200 Subject: [PATCH 15/44] Fix broken links to documentation (#473) --- kedro/io/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index 6a9f3a7ff..092e7828e 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -462,8 +462,8 @@ def _load_obj(class_path: str) -> Optional[object]: raise DataSetError( f"{error} Please see the documentation on how to " f"install relevant dependencies for {class_path}:\n" - f"https://kedro.readthedocs.io/en/stable/02_getting_started/" - f"02_install.html#optional-dependencies" + f"https://kedro.readthedocs.io/en/stable/" + f"04_kedro_project_setup/01_dependencies.html" ) return None From cd72df9ea510c805cde693405e31509f55e6c38a Mon Sep 17 00:00:00 2001 From: "Kiyohito Kunii (Kiyo)" <8097799+921kiyo@users.noreply.github.com> Date: Mon, 24 Aug 2020 16:06:25 +0100 Subject: [PATCH 16/44] Make Pylint 2.6.0 happy again (#765) --- kedro/io/core.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index 092e7828e..6ec331d81 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -167,11 +167,11 @@ def from_config( class_obj, config = parse_dataset_definition( config, load_version, save_version ) - except Exception as ex: + except Exception as exc: raise DataSetError( "An exception occurred when parsing config " - "for DataSet `{}`:\n{}".format(name, str(ex)) - ) + "for DataSet `{}`:\n{}".format(name, str(exc)) + ) from exc try: data_set = class_obj(**config) # type: ignore @@ -182,14 +182,14 @@ def from_config( "of `{}.{}`.".format( str(err), name, class_obj.__module__, class_obj.__qualname__ ) - ) + ) from err except Exception as err: 
raise DataSetError( "\n{}.\nFailed to instantiate DataSet " "'{}' of type `{}.{}`.".format( str(err), name, class_obj.__module__, class_obj.__qualname__ ) - ) + ) from err return data_set @property @@ -418,8 +418,8 @@ def parse_dataset_definition( trials = (_load_obj(class_path) for class_path in class_paths) try: class_obj = next(obj for obj in trials if obj is not None) - except StopIteration: - raise DataSetError("Class `{}` not found.".format(class_obj)) + except StopIteration as exc: + raise DataSetError("Class `{}` not found.".format(class_obj)) from exc if not issubclass(class_obj, AbstractDataSet): raise DataSetError( @@ -457,14 +457,14 @@ def _load_obj(class_path: str) -> Optional[object]: class_obj = load_obj(class_path) except (ModuleNotFoundError, ValueError): return None - except AttributeError as error: + except AttributeError as exc: if available_classes and class_name in available_classes: raise DataSetError( - f"{error} Please see the documentation on how to " + f"{exc} Please see the documentation on how to " f"install relevant dependencies for {class_path}:\n" f"https://kedro.readthedocs.io/en/stable/" f"04_kedro_project_setup/01_dependencies.html" - ) + ) from exc return None return class_obj From 573977b049c7e73a141004479a88f56283159173 Mon Sep 17 00:00:00 2001 From: Waylon Walker Date: Fri, 20 Nov 2020 08:29:45 -0600 Subject: [PATCH 17/44] Convert all format strings to f-strings (#574) --- kedro/io/core.py | 14 +++++--------- kedro/utils.py | 4 +--- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index 6ec331d81..72cb234dc 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -241,9 +241,7 @@ def save(self, data: Any) -> None: except DataSetError: raise except Exception as exc: - message = "Failed while saving data to data set {}.\n{}".format( - str(self), str(exc) - ) + message = f"Failed while saving data to data set {str(self)}.\n{str(exc)}" raise DataSetError(message) from exc def 
__str__(self): @@ -271,7 +269,7 @@ def _to_str(obj, is_root=False): # not a dictionary return str(obj) - return "{}({})".format(type(self).__name__, _to_str(self._describe(), True)) + return f"{type(self).__name__}({_to_str(self._describe(), True)})" @abc.abstractmethod def _load(self) -> Any: @@ -332,9 +330,7 @@ def release(self) -> None: self._logger.debug("Releasing %s", str(self)) self._release() except Exception as exc: - message = "Failed during release for data set {}.\n{}".format( - str(self), str(exc) - ) + message = f"Failed during release for data set {str(self)}.\n{str(exc)}" raise DataSetError(message) from exc def _release(self) -> None: @@ -419,7 +415,7 @@ def parse_dataset_definition( try: class_obj = next(obj for obj in trials if obj is not None) except StopIteration as exc: - raise DataSetError("Class `{}` not found.".format(class_obj)) from exc + raise DataSetError(f"Class `{class_obj}` not found.") from exc if not issubclass(class_obj, AbstractDataSet): raise DataSetError( @@ -745,5 +741,5 @@ def validate_on_forbidden_chars(**kwargs): for key, value in kwargs.items(): if " " in value or ";" in value: raise DataSetError( - "Neither white-space nor semicolon are allowed in `{}`.".format(key) + f"Neither white-space nor semicolon are allowed in `{key}`." 
) diff --git a/kedro/utils.py b/kedro/utils.py index ed449ac25..e1fe21065 100644 --- a/kedro/utils.py +++ b/kedro/utils.py @@ -52,7 +52,5 @@ def load_obj(obj_path: str, default_obj_path: str = "") -> Any: obj_name = obj_path_list[0] module_obj = importlib.import_module(obj_path) if not hasattr(module_obj, obj_name): - raise AttributeError( - "Object `{}` cannot be loaded from `{}`.".format(obj_name, obj_path) - ) + raise AttributeError(f"Object `{obj_name}` cannot be loaded from `{obj_path}`.") return getattr(module_obj, obj_name) From fb4a328d1cd2fee1ec07789c58eaf10dbf45ee69 Mon Sep 17 00:00:00 2001 From: Lim Hoang Date: Thu, 17 Dec 2020 12:33:19 +0000 Subject: [PATCH 18/44] Merge pull request #923 from quantumblacklabs/develop Merge develop into master --- kedro/io/core.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index 72cb234dc..e819c4b80 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -177,18 +177,13 @@ def from_config( data_set = class_obj(**config) # type: ignore except TypeError as err: raise DataSetError( - "\n{}.\nDataSet '{}' must only contain " - "arguments valid for the constructor " - "of `{}.{}`.".format( - str(err), name, class_obj.__module__, class_obj.__qualname__ - ) + f"\n{err}.\nDataSet '{name}' must only contain arguments valid for the " + f"constructor of `{class_obj.__module__}.{class_obj.__qualname__}`." ) from err except Exception as err: raise DataSetError( - "\n{}.\nFailed to instantiate DataSet " - "'{}' of type `{}.{}`.".format( - str(err), name, class_obj.__module__, class_obj.__qualname__ - ) + f"\n{err}.\nFailed to instantiate DataSet '{name}' " + f"of type `{class_obj.__module__}.{class_obj.__qualname__}`." 
) from err return data_set @@ -419,8 +414,8 @@ def parse_dataset_definition( if not issubclass(class_obj, AbstractDataSet): raise DataSetError( - "DataSet type `{}.{}` is invalid: all data set types must extend " - "`AbstractDataSet`.".format(class_obj.__module__, class_obj.__qualname__) + f"DataSet type `{class_obj.__module__}.{class_obj.__qualname__}` " + f"is invalid: all data set types must extend `AbstractDataSet`." ) if VERSION_KEY in config: From 255a65c716cdbce8ab87df40abe8beafb6ce05f1 Mon Sep 17 00:00:00 2001 From: Merel Theisen <49397448+MerelTheisenQB@users.noreply.github.com> Date: Wed, 13 Jan 2021 16:31:41 +0000 Subject: [PATCH 19/44] Update Copyright date to 2021 (#941) --- kedro/io/__init__.py | 2 +- kedro/io/core.py | 2 +- kedro/utils.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kedro/io/__init__.py b/kedro/io/__init__.py index 4219a158f..be3c51f38 100644 --- a/kedro/io/__init__.py +++ b/kedro/io/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2020 QuantumBlack Visual Analytics Limited +# Copyright 2021 QuantumBlack Visual Analytics Limited # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/kedro/io/core.py b/kedro/io/core.py index e819c4b80..ff1e5b616 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -1,4 +1,4 @@ -# Copyright 2020 QuantumBlack Visual Analytics Limited +# Copyright 2021 QuantumBlack Visual Analytics Limited # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/kedro/utils.py b/kedro/utils.py index e1fe21065..110afcc59 100644 --- a/kedro/utils.py +++ b/kedro/utils.py @@ -1,4 +1,4 @@ -# Copyright 2020 QuantumBlack Visual Analytics Limited +# Copyright 2021 QuantumBlack Visual Analytics Limited # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From e7367269d92d8d48cd5cef76ba86d0fd270dd8c9 Mon Sep 17 00:00:00 2001 From: Ignacio Paricio <54770971+ignacioparicio@users.noreply.github.com> Date: Mon, 7 Jun 2021 10:59:08 +0200 Subject: [PATCH 20/44] Upgrade project's `requirements.txt` #ked 2540 (#1126) --- kedro/utils.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kedro/utils.py b/kedro/utils.py index 110afcc59..2411de2c4 100644 --- a/kedro/utils.py +++ b/kedro/utils.py @@ -36,15 +36,15 @@ def load_obj(obj_path: str, default_obj_path: str = "") -> Any: """Extract an object from a given path. - Args: - obj_path: Path to an object to be extracted, including the object name. - default_obj_path: Default object path. + Args: + obj_path: Path to an object to be extracted, including the object name. + default_obj_path: Default object path. - Returns: - Extracted object. + Returns: + Extracted object. - Raises: - AttributeError: When the object does not have the given named attribute. + Raises: + AttributeError: When the object does not have the given named attribute. 
""" obj_path_list = obj_path.rsplit(".", 1) From e5051aab38901ef3f68c4bb726603669ce152e18 Mon Sep 17 00:00:00 2001 From: Ignacio Paricio <54770971+ignacioparicio@users.noreply.github.com> Date: Fri, 11 Jun 2021 18:02:47 +0200 Subject: [PATCH 21/44] Improve error msg when versioning an existing dataset (#1144) --- kedro/io/core.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index ff1e5b616..88f4306be 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -224,7 +224,8 @@ def save(self, data: Any) -> None: Raises: DataSetError: when underlying save method raises error. - + FileNotFoundError: when save method got file instead of dir, on Windows. + NotADirectoryError: when save method got file instead of dir, on Unix. """ if data is None: @@ -235,6 +236,8 @@ def save(self, data: Any) -> None: self._save(data) except DataSetError: raise + except (FileNotFoundError, NotADirectoryError): + raise except Exception as exc: message = f"Failed while saving data to data set {str(self)}.\n{str(exc)}" raise DataSetError(message) from exc @@ -613,7 +616,22 @@ def load(self) -> Any: def save(self, data: Any) -> None: self._version_cache.clear() save_version = self.resolve_save_version() # Make sure last save version is set - super().save(data) + try: + super().save(data) + except (FileNotFoundError, NotADirectoryError) as err: + # FileNotFoundError raised in Win, NotADirectoryError raised in Unix + _default_version = "YYYY-MM-DDThh.mm.ss.sssZ" + raise DataSetError( + f"Cannot save versioned dataset `{self._filepath.name}` to " + f"`{self._filepath.parent.as_posix()}` because a file with the same " + f"name already exists in the directory. This is likely because " + f"versioning was enabled on a dataset already saved previously. Either " + f"remove `{self._filepath.name}` from the directory or manually " + f"convert it into a versioned dataset by placing it in a versioned " + f"directory (e.g. 
with default versioning format " + f"`{self._filepath.as_posix()}/{_default_version}/{self._filepath.name}" + f"`)." + ) from err load_version = self.resolve_load_version() if load_version != save_version: From 4d75d89c48935bdc84a57c12b0da667e7998df5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lorena=20B=C4=83lan?= Date: Mon, 2 Aug 2021 11:53:43 +0100 Subject: [PATCH 22/44] DataSetError 'parsing config' thrown when ModuleNotFound (#1201) --- kedro/io/core.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index 88f4306be..e671dd73b 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -413,7 +413,10 @@ def parse_dataset_definition( try: class_obj = next(obj for obj in trials if obj is not None) except StopIteration as exc: - raise DataSetError(f"Class `{class_obj}` not found.") from exc + raise DataSetError( + f"Class `{class_obj}` not found or one of its dependencies" + f"has not been installed." + ) from exc if not issubclass(class_obj, AbstractDataSet): raise DataSetError( From 1fe598869c5ae587100fb25e6c0db83155b38811 Mon Sep 17 00:00:00 2001 From: Jiri Klein <44288863+jiriklein@users.noreply.github.com> Date: Tue, 3 Aug 2021 09:33:45 +0100 Subject: [PATCH 23/44] Update docstrings to mention the meaning of the _SINGLE_PROCESS flag in datasets (#1196) --- kedro/io/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index e671dd73b..e5fdcf1cc 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -98,7 +98,9 @@ class AbstractDataSet(abc.ABC): """``AbstractDataSet`` is the base class for all data set implementations. All data set implementations should extend this abstract class and implement the methods marked as abstract. - + If a specific dataset implementation cannot be used in conjunction with + the ``ParallelRunner``, such user-defined dataset should have the + attribute `_SINGLE_PROCESS = True`. 
Example: :: From 8b4f8b5217848a3f240c17d0930cba40ab20bab7 Mon Sep 17 00:00:00 2001 From: Waylon Walker Date: Thu, 9 Sep 2021 04:12:41 -0500 Subject: [PATCH 24/44] implement __all__ in __init__ modules (#874) --- kedro/io/__init__.py | 46 ++++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/kedro/io/__init__.py b/kedro/io/__init__.py index be3c51f38..dde54f72a 100644 --- a/kedro/io/__init__.py +++ b/kedro/io/__init__.py @@ -31,17 +31,35 @@ which allows implementation of various ``AbstractDataSet``s. """ -from .cached_dataset import CachedDataSet # NOQA -from .core import AbstractDataSet # NOQA -from .core import AbstractVersionedDataSet # NOQA -from .core import DataSetAlreadyExistsError # NOQA -from .core import DataSetError # NOQA -from .core import DataSetNotFoundError # NOQA -from .core import Version # NOQA -from .data_catalog import DataCatalog # NOQA -from .data_catalog_with_default import DataCatalogWithDefault # NOQA -from .lambda_data_set import LambdaDataSet # NOQA -from .memory_data_set import MemoryDataSet # NOQA -from .partitioned_data_set import IncrementalDataSet # NOQA -from .partitioned_data_set import PartitionedDataSet # NOQA -from .transformers import AbstractTransformer # NOQA +from .cached_dataset import CachedDataSet +from .core import ( + AbstractDataSet, + AbstractVersionedDataSet, + DataSetAlreadyExistsError, + DataSetError, + DataSetNotFoundError, + Version, +) +from .data_catalog import DataCatalog +from .data_catalog_with_default import DataCatalogWithDefault +from .lambda_data_set import LambdaDataSet +from .memory_data_set import MemoryDataSet +from .partitioned_data_set import IncrementalDataSet, PartitionedDataSet +from .transformers import AbstractTransformer + +__all__ = [ + "AbstractDataSet", + "AbstractTransformer", + "AbstractVersionedDataSet", + "CachedDataSet", + "DataCatalog", + "DataCatalogWithDefault", + "DataSetAlreadyExistsError", + "DataSetError", + 
"DataSetNotFoundError", + "IncrementalDataSet", + "LambdaDataSet", + "MemoryDataSet", + "PartitionedDataSet", + "Version", +] From 5de06ed3bf591af4f4acd132fff2afa7a5fedc00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lorena=20B=C4=83lan?= Date: Mon, 20 Sep 2021 15:03:18 +0100 Subject: [PATCH 25/44] Update release notes post release (#1240) --- kedro/io/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index e5fdcf1cc..a55e72731 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -171,8 +171,8 @@ def from_config( ) except Exception as exc: raise DataSetError( - "An exception occurred when parsing config " - "for DataSet `{}`:\n{}".format(name, str(exc)) + f"An exception occurred when parsing config " + f"for DataSet `{name}`:\n{str(exc)}" ) from exc try: From 468b97b37e3bd1fdc3b55db3e627dc16b779b7d3 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Wed, 22 Sep 2021 11:17:53 -0400 Subject: [PATCH 26/44] Use `pyupgrade` to replace format strings and more (#1242) --- kedro/io/core.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index a55e72731..814712b53 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -213,8 +213,8 @@ def load(self) -> Any: except Exception as exc: # This exception handling is by design as the composed data sets # can throw any type of exception. 
- message = "Failed while loading data from data set {}.\n{}".format( - str(self), str(exc) + message = ( + f"Failed while loading data from data set {str(self)}.\n{str(exc)}" ) raise DataSetError(message) from exc @@ -274,22 +274,22 @@ def _to_str(obj, is_root=False): @abc.abstractmethod def _load(self) -> Any: raise NotImplementedError( - "`{}` is a subclass of AbstractDataSet and" - "it must implement the `_load` method".format(self.__class__.__name__) + f"`{self.__class__.__name__}` is a subclass of AbstractDataSet and " + f"it must implement the `_load` method" ) @abc.abstractmethod def _save(self, data: Any) -> None: raise NotImplementedError( - "`{}` is a subclass of AbstractDataSet and" - "it must implement the `_save` method".format(self.__class__.__name__) + f"`{self.__class__.__name__}` is a subclass of AbstractDataSet and " + f"it must implement the `_save` method" ) @abc.abstractmethod def _describe(self) -> Dict[str, Any]: raise NotImplementedError( - "`{}` is a subclass of AbstractDataSet and" - "it must implement the `_describe` method".format(self.__class__.__name__) + f"`{self.__class__.__name__}` is a subclass of AbstractDataSet and " + f"it must implement the `_describe` method" ) def exists(self) -> bool: @@ -307,8 +307,8 @@ def exists(self) -> bool: self._logger.debug("Checking whether target of %s exists", str(self)) return self._exists() except Exception as exc: - message = "Failed during exists check for data set {}.\n{}".format( - str(self), str(exc) + message = ( + f"Failed during exists check for data set {str(self)}.\n{str(exc)}" ) raise DataSetError(message) from exc @@ -605,8 +605,8 @@ def _get_save_path(self) -> PurePosixPath: if self._exists_function(str(versioned_path)): raise DataSetError( - "Save path `{}` for {} must not exist if versioning " - "is enabled.".format(versioned_path, str(self)) + f"Save path `{versioned_path}` for {str(self)} must not exist if " + f"versioning is enabled." 
) return versioned_path @@ -661,8 +661,8 @@ def exists(self) -> bool: except VersionNotFoundError: return False except Exception as exc: # SKIP_IF_NO_SPARK - message = "Failed during exists check for data set {}.\n{}".format( - str(self), str(exc) + message = ( + f"Failed during exists check for data set {str(self)}.\n{str(exc)}" ) raise DataSetError(message) from exc @@ -696,7 +696,7 @@ def _parse_filepath(filepath: str) -> Dict[str, str]: if protocol == "file": windows_path = re.match(r"^/([a-zA-Z])[:|]([\\/].*)$", path) if windows_path: - path = "{}:{}".format(*windows_path.groups()) + path = ":".join(windows_path.groups()) options = {"protocol": protocol, "path": path} From d681a8226ffb8bba8454e05ca41ff8f4cadc5210 Mon Sep 17 00:00:00 2001 From: Sajid Alam <90610031+SajidAlamQB@users.noreply.github.com> Date: Mon, 1 Nov 2021 15:06:30 +0000 Subject: [PATCH 27/44] Remove McK headers + Licence from codebase + docs (#1285) * Reverting license to standard Apache 2.0 * Removing make legal * Delete license_and_headers.py * Removed all occurrence of the copyright header from codebase * Removed copyrighted header from build-docs.sh * Removing copyright footer from all docs * Changes based on review * removed whitespace * cleanup --- kedro/io/__init__.py | 28 ---------------------------- kedro/io/core.py | 28 ---------------------------- kedro/utils.py | 28 ---------------------------- 3 files changed, 84 deletions(-) diff --git a/kedro/io/__init__.py b/kedro/io/__init__.py index dde54f72a..f172dff3a 100644 --- a/kedro/io/__init__.py +++ b/kedro/io/__init__.py @@ -1,31 +1,3 @@ -# Copyright 2021 QuantumBlack Visual Analytics Limited -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND -# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS -# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN -# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo -# (either separately or in combination, "QuantumBlack Trademarks") are -# trademarks of QuantumBlack. The License does not grant you any right or -# license to the QuantumBlack Trademarks. You may not use the QuantumBlack -# Trademarks or any confusingly similar mark as a trademark for your product, -# or use the QuantumBlack Trademarks in any other manner that might cause -# confusion in the marketplace, including but not limited to in advertising, -# on websites, or on software. -# -# See the License for the specific language governing permissions and -# limitations under the License. - """``kedro.io`` provides functionality to read and write to a number of data sets. At core of the library is ``AbstractDataSet`` which allows implementation of various ``AbstractDataSet``s. diff --git a/kedro/io/core.py b/kedro/io/core.py index 814712b53..7d4e0b73e 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -1,31 +1,3 @@ -# Copyright 2021 QuantumBlack Visual Analytics Limited -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND -# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS -# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN -# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo -# (either separately or in combination, "QuantumBlack Trademarks") are -# trademarks of QuantumBlack. The License does not grant you any right or -# license to the QuantumBlack Trademarks. You may not use the QuantumBlack -# Trademarks or any confusingly similar mark as a trademark for your product, -# or use the QuantumBlack Trademarks in any other manner that might cause -# confusion in the marketplace, including but not limited to in advertising, -# on websites, or on software. -# -# See the License for the specific language governing permissions and -# limitations under the License. - """This module provides a set of classes which underpin the data loading and saving functionality provided by ``kedro.io``. """ diff --git a/kedro/utils.py b/kedro/utils.py index 2411de2c4..4c57b7911 100644 --- a/kedro/utils.py +++ b/kedro/utils.py @@ -1,31 +1,3 @@ -# Copyright 2021 QuantumBlack Visual Analytics Limited -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND -# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS -# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN -# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo -# (either separately or in combination, "QuantumBlack Trademarks") are -# trademarks of QuantumBlack. The License does not grant you any right or -# license to the QuantumBlack Trademarks. You may not use the QuantumBlack -# Trademarks or any confusingly similar mark as a trademark for your product, -# or use the QuantumBlack Trademarks in any other manner that might cause -# confusion in the marketplace, including but not limited to in advertising, -# on websites, or on software. -# -# See the License for the specific language governing permissions and -# limitations under the License. - """This module provides a set of helper functions being used across different components of kedro package. """ From 1ea8a92d420227b1acc5247a6650e4c411714bd9 Mon Sep 17 00:00:00 2001 From: Merel Theisen <49397448+MerelTheisenQB@users.noreply.github.com> Date: Mon, 8 Nov 2021 14:02:14 +0000 Subject: [PATCH 28/44] Fix versioning tracking datasets (#1016) --- kedro/io/core.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index 7d4e0b73e..f765f7484 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -380,7 +380,6 @@ def parse_dataset_definition( "`type` class path does not support relative " "paths or paths ending with a dot." 
) - class_paths = (prefix + class_obj for prefix in _DEFAULT_PACKAGES) trials = (_load_obj(class_path) for class_path in class_paths) @@ -407,7 +406,12 @@ def parse_dataset_definition( ) logging.getLogger(__name__).warning(message, VERSION_KEY) del config[VERSION_KEY] - if config.pop(VERSIONED_FLAG_KEY, False): # data set is versioned + + # dataset is either versioned explicitly by the user or versioned is set to true by default + # on the dataset + if config.pop(VERSIONED_FLAG_KEY, False) or getattr( + class_obj, VERSIONED_FLAG_KEY, False + ): config[VERSION_KEY] = Version(load_version, save_version) return class_obj, config From 5c4ff50d00b42dd12b5158488c4785efb7d83393 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Wed, 8 Dec 2021 08:01:15 -0500 Subject: [PATCH 29/44] Ensure path is of type `str` if `overwrite` is set (#1094) --- kedro/io/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index f765f7484..075b8dacf 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -704,7 +704,7 @@ def get_protocol_and_path(filepath: str, version: Version = None) -> Tuple[str, protocol = options_dict["protocol"] if protocol in HTTP_PROTOCOLS: - if version: + if version is not None: raise DataSetError( "HTTP(s) DataSet doesn't support versioning. " "Please remove version flag from the dataset configuration." 
From 51e589d78af04076df481980e930c749afe4f67e Mon Sep 17 00:00:00 2001 From: Puneet Saini Date: Mon, 7 Feb 2022 21:39:29 +0530 Subject: [PATCH 30/44] Fix error message whitespace (#1206) Signed-off-by: Puneet Saini --- kedro/io/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index 075b8dacf..80743ddab 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -387,7 +387,7 @@ def parse_dataset_definition( class_obj = next(obj for obj in trials if obj is not None) except StopIteration as exc: raise DataSetError( - f"Class `{class_obj}` not found or one of its dependencies" + f"Class `{class_obj}` not found or one of its dependencies " f"has not been installed." ) from exc From af55fd0d5ab9f729fb9fed3986e90e8f054aaa10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lorena=20B=C4=83lan?= Date: Thu, 31 Mar 2022 14:18:17 +0100 Subject: [PATCH 31/44] Merge pull request #1397 from kedro-org/develop Merge develop into main in preparation for 0.18.0 release --- kedro/io/__init__.py | 10 +++------- kedro/io/core.py | 4 ++-- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/kedro/io/__init__.py b/kedro/io/__init__.py index f172dff3a..de6f6e49e 100644 --- a/kedro/io/__init__.py +++ b/kedro/io/__init__.py @@ -13,19 +13,15 @@ Version, ) from .data_catalog import DataCatalog -from .data_catalog_with_default import DataCatalogWithDefault -from .lambda_data_set import LambdaDataSet -from .memory_data_set import MemoryDataSet -from .partitioned_data_set import IncrementalDataSet, PartitionedDataSet -from .transformers import AbstractTransformer +from .lambda_dataset import LambdaDataSet +from .memory_dataset import MemoryDataSet +from .partitioned_dataset import IncrementalDataSet, PartitionedDataSet __all__ = [ "AbstractDataSet", - "AbstractTransformer", "AbstractVersionedDataSet", "CachedDataSet", "DataCatalog", - "DataCatalogWithDefault", "DataSetAlreadyExistsError", "DataSetError", "DataSetNotFoundError", diff --git 
a/kedro/io/core.py b/kedro/io/core.py index 80743ddab..6410d3018 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -438,7 +438,7 @@ def _load_obj(class_path: str) -> Optional[object]: f"{exc} Please see the documentation on how to " f"install relevant dependencies for {class_path}:\n" f"https://kedro.readthedocs.io/en/stable/" - f"04_kedro_project_setup/01_dependencies.html" + f"kedro_project_setup/dependencies.html" ) from exc return None @@ -521,7 +521,7 @@ def __init__( self._exists_function = exists_function or _local_exists self._glob_function = glob_function or iglob # 1 entry for load version, 1 for save version - self._version_cache = Cache(maxsize=2) + self._version_cache = Cache(maxsize=2) # type: Cache # 'key' is set to prevent cache key overlapping for load and save: # https://cachetools.readthedocs.io/en/stable/#cachetools.cachedmethod From 487eb3349d739711b7b83b20909f2e4cd3592d3f Mon Sep 17 00:00:00 2001 From: philomine Date: Tue, 24 May 2022 11:05:42 +0200 Subject: [PATCH 32/44] Add abfss to the list of cloud protocols (#1348) Signed-off-by: philomene.bobichon@konecranes.com --- kedro/io/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index 6410d3018..d8f676e8c 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -28,7 +28,7 @@ VERSION_KEY = "version" HTTP_PROTOCOLS = ("http", "https") PROTOCOL_DELIMITER = "://" -CLOUD_PROTOCOLS = ("s3", "gcs", "gs", "adl", "abfs") +CLOUD_PROTOCOLS = ("s3", "gcs", "gs", "adl", "abfs", "abfss") class DataSetError(Exception): From 35c1e4531602f2c2dbeb71f1f994e2eb76af9da5 Mon Sep 17 00:00:00 2001 From: Ahdra Merali <90615669+AhdraMeraliQB@users.noreply.github.com> Date: Wed, 8 Jun 2022 15:43:58 +0100 Subject: [PATCH 33/44] Replace backticks with single quotes (#1584) * Replace back-ticks with single quotes Signed-off-by: Ahdra Merali * Replace more backticks Signed-off-by: Ahdra Merali * Linting Signed-off-by: Ahdra Merali * Update 
kedro/extras/datasets/pandas/gbq_dataset.py Co-authored-by: Antony Milne <49395058+AntonyMilneQB@users.noreply.github.com> * Update kedro/framework/cli/project.py Co-authored-by: Antony Milne <49395058+AntonyMilneQB@users.noreply.github.com> * Fix tests pt 1 Signed-off-by: Ahdra Merali * Fix tests pt 2 Signed-off-by: Ahdra Merali * Fix tests pt 3 Signed-off-by: Ahdra Merali * Change quotes to follow convention Co-authored-by: Antony Milne <49395058+AntonyMilneQB@users.noreply.github.com> * Change quotes to follow convention pt 2 Co-authored-by: Antony Milne <49395058+AntonyMilneQB@users.noreply.github.com> * Fix tests Signed-off-by: Ahdra Merali * Fix e2e tests Signed-off-by: Ahdra Merali Co-authored-by: Antony Milne <49395058+AntonyMilneQB@users.noreply.github.com> --- kedro/io/core.py | 54 ++++++++++++++++++++++++------------------------ kedro/utils.py | 2 +- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index d8f676e8c..c0f13bf9a 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -144,7 +144,7 @@ def from_config( except Exception as exc: raise DataSetError( f"An exception occurred when parsing config " - f"for DataSet `{name}`:\n{str(exc)}" + f"for DataSet '{name}':\n{str(exc)}" ) from exc try: @@ -152,12 +152,12 @@ def from_config( except TypeError as err: raise DataSetError( f"\n{err}.\nDataSet '{name}' must only contain arguments valid for the " - f"constructor of `{class_obj.__module__}.{class_obj.__qualname__}`." + f"constructor of '{class_obj.__module__}.{class_obj.__qualname__}'." ) from err except Exception as err: raise DataSetError( f"\n{err}.\nFailed to instantiate DataSet '{name}' " - f"of type `{class_obj.__module__}.{class_obj.__qualname__}`." + f"of type '{class_obj.__module__}.{class_obj.__qualname__}'." 
) from err return data_set @@ -203,7 +203,7 @@ def save(self, data: Any) -> None: """ if data is None: - raise DataSetError("Saving `None` to a `DataSet` is not allowed") + raise DataSetError("Saving 'None' to a 'DataSet' is not allowed") try: self._logger.debug("Saving %s", str(self)) @@ -246,22 +246,22 @@ def _to_str(obj, is_root=False): @abc.abstractmethod def _load(self) -> Any: raise NotImplementedError( - f"`{self.__class__.__name__}` is a subclass of AbstractDataSet and " - f"it must implement the `_load` method" + f"'{self.__class__.__name__}' is a subclass of AbstractDataSet and " + f"it must implement the '_load' method" ) @abc.abstractmethod def _save(self, data: Any) -> None: raise NotImplementedError( - f"`{self.__class__.__name__}` is a subclass of AbstractDataSet and " - f"it must implement the `_save` method" + f"'{self.__class__.__name__}' is a subclass of AbstractDataSet and " + f"it must implement the '_save' method" ) @abc.abstractmethod def _describe(self) -> Dict[str, Any]: raise NotImplementedError( - f"`{self.__class__.__name__}` is a subclass of AbstractDataSet and " - f"it must implement the `_describe` method" + f"'{self.__class__.__name__}' is a subclass of AbstractDataSet and " + f"it must implement the '_describe' method" ) def exists(self) -> bool: @@ -286,7 +286,7 @@ def exists(self) -> bool: def _exists(self) -> bool: self._logger.warning( - "`exists()` not implemented for `%s`. Assuming output does not exist.", + "'exists()' not implemented for '%s'. Assuming output does not exist.", self.__class__.__name__, ) return False @@ -337,9 +337,9 @@ class Version(namedtuple("Version", ["load", "save"])): _CONSISTENCY_WARNING = ( - "Save version `{}` did not match load version `{}` for {}. This is strongly " - "discouraged due to inconsistencies it may cause between `save` and " - "`load` operations. Please refrain from setting exact load version for " + "Save version '{}' did not match load version '{}' for {}. 
This is strongly " + "discouraged due to inconsistencies it may cause between 'save' and " + "'load' operations. Please refrain from setting exact load version for " "intermediate data sets where possible to avoid this warning." ) @@ -371,13 +371,13 @@ def parse_dataset_definition( config = copy.deepcopy(config) if "type" not in config: - raise DataSetError("`type` is missing from DataSet catalog configuration") + raise DataSetError("'type' is missing from DataSet catalog configuration") class_obj = config.pop("type") if isinstance(class_obj, str): if len(class_obj.strip(".")) != len(class_obj): raise DataSetError( - "`type` class path does not support relative " + "'type' class path does not support relative " "paths or paths ending with a dot." ) class_paths = (prefix + class_obj for prefix in _DEFAULT_PACKAGES) @@ -387,21 +387,21 @@ def parse_dataset_definition( class_obj = next(obj for obj in trials if obj is not None) except StopIteration as exc: raise DataSetError( - f"Class `{class_obj}` not found or one of its dependencies " + f"Class '{class_obj}' not found or one of its dependencies " f"has not been installed." ) from exc if not issubclass(class_obj, AbstractDataSet): raise DataSetError( - f"DataSet type `{class_obj.__module__}.{class_obj.__qualname__}` " - f"is invalid: all data set types must extend `AbstractDataSet`." + f"DataSet type '{class_obj.__module__}.{class_obj.__qualname__}' " + f"is invalid: all data set types must extend 'AbstractDataSet'." 
) if VERSION_KEY in config: # remove "version" key so that it's not passed # to the "unversioned" data set constructor message = ( - "`%s` attribute removed from data set configuration since it is a " + "'%s' attribute removed from data set configuration since it is a " "reserved word and cannot be directly specified" ) logging.getLogger(__name__).warning(message, VERSION_KEY) @@ -581,7 +581,7 @@ def _get_save_path(self) -> PurePosixPath: if self._exists_function(str(versioned_path)): raise DataSetError( - f"Save path `{versioned_path}` for {str(self)} must not exist if " + f"Save path '{versioned_path}' for {str(self)} must not exist if " f"versioning is enabled." ) @@ -603,15 +603,15 @@ def save(self, data: Any) -> None: # FileNotFoundError raised in Win, NotADirectoryError raised in Unix _default_version = "YYYY-MM-DDThh.mm.ss.sssZ" raise DataSetError( - f"Cannot save versioned dataset `{self._filepath.name}` to " - f"`{self._filepath.parent.as_posix()}` because a file with the same " + f"Cannot save versioned dataset '{self._filepath.name}' to " + f"'{self._filepath.parent.as_posix()}' because a file with the same " f"name already exists in the directory. This is likely because " f"versioning was enabled on a dataset already saved previously. Either " - f"remove `{self._filepath.name}` from the directory or manually " + f"remove '{self._filepath.name}' from the directory or manually " f"convert it into a versioned dataset by placing it in a versioned " f"directory (e.g. with default versioning format " - f"`{self._filepath.as_posix()}/{_default_version}/{self._filepath.name}" - f"`)." + f"'{self._filepath.as_posix()}/{_default_version}/{self._filepath.name}" + f"')." ) from err load_version = self.resolve_load_version() @@ -735,5 +735,5 @@ def validate_on_forbidden_chars(**kwargs): for key, value in kwargs.items(): if " " in value or ";" in value: raise DataSetError( - f"Neither white-space nor semicolon are allowed in `{key}`." 
+ f"Neither white-space nor semicolon are allowed in '{key}'." ) diff --git a/kedro/utils.py b/kedro/utils.py index 4c57b7911..6067d96b6 100644 --- a/kedro/utils.py +++ b/kedro/utils.py @@ -24,5 +24,5 @@ def load_obj(obj_path: str, default_obj_path: str = "") -> Any: obj_name = obj_path_list[0] module_obj = importlib.import_module(obj_path) if not hasattr(module_obj, obj_name): - raise AttributeError(f"Object `{obj_name}` cannot be loaded from `{obj_path}`.") + raise AttributeError(f"Object '{obj_name}' cannot be loaded from '{obj_path}'.") return getattr(module_obj, obj_name) From d490f9ea53debea08d2cc866c98726c2579c5932 Mon Sep 17 00:00:00 2001 From: Niels Drost Date: Mon, 4 Jul 2022 13:24:02 +0200 Subject: [PATCH 34/44] Generic typings abstract dataset (#1612) --- kedro/io/core.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index c0f13bf9a..f475ef02d 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -13,7 +13,7 @@ from glob import iglob from operator import attrgetter from pathlib import Path, PurePath, PurePosixPath -from typing import Any, Callable, Dict, List, Optional, Tuple, Type +from typing import Any, Callable, Dict, Generic, List, Optional, Tuple, Type, TypeVar from urllib.parse import urlsplit from cachetools import Cache, cachedmethod @@ -66,7 +66,11 @@ class VersionNotFoundError(DataSetError): pass -class AbstractDataSet(abc.ABC): +_DI = TypeVar("_DI") +_DO = TypeVar("_DO") + + +class AbstractDataSet(abc.ABC, Generic[_DI, _DO]): """``AbstractDataSet`` is the base class for all data set implementations. All data set implementations should extend this abstract class and implement the methods marked as abstract. 
@@ -81,7 +85,7 @@ class AbstractDataSet(abc.ABC): >>> from kedro.io import AbstractDataSet >>> >>> - >>> class MyOwnDataSet(AbstractDataSet): + >>> class MyOwnDataSet(AbstractDataSet[pd.DataFrame, pd.DataFrame]): >>> def __init__(self, filepath, param1, param2=True): >>> self._filepath = PurePosixPath(filepath) >>> self._param1 = param1 @@ -165,7 +169,7 @@ def from_config( def _logger(self) -> logging.Logger: return logging.getLogger(__name__) - def load(self) -> Any: + def load(self) -> _DO: """Loads data by delegation to the provided load method. Returns: @@ -190,7 +194,7 @@ def load(self) -> Any: ) raise DataSetError(message) from exc - def save(self, data: Any) -> None: + def save(self, data: _DI) -> None: """Saves data by delegation to the provided save method. Args: @@ -244,14 +248,14 @@ def _to_str(obj, is_root=False): return f"{type(self).__name__}({_to_str(self._describe(), True)})" @abc.abstractmethod - def _load(self) -> Any: + def _load(self) -> _DO: raise NotImplementedError( f"'{self.__class__.__name__}' is a subclass of AbstractDataSet and " f"it must implement the '_load' method" ) @abc.abstractmethod - def _save(self, data: Any) -> None: + def _save(self, data: _DI) -> None: raise NotImplementedError( f"'{self.__class__.__name__}' is a subclass of AbstractDataSet and " f"it must implement the '_save' method" @@ -450,7 +454,7 @@ def _local_exists(filepath: str) -> bool: # SKIP_IF_NO_SPARK return filepath.exists() or any(par.is_file() for par in filepath.parents) -class AbstractVersionedDataSet(AbstractDataSet, abc.ABC): +class AbstractVersionedDataSet(AbstractDataSet[_DI, _DO], abc.ABC): """ ``AbstractVersionedDataSet`` is the base class for all versioned data set implementations. 
All data sets that implement versioning should extend this @@ -590,11 +594,11 @@ def _get_save_path(self) -> PurePosixPath: def _get_versioned_path(self, version: str) -> PurePosixPath: return self._filepath / version / self._filepath.name - def load(self) -> Any: + def load(self) -> _DO: self.resolve_load_version() # Make sure last load version is set return super().load() - def save(self, data: Any) -> None: + def save(self, data: _DI) -> None: self._version_cache.clear() save_version = self.resolve_save_version() # Make sure last save version is set try: From ee39e5ef1b1b5c5825233173327d4162f1adf425 Mon Sep 17 00:00:00 2001 From: Sajid Alam <90610031+SajidAlamQB@users.noreply.github.com> Date: Fri, 9 Sep 2022 17:20:15 +0100 Subject: [PATCH 35/44] Add gdrive to CLOUD_PROTOCOLS list (#1708) --- kedro/io/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index f475ef02d..9765e0bae 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -28,7 +28,7 @@ VERSION_KEY = "version" HTTP_PROTOCOLS = ("http", "https") PROTOCOL_DELIMITER = "://" -CLOUD_PROTOCOLS = ("s3", "gcs", "gs", "adl", "abfs", "abfss") +CLOUD_PROTOCOLS = ("s3", "gcs", "gs", "adl", "abfs", "abfss", "gdrive") class DataSetError(Exception): From c713e7d0f536331bab01fb0f0b5d78fbede80d39 Mon Sep 17 00:00:00 2001 From: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> Date: Mon, 3 Oct 2022 11:30:47 +0100 Subject: [PATCH 36/44] Update Error message for `VersionNotFoundError` to handle Permission related issues better (#1881) * Update message for VersionNotFoundError Signed-off-by: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> * Add test for VersionNotFoundError for cloud protocols * Update test_data_catalog.py Update NoVersionFoundError test * minor linting update * update docs link + styling changes * Revert "update docs link + styling changes" This reverts commit 6088e00159a9ee844dfee312673654b6d248f931. 
* Update test with styling changes * Update RELEASE.md Signed-off-by: ankatiyar Signed-off-by: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> Signed-off-by: ankatiyar Co-authored-by: Ahdra Merali <90615669+AhdraMeraliQB@users.noreply.github.com> --- kedro/io/core.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index 9765e0bae..fc6dea587 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -538,9 +538,16 @@ def _fetch_latest_load_version(self) -> str: most_recent = next( (path for path in version_paths if self._exists_function(path)), None ) - + protocol = getattr(self, "_protocol", None) if not most_recent: - raise VersionNotFoundError(f"Did not find any versions for {self}") + if protocol in CLOUD_PROTOCOLS: + message = ( + f"Did not find any versions for {self}. This could be " + f"due to insufficient permission." + ) + else: + message = f"Did not find any versions for {self}" + raise VersionNotFoundError(message) return PurePath(most_recent).parent.name From 781fa1e354cebbe1f01d10ef7c52462aeee4e67c Mon Sep 17 00:00:00 2001 From: Nok Lam Chan Date: Thu, 20 Oct 2022 11:11:19 +0100 Subject: [PATCH 37/44] Remove redundant `resolve_load_version` call (#1911) * remove a redundant function call Signed-off-by: Nok Chan * Remove redundant resolove_load_version & fix test Signed-off-by: Nok Chan * Fix HoloviewWriter tests with more specific error message pattern & Lint Signed-off-by: Nok Chan * Rename tests Signed-off-by: Nok Chan Signed-off-by: Nok Chan --- kedro/io/core.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index fc6dea587..98f2bb1d6 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -548,7 +548,6 @@ def _fetch_latest_load_version(self) -> str: else: message = f"Did not find any versions for {self}" raise VersionNotFoundError(message) - return PurePath(most_recent).parent.name # 'key' is set to prevent cache 
key overlapping for load and save: @@ -601,8 +600,7 @@ def _get_save_path(self) -> PurePosixPath: def _get_versioned_path(self, version: str) -> PurePosixPath: return self._filepath / version / self._filepath.name - def load(self) -> _DO: - self.resolve_load_version() # Make sure last load version is set + def load(self) -> _DO: # pylint: disable=useless-parent-delegation return super().load() def save(self, data: _DI) -> None: From 5c134e591211a0db2674860c4df81865e96c505c Mon Sep 17 00:00:00 2001 From: Nok Lam Chan Date: Wed, 2 Nov 2022 15:08:04 +0000 Subject: [PATCH 38/44] Make Kedro instantiate datasets from `kedro_dataset`with higher priority than `kedro.extras.datasets` (#1734) * Update release doc Signed-off-by: Nok Chan * Make kedro.datasets with higher priority Signed-off-by: Nok Chan * Update the library priorities Signed-off-by: Nok Chan * Update release notes Signed-off-by: Nok Chan * Add test Signed-off-by: Nok Chan * Modify test to remove the dummy obj Signed-off-by: Nok Chan * Fix mocker.spy with different API for Python 3.7 Signed-off-by: Nok Chan * Please the linter Signed-off-by: Nok Chan * Black Signed-off-by: Nok Chan Signed-off-by: Nok Chan --- kedro/io/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index 98f2bb1d6..289650a79 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -347,7 +347,7 @@ class Version(namedtuple("Version", ["load", "save"])): "intermediate data sets where possible to avoid this warning." 
) -_DEFAULT_PACKAGES = ["kedro.io.", "kedro.extras.datasets.", ""] +_DEFAULT_PACKAGES = ["kedro.io.", "kedro_datasets.", "kedro.extras.datasets.", ""] def parse_dataset_definition( From 8b03ab61f3ceadc28379adc703cada17704275f0 Mon Sep 17 00:00:00 2001 From: Jannic <37243923+jmholzer@users.noreply.github.com> Date: Mon, 16 Jan 2023 15:12:19 +0000 Subject: [PATCH 39/44] Add support for Azure Data Lake Storage Gen2 URIs (#2190) * Add failing test Signed-off-by: Jannic Holzer * Add patch specific for abfss Signed-off-by: Jannic Holzer * Fix linting Signed-off-by: Jannic Holzer * Add release note Signed-off-by: Jannic Holzer * Add comment explaining adding username to abfss path Signed-off-by: Jannic Holzer * Modify release note to 'fixed' Signed-off-by: Jannic Holzer Signed-off-by: Jannic Holzer --- kedro/io/core.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index 289650a79..02b3708ed 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -685,11 +685,14 @@ def _parse_filepath(filepath: str) -> Dict[str, str]: options = {"protocol": protocol, "path": path} - if parsed_path.netloc: - if protocol in CLOUD_PROTOCOLS: - host_with_port = parsed_path.netloc.rsplit("@", 1)[-1] - host = host_with_port.rsplit(":", 1)[0] - options["path"] = host + options["path"] + if parsed_path.netloc and protocol in CLOUD_PROTOCOLS: + host_with_port = parsed_path.netloc.rsplit("@", 1)[-1] + host = host_with_port.rsplit(":", 1)[0] + options["path"] = host + options["path"] + # Azure Data Lake Storage Gen2 URIs can store the container name in the + # 'username' field of a URL (@ syntax), so we need to add it to the path + if protocol == "abfss" and parsed_path.username: + options["path"] = parsed_path.username + "@" + options["path"] return options From 860a4ee590374b615aa84c0589b255d9fee59328 Mon Sep 17 00:00:00 2001 From: Miguel Rodriguez Gutierrez Date: Fri, 24 Feb 2023 09:15:59 -0600 Subject: [PATCH 40/44] Fix `s3n` 
and `s3a` bug by adding them to CLOUD_PROTOCOLS (#2326) Signed-off-by: Miguel Rodriguez Gutierrez --- kedro/io/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index 02b3708ed..dc64e83e5 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -28,7 +28,7 @@ VERSION_KEY = "version" HTTP_PROTOCOLS = ("http", "https") PROTOCOL_DELIMITER = "://" -CLOUD_PROTOCOLS = ("s3", "gcs", "gs", "adl", "abfs", "abfss", "gdrive") +CLOUD_PROTOCOLS = ("s3", "s3n", "s3a", "gcs", "gs", "adl", "abfs", "abfss", "gdrive") class DataSetError(Exception): From bd1ebfdeade49acd2a6aa18b4fd4a58163d7a60d Mon Sep 17 00:00:00 2001 From: brandonmeek Date: Thu, 9 Mar 2023 16:21:42 -0600 Subject: [PATCH 41/44] Added kedro/io/core.py kedro/io/__init__.py and kedro/utils.py from patch to keep history. --- .../kedro_datasets}/io/__init__.py | 11 ----------- {kedro => kedro-datasets/kedro_datasets}/io/core.py | 0 {kedro => kedro-datasets/kedro_datasets/io}/utils.py | 0 3 files changed, 11 deletions(-) rename {kedro => kedro-datasets/kedro_datasets}/io/__init__.py (58%) rename {kedro => kedro-datasets/kedro_datasets}/io/core.py (100%) rename {kedro => kedro-datasets/kedro_datasets/io}/utils.py (100%) diff --git a/kedro/io/__init__.py b/kedro-datasets/kedro_datasets/io/__init__.py similarity index 58% rename from kedro/io/__init__.py rename to kedro-datasets/kedro_datasets/io/__init__.py index de6f6e49e..9e37111c0 100644 --- a/kedro/io/__init__.py +++ b/kedro-datasets/kedro_datasets/io/__init__.py @@ -3,7 +3,6 @@ which allows implementation of various ``AbstractDataSet``s. 
""" -from .cached_dataset import CachedDataSet from .core import ( AbstractDataSet, AbstractVersionedDataSet, @@ -12,22 +11,12 @@ DataSetNotFoundError, Version, ) -from .data_catalog import DataCatalog -from .lambda_dataset import LambdaDataSet -from .memory_dataset import MemoryDataSet -from .partitioned_dataset import IncrementalDataSet, PartitionedDataSet __all__ = [ "AbstractDataSet", "AbstractVersionedDataSet", - "CachedDataSet", - "DataCatalog", "DataSetAlreadyExistsError", "DataSetError", "DataSetNotFoundError", - "IncrementalDataSet", - "LambdaDataSet", - "MemoryDataSet", - "PartitionedDataSet", "Version", ] diff --git a/kedro/io/core.py b/kedro-datasets/kedro_datasets/io/core.py similarity index 100% rename from kedro/io/core.py rename to kedro-datasets/kedro_datasets/io/core.py diff --git a/kedro/utils.py b/kedro-datasets/kedro_datasets/io/utils.py similarity index 100% rename from kedro/utils.py rename to kedro-datasets/kedro_datasets/io/utils.py From 85125f27021af6eba948aad5cc156261f904b38f Mon Sep 17 00:00:00 2001 From: brandonmeek Date: Thu, 9 Mar 2023 17:03:27 -0600 Subject: [PATCH 42/44] Make AbstractDataSets from `kedro_datasets` instead of `kedro` --- kedro-datasets/kedro_datasets/api/api_dataset.py | 2 +- .../kedro_datasets/biosequence/biosequence_dataset.py | 2 +- kedro-datasets/kedro_datasets/dask/parquet_dataset.py | 2 +- kedro-datasets/kedro_datasets/email/message_dataset.py | 2 +- kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py | 2 +- kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py | 2 +- kedro-datasets/kedro_datasets/io/core.py | 6 +++--- kedro-datasets/kedro_datasets/json/json_dataset.py | 2 +- .../kedro_datasets/matplotlib/matplotlib_writer.py | 2 +- kedro-datasets/kedro_datasets/networkx/gml_dataset.py | 2 +- kedro-datasets/kedro_datasets/networkx/graphml_dataset.py | 2 +- kedro-datasets/kedro_datasets/networkx/json_dataset.py | 2 +- kedro-datasets/kedro_datasets/pandas/csv_dataset.py | 2 +- 
kedro-datasets/kedro_datasets/pandas/excel_dataset.py | 2 +- kedro-datasets/kedro_datasets/pandas/feather_dataset.py | 2 +- kedro-datasets/kedro_datasets/pandas/gbq_dataset.py | 2 +- kedro-datasets/kedro_datasets/pandas/generic_dataset.py | 2 +- kedro-datasets/kedro_datasets/pandas/hdf_dataset.py | 2 +- kedro-datasets/kedro_datasets/pandas/json_dataset.py | 2 +- kedro-datasets/kedro_datasets/pandas/parquet_dataset.py | 2 +- kedro-datasets/kedro_datasets/pandas/sql_dataset.py | 2 +- kedro-datasets/kedro_datasets/pandas/xml_dataset.py | 2 +- kedro-datasets/kedro_datasets/pickle/pickle_dataset.py | 2 +- kedro-datasets/kedro_datasets/pillow/image_dataset.py | 2 +- kedro-datasets/kedro_datasets/plotly/json_dataset.py | 2 +- kedro-datasets/kedro_datasets/plotly/plotly_dataset.py | 2 +- kedro-datasets/kedro_datasets/polars/csv_dataset.py | 2 +- kedro-datasets/kedro_datasets/redis/redis_dataset.py | 2 +- kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py | 2 +- kedro-datasets/kedro_datasets/spark/deltatable_dataset.py | 2 +- kedro-datasets/kedro_datasets/spark/spark_dataset.py | 2 +- kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py | 2 +- kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py | 2 +- kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py | 2 +- .../kedro_datasets/tensorflow/tensorflow_model_dataset.py | 2 +- kedro-datasets/kedro_datasets/text/text_dataset.py | 2 +- kedro-datasets/kedro_datasets/tracking/json_dataset.py | 2 +- kedro-datasets/kedro_datasets/tracking/metrics_dataset.py | 2 +- kedro-datasets/kedro_datasets/video/video_dataset.py | 2 +- kedro-datasets/kedro_datasets/yaml/yaml_dataset.py | 2 +- kedro-datasets/test_requirements.txt | 1 + kedro-datasets/tests/api/test_api_dataset.py | 2 +- .../tests/bioinformatics/test_biosequence_dataset.py | 4 ++-- kedro-datasets/tests/conftest.py | 2 +- kedro-datasets/tests/dask/test_parquet_dataset.py | 2 +- kedro-datasets/tests/email/test_message_dataset.py | 4 ++-- 
kedro-datasets/tests/geojson/test_geojson_dataset.py | 4 ++-- kedro-datasets/tests/holoviews/test_holoviews_writer.py | 4 ++-- kedro-datasets/tests/json/test_json_dataset.py | 4 ++-- kedro-datasets/tests/libsvm/test_svmlight_dataset.py | 4 ++-- kedro-datasets/tests/matplotlib/test_matplotlib_writer.py | 2 +- kedro-datasets/tests/networkx/test_gml_dataset.py | 4 ++-- kedro-datasets/tests/networkx/test_graphml_dataset.py | 4 ++-- kedro-datasets/tests/networkx/test_json_dataset.py | 4 ++-- kedro-datasets/tests/pandas/test_csv_dataset.py | 4 ++-- kedro-datasets/tests/pandas/test_excel_dataset.py | 4 ++-- kedro-datasets/tests/pandas/test_feather_dataset.py | 4 ++-- kedro-datasets/tests/pandas/test_gbq_dataset.py | 2 +- kedro-datasets/tests/pandas/test_generic_dataset.py | 4 ++-- kedro-datasets/tests/pandas/test_hdf_dataset.py | 4 ++-- kedro-datasets/tests/pandas/test_json_dataset.py | 4 ++-- kedro-datasets/tests/pandas/test_parquet_dataset.py | 4 ++-- kedro-datasets/tests/pandas/test_sql_dataset.py | 2 +- kedro-datasets/tests/pandas/test_xml_dataset.py | 4 ++-- kedro-datasets/tests/pickle/test_pickle_dataset.py | 4 ++-- kedro-datasets/tests/pillow/test_image_dataset.py | 4 ++-- kedro-datasets/tests/plotly/test_json_dataset.py | 4 ++-- kedro-datasets/tests/plotly/test_plotly_dataset.py | 4 ++-- kedro-datasets/tests/polars/test_csv_dataset.py | 4 ++-- kedro-datasets/tests/redis/test_redis_dataset.py | 2 +- kedro-datasets/tests/snowflake/test_snowpark_dataset.py | 2 +- kedro-datasets/tests/spark/test_deltatable_dataset.py | 3 ++- kedro-datasets/tests/spark/test_spark_dataset.py | 5 +++-- kedro-datasets/tests/spark/test_spark_hive_dataset.py | 2 +- kedro-datasets/tests/spark/test_spark_jdbc_dataset.py | 2 +- .../tests/tensorflow/test_tensorflow_model_dataset.py | 4 ++-- kedro-datasets/tests/text/test_text_dataset.py | 4 ++-- kedro-datasets/tests/tracking/test_json_dataset.py | 4 ++-- kedro-datasets/tests/tracking/test_metrics_dataset.py | 4 ++-- 
kedro-datasets/tests/video/test_video_dataset.py | 2 +- kedro-datasets/tests/yaml/test_yaml_dataset.py | 4 ++-- 81 files changed, 113 insertions(+), 110 deletions(-) diff --git a/kedro-datasets/kedro_datasets/api/api_dataset.py b/kedro-datasets/kedro_datasets/api/api_dataset.py index 4f0ffb4cc..cdb950331 100644 --- a/kedro-datasets/kedro_datasets/api/api_dataset.py +++ b/kedro-datasets/kedro_datasets/api/api_dataset.py @@ -4,7 +4,7 @@ from typing import Any, Dict, Iterable, List, NoReturn, Union import requests -from kedro.io.core import AbstractDataSet, DataSetError +from kedro_datasets.io.core import AbstractDataSet, DataSetError from requests.auth import AuthBase diff --git a/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py b/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py index 7c45743da..726e8dbca 100644 --- a/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py +++ b/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py @@ -7,7 +7,7 @@ import fsspec from Bio import SeqIO -from kedro.io.core import AbstractDataSet, get_filepath_str, get_protocol_and_path +from kedro_datasets.io.core import AbstractDataSet, get_filepath_str, get_protocol_and_path class BioSequenceDataSet(AbstractDataSet[List, List]): diff --git a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py index f02144892..3b3959e64 100644 --- a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py +++ b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py @@ -7,7 +7,7 @@ import dask.dataframe as dd import fsspec import triad -from kedro.io.core import AbstractDataSet, get_protocol_and_path +from kedro_datasets.io.core import AbstractDataSet, get_protocol_and_path class ParquetDataSet(AbstractDataSet[dd.DataFrame, dd.DataFrame]): diff --git a/kedro-datasets/kedro_datasets/email/message_dataset.py b/kedro-datasets/kedro_datasets/email/message_dataset.py index 0b8623f63..9ab59cc76 100644 --- 
a/kedro-datasets/kedro_datasets/email/message_dataset.py +++ b/kedro-datasets/kedro_datasets/email/message_dataset.py @@ -11,7 +11,7 @@ from typing import Any, Dict import fsspec -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractVersionedDataSet, DataSetError, Version, diff --git a/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py b/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py index ba9237909..afccf4a04 100644 --- a/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py +++ b/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py @@ -8,7 +8,7 @@ import fsspec import geopandas as gpd -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractVersionedDataSet, DataSetError, Version, diff --git a/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py b/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py index 7f61909b9..e9f12e645 100644 --- a/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py +++ b/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py @@ -8,7 +8,7 @@ import fsspec import holoviews as hv -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractVersionedDataSet, DataSetError, Version, diff --git a/kedro-datasets/kedro_datasets/io/core.py b/kedro-datasets/kedro_datasets/io/core.py index dc64e83e5..b8ac06f7c 100644 --- a/kedro-datasets/kedro_datasets/io/core.py +++ b/kedro-datasets/kedro_datasets/io/core.py @@ -19,7 +19,7 @@ from cachetools import Cache, cachedmethod from cachetools.keys import hashkey -from kedro.utils import load_obj +from .utils import load_obj warnings.simplefilter("default", DeprecationWarning) @@ -82,7 +82,7 @@ class AbstractDataSet(abc.ABC, Generic[_DI, _DO]): >>> from pathlib import Path, PurePosixPath >>> import pandas as pd - >>> from kedro.io import AbstractDataSet + >>> from kedro_datasets.io import AbstractDataSet >>> >>> >>> class MyOwnDataSet(AbstractDataSet[pd.DataFrame, pd.DataFrame]): @@ -465,7 +465,7 @@ 
class AbstractVersionedDataSet(AbstractDataSet[_DI, _DO], abc.ABC): >>> from pathlib import Path, PurePosixPath >>> import pandas as pd - >>> from kedro.io import AbstractVersionedDataSet + >>> from kedro_datasets.io import AbstractVersionedDataSet >>> >>> >>> class MyOwnDataSet(AbstractVersionedDataSet): diff --git a/kedro-datasets/kedro_datasets/json/json_dataset.py b/kedro-datasets/kedro_datasets/json/json_dataset.py index ad86c9a17..58c08a772 100644 --- a/kedro-datasets/kedro_datasets/json/json_dataset.py +++ b/kedro-datasets/kedro_datasets/json/json_dataset.py @@ -7,7 +7,7 @@ from typing import Any, Dict import fsspec -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractVersionedDataSet, DataSetError, Version, diff --git a/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py b/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py index 3fc396cb1..4ea44efd3 100644 --- a/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py +++ b/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py @@ -9,7 +9,7 @@ import fsspec import matplotlib.pyplot as plt -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractVersionedDataSet, DataSetError, Version, diff --git a/kedro-datasets/kedro_datasets/networkx/gml_dataset.py b/kedro-datasets/kedro_datasets/networkx/gml_dataset.py index bc8d4f86f..1b73facd9 100644 --- a/kedro-datasets/kedro_datasets/networkx/gml_dataset.py +++ b/kedro-datasets/kedro_datasets/networkx/gml_dataset.py @@ -9,7 +9,7 @@ import fsspec import networkx -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractVersionedDataSet, Version, get_filepath_str, diff --git a/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py b/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py index 2105fb67f..36230e069 100644 --- a/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py +++ b/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py @@ -8,7 +8,7 @@ import fsspec 
import networkx -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractVersionedDataSet, Version, get_filepath_str, diff --git a/kedro-datasets/kedro_datasets/networkx/json_dataset.py b/kedro-datasets/kedro_datasets/networkx/json_dataset.py index 8cc436721..1dd6ef62c 100644 --- a/kedro-datasets/kedro_datasets/networkx/json_dataset.py +++ b/kedro-datasets/kedro_datasets/networkx/json_dataset.py @@ -9,7 +9,7 @@ import fsspec import networkx -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractVersionedDataSet, Version, get_filepath_str, diff --git a/kedro-datasets/kedro_datasets/pandas/csv_dataset.py b/kedro-datasets/kedro_datasets/pandas/csv_dataset.py index 7b20813f3..cebefc2b4 100644 --- a/kedro-datasets/kedro_datasets/pandas/csv_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/csv_dataset.py @@ -9,7 +9,7 @@ import fsspec import pandas as pd -from kedro.io.core import ( +from kedro_datasets.io.core import ( PROTOCOL_DELIMITER, AbstractVersionedDataSet, DataSetError, diff --git a/kedro-datasets/kedro_datasets/pandas/excel_dataset.py b/kedro-datasets/kedro_datasets/pandas/excel_dataset.py index 4a981bc11..729b48815 100644 --- a/kedro-datasets/kedro_datasets/pandas/excel_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/excel_dataset.py @@ -9,7 +9,7 @@ import fsspec import pandas as pd -from kedro.io.core import ( +from kedro_datasets.io.core import ( PROTOCOL_DELIMITER, AbstractVersionedDataSet, DataSetError, diff --git a/kedro-datasets/kedro_datasets/pandas/feather_dataset.py b/kedro-datasets/kedro_datasets/pandas/feather_dataset.py index 1116d4168..c6003b02b 100644 --- a/kedro-datasets/kedro_datasets/pandas/feather_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/feather_dataset.py @@ -10,7 +10,7 @@ import fsspec import pandas as pd -from kedro.io.core import ( +from kedro_datasets.io.core import ( PROTOCOL_DELIMITER, AbstractVersionedDataSet, Version, diff --git 
a/kedro-datasets/kedro_datasets/pandas/gbq_dataset.py b/kedro-datasets/kedro_datasets/pandas/gbq_dataset.py index c0122a6c0..efeb27b60 100644 --- a/kedro-datasets/kedro_datasets/pandas/gbq_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/gbq_dataset.py @@ -11,7 +11,7 @@ from google.cloud import bigquery from google.cloud.exceptions import NotFound from google.oauth2.credentials import Credentials -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractDataSet, DataSetError, get_filepath_str, diff --git a/kedro-datasets/kedro_datasets/pandas/generic_dataset.py b/kedro-datasets/kedro_datasets/pandas/generic_dataset.py index 86e347d70..8c449e298 100644 --- a/kedro-datasets/kedro_datasets/pandas/generic_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/generic_dataset.py @@ -8,7 +8,7 @@ import fsspec import pandas as pd -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractVersionedDataSet, DataSetError, Version, diff --git a/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py b/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py index f11fe320f..fcb54fdb8 100644 --- a/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py @@ -8,7 +8,7 @@ import fsspec import pandas as pd -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractVersionedDataSet, DataSetError, Version, diff --git a/kedro-datasets/kedro_datasets/pandas/json_dataset.py b/kedro-datasets/kedro_datasets/pandas/json_dataset.py index d29ef57bd..7e0f51a00 100644 --- a/kedro-datasets/kedro_datasets/pandas/json_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/json_dataset.py @@ -9,7 +9,7 @@ import fsspec import pandas as pd -from kedro.io.core import ( +from kedro_datasets.io.core import ( PROTOCOL_DELIMITER, AbstractVersionedDataSet, DataSetError, diff --git a/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py b/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py index 
acb478bd9..d4c48c8a1 100644 --- a/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py @@ -9,7 +9,7 @@ import fsspec import pandas as pd -from kedro.io.core import ( +from kedro_datasets.io.core import ( PROTOCOL_DELIMITER, AbstractVersionedDataSet, DataSetError, diff --git a/kedro-datasets/kedro_datasets/pandas/sql_dataset.py b/kedro-datasets/kedro_datasets/pandas/sql_dataset.py index dd5d636a1..fe9ff8e4c 100644 --- a/kedro-datasets/kedro_datasets/pandas/sql_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/sql_dataset.py @@ -8,7 +8,7 @@ import fsspec import pandas as pd -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractDataSet, DataSetError, get_filepath_str, diff --git a/kedro-datasets/kedro_datasets/pandas/xml_dataset.py b/kedro-datasets/kedro_datasets/pandas/xml_dataset.py index ca8fc0dd2..35fb4d54e 100644 --- a/kedro-datasets/kedro_datasets/pandas/xml_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/xml_dataset.py @@ -9,7 +9,7 @@ import fsspec import pandas as pd -from kedro.io.core import ( +from kedro_datasets.io.core import ( PROTOCOL_DELIMITER, AbstractVersionedDataSet, DataSetError, diff --git a/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py b/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py index 436fba29a..c6f0e9cd9 100644 --- a/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py +++ b/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py @@ -9,7 +9,7 @@ from typing import Any, Dict import fsspec -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractVersionedDataSet, DataSetError, Version, diff --git a/kedro-datasets/kedro_datasets/pillow/image_dataset.py b/kedro-datasets/kedro_datasets/pillow/image_dataset.py index ca939b722..8083d04cd 100644 --- a/kedro-datasets/kedro_datasets/pillow/image_dataset.py +++ b/kedro-datasets/kedro_datasets/pillow/image_dataset.py @@ -6,7 +6,7 @@ from typing import Any, Dict import 
fsspec -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractVersionedDataSet, DataSetError, Version, diff --git a/kedro-datasets/kedro_datasets/plotly/json_dataset.py b/kedro-datasets/kedro_datasets/plotly/json_dataset.py index f819dd338..f28123361 100644 --- a/kedro-datasets/kedro_datasets/plotly/json_dataset.py +++ b/kedro-datasets/kedro_datasets/plotly/json_dataset.py @@ -7,7 +7,7 @@ import fsspec import plotly.io as pio -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractVersionedDataSet, Version, get_filepath_str, diff --git a/kedro-datasets/kedro_datasets/plotly/plotly_dataset.py b/kedro-datasets/kedro_datasets/plotly/plotly_dataset.py index 1bb0acef6..11d638cf6 100644 --- a/kedro-datasets/kedro_datasets/plotly/plotly_dataset.py +++ b/kedro-datasets/kedro_datasets/plotly/plotly_dataset.py @@ -7,7 +7,7 @@ import pandas as pd import plotly.express as px -from kedro.io.core import Version +from kedro_datasets.io.core import Version from plotly import graph_objects as go from .json_dataset import JSONDataSet diff --git a/kedro-datasets/kedro_datasets/polars/csv_dataset.py b/kedro-datasets/kedro_datasets/polars/csv_dataset.py index 60a0d456a..52bb96a1e 100644 --- a/kedro-datasets/kedro_datasets/polars/csv_dataset.py +++ b/kedro-datasets/kedro_datasets/polars/csv_dataset.py @@ -9,7 +9,7 @@ import fsspec import polars as pl -from kedro.io.core import ( +from kedro_datasets.io.core import ( PROTOCOL_DELIMITER, AbstractVersionedDataSet, DataSetError, diff --git a/kedro-datasets/kedro_datasets/redis/redis_dataset.py b/kedro-datasets/kedro_datasets/redis/redis_dataset.py index 6d2f80df9..085ed68aa 100644 --- a/kedro-datasets/kedro_datasets/redis/redis_dataset.py +++ b/kedro-datasets/kedro_datasets/redis/redis_dataset.py @@ -8,7 +8,7 @@ from typing import Any, Dict import redis -from kedro.io.core import AbstractDataSet, DataSetError +from kedro_datasets.io.core import AbstractDataSet, DataSetError class 
PickleDataSet(AbstractDataSet[Any, Any]): diff --git a/kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py b/kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py index e0ea1c1db..3746681d1 100644 --- a/kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py +++ b/kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py @@ -5,7 +5,7 @@ from typing import Any, Dict import snowflake.snowpark as sp -from kedro.io.core import AbstractDataSet, DataSetError +from kedro_datasets.io.core import AbstractDataSet, DataSetError logger = logging.getLogger(__name__) diff --git a/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py b/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py index 34ee6f6a5..15fda12e9 100644 --- a/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py +++ b/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py @@ -5,7 +5,7 @@ from typing import NoReturn from delta.tables import DeltaTable -from kedro.io.core import AbstractDataSet, DataSetError +from kedro_datasets.io.core import AbstractDataSet, DataSetError from pyspark.sql import SparkSession from pyspark.sql.utils import AnalysisException diff --git a/kedro-datasets/kedro_datasets/spark/spark_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_dataset.py index d366eae08..d2cf111f0 100644 --- a/kedro-datasets/kedro_datasets/spark/spark_dataset.py +++ b/kedro-datasets/kedro_datasets/spark/spark_dataset.py @@ -13,7 +13,7 @@ import fsspec from hdfs import HdfsError, InsecureClient -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractVersionedDataSet, DataSetError, Version, diff --git a/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py index 08b0666ea..1c8e3d6bf 100644 --- a/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py +++ b/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py @@ -5,7 +5,7 @@ from copy import deepcopy from typing import Any, Dict, List 
-from kedro.io.core import AbstractDataSet, DataSetError +from kedro_datasets.io.core import AbstractDataSet, DataSetError from pyspark.sql import DataFrame, SparkSession, Window from pyspark.sql.functions import col, lit, row_number diff --git a/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py index aab501f26..d02b2124a 100644 --- a/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py +++ b/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py @@ -3,7 +3,7 @@ from copy import deepcopy from typing import Any, Dict -from kedro.io.core import AbstractDataSet, DataSetError +from kedro_datasets.io.core import AbstractDataSet, DataSetError from pyspark.sql import DataFrame, SparkSession __all__ = ["SparkJDBCDataSet"] diff --git a/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py b/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py index f909c1976..baf34f8cb 100644 --- a/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py +++ b/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py @@ -7,7 +7,7 @@ from typing import Any, Dict, Optional, Tuple, Union import fsspec -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractVersionedDataSet, DataSetError, Version, diff --git a/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py b/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py index 544aadb06..1683cb7ec 100644 --- a/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py +++ b/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py @@ -8,7 +8,7 @@ import fsspec import tensorflow as tf -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractVersionedDataSet, DataSetError, Version, diff --git a/kedro-datasets/kedro_datasets/text/text_dataset.py b/kedro-datasets/kedro_datasets/text/text_dataset.py index 0bb559e29..3552d463c 100644 --- 
a/kedro-datasets/kedro_datasets/text/text_dataset.py +++ b/kedro-datasets/kedro_datasets/text/text_dataset.py @@ -6,7 +6,7 @@ from typing import Any, Dict import fsspec -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractVersionedDataSet, DataSetError, Version, diff --git a/kedro-datasets/kedro_datasets/tracking/json_dataset.py b/kedro-datasets/kedro_datasets/tracking/json_dataset.py index 4235df999..2f7e8f4a5 100644 --- a/kedro-datasets/kedro_datasets/tracking/json_dataset.py +++ b/kedro-datasets/kedro_datasets/tracking/json_dataset.py @@ -4,7 +4,7 @@ """ from typing import NoReturn -from kedro.io.core import DataSetError +from kedro_datasets.io.core import DataSetError from kedro_datasets.json import JSONDataSet as JDS diff --git a/kedro-datasets/kedro_datasets/tracking/metrics_dataset.py b/kedro-datasets/kedro_datasets/tracking/metrics_dataset.py index 7c7546a85..b63b3abfd 100644 --- a/kedro-datasets/kedro_datasets/tracking/metrics_dataset.py +++ b/kedro-datasets/kedro_datasets/tracking/metrics_dataset.py @@ -6,7 +6,7 @@ import json from typing import Dict, NoReturn -from kedro.io.core import DataSetError, get_filepath_str +from kedro_datasets.io.core import DataSetError, get_filepath_str from kedro_datasets.json import JSONDataSet diff --git a/kedro-datasets/kedro_datasets/video/video_dataset.py b/kedro-datasets/kedro_datasets/video/video_dataset.py index 03311146d..69e19f479 100644 --- a/kedro-datasets/kedro_datasets/video/video_dataset.py +++ b/kedro-datasets/kedro_datasets/video/video_dataset.py @@ -13,7 +13,7 @@ import fsspec import numpy as np import PIL.Image -from kedro.io.core import AbstractDataSet, get_protocol_and_path +from kedro_datasets.io.core import AbstractDataSet, get_protocol_and_path class SlicedVideo: diff --git a/kedro-datasets/kedro_datasets/yaml/yaml_dataset.py b/kedro-datasets/kedro_datasets/yaml/yaml_dataset.py index f2a3c2696..45009cad1 100644 --- a/kedro-datasets/kedro_datasets/yaml/yaml_dataset.py +++ 
b/kedro-datasets/kedro_datasets/yaml/yaml_dataset.py @@ -7,7 +7,7 @@ import fsspec import yaml -from kedro.io.core import ( +from kedro_datasets.io.core import ( AbstractVersionedDataSet, DataSetError, Version, diff --git a/kedro-datasets/test_requirements.txt b/kedro-datasets/test_requirements.txt index c3baae0c7..f3a83283c 100644 --- a/kedro-datasets/test_requirements.txt +++ b/kedro-datasets/test_requirements.txt @@ -20,6 +20,7 @@ Jinja2<3.1.0 joblib>=0.14 jupyterlab~=3.0 jupyter~=1.0 +kedro~=0.18.4 lxml~=4.6 matplotlib>=3.0.3, <3.4; python_version < '3.10' # 3.4.0 breaks holoviews matplotlib>=3.5, <3.6; python_version == '3.10' diff --git a/kedro-datasets/tests/api/test_api_dataset.py b/kedro-datasets/tests/api/test_api_dataset.py index c84290750..9de4604a5 100644 --- a/kedro-datasets/tests/api/test_api_dataset.py +++ b/kedro-datasets/tests/api/test_api_dataset.py @@ -5,7 +5,7 @@ import pytest import requests import requests_mock -from kedro.io.core import DataSetError +from kedro_datasets.io.core import DataSetError from kedro_datasets.api import APIDataSet diff --git a/kedro-datasets/tests/bioinformatics/test_biosequence_dataset.py b/kedro-datasets/tests/bioinformatics/test_biosequence_dataset.py index 24666baaf..710791d28 100644 --- a/kedro-datasets/tests/bioinformatics/test_biosequence_dataset.py +++ b/kedro-datasets/tests/bioinformatics/test_biosequence_dataset.py @@ -6,8 +6,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER from s3fs.core import S3FileSystem from kedro_datasets.biosequence import BioSequenceDataSet diff --git a/kedro-datasets/tests/conftest.py b/kedro-datasets/tests/conftest.py index 91d19f646..fc2916923 100644 --- a/kedro-datasets/tests/conftest.py +++ 
b/kedro-datasets/tests/conftest.py @@ -5,7 +5,7 @@ https://docs.pytest.org/en/latest/fixture.html """ -from kedro.io.core import generate_timestamp +from kedro_datasets.io.core import generate_timestamp from pytest import fixture diff --git a/kedro-datasets/tests/dask/test_parquet_dataset.py b/kedro-datasets/tests/dask/test_parquet_dataset.py index a2d89f060..8f41f5d38 100644 --- a/kedro-datasets/tests/dask/test_parquet_dataset.py +++ b/kedro-datasets/tests/dask/test_parquet_dataset.py @@ -4,7 +4,7 @@ import pyarrow as pa import pyarrow.parquet as pq import pytest -from kedro.io import DataSetError +from kedro_datasets.io import DataSetError from moto import mock_s3 from pandas.util.testing import assert_frame_equal from s3fs import S3FileSystem diff --git a/kedro-datasets/tests/email/test_message_dataset.py b/kedro-datasets/tests/email/test_message_dataset.py index 100daba52..3b8945aaa 100644 --- a/kedro-datasets/tests/email/test_message_dataset.py +++ b/kedro-datasets/tests/email/test_message_dataset.py @@ -6,8 +6,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER, Version +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version from s3fs.core import S3FileSystem from kedro_datasets.email import EmailMessageDataSet diff --git a/kedro-datasets/tests/geojson/test_geojson_dataset.py b/kedro-datasets/tests/geojson/test_geojson_dataset.py index 52fd292ff..cefc61e9c 100644 --- a/kedro-datasets/tests/geojson/test_geojson_dataset.py +++ b/kedro-datasets/tests/geojson/test_geojson_dataset.py @@ -5,8 +5,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER, Version, 
generate_timestamp +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version, generate_timestamp from pandas.util.testing import assert_frame_equal from s3fs import S3FileSystem from shapely.geometry import Point diff --git a/kedro-datasets/tests/holoviews/test_holoviews_writer.py b/kedro-datasets/tests/holoviews/test_holoviews_writer.py index f4f91383e..9eb013f49 100644 --- a/kedro-datasets/tests/holoviews/test_holoviews_writer.py +++ b/kedro-datasets/tests/holoviews/test_holoviews_writer.py @@ -7,8 +7,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError, Version -from kedro.io.core import PROTOCOL_DELIMITER +from kedro_datasets.io import DataSetError, Version +from kedro_datasets.io.core import PROTOCOL_DELIMITER from s3fs.core import S3FileSystem from kedro_datasets.holoviews import HoloviewsWriter diff --git a/kedro-datasets/tests/json/test_json_dataset.py b/kedro-datasets/tests/json/test_json_dataset.py index 621e51fcd..35e923a3e 100644 --- a/kedro-datasets/tests/json/test_json_dataset.py +++ b/kedro-datasets/tests/json/test_json_dataset.py @@ -4,8 +4,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER, Version +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version from s3fs.core import S3FileSystem from kedro_datasets.json import JSONDataSet diff --git a/kedro-datasets/tests/libsvm/test_svmlight_dataset.py b/kedro-datasets/tests/libsvm/test_svmlight_dataset.py index 8fff3edd2..e20cf2f4d 100644 --- a/kedro-datasets/tests/libsvm/test_svmlight_dataset.py +++ b/kedro-datasets/tests/libsvm/test_svmlight_dataset.py @@ -5,8 +5,8 @@ from 
fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER, Version +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version from s3fs.core import S3FileSystem from kedro_datasets.svmlight import SVMLightDataSet diff --git a/kedro-datasets/tests/matplotlib/test_matplotlib_writer.py b/kedro-datasets/tests/matplotlib/test_matplotlib_writer.py index 0745452c6..d25631199 100644 --- a/kedro-datasets/tests/matplotlib/test_matplotlib_writer.py +++ b/kedro-datasets/tests/matplotlib/test_matplotlib_writer.py @@ -5,7 +5,7 @@ import matplotlib import matplotlib.pyplot as plt import pytest -from kedro.io import DataSetError, Version +from kedro_datasets.io import DataSetError, Version from moto import mock_s3 from s3fs import S3FileSystem diff --git a/kedro-datasets/tests/networkx/test_gml_dataset.py b/kedro-datasets/tests/networkx/test_gml_dataset.py index a3a89eca7..5a2ac51c8 100644 --- a/kedro-datasets/tests/networkx/test_gml_dataset.py +++ b/kedro-datasets/tests/networkx/test_gml_dataset.py @@ -5,8 +5,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError, Version -from kedro.io.core import PROTOCOL_DELIMITER +from kedro_datasets.io import DataSetError, Version +from kedro_datasets.io.core import PROTOCOL_DELIMITER from s3fs.core import S3FileSystem from kedro_datasets.networkx import GMLDataSet diff --git a/kedro-datasets/tests/networkx/test_graphml_dataset.py b/kedro-datasets/tests/networkx/test_graphml_dataset.py index 4e0dcf40d..fea1796dd 100644 --- a/kedro-datasets/tests/networkx/test_graphml_dataset.py +++ b/kedro-datasets/tests/networkx/test_graphml_dataset.py @@ -5,8 +5,8 @@ from fsspec.implementations.http import HTTPFileSystem 
from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError, Version -from kedro.io.core import PROTOCOL_DELIMITER +from kedro_datasets.io import DataSetError, Version +from kedro_datasets.io.core import PROTOCOL_DELIMITER from s3fs.core import S3FileSystem from kedro_datasets.networkx import GraphMLDataSet diff --git a/kedro-datasets/tests/networkx/test_json_dataset.py b/kedro-datasets/tests/networkx/test_json_dataset.py index 4d6e582a8..ca1d59933 100644 --- a/kedro-datasets/tests/networkx/test_json_dataset.py +++ b/kedro-datasets/tests/networkx/test_json_dataset.py @@ -5,8 +5,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError, Version -from kedro.io.core import PROTOCOL_DELIMITER +from kedro_datasets.io import DataSetError, Version +from kedro_datasets.io.core import PROTOCOL_DELIMITER from s3fs.core import S3FileSystem from kedro_datasets.networkx import JSONDataSet diff --git a/kedro-datasets/tests/pandas/test_csv_dataset.py b/kedro-datasets/tests/pandas/test_csv_dataset.py index 267144ecc..92fb6d046 100644 --- a/kedro-datasets/tests/pandas/test_csv_dataset.py +++ b/kedro-datasets/tests/pandas/test_csv_dataset.py @@ -10,8 +10,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER, Version, generate_timestamp +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version, generate_timestamp from moto import mock_s3 from pandas.testing import assert_frame_equal from s3fs.core import S3FileSystem diff --git a/kedro-datasets/tests/pandas/test_excel_dataset.py b/kedro-datasets/tests/pandas/test_excel_dataset.py index c568d15d0..c88d826a8 100644 --- 
a/kedro-datasets/tests/pandas/test_excel_dataset.py +++ b/kedro-datasets/tests/pandas/test_excel_dataset.py @@ -5,8 +5,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER, Version +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version from pandas.testing import assert_frame_equal from s3fs.core import S3FileSystem diff --git a/kedro-datasets/tests/pandas/test_feather_dataset.py b/kedro-datasets/tests/pandas/test_feather_dataset.py index 80c1ce678..1acbc4295 100644 --- a/kedro-datasets/tests/pandas/test_feather_dataset.py +++ b/kedro-datasets/tests/pandas/test_feather_dataset.py @@ -5,8 +5,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER, Version +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version from pandas.testing import assert_frame_equal from s3fs.core import S3FileSystem diff --git a/kedro-datasets/tests/pandas/test_gbq_dataset.py b/kedro-datasets/tests/pandas/test_gbq_dataset.py index e239dbaba..fcdacffc2 100644 --- a/kedro-datasets/tests/pandas/test_gbq_dataset.py +++ b/kedro-datasets/tests/pandas/test_gbq_dataset.py @@ -3,7 +3,7 @@ import pandas as pd import pytest from google.cloud.exceptions import NotFound -from kedro.io.core import DataSetError +from kedro_datasets.io.core import DataSetError from pandas.testing import assert_frame_equal from kedro_datasets.pandas import GBQQueryDataSet, GBQTableDataSet diff --git a/kedro-datasets/tests/pandas/test_generic_dataset.py b/kedro-datasets/tests/pandas/test_generic_dataset.py index 6f40bb0d4..f05027ff8 100644 --- 
a/kedro-datasets/tests/pandas/test_generic_dataset.py +++ b/kedro-datasets/tests/pandas/test_generic_dataset.py @@ -7,8 +7,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError, Version -from kedro.io.core import PROTOCOL_DELIMITER, generate_timestamp +from kedro_datasets.io import DataSetError, Version +from kedro_datasets.io.core import PROTOCOL_DELIMITER, generate_timestamp from pandas._testing import assert_frame_equal from s3fs import S3FileSystem diff --git a/kedro-datasets/tests/pandas/test_hdf_dataset.py b/kedro-datasets/tests/pandas/test_hdf_dataset.py index 563ba63d9..d2c20a3d4 100644 --- a/kedro-datasets/tests/pandas/test_hdf_dataset.py +++ b/kedro-datasets/tests/pandas/test_hdf_dataset.py @@ -5,8 +5,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER, Version +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version from pandas.testing import assert_frame_equal from s3fs.core import S3FileSystem diff --git a/kedro-datasets/tests/pandas/test_json_dataset.py b/kedro-datasets/tests/pandas/test_json_dataset.py index df2e856d5..797a42770 100644 --- a/kedro-datasets/tests/pandas/test_json_dataset.py +++ b/kedro-datasets/tests/pandas/test_json_dataset.py @@ -6,8 +6,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER, Version +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version from pandas.testing import assert_frame_equal from s3fs.core import S3FileSystem diff --git 
a/kedro-datasets/tests/pandas/test_parquet_dataset.py b/kedro-datasets/tests/pandas/test_parquet_dataset.py index 2d7ce2996..a4963b36a 100644 --- a/kedro-datasets/tests/pandas/test_parquet_dataset.py +++ b/kedro-datasets/tests/pandas/test_parquet_dataset.py @@ -5,8 +5,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER, Version +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version from pandas.testing import assert_frame_equal from pyarrow.fs import FSSpecHandler, PyFileSystem from s3fs.core import S3FileSystem diff --git a/kedro-datasets/tests/pandas/test_sql_dataset.py b/kedro-datasets/tests/pandas/test_sql_dataset.py index aa9fe8d17..da8982588 100644 --- a/kedro-datasets/tests/pandas/test_sql_dataset.py +++ b/kedro-datasets/tests/pandas/test_sql_dataset.py @@ -5,7 +5,7 @@ import pandas as pd import pytest import sqlalchemy -from kedro.io import DataSetError +from kedro_datasets.io import DataSetError from kedro_datasets.pandas import SQLQueryDataSet, SQLTableDataSet diff --git a/kedro-datasets/tests/pandas/test_xml_dataset.py b/kedro-datasets/tests/pandas/test_xml_dataset.py index bd62ea586..645d2eb6c 100644 --- a/kedro-datasets/tests/pandas/test_xml_dataset.py +++ b/kedro-datasets/tests/pandas/test_xml_dataset.py @@ -6,8 +6,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER, Version +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version from pandas.testing import assert_frame_equal from s3fs.core import S3FileSystem diff --git a/kedro-datasets/tests/pickle/test_pickle_dataset.py 
b/kedro-datasets/tests/pickle/test_pickle_dataset.py index fb95681a3..c444bdf49 100644 --- a/kedro-datasets/tests/pickle/test_pickle_dataset.py +++ b/kedro-datasets/tests/pickle/test_pickle_dataset.py @@ -6,8 +6,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER, Version +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version from pandas.testing import assert_frame_equal from s3fs.core import S3FileSystem diff --git a/kedro-datasets/tests/pillow/test_image_dataset.py b/kedro-datasets/tests/pillow/test_image_dataset.py index ea500b20d..2575065cd 100644 --- a/kedro-datasets/tests/pillow/test_image_dataset.py +++ b/kedro-datasets/tests/pillow/test_image_dataset.py @@ -4,8 +4,8 @@ import pytest from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER, Version, generate_timestamp +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version, generate_timestamp from PIL import Image, ImageChops from s3fs.core import S3FileSystem diff --git a/kedro-datasets/tests/plotly/test_json_dataset.py b/kedro-datasets/tests/plotly/test_json_dataset.py index ab6e17d9c..328fa5d88 100644 --- a/kedro-datasets/tests/plotly/test_json_dataset.py +++ b/kedro-datasets/tests/plotly/test_json_dataset.py @@ -6,8 +6,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER from s3fs.core import S3FileSystem from 
kedro_datasets.plotly import JSONDataSet diff --git a/kedro-datasets/tests/plotly/test_plotly_dataset.py b/kedro-datasets/tests/plotly/test_plotly_dataset.py index a422060e8..254a1dc7f 100644 --- a/kedro-datasets/tests/plotly/test_plotly_dataset.py +++ b/kedro-datasets/tests/plotly/test_plotly_dataset.py @@ -6,8 +6,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER from plotly import graph_objects from plotly.graph_objs import Scatter from s3fs.core import S3FileSystem diff --git a/kedro-datasets/tests/polars/test_csv_dataset.py b/kedro-datasets/tests/polars/test_csv_dataset.py index 8b05a2025..46f23c9af 100644 --- a/kedro-datasets/tests/polars/test_csv_dataset.py +++ b/kedro-datasets/tests/polars/test_csv_dataset.py @@ -10,8 +10,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER, Version, generate_timestamp +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version, generate_timestamp from moto import mock_s3 from polars.testing import assert_frame_equal from s3fs.core import S3FileSystem diff --git a/kedro-datasets/tests/redis/test_redis_dataset.py b/kedro-datasets/tests/redis/test_redis_dataset.py index eaa8abbd2..ecc30af18 100644 --- a/kedro-datasets/tests/redis/test_redis_dataset.py +++ b/kedro-datasets/tests/redis/test_redis_dataset.py @@ -7,7 +7,7 @@ import pandas as pd import pytest import redis -from kedro.io import DataSetError +from kedro_datasets.io import DataSetError from pandas.testing import assert_frame_equal from kedro_datasets.redis import 
PickleDataSet diff --git a/kedro-datasets/tests/snowflake/test_snowpark_dataset.py b/kedro-datasets/tests/snowflake/test_snowpark_dataset.py index 2133953b5..929e849e1 100644 --- a/kedro-datasets/tests/snowflake/test_snowpark_dataset.py +++ b/kedro-datasets/tests/snowflake/test_snowpark_dataset.py @@ -2,7 +2,7 @@ import os import pytest -from kedro.io import DataSetError +from kedro_datasets.io import DataSetError try: import snowflake.snowpark as sp diff --git a/kedro-datasets/tests/spark/test_deltatable_dataset.py b/kedro-datasets/tests/spark/test_deltatable_dataset.py index 5cbbe62b7..651c2c876 100644 --- a/kedro-datasets/tests/spark/test_deltatable_dataset.py +++ b/kedro-datasets/tests/spark/test_deltatable_dataset.py @@ -1,6 +1,7 @@ import pytest from delta import DeltaTable -from kedro.io import DataCatalog, DataSetError +from kedro.io import DataCatalog +from kedro_datasets.io import DataSetError from kedro.pipeline import node from kedro.pipeline.modular_pipeline import pipeline as modular_pipeline from kedro.runner import ParallelRunner diff --git a/kedro-datasets/tests/spark/test_spark_dataset.py b/kedro-datasets/tests/spark/test_spark_dataset.py index 74c5ee2bf..1c45e37e5 100644 --- a/kedro-datasets/tests/spark/test_spark_dataset.py +++ b/kedro-datasets/tests/spark/test_spark_dataset.py @@ -7,8 +7,9 @@ import boto3 import pandas as pd import pytest -from kedro.io import DataCatalog, DataSetError, Version -from kedro.io.core import generate_timestamp +from kedro.io import DataCatalog +from kedro_datasets.io import DataSetError, Version +from kedro_datasets.io.core import generate_timestamp from kedro.pipeline import node from kedro.pipeline.modular_pipeline import pipeline as modular_pipeline from kedro.runner import ParallelRunner, SequentialRunner diff --git a/kedro-datasets/tests/spark/test_spark_hive_dataset.py b/kedro-datasets/tests/spark/test_spark_hive_dataset.py index e0b8fc333..47cbc41a1 100644 --- 
a/kedro-datasets/tests/spark/test_spark_hive_dataset.py +++ b/kedro-datasets/tests/spark/test_spark_hive_dataset.py @@ -4,7 +4,7 @@ from tempfile import TemporaryDirectory import pytest -from kedro.io import DataSetError +from kedro_datasets.io import DataSetError from psutil import Popen from pyspark import SparkContext from pyspark.sql import SparkSession diff --git a/kedro-datasets/tests/spark/test_spark_jdbc_dataset.py b/kedro-datasets/tests/spark/test_spark_jdbc_dataset.py index 0f3d0e66b..f8e0949e0 100644 --- a/kedro-datasets/tests/spark/test_spark_jdbc_dataset.py +++ b/kedro-datasets/tests/spark/test_spark_jdbc_dataset.py @@ -1,7 +1,7 @@ from unittest import mock import pytest -from kedro.io import DataSetError +from kedro_datasets.io import DataSetError from kedro_datasets.spark import SparkJDBCDataSet diff --git a/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py b/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py index 26d421853..12928efb2 100644 --- a/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py +++ b/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py @@ -6,8 +6,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER, Version +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version from s3fs import S3FileSystem diff --git a/kedro-datasets/tests/text/test_text_dataset.py b/kedro-datasets/tests/text/test_text_dataset.py index 733cc6c1f..8b09b17ad 100644 --- a/kedro-datasets/tests/text/test_text_dataset.py +++ b/kedro-datasets/tests/text/test_text_dataset.py @@ -4,8 +4,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core 
import PROTOCOL_DELIMITER, Version +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version from s3fs.core import S3FileSystem from kedro_datasets.text import TextDataSet diff --git a/kedro-datasets/tests/tracking/test_json_dataset.py b/kedro-datasets/tests/tracking/test_json_dataset.py index 62172b1a4..7c1a87924 100644 --- a/kedro-datasets/tests/tracking/test_json_dataset.py +++ b/kedro-datasets/tests/tracking/test_json_dataset.py @@ -4,8 +4,8 @@ import pytest from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER, Version +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version from s3fs.core import S3FileSystem from kedro_datasets.tracking import JSONDataSet diff --git a/kedro-datasets/tests/tracking/test_metrics_dataset.py b/kedro-datasets/tests/tracking/test_metrics_dataset.py index 2c1157de9..8e9c6796c 100644 --- a/kedro-datasets/tests/tracking/test_metrics_dataset.py +++ b/kedro-datasets/tests/tracking/test_metrics_dataset.py @@ -4,8 +4,8 @@ import pytest from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER, Version +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version from s3fs.core import S3FileSystem from kedro_datasets.tracking import MetricsDataSet diff --git a/kedro-datasets/tests/video/test_video_dataset.py b/kedro-datasets/tests/video/test_video_dataset.py index 1ac3d1ce4..c20c5aa65 100644 --- a/kedro-datasets/tests/video/test_video_dataset.py +++ b/kedro-datasets/tests/video/test_video_dataset.py @@ -1,6 +1,6 @@ import boto3 import pytest -from kedro.io import DataSetError +from kedro_datasets.io import DataSetError from moto import mock_s3 from utils import TEST_FPS, 
assert_videos_equal diff --git a/kedro-datasets/tests/yaml/test_yaml_dataset.py b/kedro-datasets/tests/yaml/test_yaml_dataset.py index 653606c17..00a72da1d 100644 --- a/kedro-datasets/tests/yaml/test_yaml_dataset.py +++ b/kedro-datasets/tests/yaml/test_yaml_dataset.py @@ -5,8 +5,8 @@ from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem -from kedro.io import DataSetError -from kedro.io.core import PROTOCOL_DELIMITER, Version +from kedro_datasets.io import DataSetError +from kedro_datasets.io.core import PROTOCOL_DELIMITER, Version from pandas.testing import assert_frame_equal from s3fs.core import S3FileSystem From 389d34e634489c9c237b9b03a387840a71c76b7b Mon Sep 17 00:00:00 2001 From: brandonmeek Date: Thu, 9 Mar 2023 17:07:06 -0600 Subject: [PATCH 43/44] Removed `kedro~=0.18.4` as requirement, added `cachetools~=5.3` --- kedro-datasets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kedro-datasets/requirements.txt b/kedro-datasets/requirements.txt index b5edbb617..5e7b5e454 100644 --- a/kedro-datasets/requirements.txt +++ b/kedro-datasets/requirements.txt @@ -1 +1 @@ -kedro~=0.18.4 +cachetools~=5.3 \ No newline at end of file From 6cc3b099081148b7db82bb52bd0dd6e33df6170d Mon Sep 17 00:00:00 2001 From: brandonmeek Date: Thu, 9 Mar 2023 17:18:45 -0600 Subject: [PATCH 44/44] Updated RELEASE.md to reflect changes --- kedro-datasets/RELEASE.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index 3e108e7f4..640244c8e 100644 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -11,6 +11,10 @@ | `polars.CSVDataSet` | A `CSVDataSet` backed by [polars](https://www.pola.rs/), a lighting fast dataframe package built entirely using Rust. 
| `kedro_datasets.polars` | | `snowflake.SnowparkTableDataSet` | Work with [Snowpark](https://www.snowflake.com/en/data-cloud/snowpark/) DataFrames from tables in Snowflake. | `kedro_datasets.snowflake` | +* Vendored `kedro.io.core` and `kedro.utils` into `kedro_datasets.io.core` and `kedro_datasets.io.utils` respectively, +allowing implementations of `AbstractDataSet` and `AbstractVersionedDataSet` to be shared with +and used by non-Kedro users. + ## Bug fixes and other changes * Add `mssql` backend to the `SQLQueryDataSet` DataSet using `pyodbc` library.