From f135dd94823b1712d69e1a43803f0ab7075fa031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?= Date: Mon, 22 May 2023 09:09:12 +0200 Subject: [PATCH] Consolidate all requirements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix gh-2588. Signed-off-by: Juan Luis Cano Rodríguez --- .circleci/continue_config.yml | 12 ++--- .gitpod.yml | 3 +- .readthedocs.yml | 2 +- MANIFEST.in | 2 - Makefile | 2 +- dependency/requirements.txt | 24 ---------- features/windows_reqs.txt | 2 +- pyproject.toml | 29 +++++++++++- setup.py | 84 ++++++++++++++++++++++++++++++----- test_requirements.txt | 64 -------------------------- 10 files changed, 109 insertions(+), 115 deletions(-) delete mode 100644 dependency/requirements.txt delete mode 100644 test_requirements.txt diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml index cedab70234..c83d6615cb 100644 --- a/.circleci/continue_config.yml +++ b/.circleci/continue_config.yml @@ -61,7 +61,7 @@ commands: command: conda install -c conda-forge pytables -y - run: name: Install requirements and test requirements - command: pip install --upgrade -r test_requirements.txt + command: pip install --upgrade .[test] - run: # this is needed to fix java cacerts so # spark can automatically download packages from mvn @@ -146,7 +146,7 @@ commands: steps: - restore_cache: name: Restore package cache - key: kedro-deps-v1-win-{{ checksum "dependency/requirements.txt" }}-{{ checksum "test_requirements.txt" }} + key: kedro-deps-v1-win-{{ checksum "pyproject.toml" }}-{{ checksum "setup.py" }} # We don't restore the conda environment cache for python 3.10 as it conflicts with the # 'Install GDAL, Fiona and pytables' step breaking the conda environment (missing zlib.dll). 
- unless: @@ -155,7 +155,7 @@ commands: steps: - restore_cache: name: Restore conda environment cache - key: kedro-deps-v1-win-<>-{{ checksum "dependency/requirements.txt" }}-{{ checksum "test_requirements.txt" }} + key: kedro-deps-v1-win-<>-{{ checksum "pyproject.toml" }}-{{ checksum "setup.py" }} # pytables and Fiona have a series of binary dependencies under Windows that # are best handled by conda-installing instead of pip-installing them. # Dependency resolution works best when installing these altogether in one @@ -168,7 +168,7 @@ commands: command: conda activate kedro_builder; pip debug --verbose - run: name: Install all requirements - command: conda activate kedro_builder; pip install -v -r test_requirements.txt -U + command: conda activate kedro_builder; pip install -v -U .[test] - run: name: Print Python environment command: conda activate kedro_builder; make print-python-env @@ -337,7 +337,7 @@ jobs: steps: - save_cache: name: Save Python package cache - key: kedro-deps-v1-win-{{ checksum "dependency/requirements.txt" }}-{{ checksum "test_requirements.txt" }} + key: kedro-deps-v1-win-{{ checksum "pyproject.toml" }}-{{ checksum "setup.py" }} paths: # Cache pip cache and conda packages directories - c:\tools\miniconda3\pkgs @@ -350,7 +350,7 @@ jobs: steps: - save_cache: name: Save conda environment cache - key: kedro-deps-v1-win-<>-{{ checksum "dependency/requirements.txt" }}-{{ checksum "test_requirements.txt" }} + key: kedro-deps-v1-win-<>-{{ checksum "pyproject.toml" }}-{{ checksum "setup.py" }} paths: - c:\tools\miniconda3\envs\kedro_builder - run: diff --git a/.gitpod.yml b/.gitpod.yml index c2ec591de9..6fe5e8e825 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -5,10 +5,9 @@ tasks: init: | make sign-off - pip install -e /workspace/kedro + pip install -e /workspace/kedro[test] cd /workspace yes project | kedro new -s pandas-iris --checkout main - pip install -r /workspace/kedro/test_requirements.txt cd /workspace/kedro pre-commit install --install-hooks 
diff --git a/.readthedocs.yml b/.readthedocs.yml index 2df6853225..2435aac483 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -36,4 +36,4 @@ python: path: . extra_requirements: - docs - - requirements: test_requirements.txt + - test diff --git a/MANIFEST.in b/MANIFEST.in index 245671a5e0..ad41ac26a3 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,7 +1,5 @@ include README.md include LICENSE.md -include dependency/requirements.txt -include test_requirements.txt include kedro/framework/project/default_logging.yml include kedro/ipython/*.png include kedro/ipython/*.svg diff --git a/Makefile b/Makefile index e680e12620..5a1e85b558 100644 --- a/Makefile +++ b/Makefile @@ -48,7 +48,7 @@ package: clean install python -m pip install build && python -m build install-test-requirements: - pip install -r test_requirements.txt + pip install .[test] install-pre-commit: install-test-requirements pre-commit install --install-hooks diff --git a/dependency/requirements.txt b/dependency/requirements.txt deleted file mode 100644 index 14b8e2f244..0000000000 --- a/dependency/requirements.txt +++ /dev/null @@ -1,24 +0,0 @@ -anyconfig~=0.10.0 -attrs>=21.3 -build -cachetools~=5.3 -click<9.0 -cookiecutter>=2.1.1, <3.0 -dynaconf>=3.1.2, <4.0 -fsspec>=2021.4, <2024.1 # Upper bound set arbitrarily, to be reassessed in early 2024 -gitpython~=3.0 -importlib-metadata>=3.6; python_version >= '3.8' -importlib_metadata>=3.6, <5.0; python_version < '3.8' # The "selectable" entry points were introduced in `importlib_metadata` 3.6 and Python 3.10. Bandit on Python 3.7 relies on a library with `importlib_metadata` < 5.0 -importlib_resources>=1.3 # The `files()` API was introduced in `importlib_resources` 1.3 and Python 3.9. 
-jmespath>=0.9.5, <1.0 -more_itertools~=9.0 -omegaconf~=2.3 -parse~=1.19.0 -pip-tools~=6.5 -pluggy~=1.0 -PyYAML>=4.2, <7.0 -rich>=12.0, <14.0 -rope>=0.21, <2.0 # subject to LGPLv3 license -setuptools>=65.5.1 -toml~=0.10 -toposort~=1.5 # Needs to be at least 1.5 to be able to raise CircularDependencyError diff --git a/features/windows_reqs.txt b/features/windows_reqs.txt index c41bf77a4d..5a5fbe8649 100644 --- a/features/windows_reqs.txt +++ b/features/windows_reqs.txt @@ -1,4 +1,4 @@ -# same versions as `test_requirements` +# same versions as [test] optional requirements # e2e tests on Windows are slow but we don't need to install # everything, so just this subset will be enough for CI behave==1.2.6 diff --git a/pyproject.toml b/pyproject.toml index 0421b726c7..bb89d25312 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,32 @@ authors = [ ] description = "Kedro helps you build production-ready data and analytics pipelines" requires-python = ">=3.7" +dependencies = [ + "anyconfig~=0.10.0", + "attrs>=21.3", + "build", + "cachetools~=5.3", + "click<9.0", + "cookiecutter>=2.1.1, <3.0", + "dynaconf>=3.1.2, <4.0", + "fsspec>=2021.4, <2024.1", # Upper bound set arbitrarily, to be reassessed in early 2024 + "gitpython~=3.0", + "importlib-metadata>=3.6; python_version >= '3.8'", + "importlib_metadata>=3.6, <5.0; python_version < '3.8'", # The "selectable" entry points were introduced in `importlib_metadata` 3.6 and Python 3.10. Bandit on Python 3.7 relies on a library with `importlib_metadata` < 5.0 + "importlib_resources>=1.3", # The `files()` API was introduced in `importlib_resources` 1.3 and Python 3.9. 
+ "jmespath>=0.9.5, <1.0", + "more_itertools~=9.0", + "omegaconf~=2.3", + "parse~=1.19.0", + "pip-tools~=6.5", + "pluggy~=1.0", + "PyYAML>=4.2, <7.0", + "rich>=12.0, <14.0", + "rope>=0.21, <2.0", # subject to LGPLv3 license + "setuptools>=65.5.1", + "toml~=0.10", + "toposort~=1.5", # Needs to be at least 1.5 to be able to raise CircularDependencyError +] keywords = [ "pipelines", "machine learning", @@ -26,7 +52,7 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", ] -dynamic = ["readme", "version", "dependencies", "optional-dependencies"] +dynamic = ["readme", "version", "optional-dependencies"] [project.urls] Homepage = "https://kedro.org" @@ -46,7 +72,6 @@ include = ["kedro*"] [tool.setuptools.dynamic] readme = {file = "README.md", content-type = "text/markdown"} version = {attr = "kedro.__version__"} -dependencies = {file = "dependency/requirements.txt"} [tool.black] exclude = "/templates/|^features/steps/test_starter" diff --git a/setup.py b/setup.py index 92eca04e5a..4463110d97 100644 --- a/setup.py +++ b/setup.py @@ -1,23 +1,14 @@ -from codecs import open from glob import glob from itertools import chain -from os import path from setuptools import setup -name = "kedro" -here = path.abspath(path.dirname(__file__)) - # at least 1.3 to be able to use XMLDataSet and pandas integration with fsspec PANDAS = "pandas~=1.3" SPARK = "pyspark>=2.2, <4.0" HDFS = "hdfs>=2.5.8, <3.0" S3FS = "s3fs>=0.3.0, <0.5" -# get the dependencies and installs -with open("dependency/requirements.txt", encoding="utf-8") as f: - requires = [x.strip() for x in f if x.strip()] - template_files = [] for pattern in ["**/*", "**/.*", "**/.*/**", "**/.*/.**"]: template_files.extend( @@ -59,7 +50,7 @@ def _collect_requirements(requires): "pandas.XMLDataSet": [PANDAS, "lxml~=4.6"], "pandas.GenericDataSet": [PANDAS], } -pickle_require = {"pickle.PickleDataSet": ["compress-pickle[lz4]~=2.1.0"]} +pickle_require = {"pickle.PickleDataSet": 
["compress-pickle[lz4]~=1.2.0"]} pillow_require = {"pillow.ImageDataSet": ["Pillow~=9.0"]} video_require = { "video.VideoDataSet": ["opencv-python~=4.5.5.64"] @@ -80,7 +71,9 @@ def _collect_requirements(requires): "tensorflow.TensorflowModelDataset": [ # currently only TensorFlow V2 supported for saving and loading. # V1 requires HDF5 and serialises differently - "tensorflow~=2.0" + "tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'", + # https://developer.apple.com/metal/tensorflow-plugin/ + "tensorflow-macos~=2.0; platform_system == 'Darwin' and platform_machine == 'arm64'", ] } yaml_require = {"yaml.YAMLDataSet": [PANDAS, "PyYAML>=4.2, <7.0"]} @@ -139,10 +132,77 @@ def _collect_requirements(requires): } extras_require["all"] = _collect_requirements(extras_require) +extras_require["test"] = [ + "adlfs>=2021.7.1, <=2022.2; python_version == '3.7'", + "adlfs~=2023.1; python_version >= '3.8'", + "bandit>=1.6.2, <2.0", + "behave==1.2.6", + "biopython~=1.73", + "blacken-docs==1.9.2", + "black~=22.0", + "compress-pickle[lz4]~=1.2.0", + "coverage[toml]", + "dask[complete]~=2021.10", # pinned by Snyk to avoid a vulnerability + "delta-spark~=1.2.1", # 1.2.0 has a bug that breaks some of our tests: https://github.com/delta-io/delta/issues/1070 + "dill~=0.3.1", + "filelock>=3.4.0, <4.0", + "gcsfs>=2021.4, <=2023.1; python_version == '3.7'", + "gcsfs>=2023.1, <2023.3; python_version >= '3.8'", + "geopandas>=0.6.0, <1.0", + "hdfs>=2.5.8, <3.0", + "holoviews~=1.13.0", + "import-linter[toml]==1.8.0", + "ipython>=7.31.1, <8.0; python_version < '3.8'", + "ipython~=8.10; python_version >= '3.8'", + "isort~=5.0", + "Jinja2<3.1.0", + "joblib>=0.14", + "jupyterlab_server>=2.11.1, <2.16.0", # 2.16.0 requires importlib_metadata >= 4.8.3 which conflicts with flake8 requirement + "jupyterlab~=3.0, <3.6.0", # 3.6.0 requires jupyterlab_server~=2.19 + "jupyter~=1.0", + "lxml~=4.6", + "matplotlib>=3.0.3, <3.4; python_version < '3.10'", # 3.4.0 breaks holoviews + 
"matplotlib>=3.5, <3.6; python_version == '3.10'", + "memory_profiler>=0.50.0, <1.0", + "moto==1.3.7; python_version < '3.10'", + "moto==3.0.4; python_version == '3.10'", + "networkx~=2.4", + "opencv-python~=4.5.5.64", + "openpyxl>=3.0.3, <4.0", + "pandas-gbq>=0.12.0, <0.18.0", + "pandas~=1.3", # 1.3 for read_xml/to_xml + "Pillow~=9.0", + "plotly>=4.8.0, <6.0", + "pre-commit>=2.9.2, <3.0", # The hook `mypy` requires pre-commit version 2.9.2. + "psutil~=5.8", + "pyarrow>=1.0, <7.0", + "pylint>=2.17.0, <3.0", + "pyproj~=3.0", + "pyspark>=2.2, <4.0", + "pytest-cov~=3.0", + "pytest-mock>=1.7.1, <2.0", + "pytest-xdist[psutil]~=2.2.1", + "pytest~=7.2", + "redis~=4.1", + "requests-mock~=1.6", + "requests~=2.20", + "s3fs>=0.3.0, <0.5", # Needs to be at least 0.3.0 to make use of `cachable` attribute on S3FileSystem. + "scikit-learn~=1.0.2", + "scipy~=1.7.3", + "SQLAlchemy~=1.2", + "tables~=3.6.0; platform_system == 'Windows' and python_version<'3.9'", + "tables~=3.6; platform_system != 'Windows'", + "tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'", + # https://developer.apple.com/metal/tensorflow-plugin/ + "tensorflow-macos~=2.0; platform_system == 'Darwin' and platform_machine == 'arm64'", + "triad>=0.6.7, <1.0", + "trufflehog~=2.1", + "xlsxwriter~=1.0", +] setup( package_data={ - name: ["py.typed", "test_requirements.txt"] + template_files + "kedro": ["py.typed"] + template_files }, extras_require=extras_require, ) diff --git a/test_requirements.txt b/test_requirements.txt deleted file mode 100644 index 5c81ebdc89..0000000000 --- a/test_requirements.txt +++ /dev/null @@ -1,64 +0,0 @@ --r dependency/requirements.txt -adlfs>=2021.7.1, <=2022.2; python_version == '3.7' -adlfs~=2023.1; python_version >= '3.8' -bandit>=1.6.2, <2.0 -behave==1.2.6 -biopython~=1.73 -blacken-docs==1.9.2 -black~=22.0 -compress-pickle[lz4]~=1.2.0 -coverage[toml] -dask[complete]~=2021.10 # pinned by Snyk to avoid a vulnerability -delta-spark~=1.2.1 # 1.2.0 has a bug that 
breaks some of our tests: https://github.com/delta-io/delta/issues/1070 -dill~=0.3.1 -filelock>=3.4.0, <4.0 -gcsfs>=2021.4, <=2023.1; python_version == '3.7' -gcsfs>=2023.1, <2023.3; python_version >= '3.8' -geopandas>=0.6.0, <1.0 -hdfs>=2.5.8, <3.0 -holoviews~=1.13.0 -import-linter[toml]==1.8.0 -ipython>=7.31.1, <8.0; python_version < '3.8' -ipython~=8.10; python_version >= '3.8' -isort~=5.0 -Jinja2<3.1.0 -joblib>=0.14 -jupyterlab_server>=2.11.1, <2.16.0 # 2.16.0 requires importlib_metedata >= 4.8.3 which conflicts with flake8 requirement -jupyterlab~=3.0, <3.6.0 # 3.6.0 requires jupyterlab_server~=2.19 -jupyter~=1.0 -lxml~=4.6 -matplotlib>=3.0.3, <3.4; python_version < '3.10' # 3.4.0 breaks holoviews -matplotlib>=3.5, <3.6; python_version == '3.10' -memory_profiler>=0.50.0, <1.0 -moto==1.3.7; python_version < '3.10' -moto==3.0.4; python_version == '3.10' -networkx~=2.4 -opencv-python~=4.5.5.64 -openpyxl>=3.0.3, <4.0 -pandas-gbq>=0.12.0, <0.18.0 -pandas~=1.3 # 1.3 for read_xml/to_xml -Pillow~=9.0 -plotly>=4.8.0, <6.0 -pre-commit>=2.9.2, <3.0 # The hook `mypy` requires pre-commit version 2.9.2. -psutil~=5.8 -pyarrow>=1.0, <7.0 -pylint>=2.17.0, <3.0 -pyproj~=3.0 -pyspark>=2.2, <4.0 -pytest-cov~=3.0 -pytest-mock>=1.7.1, <2.0 -pytest-xdist[psutil]~=2.2.1 -pytest~=7.2 -redis~=4.1 -requests-mock~=1.6 -requests~=2.20 -s3fs>=0.3.0, <0.5 # Needs to be at least 0.3.0 to make use of `cachable` attribute on S3FileSystem. -scikit-learn~=1.0.2 -scipy~=1.7.3 -SQLAlchemy~=1.2 -tables~=3.6.0; platform_system == "Windows" and python_version<'3.9' -tables~=3.6; platform_system != "Windows" -tensorflow~=2.0 -triad>=0.6.7, <1.0 -trufflehog~=2.1 -xlsxwriter~=1.0