Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the viz add-ons to kedro new #3228

Merged
merged 43 commits into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
d1c9c45
adapt pyspark add-ons
noklam Oct 25, 2023
8a2afb3
Merge branch 'develop' into noklam/viz-add-on
noklam Oct 26, 2023
ee92c57
fix promopt
noklam Oct 27, 2023
be2561b
fix minor stuff in prompt
noklam Oct 27, 2023
5fb695c
refactor utils and fix template switch to starters
SajidAlamQB Oct 30, 2023
32dab55
Merge branch 'develop' into noklam/viz-add-on
SajidAlamQB Oct 30, 2023
75bb8a3
lint
SajidAlamQB Oct 30, 2023
743c46d
Merge branch 'noklam/viz-add-on' of https://github.com/kedro-org/kedr…
SajidAlamQB Oct 30, 2023
c8df0ed
Refactor utils.py use toml and unstrip pyproject.toml and requirement…
SajidAlamQB Oct 31, 2023
6e01176
Merge branch 'develop' into noklam/viz-add-on
noklam Oct 31, 2023
52442d5
fix broken test missing options 7
noklam Oct 31, 2023
18e48e7
add more test
noklam Oct 31, 2023
f690907
fix test
noklam Oct 31, 2023
3977045
attempt to fix tests
SajidAlamQB Oct 31, 2023
37d2ef9
fix test
noklam Oct 31, 2023
2a5ed92
remove reporting when viz selected
SajidAlamQB Oct 31, 2023
46d6365
update tests for viz add-on
SajidAlamQB Oct 31, 2023
876839b
Merge branch 'develop' into noklam/viz-add-on
SajidAlamQB Oct 31, 2023
bda5563
remove reporting.yml and update tests
SajidAlamQB Oct 31, 2023
506e007
Merge branch 'develop' into noklam/viz-add-on
SajidAlamQB Oct 31, 2023
aeaa585
Merge branch 'noklam/viz-add-on' of https://github.com/kedro-org/kedr…
noklam Oct 31, 2023
0bc9094
add comment to _get_expected_files number counts
SajidAlamQB Nov 1, 2023
78419fa
Update tests to parse pyprojec.toml (#3230)
noklam Nov 1, 2023
8a7d811
Add .gitkeep for conf/local/
SajidAlamQB Nov 1, 2023
049f83e
Fix tests
SajidAlamQB Nov 1, 2023
cb8d42a
add comments for more context
SajidAlamQB Nov 1, 2023
c1f4bfa
Merge branch 'develop' into noklam/viz-add-on
SajidAlamQB Nov 1, 2023
a1255bc
More descriptive variable names
SajidAlamQB Nov 1, 2023
5cec43e
changes based on review - use patterns to remove parameter files
SajidAlamQB Nov 2, 2023
fc511a5
Merge branch 'develop' into noklam/viz-add-on
SajidAlamQB Nov 2, 2023
a42e0a0
changes based on review cleaner template switching with more comments
SajidAlamQB Nov 2, 2023
5f5d3fb
Merge branch 'develop' into noklam/viz-add-on
SajidAlamQB Nov 2, 2023
2a49de0
add full stop to comment (nit)
SajidAlamQB Nov 2, 2023
2f456de
Merge branch 'noklam/viz-add-on' of https://github.com/kedro-org/kedr…
SajidAlamQB Nov 2, 2023
13d19d5
Merge branch 'develop' into noklam/viz-add-on
noklam Nov 3, 2023
57a8940
Replace anyconfig with toml for tests
noklam Nov 3, 2023
5386292
add docstrings
noklam Nov 3, 2023
406ab93
add docstrings to helper methods and make them private
SajidAlamQB Nov 3, 2023
56b758f
update docstring
SajidAlamQB Nov 3, 2023
158b996
changes based on review
SajidAlamQB Nov 3, 2023
a79e0f4
Merge branch 'develop' into noklam/viz-add-on
noklam Nov 3, 2023
b206bce
lint
noklam Nov 3, 2023
17ffa53
add viz into the dict constants
noklam Nov 3, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 38 additions & 15 deletions kedro/framework/cli/starters.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ class KedroStarterSpec: # noqa: too-few-public-methods
3) Custom Logging: Provides more logging options\n
4) Documentation: Basic documentation setup with Sphinx\n
5) Data Structure: Provides a directory structure for storing data\n
6) Pyspark: Provides a basic PySpark set up\n
6) Pyspark: Provides set up configuration for working with PySpark\n
7) Kedro Viz: Provides Kedro's native visualisation tool \n

Example usage:\n
kedro new --addons=lint,test,log,docs,data,pyspark (or any subset of these options)\n
Expand All @@ -123,6 +124,7 @@ class KedroStarterSpec: # noqa: too-few-public-methods
"4": "Documentation",
"5": "Data Structure",
"6": "Pyspark",
"7": "Kedro Viz",
}

NAME_ARG_HELP = "The name of your new Kedro project."
Expand Down Expand Up @@ -214,7 +216,7 @@ def _validate_range(start, end):
def _validate_selection(add_ons: list[str]):
for add_on in add_ons:
if int(add_on) < 1 or int(add_on) > len(ADD_ONS_DICT):
message = f"'{add_on}' is not a valid selection.\nPlease select from the available add-ons: 1, 2, 3, 4, 5, 6." # nosec
message = f"'{add_on}' is not a valid selection.\nPlease select from the available add-ons: 1, 2, 3, 4, 5, 6, 7." # nosec
click.secho(message, fg="red", err=True)
sys.exit(1)

Expand Down Expand Up @@ -331,7 +333,6 @@ def new( # noqa: too-many-arguments
)

cookiecutter_args = _make_cookiecutter_args(config, checkout, directory)

project_template = fetch_template_based_on_add_ons(template_path, cookiecutter_args)

_create_project(project_template, cookiecutter_args)
Expand Down Expand Up @@ -435,6 +436,7 @@ def _get_addons_from_cli_input(selected_addons: str) -> str:
"docs": "4",
"data": "5",
"pyspark": "6",
"viz": "7",
}

if selected_addons is not None:
Expand Down Expand Up @@ -466,14 +468,24 @@ def _select_prompts_to_display(
Returns:
the prompts_required dictionary, with all the redundant information removed.
"""
valid_addons = ["lint", "test", "log", "docs", "data", "pyspark", "all", "none"]
valid_addons = [
"lint",
"test",
"log",
"docs",
"data",
"pyspark",
"viz",
"all",
"none",
]

if selected_addons is not None:
addons = re.sub(r"\s", "", selected_addons).split(",")
for addon in addons:
if addon not in valid_addons:
click.secho(
"Please select from the available add-ons: lint, test, log, docs, data, pyspark, all, none",
"Please select from the available add-ons: lint, test, log, docs, data, pyspark, viz, all, none",
fg="red",
err=True,
)
Expand Down Expand Up @@ -579,11 +591,24 @@ def _make_cookiecutter_args(
def fetch_template_based_on_add_ons(template_path, cookiecutter_args: dict[str, Any]):
extra_context = cookiecutter_args["extra_context"]
add_ons = extra_context.get("add_ons")
if add_ons and "Pyspark" in add_ons:
cookiecutter_args["directory"] = "spaceflights-pyspark"
pyspark_path = "git+https://github.com/kedro-org/kedro-starters.git"
return pyspark_path
return template_path
starter_path = "git+https://github.com/kedro-org/kedro-starters.git"
if add_ons:
if "Pyspark" in add_ons and "Kedro Viz" in add_ons:
# Use the spaceflights-pyspark-viz starter if both Pyspark and Kedro Viz are chosen.
cookiecutter_args["directory"] = "spaceflights-pyspark-viz"
elif "Pyspark" in add_ons:
# Use the spaceflights-pyspark starter if only Pyspark is chosen.
cookiecutter_args["directory"] = "spaceflights-pyspark"
elif "Kedro Viz" in add_ons:
# Use the spaceflights-pandas-viz starter if only Kedro Viz is chosen.
cookiecutter_args["directory"] = "spaceflights-pandas-viz"
else:
# Use the default template path for any other combinations or if "none" is chosen.
starter_path = template_path
else:
# Use the default template path if add_ons is None, which can occur if there is no prompts.yml or it is empty.
starter_path = template_path
return starter_path


def _create_project(template_path: str, cookiecutter_args: dict[str, Any]):
Expand Down Expand Up @@ -617,11 +642,9 @@ def _create_project(template_path: str, cookiecutter_args: dict[str, Any]):
)
add_ons = extra_context.get("add_ons")

# Only core template and spaceflights-pyspark have configurable add-ons
if (
template_path == str(TEMPLATE_PATH)
or add_ons is not None
and "Pyspark" in add_ons
# Only core template and spaceflight starters have configurable add-ons
if template_path == str(TEMPLATE_PATH) or (
add_ons and ("Pyspark" in add_ons or "Kedro Viz" in add_ons)
):
if add_ons == "[]": # TODO: This should be a list
click.secho("\nYou have selected no add-ons")
Expand Down
226 changes: 115 additions & 111 deletions kedro/templates/project/hooks/utils.py
Original file line number Diff line number Diff line change
@@ -1,133 +1,137 @@
from pathlib import Path
import shutil
import sys
import click
import toml

current_dir = Path.cwd()

lint_requirements = "black~=22.12.0\nruff~=0.0.290\n"
lint_pyproject_requirements = """
[tool.ruff]
select = [
"F", # Pyflakes
"E", # Pycodestyle
"W", # Pycodestyle
"UP", # pyupgrade
"I", # isort
"PL", # Pylint
]
ignore = ["E501"] # Black takes care of line-too-long
"""
lint_requirements = "black~=22.0\nruff~=0.0.290\n"
lint_pyproject_requirements = ["tool.ruff"]

test_requirements = "pytest-cov~=3.0\npytest-mock>=1.7.1, <2.0\npytest~=7.2"
test_pyproject_requirements = """
[tool.pytest.ini_options]
addopts = \"\"\"
--cov-report term-missing \\
--cov src/{{ cookiecutter.python_package }} -ra
\"\"\"

[tool.coverage.report]
fail_under = 0
show_missing = true
exclude_lines = ["pragma: no cover", "raise NotImplementedError"]
"""

docs_pyproject_requirements = """
[project.optional-dependencies]
docs = [
"docutils<0.18.0",
"sphinx~=3.4.3",
"sphinx_rtd_theme==0.5.1",
"nbsphinx==0.8.1",
"sphinx-autodoc-typehints==1.11.1",
"sphinx_copybutton==0.3.1",
"ipykernel>=5.3, <7.0",
"Jinja2<3.1.0",
"myst-parser~=0.17.2",
]
"""
test_pyproject_requirements = ["tool.pytest.ini_options", "tool.coverage.report"]

docs_pyproject_requirements = ["project.optional-dependencies"]

def setup_template_add_ons(selected_add_ons_list, requirements_file_path, pyproject_file_path, python_package_name):
"""Removes directories and files related to unwanted addons from
a Kedro project template. Adds the necessary requirements for
the addons that were selected.

Args:
selected_add_ons_list: a list containing numbers from 1 to 5,
representing specific add-ons.
requirements_file_path: the path to the requirements.txt file.
pyproject_file_path: the path to the pyproject.toml file
located at the root of the template.
"""
if "Linting" not in selected_add_ons_list:
pass
# Helper Functions
def remove_from_file(file_path, content_to_remove):
    """Remove every line listed in ``content_to_remove`` from a text file.

    Lines are compared after stripping surrounding whitespace, so indentation
    and trailing newlines do not affect matching. Blank lines in the file are
    always preserved, even when ``content_to_remove`` ends with a newline.

    Args:
        file_path: path of the text file to rewrite in place.
        content_to_remove: newline-separated lines to drop from the file.
    """
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Ignore empty entries: a trailing "\n" in content_to_remove would
    # otherwise yield "" and cause every blank line in the file to be deleted.
    lines_to_remove = {
        stripped
        for stripped in (line.strip() for line in content_to_remove.split('\n'))
        if stripped
    }

    # Keep only the lines that were not requested for removal.
    kept_lines = [line for line in lines if line.strip() not in lines_to_remove]

    with open(file_path, 'w') as file:
        file.writelines(kept_lines)


def remove_nested_section(data, nested_key):
    """Remove a dotted section from a nested dict and prune emptied parents.

    ``nested_key`` is split on ``.`` and followed through ``data``. The final
    key is removed from its parent; then, walking back towards the root, every
    parent section that has become empty is removed as well. Pruning stops at
    the first parent that still holds other content. ``data`` is modified in
    place and nothing is returned.

    Args:
        data: nested dictionary (e.g. a parsed TOML document).
        nested_key: dotted path of the section to remove, e.g. ``"tool.ruff"``.
    """
    keys = nested_key.split('.')

    # Walk down to the parent of the target, recording each (container, key)
    # step. Recording the trail avoids looking positions up by key name, which
    # picks the wrong level when a path repeats a segment (e.g. "a.a.b").
    trail = []
    current_data = data
    for key in keys[:-1]:
        if key in current_data:
            trail.append((current_data, key))
            current_data = current_data[key]
        else:
            return  # Parent section not found, nothing to remove

    # Remove the target section; a missing key is silently ignored.
    current_data.pop(keys[-1], None)

    # Prune parents bottom-up for as long as they are empty.
    for parent_section, key in reversed(trail):
        if parent_section[key]:
            break  # Section still has content, stop removing
        parent_section.pop(key, None)


def remove_from_toml(file_path, sections_to_remove):
    """Strip the given dotted sections from a TOML file, rewriting it in place.

    Args:
        file_path: path of the TOML file to load and overwrite.
        sections_to_remove: iterable of dotted section names; each one is
            passed to ``remove_nested_section``.
    """
    # Parse the current document.
    with open(file_path, 'r') as toml_file:
        document = toml.load(toml_file)

    # Drop each requested section from the parsed document.
    for dotted_section in sections_to_remove:
        remove_nested_section(document, dotted_section)

    # Serialise the trimmed document back to the same path.
    with open(file_path, 'w') as toml_file:
        toml.dump(document, toml_file)


def remove_dir(path):
    """Delete the directory tree at ``path``; do nothing if it does not exist.

    Args:
        path: path object exposing ``exists()`` (e.g. ``pathlib.Path``).
    """
    if not path.exists():
        return
    shutil.rmtree(str(path))


def remove_file(path):
    """Delete the file at ``path``; do nothing if it does not exist.

    Args:
        path: path object exposing ``exists()`` and ``unlink()``
            (e.g. ``pathlib.Path``).
    """
    if not path.exists():
        return
    path.unlink()
SajidAlamQB marked this conversation as resolved.
Show resolved Hide resolved


def handle_starter_setup(selected_add_ons_list, python_package_name):
# Remove all .csv and .xlsx files from data/01_raw/
raw_data_path = current_dir / "data/01_raw/"
for file_path in raw_data_path.glob("*.*"):
if file_path.suffix in [".csv", ".xlsx"]:
file_path.unlink()

# Empty the contents of conf/base/catalog.yml
catalog_yml_path = current_dir / "conf/base/catalog.yml"
if catalog_yml_path.exists():
catalog_yml_path.write_text('')
# Remove parameter/reporting files from conf/base
SajidAlamQB marked this conversation as resolved.
Show resolved Hide resolved
conf_base_path = current_dir / "conf/base/"
parameter_file_patterns = ["parameters_*.yml", "parameters/*.yml"]
for pattern in parameter_file_patterns:
for param_file in conf_base_path.glob(pattern):
remove_file(param_file)

# Remove the pipelines subdirectories
if "Kedro Viz" in selected_add_ons_list: # Remove reporting if Kedro Viz is selected
pipelines_to_remove = ["data_science", "data_processing", "reporting"]
else:
with open(requirements_file_path, 'a') as file:
file.write(lint_requirements)
with open(pyproject_file_path, 'a') as file:
file.write(lint_pyproject_requirements)
pipelines_to_remove = ["data_science", "data_processing"]

pipelines_path = current_dir / f"src/{python_package_name}/pipelines/"
for pipeline_subdir in pipelines_to_remove:
remove_dir(pipelines_path / pipeline_subdir)

# Remove all test files from tests/pipelines/
test_pipeline_path = current_dir / "tests/pipelines/test_data_science.py"
remove_file(test_pipeline_path)


def setup_template_add_ons(selected_add_ons_list, requirements_file_path, pyproject_file_path, python_package_name):
if "Linting" not in selected_add_ons_list:
remove_from_file(requirements_file_path, lint_requirements)
remove_from_toml(pyproject_file_path, lint_pyproject_requirements)

if "Testing" not in selected_add_ons_list:
tests_path = current_dir / "tests"
if tests_path.exists():
shutil.rmtree(str(tests_path))
else:
with open(requirements_file_path, 'a') as file:
file.write(test_requirements)
with open(pyproject_file_path, 'a') as file:
file.write(test_pyproject_requirements)
remove_from_file(requirements_file_path, test_requirements)
remove_from_toml(pyproject_file_path, test_pyproject_requirements)
remove_dir(current_dir / "tests")

if "Logging" not in selected_add_ons_list:
logging_yml_path = current_dir / "conf/logging.yml"
if logging_yml_path.exists():
logging_yml_path.unlink()
remove_file(current_dir / "conf/logging.yml")

if "Documentation" not in selected_add_ons_list:
docs_path = current_dir / "docs"
if docs_path.exists():
shutil.rmtree(str(docs_path))
else:
with open(pyproject_file_path, 'a') as file:
file.write(docs_pyproject_requirements)
remove_from_toml(pyproject_file_path, docs_pyproject_requirements)
remove_dir(current_dir / "docs")

if "Data Structure" not in selected_add_ons_list:
data_path = current_dir / "data"
if data_path.exists():
shutil.rmtree(str(data_path))

if "Pyspark" not in selected_add_ons_list: # If PySpark not selected
pass
else: # Use spaceflights-pyspark to create pyspark template
# Remove all .csv and .xlsx files from data/01_raw/
raw_data_path = current_dir / "data/01_raw/"
if raw_data_path.exists() and raw_data_path.is_dir():
for file_path in raw_data_path.glob("*.*"):
if file_path.suffix in [".csv", ".xlsx"]:
file_path.unlink()

# Remove parameter files from conf/base/
param_files = [
"parameters_data_processing.yml",
"parameters_data_science.yml",
]
conf_base_path = current_dir / "conf/base/"
if conf_base_path.exists() and conf_base_path.is_dir():
for param_file in param_files:
file_path = conf_base_path / param_file
if file_path.exists():
file_path.unlink()

# Remove specific pipeline subdirectories
pipelines_path = current_dir / f"src/{python_package_name}/pipelines/"
for pipeline_subdir in ["data_science", "data_processing"]:
shutil.rmtree(pipelines_path / pipeline_subdir, ignore_errors=True)

# Remove all test file from tests/pipelines/
test_pipeline_path = current_dir / "tests/pipelines/test_data_science.py"
if test_pipeline_path.exists():
test_pipeline_path.unlink()
remove_dir(current_dir / "data")

if "Pyspark" in selected_add_ons_list:
handle_starter_setup(selected_add_ons_list, python_package_name)

if "Kedro Viz" in selected_add_ons_list:
handle_starter_setup(selected_add_ons_list, python_package_name)


def sort_requirements(requirements_file_path):
Expand Down
5 changes: 3 additions & 2 deletions kedro/templates/project/prompts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ add_ons:
3) Custom Logging : Provides more logging options
4) Documentation: Provides basic documentations setup with Sphinx
5) Data Structure: Provides a directory structure for storing data
6) PySpark : Provides a basic PySpark set up
6) Pyspark: Provides set up configuration for working with PySpark
7) Kedro Viz: Provides Kedro's native visualisation tool

Which add-ons would you like to include in your project? [1-6/1,3/all/none]:
Which add-ons would you like to include in your project? [1-7/1,3/all/none]:
regex_validator: "^(all|none|(\\d(,\\d)*|(\\d-\\d)))$"
error_message: |
Invalid input. Please select valid options for add-ons using comma-separated values, ranges, or 'all/none'.
Expand Down
Loading