Skip to content

Commit

Permalink
Add implementation of telemetry consent flag for kedro run (#3876)
Browse files Browse the repository at this point in the history
* Add base implementation of telemetry flag for kedro run

Signed-off-by: lrcouto <[email protected]>

* Update function on test

Signed-off-by: lrcouto <[email protected]>

* Lint

Signed-off-by: lrcouto <[email protected]>

* Update tests

Signed-off-by: lrcouto <[email protected]>

* Add tests for the new telemetry flag

Signed-off-by: lrcouto <[email protected]>

* Add release note

Signed-off-by: lrcouto <[email protected]>

* Make util functions private

Signed-off-by: lrcouto <[email protected]>

* Lint

Signed-off-by: lrcouto <[email protected]>

---------

Signed-off-by: lrcouto <[email protected]>
Signed-off-by: L. R. Couto <[email protected]>
  • Loading branch information
lrcouto authored May 22, 2024
1 parent a53500a commit 570f66a
Show file tree
Hide file tree
Showing 6 changed files with 157 additions and 52 deletions.
1 change: 1 addition & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## Major features and improvements
* It is now possible to use Kedro without having `rich` installed.
* Added a `--telemetry` flag to `kedro run`, allowing consent to data usage to be granted or revoked at the same time the command is run.

## Bug fixes and other changes
* User defined catch-all dataset factory patterns now override the default pattern provided by the runner.
Expand Down
18 changes: 17 additions & 1 deletion kedro/framework/cli/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
from kedro.framework.cli.utils import (
_check_module_importable,
_config_file_callback,
_parse_yes_to_bool,
_split_load_versions,
_split_params,
_validate_input_with_regex_pattern,
call,
env_option,
forward_command,
Expand All @@ -22,7 +24,7 @@
from kedro.framework.project import settings
from kedro.framework.session import KedroSession
from kedro.framework.startup import ProjectMetadata
from kedro.utils import load_obj
from kedro.utils import _find_kedro_project, load_obj

NO_DEPENDENCY_MESSAGE = """{module} is not installed. Please make sure {module} is in
requirements.txt and run 'pip install -r requirements.txt'."""
Expand Down Expand Up @@ -59,6 +61,10 @@
INPUT_FILE_HELP = """Name of the requirements file to compile."""
OUTPUT_FILE_HELP = """Name of the file where compiled requirements should be stored."""
CONF_SOURCE_HELP = """Path of a directory where project configuration is stored."""
TELEMETRY_ARG_HELP = """Allow or not allow Kedro to collect usage analytics.
We cannot see nor store information contained into a Kedro project. Opt in with "yes"
and out with "no".
"""


@click.group(name="Kedro")
Expand Down Expand Up @@ -196,6 +202,7 @@ def package(metadata: ProjectMetadata) -> None:
help=PARAMS_ARG_HELP,
callback=_split_params,
)
@click.option("--telemetry", "-tc", "telemetry_consent", help=TELEMETRY_ARG_HELP)
def run( # noqa: PLR0913
tags: str,
env: str,
Expand All @@ -212,8 +219,17 @@ def run( # noqa: PLR0913
conf_source: str,
params: dict[str, Any],
namespace: str,
telemetry_consent: str,
) -> None:
"""Run the pipeline."""
if telemetry_consent is not None:
_validate_input_with_regex_pattern("yes_no", telemetry_consent)
telemetry_consent = "true" if _parse_yes_to_bool(telemetry_consent) else "false"

project_path = str(_find_kedro_project(Path.cwd()))

with open(project_path + "/.telemetry", "w") as telemetry_file:
telemetry_file.write("consent: " + telemetry_consent)

runner_obj = load_obj(runner or "SequentialRunner", "kedro.runner")
tuple_tags = tuple(tags)
Expand Down
47 changes: 6 additions & 41 deletions kedro/framework/cli/starters.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
KedroCliError,
_clean_pycache,
_get_entry_points,
_parse_yes_to_bool,
_safe_load_entry_point,
_validate_input_with_regex_pattern,
command_with_verbosity,
)

Expand Down Expand Up @@ -154,41 +156,6 @@ def _validate_flag_inputs(flag_inputs: dict[str, Any]) -> None:
)


def _validate_input_with_regex_pattern(pattern_name: str, input: str) -> None:
VALIDATION_PATTERNS = {
"yes_no": {
"regex": r"(?i)^\s*(y|yes|n|no)\s*$",
"error_message": f"'{input}' is an invalid value for example pipeline. It must contain only y, n, YES, or NO (case insensitive).",
},
"project_name": {
"regex": r"^[\w -]{2,}$",
"error_message": f"'{input}' is an invalid value for project name. It must contain only alphanumeric symbols, spaces, underscores and hyphens and be at least 2 characters long",
},
"tools": {
"regex": r"""^(
all|none| # A: "all" or "none" or
(\ *\d+ # B: any number of spaces followed by one or more digits
(\ *-\ *\d+)? # C: zero or one instances of: a hyphen followed by one or more digits, spaces allowed
(\ *,\ *\d+(\ *-\ *\d+)?)* # D: any number of instances of: a comma followed by B and C, spaces allowed
\ *)?) # E: zero or one instances of (B,C,D) as empty strings are also permissible
$""",
"error_message": f"'{input}' is an invalid value for project tools. Please select valid options for tools using comma-separated values, ranges, or 'all/none'.",
},
}

if not re.match(VALIDATION_PATTERNS[pattern_name]["regex"], input, flags=re.X):
click.secho(
VALIDATION_PATTERNS[pattern_name]["error_message"],
fg="red",
err=True,
)
sys.exit(1)


def _parse_yes_no_to_bool(value: str) -> Any:
return value.strip().lower() in ["y", "yes"] if value is not None else None


def _validate_selected_tools(selected_tools: str | None) -> None:
valid_tools = list(TOOLS_SHORTNAME_TO_NUMBER) + ["all", "none"]

Expand Down Expand Up @@ -353,9 +320,7 @@ def new( # noqa: PLR0913

if telemetry_consent is not None:
_validate_input_with_regex_pattern("yes_no", telemetry_consent)
telemetry_consent = (
"true" if _parse_yes_no_to_bool(telemetry_consent) else "false"
)
telemetry_consent = "true" if _parse_yes_to_bool(telemetry_consent) else "false"

_create_project(project_template, cookiecutter_args, telemetry_consent)

Expand Down Expand Up @@ -571,7 +536,7 @@ def _get_extra_context( # noqa: PLR0913
if project_name is not None:
extra_context["project_name"] = project_name
if example_pipeline is not None:
extra_context["example_pipeline"] = str(_parse_yes_no_to_bool(example_pipeline))
extra_context["example_pipeline"] = str(_parse_yes_to_bool(example_pipeline))

# set defaults for required fields, will be used mostly for starters
extra_context.setdefault("kedro_version", version)
Expand Down Expand Up @@ -669,7 +634,7 @@ def _fetch_validate_parse_config_from_file(

example_pipeline = config.get("example_pipeline", "no")
_validate_input_with_regex_pattern("yes_no", example_pipeline)
config["example_pipeline"] = str(_parse_yes_no_to_bool(example_pipeline))
config["example_pipeline"] = str(_parse_yes_to_bool(example_pipeline))

tools_short_names = config.get("tools", "none").lower()
_validate_selected_tools(tools_short_names)
Expand Down Expand Up @@ -722,7 +687,7 @@ def _fetch_validate_parse_config_from_user_prompts(
_validate_tool_selection(tools_numbers)
config["tools"] = _convert_tool_numbers_to_readable_names(tools_numbers)
if "example_pipeline" in config:
example_pipeline_bool = _parse_yes_no_to_bool(config["example_pipeline"])
example_pipeline_bool = _parse_yes_to_bool(config["example_pipeline"])
config["example_pipeline"] = str(example_pipeline_bool)

return config
Expand Down
35 changes: 35 additions & 0 deletions kedro/framework/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,3 +497,38 @@ def _split_load_versions(ctx: click.Context, param: Any, value: str) -> dict[str
load_versions_dict[load_version_list[0]] = load_version_list[1]

return load_versions_dict


def _validate_input_with_regex_pattern(pattern_name: str, input: str) -> None:
VALIDATION_PATTERNS = {
"yes_no": {
"regex": r"(?i)^\s*(y|yes|n|no)\s*$",
"error_message": f"'{input}' is an invalid value for example pipeline or telemetry consent. It must contain only y, n, YES, or NO (case insensitive).",
},
"project_name": {
"regex": r"^[\w -]{2,}$",
"error_message": f"'{input}' is an invalid value for project name. It must contain only alphanumeric symbols, spaces, underscores and hyphens and be at least 2 characters long",
},
"tools": {
"regex": r"""^(
all|none| # A: "all" or "none" or
(\ *\d+ # B: any number of spaces followed by one or more digits
(\ *-\ *\d+)? # C: zero or one instances of: a hyphen followed by one or more digits, spaces allowed
(\ *,\ *\d+(\ *-\ *\d+)?)* # D: any number of instances of: a comma followed by B and C, spaces allowed
\ *)?) # E: zero or one instances of (B,C,D) as empty strings are also permissible
$""",
"error_message": f"'{input}' is an invalid value for project tools. Please select valid options for tools using comma-separated values, ranges, or 'all/none'.",
},
}

if not re.match(VALIDATION_PATTERNS[pattern_name]["regex"], input, flags=re.X):
click.secho(
VALIDATION_PATTERNS[pattern_name]["error_message"],
fg="red",
err=True,
)
sys.exit(1)


def _parse_yes_to_bool(value: str) -> Any:
return value.strip().lower() in ["y", "yes"] if value is not None else None
88 changes: 88 additions & 0 deletions tests/framework/cli/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -934,3 +934,91 @@ def test_run_with_non_existent_conf_source(self, fake_project_cli, fake_metadata
" does not exist."
)
assert expected_output in result.output

@mark.parametrize(
"input",
["yes", "YES", "y", "Y", "yEs"],
)
def test_run_telemetry_flag_yes(
self, fake_project_cli, fake_metadata, fake_session, mocker, input
):
result = CliRunner().invoke(
fake_project_cli, ["run", "--telemetry", input], obj=fake_metadata
)
assert not result.exit_code

fake_session.run.assert_called_once_with(
tags=(),
runner=mocker.ANY,
node_names=(),
from_nodes=[],
to_nodes=[],
from_inputs=[],
to_outputs=[],
load_versions={},
pipeline_name=None,
namespace=None,
)

runner = fake_session.run.call_args_list[0][1]["runner"]
project_path = str(fake_metadata.project_path)
telemetry_file_path = Path(project_path + "/.telemetry")
assert isinstance(runner, SequentialRunner)
assert not runner._is_async
assert telemetry_file_path.exists()

with open(telemetry_file_path) as file:
file_content = file.read()
target_string = "consent: true"

assert target_string in file_content

@mark.parametrize(
"input",
["no", "NO", "n", "N", "No"],
)
def test_run_telemetry_flag_no(
self, fake_project_cli, fake_metadata, fake_session, mocker, input
):
result = CliRunner().invoke(
fake_project_cli, ["run", "--telemetry", input], obj=fake_metadata
)
assert not result.exit_code

fake_session.run.assert_called_once_with(
tags=(),
runner=mocker.ANY,
node_names=(),
from_nodes=[],
to_nodes=[],
from_inputs=[],
to_outputs=[],
load_versions={},
pipeline_name=None,
namespace=None,
)

runner = fake_session.run.call_args_list[0][1]["runner"]
project_path = str(fake_metadata.project_path)
telemetry_file_path = Path(project_path + "/.telemetry")
assert isinstance(runner, SequentialRunner)
assert not runner._is_async
assert telemetry_file_path.exists()

with open(telemetry_file_path) as file:
file_content = file.read()
target_string = "consent: false"

assert target_string in file_content

def test_run_telemetry_flag_invalid(
self, fake_project_cli, fake_metadata, fake_session, mocker
):
result = CliRunner().invoke(
fake_project_cli, ["run", "--telemetry", "wrong"], obj=fake_metadata
)
assert result.exit_code == 1
assert (
"'wrong' is an invalid value for example pipeline or telemetry consent. It must contain only y, n, YES, or NO (case insensitive)."
in result.output
)
20 changes: 10 additions & 10 deletions tests/framework/cli/test_starters.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
_fetch_validate_parse_config_from_user_prompts,
_make_cookiecutter_args_and_fetch_template,
_parse_tools_input,
_parse_yes_no_to_bool,
_validate_tool_selection,
)
from kedro.framework.cli.utils import _parse_yes_to_bool

FILES_IN_TEMPLATE_WITH_NO_TOOLS = 15

Expand Down Expand Up @@ -103,7 +103,7 @@ def _get_expected_files(tools: str, example_pipeline: str):
"7": 0, # Kedro Viz does not add any files
} # files added to template by each tool
tools_list = _parse_tools_input(tools)
example_pipeline_bool = _parse_yes_no_to_bool(example_pipeline)
example_pipeline_bool = _parse_yes_to_bool(example_pipeline)
expected_files = FILES_IN_TEMPLATE_WITH_NO_TOOLS

for tool in tools_list:
Expand Down Expand Up @@ -1323,7 +1323,7 @@ def test_invalid_example(self, fake_kedro_cli, bad_input):

assert result.exit_code != 0
assert (
f"'{bad_input}' is an invalid value for example pipeline." in result.output
f"'{bad_input}' is an invalid value for example pipeline" in result.output
)
assert (
"It must contain only y, n, YES, or NO (case insensitive).\n"
Expand Down Expand Up @@ -1482,18 +1482,18 @@ class TestParseYesNoToBools:
"input",
["yes", "YES", "y", "Y", "yEs"],
)
def parse_yes_no_to_bool_responds_true(self, input):
assert _parse_yes_no_to_bool(input) is True
def _parse_yes_to_bool_responds_true(self, input):
assert _parse_yes_to_bool(input) is True

@pytest.mark.parametrize(
"input",
["no", "NO", "n", "N", "No", ""],
)
def parse_yes_no_to_bool_responds_false(self, input):
assert _parse_yes_no_to_bool(input) is False
def _parse_yes_to_bool_responds_false(self, input):
assert _parse_yes_to_bool(input) is False

def parse_yes_no_to_bool_responds_none(self):
assert _parse_yes_no_to_bool(None) is None
def _parse_yes_to_bool_responds_none(self):
assert _parse_yes_to_bool(None) is None


class TestValidateSelection:
Expand Down Expand Up @@ -1661,7 +1661,7 @@ def test_flag_value_is_invalid(self, fake_kedro_cli):
assert result.exit_code == 1

assert (
"'wrong' is an invalid value for example pipeline. It must contain only y, n, YES, or NO (case insensitive)."
"'wrong' is an invalid value for example pipeline or telemetry consent. It must contain only y, n, YES, or NO (case insensitive)."
in result.output
)

Expand Down

0 comments on commit 570f66a

Please sign in to comment.