From 570f66ac15df27bd3f15b64f0013fbfe17d4cdc1 Mon Sep 17 00:00:00 2001 From: "L. R. Couto" <57910428+lrcouto@users.noreply.github.com> Date: Wed, 22 May 2024 09:58:35 -0300 Subject: [PATCH] Add implementation of telemetry consent flag for kedro run (#3876) * Add base implementation of telemetry flag for kedro run Signed-off-by: lrcouto * Update function on test Signed-off-by: lrcouto * Lint Signed-off-by: lrcouto * Update tests Signed-off-by: lrcouto * Add tests for the new telemetry flag Signed-off-by: lrcouto * Add release note Signed-off-by: lrcouto * Make util functions private Signed-off-by: lrcouto * Lint Signed-off-by: lrcouto --------- Signed-off-by: lrcouto Signed-off-by: L. R. Couto <57910428+lrcouto@users.noreply.github.com> --- RELEASE.md | 1 + kedro/framework/cli/project.py | 18 +++++- kedro/framework/cli/starters.py | 47 ++------------- kedro/framework/cli/utils.py | 35 +++++++++++ tests/framework/cli/test_cli.py | 88 ++++++++++++++++++++++++++++ tests/framework/cli/test_starters.py | 20 +++---- 6 files changed, 157 insertions(+), 52 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index fabff95266..b75cc57a22 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -2,6 +2,7 @@ ## Major features and improvements * It is now possible to use Kedro without having `rich` installed. +* Added a `--telemetry` flag to `kedro run`, allowing consent to data usage to be granted or revoked at the same time the command is run. ## Bug fixes and other changes * User defined catch-all dataset factory patterns now override the default pattern provided by the runner. diff --git a/kedro/framework/cli/project.py b/kedro/framework/cli/project.py index cd9a072184..adbf6ce6bd 100644 --- a/kedro/framework/cli/project.py +++ b/kedro/framework/cli/project.py @@ -11,8 +11,10 @@ from kedro.framework.cli.utils import ( _check_module_importable, _config_file_callback, + _parse_yes_to_bool, _split_load_versions, _split_params, + _validate_input_with_regex_pattern, call, env_option, forward_command, @@ -22,7 +24,7 @@ from kedro.framework.project import settings from kedro.framework.session import KedroSession from kedro.framework.startup import ProjectMetadata -from kedro.utils import load_obj +from kedro.utils import _find_kedro_project, load_obj NO_DEPENDENCY_MESSAGE = """{module} is not installed. Please make sure {module} is in requirements.txt and run 'pip install -r requirements.txt'.""" @@ -59,6 +61,10 @@ INPUT_FILE_HELP = """Name of the requirements file to compile.""" OUTPUT_FILE_HELP = """Name of the file where compiled requirements should be stored.""" CONF_SOURCE_HELP = """Path of a directory where project configuration is stored.""" +TELEMETRY_ARG_HELP = """Allow or not allow Kedro to collect usage analytics. +We cannot see nor store information contained into a Kedro project. Opt in with "yes" +and out with "no". +""" @click.group(name="Kedro") @@ -196,6 +202,7 @@ def package(metadata: ProjectMetadata) -> None: help=PARAMS_ARG_HELP, callback=_split_params, ) +@click.option("--telemetry", "-tc", "telemetry_consent", help=TELEMETRY_ARG_HELP) def run( # noqa: PLR0913 tags: str, env: str, @@ -212,8 +219,17 @@ def run( # noqa: PLR0913 conf_source: str, params: dict[str, Any], namespace: str, + telemetry_consent: str, ) -> None: """Run the pipeline.""" + if telemetry_consent is not None: + _validate_input_with_regex_pattern("yes_no", telemetry_consent) + telemetry_consent = "true" if _parse_yes_to_bool(telemetry_consent) else "false" + + project_path = str(_find_kedro_project(Path.cwd())) + + with open(project_path + "/.telemetry", "w") as telemetry_file: + telemetry_file.write("consent: " + telemetry_consent) runner_obj = load_obj(runner or "SequentialRunner", "kedro.runner") tuple_tags = tuple(tags) diff --git a/kedro/framework/cli/starters.py b/kedro/framework/cli/starters.py index 5a4b76a347..b61e42658f 100644 --- a/kedro/framework/cli/starters.py +++ b/kedro/framework/cli/starters.py @@ -28,7 +28,9 @@ KedroCliError, _clean_pycache, _get_entry_points, + _parse_yes_to_bool, _safe_load_entry_point, + _validate_input_with_regex_pattern, command_with_verbosity, ) @@ -154,41 +156,6 @@ def _validate_flag_inputs(flag_inputs: dict[str, Any]) -> None: ) -def _validate_input_with_regex_pattern(pattern_name: str, input: str) -> None: - VALIDATION_PATTERNS = { - "yes_no": { - "regex": r"(?i)^\s*(y|yes|n|no)\s*$", - "error_message": f"'{input}' is an invalid value for example pipeline. It must contain only y, n, YES, or NO (case insensitive).", - }, - "project_name": { - "regex": r"^[\w -]{2,}$", - "error_message": f"'{input}' is an invalid value for project name. It must contain only alphanumeric symbols, spaces, underscores and hyphens and be at least 2 characters long", - }, - "tools": { - "regex": r"""^( - all|none| # A: "all" or "none" or - (\ *\d+ # B: any number of spaces followed by one or more digits - (\ *-\ *\d+)? # C: zero or one instances of: a hyphen followed by one or more digits, spaces allowed - (\ *,\ *\d+(\ *-\ *\d+)?)* # D: any number of instances of: a comma followed by B and C, spaces allowed - \ *)?) # E: zero or one instances of (B,C,D) as empty strings are also permissible - $""", - "error_message": f"'{input}' is an invalid value for project tools. Please select valid options for tools using comma-separated values, ranges, or 'all/none'.", - }, - } - - if not re.match(VALIDATION_PATTERNS[pattern_name]["regex"], input, flags=re.X): - click.secho( - VALIDATION_PATTERNS[pattern_name]["error_message"], - fg="red", - err=True, - ) - sys.exit(1) - - -def _parse_yes_no_to_bool(value: str) -> Any: - return value.strip().lower() in ["y", "yes"] if value is not None else None - - def _validate_selected_tools(selected_tools: str | None) -> None: valid_tools = list(TOOLS_SHORTNAME_TO_NUMBER) + ["all", "none"] @@ -353,9 +320,7 @@ def new( # noqa: PLR0913 if telemetry_consent is not None: _validate_input_with_regex_pattern("yes_no", telemetry_consent) - telemetry_consent = ( - "true" if _parse_yes_no_to_bool(telemetry_consent) else "false" - ) + telemetry_consent = "true" if _parse_yes_to_bool(telemetry_consent) else "false" _create_project(project_template, cookiecutter_args, telemetry_consent) @@ -571,7 +536,7 @@ def _get_extra_context( # noqa: PLR0913 if project_name is not None: extra_context["project_name"] = project_name if example_pipeline is not None: - extra_context["example_pipeline"] = str(_parse_yes_no_to_bool(example_pipeline)) + extra_context["example_pipeline"] = str(_parse_yes_to_bool(example_pipeline)) # set defaults for required fields, will be used mostly for starters extra_context.setdefault("kedro_version", version) @@ -669,7 +634,7 @@ def _fetch_validate_parse_config_from_file( example_pipeline = config.get("example_pipeline", "no") _validate_input_with_regex_pattern("yes_no", example_pipeline) - config["example_pipeline"] = str(_parse_yes_no_to_bool(example_pipeline)) + config["example_pipeline"] = str(_parse_yes_to_bool(example_pipeline)) tools_short_names = config.get("tools", "none").lower() _validate_selected_tools(tools_short_names) @@ -722,7 +687,7 @@ def _fetch_validate_parse_config_from_user_prompts( _validate_tool_selection(tools_numbers) config["tools"] = _convert_tool_numbers_to_readable_names(tools_numbers) if "example_pipeline" in config: - example_pipeline_bool = _parse_yes_no_to_bool(config["example_pipeline"]) + example_pipeline_bool = _parse_yes_to_bool(config["example_pipeline"]) config["example_pipeline"] = str(example_pipeline_bool) return config diff --git a/kedro/framework/cli/utils.py b/kedro/framework/cli/utils.py index eda9bc005c..c65c9970bc 100644 --- a/kedro/framework/cli/utils.py +++ b/kedro/framework/cli/utils.py @@ -497,3 +497,38 @@ def _split_load_versions(ctx: click.Context, param: Any, value: str) -> dict[str load_versions_dict[load_version_list[0]] = load_version_list[1] return load_versions_dict + + +def _validate_input_with_regex_pattern(pattern_name: str, input: str) -> None: + VALIDATION_PATTERNS = { + "yes_no": { + "regex": r"(?i)^\s*(y|yes|n|no)\s*$", + "error_message": f"'{input}' is an invalid value for example pipeline or telemetry consent. It must contain only y, n, YES, or NO (case insensitive).", + }, + "project_name": { + "regex": r"^[\w -]{2,}$", + "error_message": f"'{input}' is an invalid value for project name. It must contain only alphanumeric symbols, spaces, underscores and hyphens and be at least 2 characters long", + }, + "tools": { + "regex": r"""^( + all|none| # A: "all" or "none" or + (\ *\d+ # B: any number of spaces followed by one or more digits + (\ *-\ *\d+)? # C: zero or one instances of: a hyphen followed by one or more digits, spaces allowed + (\ *,\ *\d+(\ *-\ *\d+)?)* # D: any number of instances of: a comma followed by B and C, spaces allowed + \ *)?) # E: zero or one instances of (B,C,D) as empty strings are also permissible + $""", + "error_message": f"'{input}' is an invalid value for project tools. Please select valid options for tools using comma-separated values, ranges, or 'all/none'.", + }, + } + + if not re.match(VALIDATION_PATTERNS[pattern_name]["regex"], input, flags=re.X): + click.secho( + VALIDATION_PATTERNS[pattern_name]["error_message"], + fg="red", + err=True, + ) + sys.exit(1) + + +def _parse_yes_to_bool(value: str) -> Any: + return value.strip().lower() in ["y", "yes"] if value is not None else None diff --git a/tests/framework/cli/test_cli.py b/tests/framework/cli/test_cli.py index d147a6a0d1..df77362b88 100644 --- a/tests/framework/cli/test_cli.py +++ b/tests/framework/cli/test_cli.py @@ -934,3 +934,91 @@ def test_run_with_non_existent_conf_source(self, fake_project_cli, fake_metadata " does not exist." ) assert expected_output in result.output + + @mark.parametrize( + "input", + ["yes", "YES", "y", "Y", "yEs"], + ) + def test_run_telemetry_flag_yes( + self, fake_project_cli, fake_metadata, fake_session, mocker, input + ): + result = CliRunner().invoke( + fake_project_cli, ["run", "--telemetry", input], obj=fake_metadata + ) + assert not result.exit_code + + fake_session.run.assert_called_once_with( + tags=(), + runner=mocker.ANY, + node_names=(), + from_nodes=[], + to_nodes=[], + from_inputs=[], + to_outputs=[], + load_versions={}, + pipeline_name=None, + namespace=None, + ) + + runner = fake_session.run.call_args_list[0][1]["runner"] + project_path = str(fake_metadata.project_path) + telemetry_file_path = Path(project_path + "/.telemetry") + assert isinstance(runner, SequentialRunner) + assert not runner._is_async + assert telemetry_file_path.exists() + + with open(telemetry_file_path) as file: + file_content = file.read() + target_string = "consent: true" + + assert target_string in file_content + + @mark.parametrize( + "input", + ["no", "NO", "n", "N", "No"], + ) + def test_run_telemetry_flag_no( + self, fake_project_cli, fake_metadata, fake_session, mocker, input + ): + result = CliRunner().invoke( + fake_project_cli, ["run", "--telemetry", input], obj=fake_metadata + ) + assert not result.exit_code + + fake_session.run.assert_called_once_with( + tags=(), + runner=mocker.ANY, + node_names=(), + from_nodes=[], + to_nodes=[], + from_inputs=[], + to_outputs=[], + load_versions={}, + pipeline_name=None, + namespace=None, + ) + + runner = fake_session.run.call_args_list[0][1]["runner"] + project_path = str(fake_metadata.project_path) + telemetry_file_path = Path(project_path + "/.telemetry") + assert isinstance(runner, SequentialRunner) + assert not runner._is_async + assert telemetry_file_path.exists() + + with open(telemetry_file_path) as file: + file_content = file.read() + target_string = "consent: false" + + assert target_string in file_content + + def test_run_telemetry_flag_invalid( + self, fake_project_cli, fake_metadata, fake_session, mocker + ): + result = CliRunner().invoke( + fake_project_cli, ["run", "--telemetry", "wrong"], obj=fake_metadata + ) + assert result.exit_code == 1 + assert ( + "'wrong' is an invalid value for example pipeline or telemetry consent. It must contain only y, n, YES, or NO (case insensitive)." + in result.output + ) diff --git a/tests/framework/cli/test_starters.py b/tests/framework/cli/test_starters.py index 5d00fa5401..6cdd154755 100644 --- a/tests/framework/cli/test_starters.py +++ b/tests/framework/cli/test_starters.py @@ -20,9 +20,9 @@ _fetch_validate_parse_config_from_user_prompts, _make_cookiecutter_args_and_fetch_template, _parse_tools_input, - _parse_yes_no_to_bool, _validate_tool_selection, ) +from kedro.framework.cli.utils import _parse_yes_to_bool FILES_IN_TEMPLATE_WITH_NO_TOOLS = 15 @@ -103,7 +103,7 @@ def _get_expected_files(tools: str, example_pipeline: str): "7": 0, # Kedro Viz does not add any files } # files added to template by each tool tools_list = _parse_tools_input(tools) - example_pipeline_bool = _parse_yes_no_to_bool(example_pipeline) + example_pipeline_bool = _parse_yes_to_bool(example_pipeline) expected_files = FILES_IN_TEMPLATE_WITH_NO_TOOLS for tool in tools_list: @@ -1323,7 +1323,7 @@ def test_invalid_example(self, fake_kedro_cli, bad_input): assert result.exit_code != 0 assert ( - f"'{bad_input}' is an invalid value for example pipeline." in result.output + f"'{bad_input}' is an invalid value for example pipeline" in result.output ) assert ( "It must contain only y, n, YES, or NO (case insensitive).\n" @@ -1482,18 +1482,18 @@ class TestParseYesNoToBools: "input", ["yes", "YES", "y", "Y", "yEs"], ) - def parse_yes_no_to_bool_responds_true(self, input): - assert _parse_yes_no_to_bool(input) is True + def _parse_yes_to_bool_responds_true(self, input): + assert _parse_yes_to_bool(input) is True @pytest.mark.parametrize( "input", ["no", "NO", "n", "N", "No", ""], ) - def parse_yes_no_to_bool_responds_false(self, input): - assert _parse_yes_no_to_bool(input) is False + def _parse_yes_to_bool_responds_false(self, input): + assert _parse_yes_to_bool(input) is False - def parse_yes_no_to_bool_responds_none(self): - assert _parse_yes_no_to_bool(None) is None + def _parse_yes_to_bool_responds_none(self): + assert _parse_yes_to_bool(None) is None class TestValidateSelection: @@ -1661,7 +1661,7 @@ def test_flag_value_is_invalid(self, fake_kedro_cli): assert result.exit_code == 1 assert ( - "'wrong' is an invalid value for example pipeline. It must contain only y, n, YES, or NO (case insensitive)." + "'wrong' is an invalid value for example pipeline or telemetry consent. It must contain only y, n, YES, or NO (case insensitive)." in result.output )