Add implementation of telemetry consent flag for kedro run (#3876)

* Add base implementation of telemetry flag for kedro run Signed-off-by: lrcouto <[email protected]> * Update function on test Signed-off-by: lrcouto <[email protected]> * Lint Signed-off-by: lrcouto <[email protected]> * Update tests Signed-off-by: lrcouto <[email protected]> * Add tests for the new telemetry flag Signed-off-by: lrcouto <[email protected]> * Add release note Signed-off-by: lrcouto <[email protected]> * Make util functions private Signed-off-by: lrcouto <[email protected]> * Lint Signed-off-by: lrcouto <[email protected]> --------- Signed-off-by: lrcouto <[email protected]> Signed-off-by: L. R. Couto <[email protected]>
kedro-org · May 22, 2024 · 570f66a · 570f66a
1 parent a53500a
commit 570f66a
Show file tree

Hide file tree

Showing 6 changed files with 157 additions and 52 deletions.
diff --git a/RELEASE.md b/RELEASE.md
@@ -2,6 +2,7 @@
 
 ## Major features and improvements
 * It is now possible to use Kedro without having `rich` installed.
+* Added a `--telemetry` flag to `kedro run`, allowing consent to data usage to be granted or revoked at the same time the command is run.
 
 ## Bug fixes and other changes
 * User defined catch-all dataset factory patterns now override the default pattern provided by the runner.

diff --git a/kedro/framework/cli/project.py b/kedro/framework/cli/project.py
@@ -11,8 +11,10 @@
 from kedro.framework.cli.utils import (
     _check_module_importable,
     _config_file_callback,
+    _parse_yes_to_bool,
     _split_load_versions,
     _split_params,
+    _validate_input_with_regex_pattern,
     call,
     env_option,
     forward_command,
@@ -22,7 +24,7 @@
 from kedro.framework.project import settings
 from kedro.framework.session import KedroSession
 from kedro.framework.startup import ProjectMetadata
-from kedro.utils import load_obj
+from kedro.utils import _find_kedro_project, load_obj
 
 NO_DEPENDENCY_MESSAGE = """{module} is not installed. Please make sure {module} is in
 requirements.txt and run 'pip install -r requirements.txt'."""
@@ -59,6 +61,10 @@
 INPUT_FILE_HELP = """Name of the requirements file to compile."""
 OUTPUT_FILE_HELP = """Name of the file where compiled requirements should be stored."""
 CONF_SOURCE_HELP = """Path of a directory where project configuration is stored."""
+TELEMETRY_ARG_HELP = """Allow or not allow Kedro to collect usage analytics.
+We cannot see nor store information contained into a Kedro project. Opt in with "yes"
+and out with "no".
+"""
 
 
 @click.group(name="Kedro")
@@ -196,6 +202,7 @@ def package(metadata: ProjectMetadata) -> None:
     help=PARAMS_ARG_HELP,
     callback=_split_params,
 )
+@click.option("--telemetry", "-tc", "telemetry_consent", help=TELEMETRY_ARG_HELP)
 def run(  # noqa: PLR0913
     tags: str,
     env: str,
@@ -212,8 +219,17 @@ def run(  # noqa: PLR0913
     conf_source: str,
     params: dict[str, Any],
     namespace: str,
+    telemetry_consent: str,
 ) -> None:
     """Run the pipeline."""
+    if telemetry_consent is not None:
+        _validate_input_with_regex_pattern("yes_no", telemetry_consent)
+        telemetry_consent = "true" if _parse_yes_to_bool(telemetry_consent) else "false"
+
+        project_path = str(_find_kedro_project(Path.cwd()))
+
+        with open(project_path + "/.telemetry", "w") as telemetry_file:
+            telemetry_file.write("consent: " + telemetry_consent)
 
     runner_obj = load_obj(runner or "SequentialRunner", "kedro.runner")
     tuple_tags = tuple(tags)

diff --git a/kedro/framework/cli/starters.py b/kedro/framework/cli/starters.py
@@ -28,7 +28,9 @@
     KedroCliError,
     _clean_pycache,
     _get_entry_points,
+    _parse_yes_to_bool,
     _safe_load_entry_point,
+    _validate_input_with_regex_pattern,
     command_with_verbosity,
 )
 
@@ -154,41 +156,6 @@ def _validate_flag_inputs(flag_inputs: dict[str, Any]) -> None:
         )
 
 
-def _validate_input_with_regex_pattern(pattern_name: str, input: str) -> None:
-    VALIDATION_PATTERNS = {
-        "yes_no": {
-            "regex": r"(?i)^\s*(y|yes|n|no)\s*$",
-            "error_message": f"'{input}' is an invalid value for example pipeline. It must contain only y, n, YES, or NO (case insensitive).",
-        },
-        "project_name": {
-            "regex": r"^[\w -]{2,}$",
-            "error_message": f"'{input}' is an invalid value for project name. It must contain only alphanumeric symbols, spaces, underscores and hyphens and be at least 2 characters long",
-        },
-        "tools": {
-            "regex": r"""^(
-                all|none|                        # A: "all" or "none" or
-                (\ *\d+                          # B: any number of spaces followed by one or more digits
-                (\ *-\ *\d+)?                    # C: zero or one instances of: a hyphen followed by one or more digits, spaces allowed
-                (\ *,\ *\d+(\ *-\ *\d+)?)*       # D: any number of instances of: a comma followed by B and C, spaces allowed
-                \ *)?)                           # E: zero or one instances of (B,C,D) as empty strings are also permissible
-                $""",
-            "error_message": f"'{input}' is an invalid value for project tools. Please select valid options for tools using comma-separated values, ranges, or 'all/none'.",
-        },
-    }
-
-    if not re.match(VALIDATION_PATTERNS[pattern_name]["regex"], input, flags=re.X):
-        click.secho(
-            VALIDATION_PATTERNS[pattern_name]["error_message"],
-            fg="red",
-            err=True,
-        )
-        sys.exit(1)
-
-
-def _parse_yes_no_to_bool(value: str) -> Any:
-    return value.strip().lower() in ["y", "yes"] if value is not None else None
-
-
 def _validate_selected_tools(selected_tools: str | None) -> None:
     valid_tools = list(TOOLS_SHORTNAME_TO_NUMBER) + ["all", "none"]
 
@@ -353,9 +320,7 @@ def new(  # noqa: PLR0913
 
     if telemetry_consent is not None:
         _validate_input_with_regex_pattern("yes_no", telemetry_consent)
-        telemetry_consent = (
-            "true" if _parse_yes_no_to_bool(telemetry_consent) else "false"
-        )
+        telemetry_consent = "true" if _parse_yes_to_bool(telemetry_consent) else "false"
 
     _create_project(project_template, cookiecutter_args, telemetry_consent)
 
@@ -571,7 +536,7 @@ def _get_extra_context(  # noqa: PLR0913
     if project_name is not None:
         extra_context["project_name"] = project_name
     if example_pipeline is not None:
-        extra_context["example_pipeline"] = str(_parse_yes_no_to_bool(example_pipeline))
+        extra_context["example_pipeline"] = str(_parse_yes_to_bool(example_pipeline))
 
     # set defaults for required fields, will be used mostly for starters
     extra_context.setdefault("kedro_version", version)
@@ -669,7 +634,7 @@ def _fetch_validate_parse_config_from_file(
 
     example_pipeline = config.get("example_pipeline", "no")
     _validate_input_with_regex_pattern("yes_no", example_pipeline)
-    config["example_pipeline"] = str(_parse_yes_no_to_bool(example_pipeline))
+    config["example_pipeline"] = str(_parse_yes_to_bool(example_pipeline))
 
     tools_short_names = config.get("tools", "none").lower()
     _validate_selected_tools(tools_short_names)
@@ -722,7 +687,7 @@ def _fetch_validate_parse_config_from_user_prompts(
         _validate_tool_selection(tools_numbers)
         config["tools"] = _convert_tool_numbers_to_readable_names(tools_numbers)
     if "example_pipeline" in config:
-        example_pipeline_bool = _parse_yes_no_to_bool(config["example_pipeline"])
+        example_pipeline_bool = _parse_yes_to_bool(config["example_pipeline"])
         config["example_pipeline"] = str(example_pipeline_bool)
 
     return config

diff --git a/kedro/framework/cli/utils.py b/kedro/framework/cli/utils.py
@@ -497,3 +497,38 @@ def _split_load_versions(ctx: click.Context, param: Any, value: str) -> dict[str
         load_versions_dict[load_version_list[0]] = load_version_list[1]
 
     return load_versions_dict
+
+
+def _validate_input_with_regex_pattern(pattern_name: str, input: str) -> None:
+    VALIDATION_PATTERNS = {
+        "yes_no": {
+            "regex": r"(?i)^\s*(y|yes|n|no)\s*$",
+            "error_message": f"'{input}' is an invalid value for example pipeline or telemetry consent. It must contain only y, n, YES, or NO (case insensitive).",
+        },
+        "project_name": {
+            "regex": r"^[\w -]{2,}$",
+            "error_message": f"'{input}' is an invalid value for project name. It must contain only alphanumeric symbols, spaces, underscores and hyphens and be at least 2 characters long",
+        },
+        "tools": {
+            "regex": r"""^(
+                all|none|                        # A: "all" or "none" or
+                (\ *\d+                          # B: any number of spaces followed by one or more digits
+                (\ *-\ *\d+)?                    # C: zero or one instances of: a hyphen followed by one or more digits, spaces allowed
+                (\ *,\ *\d+(\ *-\ *\d+)?)*       # D: any number of instances of: a comma followed by B and C, spaces allowed
+                \ *)?)                           # E: zero or one instances of (B,C,D) as empty strings are also permissible
+                $""",
+            "error_message": f"'{input}' is an invalid value for project tools. Please select valid options for tools using comma-separated values, ranges, or 'all/none'.",
+        },
+    }
+
+    if not re.match(VALIDATION_PATTERNS[pattern_name]["regex"], input, flags=re.X):
+        click.secho(
+            VALIDATION_PATTERNS[pattern_name]["error_message"],
+            fg="red",
+            err=True,
+        )
+        sys.exit(1)
+
+
+def _parse_yes_to_bool(value: str) -> Any:
+    return value.strip().lower() in ["y", "yes"] if value is not None else None
diff --git a/tests/framework/cli/test_cli.py b/tests/framework/cli/test_cli.py
@@ -934,3 +934,91 @@ def test_run_with_non_existent_conf_source(self, fake_project_cli, fake_metadata
             " does not exist."
         )
         assert expected_output in result.output
+
+    @mark.parametrize(
+        "input",
+        ["yes", "YES", "y", "Y", "yEs"],
+    )
+    def test_run_telemetry_flag_yes(
+        self, fake_project_cli, fake_metadata, fake_session, mocker, input
+    ):
+        result = CliRunner().invoke(
+            fake_project_cli, ["run", "--telemetry", input], obj=fake_metadata
+        )
+        assert not result.exit_code
+
+        fake_session.run.assert_called_once_with(
+            tags=(),
+            runner=mocker.ANY,
+            node_names=(),
+            from_nodes=[],
+            to_nodes=[],
+            from_inputs=[],
+            to_outputs=[],
+            load_versions={},
+            pipeline_name=None,
+            namespace=None,
+        )
+
+        runner = fake_session.run.call_args_list[0][1]["runner"]
+        project_path = str(fake_metadata.project_path)
+        telemetry_file_path = Path(project_path + "/.telemetry")
+        assert isinstance(runner, SequentialRunner)
+        assert not runner._is_async
+        assert telemetry_file_path.exists()
+
+        with open(telemetry_file_path) as file:
+            file_content = file.read()
+            target_string = "consent: true"
+
+        assert target_string in file_content
+
+    @mark.parametrize(
+        "input",
+        ["no", "NO", "n", "N", "No"],
+    )
+    def test_run_telemetry_flag_no(
+        self, fake_project_cli, fake_metadata, fake_session, mocker, input
+    ):
+        result = CliRunner().invoke(
+            fake_project_cli, ["run", "--telemetry", input], obj=fake_metadata
+        )
+        assert not result.exit_code
+
+        fake_session.run.assert_called_once_with(
+            tags=(),
+            runner=mocker.ANY,
+            node_names=(),
+            from_nodes=[],
+            to_nodes=[],
+            from_inputs=[],
+            to_outputs=[],
+            load_versions={},
+            pipeline_name=None,
+            namespace=None,
+        )
+
+        runner = fake_session.run.call_args_list[0][1]["runner"]
+        project_path = str(fake_metadata.project_path)
+        telemetry_file_path = Path(project_path + "/.telemetry")
+        assert isinstance(runner, SequentialRunner)
+        assert not runner._is_async
+        assert telemetry_file_path.exists()
+
+        with open(telemetry_file_path) as file:
+            file_content = file.read()
+            target_string = "consent: false"
+
+        assert target_string in file_content
+
+    def test_run_telemetry_flag_invalid(
+        self, fake_project_cli, fake_metadata, fake_session, mocker
+    ):
+        result = CliRunner().invoke(
+            fake_project_cli, ["run", "--telemetry", "wrong"], obj=fake_metadata
+        )
+        assert result.exit_code == 1
+        assert (
+            "'wrong' is an invalid value for example pipeline or telemetry consent. It must contain only y, n, YES, or NO (case insensitive)."
+            in result.output
+        )
diff --git a/tests/framework/cli/test_starters.py b/tests/framework/cli/test_starters.py
@@ -20,9 +20,9 @@
     _fetch_validate_parse_config_from_user_prompts,
     _make_cookiecutter_args_and_fetch_template,
     _parse_tools_input,
-    _parse_yes_no_to_bool,
     _validate_tool_selection,
 )
+from kedro.framework.cli.utils import _parse_yes_to_bool
 
 FILES_IN_TEMPLATE_WITH_NO_TOOLS = 15
 
@@ -103,7 +103,7 @@ def _get_expected_files(tools: str, example_pipeline: str):
         "7": 0,  # Kedro Viz does not add any files
     }  # files added to template by each tool
     tools_list = _parse_tools_input(tools)
-    example_pipeline_bool = _parse_yes_no_to_bool(example_pipeline)
+    example_pipeline_bool = _parse_yes_to_bool(example_pipeline)
     expected_files = FILES_IN_TEMPLATE_WITH_NO_TOOLS
 
     for tool in tools_list:
@@ -1323,7 +1323,7 @@ def test_invalid_example(self, fake_kedro_cli, bad_input):
 
         assert result.exit_code != 0
         assert (
-            f"'{bad_input}' is an invalid value for example pipeline." in result.output
+            f"'{bad_input}' is an invalid value for example pipeline" in result.output
         )
         assert (
             "It must contain only y, n, YES, or NO (case insensitive).\n"
@@ -1482,18 +1482,18 @@ class TestParseYesNoToBools:
         "input",
         ["yes", "YES", "y", "Y", "yEs"],
     )
-    def parse_yes_no_to_bool_responds_true(self, input):
-        assert _parse_yes_no_to_bool(input) is True
+    def _parse_yes_to_bool_responds_true(self, input):
+        assert _parse_yes_to_bool(input) is True
 
     @pytest.mark.parametrize(
         "input",
         ["no", "NO", "n", "N", "No", ""],
     )
-    def parse_yes_no_to_bool_responds_false(self, input):
-        assert _parse_yes_no_to_bool(input) is False
+    def _parse_yes_to_bool_responds_false(self, input):
+        assert _parse_yes_to_bool(input) is False
 
-    def parse_yes_no_to_bool_responds_none(self):
-        assert _parse_yes_no_to_bool(None) is None
+    def _parse_yes_to_bool_responds_none(self):
+        assert _parse_yes_to_bool(None) is None
 
 
 class TestValidateSelection:
@@ -1661,7 +1661,7 @@ def test_flag_value_is_invalid(self, fake_kedro_cli):
         assert result.exit_code == 1
 
         assert (
-            "'wrong' is an invalid value for example pipeline. It must contain only y, n, YES, or NO (case insensitive)."
+            "'wrong' is an invalid value for example pipeline or telemetry consent. It must contain only y, n, YES, or NO (case insensitive)."
             in result.output
         )