diff --git a/docs/examples/workflows-examples.md b/docs/examples/workflows-examples.md index a0c56e5e7..f5bbff610 100644 --- a/docs/examples/workflows-examples.md +++ b/docs/examples/workflows-examples.md @@ -107,7 +107,8 @@ Explore the examples through the side bar! | [template-defaults](https://github.com/argoproj/argo-workflows/blob/main/examples/template-defaults.yaml) | | [testvolume](https://github.com/argoproj/argo-workflows/blob/main/examples/testvolume.yaml) | | [timeouts-step](https://github.com/argoproj/argo-workflows/blob/main/examples/timeouts-step.yaml) | -| [title-and-descriptin-with-markdown](https://github.com/argoproj/argo-workflows/blob/main/examples/title-and-descriptin-with-markdown.yaml) | +| [title-and-description-with-markdown](https://github.com/argoproj/argo-workflows/blob/main/examples/title-and-description-with-markdown.yaml) | +| [withsequence-nested-result](https://github.com/argoproj/argo-workflows/blob/main/examples/withsequence-nested-result.yaml) | | [work-avoidance](https://github.com/argoproj/argo-workflows/blob/main/examples/work-avoidance.yaml) | | [workflow-count-resourcequota](https://github.com/argoproj/argo-workflows/blob/main/examples/workflow-count-resourcequota.yaml) | | [workflow-event-binding/event-consumer-workfloweventbinding](https://github.com/argoproj/argo-workflows/blob/main/examples/workflow-event-binding/event-consumer-workfloweventbinding.yaml) | diff --git a/docs/examples/workflows/experimental/script_pydantic_io.md b/docs/examples/workflows/experimental/script_runner_io.md similarity index 53% rename from docs/examples/workflows/experimental/script_pydantic_io.md rename to docs/examples/workflows/experimental/script_runner_io.md index 1bc6d4320..d631cf038 100644 --- a/docs/examples/workflows/experimental/script_pydantic_io.md +++ b/docs/examples/workflows/experimental/script_runner_io.md @@ -1,4 +1,4 @@ -# Script Pydantic Io +# Script Runner Io @@ -14,7 +14,8 @@ from pydantic import BaseModel from hera.shared import global_config - from hera.workflows import Artifact, ArtifactLoader, Parameter, Workflow, script + from hera.workflows import Artifact, ArtifactLoader, Parameter, Steps, Workflow, script + from hera.workflows.archive import NoneArchiveStrategy from hera.workflows.io import RunnerInput, RunnerOutput try: @@ -27,7 +28,7 @@ class MyObject(BaseModel): - a_dict: dict = {} + a_dict: dict # not giving a default makes the field a required input for the template a_str: str = "a default string" @@ -44,7 +45,12 @@ artifact_int: Annotated[int, Artifact(name="artifact-output")] - @script(constructor="runner") + @script(constructor="runner", image="python-image-built-with-my-package") + def writer() -> Annotated[int, Artifact(name="int-artifact", archive=NoneArchiveStrategy())]: + return 100 + + + @script(constructor="runner", image="python-image-built-with-my-package") def pydantic_io( my_input: MyInput, ) -> MyOutput: @@ -52,7 +58,17 @@ with Workflow(generate_name="pydantic-io-") as w: - pydantic_io() + with Steps(name="use-pydantic-io"): + write_step = writer() + pydantic_io( + arguments=[ + write_step.get_artifact("int-artifact").with_name("artifact-input"), + { + "param_int": 101, + "an_object": MyObject(a_dict={"my-new-key": "my-new-value"}), + }, + ] + ) ``` === "YAML" @@ -64,6 +80,46 @@ generateName: pydantic-io- spec: templates: + - name: use-pydantic-io + steps: + - - name: writer + template: writer + - - arguments: + artifacts: + - from: '{{steps.writer.outputs.artifacts.int-artifact}}' + name: artifact-input + parameters: 
+ - name: param_int + value: '101' + - name: an_object + value: '{"a_dict": {"my-new-key": "my-new-value"}, "a_str": "a default + string"}' + name: pydantic-io + template: pydantic-io + - name: writer + outputs: + artifacts: + - archive: + none: {} + name: int-artifact + path: /tmp/hera-outputs/artifacts/int-artifact + script: + args: + - -m + - hera.workflows.runner + - -e + - examples.workflows.experimental.script_runner_io:writer + command: + - python + env: + - name: hera__script_annotations + value: '' + - name: hera__outputs_directory + value: /tmp/hera-outputs + - name: hera__script_pydantic_io + value: '' + image: python-image-built-with-my-package + source: '{{inputs.parameters}}' - inputs: artifacts: - name: artifact-input @@ -87,7 +143,7 @@ - -m - hera.workflows.runner - -e - - examples.workflows.experimental.script_pydantic_io:pydantic_io + - examples.workflows.experimental.script_runner_io:pydantic_io command: - python env: @@ -97,7 +153,7 @@ value: /tmp/hera-outputs - name: hera__script_pydantic_io value: '' - image: python:3.8 + image: python-image-built-with-my-package source: '{{inputs.parameters}}' ``` diff --git a/docs/examples/workflows/template_level_volume.md b/docs/examples/workflows/template_level_volume.md index 883e6758f..a4c1fe017 100644 --- a/docs/examples/workflows/template_level_volume.md +++ b/docs/examples/workflows/template_level_volume.md @@ -73,35 +73,17 @@ See https://argo-workflows.readthedocs.io/en/latest/walk-through/volumes/ spec: entrypoint: generate-and-use-volume templates: - - name: generate-and-use-volume - steps: - - - name: generate-volume - template: generate-volume - arguments: - parameters: - - name: pvc-size - # In a real-world example, this could be generated by a previous workflow step. - value: '1Gi' - - - name: generate - template: whalesay - arguments: - parameters: - - name: pvc-name - value: '{{steps.generate-volume.outputs.parameters.pvc-name}}' - - - name: print - template: print-message - arguments: - parameters: - - name: pvc-name - value: '{{steps.generate-volume.outputs.parameters.pvc-name}}' - - - name: generate-volume - inputs: + - inputs: + parameters: + - name: pvc-size + name: generate-volume + outputs: parameters: - - name: pvc-size + - name: pvc-name + valueFrom: + jsonPath: '{.metadata.name}' resource: action: create - setOwnerReference: true manifest: | apiVersion: v1 kind: PersistentVolumeClaim @@ -112,42 +94,62 @@ See https://argo-workflows.readthedocs.io/en/latest/walk-through/volumes/ resources: requests: storage: '{{inputs.parameters.pvc-size}}' - outputs: - parameters: - - name: pvc-name - valueFrom: - jsonPath: '{.metadata.name}' - - - name: whalesay - inputs: - parameters: - - name: pvc-name - volumes: - - name: workdir - persistentVolumeClaim: - claimName: '{{inputs.parameters.pvc-name}}' - container: + setOwnerReference: true + - container: + args: + - echo generating message in volume; cowsay hello world | tee /mnt/vol/hello_world.txt + command: + - sh + - -c image: docker/whalesay:latest - command: [sh, -c] - args: ["echo generating message in volume; cowsay hello world | tee /mnt/vol/hello_world.txt"] volumeMounts: - - name: workdir - mountPath: /mnt/vol - - - name: print-message + - mountPath: /mnt/vol + name: workdir inputs: - parameters: - - name: pvc-name + parameters: + - name: pvc-name + name: whalesay volumes: - - name: workdir - persistentVolumeClaim: - claimName: '{{inputs.parameters.pvc-name}}' - container: + - name: workdir + persistentVolumeClaim: + claimName: '{{inputs.parameters.pvc-name}}' 
+      - container:
+          args:
+          - echo getting message from volume; find /mnt/vol; cat /mnt/vol/hello_world.txt
+          command:
+          - sh
+          - -c
           image: alpine:latest
-          command: [sh, -c]
-          args: ["echo getting message from volume; find /mnt/vol; cat /mnt/vol/hello_world.txt"]
           volumeMounts:
-          - name: workdir
-            mountPath: /mnt/vol
+          - mountPath: /mnt/vol
+            name: workdir
+        inputs:
+          parameters:
+          - name: pvc-name
+        name: print-message
+        volumes:
+        - name: workdir
+          persistentVolumeClaim:
+            claimName: '{{inputs.parameters.pvc-name}}'
+      - name: generate-and-use-volume
+        steps:
+        - - arguments:
+              parameters:
+              - name: pvc-size
+                value: 1Gi
+            name: generate-volume
+            template: generate-volume
+        - - arguments:
+              parameters:
+              - name: pvc-name
+                value: '{{steps.generate-volume.outputs.parameters.pvc-name}}'
+            name: generate
+            template: whalesay
+        - - arguments:
+              parameters:
+              - name: pvc-name
+                value: '{{steps.generate-volume.outputs.parameters.pvc-name}}'
+            name: print
+            template: print-message
     ```
diff --git a/docs/user-guides/script-annotations.md b/docs/user-guides/script-annotations.md
new file mode 100644
index 000000000..e2a844c36
--- /dev/null
+++ b/docs/user-guides/script-annotations.md
@@ -0,0 +1,236 @@
+# Script Annotations
+
+Annotation syntax is an experimental feature that uses `typing.Annotated` to declare `Parameter`s and `Artifact`s as the
+inputs and outputs of functions decorated with `script`. Placing the `Parameter`/`Artifact` metadata inside `Annotated`
+types in the function signature simplifies writing scripts whose parameters and artifacts require additional fields such
+as a `description` or an alternative `name`.
+
+This feature must be enabled by setting the `script_annotations` flag in `experimental_features` on the global config.
+
+```py
+global_config.experimental_features["script_annotations"] = True
+```
+
+## Parameters
+
+In Hera, we can currently specify inputs inside the `@script` decorator as follows:
+
+```python
+@script(
+    inputs=[
+        Parameter(name="an_int", description="an_int parameter", default=1, enum=[1, 2, 3]),
+        Parameter(name="a_bool", description="a_bool parameter", default=True, enum=[True, False]),
+        Parameter(name="a_string", description="a_string parameter", default="a", enum=["a", "b", "c"])
+    ]
+)
+def echo_all(an_int=1, a_bool=True, a_string="a"):
+    print(an_int)
+    print(a_bool)
+    print(a_string)
+```
+
+Notice how the `name` and `default` values are duplicated for each `Parameter`. Using annotations, we can rewrite this
+as:
+
+```python
+@script()
+def echo_all(
+    an_int: Annotated[int, Parameter(description="an_int parameter", enum=[1, 2, 3])] = 1,
+    a_bool: Annotated[bool, Parameter(description="a_bool parameter", enum=[True, False])] = True,
+    a_string: Annotated[str, Parameter(description="a_string parameter", enum=["a", "b", "c"])] = "a",
+):
+    print(an_int)
+    print(a_bool)
+    print(a_string)
+```
+
+The fields allowed in the `Parameter` annotations are: `name`, `enum`, and `description`.
+
+## Artifacts
+
+> Note: `Artifact` annotations are only supported when used with the `RunnerScriptConstructor`.
+
+The feature is even more powerful for `Artifact`s. In Hera we are currently able to specify `Artifact`s in `inputs`, but
+the given path is not programmatically linked to the code within the function unless the path is defined outside the
+function's scope:
+
+```python
+@script(inputs=Artifact(name="my-artifact", path="/tmp/file"))
+def read_artifact():
+    with open("/tmp/file") as a_file:  # Repeating "/tmp/file" is prone to human error!
+        print(a_file.read())
+
+# or
+
+MY_PATH = "/tmp/file"  # Now accessible outside of the function scope!
+@script(inputs=Artifact(name="my-artifact", path=MY_PATH))
+def read_artifact():
+    with open(MY_PATH) as a_file:
+        print(a_file.read())
+```
+
+By using annotations we can avoid repeating the `path` of the file, and the function can use the variable directly as a
+`Path` object, with its value already set to the given path:
+
+```python
+@script(constructor="runner")
+def read_artifact(an_artifact: Annotated[Path, Artifact(name="my-artifact", path="/tmp/file")]):
+    print(an_artifact.read_text())
+```
+
+The fields allowed in the `Artifact` annotations are: `name`, `path`, and `loader`.
+
+## Artifact Loaders
+
+If you want to load an object directly from the `path` of the `Artifact`, two types of loaders are available besides the
+default `Path` behaviour used when no loader is specified. The `ArtifactLoader` enum provides the `file` and `json`
+loaders.
+
+### `None` loader
+
+With `None` set as the loader (which is the default) in the Artifact annotation, the `path` attribute of `Artifact` is
+extracted and used to provide a `pathlib.Path` object for the given argument, which can be used directly in the function
+body. The following example is the same as above except for explicitly setting the loader to `None`:
+
+```python
+@script(constructor="runner")
+def read_artifact(
+    an_artifact: Annotated[Path, Artifact(name="my-artifact", path="/tmp/file", loader=None)]
+):
+    print(an_artifact.read_text())
+```
+
+### `file` loader
+
+When the loader is set to `file`, the function parameter type should be `str`, and it will contain the string contents
+of the file stored at `path` (essentially performing `path.read_text()` automatically):
+
+```python
+@script(constructor="runner")
+def read_artifact(
+    an_artifact: Annotated[str, Artifact(name="my-artifact", path="/tmp/file", loader=ArtifactLoader.file)]
+) -> str:
+    return an_artifact
+```
+
+This loads the contents of the file at `"/tmp/file"` into the argument `an_artifact`, which can subsequently be used as
+a string inside the function.
+
+### `json` loader
+
+When the loader is set to `json`, the contents of the file at `path` are read and parsed to a dictionary via `json.load`
+(essentially performing `json.load(path.open())` automatically). By specifying a Pydantic type, this dictionary can even
+be automatically parsed to that type:
+
+```python
+class MyArtifact(BaseModel):
+    a = "a"
+    b = "b"
+
+
+@script(constructor="runner")
+def read_artifact(
+    an_artifact: Annotated[MyArtifact, Artifact(name="my-artifact", path="/tmp/file", loader=ArtifactLoader.json)]
+) -> str:
+    return an_artifact.a + an_artifact.b
+```
+
+Here, we have a JSON representation of `MyArtifact`, such as `{"a": "hello ", "b": "world"}`, stored at `"/tmp/file"`.
+We can load it with `ArtifactLoader.json` and then use `an_artifact` as an instance of `MyArtifact` inside the function,
+so the function will return `"hello world"`.
+
+### Function parameter name aliasing
+
+Script annotations also allow name aliasing of function parameters when used with the `RunnerScriptConstructor`, in
+particular to expose a public `kebab-case` parameter while keeping a `snake_case` Python function parameter. When using
+a `RunnerScriptConstructor`, an environment variable `hera__script_annotations` will be added to the Script template
+(visible in the exported YAML file).
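+
+For example, a minimal sketch of exposing a kebab-case input while keeping a snake_case argument (the function and
+parameter names here are illustrative):
+
+```python
+@script(constructor="runner")
+def say_message(a_message: Annotated[str, Parameter(name="a-message")]):
+    # The template's public input parameter is "a-message"; the Hera Runner
+    # maps its value onto the snake_case `a_message` argument at runtime.
+    print(a_message)
+```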
+
+## Outputs
+
+> Note: Output annotations are only supported when used with the `RunnerScriptConstructor`.
+
+There are two ways to specify output Artifacts and Parameters.
+
+### Function return annotations
+
+Function return annotations can be used to specify the output type information for output Artifacts and Parameters, and
+the function should return a value or tuple. An example can be seen
+[here](../examples/workflows/experimental/script_annotations_outputs.md).
+
+For a simple hello world output artifact example we currently have:
+```python
+@script(outputs=Artifact(name="hello-artifact", path="/tmp/hello_world.txt"))
+def hello_world():
+    with open("/tmp/hello_world.txt", "w") as f:
+        f.write("Hello, world!")
+```
+
+The new approach allows us to avoid duplication of the path, which is now optional, and results in more readable code:
+```python
+@script()
+def hello_world() -> Annotated[str, Artifact(name="hello-artifact")]:
+    return "Hello, world!"
+```
+
+For `Parameter`s we have a similar syntax:
+
+```python
+@script()
+def hello_world() -> Annotated[str, Parameter(name="hello-param")]:
+    return "Hello, world!"
+```
+
+The returned values will be automatically saved in files within the Argo container according to this schema:
+* `/hera/outputs/parameters/<name>`
+* `/hera/outputs/artifacts/<name>`
+
+These outputs are also exposed in the `outputs` section of the template in YAML.
+
+The object returned from the function can be of any serialisable Pydantic type (or basic Python type) and must be
+`Annotated` as an `Artifact` or `Parameter`. The `Parameter`/`Artifact`'s `name` will be used for the path of the output
+unless a `path` is explicitly provided:
+* if the annotation is an `Artifact` with a `path`, we use that `path`
+* if the annotation is a `Parameter` with a `value_from` that contains a `path`, we use that `path`
+
+See the following two functions for specifying custom paths:
+
+```python
+@script()
+def hello_world() -> Annotated[str, Artifact(name="hello-artifact", path="/tmp/hello_world_art.txt")]:
+    return "Hello, world!"
+
+@script()
+def hello_world() -> Annotated[str, Parameter(name="hello-param", value_from={"path": "/tmp/hello_world_param.txt"})]:
+    return "Hello, world!"
+```
+
+For multiple outputs, the return type should be a `Tuple` of arbitrary Pydantic types with individual
+`Parameter`/`Artifact` annotations, and the function must return a tuple matching these types:
+```python
+@script()
+def func(...) -> Tuple[
+    Annotated[arbitrary_pydantic_type_a, Artifact],
+    Annotated[arbitrary_pydantic_type_b, Parameter],
+    Annotated[arbitrary_pydantic_type_c, Parameter],
+    ...]:
+    return output_a, output_b, output_c
+```
+
+### Input-Output function parameters
+
+Hera also allows output `Parameter`/`Artifact`s as part of the function signature when specified as a `Path` type,
+allowing users to write to the path as an output, without needing an explicit return. They require an additional field
+`output=True` to distinguish them from the input parameters and must have an underlying `Path` type (or another type
+that will write to disk).
+
+```python
+@script()
+def func(..., output_param: Annotated[Path, Parameter(output=True, global_name="...", name="")]) -> Annotated[arbitrary_pydantic_type, OutputItem]:
+    output_param.write_text("...")
+    return output
+```
+
+The parent outputs directory, `/hera/outputs` by default, can be set by the user.
+This is done by adding:
+
+```python
+global_config.set_class_defaults(RunnerScriptConstructor, outputs_directory="user/chosen/outputs")
+```
diff --git a/docs/user-guides/script-basics.md b/docs/user-guides/script-basics.md
new file mode 100644
index 000000000..2148e81cf
--- /dev/null
+++ b/docs/user-guides/script-basics.md
@@ -0,0 +1,198 @@
+# Script Basics
+
+The `Script` class is an essential part of Hera's extension on top of Argo. As Hera is a Python library,
+[Script templates](https://argoproj.github.io/argo-workflows/fields/#scripttemplate) running Python become the standard
+template, which is reflected by the greater feature set provided for writing them.
+
+## Script Decorator
+
+The `script` decorator function is a key offering of Hera to achieve near-native Python function orchestration. It
+allows you to call the function under a Hera context manager such as a `Workflow` or `Steps` context, and it will be
+treated as the intended sub-object, which would be a `template` when under a `Workflow`, or a `Step` when under a
+`Steps`. The function will still behave as normal outside of any Hera contexts, meaning you can write unit tests on the
+given function.
+
+> **For advanced users**: the exact mechanism of the `script` decorator is to prepare a `Script` object within the
+> decorator, so that when your function is invoked under a Hera context, the call is redirected to the `Script.__call__`
+> function. This takes the kwargs of a `Step` or `Task` depending on whether the context manager is a `Steps` or a
+> `DAG`. Under a Workflow itself, your function is not expected to take arguments, so the call will add the function as
+> a template.
+
+When decorating a function, you should pass `Script` parameters to the `script` decorator. This includes values such as
+the `image` to use, and `resources` to request.
+
+```py
+from hera.workflows import Resources, script
+
+@script(image="python:3.11", resources=Resources(memory_request="5Gi"))
+def echo(message: str):
+    print(message)
+```
+
+When calling the function under a `Steps` or `DAG` context, you should pass `Step` or `Task` kwargs, such as the `name`
+of the `Step`/`Task`, a `when` clause, a `with_param` list to loop over a given template, or `arguments` for the
+function.
+
+```py
+with Workflow(generate_name="dag-diamond-", entrypoint="diamond") as w:
+    with DAG(name="diamond"):
+        A = echo(name="A", arguments={"message": "A"})
+        B = echo(name="B", arguments={"message": "B"}, when=f"{A.result} == 'A'")
+        C = echo(name="C", arguments={"message": "C"}, when=f"{A.result} != 'A'")
+        D = echo(name="D", arguments={"message": "D"})
+        A >> [B, C] >> D
+```
+
+Alternatively, you can specify your DAG using `Task` directly:
+
+```py
+with Workflow(generate_name="dag-diamond-", entrypoint="diamond") as w:
+    with DAG(name="diamond"):
+        A = Task(name="A", source=echo, arguments={"message": "A"})
+        B = Task(name="B", source=echo, arguments={"message": "B"}, when=f"{A.result} == 'A'")
+        C = Task(name="C", source=echo, arguments={"message": "C"}, when=f"{A.result} != 'A'")
+        D = Task(name="D", source=echo, arguments={"message": "D"})
+        A >> [B, C] >> D
+```
+
+> **Note** in the `DAG` above, `D` will still run, even though `C` will be skipped. This is because of the `depends` logic
+> resolving to `C.Succeeded || C.Skipped || C.Daemoned` due to Argo's default
+> [depends logic](https://argoproj.github.io/argo-workflows/enhanced-depends-logic/#depends).
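+
+Because a script-decorated function still behaves as a plain Python function outside of any Hera context, you can unit
+test it directly. A minimal sketch, assuming `pytest` and the `echo` function defined above:
+
+```py
+def test_echo(capsys):
+    echo("hello")  # called outside a Workflow/Steps/DAG context, so it runs as plain Python
+    assert capsys.readouterr().out == "hello\n"
+```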
+
+## Script Constructors
+
+### InlineScriptConstructor
+
+Script templates submitted to Argo typically run the given Python function in a Python image. By default, the Python
+function itself is dumped to the YAML, and the Argo cluster will run that code. For the code below, we will see it
+directly in the output YAML.
+
+```py
+from hera.workflows import Workflow, script
+
+@script(add_cwd_to_sys_path=False)
+def hello(s: str):
+    print("Hello, {s}!".format(s=s))
+
+
+with Workflow(
+    generate_name="hello-world-",
+    entrypoint="hello",
+    arguments={"s": "world"},
+) as w:
+    hello()
+```
+
+We added `add_cwd_to_sys_path=False` to remove some boilerplate from the `source` below. You will see Hera adds a
+`json.loads` to bridge the YAML input to a Python variable:
+
+```yaml
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: hello-world-
+spec:
+  arguments:
+    parameters:
+    - name: s
+      value: world
+  entrypoint: hello
+  templates:
+  - inputs:
+      parameters:
+      - name: s
+    name: hello
+    script:
+      command:
+      - python
+      image: python:3.8
+      source: 'import json

+        try: s = json.loads(r''''''{{inputs.parameters.s}}'''''')

+        except: s = r''''''{{inputs.parameters.s}}''''''


+        print(''Hello, {s}!''.format(s=s))'
+```
+
+This method of running the function is handled by the `InlineScriptConstructor`, called such because it constructs the
+`Script` template to run the function "inline" in the YAML.
+
+#### Importing modules
+
+A caveat of the `InlineScriptConstructor` is that it is quite limited: as it dumps your code to the `source` field
+as-is, you must also `import` (within the function itself) any modules you use in the function:
+
+```py
+@script(image="python:3.10")
+def my_matcher(string: str):
+    import re
+
+    print(bool(re.match("test", string)))
+```
+
+> **Note** This also applies to other functions in your code - you will not be able to call functions defined outside of
+> the scope of the script-decorated function!
+
+If your function uses standard library imports from Python, you will be able to run your function with any standard
+Python image, specified by the `image` argument of the decorator. Conversely, if you use non-standard imports, such as
+`numpy`, you will need to use an image that includes `numpy`, or build your own (e.g. as a Docker image on DockerHub).
+
+### RunnerScriptConstructor
+
+The `RunnerScriptConstructor` is an alternative `ScriptConstructor` that uses the "Hera Runner" (think of this as being
+like the PyTest Runner) to run your function on Argo. This avoids dumping the function to the `source` of a template,
+keeping the YAML manageable and small, and allows you to arrange your code in natural Python fashion: imports can be
+anywhere in the package, the script-decorated function can call other functions in the package, and the function itself
+can take Pydantic objects as arguments. The use of the `RunnerScriptConstructor` necessitates building your own image,
+as the Hera Runner runs the function by referencing it as an entrypoint of your module. The image used by the script
+should be built from the source code package itself and its dependencies, so that the source code's functions,
+dependencies, and Hera itself are available to run.
+
+A function can set its `constructor` to `"runner"` to use the `RunnerScriptConstructor`, or use the
+`global_config.set_class_defaults` function to set it once for all script-decorated functions.
+We can write a script template function using Pydantic objects such as:
+
+```py
+global_config.set_class_defaults(Script, constructor="runner")
+
+class Input(BaseModel):
+    a: int
+    b: str = "foo"
+
+class Output(BaseModel):
+    output: List[Input]
+
+@script()
+def my_function(input: Input) -> Output:
+    return Output(output=[input])
+```
+
+This creates a template in YAML that looks like:
+
+```yaml
+- name: my-function
+  inputs:
+    parameters:
+    - name: input
+  script:
+    command:
+    - python
+    args:
+    - -m
+    - hera.workflows.runner
+    - -e
+    - examples.workflows.callable_script:my_function
+    image: my-image-with-python-source-code-and-dependencies
+    source: '{{inputs.parameters}}'
+```
+
+You will notice some peculiarities of this template. Firstly, it is running the `hera.workflows.runner` module, rather
+than a user-module such as `examples.workflows.callable_script`. Instead, the `-e` arg specifies the `--entrypoint` to
+be called by the runner, in this case the `my_function` of the `examples.workflows.callable_script` module. We do not
+give a real `image` here, but we assume it exists in this example. Finally, the `source` parameter is passed the
+`inputs.parameters` of the template. This is because the Hera Runner relies on a mechanism in Argo where the value
+passed to `source` is dumped to a file, and then the filename is passed as the final `arg` to the `command`. Therefore,
+the `source` will actually contain a list of parameters as dictionaries, which are dumped to a file which is passed to
+`hera.workflows.runner`. Of course, this is all handled for you!
diff --git a/docs/user-guides/script-runner-io.md b/docs/user-guides/script-runner-io.md
new file mode 100644
index 000000000..e5a4b5778
--- /dev/null
+++ b/docs/user-guides/script-runner-io.md
@@ -0,0 +1,105 @@
+# Script Runner IO
+
+Hera provides the `RunnerInput` and `RunnerOutput` Pydantic classes, which can be used to more succinctly write your
+script function inputs and outputs; they require use of the Hera Runner. Use of these classes also requires the
+`"script_pydantic_io"` experimental feature flag to be enabled:
+
+```py
+global_config.experimental_features["script_pydantic_io"] = True
+```
+
+## Pydantic V1 or V2?
+
+You can import `RunnerInput` and `RunnerOutput` from the `hera.workflows.io` submodule to get the models that match
+your installed version of Pydantic, whether V1 or V2.
+
+If you need to use V1 models when you have V2 installed, you should import
+`RunnerInput` and `RunnerOutput` from the `hera.workflows.io.v1` or `hera.workflows.io.v2` module explicitly. The V2
+models will not be available if you have installed `pydantic<2`, but the V1 models are usable for either version,
+allowing you to migrate at your own pace.
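+
+For example, a minimal sketch of the two import styles (choose one):
+
+```py
+# Resolves to the V1 or V2 models automatically, based on the installed Pydantic version
+from hera.workflows.io import RunnerInput, RunnerOutput
+
+# Or pin the V1 models explicitly; these work under either installed Pydantic version
+# from hera.workflows.io.v1 import RunnerInput, RunnerOutput
+```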
+
+## Script inputs using `RunnerInput`
+
+For your script inputs, you can create a derived class of `RunnerInput`, and declare all your input parameters (and
+artifacts) as fields of the class. If you want to use `Annotated` to declare `Artifact`s or to add metadata to your
+`Parameter`s, you will also need to enable the `"script_annotations"` experimental feature flag.
+
+```py
+from typing import Annotated
+from pydantic import BaseModel
+
+from hera.workflows import Artifact, ArtifactLoader, Parameter, script
+from hera.workflows.io import RunnerInput
+
+
+class MyObject(BaseModel):
+    a_dict: dict
+    a_str: str = "a default string"
+
+
+class MyInput(RunnerInput):
+    param_int: Annotated[int, Parameter(name="param-input")] = 42
+    an_object: Annotated[MyObject, Parameter(name="obj-input")] = MyObject(
+        a_dict={"my-key": "a-value"}, a_str="hello world!"
+    )
+    artifact_int: Annotated[int, Artifact(name="artifact-input", loader=ArtifactLoader.json)]
+
+
+@script(constructor="runner")
+def pydantic_io(
+    my_input: MyInput,
+) -> ...:
+    ...
+```
+
+This will create a script template named `pydantic_io`, with input parameters `"param-input"` and `"obj-input"`, but
+_not_ `"my_input"` (hence inline script templates will not work, as references to `my_input` will not resolve); the
+template will also have the `"artifact-input"` artifact. The YAML generated from the Python will look something like the
+following:
+
+```yaml
+  templates:
+  - name: pydantic-io
+    inputs:
+      parameters:
+      - name: param-input
+        default: '42'
+      - name: obj-input
+        default: '{"a_dict": {"my-key": "a-value"}, "a_str": "hello world!"}'
+      artifacts:
+      - name: artifact-input
+        path: /tmp/hera-inputs/artifacts/artifact-input
+    script:
+      ...
+```
+
+## Script outputs using `RunnerOutput`
+
+The `RunnerOutput` class comes with two special variables, `exit_code` and `result`. The `exit_code` is used to exit the
+container with the given exit code when running on Argo; it is set to `0` by default. The `result` is used to print any
+serializable object to stdout, which means you can now use `.result` on tasks or steps that use a "runner constructor"
+script - you should be mindful of printing/logging anything else to stdout, which will stop the `result` functionality
+from working as intended. If you want an output parameter or artifact with the name `exit_code` or `result`, you can
+declare another field with an annotation of that name, e.g.
+`my_exit_code: Annotated[int, Parameter(name="exit_code")]`.
+
+Aside from the `exit_code` and `result`, the `RunnerOutput` behaves exactly like the `RunnerInput`:
+
+```py
+from typing import Annotated
+
+from hera.workflows import Artifact, Parameter, script
+from hera.workflows.io import RunnerOutput
+
+
+class MyOutput(RunnerOutput):
+    param_int: Annotated[int, Parameter(name="param-output")]
+    artifact_int: Annotated[int, Artifact(name="artifact-output")]
+
+
+@script(constructor="runner")
+def pydantic_io() -> MyOutput:
+    return MyOutput(exit_code=1, result="Test!", param_int=42, artifact_int=100)
+```
+
+See the full Pydantic IO example [here](../examples/workflows/experimental/script_runner_io.md)!
diff --git a/docs/user-guides/scripts.md b/docs/user-guides/scripts.md
deleted file mode 100644
index 458474351..000000000
--- a/docs/user-guides/scripts.md
+++ /dev/null
@@ -1,529 +0,0 @@
-# Scripts
-
-The `Script` class is an essential part of Hera's extension on top of Argo. As Hera is a Python library,
-[Script templates](https://argoproj.github.io/argo-workflows/fields/#scripttemplate) running Python become the standard
-template, which is reflected by the greater feature set provided for writing them.
-
-## Script Decorator
-
-The `script` decorator function is a key offering of Hera to achieve near-native Python function orchestration.
It -allows you to call the function under a Hera context manager such as a `Workflow` or `Steps` context, and it will be -treated as the intended sub-object, which would be a `template` when under a `Workflow`, or a `Step` when under a -`Steps`. The function will still behave as normal outside of any Hera contexts, meaning you can write unit tests on the -given function. - -> **For advanced users**: the exact mechanism of the `script` decorator is to prepare a `Script` object within the -> decorator, so that when your function is invoked under a Hera context, the call is redirected to the `Script.__call__` -> function. This takes the kwargs of a `Step` or `Task` depending on whether the context manager is a `Steps` or a -> `DAG`. Under a Workflow itself, your function is not expected to take arguments, so the call will add the function as -> a template. - -When decorating a function, you should pass `Script` parameters to the `script` decorator. This includes values such as -the `image` to use, and `resources` to request. - -```py -from hera.workflows import Resources, script - -@script(image="python:3.11", resources=Resources(memory_request="5Gi")) -def echo(message: str): - print(message) -``` - -When calling the function under a `Steps` or `DAG` context, you should pass `Step` or `Task` kwargs, such as the `name` -of the `Step`/`Task`, a `when` clause, a `with_param` list to loop over a given template, or `arguments` for the -function. - -```py -with Workflow(generate_name="dag-diamond-", entrypoint="diamond") as w: - with DAG(name="diamond"): - A = echo(name="A", arguments={"message": "A"}) - B = echo(name="B", arguments={"message": "B"}, when=f"{A.result == 'A'}") - C = echo(name="C", arguments={"message": "C"}, when=f"{A.result != 'A'}") - D = echo(name="D", arguments={"message": "D"}) - A >> [B, C] >> D -``` - -> **Note** in the `DAG` above, `D` will still run, even though `C` will be skipped. This is because of the `depends` logic -> resolving to `C.Succeeded || C.Skipped || C.Daemoned` due to Argo's default -> [depends logic](https://argoproj.github.io/argo-workflows/enhanced-depends-logic/#depends). - -## Script Constructors - -### InlineScriptConstructor - -Script templates submitted to Argo typically run the given Python function in a Python image. By default, the Python -function itself is dumped to the YAML, and the Argo cluster will run that code. For the code below, we will see it -directly in the output YAML. - -```py -from hera.workflows import Workflow, script - -@script(add_cwd_to_sys_path=False) -def hello(s: str): - print("Hello, {s}!".format(s=s)) - - -with Workflow( - generate_name="hello-world-", - entrypoint="hello", - arguments={"s": "world"}, -) as w: - hello() -``` - -We added `add_cwd_to_sys_path=False` to remove some boilerplate from the `source` below. 
You will see Hera adds a -`json.loads` to bridge the YAML input to a Python variable: - -```yaml -apiVersion: argoproj.io/v1alpha1 -kind: Workflow -metadata: - generateName: hello-world- -spec: - arguments: - parameters: - - name: s - value: world - entrypoint: hello - templates: - - inputs: - parameters: - - name: s - name: hello - script: - command: - - python - image: python:3.8 - source: 'import json - - try: s = json.loads(r''''''{{inputs.parameters.s}}'''''') - - except: s = r''''''{{inputs.parameters.s}}'''''' - - - print(''Hello, {s}!''.format(s=s))' -``` - -This method of running the function is handled by the `InlineScriptConstructor`, called such because it constructs the -`Script` template to run the function "inline" in the YAML. - -#### Importing modules - -A caveat of the `InlineScriptConstructor` is that it is quite limited - as the `InlineScriptConstructor` dumps your code -to the `source` field as-is, you must also `import` (within the function itself) any modules you use in the function: - -```py -@script(image="python:3.10") -def my_matcher(string: str): - import re - - print(bool(re.match("test", string))) -``` - -> **Note** This also applies to other functions in your code - you will not be able to call functions defined outside of -> the scope of the script-decorated function! - -If your function uses standard library imports from Python, you will be able to run your function with any standard -Python image, specified by the `image` argument of the decorator. Therefore, if you use non-standard imports, such as -`numpy`, you will need to use an image that includes `numpy`, or build your own (e.g. as a Docker image on DockerHub). - -### RunnerScriptConstructor - -The `RunnerScriptConstructor` is an alternative `ScriptConstructor` that uses the "Hera Runner" (think of this as being -like the PyTest Runner) to run your function on Argo. This avoids dumping the function to the `source` of a template, -keeping the YAML manageable and small, and allows you to arrange your code in natural Python fashion: imports can be -anywhere in the package, the script-decorated function can call other functions in the package, and the function itself -can take Pydantic objects as arguments. The use of the `RunnerScriptConstructor` necessitates building your own image, -as the Hera Runner runs the function by referencing it as an entrypoint of your module. The image used by the script -should be built from the source code package itself and its dependencies, so that the source code's functions, -dependencies, and Hera itself are available to run. - -A function can set its `constructor` to `"runner"` to use the `RunnerScriptConstructor`, or use the -`global_config.set_class_defaults` function to set it once for all script-decorated functions. We can write a script -template function using Pydantic objects such as: - -```py -global_config.set_class_defaults(Script, constructor="runner") - -class Input(BaseModel): - a: int - b: str = "foo" - -class Output(BaseModel): - output: List[Input] - -@script() -def my_function(input: Input) -> Output: - return Output(output=[input]) -``` - -This creates a template in YAML that looks like: - -```yaml -- name: my-function - inputs: - parameters: - - name: input - script: - command: - - python - args: - - -m - - hera.workflows.runner - - -e - - examples.workflows.callable_script:my_function - image: my-image-with-python-source-code-and-dependencies - source: '{{inputs.parameters}}' -``` - -You will notice some pecularities of this template. 
Firstly, it is running the `hera.workflows.runner` module, rather -than a user-module such as `examples.workflows.callable_script`. Instead, the `-e` arg specifies the `--entrypoint` to -be called by the runner, in this case the `my_function` of the `examples.workflows.callable_script` module. We do not -give a real `image` here, but we assume it exists in this example. Finally, the `source` parameter is passed the -`inputs.parameters` of the template. This is because the Hera Runner relies on a mechanism in Argo where the value -passed to `source` is dumped to a file, and then the filename is passed as the final `arg` to the `command`. Therefore, -the `source` will actually contain a list of parameters as dictionaries, which are dumped to a file which is passed to -`hera.workflows.runner`. Of course, this is all handled for you! - -## Script Annotations - -Annotation syntax is an experimental feature using `typing.Annotated` for `Parameter`s and `Artifact`s to declare inputs -and outputs for functions decorated as `scripts`. They use `Annotated` as the type in the function parameters and allow -us to simplify writing scripts with parameters and artifacts that require additional fields such as a `description` or -alternative `name`. - -This feature must be enabled by setting the `experimental_feature` flag `script_annotations` on the global config. - -```py -global_config.experimental_features["script_annotations"] = True -``` - -### Parameters - -In Hera, we can currently specify inputs inside the `@script` decorator as follows: - -```python -@script( - inputs=[ - Parameter(name="an_int", description="an_int parameter", default=1, enum=[1, 2, 3]), - Parameter(name="a_bool", description="a_bool parameter", default=True, enum=[True, False]), - Parameter(name="a_string", description="a_string parameter", default="a", enum=["a", "b", "c"]) - ] -) -def echo_all(an_int=1, a_bool=True, a_string="a"): - print(an_int) - print(a_bool) - print(a_string) -``` - -Notice how the `name` and `default` values are duplicated for each `Parameter`. Using annotations, we can rewrite this -as: - -```python -@script() -def echo_all( - an_int: Annotated[int, Parameter(description="an_int parameter", default=1, enum=[1, 2, 3])], - a_bool: Annotated[bool, Parameter(description="a_bool parameter", default=True, enum=[True, False])], - a_string: Annotated[str, Parameter(description="a_string parameter", default="a", enum=["a", "b", "c"])] -): - print(an_int) - print(a_bool) - print(a_string) -``` - -The fields allowed in the `Parameter` annotations are: `name`, `default`, `enum`, and `description`. - -### Artifacts - -> Note: `Artifact` annotations are only supported when used with the `RunnerScriptConstructor`. - - -The feature is even more powerful for `Artifact`s. In Hera we are currently able to specify `Artifact`s in `inputs`, but -the given path is not programmatically linked to the code within the function unless defined outside the scope of the -function: - -```python -@script(inputs=Artifact(name="my-artifact", path="/tmp/file")) -def read_artifact(): - with open("/tmp/file") as a_file: # Repeating "/tmp/file" is prone to human error! - print(a_file.read()) - -# or - -MY_PATH = "/tmp/file" # Now accessible outside of the function scope! 
-@script(inputs=Artifact(name="my-artifact", path=MY_PATH)) -def read_artifact(): - with open(MY_PATH) as a_file: - print(a_file.read()) -``` - -By using annotations we can avoid repeating the `path` of the file, and the function can use the variable directly as a -`Path` object, with its value already set to the given path: - -```python -@script(constructor="runner") -def read_artifact(an_artifact: Annotated[Path, Artifact(name="my-artifact", path="/tmp/file")]): - print(an_artifact.read_text()) -``` - -The fields allowed in the `Artifact` annotations are: `name`, `path`, and `loader`. - -### Artifact Loaders - -In case you want to load an object directly from the `path` of the `Artifact`, we allow two types of loaders besides the -default `Path` behaviour used when no loader is specified. The `ArtifactLoader` enum provides `file` and `json` loaders. - -#### `None` loader -With `None` set as the loader (which is by default) in the Artifact annotation, the `path` attribute of `Artifact` is -extracted and used to provide a `pathlib.Path` object for the given argument, which can be used directly in the function -body. The following example is the same as above except for explicitly setting the loader to `None`: - -```python -@script(constructor="runner") -def read_artifact( - an_artifact: Annotated[Path, Artifact(name="my-artifact", path="/tmp/file", loader=None)] -): - print(an_artifact.read_text()) -``` - -#### `file` loader - -When the loader is set to `file`, the function parameter type should be `str`, and will contain the contents string -representation of the file stored at `path` (essentially performing `path.read_text()` automatically): - -```python -@script(constructor="runner") -def read_artifact( - an_artifact: Annotated[str, Artifact(name="my-artifact", path="/tmp/file", loader=ArtifactLoader.file)] -) -> str: - return an_artifact -``` - -This loads the contents of the file at `"/tmp/file"` to the argument `an_artifact` and subsequently can be used as a -string inside the function. - -#### `json` loader - -When the loader is set to `json`, the contents of the file at `path` are read and parsed to a dictionary via `json.load` -(essentially performing `json.load(path.open())` automatically). By specifying a Pydantic type, this dictionary can even -be automatically parsed to that type: - -```python -class MyArtifact(BaseModel): - a = "a" - b = "b" - - -@script(constructor="runner") -def read_artifact( - an_artifact: Annotated[MyArtifact, Artifact(name="my-artifact", path="/tmp/file", loader=ArtifactLoader.json)] -) -> str: - return an_artifact.a + an_artifact.b -``` - -Here, we have a json representation of `MyArtifact` such as `{"a": "hello ", "b": "world"}` stored at `"/tmp/file"`. We -can load it with `ArtifactLoader.json` and then use `an_artifact` as an instance of `MyArtifact` inside the function, so -the function will return `"hello world"`. - -#### Function parameter name aliasing - -Script annotations can work on top of the `RunnerScriptConstructor` for name aliasing of function -parameters, in particular to allow a public `kebab-case` parameter, while using a `snake_case` -Python function parameter. When using a `RunnerScriptConstructor`, an environment variable -`hera__script_annotations` will be added to the Script template (visible in the exported YAML file). - -### Outputs - -> Note: Output annotations are only supported when used with the `RunnerScriptConstructor`. - -There are two ways to specify output Artifacts and Parameters. 
- -#### Function return annotations - -Function return annotations can be used to specify the output type information for output Artifacts and Parameters, and -the function should return a value or tuple. An example can be seen -[here](../examples/workflows/experimental/script_annotations_outputs.md). - -For a simple hello world output artifact example we currently have: -```python -@script(outputs=Artifact(name="hello-artifact", path="/tmp/hello_world.txt")) -def hello_world(): - with open("/tmp/hello_world.txt", "w") as f: - f.write("Hello, world!") -``` - -The new approach allows us to avoid duplication of the path, which is now optional, and results in more readable code: -```python -@script() -def hello_world() -> Annotated[str, Artifact(name="hello-artifact")]: - return "Hello, world!" -``` - -For `Parameter`s we have a similar syntax: - -```python -@script() -def hello_world() -> Annotated[str, Parameter(name="hello-param")]: - return "Hello, world!" -``` - -The returned values will be automatically saved in files within the Argo container according to this schema: -* `/hera/outputs/parameters/` -* `/hera/outputs/artifacts/` - -These outputs are also exposed in the `outputs` section of the template in YAML. - -The object returned from the function can be of any serialisable Pydantic type (or basic Python type) and must be -`Annotated` as an `Artifact` or `Parameter`. The `Parameter`/`Artifact`'s `name` will be used for the path of the output unless provided: -* if the annotation is an `Artifact` with a `path`, we use that `path` -* if the annotation is a `Parameter`, with a `value_from` that contains a `path`, we use that `path` - -See the following two functions for specifying custom paths: - -```python -@script() -def hello_world() -> Annotated[str, Artifact(name="hello-artifact", path="/tmp/hello_world_art.txt")]: - return "Hello, world!" - -@script() -def hello_world() -> Annotated[str, Parameter(name="hello-param", value_from={"path": "/tmp/hello_world_param.txt"})]: - return "Hello, world!" -``` - -For multiple outputs, the return type should be a `Tuple` of arbitrary Pydantic types with individual -`Parameter`/`Artifact` annotations, and the function must return a tuple from the function matching these types: -```python -@script() -def func(...) -> Tuple[ - Annotated[arbitrary_pydantic_type_a, Artifact], - Annotated[arbitrary_pydantic_type_b, Parameter], - Annotated[arbitrary_pydantic_type_c, Parameter], - ...]: - return output_a, output_b, output_c -``` - -#### Input-Output function parameters - -Hera also allows output `Parameter`/`Artifact`s as part of the function signature when specified as a `Path` type, -allowing users to write to the path as an output, without needing an explicit return. They require an additional field -`output=True` to distinguish them from the input parameters and must have an underlying `Path` type (or another type -that will write to disk). - -```python -@script() -def func(..., output_param: Annotated[Path, Parameter(output=True, global_name="...", name="")]) -> Annotated[arbitrary_pydantic_type, OutputItem]: - output_param.write_text("...") - return output -``` - -The parent outputs directory, `/hera/outputs` by default, can be set by the user. 
This is done by adding: - -```python -global_config.set_class_defaults(RunnerScriptConstructor, outputs_directory="user/chosen/outputs") -``` - -## Script Pydantic IO - -Hera provides the `RunnerInput` and `RunnerOutput` classes which can be used to more succinctly write your script -function inputs and outputs, and requires use of the Hera Runner. Use of these classes also requires the -`"script_pydantic_io"` experimental feature flag to be enabled: - -```py -global_config.experimental_features["script_pydantic_io"] = True -``` - -### Pydantic V1 or V2? - -You can import `RunnerInput` and `RunnerOutput` from the `hera.workflows.io` submodule to import the version of Pydantic -that matches your V1 or V2 installation. - -If you need to use V1 models when you have V2 installed, you should import -`RunnerInput` and `RunnerOutput` from the `hera.workflows.io.v1` or `hera.workflows.io.v2` module explicitly. The V2 -models will not be available if you have installed `pydantic<2`, but the V1 models are usable for either version, -allowing you to migrate at your own pace. - -### Script inputs using `RunnerInput` - -For your script inputs, you can create a derived class of `RunnerInput`, and declare all your input parameters (and -artifacts) as fields of the class. If you want to use `Annotated` to declare `Artifacts` add metadata to your -`Parameters`, you will also need to enable the `"script_annotations"` experimental feature flag. - -```py -from typing import Annotated -from pydantic import BaseModel - -from hera.workflows import Artifact, ArtifactLoader, Parameter, script -from hera.workflows.io import RunnerInput - - -class MyObject(BaseModel): - a_dict: dict = {} - a_str: str = "a default string" - - -class MyInput(RunnerInput): - param_int: Annotated[int, Parameter(name="param-input")] = 42 - an_object: Annotated[MyObject, Parameter(name="obj-input")] = MyObject( - a_dict={"my-key": "a-value"}, a_str="hello world!" - ) - artifact_int: Annotated[int, Artifact(name="artifact-input", loader=ArtifactLoader.json)] - - -@script(constructor="runner") -def pydantic_io( - my_input: MyInput, -) -> ...: - ... -``` - -This will create a script template named `pydantic_io`, with input parameters `"param-input"` and `"obj-input"`, but -_not_ `"my_input"` (hence inline script templates will not work, as references to `my_input` will not resolve); the -template will also have the `"artifact-input"` artifact. The yaml generated from the Python will look something like the following: - -```yaml - templates: - - name: pydantic-io - inputs: - parameters: - - name: param-input - default: '42' - - name: obj-input - default: '{"a_dict": {"my-key": "a-value"}, "a_str": "hello world!"}' - artifacts: - - name: artifact-input - path: /tmp/hera-inputs/artifacts/artifact-input - script: - ... -``` - -### Script outputs using `RunnerOutput` - -The `RunnerOutput` class comes with two special variables, `exit_code` and `result`. The `exit_code` is used to exit the -container when running on Argo with the specific exit code - it is set to `0` by default. The `result` is used to print -any serializable object to stdout, which means you can now use `.result` on tasks or steps that use a "runner -constructor" script - you should be mindful of printing/logging anything else to stdout, which will stop the `result` -functionality working as intended. If you want an output parameters/artifacts with the name `exit_code` or `result`, you -can declare another field with an annotation of that name, e.g. 
-`my_exit_code: Annotated[int, Parameter(name="exit_code")]`.
-
-Aside from the `exit_code` and `result`, the `RunnerOutput` behaves exactly like the `RunnerInput`:
-
-```py
-from typing import Annotated
-
-from hera.workflows import Artifact, Parameter, script
-from hera.workflows.io import RunnerOutput
-
-
-class MyOutput(RunnerOutput):
-    param_int: Annotated[int, Parameter(name="param-output")]
-    artifact_int: Annotated[int, Artifact(name="artifact-output")]
-
-
-@script(constructor="runner")
-def pydantic_io() -> MyOutput:
-    return MyOutput(exit_code=1, result="Test!", param_int=42, artifact_int=my_input.param_int)
-
-```
-
-See the full Pydantic IO example [here](../examples/workflows/experimental/script_pydantic_io.md)!
diff --git a/docs/walk-through/advanced-hera-features.md b/docs/walk-through/advanced-hera-features.md
index 774fa3a5c..dbdf0791b 100644
--- a/docs/walk-through/advanced-hera-features.md
+++ b/docs/walk-through/advanced-hera-features.md
@@ -117,7 +117,7 @@ This feature can be enabled by setting the `experimental_feature` flag `script_a
 global_config.experimental_features["script_annotations"] = True
 ```
 
-Read the full guide on script annotations in [the script user guide](../user-guides/scripts.md#script-annotations).
+Read the full guide on script annotations in [the script user guide](../user-guides/script-annotations.md).
 
 ### Script IO Models
 
@@ -133,7 +133,7 @@ To enable Hera input/output models, you must set the `experimental_feature` flag
 global_config.experimental_features["script_pydantic_io"] = True
 ```
 
-Read the full guide on script pydantic IO in [the script user guide](../user-guides/scripts.md#script-pydantic-io).
+Read the full guide on script pydantic IO in [the script user guide](../user-guides/script-runner-io.md).
 
 ## Graduated features
 
@@ -150,4 +150,4 @@ Argo. The image used by the script should be built from the source code package
 source code's functions, dependencies, and Hera itself are available to run. The `RunnerScriptConstructor` is also
 compatible with Pydantic so supports deserializing inputs to Python objects and serializing outputs to json strings.
 
-Read [the Script Guide](../user-guides/scripts.md#runnerscriptconstructor) to learn more!
+Read [the Script Guide](../user-guides/script-basics.md#runnerscriptconstructor) to learn more!
diff --git a/docs/walk-through/hello-world.md b/docs/walk-through/hello-world.md
index 3131cfe77..c581117d7 100644
--- a/docs/walk-through/hello-world.md
+++ b/docs/walk-through/hello-world.md
@@ -67,8 +67,8 @@ def echo_twice(message: str):
 ```
 
 For an in-depth explanation of the mechanics of the script decorator, see the
-[script decorator section](../user-guides/scripts.md#script-decorator) in the scripts user guide, and read about
-building your own image in the [script constructors section](../user-guides/scripts.md#script-constructors).
+[script decorator section](../user-guides/script-basics.md#script-decorator) in the scripts user guide, and read about
+building your own image in the [script constructors section](../user-guides/script-basics.md#script-constructors).
 
 ## The Workflow Context Manager
 
diff --git a/docs/walk-through/pydantic-support.md b/docs/walk-through/pydantic-support.md
index c7b8a4cbf..217dad6a8 100644
--- a/docs/walk-through/pydantic-support.md
+++ b/docs/walk-through/pydantic-support.md
@@ -14,3 +14,6 @@ validate the function call.
 Using Pydantic classes in your function parameters uses the serializing and
 de-serializing features of Pydantic when running on Argo.
 Your functions can return objects that are serialized, passed to another `Step` as a string argument, and then
 de-serialized in another function. This flow can be seen in
 [the callable scripts example](../examples/workflows/scripts/callable_script.md).
+
+The new experimental Runner IO feature provides a way to specify composite inputs using class fields, which become the
+template's inputs. Read more in the [Script Runner IO guide](../user-guides/script-runner-io.md).
diff --git a/examples/workflows/experimental/script-pydantic-io.yaml b/examples/workflows/experimental/script-pydantic-io.yaml
deleted file mode 100644
index 55256a985..000000000
--- a/examples/workflows/experimental/script-pydantic-io.yaml
+++ /dev/null
@@ -1,41 +0,0 @@
-apiVersion: argoproj.io/v1alpha1
-kind: Workflow
-metadata:
-  generateName: pydantic-io-
-spec:
-  templates:
-  - inputs:
-      artifacts:
-      - name: artifact-input
-        path: /tmp/hera-inputs/artifacts/artifact-input
-      parameters:
-      - default: '42'
-        name: param-input
-      - default: '{"a_dict": {"my-key": "a-value"}, "a_str": "hello world!"}'
-        name: obj-input
-    name: pydantic-io
-    outputs:
-      artifacts:
-      - name: artifact-output
-        path: /tmp/hera-outputs/artifacts/artifact-output
-      parameters:
-      - name: param-output
-        valueFrom:
-          path: /tmp/hera-outputs/parameters/param-output
-    script:
-      args:
-      - -m
-      - hera.workflows.runner
-      - -e
-      - examples.workflows.experimental.script_pydantic_io:pydantic_io
-      command:
-      - python
-      env:
-      - name: hera__script_annotations
-        value: ''
-      - name: hera__outputs_directory
-        value: /tmp/hera-outputs
-      - name: hera__script_pydantic_io
-        value: ''
-      image: python:3.8
-      source: '{{inputs.parameters}}'
diff --git a/examples/workflows/experimental/script-runner-io.yaml b/examples/workflows/experimental/script-runner-io.yaml
new file mode 100644
index 000000000..3ee5a5c08
--- /dev/null
+++ b/examples/workflows/experimental/script-runner-io.yaml
@@ -0,0 +1,81 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Workflow
+metadata:
+  generateName: pydantic-io-
+spec:
+  templates:
+  - name: use-pydantic-io
+    steps:
+    - - name: writer
+        template: writer
+    - - arguments:
+          artifacts:
+          - from: '{{steps.writer.outputs.artifacts.int-artifact}}'
+            name: artifact-input
+          parameters:
+          - name: param_int
+            value: '101'
+          - name: an_object
+            value: '{"a_dict": {"my-new-key": "my-new-value"}, "a_str": "a default
+              string"}'
+        name: pydantic-io
+        template: pydantic-io
+  - name: writer
+    outputs:
+      artifacts:
+      - archive:
+          none: {}
+        name: int-artifact
+        path: /tmp/hera-outputs/artifacts/int-artifact
+    script:
+      args:
+      - -m
+      - hera.workflows.runner
+      - -e
+      - examples.workflows.experimental.script_runner_io:writer
+      command:
+      - python
+      env:
+      - name: hera__script_annotations
+        value: ''
+      - name: hera__outputs_directory
+        value: /tmp/hera-outputs
+      - name: hera__script_pydantic_io
+        value: ''
+      image: python-image-built-with-my-package
+      source: '{{inputs.parameters}}'
+  - inputs:
+      artifacts:
+      - name: artifact-input
+        path: /tmp/hera-inputs/artifacts/artifact-input
+      parameters:
+      - default: '42'
+        name: param-input
+      - default: '{"a_dict": {"my-key": "a-value"}, "a_str": "hello world!"}'
+        name: obj-input
+    name: pydantic-io
+    outputs:
+      artifacts:
+      - name: artifact-output
+        path: /tmp/hera-outputs/artifacts/artifact-output
+      parameters:
+      - name: param-output
+        valueFrom:
+          path: /tmp/hera-outputs/parameters/param-output
+    script:
+      args:
+      - -m
+      - hera.workflows.runner
+      - -e
+      - examples.workflows.experimental.script_runner_io:pydantic_io
+
command: + - python + env: + - name: hera__script_annotations + value: '' + - name: hera__outputs_directory + value: /tmp/hera-outputs + - name: hera__script_pydantic_io + value: '' + image: python-image-built-with-my-package + source: '{{inputs.parameters}}' diff --git a/examples/workflows/experimental/script_pydantic_io.py b/examples/workflows/experimental/script_runner_io.py similarity index 61% rename from examples/workflows/experimental/script_pydantic_io.py rename to examples/workflows/experimental/script_runner_io.py index 418795554..1afc0486f 100644 --- a/examples/workflows/experimental/script_pydantic_io.py +++ b/examples/workflows/experimental/script_runner_io.py @@ -4,7 +4,8 @@ from pydantic import BaseModel from hera.shared import global_config -from hera.workflows import Artifact, ArtifactLoader, Parameter, Workflow, script +from hera.workflows import Artifact, ArtifactLoader, Parameter, Steps, Workflow, script +from hera.workflows.archive import NoneArchiveStrategy from hera.workflows.io import RunnerInput, RunnerOutput try: @@ -17,7 +18,7 @@ class MyObject(BaseModel): - a_dict: dict = {} + a_dict: dict # not giving a default makes the field a required input for the template a_str: str = "a default string" @@ -34,7 +35,12 @@ class MyOutput(RunnerOutput): artifact_int: Annotated[int, Artifact(name="artifact-output")] -@script(constructor="runner") +@script(constructor="runner", image="python-image-built-with-my-package") +def writer() -> Annotated[int, Artifact(name="int-artifact", archive=NoneArchiveStrategy())]: + return 100 + + +@script(constructor="runner", image="python-image-built-with-my-package") def pydantic_io( my_input: MyInput, ) -> MyOutput: @@ -42,4 +48,14 @@ def pydantic_io( with Workflow(generate_name="pydantic-io-") as w: - pydantic_io() + with Steps(name="use-pydantic-io"): + write_step = writer() + pydantic_io( + arguments=[ + write_step.get_artifact("int-artifact").with_name("artifact-input"), + { + "param_int": 101, + "an_object": MyObject(a_dict={"my-new-key": "my-new-value"}), + }, + ] + ) diff --git a/examples/workflows/template-level-volume.yaml b/examples/workflows/template-level-volume.yaml index 5032067bb..c0d87e5cc 100644 --- a/examples/workflows/template-level-volume.yaml +++ b/examples/workflows/template-level-volume.yaml @@ -5,35 +5,17 @@ metadata: spec: entrypoint: generate-and-use-volume templates: - - name: generate-and-use-volume - steps: - - - name: generate-volume - template: generate-volume - arguments: - parameters: - - name: pvc-size - # In a real-world example, this could be generated by a previous workflow step. 
- value: '1Gi' - - - name: generate - template: whalesay - arguments: - parameters: - - name: pvc-name - value: '{{steps.generate-volume.outputs.parameters.pvc-name}}' - - - name: print - template: print-message - arguments: - parameters: - - name: pvc-name - value: '{{steps.generate-volume.outputs.parameters.pvc-name}}' - - - name: generate-volume - inputs: + - inputs: parameters: - - name: pvc-size + - name: pvc-size + name: generate-volume + outputs: + parameters: + - name: pvc-name + valueFrom: + jsonPath: '{.metadata.name}' resource: action: create - setOwnerReference: true manifest: | apiVersion: v1 kind: PersistentVolumeClaim @@ -44,40 +26,60 @@ spec: resources: requests: storage: '{{inputs.parameters.pvc-size}}' - outputs: - parameters: - - name: pvc-name - valueFrom: - jsonPath: '{.metadata.name}' - - - name: whalesay - inputs: - parameters: - - name: pvc-name - volumes: - - name: workdir - persistentVolumeClaim: - claimName: '{{inputs.parameters.pvc-name}}' - container: + setOwnerReference: true + - container: + args: + - echo generating message in volume; cowsay hello world | tee /mnt/vol/hello_world.txt + command: + - sh + - -c image: docker/whalesay:latest - command: [sh, -c] - args: ["echo generating message in volume; cowsay hello world | tee /mnt/vol/hello_world.txt"] volumeMounts: - - name: workdir - mountPath: /mnt/vol - - - name: print-message + - mountPath: /mnt/vol + name: workdir inputs: - parameters: - - name: pvc-name + parameters: + - name: pvc-name + name: whalesay volumes: - - name: workdir - persistentVolumeClaim: - claimName: '{{inputs.parameters.pvc-name}}' - container: + - name: workdir + persistentVolumeClaim: + claimName: '{{inputs.parameters.pvc-name}}' + - container: + args: + - echo getting message from volume; find /mnt/vol; cat /mnt/vol/hello_world.txt + command: + - sh + - -c image: alpine:latest - command: [sh, -c] - args: ["echo getting message from volume; find /mnt/vol; cat /mnt/vol/hello_world.txt"] volumeMounts: - - name: workdir - mountPath: /mnt/vol + - mountPath: /mnt/vol + name: workdir + inputs: + parameters: + - name: pvc-name + name: print-message + volumes: + - name: workdir + persistentVolumeClaim: + claimName: '{{inputs.parameters.pvc-name}}' + - name: generate-and-use-volume + steps: + - - arguments: + parameters: + - name: pvc-size + value: 1Gi + name: generate-volume + template: generate-volume + - - arguments: + parameters: + - name: pvc-name + value: '{{steps.generate-volume.outputs.parameters.pvc-name}}' + name: generate + template: whalesay + - - arguments: + parameters: + - name: pvc-name + value: '{{steps.generate-volume.outputs.parameters.pvc-name}}' + name: print + template: print-message diff --git a/examples/workflows/upstream/title-and-descriptin-with-markdown.upstream.yaml b/examples/workflows/upstream/title-and-description-with-markdown.upstream.yaml similarity index 58% rename from examples/workflows/upstream/title-and-descriptin-with-markdown.upstream.yaml rename to examples/workflows/upstream/title-and-description-with-markdown.upstream.yaml index ff25ccdf3..7750e5791 100644 --- a/examples/workflows/upstream/title-and-descriptin-with-markdown.upstream.yaml +++ b/examples/workflows/upstream/title-and-description-with-markdown.upstream.yaml @@ -5,10 +5,12 @@ metadata: labels: workflows.argoproj.io/archive-strategy: "false" annotations: - workflows.argoproj.io/title: "**Test Title**" + # both annotations are available since v3.4.4 + # embedded markdown is available since v3.6 + workflows.argoproj.io/title: "**Test 
Title**" # defaults to `metadata.name` if not specified workflows.argoproj.io/description: | `This is a simple hello world example.` - You can also run it in Python: https://couler-proj.github.io/couler/examples/#hello-world + This is an embedded link to the docs: https://argo-workflows.readthedocs.io/en/latest/title-and-description/ spec: entrypoint: whalesay templates: diff --git a/examples/workflows/upstream/withsequence-nested-result.upstream.yaml b/examples/workflows/upstream/withsequence-nested-result.upstream.yaml new file mode 100644 index 000000000..1a6a2f3b4 --- /dev/null +++ b/examples/workflows/upstream/withsequence-nested-result.upstream.yaml @@ -0,0 +1,43 @@ +# This example shows how to nest withSequence loops in a Workflow. +# A is the first step. A's output determines how many times B is executed. +# B's output then determines how many times C is executed. +# A +# / \ +# B1 B2 +# / | \ / | +# C1 C2 C3 C4 C5 +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: withsequence-nested-result- +spec: + entrypoint: hello-entrypoint + templates: + - name: hello-entrypoint + steps: + - - name: hello-a + template: hello + - - name: hello-b + template: hello-hello + withSequence: + start: "1" + end: "{{steps.hello-a.outputs.result}}" + + - name: hello-hello + steps: + - - name: hello-b + template: hello + - - name: hello-c + template: hello + withSequence: + start: "1" + end: "{{steps.hello-b.outputs.result}}" + + - name: hello + script: + image: python:alpine3.6 + command: [python] + source: | + import random + result = random.randint(0,5) + print(result) diff --git a/mkdocs.yml b/mkdocs.yml index c2d400563..164012dc9 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -27,7 +27,10 @@ nav: - History of Hera: contributing/history.md - User Guides: - Core Concepts: user-guides/core-concepts.md - - Scripts: user-guides/scripts.md + - Scripts: + - Script Basics: user-guides/script-basics.md + - Script Annotations: user-guides/script-annotations.md + - Script Runner IO: user-guides/script-runner-io.md - Expr Transpiler: user-guides/expr.md - Examples: - About: examples/workflows-examples.md