Commit
* add benchmark dependencies
* add structure
* add benchmark
* tmp commit
* force benchmarks to be a package
* update config
* rename folder
* fix asv config
* rename
* update format
* typo
* update
* incorrect config
* update
* update
* back to kedro_benchmarks
* rename benchmark file
* clean up
* update asv config
* update config
* update config
* fix memory test
* remove memory tracking since it's not meaningful
* test
* commit benchmark module
* ADD README
* rename kedro_benchmarks
* update asv config
* lint
* test matrix of runner

---------

Signed-off-by: Nok <[email protected]>
Signed-off-by: Nok Lam Chan <[email protected]>
Showing 9 changed files with 194 additions and 21 deletions.
This file was deleted.
@@ -0,0 +1,21 @@
This is the benchmark repository of Kedro, which is mainly used internally.

# Installation
`pip install asv`

# Run the benchmark
Run this in the terminal:
`asv run`

You can also run the benchmarks for a specific commit or a range of commits; for details,
check out the [official documentation](https://asv.readthedocs.io/en/stable/using.html#benchmarking).

For example, `asv run main..mybranch` will run the benchmarks against every commit since branching off from
`main`.
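For orientation, this is roughly what `asv run` collects: asv imports every module in the benchmark directory and picks up functions and methods whose names start with `time_`, `mem_` or `peakmem_`; `setup` runs before each measurement, and `params`/`param_names` turn one benchmark into a matrix of runs. A minimal sketch of those conventions (illustrative only, not part of this repository):

```python
# Minimal illustrative benchmark module; asv discovers it automatically
# because of the time_/peakmem_ prefixes.
import time


def time_sleep():
    # Wall-clock time of this call is reported.
    time.sleep(0.01)


class ExampleSuite:
    # Each benchmark below is repeated once per parameter value.
    params = (100, 10_000)
    param_names = ("size",)

    def setup(self, size):
        self.data = list(range(size))

    def time_sum(self, size):
        sum(self.data)

    def peakmem_duplicate(self, size):
        # Peak resident memory of the process during this call is reported.
        _ = self.data * 10
```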
## Compare benchmarks for two commits
Run this in the terminal:
`asv compare v0.1 v0.2`

This compares the benchmark results of the two commits.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -0,0 +1,160 @@
# Write the benchmarking functions here.
# See "Writing benchmarks" in the asv docs for more information.

import importlib
import time
from pathlib import Path

import yaml

from kedro.io.data_catalog import DataCatalog
from kedro.pipeline import node
from kedro.pipeline.modular_pipeline import pipeline

# Simulate an I/O-bound task
def io_bound_task(input_data):
    time.sleep(2)  # Simulate an I/O wait (e.g. reading from a file)
    output = input_data
    return output


# Simulate a compute-bound task (repeated integer multiplication)
def compute_bound_task(input_data) -> str:
    # Simulate heavy single-core compute (pure Python, not pandas/numpy etc.)
    ans = 1
    for i in range(1, 50000):
        ans = ans * i
    return "dummy"

def create_data_catalog():
    """
    Use the dataset factory pattern to make sure the benchmark covers the slowest path.
    """
    catalog_conf = """
'output_{pattern}':
    type: pandas.CSVDataset
    filepath: benchmarks/data/'{pattern}.csv'
'numpy_{pattern}':
    type: pickle.PickleDataset
    filepath: benchmarks/data/'{pattern}.pkl'
'{catch_all_dataset_pattern}':
    type: pandas.CSVDataset
    filepath: benchmarks/data/data.csv
"""
    catalog_conf = yaml.safe_load(catalog_conf)
    catalog = DataCatalog.from_config(catalog_conf)
    return catalog
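
# How the factory patterns above resolve at run time: a name such as
# "output_1" matches 'output_{pattern}' and materialises a pandas.CSVDataset,
# "numpy_3" matches 'numpy_{pattern}' and materialises a pickle.PickleDataset,
# and any other name (e.g. the "dummy_*" node inputs below) falls through to
# the '{catch_all_dataset_pattern}' entry, which reads the data.csv created in
# each suite's setup(). Resolution happens lazily on first load/save, which is
# the slow path this catalog is meant to exercise.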

def create_io_bound_node(inputs=None, outputs=None, name=None):
    io_node = node(io_bound_task, inputs=inputs, outputs=outputs, name=name)
    return io_node


def create_io_bound_pipeline():
    dummy_pipeline = pipeline(
        [
            create_io_bound_node("dummy_1", "output_1"),
            create_io_bound_node("dummy_2", "output_2"),
            create_io_bound_node("dummy_3", "output_3"),
            create_io_bound_node("dummy_4", "output_4"),
            create_io_bound_node("dummy_5", "output_5"),
            create_io_bound_node("dummy_6", "output_6"),
            create_io_bound_node("dummy_7", "output_7"),
            create_io_bound_node("dummy_1", "output_8"),
            create_io_bound_node("dummy_1", "output_9"),
            create_io_bound_node("dummy_1", "output_10"),
        ]
    )
    return dummy_pipeline

def create_compute_bound_node(inputs=None, outputs=None, name=None):
    compute_node = node(compute_bound_task, inputs=inputs, outputs=outputs, name=name)
    return compute_node


def create_compute_bound_pipeline():
    dummy_pipeline = pipeline(
        [
            create_compute_bound_node("dummy_1", "numpy_1"),
            create_compute_bound_node("dummy_2", "numpy_2"),
            create_compute_bound_node("dummy_3", "numpy_3"),
            create_compute_bound_node("dummy_4", "numpy_4"),
            create_compute_bound_node("dummy_5", "numpy_5"),
            create_compute_bound_node("dummy_6", "numpy_6"),
            create_compute_bound_node("dummy_7", "numpy_7"),
            create_compute_bound_node("dummy_1", "numpy_8"),
            create_compute_bound_node("dummy_1", "numpy_9"),
            create_compute_bound_node("dummy_1", "numpy_10"),
        ]
    )
    return dummy_pipeline

class RunnerMemorySuite:
    params = (
        "SequentialRunner",
        "ThreadRunner",
        "ParallelRunner",
    )
    param_names = ("runner",)

    def setup(self, *args, **kwargs):
        data_dir = Path("benchmarks/data")
        data_dir.mkdir(exist_ok=True, parents=True)

        # Create a dummy csv
        with open(data_dir / "data.csv", "w") as f:
            f.write("col1,col2\n1,2\n")

    def mem_runners(self, runner):
        catalog = create_data_catalog()
        test_pipeline = create_compute_bound_pipeline()
        runner_module = importlib.import_module("kedro.runner")
        runner_obj = getattr(runner_module, runner)()
        runner_obj.run(test_pipeline, catalog=catalog)

    def peakmem_runners(self, runner):
        catalog = create_data_catalog()
        test_pipeline = create_compute_bound_pipeline()
        runner_module = importlib.import_module("kedro.runner")
        runner_obj = getattr(runner_module, runner)()
        runner_obj.run(test_pipeline, catalog=catalog)
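
# In asv, a mem_* benchmark reports the size of the object the method returns
# (None here, since the result of runner_obj.run(...) is not returned), while
# peakmem_* reports the peak resident memory of the process during the call,
# so peakmem_runners is the more informative of the two measurements here.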

class RunnerTimeSuite:
    params = (
        "SequentialRunner",
        "ThreadRunner",
        "ParallelRunner",
    )
    param_names = ("runner",)

    def setup(self, *args, **kwargs):
        data_dir = Path("benchmarks/data")
        data_dir.mkdir(exist_ok=True, parents=True)

        # Create a dummy csv
        with open(data_dir / "data.csv", "w") as f:
            f.write("col1,col2\n1,2\n")

    def time_compute_bound_runner(self, runner):
        catalog = create_data_catalog()
        test_pipeline = create_compute_bound_pipeline()
        runner_module = importlib.import_module("kedro.runner")
        runner_obj = getattr(runner_module, runner)()
        runner_obj.run(test_pipeline, catalog=catalog)

    def time_io_bound_runner(self, runner):
        """IO bound pipeline"""
        catalog = create_data_catalog()
        test_pipeline = create_io_bound_pipeline()
        runner_module = importlib.import_module("kedro.runner")
        runner_obj = getattr(runner_module, runner)()
        runner_obj.run(test_pipeline, catalog=catalog)
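
As a quick way to exercise these suites outside asv (for example before `asv run` or an `asv compare` across commits), one can call a benchmark method directly. A minimal sketch, assuming Kedro is installed, the working directory is the repository root, and the module is importable as `benchmark_runner` (the exact module path may differ after the renames in this commit):

```python
# Hypothetical smoke test for the suites above; the import path is an
# assumption and may not match the final layout of this commit.
from benchmark_runner import RunnerTimeSuite

suite = RunnerTimeSuite()
suite.setup("SequentialRunner")  # creates benchmarks/data/data.csv
# Ten pure-Python compute nodes run one after another on SequentialRunner.
suite.time_compute_bound_runner("SequentialRunner")
```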