From 462e3e605130d135fe65d0570e1a0ae7612ed67f Mon Sep 17 00:00:00 2001
From: Umut <umutsahin@protonmail.com>
Date: Fri, 12 Jul 2024 12:46:34 +0300
Subject: [PATCH] feat(frontend): benchmark infrastructure

---
 .../workflows/concrete_python_benchmark.yml   | 125 ++++++++++++
 ci/slab.toml                                  |   7 +
 frontends/concrete-python/.gitignore          |   4 +
 frontends/concrete-python/.ruff.toml          |   2 +
 frontends/concrete-python/Makefile            |  30 ++-
 .../concrete-python/benchmarks/primitive.py   | 134 +++++++++++++
 .../concrete-python/requirements.dev.txt      |   2 +
 .../scripts/benchmark/postprocessor.py        | 186 ++++++++++++++++++
 8 files changed, 486 insertions(+), 4 deletions(-)
 create mode 100644 .github/workflows/concrete_python_benchmark.yml
 create mode 100644 frontends/concrete-python/benchmarks/primitive.py
 create mode 100644 frontends/concrete-python/scripts/benchmark/postprocessor.py

diff --git a/.github/workflows/concrete_python_benchmark.yml b/.github/workflows/concrete_python_benchmark.yml
new file mode 100644
index 0000000000..6a0cfd41c4
--- /dev/null
+++ b/.github/workflows/concrete_python_benchmark.yml
@@ -0,0 +1,125 @@
+name: Concrete Python Benchmark
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "0 1 * * SAT"
+
+env:
+  DOCKER_IMAGE: ghcr.io/zama-ai/concrete-compiler
+  GLIB_VER: 2_28
+
+jobs:
+  setup-instance:
+    name: Setup Instance
+    runs-on: ubuntu-latest
+    outputs:
+      runner-name: ${{ steps.start-instance.outputs.label }}
+    steps:
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@9e939a10db25c698cddf0da0f4f015bd47bb6838
+        with:
+          mode: start
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          backend: aws
+          profile: m7i-cpu-bench
+
+  concrete-python-benchmarks:
+    name: Run Concrete Python Benchmarks
+    needs: setup-instance
+    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        with:
+          submodules: recursive
+          fetch-depth: 0
+
+      - name: Benchmark
+        uses: addnab/docker-run-action@4f65fabd2431ebc8d299f8e5a018d79a769ae185 # v3
+        id: build-compiler-bindings
+        with:
+          registry: ghcr.io
+          image: ${{ env.DOCKER_IMAGE }}
+          username: ${{ secrets.GHCR_LOGIN }}
+          password: ${{ secrets.GHCR_PASSWORD }}
+          options: >- # NOTE(review): no env entry or step here appears to set SSH_AUTH_SOCK / DOCKER_GPU_OPTION - confirm they expand as intended
+            -v ${{ github.workspace }}:/concrete
+            -v ${{ github.workspace }}/build:/build
+            -v ${{ env.SSH_AUTH_SOCK }}:/ssh.socket
+            -e SSH_AUTH_SOCK=/ssh.socket
+            ${{ env.DOCKER_GPU_OPTION }}
+          shell: bash
+          run: | # NOTE(review): this workflow declares no strategy.matrix, so matrix.python-version presumably expands to nothing (PYTHON=python) - confirm
+            set -e
+
+            rustup toolchain install nightly-2024-07-01
+            rm -rf /build/*
+
+            export PYTHON=${{ format('python{0}', matrix.python-version) }}
+            echo "Using $PYTHON"
+
+            dnf -y install graphviz graphviz-devel
+
+            cd /concrete/frontends/concrete-python
+            make PYTHON=$PYTHON venv
+            source .venv/bin/activate
+
+            cd /concrete/compilers/concrete-compiler/compiler
+            make BUILD_DIR=/build CCACHE=ON DATAFLOW_EXECUTION_ENABLED=ON Python3_EXECUTABLE=$(which python) python-bindings
+
+            echo "Debug: ccache statistics (after the build):"
+            ccache -s
+
+            cd /concrete/frontends/concrete-python
+
+            export COMPILER_BUILD_DIRECTORY="/build"
+            export PROGRESS_MACHINE_NAME="m7i.48xlarge"
+            
+            make benchmark
+            make process-benchmark-results-for-grafana
+
+            deactivate
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh frontends/concrete-python/progress.processed.json '${{ secrets.JOB_SECRET }}')"
+          
+          cd frontends/concrete-python
+          
+          echo "Sending results to Slab..."
+          curl -v -k \
+            -H "Content-Type: application/json" \
+            -H "X-Slab-Repository: ${{ github.repository }}" \
+            -H "X-Slab-Command: store_data_v2" \
+            -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+            -d @progress.processed.json \
+            ${{ secrets.SLAB_URL }}
+
+  teardown-instance:
+    name: Teardown Instance
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    needs: [ setup-instance, concrete-python-benchmarks ]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Stop instance
+        id: stop-instance
+        uses: zama-ai/slab-github-runner@9e939a10db25c698cddf0da0f4f015bd47bb6838
+        with:
+          mode: stop
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          label: ${{ needs.setup-instance.outputs.runner-name }}
diff --git a/ci/slab.toml b/ci/slab.toml
index 1492c21857..01e4372744 100644
--- a/ci/slab.toml
+++ b/ci/slab.toml
@@ -1,3 +1,10 @@
+# This is the new version of Slab that handles multi backend providers.
+[aws.backend.m7i-cpu-bench]
+region = "eu-west-1"
+image_id = "ami-002bdcd64b8472cf9" # Based on Ubuntu 22.04
+instance_type = "m7i.48xlarge"
+security_group = ["sg-0e55cc31dfda0d8a7", ]
+
 [profile.m7i-cpu-bench]
 region = "eu-west-1"
 image_id = "ami-002bdcd64b8472cf9" # Based on Ubuntu 22.4
diff --git a/frontends/concrete-python/.gitignore b/frontends/concrete-python/.gitignore
index 19a65caccd..1813eb4064 100644
--- a/frontends/concrete-python/.gitignore
+++ b/frontends/concrete-python/.gitignore
@@ -141,3 +141,7 @@ dmypy.json
 
 # fhe keys
 .keys
+
+# progress tracker
+progress.json
+progress.processed.json
diff --git a/frontends/concrete-python/.ruff.toml b/frontends/concrete-python/.ruff.toml
index 2b95aca548..b520e6e7dc 100644
--- a/frontends/concrete-python/.ruff.toml
+++ b/frontends/concrete-python/.ruff.toml
@@ -20,3 +20,5 @@ ignore = [
 "concrete/fhe/mlir/converter.py" = ["ARG002", "B011", "F403", "F405"]
 "examples/**" = ["PLR2004"]
 "tests/**" = ["PLR2004", "PLW0603", "SIM300", "S311"]
+"benchmarks/**" = ["S311", "B023"]
+"scripts/**" = ["DTZ005"]
diff --git a/frontends/concrete-python/Makefile b/frontends/concrete-python/Makefile
index 6e311e14a9..c82480deec 100644
--- a/frontends/concrete-python/Makefile
+++ b/frontends/concrete-python/Makefile
@@ -94,6 +94,25 @@ pytest-gpu:
 		--key-cache "${KEY_CACHE_DIRECTORY}" \
 		-m "${PYTEST_MARKERS}"
 
+benchmark:
+	export LD_PRELOAD=$(RUNTIME_LIBRARY)
+	export PYTHONPATH=$(NEW_PYTHON_PATH)
+
+	export PROGRESS_SAMPLES=3
+	export PROGRESS_OUTPUT_INDENT=2
+# xargs -n 1 starts one interpreter per file; "python a.py b.py" would only run a.py
+	rm -rf progress.json
+	find ./benchmarks/ -name "*.py" | xargs -n 1 python
+
+process-benchmark-results-for-grafana:
+	export LD_PRELOAD=$(RUNTIME_LIBRARY)
+	export PYTHONPATH=$(NEW_PYTHON_PATH)
+
+	python scripts/benchmark/postprocessor.py \
+		--source progress.json \
+		--target progress.processed.json \
+		--path_to_repository ../..
+
 # ==========
 # Formatting
 # ==========
@@ -103,7 +122,8 @@ format:
 		--dir concrete \
 		--dir examples \
 		--dir scripts \
-		--dir tests
+		--dir tests \
+		--dir benchmarks
 
 sanitize-notebooks:
 	$(PYTHON) scripts/notebook/sanitizer.py docs
@@ -119,13 +139,14 @@ check-format:
 		--dir concrete \
 		--dir examples \
 		--dir scripts \
-		--dir tests
+		--dir tests \
+        --dir benchmarks
 
 check-sanitize-notebooks:
 	$(PYTHON) scripts/notebook/sanitizer.py docs --check
 
 mypy:
-	mypy concrete examples scripts tests --ignore-missing-imports
+	mypy concrete examples scripts tests benchmarks --ignore-missing-imports
 
 pydocstyle:
 	pydocstyle concrete --convention google --add-ignore=D1,D200,D202,D212,D402 --add-select=D401
@@ -135,9 +156,10 @@ pylint:
 	pylint --rcfile=.pylintrc examples --disable=C0103,C0114,C0115,C0116,E0401,R1721
 	pylint --rcfile=.pylintrc scripts
 	pylint --rcfile=.pylintrc tests --disable=C0301,W0108
+	pylint --rcfile=.pylintrc benchmarks
 
 ruff:
-	ruff concrete/ examples/ scripts/ tests/
+	ruff concrete/ examples/ scripts/ tests/ benchmarks/
 
 pcc: check-format check-sanitize-notebooks mypy pydocstyle pylint ruff
 
diff --git a/frontends/concrete-python/benchmarks/primitive.py b/frontends/concrete-python/benchmarks/primitive.py
new file mode 100644
index 0000000000..c5dfec54e7
--- /dev/null
+++ b/frontends/concrete-python/benchmarks/primitive.py
@@ -0,0 +1,134 @@
+"""
+Benchmark primitive operations.
+"""
+
+# pylint: disable=import-error,cell-var-from-loop,redefined-outer-name
+
+import random
+
+import py_progress_tracker as progress
+
+from concrete import fhe
+
+targets = []  # benchmark targets consumed by the progress.track decorator on main
+configuration = fhe.Configuration()  # shared by every target
+
+# Table lookup targets: x // 2 on one encrypted input drawn from [0, 2**bit_width - 1]
+for bit_width in range(2, 8 + 1):
+    targets.append(
+        {
+            "id": f"table-lookup :: tlu[eint{bit_width}]",
+            "name": f"{bit_width}-bit table lookup",
+            "parameters": {
+                "function": lambda x: x // 2,
+                "encryption": {"x": "encrypted"},
+                "inputset": fhe.inputset(lambda _: random.randint(0, (2**bit_width) - 1)),
+                "configuration": configuration,
+            },
+        }
+    )
+
+# Encrypted multiplication targets: x * y on two encrypted inputs of the same range
+for bit_width in range(2, 8 + 1):
+    targets.append(
+        {
+            "id": f"encrypted-multiplication :: eint{bit_width} * eint{bit_width}",
+            "name": f"{bit_width}-bit encrypted multiplication",
+            "parameters": {
+                "function": lambda x, y: x * y,
+                "encryption": {"x": "encrypted", "y": "encrypted"},
+                "inputset": fhe.inputset(
+                    lambda _: random.randint(0, (2**bit_width) - 1),
+                    lambda _: random.randint(0, (2**bit_width) - 1),
+                ),
+                "configuration": configuration,
+            },
+        }
+    )
+
+
+@progress.track(targets)
+def main(function, encryption, inputset, configuration):
+    """
+    Benchmark a target: compile, generate keys, then time 10 encrypt/run/decrypt rounds.
+
+    Args:
+        function:
+            function to benchmark (also called in the clear to score accuracy)
+
+        encryption:
+            encryption status of the arguments of the function
+
+        inputset:
+            inputset to use for compiling the function and to draw samples from
+
+        configuration:
+            configuration to use for compilation
+    """
+
+    compiler = fhe.Compiler(function, encryption)
+
+    print("Compiling...")
+    with progress.measure(id="compilation-time-ms", label="Compilation Time (ms)"):
+        circuit = compiler.compile(inputset, configuration)
+
+    progress.measure(
+        id="complexity",
+        label="Complexity",
+        value=circuit.complexity,
+    )
+
+    print("Generating keys...")
+    with progress.measure(id="key-generation-time-ms", label="Key Generation Time (ms)"):
+        circuit.keygen(force=True)
+
+    progress.measure(
+        id="evaluation-key-size-mb",
+        label="Evaluation Key Size (MB)",
+        value=(len(circuit.keys.evaluation.serialize()) / (1024 * 1024)),
+    )
+
+    # pylint: disable=unused-variable
+
+    print("Warming up...")  # one untimed encrypt/run/decrypt round before measuring
+    sample = random.choice(inputset)
+    encrypted = circuit.encrypt(*sample)  # reused below for the input ciphertext size
+    ran = circuit.run(encrypted)  # reused below for the output ciphertext size
+    decrypted = circuit.decrypt(ran)  # noqa: F841
+
+    # pylint: enable=unused-variable
+
+    def calculate_input_output_size(input_output):  # serialized length / (1024 * 1024)
+        if isinstance(input_output, tuple):  # several values: add up their lengths
+            result = sum(len(value.serialize()) for value in input_output)
+        else:
+            result = len(input_output.serialize())
+        return result / (1024 * 1024)
+
+    progress.measure(
+        id="input-ciphertext-size-mb",
+        label="Input Ciphertext Size (MB)",
+        value=calculate_input_output_size(encrypted),
+    )
+    progress.measure(
+        id="output-ciphertext-size-mb",
+        label="Output Ciphertext Size (MB)",
+        value=calculate_input_output_size(ran),
+    )
+
+    for i in range(10):  # keep in sync with the "out of 10" message below
+        print(f"Running subsample {i + 1} out of 10...")
+
+        sample = random.choice(inputset)
+        with progress.measure(id="encryption-time-ms", label="Encryption Time (ms)"):
+            encrypted = circuit.encrypt(*sample)
+        with progress.measure(id="evaluation-time-ms", label="Evaluation Time (ms)"):
+            ran = circuit.run(encrypted)
+        with progress.measure(id="decryption-time-ms", label="Decryption Time (ms)"):
+            output = circuit.decrypt(ran)
+
+        progress.measure(
+            id="accuracy",
+            label="Accuracy",
+            value=int(output == function(*sample)),  # 1 if equal to the clear result else 0
+        )
diff --git a/frontends/concrete-python/requirements.dev.txt b/frontends/concrete-python/requirements.dev.txt
index bcf5a297ae..9b92bce314 100644
--- a/frontends/concrete-python/requirements.dev.txt
+++ b/frontends/concrete-python/requirements.dev.txt
@@ -16,3 +16,5 @@ ruff==0.0.259
 auditwheel==5.3.0; sys_platform == 'linux'
 delocate==0.10.4; sys_platform == 'darwin'
 wheel==0.40.0
+
+py-progress-tracker==0.7.0
diff --git a/frontends/concrete-python/scripts/benchmark/postprocessor.py b/frontends/concrete-python/scripts/benchmark/postprocessor.py
new file mode 100644
index 0000000000..0650c9bab0
--- /dev/null
+++ b/frontends/concrete-python/scripts/benchmark/postprocessor.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python
+"""Used to convert output format from python-progress-tracker to new postgres DB format"""
+
+# pylint: disable=import-error
+
+import argparse
+import json
+import math
+import subprocess
+import sys
+from datetime import datetime
+from pathlib import Path
+from typing import Any, List, Optional, Tuple, Union
+
+from concrete import fhe
+
+# pylint: disable=import-error
+
+
+def is_git_diff(path: Union[None, Path, str]) -> bool:
+    """Tell whether `git diff HEAD` prints anything in `path` (cwd if None)."""
+    cwd = "." if path is None else path
+    command = ["git", "diff", "HEAD"]
+    result = subprocess.run(command, capture_output=True, cwd=cwd, check=True)
+    # check=True already raises on a non-zero exit; any stderr output is fatal too
+    if result.stderr:
+        message = f"Check git diff raised an error:\n {result.stderr.decode()}"
+        raise ValueError(message)
+    return len(result.stdout) > 0
+
+
+def get_git_branch(path: Union[None, Path, str]) -> str:
+    """Return what `git rev-parse --abbrev-ref HEAD` prints in `path` (cwd if None)."""
+    cwd = "." if path is None else path
+    command = ["git", "rev-parse", "--abbrev-ref", "HEAD"]
+    result = subprocess.run(command, capture_output=True, cwd=cwd, check=True)
+    # check=True already raises on a non-zero exit; any stderr output is fatal too
+    if result.stderr:
+        message = f"Check git branch raised an error:\n{result.stderr.decode()}"
+        raise ValueError(message)
+    return result.stdout.decode().strip()
+
+
+def get_git_hash(path: Union[None, Path, str]) -> str:
+    """Return the hash of HEAD in the repository at `path` (cwd if None)."""
+    cwd = "." if path is None else path
+    command = ["git", "rev-parse", "HEAD"]
+    result = subprocess.run(command, capture_output=True, cwd=cwd, check=True)
+    # check=True already raises on a non-zero exit; any stderr output is fatal too
+    if result.stderr:
+        message = f"Check git hash raised an error:\n{result.stderr.decode()}"
+        raise ValueError(message)
+    return result.stdout.decode().strip()
+
+
+def get_git_hash_date(hash_str: str, path: Union[None, Path, str]) -> str:
+    """Get the author date of commit `hash_str` in the repository at `path` (cwd if None).
+
+    The date is returned exactly as git prints `%ai` (surrounding whitespace stripped).
+    """
+    path = path if path is not None else "."
+    # We get the author date (%ai) and not the commit date (%ci)
+    # for more details please refer to https://git-scm.com/docs/git-show
+    command = ["git", "show", "-s", "--date=iso-strict", "--format=%ai", hash_str]
+    completed_process = subprocess.run(command, capture_output=True, cwd=path, check=True)
+    if completed_process.stderr:
+        message = f"Check git hash date raised an error:\n {completed_process.stderr.decode()}"
+        raise ValueError(message)
+    hash_date = completed_process.stdout.decode().strip()  # decode once, reuse below
+    print(hash_date)
+    return hash_date
+
+
+def git_iso_to_python_iso(date_str: str) -> str:
+    """Turn git's `YYYY-MM-DD hh:mm:ss +zzzz` into `YYYY-MM-DDThh:mm:ss+zz:zz`."""
+    fields = date_str.split()
+    return fields[0] + "T" + fields[1] + fields[2][:3] + ":" + fields[2][3:]
+
+
+def find_element_in_zip(elements: List[Tuple[str, Any]], key: str) -> Any:
+    """Return the value paired with the first entry of `elements` whose key is `key`."""
+    for candidate, value in elements:
+        if candidate == key:
+            return value
+    message = f"Couldn't find key {key} in {[name for name, _ in elements]}"
+    raise ValueError(message)
+
+
+def convert_to_new_postgres(
+    source: Path, target: Path, path_to_repository: Path, machine_name: Optional[str] = None
+):
+    """Turn a python-progress-tracker json report into a session for the new postgres DB."""
+    assert source.exists(), source
+    with open(source, "r", encoding="utf-8") as report_file:
+        progress = json.load(report_file)
+
+    # Describe the commit checked out in the repository
+    # assert not is_git_diff(path_to_repository)
+    branch = get_git_branch(path_to_repository)
+    commit_hash = get_git_hash(path_to_repository)
+    commit_date_str = get_git_hash_date(commit_hash, path_to_repository)
+    commit_date = datetime.fromisoformat(git_iso_to_python_iso(commit_date_str))
+    insert_date = datetime.now()
+
+    # An explicit `machine_name` overrides the machine name stored in the report
+    hardware = machine_name if machine_name is not None else progress["machine"]["name"]
+    points: List[Any] = []
+    session_data = {
+        "database": "concrete_python",
+        "hardware": hardware,
+        "project_version": fhe.__version__,
+        "branch": branch,
+        "insert_date": insert_date.astimezone().isoformat(timespec="seconds"),
+        "commit_date": commit_date.astimezone().isoformat(timespec="seconds"),
+        "points": points,
+    }
+
+    # One point per measurement of every target, NaN measurements excluded
+    for target_name, target_data in progress["targets"].items():
+        if "measurements" not in target_data:
+            continue
+        for metric_id, metric_value in target_data["measurements"].items():
+            metric_label = progress["metrics"][metric_id]["label"]
+            if math.isnan(metric_value):
+                continue
+            id_parts = target_name.split("::")
+            points.append(
+                {
+                    "type": metric_label,
+                    "backend": "cpu",
+                    "name": id_parts[0].strip(),
+                    "test": id_parts[1].strip(),
+                    "class": "",
+                    "operator": "",
+                    "params": None,
+                    "value": metric_value,
+                }
+            )
+
+    # Write the converted session
+    with open(target, "w", encoding="utf-8") as converted_file:
+        json.dump(session_data, converted_file)
+
+
+def main():
+    """Parse the command line options and run the conversion.
+
+    Options:
+        --source:
+            path to the json file to convert (default: ./source.json)
+
+        --target:
+            path to the converted json file (default: ./target.json)
+
+        --path_to_repository:
+            path to the repository used to run the benchmark (default: ./)
+
+        --machine_name:
+            value overwriting the machine name (default: None)
+    """
+
+    source_help = "Path to json file to convert."
+    target_help = "Path to converted json file."
+    repo_help = "Path to repository used to run the benchmark"
+    machine_help = "Overwrite machine_name (default is None)"
+
+    # argparse derives each `dest` from its long flag, e.g. --machine_name -> machine_name
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--source", type=Path, default=Path("./source.json"), help=source_help)
+    parser.add_argument("--target", type=Path, default=Path("./target.json"), help=target_help)
+    parser.add_argument("--path_to_repository", type=Path, default=Path("./"), help=repo_help)
+    parser.add_argument("--machine_name", type=str, default=None, help=machine_help)
+
+    options = parser.parse_args(sys.argv[1:])
+
+    # Every option is forwarded under the keyword of the same name
+    convert_to_new_postgres(
+        source=options.source,
+        target=options.target,
+        path_to_repository=options.path_to_repository,
+        machine_name=options.machine_name,
+    )
+
+
+if __name__ == "__main__":
+    main()