Merge pull request #10 from zkonduit/runs
feat: run through the benchmarks 10 times.
ethan-crypto authored Jan 22, 2024
2 parents 9725379 + 946f6a0 commit 9814335
Showing 24 changed files with 1,049 additions and 739 deletions.
29 changes: 24 additions & 5 deletions .github/workflows/benchmarks.yml
@@ -15,24 +15,28 @@ env:
jobs:
benchmark-tests:
runs-on:
large-self-hosted
1000gb
steps:
- uses: actions/checkout@v4
- uses: software-mansion/setup-scarb@v1
with:
scarb-version: "2.4.0"
scarb-version: "2.4.2"
- uses: actions/setup-python@v4
with:
python-version: "3.9"
- uses: actions-rs/toolchain@v1
with:
toolchain: nightly
toolchain: nightly-2024-01-16
override: true
components: rustfmt, clippy
- uses: baptiste0928/cargo-install@v1
with:
crate: cargo-nextest
locked: true
- name: Cargo clean
run: cargo clean
- name: Delete Cargo.lock
run: rm -f Cargo.lock
- name: Install Risc0 toolchain
run: |
cargo install cargo-binstall
@@ -42,7 +46,22 @@ jobs:
run: |
cargo install evcxr_jupyter
evcxr_jupyter --install
- name: Install GNU Time
run: sudo apt-get update && sudo apt-get install -y time
- name: Download and Install EZKL Binary
run: |
curl -L -o ezkl.tar.gz https://github.com/zkonduit/ezkl/releases/download/v7.1.4/build-artifacts.ezkl-linux-gnu.tar.gz
tar -xzf ezkl.tar.gz
sudo mv ezkl /usr/local/bin/ # Move the binary to a directory in your PATH
- name: Setup Virtual Env
run: python -m venv .env; source .env/bin/activate;
- name: Run all benchmarks across all models
run: source .env/bin/activate; cargo nextest run benchmarking_tests::tests::run_benchmarks_ --no-capture
- name: Run random forest benchmarks across all frameworks
run: source .env/bin/activate; cargo nextest run benchmarking_tests::tests::run_benchmarks_::tests_0 --no-capture
- name: Run linear regression benchmarks across all frameworks
run: source .env/bin/activate; cargo nextest run benchmarking_tests::tests::run_benchmarks_::tests_1 --no-capture
- name: Run svm classification benchmarks across all frameworks
run: source .env/bin/activate; cargo nextest run benchmarking_tests::tests::run_benchmarks_::tests_2 --no-capture
- name: Run tree ensemble regression benchmarks across all frameworks
run: source .env/bin/activate; cargo nextest run benchmarking_tests::tests::run_benchmarks_::tests_3 --no-capture
- name: Pretty Print benchmarks.json
run: jq '.' benchmarks.json # Pretty print the benchmarks.json file
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,11 +1,12 @@
*.onnx
tree_model_bytes.json
tree_model_data_bytes.json
*.pf
*.key
*.pk
*.vk
*.compiled
*.cairo
proof.json
*settings.json
*witness.json
*calibration.json
18 changes: 16 additions & 2 deletions README.md
@@ -4,24 +4,38 @@

To run the benchmarks, you first need to install Python (version 3.9.18 specifically), Rust, the Rust Jupyter kernel, the Risc0 toolchain, and Scarb on your Unix-like machine.

To install the required dependencies run.
First, you will need to install the ezkl CLI, version 7.1.4, which you can download from [here](https://github.com/zkonduit/ezkl/releases/tag/v7.1.4).

To install the other required dependencies run:

```bash
bash install_dep_run.sh
```

For Windows systems, you will need to install the dependencies manually.

For Linux systems, you may need to install jq.

```bash
sudo apt-get install jq
```

You may run the following to activate the virtual environment if it has been deactivated.

```bash
source .env/bin/activate
```

For Linux systems, you will also need to set the `OS` environment variable to `linux` (the default is Mac).

```bash
export OS=linux
```

Finally, run this cargo nextest command to generate the benchmarks:

```bash
source .env/bin/activate; cargo nextest run benchmarking_tests::tests::run_benchmarks_ --test-threads 1
source .env/bin/activate; cargo nextest run benchmarking_tests::tests::run_benchmarks_ --no-capture
```

The data will be stored in a `benchmarks.json` file in the root directory.
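
Each framework entry now accumulates one sample per repetition rather than a single value. Below is a minimal sketch of summarizing those per-run arrays, assuming the layout created by `benchmark_file.sh` (model directory → framework notebook → `provingTime`/`memoryUsage` arrays) and values stored as strings such as `"1.23s"`; adjust the parsing if the stored format differs.

```python
import json

# Summarize the per-run arrays in benchmarks.json (sketch; format assumptions noted above).
with open("benchmarks.json") as f:
    benchmarks = json.load(f)

for model, frameworks in benchmarks.items():
    for framework, metrics in frameworks.items():
        # Entries are assumed to be strings like "1.23s"; strip the unit before averaging.
        times = [float(str(t).rstrip("s")) for t in metrics.get("provingTime", [])]
        if times:
            print(f"{model}/{framework}: {len(times)} runs, "
                  f"mean proving time {sum(times) / len(times):.2f}s")
```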
2 changes: 1 addition & 1 deletion Scarb.lock
@@ -50,7 +50,7 @@ dependencies = [
[[package]]
name = "orion"
version = "0.2.0"
source = "git+https://github.com/gizatechxyz/orion.git?branch=develop#4d34849c342ca74d3113527f666df55994809eb3"
source = "git+https://github.com/gizatechxyz/onnx-cairo#3cfa61067d24215a828d27e8c6a208ca3a43ec1b"
dependencies = [
"alexandria_data_structures",
"alexandria_merkle_tree",
2 changes: 1 addition & 1 deletion Scarb.toml
@@ -5,4 +5,4 @@ version = "0.1.0"
# See more keys and their definitions at https://docs.swmansion.com/scarb/docs/reference/manifest

[dependencies]
orion = { git = "https://github.com/gizatechxyz/orion.git", branch = "develop" }
orion = { git = "https://github.com/gizatechxyz/onnx-cairo" }
2 changes: 1 addition & 1 deletion benchmark_file.sh
@@ -40,7 +40,7 @@ for subdir in "$notebooks_dir"/*; do
subdir_object=$(jq -n \
--arg name "$notebook_name" \
--argjson obj "$subdir_object" \
'$obj + {($name): {"provingTime": null, "memoryUsage": null}}')
'$obj + {($name): {"provingTime": [], "memoryUsage": []}}')
fi
done
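
Switching the placeholders from `null` to empty arrays lets each benchmark run append one measurement per repetition instead of overwriting a single value. A hedged Python sketch of that append pattern, reusing the key names from the linear-regression ezkl notebook (the actual notebook or harness code may differ):

```python
import json

benchmark_path = "../../benchmarks.json"

# Placeholder measurements for the current run; the surrounding benchmark code is
# assumed to have produced proving_time (seconds) and memory_used (KB) beforehand.
proving_time, memory_used = 1.23, 4567

with open(benchmark_path) as f:
    benchmark = json.load(f)

# Append this run's results instead of overwriting the previous value.
entry = benchmark["linear_regressions"]["ezkl"]
entry["provingTime"].append(str(proving_time) + "s")
entry["memoryUsage"].append(str(memory_used) + "kb")

with open(benchmark_path, "w") as f:
    json.dump(benchmark, f, indent=4)
```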

16 changes: 16 additions & 0 deletions install_dep_run.sh
@@ -101,6 +101,22 @@ then
all_dependencies_installed=false
fi

# Check gtime
if ! command -v gtime &> /dev/null
then
echo "gtime not found, installing gtime..."
brew install gnu-time
all_dependencies_installed=false
fi

# Check jq
if ! command -v jq &> /dev/null
then
echo "jq not found, installing jq..."
sudo apt-get install jq
all_dependencies_installed=false
fi

# Install Rust jupyter kernel

source $HOME/.cargo/env
2 changes: 1 addition & 1 deletion methods/Cargo.toml
@@ -7,4 +7,4 @@ edition = "2021"
risc0-build = { version = "0.19.1" }

[package.metadata.risc0]
methods = ["linear_regression", "random_forest", "svm_classification"]
methods = ["linear_regression", "random_forest", "svm_classification", "te_regression"]
12 changes: 12 additions & 0 deletions methods/te_regression/Cargo.toml
@@ -0,0 +1,12 @@
[package]
name = "te_regression"
version = "0.1.0"
edition = "2021"

[dependencies]
# If you want to try (experimental) std support, add `features = [ "std" ]` to risc0-zkvm
risc0-zkvm = { version = "0.19.1", default-features = false, features = [
"std",
] }
# Using git dependency as a workaround for https://github.com/smartcorelib/smartcore/issues/267
smartcore = { git = "https://github.com/risc0/smartcore.git", rev = "4bd3cadd50ed988c45c239f5264c3e2c2af0a690", features = ["serde"]}
53 changes: 53 additions & 0 deletions methods/te_regression/src/main.rs
@@ -0,0 +1,53 @@
// Copyright 2023 RISC Zero, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#![no_main]

use risc0_zkvm::guest::env;
use smartcore::{ensemble::random_forest_regressor::*, linalg::basic::matrix::DenseMatrix};

risc0_zkvm::guest::entry!(main);

pub fn main() {
// Read the model from the host into a SmartCore Random Forest Regressor model object.
// We MUST explicitly declare the correct type in order for deserialization to be
// successful.
type Model = RandomForestRegressor<f64, u32, DenseMatrix<f64>, Vec<u32>>;
let trained_model: Model = env::read();

// Read the input data into a DenseMatrix.
let x_data: DenseMatrix<f64> = env::read();

// We call the predict() function on our trained model to perform inference.
let y_hat = trained_model.predict(&x_data).unwrap();

// This line is optional and can be commented out, but it's useful to see
// the output of the computation before the proving step begins.
println!("answer: {:?}", &y_hat);

// We commit the output to the journal.
env::commit(&y_hat);

// Logging the total cycle count is optional, though it's quite useful for benchmarking
// the various operations in the guest code. env::get_cycle_count() can be
// called anywhere in the guest, multiple times. So if we are interested in
// knowing how many cycles the inference computation takes, we can calculate
// total cycles before and after model.predict() and the difference between
// the two values equals the total cycle count for that section of the guest
// code.
println!(
"Total cycles for guest code execution: {}",
env::get_cycle_count()
);
}
80 changes: 3 additions & 77 deletions notebooks/linear_regressions/ezkl.ipynb
@@ -62,8 +62,8 @@
"outputs": [],
"source": [
"model_path = os.path.join('network.onnx')\n",
"compiled_model_path = os.path.join('network.compiled')\n",
"pk_path = os.path.join('test.pk')\n",
"compiled_model_path = os.path.join('model.compiled')\n",
"pk_path = os.path.join('pk.key')\n",
"vk_path = os.path.join('test.vk')\n",
"settings_path = os.path.join('settings.json')\n",
"\n",
@@ -74,6 +74,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "0775d3dd",
"metadata": {},
"outputs": [],
"source": [
@@ -181,81 +182,6 @@
"assert os.path.isfile(pk_path)\n",
"assert os.path.isfile(settings_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c384cbc8",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import json\n",
"import time\n",
"import subprocess\n",
"\n",
"def get_memory_usage(pid):\n",
" \"\"\"Function to get memory usage of process by PID.\"\"\"\n",
" try:\n",
" # Execute Bash command to get memory usage\n",
" process = subprocess.Popen(['ps', '-o', 'rss=', '-p', str(pid)],\n",
" stdout=subprocess.PIPE,\n",
" stderr=subprocess.PIPE)\n",
" stdout, stderr = process.communicate()\n",
" # Convert output to int and return\n",
" return int(stdout)\n",
" except Exception as e:\n",
" print(f\"Error getting memory usage: {e}\")\n",
" return 0\n",
"\n",
"proof_path = os.path.join('test.pf')\n",
"# log time and memory it takes to generate proof\n",
"start = time.time()\n",
"pid = os.getpid()\n",
"initial_memory = get_memory_usage(pid)\n",
"\n",
"res = ezkl.prove(\n",
" witness_path,\n",
" compiled_model_path,\n",
" pk_path,\n",
" proof_path,\n",
" \"single\",\n",
")\n",
"\n",
"end = time.time()\n",
"proving_time = end - start\n",
"final_memory = get_memory_usage(pid)\n",
"memory_used = final_memory - initial_memory\n",
"\n",
"print(\"PROOF GENERATION TIME: \", proving_time)\n",
"print(\"MEMORY USAGE: \", memory_used, \"KB\")\n",
"\n",
"# define the path that stores the benchmarking results\n",
"benchmark_path = os.path.join('../../benchmarks.json')\n",
"\n",
"# assume benchmark file has already been created (run `bash benchmark_file.sh` to create it)\n",
"with open(benchmark_path, 'r') as f:\n",
" benchmark = json.load(f)\n",
"\n",
"proving_time =str(proving_time) + \"s\"\n",
"\n",
"memory_used = str(memory_used) + \"kb\"\n",
"\n",
"# Update the proving time in the loaded benchmark\n",
"benchmark['linear_regressions']['ezkl']['provingTime'] = proving_time\n",
"\n",
"# Update the memory usage in the loaded benchmark\n",
"benchmark['linear_regressions']['ezkl']['memoryUsage'] = memory_used\n",
"\n",
"\n",
"# Write the updated benchmark back to the file\n",
"with open(benchmark_path, 'w') as f:\n",
" json.dump(benchmark, f, indent=4)\n",
"\n",
"\n",
"print(res['instances'])\n",
"assert os.path.isfile(proof_path)"
]
}
],
"metadata": {