name: Execute ZKVM-Perf (Matrix)

on:
  workflow_dispatch:
    inputs:
      provers:
        description: 'Provers to use (comma-separated)'
        required: false
        type: string
        default: 'sp1'
      programs:
        description: 'Programs to benchmark (comma-separated)'
        required: false
        type: string
        default: 'loop,fibonacci,tendermint,reth1,reth2'
      filename:
        description: 'Filename for the benchmark'
        required: false
        type: string
        default: 'benchmark'
      trials:
        description: 'Number of trials to run'
        required: false
        type: string
        default: '1'
      sp1_ref:
        description: 'SP1 reference (commit hash or branch name)'
        required: false
        type: string
        default: 'dev'
      additional_params:
        description: 'Additional parameters as JSON'
        required: false
        type: string
        default: '{"hashfns":"poseidon","shard_sizes":"22"}'
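
# Two phases: a matrix job provisions one EC2 runner per instance type and
# drives the benchmark there; a fan-in job then merges the per-instance CSVs.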
jobs:
  run-benchmarks:
    strategy:
      matrix:
        include:
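          # One GPU (g6) and one CPU-only (r7i) instance type, booting the same AMI.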
          - instance_type: g6.16xlarge
            enable_gpu: true
            ami_id: ami-079a6a210557ef0e4
          - instance_type: r7i.16xlarge
            enable_gpu: false
            ami_id: ami-079a6a210557ef0e4
    name: Run on ${{ matrix.instance_type }}
    runs-on: ubuntu-latest
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}
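
      # Launch an ephemeral self-hosted runner on EC2; its label routes the
      # benchmark job to this instance and is reused to stop it afterwards.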
      - name: Start EC2 runner
        id: start-ec2-runner
        uses: xJonathanLEI/ec2-github-runner@main
        with:
          mode: start
          # A personal access token is required here, as `GITHUB_TOKEN` does
          # not have access to runners. Use a fine-grained token with at least
          # these permissions on this repository:
          #   - Administration: Read and write
          #   - Contents: Read and write
          #   - Metadata: Read-only
          #   - Workflows: Read and write
          #   - Actions: Read and write
          github-token: ${{ secrets.GH_PAT }}
          ec2-image-id: ${{ matrix.ami_id }}
          ec2-instance-type: ${{ matrix.instance_type }}
          subnet-id: ${{ secrets.AWS_SUBNET_ID }}
          security-group-id: ${{ secrets.AWS_SG_ID }}
          storage-size: 1024
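
      # Dispatch the inner `run-on-runner.yml` workflow, passing along the
      # benchmark inputs and the label of the runner started above.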
      - name: Run benchmarks
        id: run-benchmarks
        uses: actions/github-script@v6
        with:
          github-token: ${{ secrets.GH_PAT }}
          script: |
            const runnerName = '${{ steps.start-ec2-runner.outputs.label }}';
            const maxAttempts = 30;
            const pollInterval = 10000; // 10 seconds
            let triggeredRunId = null;

            console.log('Triggering benchmark workflow');
            try {
              await github.rest.actions.createWorkflowDispatch({
                owner: context.repo.owner,
                repo: context.repo.repo,
                workflow_id: 'run-on-runner.yml',
                ref: context.ref,
                inputs: {
                  runner_name: runnerName,
                  instance_type: '${{ matrix.instance_type }}',
                  enable_gpu: '${{ matrix.enable_gpu }}',
                  provers: '${{ inputs.provers }}',
                  programs: '${{ inputs.programs }}',
                  filename: '${{ inputs.filename }}_${{ matrix.instance_type }}',
                  trials: '${{ inputs.trials }}',
                  sp1_ref: '${{ inputs.sp1_ref }}',
                  additional_params: '${{ inputs.additional_params }}'
                }
              });
              console.log('Benchmark workflow triggered successfully');
            } catch (error) {
              core.setFailed(`Failed to trigger workflow: ${error.message}`);
              return;
            }
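
            // `createWorkflowDispatch` does not return the new run's ID, so
            // poll recent in-progress runs of run-on-runner.yml and match on
            // the job name, which embeds the unique runner label.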
            console.log('Polling for the triggered run');
            for (let attempt = 1; attempt <= maxAttempts; attempt++) {
              await new Promise(resolve => setTimeout(resolve, pollInterval));
              try {
                const runs = await github.rest.actions.listWorkflowRuns({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  workflow_id: 'run-on-runner.yml',
                  status: 'in_progress'
                });
                console.log(`Found ${runs.data.workflow_runs.length} in-progress runs`);
                for (const run of runs.data.workflow_runs) {
                  if (new Date(run.created_at).getTime() > Date.now() - 300000) { // Within the last 5 minutes
                    console.log(`Checking run ${run.id} created at ${run.created_at}`);
                    try {
                      const jobs = await github.rest.actions.listJobsForWorkflowRun({
                        owner: context.repo.owner,
                        repo: context.repo.repo,
                        run_id: run.id
                      });
                      console.log(`Run ${run.id} has ${jobs.data.jobs.length} jobs`);
                      for (const job of jobs.data.jobs) {
                        console.log(`  Job: ${job.name}`);
                      }
                      const matchingJob = jobs.data.jobs.find(job =>
                        job.name === `Run Benchmark on ${runnerName}`
                      );
                      if (matchingJob) {
                        triggeredRunId = run.id;
                        console.log(`Found matching run. Triggered run ID: ${triggeredRunId}`);
                        break;
                      } else {
                        console.log(`No matching job found for run ${run.id}`);
                      }
                    } catch (error) {
                      console.log(`Error checking jobs for run ${run.id}: ${error.message}`);
                      continue;
                    }
                  } else {
                    console.log(`Skipping run ${run.id} as it's older than 5 minutes`);
                  }
                }
                if (triggeredRunId) break;
                console.log(`Attempt ${attempt}: Matching run not found yet. Continuing to poll...`);
              } catch (error) {
                console.log(`Error while polling: ${error.message}`);
              }
            }
            if (!triggeredRunId) {
              core.setFailed('Failed to find the triggered workflow run with a matching job after the maximum number of attempts');
              return;
            }
            core.exportVariable('TRIGGERED_RUN_ID', triggeredRunId);
            console.log(`Triggered run ID: ${triggeredRunId}`);
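
      # Block until the inner run completes (up to 2 hours), propagating a
      # failing conclusion to this job.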
      - name: Wait for benchmark completion
        uses: actions/github-script@v6
        with:
          github-token: ${{ secrets.GH_PAT }}
          script: |
            const triggeredRunId = process.env.TRIGGERED_RUN_ID;
            if (!triggeredRunId) {
              core.setFailed('No triggered run ID found');
              return;
            }
            const maxWaitTime = 7200000; // 2 hours in milliseconds
            const checkInterval = 60000; // 1 minute in milliseconds
            const startTime = Date.now();
            const runUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${triggeredRunId}`;
            console.log(`Waiting for benchmark job to complete. Job URL: ${runUrl}`);
            while (true) {
              const run = await github.rest.actions.getWorkflowRun({
                owner: context.repo.owner,
                repo: context.repo.repo,
                run_id: triggeredRunId
              });
              if (run.data.status === 'completed') {
                console.log(`Benchmark workflow completed with conclusion: ${run.data.conclusion}`);
                if (run.data.conclusion !== 'success') {
                  core.setFailed(`Benchmark workflow failed with conclusion: ${run.data.conclusion}. Job URL: ${runUrl}`);
                }
                break;
              }
              if (Date.now() - startTime > maxWaitTime) {
                core.setFailed(`Benchmark workflow did not complete within the maximum wait time. Job URL: ${runUrl}`);
                break;
              }
              console.log(`Waiting for benchmark to complete... Current status: ${run.data.status}. Job URL: ${runUrl}`);
              await new Promise(resolve => setTimeout(resolve, checkInterval));
            }
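
      # Note: actions/download-artifact@v3 can only fetch artifacts uploaded by
      # the current workflow run. If the results are uploaded by the triggered
      # run-on-runner.yml run instead, retrieving them here may require a
      # cross-run download via the REST API or a third-party action.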
      - name: Download benchmark results
        uses: actions/download-artifact@v3
        with:
          name: benchmark-results-${{ matrix.instance_type }}-${{ env.TRIGGERED_RUN_ID }}
          path: ./benchmark-results-${{ matrix.instance_type }}

      - name: Process benchmark results
        run: |
          echo "Results for ${{ matrix.instance_type }}:"
          cat ./benchmark-results-${{ matrix.instance_type }}/*.csv
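
      # Always tear the runner down, even if earlier steps failed, so no EC2
      # instance is left running.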
      - name: Stop EC2 runner
        if: always()
        uses: xJonathanLEI/ec2-github-runner@main
        with:
          mode: stop
          github-token: ${{ secrets.GH_PAT }}
          label: ${{ steps.start-ec2-runner.outputs.label }}
          ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
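
  # Fan-in: merge the per-instance CSVs into one artifact and a step summary.
  # Assumes the results are available to this run as artifacts named
  # `benchmark-results-<instance_type>-...`.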
  combine-results:
    needs: run-benchmarks
    runs-on: ubuntu-latest
    steps:
      - name: Download all artifacts
        uses: actions/download-artifact@v3

      - name: Combine CSV files
        run: |
          echo "Combining CSV files:"
          # Get the header from the first CSV file
          head -n 1 "$(ls benchmark-results-*/*.csv | head -n 1)" > combined_results.csv
          # Append data from all CSV files, skipping each header
          for file in benchmark-results-*/*.csv; do
            tail -n +2 "$file" >> combined_results.csv
          done
          cat combined_results.csv

      - name: Upload combined results
        uses: actions/upload-artifact@v3
        with:
          name: combined-benchmark-results
          path: combined_results.csv

      - name: Create summary
        run: |
          echo "## Benchmark Results Summary" >> $GITHUB_STEP_SUMMARY
          echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
          cat combined_results.csv >> $GITHUB_STEP_SUMMARY
          echo "\`\`\`" >> $GITHUB_STEP_SUMMARY