# fix cache problems & add fail-fast #38
# Workflow file for this run.
# This file contains bidirectional Unicode text that may be interpreted or
# compiled differently than what appears below. To review, open the file in an
# editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Builds one Docker image per (Slurm version, base image) combination, copies
# the DEB packages and the nccl-tests tarball out of the image, and publishes
# them as a GitHub release named after that combination.
name: Build Docker and Create Release

on: push

jobs:
  build:
    runs-on: self-hosted

    # The release step creates releases and uploads assets with GITHUB_TOKEN,
    # which needs write access to repository contents.
    permissions:
      contents: write

    strategy:
      # Let the remaining combinations finish even if one of them fails.
      fail-fast: false
      matrix:
        slurm:
          - version: '23.11.6'
          - version: '23.11.9'
          - version: '24.05.2'
        image:
          - context: .
            push: false
            platforms: linux/amd64
            load: true
            ubuntu_version: focal
            pmix_version: '5.0.2'
            cuda_version: '12.2.2'
            build_args: BASE_IMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubuntu20.04
          - context: .
            push: false
            platforms: linux/amd64
            load: true
            ubuntu_version: jammy
            pmix_version: '5.0.3'
            cuda_version: '12.2.2'
            build_args: BASE_IMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubuntu22.04

    env:
      # Single source of truth for the per-combination identifiers, so the
      # build, extraction and release steps cannot drift apart.
      RELEASE_TAG: ${{ matrix.image.cuda_version }}-${{ matrix.image.ubuntu_version }}-slurm${{ matrix.slurm.version }}
      IMAGE_TAG: slurm_builder:${{ matrix.image.cuda_version }}-${{ matrix.image.ubuntu_version }}-slurm${{ matrix.slurm.version }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build docker images
        uses: docker/build-push-action@v6
        with:
          context: ${{ matrix.image.context }}
          push: ${{ matrix.image.push }}
          tags: ${{ env.IMAGE_TAG }}
          platforms: ${{ matrix.image.platforms }}
          # load: true makes the image available to the local Docker daemon,
          # which the extraction step below relies on.
          load: ${{ matrix.image.load }}
          build-args: |
            ${{ matrix.image.build_args }}
            SLURM_VERSION=${{ matrix.slurm.version }}
            PMIX_VERSION=${{ matrix.image.pmix_version }}
          # NOTE(review): this cache directory lives inside the workspace and
          # is shared by every matrix combination; confirm it survives the
          # checkout step's workspace cleaning and does not grow unbounded.
          cache-from: type=local,src=.buildx-cache
          cache-to: type=local,dest=.buildx-cache,mode=max

      - name: Create slurm_build_output directory
        run: mkdir -p slurm_build_output

      - name: Run Docker container and copy files
        run: |
          container_id=$(docker create "$IMAGE_TAG")
          # Always remove the container, even if a copy fails or the
          # container is still running after `docker start`.
          trap 'docker rm -f "$container_id" >/dev/null' EXIT
          docker start "$container_id"
          # Copying the directory yields ./slurm_build_output/debs/.
          docker cp "$container_id:/usr/src/debs/" ./slurm_build_output/
          docker cp "$container_id:/usr/src/nccl-tests/build/nccl-tests-perf.tar.gz" ./slurm_build_output/

      - name: Create GitHub Release and Upload DEB packages
        uses: softprops/action-gh-release@v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ env.RELEASE_TAG }}
          name: Cuda ${{ matrix.image.cuda_version }}, Ubuntu ${{ matrix.image.ubuntu_version }}, Slurm ${{ matrix.slurm.version }}
          body: "Release based on Ubuntu ${{ matrix.image.ubuntu_version }} for Slurm ${{ matrix.slurm.version }} with cuda ${{ matrix.image.cuda_version }}, PMIx ${{ matrix.image.pmix_version }} and libnccl"
          draft: false
          prerelease: false
          files: |
            slurm_build_output/debs/*.deb
            slurm_build_output/nccl-tests-perf.tar.gz

      - name: Cleanup
        # Run even when an earlier step failed so the self-hosted runner does
        # not accumulate artifacts. The packages live in
        # slurm_build_output/debs/, so remove the whole output directory.
        if: always()
        run: rm -rf slurm_build_output