From 543c926617e64ac373749a66c9f896ebada7856c Mon Sep 17 00:00:00 2001
From: Pavel Sofronii
Date: Mon, 19 Aug 2024 10:37:50 +0200
Subject: [PATCH] try to build

---
Notes (reviewer annotation; this text sits between the three-dash marker
and the first file diff and is not part of the commit):

* Workflow trigger: the workflow_dispatch trigger with its slurm_version
  and pmix_version inputs is replaced by a bare push trigger.
* Build matrix: Slurm 23.11.6 (23.11.9 and 24.05.2 are commented out)
  crossed with two image definitions, focal with PMIx 5.0.2 and jammy
  with PMIx 5.0.3, both with cuda_version 12.2.2. max-parallel is 3.
* The inline "docker build" step is replaced by
  docker/build-push-action@v6 with a local cache at /tmp/.buildx-cache;
  image tag, release tag, release name and body are derived from the
  matrix values.
* Dockerfile: the base image becomes the BASE_IMAGE build argument, the
  SLURM_VERSION and PMIX_VERSION ARGs move to directly after FROM, and
  the zlibc package is replaced by zlib1g.
* NOTE(review): the cleanup step now removes slurm_build_output/*.deb,
  while the copy step copies the /usr/src/debs/ directory into
  ./slurm_build_output/ -- confirm the glob matches what should be
  removed.
* NOTE(review): this patch text was recovered from a copy whose line
  breaks and indentation had been collapsed. Leading whitespace and the
  position of blank context lines were reconstructed to satisfy the hunk
  line counts; verify against commit 543c926 before applying.

 .github/workflows/release.yml | 68 +++++++++++++++++++++++++----------
 Dockerfile                    | 11 +++---
 2 files changed, 56 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 653397e..6301c9e 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1,37 +1,67 @@
 name: Build Docker and Create Release
 
 on:
-  workflow_dispatch:
-    inputs:
-      slurm_version:
-        description: 'Slurm version'
-        required: true
-        default: '24.05.2'
-      pmix_version:
-        description: 'Pmix version'
-        required: true
-        default: '5.0.3'
+  push:
 
 jobs:
   build:
     runs-on: self-hosted
 
+    strategy:
+      max-parallel: 3
+      matrix:
+        slurm:
+          - version: 23.11.6
+#          - version: 23.11.9
+#          - version: 24.05.2
+        image:
+          - context: .
+            push: false
+            platforms: linux/amd64
+            load: true
+            ubuntu_version: focal
+            pmix_version: 5.0.2
+            cuda_version: 12.2.2
+            build_args:
+              BASE_IMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubuntu20.04
+          - context: .
+            push: false
+            platforms: linux/amd64
+            load: true
+            ubuntu_version: jammy
+            pmix_version: 5.0.3
+            cuda_version: 12.2.2
+            build_args:
+              BASE_IMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubuntu22.04
+
     steps:
       - name: Checkout repository
         uses: actions/checkout@v2
 
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v1
+        uses: docker/setup-buildx-action@v3
 
-      - name: Build Docker image
-        run: docker build --no-cache --build-arg SLURM_VERSION=${{ github.event.inputs.slurm_version }} --build-arg PMIX_VERSION=${{ github.event.inputs.pmix_version }} -t slurm_deb_packages .
+      - name: Build docker images
+        uses: docker/build-push-action@v6
+        with:
+          context: ${{ matrix.image.context }}
+          push: ${{ matrix.image.push }}
+          tags: slurm_builder:${{ matrix.image.cuda_version }}-${{ matrix.image.ubuntu_version }}-slurm${{ matrix.slurm.version }}
+          platforms: ${{ matrix.image.platforms }}
+          load: ${{ matrix.image.load }}
+          build-args: |
+            ${{ matrix.image.build_args }}
+            SLURM_VERSION=${{ matrix.slurm.version }}
+            PMIX_VERSION=${{ matrix.image.pmix_version }}
+          cache-from: type=local,src=/tmp/.buildx-cache
+          cache-to: type=local,dest=/tmp/.buildx-cache,mode=max
 
       - name: Create slurm_build_output directory
         run: mkdir -p slurm_build_output
 
       - name: Run Docker container and copy files
         run: |
-          container_id=$(docker create slurm_deb_packages)
+          container_id=$(docker create slurm_builder:${{ matrix.image.cuda_version }}-${{ matrix.image.ubuntu_version }}-slurm${{ matrix.slurm.version }})
           docker start $container_id
           docker cp $container_id:/usr/src/debs/ ./slurm_build_output/
           docker cp $container_id:/usr/src/nccl-tests/build/nccl-tests-perf.tar.gz ./slurm_build_output/
@@ -42,10 +72,10 @@ jobs:
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         with:
-          tag_name: v${{ github.event.inputs.slurm_version }}
-          name: Slurm v${{ github.event.inputs.slurm_version }}
-          body: "Release for Slurm v${{ github.event.inputs.slurm_version }} with OpenPMIx v${{ github.event.inputs.pmix_version }}, libnccl packages and NCCL-test binaries"
-          release_name: Slurm-${{ github.event.inputs.slurm_version }}
+          tag_name: ${{ matrix.image.cuda_version }}-${{ matrix.image.ubuntu_version }}-slurm${{ matrix.slurm.version }}
+          name: Cuda ${{ matrix.image.cuda_version }}, Ubuntu ${{ matrix.image.ubuntu_version }}, Slurm ${{ matrix.slurm.version }}
+          body: "Release based on Ubuntu ${{ matrix.image.ubuntu_version }} for Slurm ${{ matrix.slurm.version }} with cuda ${{ matrix.image.cuda_version }}, PMIx ${{ matrix.image.pmix_version }} and libnccl"
+          release_name: Cuda ${{ matrix.image.cuda_version }}, Ubuntu ${{ matrix.image.ubuntu_version }}, Slurm ${{ matrix.slurm.version }}
           draft: false
           prerelease: false
           files: |
@@ -54,4 +84,4 @@ jobs:
 
 
       - name: Cleanup
-        run: rm -rf slurm_build_output
+        run: rm -rf slurm_build_output/*.deb
diff --git a/Dockerfile b/Dockerfile
index e83c665..478c6da 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,9 @@
-FROM nvidia/cuda:12.2.2-cudnn8-devel-ubuntu20.04
+ARG BASE_IMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubuntu20.04
+
+FROM $BASE_IMAGE
+
+ARG SLURM_VERSION=24.05.2
+ARG PMIX_VERSION=5.0.3
 
 ARG DEBIAN_FRONTEND=noninteractive
 
@@ -30,11 +35,10 @@ RUN apt-get update && \
         jq \
         squashfs-tools \
         zstd \
-        zlibc \
+        zlib1g \
         zlib1g-dev
 
 # Download Slurm
-ARG SLURM_VERSION=24.05.2
 RUN cd /usr/src && \
     wget https://download.schedmd.com/slurm/slurm-${SLURM_VERSION}.tar.bz2 && \
     tar -xvf slurm-${SLURM_VERSION}.tar.bz2 && \
@@ -42,7 +46,6 @@ RUN cd /usr/src && \
 
 # Install PMIx in order to build Slurm with PMIx support
 # Slurm deb packages will be already compiled with PMIx support even without it, but only with v3, while we use v5
-ARG PMIX_VERSION=5.0.3
 RUN cd /usr/src && \
     wget https://github.com/openpmix/openpmix/releases/download/v${PMIX_VERSION}/pmix-${PMIX_VERSION}.tar.gz && \
     tar -xzvf pmix-${PMIX_VERSION}.tar.gz && \