Skip to content

Commit

Permalink
try to build
Browse files (browse the repository at this point in the history)
  • Loading branch information
asteny committed Aug 19, 2024
1 parent 7bd19a4 commit 543c926
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 23 deletions.
68 changes: 49 additions & 19 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -1,37 +1,67 @@
name: Build Docker and Create Release

on:
workflow_dispatch:
inputs:
slurm_version:
description: 'Slurm version'
required: true
default: '24.05.2'
pmix_version:
description: 'Pmix version'
required: true
default: '5.0.3'
push:

jobs:
build:
runs-on: self-hosted

strategy:
max-parallel: 3
matrix:
slurm:
- version: 23.11.6
# - version: 23.11.9
# - version: 24.05.2
image:
- context: .
push: false
platforms: linux/amd64
load: true
ubuntu_version: focal
pmix_version: 5.0.2
cuda_version: 12.2.2
build_args:
BASE_IMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubuntu20.04
- context: .
push: false
platforms: linux/amd64
load: true
ubuntu_version: jammy
pmix_version: 5.0.3
cuda_version: 12.2.2
build_args:
BASE_IMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubuntu22.04

steps:
- name: Checkout repository
uses: actions/checkout@v2

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
uses: docker/setup-buildx-action@v3

- name: Build Docker image
run: docker build --no-cache --build-arg SLURM_VERSION=${{ github.event.inputs.slurm_version }} --build-arg PMIX_VERSION=${{ github.event.inputs.pmix_version }} -t slurm_deb_packages .
- name: Build docker images
uses: docker/build-push-action@v6
with:
context: ${{ matrix.image.context }}
push: ${{ matrix.image.push }}
tags: slurm_builder:${{ matrix.image.cuda_version }}-${{ matrix.image.ubuntu_version }}-slurm${{ matrix.slurm.version }}
platforms: ${{ matrix.image.platforms }}
load: ${{ matrix.image.load }}
build-args: |
${{ matrix.image.build_args }}
SLURM_VERSION=${{ matrix.slurm.version }}
PMIX_VERSION=${{ matrix.image.pmix_version }}
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache,mode=max

- name: Create slurm_build_output directory
run: mkdir -p slurm_build_output

- name: Run Docker container and copy files
run: |
container_id=$(docker create slurm_deb_packages)
container_id=$(docker create slurm_builder:${{ matrix.image.cuda_version }}-${{ matrix.image.ubuntu_version }}-slurm${{ matrix.slurm.version }})
docker start $container_id
docker cp $container_id:/usr/src/debs/ ./slurm_build_output/
docker cp $container_id:/usr/src/nccl-tests/build/nccl-tests-perf.tar.gz ./slurm_build_output/
Expand All @@ -42,10 +72,10 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
tag_name: v${{ github.event.inputs.slurm_version }}
name: Slurm v${{ github.event.inputs.slurm_version }}
body: "Release for Slurm v${{ github.event.inputs.slurm_version }} with OpenPMIx v${{ github.event.inputs.pmix_version }}, libnccl packages and NCCL-test binaries"
release_name: Slurm-${{ github.event.inputs.slurm_version }}
tag_name: ${{ matrix.image.cuda_version }}-${{ matrix.image.ubuntu_version }}-slurm${{ matrix.slurm.version }}
name: Cuda ${{ matrix.image.cuda_version }}, Ubuntu ${{ matrix.image.ubuntu_version }}, Slurm ${{ matrix.slurm.version }}
body: "Release based on Ubuntu ${{ matrix.image.ubuntu_version }} for Slurm ${{ matrix.slurm.version }} with cuda ${{ matrix.image.cuda_version }}, PMIx ${{ matrix.image.pmix_version }} and libnccl"
release_name: Cuda ${{ matrix.image.cuda_version }}, Ubuntu ${{ matrix.image.ubuntu_version }}, Slurm ${{ matrix.slurm.version }}
draft: false
prerelease: false
files: |
Expand All @@ -54,4 +84,4 @@ jobs:
- name: Cleanup
run: rm -rf slurm_build_output
run: rm -rf slurm_build_output/*.deb
11 changes: 7 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
FROM nvidia/cuda:12.2.2-cudnn8-devel-ubuntu20.04
ARG BASE_IMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubuntu20.04

FROM $BASE_IMAGE

ARG SLURM_VERSION=24.05.2
ARG PMIX_VERSION=5.0.3

ARG DEBIAN_FRONTEND=noninteractive

Expand Down Expand Up @@ -30,19 +35,17 @@ RUN apt-get update && \
jq \
squashfs-tools \
zstd \
zlibc \
zlib1g \
zlib1g-dev

# Download Slurm
ARG SLURM_VERSION=24.05.2
RUN cd /usr/src && \
wget https://download.schedmd.com/slurm/slurm-${SLURM_VERSION}.tar.bz2 && \
tar -xvf slurm-${SLURM_VERSION}.tar.bz2 && \
rm -rf slurm-${SLURM_VERSION}.tar.bz2

# Install PMIx in order to build Slurm with PMIx support.
# Even without this step, the Slurm deb packages are already compiled with PMIx support, but only with v3, whereas we use v5.
ARG PMIX_VERSION=5.0.3
RUN cd /usr/src && \
wget https://github.com/openpmix/openpmix/releases/download/v${PMIX_VERSION}/pmix-${PMIX_VERSION}.tar.gz && \
tar -xzvf pmix-${PMIX_VERSION}.tar.gz && \
Expand Down

0 comments on commit 543c926

Please sign in to comment.