Skip to content

Commit

Permalink
Fix build race condition, remove wgrib2 from ARM builds
Browse files Browse the repository at this point in the history
  • Loading branch information
christopherwharrop-noaa committed Nov 8, 2024
1 parent 1c90a5f commit 96d3481
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 122 deletions.
12 changes: 0 additions & 12 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,6 @@ services:
links:
- slurmmaster
slurmnode2:
build:
context: ./node
dockerfile: ./Dockerfile
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node:latest
container_name: spack-stack-node2
hostname: slurmnode2
Expand All @@ -62,9 +59,6 @@ services:
links:
- slurmmaster
slurmnode3:
build:
context: ./node
dockerfile: ./Dockerfile
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node:latest
container_name: spack-stack-node3
hostname: slurmnode3
Expand All @@ -78,9 +72,6 @@ services:
links:
- slurmmaster
slurmnode4:
build:
context: ./node
dockerfile: ./Dockerfile
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node:latest
container_name: spack-stack-node4
hostname: slurmnode4
Expand All @@ -94,9 +85,6 @@ services:
links:
- slurmmaster
slurmnode5:
build:
context: ./node
dockerfile: ./Dockerfile
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node:latest
container_name: spack-stack-node5
hostname: slurmnode5
Expand Down
213 changes: 103 additions & 110 deletions frontend/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,22 +1,13 @@
FROM ghcr.io/noaa-gsl/dockerslurmcluster/slurm-frontend:latest
FROM ghcr.io/noaa-gsl/dockerslurmcluster/slurm-frontend:latest AS builder

ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=Etc/UTC

SHELL ["/bin/bash", "-c"]

# Copy patch files into /tmp for later use
COPY cc.patch.aarch64 /tmp
COPY cc.patch.x86_64 /tmp
COPY openmpi.package.py.patch.aarch64 /tmp
COPY openmpi.package.py.patch.x86_64 /tmp

RUN --mount=type=secret,id=access_key_id --mount=type=secret,id=secret_access_key --mount=type=secret,id=session_token <<EOF
set -e

# Install OS packages
apt-get -y update
apt-get -y install --no-install-recommends \
# Install OS packages
RUN apt-get -y update \
&& apt-get -y install --no-install-recommends \
awscli \
bc \
build-essential \
Expand All @@ -39,54 +30,94 @@ RUN --mount=type=secret,id=access_key_id --mount=type=secret,id=secret_access_ke
subversion \
tcl-dev \
unzip \
zstd
pip3 install boto3
rm -rf /var/lib/apt/lists/*
zstd \
&& pip3 install boto3 \
&& rm -rf /var/lib/apt/lists/*

# Build Lua 5.1.4.9 and Lmod from source, installing both under /usr.
# Each build runs in its own subshell, so the working directory of the outer
# chain never changes and the final cleanup runs from the starting directory.
RUN wget https://sourceforge.net/projects/lmod/files/lua-5.1.4.9.tar.bz2 \
    && tar xvfj lua-5.1.4.9.tar.bz2 \
    && ( \
        cd lua-5.1.4.9 \
        && ./configure --prefix=/usr \
        && make -j 4 \
        && make install \
    ) \
    && git clone --recursive https://github.com/TACC/Lmod.git \
    && ( \
        cd Lmod \
        && ./configure --prefix=/usr \
        && make -j 4 \
        && make install \
    ) \
    # Make every interactive bash shell load the Lmod init script
    && echo "source /usr/lmod/lmod/init/bash" >> /etc/bash.bashrc \
    # Remove the tarball and both source trees in the same layer that created them
    && rm -rf lua* Lmod

# Stage the per-architecture patch files in /tmp; the spack-stack install step
# picks the pair matching `uname -m` and applies them to the spack checkout
COPY cc.patch.aarch64 \
     cc.patch.x86_64 \
     openmpi.package.py.patch.aarch64 \
     openmpi.package.py.patch.x86_64 \
     /tmp/


# Install Lua
wget https://sourceforge.net/projects/lmod/files/lua-5.1.4.9.tar.bz2
tar xvfj lua-5.1.4.9.tar.bz2
pushd lua-5.1.4.9
./configure --prefix=/usr
make -j 4
make install
popd

# Install Lmod
git clone --recursive https://github.com/TACC/Lmod.git
pushd Lmod
./configure --prefix=/usr
make -j 4
make install
echo "source /usr/lmod/lmod/init/bash" >> /etc/bash.bashrc
popd

# Cleanup
rm -rf lua* Lmod

# Clone spack-stack and create and configure the unified env
#
# This single RUN step clones the release/1.8.0 branch of spack-stack into
# /opt/spack-stack, patches the bundled spack checkout, creates the
# "unified-env" environment and edits its configuration. The top-level commands
# are chained with &&, so a failing command stops the step.
RUN cd /opt \
&& git clone -b release/1.8.0 --recurse-submodules https://github.com/jcsda/spack-stack.git \
&& pushd spack-stack \
&& . ./setup.sh \
# Patch spack: move the cc and openmpi patch files matching `uname -m`
# (aarch64 or x86_64 variants are copied into /tmp) into the spack checkout and
# apply them with -f so patch never prompts.
&& pushd spack \
&& mv /tmp/cc.patch.$(uname -m) cc.patch \
&& mv /tmp/openmpi.package.py.patch.$(uname -m) openmpi.package.py.patch \
&& patch -f -p0 < openmpi.package.py.patch \
&& patch -f -p0 < cc.patch \
&& popd \
# Create the unified environment and activate it
&& spack stack create env --site linux.default --template unified-dev --name unified-env --compiler gcc \
&& pushd envs/unified-env \
&& spack env activate . \
# Add the buildcache mirror with empty S3 credentials
# (presumably anonymous, read-only access -- confirm against the bucket policy)
&& spack mirror add --s3-access-key-id "" --s3-access-key-secret "" s3_spack_stack_buildcache_ro s3://chiltepin-us-east-2/spack-stack/ \
# Find external packages and compilers. SPACK_SYSTEM_CONFIG_PATH points at this
# environment's site/ directory only while these commands run
# (presumably so `--scope system` results are written there -- TODO confirm).
&& export SPACK_SYSTEM_CONFIG_PATH="$PWD/site" \
&& spack external find --scope system \
--exclude cmake \
--exclude curl \
--exclude openssl \
--exclude openssh \
--exclude python \
&& spack external find --scope system wget \
&& spack compiler find --scope system \
&& unset SPACK_SYSTEM_CONFIG_PATH \
# Set the compiler and MPI provider and request +pic for a few packages.
# NOTE(review): "[email protected]" looks like an e-mail-obfuscation artifact of a
# "<package>@<version>" spec -- restore the real specs before using this text.
&& spack config add "packages:all:compiler:[[email protected]]" \
&& spack config add "packages:all:providers:mpi:[[email protected]]" \
&& spack config add "packages:fontconfig:variants:+pic" \
&& spack config add "packages:pixman:variants:+pic" \
&& spack config add "packages:cairo:variants:+pic" \
# Require openmpi to be built with Slurm scheduler and PMI support
&& spack config --scope env:/opt/spack-stack/envs/unified-env:common add "packages:openmpi:require:~internal-hwloc +two_level_namespace schedulers=slurm +pmi" \
# Append Slurm (prefix /usr, not buildable) as an external package to the
# site packages.yaml.
# NOTE(review): the leading whitespace inside these strings is the YAML
# indentation; it appears collapsed here -- verify against the repository.
&& echo " slurm:" >> /opt/spack-stack/envs/unified-env/site/packages.yaml \
&& echo " externals:" >> /opt/spack-stack/envs/unified-env/site/packages.yaml \
&& echo " - spec: [email protected]" >> /opt/spack-stack/envs/unified-env/site/packages.yaml \
&& echo " prefix: /usr" >> /opt/spack-stack/envs/unified-env/site/packages.yaml \
&& echo " buildable: false" >> /opt/spack-stack/envs/unified-env/site/packages.yaml \
# On aarch64 only, drop wgrib2 from the package and Lmod module configuration.
# NOTE(review): the two removals are separated by `;`, so the second one runs
# even if the first fails and only the second one's status reaches the chain.
&& if [ "$(uname -m)" == "aarch64" ]; then \
spack config --scope env:/opt/spack-stack/envs/unified-env:common remove "packages:wgrib2" ; \
spack config --scope env:/opt/spack-stack/envs/unified-env:common remove "modules:default:lmod:wgrib2" ; \
fi \
# Switch the module configuration from tcl to lmod
&& sed -i 's/tcl/lmod/g' site/modules.yaml \
&& sed -i 's/tcl/lmod/g' common/modules.yaml \
# Collapse the MPI/compiler path prefixes in module names down to {name}
&& sed -i 's:{^mpi.name}/{^mpi.version}/{compiler.name}/{compiler.version}/{name}:{name}:' common/modules.yaml \
&& sed -i 's:{compiler.name}/{compiler.version}/{name}:{name}:' common/modules.yaml

# Concretize the Spack environment and keep the output in log.concretize.
#
# `set -o pipefail` is enabled immediately before the pipeline: without it the
# exit status of `spack concretize ... | tee` is the status of `tee`, so a failed
# concretization would be reported as success and the build would carry on with
# an unconcretized environment. The build shell is bash (see the SHELL
# instruction), which supports this option.
RUN cd /opt/spack-stack \
    && . ./setup.sh \
    && cd /opt/spack-stack/envs/unified-env \
    && spack env activate . \
    && set -o pipefail \
    && spack concretize 2>&1 | tee log.concretize

# Install the Spack environment
RUN --mount=type=secret,id=access_key_id --mount=type=secret,id=secret_access_key --mount=type=secret,id=session_token <<EOF
# Install spack-stack and setup environment
cd /opt
git clone -b release/1.8.0 --recurse-submodules https://github.com/jcsda/spack-stack.git
pushd spack-stack
. ./setup.sh

# Patch spack for correct builds of openmpi and openblas (on arm64)
pushd spack
mv /tmp/cc.patch.$(uname -m) cc.patch
mv /tmp/openmpi.package.py.patch.$(uname -m) openmpi.package.py.patch
patch -f -p0 < openmpi.package.py.patch
patch -f -p0 < cc.patch
popd

# Create the unified environment and activate it
spack stack create env --site linux.default --template unified-dev --name unified-env --compiler gcc
pushd envs/unified-env
spack env activate .

# Add a read-only buildcache mirror
spack mirror add --s3-access-key-id "" --s3-access-key-secret "" s3_spack_stack_buildcache_ro s3://chiltepin-us-east-2/spack-stack/

# Add an autopush buildcache mirror if credentials were provided
if [ -f /run/secrets/access_key_id ]; then
export AWS_ACCESS_KEY_ID=$(cat /run/secrets/access_key_id)
Expand All @@ -95,67 +126,29 @@ RUN --mount=type=secret,id=access_key_id --mount=type=secret,id=secret_access_ke
spack mirror add --s3-access-key-id $AWS_ACCESS_KEY_ID --s3-access-key-secret $AWS_SECRET_ACCESS_KEY --s3-access-token $AWS_SESSION_TOKEN --autopush --unsigned s3_spack_stack_buildcache_rw s3://chiltepin-us-east-2/spack-stack/
fi

# Find external packages
export SPACK_SYSTEM_CONFIG_PATH="$PWD/site"
spack external find --scope system \
--exclude cmake \
--exclude curl \
--exclude openssl \
--exclude openssh \
--exclude python
spack external find --scope system wget
spack compiler find --scope system
unset SPACK_SYSTEM_CONFIG_PATH

# Set compiler and MPI provider and tweak a few package configs
spack config add "packages:all:compiler:[[email protected]]"
spack config add "packages:all:providers:mpi:[[email protected]]"
spack config add "packages:fontconfig:variants:+pic"
spack config add "packages:pixman:variants:+pic"
spack config add "packages:cairo:variants:+pic"

# Modify the configuration to build openmpi with slurm and PMI support
spack config --scope env:/opt/spack-stack/envs/unified-env:common add "packages:openmpi:require:~internal-hwloc +two_level_namespace schedulers=slurm +pmi"

# Add Slurm as an external package for use as a dependency for openmpi
echo " slurm:" >> /opt/spack-stack/envs/unified-env/site/packages.yaml
echo " externals:" >> /opt/spack-stack/envs/unified-env/site/packages.yaml
echo " - spec: [email protected]" >> /opt/spack-stack/envs/unified-env/site/packages.yaml
echo " prefix: /usr" >> /opt/spack-stack/envs/unified-env/site/packages.yaml
echo " buildable: false" >> /opt/spack-stack/envs/unified-env/site/packages.yaml

# Make sure we use Lmod modules
sed -i 's/tcl/lmod/g' site/modules.yaml
sed -i 's/tcl/lmod/g' common/modules.yaml

# Remove unwanted path prefixes for modules
sed -i 's:{^mpi.name}/{^mpi.version}/{compiler.name}/{compiler.version}/{name}:{name}:' common/modules.yaml
sed -i 's:{compiler.name}/{compiler.version}/{name}:{name}:' common/modules.yaml

# Concretize the environment
spack concretize 2>&1 | tee log.concretize
spack mirror list

# Install the environment, use autopush buildcache if credentials were provided
spack install --no-check-signature # 2>&1 | tee log.install
if [ -f /run/secrets/access_key_id ]; then
spack buildcache update-index s3_spack_stack_buildcache_rw
fi
EOF

# Generate modules
source /usr/lmod/lmod/init/bash
spack module lmod refresh -y
spack stack setup-meta-modules

# Clean up unnecessary packages
spack gc -y

# Do not strip binaries, it breaks JEDI/UFS builds

# Cleanup unneeded .spack directories
find /opt/spack-stack/envs/unified-env/install/gcc/11.4.0 -name .spack -type d -print0 | xargs -0 rm -rf "{}"
rm -rf ~/.spack
# Create the modulefiles and clean up.
#
# Re-enters the unified-env environment, regenerates the Lmod modulefiles and
# spack-stack meta-modules, garbage-collects packages that are no longer needed,
# and removes Spack metadata directories from the install tree.
RUN cd /opt \
    && pushd spack-stack \
    && . ./setup.sh \
    && pushd envs/unified-env \
    && spack env activate . \
    # Lmod must be initialised in this shell before the modules are generated
    && source /usr/lmod/lmod/init/bash \
    && spack module lmod refresh -y \
    && spack stack setup-meta-modules \
    && spack gc -y \
    # Delete every .spack directory under the gcc 11.4.0 install tree.
    # -prune stops find from descending into a directory that is about to be
    # removed, and -exec ... {} + hands the paths to rm directly. The previous
    # `xargs -0 rm -rf "{}"` passed a literal "{}" operand to rm (xargs only
    # substitutes {} when -I is given), and its pipe hid any failure of find.
    # A missing install tree now fails the step instead of being ignored.
    && find /opt/spack-stack/envs/unified-env/install/gcc/11.4.0 -name .spack -type d -prune -exec rm -rf {} + \
    # Drop the per-user Spack cache/config left in the build user's home
    && rm -rf ~/.spack

# Copy the installed environment into the final image
FROM ghcr.io/noaa-gsl/dockerslurmcluster/slurm-frontend:latest

# Cleanup /tmp
rm -rf /tmp/*
EOF
# Bring the build results over from the builder stage.
# /usr holds Lua and Lmod, both configured with --prefix=/usr in the builder.
COPY --from=builder /usr /usr
# /etc carries the bash.bashrc line that sources the Lmod init script.
COPY --from=builder /etc /etc
# Only the install tree of the unified-env environment is copied; the rest of
# the spack-stack checkout stays behind in the builder stage.
COPY --from=builder /opt/spack-stack/envs/unified-env/install /opt/spack-stack/envs/unified-env/install

0 comments on commit 96d3481

Please sign in to comment.