Skip to content

Commit

Permalink
created build_openacc and build_disable_openacc
Browse files Browse the repository at this point in the history
  • Loading branch information
seshadri levante committed Dec 1, 2023
1 parent a383dfc commit 3a5d625
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ slurm.err
slurm.out
fesom2.out
/work/*.nsys-rep
build_*/
2 changes: 1 addition & 1 deletion work/job_levante_gpu
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ JOBID=`echo $SLURM_JOB_ID |cut -d"." -f1`

rm -f fesom.x
#ln -s ../bin/fesom.x . # cp -n ../bin/fesom.x
ln -s /work/ab0995/a270232/base_fesom/build/src/fesom fesom.x
ln -s /work/ab0995/a270232/base_fesom/build_openacc_atomics/src/fesom fesom.x
#ln -s ../bin/fesom.x . # cp -n ../bin/fesom.x

export OMP_NUM_THREADS=4
Expand Down
72 changes: 72 additions & 0 deletions work/job_levante_gpu_disable_openacc
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/bin/bash
# Slurm batch job: runs the FESOM binary taken from the
# build_disable_openacc_atomics build tree on the "gpu" partition, wrapped in
# "nsys profile". Combined stdout/stderr of the run goes to fesom2.out; Slurm's
# own output goes to slurm.out / slurm.err.
#
# NOTE: all "#SBATCH" directives must stay ahead of the first executable
# command, otherwise sbatch ignores them.
#SBATCH --job-name=fesom_gpu_test
#SBATCH --partition=gpu
#SBATCH --nodes=2 # Specify number of nodes
#SBATCH --ntasks-per-node=8
#SBATCH --cpus-per-task=4
#SBATCH --gpus=2 # 4 # 8 for 2 nodes
##SBATCH --gpus-per-task=1 # specific case when tasks=GPUs
#SBATCH --exclusive
#SBATCH --mem=0 # Request all memory available on all nodes
#SBATCH --time=00:20:00 # Set a limit on the total run time
#SBATCH -o slurm.out
#SBATCH -e slurm.err
#SBATCH --account=ab0995
set -e

# ---------------------------------------------------------------------------
# Environment
# ---------------------------------------------------------------------------
source /sw/etc/profile.levante
#source ../env/levante.dkrz.de/shell
#read -r USED_SHELL < ../bin/current_shell_path
USED_SHELL="/work/ab0995/a270232/base_fesom/env/levante.dkrz.de/shell.nvhpc"
source "${USED_SHELL}"

#source /work/ab0995/a270232/refactoring/fesom2/env/levante.dkrz.de/shell.nvhpc
echo "using environment from ${USED_SHELL}"

ulimit -s 204800 # https://docs.dkrz.de/doc/levante/running-jobs/runtime-settings.html

# ---------------------------------------------------------------------------
# Job bookkeeping
# ---------------------------------------------------------------------------
# Determine JOBID: everything before the first "." of SLURM_JOB_ID.
# This has to happen before the log line below; the script previously echoed
# an undefined lowercase $jobid and therefore always printed an empty id.
JOBID="${SLURM_JOB_ID%%.*}"

echo "Submitted job: ${JOBID}"
squeue -u "${USER}"

# Check GPUs available for the job
nvidia-smi

# ---------------------------------------------------------------------------
# Executable
# ---------------------------------------------------------------------------
# Build tree whose binary is linked into the working directory as fesom.x.
FESOM_BIN="/work/ab0995/a270232/base_fesom/build_disable_openacc_atomics/src/fesom"

rm -f fesom.x
#ln -s ../bin/fesom.x . # cp -n ../bin/fesom.x
ln -s "${FESOM_BIN}" fesom.x

export OMP_NUM_THREADS=4
#cp -n ../config/namelist.config .
#cp -n ../config/namelist.forcing .
#cp -n ../config/namelist.oce .
#cp -n ../config/namelist.ice .
#cp -n ../config/namelist.icepack .

# ---------------------------------------------------------------------------
# MPI / UCX runtime settings
# ---------------------------------------------------------------------------
## levante specific gpu env used for ICON otherwise segfault
export OMPI_MCA_pml=ucx # Use UCX to support InfiniBand devices and CUDA

export OMPI_MCA_btl="self" # Only use self transport to reduce overhead

export UCX_RNDV_SCHEME=put_zcopy # Preferred communication scheme with Rendezvous protocol
export UCX_RNDV_THRESH=16384 # Threshold when to switch transport from TCP to NVLINK

export UCX_IB_GPU_DIRECT_RDMA=yes # Allow remote direct memory access from/to GPU

export UCX_TLS=cma,rc,mm,cuda_ipc,cuda_copy,gdr_copy # Include cuda and gdr based transport layers for communication

export UCX_MEMTYPE_CACHE=n

# ---------------------------------------------------------------------------
# Run (timestamps before and after go to slurm.out)
# ---------------------------------------------------------------------------
date
#srun -l fesom.x > fesom2.out 2>&1 #> "fesom2.0.out" 2>&1
srun -l nsys profile fesom.x > fesom2.out 2>&1 #> "fesom2.0.out" 2>&1
date

# qstat -f $PBS_JOBID
#export EXITSTATUS=$?
#if [ ${EXITSTATUS} -eq 0 ] || [ ${EXITSTATUS} -eq 127 ] ; then
#sbatch job_mistral
#fi

0 comments on commit 3a5d625

Please sign in to comment.