Skip to content

Commit

Permalink
Merge pull request #3932 from QMCPACK/rc_3140
Browse files Browse the repository at this point in the history
Rc_3140
  • Loading branch information
prckent authored Apr 6, 2022
2 parents 5f4ba01 + 801bead commit dbe4ceb
Show file tree
Hide file tree
Showing 386 changed files with 8,864 additions and 4,470 deletions.
23 changes: 23 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,29 @@

Notable changes to QMCPACK are documented in this file.

## [3.14.0] - 2022-04-06

This release focuses on performance improvements to the OpenMP target offload version for GPUs as well as ongoing minor
improvements. The new GPU implementation rivals the legacy CUDA version in performance for a broad range of problems
while offering more functionality, such as three-body Jastrow functions. Developers are very interested in feedback from
users about the new version and will prioritize developments based on comments received. A new driver\_version switch is
introduced, currently optional, to disambiguate between the versions and their inputs.

- New global driver\_version switch to select between batched and legacy codes. This will become a required input tag in the next major release series of QMCPACK, but remains optional in 3.x versions [\#3897](https://github.com/QMCPACK/qmcpack/pull/3897)
- Optimization of block sizes in GPU offload kernels [\#3910](https://github.com/QMCPACK/qmcpack/pull/3910)
- GPU Offload of one-body Jastrow ratio calculation in pseudopotential evaluation [\#3905](https://github.com/QMCPACK/qmcpack/pull/3905)
- GPU Offload of some Coulomb potential evaluations [\#3842](https://github.com/QMCPACK/qmcpack/pull/3842)
- Partial GPU offload of multideterminant evaluation, e.g., [\#3892](https://github.com/QMCPACK/qmcpack/pull/3892)
- Increased performance via more selective distance table computation [\#3846](https://github.com/QMCPACK/qmcpack/pull/3846)
- Improved performance on AMD GPUs via rocSOLVER integration [\#3756](https://github.com/QMCPACK/qmcpack/issues/3756)
- HIP build options shown in output [\#3919](https://github.com/QMCPACK/qmcpack/pull/3919)
- Documentation improvements, particularly relating to installation.
- Various bug fixes and ongoing cleanup.

### NEXUS

- Nexus: proper use of max\_seconds in legacy drivers [\#3877](https://github.com/QMCPACK/qmcpack/pull/3877)

## [3.13.0] - 2022-02-16

### Notes
Expand Down
19 changes: 12 additions & 7 deletions CMake/ClangCompilers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ if(QMC_OMP)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")

if(ENABLE_OFFLOAD)
if (QMC_CUDA2HIP)
if(QMC_CUDA2HIP)
set(OFFLOAD_TARGET_DEFAULT "amdgcn-amd-amdhsa")
else()
set(OFFLOAD_TARGET_DEFAULT "nvptx64-nvidia-cuda")
Expand All @@ -36,14 +36,18 @@ if(QMC_OMP)
set(OFFLOAD_ARCH gfx906)
endif()

if(NOT DEFINED OFFLOAD_ARCH AND OFFLOAD_TARGET MATCHES "nvptx64" AND DEFINED CMAKE_CUDA_ARCHITECTURES)
if(NOT DEFINED OFFLOAD_ARCH
AND OFFLOAD_TARGET MATCHES "nvptx64"
AND DEFINED CMAKE_CUDA_ARCHITECTURES)
list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES)
if(NUMBER_CUDA_ARCHITECTURES EQUAL "1")
set(OFFLOAD_ARCH sm_${CMAKE_CUDA_ARCHITECTURES})
else()
message(FATAL_ERROR "LLVM does not yet support offload to multiple architectures! "
"Deriving OFFLOAD_ARCH from CMAKE_CUDA_ARCHITECTURES failed. "
"Please keep only one entry in CMAKE_CUDA_ARCHITECTURES or set OFFLOAD_ARCH.")
message(
FATAL_ERROR
"LLVM does not yet support offload to multiple architectures! "
"Deriving OFFLOAD_ARCH from CMAKE_CUDA_ARCHITECTURES failed. "
"Please keep only one entry in CMAKE_CUDA_ARCHITECTURES or set OFFLOAD_ARCH.")
endif()
endif()

Expand All @@ -69,7 +73,8 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wvla")

# set compiler warnings
string(APPEND CMAKE_CXX_FLAGS " -Wall -Wno-unused-variable -Wno-overloaded-virtual -Wno-unused-private-field -Wno-unused-local-typedef")
string(APPEND CMAKE_CXX_FLAGS
" -Wall -Wno-unused-variable -Wno-overloaded-virtual -Wno-unused-private-field -Wno-unused-local-typedef")

if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 11.0)
string(APPEND CMAKE_CXX_FLAGS " -Wsuggest-override")
Expand Down Expand Up @@ -142,7 +147,7 @@ endif(QMC_BUILD_STATIC)

# Coverage
if(ENABLE_GCOV)
set(GCOV_COVERAGE TRUE)
set(GCOV_SUPPORTED TRUE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --coverage")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --coverage")
Expand Down
4 changes: 2 additions & 2 deletions CMake/FindRMG.cmake
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# Locate rmg-cpu
# Locate rmg-cpu
# Take RMG_BIN as hint for location

find_program(RMG_CPU_EXE rmg-cpu HINTS ${RMG_BIN})

set(RMG_FOUND FALSE)
if(RMG_CPU_EXE)
MESSAGE(STATUS "RMG_CPU_EXE=${RMG_CPU_EXE}")
message(STATUS "RMG_CPU_EXE=${RMG_CPU_EXE}")
set(RMG_FOUND TRUE)
endif()

Expand Down
22 changes: 14 additions & 8 deletions CMake/GNUCompilers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ if(QMC_OMP)
message(WARNING "GCC OpenMP offload feature requires 12.0 or higher.")
endif()

if (QMC_CUDA2HIP)
if(QMC_CUDA2HIP)
set(OFFLOAD_TARGET_DEFAULT "amdgcn-amdhsa")
else()
set(OFFLOAD_TARGET_DEFAULT "nvptx-none")
Expand All @@ -28,14 +28,18 @@ if(QMC_OMP)
set(OFFLOAD_ARCH gfx906)
endif()

if(NOT DEFINED OFFLOAD_ARCH AND OFFLOAD_TARGET MATCHES "nvptx-none" AND DEFINED CMAKE_CUDA_ARCHITECTURES)
if(NOT DEFINED OFFLOAD_ARCH
AND OFFLOAD_TARGET MATCHES "nvptx-none"
AND DEFINED CMAKE_CUDA_ARCHITECTURES)
list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES)
if(NUMBER_CUDA_ARCHITECTURES EQUAL "1")
set(OFFLOAD_ARCH sm_${CMAKE_CUDA_ARCHITECTURES})
else()
message(FATAL_ERROR "GCC does not yet support offload to multiple architectures! "
"Deriving OFFLOAD_ARCH from CMAKE_CUDA_ARCHITECTURES failed. "
"Please keep only one entry in CMAKE_CUDA_ARCHITECTURES or set OFFLOAD_ARCH.")
message(
FATAL_ERROR
"GCC does not yet support offload to multiple architectures! "
"Deriving OFFLOAD_ARCH from CMAKE_CUDA_ARCHITECTURES failed. "
"Please keep only one entry in CMAKE_CUDA_ARCHITECTURES or set OFFLOAD_ARCH.")
endif()
endif()

Expand All @@ -47,7 +51,10 @@ if(QMC_OMP)
set(OPENMP_OFFLOAD_COMPILE_OPTIONS
"${OPENMP_OFFLOAD_COMPILE_OPTIONS} -foffload-options=${OFFLOAD_TARGET}=\"-misa=${OFFLOAD_ARCH}\"")
else()
message(WARNING "We don't know how to handle OFFLOAD_ARCH=${OFFLOAD_ARCH} for OFFLOAD_TARGET=${OFFLOAD_TARGET}. Got ignored.")
message(
WARNING
"We don't know how to handle OFFLOAD_ARCH=${OFFLOAD_ARCH} for OFFLOAD_TARGET=${OFFLOAD_TARGET}. Got ignored."
)
endif()
endif()
else()
Expand All @@ -59,8 +66,7 @@ endif(QMC_OMP)
add_definitions(-Drestrict=__restrict__)

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -finline-limit=1000 -fstrict-aliasing -funroll-all-loops")
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -finline-limit=1000 -fstrict-aliasing -funroll-all-loops")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -finline-limit=1000 -fstrict-aliasing -funroll-all-loops")

set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -fno-omit-frame-pointer")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-omit-frame-pointer")
Expand Down
3 changes: 2 additions & 1 deletion CMake/macros.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ function(

if(NOT QMC_OMP)
if(${THREADS} GREATER 1)
message(VERBOSE "Disabling test ${TESTNAME} (exceeds maximum number of threads=1 if OpenMP is disabled -DQMC_OMP=0)")
message(VERBOSE
"Disabling test ${TESTNAME} (exceeds maximum number of threads=1 if OpenMP is disabled -DQMC_OMP=0)")
return()
endif()
endif()
Expand Down
71 changes: 27 additions & 44 deletions CMake/run_rmg.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,23 @@ if(QMC_NO_SLOW_CUSTOM_TESTING_COMMANDS)
else(QMC_NO_SLOW_CUSTOM_TESTING_COMMANDS)

function(
ADD_RMG_TEST
TESTNAME
NPROCS
NTHREADS
TEST_BINARY
WORKDIR
TEST_INPUT)
ADD_RMG_TEST
TESTNAME
NPROCS
NTHREADS
TEST_BINARY
WORKDIR
TEST_INPUT)
#if(HAVE_MPI)
# add_test(NAME ${TESTNAME} COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${NPROCS} ${MPIEXEC_PREFLAGS}
# ${TEST_BINARY} ${TEST_INPUT})
#else(HAVE_MPI)
add_test(NAME ${TESTNAME} COMMAND ${TEST_BINARY} ${TEST_INPUT})
#endif(HAVE_MPI)
add_test(NAME ${TESTNAME} COMMAND ${TEST_BINARY} ${TEST_INPUT})
#endif(HAVE_MPI)
set_tests_properties(
${TESTNAME}
PROPERTIES ENVIRONMENT
"OMP_NUM_THREADS=${NTHREADS};RMG_NUM_THREADS=${NTHREADS}"
"OMP_NUM_THREADS=${NTHREADS};RMG_NUM_THREADS=${NTHREADS}"
PROCESSORS
${NPROCS}
PROCESSOR_AFFINITY
Expand All @@ -39,31 +39,13 @@ else(QMC_NO_SLOW_CUSTOM_TESTING_COMMANDS)
PROPERTY LABELS "converter;rmg")
endfunction()

function(
ADD_RMG_CONVERT_TEST
TESTNAME
PREFIX
WORKDIR
TEST_INPUT)
function(ADD_RMG_CONVERT_TEST TESTNAME PREFIX WORKDIR TEST_INPUT)
add_test(NAME ${TESTNAME} COMMAND $<TARGET_FILE:convert4qmc> -rmg ${TEST_INPUT} -prefix ${PREFIX})
set_tests_properties(
${TESTNAME}
PROPERTIES
WORKING_DIRECTORY
${WORKDIR})
set_property(
TEST ${TESTNAME}
APPEND
PROPERTY LABELS "converter;rmg")
set_tests_properties(${TESTNAME} PROPERTIES WORKING_DIRECTORY ${WORKDIR})
set_property(TEST ${TESTNAME} APPEND PROPERTY LABELS "converter;rmg")
endfunction()

function(
RUN_RMG_TEST
BASE_NAME
SRC_DIR
NPROCS
NTHREADS
TEST_NAME)
function(RUN_RMG_TEST BASE_NAME SRC_DIR NPROCS NTHREADS TEST_NAME)
set(FULL_NAME ${BASE_NAME}-np-${NPROCS})
set(${TEST_NAME}
${FULL_NAME}
Expand All @@ -85,26 +67,27 @@ function(SOFTLINK_H5_RMG_WAVES SOURCE PREFIX)
# set(${TEST_NAME}
# "LINK_${SOURCE}_h5_Waves"
# PARENT_SCOPE)
add_test(NAME LINK_${SOURCE}_h5_Waves COMMAND ${qmcpack_SOURCE_DIR}/tests/scripts/clean_and_link_h5.sh
${SOURCE}/Waves/wave.out.h5 ${SOURCE}/${PREFIX}.h5)
add_test(NAME LINK_${SOURCE}_h5_Waves COMMAND ${qmcpack_SOURCE_DIR}/tests/scripts/clean_and_link_h5.sh
${SOURCE}/Waves/wave.out.h5 ${SOURCE}/${PREFIX}.h5)
set_tests_properties(LINK_${SOURCE}_h5_Waves PROPERTIES DEPENDS ${SOURCE}-scf)
set_property(TEST LINK_${SOURCE}_h5_Waves APPEND PROPERTY LABELS "rmg")
endfunction()

function(SOFTLINK_RMG_INPUT SOURCE TARGET PREFIX TEST_NAME)
set(${TEST_NAME}
"LINK_${SOURCE}_TO_${TARGET}"
"LINK_${SOURCE}_TO_${TARGET}"
PARENT_SCOPE)
add_test(NAME LINK_${SOURCE}_TO_${TARGET} COMMAND ${qmcpack_SOURCE_DIR}/tests/scripts/clean_and_link_h5.sh
${SOURCE}/${PREFIX}.h5 ${SOURCE}-${TARGET}/${PREFIX}.h5)
set_tests_properties(LINK_${SOURCE}_TO_${TARGET} PROPERTIES DEPENDS ${SOURCE}-scf)
set_property(TEST LINK_${SOURCE}_TO_${TARGET} APPEND PROPERTY LABELS "rmg")
add_test(NAME COPY_${SOURCE}_XML_TO_${TARGET} COMMAND
bash -c "mkdir -p ${SOURCE}-${TARGET}; \
add_test(NAME LINK_${SOURCE}_TO_${TARGET} COMMAND ${qmcpack_SOURCE_DIR}/tests/scripts/clean_and_link_h5.sh
${SOURCE}/${PREFIX}.h5 ${SOURCE}-${TARGET}/${PREFIX}.h5)
set_tests_properties(LINK_${SOURCE}_TO_${TARGET} PROPERTIES DEPENDS ${SOURCE}-scf)
set_property(TEST LINK_${SOURCE}_TO_${TARGET} APPEND PROPERTY LABELS "rmg")
add_test(
NAME COPY_${SOURCE}_XML_TO_${TARGET}
COMMAND
bash -c "mkdir -p ${SOURCE}-${TARGET}; \
cp ${SOURCE}/${PREFIX}.structure.xml ${SOURCE}-${TARGET}/${PREFIX}.structure.xml ; \
cp ${SOURCE}/${PREFIX}.wfnoj.xml ${SOURCE}-${TARGET}/${PREFIX}.wfnoj.xml ; \
cp ${SOURCE}/*.qmcpp.xml ${SOURCE}-${TARGET}/")
set_tests_properties(COPY_${SOURCE}_XML_TO_${TARGET} PROPERTIES DEPENDS ${SOURCE}-scf)
set_property(TEST COPY_${SOURCE}_XML_TO_${TARGET} APPEND PROPERTY LABELS "rmg")
set_tests_properties(COPY_${SOURCE}_XML_TO_${TARGET} PROPERTIES DEPENDS ${SOURCE}-scf)
set_property(TEST COPY_${SOURCE}_XML_TO_${TARGET} APPEND PROPERTY LABELS "rmg")
endfunction()

10 changes: 6 additions & 4 deletions CMake/test_labels.cmake
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
function(ADD_TEST_LABELS TEST_NAME TEST_LABELS)
set(TEST_LABELS_TEMP "")
set(TEST_LABELS_UNIQUE_NAME TEST_LABELS_${TEST_NAME}_${QMC_CUDA}_${QMC_COMPLEX}_${QMC_MIXED_PRECISION})
if (DEFINED ${TEST_LABELS_UNIQUE_NAME})
if(DEFINED ${TEST_LABELS_UNIQUE_NAME})
set(TEST_LABELS_TEMP ${${TEST_LABELS_UNIQUE_NAME}})
else()
set(SUCCESS FALSE)
execute_process(
COMMAND ${Python3_EXECUTABLE} ${qmcpack_SOURCE_DIR}/tests/scripts/test_labels.py ${TEST_NAME} ${QMC_CUDA} ${QMC_COMPLEX}
${QMC_MIXED_PRECISION}
COMMAND ${Python3_EXECUTABLE} ${qmcpack_SOURCE_DIR}/tests/scripts/test_labels.py ${TEST_NAME} ${QMC_CUDA}
${QMC_COMPLEX} ${QMC_MIXED_PRECISION}
OUTPUT_VARIABLE TEST_LABELS_TEMP
RESULT_VARIABLE SUCCESS)
if(${SUCCESS} STREQUAL "0")
set(${TEST_LABELS_UNIQUE_NAME} ${TEST_LABELS_TEMP} CACHE INTERNAL "for internal use only; do not modify")
set(${TEST_LABELS_UNIQUE_NAME}
${TEST_LABELS_TEMP}
CACHE INTERNAL "for internal use only; do not modify")
else()
message("Warning: test labeling failed. Test labeling error output:\n${TEST_LABELS_TEMP}")
set(TEST_LABELS_TEMP "")
Expand Down
5 changes: 1 addition & 4 deletions CMake/unit_test.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,5 @@ function(add_test_target_in_output_location TARGET_NAME_TO_TEST EXE_DIR_RELATIVE
set(TESTNAME build_output_${TARGET_NAME_TO_TEST}_exists)
add_test(NAME ${TESTNAME} COMMAND ls ${qmcpack_BINARY_DIR}/bin/${BASE_NAME})

set_property(
TEST ${TESTNAME}
APPEND
PROPERTY LABELS "unit;deterministic")
set_property(TEST ${TESTNAME} APPEND PROPERTY LABELS "unit;deterministic")
endfunction()
Loading

0 comments on commit dbe4ceb

Please sign in to comment.