From 306cd1a4f91b677d0074cdc319ce76d27692dfef Mon Sep 17 00:00:00 2001 From: Pramod S Kumbhar Date: Mon, 4 Apr 2022 00:26:03 +0200 Subject: [PATCH 001/128] Support for shared libraries in GPU execution (python launch support) * mod2c now generates code without need of global variables * coreneuron and mechanism library can be built as shared and it enables launching coreneuron on GPU via python * scopmath library can be also shared * removed acc/openmp global annotations for celsius, pi and secondorder and they don't need to be copied on GPU - [x] MOD2C generates code without using globals / acc declare See BlueBrain/mod2c/pull/78 - [x] Basic test with special and python on GPU See https://github.com/BlueBrain/CoreNeuron/issues/141#issuecomment-1086742194 - [ ] Link issues with CUDA part e.g. nrnran123.cu functions result into link errors, see https://github.com/BlueBrain/CoreNeuron/issues/141#issuecomment-1086742194 @olupton to rescue! - [ ] Check celsius usage within coreneuron source code - [ ] Investigate why acc_deviceptr(ml->data) returns host pointer when coreneuron is launched via python. See https://github.com/BlueBrain/CoreNeuron/issues/141#issuecomment-1086746848 - [ ] Run neuron test suite and external models like olfactory-bulb via python - [ ] Update submodule BlueBrain/mod2c/pull/78 --- CMake/OpenAccHelper.cmake | 7 ++++++- CMakeLists.txt | 8 -------- coreneuron/CMakeLists.txt | 2 +- coreneuron/apps/main1.cpp | 2 -- coreneuron/gpu/nrn_acc_manager.cpp | 17 ++++++++++++----- coreneuron/nrnconf.h | 8 -------- external/mod2c | 2 +- extra/nrnivmodl_core_makefile.in | 3 ++- 8 files changed, 22 insertions(+), 27 deletions(-) diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index 1c18225b6..8bdf5726a 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -67,7 +67,12 @@ if(CORENRN_ENABLE_GPU) # and offloaded OpenACC/OpenMP code. 
Using -cuda when compiling seems to improve error messages in # some cases, and to be recommended by NVIDIA. We pass -gpu=cudaX.Y to ensure that OpenACC/OpenMP # code is compiled with the same CUDA version as the explicit CUDA code. - set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo") + # TODO nordc option is added based on the recommendation from: + # https://forums.developer.nvidia.com/t/separate-compilation-of-mixed-cuda-openacc-code/192701 + # but as discussed in + # https://github.com/BlueBrain/CoreNeuron/issues/141#issuecomment-1086742194 + # this is still not completely solving underlying link issue. + set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo,nordc") # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA # code. Otherwise there may be confusing linker errors. We cannot rely on nvcc and nvc++ using the # same default compute capabilities as each other, particularly on GPU-less build machines. diff --git a/CMakeLists.txt b/CMakeLists.txt index cb1c96b6c..26cc84360 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -303,14 +303,6 @@ if(CORENRN_HAVE_NVHPC_COMPILER) endif() endif() -# ~~~ -# OpenACC needs to build static library in order to have global/routines working. 
-# See https://www.pgroup.com/userforum/viewtopic.php?t=5350 -# ~~~ -if(CORENRN_ENABLE_GPU) - set(CORENRN_ENABLE_SHARED OFF) -endif() - if(CORENRN_ENABLE_SHARED) set(COMPILE_LIBRARY_TYPE "SHARED") else() diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 0dc648628..489c85a05 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -248,7 +248,7 @@ target_compile_options(coreneuron add_dependencies(coreneuron nrnivmodl-core) # scopmath is created separately for nrnivmodl-core workflow -add_library(scopmath STATIC ${CORENEURON_HEADER_FILES} ${SCOPMATH_CODE_FILES}) +add_library(scopmath ${COMPILE_LIBRARY_TYPE} ${CORENEURON_HEADER_FILES} ${SCOPMATH_CODE_FILES}) target_include_directories(scopmath PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR} ${CORENEURON_PROJECT_BINARY_DIR}/generated) diff --git a/coreneuron/apps/main1.cpp b/coreneuron/apps/main1.cpp index fb74df7d0..b7139754d 100644 --- a/coreneuron/apps/main1.cpp +++ b/coreneuron/apps/main1.cpp @@ -563,8 +563,6 @@ extern "C" int run_solve_core(int argc, char** argv) { #endif bool compute_gpu = corenrn_param.gpu; - nrn_pragma_acc(update device(celsius, secondorder, pi) if (compute_gpu)) - nrn_pragma_omp(target update to(celsius, secondorder, pi) if (compute_gpu)) { double v = corenrn_param.voltage; double dt = corenrn_param.dt; diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index 3eff82fe1..098b943be 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -77,7 +77,7 @@ void cnrn_target_set_default_device(int device_num) { #ifdef CORENEURON_ENABLE_GPU -static Memb_list* copy_ml_to_device(const Memb_list* ml, int type) { +static Memb_list* copy_ml_to_device(const Memb_list* ml, int type, double* dml_data) { // As we never run code for artificial cell inside GPU we don't copy it. 
int is_art = corenrn.get_is_artificial()[type]; if (is_art) { @@ -90,9 +90,9 @@ static Memb_list* copy_ml_to_device(const Memb_list* ml, int type) { int szp = corenrn.get_prop_param_size()[type]; int szdp = corenrn.get_prop_dparam_size()[type]; - double* dptr = cnrn_target_deviceptr(ml->data); - cnrn_target_memcpy_to_device(&(d_ml->data), &(dptr)); + double* dptr = dml_data; + cnrn_target_memcpy_to_device(&(d_ml->data), &(dptr)); int* d_nodeindices = cnrn_target_copyin(ml->nodeindices, n); cnrn_target_memcpy_to_device(&(d_ml->nodeindices), &d_nodeindices); @@ -325,7 +325,6 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) { /*copy all double data for thread */ d__data = cnrn_target_copyin(nt->_data, nt->_ndata); - /* Here is the example of using OpenACC data enter/exit * Remember that we are not allowed to use nt->_data but we have to use: * double *dtmp = nt->_data; // now use dtmp! @@ -395,9 +394,17 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) { // book keeping for linked-list d_last_tml = d_tml; + // TODO: acc_deviceptr is returning host pointer when + // coreneuron is launched via python instead of special + // see: https://github.com/BlueBrain/CoreNeuron/issues/141#issuecomment-1086746848 + // As ml->data is always within nt->_data, temporarily calculate + // device pointer of ml->data on using offset. + double* dml_data = d__data + (tml->ml->data - nt->_data); + /* now for every tml, there is a ml. 
copy that and setup pointer */ - Memb_list* d_ml = copy_ml_to_device(tml->ml, tml->index); + Memb_list* d_ml = copy_ml_to_device(tml->ml, tml->index, dml_data); cnrn_target_memcpy_to_device(&(d_tml->ml), &d_ml); + /* setup nt._ml_list */ cnrn_target_memcpy_to_device(&(d_ml_list[tml->index]), &d_ml); } diff --git a/coreneuron/nrnconf.h b/coreneuron/nrnconf.h index b25a2764a..7e4cb6d4e 100644 --- a/coreneuron/nrnconf.h +++ b/coreneuron/nrnconf.h @@ -32,17 +32,9 @@ using Symbol = char; #define VEC_AREA(i) (_nt->_actual_area[(i)]) #define VECTORIZE 1 -// extern variables require acc declare -nrn_pragma_omp(declare target) extern double celsius; -nrn_pragma_acc(declare create(celsius)) - extern double pi; -nrn_pragma_acc(declare create(pi)) - extern int secondorder; -nrn_pragma_acc(declare create(secondorder)) -nrn_pragma_omp(end declare target) extern double t, dt; extern int rev_dt; diff --git a/external/mod2c b/external/mod2c index 8565d3c17..9d21b18a0 160000 --- a/external/mod2c +++ b/external/mod2c @@ -1 +1 @@ -Subproject commit 8565d3c178a195a489fae0623d6338c2e92cd1e5 +Subproject commit 9d21b18a0036810f3ced1a8b16428754b87c8e87 diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in index 6601f7123..bdc9387f1 100644 --- a/extra/nrnivmodl_core_makefile.in +++ b/extra/nrnivmodl_core_makefile.in @@ -212,6 +212,7 @@ $(SPECIAL_EXE): coremech_lib_target $(CXX_LINK_EXE_CMD) -o $(SPECIAL_EXE) $(CORENRN_SHARE_CORENRN_DIR)/coreneuron.cpp \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ -L$(OUTPUT_DIR) -l$(COREMECH_LIB_NAME) $(CORENRNLIB_FLAGS) $(LDFLAGS) \ + -L$(CORENRN_LIB_DIR) -lscopmath \ -Wl,-rpath,'$(LIB_RPATH)' -Wl,-rpath,$(CORENRN_LIB_DIR) -Wl,-rpath,'$(INSTALL_LIB_RPATH)' coremech_lib_target: $(corenrnmech_lib_target) @@ -226,7 +227,7 @@ $(ENGINEMECH_OBJ): $(CORENRN_SHARE_CORENRN_DIR)/enginemech.cpp | $(MOD_OBJS_DIR) coremech_lib_shared: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always $(CXX_SHARED_LIB_CMD) $(ENGINEMECH_OBJ) -o ${COREMECH_LIB_PATH} $(ALL_OBJS) 
\ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ - $(LDFLAGS) $(CORENRN_LIB_DIR)/libscopmath.a \ + $(LDFLAGS) -L$(CORENRN_LIB_DIR) -lscopmath\ ${SONAME_OPTION} $(CORENRNLIB_FLAGS) -Wl,-rpath,$(CORENRN_LIB_DIR); # build static library of mechanisms From 045b9cd9de53c475da03926c54abf1db5785cd82 Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Wed, 20 Apr 2022 12:57:11 +0200 Subject: [PATCH 002/128] Pass Memb_list* as an argument for all common prototypes in order to support global variables via argument * add ml parameter to all relevant function * switch to static build and remove nordc temporarily * free ml->instance if not empty * avoid extracting libscopmath objects and linking --- CMake/OpenAccHelper.cmake | 2 +- CMakeLists.txt | 4 ++++ coreneuron/CMakeLists.txt | 2 +- coreneuron/io/core2nrn_data_return.cpp | 16 +++++++++++++--- coreneuron/io/nrn2core_data_init.cpp | 3 ++- coreneuron/io/nrn_checkpoint.cpp | 6 ++++-- coreneuron/io/nrn_setup.cpp | 5 +++++ coreneuron/io/phase2.cpp | 1 + coreneuron/mechanism/mech/mod2c_core_thread.hpp | 8 ++++---- coreneuron/mechanism/mechanism.hpp | 3 ++- coreneuron/mechanism/membfunc.hpp | 2 ++ coreneuron/mechanism/patternstim.cpp | 3 ++- extra/nrnivmodl_core_makefile.in | 6 ++---- 13 files changed, 43 insertions(+), 18 deletions(-) diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index 8bdf5726a..e4e2f7f89 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -72,7 +72,7 @@ if(CORENRN_ENABLE_GPU) # but as discussed in # https://github.com/BlueBrain/CoreNeuron/issues/141#issuecomment-1086742194 # this is still not completely solving underlying link issue. - set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo,nordc") + set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo") # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA # code. Otherwise there may be confusing linker errors. 
We cannot rely on nvcc and nvc++ using the # same default compute capabilities as each other, particularly on GPU-less build machines. diff --git a/CMakeLists.txt b/CMakeLists.txt index 26cc84360..4366cfee1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -303,6 +303,10 @@ if(CORENRN_HAVE_NVHPC_COMPILER) endif() endif() +if(CORENRN_ENABLE_GPU) + set(CORENRN_ENABLE_SHARED OFF) +endif() + if(CORENRN_ENABLE_SHARED) set(COMPILE_LIBRARY_TYPE "SHARED") else() diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 489c85a05..21861649e 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -305,7 +305,7 @@ add_custom_command( OUTPUT ${output_binaries} DEPENDS scopmath coreneuron ${NMODL_TARGET_TO_DEPEND} ${modfiles} ${CORENEURON_BUILTIN_MODFILES} COMMAND ${CMAKE_BINARY_DIR}/bin/nrnivmodl-core -b ${COMPILE_LIBRARY_TYPE} -m - ${CORENRN_MOD2CPP_BINARY} -p 1 "${modfile_directory}" + ${CORENRN_MOD2CPP_BINARY} -p 4 "${modfile_directory}" WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/bin COMMENT "Running nrnivmodl-core with halfgap.mod") add_custom_target(nrniv-core ALL DEPENDS ${output_binaries}) diff --git a/coreneuron/io/core2nrn_data_return.cpp b/coreneuron/io/core2nrn_data_return.cpp index 6a12c197f..87a549ac6 100644 --- a/coreneuron/io/core2nrn_data_return.cpp +++ b/coreneuron/io/core2nrn_data_return.cpp @@ -137,7 +137,7 @@ static void core2nrn_corepointer(int tid, NrnThreadMembList* tml) { d = ml->data + nrn_i_layout(jp, ml->nodecount, 0, dsz, layout); pd = ml->pdata + nrn_i_layout(jp, ml->nodecount, 0, pdsz, layout); (*corenrn.get_bbcore_write()[type])( - nullptr, nullptr, &dcnt, &icnt, 0, aln_cntml, d, pd, ml->_thread, &nt, 0.0); + nullptr, nullptr, &dcnt, &icnt, 0, aln_cntml, d, pd, ml->_thread, &nt, ml, 0.0); } std::unique_ptr iArray; @@ -159,8 +159,18 @@ static void core2nrn_corepointer(int tid, NrnThreadMembList* tml) { d = ml->data + nrn_i_layout(jp, ml->nodecount, 0, dsz, layout); pd = ml->pdata + nrn_i_layout(jp, ml->nodecount, 0, 
pdsz, layout); - (*corenrn.get_bbcore_write()[type])( - dArray.get(), iArray.get(), &dcnt, &icnt, 0, aln_cntml, d, pd, ml->_thread, &nt, 0.0); + (*corenrn.get_bbcore_write()[type])(dArray.get(), + iArray.get(), + &dcnt, + &icnt, + 0, + aln_cntml, + d, + pd, + ml->_thread, + &nt, + ml, + 0.0); } (*core2nrn_corepointer_mech_)(tid, type, icnt, dcnt, iArray.get(), dArray.get()); diff --git a/coreneuron/io/nrn2core_data_init.cpp b/coreneuron/io/nrn2core_data_init.cpp index e732dec11..ad7106f6e 100644 --- a/coreneuron/io/nrn2core_data_init.cpp +++ b/coreneuron/io/nrn2core_data_init.cpp @@ -407,6 +407,7 @@ extern void** pattern_stim_info_ref(int icnt, Datum* _ppvar, ThreadDatum* _thread, NrnThread* _nt, + Memb_list* ml, double v); extern "C" { @@ -437,7 +438,7 @@ void nrn2core_patstim_share_info() { assert(0); } - void** info = pattern_stim_info_ref(_iml, _cntml, _p, _ppvar, nullptr, nt, 0.0); + void** info = pattern_stim_info_ref(_iml, _cntml, _p, _ppvar, nullptr, nt, ml, 0.0); (*nrn2core_patternstim_)(info); } } diff --git a/coreneuron/io/nrn_checkpoint.cpp b/coreneuron/io/nrn_checkpoint.cpp index 955848901..ecf432422 100644 --- a/coreneuron/io/nrn_checkpoint.cpp +++ b/coreneuron/io/nrn_checkpoint.cpp @@ -449,7 +449,7 @@ void CheckPoints::write_phase2(NrnThread& nt) const { d = ml->data + nrn_i_layout(jp, ml->nodecount, 0, dsz, layout); pd = ml->pdata + nrn_i_layout(jp, ml->nodecount, 0, pdsz, layout); (*corenrn.get_bbcore_write()[type])( - nullptr, nullptr, &dcnt, &icnt, 0, aln_cntml, d, pd, ml->_thread, &nt, 0.0); + nullptr, nullptr, &dcnt, &icnt, 0, aln_cntml, d, pd, ml->_thread, &nt, ml, 0.0); } fh << icnt << "\n"; fh << dcnt << "\n"; @@ -478,7 +478,7 @@ void CheckPoints::write_phase2(NrnThread& nt) const { pd = ml->pdata + nrn_i_layout(jp, ml->nodecount, 0, pdsz, layout); (*corenrn.get_bbcore_write()[type])( - dArray, iArray, &dcnt, &icnt, 0, aln_cntml, d, pd, ml->_thread, &nt, 0.0); + dArray, iArray, &dcnt, &icnt, 0, aln_cntml, d, pd, ml->_thread, &nt, ml, 0.0); } 
if (icnt) { @@ -592,6 +592,7 @@ bool CheckPoints::initialize() { ml->pdata, ml->_thread, nrn_threads, + ml, 0.0); break; } @@ -802,6 +803,7 @@ void CheckPoints::write_tqueue(NrnThread& nt, FileHandler& fh) const { ml->pdata, ml->_thread, nrn_threads, + ml, 0.0); break; } diff --git a/coreneuron/io/nrn_setup.cpp b/coreneuron/io/nrn_setup.cpp index c22ffc0ce..361ccd185 100644 --- a/coreneuron/io/nrn_setup.cpp +++ b/coreneuron/io/nrn_setup.cpp @@ -754,6 +754,11 @@ void nrn_cleanup() { ml->_thread = nullptr; } + if (ml->instance) { + free(ml->instance); + ml->instance = nullptr; + } + NetReceiveBuffer_t* nrb = ml->_net_receive_buffer; if (nrb) { if (nrb->_size) { diff --git a/coreneuron/io/phase2.cpp b/coreneuron/io/phase2.cpp index a97b335bb..bb3f7f99f 100644 --- a/coreneuron/io/phase2.cpp +++ b/coreneuron/io/phase2.cpp @@ -867,6 +867,7 @@ void Phase2::get_info_from_bbcore(NrnThread& nt, pd, ml->_thread, &nt, + ml, 0.0); } assert(dk == static_cast(tmls[i].dArray.size())); diff --git a/coreneuron/mechanism/mech/mod2c_core_thread.hpp b/coreneuron/mechanism/mech/mod2c_core_thread.hpp index 85ed348f6..d18160f3a 100644 --- a/coreneuron/mechanism/mech/mod2c_core_thread.hpp +++ b/coreneuron/mechanism/mech/mod2c_core_thread.hpp @@ -16,14 +16,14 @@ namespace coreneuron { #define _STRIDE _cntml_padded + _iml -#define _threadargscomma_ _iml, _cntml_padded, _p, _ppvar, _thread, _nt, _v, +#define _threadargscomma_ _iml, _cntml_padded, _p, _ppvar, _thread, _nt, _ml, _v, #define _threadargsprotocomma_ \ int _iml, int _cntml_padded, double *_p, Datum *_ppvar, ThreadDatum *_thread, NrnThread *_nt, \ - double _v, -#define _threadargs_ _iml, _cntml_padded, _p, _ppvar, _thread, _nt, _v + Memb_list *_ml, double _v, +#define _threadargs_ _iml, _cntml_padded, _p, _ppvar, _thread, _nt, _ml, _v #define _threadargsproto_ \ int _iml, int _cntml_padded, double *_p, Datum *_ppvar, ThreadDatum *_thread, NrnThread *_nt, \ - double _v + Memb_list *_ml, double _v struct Elm { unsigned row; /* Row 
location */ diff --git a/coreneuron/mechanism/mechanism.hpp b/coreneuron/mechanism/mechanism.hpp index ab78ad502..71b8b0fc6 100644 --- a/coreneuron/mechanism/mechanism.hpp +++ b/coreneuron/mechanism/mechanism.hpp @@ -143,6 +143,7 @@ struct Memb_list { NetSendBuffer_t* _net_send_buffer = nullptr; int nodecount; /* actual node count */ int _nodecount_padded; - void* instance = nullptr; /* mechanism instance */ + void* instance = nullptr; /* mechanism instance struct from NMODL or global variables struct in + mod2c */ }; } // namespace coreneuron diff --git a/coreneuron/mechanism/membfunc.hpp b/coreneuron/mechanism/membfunc.hpp index 2556f0f87..c7d58e82a 100644 --- a/coreneuron/mechanism/membfunc.hpp +++ b/coreneuron/mechanism/membfunc.hpp @@ -151,6 +151,7 @@ using bbcore_read_t = void (*)(double*, Datum*, ThreadDatum*, NrnThread*, + Memb_list*, double); using bbcore_write_t = void (*)(double*, @@ -163,6 +164,7 @@ using bbcore_write_t = void (*)(double*, Datum*, ThreadDatum*, NrnThread*, + Memb_list*, double); extern int nrn_mech_depend(int type, int* dependencies); diff --git a/coreneuron/mechanism/patternstim.cpp b/coreneuron/mechanism/patternstim.cpp index e22b19e98..ca1159788 100644 --- a/coreneuron/mechanism/patternstim.cpp +++ b/coreneuron/mechanism/patternstim.cpp @@ -38,6 +38,7 @@ extern void pattern_stim_setup_helper(int size, Datum* _ppvar, ThreadDatum* _thread, NrnThread* _nt, + Memb_list* ml, double v); static size_t read_raster_file(const char* fname, double** tvec, int** gidvec, double tstop); @@ -93,7 +94,7 @@ void nrn_mkPatternStim(const char* fname, double tstop) { } else { assert(0); } - pattern_stim_setup_helper(size, tvec, gidvec, _iml, _cntml, _p, _ppvar, nullptr, nt, 0.0); + pattern_stim_setup_helper(size, tvec, gidvec, _iml, _cntml, _p, _ppvar, nullptr, nt, ml, 0.0); } size_t read_raster_file(const char* fname, double** tvec, int** gidvec, double tstop) { diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in index 
bdc9387f1..585cf3795 100644 --- a/extra/nrnivmodl_core_makefile.in +++ b/extra/nrnivmodl_core_makefile.in @@ -227,15 +227,13 @@ $(ENGINEMECH_OBJ): $(CORENRN_SHARE_CORENRN_DIR)/enginemech.cpp | $(MOD_OBJS_DIR) coremech_lib_shared: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always $(CXX_SHARED_LIB_CMD) $(ENGINEMECH_OBJ) -o ${COREMECH_LIB_PATH} $(ALL_OBJS) \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ - $(LDFLAGS) -L$(CORENRN_LIB_DIR) -lscopmath\ + $(LDFLAGS)\ ${SONAME_OPTION} $(CORENRNLIB_FLAGS) -Wl,-rpath,$(CORENRN_LIB_DIR); # build static library of mechanisms coremech_lib_static: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always - mkdir -p $(MOD_OBJS_DIR)/scopmath; \ - cd $(MOD_OBJS_DIR)/scopmath && ar -x $(CORENRN_LIB_DIR)/libscopmath.a && cd -;\ rm -f ${COREMECH_LIB_PATH}; \ - ar cq ${COREMECH_LIB_PATH} $(ENGINEMECH_OBJ) $(ALL_OBJS) $(MOD_OBJS_DIR)/scopmath/*.o; + ar cq ${COREMECH_LIB_PATH} $(ENGINEMECH_OBJ) $(ALL_OBJS); # compile cpp files to .o $(MOD_OBJS_DIR)/%.o: $(MOD_TO_CPP_DIR)/%.cpp | $(MOD_OBJS_DIR) From ccb8b6bf588dcc6b7a382a2dccba56672d7042d4 Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Thu, 21 Apr 2022 01:54:38 +0200 Subject: [PATCH 003/128] Add link to libscopmath in neuron as well --- CMake/OpenAccHelper.cmake | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index e4e2f7f89..d64227154 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -66,12 +66,11 @@ if(CORENRN_ENABLE_GPU) # linking. Without this, we had problems with linking between the explicit CUDA (.cu) device code # and offloaded OpenACC/OpenMP code. Using -cuda when compiling seems to improve error messages in # some cases, and to be recommended by NVIDIA. We pass -gpu=cudaX.Y to ensure that OpenACC/OpenMP - # code is compiled with the same CUDA version as the explicit CUDA code. 
- # TODO nordc option is added based on the recommendation from: - # https://forums.developer.nvidia.com/t/separate-compilation-of-mixed-cuda-openacc-code/192701 - # but as discussed in - # https://github.com/BlueBrain/CoreNeuron/issues/141#issuecomment-1086742194 - # this is still not completely solving underlying link issue. + # code is compiled with the same CUDA version as the explicit CUDA code. TODO nordc option is + # added based on the recommendation from: + # https://forums.developer.nvidia.com/t/separate-compilation-of-mixed-cuda-openacc-code/192701 but + # as discussed in https://github.com/BlueBrain/CoreNeuron/issues/141#issuecomment-1086742194 this + # is still not completely solving underlying link issue. set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo") # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA # code. Otherwise there may be confusing linker errors. We cannot rely on nvcc and nvc++ using the @@ -106,11 +105,11 @@ if(CORENRN_ENABLE_GPU) GLOBAL PROPERTY CORENEURON_LIB_LINK_FLAGS - "${NVHPC_ACC_COMP_FLAGS} -rdynamic -lrt -Wl,--whole-archive -L${CMAKE_HOST_SYSTEM_PROCESSOR} -lcorenrnmech -L$(libdir) -lcoreneuron -Wl,--no-whole-archive" + "${NVHPC_ACC_COMP_FLAGS} -rdynamic -lrt -Wl,--whole-archive -L${CMAKE_HOST_SYSTEM_PROCESSOR} -lcorenrnmech -lscopmath -L$(libdir) -lcoreneuron -Wl,--no-whole-archive" ) else() set_property(GLOBAL PROPERTY CORENEURON_LIB_LINK_FLAGS - "-L${CMAKE_HOST_SYSTEM_PROCESSOR} -lcorenrnmech") + "-L${CMAKE_HOST_SYSTEM_PROCESSOR} -lcorenrnmech -lscopmath") endif(CORENRN_ENABLE_GPU) if(CORENRN_HAVE_NVHPC_COMPILER) From b2fcc727b7824ff6aa8f4ede0e0389f59c020bf5 Mon Sep 17 00:00:00 2001 From: Pramod S Kumbhar Date: Thu, 21 Apr 2022 14:27:21 +0200 Subject: [PATCH 004/128] Memb_list for pattern.mod should be calloc'd for zero-initialisation --- coreneuron/mechanism/patternstim.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/coreneuron/mechanism/patternstim.cpp b/coreneuron/mechanism/patternstim.cpp index ca1159788..4f5e4e4e6 100644 --- a/coreneuron/mechanism/patternstim.cpp +++ b/coreneuron/mechanism/patternstim.cpp @@ -138,7 +138,7 @@ size_t read_raster_file(const char* fname, double** tvec, int** gidvec, double t // see nrn_setup.cpp:read_phase2 for how it creates NrnThreadMembList instances. static NrnThreadMembList* alloc_nrn_thread_memb(int type) { - NrnThreadMembList* tml = (NrnThreadMembList*) emalloc(sizeof(NrnThreadMembList)); + NrnThreadMembList* tml = (NrnThreadMembList*) ecalloc(1, sizeof(NrnThreadMembList)); tml->dependencies = nullptr; tml->ndependencies = 0; tml->index = type; @@ -149,7 +149,7 @@ static NrnThreadMembList* alloc_nrn_thread_memb(int type) { int psize = corenrn.get_prop_param_size()[type]; int dsize = corenrn.get_prop_dparam_size()[type]; int layout = corenrn.get_mech_data_layout()[type]; - tml->ml = (Memb_list*) emalloc(sizeof(Memb_list)); + tml->ml = (Memb_list*) ecalloc(1, sizeof(Memb_list)); tml->ml->nodecount = 1; tml->ml->_nodecount_padded = tml->ml->nodecount; tml->ml->nodeindices = nullptr; From 59d6d96e6d767de2b5d693cc0ef08a8b7e8daa30 Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Fri, 22 Apr 2022 23:22:37 +0200 Subject: [PATCH 005/128] Add global_variables per membrane list and cleanup for GPU * update mod2c * clean global_variables on cpu and gpu --- coreneuron/gpu/nrn_acc_manager.cpp | 5 +++++ coreneuron/io/nrn_setup.cpp | 5 +++++ coreneuron/mechanism/mechanism.hpp | 5 +++-- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index 098b943be..ed3bf659c 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -256,6 +256,11 @@ static void delete_ml_from_device(Memb_list* ml, int type) { int pcnt = nrn_soa_padded_size(n, SOA_LAYOUT) * szdp; cnrn_target_delete(ml->pdata, pcnt); } + if (ml->global_variables) { + 
cnrn_target_delete(reinterpret_cast(ml->global_variables), + ml->global_variables_size); + } + cnrn_target_delete(ml->nodeindices, n); cnrn_target_delete(ml); } diff --git a/coreneuron/io/nrn_setup.cpp b/coreneuron/io/nrn_setup.cpp index 361ccd185..98382f9da 100644 --- a/coreneuron/io/nrn_setup.cpp +++ b/coreneuron/io/nrn_setup.cpp @@ -759,6 +759,11 @@ void nrn_cleanup() { ml->instance = nullptr; } + if (ml->global_variables) { + free(ml->global_variables); + ml->global_variables = nullptr; + } + NetReceiveBuffer_t* nrb = ml->_net_receive_buffer; if (nrb) { if (nrb->_size) { diff --git a/coreneuron/mechanism/mechanism.hpp b/coreneuron/mechanism/mechanism.hpp index 71b8b0fc6..1c177976c 100644 --- a/coreneuron/mechanism/mechanism.hpp +++ b/coreneuron/mechanism/mechanism.hpp @@ -143,7 +143,8 @@ struct Memb_list { NetSendBuffer_t* _net_send_buffer = nullptr; int nodecount; /* actual node count */ int _nodecount_padded; - void* instance = nullptr; /* mechanism instance struct from NMODL or global variables struct in - mod2c */ + void* instance = nullptr; /* mechanism instance struct from NMODL */ + void* global_variables = nullptr; /* global variables struct for each mechanism */ + int global_variables_size = 0; /* size of global variables struct in bytes */ }; } // namespace coreneuron From 12dabf1d99bb9ca82a6805e15fda12cbf9aeec67 Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Sat, 23 Apr 2022 11:46:41 +0200 Subject: [PATCH 006/128] redefine nrn_ghk with celsius as an argument * remove duplicate prototypes from nrnoc_aux.hpp * update mod2c --- coreneuron/mechanism/eion.cpp | 2 +- coreneuron/mechanism/membfunc.hpp | 3 ++- coreneuron/utils/nrnoc_aux.hpp | 4 ---- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/coreneuron/mechanism/eion.cpp b/coreneuron/mechanism/eion.cpp index 4bc077880..f61a13579 100644 --- a/coreneuron/mechanism/eion.cpp +++ b/coreneuron/mechanism/eion.cpp @@ -211,7 +211,7 @@ static double efun(double x) { nrn_pragma_omp(end declare 
target) -double nrn_ghk(double v, double ci, double co, double z) { +double nrn_ghk(double v, double ci, double co, double z, double celsius) { double temp = z * v / ktf; double eco = co * efun(temp); double eci = ci * efun(-temp); diff --git a/coreneuron/mechanism/membfunc.hpp b/coreneuron/mechanism/membfunc.hpp index c7d58e82a..ab07d7ea8 100644 --- a/coreneuron/mechanism/membfunc.hpp +++ b/coreneuron/mechanism/membfunc.hpp @@ -116,7 +116,8 @@ extern void nrn_wrote_conc(int, double*, int, int, double**, double, int); nrn_pragma_acc(routine seq) double nrn_nernst(double ci, double co, double z, double celsius); nrn_pragma_acc(routine seq) -extern double nrn_ghk(double v, double ci, double co, double z); +//TODO: check if this should be via overload +extern double nrn_ghk(double v, double ci, double co, double z, double celsius); nrn_pragma_omp(end declare target) extern void hoc_register_prop_size(int, int, int); extern void hoc_register_dparam_semantics(int type, int, const char* name); diff --git a/coreneuron/utils/nrnoc_aux.hpp b/coreneuron/utils/nrnoc_aux.hpp index 3c2f23326..a67569d56 100644 --- a/coreneuron/utils/nrnoc_aux.hpp +++ b/coreneuron/utils/nrnoc_aux.hpp @@ -35,8 +35,4 @@ extern void hoc_warning(const char*, const char*); extern double hoc_Exp(double x); -// defined in eion.cpp and this file included in translated mod files. -extern double nrn_nernst(double ci, double co, double z, double celsius); -extern double nrn_ghk(double v, double ci, double co, double z); - } // namespace coreneuron From 1139a74e99524eb43660d0e44a4fbd011c8b3a0f Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 22 Apr 2022 14:15:49 +0200 Subject: [PATCH 007/128] Reorganise unit definitions, inline functions. - nrn_ghk is now declared inline. 
--- coreneuron/CMakeLists.txt | 11 ++------ coreneuron/apps/main1.cpp | 6 ++--- coreneuron/io/global_vars.cpp | 2 +- coreneuron/mechanism/eion.cpp | 42 +++++------------------------- coreneuron/mechanism/membfunc.hpp | 38 ++++++++++++++++++++------- coreneuron/nrnoc/nrnunits_modern.h | 36 ------------------------- coreneuron/utils/nrnoc_aux.hpp | 1 - coreneuron/utils/units.hpp | 38 +++++++++++++++++++++++++++ tests/integration/CMakeLists.txt | 2 +- 9 files changed, 80 insertions(+), 96 deletions(-) delete mode 100644 coreneuron/nrnoc/nrnunits_modern.h create mode 100644 coreneuron/utils/units.hpp diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 21861649e..665205802 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -120,16 +120,9 @@ if(CORENRN_ENABLE_GPU) endif() # ============================================================================= -# eion.cpp depends on CORENRN_USE_LEGACY_UNITS +# CORENEURON_USE_LEGACY_UNITS is used in membfunc.hpp so define it everywhere # ============================================================================= -set(LegacyFR_FILES - ${CMAKE_CURRENT_SOURCE_DIR}/mechanism/eion.cpp ${CMAKE_CURRENT_SOURCE_DIR}/apps/main1.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/io/global_vars.cpp) - -set_property( - SOURCE ${LegacyFR_FILES} - APPEND - PROPERTY COMPILE_DEFINITIONS "CORENRN_USE_LEGACY_UNITS=${CORENRN_USE_LEGACY_UNITS}") +add_compile_definitions(CORENEURON_USE_LEGACY_UNITS=${CORENRN_USE_LEGACY_UNITS}) # ============================================================================= # create libraries diff --git a/coreneuron/apps/main1.cpp b/coreneuron/apps/main1.cpp index b7139754d..8e05a5d69 100644 --- a/coreneuron/apps/main1.cpp +++ b/coreneuron/apps/main1.cpp @@ -1,6 +1,6 @@ /* # ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for 
details. # =============================================================================. @@ -51,9 +51,9 @@ const char* corenrn_version() { return coreneuron::bbcore_write_version; } -// the CORENRN_USE_LEGACY_UNITS determined by CORENRN_ENABLE_LEGACY_UNITS +// the CORENEURON_USE_LEGACY_UNITS determined by CORENRN_ENABLE_LEGACY_UNITS bool corenrn_units_use_legacy() { - return CORENRN_USE_LEGACY_UNITS; + return CORENEURON_USE_LEGACY_UNITS; } void (*nrn2core_part2_clean_)(); diff --git a/coreneuron/io/global_vars.cpp b/coreneuron/io/global_vars.cpp index 128a1cdb9..815423ea9 100644 --- a/coreneuron/io/global_vars.cpp +++ b/coreneuron/io/global_vars.cpp @@ -142,7 +142,7 @@ void set_globals(const char* path, bool cli_global_seed, int cli_global_seed_val } else if (strcmp(name, "Random123_globalindex") == 0) { nrnran123_set_globalindex((uint32_t) n); } else if (strcmp(name, "_nrnunit_use_legacy_") == 0) { - if (n != CORENRN_USE_LEGACY_UNITS) { + if (n != CORENEURON_USE_LEGACY_UNITS) { hoc_execerror( "CORENRN_ENABLE_LEGACY_UNITS not" " consistent with NEURON value of" diff --git a/coreneuron/mechanism/eion.cpp b/coreneuron/mechanism/eion.cpp index f61a13579..1dbd0d2db 100644 --- a/coreneuron/mechanism/eion.cpp +++ b/coreneuron/mechanism/eion.cpp @@ -1,6 +1,6 @@ /* # ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
@@ -154,23 +154,9 @@ the USEION statement of any model using this ion\n", } } -#ifndef CORENRN_USE_LEGACY_UNITS -#define CORENRN_USE_LEGACY_UNITS 0 -#endif - -#if CORENRN_USE_LEGACY_UNITS == 1 -#define FARADAY 96485.309 -#define gasconstant 8.3134 -#else -#include "coreneuron/nrnoc/nrnunits_modern.h" -#define FARADAY _faraday_codata2018 -#define gasconstant _gasconstant_codata2018 -#endif - -#define ktf (1000. * gasconstant * (celsius + 273.15) / FARADAY) - -double nrn_nernst(double ci, double co, double z, double celsius) { - /*printf("nrn_nernst %g %g %g\n", ci, co, z);*/ +// std::log isn't constexpr, but there are argument values for which nrn_nernst +// is a constant expression +constexpr double nrn_nernst(double ci, double co, double z, double celsius) { if (z == 0) { return 0.; } @@ -179,7 +165,7 @@ double nrn_nernst(double ci, double co, double z, double celsius) { } else if (co <= 0.) { return -1e6; } else { - return ktf / z * log(co / ci); + return ktf(celsius) / z * std::log(co / ci); } } @@ -200,24 +186,8 @@ void nrn_wrote_conc(int type, pe[0] = nrn_nernst(pe[1 * _STRIDE], pe[2 * _STRIDE], gimap[type][2], celsius); } } - -static double efun(double x) { - if (fabs(x) < 1e-4) { - return 1. 
- x / 2.; - } else { - return x / (exp(x) - 1); - } -} - nrn_pragma_omp(end declare target) -double nrn_ghk(double v, double ci, double co, double z, double celsius) { - double temp = z * v / ktf; - double eco = co * efun(temp); - double eci = ci * efun(-temp); - return (.001) * z * FARADAY * (eci - eco); -} - #if VECTORIZE #define erev pd[0 * _STRIDE] /* From Eion */ #define conci pd[1 * _STRIDE] @@ -257,7 +227,7 @@ ion_style("name_ion", [c_style, e_style, einit, eadvance, cinit]) double nrn_nernst_coef(int type) { /* for computing jacobian element dconc'/dconc */ - return ktf / charge; + return ktf(celsius) / charge; } /* Must be called prior to any channels which update the currents */ diff --git a/coreneuron/mechanism/membfunc.hpp b/coreneuron/mechanism/membfunc.hpp index ab07d7ea8..638b52d87 100644 --- a/coreneuron/mechanism/membfunc.hpp +++ b/coreneuron/mechanism/membfunc.hpp @@ -1,17 +1,19 @@ /* # ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ - #pragma once -#include - #include "coreneuron/mechanism/mechanism.hpp" #include "coreneuron/utils/offload.hpp" +#include "coreneuron/utils/units.hpp" + +#include +#include + namespace coreneuron { using Pfrpdat = Datum* (*) (void); @@ -113,12 +115,30 @@ extern void nrn_writes_conc(int, int); nrn_pragma_omp(declare target) nrn_pragma_acc(routine seq) extern void nrn_wrote_conc(int, double*, int, int, double**, double, int); -nrn_pragma_acc(routine seq) -double nrn_nernst(double ci, double co, double z, double celsius); -nrn_pragma_acc(routine seq) -//TODO: check if this should be via overload -extern double nrn_ghk(double v, double ci, double co, double z, double celsius); nrn_pragma_omp(end declare target) +constexpr double ktf(double celsius) { + return 1000. * units::gasconstant * (celsius + 273.15) / units::faraday; +} +inline double nrn_ghk(double v, double ci, double co, double z, double celsius) { + auto const efun = [](double x) { + if (std::abs(x) < 1e-4) { + return 1. - x / 2.; + } else { + return x / (std::exp(x) - 1.); + } + }; + double const temp{z * v / ktf(celsius)}; + double const eco{co * efun(+temp)}; + double const eci{ci * efun(-temp)}; + return .001 * z * units::faraday * (eci - eco); +} +/** + * This signature requires the use of the `celsius` global variable, which can + * cause problems when executing on GPU. 
+ */ +[[deprecated]] inline double nrn_ghk(double v, double ci, double co, double z) { + return nrn_ghk(v, ci, co, z, celsius); +} extern void hoc_register_prop_size(int, int, int); extern void hoc_register_dparam_semantics(int type, int, const char* name); extern void hoc_reg_ba(int, mod_f_t, int); diff --git a/coreneuron/nrnoc/nrnunits_modern.h b/coreneuron/nrnoc/nrnunits_modern.h deleted file mode 100644 index d93638841..000000000 --- a/coreneuron/nrnoc/nrnunits_modern.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -# ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL -# -# See top-level LICENSE file for details. -# ============================================================================= -*/ - -#pragma once - -/** - NMODL translated MOD files get unit constants typically from - share/lib/nrnunits.lib.in. But there were other source files that - hardcode some of the constants. Here we gather a few modern units into - a single place (but, unfortunately, also in nrnunits.lib.in). Legacy units - cannot be gathered here because they can differ slightly from place to place. - - These come from https://physics.nist.gov/cuu/Constants/index.html. - Termed the "2018 CODATA recommended values", they became available - on 20 May 2019 and replace the 2014 CODATA set. - - See oc/hoc_init.c, nrnoc/eion.c, nrniv/kschan.h -**/ - - -#define _electron_charge_codata2018 1.602176634e-19 /* coulomb exact*/ -#define _avogadro_number_codata2018 6.02214076e+23 /* exact */ -#define _boltzmann_codata2018 1.380649e-23 /* joule/K exact */ -#define _faraday_codata2018 \ - (_electron_charge_codata2018 * _avogadro_number_codata2018) /* 96485.33212... coulomb/mol */ -#define _gasconstant_codata2018 \ - (_boltzmann_codata2018 * _avogadro_number_codata2018) /* 8.314462618... 
joule/mol-K */ - -/* e/k in K/millivolt */ -#define _e_over_k_codata2018 \ - (.001 * _electron_charge_codata2018 / _boltzmann_codata2018) /* 11.604518... K/mV */ diff --git a/coreneuron/utils/nrnoc_aux.hpp b/coreneuron/utils/nrnoc_aux.hpp index a67569d56..10b5880ea 100644 --- a/coreneuron/utils/nrnoc_aux.hpp +++ b/coreneuron/utils/nrnoc_aux.hpp @@ -34,5 +34,4 @@ extern void hoc_execerror(const char*, const char*); /* print and abort */ extern void hoc_warning(const char*, const char*); extern double hoc_Exp(double x); - } // namespace coreneuron diff --git a/coreneuron/utils/units.hpp b/coreneuron/utils/units.hpp new file mode 100644 index 000000000..de44343fe --- /dev/null +++ b/coreneuron/utils/units.hpp @@ -0,0 +1,38 @@ +/* +# ============================================================================= +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL +# +# See top-level LICENSE file for details. +# ============================================================================= +*/ +#pragma once +namespace coreneuron { +namespace units { +#if CORENEURON_USE_LEGACY_UNITS == 1 +constexpr double faraday{96485.309}; +constexpr double gasconstant{8.3134}; +#else +/* NMODL translated MOD files get unit constants typically from + * share/lib/nrnunits.lib.in. But there were other source files that hardcode + * some of the constants. Here we gather a few modern units into a single place + * (but, unfortunately, also in nrnunits.lib.in). Legacy units cannot be + * gathered here because they can differ slightly from place to place. + * + * These come from https://physics.nist.gov/cuu/Constants/index.html. + * Termed the "2018 CODATA recommended values", they became available + * on 20 May 2019 and replace the 2014 CODATA set. 
+ * + * See oc/hoc_init.c, nrnoc/eion.c, nrniv/kschan.h + */ +namespace detail { +constexpr double electron_charge{1.602176634e-19}; // coulomb exact +constexpr double avogadro_number{6.02214076e+23}; // exact +constexpr double boltzmann{1.380649e-23}; // joule/K exact +} // namespace detail +constexpr double faraday{detail::electron_charge * detail::avogadro_number}; // 96485.33212... + // coulomb/mol +constexpr double gasconstant{detail::boltzmann * detail::avogadro_number}; // 8.314462618... + // joule/mol-K +#endif +} // namespace units +} // namespace coreneuron diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index 7b7e1e1a5..4217af270 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -64,7 +64,7 @@ if(NOT CORENRN_ENABLE_REPORTING) list( APPEND TEST_CASES_WITH_ARGS - "ring_serial!--tstop 100. --celsius 6.3 --datpath ${RING_DATASET_DIR} ${MODEL_STATS_ARG} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_serial" + "ring_serial!${GPU_ARGS} --tstop 100. 
--celsius 6.3 --datpath ${RING_DATASET_DIR} ${MODEL_STATS_ARG} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_serial" ) endif() From e2585beb77035c3839f83f634c1405f3341186ad Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 22 Apr 2022 17:15:52 +0200 Subject: [PATCH 008/128] Build a shared library --- coreneuron/CMakeLists.txt | 8 ++++---- coreneuron/mechanism/membfunc.hpp | 2 +- .../mpi/core/{nrnmpi.cpp => resolve.cpp} | 0 coreneuron/permute/cellorder.cpp | 2 +- extra/nrnivmodl_core_makefile.in | 20 +++++++++++++------ 5 files changed, 20 insertions(+), 12 deletions(-) rename coreneuron/mpi/core/{nrnmpi.cpp => resolve.cpp} (100%) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 665205802..68b0f54ff 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -38,7 +38,7 @@ set(MPI_LIB_FILES "mpi/lib/mpispike.cpp" "mpi/lib/nrnmpi.cpp") set(MPI_CORE_FILES "mpi/core/nrnmpi_def_cinc.cpp") if(CORENRN_ENABLE_MPI) # Building these requires -ldl, which is only added if MPI is enabled. - list(APPEND MPI_CORE_FILES "mpi/core/nrnmpi.cpp" "mpi/core/nrnmpidec.cpp") + list(APPEND MPI_CORE_FILES "mpi/core/resolve.cpp" "mpi/core/nrnmpidec.cpp") endif() file(COPY ${CORENEURON_PROJECT_SOURCE_DIR}/external/Random123/include/Random123 DESTINATION ${CMAKE_BINARY_DIR}/include) @@ -108,14 +108,14 @@ if(CORENRN_ENABLE_GPU) set_source_files_properties(${OPENACC_EXCLUDED_FILES} PROPERTIES COMPILE_FLAGS "-DDISABLE_OPENACC") # Only compile the explicit CUDA implementation of the Hines solver in GPU builds. - list(APPEND CORENEURON_CODE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/permute/cellorder.cu) + # list(APPEND CORENEURON_CODE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/permute/cellorder.cu) # Eigen-3.5+ provides better GPU support. However, some functions cannot be called directly from # within an OpenACC region. Therefore, we need to wrap them in a special API (decorate them with # __device__ & acc routine tokens), which allows us to eventually call them from OpenACC. 
Calling # these functions from CUDA kernels presents no issue ... if(CORENRN_ENABLE_NMODL AND EXISTS ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) - list(APPEND CORENEURON_CODE_FILES ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) + # list(APPEND CORENEURON_CODE_FILES ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) endif() endif() @@ -287,7 +287,7 @@ if(CORENRN_ENABLE_SHARED) CACHE INTERNAL "coreneuron mechanism library") else() set(corenrn_mech_library - "${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}/libcorenrnmech${CMAKE_STATIC_LIBRARY_SUFFIX}" + "${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}/libcorenrnmech${CMAKE_SHARED_LIBRARY_SUFFIX}" CACHE INTERNAL "coreneuron mechanism library") endif() diff --git a/coreneuron/mechanism/membfunc.hpp b/coreneuron/mechanism/membfunc.hpp index 638b52d87..1bda5aba4 100644 --- a/coreneuron/mechanism/membfunc.hpp +++ b/coreneuron/mechanism/membfunc.hpp @@ -114,7 +114,7 @@ extern void nrn_jacob_capacitance(NrnThread*, Memb_list*, int); extern void nrn_writes_conc(int, int); nrn_pragma_omp(declare target) nrn_pragma_acc(routine seq) -extern void nrn_wrote_conc(int, double*, int, int, double**, double, int); +void nrn_wrote_conc(int, double*, int, int, double**, double, int); nrn_pragma_omp(end declare target) constexpr double ktf(double celsius) { return 1000. 
* units::gasconstant * (celsius + 273.15) / units::faraday; diff --git a/coreneuron/mpi/core/nrnmpi.cpp b/coreneuron/mpi/core/resolve.cpp similarity index 100% rename from coreneuron/mpi/core/nrnmpi.cpp rename to coreneuron/mpi/core/resolve.cpp diff --git a/coreneuron/permute/cellorder.cpp b/coreneuron/permute/cellorder.cpp index c95fedcf2..54c2e9b91 100644 --- a/coreneuron/permute/cellorder.cpp +++ b/coreneuron/permute/cellorder.cpp @@ -576,7 +576,7 @@ void solve_interleaved2(int ith) { if (corenrn_param.gpu && corenrn_param.cuda_interface) { auto* d_nt = static_cast(acc_deviceptr(nt)); auto* d_info = static_cast(acc_deviceptr(interleave_info + ith)); - solve_interleaved2_launcher(d_nt, d_info, ncore, acc_get_cuda_stream(nt->stream_id)); + //solve_interleaved2_launcher(d_nt, d_info, ncore, acc_get_cuda_stream(nt->stream_id)); } else { #endif int* ncycles = ii.cellsize; // nwarp of these diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in index 585cf3795..9388059fe 100644 --- a/extra/nrnivmodl_core_makefile.in +++ b/extra/nrnivmodl_core_makefile.in @@ -38,7 +38,7 @@ MOD_OBJS_DIR = $(OUTPUT_DIR)/corenrn/build # Linked libraries gathered by CMake LDFLAGS = $(LINKFLAGS) @CORENRN_COMMON_LDFLAGS@ -CORENRNLIB_FLAGS = -L$(CORENRN_LIB_DIR) -lcoreneuron +CORENRNLIB_FLAGS = CORENRNLIB_FLAGS += $(if @reportinglib_LIB_DIR@, -W$(subst ;, -W,l,-rpath,@reportinglib_LIB_DIR@),) CORENRNLIB_FLAGS += $(if @sonatareport_LIB_DIR@, -W$(subst ;, -W,l,-rpath,@sonatareport_LIB_DIR@),) CORENRNLIB_FLAGS += $(if @caliper_LIB_DIR@, -W$(subst ;, -W,l,-rpath,@caliper_LIB_DIR@),) @@ -114,8 +114,8 @@ ENGINEMECH_OBJ = $(MOD_OBJS_DIR)/enginemech.o # Depending on static/shared build, determine library name and it's suffix ifeq ($(TARGET_LIB_TYPE), STATIC) - LIB_SUFFIX = @CMAKE_STATIC_LIBRARY_SUFFIX@ - corenrnmech_lib_target = coremech_lib_static + LIB_SUFFIX = @CMAKE_SHARED_LIBRARY_SUFFIX@ + corenrnmech_lib_target = coremech_lib_shared else LIB_SUFFIX = 
@CMAKE_SHARED_LIBRARY_SUFFIX@ corenrnmech_lib_target = coremech_lib_shared @@ -209,7 +209,7 @@ endif # main target to build binary $(SPECIAL_EXE): coremech_lib_target @printf " => $(C_GREEN)Binary$(C_RESET) creating $(SPECIAL_EXE)\n" - $(CXX_LINK_EXE_CMD) -o $(SPECIAL_EXE) $(CORENRN_SHARE_CORENRN_DIR)/coreneuron.cpp \ + g++ -o $(SPECIAL_EXE) $(CORENRN_SHARE_CORENRN_DIR)/coreneuron.cpp \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ -L$(OUTPUT_DIR) -l$(COREMECH_LIB_NAME) $(CORENRNLIB_FLAGS) $(LDFLAGS) \ -L$(CORENRN_LIB_DIR) -lscopmath \ @@ -225,10 +225,18 @@ $(ENGINEMECH_OBJ): $(CORENRN_SHARE_CORENRN_DIR)/enginemech.cpp | $(MOD_OBJS_DIR) # build shared library of mechanisms coremech_lib_shared: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always + # extract the object files from libcoreneuron.a + mkdir -p $(MOD_OBJS_DIR)/libcoreneuron + ar --output=$(MOD_OBJS_DIR)/libcoreneuron x $(CORENRN_LIB_DIR)/libcoreneuron.a + # extract the object files from libscopmath.a + mkdir -p $(MOD_OBJS_DIR)/libscopmath + ar --output=$(MOD_OBJS_DIR)/libscopmath x $(CORENRN_LIB_DIR)/libscopmath.a $(CXX_SHARED_LIB_CMD) $(ENGINEMECH_OBJ) -o ${COREMECH_LIB_PATH} $(ALL_OBJS) \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ - $(LDFLAGS)\ - ${SONAME_OPTION} $(CORENRNLIB_FLAGS) -Wl,-rpath,$(CORENRN_LIB_DIR); + $(LDFLAGS) ${SONAME_OPTION} -Wl,--start-group \ + $(MOD_OBJS_DIR)/libcoreneuron/*.o \ + -Wl,--end-group -Wl,--start-group $(MOD_OBJS_DIR)/libscopmath/*.o \ + -Wl,--end-group $(CORENRNLIB_FLAGS) -Wl,-rpath,$(CORENRN_LIB_DIR); # build static library of mechanisms coremech_lib_static: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always From 11785bc72e509650b4ea82aac07957d522541fb9 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 22 Apr 2022 17:45:51 +0200 Subject: [PATCH 009/128] fudge --- coreneuron/gpu/nrn_acc_manager.cpp | 3 +++ coreneuron/gpu/nrn_acc_manager.hpp | 2 +- coreneuron/utils/randoms/nrnran123.cpp | 5 +++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp 
b/coreneuron/gpu/nrn_acc_manager.cpp index ed3bf659c..8121b3b0b 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -10,6 +10,7 @@ #include #include "coreneuron/apps/corenrn_parameters.hpp" +#include "coreneuron/gpu/nrn_acc_manager.hpp" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/network/netcon.hpp" #include "coreneuron/nrniv/nrniv_decl.h" @@ -1287,6 +1288,8 @@ void init_gpu() { std::cout << " Info : " << num_devices_per_node << " GPUs shared by " << local_size << " ranks per node\n"; } + + init_nrnran123(); } void nrn_VecPlay_copyto_device(NrnThread* nt, void** d_vecplay) { diff --git a/coreneuron/gpu/nrn_acc_manager.hpp b/coreneuron/gpu/nrn_acc_manager.hpp index 72d222cdd..ee5ed2483 100644 --- a/coreneuron/gpu/nrn_acc_manager.hpp +++ b/coreneuron/gpu/nrn_acc_manager.hpp @@ -24,6 +24,6 @@ void update_net_send_buffer_on_host(NrnThread* nt, NetSendBuffer_t* nsb); void update_weights_from_gpu(NrnThread* threads, int nthreads); void init_gpu(); - +void init_nrnran123(); } // namespace coreneuron #endif // _nrn_device_manager_ diff --git a/coreneuron/utils/randoms/nrnran123.cpp b/coreneuron/utils/randoms/nrnran123.cpp index 77ff88fb3..63c205f5b 100644 --- a/coreneuron/utils/randoms/nrnran123.cpp +++ b/coreneuron/utils/randoms/nrnran123.cpp @@ -5,6 +5,7 @@ # See top-level LICENSE file for details. # =============================================================================. 
*/ +#include "coreneuron/gpu/nrn_acc_manager.hpp" #include "coreneuron/mpi/core/nrnmpi.hpp" #include "coreneuron/utils/memory.h" #include "coreneuron/utils/nrnmutdec.hpp" @@ -96,6 +97,10 @@ CORENRN_HOST_DEVICE philox4x32_ctr_t philox4x32_helper(coreneuron::nrnran123_Sta } // namespace namespace coreneuron { +void init_nrnran123() { + nrn_pragma_acc(enter data copyin(g_k)) +} + std::size_t nrnran123_instance_count() { return g_instance_count; } From 49953ff03d135fa04be601ed892b3e04373a1afe Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Mon, 25 Apr 2022 12:36:10 +0200 Subject: [PATCH 010/128] fudge --- coreneuron/gpu/nrn_acc_manager.cpp | 8 +++++--- coreneuron/io/nrn_setup.cpp | 3 ++- .../mechanism/mech/mod2c_core_thread.hpp | 2 +- coreneuron/utils/memory.cpp | 2 +- coreneuron/utils/memory.h | 12 ++++++------ coreneuron/utils/randoms/nrnran123.cpp | 19 +++++++++---------- 6 files changed, 24 insertions(+), 22 deletions(-) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index 8121b3b0b..d4db35a75 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -77,7 +77,7 @@ void cnrn_target_set_default_device(int device_num) { } #ifdef CORENEURON_ENABLE_GPU - +#ifndef CORENEURON_UNIFIED_MEMORY static Memb_list* copy_ml_to_device(const Memb_list* ml, int type, double* dml_data) { // As we never run code for artificial cell inside GPU we don't copy it. 
int is_art = corenrn.get_is_artificial()[type]; @@ -169,6 +169,7 @@ static Memb_list* copy_ml_to_device(const Memb_list* ml, int type, double* dml_d return d_ml; } +#endif static void update_ml_on_host(const Memb_list* ml, int type) { int is_art = corenrn.get_is_artificial()[type]; @@ -258,6 +259,7 @@ static void delete_ml_from_device(Memb_list* ml, int type) { cnrn_target_delete(ml->pdata, pcnt); } if (ml->global_variables) { + // std::byte* in C++17 cnrn_target_delete(reinterpret_cast(ml->global_variables), ml->global_variables_size); } @@ -1121,7 +1123,7 @@ void nrn_newtonspace_delete_from_device(NewtonSpace* ns) { } void nrn_sparseobj_copyto_device(SparseObj* so) { -#ifdef CORENEURON_ENABLE_GPU +#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_UNIFIED_MEMORY) // FIXME this check needs to be tweaked if we ever want to run with a mix // of CPU and GPU threads. if (nrn_threads[0].compute_gpu == 0) { @@ -1204,7 +1206,7 @@ void nrn_sparseobj_copyto_device(SparseObj* so) { } void nrn_sparseobj_delete_from_device(SparseObj* so) { -#ifdef CORENEURON_ENABLE_GPU +#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_UNIFIED_MEMORY) // FIXME this check needs to be tweaked if we ever want to run with a mix // of CPU and GPU threads. 
if (nrn_threads[0].compute_gpu == 0) { diff --git a/coreneuron/io/nrn_setup.cpp b/coreneuron/io/nrn_setup.cpp index 98382f9da..8f9461cb1 100644 --- a/coreneuron/io/nrn_setup.cpp +++ b/coreneuron/io/nrn_setup.cpp @@ -760,7 +760,8 @@ void nrn_cleanup() { } if (ml->global_variables) { - free(ml->global_variables); + std::cout << "Cannot generically free Memb_list::global_variables, leaking it" << std::endl; + // free(ml->global_variables); ml->global_variables = nullptr; } diff --git a/coreneuron/mechanism/mech/mod2c_core_thread.hpp b/coreneuron/mechanism/mech/mod2c_core_thread.hpp index d18160f3a..4ec7b4ff6 100644 --- a/coreneuron/mechanism/mech/mod2c_core_thread.hpp +++ b/coreneuron/mechanism/mech/mod2c_core_thread.hpp @@ -44,7 +44,7 @@ struct Item { using List = Item; /* list of mixed items */ -struct SparseObj { /* all the state information */ +struct SparseObj : public MemoryManaged { /* all the state information */ Elm** rowst{}; /* link to first element in row (solution order)*/ Elm** diag{}; /* link to pivot element in row (solution order)*/ void* elmpool{}; /* no interthread cache line sharing for elements */ diff --git a/coreneuron/utils/memory.cpp b/coreneuron/utils/memory.cpp index 70d928b63..8f45487dc 100644 --- a/coreneuron/utils/memory.cpp +++ b/coreneuron/utils/memory.cpp @@ -15,7 +15,7 @@ #include namespace coreneuron { -bool unified_memory_enabled() { +bool gpu_enabled() { #ifdef CORENEURON_ENABLE_GPU return corenrn_param.gpu; #else diff --git a/coreneuron/utils/memory.h b/coreneuron/utils/memory.h index 9a2e65645..254c21544 100644 --- a/coreneuron/utils/memory.h +++ b/coreneuron/utils/memory.h @@ -22,13 +22,13 @@ #endif namespace coreneuron { -/** @brief Check if allocate_unified will return a unified memory address. - * - * If false, [de]allocate_unified simply forward to new/delete. It is - * convenient to include this method here to avoid having to access - * corenrn_param directly. +/** + * @brief Check if GPU support is enabled. 
+ * + * This returns true if GPU support was enabled at compile time and at runtime + * via coreneuron.gpu = True and/or --gpu, otherwise it returnss false. */ -bool unified_memory_enabled(); +bool gpu_enabled(); /** @brief Allocate unified memory in GPU builds iff GPU enabled, otherwise new */ diff --git a/coreneuron/utils/randoms/nrnran123.cpp b/coreneuron/utils/randoms/nrnran123.cpp index 63c205f5b..b550a460b 100644 --- a/coreneuron/utils/randoms/nrnran123.cpp +++ b/coreneuron/utils/randoms/nrnran123.cpp @@ -98,6 +98,7 @@ CORENRN_HOST_DEVICE philox4x32_ctr_t philox4x32_helper(coreneuron::nrnran123_Sta namespace coreneuron { void init_nrnran123() { + // TODO only do this if it isn't already present? nrn_pragma_acc(enter data copyin(g_k)) } @@ -160,20 +161,16 @@ double nrnran123_negexp(nrnran123_State* s) { /* at cost of a cached value we could compute two at a time. */ double nrnran123_normal(nrnran123_State* s) { - double w, x, y; - double u1, u2; - + double w, u1; do { u1 = nrnran123_dblpick(s); - u2 = nrnran123_dblpick(s); + double u2{nrnran123_dblpick(s)}; u1 = 2. * u1 - 1.; u2 = 2. * u2 - 1.; w = (u1 * u1) + (u2 * u2); } while (w > 1); - - y = std::sqrt((-2. * log(w)) / w); - x = u1 * y; - return x; + double y{std::sqrt((-2. * std::log(w)) / w)}; + return u1 * y; } double nrnran123_uint2dbl(uint32_t u) { @@ -196,8 +193,10 @@ void nrnran123_set_globalindex(uint32_t gix) { } } g_k.v[0] = gix; - nrn_pragma_acc(update device(g_k)) - nrn_pragma_omp(target update to(g_k)) + if(coreneuron::gpu_enabled()) { + nrn_pragma_acc(update device(g_k)) + nrn_pragma_omp(target update to(g_k)) + } } /** @brief Allocate a new Random123 stream. 
From edf36082ea067a00a546f57ea24e0745abbff3bc Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Mon, 25 Apr 2022 16:36:24 +0200 Subject: [PATCH 011/128] scopmath and coreneuron are inside corenrnmech --- CMake/OpenAccHelper.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index d64227154..f1ff3a4c3 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -105,7 +105,7 @@ if(CORENRN_ENABLE_GPU) GLOBAL PROPERTY CORENEURON_LIB_LINK_FLAGS - "${NVHPC_ACC_COMP_FLAGS} -rdynamic -lrt -Wl,--whole-archive -L${CMAKE_HOST_SYSTEM_PROCESSOR} -lcorenrnmech -lscopmath -L$(libdir) -lcoreneuron -Wl,--no-whole-archive" + "${NVHPC_ACC_COMP_FLAGS} -rdynamic -lrt -Wl,--whole-archive -L${CMAKE_HOST_SYSTEM_PROCESSOR} -lcorenrnmech -Wl,--no-whole-archive" ) else() set_property(GLOBAL PROPERTY CORENEURON_LIB_LINK_FLAGS From b79cab74dde67b6039271da7ac89578ba2ea3abf Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Mon, 25 Apr 2022 16:36:40 +0200 Subject: [PATCH 012/128] fast_imem may be in unified memory --- coreneuron/sim/fast_imem.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/coreneuron/sim/fast_imem.cpp b/coreneuron/sim/fast_imem.cpp index 1218b7967..b1665645d 100644 --- a/coreneuron/sim/fast_imem.cpp +++ b/coreneuron/sim/fast_imem.cpp @@ -21,9 +21,9 @@ bool nrn_use_fast_imem; void fast_imem_free() { for (auto nt = nrn_threads; nt < nrn_threads + nrn_nthread; ++nt) { if (nt->nrn_fast_imem) { - free(nt->nrn_fast_imem->nrn_sav_rhs); - free(nt->nrn_fast_imem->nrn_sav_d); - free(nt->nrn_fast_imem); + free_memory(nt->nrn_fast_imem->nrn_sav_rhs); + free_memory(nt->nrn_fast_imem->nrn_sav_d); + free_memory(nt->nrn_fast_imem); nt->nrn_fast_imem = nullptr; } } From 5090beb91c97c0249c2602c5b864650769a4674e Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Mon, 25 Apr 2022 16:36:52 +0200 Subject: [PATCH 013/128] cleanup --- coreneuron/mechanism/membfunc.hpp | 7 ------- 1 file 
changed, 7 deletions(-) diff --git a/coreneuron/mechanism/membfunc.hpp b/coreneuron/mechanism/membfunc.hpp index 1bda5aba4..a40d77438 100644 --- a/coreneuron/mechanism/membfunc.hpp +++ b/coreneuron/mechanism/membfunc.hpp @@ -132,13 +132,6 @@ inline double nrn_ghk(double v, double ci, double co, double z, double celsius) double const eci{ci * efun(-temp)}; return .001 * z * units::faraday * (eci - eco); } -/** - * This signature requires the use of the `celsius` global variable, which can - * cause problems when executing on GPU. - */ -[[deprecated]] inline double nrn_ghk(double v, double ci, double co, double z) { - return nrn_ghk(v, ci, co, z, celsius); -} extern void hoc_register_prop_size(int, int, int); extern void hoc_register_dparam_semantics(int type, int, const char* name); extern void hoc_reg_ba(int, mod_f_t, int); From 5f86115ff939ff2828e58b12fa872df3abc0bbf9 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Mon, 25 Apr 2022 16:37:15 +0200 Subject: [PATCH 014/128] don't cudaFree things allocated by NEURON --- coreneuron/io/phase2.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coreneuron/io/phase2.cpp b/coreneuron/io/phase2.cpp index bb3f7f99f..77cfaa609 100644 --- a/coreneuron/io/phase2.cpp +++ b/coreneuron/io/phase2.cpp @@ -337,7 +337,7 @@ void Phase2::read_direct(int thread_id, const NrnThread& nt) { offset += nrn_soa_padded_size(nodecounts[i], layout) * param_sizes[type]; if (nodeindices_) { std::copy(nodeindices_, nodeindices_ + nodecounts[i], tml.nodeindices.data()); - free_memory(nodeindices_); + free(nodeindices_); // not free_memory because this is allocated by NEURON? 
} if (corenrn.get_is_artificial()[type]) { assert(nodeindices_ == nullptr); From 6acc288ec82c3c8927295e8a4fa27860e97df2d9 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Mon, 25 Apr 2022 16:37:23 +0200 Subject: [PATCH 015/128] random123 hackery --- coreneuron/utils/memory.h | 2 +- coreneuron/utils/randoms/nrnran123.cpp | 61 ++++++++++++++++++++------ 2 files changed, 48 insertions(+), 15 deletions(-) diff --git a/coreneuron/utils/memory.h b/coreneuron/utils/memory.h index 254c21544..f1b7042c8 100644 --- a/coreneuron/utils/memory.h +++ b/coreneuron/utils/memory.h @@ -24,7 +24,7 @@ namespace coreneuron { /** * @brief Check if GPU support is enabled. - * + * * This returns true if GPU support was enabled at compile time and at runtime * via coreneuron.gpu = True and/or --gpu, otherwise it returnss false. */ diff --git a/coreneuron/utils/randoms/nrnran123.cpp b/coreneuron/utils/randoms/nrnran123.cpp index b550a460b..c815a8f24 100644 --- a/coreneuron/utils/randoms/nrnran123.cpp +++ b/coreneuron/utils/randoms/nrnran123.cpp @@ -21,10 +21,14 @@ #include #endif +#include + // Defining these attributes seems to help nvc++ in OpenMP target offload mode. #if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENMP) && defined(__CUDACC__) #define CORENRN_HOST_DEVICE __host__ __device__ +#elif defined(__CUDACC__) +#define CORENRN_HOST_DEVICE __host__ __device__ #else #define CORENRN_HOST_DEVICE #endif @@ -77,29 +81,45 @@ using random123_allocator = coreneuron::unified_allocatorc, g_k); + return philox4x32(s->c, global_state()); } } // namespace namespace coreneuron { void init_nrnran123() { - // TODO only do this if it isn't already present? - nrn_pragma_acc(enter data copyin(g_k)) + // if(coreneuron::gpu_enabled()) { + // // TODO only do this if it isn't already present? 
+ // auto& g_k = global_state(); + // nrn_pragma_acc(enter data copyin(g_k)) + // } } std::size_t nrnran123_instance_count() { @@ -108,7 +128,7 @@ std::size_t nrnran123_instance_count() { /* if one sets the global, one should reset all the stream sequences. */ uint32_t nrnran123_get_globalindex() { - return g_k.v[0]; + return global_state().v[0]; } void nrnran123_getseq(nrnran123_State* s, uint32_t* seq, char* which) { @@ -182,6 +202,7 @@ double nrnran123_uint2dbl(uint32_t u) { /* nrn123 streams are created from cpu launcher routine */ void nrnran123_set_globalindex(uint32_t gix) { // If the global seed is changing then we shouldn't have any active streams. + auto& g_k = global_state(); { std::lock_guard _{g_instance_count_mutex}; if (g_instance_count != 0 && nrnmpi_myid == 0) { @@ -192,10 +213,22 @@ void nrnran123_set_globalindex(uint32_t gix) { << g_k.v[0] << ')' << std::endl; } } - g_k.v[0] = gix; - if(coreneuron::gpu_enabled()) { - nrn_pragma_acc(update device(g_k)) - nrn_pragma_omp(target update to(g_k)) + if(g_k.v[0] != gix) { + g_k.v[0] = gix; + if(coreneuron::gpu_enabled()) { + { + auto const code = cudaMemcpyToSymbol(g_k_dev, &g_k, sizeof(g_k)); + assert(code == cudaSuccess); + } + { + auto const code = cudaDeviceSynchronize(); + assert(code == cudaSuccess); + } + std::cout << "trying to read g_k_dev from host..." 
<< std::endl; + std::cout << g_k_dev.v[0] << std::endl; + // nrn_pragma_acc(update device(g_k)) + // nrn_pragma_omp(target update to(g_k)) + } } } From 7ceaff6f48b89fb3ae59f04e01e88864f41cbf84 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Mon, 25 Apr 2022 21:33:31 +0200 Subject: [PATCH 016/128] homegrown present table to avoid dynamic loading + acc_deviceptr limitations --- coreneuron/gpu/nrn_acc_manager.cpp | 64 +++++++++++++++++++++++--- coreneuron/io/nrn_setup.cpp | 5 +- coreneuron/io/phase2.cpp | 2 +- coreneuron/network/partrans.cpp | 19 +++----- coreneuron/permute/cellorder.cpp | 2 +- coreneuron/sim/fast_imem.cpp | 2 +- coreneuron/sim/multicore.hpp | 2 +- coreneuron/utils/memory.h | 2 +- coreneuron/utils/offload.hpp | 24 ++++++++-- coreneuron/utils/randoms/nrnran123.cpp | 4 +- 10 files changed, 96 insertions(+), 30 deletions(-) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index d4db35a75..20ade530b 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -32,15 +32,68 @@ #include #endif +#ifdef CORENEURON_ENABLE_PRESENT_TABLE +#include +#include +#include +#include +namespace { +enum class byte : unsigned char {}; // std::byte in C++17 +std::map> present_table; +std::mutex present_table_mutex; +} // namespace +#endif + namespace coreneuron { extern InterleaveInfo* interleave_info; -void copy_ivoc_vect_to_device(const IvocVect& iv, IvocVect& div); -void delete_ivoc_vect_from_device(IvocVect&); void nrn_ion_global_map_copyto_device(); void nrn_ion_global_map_delete_from_device(); void nrn_VecPlay_copyto_device(NrnThread* nt, void** d_vecplay); void nrn_VecPlay_delete_from_device(NrnThread* nt); +#ifdef CORENEURON_ENABLE_PRESENT_TABLE +void* cnrn_target_deviceptr_impl(void const* h_ptr) { + if (!h_ptr) { + return nullptr; + } + // note no locking, undefined behaviour if you call this concurrently with + // the copyin/delete methods (which do lock) + assert(!present_table.empty()); + // 
prev(first iterator greater than h_ptr or last if not found) gives the first iterator less + // than or equal to h_ptr + auto const iter = std::prev(std::upper_bound( + present_table.begin(), present_table.end(), h_ptr, [](void const* hp, auto const& entry) { + return hp < entry.first; + })); + assert(iter != present_table.end()); + byte const* const h_byte_ptr{static_cast(h_ptr)}; + byte const* const h_start_of_block{iter->first}; + std::size_t const block_size{iter->second.first}; + byte* const d_start_of_block{iter->second.second}; + assert(h_byte_ptr < h_start_of_block + block_size); + return d_start_of_block + (h_byte_ptr - h_start_of_block); +} +void cnrn_target_copyin_update_present_table(void const* h_ptr, void* d_ptr, std::size_t len) { + if (!h_ptr) { + assert(!d_ptr); + return; + } + std::lock_guard _{present_table_mutex}; + auto const result = present_table.emplace(static_cast(h_ptr), + std::make_pair(len, static_cast(d_ptr))); +} +void cnrn_target_delete_update_present_table(void const* h_ptr, std::size_t len) { + if (!h_ptr) { + return; + } + std::lock_guard _{present_table_mutex}; + auto const iter = present_table.find(static_cast(h_ptr)); + assert(iter != present_table.end()); + assert(iter->second.first == len); + present_table.erase(iter); +} +#endif + int cnrn_target_get_num_devices() { #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENACC) @@ -260,8 +313,7 @@ static void delete_ml_from_device(Memb_list* ml, int type) { } if (ml->global_variables) { // std::byte* in C++17 - cnrn_target_delete(reinterpret_cast(ml->global_variables), - ml->global_variables_size); + cnrn_target_delete(static_cast(ml->global_variables), ml->global_variables_size); } cnrn_target_delete(ml->nodeindices, n); @@ -618,7 +670,7 @@ void delete_ivoc_vect_from_device(IvocVect& vec) { if (n) { cnrn_target_delete(vec.data(), n); } - cnrn_target_delete(&vec); + // cnrn_target_delete(&vec); #else (void) vec; #endif @@ -1329,7 
+1381,7 @@ void nrn_VecPlay_copyto_device(NrnThread* nt, void** d_vecplay) { void nrn_VecPlay_delete_from_device(NrnThread* nt) { for (int i = 0; i < nt->n_vecplay; i++) { - auto* vecplay_instance = reinterpret_cast(nt->_vecplay[i]); + auto* vecplay_instance = static_cast(nt->_vecplay[i]); cnrn_target_delete(vecplay_instance->e_); if (vecplay_instance->discon_indices_) { delete_ivoc_vect_from_device(*(vecplay_instance->discon_indices_)); diff --git a/coreneuron/io/nrn_setup.cpp b/coreneuron/io/nrn_setup.cpp index 8f9461cb1..f34a489c1 100644 --- a/coreneuron/io/nrn_setup.cpp +++ b/coreneuron/io/nrn_setup.cpp @@ -754,13 +754,16 @@ void nrn_cleanup() { ml->_thread = nullptr; } + // Probably causes problems with NMODL, which allocates its instance + // in unified memory. if (ml->instance) { free(ml->instance); ml->instance = nullptr; } if (ml->global_variables) { - std::cout << "Cannot generically free Memb_list::global_variables, leaking it" << std::endl; + std::cout << "Cannot generically free Memb_list::global_variables, leaking it" + << std::endl; // free(ml->global_variables); ml->global_variables = nullptr; } diff --git a/coreneuron/io/phase2.cpp b/coreneuron/io/phase2.cpp index 77cfaa609..0b96e1956 100644 --- a/coreneuron/io/phase2.cpp +++ b/coreneuron/io/phase2.cpp @@ -337,7 +337,7 @@ void Phase2::read_direct(int thread_id, const NrnThread& nt) { offset += nrn_soa_padded_size(nodecounts[i], layout) * param_sizes[type]; if (nodeindices_) { std::copy(nodeindices_, nodeindices_ + nodecounts[i], tml.nodeindices.data()); - free(nodeindices_); // not free_memory because this is allocated by NEURON? + free(nodeindices_); // not free_memory because this is allocated by NEURON? 
} if (corenrn.get_is_artificial()[type]) { assert(nodeindices_ == nullptr); diff --git a/coreneuron/network/partrans.cpp b/coreneuron/network/partrans.cpp index ddfb49421..28fee5d86 100644 --- a/coreneuron/network/partrans.cpp +++ b/coreneuron/network/partrans.cpp @@ -133,12 +133,9 @@ void nrnthread_v_transfer(NrnThread* _nt) { void nrn_partrans::copy_gap_indices_to_device() { // Ensure index vectors, src_gather, and insrc_buf_ are on the gpu. if (insrcdspl_) { - int n_insrc_buf = insrcdspl_[nrnmpi_numprocs]; - static_cast(n_insrc_buf); - nrn_pragma_acc(enter data create(insrc_buf_[:n_insrc_buf])) - // clang-format off - nrn_pragma_omp(target enter data map(alloc: insrc_buf_[:n_insrc_buf])) - // clang-format off + // TODO: we don't actually need to copy here, just allocate + associate + // storage on the device + cnrn_target_copyin(insrc_buf_, insrcdspl_[nrnmpi_numprocs]); } for (int tid = 0; tid < nrn_nthread; ++tid) { const NrnThread* nt = nrn_threads + tid; @@ -150,13 +147,9 @@ void nrn_partrans::copy_gap_indices_to_device() { if (!ttd.src_indices.empty()) { cnrn_target_copyin(ttd.src_indices.data(), ttd.src_indices.size()); - - size_t n_src_gather = ttd.src_gather.size(); - const double* src_gather = ttd.src_gather.data(); - static_cast(n_src_gather); - static_cast(src_gather); - nrn_pragma_acc(enter data create(src_gather[:n_src_gather])) - nrn_pragma_omp(target enter data map(alloc: src_gather[:n_src_gather])) + // TODO: we don't actually need to copy here, just allocate + + // associate storage on the device. 
+ cnrn_target_copyin(ttd.src_gather.data(), ttd.src_gather.size()); } if (ttd.insrc_indices.size()) { diff --git a/coreneuron/permute/cellorder.cpp b/coreneuron/permute/cellorder.cpp index 54c2e9b91..2c2fca92e 100644 --- a/coreneuron/permute/cellorder.cpp +++ b/coreneuron/permute/cellorder.cpp @@ -576,7 +576,7 @@ void solve_interleaved2(int ith) { if (corenrn_param.gpu && corenrn_param.cuda_interface) { auto* d_nt = static_cast(acc_deviceptr(nt)); auto* d_info = static_cast(acc_deviceptr(interleave_info + ith)); - //solve_interleaved2_launcher(d_nt, d_info, ncore, acc_get_cuda_stream(nt->stream_id)); + // solve_interleaved2_launcher(d_nt, d_info, ncore, acc_get_cuda_stream(nt->stream_id)); } else { #endif int* ncycles = ii.cellsize; // nwarp of these diff --git a/coreneuron/sim/fast_imem.cpp b/coreneuron/sim/fast_imem.cpp index b1665645d..d3b463a48 100644 --- a/coreneuron/sim/fast_imem.cpp +++ b/coreneuron/sim/fast_imem.cpp @@ -34,7 +34,7 @@ void nrn_fast_imem_alloc() { fast_imem_free(); for (auto nt = nrn_threads; nt < nrn_threads + nrn_nthread; ++nt) { int n = nt->end; - nt->nrn_fast_imem = (NrnFastImem*) ecalloc(1, sizeof(NrnFastImem)); + nt->nrn_fast_imem = (NrnFastImem*) ecalloc_align(1, sizeof(NrnFastImem)); nt->nrn_fast_imem->nrn_sav_rhs = (double*) ecalloc_align(n, sizeof(double)); nt->nrn_fast_imem->nrn_sav_d = (double*) ecalloc_align(n, sizeof(double)); } diff --git a/coreneuron/sim/multicore.hpp b/coreneuron/sim/multicore.hpp index c9b3cb58e..18cd613f3 100644 --- a/coreneuron/sim/multicore.hpp +++ b/coreneuron/sim/multicore.hpp @@ -53,7 +53,7 @@ struct NrnFastImem { double* nrn_sav_d; }; -struct TrajectoryRequests { +struct TrajectoryRequests: public MemoryManaged { void** vpr; /* PlayRecord Objects known by NEURON */ double** scatter; /* if bsize == 0, each time step */ double** varrays; /* if bsize > 0, the Vector data pointers. 
*/ diff --git a/coreneuron/utils/memory.h b/coreneuron/utils/memory.h index f1b7042c8..286cfa5f2 100644 --- a/coreneuron/utils/memory.h +++ b/coreneuron/utils/memory.h @@ -1,6 +1,6 @@ /* # ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp index 078990107..1f068c4d7 100644 --- a/coreneuron/utils/offload.hpp +++ b/coreneuron/utils/offload.hpp @@ -1,6 +1,6 @@ /* # ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= @@ -25,9 +25,20 @@ #include namespace coreneuron { +#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ + defined(_OPENACC) +// Homegrown implementation for buggy NVHPC versions (<=22.3?) 
+#define CORENEURON_ENABLE_PRESENT_TABLE +void* cnrn_target_deviceptr_impl(void const* h_ptr); +void cnrn_target_copyin_update_present_table(void const* h_ptr, void* d_ptr, std::size_t len); +void cnrn_target_delete_update_present_table(void const* h_ptr, std::size_t len); +#endif + template T* cnrn_target_deviceptr(const T* h_ptr) { -#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ +#ifdef CORENEURON_ENABLE_PRESENT_TABLE + return static_cast(cnrn_target_deviceptr_impl(h_ptr)); +#elif defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENACC) return static_cast(acc_deviceptr(const_cast(h_ptr))); #elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ @@ -48,7 +59,11 @@ template T* cnrn_target_copyin(const T* h_ptr, std::size_t len = 1) { #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENACC) - return static_cast(acc_copyin(const_cast(h_ptr), len * sizeof(T))); + auto* d_ptr = static_cast(acc_copyin(const_cast(h_ptr), len * sizeof(T))); +#ifdef CORENEURON_ENABLE_PRESENT_TABLE + cnrn_target_copyin_update_present_table(h_ptr, d_ptr, len * sizeof(T)); +#endif + return d_ptr; #elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENMP) nrn_pragma_omp(target enter data map(to : h_ptr[:len])) @@ -63,6 +78,9 @@ template void cnrn_target_delete(T* h_ptr, std::size_t len = 1) { #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENACC) +#ifdef CORENEURON_ENABLE_PRESENT_TABLE + cnrn_target_delete_update_present_table(h_ptr, len * sizeof(T)); +#endif acc_delete(h_ptr, len * sizeof(T)); #elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENMP) diff --git a/coreneuron/utils/randoms/nrnran123.cpp b/coreneuron/utils/randoms/nrnran123.cpp index c815a8f24..6ea75a16b 100644 --- 
a/coreneuron/utils/randoms/nrnran123.cpp +++ b/coreneuron/utils/randoms/nrnran123.cpp @@ -213,9 +213,9 @@ void nrnran123_set_globalindex(uint32_t gix) { << g_k.v[0] << ')' << std::endl; } } - if(g_k.v[0] != gix) { + if (g_k.v[0] != gix) { g_k.v[0] = gix; - if(coreneuron::gpu_enabled()) { + if (coreneuron::gpu_enabled()) { { auto const code = cudaMemcpyToSymbol(g_k_dev, &g_k, sizeof(g_k)); assert(code == cudaSuccess); From 8b2ffa7f16d07d047519c3a7e995a3c142c3e535 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 26 Apr 2022 10:54:29 +0200 Subject: [PATCH 017/128] Cleanup --- coreneuron/gpu/nrn_acc_manager.cpp | 5 +--- coreneuron/gpu/nrn_acc_manager.hpp | 13 ++++------- coreneuron/utils/randoms/nrnran123.cpp | 32 +++++++++++++------------- 3 files changed, 22 insertions(+), 28 deletions(-) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index 20ade530b..d0862b31e 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -670,9 +670,8 @@ void delete_ivoc_vect_from_device(IvocVect& vec) { if (n) { cnrn_target_delete(vec.data(), n); } - // cnrn_target_delete(&vec); #else - (void) vec; + static_cast(vec); #endif } @@ -1342,8 +1341,6 @@ void init_gpu() { std::cout << " Info : " << num_devices_per_node << " GPUs shared by " << local_size << " ranks per node\n"; } - - init_nrnran123(); } void nrn_VecPlay_copyto_device(NrnThread* nt, void** d_vecplay) { diff --git a/coreneuron/gpu/nrn_acc_manager.hpp b/coreneuron/gpu/nrn_acc_manager.hpp index ee5ed2483..5a2a6f544 100644 --- a/coreneuron/gpu/nrn_acc_manager.hpp +++ b/coreneuron/gpu/nrn_acc_manager.hpp @@ -1,17 +1,16 @@ /* # ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ - -#ifndef _nrn_device_manager_ -#define _nrn_device_manager_ - -#include "coreneuron/sim/multicore.hpp" +#pragma once namespace coreneuron { +struct Memb_list; +struct NrnThread; +struct NetSendBuffer_t; void setup_nrnthreads_on_device(NrnThread* threads, int nthreads); void delete_nrnthreads_on_device(NrnThread* threads, int nthreads); void update_nrnthreads_on_host(NrnThread* threads, int nthreads); @@ -24,6 +23,4 @@ void update_net_send_buffer_on_host(NrnThread* nt, NetSendBuffer_t* nsb); void update_weights_from_gpu(NrnThread* threads, int nthreads); void init_gpu(); -void init_nrnran123(); } // namespace coreneuron -#endif // _nrn_device_manager_ diff --git a/coreneuron/utils/randoms/nrnran123.cpp b/coreneuron/utils/randoms/nrnran123.cpp index 6ea75a16b..f5258968e 100644 --- a/coreneuron/utils/randoms/nrnran123.cpp +++ b/coreneuron/utils/randoms/nrnran123.cpp @@ -21,13 +21,16 @@ #include #endif +#ifdef __CUDACC__ #include +#endif // Defining these attributes seems to help nvc++ in OpenMP target offload mode. 
#if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENMP) && defined(__CUDACC__) #define CORENRN_HOST_DEVICE __host__ __device__ #elif defined(__CUDACC__) +// This is necessary to make the new CUDA-syntax-in-.cpp version compile #define CORENRN_HOST_DEVICE __host__ __device__ #else #define CORENRN_HOST_DEVICE @@ -88,20 +91,24 @@ using random123_allocator = coreneuron::unified_allocator Date: Wed, 6 Jul 2022 16:56:29 +0200 Subject: [PATCH 018/128] generate some more ringtests --- tests/integration/CMakeLists.txt | 49 +++++++++++++++----------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index 4217af270..891f1443e 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -31,6 +31,9 @@ set(PERMUTE2_ARGS "--cell-permute 2") set(CUDA_INTERFACE "--cuda-interface") if(CORENRN_ENABLE_GPU) set(GPU_ARGS "--gpu") + set(permutation_modes 1 2) +else() + set(permutation_modes 0 1) endif() # List of tests with arguments @@ -39,32 +42,37 @@ set(TEST_CASES_WITH_ARGS "ring_binqueue!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_binqueue --binqueue" "ring_multisend!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_multisend --multisend" "ring_spike_buffer!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_spike_buffer --spikebuf 1" - "ring_permute1!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_permute1 ${PERMUTE1_ARGS}" - "ring_permute2!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_permute2 ${PERMUTE2_ARGS}" "ring_gap!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap" "ring_gap_binqueue!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap_binqueue --binqueue" "ring_gap_multisend!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath 
${CMAKE_CURRENT_BINARY_DIR}/ring_gap_multisend --multisend" - "ring_gap_permute1!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap_permute1 ${PERMUTE1_ARGS}" - "ring_gap_permute2!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap_permute2 ${PERMUTE2_ARGS}" ) - -if(CORENRN_ENABLE_GPU) +set(test_suffixes "" "_binqueue" "_multisend") +foreach(cell_permute ${permutation_modes}) + list(APPEND test_suffixes "_permute${cell_permute}") list( APPEND TEST_CASES_WITH_ARGS - "ring_permute2_cudaInterface!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_permute2_cudaInterface ${PERMUTE2_ARGS} ${CUDA_INTERFACE}" - "ring_gap_permute2_cudaInterface!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap_permute2_cudaInterface ${PERMUTE2_ARGS} ${CUDA_INTERFACE}" + "ring_permute${cell_permute}!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_permute${cell_permute} --cell-permute=${cell_permute}" + "ring_gap_permute${cell_permute}!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap_permute${cell_permute} --cell-permute=${cell_permute}" ) -endif() + # As reports require MPI, do not add test if report is enabled. + if(NOT CORENRN_ENABLE_REPORTING) + list(APPEND test_suffixes "_serial_permute${cell_permute}") + list( + APPEND + TEST_CASES_WITH_ARGS + "ring_serial_permute${cell_permute}!${GPU_ARGS} --cell-permute=${cell_permute} --tstop 100. --celsius 6.3 --datpath ${RING_DATASET_DIR} ${MODEL_STATS_ARG} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_serial_permute${cell_permute}" + ) + endif() +endforeach() -# ~~~ -# As reports require MPI, do not add test if report is enabled. -# ~~~ -if(NOT CORENRN_ENABLE_REPORTING) +if(CORENRN_ENABLE_GPU) + list(APPEND test_suffixes "_permute2_cudaInterface") list( APPEND TEST_CASES_WITH_ARGS - "ring_serial!${GPU_ARGS} --tstop 100. 
--celsius 6.3 --datpath ${RING_DATASET_DIR} ${MODEL_STATS_ARG} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_serial" + "ring_permute2_cudaInterface!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_permute2_cudaInterface ${PERMUTE2_ARGS} ${CUDA_INTERFACE}" + "ring_gap_permute2_cudaInterface!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap_permute2_cudaInterface ${PERMUTE2_ARGS} ${CUDA_INTERFACE}" ) endif() @@ -73,18 +81,7 @@ endif() # create them and copy reference spikes # ~~~ foreach(data_dir "ring" "ring_gap") - foreach( - test_suffix - "" - "_serial" - "_multisend" - "_binqueue" - "_savestate_permute0" - "_savestate_permute1" - "_savestate_permute2" - "_permute1" - "_permute2" - "_permute2_cudaInterface") + foreach(test_suffix ${test_suffixes}) file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/${data_dir}/out.dat.ref" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/${data_dir}${test_suffix}/") endforeach() From d933c04cea0f8a4422f979e66a98e2c332c4d3c4 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 12 Jul 2022 11:50:41 +0200 Subject: [PATCH 019/128] clang-format --- .../mechanism/mech/mod2c_core_thread.hpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/coreneuron/mechanism/mech/mod2c_core_thread.hpp b/coreneuron/mechanism/mech/mod2c_core_thread.hpp index 4ec7b4ff6..e224137e0 100644 --- a/coreneuron/mechanism/mech/mod2c_core_thread.hpp +++ b/coreneuron/mechanism/mech/mod2c_core_thread.hpp @@ -44,16 +44,16 @@ struct Item { using List = Item; /* list of mixed items */ -struct SparseObj : public MemoryManaged { /* all the state information */ - Elm** rowst{}; /* link to first element in row (solution order)*/ - Elm** diag{}; /* link to pivot element in row (solution order)*/ - void* elmpool{}; /* no interthread cache line sharing for elements */ - unsigned neqn{}; /* number of equations */ - unsigned _cntml_padded{}; /* number of instances */ - unsigned* varord{}; /* row and column 
order for pivots */ - double* rhs{}; /* initially- right hand side finally - answer */ - unsigned* ngetcall{}; /* per instance counter for number of calls to _getelm */ - int phase{}; /* 0-solution phase; 1-count phase; 2-build list phase */ +struct SparseObj: public MemoryManaged { /* all the state information */ + Elm** rowst{}; /* link to first element in row (solution order)*/ + Elm** diag{}; /* link to pivot element in row (solution order)*/ + void* elmpool{}; /* no interthread cache line sharing for elements */ + unsigned neqn{}; /* number of equations */ + unsigned _cntml_padded{}; /* number of instances */ + unsigned* varord{}; /* row and column order for pivots */ + double* rhs{}; /* initially- right hand side finally - answer */ + unsigned* ngetcall{}; /* per instance counter for number of calls to _getelm */ + int phase{}; /* 0-solution phase; 1-count phase; 2-build list phase */ int numop{}; unsigned coef_list_size{}; double** coef_list{}; /* pointer to (first instance) value in _getelm order */ From d0e7b2cbac084d09cbdc5ed85ea61a549a0830ee Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 12 Jul 2022 14:21:35 +0200 Subject: [PATCH 020/128] cmake-format --- coreneuron/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 68b0f54ff..ef5f577bd 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -107,8 +107,8 @@ if(CORENRN_ENABLE_GPU) set_source_files_properties(${OPENACC_EXCLUDED_FILES} PROPERTIES COMPILE_FLAGS "-DDISABLE_OPENACC") - # Only compile the explicit CUDA implementation of the Hines solver in GPU builds. - # list(APPEND CORENEURON_CODE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/permute/cellorder.cu) + # Only compile the explicit CUDA implementation of the Hines solver in GPU builds. list(APPEND + # CORENEURON_CODE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/permute/cellorder.cu) # Eigen-3.5+ provides better GPU support. 
However, some functions cannot be called directly from # within an OpenACC region. Therefore, we need to wrap them in a special API (decorate them with From 52951c7253457ce9e2117be20408a4d2fe190a13 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 12 Jul 2022 14:44:09 +0200 Subject: [PATCH 021/128] Don't delete threads from the GPU if we didn't copy them there. Disable the CUDA backend while it is disabled in Coreneuron --- tests/unit/solver/test_solver.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/unit/solver/test_solver.cpp b/tests/unit/solver/test_solver.cpp index 6511f03e1..b797ac711 100644 --- a/tests/unit/solver/test_solver.cpp +++ b/tests/unit/solver/test_solver.cpp @@ -196,7 +196,9 @@ struct SetupThreads { } ~SetupThreads() { - delete_nrnthreads_on_device(nrn_threads, nrn_nthread); + if (corenrn_param.gpu) { + delete_nrnthreads_on_device(nrn_threads, nrn_nthread); + } for (auto& nt: *this) { free_memory(std::exchange(nt._data, nullptr)); delete[] std::exchange(nt._permute, nullptr); @@ -273,7 +275,7 @@ auto active_implementations() { ret.push_back(SolverImplementation::CellPermute0_GPU); ret.push_back(SolverImplementation::CellPermute1_GPU); ret.push_back(SolverImplementation::CellPermute2_GPU); - ret.push_back(SolverImplementation::CellPermute2_CUDA); + // ret.push_back(SolverImplementation::CellPermute2_CUDA); #endif return ret; } From 90dd8234f1a6ea22c2347ea679ac382a211c9a6e Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 14 Jul 2022 09:01:01 +0200 Subject: [PATCH 022/128] use -gpu=nordc and make #pragma acc routine seq functions inline --- CMake/OpenAccHelper.cmake | 2 +- coreneuron/mechanism/eion.cpp | 34 ------- coreneuron/mechanism/membfunc.hpp | 39 ++++++-- coreneuron/network/cvodestb.cpp | 12 --- coreneuron/sim/multicore.hpp | 11 +- coreneuron/utils/randoms/nrnran123.cpp | 110 +------------------- coreneuron/utils/randoms/nrnran123.h | 133 +++++++++++++++++++------ 7 files changed, 145 insertions(+), 
196 deletions(-) diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index f1ff3a4c3..10c942f75 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -71,7 +71,7 @@ if(CORENRN_ENABLE_GPU) # https://forums.developer.nvidia.com/t/separate-compilation-of-mixed-cuda-openacc-code/192701 but # as discussed in https://github.com/BlueBrain/CoreNeuron/issues/141#issuecomment-1086742194 this # is still not completely solving underlying link issue. - set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo") + set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo,nordc") # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA # code. Otherwise there may be confusing linker errors. We cannot rely on nvcc and nvc++ using the # same default compute capabilities as each other, particularly on GPU-less build machines. diff --git a/coreneuron/mechanism/eion.cpp b/coreneuron/mechanism/eion.cpp index 1dbd0d2db..deab46627 100644 --- a/coreneuron/mechanism/eion.cpp +++ b/coreneuron/mechanism/eion.cpp @@ -154,40 +154,6 @@ the USEION statement of any model using this ion\n", } } -// std::log isn't constexpr, but there are argument values for which nrn_nernst -// is a constant expression -constexpr double nrn_nernst(double ci, double co, double z, double celsius) { - if (z == 0) { - return 0.; - } - if (ci <= 0.) { - return 1e6; - } else if (co <= 0.) 
{ - return -1e6; - } else { - return ktf(celsius) / z * std::log(co / ci); - } -} - -nrn_pragma_omp(declare target) -void nrn_wrote_conc(int type, - double* p1, - int p2, - int it, - double** gimap, - double celsius, - int _cntml_padded) { - if (it & 040) { - int _iml = 0; - /* passing _nt to this function causes cray compiler to segfault during compilation - * hence passing _cntml_padded - */ - double* pe = p1 - p2 * _STRIDE; - pe[0] = nrn_nernst(pe[1 * _STRIDE], pe[2 * _STRIDE], gimap[type][2], celsius); - } -} -nrn_pragma_omp(end declare target) - #if VECTORIZE #define erev pd[0 * _STRIDE] /* From Eion */ #define conci pd[1 * _STRIDE] diff --git a/coreneuron/mechanism/membfunc.hpp b/coreneuron/mechanism/membfunc.hpp index a40d77438..3d7e9e239 100644 --- a/coreneuron/mechanism/membfunc.hpp +++ b/coreneuron/mechanism/membfunc.hpp @@ -112,13 +112,40 @@ extern void hoc_register_watch_check(nrn_watch_check_t, int); extern void nrn_jacob_capacitance(NrnThread*, Memb_list*, int); extern void nrn_writes_conc(int, int); -nrn_pragma_omp(declare target) -nrn_pragma_acc(routine seq) -void nrn_wrote_conc(int, double*, int, int, double**, double, int); -nrn_pragma_omp(end declare target) constexpr double ktf(double celsius) { return 1000. * units::gasconstant * (celsius + 273.15) / units::faraday; } +// std::log isn't constexpr, but there are argument values for which nrn_nernst +// is a constant expression +constexpr double nrn_nernst(double ci, double co, double z, double celsius) { + if (z == 0) { + return 0.; + } + if (ci <= 0.) { + return 1e6; + } else if (co <= 0.) 
{ + return -1e6; + } else { + return ktf(celsius) / z * std::log(co / ci); + } +} +constexpr void nrn_wrote_conc(int type, + double* p1, + int p2, + int it, + double** gimap, + double celsius, + int _cntml_padded) { + if (it & 040) { + constexpr int _iml = 0; + int const STRIDE{_cntml_padded + _iml}; + /* passing _nt to this function causes cray compiler to segfault during compilation + * hence passing _cntml_padded + */ + double* pe = p1 - p2 * STRIDE; + pe[0] = nrn_nernst(pe[1 * STRIDE], pe[2 * STRIDE], gimap[type][2], celsius); + } +} inline double nrn_ghk(double v, double ci, double co, double z, double celsius) { auto const efun = [](double x) { if (std::abs(x) < 1e-4) { @@ -195,10 +222,6 @@ extern void artcell_net_move(void**, Point_process*, double); extern void nrn2ncs_outputevent(int netcon_output_index, double firetime); extern bool nrn_use_localgid_; extern void net_sem_from_gpu(int sendtype, int i_vdata, int, int ith, int ipnt, double, double); -nrn_pragma_acc(routine seq) -nrn_pragma_omp(declare target) -extern int at_time(NrnThread*, double); -nrn_pragma_omp(end declare target) // _OPENACC and/or NET_RECEIVE_BUFFERING extern void net_sem_from_gpu(int, int, int, int, int, double, double); diff --git a/coreneuron/network/cvodestb.cpp b/coreneuron/network/cvodestb.cpp index 31c18807e..bd3de5f4c 100644 --- a/coreneuron/network/cvodestb.cpp +++ b/coreneuron/network/cvodestb.cpp @@ -84,16 +84,4 @@ void fixed_play_continuous(NrnThread* nt) { } } -// NOTE : this implementation is duplicated in "coreneuron/mechanism/nrnoc_ml.ispc" -// for the ISPC backend. If changes are required, make sure to change ISPC as well. 
-nrn_pragma_omp(declare target) -int at_time(NrnThread* nt, double te) { - double x = te - 1e-11; - if (x <= nt->_t && x > (nt->_t - nt->_dt)) { - return 1; - } - return 0; -} -nrn_pragma_omp(end declare target) - } // namespace coreneuron diff --git a/coreneuron/sim/multicore.hpp b/coreneuron/sim/multicore.hpp index 18cd613f3..391b5dcaa 100644 --- a/coreneuron/sim/multicore.hpp +++ b/coreneuron/sim/multicore.hpp @@ -192,6 +192,13 @@ extern void direct_mode_initialize(); extern void nrn_mk_table_check(void); extern void nonvint(NrnThread* _nt); extern void update(NrnThread*); - - +// NOTE : this implementation is duplicated in "coreneuron/mechanism/nrnoc_ml.ispc" +// for the ISPC backend. If changes are required, make sure to change ISPC as well. +constexpr int at_time(NrnThread* nt, double te) { + double x = te - 1e-11; + if (x <= nt->_t && x > (nt->_t - nt->_dt)) { + return 1; + } + return 0; +} } // namespace coreneuron diff --git a/coreneuron/utils/randoms/nrnran123.cpp b/coreneuron/utils/randoms/nrnran123.cpp index f5258968e..7e2538f3d 100644 --- a/coreneuron/utils/randoms/nrnran123.cpp +++ b/coreneuron/utils/randoms/nrnran123.cpp @@ -21,10 +21,6 @@ #include #endif -#ifdef __CUDACC__ -#include -#endif - // Defining these attributes seems to help nvc++ in OpenMP target offload mode. #if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENMP) && defined(__CUDACC__) @@ -84,40 +80,8 @@ using random123_allocator = coreneuron::unified_allocatorc, global_state()); -} } // namespace namespace coreneuron { @@ -127,81 +91,13 @@ std::size_t nrnran123_instance_count() { /* if one sets the global, one should reset all the stream sequences. 
*/ uint32_t nrnran123_get_globalindex() { - return global_state().v[0]; -} - -void nrnran123_getseq(nrnran123_State* s, uint32_t* seq, char* which) { - *seq = s->c.v[0]; - *which = s->which_; -} - -void nrnran123_setseq(nrnran123_State* s, uint32_t seq, char which) { - if (which > 3) { - s->which_ = 0; - } else { - s->which_ = which; - } - s->c.v[0] = seq; - s->r = philox4x32_helper(s); -} - -void nrnran123_getids(nrnran123_State* s, uint32_t* id1, uint32_t* id2) { - *id1 = s->c.v[2]; - *id2 = s->c.v[3]; -} - -void nrnran123_getids3(nrnran123_State* s, uint32_t* id1, uint32_t* id2, uint32_t* id3) { - *id3 = s->c.v[1]; - *id1 = s->c.v[2]; - *id2 = s->c.v[3]; -} - -uint32_t nrnran123_ipick(nrnran123_State* s) { - uint32_t rval; - char which = s->which_; - rval = s->r.v[int{which++}]; - if (which > 3) { - which = 0; - s->c.v[0]++; - s->r = philox4x32_helper(s); - } - s->which_ = which; - return rval; -} - -double nrnran123_dblpick(nrnran123_State* s) { - return nrnran123_uint2dbl(nrnran123_ipick(s)); -} - -double nrnran123_negexp(nrnran123_State* s) { - /* min 2.3283064e-10 to max 22.18071 */ - return -std::log(nrnran123_dblpick(s)); -} - -/* at cost of a cached value we could compute two at a time. */ -double nrnran123_normal(nrnran123_State* s) { - double w, u1; - do { - u1 = nrnran123_dblpick(s); - double u2{nrnran123_dblpick(s)}; - u1 = 2. * u1 - 1.; - u2 = 2. * u2 - 1.; - w = (u1 * u1) + (u2 * u2); - } while (w > 1); - double y{std::sqrt((-2. * std::log(w)) / w)}; - return u1 * y; -} - -double nrnran123_uint2dbl(uint32_t u) { - /* 0 to 2^32-1 transforms to double value in open (0,1) interval */ - /* min 2.3283064e-10 to max (1 - 2.3283064e-10) */ - return ((double) u + 1.0) * SHIFT32; + return random123::detail::global_state().v[0]; } /* nrn123 streams are created from cpu launcher routine */ void nrnran123_set_globalindex(uint32_t gix) { // If the global seed is changing then we shouldn't have any active streams. 
- auto& g_k = global_state(); + auto& g_k = random123::detail::global_state(); { std::lock_guard _{g_instance_count_mutex}; if (g_instance_count != 0 && nrnmpi_myid == 0) { @@ -217,7 +113,7 @@ void nrnran123_set_globalindex(uint32_t gix) { if (coreneuron::gpu_enabled()) { #ifdef __CUDACC__ { - auto const code = cudaMemcpyToSymbol(g_k_dev, &g_k, sizeof(g_k)); + auto const code = cudaMemcpyToSymbol(random123::detail::g_k_dev, &g_k, sizeof(g_k)); assert(code == cudaSuccess); } { diff --git a/coreneuron/utils/randoms/nrnran123.h b/coreneuron/utils/randoms/nrnran123.h index ccd3fa5db..8290749c7 100644 --- a/coreneuron/utils/randoms/nrnran123.h +++ b/coreneuron/utils/randoms/nrnran123.h @@ -39,6 +39,10 @@ of the full distribution available from #include #include +#ifdef __CUDACC__ +#include +#endif + // Some files are compiled with DISABLE_OPENACC, and some builds have no GPU // support at all. In these two cases, request that the random123 state is // allocated using new/delete instead of CUDA unified memory. 
@@ -86,38 +90,103 @@ void nrnran123_deletestream(nrnran123_State* s, bool use_unified_memory = CORENRN_RAN123_USE_UNIFIED_MEMORY); /* minimal data stream */ -nrn_pragma_omp(declare target) -nrn_pragma_acc(routine seq) -void nrnran123_getseq(nrnran123_State*, uint32_t* seq, char* which); -nrn_pragma_acc(routine seq) -void nrnran123_getids(nrnran123_State*, uint32_t* id1, uint32_t* id2); -nrn_pragma_acc(routine seq) -void nrnran123_getids3(nrnran123_State*, uint32_t* id1, uint32_t* id2, uint32_t* id3); -nrn_pragma_acc(routine seq) -uint32_t nrnran123_ipick(nrnran123_State*); /* uniform 0 to 2^32-1 */ - -/* this could be called from openacc parallel construct */ -nrn_pragma_acc(routine seq) -double nrnran123_dblpick(nrnran123_State*); /* uniform open interval (0,1)*/ -/* nrnran123_dblpick minimum value is 2.3283064e-10 and max value is 1-min */ +constexpr void nrnran123_getseq(nrnran123_State* s, uint32_t* seq, char* which) { + *seq = s->c.v[0]; + *which = s->which_; +} +constexpr void nrnran123_getids(nrnran123_State* s, uint32_t* id1, uint32_t* id2) { + *id1 = s->c.v[2]; + *id2 = s->c.v[3]; +} +constexpr void nrnran123_getids3(nrnran123_State* s, uint32_t* id1, uint32_t* id2, uint32_t* id3) { + *id3 = s->c.v[1]; + *id1 = s->c.v[2]; + *id2 = s->c.v[3]; +} + +namespace random123::detail { +inline philox4x32_key_t g_k{}; +#ifdef __CUDACC__ +// Not 100% clear we need a different name (g_k_dev) here in addition to g_k, +// but it's clearer and the overhead cannot be high (if it exists). 
+__constant__ __device__ inline philox4x32_key_t g_k_dev{}; +// noinline to force "CUDA" not "acc routine seq" behaviour :shrug: +__attribute__((noinline)) inline philox4x32_key_t& global_state() { + if target (nv::target::is_device) { + return g_k_dev; + } else { + return g_k; + } +} +#else +inline philox4x32_key_t& global_state() { + return g_k; +} +#endif + +/** @brief Provide a helper function in global namespace that is declared target for OpenMP + * offloading to function correctly with NVHPC + */ +inline philox4x32_ctr_t philox4x32_helper(coreneuron::nrnran123_State* s) { + return philox4x32(s->c, global_state()); +} +} // namespace random123::detail + +// Uniform 0 to 2*32-1 +inline uint32_t nrnran123_ipick(nrnran123_State* s) { + uint32_t rval; + char which = s->which_; + rval = s->r.v[int{which++}]; + if (which > 3) { + which = 0; + s->c.v[0]++; + s->r = random123::detail::philox4x32_helper(s); + } + s->which_ = which; + return rval; +} + +constexpr double nrnran123_uint2dbl(uint32_t u) { + constexpr double SHIFT32 = 1.0 / 4294967297.0; /* 1/(2^32 + 1) */ + /* 0 to 2^32-1 transforms to double value in open (0,1) interval */ + /* min 2.3283064e-10 to max (1 - 2.3283064e-10) */ + return (static_cast(u) + 1.0) * SHIFT32; +} + +// Uniform open interval (0,1), minimum value is 2.3283064e-10 and max value is 1-min +inline double nrnran123_dblpick(nrnran123_State* s) { + return nrnran123_uint2dbl(nrnran123_ipick(s)); +} /* this could be called from openacc parallel construct (in INITIAL block) */ -nrn_pragma_acc(routine seq) -void nrnran123_setseq(nrnran123_State*, uint32_t seq, char which); -nrn_pragma_acc(routine seq) -double nrnran123_negexp(nrnran123_State*); /* mean 1.0 */ -/* nrnran123_negexp min value is 2.3283064e-10, max is 22.18071 */ - -/* missing declaration in coreneuron */ -nrn_pragma_acc(routine seq) -double nrnran123_normal(nrnran123_State*); -nrn_pragma_acc(routine seq) -double nrnran123_gauss(nrnran123_State*); /* mean 0.0, std 1.0 */ - -/* 
more fundamental (stateless) (though the global index is still used) */ -nrn_pragma_acc(routine seq) -nrnran123_array4x32 nrnran123_iran(uint32_t seq, uint32_t id1, uint32_t id2); -nrn_pragma_acc(routine seq) -double nrnran123_uint2dbl(uint32_t); -nrn_pragma_omp(end declare target) +inline void nrnran123_setseq(nrnran123_State* s, uint32_t seq, char which) { + if (which > 3) { + s->which_ = 0; + } else { + s->which_ = which; + } + s->c.v[0] = seq; + s->r = random123::detail::philox4x32_helper(s); +} + +// nrnran123_negexp min value is 2.3283064e-10, max is 22.18071, mean 1.0 +inline double nrnran123_negexp(nrnran123_State* s) { + return -std::log(nrnran123_dblpick(s)); +} + +/* at cost of a cached value we could compute two at a time. */ +inline double nrnran123_normal(nrnran123_State* s) { + double w, u1; + do { + u1 = nrnran123_dblpick(s); + double u2{nrnran123_dblpick(s)}; + u1 = 2. * u1 - 1.; + u2 = 2. * u2 - 1.; + w = (u1 * u1) + (u2 * u2); + } while (w > 1); + double y{std::sqrt((-2. * std::log(w)) / w)}; + return u1 * y; +} + +// nrnran123_gauss, nrnran123_iran were declared but not defined in CoreNEURON } // namespace coreneuron From 6cca56f19ab9f4ab1c78690f56de862aad3a25e4 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 14 Jul 2022 09:01:28 +0200 Subject: [PATCH 023/128] fix handling of empty suffix --- tests/integration/CMakeLists.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index 891f1443e..fa488028e 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -81,7 +81,12 @@ endif() # create them and copy reference spikes # ~~~ foreach(data_dir "ring" "ring_gap") - foreach(test_suffix ${test_suffixes}) + # Naïve foreach(test_suffix ${test_suffixes}) does not seem to handle empty suffixes correctly. 
+ list(LENGTH test_suffixes num_suffixes) + math(EXPR num_suffixes_m1 "${num_suffixes} - 1") + foreach(suffix_index RANGE 0 ${num_suffixes_m1}) + list(GET test_suffixes ${suffix_index} test_suffix) + message(STATUS "test_suffix=${test_suffix}") file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/${data_dir}/out.dat.ref" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/${data_dir}${test_suffix}/") endforeach() From 258ef9425a7f4f1a3b6d1e6dccd0a95237deed11 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 15 Jul 2022 10:25:26 +0200 Subject: [PATCH 024/128] revert hack to compile/link main() with g++ --- extra/nrnivmodl_core_makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in index 9388059fe..7f233b9aa 100644 --- a/extra/nrnivmodl_core_makefile.in +++ b/extra/nrnivmodl_core_makefile.in @@ -209,7 +209,7 @@ endif # main target to build binary $(SPECIAL_EXE): coremech_lib_target @printf " => $(C_GREEN)Binary$(C_RESET) creating $(SPECIAL_EXE)\n" - g++ -o $(SPECIAL_EXE) $(CORENRN_SHARE_CORENRN_DIR)/coreneuron.cpp \ + $(CXX_LINK_EXE_CMD) -o $(SPECIAL_EXE) $(CORENRN_SHARE_CORENRN_DIR)/coreneuron.cpp \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ -L$(OUTPUT_DIR) -l$(COREMECH_LIB_NAME) $(CORENRNLIB_FLAGS) $(LDFLAGS) \ -L$(CORENRN_LIB_DIR) -lscopmath \ From 708864c3b8cb5c286cc67778787aefd59a10cf4a Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 15 Jul 2022 10:50:01 +0200 Subject: [PATCH 025/128] revert to -gpu=rdc and put the random123 global state back in .cpp because inline device variables don't seem to work --- CMake/OpenAccHelper.cmake | 2 +- coreneuron/utils/randoms/nrnran123.cpp | 29 +++++++++++-- coreneuron/utils/randoms/nrnran123.h | 56 +++++++++----------------- 3 files changed, 45 insertions(+), 42 deletions(-) diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index 10c942f75..bc21e051f 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -71,7 +71,7 @@ 
if(CORENRN_ENABLE_GPU) # https://forums.developer.nvidia.com/t/separate-compilation-of-mixed-cuda-openacc-code/192701 but # as discussed in https://github.com/BlueBrain/CoreNeuron/issues/141#issuecomment-1086742194 this # is still not completely solving underlying link issue. - set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo,nordc") + set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo,rdc") # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA # code. Otherwise there may be confusing linker errors. We cannot rely on nvcc and nvc++ using the # same default compute capabilities as each other, particularly on GPU-less build machines. diff --git a/coreneuron/utils/randoms/nrnran123.cpp b/coreneuron/utils/randoms/nrnran123.cpp index 7e2538f3d..b23c01485 100644 --- a/coreneuron/utils/randoms/nrnran123.cpp +++ b/coreneuron/utils/randoms/nrnran123.cpp @@ -82,8 +82,31 @@ using random123_allocator = coreneuron::unified_allocatorc, global_state()); +} + namespace coreneuron { std::size_t nrnran123_instance_count() { return g_instance_count; @@ -91,13 +114,13 @@ std::size_t nrnran123_instance_count() { /* if one sets the global, one should reset all the stream sequences. */ uint32_t nrnran123_get_globalindex() { - return random123::detail::global_state().v[0]; + return global_state().v[0]; } /* nrn123 streams are created from cpu launcher routine */ void nrnran123_set_globalindex(uint32_t gix) { // If the global seed is changing then we shouldn't have any active streams. 
- auto& g_k = random123::detail::global_state(); + auto& g_k = global_state(); { std::lock_guard _{g_instance_count_mutex}; if (g_instance_count != 0 && nrnmpi_myid == 0) { @@ -113,7 +136,7 @@ void nrnran123_set_globalindex(uint32_t gix) { if (coreneuron::gpu_enabled()) { #ifdef __CUDACC__ { - auto const code = cudaMemcpyToSymbol(random123::detail::g_k_dev, &g_k, sizeof(g_k)); + auto const code = cudaMemcpyToSymbol(g_k_dev, &g_k, sizeof(g_k)); assert(code == cudaSuccess); } { diff --git a/coreneuron/utils/randoms/nrnran123.h b/coreneuron/utils/randoms/nrnran123.h index 8290749c7..c68cc9017 100644 --- a/coreneuron/utils/randoms/nrnran123.h +++ b/coreneuron/utils/randoms/nrnran123.h @@ -60,9 +60,17 @@ struct nrnran123_State { char which_; }; -struct nrnran123_array4x32 { - uint32_t v[4]; -}; +} + +/** @brief Provide a helper function in global namespace that is declared target for OpenMP + * offloading to function correctly with NVHPC + */ +nrn_pragma_acc(routine seq) +nrn_pragma_omp(declare target) +philox4x32_ctr_t coreneuron_random123_philox4x32_helper(coreneuron::nrnran123_State* s); +nrn_pragma_omp(end declare target) + +namespace coreneuron { /* global index. eg. run number */ /* all generator instances share this global index */ @@ -104,43 +112,14 @@ constexpr void nrnran123_getids3(nrnran123_State* s, uint32_t* id1, uint32_t* id *id2 = s->c.v[3]; } -namespace random123::detail { -inline philox4x32_key_t g_k{}; -#ifdef __CUDACC__ -// Not 100% clear we need a different name (g_k_dev) here in addition to g_k, -// but it's clearer and the overhead cannot be high (if it exists). 
-__constant__ __device__ inline philox4x32_key_t g_k_dev{}; -// noinline to force "CUDA" not "acc routine seq" behaviour :shrug: -__attribute__((noinline)) inline philox4x32_key_t& global_state() { - if target (nv::target::is_device) { - return g_k_dev; - } else { - return g_k; - } -} -#else -inline philox4x32_key_t& global_state() { - return g_k; -} -#endif - -/** @brief Provide a helper function in global namespace that is declared target for OpenMP - * offloading to function correctly with NVHPC - */ -inline philox4x32_ctr_t philox4x32_helper(coreneuron::nrnran123_State* s) { - return philox4x32(s->c, global_state()); -} -} // namespace random123::detail - // Uniform 0 to 2*32-1 -inline uint32_t nrnran123_ipick(nrnran123_State* s) { - uint32_t rval; +constexpr uint32_t nrnran123_ipick(nrnran123_State* s) { char which = s->which_; - rval = s->r.v[int{which++}]; + uint32_t rval{s->r.v[int{which++}]}; if (which > 3) { which = 0; s->c.v[0]++; - s->r = random123::detail::philox4x32_helper(s); + s->r = coreneuron_random123_philox4x32_helper(s); } s->which_ = which; return rval; @@ -154,19 +133,19 @@ constexpr double nrnran123_uint2dbl(uint32_t u) { } // Uniform open interval (0,1), minimum value is 2.3283064e-10 and max value is 1-min -inline double nrnran123_dblpick(nrnran123_State* s) { +constexpr double nrnran123_dblpick(nrnran123_State* s) { return nrnran123_uint2dbl(nrnran123_ipick(s)); } /* this could be called from openacc parallel construct (in INITIAL block) */ -inline void nrnran123_setseq(nrnran123_State* s, uint32_t seq, char which) { +constexpr void nrnran123_setseq(nrnran123_State* s, uint32_t seq, char which) { if (which > 3) { s->which_ = 0; } else { s->which_ = which; } s->c.v[0] = seq; - s->r = random123::detail::philox4x32_helper(s); + s->r = coreneuron_random123_philox4x32_helper(s); } // nrnran123_negexp min value is 2.3283064e-10, max is 22.18071, mean 1.0 @@ -189,4 +168,5 @@ inline double nrnran123_normal(nrnran123_State* s) { } // 
nrnran123_gauss, nrnran123_iran were declared but not defined in CoreNEURON +// nrnran123_array4x32 was declared but not used in CoreNEURON } // namespace coreneuron From 6a74cf940482e1232b6e9de99272b87df8cd323d Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 15 Jul 2022 16:03:33 +0200 Subject: [PATCH 026/128] promise we will never try and allocate from the device --- coreneuron/sim/scopmath/sparse_thread.hpp | 32 ++++++++++++++++++----- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/coreneuron/sim/scopmath/sparse_thread.hpp b/coreneuron/sim/scopmath/sparse_thread.hpp index 6614a0a1b..fc2cc89c7 100644 --- a/coreneuron/sim/scopmath/sparse_thread.hpp +++ b/coreneuron/sim/scopmath/sparse_thread.hpp @@ -13,6 +13,10 @@ #include "coreneuron/mechanism/mech/mod2c_core_thread.hpp" #include "coreneuron/sim/scopmath/errcodes.h" +#ifdef __CUDACC__ +#include +#endif + namespace coreneuron { namespace scopmath { namespace sparse { @@ -90,10 +94,17 @@ inline Elm* getelm(SparseObj* so, unsigned row, unsigned col, Elm* new_elem) { } /* insert below el */ if (!new_elem) { - new_elem = new Elm{}; - // Using array-new here causes problems in GPU compilation. - new_elem->value = static_cast(std::malloc(so->_cntml_padded * sizeof(double))); - increase_order(so, row); +#ifdef __CUDACC__ + if target (nv::target::is_device) { + assert(false); + } else +#endif + { + new_elem = new Elm{}; + // Using array-new here causes problems in GPU compilation. 
+ new_elem->value = static_cast(std::malloc(so->_cntml_padded * sizeof(double))); + increase_order(so, row); + } } new_elem->r_down = el->r_down; el->r_down = new_elem; @@ -133,9 +144,16 @@ inline Elm* getelm(SparseObj* so, unsigned row, unsigned col, Elm* new_elem) { } /* insert above el */ if (!new_elem) { - new_elem = new Elm{}; - new_elem->value = static_cast(std::malloc(so->_cntml_padded * sizeof(double))); - increase_order(so, row); +#ifdef __CUDACC__ + if target (nv::target::is_device) { + assert(false); + } else +#endif + { + new_elem = new Elm{}; + new_elem->value = static_cast(std::malloc(so->_cntml_padded * sizeof(double))); + increase_order(so, row); + } } new_elem->r_up = el->r_up; el->r_up = new_elem; From 5dabb3663f350b2c866599425de272bff49613cf Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 15 Jul 2022 16:03:54 +0200 Subject: [PATCH 027/128] drop -lscopmath as it's folded in elsewhere --- CMake/OpenAccHelper.cmake | 2 +- extra/nrnivmodl_core_makefile.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index bc21e051f..2d84efb70 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -109,7 +109,7 @@ if(CORENRN_ENABLE_GPU) ) else() set_property(GLOBAL PROPERTY CORENEURON_LIB_LINK_FLAGS - "-L${CMAKE_HOST_SYSTEM_PROCESSOR} -lcorenrnmech -lscopmath") + "-L${CMAKE_HOST_SYSTEM_PROCESSOR} -lcorenrnmech") endif(CORENRN_ENABLE_GPU) if(CORENRN_HAVE_NVHPC_COMPILER) diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in index 7f233b9aa..c26066e4c 100644 --- a/extra/nrnivmodl_core_makefile.in +++ b/extra/nrnivmodl_core_makefile.in @@ -212,7 +212,7 @@ $(SPECIAL_EXE): coremech_lib_target $(CXX_LINK_EXE_CMD) -o $(SPECIAL_EXE) $(CORENRN_SHARE_CORENRN_DIR)/coreneuron.cpp \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ -L$(OUTPUT_DIR) -l$(COREMECH_LIB_NAME) $(CORENRNLIB_FLAGS) $(LDFLAGS) \ - -L$(CORENRN_LIB_DIR) -lscopmath \ + -L$(CORENRN_LIB_DIR) \ 
-Wl,-rpath,'$(LIB_RPATH)' -Wl,-rpath,$(CORENRN_LIB_DIR) -Wl,-rpath,'$(INSTALL_LIB_RPATH)' coremech_lib_target: $(corenrnmech_lib_target) From 532b8adfb062b92287e6520e1390828757f9bb3f Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 15 Jul 2022 16:04:16 +0200 Subject: [PATCH 028/128] random123 header reorganisation --- coreneuron/utils/randoms/nrnran123.cpp | 14 +++++++++----- coreneuron/utils/randoms/nrnran123.h | 4 ---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/coreneuron/utils/randoms/nrnran123.cpp b/coreneuron/utils/randoms/nrnran123.cpp index b23c01485..f2dd2dee2 100644 --- a/coreneuron/utils/randoms/nrnran123.cpp +++ b/coreneuron/utils/randoms/nrnran123.cpp @@ -11,16 +11,20 @@ #include "coreneuron/utils/nrnmutdec.hpp" #include "coreneuron/utils/randoms/nrnran123.h" -#include -#include -#include -#include - #ifdef CORENEURON_USE_BOOST_POOL #include #include #endif +#ifdef __CUDACC__ +#include +#endif + +#include +#include +#include +#include + // Defining these attributes seems to help nvc++ in OpenMP target offload mode. #if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENMP) && defined(__CUDACC__) diff --git a/coreneuron/utils/randoms/nrnran123.h b/coreneuron/utils/randoms/nrnran123.h index c68cc9017..6c8e97cf9 100644 --- a/coreneuron/utils/randoms/nrnran123.h +++ b/coreneuron/utils/randoms/nrnran123.h @@ -39,10 +39,6 @@ of the full distribution available from #include #include -#ifdef __CUDACC__ -#include -#endif - // Some files are compiled with DISABLE_OPENACC, and some builds have no GPU // support at all. In these two cases, request that the random123 state is // allocated using new/delete instead of CUDA unified memory. From e193d3113b46742ece4481d6311f145d772bad37 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 15 Jul 2022 16:27:44 +0200 Subject: [PATCH 029/128] revert a different allocation workaround. 
--- coreneuron/sim/scopmath/sparse_thread.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/coreneuron/sim/scopmath/sparse_thread.hpp b/coreneuron/sim/scopmath/sparse_thread.hpp index fc2cc89c7..85580011e 100644 --- a/coreneuron/sim/scopmath/sparse_thread.hpp +++ b/coreneuron/sim/scopmath/sparse_thread.hpp @@ -101,8 +101,7 @@ inline Elm* getelm(SparseObj* so, unsigned row, unsigned col, Elm* new_elem) { #endif { new_elem = new Elm{}; - // Using array-new here causes problems in GPU compilation. - new_elem->value = static_cast(std::malloc(so->_cntml_padded * sizeof(double))); + new_elem->value = new double[so->_cntml_padded]; increase_order(so, row); } } @@ -151,7 +150,7 @@ inline Elm* getelm(SparseObj* so, unsigned row, unsigned col, Elm* new_elem) { #endif { new_elem = new Elm{}; - new_elem->value = static_cast(std::malloc(so->_cntml_padded * sizeof(double))); + new_elem->value = new double[so->_cntml_padded]; increase_order(so, row); } } From 986e82e5f814c1ce2094a24cfcbecd8f928c1fd3 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 15 Jul 2022 16:27:56 +0200 Subject: [PATCH 030/128] clang-format --- coreneuron/mechanism/membfunc.hpp | 12 ++++++------ coreneuron/utils/randoms/nrnran123.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/coreneuron/mechanism/membfunc.hpp b/coreneuron/mechanism/membfunc.hpp index 3d7e9e239..8fe04a06c 100644 --- a/coreneuron/mechanism/membfunc.hpp +++ b/coreneuron/mechanism/membfunc.hpp @@ -130,12 +130,12 @@ constexpr double nrn_nernst(double ci, double co, double z, double celsius) { } } constexpr void nrn_wrote_conc(int type, - double* p1, - int p2, - int it, - double** gimap, - double celsius, - int _cntml_padded) { + double* p1, + int p2, + int it, + double** gimap, + double celsius, + int _cntml_padded) { if (it & 040) { constexpr int _iml = 0; int const STRIDE{_cntml_padded + _iml}; diff --git a/coreneuron/utils/randoms/nrnran123.h b/coreneuron/utils/randoms/nrnran123.h index 
6c8e97cf9..e75ec3f69 100644 --- a/coreneuron/utils/randoms/nrnran123.h +++ b/coreneuron/utils/randoms/nrnran123.h @@ -56,7 +56,7 @@ struct nrnran123_State { char which_; }; -} +} // namespace coreneuron /** @brief Provide a helper function in global namespace that is declared target for OpenMP * offloading to function correctly with NVHPC From f03cdfde5f9d4b857b2b627b5b9df9b1fe001707 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 19 Jul 2022 10:34:29 +0200 Subject: [PATCH 031/128] build system tmpcommit --- CMake/OpenAccHelper.cmake | 5 +- CMakeLists.txt | 8 +- coreneuron/CMakeLists.txt | 128 ++++++++++++-------- coreneuron/permute/cellorder.cpp | 2 +- extra/nrnivmodl_core_makefile.in | 25 ++-- tests/unit/cmdline_interface/CMakeLists.txt | 7 +- tests/unit/interleave_info/CMakeLists.txt | 9 +- tests/unit/lfp/CMakeLists.txt | 14 +-- tests/unit/queueing/CMakeLists.txt | 6 +- tests/unit/solver/CMakeLists.txt | 6 +- tests/unit/solver/test_solver.cpp | 2 +- 11 files changed, 112 insertions(+), 100 deletions(-) diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index 2d84efb70..5be6af61b 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -100,16 +100,17 @@ endif() # ============================================================================= # Set global property that will be used by NEURON to link with CoreNEURON # ============================================================================= +# TODO this should be derived from what we use internally to link special-core? 
if(CORENRN_ENABLE_GPU) set_property( GLOBAL PROPERTY CORENEURON_LIB_LINK_FLAGS - "${NVHPC_ACC_COMP_FLAGS} -rdynamic -lrt -Wl,--whole-archive -L${CMAKE_HOST_SYSTEM_PROCESSOR} -lcorenrnmech -Wl,--no-whole-archive" + "${NVHPC_ACC_COMP_FLAGS} -rdynamic -lrt -Wl,--whole-archive -L${CMAKE_HOST_SYSTEM_PROCESSOR} -lcoreneuron -lcoreneuron-cuda -Wl,--no-whole-archive" ) else() set_property(GLOBAL PROPERTY CORENEURON_LIB_LINK_FLAGS - "-L${CMAKE_HOST_SYSTEM_PROCESSOR} -lcorenrnmech") + "-L${CMAKE_HOST_SYSTEM_PROCESSOR} -lcoreneuron") endif(CORENRN_ENABLE_GPU) if(CORENRN_HAVE_NVHPC_COMPILER) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4366cfee1..7ef147a41 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -303,10 +303,6 @@ if(CORENRN_HAVE_NVHPC_COMPILER) endif() endif() -if(CORENRN_ENABLE_GPU) - set(CORENRN_ENABLE_SHARED OFF) -endif() - if(CORENRN_ENABLE_SHARED) set(COMPILE_LIBRARY_TYPE "SHARED") else() @@ -489,7 +485,7 @@ endif() add_subdirectory(coreneuron) if(CORENRN_ENABLE_GPU) - get_target_property(CORENRN_LINK_LIBRARIES coreneuron INTERFACE_LINK_LIBRARIES) + get_target_property(CORENRN_LINK_LIBRARIES coreneuron-core INTERFACE_LINK_LIBRARIES) if(CORENRN_LINK_LIBRARIES) foreach(LIB ${CORENRN_LINK_LIBRARIES}) get_filename_component(dir_path ${LIB} DIRECTORY) @@ -499,7 +495,7 @@ if(CORENRN_ENABLE_GPU) # https://github.com/BlueBrain/CoreNeuron/blob/856cea4aa647c8f2b0d5bda6d0fc32144c5942e3/CMakeLists.txt#L411-L412 message( NOTICE - "Ignoring dependency '${LIB}' of 'coreneuron' and assuming relevant flags have already been added to CORENEURON_LIB_LINK_FLAGS." + "Ignoring dependency '${LIB}' of 'coreneuron-core' and assuming relevant flags have already been added to CORENEURON_LIB_LINK_FLAGS." ) elseif(NOT dir_path) # In case LIB is not a target but is just the name of a library, e.g. 
"dl" diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index ef5f577bd..28411fd53 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -16,8 +16,6 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) # ============================================================================= # gather various source files # ============================================================================= -file(GLOB_RECURSE CORENEURON_HEADER_FILES "*.h*") -file(GLOB_RECURSE CORENEURON_TEMPLATE_FILES "*.ipp") file( GLOB CORENEURON_CODE_FILES @@ -27,18 +25,19 @@ file( "io/*.cpp" "io/reports/*.cpp" "mechanism/*.cpp" + "mpi/core/nrnmpi_def_cinc.cpp" "network/*.cpp" "permute/*.cpp" "sim/*.cpp" + "sim/scopmath/abort.cpp" + "sim/scopmath/newton_thread.cpp" "utils/*.cpp" "utils/*/*.c" "utils/*/*.cpp") -set(SCOPMATH_CODE_FILES "sim/scopmath/abort.cpp" "sim/scopmath/newton_thread.cpp") set(MPI_LIB_FILES "mpi/lib/mpispike.cpp" "mpi/lib/nrnmpi.cpp") -set(MPI_CORE_FILES "mpi/core/nrnmpi_def_cinc.cpp") if(CORENRN_ENABLE_MPI) # Building these requires -ldl, which is only added if MPI is enabled. - list(APPEND MPI_CORE_FILES "mpi/core/resolve.cpp" "mpi/core/nrnmpidec.cpp") + list(APPEND CORENEURON_CODE_FILES "mpi/core/resolve.cpp" "mpi/core/nrnmpidec.cpp") endif() file(COPY ${CORENEURON_PROJECT_SOURCE_DIR}/external/Random123/include/Random123 DESTINATION ${CMAKE_BINARY_DIR}/include) @@ -107,15 +106,20 @@ if(CORENRN_ENABLE_GPU) set_source_files_properties(${OPENACC_EXCLUDED_FILES} PROPERTIES COMPILE_FLAGS "-DDISABLE_OPENACC") - # Only compile the explicit CUDA implementation of the Hines solver in GPU builds. list(APPEND - # CORENEURON_CODE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/permute/cellorder.cu) + # Only compile the explicit CUDA implementation of the Hines solver in GPU + # builds. 
Because of + # https://forums.developer.nvidia.com/t/cannot-dynamically-load-a-shared-library-containing-both-openacc-and-cuda-code/210972 + # this cannot be included in the same shared library as the rest of the + # OpenACC code. + set(CORENEURON_CUDA_FILES ${CMAKE_CURRENT_SOURCE_DIR}/permute/cellorder.cu) # Eigen-3.5+ provides better GPU support. However, some functions cannot be called directly from # within an OpenACC region. Therefore, we need to wrap them in a special API (decorate them with # __device__ & acc routine tokens), which allows us to eventually call them from OpenACC. Calling # these functions from CUDA kernels presents no issue ... + # TODO is it going to work to call these from libcoreneuron-cuda.so? probably not... if(CORENRN_ENABLE_NMODL AND EXISTS ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) - # list(APPEND CORENEURON_CODE_FILES ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) + list(APPEND CORENEURON_CUDA_FILES ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) endif() endif() @@ -143,23 +147,42 @@ if(CORENRN_ENABLE_MPI AND NOT CORENRN_ENABLE_MPI_DYNAMIC) set(CORENRN_MPI_OBJ $) endif() -# main coreneuron library +# Library containing the bulk of the non-mechanism CoreNEURON code. This is +# always created and installed as a static library, and then the nrnivmodl-core +# workflow extracts the object files from it and does one of the following: +# ~~~ +# - shared build: creates libcoreneuron.so from these objects plus those from +# the translated MOD files +# - static build: creates a (temporary) libcoreneuron.a from these objects plus +# those from the translated MOD files, then statically links that into +# special-core (nrniv-core) +# ~~~ +# This scheme means that both core and mechanism .o files are linked in a single +# step, which is important for GPU linking. 
It does, however, mean that in a +# shared library CPU build then the core code is installed twice, once in +# libcoreneuron-core.a and once in the libcoreneuron.so that contains the +# default mechanisms for the installed nrniv-core binary. In a GPU build, +# libcoreneuron-cuda.{a,so} is also linked to provide the CUDA implementation of +# the Hines solver. add_library( - coreneuron - ${COMPILE_LIBRARY_TYPE} - ${CORENEURON_HEADER_FILES} - ${CORENEURON_TEMPLATE_FILES} + coreneuron-core + STATIC ${CORENEURON_CODE_FILES} - ${cudacorenrn_objs} - ${NMODL_INBUILT_MOD_OUTPUTS} - ${MPI_CORE_FILES} ${CORENRN_MPI_OBJ}) -target_include_directories(coreneuron PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR} - ${CORENEURON_PROJECT_BINARY_DIR}/generated) +if(CORENRN_ENABLE_GPU) + set(coreneuron_cuda_target coreneuron-cuda) + add_library(coreneuron-cuda ${COMPILE_LIBRARY_TYPE} ${CORENEURON_CUDA_FILES}) +endif() + +foreach(target coreneuron-core ${coreneuron_cuda_target}) + target_include_directories(${target} PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR} + ${CORENEURON_PROJECT_BINARY_DIR}/generated) +endforeach() + # we can link to MPI libraries in non-dynamic-mpi build if(CORENRN_ENABLE_MPI AND NOT CORENRN_ENABLE_MPI_DYNAMIC) - target_link_libraries(coreneuron ${MPI_CXX_LIBRARIES}) + target_link_libraries(coreneuron-core ${MPI_CXX_LIBRARIES}) endif() # this is where we handle dynamic mpi library build @@ -168,7 +191,7 @@ if(CORENRN_ENABLE_MPI AND CORENRN_ENABLE_MPI_DYNAMIC) # main coreneuron library needs to be linked to libdl.so and # and should be aware of shared library suffix on different platforms. # ~~~ - target_link_libraries(coreneuron ${CMAKE_DL_LIBS}) + target_link_libraries(coreneuron-core ${CMAKE_DL_LIBS}) # store mpi library targets that will be built list(APPEND corenrn_mpi_targets "") @@ -215,9 +238,9 @@ if(CORENRN_ENABLE_MPI AND CORENRN_ENABLE_MPI_DYNAMIC) # when we will test coreneuron on windows. 
# ~~~ if(MINGW) # type msmpi only - add_dependencies(core${libname}_lib coreneuron) + add_dependencies(core${libname}_lib coreneuron-core) target_link_libraries(core${libname}_lib ${MPI_C_LIBRARIES}) - target_link_libraries(core${libname}_lib coreneuron) + target_link_libraries(core${libname}_lib coreneuron-core) endif() set_property(TARGET core${libname}_lib PROPERTY OUTPUT_NAME core${libname}) list(APPEND corenrn_mpi_targets "core${libname}_lib") @@ -234,28 +257,26 @@ endif() # Prevent CMake from running a device code link step when assembling libcoreneuron.a in GPU builds. # The device code linking needs to be deferred to the final step, where it is done by `nvc++ -cuda`. -set_target_properties(coreneuron PROPERTIES CUDA_SEPARABLE_COMPILATION ON) -# Suppress some compiler warnings. Note in GPU builds this library includes CUDA files. -target_compile_options(coreneuron +#set_target_properties(coreneuron-core PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF CUDA_SEPARABLE_COMPILATION ON) +#if(TARGET coreneuron-cuda) + # set_target_properties(coreneuron-cuda PROPERTIES ) +#endif() +# Suppress some compiler warnings. TODO no it doesn't: Note in GPU builds this library includes CUDA files. +target_compile_options(coreneuron-core PRIVATE $<$:${CORENEURON_CXX_WARNING_SUPPRESSIONS}>) -add_dependencies(coreneuron nrnivmodl-core) +add_dependencies(coreneuron-core nrnivmodl-core) # TODO why? 
-# scopmath is created separately for nrnivmodl-core workflow -add_library(scopmath ${COMPILE_LIBRARY_TYPE} ${CORENEURON_HEADER_FILES} ${SCOPMATH_CODE_FILES}) -target_include_directories(scopmath PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR} - ${CORENEURON_PROJECT_BINARY_DIR}/generated) - -target_link_libraries(coreneuron ${reportinglib_LIBRARY} ${sonatareport_LIBRARY} ${CALIPER_LIB} +target_link_libraries(coreneuron-core ${reportinglib_LIBRARY} ${sonatareport_LIBRARY} ${CALIPER_LIB} ${likwid_LIBRARIES}) -target_include_directories(coreneuron SYSTEM +target_include_directories(coreneuron-core SYSTEM PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR}/external/Random123/include) -target_include_directories(coreneuron SYSTEM +target_include_directories(coreneuron-core SYSTEM PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR}/external/CLI11/include) # See: https://en.cppreference.com/w/cpp/filesystem#Notes if(CMAKE_CXX_COMPILER_IS_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.1) - target_link_libraries(coreneuron stdc++fs) + target_link_libraries(coreneuron-core stdc++fs) endif() if(CORENRN_ENABLE_GPU) @@ -263,17 +284,17 @@ if(CORENRN_ENABLE_GPU) find_package(Boost QUIET) if(Boost_FOUND) message(STATUS "Boost found, enabling use of memory pools for Random123...") - target_include_directories(coreneuron SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) - target_compile_definitions(coreneuron PRIVATE CORENEURON_USE_BOOST_POOL) + target_include_directories(coreneuron-core SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) + target_compile_definitions(coreneuron-core PRIVATE CORENEURON_USE_BOOST_POOL) endif() endif() set_target_properties( - coreneuron scopmath + coreneuron-core ${coreneuron_cuda_target} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib POSITION_INDEPENDENT_CODE ON) -cpp_cc_configure_sanitizers(TARGET coreneuron scopmath ${corenrn_mpi_targets}) +cpp_cc_configure_sanitizers(TARGET coreneuron-core ${coreneuron_cuda_target} 
${corenrn_mpi_targets}) # ============================================================================= # create special-core with halfgap.mod for tests @@ -283,11 +304,11 @@ file(GLOB modfiles "${modfile_directory}/*.mod") if(CORENRN_ENABLE_SHARED) set(corenrn_mech_library - "${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}/libcorenrnmech${CMAKE_SHARED_LIBRARY_SUFFIX}" + "${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}/libcoreneuron${CMAKE_SHARED_LIBRARY_SUFFIX}" CACHE INTERNAL "coreneuron mechanism library") else() set(corenrn_mech_library - "${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}/libcorenrnmech${CMAKE_SHARED_LIBRARY_SUFFIX}" + "${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}/libcoreneuron${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE INTERNAL "coreneuron mechanism library") endif() @@ -296,7 +317,7 @@ set(output_binaries "${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}/special-c add_custom_command( OUTPUT ${output_binaries} - DEPENDS scopmath coreneuron ${NMODL_TARGET_TO_DEPEND} ${modfiles} ${CORENEURON_BUILTIN_MODFILES} + DEPENDS coreneuron-core ${coreneuron_cuda_target} ${NMODL_TARGET_TO_DEPEND} ${modfiles} ${CORENEURON_BUILTIN_MODFILES} COMMAND ${CMAKE_BINARY_DIR}/bin/nrnivmodl-core -b ${COMPILE_LIBRARY_TYPE} -m ${CORENRN_MOD2CPP_BINARY} -p 4 "${modfile_directory}" WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/bin @@ -305,14 +326,18 @@ add_custom_target(nrniv-core ALL DEPENDS ${output_binaries}) if(CORENRN_ENABLE_GPU) separate_arguments(CORENRN_ACC_FLAGS UNIX_COMMAND "${NVHPC_ACC_COMP_FLAGS}") - target_compile_options(coreneuron BEFORE PRIVATE $<$:${CORENRN_ACC_FLAGS}>) - target_compile_options(scopmath BEFORE PRIVATE $<$:${CORENRN_ACC_FLAGS}>) + target_compile_options(coreneuron-core BEFORE PRIVATE $<$:${CORENRN_ACC_FLAGS}>) endif() +# Create an extra target for internal use that unit tests and so on can depend +# on +add_library(coreneuron-all INTERFACE) +target_link_libraries(coreneuron-all INTERFACE coreneuron-core ${coreneuron_cuda_target} 
"${corenrn_mech_library}") + # ============================================================================= # Extract link definitions to be used with nrnivmodl-core # ============================================================================= -get_target_property(CORENRN_LINK_LIBS coreneuron LINK_LIBRARIES) +get_target_property(CORENRN_LINK_LIBS coreneuron-core LINK_LIBRARIES) if(NOT CORENRN_LINK_LIBS) set(CORENRN_LINK_LIBS "") endif() @@ -346,18 +371,19 @@ file(COPY apps/coreneuron.cpp DESTINATION ${CMAKE_BINARY_DIR}/share/coreneuron) # coreneuron main libraries install( - TARGETS coreneuron + TARGETS coreneuron-core EXPORT coreneuron LIBRARY DESTINATION lib ARCHIVE DESTINATION lib INCLUDES DESTINATION $) -# scopemath into share for nrnivmodl-core -install( - TARGETS scopmath - EXPORT coreneuron - DESTINATION lib) +if(TARGET coreneuron-cuda) + install(TARGETS coreneuron-cuda + EXPORT coreneuron + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib) +endif() # headers and some standalone code files for nrnivmodl-core install( diff --git a/coreneuron/permute/cellorder.cpp b/coreneuron/permute/cellorder.cpp index 2c2fca92e..c95fedcf2 100644 --- a/coreneuron/permute/cellorder.cpp +++ b/coreneuron/permute/cellorder.cpp @@ -576,7 +576,7 @@ void solve_interleaved2(int ith) { if (corenrn_param.gpu && corenrn_param.cuda_interface) { auto* d_nt = static_cast(acc_deviceptr(nt)); auto* d_info = static_cast(acc_deviceptr(interleave_info + ith)); - // solve_interleaved2_launcher(d_nt, d_info, ncore, acc_get_cuda_stream(nt->stream_id)); + solve_interleaved2_launcher(d_nt, d_info, ncore, acc_get_cuda_stream(nt->stream_id)); } else { #endif int* ncycles = ii.cellsize; // nwarp of these diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in index c26066e4c..73ed76738 100644 --- a/extra/nrnivmodl_core_makefile.in +++ b/extra/nrnivmodl_core_makefile.in @@ -104,7 +104,7 @@ ifeq (@CORENRN_ENABLE_NMODL@, ON) endif # name of the mechanism library with suffix 
if provided -COREMECH_LIB_NAME = corenrnmech$(if $(MECHLIB_SUFFIX),_$(MECHLIB_SUFFIX),) +COREMECH_LIB_NAME = coreneuron$(if $(MECHLIB_SUFFIX),_$(MECHLIB_SUFFIX),) COREMECH_LIB_PATH = $(OUTPUT_DIR)/lib$(COREMECH_LIB_NAME)$(LIB_SUFFIX) # Various header and C++/Object file @@ -114,8 +114,8 @@ ENGINEMECH_OBJ = $(MOD_OBJS_DIR)/enginemech.o # Depending on static/shared build, determine library name and it's suffix ifeq ($(TARGET_LIB_TYPE), STATIC) - LIB_SUFFIX = @CMAKE_SHARED_LIBRARY_SUFFIX@ - corenrnmech_lib_target = coremech_lib_shared + LIB_SUFFIX = @CMAKE_STATIC_LIBRARY_SUFFIX@ + corenrnmech_lib_target = coremech_lib_static else LIB_SUFFIX = @CMAKE_SHARED_LIBRARY_SUFFIX@ corenrnmech_lib_target = coremech_lib_shared @@ -211,7 +211,7 @@ $(SPECIAL_EXE): coremech_lib_target @printf " => $(C_GREEN)Binary$(C_RESET) creating $(SPECIAL_EXE)\n" $(CXX_LINK_EXE_CMD) -o $(SPECIAL_EXE) $(CORENRN_SHARE_CORENRN_DIR)/coreneuron.cpp \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ - -L$(OUTPUT_DIR) -l$(COREMECH_LIB_NAME) $(CORENRNLIB_FLAGS) $(LDFLAGS) \ + -L$(OUTPUT_DIR) -l$(COREMECH_LIB_NAME) -lcoreneuron-cuda $(CORENRNLIB_FLAGS) $(LDFLAGS) \ -L$(CORENRN_LIB_DIR) \ -Wl,-rpath,'$(LIB_RPATH)' -Wl,-rpath,$(CORENRN_LIB_DIR) -Wl,-rpath,'$(INSTALL_LIB_RPATH)' @@ -225,18 +225,17 @@ $(ENGINEMECH_OBJ): $(CORENRN_SHARE_CORENRN_DIR)/enginemech.cpp | $(MOD_OBJS_DIR) # build shared library of mechanisms coremech_lib_shared: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always - # extract the object files from libcoreneuron.a - mkdir -p $(MOD_OBJS_DIR)/libcoreneuron - ar --output=$(MOD_OBJS_DIR)/libcoreneuron x $(CORENRN_LIB_DIR)/libcoreneuron.a - # extract the object files from libscopmath.a - mkdir -p $(MOD_OBJS_DIR)/libscopmath - ar --output=$(MOD_OBJS_DIR)/libscopmath x $(CORENRN_LIB_DIR)/libscopmath.a + # extract the object files from libcoreneuron-core.a + mkdir -p $(MOD_OBJS_DIR)/libcoreneuron-core + rm -f $(MOD_OBJS_DIR)/libcoreneuron-core/*.o + ar --output=$(MOD_OBJS_DIR)/libcoreneuron-core x 
$(CORENRN_LIB_DIR)/libcoreneuron-core.a $(CXX_SHARED_LIB_CMD) $(ENGINEMECH_OBJ) -o ${COREMECH_LIB_PATH} $(ALL_OBJS) \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ $(LDFLAGS) ${SONAME_OPTION} -Wl,--start-group \ - $(MOD_OBJS_DIR)/libcoreneuron/*.o \ - -Wl,--end-group -Wl,--start-group $(MOD_OBJS_DIR)/libscopmath/*.o \ - -Wl,--end-group $(CORENRNLIB_FLAGS) -Wl,-rpath,$(CORENRN_LIB_DIR); + $(MOD_OBJS_DIR)/libcoreneuron-core/*.o \ + -Wl,--end-group $(CORENRNLIB_FLAGS) -Wl,-rpath,$(CORENRN_LIB_DIR) + # cleanup + rm $(MOD_OBJS_DIR)/libcoreneuron-core/*.o # build static library of mechanisms coremech_lib_static: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always diff --git a/tests/unit/cmdline_interface/CMakeLists.txt b/tests/unit/cmdline_interface/CMakeLists.txt index cd177c521..856ce0779 100644 --- a/tests/unit/cmdline_interface/CMakeLists.txt +++ b/tests/unit/cmdline_interface/CMakeLists.txt @@ -4,14 +4,13 @@ # See top-level LICENSE file for details. # ============================================================================= add_executable(cmd_interface_test_bin test_cmdline_interface.cpp) -target_link_libraries(cmd_interface_test_bin ${MPI_CXX_LIBRARIES} coreneuron - ${corenrn_mech_library} ${reportinglib_LIBRARY} ${sonatareport_LIBRARY}) +target_link_libraries(cmd_interface_test_bin coreneuron-all) target_include_directories(cmd_interface_test_bin SYSTEM PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR}/external/CLI11/include) -add_dependencies(cmd_interface_test_bin nrniv-core) +#add_dependencies(cmd_interface_test_bin nrniv-core) # Tell CMake *not* to run an explicit device code linker step (which will produce errors); let the # NVHPC C++ compiler handle this implicitly. 
-set_target_properties(cmd_interface_test_bin PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) +#set_target_properties(cmd_interface_test_bin PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) target_compile_options(cmd_interface_test_bin PRIVATE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) add_test(NAME cmd_interface_test COMMAND ${TEST_EXEC_PREFIX} $) cpp_cc_configure_sanitizers(TARGET cmd_interface_test_bin TEST cmd_interface_test) diff --git a/tests/unit/interleave_info/CMakeLists.txt b/tests/unit/interleave_info/CMakeLists.txt index ce69b097e..153fc1f75 100644 --- a/tests/unit/interleave_info/CMakeLists.txt +++ b/tests/unit/interleave_info/CMakeLists.txt @@ -4,13 +4,12 @@ # See top-level LICENSE file for details. # ============================================================================= add_executable(interleave_info_bin check_constructors.cpp) -target_link_libraries(interleave_info_bin ${MPI_CXX_LIBRARIES} coreneuron ${corenrn_mech_library} - ${reportinglib_LIBRARY} ${sonatareport_LIBRARY}) -add_dependencies(interleave_info_bin nrniv-core) +target_link_libraries(interleave_info_bin coreneuron-all) # Tell CMake *not* to run an explicit device code linker step (which will produce errors); let the # NVHPC C++ compiler handle this implicitly. 
-set_target_properties(interleave_info_bin PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) -target_compile_options(interleave_info_bin PRIVATE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) +#set_target_properties(interleave_info_bin PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) +#separate_arguments(CORENRN_ACC_FLAGS UNIX_COMMAND "${NVHPC_ACC_COMP_FLAGS}") +target_compile_options(interleave_info_bin PRIVATE ${CORENRN_ACC_FLAGS} ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) add_test(NAME interleave_info_constructor_test COMMAND ${TEST_EXEC_PREFIX} $) cpp_cc_configure_sanitizers(TARGET interleave_info_bin TEST interleave_info_constructor_test) diff --git a/tests/unit/lfp/CMakeLists.txt b/tests/unit/lfp/CMakeLists.txt index 61d749aa9..fc716fbc0 100644 --- a/tests/unit/lfp/CMakeLists.txt +++ b/tests/unit/lfp/CMakeLists.txt @@ -3,18 +3,12 @@ # # See top-level LICENSE file for details. # ============================================================================= - -include_directories(${CMAKE_SOURCE_DIR}/coreneuron ${Boost_INCLUDE_DIRS}) -file(GLOB lfp_test_src "*.cpp") - -add_executable(lfp_test_bin ${lfp_test_src}) -target_link_libraries(lfp_test_bin ${MPI_CXX_LIBRARIES} coreneuron ${corenrn_mech_library} - ${reportinglib_LIBRARY} ${sonatareport_LIBRARY}) +add_executable(lfp_test_bin lfp.cpp) +target_link_libraries(lfp_test_bin coreneuron-all) # Tell CMake *not* to run an explicit device code linker step (which will produce errors); let the # NVHPC C++ compiler handle this implicitly. 
-set_target_properties(lfp_test_bin PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) -target_compile_options(lfp_test_bin PRIVATE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) -add_dependencies(lfp_test_bin nrniv-core) +#set_target_properties(lfp_test_bin PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) +#add_dependencies(lfp_test_bin nrniv-core) add_test(NAME lfp_test COMMAND ${TEST_EXEC_PREFIX} $) cpp_cc_configure_sanitizers(TARGET lfp_test_bin TEST lfp_test) set_property( diff --git a/tests/unit/queueing/CMakeLists.txt b/tests/unit/queueing/CMakeLists.txt index ba3725d32..b7d3f46a6 100644 --- a/tests/unit/queueing/CMakeLists.txt +++ b/tests/unit/queueing/CMakeLists.txt @@ -4,12 +4,10 @@ # See top-level LICENSE file for details. # ============================================================================= add_executable(queuing_test_bin test_queueing.cpp) -target_link_libraries(queuing_test_bin ${Boost_SYSTEM_LIBRARY} coreneuron ${corenrn_mech_library} - ${reportinglib_LIBRARY} ${sonatareport_LIBRARY}) -add_dependencies(queuing_test_bin nrniv-core) +target_link_libraries(queuing_test_bin coreneuron-all ${Boost_SYSTEM_LIBRARY}) # Tell CMake *not* to run an explicit device code linker step (which will produce errors); let the # NVHPC C++ compiler handle this implicitly. 
-set_target_properties(queuing_test_bin PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) +#set_target_properties(queuing_test_bin PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) target_compile_options(queuing_test_bin PRIVATE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) add_test(NAME queuing_test COMMAND ${TEST_EXEC_PREFIX} $) cpp_cc_configure_sanitizers(TARGET queuing_test_bin TEST queuing_test) diff --git a/tests/unit/solver/CMakeLists.txt b/tests/unit/solver/CMakeLists.txt index 1d01ea4b1..77d46a464 100644 --- a/tests/unit/solver/CMakeLists.txt +++ b/tests/unit/solver/CMakeLists.txt @@ -6,14 +6,14 @@ include_directories(${CMAKE_SOURCE_DIR}/coreneuron ${Boost_INCLUDE_DIRS}) add_executable(test-solver test_solver.cpp) -target_link_libraries(test-solver coreneuron ${corenrn_mech_library}) +target_link_libraries(test-solver coreneuron-all) target_include_directories(test-solver SYSTEM PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR}/external/CLI11/include) # Tell CMake *not* to run an explicit device code linker step (which will produce errors); let the # NVHPC C++ compiler handle this implicitly. 
-set_target_properties(test-solver PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) +#set_target_properties(test-solver PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) target_compile_options(test-solver PRIVATE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) -add_dependencies(test-solver nrniv-core) +#add_dependencies(test-solver nrniv-core) add_test(NAME test-solver COMMAND $) cpp_cc_configure_sanitizers(TARGET test-solver TEST test-solver) diff --git a/tests/unit/solver/test_solver.cpp b/tests/unit/solver/test_solver.cpp index b797ac711..c1021bcb7 100644 --- a/tests/unit/solver/test_solver.cpp +++ b/tests/unit/solver/test_solver.cpp @@ -275,7 +275,7 @@ auto active_implementations() { ret.push_back(SolverImplementation::CellPermute0_GPU); ret.push_back(SolverImplementation::CellPermute1_GPU); ret.push_back(SolverImplementation::CellPermute2_GPU); - // ret.push_back(SolverImplementation::CellPermute2_CUDA); + ret.push_back(SolverImplementation::CellPermute2_CUDA); #endif return ret; } From 6bb5fe93d2d1125214e5257bbb74d923e965b031 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 19 Jul 2022 10:48:41 +0200 Subject: [PATCH 032/128] cleanup unit test cmake --- tests/CMakeLists.txt | 5 ++++- tests/unit/cmdline_interface/CMakeLists.txt | 7 ++----- tests/unit/interleave_info/CMakeLists.txt | 7 +------ tests/unit/lfp/CMakeLists.txt | 6 +----- tests/unit/queueing/CMakeLists.txt | 6 +----- tests/unit/solver/CMakeLists.txt | 14 ++------------ 6 files changed, 11 insertions(+), 34 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e9cacd422..b3d8a30f1 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -27,7 +27,10 @@ find_package(Boost 1.59 QUIET COMPONENTS filesystem system atomic unit_test_fram if(Boost_FOUND) if(CORENRN_ENABLE_UNIT_TESTS) - include_directories(SYSTEM ${Boost_INCLUDE_DIRS}) + add_library(coreneuron-unit-test INTERFACE) + target_compile_options(coreneuron-unit-test INTERFACE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) + 
target_include_directories(coreneuron-unit-test SYSTEM INTERFACE ${Boost_INCLUDE_DIRS} ${CORENEURON_PROJECT_SOURCE_DIR}/external/CLI11/include) + target_link_libraries(coreneuron-unit-test INTERFACE coreneuron-all ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}) add_subdirectory(unit/cmdline_interface) add_subdirectory(unit/interleave_info) add_subdirectory(unit/alignment) diff --git a/tests/unit/cmdline_interface/CMakeLists.txt b/tests/unit/cmdline_interface/CMakeLists.txt index 856ce0779..26f0b62c7 100644 --- a/tests/unit/cmdline_interface/CMakeLists.txt +++ b/tests/unit/cmdline_interface/CMakeLists.txt @@ -4,13 +4,10 @@ # See top-level LICENSE file for details. # ============================================================================= add_executable(cmd_interface_test_bin test_cmdline_interface.cpp) -target_link_libraries(cmd_interface_test_bin coreneuron-all) -target_include_directories(cmd_interface_test_bin SYSTEM - PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR}/external/CLI11/include) -#add_dependencies(cmd_interface_test_bin nrniv-core) +target_link_libraries(cmd_interface_test_bin coreneuron-unit-test) # Tell CMake *not* to run an explicit device code linker step (which will produce errors); let the # NVHPC C++ compiler handle this implicitly. #set_target_properties(cmd_interface_test_bin PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) -target_compile_options(cmd_interface_test_bin PRIVATE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) +#target_compile_options(cmd_interface_test_bin PRIVATE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) add_test(NAME cmd_interface_test COMMAND ${TEST_EXEC_PREFIX} $) cpp_cc_configure_sanitizers(TARGET cmd_interface_test_bin TEST cmd_interface_test) diff --git a/tests/unit/interleave_info/CMakeLists.txt b/tests/unit/interleave_info/CMakeLists.txt index 153fc1f75..948f32405 100644 --- a/tests/unit/interleave_info/CMakeLists.txt +++ b/tests/unit/interleave_info/CMakeLists.txt @@ -4,12 +4,7 @@ # See top-level LICENSE file for details. 
# ============================================================================= add_executable(interleave_info_bin check_constructors.cpp) -target_link_libraries(interleave_info_bin coreneuron-all) -# Tell CMake *not* to run an explicit device code linker step (which will produce errors); let the -# NVHPC C++ compiler handle this implicitly. -#set_target_properties(interleave_info_bin PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) -#separate_arguments(CORENRN_ACC_FLAGS UNIX_COMMAND "${NVHPC_ACC_COMP_FLAGS}") -target_compile_options(interleave_info_bin PRIVATE ${CORENRN_ACC_FLAGS} ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) +target_link_libraries(interleave_info_bin coreneuron-unit-test) add_test(NAME interleave_info_constructor_test COMMAND ${TEST_EXEC_PREFIX} $) cpp_cc_configure_sanitizers(TARGET interleave_info_bin TEST interleave_info_constructor_test) diff --git a/tests/unit/lfp/CMakeLists.txt b/tests/unit/lfp/CMakeLists.txt index fc716fbc0..8b5b201c4 100644 --- a/tests/unit/lfp/CMakeLists.txt +++ b/tests/unit/lfp/CMakeLists.txt @@ -4,11 +4,7 @@ # See top-level LICENSE file for details. # ============================================================================= add_executable(lfp_test_bin lfp.cpp) -target_link_libraries(lfp_test_bin coreneuron-all) -# Tell CMake *not* to run an explicit device code linker step (which will produce errors); let the -# NVHPC C++ compiler handle this implicitly. -#set_target_properties(lfp_test_bin PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) -#add_dependencies(lfp_test_bin nrniv-core) +target_link_libraries(lfp_test_bin coreneuron-unit-test) add_test(NAME lfp_test COMMAND ${TEST_EXEC_PREFIX} $) cpp_cc_configure_sanitizers(TARGET lfp_test_bin TEST lfp_test) set_property( diff --git a/tests/unit/queueing/CMakeLists.txt b/tests/unit/queueing/CMakeLists.txt index b7d3f46a6..fc653ea98 100644 --- a/tests/unit/queueing/CMakeLists.txt +++ b/tests/unit/queueing/CMakeLists.txt @@ -4,10 +4,6 @@ # See top-level LICENSE file for details. 
# ============================================================================= add_executable(queuing_test_bin test_queueing.cpp) -target_link_libraries(queuing_test_bin coreneuron-all ${Boost_SYSTEM_LIBRARY}) -# Tell CMake *not* to run an explicit device code linker step (which will produce errors); let the -# NVHPC C++ compiler handle this implicitly. -#set_target_properties(queuing_test_bin PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) -target_compile_options(queuing_test_bin PRIVATE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) +target_link_libraries(queuing_test_bin coreneuron-unit-test) add_test(NAME queuing_test COMMAND ${TEST_EXEC_PREFIX} $) cpp_cc_configure_sanitizers(TARGET queuing_test_bin TEST queuing_test) diff --git a/tests/unit/solver/CMakeLists.txt b/tests/unit/solver/CMakeLists.txt index 77d46a464..01e058525 100644 --- a/tests/unit/solver/CMakeLists.txt +++ b/tests/unit/solver/CMakeLists.txt @@ -1,19 +1,9 @@ # ============================================================================= -# Copyright (C) 2022 Blue Brain Project +# Copyright (c) 2022 Blue Brain Project # # See top-level LICENSE file for details. # ============================================================================= - -include_directories(${CMAKE_SOURCE_DIR}/coreneuron ${Boost_INCLUDE_DIRS}) add_executable(test-solver test_solver.cpp) -target_link_libraries(test-solver coreneuron-all) -target_include_directories(test-solver SYSTEM - PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR}/external/CLI11/include) - -# Tell CMake *not* to run an explicit device code linker step (which will produce errors); let the -# NVHPC C++ compiler handle this implicitly. 
-#set_target_properties(test-solver PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) -target_compile_options(test-solver PRIVATE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) -#add_dependencies(test-solver nrniv-core) +target_link_libraries(test-solver coreneuron-unit-test) add_test(NAME test-solver COMMAND $) cpp_cc_configure_sanitizers(TARGET test-solver TEST test-solver) From bddef616f76d3d58f84eee5fa54498febb03a6ba Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 19 Jul 2022 10:58:36 +0200 Subject: [PATCH 033/128] more cmake --- coreneuron/CMakeLists.txt | 105 ++++++++++---------- tests/CMakeLists.txt | 10 +- tests/integration/CMakeLists.txt | 1 - tests/unit/cmdline_interface/CMakeLists.txt | 4 - 4 files changed, 57 insertions(+), 63 deletions(-) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 28411fd53..6a4a7136b 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -106,18 +106,16 @@ if(CORENRN_ENABLE_GPU) set_source_files_properties(${OPENACC_EXCLUDED_FILES} PROPERTIES COMPILE_FLAGS "-DDISABLE_OPENACC") - # Only compile the explicit CUDA implementation of the Hines solver in GPU - # builds. Because of + # Only compile the explicit CUDA implementation of the Hines solver in GPU builds. Because of # https://forums.developer.nvidia.com/t/cannot-dynamically-load-a-shared-library-containing-both-openacc-and-cuda-code/210972 - # this cannot be included in the same shared library as the rest of the - # OpenACC code. + # this cannot be included in the same shared library as the rest of the OpenACC code. set(CORENEURON_CUDA_FILES ${CMAKE_CURRENT_SOURCE_DIR}/permute/cellorder.cu) # Eigen-3.5+ provides better GPU support. However, some functions cannot be called directly from # within an OpenACC region. Therefore, we need to wrap them in a special API (decorate them with # __device__ & acc routine tokens), which allows us to eventually call them from OpenACC. Calling - # these functions from CUDA kernels presents no issue ... 
- # TODO is it going to work to call these from libcoreneuron-cuda.so? probably not... + # these functions from CUDA kernels presents no issue ... TODO is it going to work to call these + # from libcoreneuron-cuda.so? probably not... if(CORENRN_ENABLE_NMODL AND EXISTS ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) list(APPEND CORENEURON_CUDA_FILES ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) endif() @@ -147,29 +145,26 @@ if(CORENRN_ENABLE_MPI AND NOT CORENRN_ENABLE_MPI_DYNAMIC) set(CORENRN_MPI_OBJ $) endif() -# Library containing the bulk of the non-mechanism CoreNEURON code. This is -# always created and installed as a static library, and then the nrnivmodl-core -# workflow extracts the object files from it and does one of the following: +# Library containing the bulk of the non-mechanism CoreNEURON code. This is always created and +# installed as a static library, and then the nrnivmodl-core workflow extracts the object files from +# it and does one of the following: # ~~~ # - shared build: creates libcoreneuron.so from these objects plus those from # the translated MOD files -# - static build: creates a (temporary) libcoreneuron.a from these objects plus -# those from the translated MOD files, then statically links that into -# special-core (nrniv-core) +# - static build: creates a (temporary, does not get installed) libcoreneuron.a +# from these objects plus those from the translated MOD files, then +# statically links that into special-core (nrniv-core) # ~~~ -# This scheme means that both core and mechanism .o files are linked in a single -# step, which is important for GPU linking. It does, however, mean that in a -# shared library CPU build then the core code is installed twice, once in -# libcoreneuron-core.a and once in the libcoreneuron.so that contains the -# default mechanisms for the installed nrniv-core binary. 
In a GPU build, -# libcoreneuron-cuda.{a,so} is also linked to provide the CUDA implementation of -# the Hines solver. -add_library( - coreneuron-core - STATIC - ${CORENEURON_CODE_FILES} - ${CORENRN_MPI_OBJ}) - +# This scheme means that both core and mechanism .o files are linked in a single step, which is +# important for GPU linking. It does, however, mean that in a shared library CPU build then the core +# code is installed twice, once in libcoreneuron-core.a and once in the libcoreneuron.so that +# contains the default mechanisms for the installed nrniv-core binary. In a GPU build, +# libcoreneuron-cuda.{a,so} is also linked to provide the CUDA implementation of the Hines solver. +add_library(coreneuron-core STATIC ${CORENEURON_CODE_FILES} ${CORENRN_MPI_OBJ}) + +# Library containing explicit CUDA code, compiled by nvcc. This cannot be included in +# coreneuron-core because of this issue: +# https://forums.developer.nvidia.com/t/cannot-dynamically-load-a-shared-library-containing-both-openacc-and-cuda-code/210972 if(CORENRN_ENABLE_GPU) set(coreneuron_cuda_target coreneuron-cuda) add_library(coreneuron-cuda ${COMPILE_LIBRARY_TYPE} ${CORENEURON_CUDA_FILES}) @@ -257,17 +252,17 @@ endif() # Prevent CMake from running a device code link step when assembling libcoreneuron.a in GPU builds. # The device code linking needs to be deferred to the final step, where it is done by `nvc++ -cuda`. -#set_target_properties(coreneuron-core PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF CUDA_SEPARABLE_COMPILATION ON) -#if(TARGET coreneuron-cuda) - # set_target_properties(coreneuron-cuda PROPERTIES ) -#endif() -# Suppress some compiler warnings. TODO no it doesn't: Note in GPU builds this library includes CUDA files. +# set_target_properties(coreneuron-core PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF +# CUDA_SEPARABLE_COMPILATION ON) if(TARGET coreneuron-cuda) +# set_target_properties(coreneuron-cuda PROPERTIES ) +# endif() Suppress some compiler warnings. 
TODO no it doesn't: Note in GPU builds this library +# includes CUDA files. target_compile_options(coreneuron-core PRIVATE $<$:${CORENEURON_CXX_WARNING_SUPPRESSIONS}>) add_dependencies(coreneuron-core nrnivmodl-core) # TODO why? -target_link_libraries(coreneuron-core ${reportinglib_LIBRARY} ${sonatareport_LIBRARY} ${CALIPER_LIB} - ${likwid_LIBRARIES}) +target_link_libraries(coreneuron-core ${reportinglib_LIBRARY} ${sonatareport_LIBRARY} + ${CALIPER_LIB} ${likwid_LIBRARIES}) target_include_directories(coreneuron-core SYSTEM PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR}/external/Random123/include) @@ -302,22 +297,17 @@ cpp_cc_configure_sanitizers(TARGET coreneuron-core ${coreneuron_cuda_target} ${c set(modfile_directory "${CORENEURON_PROJECT_SOURCE_DIR}/tests/integration/ring_gap/mod files") file(GLOB modfiles "${modfile_directory}/*.mod") -if(CORENRN_ENABLE_SHARED) - set(corenrn_mech_library - "${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}/libcoreneuron${CMAKE_SHARED_LIBRARY_SUFFIX}" - CACHE INTERNAL "coreneuron mechanism library") -else() - set(corenrn_mech_library - "${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}/libcoreneuron${CMAKE_STATIC_LIBRARY_SUFFIX}" - CACHE INTERNAL "coreneuron mechanism library") -endif() - -set(output_binaries "${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}/special-core" - "${corenrn_mech_library}") +# We have to link things like unit tests against this because some "core" .cpp files refer to +# symbols in the translated versions of default .mod files +set(nrniv_core_prefix "${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}") +set(corenrn_mech_library + "${nrniv_core_prefix}/libcoreneuron${CMAKE_${COMPILE_LIBRARY_TYPE}_LIBRARY_SUFFIX}") +set(output_binaries "${nrniv_core_prefix}/special-core" "${corenrn_mech_library}") add_custom_command( OUTPUT ${output_binaries} - DEPENDS coreneuron-core ${coreneuron_cuda_target} ${NMODL_TARGET_TO_DEPEND} ${modfiles} ${CORENEURON_BUILTIN_MODFILES} + DEPENDS coreneuron-core 
${coreneuron_cuda_target} ${NMODL_TARGET_TO_DEPEND} ${modfiles} + ${CORENEURON_BUILTIN_MODFILES} COMMAND ${CMAKE_BINARY_DIR}/bin/nrnivmodl-core -b ${COMPILE_LIBRARY_TYPE} -m ${CORENRN_MOD2CPP_BINARY} -p 4 "${modfile_directory}" WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/bin @@ -326,13 +316,14 @@ add_custom_target(nrniv-core ALL DEPENDS ${output_binaries}) if(CORENRN_ENABLE_GPU) separate_arguments(CORENRN_ACC_FLAGS UNIX_COMMAND "${NVHPC_ACC_COMP_FLAGS}") - target_compile_options(coreneuron-core BEFORE PRIVATE $<$:${CORENRN_ACC_FLAGS}>) + target_compile_options(coreneuron-core BEFORE + PRIVATE $<$:${CORENRN_ACC_FLAGS}>) endif() -# Create an extra target for internal use that unit tests and so on can depend -# on +# Create an extra target for internal use that unit tests and so on can depend on add_library(coreneuron-all INTERFACE) -target_link_libraries(coreneuron-all INTERFACE coreneuron-core ${coreneuron_cuda_target} "${corenrn_mech_library}") +target_link_libraries(coreneuron-all INTERFACE coreneuron-core ${coreneuron_cuda_target} + "${corenrn_mech_library}") # ============================================================================= # Extract link definitions to be used with nrnivmodl-core @@ -379,10 +370,11 @@ install( DESTINATION $) if(TARGET coreneuron-cuda) - install(TARGETS coreneuron-cuda - EXPORT coreneuron - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib) + install( + TARGETS coreneuron-cuda + EXPORT coreneuron + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib) endif() # headers and some standalone code files for nrnivmodl-core @@ -409,8 +401,11 @@ install( RENAME nrniv-core) install(FILES apps/coreneuron.cpp DESTINATION share/coreneuron) -# install mechanism library -install(FILES ${corenrn_mech_library} DESTINATION lib) +# install mechanism library in shared library builds, if we're linking statically then there is no +# need +if(CORENRN_ENABLE_SHARED) + install(FILES ${corenrn_mech_library} DESTINATION lib) +endif() # install random123 and nmodl 
headers install(DIRECTORY ${CMAKE_BINARY_DIR}/include/ DESTINATION include) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b3d8a30f1..ea8052d7b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -28,9 +28,13 @@ find_package(Boost 1.59 QUIET COMPONENTS filesystem system atomic unit_test_fram if(Boost_FOUND) if(CORENRN_ENABLE_UNIT_TESTS) add_library(coreneuron-unit-test INTERFACE) - target_compile_options(coreneuron-unit-test INTERFACE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) - target_include_directories(coreneuron-unit-test SYSTEM INTERFACE ${Boost_INCLUDE_DIRS} ${CORENEURON_PROJECT_SOURCE_DIR}/external/CLI11/include) - target_link_libraries(coreneuron-unit-test INTERFACE coreneuron-all ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}) + target_compile_options(coreneuron-unit-test + INTERFACE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) + target_include_directories( + coreneuron-unit-test SYSTEM INTERFACE ${Boost_INCLUDE_DIRS} + ${CORENEURON_PROJECT_SOURCE_DIR}/external/CLI11/include) + target_link_libraries(coreneuron-unit-test INTERFACE coreneuron-all + ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}) add_subdirectory(unit/cmdline_interface) add_subdirectory(unit/interleave_info) add_subdirectory(unit/alignment) diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index fa488028e..75ae106e1 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -86,7 +86,6 @@ foreach(data_dir "ring" "ring_gap") math(EXPR num_suffixes_m1 "${num_suffixes} - 1") foreach(suffix_index RANGE 0 ${num_suffixes_m1}) list(GET test_suffixes ${suffix_index} test_suffix) - message(STATUS "test_suffix=${test_suffix}") file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/${data_dir}/out.dat.ref" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/${data_dir}${test_suffix}/") endforeach() diff --git a/tests/unit/cmdline_interface/CMakeLists.txt b/tests/unit/cmdline_interface/CMakeLists.txt index 26f0b62c7..fadbe60a3 100644 --- 
a/tests/unit/cmdline_interface/CMakeLists.txt +++ b/tests/unit/cmdline_interface/CMakeLists.txt @@ -5,9 +5,5 @@ # ============================================================================= add_executable(cmd_interface_test_bin test_cmdline_interface.cpp) target_link_libraries(cmd_interface_test_bin coreneuron-unit-test) -# Tell CMake *not* to run an explicit device code linker step (which will produce errors); let the -# NVHPC C++ compiler handle this implicitly. -#set_target_properties(cmd_interface_test_bin PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF) -#target_compile_options(cmd_interface_test_bin PRIVATE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) add_test(NAME cmd_interface_test COMMAND ${TEST_EXEC_PREFIX} $) cpp_cc_configure_sanitizers(TARGET cmd_interface_test_bin TEST cmd_interface_test) From 745676638d90d3e93806b9947e622e8f5bb6c71c Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 19 Jul 2022 11:20:37 +0200 Subject: [PATCH 034/128] cleanup --- CMakeLists.txt | 3 --- coreneuron/CMakeLists.txt | 21 +++++++-------------- coreneuron/mechanism/mech/enginemech.cpp | 2 +- extra/nrnivmodl-core.in | 2 +- extra/nrnivmodl_core_makefile.in | 2 +- 5 files changed, 10 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ef147a41..e128652a0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -171,9 +171,6 @@ set(CORENRN_ACCELERATOR_OFFLOAD "Disabled") if(CORENRN_ENABLE_GPU) # Older CMake versions than 3.15 have not been tested for GPU/CUDA/OpenACC support after # https://github.com/BlueBrain/CoreNeuron/pull/609. - # https://cmake.org/cmake/help/latest/release/3.14.html#properties suggests there would be - # problems because of expressions like set_target_properties(lfp_test_bin PROPERTIES - # CUDA_RESOLVE_DEVICE_SYMBOLS OFF) # Fail hard and early if we don't have the PGI/NVHPC compiler. 
if(NOT CORENRN_HAVE_NVHPC_COMPILER) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 6a4a7136b..971f3fa41 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -250,18 +250,9 @@ if(CORENRN_ENABLE_MPI AND CORENRN_ENABLE_MPI_DYNAMIC) install(TARGETS ${corenrn_mpi_targets} DESTINATION lib) endif() -# Prevent CMake from running a device code link step when assembling libcoreneuron.a in GPU builds. -# The device code linking needs to be deferred to the final step, where it is done by `nvc++ -cuda`. -# set_target_properties(coreneuron-core PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS OFF -# CUDA_SEPARABLE_COMPILATION ON) if(TARGET coreneuron-cuda) -# set_target_properties(coreneuron-cuda PROPERTIES ) -# endif() Suppress some compiler warnings. TODO no it doesn't: Note in GPU builds this library -# includes CUDA files. -target_compile_options(coreneuron-core - PRIVATE $<$:${CORENEURON_CXX_WARNING_SUPPRESSIONS}>) -add_dependencies(coreneuron-core nrnivmodl-core) # TODO why? - -target_link_libraries(coreneuron-core ${reportinglib_LIBRARY} ${sonatareport_LIBRARY} +# Suppress some compiler warnings. +target_compile_options(coreneuron-core PRIVATE ${CORENEURON_CXX_WARNING_SUPPRESSIONS}) +target_link_libraries(coreneuron-core PUBLIC ${reportinglib_LIBRARY} ${sonatareport_LIBRARY} ${CALIPER_LIB} ${likwid_LIBRARIES}) target_include_directories(coreneuron-core SYSTEM @@ -320,9 +311,11 @@ if(CORENRN_ENABLE_GPU) PRIVATE $<$:${CORENRN_ACC_FLAGS}>) endif() -# Create an extra target for internal use that unit tests and so on can depend on +# Create an extra target for internal use that unit tests and so on can depend +# on. 
${corenrn_mech_library} is libcoreneuron.{a,so}, which contains both the +# compiled default mechanisms and the content of libcoreneuron-core.a add_library(coreneuron-all INTERFACE) -target_link_libraries(coreneuron-all INTERFACE coreneuron-core ${coreneuron_cuda_target} +target_link_libraries(coreneuron-all INTERFACE ${coreneuron_cuda_target} "${corenrn_mech_library}") # ============================================================================= diff --git a/coreneuron/mechanism/mech/enginemech.cpp b/coreneuron/mechanism/mech/enginemech.cpp index 2c20d1293..ee9cc9e28 100644 --- a/coreneuron/mechanism/mech/enginemech.cpp +++ b/coreneuron/mechanism/mech/enginemech.cpp @@ -1,6 +1,6 @@ /* # ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= diff --git a/extra/nrnivmodl-core.in b/extra/nrnivmodl-core.in index 742409d88..08804b159 100755 --- a/extra/nrnivmodl-core.in +++ b/extra/nrnivmodl-core.in @@ -83,7 +83,7 @@ while getopts "n:m:a:d:i:l:Vp:r:b:h" OPT; do echo " -r <0|1> Enable NRN_PRCELLSTATE mechanism. Default: @CORENRN_NRN_PRCELLSTATE@." echo " -V Verbose: show commands executed by make" echo " -p Number of parallel builds (Default: $PARALLEL_BUILDS)" - echo " -b libcorenrnmech library type" + echo " -b libcoreneuron library type" exit 0;; ?) 
exit 1;; diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in index 73ed76738..749de3ace 100644 --- a/extra/nrnivmodl_core_makefile.in +++ b/extra/nrnivmodl_core_makefile.in @@ -233,7 +233,7 @@ coremech_lib_shared: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always -I$(CORENRN_INC_DIR) $(INCFLAGS) \ $(LDFLAGS) ${SONAME_OPTION} -Wl,--start-group \ $(MOD_OBJS_DIR)/libcoreneuron-core/*.o \ - -Wl,--end-group $(CORENRNLIB_FLAGS) -Wl,-rpath,$(CORENRN_LIB_DIR) + -Wl,--end-group $(CORENRNLIB_FLAGS) -Wl,-rpath,$(CORENRN_LIB_DIR) -L$(CORENRN_LIB_DIR) -lcoreneuron-cuda # cleanup rm $(MOD_OBJS_DIR)/libcoreneuron-core/*.o From 955719396a8a54bc470585b14ca76d162a7b02d7 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 19 Jul 2022 12:43:26 +0200 Subject: [PATCH 035/128] drop .libs stuff --- CMake/coreneuron-config.cmake.in | 1 + extra/nrnivmodl_core_makefile.in | 9 ++------- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/CMake/coreneuron-config.cmake.in b/CMake/coreneuron-config.cmake.in index 29f67c92f..c5f8573d0 100644 --- a/CMake/coreneuron-config.cmake.in +++ b/CMake/coreneuron-config.cmake.in @@ -14,6 +14,7 @@ set(CORENRN_VERSION_PATCH @PROJECT_VERSION_PATCH@) set(CORENRN_ENABLE_GPU @CORENRN_ENABLE_GPU@) set(CORENRN_ENABLE_NMODL @CORENRN_ENABLE_NMODL@) set(CORENRN_ENABLE_REPORTING @CORENRN_ENABLE_REPORTING@) +set(CORENRN_ENABLE_SHARED @CORENRN_ENABLE_SHARED@) set(CORENEURON_LIB_LINK_FLAGS "@CORENEURON_LIB_LINK_FLAGS@") find_path(CORENEURON_INCLUDE_DIR "coreneuron/coreneuron.h" HINTS "${CONFIG_PATH}/../../include") diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in index 749de3ace..1c29e7499 100644 --- a/extra/nrnivmodl_core_makefile.in +++ b/extra/nrnivmodl_core_makefile.in @@ -207,7 +207,7 @@ endif # main target to build binary -$(SPECIAL_EXE): coremech_lib_target +$(SPECIAL_EXE): $(corenrnmech_lib_target) @printf " => $(C_GREEN)Binary$(C_RESET) creating $(SPECIAL_EXE)\n" $(CXX_LINK_EXE_CMD) -o 
$(SPECIAL_EXE) $(CORENRN_SHARE_CORENRN_DIR)/coreneuron.cpp \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ @@ -215,11 +215,6 @@ $(SPECIAL_EXE): coremech_lib_target -L$(CORENRN_LIB_DIR) \ -Wl,-rpath,'$(LIB_RPATH)' -Wl,-rpath,$(CORENRN_LIB_DIR) -Wl,-rpath,'$(INSTALL_LIB_RPATH)' -coremech_lib_target: $(corenrnmech_lib_target) - rm -rf $(OUTPUT_DIR)/.libs/lib$(COREMECH_LIB_NAME)$(LIB_SUFFIX); \ - mkdir -p $(OUTPUT_DIR)/.libs; \ - ln -s ../lib$(COREMECH_LIB_NAME)$(LIB_SUFFIX) $(OUTPUT_DIR)/.libs/lib$(COREMECH_LIB_NAME)$(LIB_SUFFIX) - $(ENGINEMECH_OBJ): $(CORENRN_SHARE_CORENRN_DIR)/enginemech.cpp | $(MOD_OBJS_DIR) $(CXX_COMPILE_CMD) -c -DADDITIONAL_MECHS $(CORENRN_SHARE_CORENRN_DIR)/enginemech.cpp -o $(ENGINEMECH_OBJ) @@ -279,7 +274,7 @@ $(MOD_OBJS_DIR): mkdir -p $(MOD_OBJS_DIR) # install binary and libraries -install: $(SPECIAL_EXE) coremech_lib_target +install: $(SPECIAL_EXE) install -d $(DESTDIR)/bin $(DESTDIR)/lib install ${COREMECH_LIB_PATH} $(DESTDIR)/lib install $(SPECIAL_EXE) $(DESTDIR)/bin From e5b1240793325e4a4eabc5b75115b2403899403d Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 19 Jul 2022 14:07:57 +0200 Subject: [PATCH 036/128] format --- coreneuron/CMakeLists.txt | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 971f3fa41..9091787dd 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -253,7 +253,7 @@ endif() # Suppress some compiler warnings. 
target_compile_options(coreneuron-core PRIVATE ${CORENEURON_CXX_WARNING_SUPPRESSIONS}) target_link_libraries(coreneuron-core PUBLIC ${reportinglib_LIBRARY} ${sonatareport_LIBRARY} - ${CALIPER_LIB} ${likwid_LIBRARIES}) + ${CALIPER_LIB} ${likwid_LIBRARIES}) target_include_directories(coreneuron-core SYSTEM PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR}/external/Random123/include) @@ -311,12 +311,11 @@ if(CORENRN_ENABLE_GPU) PRIVATE $<$:${CORENRN_ACC_FLAGS}>) endif() -# Create an extra target for internal use that unit tests and so on can depend -# on. ${corenrn_mech_library} is libcoreneuron.{a,so}, which contains both the -# compiled default mechanisms and the content of libcoreneuron-core.a +# Create an extra target for internal use that unit tests and so on can depend on. +# ${corenrn_mech_library} is libcoreneuron.{a,so}, which contains both the compiled default +# mechanisms and the content of libcoreneuron-core.a add_library(coreneuron-all INTERFACE) -target_link_libraries(coreneuron-all INTERFACE ${coreneuron_cuda_target} - "${corenrn_mech_library}") +target_link_libraries(coreneuron-all INTERFACE ${coreneuron_cuda_target} "${corenrn_mech_library}") # ============================================================================= # Extract link definitions to be used with nrnivmodl-core From 2e40b11cbb701833a72c5782855179f26ad00c34 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 19 Jul 2022 14:13:16 +0200 Subject: [PATCH 037/128] Fixes nvhpc didn't care about. --- coreneuron/utils/randoms/nrnran123.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/coreneuron/utils/randoms/nrnran123.h b/coreneuron/utils/randoms/nrnran123.h index e75ec3f69..c3aa4d7ef 100644 --- a/coreneuron/utils/randoms/nrnran123.h +++ b/coreneuron/utils/randoms/nrnran123.h @@ -39,6 +39,8 @@ of the full distribution available from #include #include +#include + // Some files are compiled with DISABLE_OPENACC, and some builds have no GPU // support at all. 
In these two cases, request that the random123 state is // allocated using new/delete instead of CUDA unified memory. @@ -109,7 +111,7 @@ constexpr void nrnran123_getids3(nrnran123_State* s, uint32_t* id1, uint32_t* id } // Uniform 0 to 2*32-1 -constexpr uint32_t nrnran123_ipick(nrnran123_State* s) { +inline uint32_t nrnran123_ipick(nrnran123_State* s) { char which = s->which_; uint32_t rval{s->r.v[int{which++}]}; if (which > 3) { @@ -134,7 +136,7 @@ constexpr double nrnran123_dblpick(nrnran123_State* s) { } /* this could be called from openacc parallel construct (in INITIAL block) */ -constexpr void nrnran123_setseq(nrnran123_State* s, uint32_t seq, char which) { +inline void nrnran123_setseq(nrnran123_State* s, uint32_t seq, char which) { if (which > 3) { s->which_ = 0; } else { From 0b17c683799555a24162be1578607bc9720c8533 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 19 Jul 2022 14:16:52 +0200 Subject: [PATCH 038/128] more fixes from CI errors. --- coreneuron/CMakeLists.txt | 9 ++++----- coreneuron/utils/randoms/nrnran123.h | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 9091787dd..0dbf8c8a7 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -177,7 +177,7 @@ endforeach() # we can link to MPI libraries in non-dynamic-mpi build if(CORENRN_ENABLE_MPI AND NOT CORENRN_ENABLE_MPI_DYNAMIC) - target_link_libraries(coreneuron-core ${MPI_CXX_LIBRARIES}) + target_link_libraries(coreneuron-core PUBLIC ${MPI_CXX_LIBRARIES}) endif() # this is where we handle dynamic mpi library build @@ -186,7 +186,7 @@ if(CORENRN_ENABLE_MPI AND CORENRN_ENABLE_MPI_DYNAMIC) # main coreneuron library needs to be linked to libdl.so and # and should be aware of shared library suffix on different platforms. 
# ~~~ - target_link_libraries(coreneuron-core ${CMAKE_DL_LIBS}) + target_link_libraries(coreneuron-core PUBLIC ${CMAKE_DL_LIBS}) # store mpi library targets that will be built list(APPEND corenrn_mpi_targets "") @@ -234,8 +234,7 @@ if(CORENRN_ENABLE_MPI AND CORENRN_ENABLE_MPI_DYNAMIC) # ~~~ if(MINGW) # type msmpi only add_dependencies(core${libname}_lib coreneuron-core) - target_link_libraries(core${libname}_lib ${MPI_C_LIBRARIES}) - target_link_libraries(core${libname}_lib coreneuron-core) + target_link_libraries(core${libname}_lib ${MPI_C_LIBRARIES} coreneuron-core) endif() set_property(TARGET core${libname}_lib PROPERTY OUTPUT_NAME core${libname}) list(APPEND corenrn_mpi_targets "core${libname}_lib") @@ -262,7 +261,7 @@ target_include_directories(coreneuron-core SYSTEM # See: https://en.cppreference.com/w/cpp/filesystem#Notes if(CMAKE_CXX_COMPILER_IS_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.1) - target_link_libraries(coreneuron-core stdc++fs) + target_link_libraries(coreneuron-core PUBLIC stdc++fs) endif() if(CORENRN_ENABLE_GPU) diff --git a/coreneuron/utils/randoms/nrnran123.h b/coreneuron/utils/randoms/nrnran123.h index c3aa4d7ef..12484d3d4 100644 --- a/coreneuron/utils/randoms/nrnran123.h +++ b/coreneuron/utils/randoms/nrnran123.h @@ -131,7 +131,7 @@ constexpr double nrnran123_uint2dbl(uint32_t u) { } // Uniform open interval (0,1), minimum value is 2.3283064e-10 and max value is 1-min -constexpr double nrnran123_dblpick(nrnran123_State* s) { +inline double nrnran123_dblpick(nrnran123_State* s) { return nrnran123_uint2dbl(nrnran123_ipick(s)); } From 71e1895d05a2c3e79e89c00957fc1fd0d5e4df25 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 19 Jul 2022 14:19:30 +0200 Subject: [PATCH 039/128] OpenACC + shared tests in GitLab CI. 
--- .gitlab-ci.yml | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 737e867f0..c91eea6ae 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -86,7 +86,14 @@ build:coreneuron:mod2c:nvhpc:acc: variables: SPACK_PACKAGE: coreneuron # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type - SPACK_PACKAGE_SPEC: +gpu+openmp+tests~legacy-unit build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: +gpu+openmp~shared+tests~legacy-unit build_type=RelWithDebInfo + +build:coreneuron:mod2c:nvhpc:acc:shared: + extends: [.build, .spack_nvhpc] + variables: + SPACK_PACKAGE: coreneuron + # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type + SPACK_PACKAGE_SPEC: +gpu+openmp+shared+tests~legacy-unit build_type=RelWithDebInfo # Build CoreNEURON with Unified Memory on GPU build:coreneuron:mod2c:nvhpc:acc:unified: @@ -94,7 +101,7 @@ build:coreneuron:mod2c:nvhpc:acc:unified: variables: SPACK_PACKAGE: coreneuron # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type - SPACK_PACKAGE_SPEC: +gpu+unified+openmp+tests~legacy-unit build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: +gpu+unified+openmp~shared+tests~legacy-unit build_type=RelWithDebInfo .build_coreneuron_nmodl: extends: [.build] @@ -114,7 +121,7 @@ build:coreneuron:nmodl:nvhpc:omp: variables: SPACK_PACKAGE: coreneuron # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type - SPACK_PACKAGE_SPEC: +nmodl+openmp+gpu+tests~legacy-unit~sympy build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: +nmodl+openmp+gpu~shared+tests~legacy-unit~sympy build_type=RelWithDebInfo needs: ["build:nmodl"] build:coreneuron:nmodl:nvhpc:acc: @@ -123,7 +130,7 @@ build:coreneuron:nmodl:nvhpc:acc: SPACK_PACKAGE: coreneuron # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type # Sympy + OpenMP target offload does not currently work with NVHPC - SPACK_PACKAGE_SPEC: +nmodl~openmp+gpu+tests~legacy-unit+sympy 
build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: +nmodl~openmp+gpu~shared+tests~legacy-unit+sympy build_type=RelWithDebInfo needs: ["build:nmodl"] build:coreneuron:mod2c:intel: @@ -144,6 +151,10 @@ build:neuron:mod2c:nvhpc:acc: extends: [.build_neuron, .spack_nvhpc] needs: ["build:coreneuron:mod2c:nvhpc:acc"] +build:neuron:mod2c:nvhpc:acc:shared: + extends: [.build_neuron, .spack_nvhpc] + needs: ["build:coreneuron:mod2c:nvhpc:acc:shared"] + build:neuron:nmodl:nvhpc:omp: extends: [.build_neuron, .spack_nvhpc] needs: ["build:coreneuron:nmodl:nvhpc:omp"] @@ -165,6 +176,10 @@ test:coreneuron:mod2c:nvhpc:acc: extends: [.ctest, .gpu_node] needs: ["build:coreneuron:mod2c:nvhpc:acc"] +test:coreneuron:mod2c:nvhpc:acc:shared: + extends: [.ctest, .gpu_node] + needs: ["build:coreneuron:mod2c:nvhpc:acc:shared"] + test:coreneuron:mod2c:nvhpc:acc:unified: extends: [.ctest, .gpu_node] needs: ["build:coreneuron:mod2c:nvhpc:acc:unified"] @@ -190,6 +205,10 @@ test:neuron:mod2c:nvhpc:acc: extends: [.test_neuron, .gpu_node] needs: ["build:neuron:mod2c:nvhpc:acc"] +test:neuron:mod2c:nvhpc:acc:shared: + extends: [.test_neuron, .gpu_node] + needs: ["build:neuron:mod2c:nvhpc:acc:shared"] + test:neuron:nmodl:nvhpc:omp: extends: [.test_neuron, .gpu_node] needs: ["build:neuron:nmodl:nvhpc:omp"] From 3492fe462e09876f15a541301da3636154999f14 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 19 Jul 2022 16:17:37 +0200 Subject: [PATCH 040/128] Try and cleanup CLI11 handling. 
--- coreneuron/CMakeLists.txt | 28 ++++++++------- coreneuron/apps/corenrn_parameters.cpp | 21 +++++++---- coreneuron/apps/corenrn_parameters.hpp | 36 +++++++++++++------ coreneuron/apps/main1.cpp | 2 +- tests/CMakeLists.txt | 4 +-- .../test_cmdline_interface.cpp | 2 +- 6 files changed, 59 insertions(+), 34 deletions(-) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 0dbf8c8a7..e0ed1b71f 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -148,13 +148,13 @@ endif() # Library containing the bulk of the non-mechanism CoreNEURON code. This is always created and # installed as a static library, and then the nrnivmodl-core workflow extracts the object files from # it and does one of the following: -# ~~~ -# - shared build: creates libcoreneuron.so from these objects plus those from -# the translated MOD files -# - static build: creates a (temporary, does not get installed) libcoreneuron.a -# from these objects plus those from the translated MOD files, then -# statically links that into special-core (nrniv-core) -# ~~~ +# +# * shared build: creates libcoreneuron.so from these objects plus those from the translated MOD +# files +# * static build: creates a (temporary, does not get installed) libcoreneuron.a from these objects +# plus those from the translated MOD files, then statically links that into special-core +# (nrniv-core) +# # This scheme means that both core and mechanism .o files are linked in a single step, which is # important for GPU linking. 
It does, however, mean that in a shared library CPU build then the core # code is installed twice, once in libcoreneuron-core.a and once in the libcoreneuron.so that @@ -254,10 +254,14 @@ target_compile_options(coreneuron-core PRIVATE ${CORENEURON_CXX_WARNING_SUPPRESS target_link_libraries(coreneuron-core PUBLIC ${reportinglib_LIBRARY} ${sonatareport_LIBRARY} ${CALIPER_LIB} ${likwid_LIBRARIES}) -target_include_directories(coreneuron-core SYSTEM - PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR}/external/Random123/include) -target_include_directories(coreneuron-core SYSTEM - PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR}/external/CLI11/include) +# TODO: fix adding a dependency of coreneuron-core on CLI11::CLI11 when CLI11 is a submodule. Right +# now this doesn't work because the CLI11 targets are not exported/installed, but coreneuron-core +# is. +get_target_property(CLI11_HEADER_DIRECTORY CLI11::CLI11 INTERFACE_INCLUDE_DIRECTORIES) +message(STATUS "CLI11_HEADER_DIRECTORY=${CLI11_HEADER_DIRECTORY}") +target_include_directories( + coreneuron-core SYSTEM PRIVATE ${CLI11_HEADER_DIRECTORY} + ${CORENEURON_PROJECT_SOURCE_DIR}/external/Random123/include) # See: https://en.cppreference.com/w/cpp/filesystem#Notes if(CMAKE_CXX_COMPILER_IS_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.1) @@ -265,7 +269,7 @@ if(CMAKE_CXX_COMPILER_IS_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.1) endif() if(CORENRN_ENABLE_GPU) - # nrnran123.cpp possibly-temporarily uses Boost.Pool in GPU builds if it's available. + # nrnran123.cpp uses Boost.Pool in GPU builds if it's available. find_package(Boost QUIET) if(Boost_FOUND) message(STATUS "Boost found, enabling use of memory pools for Random123...") diff --git a/coreneuron/apps/corenrn_parameters.cpp b/coreneuron/apps/corenrn_parameters.cpp index 40c322b18..6ee920d1f 100644 --- a/coreneuron/apps/corenrn_parameters.cpp +++ b/coreneuron/apps/corenrn_parameters.cpp @@ -5,15 +5,17 @@ # See top-level LICENSE file for details. 
# =============================================================================. */ - #include "coreneuron/apps/corenrn_parameters.hpp" +#include namespace coreneuron { extern std::string cnrn_version(); -corenrn_parameters::corenrn_parameters() { +corenrn_parameters::corenrn_parameters() + : m_app{std::make_unique("CoreNeuron - Optimised Simulator Engine for NEURON.")} { + auto& app = *m_app; app.set_config("--read-config", "", "Read parameters from ini file", false) ->check(CLI::ExistingFile); app.add_option("--write-config", @@ -167,14 +169,21 @@ corenrn_parameters::corenrn_parameters() { CLI::retire_option(app, "--show"); } +// Implementation in .cpp file where CLI types are complete. +corenrn_parameters::~corenrn_parameters() = default; + +std::string corenrn_parameters::config_to_str(bool default_also, bool write_description) const { + return m_app->config_to_str(default_also, write_description); +} + void corenrn_parameters::reset() { static_cast(*this) = corenrn_parameters_data{}; - app.clear(); + m_app->clear(); } void corenrn_parameters::parse(int argc, char** argv) { try { - app.parse(argc, argv); + m_app->parse(argc, argv); if (verbose == verbose_level::NONE) { nrn_nobanner_ = 1; } @@ -182,11 +191,11 @@ void corenrn_parameters::parse(int argc, char** argv) { // in case of parsing errors, show message with exception std::cerr << "CLI parsing error, see nrniv-core --help for more information. 
\n" << std::endl; - app.exit(e); + m_app->exit(e); throw e; } catch (const CLI::ParseError& e) { // use --help is also ParseError; in this case exit by showing all options - app.exit(e); + m_app->exit(e); exit(0); } diff --git a/coreneuron/apps/corenrn_parameters.hpp b/coreneuron/apps/corenrn_parameters.hpp index bfe646622..8db8ce06c 100644 --- a/coreneuron/apps/corenrn_parameters.hpp +++ b/coreneuron/apps/corenrn_parameters.hpp @@ -1,18 +1,14 @@ /* # ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ - #pragma once - +#include +#include #include -#include -#include -#include -#include /** * \class corenrn_parameters @@ -32,6 +28,10 @@ * Also single dash long options are not supported anymore (-mpi -> --mpi). */ +namespace CLI { +struct App; +} + namespace coreneuron { struct corenrn_parameters_data { @@ -94,10 +94,8 @@ struct corenrn_parameters_data { }; struct corenrn_parameters: corenrn_parameters_data { - CLI::App app{"CoreNeuron - Optimised Simulator Engine for NEURON."}; /// CLI app that performs - /// CLI parsing - - corenrn_parameters(); /// Constructor that initializes the CLI11 app. + corenrn_parameters(); /// Constructor that initializes the CLI11 app. + ~corenrn_parameters(); /// Destructor defined in .cpp where CLI11 types are complete. void parse(int argc, char* argv[]); /// Runs the CLI11_PARSE macro. @@ -111,6 +109,22 @@ struct corenrn_parameters: corenrn_parameters_data { inline bool is_quiet() { return verbose == verbose_level::NONE; } + + /** @brief Return a string summarising the current parameter values. + * + * This forwards to the CLI11 method of the same name. Returns a string that + * could be read in as a config of the current values of the App. 
+ * + * @param default_also Include any defaulted arguments. + * @param write_description Include option descriptions and the App description. + */ + std::string config_to_str(bool default_also = false, bool write_description = false) const; + + private: + // CLI app that performs CLI parsing. std::unique_ptr avoids having to + // include CLI11 headers from CoreNEURON headers, and therefore avoids + // CoreNEURON having to install CLI11 when using it from a submodule. + std::unique_ptr m_app; }; std::ostream& operator<<(std::ostream& os, diff --git a/coreneuron/apps/main1.cpp b/coreneuron/apps/main1.cpp index 8e05a5d69..86275fff0 100644 --- a/coreneuron/apps/main1.cpp +++ b/coreneuron/apps/main1.cpp @@ -511,7 +511,7 @@ extern "C" void mk_mech_init(int argc, char** argv) { if (!corenrn_param.writeParametersFilepath.empty()) { std::ofstream out(corenrn_param.writeParametersFilepath, std::ios::trunc); - out << corenrn_param.app.config_to_str(false, false); + out << corenrn_param.config_to_str(false, false); out.close(); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ea8052d7b..7ef3d9647 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -30,9 +30,7 @@ if(Boost_FOUND) add_library(coreneuron-unit-test INTERFACE) target_compile_options(coreneuron-unit-test INTERFACE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) - target_include_directories( - coreneuron-unit-test SYSTEM INTERFACE ${Boost_INCLUDE_DIRS} - ${CORENEURON_PROJECT_SOURCE_DIR}/external/CLI11/include) + target_include_directories(coreneuron-unit-test SYSTEM INTERFACE ${Boost_INCLUDE_DIRS}) target_link_libraries(coreneuron-unit-test INTERFACE coreneuron-all ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}) add_subdirectory(unit/cmdline_interface) diff --git a/tests/unit/cmdline_interface/test_cmdline_interface.cpp b/tests/unit/cmdline_interface/test_cmdline_interface.cpp index caef6ca14..ccd9e1f66 100644 --- a/tests/unit/cmdline_interface/test_cmdline_interface.cpp +++ 
b/tests/unit/cmdline_interface/test_cmdline_interface.cpp @@ -130,5 +130,5 @@ BOOST_AUTO_TEST_CASE(cmdline_interface) { // Everything has its default value, and the first `false` says not to // include default values in the output, so this should be empty - BOOST_CHECK(corenrn_param_test.app.config_to_str(false, false).empty()); + BOOST_CHECK(corenrn_param_test.config_to_str(false, false).empty()); } From 61e3fad4e5a3a54a3c48b37ba27285eea8d48193 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 19 Jul 2022 17:57:41 +0200 Subject: [PATCH 041/128] try and consolidate build logic more --- .gitlab-ci.yml | 14 ++--- CMake/MakefileBuildOptions.cmake | 93 +++++++++++++++++++------------- CMake/OpenAccHelper.cmake | 28 +++++----- CMakeLists.txt | 83 +++++++++++----------------- coreneuron/CMakeLists.txt | 54 +++++-------------- coreneuron/apps/main1.cpp | 4 +- coreneuron/permute/cellorder.cu | 2 +- extra/nrnivmodl_core_makefile.in | 11 ++-- 8 files changed, 125 insertions(+), 164 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c91eea6ae..6a9abb44b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -86,14 +86,14 @@ build:coreneuron:mod2c:nvhpc:acc: variables: SPACK_PACKAGE: coreneuron # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type - SPACK_PACKAGE_SPEC: +gpu+openmp~shared+tests~legacy-unit build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: +caliper+gpu+openmp~shared+tests~legacy-unit build_type=RelWithDebInfo build:coreneuron:mod2c:nvhpc:acc:shared: extends: [.build, .spack_nvhpc] variables: SPACK_PACKAGE: coreneuron # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type - SPACK_PACKAGE_SPEC: +gpu+openmp+shared+tests~legacy-unit build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: +caliper+gpu+openmp+shared+tests~legacy-unit build_type=RelWithDebInfo # Build CoreNEURON with Unified Memory on GPU build:coreneuron:mod2c:nvhpc:acc:unified: @@ -101,7 +101,7 @@ build:coreneuron:mod2c:nvhpc:acc:unified: variables: 
SPACK_PACKAGE: coreneuron # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type - SPACK_PACKAGE_SPEC: +gpu+unified+openmp~shared+tests~legacy-unit build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: ~caliper+gpu+unified+openmp~shared+tests~legacy-unit build_type=RelWithDebInfo .build_coreneuron_nmodl: extends: [.build] @@ -121,7 +121,7 @@ build:coreneuron:nmodl:nvhpc:omp: variables: SPACK_PACKAGE: coreneuron # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type - SPACK_PACKAGE_SPEC: +nmodl+openmp+gpu~shared+tests~legacy-unit~sympy build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: +caliper+nmodl+openmp+gpu~shared+tests~legacy-unit~sympy build_type=RelWithDebInfo needs: ["build:nmodl"] build:coreneuron:nmodl:nvhpc:acc: @@ -130,20 +130,20 @@ build:coreneuron:nmodl:nvhpc:acc: SPACK_PACKAGE: coreneuron # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type # Sympy + OpenMP target offload does not currently work with NVHPC - SPACK_PACKAGE_SPEC: +nmodl~openmp+gpu~shared+tests~legacy-unit+sympy build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: ~caliper+nmodl~openmp+gpu~shared+tests~legacy-unit+sympy build_type=RelWithDebInfo needs: ["build:nmodl"] build:coreneuron:mod2c:intel: extends: [.build, .spack_intel] variables: SPACK_PACKAGE: coreneuron - SPACK_PACKAGE_SPEC: +tests~legacy-unit build_type=Debug + SPACK_PACKAGE_SPEC: +caliper+tests~legacy-unit build_type=Debug build:coreneuron:nmodl:intel: extends: [.build_coreneuron_nmodl, .spack_intel] variables: SPACK_PACKAGE: coreneuron - SPACK_PACKAGE_SPEC: +nmodl+tests~legacy-unit build_type=Debug + SPACK_PACKAGE_SPEC: ~caliper+nmodl+tests~legacy-unit build_type=Debug needs: ["build:nmodl"] # Build NEURON diff --git a/CMake/MakefileBuildOptions.cmake b/CMake/MakefileBuildOptions.cmake index 7aef0c549..710b8ad4e 100644 --- a/CMake/MakefileBuildOptions.cmake +++ b/CMake/MakefileBuildOptions.cmake @@ -21,7 +21,7 @@ set(CMAKE_ISPC_FLAGS "${CMAKE_ISPC_FLAGS} --pic") set(NMODL_COMMON_ARGS 
"passes --inline") if(NOT "${CORENRN_NMODL_FLAGS}" STREQUAL "") - set(NMODL_COMMON_ARGS "${NMODL_COMMON_ARGS} ${CORENRN_NMODL_FLAGS}") + string(APPEND NMODL_COMMON_ARGS " ${CORENRN_NMODL_FLAGS}") endif() set(NMODL_CPU_BACKEND_ARGS "host --c") @@ -29,62 +29,81 @@ set(NMODL_ISPC_BACKEND_ARGS "host --ispc") set(NMODL_ACC_BACKEND_ARGS "host --c acc --oacc") # ============================================================================= -# Extract Compile definitions : common to all backend +# Construct the linker arguments that are used inside nrnivmodl-core (to build +# libcoreneuron from libcoreneuron-core, libcoreneuron-cuda and mechanism object +# files) and inside nrnivmodl (to link NEURON's special against CoreNEURON's +# libcoreneuron). # ============================================================================= -get_directory_property(COMPILE_DEFS COMPILE_DEFINITIONS) -if(COMPILE_DEFS) - set(CORENRN_COMMON_COMPILE_DEFS "") - foreach(flag ${COMPILE_DEFS}) - set(CORENRN_COMMON_COMPILE_DEFS "${CORENRN_COMMON_COMPILE_DEFS} -D${flag}") - endforeach() -endif() +# Essentially we "just" want to unpack the CMake dependencies of the +# `coreneuron-core` target into a plain string that we can bake into the +# Makefiles in both NEURON and CoreNEURON. +function(coreneuron_process_target target) + if(TARGET ${target}) + if(NOT target STREQUAL "coreneuron-core") + # This is a special case: libcoreneuron-core.a is manually unpacked into .o + # files by the nrnivmodl-core Makefile, so we do not want to also emit an + # -lcoreneuron-core argument. + # TODO: probably need to extract an -L and RPATH path and include that here? 
+ set_property(GLOBAL APPEND_STRING PROPERTY CORENEURON_LIB_LINK_FLAGS " -l${target}") + endif() + get_target_property(target_libraries ${target} LINK_LIBRARIES) + if(target_libraries) + foreach(child_target ${target_libraries}) + coreneuron_process_target(${child_target}) + endforeach() + endif() + return() + endif() + get_filename_component(target_dir "${target}" DIRECTORY) + message(STATUS "target=${target} target_dir=${target_dir}") + if(NOT target_dir) + # In case target is not a target but is just the name of a library, e.g. "dl" + set_property(GLOBAL APPEND_STRING PROPERTY CORENEURON_LIB_LINK_FLAGS " -l${target}") + elseif("${target_dir}" MATCHES "^(/lib|/lib64|/usr/lib|/usr/lib64)$") + # e.g. /usr/lib64/libpthread.so -> -lpthread + get_filename_component(libname ${target} NAME_WE) + string(REGEX REPLACE "^lib" "" libname ${libname}) + set_property(GLOBAL APPEND_STRING PROPERTY CORENEURON_LIB_LINK_FLAGS " -l${libname}") + else() + # It's a full path, include that on the line + set_property(GLOBAL APPEND_STRING PROPERTY CORENEURON_LIB_LINK_FLAGS " ${target}") + endif() +endfunction() +coreneuron_process_target(coreneuron-core) +get_property(CORENEURON_LIB_LINK_FLAGS GLOBAL PROPERTY CORENEURON_LIB_LINK_FLAGS) +message(STATUS "CORENEURON_LIB_LINK_FLAGS=${CORENEURON_LIB_LINK_FLAGS}") + +# Things that used to be in CORENEURON_LIB_LINK_FLAGS: -rdynamic -lrt +# -Wl,--whole-archive -L${CMAKE_HOST_SYSTEM_PROCESSOR} -Wl,--no-whole-archive +# -L${caliper_LIB_DIR} -l${CALIPER_LIB} # ============================================================================= -# link flags : common to all backend +# Turn CORENRN_COMPILE_DEFS into a list of -DFOO[=BAR] options. 
# ============================================================================= -# ~~~ -# find_cuda uses FindThreads that adds below imported target we -# shouldn't add imported target to link line -# ~~~ -list(REMOVE_ITEM CORENRN_LINK_LIBS "Threads::Threads") +list(TRANSFORM CORENRN_COMPILE_DEFS PREPEND -D OUTPUT_VARIABLE CORENRN_COMPILE_DEF_FLAGS) -string(JOIN " " CORENRN_COMMON_LDFLAGS ${CORENRN_EXTRA_LINK_FLAGS}) +# ============================================================================= +# Extra link flags that we need to include when linking libcoreneuron.{a,so} in +# CoreNEURON but that do not need to be passed to NEURON to use when linking +# nrniv/special (why?) +# ============================================================================= +string(JOIN " " CORENRN_COMMON_LDFLAGS ${CORENEURON_LIB_LINK_FLAGS} ${CORENRN_EXTRA_LINK_FLAGS}) if(CORENRN_SANITIZER_LIBRARY_DIR) string(APPEND CORENRN_COMMON_LDFLAGS " -Wl,-rpath,${CORENRN_SANITIZER_LIBRARY_DIR}") endif() string(JOIN " " CORENRN_SANITIZER_ENABLE_ENVIRONMENT_STRING ${CORENRN_SANITIZER_ENABLE_ENVIRONMENT}) -# replicate CMake magic to transform system libs to -l -foreach(link_lib ${CORENRN_LINK_LIBS}) - if(${link_lib} MATCHES "\-l.*") - string(APPEND CORENRN_COMMON_LDFLAGS " ${link_lib}") - continue() - endif() - get_filename_component(path ${link_lib} DIRECTORY) - if(NOT path) - string(APPEND CORENRN_COMMON_LDFLAGS " -l${link_lib}") - elseif("${path}" MATCHES "^(/lib|/lib64|/usr/lib|/usr/lib64)$") - get_filename_component(libname ${link_lib} NAME_WE) - string(REGEX REPLACE "^lib" "" libname ${libname}) - string(APPEND CORENRN_COMMON_LDFLAGS " -l${libname}") - else() - string(APPEND CORENRN_COMMON_LDFLAGS " ${link_lib}") - endif() -endforeach() - # ============================================================================= # compile flags : common to all backend # ============================================================================= -string(JOIN " " 
CMAKE_CXX17_STANDARD_COMPILE_OPTION_STRING ${CMAKE_CXX17_STANDARD_COMPILE_OPTION}) string(TOUPPER "${CMAKE_BUILD_TYPE}" _BUILD_TYPE) -list(TRANSFORM CORENRN_COMPILE_DEFS PREPEND -D OUTPUT_VARIABLE CORENRN_COMPILE_DEF_FLAGS) string( JOIN " " CORENRN_CXX_FLAGS ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${_BUILD_TYPE}} - ${CMAKE_CXX17_STANDARD_COMPILE_OPTION_STRING} + ${CMAKE_CXX17_STANDARD_COMPILE_OPTION} ${NVHPC_ACC_COMP_FLAGS} ${NVHPC_CXX_INLINE_FLAGS} ${CORENRN_COMPILE_DEF_FLAGS} diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index 5be6af61b..d2eed3d1d 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -34,13 +34,13 @@ if(CORENRN_ENABLE_GPU) cnrn_parse_version(${CMAKE_CXX_COMPILER_VERSION} OUTPUT_MAJOR_MINOR CORENRN_NVHPC_MAJOR_MINOR_VERSION) # Enable cudaProfiler{Start,Stop}() behind the Instrumentor::phase... APIs - add_compile_definitions(CORENEURON_CUDA_PROFILING CORENEURON_ENABLE_GPU) + list(APPEND CORENRN_COMPILE_DEFS CORENEURON_CUDA_PROFILING CORENEURON_ENABLE_GPU) # Plain C++ code in CoreNEURON may need to use CUDA runtime APIs for, for example, starting and # stopping profiling. This makes sure those headers can be found. include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) # cuda unified memory support if(CORENRN_ENABLE_CUDA_UNIFIED_MEMORY) - add_compile_definitions(CORENEURON_UNIFIED_MEMORY) + list(APPEND CORENRN_COMPILE_DEFS CORENEURON_UNIFIED_MEMORY) endif() if(${CMAKE_VERSION} VERSION_LESS 3.17) # Hopefully we can drop this soon. Parse ${CMAKE_CUDA_COMPILER_VERSION} into a shorter X.Y @@ -81,7 +81,7 @@ if(CORENRN_ENABLE_GPU) if(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenMP") # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available # for a region then prefer OpenMP. 
- add_compile_definitions(CORENEURON_PREFER_OPENMP_OFFLOAD) + list(APPEND CORENRN_COMPILE_DEFS CORENEURON_PREFER_OPENMP_OFFLOAD) string(APPEND NVHPC_ACC_COMP_FLAGS " -mp=gpu") elseif(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenACC") # Only enable OpenACC offload for GPU @@ -98,20 +98,16 @@ if(CORENRN_ENABLE_GPU) endif() # ============================================================================= -# Set global property that will be used by NEURON to link with CoreNEURON +# Initialise global property that will be used by NEURON to link with CoreNEURON # ============================================================================= -# TODO this should be derived from what we use internally to link special-core? -if(CORENRN_ENABLE_GPU) - set_property( - GLOBAL - PROPERTY - CORENEURON_LIB_LINK_FLAGS - "${NVHPC_ACC_COMP_FLAGS} -rdynamic -lrt -Wl,--whole-archive -L${CMAKE_HOST_SYSTEM_PROCESSOR} -lcoreneuron -lcoreneuron-cuda -Wl,--no-whole-archive" - ) -else() - set_property(GLOBAL PROPERTY CORENEURON_LIB_LINK_FLAGS - "-L${CMAKE_HOST_SYSTEM_PROCESSOR} -lcoreneuron") -endif(CORENRN_ENABLE_GPU) +if(CORENRN_ENABLE_GPU AND CORENRN_ENABLE_SHARED) + # Because of + # https://forums.developer.nvidia.com/t/dynamically-loading-an-openacc-enabled-shared-library-from-an-executable-compiled-with-nvc-does-not-work/210968 + # we have to tell NEURON to pass OpenACC flags when linking special, otherwise + # we end up with an `nrniv` binary that cannot dynamically load CoreNEURON in + # shared-library builds + set_property(GLOBAL PROPERTY CORENEURON_LIB_LINK_FLAGS "${NVHPC_ACC_COMP_FLAGS}") +endif() if(CORENRN_HAVE_NVHPC_COMPILER) if(${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 20.7) diff --git a/CMakeLists.txt b/CMakeLists.txt index e128652a0..ccf5f0aa0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -271,12 +271,11 @@ find_package(Perl REQUIRED) # Common build options # ============================================================================= # build mod files for 
coreneuron -add_definitions(-DCORENEURON_BUILD) - +list(APPEND CORENRN_COMPILE_DEFS CORENEURON_BUILD) set(CMAKE_REQUIRED_QUIET TRUE) check_include_files(malloc.h have_malloc_h) if(have_malloc_h) - add_definitions("-DHAVE_MALLOC_H") + list(APPEND CORENRN_COMPILE_DEFS HAVE_MALLOC_H) endif() # ============================================================================= @@ -313,14 +312,14 @@ endif() if(CORENRN_ENABLE_MPI) find_package(MPI REQUIRED) - add_definitions("-DNRNMPI=1") + list(APPEND CORENRN_COMPILE_DEFS NRNMPI=1) # avoid linking to C++ bindings - add_definitions("-DMPI_NO_CPPBIND=1") - add_definitions("-DOMPI_SKIP_MPICXX=1") - add_definitions("-DMPICH_SKIP_MPICXX=1") + list(APPEND CORENRN_COMPILE_DEFS MPI_NO_CPPBIND=1) + list(APPEND CORENRN_COMPILE_DEFS OMPI_SKIP_MPICXX=1) + list(APPEND CORENRN_COMPILE_DEFS MPICH_SKIP_MPICXX=1) else() - add_definitions("-DNRNMPI=0") - add_definitions("-DNRN_MULTISEND=0") + list(APPEND CORENRN_COMPILE_DEFS NRNMPI=0) + list(APPEND CORENRN_COMPILE_DEFS NRN_MULTISEND=0) endif() if(CORENRN_ENABLE_OPENMP) @@ -331,23 +330,23 @@ if(CORENRN_ENABLE_OPENMP) endif() endif() -add_definitions("-DLAYOUT=0") +list(APPEND CORENRN_COMPILE_DEFS LAYOUT=0) if(NOT CORENRN_ENABLE_HOC_EXP) - add_definitions("-DDISABLE_HOC_EXP") + list(APPEND CORENRN_COMPILE_DEFS DISABLE_HOC_EXP) endif() # splay tree required for net_move if(CORENRN_ENABLE_SPLAYTREE_QUEUING) - add_definitions("-DENABLE_SPLAYTREE_QUEUING") + list(APPEND CORENRN_COMPILE_DEFS ENABLE_SPLAYTREE_QUEUING) endif() if(NOT CORENRN_ENABLE_NET_RECEIVE_BUFFER) - add_definitions("-DNET_RECEIVE_BUFFERING=0") + list(APPEND CORENRN_COMPILE_DEFS NET_RECEIVE_BUFFERING=0) endif() if(NOT CORENRN_ENABLE_TIMEOUT) - add_definitions("-DDISABLE_TIMEOUT") + list(APPEND CORENRN_COMPILE_DEFS DISABLE_TIMEOUT) endif() if(CORENRN_ENABLE_REPORTING) @@ -356,7 +355,7 @@ if(CORENRN_ENABLE_REPORTING) find_program(H5DUMP_EXECUTABLE h5dump) if(reportinglib_FOUND) - add_definitions("-DENABLE_BIN_REPORTS") + list(APPEND 
CORENRN_COMPILE_DEFS ENABLE_BIN_REPORTS) set(ENABLE_BIN_REPORTS_TESTS ON) else() set(reportinglib_INCLUDE_DIR "") @@ -364,7 +363,7 @@ if(CORENRN_ENABLE_REPORTING) endif() if(sonata_FOUND) if(TARGET sonata::sonata_report) - add_definitions("-DENABLE_SONATA_REPORTS") + list(APPEND CORENRN_COMPILE_DEFS ENABLE_SONATA_REPORTS) set(ENABLE_SONATA_REPORTS_TESTS ON) else() message(SEND_ERROR "SONATA library was found but without reporting support") @@ -384,6 +383,7 @@ if(CORENRN_ENABLE_LEGACY_UNITS) else() set(CORENRN_USE_LEGACY_UNITS 0) endif() +list(APPEND CORENRN_COMPILE_DEFS CORENEURON_USE_LEGACY_UNITS=${CORENRN_USE_LEGACY_UNITS}) # Propagate Legacy Units flag to backends. set(MOD2C_ENABLE_LEGACY_UNITS ${CORENRN_ENABLE_LEGACY_UNITS} @@ -396,7 +396,7 @@ if(CORENRN_ENABLE_MPI_DYNAMIC) if(NOT CORENRN_ENABLE_MPI) message(FATAL_ERROR "Cannot enable dynamic mpi without mpi") endif() - add_compile_definitions(CORENRN_ENABLE_MPI_DYNAMIC) + list(APPEND CORENRN_COMPILE_DEFS CORENEURON_ENABLE_MPI_DYNAMIC) endif() if(CORENRN_ENABLE_PRCELLSTATE) @@ -405,7 +405,7 @@ else() set(CORENRN_NRN_PRCELLSTATE 0) endif() if(MINGW) - add_definitions("-DMINGW") + list(APPEND CORENRN_COMPILE_DEFS MINGW) endif() # ============================================================================= @@ -448,22 +448,20 @@ endif() # ============================================================================= if(CORENRN_ENABLE_CALIPER_PROFILING) find_package(caliper REQUIRED) - include_directories(${caliper_INCLUDE_DIR}) - add_definitions("-DCORENEURON_CALIPER") - set(CALIPER_LIB "caliper") - set_property(GLOBAL APPEND_STRING PROPERTY CORENEURON_LIB_LINK_FLAGS - " -L${caliper_LIB_DIR} -l${CALIPER_LIB}") + list(APPEND CORENRN_COMPILE_DEFS CORENEURON_CALIPER) + set(CORENRN_CALIPER_LIB caliper) endif() if(CORENRN_ENABLE_LIKWID_PROFILING) find_package(likwid REQUIRED) + list(APPEND CORENRN_COMPILE_DEFS LIKWID_PERFMON) + # TODO: avoid this part, probably by using some likwid CMake target 
include_directories(${likwid_INCLUDE_DIRS}) - add_definitions("-DLIKWID_PERFMON") endif() # enable debugging code with extra logs to stdout if(CORENRN_ENABLE_DEBUG_CODE) - add_definitions(-DCORENRN_DEBUG -DCHKPNTDEBUG -DCORENRN_DEBUG_QUEUE -DINTERLEAVE_DEBUG) + list(APPEND CORENRN_COMPILE_DEFS CORENRN_DEBUG CHKPNTDEBUG CORENRN_DEBUG_QUEUE INTERLEAVE_DEBUG) endif() # ============================================================================= @@ -473,38 +471,19 @@ endif() # compiler will be invoked with these flags, so we have to use flags that are as generic as # possible. if(NOT DEFINED NRN_WHEEL_BUILD OR NOT NRN_WHEEL_BUILD) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${IGNORE_UNKNOWN_PRAGMA_FLAGS}") + list(APPEND CORENRN_EXTRA_CXX_FLAGS "${IGNORE_UNKNOWN_PRAGMA_FLAGS}") endif() -# ============================================================================= -# Add main directories -# ============================================================================= +# Add the main source directory add_subdirectory(coreneuron) -if(CORENRN_ENABLE_GPU) - get_target_property(CORENRN_LINK_LIBRARIES coreneuron-core INTERFACE_LINK_LIBRARIES) - if(CORENRN_LINK_LIBRARIES) - foreach(LIB ${CORENRN_LINK_LIBRARIES}) - get_filename_component(dir_path ${LIB} DIRECTORY) - if(TARGET ${LIB}) - # See, for example, caliper where the coreneuron target depends on the caliper target (so we - # get LIB=caliper in this loop), but -l and -L are already added manually here: - # https://github.com/BlueBrain/CoreNeuron/blob/856cea4aa647c8f2b0d5bda6d0fc32144c5942e3/CMakeLists.txt#L411-L412 - message( - NOTICE - "Ignoring dependency '${LIB}' of 'coreneuron-core' and assuming relevant flags have already been added to CORENEURON_LIB_LINK_FLAGS." - ) - elseif(NOT dir_path) - # In case LIB is not a target but is just the name of a library, e.g. 
"dl" - set_property(GLOBAL APPEND_STRING PROPERTY CORENEURON_LIB_LINK_FLAGS " -l${LIB}") - else() - set_property(GLOBAL APPEND_STRING PROPERTY CORENEURON_LIB_LINK_FLAGS " ${LIB}") - endif() - endforeach() - endif() -endif() - +# Extract the various compiler option strings to use inside nrnivmodl-core. Sets +# the global property CORENEURON_LIB_LINK_FLAGS, which contains the arguments +# that must be added to the link line for `special` to link against +# `libcoreneuron.{a,so}` include(MakefileBuildOptions) + +# Generate the nrnivmodl-core script and makefile using the options from MakefileBuildOptions add_subdirectory(extra) if(CORENRN_ENABLE_UNIT_TESTS) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index e0ed1b71f..befb2f8dc 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -21,7 +21,7 @@ file( CORENEURON_CODE_FILES "apps/main1.cpp" "apps/corenrn_parameters.cpp" - "gpu/*.cpp" + "gpu/nrn_acc_manager.cpp" "io/*.cpp" "io/reports/*.cpp" "mechanism/*.cpp" @@ -121,11 +121,6 @@ if(CORENRN_ENABLE_GPU) endif() endif() -# ============================================================================= -# CORENEURON_USE_LEGACY_UNITS is used in membfunc.hpp so define it everywhere -# ============================================================================= -add_compile_definitions(CORENEURON_USE_LEGACY_UNITS=${CORENRN_USE_LEGACY_UNITS}) - # ============================================================================= # create libraries # ============================================================================= @@ -156,18 +151,16 @@ endif() # (nrniv-core) # # This scheme means that both core and mechanism .o files are linked in a single step, which is -# important for GPU linking. It does, however, mean that in a shared library CPU build then the core -# code is installed twice, once in libcoreneuron-core.a and once in the libcoreneuron.so that -# contains the default mechanisms for the installed nrniv-core binary. 
In a GPU build, +# important for GPU linking. It does, however, mean that the core code is installed twice, once in +# libcoreneuron-core.a and once in libcoreneuron.so (shared) or nrniv-core (static). In a GPU build, # libcoreneuron-cuda.{a,so} is also linked to provide the CUDA implementation of the Hines solver. -add_library(coreneuron-core STATIC ${CORENEURON_CODE_FILES} ${CORENRN_MPI_OBJ}) - -# Library containing explicit CUDA code, compiled by nvcc. This cannot be included in -# coreneuron-core because of this issue: +# This cannot be included in coreneuron-core because of this issue: # https://forums.developer.nvidia.com/t/cannot-dynamically-load-a-shared-library-containing-both-openacc-and-cuda-code/210972 +add_library(coreneuron-core STATIC ${CORENEURON_CODE_FILES} ${CORENRN_MPI_OBJ}) if(CORENRN_ENABLE_GPU) set(coreneuron_cuda_target coreneuron-cuda) add_library(coreneuron-cuda ${COMPILE_LIBRARY_TYPE} ${CORENEURON_CUDA_FILES}) + target_link_libraries(coreneuron-core PUBLIC coreneuron-cuda) endif() foreach(target coreneuron-core ${coreneuron_cuda_target}) @@ -252,13 +245,11 @@ endif() # Suppress some compiler warnings. target_compile_options(coreneuron-core PRIVATE ${CORENEURON_CXX_WARNING_SUPPRESSIONS}) target_link_libraries(coreneuron-core PUBLIC ${reportinglib_LIBRARY} ${sonatareport_LIBRARY} - ${CALIPER_LIB} ${likwid_LIBRARIES}) + ${CORENRN_CALIPER_LIB} ${likwid_LIBRARIES}) # TODO: fix adding a dependency of coreneuron-core on CLI11::CLI11 when CLI11 is a submodule. Right -# now this doesn't work because the CLI11 targets are not exported/installed, but coreneuron-core -# is. +# now this doesn't work because the CLI11 targets are not exported/installed but coreneuron-core is. 
get_target_property(CLI11_HEADER_DIRECTORY CLI11::CLI11 INTERFACE_INCLUDE_DIRECTORIES) -message(STATUS "CLI11_HEADER_DIRECTORY=${CLI11_HEADER_DIRECTORY}") target_include_directories( coreneuron-core SYSTEM PRIVATE ${CLI11_HEADER_DIRECTORY} ${CORENEURON_PROJECT_SOURCE_DIR}/external/Random123/include) @@ -282,7 +273,7 @@ set_target_properties( coreneuron-core ${coreneuron_cuda_target} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib - POSITION_INDEPENDENT_CODE ON) + POSITION_INDEPENDENT_CODE ${CORENRN_ENABLE_SHARED}) cpp_cc_configure_sanitizers(TARGET coreneuron-core ${coreneuron_cuda_target} ${corenrn_mpi_targets}) # ============================================================================= @@ -295,12 +286,12 @@ file(GLOB modfiles "${modfile_directory}/*.mod") # symbols in the translated versions of default .mod files set(nrniv_core_prefix "${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}") set(corenrn_mech_library - "${nrniv_core_prefix}/libcoreneuron${CMAKE_${COMPILE_LIBRARY_TYPE}_LIBRARY_SUFFIX}") + "${nrniv_core_prefix}/${CMAKE_${COMPILE_LIBRARY_TYPE}_LIBRARY_PREFIX}coreneuron${CMAKE_${COMPILE_LIBRARY_TYPE}_LIBRARY_SUFFIX}") set(output_binaries "${nrniv_core_prefix}/special-core" "${corenrn_mech_library}") add_custom_command( OUTPUT ${output_binaries} - DEPENDS coreneuron-core ${coreneuron_cuda_target} ${NMODL_TARGET_TO_DEPEND} ${modfiles} + DEPENDS coreneuron-core ${NMODL_TARGET_TO_DEPEND} ${modfiles} ${CORENEURON_BUILTIN_MODFILES} COMMAND ${CMAKE_BINARY_DIR}/bin/nrnivmodl-core -b ${COMPILE_LIBRARY_TYPE} -m ${CORENRN_MOD2CPP_BINARY} -p 4 "${modfile_directory}" @@ -318,18 +309,7 @@ endif() # ${corenrn_mech_library} is libcoreneuron.{a,so}, which contains both the compiled default # mechanisms and the content of libcoreneuron-core.a add_library(coreneuron-all INTERFACE) -target_link_libraries(coreneuron-all INTERFACE ${coreneuron_cuda_target} "${corenrn_mech_library}") - -# 
============================================================================= -# Extract link definitions to be used with nrnivmodl-core -# ============================================================================= -get_target_property(CORENRN_LINK_LIBS coreneuron-core LINK_LIBRARIES) -if(NOT CORENRN_LINK_LIBS) - set(CORENRN_LINK_LIBS "") -endif() -set(CORENRN_LINK_LIBS - "${CORENRN_LINK_LIBS}" - PARENT_SCOPE) +target_link_libraries(coreneuron-all INTERFACE "${corenrn_mech_library}") # Make headers avail to build tree configure_file(engine.h.in ${CMAKE_BINARY_DIR}/include/coreneuron/engine.h @ONLY) @@ -357,21 +337,13 @@ file(COPY apps/coreneuron.cpp DESTINATION ${CMAKE_BINARY_DIR}/share/coreneuron) # coreneuron main libraries install( - TARGETS coreneuron-core + TARGETS coreneuron-core ${coreneuron_cuda_target} EXPORT coreneuron LIBRARY DESTINATION lib ARCHIVE DESTINATION lib INCLUDES DESTINATION $) -if(TARGET coreneuron-cuda) - install( - TARGETS coreneuron-cuda - EXPORT coreneuron - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib) -endif() - # headers and some standalone code files for nrnivmodl-core install( DIRECTORY ${CMAKE_BINARY_DIR}/include/coreneuron diff --git a/coreneuron/apps/main1.cpp b/coreneuron/apps/main1.cpp index 86275fff0..b019748fd 100644 --- a/coreneuron/apps/main1.cpp +++ b/coreneuron/apps/main1.cpp @@ -456,7 +456,7 @@ std::unique_ptr create_report_handler(ReportConfiguration& config using namespace coreneuron; -#if NRNMPI && defined CORENRN_ENABLE_MPI_DYNAMIC +#if NRNMPI && defined(CORENEURON_ENABLE_MPI_DYNAMIC) static void* load_dynamic_mpi(const std::string& libname) { dlerror(); void* handle = dlopen(libname.c_str(), RTLD_NOW | RTLD_GLOBAL); @@ -478,7 +478,7 @@ extern "C" void mk_mech_init(int argc, char** argv) { #if NRNMPI if (corenrn_param.mpi_enable) { -#ifdef CORENRN_ENABLE_MPI_DYNAMIC +#ifdef CORENEURON_ENABLE_MPI_DYNAMIC // coreneuron rely on neuron to detect mpi library distribution and // the name of the library itself. 
Make sure the library name is specified // via CLI option. diff --git a/coreneuron/permute/cellorder.cu b/coreneuron/permute/cellorder.cu index 1f1bdff94..ed8975148 100644 --- a/coreneuron/permute/cellorder.cu +++ b/coreneuron/permute/cellorder.cu @@ -1,6 +1,6 @@ /* # ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in index 1c29e7499..24e630f92 100644 --- a/extra/nrnivmodl_core_makefile.in +++ b/extra/nrnivmodl_core_makefile.in @@ -38,11 +38,6 @@ MOD_OBJS_DIR = $(OUTPUT_DIR)/corenrn/build # Linked libraries gathered by CMake LDFLAGS = $(LINKFLAGS) @CORENRN_COMMON_LDFLAGS@ -CORENRNLIB_FLAGS = -CORENRNLIB_FLAGS += $(if @reportinglib_LIB_DIR@, -W$(subst ;, -W,l,-rpath,@reportinglib_LIB_DIR@),) -CORENRNLIB_FLAGS += $(if @sonatareport_LIB_DIR@, -W$(subst ;, -W,l,-rpath,@sonatareport_LIB_DIR@),) -CORENRNLIB_FLAGS += $(if @caliper_LIB_DIR@, -W$(subst ;, -W,l,-rpath,@caliper_LIB_DIR@),) -CORENRNLIB_FLAGS += $(if @caliper_LIB_DIR@,-L@caliper_LIB_DIR@,) # Includes paths gathered by CMake # coreneuron/utils/randoms goes first because it needs to override the NEURON @@ -82,7 +77,7 @@ ifeq ($(wildcard $(CORENRN_PERLEXE)),) endif CXXFLAGS = @CORENRN_CXX_FLAGS@ -CXX_COMPILE_CMD = $(CXX) $(CXXFLAGS) @CMAKE_CXX_COMPILE_OPTIONS_PIC@ @CORENRN_COMMON_COMPILE_DEFS@ $(INCLUDES) +CXX_COMPILE_CMD = $(CXX) $(CXXFLAGS) @CMAKE_CXX_COMPILE_OPTIONS_PIC@ $(INCLUDES) CXX_LINK_EXE_CMD = $(CXX) $(CXXFLAGS) @CMAKE_EXE_LINKER_FLAGS@ CXX_SHARED_LIB_CMD = $(CXX) $(CXXFLAGS) @CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS@ @CMAKE_SHARED_LIBRARY_CXX_FLAGS@ @CMAKE_SHARED_LINKER_FLAGS@ @@ -211,7 +206,7 @@ $(SPECIAL_EXE): $(corenrnmech_lib_target) @printf " => 
$(C_GREEN)Binary$(C_RESET) creating $(SPECIAL_EXE)\n" $(CXX_LINK_EXE_CMD) -o $(SPECIAL_EXE) $(CORENRN_SHARE_CORENRN_DIR)/coreneuron.cpp \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ - -L$(OUTPUT_DIR) -l$(COREMECH_LIB_NAME) -lcoreneuron-cuda $(CORENRNLIB_FLAGS) $(LDFLAGS) \ + -L$(OUTPUT_DIR) -l$(COREMECH_LIB_NAME) $(LDFLAGS) \ -L$(CORENRN_LIB_DIR) \ -Wl,-rpath,'$(LIB_RPATH)' -Wl,-rpath,$(CORENRN_LIB_DIR) -Wl,-rpath,'$(INSTALL_LIB_RPATH)' @@ -228,7 +223,7 @@ coremech_lib_shared: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always -I$(CORENRN_INC_DIR) $(INCFLAGS) \ $(LDFLAGS) ${SONAME_OPTION} -Wl,--start-group \ $(MOD_OBJS_DIR)/libcoreneuron-core/*.o \ - -Wl,--end-group $(CORENRNLIB_FLAGS) -Wl,-rpath,$(CORENRN_LIB_DIR) -L$(CORENRN_LIB_DIR) -lcoreneuron-cuda + -Wl,--end-group -Wl,-rpath,$(CORENRN_LIB_DIR) -L$(CORENRN_LIB_DIR) # cleanup rm $(MOD_OBJS_DIR)/libcoreneuron-core/*.o From a05830cc0bc54243a5c41d4b4eab26e89402c7db Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 19 Jul 2022 17:58:32 +0200 Subject: [PATCH 042/128] format --- CMake/MakefileBuildOptions.cmake | 33 ++++++++++++++------------------ CMake/OpenAccHelper.cmake | 5 ++--- CMakeLists.txt | 7 +++---- coreneuron/CMakeLists.txt | 6 +++--- 4 files changed, 22 insertions(+), 29 deletions(-) diff --git a/CMake/MakefileBuildOptions.cmake b/CMake/MakefileBuildOptions.cmake index 710b8ad4e..29edf2bd1 100644 --- a/CMake/MakefileBuildOptions.cmake +++ b/CMake/MakefileBuildOptions.cmake @@ -29,28 +29,25 @@ set(NMODL_ISPC_BACKEND_ARGS "host --ispc") set(NMODL_ACC_BACKEND_ARGS "host --c acc --oacc") # ============================================================================= -# Construct the linker arguments that are used inside nrnivmodl-core (to build -# libcoreneuron from libcoreneuron-core, libcoreneuron-cuda and mechanism object -# files) and inside nrnivmodl (to link NEURON's special against CoreNEURON's -# libcoreneuron). 
-# ============================================================================= -# Essentially we "just" want to unpack the CMake dependencies of the -# `coreneuron-core` target into a plain string that we can bake into the -# Makefiles in both NEURON and CoreNEURON. +# Construct the linker arguments that are used inside nrnivmodl-core (to build libcoreneuron from +# libcoreneuron-core, libcoreneuron-cuda and mechanism object files) and inside nrnivmodl (to link +# NEURON's special against CoreNEURON's libcoreneuron). +# ============================================================================= +# Essentially we "just" want to unpack the CMake dependencies of the `coreneuron-core` target into a +# plain string that we can bake into the Makefiles in both NEURON and CoreNEURON. function(coreneuron_process_target target) if(TARGET ${target}) if(NOT target STREQUAL "coreneuron-core") - # This is a special case: libcoreneuron-core.a is manually unpacked into .o - # files by the nrnivmodl-core Makefile, so we do not want to also emit an - # -lcoreneuron-core argument. - # TODO: probably need to extract an -L and RPATH path and include that here? + # This is a special case: libcoreneuron-core.a is manually unpacked into .o files by the + # nrnivmodl-core Makefile, so we do not want to also emit an -lcoreneuron-core argument. TODO: + # probably need to extract an -L and RPATH path and include that here? 
set_property(GLOBAL APPEND_STRING PROPERTY CORENEURON_LIB_LINK_FLAGS " -l${target}") endif() get_target_property(target_libraries ${target} LINK_LIBRARIES) if(target_libraries) foreach(child_target ${target_libraries}) coreneuron_process_target(${child_target}) - endforeach() + endforeach() endif() return() endif() @@ -73,9 +70,8 @@ coreneuron_process_target(coreneuron-core) get_property(CORENEURON_LIB_LINK_FLAGS GLOBAL PROPERTY CORENEURON_LIB_LINK_FLAGS) message(STATUS "CORENEURON_LIB_LINK_FLAGS=${CORENEURON_LIB_LINK_FLAGS}") -# Things that used to be in CORENEURON_LIB_LINK_FLAGS: -rdynamic -lrt -# -Wl,--whole-archive -L${CMAKE_HOST_SYSTEM_PROCESSOR} -Wl,--no-whole-archive -# -L${caliper_LIB_DIR} -l${CALIPER_LIB} +# Things that used to be in CORENEURON_LIB_LINK_FLAGS: -rdynamic -lrt -Wl,--whole-archive +# -L${CMAKE_HOST_SYSTEM_PROCESSOR} -Wl,--no-whole-archive -L${caliper_LIB_DIR} -l${CALIPER_LIB} # ============================================================================= # Turn CORENRN_COMPILE_DEFS into a list of -DFOO[=BAR] options. @@ -83,9 +79,8 @@ message(STATUS "CORENEURON_LIB_LINK_FLAGS=${CORENEURON_LIB_LINK_FLAGS}") list(TRANSFORM CORENRN_COMPILE_DEFS PREPEND -D OUTPUT_VARIABLE CORENRN_COMPILE_DEF_FLAGS) # ============================================================================= -# Extra link flags that we need to include when linking libcoreneuron.{a,so} in -# CoreNEURON but that do not need to be passed to NEURON to use when linking -# nrniv/special (why?) +# Extra link flags that we need to include when linking libcoreneuron.{a,so} in CoreNEURON but that +# do not need to be passed to NEURON to use when linking nrniv/special (why?) 
# ============================================================================= string(JOIN " " CORENRN_COMMON_LDFLAGS ${CORENEURON_LIB_LINK_FLAGS} ${CORENRN_EXTRA_LINK_FLAGS}) if(CORENRN_SANITIZER_LIBRARY_DIR) diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index d2eed3d1d..8ba850456 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -103,9 +103,8 @@ endif() if(CORENRN_ENABLE_GPU AND CORENRN_ENABLE_SHARED) # Because of # https://forums.developer.nvidia.com/t/dynamically-loading-an-openacc-enabled-shared-library-from-an-executable-compiled-with-nvc-does-not-work/210968 - # we have to tell NEURON to pass OpenACC flags when linking special, otherwise - # we end up with an `nrniv` binary that cannot dynamically load CoreNEURON in - # shared-library builds + # we have to tell NEURON to pass OpenACC flags when linking special, otherwise we end up with an + # `nrniv` binary that cannot dynamically load CoreNEURON in shared-library builds set_property(GLOBAL PROPERTY CORENEURON_LIB_LINK_FLAGS "${NVHPC_ACC_COMP_FLAGS}") endif() diff --git a/CMakeLists.txt b/CMakeLists.txt index ccf5f0aa0..00e74896e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -477,10 +477,9 @@ endif() # Add the main source directory add_subdirectory(coreneuron) -# Extract the various compiler option strings to use inside nrnivmodl-core. Sets -# the global property CORENEURON_LIB_LINK_FLAGS, which contains the arguments -# that must be added to the link line for `special` to link against -# `libcoreneuron.{a,so}` +# Extract the various compiler option strings to use inside nrnivmodl-core. 
Sets the global property +# CORENEURON_LIB_LINK_FLAGS, which contains the arguments that must be added to the link line for +# `special` to link against `libcoreneuron.{a,so}` include(MakefileBuildOptions) # Generate the nrnivmodl-core script and makefile using the options from MakefileBuildOptions diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index befb2f8dc..e1754cc66 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -286,13 +286,13 @@ file(GLOB modfiles "${modfile_directory}/*.mod") # symbols in the translated versions of default .mod files set(nrniv_core_prefix "${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}") set(corenrn_mech_library - "${nrniv_core_prefix}/${CMAKE_${COMPILE_LIBRARY_TYPE}_LIBRARY_PREFIX}coreneuron${CMAKE_${COMPILE_LIBRARY_TYPE}_LIBRARY_SUFFIX}") + "${nrniv_core_prefix}/${CMAKE_${COMPILE_LIBRARY_TYPE}_LIBRARY_PREFIX}coreneuron${CMAKE_${COMPILE_LIBRARY_TYPE}_LIBRARY_SUFFIX}" +) set(output_binaries "${nrniv_core_prefix}/special-core" "${corenrn_mech_library}") add_custom_command( OUTPUT ${output_binaries} - DEPENDS coreneuron-core ${NMODL_TARGET_TO_DEPEND} ${modfiles} - ${CORENEURON_BUILTIN_MODFILES} + DEPENDS coreneuron-core ${NMODL_TARGET_TO_DEPEND} ${modfiles} ${CORENEURON_BUILTIN_MODFILES} COMMAND ${CMAKE_BINARY_DIR}/bin/nrnivmodl-core -b ${COMPILE_LIBRARY_TYPE} -m ${CORENRN_MOD2CPP_BINARY} -p 4 "${modfile_directory}" WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/bin From c504a872986a2563cd57623c79d096862dca03a6 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 19 Jul 2022 18:14:55 +0200 Subject: [PATCH 043/128] fix static linking --- extra/nrnivmodl_core_makefile.in | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in index 24e630f92..6c7bd2cb2 100644 --- a/extra/nrnivmodl_core_makefile.in +++ b/extra/nrnivmodl_core_makefile.in @@ -229,8 +229,10 @@ coremech_lib_shared: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always # 
build static library of mechanisms coremech_lib_static: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always - rm -f ${COREMECH_LIB_PATH}; \ - ar cq ${COREMECH_LIB_PATH} $(ENGINEMECH_OBJ) $(ALL_OBJS); + # make a libcoreneuron.a by copying libcoreneuron-core.a and then appending + # the newly compiled objects + cp $(CORENRN_LIB_DIR)/libcoreneuron-core.a ${COREMECH_LIB_PATH} + ar r ${COREMECH_LIB_PATH} $(ENGINEMECH_OBJ) $(ALL_OBJS) # compile cpp files to .o $(MOD_OBJS_DIR)/%.o: $(MOD_TO_CPP_DIR)/%.cpp | $(MOD_OBJS_DIR) From 5d61ca26479058ea0e8d474e889349c888d49251 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 19 Jul 2022 18:23:40 +0200 Subject: [PATCH 044/128] linking fixups --- coreneuron/CMakeLists.txt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index e1754cc66..cae2d8f66 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -307,9 +307,16 @@ endif() # Create an extra target for internal use that unit tests and so on can depend on. # ${corenrn_mech_library} is libcoreneuron.{a,so}, which contains both the compiled default -# mechanisms and the content of libcoreneuron-core.a +# mechanisms and the content of libcoreneuron-core.a. Also copy the dependencies of +# libcoreneuron-core as interface dependencies of this new target (example: ${corenrn_mech_library} +# will probably depend on MPI, so when the unit tests link against ${corenrn_mech_library} they need +# to know about MPI too). 
add_library(coreneuron-all INTERFACE) target_link_libraries(coreneuron-all INTERFACE "${corenrn_mech_library}") +get_target_property(coreneuron_core_deps coreneuron-core LINK_LIBRARIES) +foreach(dep ${coreneuron_core_deps}) + target_link_libraries(coreneuron-all INTERFACE ${dep}) +endforeach() # Make headers avail to build tree configure_file(engine.h.in ${CMAKE_BINARY_DIR}/include/coreneuron/engine.h @ONLY) From c147425c52cb55951a4222d6a78031d41e855507 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 19 Jul 2022 18:27:51 +0200 Subject: [PATCH 045/128] minor tweaks --- coreneuron/CMakeLists.txt | 5 ++--- coreneuron/mpi/nrnmpi.h | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index cae2d8f66..da473fdac 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -301,8 +301,7 @@ add_custom_target(nrniv-core ALL DEPENDS ${output_binaries}) if(CORENRN_ENABLE_GPU) separate_arguments(CORENRN_ACC_FLAGS UNIX_COMMAND "${NVHPC_ACC_COMP_FLAGS}") - target_compile_options(coreneuron-core BEFORE - PRIVATE $<$:${CORENRN_ACC_FLAGS}>) + target_compile_options(coreneuron-core PRIVATE ${CORENRN_ACC_FLAGS}) endif() # Create an extra target for internal use that unit tests and so on can depend on. @@ -310,7 +309,7 @@ endif() # mechanisms and the content of libcoreneuron-core.a. Also copy the dependencies of # libcoreneuron-core as interface dependencies of this new target (example: ${corenrn_mech_library} # will probably depend on MPI, so when the unit tests link against ${corenrn_mech_library} they need -# to know about MPI too). +# to know to link against MPI too). 
add_library(coreneuron-all INTERFACE) target_link_libraries(coreneuron-all INTERFACE "${corenrn_mech_library}") get_target_property(coreneuron_core_deps coreneuron-core LINK_LIBRARIES) diff --git a/coreneuron/mpi/nrnmpi.h b/coreneuron/mpi/nrnmpi.h index 04df699ff..03a1d2461 100644 --- a/coreneuron/mpi/nrnmpi.h +++ b/coreneuron/mpi/nrnmpi.h @@ -81,7 +81,7 @@ struct mpi_function>: mpi_function_ba using mpi_function_base::mpi_function_base; template // in principle deducible from `function_ptr` auto operator()(Args&&... args) const { -#ifdef CORENRN_ENABLE_MPI_DYNAMIC +#ifdef CORENEURON_ENABLE_MPI_DYNAMIC // Dynamic MPI, m_fptr should have been initialised via dlsym. assert(m_fptr); return (*reinterpret_cast(m_fptr))(std::forward(args)...); From 6a7c824c2514e105b62f66a29fe9512481a2a643 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 19 Jul 2022 18:47:17 +0200 Subject: [PATCH 046/128] One more tweak --- coreneuron/CMakeLists.txt | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index da473fdac..24facf06a 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -306,16 +306,18 @@ endif() # Create an extra target for internal use that unit tests and so on can depend on. # ${corenrn_mech_library} is libcoreneuron.{a,so}, which contains both the compiled default -# mechanisms and the content of libcoreneuron-core.a. Also copy the dependencies of -# libcoreneuron-core as interface dependencies of this new target (example: ${corenrn_mech_library} -# will probably depend on MPI, so when the unit tests link against ${corenrn_mech_library} they need -# to know to link against MPI too). +# mechanisms and the content of libcoreneuron-core.a. 
add_library(coreneuron-all INTERFACE) target_link_libraries(coreneuron-all INTERFACE "${corenrn_mech_library}") +# Also copy the dependencies of libcoreneuron-core as interface dependencies of this new target +# (example: ${corenrn_mech_library} will probably depend on MPI, so when the unit tests link against +# ${corenrn_mech_library} they need to know to link against MPI too). get_target_property(coreneuron_core_deps coreneuron-core LINK_LIBRARIES) -foreach(dep ${coreneuron_core_deps}) - target_link_libraries(coreneuron-all INTERFACE ${dep}) -endforeach() +if(coreneuron_core_deps) + foreach(dep ${coreneuron_core_deps}) + target_link_libraries(coreneuron-all INTERFACE ${dep}) + endforeach() +endif() # Make headers avail to build tree configure_file(engine.h.in ${CMAKE_BINARY_DIR}/include/coreneuron/engine.h @ONLY) From fc2b5722113e26e31e6182b98807613f5148ece3 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 26 Jul 2022 12:24:26 +0200 Subject: [PATCH 047/128] ar: avoid --output --- extra/nrnivmodl_core_makefile.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in index 6c7bd2cb2..47681fb5a 100644 --- a/extra/nrnivmodl_core_makefile.in +++ b/extra/nrnivmodl_core_makefile.in @@ -218,7 +218,8 @@ coremech_lib_shared: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always # extract the object files from libcoreneuron-core.a mkdir -p $(MOD_OBJS_DIR)/libcoreneuron-core rm -f $(MOD_OBJS_DIR)/libcoreneuron-core/*.o - ar --output=$(MOD_OBJS_DIR)/libcoreneuron-core x $(CORENRN_LIB_DIR)/libcoreneuron-core.a + # --output is only supported by modern versions of ar + (cd $(MOD_OBJS_DIR)/libcoreneuron-core && ar x $(CORENRN_LIB_DIR)/libcoreneuron-core.a) $(CXX_SHARED_LIB_CMD) $(ENGINEMECH_OBJ) -o ${COREMECH_LIB_PATH} $(ALL_OBJS) \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ $(LDFLAGS) ${SONAME_OPTION} -Wl,--start-group \ From 0e9f713b8ae5f764a21a748ba4fdb8029a23f2b1 Mon Sep 17 00:00:00 2001 From: Olli Lupton 
Date: Tue, 26 Jul 2022 13:52:08 +0200 Subject: [PATCH 048/128] Try and fix macOS linking. --- CMake/MakefileBuildOptions.cmake | 9 +++++++++ extra/nrnivmodl_core_makefile.in | 6 +++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/CMake/MakefileBuildOptions.cmake b/CMake/MakefileBuildOptions.cmake index 29edf2bd1..b99442018 100644 --- a/CMake/MakefileBuildOptions.cmake +++ b/CMake/MakefileBuildOptions.cmake @@ -70,6 +70,15 @@ coreneuron_process_target(coreneuron-core) get_property(CORENEURON_LIB_LINK_FLAGS GLOBAL PROPERTY CORENEURON_LIB_LINK_FLAGS) message(STATUS "CORENEURON_LIB_LINK_FLAGS=${CORENEURON_LIB_LINK_FLAGS}") +# Detect if --start-group and --end-group are valid linker arguments. These are typically needed +# when linking mutually-dependent .o files (or where we don't know the correct order) on Linux, but +# they are not needed *or* recognised by the macOS linker. +include(CheckLinkerFlag) # requires CMake 3.18 +check_linker_flag(CXX -Wl,--start-group CORENRN_CXX_LINKER_SUPPORTS_START_GROUP) +if(CORENRN_CXX_LINKER_SUPPORTS_START_GROUP) + set(CORENEURON_LINKER_START_GROUP -Wl,--start-group) + set(CORENEURON_LINKER_END_GROUP -Wl,--end-group) +endif() # Things that used to be in CORENEURON_LIB_LINK_FLAGS: -rdynamic -lrt -Wl,--whole-archive # -L${CMAKE_HOST_SYSTEM_PROCESSOR} -Wl,--no-whole-archive -L${caliper_LIB_DIR} -l${CALIPER_LIB} diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in index 47681fb5a..2c732a288 100644 --- a/extra/nrnivmodl_core_makefile.in +++ b/extra/nrnivmodl_core_makefile.in @@ -222,9 +222,9 @@ coremech_lib_shared: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always (cd $(MOD_OBJS_DIR)/libcoreneuron-core && ar x $(CORENRN_LIB_DIR)/libcoreneuron-core.a) $(CXX_SHARED_LIB_CMD) $(ENGINEMECH_OBJ) -o ${COREMECH_LIB_PATH} $(ALL_OBJS) \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ - $(LDFLAGS) ${SONAME_OPTION} -Wl,--start-group \ - $(MOD_OBJS_DIR)/libcoreneuron-core/*.o \ - -Wl,--end-group -Wl,-rpath,$(CORENRN_LIB_DIR) 
-L$(CORENRN_LIB_DIR) + $(LDFLAGS) ${SONAME_OPTION} @CORENEURON_LINKER_START_GROUP@ \ + $(MOD_OBJS_DIR)/libcoreneuron-core/*.o @CORENEURON_LINKER_END_GROUP@ \ + -Wl,-rpath,$(CORENRN_LIB_DIR) -L$(CORENRN_LIB_DIR) # cleanup rm $(MOD_OBJS_DIR)/libcoreneuron-core/*.o From 3dc6884b5ee016a2d54ab22f099fa6c3cc895155 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 26 Jul 2022 14:00:01 +0200 Subject: [PATCH 049/128] Try and fix Caliper in GitLab CI. --- .gitlab-ci.yml | 49 ++++++++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6a9abb44b..c299a836e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -81,68 +81,63 @@ build:nmodl: SPACK_PACKAGE_COMPILER: gcc # Build CoreNEURON -build:coreneuron:mod2c:nvhpc:acc: - extends: [.build, .spack_nvhpc] +.build_coreneuron: + extends: [.build] variables: SPACK_PACKAGE: coreneuron + # NEURON depends on py-mpi4py, most of whose dependencies are pulled in by + # nmodl%gcc, with the exception of MPI, which is pulled in by + # coreneuron%{nvhpc,intel}. hpe-mpi is an external package anyway, so + # setting its compiler is just changing how it is labelled in the + # dependency graph and not changing which installation is used, but this + # means that in the NEURON step an existing py-mpi4py%gcc can be used. + # Otherwise a new py-mpi4py with hpe-mpi%{nvhpc,intel} will be built. + # caliper: papi%nvhpc does not build; use the caliper from the deployment + # TODO: fix this more robustly so we don't have to play so many games. 
+ SPACK_PACKAGE_DEPENDENCIES: ^hpe-mpi%gcc ^caliper%gcc+cuda cuda_arch=70 + +build:coreneuron:mod2c:nvhpc:acc: + extends: [.build_coreneuron, .spack_nvhpc] + variables: # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type SPACK_PACKAGE_SPEC: +caliper+gpu+openmp~shared+tests~legacy-unit build_type=RelWithDebInfo build:coreneuron:mod2c:nvhpc:acc:shared: - extends: [.build, .spack_nvhpc] + extends: [.build_coreneuron, .spack_nvhpc] variables: - SPACK_PACKAGE: coreneuron # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type SPACK_PACKAGE_SPEC: +caliper+gpu+openmp+shared+tests~legacy-unit build_type=RelWithDebInfo # Build CoreNEURON with Unified Memory on GPU build:coreneuron:mod2c:nvhpc:acc:unified: - extends: [.build, .spack_nvhpc] + extends: [.build_coreneuron, .spack_nvhpc] variables: - SPACK_PACKAGE: coreneuron # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type SPACK_PACKAGE_SPEC: ~caliper+gpu+unified+openmp~shared+tests~legacy-unit build_type=RelWithDebInfo -.build_coreneuron_nmodl: - extends: [.build] - variables: - # NEURON depends on py-mpi4py, most of whose dependencies are pulled in by - # nmodl%gcc, with the exception of MPI, which is pulled in by - # coreneuron%{nvhpc,intel}. hpe-mpi is an external package anyway, so - # setting its compiler is just changing how it is labelled in the - # dependency graph and not changing which installation is used, but this - # means that in the NEURON step an existing py-mpi4py%gcc can be used. - # Otherwise a new py-mpi4py with hpe-mpi%{nvhpc,intel} will be built. - # TODO: fix this more robustly so we don't have to play so many games. 
- SPACK_PACKAGE_DEPENDENCIES: ^hpe-mpi%gcc - build:coreneuron:nmodl:nvhpc:omp: - extends: [.build_coreneuron_nmodl, .spack_nvhpc] + extends: [.build_coreneuron, .spack_nvhpc] variables: - SPACK_PACKAGE: coreneuron # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type SPACK_PACKAGE_SPEC: +caliper+nmodl+openmp+gpu~shared+tests~legacy-unit~sympy build_type=RelWithDebInfo needs: ["build:nmodl"] build:coreneuron:nmodl:nvhpc:acc: - extends: [.build_coreneuron_nmodl, .spack_nvhpc] + extends: [.build_coreneuron, .spack_nvhpc] variables: - SPACK_PACKAGE: coreneuron # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type # Sympy + OpenMP target offload does not currently work with NVHPC SPACK_PACKAGE_SPEC: ~caliper+nmodl~openmp+gpu~shared+tests~legacy-unit+sympy build_type=RelWithDebInfo needs: ["build:nmodl"] build:coreneuron:mod2c:intel: - extends: [.build, .spack_intel] + extends: [.build_coreneuron, .spack_intel] variables: - SPACK_PACKAGE: coreneuron SPACK_PACKAGE_SPEC: +caliper+tests~legacy-unit build_type=Debug build:coreneuron:nmodl:intel: - extends: [.build_coreneuron_nmodl, .spack_intel] + extends: [.build_coreneuron, .spack_intel] variables: - SPACK_PACKAGE: coreneuron SPACK_PACKAGE_SPEC: ~caliper+nmodl+tests~legacy-unit build_type=Debug needs: ["build:nmodl"] From d41e82bf1bc849b3830a5fe04fa3d8bbe45489cb Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 26 Jul 2022 14:08:19 +0200 Subject: [PATCH 050/128] simplify: +caliper in all CoreNEURON builds --- .gitlab-ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c299a836e..a7e0f39bc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -113,7 +113,7 @@ build:coreneuron:mod2c:nvhpc:acc:unified: extends: [.build_coreneuron, .spack_nvhpc] variables: # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type - SPACK_PACKAGE_SPEC: ~caliper+gpu+unified+openmp~shared+tests~legacy-unit build_type=RelWithDebInfo 
+ SPACK_PACKAGE_SPEC: +caliper+gpu+unified+openmp~shared+tests~legacy-unit build_type=RelWithDebInfo build:coreneuron:nmodl:nvhpc:omp: extends: [.build_coreneuron, .spack_nvhpc] @@ -127,7 +127,7 @@ build:coreneuron:nmodl:nvhpc:acc: variables: # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type # Sympy + OpenMP target offload does not currently work with NVHPC - SPACK_PACKAGE_SPEC: ~caliper+nmodl~openmp+gpu~shared+tests~legacy-unit+sympy build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu~shared+tests~legacy-unit+sympy build_type=RelWithDebInfo needs: ["build:nmodl"] build:coreneuron:mod2c:intel: @@ -138,7 +138,7 @@ build:coreneuron:mod2c:intel: build:coreneuron:nmodl:intel: extends: [.build_coreneuron, .spack_intel] variables: - SPACK_PACKAGE_SPEC: ~caliper+nmodl+tests~legacy-unit build_type=Debug + SPACK_PACKAGE_SPEC: +caliper+nmodl+tests~legacy-unit build_type=Debug needs: ["build:nmodl"] # Build NEURON From acd5a7e84d23eac71ecc82ff41f2cf4984b33d42 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 26 Jul 2022 15:16:09 +0200 Subject: [PATCH 051/128] Disable present table when unified memory enabled. --- coreneuron/utils/offload.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp index 1f068c4d7..f37724bb4 100644 --- a/coreneuron/utils/offload.hpp +++ b/coreneuron/utils/offload.hpp @@ -26,7 +26,7 @@ namespace coreneuron { #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ - defined(_OPENACC) + defined(_OPENACC) && !defined(CORENEURON_UNIFIED_MEMORY) // Homegrown implementation for buggy NVHPC versions (<=22.3?) 
#define CORENEURON_ENABLE_PRESENT_TABLE void* cnrn_target_deviceptr_impl(void const* h_ptr); From 2a07e146ad159f45074d8288207d2c6a9ff5b4e2 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 26 Jul 2022 15:17:36 +0200 Subject: [PATCH 052/128] Shuffle Random123 + GPU yet again for 22.3 --- coreneuron/utils/randoms/nrnran123.cpp | 28 ++++---------------------- 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/coreneuron/utils/randoms/nrnran123.cpp b/coreneuron/utils/randoms/nrnran123.cpp index f2dd2dee2..8a2f07866 100644 --- a/coreneuron/utils/randoms/nrnran123.cpp +++ b/coreneuron/utils/randoms/nrnran123.cpp @@ -87,24 +87,15 @@ using random123_allocator = coreneuron::unified_allocator Date: Tue, 26 Jul 2022 16:31:54 +0200 Subject: [PATCH 053/128] Some CORENEURON_ -> CORENRN_ for consistency. Export OpenACC flags to NEURON separately as well as as part of the whole ... -lcoreneuron ... link line. --- CMake/MakefileBuildOptions.cmake | 15 +++++++-------- CMake/OpenAccHelper.cmake | 17 +++++++++-------- CMake/coreneuron-config.cmake.in | 3 ++- CMakeLists.txt | 4 ++-- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/CMake/MakefileBuildOptions.cmake b/CMake/MakefileBuildOptions.cmake index b99442018..2a5b6a7d9 100644 --- a/CMake/MakefileBuildOptions.cmake +++ b/CMake/MakefileBuildOptions.cmake @@ -41,7 +41,7 @@ function(coreneuron_process_target target) # This is a special case: libcoreneuron-core.a is manually unpacked into .o files by the # nrnivmodl-core Makefile, so we do not want to also emit an -lcoreneuron-core argument. TODO: # probably need to extract an -L and RPATH path and include that here?
- set_property(GLOBAL APPEND_STRING PROPERTY CORENEURON_LIB_LINK_FLAGS " -l${target}") + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -l${target}") endif() get_target_property(target_libraries ${target} LINK_LIBRARIES) if(target_libraries) @@ -55,20 +55,19 @@ function(coreneuron_process_target target) message(STATUS "target=${target} target_dir=${target_dir}") if(NOT target_dir) # In case target is not a target but is just the name of a library, e.g. "dl" - set_property(GLOBAL APPEND_STRING PROPERTY CORENEURON_LIB_LINK_FLAGS " -l${target}") + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -l${target}") elseif("${target_dir}" MATCHES "^(/lib|/lib64|/usr/lib|/usr/lib64)$") # e.g. /usr/lib64/libpthread.so -> -lpthread get_filename_component(libname ${target} NAME_WE) string(REGEX REPLACE "^lib" "" libname ${libname}) - set_property(GLOBAL APPEND_STRING PROPERTY CORENEURON_LIB_LINK_FLAGS " -l${libname}") + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -l${libname}") else() # It's a full path, include that on the line - set_property(GLOBAL APPEND_STRING PROPERTY CORENEURON_LIB_LINK_FLAGS " ${target}") + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " ${target}") endif() endfunction() coreneuron_process_target(coreneuron-core) -get_property(CORENEURON_LIB_LINK_FLAGS GLOBAL PROPERTY CORENEURON_LIB_LINK_FLAGS) -message(STATUS "CORENEURON_LIB_LINK_FLAGS=${CORENEURON_LIB_LINK_FLAGS}") +get_property(CORENRN_LIB_LINK_FLAGS GLOBAL PROPERTY CORENRN_LIB_LINK_FLAGS) # Detect if --start-group and --end-group are valid linker arguments. 
These are typically needed # when linking mutually-dependent .o files (or where we don't know the correct order) on Linux, but @@ -79,7 +78,7 @@ if(CORENRN_CXX_LINKER_SUPPORTS_START_GROUP) set(CORENEURON_LINKER_START_GROUP -Wl,--start-group) set(CORENEURON_LINKER_END_GROUP -Wl,--end-group) endif() -# Things that used to be in CORENEURON_LIB_LINK_FLAGS: -rdynamic -lrt -Wl,--whole-archive +# Things that used to be in CORENRN_LIB_LINK_FLAGS: -rdynamic -lrt -Wl,--whole-archive # -L${CMAKE_HOST_SYSTEM_PROCESSOR} -Wl,--no-whole-archive -L${caliper_LIB_DIR} -l${CALIPER_LIB} # ============================================================================= @@ -91,7 +90,7 @@ list(TRANSFORM CORENRN_COMPILE_DEFS PREPEND -D OUTPUT_VARIABLE CORENRN_COMPILE_D # Extra link flags that we need to include when linking libcoreneuron.{a,so} in CoreNEURON but that # do not need to be passed to NEURON to use when linking nrniv/special (why?) # ============================================================================= -string(JOIN " " CORENRN_COMMON_LDFLAGS ${CORENEURON_LIB_LINK_FLAGS} ${CORENRN_EXTRA_LINK_FLAGS}) +string(JOIN " " CORENRN_COMMON_LDFLAGS ${CORENRN_LIB_LINK_FLAGS} ${CORENRN_EXTRA_LINK_FLAGS}) if(CORENRN_SANITIZER_LIBRARY_DIR) string(APPEND CORENRN_COMMON_LDFLAGS " -Wl,-rpath,${CORENRN_SANITIZER_LIBRARY_DIR}") endif() diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index 8ba850456..f50f1436a 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -66,12 +66,8 @@ if(CORENRN_ENABLE_GPU) # linking. Without this, we had problems with linking between the explicit CUDA (.cu) device code # and offloaded OpenACC/OpenMP code. Using -cuda when compiling seems to improve error messages in # some cases, and to be recommended by NVIDIA. We pass -gpu=cudaX.Y to ensure that OpenACC/OpenMP - # code is compiled with the same CUDA version as the explicit CUDA code. 
TODO nordc option is - # added based on the recommendation from: - # https://forums.developer.nvidia.com/t/separate-compilation-of-mixed-cuda-openacc-code/192701 but - # as discussed in https://github.com/BlueBrain/CoreNeuron/issues/141#issuecomment-1086742194 this - # is still not completely solving underlying link issue. - set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo,rdc") + # code is compiled with the same CUDA version as the explicit CUDA code. + set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo") # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA # code. Otherwise there may be confusing linker errors. We cannot rely on nvcc and nvc++ using the # same default compute capabilities as each other, particularly on GPU-less build machines. @@ -104,8 +100,13 @@ if(CORENRN_ENABLE_GPU AND CORENRN_ENABLE_SHARED) # Because of # https://forums.developer.nvidia.com/t/dynamically-loading-an-openacc-enabled-shared-library-from-an-executable-compiled-with-nvc-does-not-work/210968 # we have to tell NEURON to pass OpenACC flags when linking special, otherwise we end up with an - # `nrniv` binary that cannot dynamically load CoreNEURON in shared-library builds - set_property(GLOBAL PROPERTY CORENEURON_LIB_LINK_FLAGS "${NVHPC_ACC_COMP_FLAGS}") + # `nrniv` binary that cannot dynamically load CoreNEURON in shared-library builds. + # CORENRN_LIB_LINK_FLAGS is the full set of flags needed to link against libcoreneuron.so: + # something like `-acc -lcoreneuron ...`. CORENRN_NEURON_LINK_FLAGS only contains flags that need + # to be used when linking the NEURON Python module to make sure it is able to dynamically load + # libcoreneuron.so. 
+ set_property(GLOBAL PROPERTY CORENRN_LIB_LINK_FLAGS "${NVHPC_ACC_COMP_FLAGS}") + set_property(GLOBAL PROPERTY CORENRN_NEURON_LINK_FLAGS "${NVHPC_ACC_COMP_FLAGS}") endif() if(CORENRN_HAVE_NVHPC_COMPILER) diff --git a/CMake/coreneuron-config.cmake.in b/CMake/coreneuron-config.cmake.in index c5f8573d0..4fe3988c3 100644 --- a/CMake/coreneuron-config.cmake.in +++ b/CMake/coreneuron-config.cmake.in @@ -15,7 +15,8 @@ set(CORENRN_ENABLE_GPU @CORENRN_ENABLE_GPU@) set(CORENRN_ENABLE_NMODL @CORENRN_ENABLE_NMODL@) set(CORENRN_ENABLE_REPORTING @CORENRN_ENABLE_REPORTING@) set(CORENRN_ENABLE_SHARED @CORENRN_ENABLE_SHARED@) -set(CORENEURON_LIB_LINK_FLAGS "@CORENEURON_LIB_LINK_FLAGS@") +set(CORENRN_LIB_LINK_FLAGS "@CORENRN_LIB_LINK_FLAGS@") +set(CORENRN_NEURON_LINK_FLAGS "@CORENRN_NEURON_LINK_FLAGS@") find_path(CORENEURON_INCLUDE_DIR "coreneuron/coreneuron.h" HINTS "${CONFIG_PATH}/../../include") find_path( diff --git a/CMakeLists.txt b/CMakeLists.txt index 00e74896e..f3b1e7ee9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -478,7 +478,7 @@ endif() add_subdirectory(coreneuron) # Extract the various compiler option strings to use inside nrnivmodl-core. 
Sets the global property -# CORENEURON_LIB_LINK_FLAGS, which contains the arguments that must be added to the link line for +# CORENRN_LIB_LINK_FLAGS, which contains the arguments that must be added to the link line for # `special` to link against `libcoreneuron.{a,so}` include(MakefileBuildOptions) @@ -492,7 +492,7 @@ endif() # ============================================================================= # Install cmake modules # ============================================================================= -get_property(CORENEURON_LIB_LINK_FLAGS GLOBAL PROPERTY CORENEURON_LIB_LINK_FLAGS) +get_property(CORENRN_NEURON_LINK_FLAGS GLOBAL PROPERTY CORENRN_NEURON_LINK_FLAGS) configure_file(CMake/coreneuron-config.cmake.in CMake/coreneuron-config.cmake @ONLY) install(FILES "${CMAKE_CURRENT_BINARY_DIR}/CMake/coreneuron-config.cmake" DESTINATION share/cmake) install(EXPORT coreneuron DESTINATION share/cmake) From bd10048d8c4ff89fe61e8b7372a23c43d359d002 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 26 Jul 2022 17:52:17 +0200 Subject: [PATCH 054/128] cmake fixups --- CMake/MakefileBuildOptions.cmake | 57 ++++++++++++++++++++++---------- coreneuron/CMakeLists.txt | 1 + tests/integration/CMakeLists.txt | 2 +- 3 files changed, 41 insertions(+), 19 deletions(-) diff --git a/CMake/MakefileBuildOptions.cmake b/CMake/MakefileBuildOptions.cmake index 2a5b6a7d9..7fec40860 100644 --- a/CMake/MakefileBuildOptions.cmake +++ b/CMake/MakefileBuildOptions.cmake @@ -35,13 +35,45 @@ set(NMODL_ACC_BACKEND_ARGS "host --c acc --oacc") # ============================================================================= # Essentially we "just" want to unpack the CMake dependencies of the `coreneuron-core` target into a # plain string that we can bake into the Makefiles in both NEURON and CoreNEURON. 
+function(coreneuron_process_library_path library) + get_filename_component(library_dir "${library}" DIRECTORY) + if(NOT library_dir) + # In case target is not a target but is just the name of a library, e.g. "dl" + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -l${library}") + elseif("${library_dir}" MATCHES "^(/lib|/lib64|/usr/lib|/usr/lib64)$") + # e.g. /usr/lib64/libpthread.so -> -lpthread + get_filename_component(libname ${library} NAME_WE) + string(REGEX REPLACE "^lib" "" libname ${libname}) + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -l${libname}") + else() + # It's a full path, include that on the line + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " ${library}") + endif() +endfunction() function(coreneuron_process_target target) if(TARGET ${target}) if(NOT target STREQUAL "coreneuron-core") # This is a special case: libcoreneuron-core.a is manually unpacked into .o files by the - # nrnivmodl-core Makefile, so we do not want to also emit an -lcoreneuron-core argument. TODO: - # probably need to extract an -L and RPATH path and include that here? - set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -l${target}") + # nrnivmodl-core Makefile, so we do not want to also emit an -lcoreneuron-core argument. + get_target_property(target_inc_dirs ${target} INTERFACE_INCLUDE_DIRECTORIES) + if(target_inc_dirs) + foreach(inc_dir_genex ${target_inc_dirs}) + string(GENEX_STRIP "${inc_dir_genex}" inc_dir) + if(inc_dir) + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_EXTRA_COMPILE_FLAGS " -I${inc_dir}") + endif() + endforeach() + endif() + get_target_property(target_imported ${target} IMPORTED) + if(target_imported) + # In this case we can extract the full path to the library + get_target_property(target_location ${target} LOCATION) + coreneuron_process_library_path(${target_location}) + else() + # This is probably another of our libraries, like -lcoreneuron-cuda. 
We might need to add -L + # and an RPATH later. + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -l${target}") + endif() endif() get_target_property(target_libraries ${target} LINK_LIBRARIES) if(target_libraries) @@ -51,22 +83,10 @@ function(coreneuron_process_target target) endif() return() endif() - get_filename_component(target_dir "${target}" DIRECTORY) - message(STATUS "target=${target} target_dir=${target_dir}") - if(NOT target_dir) - # In case target is not a target but is just the name of a library, e.g. "dl" - set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -l${target}") - elseif("${target_dir}" MATCHES "^(/lib|/lib64|/usr/lib|/usr/lib64)$") - # e.g. /usr/lib64/libpthread.so -> -lpthread - get_filename_component(libname ${target} NAME_WE) - string(REGEX REPLACE "^lib" "" libname ${libname}) - set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -l${libname}") - else() - # It's a full path, include that on the line - set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " ${target}") - endif() + coreneuron_process_library_path("${target}") endfunction() coreneuron_process_target(coreneuron-core) +get_property(CORENRN_EXTRA_COMPILE_FLAGS GLOBAL PROPERTY CORENRN_EXTRA_COMPILE_FLAGS) get_property(CORENRN_LIB_LINK_FLAGS GLOBAL PROPERTY CORENRN_LIB_LINK_FLAGS) # Detect if --start-group and --end-group are valid linker arguments. 
These are typically needed @@ -110,7 +130,8 @@ string( ${NVHPC_ACC_COMP_FLAGS} ${NVHPC_CXX_INLINE_FLAGS} ${CORENRN_COMPILE_DEF_FLAGS} - ${CORENRN_EXTRA_MECH_CXX_FLAGS}) + ${CORENRN_EXTRA_MECH_CXX_FLAGS} + ${CORENRN_EXTRA_COMPILE_FLAGS}) # ============================================================================= # nmodl/mod2c related options : TODO diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 24facf06a..168fdf708 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -216,6 +216,7 @@ if(CORENRN_ENABLE_MPI AND CORENRN_ENABLE_MPI_DYNAMIC) list(GET NRN_MPI_LIBNAME_LIST ${val} libname) add_library(core${libname}_lib SHARED ${MPI_LIB_FILES}) + target_link_libraries(core${libname}_lib ${CORENRN_CALIPER_LIB}) target_include_directories( core${libname}_lib PUBLIC ${include} diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index 75ae106e1..17b57084b 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -12,7 +12,7 @@ if(CORENRN_ENABLE_MPI_DYNAMIC) # building single generic mpi library libcorenrn_mpi. # ~~~ if(CORENEURON_AS_SUBPROJECT) - message(INFO "CoreNEURON integration tests are disabled with dynamic MPI") + message(STATUS "CoreNEURON integration tests are disabled with dynamic MPI") return() else() set(CORENRN_MPI_LIB_ARG From b0b74517c4289918c5270b29c107a2ada95ce651 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 26 Jul 2022 18:04:12 +0200 Subject: [PATCH 055/128] Explicit random123 global state data transfer. 
--- coreneuron/apps/main1.cpp | 2 ++ coreneuron/utils/randoms/nrnran123.cpp | 14 ++++++++++++++ coreneuron/utils/randoms/nrnran123.h | 2 ++ 3 files changed, 18 insertions(+) diff --git a/coreneuron/apps/main1.cpp b/coreneuron/apps/main1.cpp index b019748fd..4408234b6 100644 --- a/coreneuron/apps/main1.cpp +++ b/coreneuron/apps/main1.cpp @@ -506,6 +506,7 @@ extern "C" void mk_mech_init(int argc, char** argv) { #ifdef CORENEURON_ENABLE_GPU if (corenrn_param.gpu) { init_gpu(); + nrnran123_initialise_global_state_on_device(); } #endif @@ -683,6 +684,7 @@ extern "C" int run_solve_core(int argc, char** argv) { if (nrn_have_gaps) { nrn_partrans::delete_gap_indices_from_device(); } + nrnran123_destroy_global_state_on_device(); } // Cleaning the memory diff --git a/coreneuron/utils/randoms/nrnran123.cpp b/coreneuron/utils/randoms/nrnran123.cpp index 8a2f07866..5a92ae8ee 100644 --- a/coreneuron/utils/randoms/nrnran123.cpp +++ b/coreneuron/utils/randoms/nrnran123.cpp @@ -135,6 +135,20 @@ void nrnran123_set_globalindex(uint32_t gix) { } } +void nrnran123_initialise_global_state_on_device() { + if (coreneuron::gpu_enabled()) { + auto& g_k = global_state(); + nrn_pragma_acc(enter data copyin(g_k)) + } +} + +void nrnran123_destroy_global_state_on_device() { + if (coreneuron::gpu_enabled()) { + auto& g_k = global_state(); + nrn_pragma_acc(exit data delete (g_k)) + } +} + /** @brief Allocate a new Random123 stream. * @todo It would be nicer if the API return type was * std::unique_ptr, so we could use a diff --git a/coreneuron/utils/randoms/nrnran123.h b/coreneuron/utils/randoms/nrnran123.h index 12484d3d4..d4108612d 100644 --- a/coreneuron/utils/randoms/nrnran123.h +++ b/coreneuron/utils/randoms/nrnran123.h @@ -69,6 +69,8 @@ philox4x32_ctr_t coreneuron_random123_philox4x32_helper(coreneuron::nrnran123_St nrn_pragma_omp(end declare target) namespace coreneuron { +void nrnran123_initialise_global_state_on_device(); +void nrnran123_destroy_global_state_on_device(); /* global index. 
eg. run number */ /* all generator instances share this global index */ From 179af80466457b2722c7c727a8aae2445c844506 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 26 Jul 2022 18:17:41 +0200 Subject: [PATCH 056/128] random123 global state v59 --- coreneuron/utils/randoms/nrnran123.cpp | 28 ++++++++++++++------------ 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/coreneuron/utils/randoms/nrnran123.cpp b/coreneuron/utils/randoms/nrnran123.cpp index 5a92ae8ee..fadfa7b5b 100644 --- a/coreneuron/utils/randoms/nrnran123.cpp +++ b/coreneuron/utils/randoms/nrnran123.cpp @@ -25,17 +25,6 @@ #include #include -// Defining these attributes seems to help nvc++ in OpenMP target offload mode. -#if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ - defined(_OPENMP) && defined(__CUDACC__) -#define CORENRN_HOST_DEVICE __host__ __device__ -#elif defined(__CUDACC__) -// This is necessary to make the new CUDA-syntax-in-.cpp version compile -#define CORENRN_HOST_DEVICE __host__ __device__ -#else -#define CORENRN_HOST_DEVICE -#endif - namespace { #ifdef CORENEURON_USE_BOOST_POOL /** Tag type for use with boost::fast_pool_allocator that forwards to @@ -129,23 +118,36 @@ void nrnran123_set_globalindex(uint32_t gix) { if (g_k.v[0] != gix) { g_k.v[0] = gix; if (coreneuron::gpu_enabled()) { +#ifdef __CUDACC__ + { + auto const code = cudaMemcpyToSymbol(g_k, &g_k, sizeof(g_k)); + assert(code == cudaSuccess); + } + { + auto const code = cudaDeviceSynchronize(); + assert(code == cudaSuccess); + } +#else nrn_pragma_acc(update device(g_k)) nrn_pragma_omp(target update to(g_k)) +#endif } } } void nrnran123_initialise_global_state_on_device() { if (coreneuron::gpu_enabled()) { - auto& g_k = global_state(); +#ifndef __CUDACC__ nrn_pragma_acc(enter data copyin(g_k)) +#endif } } void nrnran123_destroy_global_state_on_device() { if (coreneuron::gpu_enabled()) { - auto& g_k = global_state(); +#ifndef __CUDACC__ nrn_pragma_acc(exit data delete (g_k)) 
+#endif } } From fb8018e41e28eea3653fbcaf1e123ca51c5e3f3b Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 27 Jul 2022 10:38:44 +0200 Subject: [PATCH 057/128] random123 global state v73 --- coreneuron/utils/randoms/nrnran123.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/coreneuron/utils/randoms/nrnran123.cpp b/coreneuron/utils/randoms/nrnran123.cpp index fadfa7b5b..f2bfed11a 100644 --- a/coreneuron/utils/randoms/nrnran123.cpp +++ b/coreneuron/utils/randoms/nrnran123.cpp @@ -82,7 +82,10 @@ std::size_t g_instance_count{}; #define g_k_qualifiers #endif g_k_qualifiers philox4x32_key_t g_k{}; -philox4x32_key_t& global_state() { +// Cannot refer to g_k directly from a nrn_pragma_acc(routine seq) method like +// coreneuron_random123_philox4x32_helper, and cannot have this inlined there at +// higher optimisation levels +__attribute__((noinline)) philox4x32_key_t& global_state() { return g_k; } } // namespace From 66a39385a438933b6135c95d809bfcd0279a6b55 Mon Sep 17 00:00:00 2001 From: pramodk Date: Wed, 27 Jul 2022 12:05:08 +0200 Subject: [PATCH 058/128] fix prototype for _check_table_thread --- coreneuron/mechanism/membfunc.hpp | 2 +- coreneuron/mechanism/register_mech.cpp | 2 +- coreneuron/mechanism/register_mech.hpp | 2 +- coreneuron/sim/multicore.cpp | 2 +- external/mod2c | 2 +- external/nmodl | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/coreneuron/mechanism/membfunc.hpp b/coreneuron/mechanism/membfunc.hpp index 8fe04a06c..87927780c 100644 --- a/coreneuron/mechanism/membfunc.hpp +++ b/coreneuron/mechanism/membfunc.hpp @@ -40,7 +40,7 @@ struct Memb_func { int thread_size_; /* how many Datum needed in Memb_list if vectorized */ void (*thread_mem_init_)(ThreadDatum*); /* after Memb_list._thread is allocated */ void (*thread_cleanup_)(ThreadDatum*); /* before Memb_list._thread is freed */ - void (*thread_table_check_)(int, int, double*, Datum*, ThreadDatum*, NrnThread*, int); + void (*thread_table_check_)(int, int, 
double*, Datum*, ThreadDatum*, NrnThread*, Memb_list*, int); int is_point; void (*setdata_)(double*, Datum*); int* dparam_semantics; /* for nrncore writing. */ diff --git a/coreneuron/mechanism/register_mech.cpp b/coreneuron/mechanism/register_mech.cpp index 01b82814c..20fbcd424 100644 --- a/coreneuron/mechanism/register_mech.cpp +++ b/coreneuron/mechanism/register_mech.cpp @@ -418,7 +418,7 @@ void _nrn_thread_reg1(int i, void (*f)(ThreadDatum*)) { } void _nrn_thread_table_reg(int i, - void (*f)(int, int, double*, Datum*, ThreadDatum*, NrnThread*, int)) { + void (*f)(int, int, double*, Datum*, ThreadDatum*, NrnThread*, Memb_list*, int)) { if (i == -1) return; diff --git a/coreneuron/mechanism/register_mech.hpp b/coreneuron/mechanism/register_mech.hpp index df80d958a..07fa1ca5c 100644 --- a/coreneuron/mechanism/register_mech.hpp +++ b/coreneuron/mechanism/register_mech.hpp @@ -18,7 +18,7 @@ extern void hoc_reg_bbcore_read(int type, bbcore_read_t f); extern void hoc_reg_bbcore_write(int type, bbcore_write_t f); extern void _nrn_thread_table_reg( int i, - void (*f)(int, int, double*, Datum*, ThreadDatum*, NrnThread*, int)); + void (*f)(int, int, double*, Datum*, ThreadDatum*, NrnThread*, Memb_list*, int)); extern void alloc_mech(int); } // namespace coreneuron diff --git a/coreneuron/sim/multicore.cpp b/coreneuron/sim/multicore.cpp index d5368a29c..cf8daaac8 100644 --- a/coreneuron/sim/multicore.cpp +++ b/coreneuron/sim/multicore.cpp @@ -166,7 +166,7 @@ void nrn_thread_table_check() { auto tml = static_cast(table_check_[i + 1]._pvoid); Memb_list* ml = tml->ml; (*corenrn.get_memb_func(tml->index).thread_table_check_)( - 0, ml->_nodecount_padded, ml->data, ml->pdata, ml->_thread, &nt, tml->index); + 0, ml->_nodecount_padded, ml->data, ml->pdata, ml->_thread, &nt, ml, tml->index); } } } // namespace coreneuron diff --git a/external/mod2c b/external/mod2c index 9d21b18a0..d8507dec1 160000 --- a/external/mod2c +++ b/external/mod2c @@ -1 +1 @@ -Subproject commit 
9d21b18a0036810f3ced1a8b16428754b87c8e87 +Subproject commit d8507dec1671b43b012e4e1ab36160e6da21aabb diff --git a/external/nmodl b/external/nmodl index 7000ff612..b99496a91 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 7000ff612208ed8b27837438731903c58d1786e3 +Subproject commit b99496a919df98a89cd97cb6898dda49f0d17c56 From ad634e909a7d05057a410f9f560bd289974e3754 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 27 Jul 2022 17:48:40 +0200 Subject: [PATCH 059/128] add more GPU builds --- .gitlab-ci.yml | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a7e0f39bc..eb68083a1 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -99,35 +99,40 @@ build:nmodl: build:coreneuron:mod2c:nvhpc:acc: extends: [.build_coreneuron, .spack_nvhpc] variables: - # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type SPACK_PACKAGE_SPEC: +caliper+gpu+openmp~shared+tests~legacy-unit build_type=RelWithDebInfo +build:coreneuron:mod2c:nvhpc:acc:debug: + extends: [.build_coreneuron, .spack_nvhpc] + variables: + SPACK_PACKAGE_SPEC: +caliper+gpu+openmp~shared+tests~legacy-unit build_type=Debug + build:coreneuron:mod2c:nvhpc:acc:shared: extends: [.build_coreneuron, .spack_nvhpc] variables: - # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type SPACK_PACKAGE_SPEC: +caliper+gpu+openmp+shared+tests~legacy-unit build_type=RelWithDebInfo +build:coreneuron:mod2c:nvhpc:acc:shared:debug: + extends: [.build_coreneuron, .spack_nvhpc] + variables: + SPACK_PACKAGE_SPEC: +caliper+gpu+openmp+shared+tests~legacy-unit build_type=Debug + # Build CoreNEURON with Unified Memory on GPU build:coreneuron:mod2c:nvhpc:acc:unified: extends: [.build_coreneuron, .spack_nvhpc] variables: - # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type - SPACK_PACKAGE_SPEC: +caliper+gpu+unified+openmp~shared+tests~legacy-unit 
build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: +caliper+gpu+unified+openmp~shared+tests~legacy-unit build_type=Debug -build:coreneuron:nmodl:nvhpc:omp: +build:coreneuron:nmodl:nvhpc:acc: extends: [.build_coreneuron, .spack_nvhpc] variables: - # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type - SPACK_PACKAGE_SPEC: +caliper+nmodl+openmp+gpu~shared+tests~legacy-unit~sympy build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu~shared+tests~legacy-unit+sympy build_type=RelWithDebInfo needs: ["build:nmodl"] -build:coreneuron:nmodl:nvhpc:acc: +build:coreneuron:nmodl:nvhpc:omp: extends: [.build_coreneuron, .spack_nvhpc] variables: - # See https://github.com/BlueBrain/CoreNeuron/issues/518 re: build_type - # Sympy + OpenMP target offload does not currently work with NVHPC - SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu~shared+tests~legacy-unit+sympy build_type=RelWithDebInfo + # Sympy + OpenMP target offload does not currently work with NVHPC (?) + SPACK_PACKAGE_SPEC: +caliper+nmodl+openmp+gpu~shared+tests~legacy-unit~sympy build_type=RelWithDebInfo needs: ["build:nmodl"] build:coreneuron:mod2c:intel: @@ -146,10 +151,18 @@ build:neuron:mod2c:nvhpc:acc: extends: [.build_neuron, .spack_nvhpc] needs: ["build:coreneuron:mod2c:nvhpc:acc"] +build:neuron:mod2c:nvhpc:acc:debug: + extends: [.build_neuron, .spack_nvhpc] + needs: ["build:coreneuron:mod2c:nvhpc:acc:debug"] + build:neuron:mod2c:nvhpc:acc:shared: extends: [.build_neuron, .spack_nvhpc] needs: ["build:coreneuron:mod2c:nvhpc:acc:shared"] +build:neuron:mod2c:nvhpc:acc:shared:debug: + extends: [.build_neuron, .spack_nvhpc] + needs: ["build:coreneuron:mod2c:nvhpc:acc:shared:debug"] + build:neuron:nmodl:nvhpc:omp: extends: [.build_neuron, .spack_nvhpc] needs: ["build:coreneuron:nmodl:nvhpc:omp"] @@ -171,10 +184,18 @@ test:coreneuron:mod2c:nvhpc:acc: extends: [.ctest, .gpu_node] needs: ["build:coreneuron:mod2c:nvhpc:acc"] +test:coreneuron:mod2c:nvhpc:acc:debug: + extends: [.ctest, 
.gpu_node] + needs: ["build:coreneuron:mod2c:nvhpc:acc:debug"] + test:coreneuron:mod2c:nvhpc:acc:shared: extends: [.ctest, .gpu_node] needs: ["build:coreneuron:mod2c:nvhpc:acc:shared"] +test:coreneuron:mod2c:nvhpc:acc:shared: + extends: [.ctest, .gpu_node] + needs: ["build:coreneuron:mod2c:nvhpc:acc:shared:debug"] + test:coreneuron:mod2c:nvhpc:acc:unified: extends: [.ctest, .gpu_node] needs: ["build:coreneuron:mod2c:nvhpc:acc:unified"] From 9ce83c2fcff002473ed7a9ac5beb98ace9d526fe Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 28 Jul 2022 15:13:46 +0200 Subject: [PATCH 060/128] avoid if target (nv::target::is_device) --- coreneuron/sim/scopmath/errcodes.h | 39 ++++++++++++++++------- coreneuron/sim/scopmath/sparse_thread.hpp | 35 ++++++++------------ external/mod2c | 2 +- 3 files changed, 43 insertions(+), 33 deletions(-) diff --git a/coreneuron/sim/scopmath/errcodes.h b/coreneuron/sim/scopmath/errcodes.h index 5f32c5785..94d08f73c 100644 --- a/coreneuron/sim/scopmath/errcodes.h +++ b/coreneuron/sim/scopmath/errcodes.h @@ -1,16 +1,33 @@ -/****************************************************************************** - * - * File: errcodes.h - * - * Copyright (c) 1984, 1985, 1986, 1987, 1988, 1989, 1990 - * Duke University - * - * errcodes.h,v 1.1.1.1 1994/10/12 17:22:18 hines Exp - * - ******************************************************************************/ +/* +# ============================================================================= +# Originally errcodes.h from SCoP library, Copyright (c) 1984-90 Duke University +# ============================================================================= +# Subsequent extensive prototype and memory layout changes for CoreNEURON +# +# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL +# +# See top-level LICENSE file for details. +# =============================================================================. 
+*/ +#pragma once namespace coreneuron { extern int abort_run(int); -} +namespace scopmath { +/** @brief Flag to disable some code sections at compile time. + * + * Some methods, such as coreneuron::scopmath::sparse::getelm(...), decide at + * runtime whether they are simply accessors, or if they dynamically modify the + * matrix in question, possibly allocating new memory. Typically the second + * mode will be used during model initialisation, while the first will be used + * during computation/simulation. Compiling the more complicated code for the + * second mode can be problematic for targets such as GPU, where dynamic + * allocation and global state are complex. This enum is intended to be used as + * a template parameter to flag (at compile time) when this code can be + * omitted. + */ +enum struct enabled_code { all, compute_only }; +} // namespace scopmath +} // namespace coreneuron #define ROUNDOFF 1.e-20 #define ZERO 1.e-8 #define STEP 1.e-6 diff --git a/coreneuron/sim/scopmath/sparse_thread.hpp b/coreneuron/sim/scopmath/sparse_thread.hpp index 85580011e..8d84cbb0e 100644 --- a/coreneuron/sim/scopmath/sparse_thread.hpp +++ b/coreneuron/sim/scopmath/sparse_thread.hpp @@ -13,10 +13,6 @@ #include "coreneuron/mechanism/mech/mod2c_core_thread.hpp" #include "coreneuron/sim/scopmath/errcodes.h" -#ifdef __CUDACC__ -#include -#endif - namespace coreneuron { namespace scopmath { namespace sparse { @@ -71,7 +67,8 @@ inline void increase_order(SparseObj* so, unsigned row) { * biggest difference is that elements are no longer removed and this saves much * time allocating and freeing during the solve phase. 
*/ -inline Elm* getelm(SparseObj* so, unsigned row, unsigned col, Elm* new_elem) { +template +Elm* getelm(SparseObj* so, unsigned row, unsigned col, Elm* new_elem) { Elm *el, *elnext; unsigned vrow = so->varord[row]; @@ -94,12 +91,10 @@ inline Elm* getelm(SparseObj* so, unsigned row, unsigned col, Elm* new_elem) { } /* insert below el */ if (!new_elem) { -#ifdef __CUDACC__ - if target (nv::target::is_device) { + if constexpr (code_to_enable == enabled_code::compute_only) { + // Dynamic allocation should not happen during the compute phase. assert(false); - } else -#endif - { + } else { new_elem = new Elm{}; new_elem->value = new double[so->_cntml_padded]; increase_order(so, row); @@ -143,12 +138,9 @@ inline Elm* getelm(SparseObj* so, unsigned row, unsigned col, Elm* new_elem) { } /* insert above el */ if (!new_elem) { -#ifdef __CUDACC__ - if target (nv::target::is_device) { + if constexpr (code_to_enable == enabled_code::compute_only) { assert(false); - } else -#endif - { + } else { new_elem = new Elm{}; new_elem->value = new double[so->_cntml_padded]; increase_order(so, row); @@ -508,16 +500,13 @@ void create_coef_list(SparseObj* so, int n, SPFUN fun, _threadargsproto_) { fun(so, so->rhs, _threadargs_); // std::invoke in C++17 so->phase = 0; } -} // namespace sparse -} // namespace scopmath -// Methods that may be called from translated MOD files are kept outside the -// scopmath::sparse namespace. 
-inline double* _nrn_thread_getelm(SparseObj* so, int row, int col, int _iml) { +template +double* thread_getelm(SparseObj* so, int row, int col, int _iml) { if (!so->phase) { return so->coef_list[so->ngetcall[_iml]++]; } - Elm* el = scopmath::sparse::getelm(so, (unsigned) row, (unsigned) col, nullptr); + Elm* el = scopmath::sparse::getelm(so, (unsigned) row, (unsigned) col, nullptr); if (so->phase == 1) { so->ngetcall[_iml]++; } else { @@ -525,7 +514,11 @@ inline double* _nrn_thread_getelm(SparseObj* so, int row, int col, int _iml) { } return el->value; } +} // namespace sparse +} // namespace scopmath +// Methods that may be called from translated MOD files are kept outside the +// scopmath::sparse namespace. #define scopmath_sparse_s(arg) _p[scopmath_sparse_ix(s[arg])] #define scopmath_sparse_d(arg) _p[scopmath_sparse_ix(d[arg])] diff --git a/external/mod2c b/external/mod2c index d8507dec1..7d1557eec 160000 --- a/external/mod2c +++ b/external/mod2c @@ -1 +1 @@ -Subproject commit d8507dec1671b43b012e4e1ab36160e6da21aabb +Subproject commit 7d1557eecc8800c09ae1368a8a6401957b8bcb57 From 202a5f51f531db996765609c48028e842c5189e0 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 28 Jul 2022 15:35:58 +0200 Subject: [PATCH 061/128] submodule --- external/mod2c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/mod2c b/external/mod2c index 7d1557eec..c37aff7d0 160000 --- a/external/mod2c +++ b/external/mod2c @@ -1 +1 @@ -Subproject commit 7d1557eecc8800c09ae1368a8a6401957b8bcb57 +Subproject commit c37aff7d0bdefbee83ffcb4f2e0d58e9ad806864 From c77d7c286f1c524b5bb887f3343c066b5be95624 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 28 Jul 2022 17:44:57 +0200 Subject: [PATCH 062/128] tweaks --- .gitlab-ci.yml | 10 +++++++++- coreneuron/gpu/nrn_acc_manager.cpp | 29 +++++++++++++++-------------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index eb68083a1..e867b7eb1 100644 --- a/.gitlab-ci.yml 
+++ b/.gitlab-ci.yml @@ -192,7 +192,7 @@ test:coreneuron:mod2c:nvhpc:acc:shared: extends: [.ctest, .gpu_node] needs: ["build:coreneuron:mod2c:nvhpc:acc:shared"] -test:coreneuron:mod2c:nvhpc:acc:shared: +test:coreneuron:mod2c:nvhpc:acc:shared:debug: extends: [.ctest, .gpu_node] needs: ["build:coreneuron:mod2c:nvhpc:acc:shared:debug"] @@ -221,10 +221,18 @@ test:neuron:mod2c:nvhpc:acc: extends: [.test_neuron, .gpu_node] needs: ["build:neuron:mod2c:nvhpc:acc"] +test:neuron:mod2c:nvhpc:acc:debug: + extends: [.test_neuron, .gpu_node] + needs: ["build:neuron:mod2c:nvhpc:acc:debug"] + test:neuron:mod2c:nvhpc:acc:shared: extends: [.test_neuron, .gpu_node] needs: ["build:neuron:mod2c:nvhpc:acc:shared"] +test:neuron:mod2c:nvhpc:acc:shared:debug: + extends: [.test_neuron, .gpu_node] + needs: ["build:neuron:mod2c:nvhpc:acc:shared:debug"] + test:neuron:nmodl:nvhpc:omp: extends: [.test_neuron, .gpu_node] needs: ["build:neuron:nmodl:nvhpc:omp"] diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index d0862b31e..82ef53e0f 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -34,13 +34,13 @@ #ifdef CORENEURON_ENABLE_PRESENT_TABLE #include +#include #include #include -#include +#include namespace { -enum class byte : unsigned char {}; // std::byte in C++17 -std::map> present_table; -std::mutex present_table_mutex; +std::map> present_table; +std::shared_mutex present_table_mutex; } // namespace #endif @@ -56,8 +56,9 @@ void* cnrn_target_deviceptr_impl(void const* h_ptr) { if (!h_ptr) { return nullptr; } - // note no locking, undefined behaviour if you call this concurrently with - // the copyin/delete methods (which do lock) + // Concurrent calls to this method are safe, but they must be serialised + // w.r.t. calls to the cnrn_target_*_update_present_table methods. 
+ std::shared_lock _{present_table_mutex}; assert(!present_table.empty()); // prev(first iterator greater than h_ptr or last if not found) gives the first iterator less // than or equal to h_ptr @@ -66,10 +67,10 @@ void* cnrn_target_deviceptr_impl(void const* h_ptr) { return hp < entry.first; })); assert(iter != present_table.end()); - byte const* const h_byte_ptr{static_cast(h_ptr)}; - byte const* const h_start_of_block{iter->first}; + std::byte const* const h_byte_ptr{static_cast(h_ptr)}; + std::byte const* const h_start_of_block{iter->first}; std::size_t const block_size{iter->second.first}; - byte* const d_start_of_block{iter->second.second}; + std::byte* const d_start_of_block{iter->second.second}; assert(h_byte_ptr < h_start_of_block + block_size); return d_start_of_block + (h_byte_ptr - h_start_of_block); } @@ -78,16 +79,16 @@ void cnrn_target_copyin_update_present_table(void const* h_ptr, void* d_ptr, std assert(!d_ptr); return; } - std::lock_guard _{present_table_mutex}; - auto const result = present_table.emplace(static_cast(h_ptr), - std::make_pair(len, static_cast(d_ptr))); + std::lock_guard _{present_table_mutex}; + auto const result = present_table.emplace(static_cast(h_ptr), + std::make_pair(len, static_cast(d_ptr))); } void cnrn_target_delete_update_present_table(void const* h_ptr, std::size_t len) { if (!h_ptr) { return; } - std::lock_guard _{present_table_mutex}; - auto const iter = present_table.find(static_cast(h_ptr)); + std::lock_guard _{present_table_mutex}; + auto const iter = present_table.find(static_cast(h_ptr)); assert(iter != present_table.end()); assert(iter->second.first == len); present_table.erase(iter); From d6d9c0c7e8949ea1be1ec9e9820cf07dac02e637 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 28 Jul 2022 17:46:01 +0200 Subject: [PATCH 063/128] submodule --- external/mod2c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/mod2c b/external/mod2c index c37aff7d0..4f8df8877 160000 --- a/external/mod2c 
+++ b/external/mod2c @@ -1 +1 @@ -Subproject commit c37aff7d0bdefbee83ffcb4f2e0d58e9ad806864 +Subproject commit 4f8df887736f24c4d59262984f62312bb7851363 From 1eabb5610b43cb42ccf2ade0135c79de0807fd05 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 2 Aug 2022 15:51:47 +0200 Subject: [PATCH 064/128] fix non-dynamic MPI + caliper --- coreneuron/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 168fdf708..319d6b13b 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -136,6 +136,7 @@ if(CORENRN_ENABLE_MPI AND NOT CORENRN_ENABLE_MPI_DYNAMIC) target_include_directories( ${CORENRN_MPI_LIB_NAME} PRIVATE ${MPI_INCLUDE_PATH} ${CORENEURON_PROJECT_SOURCE_DIR} ${CORENEURON_PROJECT_BINARY_DIR}/generated) + target_link_libraries(${CORENRN_MPI_LIB_NAME} ${CORENRN_CALIPER_LIB}) set_property(TARGET ${CORENRN_MPI_LIB_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON) set(CORENRN_MPI_OBJ $) endif() From fb36084fcce9cc1439533961c118f42e684bf868 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 2 Aug 2022 16:30:57 +0200 Subject: [PATCH 065/128] clang-format --- coreneuron/mechanism/membfunc.hpp | 3 ++- coreneuron/mechanism/register_mech.cpp | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/coreneuron/mechanism/membfunc.hpp b/coreneuron/mechanism/membfunc.hpp index 87927780c..2a7c8f54e 100644 --- a/coreneuron/mechanism/membfunc.hpp +++ b/coreneuron/mechanism/membfunc.hpp @@ -40,7 +40,8 @@ struct Memb_func { int thread_size_; /* how many Datum needed in Memb_list if vectorized */ void (*thread_mem_init_)(ThreadDatum*); /* after Memb_list._thread is allocated */ void (*thread_cleanup_)(ThreadDatum*); /* before Memb_list._thread is freed */ - void (*thread_table_check_)(int, int, double*, Datum*, ThreadDatum*, NrnThread*, Memb_list*, int); + void ( + *thread_table_check_)(int, int, double*, Datum*, ThreadDatum*, NrnThread*, Memb_list*, int); int is_point; void 
(*setdata_)(double*, Datum*); int* dparam_semantics; /* for nrncore writing. */ diff --git a/coreneuron/mechanism/register_mech.cpp b/coreneuron/mechanism/register_mech.cpp index 20fbcd424..4f545998a 100644 --- a/coreneuron/mechanism/register_mech.cpp +++ b/coreneuron/mechanism/register_mech.cpp @@ -417,8 +417,9 @@ void _nrn_thread_reg1(int i, void (*f)(ThreadDatum*)) { corenrn.get_memb_func(i).thread_mem_init_ = f; } -void _nrn_thread_table_reg(int i, - void (*f)(int, int, double*, Datum*, ThreadDatum*, NrnThread*, Memb_list*, int)) { +void _nrn_thread_table_reg( + int i, + void (*f)(int, int, double*, Datum*, ThreadDatum*, NrnThread*, Memb_list*, int)) { if (i == -1) return; From ca987194715ee6dd613c7accdf563c50206a921e Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 3 Aug 2022 09:48:54 +0200 Subject: [PATCH 066/128] libcoreneuron.so -> libcorenrnmech.so, try and fix static builds --- CMake/MakefileBuildOptions.cmake | 23 ++++++++++++++++++----- CMake/OpenAccHelper.cmake | 29 +++++++++++++++++++---------- CMake/coreneuron-config.cmake.in | 2 +- CMakeLists.txt | 2 +- coreneuron/CMakeLists.txt | 12 ++++++------ extra/nrnivmodl-core.in | 2 +- extra/nrnivmodl_core_makefile.in | 6 +++--- 7 files changed, 49 insertions(+), 27 deletions(-) diff --git a/CMake/MakefileBuildOptions.cmake b/CMake/MakefileBuildOptions.cmake index 7fec40860..51785c0f9 100644 --- a/CMake/MakefileBuildOptions.cmake +++ b/CMake/MakefileBuildOptions.cmake @@ -29,10 +29,17 @@ set(NMODL_ISPC_BACKEND_ARGS "host --ispc") set(NMODL_ACC_BACKEND_ARGS "host --c acc --oacc") # ============================================================================= -# Construct the linker arguments that are used inside nrnivmodl-core (to build libcoreneuron from +# Construct the linker arguments that are used inside nrnivmodl-core (to build libcorenrnmech from # libcoreneuron-core, libcoreneuron-cuda and mechanism object files) and inside nrnivmodl (to link -# NEURON's special against CoreNEURON's 
libcoreneuron). +# NEURON's special against CoreNEURON's libcorenrnmech). # ============================================================================= +if(NOT CORENRN_ENABLE_SHARED) + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -Wl,--whole-archive") +endif() +set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -lcorenrnmech") +if(NOT CORENRN_ENABLE_SHARED) + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -Wl,--no-whole-archive") +endif() # Essentially we "just" want to unpack the CMake dependencies of the `coreneuron-core` target into a # plain string that we can bake into the Makefiles in both NEURON and CoreNEURON. function(coreneuron_process_library_path library) @@ -86,6 +93,11 @@ function(coreneuron_process_target target) coreneuron_process_library_path("${target}") endfunction() coreneuron_process_target(coreneuron-core) +# In static builds then NEURON uses dlopen(nullptr, ...) to look for the corenrn_embedded_run +# symbol, which comes from libcoreneuron-core.a and gets included in libcorenrnmech. 
+if(NOT CORENRN_ENABLE_SHARED) + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -rdynamic") +endif() get_property(CORENRN_EXTRA_COMPILE_FLAGS GLOBAL PROPERTY CORENRN_EXTRA_COMPILE_FLAGS) get_property(CORENRN_LIB_LINK_FLAGS GLOBAL PROPERTY CORENRN_LIB_LINK_FLAGS) @@ -98,8 +110,9 @@ if(CORENRN_CXX_LINKER_SUPPORTS_START_GROUP) set(CORENEURON_LINKER_START_GROUP -Wl,--start-group) set(CORENEURON_LINKER_END_GROUP -Wl,--end-group) endif() -# Things that used to be in CORENRN_LIB_LINK_FLAGS: -rdynamic -lrt -Wl,--whole-archive -# -L${CMAKE_HOST_SYSTEM_PROCESSOR} -Wl,--no-whole-archive -L${caliper_LIB_DIR} -l${CALIPER_LIB} + +# Things that used to be in CORENRN_LIB_LINK_FLAGS: -lrt -L${CMAKE_HOST_SYSTEM_PROCESSOR} +# -L${caliper_LIB_DIR} -l${CALIPER_LIB} # ============================================================================= # Turn CORENRN_COMPILE_DEFS into a list of -DFOO[=BAR] options. @@ -107,7 +120,7 @@ endif() list(TRANSFORM CORENRN_COMPILE_DEFS PREPEND -D OUTPUT_VARIABLE CORENRN_COMPILE_DEF_FLAGS) # ============================================================================= -# Extra link flags that we need to include when linking libcoreneuron.{a,so} in CoreNEURON but that +# Extra link flags that we need to include when linking libcorenrnmech.{a,so} in CoreNEURON but that # do not need to be passed to NEURON to use when linking nrniv/special (why?) # ============================================================================= string(JOIN " " CORENRN_COMMON_LDFLAGS ${CORENRN_LIB_LINK_FLAGS} ${CORENRN_EXTRA_LINK_FLAGS}) diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index f50f1436a..f232d2bab 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -67,7 +67,14 @@ if(CORENRN_ENABLE_GPU) # and offloaded OpenACC/OpenMP code. Using -cuda when compiling seems to improve error messages in # some cases, and to be recommended by NVIDIA. 
We pass -gpu=cudaX.Y to ensure that OpenACC/OpenMP # code is compiled with the same CUDA version as the explicit CUDA code. - set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo") + set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT}") + # Combining -gpu=lineinfo with -O0 -g gives a warning: Conflicting options --device-debug and + # --generate-line-info specified, ignoring --generate-line-info option + if(CMAKE_BUILD_TYPE STREQUAL "Debug") + string(APPEND NVHPC_ACC_COMP_FLAGS ",debug") + else() + string(APPEND NVHPC_ACC_COMP_FLAGS ",lineinfo") + endif() # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA # code. Otherwise there may be confusing linker errors. We cannot rely on nvcc and nvc++ using the # same default compute capabilities as each other, particularly on GPU-less build machines. @@ -96,17 +103,19 @@ endif() # ============================================================================= # Initialise global property that will be used by NEURON to link with CoreNEURON # ============================================================================= -if(CORENRN_ENABLE_GPU AND CORENRN_ENABLE_SHARED) - # Because of - # https://forums.developer.nvidia.com/t/dynamically-loading-an-openacc-enabled-shared-library-from-an-executable-compiled-with-nvc-does-not-work/210968 - # we have to tell NEURON to pass OpenACC flags when linking special, otherwise we end up with an - # `nrniv` binary that cannot dynamically load CoreNEURON in shared-library builds. - # CORENRN_LIB_LINK_FLAGS is the full set of flags needed to link against libcoreneuron.so: - # something like `-acc -lcoreneuron ...`. CORENRN_NEURON_LINK_FLAGS only contains flags that need +if(CORENRN_ENABLE_GPU) + # CORENRN_LIB_LINK_FLAGS is the full set of flags needed to link against libcorenrnmech.so: + # something like `-acc -lcorenrnmech ...`. 
CORENRN_NEURON_LINK_FLAGS only contains flags that need # to be used when linking the NEURON Python module to make sure it is able to dynamically load - # libcoreneuron.so. + # libcorenrnmech.so. set_property(GLOBAL PROPERTY CORENRN_LIB_LINK_FLAGS "${NVHPC_ACC_COMP_FLAGS}") - set_property(GLOBAL PROPERTY CORENRN_NEURON_LINK_FLAGS "${NVHPC_ACC_COMP_FLAGS}") + # Because of + if(CORENRN_ENABLE_SHARED) + # https://forums.developer.nvidia.com/t/dynamically-loading-an-openacc-enabled-shared-library-from-an-executable-compiled-with-nvc-does-not-work/210968 + # we have to tell NEURON to pass OpenACC flags when linking special, otherwise we end up with an + # `nrniv` binary that cannot dynamically load CoreNEURON in shared-library builds. + set_property(GLOBAL PROPERTY CORENRN_NEURON_LINK_FLAGS "${NVHPC_ACC_COMP_FLAGS}") + endif() endif() if(CORENRN_HAVE_NVHPC_COMPILER) diff --git a/CMake/coreneuron-config.cmake.in b/CMake/coreneuron-config.cmake.in index 4fe3988c3..9f7ac4997 100644 --- a/CMake/coreneuron-config.cmake.in +++ b/CMake/coreneuron-config.cmake.in @@ -21,7 +21,7 @@ set(CORENRN_NEURON_LINK_FLAGS "@CORENRN_NEURON_LINK_FLAGS@") find_path(CORENEURON_INCLUDE_DIR "coreneuron/coreneuron.h" HINTS "${CONFIG_PATH}/../../include") find_path( CORENEURON_LIB_DIR - NAMES libcoreneuron.a libcoreneuron.so libcoreneuron.dylib + NAMES libcorenrnmech.a libcorenrnmech.so libcorenrnmech.dylib HINTS "${CONFIG_PATH}/../../lib") include(${CONFIG_PATH}/coreneuron.cmake) diff --git a/CMakeLists.txt b/CMakeLists.txt index f3b1e7ee9..ab3de7345 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -479,7 +479,7 @@ add_subdirectory(coreneuron) # Extract the various compiler option strings to use inside nrnivmodl-core. 
Sets the global property # CORENRN_LIB_LINK_FLAGS, which contains the arguments that must be added to the link line for -# `special` to link against `libcoreneuron.{a,so}` +# `special` to link against `libcorenrnmech.{a,so}` include(MakefileBuildOptions) # Generate the nrnivmodl-core script and makefile using the options from MakefileBuildOptions diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 319d6b13b..0bc986ae6 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -145,17 +145,17 @@ endif() # installed as a static library, and then the nrnivmodl-core workflow extracts the object files from # it and does one of the following: # -# * shared build: creates libcoreneuron.so from these objects plus those from the translated MOD +# * shared build: creates libcorenrnmech.so from these objects plus those from the translated MOD # files -# * static build: creates a (temporary, does not get installed) libcoreneuron.a from these objects +# * static build: creates a (temporary, does not get installed) libcorenrnmech.a from these objects # plus those from the translated MOD files, then statically links that into special-core # (nrniv-core) # # This scheme means that both core and mechanism .o files are linked in a single step, which is # important for GPU linking. It does, however, mean that the core code is installed twice, once in -# libcoreneuron-core.a and once in libcoreneuron.so (shared) or nrniv-core (static). In a GPU build, -# libcoreneuron-cuda.{a,so} is also linked to provide the CUDA implementation of the Hines solver. -# This cannot be included in coreneuron-core because of this issue: +# libcoreneuron-core.a and once in libcorenrnmech.so (shared) or nrniv-core (static). In a GPU +# build, libcoreneuron-cuda.{a,so} is also linked to provide the CUDA implementation of the Hines +# solver. 
This cannot be included in coreneuron-core because of this issue: # https://forums.developer.nvidia.com/t/cannot-dynamically-load-a-shared-library-containing-both-openacc-and-cuda-code/210972 add_library(coreneuron-core STATIC ${CORENEURON_CODE_FILES} ${CORENRN_MPI_OBJ}) if(CORENRN_ENABLE_GPU) @@ -307,7 +307,7 @@ if(CORENRN_ENABLE_GPU) endif() # Create an extra target for internal use that unit tests and so on can depend on. -# ${corenrn_mech_library} is libcoreneuron.{a,so}, which contains both the compiled default +# ${corenrn_mech_library} is libcorenrnmech.{a,so}, which contains both the compiled default # mechanisms and the content of libcoreneuron-core.a. add_library(coreneuron-all INTERFACE) target_link_libraries(coreneuron-all INTERFACE "${corenrn_mech_library}") diff --git a/extra/nrnivmodl-core.in b/extra/nrnivmodl-core.in index 08804b159..742409d88 100755 --- a/extra/nrnivmodl-core.in +++ b/extra/nrnivmodl-core.in @@ -83,7 +83,7 @@ while getopts "n:m:a:d:i:l:Vp:r:b:h" OPT; do echo " -r <0|1> Enable NRN_PRCELLSTATE mechanism. Default: @CORENRN_NRN_PRCELLSTATE@." echo " -V Verbose: show commands executed by make" echo " -p Number of parallel builds (Default: $PARALLEL_BUILDS)" - echo " -b libcoreneuron library type" + echo " -b libcorenrnmech library type" exit 0;; ?) 
exit 1;; diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in index 2c732a288..2804a297f 100644 --- a/extra/nrnivmodl_core_makefile.in +++ b/extra/nrnivmodl_core_makefile.in @@ -99,7 +99,7 @@ ifeq (@CORENRN_ENABLE_NMODL@, ON) endif # name of the mechanism library with suffix if provided -COREMECH_LIB_NAME = coreneuron$(if $(MECHLIB_SUFFIX),_$(MECHLIB_SUFFIX),) +COREMECH_LIB_NAME = corenrnmech$(if $(MECHLIB_SUFFIX),_$(MECHLIB_SUFFIX),) COREMECH_LIB_PATH = $(OUTPUT_DIR)/lib$(COREMECH_LIB_NAME)$(LIB_SUFFIX) # Various header and C++/Object file @@ -206,7 +206,7 @@ $(SPECIAL_EXE): $(corenrnmech_lib_target) @printf " => $(C_GREEN)Binary$(C_RESET) creating $(SPECIAL_EXE)\n" $(CXX_LINK_EXE_CMD) -o $(SPECIAL_EXE) $(CORENRN_SHARE_CORENRN_DIR)/coreneuron.cpp \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ - -L$(OUTPUT_DIR) -l$(COREMECH_LIB_NAME) $(LDFLAGS) \ + -L$(OUTPUT_DIR) $(LDFLAGS) \ -L$(CORENRN_LIB_DIR) \ -Wl,-rpath,'$(LIB_RPATH)' -Wl,-rpath,$(CORENRN_LIB_DIR) -Wl,-rpath,'$(INSTALL_LIB_RPATH)' @@ -230,7 +230,7 @@ coremech_lib_shared: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always # build static library of mechanisms coremech_lib_static: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always - # make a libcoreneuron.a by copying libcoreneuron-core.a and then appending + # make a libcorenrnmech.a by copying libcoreneuron-core.a and then appending # the newly compiled objects cp $(CORENRN_LIB_DIR)/libcoreneuron-core.a ${COREMECH_LIB_PATH} ar r ${COREMECH_LIB_PATH} $(ENGINEMECH_OBJ) $(ALL_OBJS) From f33d56ef3d4d14dea588919f6ab6fe74515a9329 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 3 Aug 2022 10:37:23 +0200 Subject: [PATCH 067/128] fixups --- CMake/MakefileBuildOptions.cmake | 17 +++++++++++------ coreneuron/CMakeLists.txt | 2 +- extra/nrnivmodl_core_makefile.in | 2 +- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/CMake/MakefileBuildOptions.cmake b/CMake/MakefileBuildOptions.cmake index 51785c0f9..77bd4c9a3 100644 --- 
a/CMake/MakefileBuildOptions.cmake +++ b/CMake/MakefileBuildOptions.cmake @@ -31,7 +31,10 @@ set(NMODL_ACC_BACKEND_ARGS "host --c acc --oacc") # ============================================================================= # Construct the linker arguments that are used inside nrnivmodl-core (to build libcorenrnmech from # libcoreneuron-core, libcoreneuron-cuda and mechanism object files) and inside nrnivmodl (to link -# NEURON's special against CoreNEURON's libcorenrnmech). +# NEURON's special against CoreNEURON's libcorenrnmech). These are stored in two global properties: +# CORENRN_LIB_LINK_FLAGS (used by NEURON/nrnivmodl to link special against CoreNEURON) and +# CORENRN_LIB_LINK_DEP_FLAGS (used by CoreNEURON/nrnivmodl-core to link libcorenrnmech.so). +# Conceptually: CORENRN_LIB_LINK_FLAGS = -lcorenrnmech $CORENRN_LIB_LINK_DEP_FLAGS # ============================================================================= if(NOT CORENRN_ENABLE_SHARED) set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -Wl,--whole-archive") @@ -46,15 +49,15 @@ function(coreneuron_process_library_path library) get_filename_component(library_dir "${library}" DIRECTORY) if(NOT library_dir) # In case target is not a target but is just the name of a library, e.g. "dl" - set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -l${library}") + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_DEP_FLAGS " -l${library}") elseif("${library_dir}" MATCHES "^(/lib|/lib64|/usr/lib|/usr/lib64)$") # e.g. 
/usr/lib64/libpthread.so -> -lpthread get_filename_component(libname ${library} NAME_WE) string(REGEX REPLACE "^lib" "" libname ${libname}) - set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -l${libname}") + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_DEP_FLAGS " -l${libname}") else() # It's a full path, include that on the line - set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " ${library}") + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_DEP_FLAGS " ${library}") endif() endfunction() function(coreneuron_process_target target) @@ -79,7 +82,7 @@ function(coreneuron_process_target target) else() # This is probably another of our libraries, like -lcoreneuron-cuda. We might need to add -L # and an RPATH later. - set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -l${target}") + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_DEP_FLAGS " -l${target}") endif() endif() get_target_property(target_libraries ${target} LINK_LIBRARIES) @@ -93,6 +96,8 @@ function(coreneuron_process_target target) coreneuron_process_library_path("${target}") endfunction() coreneuron_process_target(coreneuron-core) +get_property(CORENRN_LIB_LINK_DEP_FLAGS GLOBAL PROPERTY CORENRN_LIB_LINK_DEP_FLAGS) +set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " ${CORENRN_LIB_LINK_DEP_FLAGS}") # In static builds then NEURON uses dlopen(nullptr, ...) to look for the corenrn_embedded_run # symbol, which comes from libcoreneuron-core.a and gets included in libcorenrnmech. if(NOT CORENRN_ENABLE_SHARED) @@ -123,7 +128,7 @@ list(TRANSFORM CORENRN_COMPILE_DEFS PREPEND -D OUTPUT_VARIABLE CORENRN_COMPILE_D # Extra link flags that we need to include when linking libcorenrnmech.{a,so} in CoreNEURON but that # do not need to be passed to NEURON to use when linking nrniv/special (why?) 
# ============================================================================= -string(JOIN " " CORENRN_COMMON_LDFLAGS ${CORENRN_LIB_LINK_FLAGS} ${CORENRN_EXTRA_LINK_FLAGS}) +string(JOIN " " CORENRN_COMMON_LDFLAGS ${CORENRN_LIB_LINK_DEP_FLAGS} ${CORENRN_EXTRA_LINK_FLAGS}) if(CORENRN_SANITIZER_LIBRARY_DIR) string(APPEND CORENRN_COMMON_LDFLAGS " -Wl,-rpath,${CORENRN_SANITIZER_LIBRARY_DIR}") endif() diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 0bc986ae6..57af85b6e 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -288,7 +288,7 @@ file(GLOB modfiles "${modfile_directory}/*.mod") # symbols in the translated versions of default .mod files set(nrniv_core_prefix "${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}") set(corenrn_mech_library - "${nrniv_core_prefix}/${CMAKE_${COMPILE_LIBRARY_TYPE}_LIBRARY_PREFIX}coreneuron${CMAKE_${COMPILE_LIBRARY_TYPE}_LIBRARY_SUFFIX}" + "${nrniv_core_prefix}/${CMAKE_${COMPILE_LIBRARY_TYPE}_LIBRARY_PREFIX}corenrnmech${CMAKE_${COMPILE_LIBRARY_TYPE}_LIBRARY_SUFFIX}" ) set(output_binaries "${nrniv_core_prefix}/special-core" "${corenrn_mech_library}") diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in index 2804a297f..4d7df0388 100644 --- a/extra/nrnivmodl_core_makefile.in +++ b/extra/nrnivmodl_core_makefile.in @@ -206,7 +206,7 @@ $(SPECIAL_EXE): $(corenrnmech_lib_target) @printf " => $(C_GREEN)Binary$(C_RESET) creating $(SPECIAL_EXE)\n" $(CXX_LINK_EXE_CMD) -o $(SPECIAL_EXE) $(CORENRN_SHARE_CORENRN_DIR)/coreneuron.cpp \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ - -L$(OUTPUT_DIR) $(LDFLAGS) \ + -L$(OUTPUT_DIR) -l$(COREMECH_LIB_NAME) $(LDFLAGS) \ -L$(CORENRN_LIB_DIR) \ -Wl,-rpath,'$(LIB_RPATH)' -Wl,-rpath,$(CORENRN_LIB_DIR) -Wl,-rpath,'$(INSTALL_LIB_RPATH)' From b2e7f30295958c973ad5676a0c52e634ac5800da Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 3 Aug 2022 15:49:21 +0200 Subject: [PATCH 068/128] typo --- coreneuron/utils/memory.h | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/coreneuron/utils/memory.h b/coreneuron/utils/memory.h index 286cfa5f2..9e612680c 100644 --- a/coreneuron/utils/memory.h +++ b/coreneuron/utils/memory.h @@ -26,7 +26,7 @@ namespace coreneuron { * @brief Check if GPU support is enabled. * * This returns true if GPU support was enabled at compile time and at runtime - * via coreneuron.gpu = True and/or --gpu, otherwise it returnss false. + * via coreneuron.gpu = True and/or --gpu, otherwise it returns false. */ bool gpu_enabled(); From d3545ff005d87c72d7569d467777e3dcb63ab1eb Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 3 Aug 2022 15:56:23 +0200 Subject: [PATCH 069/128] add fallback logic for cmake<3.18 --- CMake/MakefileBuildOptions.cmake | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/CMake/MakefileBuildOptions.cmake b/CMake/MakefileBuildOptions.cmake index 77bd4c9a3..b6f872115 100644 --- a/CMake/MakefileBuildOptions.cmake +++ b/CMake/MakefileBuildOptions.cmake @@ -109,8 +109,13 @@ get_property(CORENRN_LIB_LINK_FLAGS GLOBAL PROPERTY CORENRN_LIB_LINK_FLAGS) # Detect if --start-group and --end-group are valid linker arguments. These are typically needed # when linking mutually-dependent .o files (or where we don't know the correct order) on Linux, but # they are not needed *or* recognised by the macOS linker. 
-include(CheckLinkerFlag) # requires CMake 3.18 -check_linker_flag(CXX -Wl,--start-group CORENRN_CXX_LINKER_SUPPORTS_START_GROUP) +if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) + include(CheckLinkerFlag) + check_linker_flag(CXX -Wl,--start-group CORENRN_CXX_LINKER_SUPPORTS_START_GROUP) +elseif(CMAKE_SYSTEM_NAME MATCHES Linux) + # Assume that --start-group and --end-group are only supported on Linux + set(CORENRN_CXX_LINKER_SUPPORTS_START_GROUP ON) +endif() if(CORENRN_CXX_LINKER_SUPPORTS_START_GROUP) set(CORENEURON_LINKER_START_GROUP -Wl,--start-group) set(CORENEURON_LINKER_END_GROUP -Wl,--end-group) From 8db9162792c5269bfa60bd178518b11dece12683 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 4 Aug 2022 13:09:23 +0200 Subject: [PATCH 070/128] Do not enable OpenMP in shared/OpenACC builds. --- .gitlab-ci.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e867b7eb1..3ba880c40 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -106,15 +106,17 @@ build:coreneuron:mod2c:nvhpc:acc:debug: variables: SPACK_PACKAGE_SPEC: +caliper+gpu+openmp~shared+tests~legacy-unit build_type=Debug +# Shared + OpenACC + OpenMP host threading has problems build:coreneuron:mod2c:nvhpc:acc:shared: extends: [.build_coreneuron, .spack_nvhpc] variables: - SPACK_PACKAGE_SPEC: +caliper+gpu+openmp+shared+tests~legacy-unit build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: +caliper+gpu~openmp+shared+tests~legacy-unit build_type=RelWithDebInfo +# Shared + OpenACC + OpenMP host threading has problems build:coreneuron:mod2c:nvhpc:acc:shared:debug: extends: [.build_coreneuron, .spack_nvhpc] variables: - SPACK_PACKAGE_SPEC: +caliper+gpu+openmp+shared+tests~legacy-unit build_type=Debug + SPACK_PACKAGE_SPEC: +caliper+gpu~openmp+shared+tests~legacy-unit build_type=Debug # Build CoreNEURON with Unified Memory on GPU build:coreneuron:mod2c:nvhpc:acc:unified: From fc465940f2735c7a1db34ace159b16e9667fefef Mon Sep 17 00:00:00 2001 From: Olli Lupton 
Date: Thu, 4 Aug 2022 13:30:38 +0200 Subject: [PATCH 071/128] Add rpaths inside nrnivmodl-core. --- CMake/MakefileBuildOptions.cmake | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/CMake/MakefileBuildOptions.cmake b/CMake/MakefileBuildOptions.cmake index b6f872115..e4c658349 100644 --- a/CMake/MakefileBuildOptions.cmake +++ b/CMake/MakefileBuildOptions.cmake @@ -51,13 +51,16 @@ function(coreneuron_process_library_path library) # In case target is not a target but is just the name of a library, e.g. "dl" set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_DEP_FLAGS " -l${library}") elseif("${library_dir}" MATCHES "^(/lib|/lib64|/usr/lib|/usr/lib64)$") - # e.g. /usr/lib64/libpthread.so -> -lpthread + # e.g. /usr/lib64/libpthread.so -> -lpthread TODO: consider using + # https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_IMPLICIT_LINK_DIRECTORIES.html, or + # dropping this special case entirely get_filename_component(libname ${library} NAME_WE) string(REGEX REPLACE "^lib" "" libname ${libname}) set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_DEP_FLAGS " -l${libname}") else() # It's a full path, include that on the line - set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_DEP_FLAGS " ${library}") + set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_DEP_FLAGS + " -Wl,-rpath,${library_dir} ${library}") endif() endfunction() function(coreneuron_process_target target) From 9c96a36e4d68241c00908acbdc6f89c61aea00b4 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 4 Aug 2022 16:21:02 +0200 Subject: [PATCH 072/128] accept a private destructor function pointer from generated mechanisms --- coreneuron/gpu/nrn_acc_manager.cpp | 4 ---- coreneuron/io/nrn_setup.cpp | 9 ++++----- coreneuron/mechanism/capac.cpp | 11 ++++++----- coreneuron/mechanism/eion.cpp | 9 +++++---- coreneuron/mechanism/mechanism.hpp | 8 +++++++- coreneuron/mechanism/membfunc.hpp | 10 ++++++++-- coreneuron/mechanism/register_mech.cpp | 10 
+++++++--- external/mod2c | 2 +- 8 files changed, 38 insertions(+), 25 deletions(-) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index 82ef53e0f..4c5f28b67 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -312,10 +312,6 @@ static void delete_ml_from_device(Memb_list* ml, int type) { int pcnt = nrn_soa_padded_size(n, SOA_LAYOUT) * szdp; cnrn_target_delete(ml->pdata, pcnt); } - if (ml->global_variables) { - // std::byte* in C++17 - cnrn_target_delete(static_cast(ml->global_variables), ml->global_variables_size); - } cnrn_target_delete(ml->nodeindices, n); cnrn_target_delete(ml); diff --git a/coreneuron/io/nrn_setup.cpp b/coreneuron/io/nrn_setup.cpp index f34a489c1..a7ee2fdf8 100644 --- a/coreneuron/io/nrn_setup.cpp +++ b/coreneuron/io/nrn_setup.cpp @@ -761,11 +761,10 @@ void nrn_cleanup() { ml->instance = nullptr; } - if (ml->global_variables) { - std::cout << "Cannot generically free Memb_list::global_variables, leaking it" - << std::endl; - // free(ml->global_variables); - ml->global_variables = nullptr; + // Destroy the global variables struct allocated in nrn_init + if (auto* const priv_dtor = corenrn.get_memb_func(tml->index).private_destructor) { + (*priv_dtor)(nt, ml, tml->index); + assert(!ml->global_variables); } NetReceiveBuffer_t* nrb = ml->_net_receive_buffer; diff --git a/coreneuron/mechanism/capac.cpp b/coreneuron/mechanism/capac.cpp index 42c65cb18..5333767c5 100644 --- a/coreneuron/mechanism/capac.cpp +++ b/coreneuron/mechanism/capac.cpp @@ -32,12 +32,13 @@ void capacitance_reg(void) { /* all methods deal with capacitance in special ways */ register_mech(mechanism, nrn_alloc_capacitance, - (mod_f_t) 0, - (mod_f_t) 0, - (mod_f_t) 0, - (mod_f_t) nrn_init_capacitance, + nullptr, + nullptr, + nullptr, + nrn_init_capacitance, -1, - 1); + 1, + nullptr); int mechtype = nrn_get_mechtype(mechanism[1]); _nrn_layout_reg(mechtype, SOA_LAYOUT); hoc_register_prop_size(mechtype, 
nparm, 0); diff --git a/coreneuron/mechanism/eion.cpp b/coreneuron/mechanism/eion.cpp index deab46627..350b4ff90 100644 --- a/coreneuron/mechanism/eion.cpp +++ b/coreneuron/mechanism/eion.cpp @@ -94,11 +94,12 @@ void ion_reg(const char* name, double valence) { register_mech((const char**) mechanism, nrn_alloc_ion, nrn_cur_ion, - (mod_f_t) 0, - (mod_f_t) 0, - (mod_f_t) nrn_init_ion, + nullptr, + nullptr, + nrn_init_ion, -1, - 1); + 1, + nullptr); mechtype = nrn_get_mechtype(mechanism[1]); _nrn_layout_reg(mechtype, SOA_LAYOUT); hoc_register_prop_size(mechtype, nparm, 1); diff --git a/coreneuron/mechanism/mechanism.hpp b/coreneuron/mechanism/mechanism.hpp index 1c177976c..caa895a0a 100644 --- a/coreneuron/mechanism/mechanism.hpp +++ b/coreneuron/mechanism/mechanism.hpp @@ -143,8 +143,14 @@ struct Memb_list { NetSendBuffer_t* _net_send_buffer = nullptr; int nodecount; /* actual node count */ int _nodecount_padded; + // Not obvious that these need to be distinct (i.e. we could just have + // `instance` and `instance_size`, and use them in mod2c for global + // variables while NMODL could use the existing instance struct for globals + // too). nrn_acc_manager.cpp could handle data movement to/from the + // accelerator if the "constructor" in the translated MOD file code was + // called before the main nrn_acc_manager methods that copy + // thread/mechanism data to the device. 
void* instance = nullptr; /* mechanism instance struct from NMODL */ void* global_variables = nullptr; /* global variables struct for each mechanism */ - int global_variables_size = 0; /* size of global variables struct in bytes */ }; } // namespace coreneuron diff --git a/coreneuron/mechanism/membfunc.hpp b/coreneuron/mechanism/membfunc.hpp index 2a7c8f54e..64b9443c5 100644 --- a/coreneuron/mechanism/membfunc.hpp +++ b/coreneuron/mechanism/membfunc.hpp @@ -35,6 +35,10 @@ struct Memb_func { mod_f_t initialize; mod_f_t constructor; mod_f_t destructor; /* only for point processes */ + // This is used for CoreNEURON-internal cleanup; it is kept separate from + // the DESTRUCTOR function just above (which apparently is only for point + // processes) for simplicity; + mod_f_t private_destructor; Symbol* sym; int vectorized; int thread_size_; /* how many Datum needed in Memb_list if vectorized */ @@ -91,7 +95,8 @@ extern int register_mech(const char** m, mod_f_t stat, mod_f_t initialize, int nrnpointerindex, - int vectorized); + int vectorized, + mod_f_t private_destructor); extern int point_register_mech(const char**, mod_alloc_t alloc, mod_f_t cur, @@ -101,7 +106,8 @@ extern int point_register_mech(const char**, int nrnpointerindex, mod_f_t constructor, mod_f_t destructor, - int vectorized); + int vectorized, + mod_f_t private_destructor); extern void register_constructor(mod_f_t constructor); using NetBufReceive_t = void (*)(NrnThread*); extern void hoc_register_net_receive_buffering(NetBufReceive_t, int); diff --git a/coreneuron/mechanism/register_mech.cpp b/coreneuron/mechanism/register_mech.cpp index 4f545998a..44d4b5f8f 100644 --- a/coreneuron/mechanism/register_mech.cpp +++ b/coreneuron/mechanism/register_mech.cpp @@ -118,7 +118,8 @@ int register_mech(const char** m, mod_f_t stat, mod_f_t initialize, int /* nrnpointerindex */, - int vectorized) { + int vectorized, + mod_f_t private_destructor) { auto& memb_func = corenrn.get_memb_funcs(); int type = 
nrn_get_mechtype(m[1]); @@ -144,6 +145,7 @@ int register_mech(const char** m, memb_func[type].initialize = initialize; memb_func[type].constructor = nullptr; memb_func[type].destructor = nullptr; + memb_func[type].private_destructor = private_destructor; #if VECTORIZE memb_func[type].vectorized = vectorized ? 1 : 0; memb_func[type].thread_size_ = vectorized ? (vectorized - 1) : 0; @@ -343,9 +345,11 @@ int point_register_mech(const char** m, int nrnpointerindex, mod_f_t constructor, mod_f_t destructor, - int vectorized) { + int vectorized, + mod_f_t private_destructor) { const Symbol* s = m[1]; - register_mech(m, alloc, cur, jacob, stat, initialize, nrnpointerindex, vectorized); + register_mech( + m, alloc, cur, jacob, stat, initialize, nrnpointerindex, vectorized, private_destructor); register_constructor(constructor); register_destructor(destructor); return point_reg_helper(s); diff --git a/external/mod2c b/external/mod2c index 4f8df8877..eb9a42f79 160000 --- a/external/mod2c +++ b/external/mod2c @@ -1 +1 @@ -Subproject commit 4f8df887736f24c4d59262984f62312bb7851363 +Subproject commit eb9a42f79d4a9ad82b06f5a080e1927a3f7c5f9f From 37b9291dcc0702d7074d9288420def78547e5277 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 4 Aug 2022 18:05:52 +0200 Subject: [PATCH 073/128] Support private constructor. - Private in the sense that it is for CoreNEURON-internal usage (in collaboration with mod2c/nmodl) and not visible to MOD files. - Storage for global variables is now allocated there, so it is already visible when TABLE statements are initialised (before nrn_init is called for the first time). - This means that the global variable structure is copied to/from the device in nrn_acc_manager.cpp, just like the other members of Memb_list. 
--- coreneuron/gpu/nrn_acc_manager.cpp | 14 ++++++++++++++ coreneuron/io/phase2.cpp | 3 ++- coreneuron/mechanism/capac.cpp | 1 + coreneuron/mechanism/eion.cpp | 1 + coreneuron/mechanism/mechanism.hpp | 1 + coreneuron/mechanism/membfunc.hpp | 14 +++++++++----- coreneuron/mechanism/patternstim.cpp | 8 ++++++-- coreneuron/mechanism/register_mech.cpp | 19 ++++++++++++++----- coreneuron/sim/multicore.cpp | 7 ++++++- coreneuron/sim/multicore.hpp | 3 ++- external/mod2c | 2 +- 11 files changed, 57 insertions(+), 16 deletions(-) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index 4c5f28b67..9501a758d 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -141,6 +141,13 @@ static Memb_list* copy_ml_to_device(const Memb_list* ml, int type, double* dml_d auto d_ml = cnrn_target_copyin(ml); + if (ml->global_variables) { + assert(ml->global_variables_size); + void* d_glob_vars = cnrn_target_copyin(static_cast(ml->global_variables), + ml->global_variables_size); + cnrn_target_memcpy_to_device(&(d_ml->global_variables), &d_glob_vars); + } + int n = ml->nodecount; int szp = corenrn.get_prop_param_size()[type]; int szdp = corenrn.get_prop_dparam_size()[type]; @@ -314,6 +321,13 @@ static void delete_ml_from_device(Memb_list* ml, int type) { } cnrn_target_delete(ml->nodeindices, n); + + if (ml->global_variables) { + assert(ml->global_variables_size); + cnrn_target_delete(static_cast(ml->global_variables), + ml->global_variables_size); + } + cnrn_target_delete(ml); } diff --git a/coreneuron/io/phase2.cpp b/coreneuron/io/phase2.cpp index 0b96e1956..ad5748ad7 100644 --- a/coreneuron/io/phase2.cpp +++ b/coreneuron/io/phase2.cpp @@ -959,7 +959,8 @@ void Phase2::populate(NrnThread& nt, const UserParams& userParams) { NrnThreadMembList* tml_last = nullptr; for (int i = 0; i < n_mech; ++i) { - auto tml = create_tml(i, memb_func[mech_types[i]], shadow_rhs_cnt, mech_types, nodecounts); + auto tml = + create_tml(nt, i, 
memb_func[mech_types[i]], shadow_rhs_cnt, mech_types, nodecounts); nt._ml_list[tml->index] = tml->ml; diff --git a/coreneuron/mechanism/capac.cpp b/coreneuron/mechanism/capac.cpp index 5333767c5..2ffabb4e9 100644 --- a/coreneuron/mechanism/capac.cpp +++ b/coreneuron/mechanism/capac.cpp @@ -38,6 +38,7 @@ void capacitance_reg(void) { nrn_init_capacitance, -1, 1, + nullptr, nullptr); int mechtype = nrn_get_mechtype(mechanism[1]); _nrn_layout_reg(mechtype, SOA_LAYOUT); diff --git a/coreneuron/mechanism/eion.cpp b/coreneuron/mechanism/eion.cpp index 350b4ff90..9f442e12a 100644 --- a/coreneuron/mechanism/eion.cpp +++ b/coreneuron/mechanism/eion.cpp @@ -99,6 +99,7 @@ void ion_reg(const char* name, double valence) { nrn_init_ion, -1, 1, + nullptr, nullptr); mechtype = nrn_get_mechtype(mechanism[1]); _nrn_layout_reg(mechtype, SOA_LAYOUT); diff --git a/coreneuron/mechanism/mechanism.hpp b/coreneuron/mechanism/mechanism.hpp index caa895a0a..d82729a1d 100644 --- a/coreneuron/mechanism/mechanism.hpp +++ b/coreneuron/mechanism/mechanism.hpp @@ -152,5 +152,6 @@ struct Memb_list { // thread/mechanism data to the device. 
void* instance = nullptr; /* mechanism instance struct from NMODL */ void* global_variables = nullptr; /* global variables struct for each mechanism */ + std::size_t global_variables_size{}; }; } // namespace coreneuron diff --git a/coreneuron/mechanism/membfunc.hpp b/coreneuron/mechanism/membfunc.hpp index 64b9443c5..5380e16b6 100644 --- a/coreneuron/mechanism/membfunc.hpp +++ b/coreneuron/mechanism/membfunc.hpp @@ -23,6 +23,8 @@ struct NrnThread; using mod_alloc_t = void (*)(double*, Datum*, int); using mod_f_t = void (*)(NrnThread*, Memb_list*, int); using pnt_receive_t = void (*)(Point_process*, int, double); +using thread_table_check_t = + void (*)(int, int, double*, Datum*, ThreadDatum*, NrnThread*, Memb_list*, int); /* * Memb_func structure contains all related informations of a mechanism @@ -35,17 +37,17 @@ struct Memb_func { mod_f_t initialize; mod_f_t constructor; mod_f_t destructor; /* only for point processes */ - // This is used for CoreNEURON-internal cleanup; it is kept separate from - // the DESTRUCTOR function just above (which apparently is only for point - // processes) for simplicity; + // These are used for CoreNEURON-internal allocation/cleanup; they are kept + // separate from the CONSTRUCTOR/DESTRUCTOR functions just above (one of + // which is apparently only for point processes) for simplicity. + mod_f_t private_constructor; mod_f_t private_destructor; Symbol* sym; int vectorized; int thread_size_; /* how many Datum needed in Memb_list if vectorized */ void (*thread_mem_init_)(ThreadDatum*); /* after Memb_list._thread is allocated */ void (*thread_cleanup_)(ThreadDatum*); /* before Memb_list._thread is freed */ - void ( - *thread_table_check_)(int, int, double*, Datum*, ThreadDatum*, NrnThread*, Memb_list*, int); + thread_table_check_t thread_table_check_; int is_point; void (*setdata_)(double*, Datum*); int* dparam_semantics; /* for nrncore writing. 
*/ @@ -96,6 +98,7 @@ extern int register_mech(const char** m, mod_f_t initialize, int nrnpointerindex, int vectorized, + mod_f_t private_constructor, mod_f_t private_destructor); extern int point_register_mech(const char**, mod_alloc_t alloc, @@ -107,6 +110,7 @@ extern int point_register_mech(const char**, mod_f_t constructor, mod_f_t destructor, int vectorized, + mod_f_t private_constructor, mod_f_t private_destructor); extern void register_constructor(mod_f_t constructor); using NetBufReceive_t = void (*)(NrnThread*); diff --git a/coreneuron/mechanism/patternstim.cpp b/coreneuron/mechanism/patternstim.cpp index 4f5e4e4e6..e680a6187 100644 --- a/coreneuron/mechanism/patternstim.cpp +++ b/coreneuron/mechanism/patternstim.cpp @@ -137,7 +137,7 @@ size_t read_raster_file(const char* fname, double** tvec, int** gidvec, double t } // see nrn_setup.cpp:read_phase2 for how it creates NrnThreadMembList instances. -static NrnThreadMembList* alloc_nrn_thread_memb(int type) { +static NrnThreadMembList* alloc_nrn_thread_memb(NrnThread* nt, int type) { NrnThreadMembList* tml = (NrnThreadMembList*) ecalloc(1, sizeof(NrnThreadMembList)); tml->dependencies = nullptr; tml->ndependencies = 0; @@ -161,6 +161,10 @@ static NrnThreadMembList* alloc_nrn_thread_memb(int type) { tml->ml->_net_send_buffer = nullptr; tml->ml->_permute = nullptr; + if (auto* const priv_ctor = corenrn.get_memb_func(tml->index).private_constructor) { + priv_ctor(nt, tml->ml, tml->index); + } + return tml; } @@ -178,7 +182,7 @@ Point_process* nrn_artcell_instantiate(const char* mechname) { // printf("nrn_artcell_instantiate %s type=%d\n", mechname, type); // create and append to nt.tml - auto tml = alloc_nrn_thread_memb(type); + auto tml = alloc_nrn_thread_memb(nt, type); assert(nt->_ml_list[type] == nullptr); // FIXME nt->_ml_list[type] = tml->ml; diff --git a/coreneuron/mechanism/register_mech.cpp b/coreneuron/mechanism/register_mech.cpp index 44d4b5f8f..41ed41a29 100644 --- 
a/coreneuron/mechanism/register_mech.cpp +++ b/coreneuron/mechanism/register_mech.cpp @@ -119,6 +119,7 @@ int register_mech(const char** m, mod_f_t initialize, int /* nrnpointerindex */, int vectorized, + mod_f_t private_constructor, mod_f_t private_destructor) { auto& memb_func = corenrn.get_memb_funcs(); @@ -145,6 +146,7 @@ int register_mech(const char** m, memb_func[type].initialize = initialize; memb_func[type].constructor = nullptr; memb_func[type].destructor = nullptr; + memb_func[type].private_constructor = private_constructor; memb_func[type].private_destructor = private_destructor; #if VECTORIZE memb_func[type].vectorized = vectorized ? 1 : 0; @@ -346,10 +348,19 @@ int point_register_mech(const char** m, mod_f_t constructor, mod_f_t destructor, int vectorized, + mod_f_t private_constructor, mod_f_t private_destructor) { const Symbol* s = m[1]; - register_mech( - m, alloc, cur, jacob, stat, initialize, nrnpointerindex, vectorized, private_destructor); + register_mech(m, + alloc, + cur, + jacob, + stat, + initialize, + nrnpointerindex, + vectorized, + private_constructor, + private_destructor); register_constructor(constructor); register_destructor(destructor); return point_reg_helper(s); @@ -421,9 +432,7 @@ void _nrn_thread_reg1(int i, void (*f)(ThreadDatum*)) { corenrn.get_memb_func(i).thread_mem_init_ = f; } -void _nrn_thread_table_reg( - int i, - void (*f)(int, int, double*, Datum*, ThreadDatum*, NrnThread*, Memb_list*, int)) { +void _nrn_thread_table_reg(int i, thread_table_check_t f) { if (i == -1) return; diff --git a/coreneuron/sim/multicore.cpp b/coreneuron/sim/multicore.cpp index cf8daaac8..b8dd293d2 100644 --- a/coreneuron/sim/multicore.cpp +++ b/coreneuron/sim/multicore.cpp @@ -61,7 +61,8 @@ static int table_check_cnt_; static ThreadDatum* table_check_; -NrnThreadMembList* create_tml(int mech_id, +NrnThreadMembList* create_tml(NrnThread& nt, + int mech_id, Memb_func& memb_func, int& shadow_rhs_cnt, const std::vector& mech_types, @@ -91,6 +92,10 
@@ NrnThreadMembList* create_tml(int mech_id, } } + if (auto* const priv_ctor = corenrn.get_memb_func(tml->index).private_constructor) { + priv_ctor(&nt, tml->ml, tml->index); + } + return tml; } diff --git a/coreneuron/sim/multicore.hpp b/coreneuron/sim/multicore.hpp index 391b5dcaa..3e06e3585 100644 --- a/coreneuron/sim/multicore.hpp +++ b/coreneuron/sim/multicore.hpp @@ -36,7 +36,8 @@ struct NrnThreadMembList { /* patterned after CvMembList in cvodeobj.h */ int* dependencies; /* list of mechanism types that this mechanism depends on*/ int ndependencies; /* for scheduling we need to know the dependency count */ }; -NrnThreadMembList* create_tml(int mech_id, +NrnThreadMembList* create_tml(NrnThread& nt, + int mech_id, Memb_func& memb_func, int& shadow_rhs_cnt, const std::vector& mech_types, diff --git a/external/mod2c b/external/mod2c index eb9a42f79..f4080b2ce 160000 --- a/external/mod2c +++ b/external/mod2c @@ -1 +1 @@ -Subproject commit eb9a42f79d4a9ad82b06f5a080e1927a3f7c5f9f +Subproject commit f4080b2cefff571cc455c0e7f3efe34a034c0918 From a13f67bbcfdca4fc4ddab3a23b4402e128c583e5 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 4 Aug 2022 22:27:18 +0200 Subject: [PATCH 074/128] try and fix table statements --- external/mod2c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/mod2c b/external/mod2c index f4080b2ce..2603ada87 160000 --- a/external/mod2c +++ b/external/mod2c @@ -1 +1 @@ -Subproject commit f4080b2cefff571cc455c0e7f3efe34a034c0918 +Subproject commit 2603ada879eaa9937b955807e65f3d02500c6a09 From 8124239373924db3ba423d30859a5e1dd3bcbff5 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 5 Aug 2022 08:06:10 +0200 Subject: [PATCH 075/128] submodule --- external/mod2c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/mod2c b/external/mod2c index 2603ada87..bc5f9d696 160000 --- a/external/mod2c +++ b/external/mod2c @@ -1 +1 @@ -Subproject commit 2603ada879eaa9937b955807e65f3d02500c6a09 +Subproject 
commit bc5f9d6962ebcf2b40d64ae8d6b95a829f40f517 From cd499d73d1b98ce3a0d83d81f52cb8b492daf910 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 5 Aug 2022 09:52:38 +0200 Subject: [PATCH 076/128] reduce diff --- coreneuron/gpu/nrn_acc_manager.cpp | 18 +++++------------- coreneuron/mechanism/mech/enginemech.cpp | 2 +- coreneuron/permute/cellorder.cu | 2 +- coreneuron/utils/randoms/nrnran123.cpp | 4 ---- tests/unit/solver/CMakeLists.txt | 2 +- 5 files changed, 8 insertions(+), 20 deletions(-) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index 9501a758d..e8dcb6ae0 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -132,7 +132,7 @@ void cnrn_target_set_default_device(int device_num) { #ifdef CORENEURON_ENABLE_GPU #ifndef CORENEURON_UNIFIED_MEMORY -static Memb_list* copy_ml_to_device(const Memb_list* ml, int type, double* dml_data) { +static Memb_list* copy_ml_to_device(const Memb_list* ml, int type) { // As we never run code for artificial cell inside GPU we don't copy it. 
int is_art = corenrn.get_is_artificial()[type]; if (is_art) { @@ -152,10 +152,10 @@ static Memb_list* copy_ml_to_device(const Memb_list* ml, int type, double* dml_d int szp = corenrn.get_prop_param_size()[type]; int szdp = corenrn.get_prop_dparam_size()[type]; - double* dptr = dml_data; - + double* dptr = cnrn_target_deviceptr(ml->data); cnrn_target_memcpy_to_device(&(d_ml->data), &(dptr)); + int* d_nodeindices = cnrn_target_copyin(ml->nodeindices, n); cnrn_target_memcpy_to_device(&(d_ml->nodeindices), &d_nodeindices); @@ -319,7 +319,6 @@ static void delete_ml_from_device(Memb_list* ml, int type) { int pcnt = nrn_soa_padded_size(n, SOA_LAYOUT) * szdp; cnrn_target_delete(ml->pdata, pcnt); } - cnrn_target_delete(ml->nodeindices, n); if (ml->global_variables) { @@ -396,6 +395,7 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) { /*copy all double data for thread */ d__data = cnrn_target_copyin(nt->_data, nt->_ndata); + /* Here is the example of using OpenACC data enter/exit * Remember that we are not allowed to use nt->_data but we have to use: * double *dtmp = nt->_data; // now use dtmp! @@ -465,17 +465,9 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) { // book keeping for linked-list d_last_tml = d_tml; - // TODO: acc_deviceptr is returning host pointer when - // coreneuron is launched via python instead of special - // see: https://github.com/BlueBrain/CoreNeuron/issues/141#issuecomment-1086746848 - // As ml->data is always within nt->_data, temporarily calculate - // device pointer of ml->data on using offset. - double* dml_data = d__data + (tml->ml->data - nt->_data); - /* now for every tml, there is a ml. 
copy that and setup pointer */ - Memb_list* d_ml = copy_ml_to_device(tml->ml, tml->index, dml_data); + Memb_list* d_ml = copy_ml_to_device(tml->ml, tml->index); cnrn_target_memcpy_to_device(&(d_tml->ml), &d_ml); - /* setup nt._ml_list */ cnrn_target_memcpy_to_device(&(d_ml_list[tml->index]), &d_ml); } diff --git a/coreneuron/mechanism/mech/enginemech.cpp b/coreneuron/mechanism/mech/enginemech.cpp index ee9cc9e28..2c20d1293 100644 --- a/coreneuron/mechanism/mech/enginemech.cpp +++ b/coreneuron/mechanism/mech/enginemech.cpp @@ -1,6 +1,6 @@ /* # ============================================================================= -# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL +# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= diff --git a/coreneuron/permute/cellorder.cu b/coreneuron/permute/cellorder.cu index ed8975148..1f1bdff94 100644 --- a/coreneuron/permute/cellorder.cu +++ b/coreneuron/permute/cellorder.cu @@ -1,6 +1,6 @@ /* # ============================================================================= -# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL +# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= diff --git a/coreneuron/utils/randoms/nrnran123.cpp b/coreneuron/utils/randoms/nrnran123.cpp index f2bfed11a..af1378044 100644 --- a/coreneuron/utils/randoms/nrnran123.cpp +++ b/coreneuron/utils/randoms/nrnran123.cpp @@ -16,10 +16,6 @@ #include #endif -#ifdef __CUDACC__ -#include -#endif - #include #include #include diff --git a/tests/unit/solver/CMakeLists.txt b/tests/unit/solver/CMakeLists.txt index 01e058525..f8bc52287 100644 --- a/tests/unit/solver/CMakeLists.txt +++ b/tests/unit/solver/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022 Blue Brain Project +# Copyright (c) 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= From 341e89c3d9a8d737062ec97b675019ae96e780c3 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 5 Aug 2022 19:22:46 +0200 Subject: [PATCH 077/128] nmodl attempt --- coreneuron/gpu/nrn_acc_manager.cpp | 14 ++++++++++++++ coreneuron/mechanism/capac.cpp | 6 +++--- coreneuron/mechanism/eion.cpp | 6 +++--- coreneuron/mechanism/mechanism.hpp | 1 + coreneuron/mechanism/membfunc.hpp | 13 +++++++------ coreneuron/mechanism/register_mech.cpp | 18 +++++++++--------- external/mod2c | 2 +- external/nmodl | 2 +- 8 files changed, 39 insertions(+), 23 deletions(-) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index e8dcb6ae0..7df4155fb 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -141,6 +141,14 @@ static Memb_list* copy_ml_to_device(const Memb_list* ml, int type) { auto d_ml = cnrn_target_copyin(ml); + if (ml->instance) { + assert(ml->instance_size); + void* d_inst = cnrn_target_copyin(static_cast(ml->instance), + ml->instance_size); + cnrn_target_memcpy_to_device(&(d_ml->instance), &d_inst); + } + + if 
(ml->global_variables) { assert(ml->global_variables_size); void* d_glob_vars = cnrn_target_copyin(static_cast(ml->global_variables), @@ -327,6 +335,12 @@ static void delete_ml_from_device(Memb_list* ml, int type) { ml->global_variables_size); } + if (ml->instance) { + assert(ml->instance_size); + cnrn_target_delete(static_cast(ml->instance), + ml->instance_size); + } + cnrn_target_delete(ml); } diff --git a/coreneuron/mechanism/capac.cpp b/coreneuron/mechanism/capac.cpp index 2ffabb4e9..f47a4ebd7 100644 --- a/coreneuron/mechanism/capac.cpp +++ b/coreneuron/mechanism/capac.cpp @@ -36,10 +36,10 @@ void capacitance_reg(void) { nullptr, nullptr, nrn_init_capacitance, - -1, - 1, nullptr, - nullptr); + nullptr, + -1, + 1); int mechtype = nrn_get_mechtype(mechanism[1]); _nrn_layout_reg(mechtype, SOA_LAYOUT); hoc_register_prop_size(mechtype, nparm, 0); diff --git a/coreneuron/mechanism/eion.cpp b/coreneuron/mechanism/eion.cpp index 9f442e12a..ec1fd665e 100644 --- a/coreneuron/mechanism/eion.cpp +++ b/coreneuron/mechanism/eion.cpp @@ -97,10 +97,10 @@ void ion_reg(const char* name, double valence) { nullptr, nullptr, nrn_init_ion, - -1, - 1, nullptr, - nullptr); + nullptr, + -1, + 1); mechtype = nrn_get_mechtype(mechanism[1]); _nrn_layout_reg(mechtype, SOA_LAYOUT); hoc_register_prop_size(mechtype, nparm, 1); diff --git a/coreneuron/mechanism/mechanism.hpp b/coreneuron/mechanism/mechanism.hpp index d82729a1d..f8efd643b 100644 --- a/coreneuron/mechanism/mechanism.hpp +++ b/coreneuron/mechanism/mechanism.hpp @@ -151,6 +151,7 @@ struct Memb_list { // called before the main nrn_acc_manager methods that copy // thread/mechanism data to the device. 
void* instance = nullptr; /* mechanism instance struct from NMODL */ + std::size_t instance_size{}; void* global_variables = nullptr; /* global variables struct for each mechanism */ std::size_t global_variables_size{}; }; diff --git a/coreneuron/mechanism/membfunc.hpp b/coreneuron/mechanism/membfunc.hpp index 5380e16b6..7602a8218 100644 --- a/coreneuron/mechanism/membfunc.hpp +++ b/coreneuron/mechanism/membfunc.hpp @@ -96,22 +96,23 @@ extern int register_mech(const char** m, mod_f_t jacob, mod_f_t stat, mod_f_t initialize, - int nrnpointerindex, - int vectorized, mod_f_t private_constructor, - mod_f_t private_destructor); + mod_f_t private_destructor, + int nrnpointerindex, + int vectorized +); extern int point_register_mech(const char**, mod_alloc_t alloc, mod_f_t cur, mod_f_t jacob, mod_f_t stat, mod_f_t initialize, + mod_f_t private_constructor, + mod_f_t private_destructor, int nrnpointerindex, mod_f_t constructor, mod_f_t destructor, - int vectorized, - mod_f_t private_constructor, - mod_f_t private_destructor); + int vectorized); extern void register_constructor(mod_f_t constructor); using NetBufReceive_t = void (*)(NrnThread*); extern void hoc_register_net_receive_buffering(NetBufReceive_t, int); diff --git a/coreneuron/mechanism/register_mech.cpp b/coreneuron/mechanism/register_mech.cpp index 41ed41a29..01a71b5bc 100644 --- a/coreneuron/mechanism/register_mech.cpp +++ b/coreneuron/mechanism/register_mech.cpp @@ -117,10 +117,10 @@ int register_mech(const char** m, mod_f_t jacob, mod_f_t stat, mod_f_t initialize, - int /* nrnpointerindex */, - int vectorized, mod_f_t private_constructor, - mod_f_t private_destructor) { + mod_f_t private_destructor, + int /* nrnpointerindex */, + int vectorized) { auto& memb_func = corenrn.get_memb_funcs(); int type = nrn_get_mechtype(m[1]); @@ -344,12 +344,12 @@ int point_register_mech(const char** m, mod_f_t jacob, mod_f_t stat, mod_f_t initialize, + mod_f_t private_constructor, + mod_f_t private_destructor, int 
nrnpointerindex, mod_f_t constructor, mod_f_t destructor, - int vectorized, - mod_f_t private_constructor, - mod_f_t private_destructor) { + int vectorized) { const Symbol* s = m[1]; register_mech(m, alloc, @@ -357,10 +357,10 @@ int point_register_mech(const char** m, jacob, stat, initialize, - nrnpointerindex, - vectorized, private_constructor, - private_destructor); + private_destructor, + nrnpointerindex, + vectorized); register_constructor(constructor); register_destructor(destructor); return point_reg_helper(s); diff --git a/external/mod2c b/external/mod2c index bc5f9d696..8b754b35b 160000 --- a/external/mod2c +++ b/external/mod2c @@ -1 +1 @@ -Subproject commit bc5f9d6962ebcf2b40d64ae8d6b95a829f40f517 +Subproject commit 8b754b35b6ea3088a713590bc5d72af3e2f8ef2b diff --git a/external/nmodl b/external/nmodl index b99496a91..ec1f3300c 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit b99496a919df98a89cd97cb6898dda49f0d17c56 +Subproject commit ec1f3300c8e4b6b5cd7c4d85ebb5204050c1b311 From 5ee91a6458cdc602b51634e7e0b8feb7872be32b Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 10 Aug 2022 09:02:17 +0200 Subject: [PATCH 078/128] fix build dependencies --- coreneuron/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 57af85b6e..69f61daaf 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -306,6 +306,11 @@ if(CORENRN_ENABLE_GPU) target_compile_options(coreneuron-core PRIVATE ${CORENRN_ACC_FLAGS}) endif() +# Create an extra target for use by NEURON when CoreNEURON is being built as a +# submodule. NEURON tests will depend on this, so it must in turn depend on +# everything that is needed to run nrnivmodl -coreneuron. +add_custom_target(coreneuron-for-tests) +add_dependencies(coreneuron-for-tests coreneuron-core ${NMODL_TARGET_TO_DEPEND}) # Create an extra target for internal use that unit tests and so on can depend on. 
# ${corenrn_mech_library} is libcorenrnmech.{a,so}, which contains both the compiled default # mechanisms and the content of libcoreneuron-core.a. From 95972570b27a0f0fb9f1610abc64be1d7ebb6113 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 10 Aug 2022 09:02:43 +0200 Subject: [PATCH 079/128] Try and fix partial_piv_lu.cu linking. --- coreneuron/CMakeLists.txt | 8 ++++++-- external/nmodl | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 69f61daaf..9e829f940 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -117,7 +117,7 @@ if(CORENRN_ENABLE_GPU) # these functions from CUDA kernels presents no issue ... TODO is it going to work to call these # from libcoreneuron-cuda.so? probably not... if(CORENRN_ENABLE_NMODL AND EXISTS ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) - list(APPEND CORENEURON_CUDA_FILES ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) + set(CORENEURON_CUDA_LIBRARY_FILES ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) endif() endif() @@ -159,9 +159,13 @@ endif() # https://forums.developer.nvidia.com/t/cannot-dynamically-load-a-shared-library-containing-both-openacc-and-cuda-code/210972 add_library(coreneuron-core STATIC ${CORENEURON_CODE_FILES} ${CORENRN_MPI_OBJ}) if(CORENRN_ENABLE_GPU) - set(coreneuron_cuda_target coreneuron-cuda) + set(coreneuron_cuda_target coreneuron-cuda coreneuron-cuda-helpers) + add_library(coreneuron-cuda-helpers STATIC ${CORENEURON_CUDA_LIBRARY_FILES}) add_library(coreneuron-cuda ${COMPILE_LIBRARY_TYPE} ${CORENEURON_CUDA_FILES}) + target_link_libraries(coreneuron-cuda PUBLIC coreneuron-cuda-helpers) target_link_libraries(coreneuron-core PUBLIC coreneuron-cuda) + set_property(TARGET coreneuron-cuda PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS OFF) + set_property(TARGET coreneuron-core coreneuron-cuda-helpers PROPERTY CUDA_SEPARABLE_COMPILATION ON) endif() foreach(target coreneuron-core 
${coreneuron_cuda_target}) diff --git a/external/nmodl b/external/nmodl index ec1f3300c..8003e2627 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit ec1f3300c8e4b6b5cd7c4d85ebb5204050c1b311 +Subproject commit 8003e262727ca89630e1e800e8b3415f6d9b716f From c0862b792f766f7aa8bdd426a03853bc52b10471 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 10 Aug 2022 09:32:53 +0200 Subject: [PATCH 080/128] fix shutdown with NMODL --- coreneuron/io/nrn_setup.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/coreneuron/io/nrn_setup.cpp b/coreneuron/io/nrn_setup.cpp index a7ee2fdf8..5aed57fbb 100644 --- a/coreneuron/io/nrn_setup.cpp +++ b/coreneuron/io/nrn_setup.cpp @@ -754,17 +754,14 @@ void nrn_cleanup() { ml->_thread = nullptr; } - // Probably causes problems with NMODL, which allocates its instance - // in unified memory. - if (ml->instance) { - free(ml->instance); - ml->instance = nullptr; - } - // Destroy the global variables struct allocated in nrn_init if (auto* const priv_dtor = corenrn.get_memb_func(tml->index).private_destructor) { (*priv_dtor)(nt, ml, tml->index); + assert(!ml->instance); + assert(!ml->instance_size); + // TODO make mod2c use `instance` instead of `global_variables` assert(!ml->global_variables); + assert(!ml->global_variables_size); } NetReceiveBuffer_t* nrb = ml->_net_receive_buffer; From 4f3c5df42c8ce541dfe1a665933c1157009aadb6 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 11 Aug 2022 12:49:23 +0200 Subject: [PATCH 081/128] try and fix all the things --- CMakeLists.txt | 5 +++++ coreneuron/CMakeLists.txt | 10 +++------- coreneuron/utils/randoms/nrnran123.cpp | 10 +++++++++- external/nmodl | 2 +- tests/unit/alignment/CMakeLists.txt | 4 +--- 5 files changed, 19 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ab3de7345..d53d2a369 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -281,6 +281,11 @@ endif() # 
============================================================================= # Build option specific compiler flags # ============================================================================= +if(CORENRN_ENABLE_NMODL) + # We use Eigen for "small" matrices with thread-level parallelism handled at a + # higher level; tell Eigen not to try to multithread internally + list(APPEND CORENRN_COMPILE_DEFS EIGEN_DONT_PARALLELIZE) +endif() if(CORENRN_HAVE_NVHPC_COMPILER) # PGI with llvm code generation doesn't have necessary assembly intrinsic headers list(APPEND CORENRN_COMPILE_DEFS EIGEN_DONT_VECTORIZE=1) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 9e829f940..2fa72a534 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -116,8 +116,8 @@ if(CORENRN_ENABLE_GPU) # __device__ & acc routine tokens), which allows us to eventually call them from OpenACC. Calling # these functions from CUDA kernels presents no issue ... TODO is it going to work to call these # from libcoreneuron-cuda.so? probably not... 
- if(CORENRN_ENABLE_NMODL AND EXISTS ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) - set(CORENEURON_CUDA_LIBRARY_FILES ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cu) + if(CORENRN_ENABLE_NMODL AND EXISTS ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cpp) + list(APPEND CORENEURON_CODE_FILES ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cpp) endif() endif() @@ -159,13 +159,9 @@ endif() # https://forums.developer.nvidia.com/t/cannot-dynamically-load-a-shared-library-containing-both-openacc-and-cuda-code/210972 add_library(coreneuron-core STATIC ${CORENEURON_CODE_FILES} ${CORENRN_MPI_OBJ}) if(CORENRN_ENABLE_GPU) - set(coreneuron_cuda_target coreneuron-cuda coreneuron-cuda-helpers) - add_library(coreneuron-cuda-helpers STATIC ${CORENEURON_CUDA_LIBRARY_FILES}) + set(coreneuron_cuda_target coreneuron-cuda) add_library(coreneuron-cuda ${COMPILE_LIBRARY_TYPE} ${CORENEURON_CUDA_FILES}) - target_link_libraries(coreneuron-cuda PUBLIC coreneuron-cuda-helpers) target_link_libraries(coreneuron-core PUBLIC coreneuron-cuda) - set_property(TARGET coreneuron-cuda PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS OFF) - set_property(TARGET coreneuron-core coreneuron-cuda-helpers PROPERTY CUDA_SEPARABLE_COMPILATION ON) endif() foreach(target coreneuron-core ${coreneuron_cuda_target}) diff --git a/coreneuron/utils/randoms/nrnran123.cpp b/coreneuron/utils/randoms/nrnran123.cpp index af1378044..0a6c89562 100644 --- a/coreneuron/utils/randoms/nrnran123.cpp +++ b/coreneuron/utils/randoms/nrnran123.cpp @@ -21,6 +21,14 @@ #include #include +// Defining these attributes seems to help nvc++ in OpenMP target offload mode. 
+#if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ + defined(_OPENMP) && defined(__CUDACC__) +#define CORENRN_HOST_DEVICE __host__ __device__ +#else +#define CORENRN_HOST_DEVICE +#endif + namespace { #ifdef CORENEURON_USE_BOOST_POOL /** Tag type for use with boost::fast_pool_allocator that forwards to @@ -86,7 +94,7 @@ __attribute__((noinline)) philox4x32_key_t& global_state() { } } // namespace -philox4x32_ctr_t coreneuron_random123_philox4x32_helper(coreneuron::nrnran123_State* s) { +CORENRN_HOST_DEVICE philox4x32_ctr_t coreneuron_random123_philox4x32_helper(coreneuron::nrnran123_State* s) { return philox4x32(s->c, global_state()); } diff --git a/external/nmodl b/external/nmodl index 8003e2627..09005d9ad 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 8003e262727ca89630e1e800e8b3415f6d9b716f +Subproject commit 09005d9adf2a6f9372a0d4ad11674ce15ff72ca0 diff --git a/tests/unit/alignment/CMakeLists.txt b/tests/unit/alignment/CMakeLists.txt index 92464350e..0cffdc8b3 100644 --- a/tests/unit/alignment/CMakeLists.txt +++ b/tests/unit/alignment/CMakeLists.txt @@ -3,9 +3,7 @@ # # See top-level LICENSE file for details. 
# ============================================================================= -include_directories(${CMAKE_SOURCE_DIR}/coreneuron ${Boost_INCLUDE_DIRS}) - add_executable(alignment_test_bin alignment.cpp) -target_compile_options(alignment_test_bin PRIVATE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) +target_link_libraries(alignment_test_bin coreneuron-unit-test) add_test(NAME alignment_test COMMAND ${TEST_EXEC_PREFIX} $) cpp_cc_configure_sanitizers(TARGET alignment_test_bin TEST alignment_test) From f706029239fa7f0cfbd69cb1abd5c84172c6b8dd Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 11 Aug 2022 12:52:52 +0200 Subject: [PATCH 082/128] one more openmp fix --- coreneuron/mechanism/register_mech.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/coreneuron/mechanism/register_mech.cpp b/coreneuron/mechanism/register_mech.cpp index 01a71b5bc..498754d80 100644 --- a/coreneuron/mechanism/register_mech.cpp +++ b/coreneuron/mechanism/register_mech.cpp @@ -19,9 +19,7 @@ namespace coreneuron { int secondorder = 0; -nrn_pragma_omp(declare target) double t, dt, celsius, pi; -nrn_pragma_omp(end declare target) int rev_dt; using Pfrv = void (*)(); From adead78fcc33fbd05c8a63f041e91190794dfcef Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 11 Aug 2022 13:05:09 +0200 Subject: [PATCH 083/128] clang-format --- coreneuron/gpu/nrn_acc_manager.cpp | 6 ++---- coreneuron/mechanism/mechanism.hpp | 2 +- coreneuron/mechanism/membfunc.hpp | 3 +-- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index 7df4155fb..13ed8d109 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -143,8 +143,7 @@ static Memb_list* copy_ml_to_device(const Memb_list* ml, int type) { if (ml->instance) { assert(ml->instance_size); - void* d_inst = cnrn_target_copyin(static_cast(ml->instance), - ml->instance_size); + void* d_inst = cnrn_target_copyin(static_cast(ml->instance), 
ml->instance_size); cnrn_target_memcpy_to_device(&(d_ml->instance), &d_inst); } @@ -337,8 +336,7 @@ static void delete_ml_from_device(Memb_list* ml, int type) { if (ml->instance) { assert(ml->instance_size); - cnrn_target_delete(static_cast(ml->instance), - ml->instance_size); + cnrn_target_delete(static_cast(ml->instance), ml->instance_size); } cnrn_target_delete(ml); diff --git a/coreneuron/mechanism/mechanism.hpp b/coreneuron/mechanism/mechanism.hpp index f8efd643b..baa872c85 100644 --- a/coreneuron/mechanism/mechanism.hpp +++ b/coreneuron/mechanism/mechanism.hpp @@ -150,7 +150,7 @@ struct Memb_list { // accelerator if the "constructor" in the translated MOD file code was // called before the main nrn_acc_manager methods that copy // thread/mechanism data to the device. - void* instance = nullptr; /* mechanism instance struct from NMODL */ + void* instance = nullptr; /* mechanism instance struct from NMODL */ std::size_t instance_size{}; void* global_variables = nullptr; /* global variables struct for each mechanism */ std::size_t global_variables_size{}; diff --git a/coreneuron/mechanism/membfunc.hpp b/coreneuron/mechanism/membfunc.hpp index 7602a8218..ac650595c 100644 --- a/coreneuron/mechanism/membfunc.hpp +++ b/coreneuron/mechanism/membfunc.hpp @@ -99,8 +99,7 @@ extern int register_mech(const char** m, mod_f_t private_constructor, mod_f_t private_destructor, int nrnpointerindex, - int vectorized -); + int vectorized); extern int point_register_mech(const char**, mod_alloc_t alloc, mod_f_t cur, From de70877ae7228599fa900e85dfeeac0949160314 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 16 Aug 2022 14:10:19 +0200 Subject: [PATCH 084/128] clang-format --- coreneuron/utils/randoms/nrnran123.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/coreneuron/utils/randoms/nrnran123.cpp b/coreneuron/utils/randoms/nrnran123.cpp index 0a6c89562..14e2b15df 100644 --- a/coreneuron/utils/randoms/nrnran123.cpp +++ 
b/coreneuron/utils/randoms/nrnran123.cpp @@ -94,7 +94,8 @@ __attribute__((noinline)) philox4x32_key_t& global_state() { } } // namespace -CORENRN_HOST_DEVICE philox4x32_ctr_t coreneuron_random123_philox4x32_helper(coreneuron::nrnran123_State* s) { +CORENRN_HOST_DEVICE philox4x32_ctr_t +coreneuron_random123_philox4x32_helper(coreneuron::nrnran123_State* s) { return philox4x32(s->c, global_state()); } From 8c737562a719c93552ebbab28d4ff0970316534d Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 16 Aug 2022 14:28:58 +0200 Subject: [PATCH 085/128] nmodl submodule --- external/nmodl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/nmodl b/external/nmodl index 09005d9ad..d44340f48 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 09005d9adf2a6f9372a0d4ad11674ce15ff72ca0 +Subproject commit d44340f4805d08ffa55510448ab48602a2635c62 From 56b573a2fb01bcc19c7739ea703146273cc24357 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 16 Aug 2022 14:32:46 +0200 Subject: [PATCH 086/128] cmake-format --- CMakeLists.txt | 4 ++-- coreneuron/CMakeLists.txt | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d53d2a369..9c8db5d31 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -282,8 +282,8 @@ endif() # Build option specific compiler flags # ============================================================================= if(CORENRN_ENABLE_NMODL) - # We use Eigen for "small" matrices with thread-level parallelism handled at a - # higher level; tell Eigen not to try to multithread internally + # We use Eigen for "small" matrices with thread-level parallelism handled at a higher level; tell + # Eigen not to try to multithread internally list(APPEND CORENRN_COMPILE_DEFS EIGEN_DONT_PARALLELIZE) endif() if(CORENRN_HAVE_NVHPC_COMPILER) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 2fa72a534..5392a9c48 100644 --- a/coreneuron/CMakeLists.txt +++ 
b/coreneuron/CMakeLists.txt @@ -306,9 +306,9 @@ if(CORENRN_ENABLE_GPU) target_compile_options(coreneuron-core PRIVATE ${CORENRN_ACC_FLAGS}) endif() -# Create an extra target for use by NEURON when CoreNEURON is being built as a -# submodule. NEURON tests will depend on this, so it must in turn depend on -# everything that is needed to run nrnivmodl -coreneuron. +# Create an extra target for use by NEURON when CoreNEURON is being built as a submodule. NEURON +# tests will depend on this, so it must in turn depend on everything that is needed to run nrnivmodl +# -coreneuron. add_custom_target(coreneuron-for-tests) add_dependencies(coreneuron-for-tests coreneuron-core ${NMODL_TARGET_TO_DEPEND}) # Create an extra target for internal use that unit tests and so on can depend on. From b8f266595b5e71fda7cb626c6eb84df263d1de0d Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 16 Aug 2022 14:48:02 +0200 Subject: [PATCH 087/128] Boost unit tests are built in header-only mode now --- tests/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7ef3d9647..d6b334ca3 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -31,8 +31,7 @@ if(Boost_FOUND) target_compile_options(coreneuron-unit-test INTERFACE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) target_include_directories(coreneuron-unit-test SYSTEM INTERFACE ${Boost_INCLUDE_DIRS}) - target_link_libraries(coreneuron-unit-test INTERFACE coreneuron-all - ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}) + target_link_libraries(coreneuron-unit-test INTERFACE coreneuron-all) add_subdirectory(unit/cmdline_interface) add_subdirectory(unit/interleave_info) add_subdirectory(unit/alignment) From 4944c75c8e148ce14f2a10afdf7a4631e8a2125f Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 16 Aug 2022 14:48:46 +0200 Subject: [PATCH 088/128] Drop ${TEST_EXEC_PREFIX} that was causing simple tests to be executed on many ranks. 
--- tests/unit/alignment/CMakeLists.txt | 2 +- tests/unit/cmdline_interface/CMakeLists.txt | 2 +- tests/unit/interleave_info/CMakeLists.txt | 3 +-- tests/unit/lfp/CMakeLists.txt | 2 +- tests/unit/queueing/CMakeLists.txt | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/unit/alignment/CMakeLists.txt b/tests/unit/alignment/CMakeLists.txt index 0cffdc8b3..89da4da14 100644 --- a/tests/unit/alignment/CMakeLists.txt +++ b/tests/unit/alignment/CMakeLists.txt @@ -5,5 +5,5 @@ # ============================================================================= add_executable(alignment_test_bin alignment.cpp) target_link_libraries(alignment_test_bin coreneuron-unit-test) -add_test(NAME alignment_test COMMAND ${TEST_EXEC_PREFIX} $) +add_test(NAME alignment_test COMMAND $) cpp_cc_configure_sanitizers(TARGET alignment_test_bin TEST alignment_test) diff --git a/tests/unit/cmdline_interface/CMakeLists.txt b/tests/unit/cmdline_interface/CMakeLists.txt index fadbe60a3..cc98ad78d 100644 --- a/tests/unit/cmdline_interface/CMakeLists.txt +++ b/tests/unit/cmdline_interface/CMakeLists.txt @@ -5,5 +5,5 @@ # ============================================================================= add_executable(cmd_interface_test_bin test_cmdline_interface.cpp) target_link_libraries(cmd_interface_test_bin coreneuron-unit-test) -add_test(NAME cmd_interface_test COMMAND ${TEST_EXEC_PREFIX} $) +add_test(NAME cmd_interface_test COMMAND $) cpp_cc_configure_sanitizers(TARGET cmd_interface_test_bin TEST cmd_interface_test) diff --git a/tests/unit/interleave_info/CMakeLists.txt b/tests/unit/interleave_info/CMakeLists.txt index 948f32405..cda875eae 100644 --- a/tests/unit/interleave_info/CMakeLists.txt +++ b/tests/unit/interleave_info/CMakeLists.txt @@ -5,6 +5,5 @@ # ============================================================================= add_executable(interleave_info_bin check_constructors.cpp) target_link_libraries(interleave_info_bin coreneuron-unit-test) -add_test(NAME 
interleave_info_constructor_test COMMAND ${TEST_EXEC_PREFIX} - $) +add_test(NAME interleave_info_constructor_test COMMAND $) cpp_cc_configure_sanitizers(TARGET interleave_info_bin TEST interleave_info_constructor_test) diff --git a/tests/unit/lfp/CMakeLists.txt b/tests/unit/lfp/CMakeLists.txt index 8b5b201c4..34231b9f9 100644 --- a/tests/unit/lfp/CMakeLists.txt +++ b/tests/unit/lfp/CMakeLists.txt @@ -5,7 +5,7 @@ # ============================================================================= add_executable(lfp_test_bin lfp.cpp) target_link_libraries(lfp_test_bin coreneuron-unit-test) -add_test(NAME lfp_test COMMAND ${TEST_EXEC_PREFIX} $) +add_test(NAME lfp_test COMMAND $) cpp_cc_configure_sanitizers(TARGET lfp_test_bin TEST lfp_test) set_property( TEST lfp_test diff --git a/tests/unit/queueing/CMakeLists.txt b/tests/unit/queueing/CMakeLists.txt index fc653ea98..05b2a12f2 100644 --- a/tests/unit/queueing/CMakeLists.txt +++ b/tests/unit/queueing/CMakeLists.txt @@ -5,5 +5,5 @@ # ============================================================================= add_executable(queuing_test_bin test_queueing.cpp) target_link_libraries(queuing_test_bin coreneuron-unit-test) -add_test(NAME queuing_test COMMAND ${TEST_EXEC_PREFIX} $) +add_test(NAME queuing_test COMMAND $) cpp_cc_configure_sanitizers(TARGET queuing_test_bin TEST queuing_test) From 9d2ce472636d352909f85c1a689b38e63df565de Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 17 Aug 2022 09:03:19 +0200 Subject: [PATCH 089/128] submodule --- external/nmodl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/nmodl b/external/nmodl index d44340f48..22361bdbb 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit d44340f4805d08ffa55510448ab48602a2635c62 +Subproject commit 22361bdbbe1a7c2874aa93c49e7e601858fc5abf From 467f1327033853cef34b30bd3933f38131722ecb Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 17 Aug 2022 11:02:31 +0200 Subject: [PATCH 090/128] 
CORENEURON_GPU_DEBUG: add environment variable that enables cnrn_target_* debug messages. --- coreneuron/gpu/nrn_acc_manager.cpp | 93 ++++++++++++++++++++++++++++++ coreneuron/sim/multicore.hpp | 1 + coreneuron/utils/offload.hpp | 80 +++++++++++++++++++------ 3 files changed, 155 insertions(+), 19 deletions(-) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index 13ed8d109..cd6ab939e 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -32,6 +32,13 @@ #include #endif +#if __has_include() +#define USE_CXXABI +#include +#include +#include +#endif + #ifdef CORENEURON_ENABLE_PRESENT_TABLE #include #include @@ -44,6 +51,38 @@ std::shared_mutex present_table_mutex; } // namespace #endif +namespace { +/** @brief Try to demangle a type name, return the mangled name on failure. + */ +std::string cxx_demangle(const char* mangled) { +#ifdef USE_CXXABI + int status{}; + // Note that the third argument to abi::__cxa_demangle returns the length of + // the allocated buffer, which may be larger than strlen(demangled) + 1. + std::unique_ptr demangled{ + abi::__cxa_demangle(mangled, nullptr, nullptr, &status), free}; + return status ? 
mangled : demangled.get(); +#else + return mangled; +#endif +} +bool cnrn_target_debug_output_enabled() { + const char* env = std::getenv("CORENEURON_GPU_DEBUG"); + if (!env) { + return false; + } + std::string env_s{env}; + if (env_s == "1") { + return true; + } else if (env_s == "0") { + return false; + } else { + throw std::runtime_error("CORENEURON_GPU_DEBUG must be set to 0 or 1 (got " + env_s + ")"); + } +} +bool cnrn_target_enable_debug{cnrn_target_debug_output_enabled()}; +} // namespace + namespace coreneuron { extern InterleaveInfo* interleave_info; void nrn_ion_global_map_copyto_device(); @@ -51,6 +90,60 @@ void nrn_ion_global_map_delete_from_device(); void nrn_VecPlay_copyto_device(NrnThread* nt, void** d_vecplay); void nrn_VecPlay_delete_from_device(NrnThread* nt); +void cnrn_target_copyin_debug(std::string_view file, + int line, + std::size_t sizeof_T, + std::type_info const& typeid_T, + void const* h_ptr, + std::size_t len, + void* d_ptr) { + if (!cnrn_target_enable_debug) { + return; + } + std::cerr << file << ':' << line << ": cnrn_target_copyin<" << cxx_demangle(typeid_T.name()) + << ">(" << h_ptr << ", " << len << " * " << sizeof_T << " = " << len * sizeof_T + << ") -> " << d_ptr << std::endl; +} +void cnrn_target_delete_debug(std::string_view file, + int line, + std::size_t sizeof_T, + std::type_info const& typeid_T, + void const* h_ptr, + std::size_t len) { + if (!cnrn_target_enable_debug) { + return; + } + std::cerr << file << ':' << line << ": cnrn_target_delete<" << cxx_demangle(typeid_T.name()) + << ">(" << h_ptr << ", " << len << " * " << sizeof_T << " = " << len * sizeof_T << ')' + << std::endl; +} +void cnrn_target_deviceptr_debug(std::string_view file, + int line, + std::size_t /* sizeof_T */, + std::type_info const& typeid_T, + void const* h_ptr, + void* d_ptr) { + if (!cnrn_target_enable_debug) { + return; + } + std::cerr << file << ':' << line << ": cnrn_target_device_ptr<" << cxx_demangle(typeid_T.name()) + << ">(" << h_ptr << ") -> 
" << d_ptr << std::endl; +} +void cnrn_target_memcpy_to_device_debug(std::string_view file, + int line, + std::size_t sizeof_T, + std::type_info const& typeid_T, + void const* h_ptr, + std::size_t len, + void* d_ptr) { + if (!cnrn_target_enable_debug) { + return; + } + std::cerr << file << ':' << line << ": cnrn_target_memcpy_to_device<" + << cxx_demangle(typeid_T.name()) << ">(" << d_ptr << ", " << h_ptr << ", " << len + << " * " << sizeof_T << " = " << len * sizeof_T << ')' << std::endl; +} + #ifdef CORENEURON_ENABLE_PRESENT_TABLE void* cnrn_target_deviceptr_impl(void const* h_ptr) { if (!h_ptr) { diff --git a/coreneuron/sim/multicore.hpp b/coreneuron/sim/multicore.hpp index 3e06e3585..349e057c5 100644 --- a/coreneuron/sim/multicore.hpp +++ b/coreneuron/sim/multicore.hpp @@ -164,6 +164,7 @@ void nrn_multithread_job(F&& job, Args&&... args) { #pragma omp parallel for private(i) shared(nrn_threads, job, nrn_nthread, \ nrnmpi_myid) schedule(static, 1) + // FIXME: multiple forwarding of the same arguments... 
for (i = 0; i < nrn_nthread; ++i) { job(nrn_threads + i, std::forward(args)...); } diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp index f37724bb4..a487a92fe 100644 --- a/coreneuron/utils/offload.hpp +++ b/coreneuron/utils/offload.hpp @@ -23,8 +23,35 @@ #endif #include +#include namespace coreneuron { +void cnrn_target_copyin_debug(std::string_view file, + int line, + std::size_t sizeof_T, + std::type_info const& typeid_T, + void const* h_ptr, + std::size_t len, + void* d_ptr); +void cnrn_target_delete_debug(std::string_view file, + int line, + std::size_t sizeof_T, + std::type_info const& typeid_T, + void const* h_ptr, + std::size_t len); +void cnrn_target_deviceptr_debug(std::string_view file, + int line, + std::size_t sizeof_T, + std::type_info const& typeid_T, + void const* h_ptr, + void* d_ptr); +void cnrn_target_memcpy_to_device_debug(std::string_view file, + int line, + std::size_t sizeof_T, + std::type_info const& typeid_T, + void const* h_ptr, + std::size_t len, + void* d_ptr); #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENACC) && !defined(CORENEURON_UNIFIED_MEMORY) // Homegrown implementation for buggy NVHPC versions (<=22.3?) 
@@ -35,52 +62,55 @@ void cnrn_target_delete_update_present_table(void const* h_ptr, std::size_t len) #endif template -T* cnrn_target_deviceptr(const T* h_ptr) { +T* cnrn_target_deviceptr(std::string_view file, int line, const T* h_ptr) { + T* d_ptr{}; #ifdef CORENEURON_ENABLE_PRESENT_TABLE - return static_cast(cnrn_target_deviceptr_impl(h_ptr)); + d_ptr = static_cast(cnrn_target_deviceptr_impl(h_ptr)); #elif defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENACC) - return static_cast(acc_deviceptr(const_cast(h_ptr))); + d_ptr = static_cast(acc_deviceptr(const_cast(h_ptr))); #elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENMP) - T const* d_ptr{}; - nrn_pragma_omp(target data use_device_ptr(h_ptr)) - { d_ptr = h_ptr; } - - return const_cast(d_ptr); + { d_ptr = const_cast(h_ptr); } #else throw std::runtime_error( "cnrn_target_deviceptr() not implemented without OpenACC/OpenMP and gpu build"); #endif + cnrn_target_deviceptr_debug(file, line, sizeof(T), typeid(T), h_ptr, d_ptr); + return d_ptr; } template -T* cnrn_target_copyin(const T* h_ptr, std::size_t len = 1) { +T* cnrn_target_copyin(std::string_view file, int line, const T* h_ptr, std::size_t len = 1) { + T* d_ptr{}; #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENACC) - auto* d_ptr = static_cast(acc_copyin(const_cast(h_ptr), len * sizeof(T))); -#ifdef CORENEURON_ENABLE_PRESENT_TABLE - cnrn_target_copyin_update_present_table(h_ptr, d_ptr, len * sizeof(T)); -#endif - return d_ptr; + d_ptr = static_cast(acc_copyin(const_cast(h_ptr), len * sizeof(T))); #elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENMP) nrn_pragma_omp(target enter data map(to : h_ptr[:len])) - return cnrn_target_deviceptr(h_ptr); + nrn_pragma_omp(target data use_device_ptr(h_ptr)) + { d_ptr = const_cast(h_ptr); } #else throw std::runtime_error( 
"cnrn_target_copyin() not implemented without OpenACC/OpenMP and gpu build"); #endif +#ifdef CORENEURON_ENABLE_PRESENT_TABLE + cnrn_target_copyin_update_present_table(h_ptr, d_ptr, len * sizeof(T)); +#endif + cnrn_target_copyin_debug(file, line, sizeof(T), typeid(T), h_ptr, len, d_ptr); + return d_ptr; } template -void cnrn_target_delete(T* h_ptr, std::size_t len = 1) { -#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ - defined(_OPENACC) +void cnrn_target_delete(std::string_view file, int line, T* h_ptr, std::size_t len = 1) { + cnrn_target_delete_debug(file, line, sizeof(T), typeid(T), h_ptr, len); #ifdef CORENEURON_ENABLE_PRESENT_TABLE cnrn_target_delete_update_present_table(h_ptr, len * sizeof(T)); #endif +#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ + defined(_OPENACC) acc_delete(h_ptr, len * sizeof(T)); #elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENMP) @@ -92,7 +122,12 @@ void cnrn_target_delete(T* h_ptr, std::size_t len = 1) { } template -void cnrn_target_memcpy_to_device(T* d_ptr, const T* h_ptr, std::size_t len = 1) { +void cnrn_target_memcpy_to_device(std::string_view file, + int line, + T* d_ptr, + const T* h_ptr, + std::size_t len = 1) { + cnrn_target_memcpy_to_device_debug(file, line, sizeof(T), typeid(T), h_ptr, len, d_ptr); #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENACC) acc_memcpy_to_device(d_ptr, const_cast(h_ptr), len * sizeof(T)); @@ -111,4 +146,11 @@ void cnrn_target_memcpy_to_device(T* d_ptr, const T* h_ptr, std::size_t len = 1) #endif } +// Replace with std::source_location once we have C++20 +#define cnrn_target_copyin(...) cnrn_target_copyin(__FILE__, __LINE__, __VA_ARGS__) +#define cnrn_target_delete(...) cnrn_target_delete(__FILE__, __LINE__, __VA_ARGS__) +#define cnrn_target_deviceptr(...) 
cnrn_target_deviceptr(__FILE__, __LINE__, __VA_ARGS__) +#define cnrn_target_memcpy_to_device(...) \ + cnrn_target_memcpy_to_device(__FILE__, __LINE__, __VA_ARGS__) + } // namespace coreneuron From b23acd70bbcf2e68ebd3a1b99ac163f8509d8c84 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 17 Aug 2022 11:18:05 +0200 Subject: [PATCH 091/128] avoid sizeof(void) --- coreneuron/gpu/nrn_acc_manager.cpp | 1 - coreneuron/utils/offload.hpp | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index cd6ab939e..ab2d16ce2 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -119,7 +119,6 @@ void cnrn_target_delete_debug(std::string_view file, } void cnrn_target_deviceptr_debug(std::string_view file, int line, - std::size_t /* sizeof_T */, std::type_info const& typeid_T, void const* h_ptr, void* d_ptr) { diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp index a487a92fe..ff49477ce 100644 --- a/coreneuron/utils/offload.hpp +++ b/coreneuron/utils/offload.hpp @@ -41,7 +41,6 @@ void cnrn_target_delete_debug(std::string_view file, std::size_t len); void cnrn_target_deviceptr_debug(std::string_view file, int line, - std::size_t sizeof_T, std::type_info const& typeid_T, void const* h_ptr, void* d_ptr); @@ -77,7 +76,7 @@ T* cnrn_target_deviceptr(std::string_view file, int line, const T* h_ptr) { throw std::runtime_error( "cnrn_target_deviceptr() not implemented without OpenACC/OpenMP and gpu build"); #endif - cnrn_target_deviceptr_debug(file, line, sizeof(T), typeid(T), h_ptr, d_ptr); + cnrn_target_deviceptr_debug(file, line, typeid(T), h_ptr, d_ptr); return d_ptr; } From 26c394b08f9ed07ad3b3f3722da6255965e85801 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 17 Aug 2022 14:58:15 +0200 Subject: [PATCH 092/128] try and fix ispc --- external/nmodl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/nmodl 
b/external/nmodl index 22361bdbb..1bd24b21c 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 22361bdbbe1a7c2874aa93c49e7e601858fc5abf +Subproject commit 1bd24b21c6480fbb38a7ea89ccb1ff9491d89f85 From d74796a4b4cf9bda7ca8d67c4c8bf1e9667a307f Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 17 Aug 2022 15:05:18 +0200 Subject: [PATCH 093/128] drop ispc_celsius --- coreneuron/apps/main1.cpp | 6 ------ coreneuron/mechanism/nrnoc_ml.ispc | 2 -- coreneuron/utils/ispc/globals.cpp | 17 ----------------- external/nmodl | 2 +- 4 files changed, 1 insertion(+), 26 deletions(-) delete mode 100644 coreneuron/utils/ispc/globals.cpp diff --git a/coreneuron/apps/main1.cpp b/coreneuron/apps/main1.cpp index 4408234b6..e79db2104 100644 --- a/coreneuron/apps/main1.cpp +++ b/coreneuron/apps/main1.cpp @@ -58,9 +58,6 @@ bool corenrn_units_use_legacy() { void (*nrn2core_part2_clean_)(); -// cf. utils/ispc_globals.c -extern double ispc_celsius; - /** * If "export OMP_NUM_THREADS=n" is not set then omp by default sets * the number of threads equal to the number of cores on this node. @@ -244,9 +241,6 @@ void nrn_init_and_load_data(int argc, corenrn_param.celsius = celsius; - // for ispc backend - ispc_celsius = celsius; - // create net_cvode instance mk_netcvode(); diff --git a/coreneuron/mechanism/nrnoc_ml.ispc b/coreneuron/mechanism/nrnoc_ml.ispc index 6b196eaf3..fa8079fb2 100644 --- a/coreneuron/mechanism/nrnoc_ml.ispc +++ b/coreneuron/mechanism/nrnoc_ml.ispc @@ -153,8 +153,6 @@ struct NrnThread { void* mapping; }; -extern uniform double ispc_celsius; - // NOTE : this implementation is duplicated from "coreneuron/network/cvodestb.cpp" // If changes are required, make sure to change CPP as well. 
static inline int at_time(uniform NrnThread* nt, varying double te) { diff --git a/coreneuron/utils/ispc/globals.cpp b/coreneuron/utils/ispc/globals.cpp deleted file mode 100644 index 0344bf1b8..000000000 --- a/coreneuron/utils/ispc/globals.cpp +++ /dev/null @@ -1,17 +0,0 @@ -/* -# ============================================================================= -# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL -# -# See top-level LICENSE file for details. -# ============================================================================= -*/ - -/* - * Coreneuron global variables are declared at least in the coreneuron namespace. In ispc it is, - * however, not possible to access variables within C++ namespaces. To be able to access these - * variables from ispc kernels, we declare them in global namespace and a C linkage file. - */ - -extern "C" { -double ispc_celsius; -} diff --git a/external/nmodl b/external/nmodl index 1bd24b21c..8559c925c 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 1bd24b21c6480fbb38a7ea89ccb1ff9491d89f85 +Subproject commit 8559c925cf9791dc9f40764ee0eb4c9fd6d9f57c From 1c238265d08b8ce5e4025dee5c3f58b19b20a131 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 17 Aug 2022 17:21:19 +0200 Subject: [PATCH 094/128] ispc fix --- external/nmodl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/nmodl b/external/nmodl index 8559c925c..a69cb558e 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 8559c925cf9791dc9f40764ee0eb4c9fd6d9f57c +Subproject commit a69cb558e03b3f53ffcd545cee9fd7157beee33a From d1757ebb303f78dd2f24706cbb011fda206be2b1 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 17 Aug 2022 18:50:08 +0200 Subject: [PATCH 095/128] submodule --- external/nmodl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/nmodl b/external/nmodl index a69cb558e..13f00b4c3 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject 
commit a69cb558e03b3f53ffcd545cee9fd7157beee33a +Subproject commit 13f00b4c3ebac25078c3cff20259f4e1c4855291 From 2df364f75a93c2b7478deead824850e8ee169ce4 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 17 Aug 2022 18:50:23 +0200 Subject: [PATCH 096/128] lots more coreneuron builds, presumably temporarily --- .gitlab-ci.yml | 101 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 100 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3ba880c40..3da9d2b52 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -130,13 +130,72 @@ build:coreneuron:nmodl:nvhpc:acc: SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu~shared+tests~legacy-unit+sympy build_type=RelWithDebInfo needs: ["build:nmodl"] +build:coreneuron:nmodl:nvhpc:acc:debug: + extends: [.build_coreneuron, .spack_nvhpc] + variables: + SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu~shared+tests~legacy-unit+sympy build_type=Debug + needs: ["build:nmodl"] + +build:coreneuron:nmodl:nvhpc:acc:shared: + extends: [.build_coreneuron, .spack_nvhpc] + variables: + SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu+shared+tests~legacy-unit+sympy build_type=RelWithDebInfo + needs: ["build:nmodl"] + +build:coreneuron:nmodl:nvhpc:acc:shared:debug: + extends: [.build_coreneuron, .spack_nvhpc] + variables: + SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu+shared+tests~legacy-unit+sympy build_type=Debug + needs: ["build:nmodl"] + +build:coreneuron:nmodl:nvhpc:acc:legacy: + extends: [.build_coreneuron, .spack_nvhpc] + variables: + SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu~shared+tests~legacy-unit~sympy build_type=RelWithDebInfo + needs: ["build:nmodl"] + +build:coreneuron:nmodl:nvhpc:acc:debug:legacy: + extends: [.build_coreneuron, .spack_nvhpc] + variables: + SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu~shared+tests~legacy-unit~sympy build_type=Debug + needs: ["build:nmodl"] + +build:coreneuron:nmodl:nvhpc:acc:shared:legacy: + extends: [.build_coreneuron, .spack_nvhpc] + variables: + 
SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu+shared+tests~legacy-unit~sympy build_type=RelWithDebInfo + needs: ["build:nmodl"] + +build:coreneuron:nmodl:nvhpc:acc:shared:debug:legacy: + extends: [.build_coreneuron, .spack_nvhpc] + variables: + SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu+shared+tests~legacy-unit~sympy build_type=Debug + needs: ["build:nmodl"] + build:coreneuron:nmodl:nvhpc:omp: extends: [.build_coreneuron, .spack_nvhpc] variables: - # Sympy + OpenMP target offload does not currently work with NVHPC (?) + SPACK_PACKAGE_SPEC: +caliper+nmodl+openmp+gpu~shared+tests~legacy-unit+sympy build_type=RelWithDebInfo + needs: ["build:nmodl"] + +build:coreneuron:nmodl:nvhpc:omp:debug: + extends: [.build_coreneuron, .spack_nvhpc] + variables: + SPACK_PACKAGE_SPEC: +caliper+nmodl+openmp+gpu~shared+tests~legacy-unit+sympy build_type=Debug + needs: ["build:nmodl"] + +build:coreneuron:nmodl:nvhpc:omp:legacy: + extends: [.build_coreneuron, .spack_nvhpc] + variables: SPACK_PACKAGE_SPEC: +caliper+nmodl+openmp+gpu~shared+tests~legacy-unit~sympy build_type=RelWithDebInfo needs: ["build:nmodl"] +build:coreneuron:nmodl:nvhpc:omp:debug:legacy: + extends: [.build_coreneuron, .spack_nvhpc] + variables: + SPACK_PACKAGE_SPEC: +caliper+nmodl+openmp+gpu~shared+tests~legacy-unit~sympy build_type=Debug + needs: ["build:nmodl"] + build:coreneuron:mod2c:intel: extends: [.build_coreneuron, .spack_intel] variables: @@ -206,10 +265,50 @@ test:coreneuron:nmodl:nvhpc:omp: extends: [.ctest, .gpu_node] needs: ["build:coreneuron:nmodl:nvhpc:omp"] +test:coreneuron:nmodl:nvhpc:omp:debug: + extends: [.ctest, .gpu_node] + needs: ["build:coreneuron:nmodl:nvhpc:omp:debug"] + +test:coreneuron:nmodl:nvhpc:omp:legacy: + extends: [.ctest, .gpu_node] + needs: ["build:coreneuron:nmodl:nvhpc:omp:legacy"] + +test:coreneuron:nmodl:nvhpc:omp:debug:legacy: + extends: [.ctest, .gpu_node] + needs: ["build:coreneuron:nmodl:nvhpc:omp:debug:legacy"] + test:coreneuron:nmodl:nvhpc:acc: extends: [.ctest, 
.gpu_node] needs: ["build:coreneuron:nmodl:nvhpc:acc"] +test:coreneuron:nmodl:nvhpc:acc:debug: + extends: [.ctest, .gpu_node] + needs: ["build:coreneuron:nmodl:nvhpc:acc:debug"] + +test:coreneuron:nmodl:nvhpc:acc:shared: + extends: [.ctest, .gpu_node] + needs: ["build:coreneuron:nmodl:nvhpc:acc:shared"] + +test:coreneuron:nmodl:nvhpc:acc:shared:debug: + extends: [.ctest, .gpu_node] + needs: ["build:coreneuron:nmodl:nvhpc:acc:shared:debug"] + +test:coreneuron:nmodl:nvhpc:acc:legacy: + extends: [.ctest, .gpu_node] + needs: ["build:coreneuron:nmodl:nvhpc:acc:legacy"] + +test:coreneuron:nmodl:nvhpc:acc:debug:legacy: + extends: [.ctest, .gpu_node] + needs: ["build:coreneuron:nmodl:nvhpc:acc:debug:legacy"] + +test:coreneuron:nmodl:nvhpc:acc:shared:legacy: + extends: [.ctest, .gpu_node] + needs: ["build:coreneuron:nmodl:nvhpc:acc:shared:legacy"] + +test:coreneuron:nmodl:nvhpc:acc:shared:debug:legacy: + extends: [.ctest, .gpu_node] + needs: ["build:coreneuron:nmodl:nvhpc:acc:shared:debug:legacy"] + test:coreneuron:mod2c:intel: extends: [.ctest] needs: ["build:coreneuron:mod2c:intel"] From da8d64fd40605df7a246127f1003d4766b801d37 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 18 Aug 2022 12:19:10 +0200 Subject: [PATCH 097/128] cleanup on mod2c side --- coreneuron/gpu/nrn_acc_manager.cpp | 13 ------------- coreneuron/io/nrn_setup.cpp | 3 --- coreneuron/mechanism/mechanism.hpp | 15 +++++---------- coreneuron/utils/offload.hpp | 11 +++++++++++ external/mod2c | 2 +- 5 files changed, 17 insertions(+), 27 deletions(-) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index ab2d16ce2..fcaf8047b 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -240,13 +240,6 @@ static Memb_list* copy_ml_to_device(const Memb_list* ml, int type) { } - if (ml->global_variables) { - assert(ml->global_variables_size); - void* d_glob_vars = cnrn_target_copyin(static_cast(ml->global_variables), - 
ml->global_variables_size); - cnrn_target_memcpy_to_device(&(d_ml->global_variables), &d_glob_vars); - } - int n = ml->nodecount; int szp = corenrn.get_prop_param_size()[type]; int szdp = corenrn.get_prop_dparam_size()[type]; @@ -420,12 +413,6 @@ static void delete_ml_from_device(Memb_list* ml, int type) { } cnrn_target_delete(ml->nodeindices, n); - if (ml->global_variables) { - assert(ml->global_variables_size); - cnrn_target_delete(static_cast(ml->global_variables), - ml->global_variables_size); - } - if (ml->instance) { assert(ml->instance_size); cnrn_target_delete(static_cast(ml->instance), ml->instance_size); diff --git a/coreneuron/io/nrn_setup.cpp b/coreneuron/io/nrn_setup.cpp index 5aed57fbb..b9edf7814 100644 --- a/coreneuron/io/nrn_setup.cpp +++ b/coreneuron/io/nrn_setup.cpp @@ -759,9 +759,6 @@ void nrn_cleanup() { (*priv_dtor)(nt, ml, tml->index); assert(!ml->instance); assert(!ml->instance_size); - // TODO make mod2c use `instance` instead of `global_variables` - assert(!ml->global_variables); - assert(!ml->global_variables_size); } NetReceiveBuffer_t* nrb = ml->_net_receive_buffer; diff --git a/coreneuron/mechanism/mechanism.hpp b/coreneuron/mechanism/mechanism.hpp index baa872c85..9335e7530 100644 --- a/coreneuron/mechanism/mechanism.hpp +++ b/coreneuron/mechanism/mechanism.hpp @@ -143,16 +143,11 @@ struct Memb_list { NetSendBuffer_t* _net_send_buffer = nullptr; int nodecount; /* actual node count */ int _nodecount_padded; - // Not obvious that these need to be distinct (i.e. we could just have - // `instance` and `instance_size`, and use them in mod2c for global - // variables while NMODL could use the existing instance struct for globals - // too). nrn_acc_manager.cpp could handle data movement to/from the - // accelerator if the "constructor" in the translated MOD file code was - // called before the main nrn_acc_manager methods that copy - // thread/mechanism data to the device. 
- void* instance = nullptr; /* mechanism instance struct from NMODL */ + // nrn_acc_manager.cpp handles data movement to/from the accelerator as the + // "private constructor" in the translated MOD file code is called before + // the main nrn_acc_manager methods that copy thread/mechanism data to the + // device + void* instance{nullptr}; /* mechanism instance struct */ std::size_t instance_size{}; - void* global_variables = nullptr; /* global variables struct for each mechanism */ - std::size_t global_variables_size{}; }; } // namespace coreneuron diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp index ff49477ce..920b786a6 100644 --- a/coreneuron/utils/offload.hpp +++ b/coreneuron/utils/offload.hpp @@ -145,11 +145,22 @@ void cnrn_target_memcpy_to_device(std::string_view file, #endif } +template +void cnrn_target_update_on_device(std::string_view file, + int line, + const T* h_ptr, + std::size_t len = 1) { + auto* d_ptr = cnrn_target_deviceptr(file, line, h_ptr); + cnrn_target_memcpy_to_device(file, line, d_ptr, h_ptr); +} + // Replace with std::source_location once we have C++20 #define cnrn_target_copyin(...) cnrn_target_copyin(__FILE__, __LINE__, __VA_ARGS__) #define cnrn_target_delete(...) cnrn_target_delete(__FILE__, __LINE__, __VA_ARGS__) #define cnrn_target_deviceptr(...) cnrn_target_deviceptr(__FILE__, __LINE__, __VA_ARGS__) #define cnrn_target_memcpy_to_device(...) \ cnrn_target_memcpy_to_device(__FILE__, __LINE__, __VA_ARGS__) +#define cnrn_target_update_on_device(...) 
\ + cnrn_target_update_on_device(__FILE__, __LINE__, __VA_ARGS__) } // namespace coreneuron diff --git a/external/mod2c b/external/mod2c index 8b754b35b..1264bc364 160000 --- a/external/mod2c +++ b/external/mod2c @@ -1 +1 @@ -Subproject commit 8b754b35b6ea3088a713590bc5d72af3e2f8ef2b +Subproject commit 1264bc364a89d6d63faa47f82e761a1a7de726b3 From be420bfcc54780587d5d8cb12bbf2f1c672dd608 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 18 Aug 2022 18:20:28 +0200 Subject: [PATCH 098/128] nmodl submodule --- external/nmodl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/nmodl b/external/nmodl index 13f00b4c3..59a971157 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 13f00b4c3ebac25078c3cff20259f4e1c4855291 +Subproject commit 59a9711578e6841c1c3389c5f3685cc4c6bc7a88 From 8dcb6019514843492b94939f32ee38ae3855f423 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 19 Aug 2022 09:51:55 +0200 Subject: [PATCH 099/128] nmodl submodule, eigen shim comments --- coreneuron/CMakeLists.txt | 12 +++++++----- external/nmodl | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index 5392a9c48..c4143b48c 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -111,11 +111,13 @@ if(CORENRN_ENABLE_GPU) # this cannot be included in the same shared library as the rest of the OpenACC code. set(CORENEURON_CUDA_FILES ${CMAKE_CURRENT_SOURCE_DIR}/permute/cellorder.cu) - # Eigen-3.5+ provides better GPU support. However, some functions cannot be called directly from - # within an OpenACC region. Therefore, we need to wrap them in a special API (decorate them with - # __device__ & acc routine tokens), which allows us to eventually call them from OpenACC. Calling - # these functions from CUDA kernels presents no issue ... TODO is it going to work to call these - # from libcoreneuron-cuda.so? probably not... 
+ # Eigen functions cannot be called directly from OpenACC regions, but Eigen is sort-of compatible + # with being compiled as CUDA code. Because of + # https://forums.developer.nvidia.com/t/cannot-dynamically-load-a-shared-library-containing-both-openacc-and-cuda-code/210972 + # this has to mean `nvc++ -cuda` rather than `nvcc`. We explicitly instantiate Eigen functions for + # different matrix sizes in partial_piv_lu.cpp (with CUDA attributes but without OpenACC or OpenMP + # annotations) and dispatch to these from a wrapper in partial_piv_lu.h that does have + # OpenACC/OpenMP annotations. if(CORENRN_ENABLE_NMODL AND EXISTS ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cpp) list(APPEND CORENEURON_CODE_FILES ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cpp) endif() diff --git a/external/nmodl b/external/nmodl index 59a971157..bed61a5d0 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 59a9711578e6841c1c3389c5f3685cc4c6bc7a88 +Subproject commit bed61a5d0380bd8260776f45515dd40878fafaf7 From a11b4616276a2670e870a9d1faa148cc6dd484cd Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 19 Aug 2022 12:58:06 +0200 Subject: [PATCH 100/128] submodules --- external/mod2c | 2 +- external/nmodl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/mod2c b/external/mod2c index 1264bc364..e40c7c093 160000 --- a/external/mod2c +++ b/external/mod2c @@ -1 +1 @@ -Subproject commit 1264bc364a89d6d63faa47f82e761a1a7de726b3 +Subproject commit e40c7c093f70bfba72ade6802e4ba7d242eca03a diff --git a/external/nmodl b/external/nmodl index bed61a5d0..e0183df5b 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit bed61a5d0380bd8260776f45515dd40878fafaf7 +Subproject commit e0183df5b9f3d01cb35af21c197090c42f1c3354 From 1cce3af2568335420d995034bf52487895c1311a Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 19 Aug 2022 14:30:25 +0200 Subject: [PATCH 101/128] swap order so needs_foo.o 
comes before libfoo.so --- extra/nrnivmodl_core_makefile.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in index 4d7df0388..135a9e722 100644 --- a/extra/nrnivmodl_core_makefile.in +++ b/extra/nrnivmodl_core_makefile.in @@ -222,8 +222,9 @@ coremech_lib_shared: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always (cd $(MOD_OBJS_DIR)/libcoreneuron-core && ar x $(CORENRN_LIB_DIR)/libcoreneuron-core.a) $(CXX_SHARED_LIB_CMD) $(ENGINEMECH_OBJ) -o ${COREMECH_LIB_PATH} $(ALL_OBJS) \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ - $(LDFLAGS) ${SONAME_OPTION} @CORENEURON_LINKER_START_GROUP@ \ + @CORENEURON_LINKER_START_GROUP@ \ $(MOD_OBJS_DIR)/libcoreneuron-core/*.o @CORENEURON_LINKER_END_GROUP@ \ + $(LDFLAGS) ${SONAME_OPTION} \ -Wl,-rpath,$(CORENRN_LIB_DIR) -L$(CORENRN_LIB_DIR) # cleanup rm $(MOD_OBJS_DIR)/libcoreneuron-core/*.o From 188a93535a9407feb8974fa6187c2837bc5b5392 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 19 Aug 2022 14:49:54 +0200 Subject: [PATCH 102/128] submodule --- external/nmodl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/nmodl b/external/nmodl index e0183df5b..980a19534 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit e0183df5b9f3d01cb35af21c197090c42f1c3354 +Subproject commit 980a195349091cd64455a75ed9ec574efe948962 From 195e75d27dcc68d38cbfc15f8ff3fad6545414be Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 19 Aug 2022 15:14:52 +0200 Subject: [PATCH 103/128] revert some incomplete unified memory changes, add comments, private present table only for nvhpc <= 22.3 --- coreneuron/gpu/nrn_acc_manager.cpp | 2 ++ .../mechanism/mech/mod2c_core_thread.hpp | 20 +++++++++---------- coreneuron/sim/multicore.hpp | 2 +- coreneuron/utils/offload.hpp | 8 +++++--- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index fcaf8047b..eed8d0c0f 
100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -172,6 +172,7 @@ void cnrn_target_copyin_update_present_table(void const* h_ptr, void* d_ptr, std return; } std::lock_guard _{present_table_mutex}; + // TODO include more pendantic overlap checking? auto const result = present_table.emplace(static_cast(h_ptr), std::make_pair(len, static_cast(d_ptr))); } @@ -180,6 +181,7 @@ void cnrn_target_delete_update_present_table(void const* h_ptr, std::size_t len) return; } std::lock_guard _{present_table_mutex}; + // TODO properly matching OpenACC semantics would require a reference count auto const iter = present_table.find(static_cast(h_ptr)); assert(iter != present_table.end()); assert(iter->second.first == len); diff --git a/coreneuron/mechanism/mech/mod2c_core_thread.hpp b/coreneuron/mechanism/mech/mod2c_core_thread.hpp index e224137e0..d18160f3a 100644 --- a/coreneuron/mechanism/mech/mod2c_core_thread.hpp +++ b/coreneuron/mechanism/mech/mod2c_core_thread.hpp @@ -44,16 +44,16 @@ struct Item { using List = Item; /* list of mixed items */ -struct SparseObj: public MemoryManaged { /* all the state information */ - Elm** rowst{}; /* link to first element in row (solution order)*/ - Elm** diag{}; /* link to pivot element in row (solution order)*/ - void* elmpool{}; /* no interthread cache line sharing for elements */ - unsigned neqn{}; /* number of equations */ - unsigned _cntml_padded{}; /* number of instances */ - unsigned* varord{}; /* row and column order for pivots */ - double* rhs{}; /* initially- right hand side finally - answer */ - unsigned* ngetcall{}; /* per instance counter for number of calls to _getelm */ - int phase{}; /* 0-solution phase; 1-count phase; 2-build list phase */ +struct SparseObj { /* all the state information */ + Elm** rowst{}; /* link to first element in row (solution order)*/ + Elm** diag{}; /* link to pivot element in row (solution order)*/ + void* elmpool{}; /* no interthread cache line sharing for 
elements */ + unsigned neqn{}; /* number of equations */ + unsigned _cntml_padded{}; /* number of instances */ + unsigned* varord{}; /* row and column order for pivots */ + double* rhs{}; /* initially- right hand side finally - answer */ + unsigned* ngetcall{}; /* per instance counter for number of calls to _getelm */ + int phase{}; /* 0-solution phase; 1-count phase; 2-build list phase */ int numop{}; unsigned coef_list_size{}; double** coef_list{}; /* pointer to (first instance) value in _getelm order */ diff --git a/coreneuron/sim/multicore.hpp b/coreneuron/sim/multicore.hpp index 349e057c5..a6ac50be0 100644 --- a/coreneuron/sim/multicore.hpp +++ b/coreneuron/sim/multicore.hpp @@ -54,7 +54,7 @@ struct NrnFastImem { double* nrn_sav_d; }; -struct TrajectoryRequests: public MemoryManaged { +struct TrajectoryRequests { void** vpr; /* PlayRecord Objects known by NEURON */ double** scatter; /* if bsize == 0, each time step */ double** varrays; /* if bsize > 0, the Vector data pointers. */ diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp index 920b786a6..184211c19 100644 --- a/coreneuron/utils/offload.hpp +++ b/coreneuron/utils/offload.hpp @@ -51,9 +51,11 @@ void cnrn_target_memcpy_to_device_debug(std::string_view file, void const* h_ptr, std::size_t len, void* d_ptr); -#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ - defined(_OPENACC) && !defined(CORENEURON_UNIFIED_MEMORY) -// Homegrown implementation for buggy NVHPC versions (<=22.3?) 
+#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ + defined(_OPENACC) && !defined(CORENEURON_UNIFIED_MEMORY) && defined(__NVCOMPILER_MAJOR__) && \ + defined(__NVCOMPILER_MINOR__) && (__NVCOMPILER_MAJOR__ <= 22) && (__NVCOMPILER_MINOR__ <= 3) +// Homegrown implementation for buggy NVHPC versions (<=22.3), see +// https://forums.developer.nvidia.com/t/acc-deviceptr-does-not-work-in-openacc-code-dynamically-loaded-from-a-shared-library/211599 #define CORENEURON_ENABLE_PRESENT_TABLE void* cnrn_target_deviceptr_impl(void const* h_ptr); void cnrn_target_copyin_update_present_table(void const* h_ptr, void* d_ptr, std::size_t len); From 52de49e4b0d38dcc6e326a8ed35246069357aeeb Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Mon, 22 Aug 2022 15:52:03 +0200 Subject: [PATCH 104/128] nmodl --- external/nmodl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/nmodl b/external/nmodl index 980a19534..7a53be75f 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 980a195349091cd64455a75ed9ec574efe948962 +Subproject commit 7a53be75fa329e5120038b26e53b18dbe3074bd6 From 94fef756d56649779dc91207192bb78db69ca0a2 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Mon, 22 Aug 2022 15:58:47 +0200 Subject: [PATCH 105/128] mod2c --- external/mod2c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/mod2c b/external/mod2c index e40c7c093..77bba7715 160000 --- a/external/mod2c +++ b/external/mod2c @@ -1 +1 @@ -Subproject commit e40c7c093f70bfba72ade6802e4ba7d242eca03a +Subproject commit 77bba771579c9f91a2e10533967486e5e1f429fa From dbd53fce89de5893e77738e21082b74a7e5df81b Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Mon, 22 Aug 2022 15:59:10 +0200 Subject: [PATCH 106/128] set CORENRN_ENABLE_SHARED as global property --- CMake/OpenAccHelper.cmake | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake index 
f232d2bab..a21f8b523 100644 --- a/CMake/OpenAccHelper.cmake +++ b/CMake/OpenAccHelper.cmake @@ -101,7 +101,7 @@ if(CORENRN_ENABLE_GPU) endif() # ============================================================================= -# Initialise global property that will be used by NEURON to link with CoreNEURON +# Initialise global properties that will be used by NEURON to link with CoreNEURON # ============================================================================= if(CORENRN_ENABLE_GPU) # CORENRN_LIB_LINK_FLAGS is the full set of flags needed to link against libcorenrnmech.so: @@ -109,8 +109,8 @@ if(CORENRN_ENABLE_GPU) # to be used when linking the NEURON Python module to make sure it is able to dynamically load # libcorenrnmech.so. set_property(GLOBAL PROPERTY CORENRN_LIB_LINK_FLAGS "${NVHPC_ACC_COMP_FLAGS}") - # Because of if(CORENRN_ENABLE_SHARED) + # Because of # https://forums.developer.nvidia.com/t/dynamically-loading-an-openacc-enabled-shared-library-from-an-executable-compiled-with-nvc-does-not-work/210968 # we have to tell NEURON to pass OpenACC flags when linking special, otherwise we end up with an # `nrniv` binary that cannot dynamically load CoreNEURON in shared-library builds. @@ -118,6 +118,10 @@ if(CORENRN_ENABLE_GPU) endif() endif() +# NEURON needs to have access to this when CoreNEURON is built as a submodule. 
If CoreNEURON is +# installed externally then this is set via coreneuron-config.cmake +set_property(GLOBAL PROPERTY CORENRN_ENABLE_SHARED ${CORENRN_ENABLE_SHARED}) + if(CORENRN_HAVE_NVHPC_COMPILER) if(${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 20.7) # https://forums.developer.nvidia.com/t/many-all-diagnostic-numbers-increased-by-1-from-previous-values/146268/3 From 30ba4b0bdcbdf9f49121b1673621d91a049f7af2 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 23 Aug 2022 14:04:10 +0200 Subject: [PATCH 107/128] re-add global_variables[_size] and more helpers --- coreneuron/gpu/nrn_acc_manager.cpp | 52 ++++++++++++++++++++++-------- coreneuron/io/nrn_setup.cpp | 3 +- coreneuron/mechanism/mechanism.hpp | 5 +-- coreneuron/mechanism/nrnoc_ml.ispc | 2 ++ coreneuron/utils/offload.hpp | 43 +++++++++++++++++------- external/nmodl | 2 +- 6 files changed, 78 insertions(+), 29 deletions(-) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index eed8d0c0f..a05c897d7 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -125,7 +125,18 @@ void cnrn_target_deviceptr_debug(std::string_view file, if (!cnrn_target_enable_debug) { return; } - std::cerr << file << ':' << line << ": cnrn_target_device_ptr<" << cxx_demangle(typeid_T.name()) + std::cerr << file << ':' << line << ": cnrn_target_deviceptr<" << cxx_demangle(typeid_T.name()) + << ">(" << h_ptr << ") -> " << d_ptr << std::endl; +} +void cnrn_target_is_present_debug(std::string_view file, + int line, + std::type_info const& typeid_T, + void const* h_ptr, + void* d_ptr) { + if (!cnrn_target_enable_debug) { + return; + } + std::cerr << file << ':' << line << ": cnrn_target_is_present<" << cxx_demangle(typeid_T.name()) << ">(" << h_ptr << ") -> " << d_ptr << std::endl; } void cnrn_target_memcpy_to_device_debug(std::string_view file, @@ -144,28 +155,39 @@ void cnrn_target_memcpy_to_device_debug(std::string_view file, } #ifdef 
CORENEURON_ENABLE_PRESENT_TABLE -void* cnrn_target_deviceptr_impl(void const* h_ptr) { +template +std::pair cnrn_target_deviceptr_impl(void const* h_ptr) { if (!h_ptr) { - return nullptr; + return {nullptr, false}; } // Concurrent calls to this method are safe, but they must be serialised // w.r.t. calls to the cnrn_target_*_update_present_table methods. std::shared_lock _{present_table_mutex}; - assert(!present_table.empty()); + if (present_table.empty()) { + return {nullptr, must_be_present_or_null}; + } // prev(first iterator greater than h_ptr or last if not found) gives the first iterator less // than or equal to h_ptr auto const iter = std::prev(std::upper_bound( present_table.begin(), present_table.end(), h_ptr, [](void const* hp, auto const& entry) { return hp < entry.first; })); - assert(iter != present_table.end()); + if (iter == present_table.end()) { + return {nullptr, must_be_present_or_null}; + } std::byte const* const h_byte_ptr{static_cast(h_ptr)}; std::byte const* const h_start_of_block{iter->first}; std::size_t const block_size{iter->second.first}; std::byte* const d_start_of_block{iter->second.second}; - assert(h_byte_ptr < h_start_of_block + block_size); - return d_start_of_block + (h_byte_ptr - h_start_of_block); + bool const is_present{h_byte_ptr < h_start_of_block + block_size}; + if (!is_present) { + return {nullptr, must_be_present_or_null}; + } + return {d_start_of_block + (h_byte_ptr - h_start_of_block), false}; } +template std::pair cnrn_target_deviceptr_impl(void const*); +template std::pair cnrn_target_deviceptr_impl(void const*); + void cnrn_target_copyin_update_present_table(void const* h_ptr, void* d_ptr, std::size_t len) { if (!h_ptr) { assert(!d_ptr); @@ -235,10 +257,11 @@ static Memb_list* copy_ml_to_device(const Memb_list* ml, int type) { auto d_ml = cnrn_target_copyin(ml); - if (ml->instance) { - assert(ml->instance_size); - void* d_inst = cnrn_target_copyin(static_cast(ml->instance), ml->instance_size); - 
cnrn_target_memcpy_to_device(&(d_ml->instance), &d_inst); + if (ml->global_variables) { + assert(ml->global_variables_size); + void* d_inst = cnrn_target_copyin(static_cast(ml->global_variables), + ml->global_variables_size); + cnrn_target_memcpy_to_device(&(d_ml->global_variables), &d_inst); } @@ -415,9 +438,10 @@ static void delete_ml_from_device(Memb_list* ml, int type) { } cnrn_target_delete(ml->nodeindices, n); - if (ml->instance) { - assert(ml->instance_size); - cnrn_target_delete(static_cast(ml->instance), ml->instance_size); + if (ml->global_variables) { + assert(ml->global_variables_size); + cnrn_target_delete(static_cast(ml->global_variables), + ml->global_variables_size); } cnrn_target_delete(ml); diff --git a/coreneuron/io/nrn_setup.cpp b/coreneuron/io/nrn_setup.cpp index b9edf7814..703e853d8 100644 --- a/coreneuron/io/nrn_setup.cpp +++ b/coreneuron/io/nrn_setup.cpp @@ -758,7 +758,8 @@ void nrn_cleanup() { if (auto* const priv_dtor = corenrn.get_memb_func(tml->index).private_destructor) { (*priv_dtor)(nt, ml, tml->index); assert(!ml->instance); - assert(!ml->instance_size); + assert(!ml->global_variables); + assert(ml->global_variables_size == 0); } NetReceiveBuffer_t* nrb = ml->_net_receive_buffer; diff --git a/coreneuron/mechanism/mechanism.hpp b/coreneuron/mechanism/mechanism.hpp index 9335e7530..9427423df 100644 --- a/coreneuron/mechanism/mechanism.hpp +++ b/coreneuron/mechanism/mechanism.hpp @@ -143,11 +143,12 @@ struct Memb_list { NetSendBuffer_t* _net_send_buffer = nullptr; int nodecount; /* actual node count */ int _nodecount_padded; + void* instance{nullptr}; /* mechanism instance struct */ // nrn_acc_manager.cpp handles data movement to/from the accelerator as the // "private constructor" in the translated MOD file code is called before // the main nrn_acc_manager methods that copy thread/mechanism data to the // device - void* instance{nullptr}; /* mechanism instance struct */ - std::size_t instance_size{}; + void* global_variables{nullptr}; 
+ std::size_t global_variables_size{}; }; } // namespace coreneuron diff --git a/coreneuron/mechanism/nrnoc_ml.ispc b/coreneuron/mechanism/nrnoc_ml.ispc index fa8079fb2..0b1196df0 100644 --- a/coreneuron/mechanism/nrnoc_ml.ispc +++ b/coreneuron/mechanism/nrnoc_ml.ispc @@ -57,6 +57,8 @@ struct Memb_list { uniform int nodecount; uniform int _nodecount_padded; void* uniform instance; + void* uniform global_variables; + uniform unsigned long global_variables_size; }; struct Point_process { diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp index 184211c19..df8f5f507 100644 --- a/coreneuron/utils/offload.hpp +++ b/coreneuron/utils/offload.hpp @@ -19,10 +19,10 @@ #else #define nrn_pragma_acc(x) #define nrn_pragma_omp(x) -#include #endif #include +#include #include namespace coreneuron { @@ -44,6 +44,11 @@ void cnrn_target_deviceptr_debug(std::string_view file, std::type_info const& typeid_T, void const* h_ptr, void* d_ptr); +void cnrn_target_is_present_debug(std::string_view file, + int line, + std::type_info const& typeid_T, + void const* h_ptr, + void* d_ptr); void cnrn_target_memcpy_to_device_debug(std::string_view file, int line, std::size_t sizeof_T, @@ -57,16 +62,20 @@ void cnrn_target_memcpy_to_device_debug(std::string_view file, // Homegrown implementation for buggy NVHPC versions (<=22.3), see // https://forums.developer.nvidia.com/t/acc-deviceptr-does-not-work-in-openacc-code-dynamically-loaded-from-a-shared-library/211599 #define CORENEURON_ENABLE_PRESENT_TABLE -void* cnrn_target_deviceptr_impl(void const* h_ptr); +template +std::pair cnrn_target_deviceptr_impl(void const* h_ptr); void cnrn_target_copyin_update_present_table(void const* h_ptr, void* d_ptr, std::size_t len); void cnrn_target_delete_update_present_table(void const* h_ptr, std::size_t len); #endif -template +template T* cnrn_target_deviceptr(std::string_view file, int line, const T* h_ptr) { T* d_ptr{}; + bool error{false}; #ifdef CORENEURON_ENABLE_PRESENT_TABLE - d_ptr = 
static_cast(cnrn_target_deviceptr_impl(h_ptr)); + auto const d_ptr_and_error = cnrn_target_deviceptr_impl(h_ptr); + d_ptr = static_cast(d_ptr_and_error.first); + error = d_ptr_and_error.second; #elif defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENACC) d_ptr = static_cast(acc_deviceptr(const_cast(h_ptr))); @@ -75,10 +84,21 @@ T* cnrn_target_deviceptr(std::string_view file, int line, const T* h_ptr) { nrn_pragma_omp(target data use_device_ptr(h_ptr)) { d_ptr = const_cast(h_ptr); } #else - throw std::runtime_error( - "cnrn_target_deviceptr() not implemented without OpenACC/OpenMP and gpu build"); + if (must_be_present_or_null && h_ptr) { + throw std::runtime_error( + "cnrn_target_deviceptr() not implemented without OpenACC/OpenMP and gpu build"); + } #endif - cnrn_target_deviceptr_debug(file, line, typeid(T), h_ptr, d_ptr); + if (must_be_present_or_null) { + cnrn_target_deviceptr_debug(file, line, typeid(T), h_ptr, d_ptr); + } else { + cnrn_target_is_present_debug(file, line, typeid(T), h_ptr, d_ptr); + } + if (error) { + throw std::runtime_error( + "cnrn_target_deviceptr() encountered an error, you may want to try setting " + "CORENEURON_GPU_DEBUG=1"); + } return d_ptr; } @@ -152,14 +172,15 @@ void cnrn_target_update_on_device(std::string_view file, int line, const T* h_ptr, std::size_t len = 1) { - auto* d_ptr = cnrn_target_deviceptr(file, line, h_ptr); + auto* d_ptr = cnrn_target_deviceptr(file, line, h_ptr); cnrn_target_memcpy_to_device(file, line, d_ptr, h_ptr); } // Replace with std::source_location once we have C++20 -#define cnrn_target_copyin(...) cnrn_target_copyin(__FILE__, __LINE__, __VA_ARGS__) -#define cnrn_target_delete(...) cnrn_target_delete(__FILE__, __LINE__, __VA_ARGS__) -#define cnrn_target_deviceptr(...) cnrn_target_deviceptr(__FILE__, __LINE__, __VA_ARGS__) +#define cnrn_target_copyin(...) cnrn_target_copyin(__FILE__, __LINE__, __VA_ARGS__) +#define cnrn_target_delete(...) 
cnrn_target_delete(__FILE__, __LINE__, __VA_ARGS__) +#define cnrn_target_deviceptr(...) cnrn_target_deviceptr(__FILE__, __LINE__, __VA_ARGS__) +#define cnrn_target_is_present(...) cnrn_target_deviceptr(__FILE__, __LINE__, __VA_ARGS__) #define cnrn_target_memcpy_to_device(...) \ cnrn_target_memcpy_to_device(__FILE__, __LINE__, __VA_ARGS__) #define cnrn_target_update_on_device(...) \ diff --git a/external/nmodl b/external/nmodl index 7a53be75f..32cff1684 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 7a53be75fa329e5120038b26e53b18dbe3074bd6 +Subproject commit 32cff1684e4cb3b66b1d0bfa21b0ed062edf9b22 From 9c28e3d7e7a496d1104b832dc74b1f9fc441013a Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 23 Aug 2022 14:31:14 +0200 Subject: [PATCH 108/128] reference counting in private present table --- coreneuron/gpu/nrn_acc_manager.cpp | 39 ++++++++++++++++++++---------- coreneuron/utils/offload.hpp | 24 ++++++++++-------- external/nmodl | 2 +- 3 files changed, 41 insertions(+), 24 deletions(-) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index a05c897d7..1fcc59478 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -46,7 +46,11 @@ #include #include namespace { -std::map> present_table; +struct present_table_value { + std::size_t ref_count{}, size{}; + std::byte* dev_ptr{}; +}; +std::map present_table; std::shared_mutex present_table_mutex; } // namespace #endif @@ -155,8 +159,7 @@ void cnrn_target_memcpy_to_device_debug(std::string_view file, } #ifdef CORENEURON_ENABLE_PRESENT_TABLE -template -std::pair cnrn_target_deviceptr_impl(void const* h_ptr) { +std::pair cnrn_target_deviceptr_impl(bool must_be_present_or_null, void const* h_ptr) { if (!h_ptr) { return {nullptr, false}; } @@ -177,16 +180,14 @@ std::pair cnrn_target_deviceptr_impl(void const* h_ptr) { } std::byte const* const h_byte_ptr{static_cast(h_ptr)}; std::byte const* const 
h_start_of_block{iter->first}; - std::size_t const block_size{iter->second.first}; - std::byte* const d_start_of_block{iter->second.second}; + std::size_t const block_size{iter->second.size}; + std::byte* const d_start_of_block{iter->second.dev_ptr}; bool const is_present{h_byte_ptr < h_start_of_block + block_size}; if (!is_present) { return {nullptr, must_be_present_or_null}; } return {d_start_of_block + (h_byte_ptr - h_start_of_block), false}; } -template std::pair cnrn_target_deviceptr_impl(void const*); -template std::pair cnrn_target_deviceptr_impl(void const*); void cnrn_target_copyin_update_present_table(void const* h_ptr, void* d_ptr, std::size_t len) { if (!h_ptr) { @@ -194,20 +195,32 @@ void cnrn_target_copyin_update_present_table(void const* h_ptr, void* d_ptr, std return; } std::lock_guard _{present_table_mutex}; - // TODO include more pendantic overlap checking? - auto const result = present_table.emplace(static_cast(h_ptr), - std::make_pair(len, static_cast(d_ptr))); + // TODO include more pedantic overlap checking? 
+ present_table_value new_val{}; + new_val.size = len; + new_val.ref_count = 1; + new_val.dev_ptr = static_cast(d_ptr); + auto const [iter, inserted] = present_table.emplace(static_cast(h_ptr), + std::move(new_val)); + if (!inserted) { + // Insertion didn't occur because h_ptr was already in the present table + assert(iter->second.size == len); + assert(iter->second.dev_ptr == new_val.dev_ptr); + ++(iter->second.ref_count); + } } void cnrn_target_delete_update_present_table(void const* h_ptr, std::size_t len) { if (!h_ptr) { return; } std::lock_guard _{present_table_mutex}; - // TODO properly matching OpenACC semantics would require a reference count auto const iter = present_table.find(static_cast(h_ptr)); assert(iter != present_table.end()); - assert(iter->second.first == len); - present_table.erase(iter); + assert(iter->second.size == len); + --(iter->second.ref_count); + if (iter->second.ref_count == 0) { + present_table.erase(iter); + } } #endif diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp index df8f5f507..1911d364b 100644 --- a/coreneuron/utils/offload.hpp +++ b/coreneuron/utils/offload.hpp @@ -62,18 +62,20 @@ void cnrn_target_memcpy_to_device_debug(std::string_view file, // Homegrown implementation for buggy NVHPC versions (<=22.3), see // https://forums.developer.nvidia.com/t/acc-deviceptr-does-not-work-in-openacc-code-dynamically-loaded-from-a-shared-library/211599 #define CORENEURON_ENABLE_PRESENT_TABLE -template -std::pair cnrn_target_deviceptr_impl(void const* h_ptr); +std::pair cnrn_target_deviceptr_impl(bool must_be_present_or_null, void const* h_ptr); void cnrn_target_copyin_update_present_table(void const* h_ptr, void* d_ptr, std::size_t len); void cnrn_target_delete_update_present_table(void const* h_ptr, std::size_t len); #endif -template -T* cnrn_target_deviceptr(std::string_view file, int line, const T* h_ptr) { +template +T* cnrn_target_deviceptr_or_present(std::string_view file, + int line, + bool 
must_be_present_or_null, + const T* h_ptr) { T* d_ptr{}; bool error{false}; #ifdef CORENEURON_ENABLE_PRESENT_TABLE - auto const d_ptr_and_error = cnrn_target_deviceptr_impl(h_ptr); + auto const d_ptr_and_error = cnrn_target_deviceptr_impl(must_be_present_or_null, h_ptr); d_ptr = static_cast(d_ptr_and_error.first); error = d_ptr_and_error.second; #elif defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ @@ -172,15 +174,17 @@ void cnrn_target_update_on_device(std::string_view file, int line, const T* h_ptr, std::size_t len = 1) { - auto* d_ptr = cnrn_target_deviceptr(file, line, h_ptr); + auto* d_ptr = cnrn_target_deviceptr_or_present(file, line, true, h_ptr); cnrn_target_memcpy_to_device(file, line, d_ptr, h_ptr); } // Replace with std::source_location once we have C++20 -#define cnrn_target_copyin(...) cnrn_target_copyin(__FILE__, __LINE__, __VA_ARGS__) -#define cnrn_target_delete(...) cnrn_target_delete(__FILE__, __LINE__, __VA_ARGS__) -#define cnrn_target_deviceptr(...) cnrn_target_deviceptr(__FILE__, __LINE__, __VA_ARGS__) -#define cnrn_target_is_present(...) cnrn_target_deviceptr(__FILE__, __LINE__, __VA_ARGS__) +#define cnrn_target_copyin(...) cnrn_target_copyin(__FILE__, __LINE__, __VA_ARGS__) +#define cnrn_target_delete(...) cnrn_target_delete(__FILE__, __LINE__, __VA_ARGS__) +#define cnrn_target_is_present(...) \ + cnrn_target_deviceptr_or_present(__FILE__, __LINE__, false, __VA_ARGS__) +#define cnrn_target_deviceptr(...) \ + cnrn_target_deviceptr_or_present(__FILE__, __LINE__, true, __VA_ARGS__) #define cnrn_target_memcpy_to_device(...) \ cnrn_target_memcpy_to_device(__FILE__, __LINE__, __VA_ARGS__) #define cnrn_target_update_on_device(...) 
\ diff --git a/external/nmodl b/external/nmodl index 32cff1684..032f3ae88 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 32cff1684e4cb3b66b1d0bfa21b0ed062edf9b22 +Subproject commit 032f3ae88278262a4ba3d22469bde3c7f578e1f6 From 89f3e67442732cf1f5f814a559482d13e584afb4 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 23 Aug 2022 16:00:22 +0200 Subject: [PATCH 109/128] explicit copyin/update/copyout for celsius/pi/secondorder --- coreneuron/apps/main1.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/coreneuron/apps/main1.cpp b/coreneuron/apps/main1.cpp index e79db2104..a429b04f7 100644 --- a/coreneuron/apps/main1.cpp +++ b/coreneuron/apps/main1.cpp @@ -500,6 +500,9 @@ extern "C" void mk_mech_init(int argc, char** argv) { #ifdef CORENEURON_ENABLE_GPU if (corenrn_param.gpu) { init_gpu(); + cnrn_target_copyin(&celsius); + cnrn_target_copyin(&pi); + cnrn_target_copyin(&secondorder); nrnran123_initialise_global_state_on_device(); } #endif @@ -558,6 +561,8 @@ extern "C" int run_solve_core(int argc, char** argv) { #endif bool compute_gpu = corenrn_param.gpu; + nrn_pragma_acc(update device(celsius, secondorder, pi) if (compute_gpu)) + nrn_pragma_omp(target update to(celsius, secondorder, pi) if (compute_gpu)) { double v = corenrn_param.voltage; double dt = corenrn_param.dt; @@ -679,6 +684,9 @@ extern "C" int run_solve_core(int argc, char** argv) { nrn_partrans::delete_gap_indices_from_device(); } nrnran123_destroy_global_state_on_device(); + cnrn_target_delete(&secondorder); + cnrn_target_delete(&pi); + cnrn_target_delete(&celsius); } // Cleaning the memory From fbde41ca1abe4fdd28bbf8d171626b971ab7cc41 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Tue, 23 Aug 2022 16:14:40 +0200 Subject: [PATCH 110/128] submodules --- external/mod2c | 2 +- external/nmodl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/mod2c b/external/mod2c index 77bba7715..ec96803a3 160000 --- a/external/mod2c +++ 
b/external/mod2c @@ -1 +1 @@ -Subproject commit 77bba771579c9f91a2e10533967486e5e1f429fa +Subproject commit ec96803a3ec34bab63cc0e00b6cc85581eacd403 diff --git a/external/nmodl b/external/nmodl index 032f3ae88..bac6ff883 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 032f3ae88278262a4ba3d22469bde3c7f578e1f6 +Subproject commit bac6ff8839bf6556bab88aec9666a91d7de44825 From c728fabe04ebc70fe15da9ec92f8f0a2bd9f02fa Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 24 Aug 2022 11:27:01 +0200 Subject: [PATCH 111/128] ispc fix --- coreneuron/mechanism/nrnoc_ml.ispc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coreneuron/mechanism/nrnoc_ml.ispc b/coreneuron/mechanism/nrnoc_ml.ispc index 0b1196df0..2c28a745f 100644 --- a/coreneuron/mechanism/nrnoc_ml.ispc +++ b/coreneuron/mechanism/nrnoc_ml.ispc @@ -58,7 +58,7 @@ struct Memb_list { uniform int _nodecount_padded; void* uniform instance; void* uniform global_variables; - uniform unsigned long global_variables_size; + uniform size_t global_variables_size; }; struct Point_process { From 0adad8b225f5184c9aa2de5bf366a4c46291a034 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 24 Aug 2022 11:27:17 +0200 Subject: [PATCH 112/128] submodule --- external/nmodl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/nmodl b/external/nmodl index bac6ff883..07086dfa3 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit bac6ff8839bf6556bab88aec9666a91d7de44825 +Subproject commit 07086dfa38e308699c86212cf98d5324217995bc From dc8d86bad778c2047e1cdc4c1603f752157df7b3 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 24 Aug 2022 14:50:18 +0200 Subject: [PATCH 113/128] OpenMP: support cnrn_target_is_present --- coreneuron/utils/offload.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp index 1911d364b..b0a19fb67 100644 --- 
a/coreneuron/utils/offload.hpp +++ b/coreneuron/utils/offload.hpp @@ -83,8 +83,10 @@ T* cnrn_target_deviceptr_or_present(std::string_view file, d_ptr = static_cast(acc_deviceptr(const_cast(h_ptr))); #elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENMP) - nrn_pragma_omp(target data use_device_ptr(h_ptr)) - { d_ptr = const_cast(h_ptr); } + if (must_be_present_or_null || omp_target_is_present(h_ptr, omp_get_default_device())) { + nrn_pragma_omp(target data use_device_ptr(h_ptr)) + { d_ptr = const_cast(h_ptr); } + } #else if (must_be_present_or_null && h_ptr) { throw std::runtime_error( From d75d5d4de25758720971558a13dd096550caee9d Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 24 Aug 2022 14:50:31 +0200 Subject: [PATCH 114/128] submodule --- external/nmodl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/nmodl b/external/nmodl index 07086dfa3..6b43a20be 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 07086dfa38e308699c86212cf98d5324217995bc +Subproject commit 6b43a20be76fce6b144e4324c574301c1f087387 From d8ea959ebf9ea59823987038601824ccc787aadb Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 24 Aug 2022 15:57:02 +0200 Subject: [PATCH 115/128] omp_target_is_present has problems in nvhpc 22.3 --- coreneuron/utils/offload.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp index b0a19fb67..6297221e8 100644 --- a/coreneuron/utils/offload.hpp +++ b/coreneuron/utils/offload.hpp @@ -56,9 +56,9 @@ void cnrn_target_memcpy_to_device_debug(std::string_view file, void const* h_ptr, std::size_t len, void* d_ptr); -#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ - defined(_OPENACC) && !defined(CORENEURON_UNIFIED_MEMORY) && defined(__NVCOMPILER_MAJOR__) && \ - defined(__NVCOMPILER_MINOR__) && (__NVCOMPILER_MAJOR__ <= 22) && 
(__NVCOMPILER_MINOR__ <= 3) +#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_UNIFIED_MEMORY) && \ + defined(__NVCOMPILER_MAJOR__) && defined(__NVCOMPILER_MINOR__) && \ + (__NVCOMPILER_MAJOR__ <= 22) && (__NVCOMPILER_MINOR__ <= 3) // Homegrown implementation for buggy NVHPC versions (<=22.3), see // https://forums.developer.nvidia.com/t/acc-deviceptr-does-not-work-in-openacc-code-dynamically-loaded-from-a-shared-library/211599 #define CORENEURON_ENABLE_PRESENT_TABLE From 74e5f0ac9abaa686279119ffac9208d8866bee66 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 25 Aug 2022 11:41:59 +0200 Subject: [PATCH 116/128] Update GitLab CI config --- .gitlab-ci.yml | 252 +++++++++++++++---------------------------------- 1 file changed, 78 insertions(+), 174 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3da9d2b52..b000c1eab 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -96,256 +96,160 @@ build:nmodl: # TODO: fix this more robustly so we don't have to play so many games. 
SPACK_PACKAGE_DEPENDENCIES: ^hpe-mpi%gcc ^caliper%gcc+cuda cuda_arch=70 -build:coreneuron:mod2c:nvhpc:acc: - extends: [.build_coreneuron, .spack_nvhpc] - variables: - SPACK_PACKAGE_SPEC: +caliper+gpu+openmp~shared+tests~legacy-unit build_type=RelWithDebInfo - -build:coreneuron:mod2c:nvhpc:acc:debug: - extends: [.build_coreneuron, .spack_nvhpc] +build:coreneuron:mod2c:intel:shared: + extends: [.build_coreneuron, .spack_intel] variables: - SPACK_PACKAGE_SPEC: +caliper+gpu+openmp~shared+tests~legacy-unit build_type=Debug + SPACK_PACKAGE_SPEC: +caliper~gpu~legacy-unit~nmodl+openmp+shared+tests~unified build_type=RelWithDebInfo -# Shared + OpenACC + OpenMP host threading has problems -build:coreneuron:mod2c:nvhpc:acc:shared: - extends: [.build_coreneuron, .spack_nvhpc] +build:coreneuron:nmodl:intel:shared:debug:legacy: + extends: [.build_coreneuron, .spack_intel] + needs: ["build:nmodl"] variables: - SPACK_PACKAGE_SPEC: +caliper+gpu~openmp+shared+tests~legacy-unit build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: +caliper~gpu~legacy-unit+nmodl+openmp+shared~sympy+tests~unified build_type=Debug -# Shared + OpenACC + OpenMP host threading has problems -build:coreneuron:mod2c:nvhpc:acc:shared:debug: - extends: [.build_coreneuron, .spack_nvhpc] +# Disable caliper to improve coverage +build:coreneuron:nmodl:intel: + extends: [.build_coreneuron, .spack_intel] + needs: ["build:nmodl"] variables: - SPACK_PACKAGE_SPEC: +caliper+gpu~openmp+shared+tests~legacy-unit build_type=Debug + SPACK_PACKAGE_SPEC: ~caliper~gpu~legacy-unit+nmodl+openmp~shared+sympy+tests~unified build_type=RelWithDebInfo -# Build CoreNEURON with Unified Memory on GPU -build:coreneuron:mod2c:nvhpc:acc:unified: +# Not linked to a NEURON build+test job, see +# https://github.com/BlueBrain/CoreNeuron/issues/594 +build:coreneuron:mod2c:nvhpc:acc:debug:unified: extends: [.build_coreneuron, .spack_nvhpc] variables: - SPACK_PACKAGE_SPEC: +caliper+gpu+unified+openmp~shared+tests~legacy-unit build_type=Debug + 
SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit~nmodl+openmp~shared+tests+unified build_type=Debug -build:coreneuron:nmodl:nvhpc:acc: +# Shared + OpenACC + OpenMP host threading has problems +build:coreneuron:mod2c:nvhpc:acc:shared:debug: extends: [.build_coreneuron, .spack_nvhpc] variables: - SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu~shared+tests~legacy-unit+sympy build_type=RelWithDebInfo - needs: ["build:nmodl"] + SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit~nmodl~openmp+shared+tests~unified build_type=Debug -build:coreneuron:nmodl:nvhpc:acc:debug: +build:coreneuron:nmodl:nvhpc:acc:legacy: extends: [.build_coreneuron, .spack_nvhpc] - variables: - SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu~shared+tests~legacy-unit+sympy build_type=Debug needs: ["build:nmodl"] - -build:coreneuron:nmodl:nvhpc:acc:shared: - extends: [.build_coreneuron, .spack_nvhpc] variables: - SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu+shared+tests~legacy-unit+sympy build_type=RelWithDebInfo - needs: ["build:nmodl"] + SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl~openmp~shared~sympy+tests~unified build_type=RelWithDebInfo build:coreneuron:nmodl:nvhpc:acc:shared:debug: extends: [.build_coreneuron, .spack_nvhpc] - variables: - SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu+shared+tests~legacy-unit+sympy build_type=Debug needs: ["build:nmodl"] - -build:coreneuron:nmodl:nvhpc:acc:legacy: - extends: [.build_coreneuron, .spack_nvhpc] - variables: - SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu~shared+tests~legacy-unit~sympy build_type=RelWithDebInfo - needs: ["build:nmodl"] - -build:coreneuron:nmodl:nvhpc:acc:debug:legacy: - extends: [.build_coreneuron, .spack_nvhpc] variables: - SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu~shared+tests~legacy-unit~sympy build_type=Debug - needs: ["build:nmodl"] + SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl~openmp+shared+sympy+tests~unified build_type=Debug -build:coreneuron:nmodl:nvhpc:acc:shared:legacy: 
+build:coreneuron:nmodl:nvhpc:omp:debug:legacy: extends: [.build_coreneuron, .spack_nvhpc] - variables: - SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu+shared+tests~legacy-unit~sympy build_type=RelWithDebInfo needs: ["build:nmodl"] - -build:coreneuron:nmodl:nvhpc:acc:shared:debug:legacy: - extends: [.build_coreneuron, .spack_nvhpc] variables: - SPACK_PACKAGE_SPEC: +caliper+nmodl~openmp+gpu+shared+tests~legacy-unit~sympy build_type=Debug - needs: ["build:nmodl"] + SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl+openmp~shared~sympy+tests~unified build_type=Debug build:coreneuron:nmodl:nvhpc:omp: extends: [.build_coreneuron, .spack_nvhpc] - variables: - SPACK_PACKAGE_SPEC: +caliper+nmodl+openmp+gpu~shared+tests~legacy-unit+sympy build_type=RelWithDebInfo needs: ["build:nmodl"] - -build:coreneuron:nmodl:nvhpc:omp:debug: - extends: [.build_coreneuron, .spack_nvhpc] variables: - SPACK_PACKAGE_SPEC: +caliper+nmodl+openmp+gpu~shared+tests~legacy-unit+sympy build_type=Debug - needs: ["build:nmodl"] + SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl+openmp~shared+sympy+tests~unified build_type=RelWithDebInfo -build:coreneuron:nmodl:nvhpc:omp:legacy: - extends: [.build_coreneuron, .spack_nvhpc] - variables: - SPACK_PACKAGE_SPEC: +caliper+nmodl+openmp+gpu~shared+tests~legacy-unit~sympy build_type=RelWithDebInfo - needs: ["build:nmodl"] - -build:coreneuron:nmodl:nvhpc:omp:debug:legacy: - extends: [.build_coreneuron, .spack_nvhpc] - variables: - SPACK_PACKAGE_SPEC: +caliper+nmodl+openmp+gpu~shared+tests~legacy-unit~sympy build_type=Debug - needs: ["build:nmodl"] +# Build NEURON +build:neuron:mod2c:intel:shared: + extends: [.build_neuron, .spack_intel] + needs: ["build:coreneuron:mod2c:intel:shared"] -build:coreneuron:mod2c:intel: - extends: [.build_coreneuron, .spack_intel] - variables: - SPACK_PACKAGE_SPEC: +caliper+tests~legacy-unit build_type=Debug +build:neuron:nmodl:intel:shared:debug:legacy: + extends: [.build_neuron, .spack_intel] + needs: 
["build:coreneuron:nmodl:intel:shared:debug:legacy"] -build:coreneuron:nmodl:intel: - extends: [.build_coreneuron, .spack_intel] - variables: - SPACK_PACKAGE_SPEC: +caliper+nmodl+tests~legacy-unit build_type=Debug - needs: ["build:nmodl"] +build:neuron:nmodl:intel: + extends: [.build_neuron, .spack_intel] + needs: ["build:coreneuron:nmodl:intel"] -# Build NEURON -build:neuron:mod2c:nvhpc:acc: +build:neuron:mod2c:nvhpc:acc:shared:debug: extends: [.build_neuron, .spack_nvhpc] - needs: ["build:coreneuron:mod2c:nvhpc:acc"] + needs: ["build:coreneuron:mod2c:nvhpc:acc:shared:debug"] -build:neuron:mod2c:nvhpc:acc:debug: +build:neuron:nmodl:nvhpc:acc:legacy: extends: [.build_neuron, .spack_nvhpc] - needs: ["build:coreneuron:mod2c:nvhpc:acc:debug"] + needs: ["build:coreneuron:nmodl:nvhpc:acc:legacy"] -build:neuron:mod2c:nvhpc:acc:shared: +build:neuron:nmodl:nvhpc:acc:shared:debug: extends: [.build_neuron, .spack_nvhpc] - needs: ["build:coreneuron:mod2c:nvhpc:acc:shared"] + needs: ["build:coreneuron:nmodl:nvhpc:acc:shared:debug"] -build:neuron:mod2c:nvhpc:acc:shared:debug: +build:neuron:nmodl:nvhpc:omp:debug:legacy: extends: [.build_neuron, .spack_nvhpc] - needs: ["build:coreneuron:mod2c:nvhpc:acc:shared:debug"] + needs: ["build:coreneuron:nmodl:nvhpc:omp:debug:legacy"] build:neuron:nmodl:nvhpc:omp: extends: [.build_neuron, .spack_nvhpc] needs: ["build:coreneuron:nmodl:nvhpc:omp"] -build:neuron:nmodl:nvhpc:acc: - extends: [.build_neuron, .spack_nvhpc] - needs: ["build:coreneuron:nmodl:nvhpc:acc"] +# Test CoreNEURON +test:coreneuron:mod2c:intel:shared: + extends: [.ctest] + needs: ["build:coreneuron:mod2c:intel:shared"] -build:neuron:mod2c:intel: - extends: [.build_neuron, .spack_intel] - needs: ["build:coreneuron:mod2c:intel"] +test:coreneuron:nmodl:intel:shared:debug:legacy: + extends: [.ctest] + needs: ["build:coreneuron:nmodl:intel:shared:debug:legacy"] -build:neuron:nmodl:intel: - extends: [.build_neuron, .spack_intel] +test:coreneuron:nmodl:intel: + extends: [.ctest] 
needs: ["build:coreneuron:nmodl:intel"] -# Test CoreNEURON -test:coreneuron:mod2c:nvhpc:acc: - extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:mod2c:nvhpc:acc"] - -test:coreneuron:mod2c:nvhpc:acc:debug: +test:coreneuron:mod2c:nvhpc:acc:debug:unified: extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:mod2c:nvhpc:acc:debug"] - -test:coreneuron:mod2c:nvhpc:acc:shared: - extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:mod2c:nvhpc:acc:shared"] + needs: ["build:coreneuron:mod2c:nvhpc:acc:debug:unified"] test:coreneuron:mod2c:nvhpc:acc:shared:debug: extends: [.ctest, .gpu_node] needs: ["build:coreneuron:mod2c:nvhpc:acc:shared:debug"] -test:coreneuron:mod2c:nvhpc:acc:unified: - extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:mod2c:nvhpc:acc:unified"] - -test:coreneuron:nmodl:nvhpc:omp: - extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:nmodl:nvhpc:omp"] - -test:coreneuron:nmodl:nvhpc:omp:debug: - extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:nmodl:nvhpc:omp:debug"] - -test:coreneuron:nmodl:nvhpc:omp:legacy: - extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:nmodl:nvhpc:omp:legacy"] - -test:coreneuron:nmodl:nvhpc:omp:debug:legacy: - extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:nmodl:nvhpc:omp:debug:legacy"] - -test:coreneuron:nmodl:nvhpc:acc: - extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:nmodl:nvhpc:acc"] - -test:coreneuron:nmodl:nvhpc:acc:debug: - extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:nmodl:nvhpc:acc:debug"] - -test:coreneuron:nmodl:nvhpc:acc:shared: +test:coreneuron:nmodl:nvhpc:acc:legacy: extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:nmodl:nvhpc:acc:shared"] + needs: ["build:coreneuron:nmodl:nvhpc:acc:legacy"] test:coreneuron:nmodl:nvhpc:acc:shared:debug: extends: [.ctest, .gpu_node] needs: ["build:coreneuron:nmodl:nvhpc:acc:shared:debug"] -test:coreneuron:nmodl:nvhpc:acc:legacy: - extends: [.ctest, .gpu_node] - needs: 
["build:coreneuron:nmodl:nvhpc:acc:legacy"] - -test:coreneuron:nmodl:nvhpc:acc:debug:legacy: +test:coreneuron:nmodl:nvhpc:omp:debug:legacy: extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:nmodl:nvhpc:acc:debug:legacy"] + needs: ["build:coreneuron:nmodl:nvhpc:omp:debug:legacy"] -test:coreneuron:nmodl:nvhpc:acc:shared:legacy: +test:coreneuron:nmodl:nvhpc:omp: extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:nmodl:nvhpc:acc:shared:legacy"] + needs: ["build:coreneuron:nmodl:nvhpc:omp"] -test:coreneuron:nmodl:nvhpc:acc:shared:debug:legacy: - extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:nmodl:nvhpc:acc:shared:debug:legacy"] +# Test NEURON +test:neuron:mod2c:intel:shared: + extends: [.test_neuron] + needs: ["build:neuron:mod2c:intel:shared"] -test:coreneuron:mod2c:intel: - extends: [.ctest] - needs: ["build:coreneuron:mod2c:intel"] +test:neuron:nmodl:intel:shared:debug:legacy: + extends: [.test_neuron] + needs: ["build:neuron:nmodl:intel:shared:debug:legacy"] -test:coreneuron:nmodl:intel: - extends: [.ctest] - needs: ["build:coreneuron:nmodl:intel"] +test:neuron:nmodl:intel: + extends: [.test_neuron] + needs: ["build:neuron:nmodl:intel"] -# Test NEURON -test:neuron:mod2c:nvhpc:acc: +test:neuron:mod2c:nvhpc:acc:shared:debug: extends: [.test_neuron, .gpu_node] - needs: ["build:neuron:mod2c:nvhpc:acc"] + needs: ["build:neuron:mod2c:nvhpc:acc:shared:debug"] -test:neuron:mod2c:nvhpc:acc:debug: +test:neuron:nmodl:nvhpc:acc:legacy: extends: [.test_neuron, .gpu_node] - needs: ["build:neuron:mod2c:nvhpc:acc:debug"] + needs: ["build:neuron:nmodl:nvhpc:acc:legacy"] -test:neuron:mod2c:nvhpc:acc:shared: +test:neuron:nmodl:nvhpc:acc:shared:debug: extends: [.test_neuron, .gpu_node] - needs: ["build:neuron:mod2c:nvhpc:acc:shared"] + needs: ["build:neuron:nmodl:nvhpc:acc:shared:debug"] -test:neuron:mod2c:nvhpc:acc:shared:debug: +test:neuron:nmodl:nvhpc:omp:debug:legacy: extends: [.test_neuron, .gpu_node] - needs: ["build:neuron:mod2c:nvhpc:acc:shared:debug"] + 
needs: ["build:neuron:nmodl:nvhpc:omp:debug:legacy"] test:neuron:nmodl:nvhpc:omp: extends: [.test_neuron, .gpu_node] needs: ["build:neuron:nmodl:nvhpc:omp"] - -test:neuron:nmodl:nvhpc:acc: - extends: [.test_neuron, .gpu_node] - needs: ["build:neuron:nmodl:nvhpc:acc"] - -test:neuron:mod2c:intel: - extends: [.test_neuron] - needs: ["build:neuron:mod2c:intel"] - -test:neuron:nmodl:intel: - extends: [.test_neuron] - needs: ["build:neuron:nmodl:intel"] From 6b96330671326403231247c50e289b5786f8c497 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 25 Aug 2022 11:55:09 +0200 Subject: [PATCH 117/128] tweak nmodl+cpu builds --- .gitlab-ci.yml | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b000c1eab..cd1944cde 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -101,18 +101,18 @@ build:coreneuron:mod2c:intel:shared: variables: SPACK_PACKAGE_SPEC: +caliper~gpu~legacy-unit~nmodl+openmp+shared+tests~unified build_type=RelWithDebInfo -build:coreneuron:nmodl:intel:shared:debug:legacy: +build:coreneuron:nmodl:intel:debug:legacy: extends: [.build_coreneuron, .spack_intel] needs: ["build:nmodl"] variables: - SPACK_PACKAGE_SPEC: +caliper~gpu~legacy-unit+nmodl+openmp+shared~sympy+tests~unified build_type=Debug + SPACK_PACKAGE_SPEC: +caliper~gpu~legacy-unit+nmodl+openmp~shared~sympy+tests~unified build_type=Debug # Disable caliper to improve coverage -build:coreneuron:nmodl:intel: +build:coreneuron:nmodl:intel:shared: extends: [.build_coreneuron, .spack_intel] needs: ["build:nmodl"] variables: - SPACK_PACKAGE_SPEC: ~caliper~gpu~legacy-unit+nmodl+openmp~shared+sympy+tests~unified build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: ~caliper~gpu~legacy-unit+nmodl+openmp+shared+sympy+tests~unified build_type=RelWithDebInfo # Not linked to a NEURON build+test job, see # https://github.com/BlueBrain/CoreNeuron/issues/594 @@ -156,13 +156,13 @@ build:neuron:mod2c:intel:shared: extends: [.build_neuron, 
.spack_intel] needs: ["build:coreneuron:mod2c:intel:shared"] -build:neuron:nmodl:intel:shared:debug:legacy: +build:neuron:nmodl:intel:debug:legacy: extends: [.build_neuron, .spack_intel] - needs: ["build:coreneuron:nmodl:intel:shared:debug:legacy"] + needs: ["build:coreneuron:nmodl:intel:debug:legacy"] -build:neuron:nmodl:intel: +build:neuron:nmodl:intel:shared: extends: [.build_neuron, .spack_intel] - needs: ["build:coreneuron:nmodl:intel"] + needs: ["build:coreneuron:nmodl:intel:shared"] build:neuron:mod2c:nvhpc:acc:shared:debug: extends: [.build_neuron, .spack_nvhpc] @@ -189,13 +189,13 @@ test:coreneuron:mod2c:intel:shared: extends: [.ctest] needs: ["build:coreneuron:mod2c:intel:shared"] -test:coreneuron:nmodl:intel:shared:debug:legacy: +test:coreneuron:nmodl:intel:debug:legacy: extends: [.ctest] - needs: ["build:coreneuron:nmodl:intel:shared:debug:legacy"] + needs: ["build:coreneuron:nmodl:intel:debug:legacy"] -test:coreneuron:nmodl:intel: +test:coreneuron:nmodl:intel:shared: extends: [.ctest] - needs: ["build:coreneuron:nmodl:intel"] + needs: ["build:coreneuron:nmodl:intel:shared"] test:coreneuron:mod2c:nvhpc:acc:debug:unified: extends: [.ctest, .gpu_node] @@ -226,13 +226,13 @@ test:neuron:mod2c:intel:shared: extends: [.test_neuron] needs: ["build:neuron:mod2c:intel:shared"] -test:neuron:nmodl:intel:shared:debug:legacy: +test:neuron:nmodl:intel:debug:legacy: extends: [.test_neuron] - needs: ["build:neuron:nmodl:intel:shared:debug:legacy"] + needs: ["build:neuron:nmodl:intel:debug:legacy"] -test:neuron:nmodl:intel: +test:neuron:nmodl:intel:shared: extends: [.test_neuron] - needs: ["build:neuron:nmodl:intel"] + needs: ["build:neuron:nmodl:intel:shared"] test:neuron:mod2c:nvhpc:acc:shared:debug: extends: [.test_neuron, .gpu_node] From d72ecde07632f9eb0200eed70b59e6b1bd742d69 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 25 Aug 2022 11:56:33 +0200 Subject: [PATCH 118/128] tweak mod2c+gpu build --- .gitlab-ci.yml | 16 ++++++++-------- 1 file changed, 8 
insertions(+), 8 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index cd1944cde..01b3175ba 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -122,10 +122,10 @@ build:coreneuron:mod2c:nvhpc:acc:debug:unified: SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit~nmodl+openmp~shared+tests+unified build_type=Debug # Shared + OpenACC + OpenMP host threading has problems -build:coreneuron:mod2c:nvhpc:acc:shared:debug: +build:coreneuron:mod2c:nvhpc:acc:shared: extends: [.build_coreneuron, .spack_nvhpc] variables: - SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit~nmodl~openmp+shared+tests~unified build_type=Debug + SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit~nmodl~openmp+shared+tests~unified build_type=RelWithDebInfo build:coreneuron:nmodl:nvhpc:acc:legacy: extends: [.build_coreneuron, .spack_nvhpc] @@ -164,9 +164,9 @@ build:neuron:nmodl:intel:shared: extends: [.build_neuron, .spack_intel] needs: ["build:coreneuron:nmodl:intel:shared"] -build:neuron:mod2c:nvhpc:acc:shared:debug: +build:neuron:mod2c:nvhpc:acc:shared: extends: [.build_neuron, .spack_nvhpc] - needs: ["build:coreneuron:mod2c:nvhpc:acc:shared:debug"] + needs: ["build:coreneuron:mod2c:nvhpc:acc:shared"] build:neuron:nmodl:nvhpc:acc:legacy: extends: [.build_neuron, .spack_nvhpc] @@ -201,9 +201,9 @@ test:coreneuron:mod2c:nvhpc:acc:debug:unified: extends: [.ctest, .gpu_node] needs: ["build:coreneuron:mod2c:nvhpc:acc:debug:unified"] -test:coreneuron:mod2c:nvhpc:acc:shared:debug: +test:coreneuron:mod2c:nvhpc:acc:shared: extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:mod2c:nvhpc:acc:shared:debug"] + needs: ["build:coreneuron:mod2c:nvhpc:acc:shared"] test:coreneuron:nmodl:nvhpc:acc:legacy: extends: [.ctest, .gpu_node] @@ -234,9 +234,9 @@ test:neuron:nmodl:intel:shared: extends: [.test_neuron] needs: ["build:neuron:nmodl:intel:shared"] -test:neuron:mod2c:nvhpc:acc:shared:debug: +test:neuron:mod2c:nvhpc:acc:shared: extends: [.test_neuron, .gpu_node] - needs: ["build:neuron:mod2c:nvhpc:acc:shared:debug"] + 
needs: ["build:neuron:mod2c:nvhpc:acc:shared"] test:neuron:nmodl:nvhpc:acc:legacy: extends: [.test_neuron, .gpu_node] From 977779ec201ed950938a99ebf7fa9b598789b029 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 25 Aug 2022 13:14:29 +0200 Subject: [PATCH 119/128] swap debug/relwithdebinfo for nmodl+gpu builds --- .gitlab-ci.yml | 64 +++++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 01b3175ba..d7ba6c74f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -127,29 +127,29 @@ build:coreneuron:mod2c:nvhpc:acc:shared: variables: SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit~nmodl~openmp+shared+tests~unified build_type=RelWithDebInfo -build:coreneuron:nmodl:nvhpc:acc:legacy: +build:coreneuron:nmodl:nvhpc:acc:debug:legacy: extends: [.build_coreneuron, .spack_nvhpc] needs: ["build:nmodl"] variables: - SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl~openmp~shared~sympy+tests~unified build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl~openmp~shared~sympy+tests~unified build_type=Debug -build:coreneuron:nmodl:nvhpc:acc:shared:debug: +build:coreneuron:nmodl:nvhpc:acc:shared: extends: [.build_coreneuron, .spack_nvhpc] needs: ["build:nmodl"] variables: - SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl~openmp+shared+sympy+tests~unified build_type=Debug + SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl~openmp+shared+sympy+tests~unified build_type=RelWithDebInfo -build:coreneuron:nmodl:nvhpc:omp:debug:legacy: +build:coreneuron:nmodl:nvhpc:omp:legacy: extends: [.build_coreneuron, .spack_nvhpc] needs: ["build:nmodl"] variables: - SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl+openmp~shared~sympy+tests~unified build_type=Debug + SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl+openmp~shared~sympy+tests~unified build_type=RelWithDebInfo -build:coreneuron:nmodl:nvhpc:omp: +build:coreneuron:nmodl:nvhpc:omp:debug: extends: [.build_coreneuron, 
.spack_nvhpc] needs: ["build:nmodl"] variables: - SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl+openmp~shared+sympy+tests~unified build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl+openmp~shared+sympy+tests~unified build_type=Debug # Build NEURON build:neuron:mod2c:intel:shared: @@ -168,21 +168,21 @@ build:neuron:mod2c:nvhpc:acc:shared: extends: [.build_neuron, .spack_nvhpc] needs: ["build:coreneuron:mod2c:nvhpc:acc:shared"] -build:neuron:nmodl:nvhpc:acc:legacy: +build:neuron:nmodl:nvhpc:acc:debug:legacy: extends: [.build_neuron, .spack_nvhpc] - needs: ["build:coreneuron:nmodl:nvhpc:acc:legacy"] + needs: ["build:coreneuron:nmodl:nvhpc:acc:debug:legacy"] -build:neuron:nmodl:nvhpc:acc:shared:debug: +build:neuron:nmodl:nvhpc:acc:shared: extends: [.build_neuron, .spack_nvhpc] - needs: ["build:coreneuron:nmodl:nvhpc:acc:shared:debug"] + needs: ["build:coreneuron:nmodl:nvhpc:acc:shared"] -build:neuron:nmodl:nvhpc:omp:debug:legacy: +build:neuron:nmodl:nvhpc:omp:legacy: extends: [.build_neuron, .spack_nvhpc] - needs: ["build:coreneuron:nmodl:nvhpc:omp:debug:legacy"] + needs: ["build:coreneuron:nmodl:nvhpc:omp:legacy"] -build:neuron:nmodl:nvhpc:omp: +build:neuron:nmodl:nvhpc:omp:debug: extends: [.build_neuron, .spack_nvhpc] - needs: ["build:coreneuron:nmodl:nvhpc:omp"] + needs: ["build:coreneuron:nmodl:nvhpc:omp:debug"] # Test CoreNEURON test:coreneuron:mod2c:intel:shared: @@ -205,21 +205,21 @@ test:coreneuron:mod2c:nvhpc:acc:shared: extends: [.ctest, .gpu_node] needs: ["build:coreneuron:mod2c:nvhpc:acc:shared"] -test:coreneuron:nmodl:nvhpc:acc:legacy: +test:coreneuron:nmodl:nvhpc:acc:debug:legacy: extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:nmodl:nvhpc:acc:legacy"] + needs: ["build:coreneuron:nmodl:nvhpc:acc:debug:legacy"] -test:coreneuron:nmodl:nvhpc:acc:shared:debug: +test:coreneuron:nmodl:nvhpc:acc:shared: extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:nmodl:nvhpc:acc:shared:debug"] + needs: 
["build:coreneuron:nmodl:nvhpc:acc:shared"] -test:coreneuron:nmodl:nvhpc:omp:debug:legacy: +test:coreneuron:nmodl:nvhpc:omp:legacy: extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:nmodl:nvhpc:omp:debug:legacy"] + needs: ["build:coreneuron:nmodl:nvhpc:omp:legacy"] -test:coreneuron:nmodl:nvhpc:omp: +test:coreneuron:nmodl:nvhpc:omp:debug: extends: [.ctest, .gpu_node] - needs: ["build:coreneuron:nmodl:nvhpc:omp"] + needs: ["build:coreneuron:nmodl:nvhpc:omp:debug"] # Test NEURON test:neuron:mod2c:intel:shared: @@ -238,18 +238,18 @@ test:neuron:mod2c:nvhpc:acc:shared: extends: [.test_neuron, .gpu_node] needs: ["build:neuron:mod2c:nvhpc:acc:shared"] -test:neuron:nmodl:nvhpc:acc:legacy: +test:neuron:nmodl:nvhpc:acc:debug:legacy: extends: [.test_neuron, .gpu_node] - needs: ["build:neuron:nmodl:nvhpc:acc:legacy"] + needs: ["build:neuron:nmodl:nvhpc:acc:debug:legacy"] -test:neuron:nmodl:nvhpc:acc:shared:debug: +test:neuron:nmodl:nvhpc:acc:shared: extends: [.test_neuron, .gpu_node] - needs: ["build:neuron:nmodl:nvhpc:acc:shared:debug"] + needs: ["build:neuron:nmodl:nvhpc:acc:shared"] -test:neuron:nmodl:nvhpc:omp:debug:legacy: +test:neuron:nmodl:nvhpc:omp:legacy: extends: [.test_neuron, .gpu_node] - needs: ["build:neuron:nmodl:nvhpc:omp:debug:legacy"] + needs: ["build:neuron:nmodl:nvhpc:omp:legacy"] -test:neuron:nmodl:nvhpc:omp: +test:neuron:nmodl:nvhpc:omp:debug: extends: [.test_neuron, .gpu_node] - needs: ["build:neuron:nmodl:nvhpc:omp"] + needs: ["build:neuron:nmodl:nvhpc:omp:debug"] From 08031b2cccde0eb2b4d8bd53975c83731647c077 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 25 Aug 2022 13:33:53 +0200 Subject: [PATCH 120/128] fixup --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d7ba6c74f..dd9adc325 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -112,6 +112,7 @@ build:coreneuron:nmodl:intel:shared: extends: [.build_coreneuron, .spack_intel] needs: ["build:nmodl"] variables: + 
SPACK_PACKAGE_DEPENDENCIES: ^hpe-mpi%gcc SPACK_PACKAGE_SPEC: ~caliper~gpu~legacy-unit+nmodl+openmp+shared+sympy+tests~unified build_type=RelWithDebInfo # Not linked to a NEURON build+test job, see From aa893f6c02f61f3efdc45d28146e63c7eeed9b10 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 25 Aug 2022 14:25:23 +0200 Subject: [PATCH 121/128] disable OpenMP in CPU builds --- .gitlab-ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index dd9adc325..a6073578d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -99,13 +99,13 @@ build:nmodl: build:coreneuron:mod2c:intel:shared: extends: [.build_coreneuron, .spack_intel] variables: - SPACK_PACKAGE_SPEC: +caliper~gpu~legacy-unit~nmodl+openmp+shared+tests~unified build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: +caliper~gpu~legacy-unit~nmodl~openmp+shared+tests~unified build_type=RelWithDebInfo build:coreneuron:nmodl:intel:debug:legacy: extends: [.build_coreneuron, .spack_intel] needs: ["build:nmodl"] variables: - SPACK_PACKAGE_SPEC: +caliper~gpu~legacy-unit+nmodl+openmp~shared~sympy+tests~unified build_type=Debug + SPACK_PACKAGE_SPEC: +caliper~gpu~legacy-unit+nmodl~openmp~shared~sympy+tests~unified build_type=Debug # Disable caliper to improve coverage build:coreneuron:nmodl:intel:shared: @@ -113,7 +113,7 @@ build:coreneuron:nmodl:intel:shared: needs: ["build:nmodl"] variables: SPACK_PACKAGE_DEPENDENCIES: ^hpe-mpi%gcc - SPACK_PACKAGE_SPEC: ~caliper~gpu~legacy-unit+nmodl+openmp+shared+sympy+tests~unified build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: ~caliper~gpu~legacy-unit+nmodl~openmp+shared+sympy+tests~unified build_type=RelWithDebInfo # Not linked to a NEURON build+test job, see # https://github.com/BlueBrain/CoreNeuron/issues/594 From f71b6fd57624e7fd3c46ddcc324b29f5940d66de Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 25 Aug 2022 17:04:26 +0200 Subject: [PATCH 122/128] submodules --- external/mod2c | 2 +- external/nmodl | 2 +- 2 files 
changed, 2 insertions(+), 2 deletions(-) diff --git a/external/mod2c b/external/mod2c index ec96803a3..626ffc202 160000 --- a/external/mod2c +++ b/external/mod2c @@ -1 +1 @@ -Subproject commit ec96803a3ec34bab63cc0e00b6cc85581eacd403 +Subproject commit 626ffc2024872b7d48ecca95786b97d707898317 diff --git a/external/nmodl b/external/nmodl index 6b43a20be..1265e4a84 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 6b43a20be76fce6b144e4324c574301c1f087387 +Subproject commit 1265e4a84b699cac10668db5ca59f7054c9f1f51 From b214ad0c44ea99e250e47e0287e909f8f7298a97 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 25 Aug 2022 17:16:01 +0200 Subject: [PATCH 123/128] submodule --- external/nmodl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/nmodl b/external/nmodl index 1265e4a84..c7891c5e7 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 1265e4a84b699cac10668db5ca59f7054c9f1f51 +Subproject commit c7891c5e73a857aab96554d668a75eb16316a992 From f53901ea120d0a30962bad527701aacff0308c2f Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 25 Aug 2022 17:19:20 +0200 Subject: [PATCH 124/128] submodule --- external/nmodl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/nmodl b/external/nmodl index c7891c5e7..4eaad0be8 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit c7891c5e73a857aab96554d668a75eb16316a992 +Subproject commit 4eaad0be8922de50c4b4a0444b72393361c7a998 From c647a08c7afd9a8af48e2ac14df8b7516a8a46b7 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Thu, 25 Aug 2022 17:33:42 +0200 Subject: [PATCH 125/128] submodule --- external/nmodl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/nmodl b/external/nmodl index 4eaad0be8..0274a4c6e 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 4eaad0be8922de50c4b4a0444b72393361c7a998 +Subproject commit 
0274a4c6e87d71fb161f21aea8d236a2a57d3fd1 From 2857e2d71089a6899e3c43a9865acd7a4ed304f3 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 26 Aug 2022 10:09:53 +0200 Subject: [PATCH 126/128] make all intel builds debug --- .gitlab-ci.yml | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a6073578d..b8fad911f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -96,10 +96,13 @@ build:nmodl: # TODO: fix this more robustly so we don't have to play so many games. SPACK_PACKAGE_DEPENDENCIES: ^hpe-mpi%gcc ^caliper%gcc+cuda cuda_arch=70 -build:coreneuron:mod2c:intel:shared: +# TODO: improve coverage by switching an Intel build to be statically linked +# TODO: improve coverage by switching an Intel build to RelWithDebInfo +# TODO: improve coverage by enabling +openmp on an Intel build +build:coreneuron:mod2c:intel:shared:debug: extends: [.build_coreneuron, .spack_intel] variables: - SPACK_PACKAGE_SPEC: +caliper~gpu~legacy-unit~nmodl~openmp+shared+tests~unified build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: +caliper~gpu~legacy-unit~nmodl~openmp+shared+tests~unified build_type=Debug build:coreneuron:nmodl:intel:debug:legacy: extends: [.build_coreneuron, .spack_intel] @@ -108,12 +111,12 @@ build:coreneuron:nmodl:intel:debug:legacy: SPACK_PACKAGE_SPEC: +caliper~gpu~legacy-unit+nmodl~openmp~shared~sympy+tests~unified build_type=Debug # Disable caliper to improve coverage -build:coreneuron:nmodl:intel:shared: +build:coreneuron:nmodl:intel:shared:debug: extends: [.build_coreneuron, .spack_intel] needs: ["build:nmodl"] variables: SPACK_PACKAGE_DEPENDENCIES: ^hpe-mpi%gcc - SPACK_PACKAGE_SPEC: ~caliper~gpu~legacy-unit+nmodl~openmp+shared+sympy+tests~unified build_type=RelWithDebInfo + SPACK_PACKAGE_SPEC: ~caliper~gpu~legacy-unit+nmodl~openmp+shared+sympy+tests~unified build_type=Debug # Not linked to a NEURON build+test job, see # https://github.com/BlueBrain/CoreNeuron/issues/594 @@ -153,17 
+156,17 @@ build:coreneuron:nmodl:nvhpc:omp:debug: SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl+openmp~shared+sympy+tests~unified build_type=Debug # Build NEURON -build:neuron:mod2c:intel:shared: +build:neuron:mod2c:intel:shared:debug: extends: [.build_neuron, .spack_intel] - needs: ["build:coreneuron:mod2c:intel:shared"] + needs: ["build:coreneuron:mod2c:intel:shared:debug"] build:neuron:nmodl:intel:debug:legacy: extends: [.build_neuron, .spack_intel] needs: ["build:coreneuron:nmodl:intel:debug:legacy"] -build:neuron:nmodl:intel:shared: +build:neuron:nmodl:intel:shared:debug: extends: [.build_neuron, .spack_intel] - needs: ["build:coreneuron:nmodl:intel:shared"] + needs: ["build:coreneuron:nmodl:intel:shared:debug"] build:neuron:mod2c:nvhpc:acc:shared: extends: [.build_neuron, .spack_nvhpc] @@ -186,17 +189,17 @@ build:neuron:nmodl:nvhpc:omp:debug: needs: ["build:coreneuron:nmodl:nvhpc:omp:debug"] # Test CoreNEURON -test:coreneuron:mod2c:intel:shared: +test:coreneuron:mod2c:intel:shared:debug: extends: [.ctest] - needs: ["build:coreneuron:mod2c:intel:shared"] + needs: ["build:coreneuron:mod2c:intel:shared:debug"] test:coreneuron:nmodl:intel:debug:legacy: extends: [.ctest] needs: ["build:coreneuron:nmodl:intel:debug:legacy"] -test:coreneuron:nmodl:intel:shared: +test:coreneuron:nmodl:intel:shared:debug: extends: [.ctest] - needs: ["build:coreneuron:nmodl:intel:shared"] + needs: ["build:coreneuron:nmodl:intel:shared:debug"] test:coreneuron:mod2c:nvhpc:acc:debug:unified: extends: [.ctest, .gpu_node] @@ -223,17 +226,17 @@ test:coreneuron:nmodl:nvhpc:omp:debug: needs: ["build:coreneuron:nmodl:nvhpc:omp:debug"] # Test NEURON -test:neuron:mod2c:intel:shared: +test:neuron:mod2c:intel:shared:debug: extends: [.test_neuron] - needs: ["build:neuron:mod2c:intel:shared"] + needs: ["build:neuron:mod2c:intel:shared:debug"] test:neuron:nmodl:intel:debug:legacy: extends: [.test_neuron] needs: ["build:neuron:nmodl:intel:debug:legacy"] -test:neuron:nmodl:intel:shared: 
+test:neuron:nmodl:intel:shared:debug: extends: [.test_neuron] - needs: ["build:neuron:nmodl:intel:shared"] + needs: ["build:neuron:nmodl:intel:shared:debug"] test:neuron:mod2c:nvhpc:acc:shared: extends: [.test_neuron, .gpu_node] From 0e2475593ef020606ca95b8a04f84ea0d771ee87 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Fri, 26 Aug 2022 14:59:27 +0200 Subject: [PATCH 127/128] keep some optimisation of partial_piv_lu.cpp in debug builds e --- coreneuron/CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt index c4143b48c..97d12e613 100644 --- a/coreneuron/CMakeLists.txt +++ b/coreneuron/CMakeLists.txt @@ -120,6 +120,14 @@ if(CORENRN_ENABLE_GPU) # OpenACC/OpenMP annotations. if(CORENRN_ENABLE_NMODL AND EXISTS ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cpp) list(APPEND CORENEURON_CODE_FILES ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cpp) + if(CORENRN_ENABLE_GPU + AND CORENRN_HAVE_NVHPC_COMPILER + AND CMAKE_BUILD_TYPE STREQUAL "Debug") + # In this case OpenAccHelper.cmake passes -gpu=debug, which makes these Eigen functions + # extremely slow. Downgrade that to -gpu=lineinfo for this file. 
+ set_source_files_properties(${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cpp + PROPERTIES COMPILE_FLAGS "-gpu=lineinfo,nodebug -O1") + endif() endif() endif() From 567bd21cc895460c3e176d71d24dd82a549835a9 Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Sun, 28 Aug 2022 13:53:07 +0200 Subject: [PATCH 128/128] update nmodl and mod2c submodule --- external/mod2c | 2 +- external/nmodl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/mod2c b/external/mod2c index 626ffc202..469c74dc7 160000 --- a/external/mod2c +++ b/external/mod2c @@ -1 +1 @@ -Subproject commit 626ffc2024872b7d48ecca95786b97d707898317 +Subproject commit 469c74dc7d96bbc5a06a42696422154b4cd2ce28 diff --git a/external/nmodl b/external/nmodl index 0274a4c6e..4f45a1c8a 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 0274a4c6e87d71fb161f21aea8d236a2a57d3fd1 +Subproject commit 4f45a1c8a9b99c64127ea795eb12952e754b775c