diff --git a/src/mca/plm/slurm/plm_slurm.h b/src/mca/plm/slurm/plm_slurm.h index d654a48e4d..b357d4b7ef 100644 --- a/src/mca/plm/slurm/plm_slurm.h +++ b/src/mca/plm/slurm/plm_slurm.h @@ -33,9 +33,8 @@ BEGIN_C_DECLS struct prte_mca_plm_slurm_component_t { prte_plm_base_component_t super; - int custom_args_index; char *custom_args; - bool slurm_warning_msg; + bool early; }; typedef struct prte_mca_plm_slurm_component_t prte_mca_plm_slurm_component_t; diff --git a/src/mca/plm/slurm/plm_slurm_component.c b/src/mca/plm/slurm/plm_slurm_component.c index 936386bbb6..81cf74e59e 100644 --- a/src/mca/plm/slurm/plm_slurm_component.c +++ b/src/mca/plm/slurm/plm_slurm_component.c @@ -38,6 +38,7 @@ #include "src/util/name_fns.h" #include "src/util/pmix_environ.h" #include "src/util/pmix_show_help.h" +#include "src/util/pmix_string_copy.h" #include "plm_slurm.h" #include "src/mca/plm/base/plm_private.h" @@ -84,28 +85,15 @@ prte_mca_plm_slurm_component_t prte_mca_plm_slurm_component = { here; will be initialized in plm_slurm_open() */ }; -static char *custom_args = NULL; -static char *force_args = NULL; - static int plm_slurm_register(void) { pmix_mca_base_component_t *comp = &prte_mca_plm_slurm_component.super; - prte_mca_plm_slurm_component.custom_args_index = - pmix_mca_base_component_var_register(comp, "args", "Custom arguments to srun", - PMIX_MCA_BASE_VAR_TYPE_STRING, - &custom_args); - - force_args = NULL; - (void) pmix_mca_base_component_var_register(comp, "force_args", "Mandatory custom arguments to srun", - PMIX_MCA_BASE_VAR_TYPE_STRING, - &force_args); - - prte_mca_plm_slurm_component.slurm_warning_msg = false; - (void) pmix_mca_base_component_var_register(comp, "disable_warning", "Turn off warning message about custom args set in environment", - PMIX_MCA_BASE_VAR_TYPE_BOOL, - &prte_mca_plm_slurm_component.slurm_warning_msg); + prte_mca_plm_slurm_component.custom_args = NULL; + pmix_mca_base_component_var_register(comp, "args", "Custom arguments to srun", + PMIX_MCA_BASE_VAR_TYPE_STRING, + &prte_mca_plm_slurm_component.custom_args); return PRTE_SUCCESS; } @@ -117,11 +105,11 @@ static int plm_slurm_open(void) static int prte_mca_plm_slurm_component_query(pmix_mca_base_module_t **module, int *priority) { - const pmix_mca_base_var_t *var; - pmix_status_t rc; + FILE *fp; + char version[1024], *ptr; + int major, minor; /* Are we running under a SLURM job? */ - if (NULL != getenv("SLURM_JOBID")) { *priority = 75; @@ -129,28 +117,32 @@ static int prte_mca_plm_slurm_component_query(pmix_mca_base_module_t **module, i "%s plm:slurm: available for selection", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME))); - prte_mca_plm_slurm_component.custom_args = NULL; - - // if we were are warning about externally set custom args, then - // check to see if that was done - if (!prte_mca_plm_slurm_component.slurm_warning_msg && - NULL == force_args) { - // check for custom args - rc = pmix_mca_base_var_get(prte_mca_plm_slurm_component.custom_args_index, &var); - if (PMIX_SUCCESS == rc) { - // the variable was set - see who set it - if (PMIX_MCA_BASE_VAR_SOURCE_ENV == var->mbv_source) { - // set in the environment - warn - pmix_show_help("help-plm-slurm.txt", "custom-args-in-env", true, - custom_args); - } - } + // check the version + fp = popen("srun --version", "r"); + if (NULL == fp) { + // cannot run srun, so we cannot support this job + *module = NULL; + return PRTE_ERROR; } - - if (NULL != force_args) { - prte_mca_plm_slurm_component.custom_args = force_args; - } else if (NULL != custom_args) { - prte_mca_plm_slurm_component.custom_args = custom_args; + if (NULL == fgets(version, sizeof(version), fp)) { + pclose(fp); + *module = NULL; + return PRTE_ERROR; + } + pclose(fp); + // parse on the dots + major = strtol(&version[6], &ptr, 10); + ++ptr; + minor = strtol(ptr, NULL, 10); + + if (23 > major) { + prte_mca_plm_slurm_component.early = true; + } else if (23 < major) { + prte_mca_plm_slurm_component.early = false; + } else if (11 > minor) { + prte_mca_plm_slurm_component.early = true; + } else { + prte_mca_plm_slurm_component.early = false; } *module = (pmix_mca_base_module_t *) &prte_plm_slurm_module; diff --git a/src/mca/plm/slurm/plm_slurm_module.c b/src/mca/plm/slurm/plm_slurm_module.c index 2edae0312a..8c9a04ec27 100644 --- a/src/mca/plm/slurm/plm_slurm_module.c +++ b/src/mca/plm/slurm/plm_slurm_module.c @@ -15,7 +15,7 @@ * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -253,6 +253,11 @@ static void launch_daemons(int fd, short args, void *cbdata) /* add the srun command */ pmix_argv_append(&argc, &argv, "srun"); + // add the external launcher flag if necessary + if (!prte_mca_plm_slurm_component.early) { + pmix_argv_append(&argc, &argv, "--external-launcher"); + } + /* start one orted on each node */ pmix_argv_append(&argc, &argv, "--ntasks-per-node=1"); diff --git a/src/runtime/prte_init.c b/src/runtime/prte_init.c index db7022cfb1..5e7a234f63 100644 --- a/src/runtime/prte_init.c +++ b/src/runtime/prte_init.c @@ -179,6 +179,24 @@ int prte_init_minimum(void) return PRTE_ERR_SILENT; } + /* Protect against the envar version of the Slurm + * custom args MCA param. This is an unfortunate + * hack that hopefully will eventually go away. + * See both of the following for detailed + * explanations and discussion: + * + * https://github.com/openpmix/prrte/issues/1974 + * https://github.com/open-mpi/ompi/issues/12471 + * + * Orgs/users wanting to add custom args to the + * internal "srun" command used to spawn the + * PRRTE daemons must do so via the default MCA + * param files (system or user), or via the + * prterun (or its proxy) cmd line + */ + unsetenv("PRTE_MCA_plm_slurm_args"); + unsetenv("OMPI_MCA_plm_slurm_args"); + /* carry across the toolname */ pmix_tool_basename = prte_tool_basename;