From 97ccdd627f5a1d39c8329192dd65912a851c5950 Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Thu, 9 Oct 2025 14:34:52 -0500 Subject: [PATCH 01/19] btl/ofi: fault tolerance Signed-off-by: Matthew Whitlock --- opal/mca/btl/ofi/btl_ofi.h | 2 + opal/mca/btl/ofi/btl_ofi_context.c | 126 +++++++++++++++++------------ opal/mca/btl/ofi/btl_ofi_frag.c | 6 +- opal/mca/btl/ofi/btl_ofi_frag.h | 3 +- opal/mca/btl/ofi/btl_ofi_module.c | 9 +++ 5 files changed, 91 insertions(+), 55 deletions(-) diff --git a/opal/mca/btl/ofi/btl_ofi.h b/opal/mca/btl/ofi/btl_ofi.h index e12d490b390..20345d02c6b 100644 --- a/opal/mca/btl/ofi/btl_ofi.h +++ b/opal/mca/btl/ofi/btl_ofi.h @@ -139,6 +139,8 @@ struct mca_btl_ofi_module_t { /** registration cache */ mca_rcache_base_module_t *rcache; + + mca_btl_base_module_error_cb_fn_t ofi_error_cb; }; typedef struct mca_btl_ofi_module_t mca_btl_ofi_module_t; diff --git a/opal/mca/btl/ofi/btl_ofi_context.c b/opal/mca/btl/ofi/btl_ofi_context.c index 2b9a5fb6905..4a774145744 100644 --- a/opal/mca/btl/ofi/btl_ofi_context.c +++ b/opal/mca/btl/ofi/btl_ofi_context.c @@ -310,6 +310,56 @@ mca_btl_ofi_context_t *get_ofi_context_rr(mca_btl_ofi_module_t *btl) return &btl->contexts[rr_num++ % btl->num_contexts]; } +static void inline complete_op_context(mca_btl_ofi_context_t* context, + void *op_context, int rc) +{ + mca_btl_ofi_completion_context_t *c_ctx = + (mca_btl_ofi_completion_context_t*) op_context; + /* We are casting to every type here just for simplicity. 
*/ + mca_btl_ofi_base_completion_t *comp = + (mca_btl_ofi_base_completion_t *) c_ctx->comp; + mca_btl_ofi_frag_completion_t *frag_comp = + (mca_btl_ofi_frag_completion_t *) c_ctx->comp; + mca_btl_ofi_rdma_completion_t *rdma_comp + = (mca_btl_ofi_rdma_completion_t *) c_ctx->comp; + + switch (comp->type) { + case MCA_BTL_OFI_TYPE_GET: + case MCA_BTL_OFI_TYPE_PUT: + case MCA_BTL_OFI_TYPE_AOP: + case MCA_BTL_OFI_TYPE_AFOP: + case MCA_BTL_OFI_TYPE_CSWAP: + /* call the callback */ + if (rdma_comp->cbfunc) { + rdma_comp->cbfunc(comp->btl, comp->endpoint, rdma_comp->local_address, + rdma_comp->local_handle, rdma_comp->cbcontext, + rdma_comp->cbdata, rc); + } + + MCA_BTL_OFI_NUM_RDMA_DEC((mca_btl_ofi_module_t *) comp->btl); + break; + + case MCA_BTL_OFI_TYPE_RECV: + mca_btl_ofi_recv_frag((mca_btl_ofi_module_t *) comp->btl, + (mca_btl_ofi_endpoint_t *) comp->endpoint, context, + frag_comp->frag, rc); + break; + + case MCA_BTL_OFI_TYPE_SEND: + MCA_BTL_OFI_NUM_SEND_DEC((mca_btl_ofi_module_t *) comp->btl); + mca_btl_ofi_frag_complete(frag_comp->frag, rc); + break; + + default: + /* catasthrophic */ + BTL_ERROR(("unknown completion type")); + MCA_BTL_OFI_ABORT(); + } + + /* return the completion handler */ + opal_free_list_return(comp->my_list, (opal_free_list_item_t *) comp); +} + int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context) { @@ -319,11 +369,6 @@ int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context) struct fi_cq_entry cq_entry[MCA_BTL_OFI_DEFAULT_MAX_CQE]; struct fi_cq_err_entry cqerr = {0}; - mca_btl_ofi_completion_context_t *c_ctx; - mca_btl_ofi_base_completion_t *comp; - mca_btl_ofi_rdma_completion_t *rdma_comp; - mca_btl_ofi_frag_completion_t *frag_comp; - ret = fi_cq_read(context->cq, &cq_entry, mca_btl_ofi_component.num_cqe_read); if (0 < ret) { @@ -331,49 +376,7 @@ int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context) for (int i = 0; i < events_read; i++) { if (NULL != cq_entry[i].op_context) { ++events; - - c_ctx = 
(mca_btl_ofi_completion_context_t *) cq_entry[i].op_context; - - /* We are casting to every type here just for simplicity. */ - comp = (mca_btl_ofi_base_completion_t *) c_ctx->comp; - frag_comp = (mca_btl_ofi_frag_completion_t *) c_ctx->comp; - rdma_comp = (mca_btl_ofi_rdma_completion_t *) c_ctx->comp; - - switch (comp->type) { - case MCA_BTL_OFI_TYPE_GET: - case MCA_BTL_OFI_TYPE_PUT: - case MCA_BTL_OFI_TYPE_AOP: - case MCA_BTL_OFI_TYPE_AFOP: - case MCA_BTL_OFI_TYPE_CSWAP: - /* call the callback */ - if (rdma_comp->cbfunc) { - rdma_comp->cbfunc(comp->btl, comp->endpoint, rdma_comp->local_address, - rdma_comp->local_handle, rdma_comp->cbcontext, - rdma_comp->cbdata, OPAL_SUCCESS); - } - - MCA_BTL_OFI_NUM_RDMA_DEC((mca_btl_ofi_module_t *) comp->btl); - break; - - case MCA_BTL_OFI_TYPE_RECV: - mca_btl_ofi_recv_frag((mca_btl_ofi_module_t *) comp->btl, - (mca_btl_ofi_endpoint_t *) comp->endpoint, context, - frag_comp->frag); - break; - - case MCA_BTL_OFI_TYPE_SEND: - MCA_BTL_OFI_NUM_SEND_DEC((mca_btl_ofi_module_t *) comp->btl); - mca_btl_ofi_frag_complete(frag_comp->frag, OPAL_SUCCESS); - break; - - default: - /* catasthrophic */ - BTL_ERROR(("unknown completion type")); - MCA_BTL_OFI_ABORT(); - } - - /* return the completion handler */ - opal_free_list_return(comp->my_list, (opal_free_list_item_t *) comp); + complete_op_context(context, cq_entry[i].op_context, OPAL_SUCCESS); } } } else if (OPAL_UNLIKELY(ret == -FI_EAVAIL)) { @@ -383,10 +386,31 @@ int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context) if (0 > ret) { BTL_ERROR(("%s:%d: Error returned from fi_cq_readerr: %s(%d)", __FILE__, __LINE__, fi_strerror(-ret), ret)); - } else { - BTL_ERROR(("fi_cq_readerr: (provider err_code = %d)\n", cqerr.prov_errno)); + MCA_BTL_OFI_ABORT(); + } else if(NULL != cqerr.op_context){ + switch(cqerr.err) { + case -FI_EIO: { + mca_btl_ofi_completion_context_t *c_ctx = + (mca_btl_ofi_completion_context_t*) cqerr.op_context; + mca_btl_ofi_base_completion_t *comp = + 
(mca_btl_ofi_base_completion_t*) c_ctx->comp; + mca_btl_ofi_module_t *ofi_btl = + (mca_btl_ofi_module_t*) comp->btl; + if(ofi_btl->ofi_error_cb){ + ofi_btl->ofi_error_cb(comp->btl, 0, comp->endpoint->ep_proc, + "IO error reported by libfabric"); + } + + ++events; + complete_op_context(context, cqerr.op_context, OPAL_ERR_UNREACH); + break; + } + default: + BTL_ERROR(("fi_cq_readerr: %s(%d) (provider err_code = %d)\n", + fi_strerror(-cqerr.err), cqerr.err, cqerr.prov_errno)); + MCA_BTL_OFI_ABORT(); + } } - MCA_BTL_OFI_ABORT(); } #ifdef FI_EINTR /* sometimes, sockets provider complain about interrupt. We do nothing. */ diff --git a/opal/mca/btl/ofi/btl_ofi_frag.c b/opal/mca/btl/ofi/btl_ofi_frag.c index 25433c0b6a3..e325dd34ccf 100644 --- a/opal/mca/btl/ofi/btl_ofi_frag.c +++ b/opal/mca/btl/ofi/btl_ofi_frag.c @@ -145,9 +145,9 @@ int mca_btl_ofi_send(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi } int mca_btl_ofi_recv_frag(mca_btl_ofi_module_t *ofi_btl, mca_btl_base_endpoint_t *endpoint, - mca_btl_ofi_context_t *context, mca_btl_ofi_base_frag_t *frag) + mca_btl_ofi_context_t *context, mca_btl_ofi_base_frag_t *frag, + int rc) { - int rc; mca_btl_active_message_callback_t *reg = mca_btl_base_active_message_trigger + frag->hdr.tag; mca_btl_base_segment_t segment = {.seg_addr.pval = (void *) (frag + 1), .seg_len = frag->hdr.len}; @@ -160,7 +160,7 @@ int mca_btl_ofi_recv_frag(mca_btl_ofi_module_t *ofi_btl, mca_btl_base_endpoint_t /* call the callback */ reg->cbfunc(&ofi_btl->super, &recv_desc); - mca_btl_ofi_frag_complete(frag, OPAL_SUCCESS); + mca_btl_ofi_frag_complete(frag, rc); /* repost the recv */ rc = mca_btl_ofi_post_recvs((mca_btl_base_module_t *) ofi_btl, context, 1); diff --git a/opal/mca/btl/ofi/btl_ofi_frag.h b/opal/mca/btl/ofi/btl_ofi_frag.h index 3afa8866265..786fafa3bbe 100644 --- a/opal/mca/btl/ofi/btl_ofi_frag.h +++ b/opal/mca/btl/ofi/btl_ofi_frag.h @@ -38,7 +38,8 @@ int mca_btl_ofi_send(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t 
*endpoi mca_btl_base_descriptor_t *descriptor, mca_btl_base_tag_t tag); int mca_btl_ofi_recv_frag(mca_btl_ofi_module_t *ofi_btl, mca_btl_base_endpoint_t *endpoint, - mca_btl_ofi_context_t *context, mca_btl_ofi_base_frag_t *frag); + mca_btl_ofi_context_t *context, mca_btl_ofi_base_frag_t *frag, + int rc); struct mca_btl_base_descriptor_t *mca_btl_ofi_prepare_src(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, diff --git a/opal/mca/btl/ofi/btl_ofi_module.c b/opal/mca/btl/ofi/btl_ofi_module.c index 23b0dc7dfe8..8c73c91008f 100644 --- a/opal/mca/btl/ofi/btl_ofi_module.c +++ b/opal/mca/btl/ofi/btl_ofi_module.c @@ -143,6 +143,14 @@ static int mca_btl_ofi_del_procs(mca_btl_base_module_t *btl, size_t nprocs, opal return OPAL_SUCCESS; } +static int mca_btl_ofi_register_error(mca_btl_base_module_t *btl, + mca_btl_base_module_error_cb_fn_t cb) +{ + mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl; + ofi_btl->ofi_error_cb = cb; + return OPAL_SUCCESS; +} + void mca_btl_ofi_rcache_init(mca_btl_ofi_module_t *module) { if (!module->initialized) { @@ -515,4 +523,5 @@ mca_btl_ofi_module_t mca_btl_ofi_module_template = { .btl_add_procs = mca_btl_ofi_add_procs, .btl_del_procs = mca_btl_ofi_del_procs, .btl_finalize = mca_btl_ofi_finalize, + .btl_register_error = mca_btl_ofi_register_error, }}; From 6152e7efb7c4003ddfdb2f7caa1b942204f723fb Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Mon, 10 Nov 2025 14:15:39 -0600 Subject: [PATCH 02/19] btl/ofi check for valid pointer in error handler Signed-off-by: Matthew Whitlock --- opal/mca/btl/ofi/btl_ofi_context.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/opal/mca/btl/ofi/btl_ofi_context.c b/opal/mca/btl/ofi/btl_ofi_context.c index 4a774145744..ea876d548c0 100644 --- a/opal/mca/btl/ofi/btl_ofi_context.c +++ b/opal/mca/btl/ofi/btl_ofi_context.c @@ -397,7 +397,11 @@ int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context) mca_btl_ofi_module_t *ofi_btl = 
(mca_btl_ofi_module_t*) comp->btl; if(ofi_btl->ofi_error_cb){ - ofi_btl->ofi_error_cb(comp->btl, 0, comp->endpoint->ep_proc, + opal_proc_t *ep_proc = NULL; + if(comp->endpoint){ + ep_proc = comp->endpoint->ep_proc; + } + ofi_btl->ofi_error_cb(comp->btl, 0, ep_proc, "IO error reported by libfabric"); } From 5938c94aa3d1d88dcbfc8927014917e85c3dc004 Mon Sep 17 00:00:00 2001 From: Nathan Bellalou Date: Tue, 23 Dec 2025 13:52:38 +0200 Subject: [PATCH 03/19] opal/mca/common/ucx : assert fix - change thread mode sent to UCX api Signed-off-by: Nathan Bellalou --- opal/mca/common/ucx/common_ucx_wpool.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/opal/mca/common/ucx/common_ucx_wpool.c b/opal/mca/common/ucx/common_ucx_wpool.c index ae290201710..8f8fb9d57b8 100644 --- a/opal/mca/common/ucx/common_ucx_wpool.c +++ b/opal/mca/common/ucx/common_ucx_wpool.c @@ -3,6 +3,8 @@ #include "common_ucx.h" #include "common_ucx_wpool.h" #include "common_ucx_wpool_int.h" +#include "mpi.h" +#include "ompi/runtime/mpiruntime.h" #include "opal/mca/base/mca_base_framework.h" #include "opal/mca/base/mca_base_var.h" #include "opal/mca/pmix/pmix-internal.h" @@ -55,7 +57,7 @@ static opal_common_ucx_winfo_t *_winfo_create(opal_common_ucx_wpool_t *wpool) if (opal_common_ucx_thread_enabled || wpool->dflt_winfo == NULL) { memset(&worker_params, 0, sizeof(worker_params)); worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; - worker_params.thread_mode = UCS_THREAD_MODE_SINGLE; + worker_params.thread_mode = ompi_mpi_thread_provided == MPI_THREAD_SINGLE ? 
UCS_THREAD_MODE_SINGLE : UCS_THREAD_MODE_SERIALIZED; status = ucp_worker_create(wpool->ucp_ctx, &worker_params, &worker); if (UCS_OK != status) { MCA_COMMON_UCX_ERROR("ucp_worker_create failed: %d", status); From 15e5a621ed5dfa3245eba9ec33539123c14d9d35 Mon Sep 17 00:00:00 2001 From: Nathan Bellalou Date: Sun, 11 Jan 2026 10:28:21 +0200 Subject: [PATCH 04/19] Fix abstraction violation between ompi and opal Create two bool variables, opal_single_threaded and opal_common_ucx_single_threaded, that mimic behavior of variables opal_uses_threads and opal_common_ucx_single_threaded, in order to propagate mpi thread level to opal while preserving abstraction. opal_single_threaded is true if and only if mpi thread level is MPI_THREAD_SINGLE Signed-off-by: Nathan Bellalou --- ompi/instance/instance.c | 3 +++ ompi/mca/osc/ucx/osc_ucx_component.c | 1 + opal/mca/common/ucx/common_ucx_wpool.c | 5 ++--- opal/mca/common/ucx/common_ucx_wpool.h | 1 + opal/mca/threads/base/mutex.c | 6 ++++++ opal/mca/threads/thread_usage.h | 1 + 6 files changed, 14 insertions(+), 3 deletions(-) diff --git a/ompi/instance/instance.c b/ompi/instance/instance.c index 8ca19a9724c..bd686d2bab2 100644 --- a/ompi/instance/instance.c +++ b/ompi/instance/instance.c @@ -835,6 +835,9 @@ int ompi_mpi_instance_init (int ts_level, opal_info_t *info, ompi_errhandler_t opal_set_using_threads(true); } + /* Set single-threaded flag for optimization purposes */ + opal_single_threaded = (ts_level == MPI_THREAD_SINGLE); + opal_mutex_lock (&instance_lock); if (0 == opal_atomic_fetch_add_32 (&ompi_instance_count, 1)) { ret = ompi_mpi_instance_init_common (argc, argv); diff --git a/ompi/mca/osc/ucx/osc_ucx_component.c b/ompi/mca/osc/ucx/osc_ucx_component.c index 93b0fb9c9a0..635a53a3e0f 100644 --- a/ompi/mca/osc/ucx/osc_ucx_component.c +++ b/ompi/mca/osc/ucx/osc_ucx_component.c @@ -190,6 +190,7 @@ static int component_register(void) { free(description_str); opal_common_ucx_thread_enabled = opal_using_threads(); + 
opal_common_ucx_single_threaded = opal_single_threaded; mca_osc_ucx_component.acc_single_intrinsic = false; opal_asprintf(&description_str, "Enable optimizations for MPI_Fetch_and_op, MPI_Accumulate, etc for codes " diff --git a/opal/mca/common/ucx/common_ucx_wpool.c b/opal/mca/common/ucx/common_ucx_wpool.c index 8f8fb9d57b8..cd93f253e4d 100644 --- a/opal/mca/common/ucx/common_ucx_wpool.c +++ b/opal/mca/common/ucx/common_ucx_wpool.c @@ -3,8 +3,6 @@ #include "common_ucx.h" #include "common_ucx_wpool.h" #include "common_ucx_wpool_int.h" -#include "mpi.h" -#include "ompi/runtime/mpiruntime.h" #include "opal/mca/base/mca_base_framework.h" #include "opal/mca/base/mca_base_var.h" #include "opal/mca/pmix/pmix-internal.h" @@ -34,6 +32,7 @@ __thread int initialized = 0; #endif bool opal_common_ucx_thread_enabled = false; +bool opal_common_ucx_single_threaded = true; opal_atomic_int64_t opal_common_ucx_ep_counts = 0; opal_atomic_int64_t opal_common_ucx_unpacked_rkey_counts = 0; @@ -57,7 +56,7 @@ static opal_common_ucx_winfo_t *_winfo_create(opal_common_ucx_wpool_t *wpool) if (opal_common_ucx_thread_enabled || wpool->dflt_winfo == NULL) { memset(&worker_params, 0, sizeof(worker_params)); worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; - worker_params.thread_mode = ompi_mpi_thread_provided == MPI_THREAD_SINGLE ? UCS_THREAD_MODE_SINGLE : UCS_THREAD_MODE_SERIALIZED; + worker_params.thread_mode = opal_common_ucx_single_threaded ? 
UCS_THREAD_MODE_SINGLE : UCS_THREAD_MODE_SERIALIZED; status = ucp_worker_create(wpool->ucp_ctx, &worker_params, &worker); if (UCS_OK != status) { MCA_COMMON_UCX_ERROR("ucp_worker_create failed: %d", status); diff --git a/opal/mca/common/ucx/common_ucx_wpool.h b/opal/mca/common/ucx/common_ucx_wpool.h index 0d94e51cb64..44bf55803d0 100644 --- a/opal/mca/common/ucx/common_ucx_wpool.h +++ b/opal/mca/common/ucx/common_ucx_wpool.h @@ -59,6 +59,7 @@ typedef struct { } opal_common_ucx_wpool_t; extern bool opal_common_ucx_thread_enabled; +extern bool opal_common_ucx_single_threaded; extern opal_atomic_int64_t opal_common_ucx_ep_counts; extern opal_atomic_int64_t opal_common_ucx_unpacked_rkey_counts; diff --git a/opal/mca/threads/base/mutex.c b/opal/mca/threads/base/mutex.c index fec6cee1d98..322fc855eb8 100644 --- a/opal/mca/threads/base/mutex.c +++ b/opal/mca/threads/base/mutex.c @@ -35,6 +35,12 @@ */ bool opal_uses_threads = false; +/* + * Track if MPI is running in single-threaded mode (MPI_THREAD_SINGLE). + * Default is true until MPI_Init/MPI_Init_thread determines otherwise. + */ +bool opal_single_threaded = true; + static void mca_threads_mutex_constructor(opal_mutex_t *p_mutex) { #if OPAL_ENABLE_DEBUG diff --git a/opal/mca/threads/thread_usage.h b/opal/mca/threads/thread_usage.h index 4e2fd75a7e1..66b00eba4e5 100644 --- a/opal/mca/threads/thread_usage.h +++ b/opal/mca/threads/thread_usage.h @@ -33,6 +33,7 @@ #include "opal/sys/atomic.h" OPAL_DECLSPEC extern bool opal_uses_threads; +OPAL_DECLSPEC extern bool opal_single_threaded; /** * Check and see if the process is using multiple threads. From 39cf29122ed7c104b275516513ff43c4ab9ee91d Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Mon, 12 Jan 2026 12:24:01 -0700 Subject: [PATCH 05/19] PML/UCX: properly handle persistent req free list items Turns out the requests being returned to the UCX PML's persisten request list weren't being properly finalized. 
But it turns out mpi4py unit testing tests all kinds of edge cases, like getting the fortran handle for a persistent request, and thus triggered a bug in the UCX PML when OMPI is configured with debug. Characteristic traceback at finalize prior to this patch is: python3: ../opal/mca/threads/pthreads/threads_pthreads_mutex.h:86: opal_thread_internal_mutex_lock: Assertion `0 == ret' failed. [er-head:1179128] *** Process received signal *** [er-head:1179128] Signal: Aborted (6) [er-head:1179128] Signal code: (-6) [er-head:1179128] [ 0] /lib64/libpthread.so.0(+0x12cf0)[0x7ffff71edcf0] [er-head:1179128] [ 1] /lib64/libc.so.6(gsignal+0x10f)[0x7ffff66daacf] [er-head:1179128] [ 2] /lib64/libc.so.6(abort+0x127)[0x7ffff66adea5] [er-head:1179128] [ 3] /lib64/libc.so.6(+0x21d79)[0x7ffff66add79] [er-head:1179128] [ 4] /lib64/libc.so.6(+0x47426)[0x7ffff66d3426] [er-head:1179128] [ 5] /home/foobar/ompi/install_it/lib/libopen-pal.so.0(+0x414a2)[0x7ffff1ccb4a2] [er-head:1179128] [ 6] /home/foobar/ompi/install_it/lib/libopen-pal.so.0(+0x4150d)[0x7ffff1ccb50d] [er-head:1179128] [ 7] /home/foobar/ompi/install_it/lib/libopen-pal.so.0(opal_pointer_array_set_item+0x7c)[0x7ffff1ccbd40] [er-head:1179128] [ 8] /home/foobar/ompi/install_it/lib/libmpi.so.0(+0x3a5adb)[0x7ffff21c1adb] [er-head:1179128] [ 9] /home/foobar/ompi/install_it/lib/libopen-pal.so.0(+0x3a7aa)[0x7ffff1cc47aa] [er-head:1179128] [10] /home/foobar/ompi/install_it/lib/libopen-pal.so.0(+0x3b34d)[0x7ffff1cc534d] [er-head:1179128] [11] /home/foobar/ompi/install_it/lib/libmpi.so.0(+0x39e934)[0x7ffff21ba934] [er-head:1179128] [12] /home/foobar/ompi/install_it/lib/libmpi.so.0(mca_pml_ucx_cleanup+0x314)[0x7ffff21bc96d] [er-head:1179128] [13] /home/foobar/ompi/install_it/lib/libmpi.so.0(+0x3a79ad)[0x7ffff21c39ad] [er-head:1179128] [14] /home/foobar/ompi/install_it/lib/libmpi.so.0(+0x39c57e)[0x7ffff21b857e] [er-head:1179128] [15] /home/foobar/ompi/install_it/lib/libopen-pal.so.0(opal_finalize_cleanup_domain+0x3e)[0x7ffff1cd32fa]
[er-head:1179128] [16] /home/foobar/ompi/install_it/lib/libopen-pal.so.0(opal_finalize+0x56)[0x7ffff1cc1ca0] [er-head:1179128] [17] /home/foobar/ompi/install_it/lib/libmpi.so.0(ompi_rte_finalize+0x312)[0x7ffff1edaad5] [er-head:1179128] [18] /home/foobar/ompi/install_it/lib/libmpi.so.0(+0xc4dd8)[0x7ffff1ee0dd8] [er-head:1179128] [19] /home/foobar/ompi/install_it/lib/libmpi.so.0(ompi_mpi_instance_finalize+0x13a)[0x7ffff1ee1064] [er-head:1179128] [20] /home/foobar/ompi/install_it/lib/libmpi.so.0(ompi_mpi_finalize+0x5f3)[0x7ffff1ed4c44] [er-head:1179128] [21] /home/foobar/ompi/install_it/lib/libmpi.so.0(PMPI_Finalize+0x54)[0x7ffff1f29440] related to #13623 Signed-off-by: Howard Pritchard --- ompi/mca/pml/ucx/pml_ucx_request.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ompi/mca/pml/ucx/pml_ucx_request.c b/ompi/mca/pml/ucx/pml_ucx_request.c index 2c24d8d178c..532c89b51d6 100644 --- a/ompi/mca/pml/ucx/pml_ucx_request.c +++ b/ompi/mca/pml/ucx/pml_ucx_request.c @@ -236,6 +236,7 @@ static int mca_pml_ucx_persistent_request_free(ompi_request_t **rptr) ucp_request_free(tmp_req); } OMPI_DATATYPE_RELEASE(preq->ompi_datatype); + OMPI_REQUEST_FINI(&preq->ompi); PML_UCX_FREELIST_RETURN(&ompi_pml_ucx.persistent_reqs, &preq->ompi.super); *rptr = MPI_REQUEST_NULL; return OMPI_SUCCESS; From 70d6bf5fdd6fdd4945e66445c5b709deb60565c7 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Mon, 12 Jan 2026 11:58:21 -0800 Subject: [PATCH 06/19] if/bsdx_ipv4: Fix name in COMPONENT_INIT() When we added the MCA_BASE_COMPONENT_INIT() macro to clean up LTO build issues, we accidentally added a _component to the end of the component name, breaking the build for any platform that uses the bsdx_ipv4 component.
Signed-off-by: Brian Barrett --- opal/mca/if/bsdx_ipv4/if_bsdx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opal/mca/if/bsdx_ipv4/if_bsdx.c b/opal/mca/if/bsdx_ipv4/if_bsdx.c index 87bc27b8d42..3ed71ee5caf 100644 --- a/opal/mca/if/bsdx_ipv4/if_bsdx.c +++ b/opal/mca/if/bsdx_ipv4/if_bsdx.c @@ -39,7 +39,7 @@ opal_if_base_component_t mca_if_bsdx_ipv4_component = { {/* This component is checkpointable */ MCA_BASE_METADATA_PARAM_CHECKPOINT}, }; -MCA_BASE_COMPONENT_INIT(opal, if, bsdx_ipv4_component) +MCA_BASE_COMPONENT_INIT(opal, if, bsdx_ipv4) /* convert a netmask (in network byte order) to CIDR notation */ static int prefix(uint32_t netmask) From 5e859a961be5cec266891852b5520b641d18ca23 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Mon, 29 Dec 2025 07:55:06 -0500 Subject: [PATCH 07/19] update-my-copyright.pl: properly support git workspaces Don't search for a .git directory; it might not exist. Also, remove unnecessary Mercurial and Subversion support; we haven't used these for years. Signed-off-by: Jeff Squyres --- contrib/update-my-copyright.pl | 167 ++++++++++++--------------------- 1 file changed, 58 insertions(+), 109 deletions(-) diff --git a/contrib/update-my-copyright.pl b/contrib/update-my-copyright.pl index 2bfe9f36a49..3ac826bfa32 100755 --- a/contrib/update-my-copyright.pl +++ b/contrib/update-my-copyright.pl @@ -128,31 +128,13 @@ sub quiet_print { # Find the top-level source tree dir in a git repo my $start = cwd();
-my $vcs; -$vcs = "git" - if (-d "$top/.git"); -$vcs = "hg" - if (-d "$top/.hg"); -$vcs = "svn" - if (-d "$top/.svn"); -$vcs = "manual" - if ("$my_manual_list" ne ""); - -my @files = find_modified_files($vcs); +my @files = find_modified_files(); if ($#files < 0) { quiet_print "No added / changed files -- nothing to do\n"; @@ -284,98 +266,65 @@ sub quiet_print { #------------------------------------------------------------------------------- -# Takes two arguments, the top level directory and the VCS method. Returns a -# list of file names (relative to pwd) which the VCS considers to be modified. +# Returns a list of file names (relative to pwd) which git considers +# to be modified. sub find_modified_files { - my $vcs = shift; my @files = (); - if ($vcs eq "git") { - # Number of path entries to remove from ${top}-relative paths. - # (--show-cdup either returns the empty string or sequence of "../" - # entries, always ending in a "/") - my $n_strip = scalar(split(m!/!, scalar(`git rev-parse --show-cdup`))) - 1; - - # "." restricts scope, but does not get us relative path names - my $cmd = "git status -z --porcelain --untracked-files=no ."; - quiet_print "==> Running: \"$cmd\"\n"; - my $lines = `$cmd`; - - # From git-status(1): - # X Y Meaning - # ------------------------------------------------- - # [MD] not updated - # M [ MD] updated in index - # A [ MD] added to index - # D [ M] deleted from index - # R [ MD] renamed in index - # C [ MD] copied in index - # [MARC] index and work tree matches - # [ MARC] M work tree changed since index - # [ MARC] D deleted in work tree - # ------------------------------------------------- - # D D unmerged, both deleted - # A U unmerged, added by us - # U D unmerged, deleted by them - # U A unmerged, added by them - # D U unmerged, deleted by us - # A A unmerged, both added - # U U unmerged, both modified - # ------------------------------------------------- - # ? ? 
untracked - # ------------------------------------------------- - foreach my $line (split /\x{00}/, $lines) { - my $keep = 0; - my ($s1, $s2, $fullname) = $line =~ m/^(.)(.) (.*)$/; - - # ignore all merge cases - next if ($s1 eq "D" and $s2 eq "D"); - next if ($s1 eq "A" and $s2 eq "A"); - next if ($s1 eq "U" or $s2 eq "U"); - - # only update for actually added/modified cases, no copies, - # renames, etc. - $keep = 1 if ($s1 eq "M" or $s2 eq "M"); - $keep = 1 if ($s1 eq "A"); - - if ($keep) { - my $relname = $fullname; - $relname =~ s!^([^/]*/){$n_strip}!!g; - - push @files, $relname - if (-f $relname); - } - } - } - elsif ($vcs eq "hg" or $vcs eq "svn") { - my $cmd = "$vcs st ."; - - # Run the command, parsing the output. Make a list of files that are - # added or modified. - quiet_print "==> Running: \"$cmd\"\n"; - open(CMD, "$cmd|") || die "Can't run command"; - while () { - chomp; - if ($_ =~ /^M/ || $_ =~ /^A/) { - my @tokens = split(/\s+/, $_); - # Handle output of both forms: - # M filenameA - # A + filenameB - my $filename = $tokens[1]; - $filename = $tokens[2] - if ($tokens[1] =~ /\+/); - # Don't bother saving directory names - push(@files, $filename) - if (-f $filename); - } + # Number of path entries to remove from ${top}-relative paths. + # (--show-cdup either returns the empty string or sequence of "../" + # entries, always ending in a "/") + my $n_strip = scalar(split(m!/!, scalar(`git rev-parse --show-cdup`))) - 1; + + # "." 
restricts scope, but does not get us relative path names + my $cmd = "git status -z --porcelain --untracked-files=no ."; + quiet_print "==> Running: \"$cmd\"\n"; + my $lines = `$cmd`; + + # From git-status(1): + # X Y Meaning + # ------------------------------------------------- + # [MD] not updated + # M [ MD] updated in index + # A [ MD] added to index + # D [ M] deleted from index + # R [ MD] renamed in index + # C [ MD] copied in index + # [MARC] index and work tree matches + # [ MARC] M work tree changed since index + # [ MARC] D deleted in work tree + # ------------------------------------------------- + # D D unmerged, both deleted + # A U unmerged, added by us + # U D unmerged, deleted by them + # U A unmerged, added by them + # D U unmerged, deleted by us + # A A unmerged, both added + # U U unmerged, both modified + # ------------------------------------------------- + # ? ? untracked + # ------------------------------------------------- + foreach my $line (split /\x{00}/, $lines) { + my $keep = 0; + my ($s1, $s2, $fullname) = $line =~ m/^(.)(.) (.*)$/; + + # ignore all merge cases + next if ($s1 eq "D" and $s2 eq "D"); + next if ($s1 eq "A" and $s2 eq "A"); + next if ($s1 eq "U" or $s2 eq "U"); + + # only update for actually added/modified cases, no copies, + # renames, etc. 
+ $keep = 1 if ($s1 eq "M" or $s2 eq "M"); + $keep = 1 if ($s1 eq "A"); + + if ($keep) { + my $relname = $fullname; + $relname =~ s!^([^/]*/){$n_strip}!!g; + + push @files, $relname + if (-f $relname); } - close(CMD); - } - elsif ($vcs eq "manual") { - @files = split(/\n/, `cat $my_manual_list`); - } - else { - die "unknown VCS '$vcs', stopped"; } return @files; From 8c027cf24107d77508b766e588edd6692b78ad27 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Sat, 27 Dec 2025 16:57:01 -0500 Subject: [PATCH 08/19] docs: update the TCP tuning page Signed-off-by: Jeff Squyres Signed-off-by: Howard Pritchard --- docs/tuning-apps/networking/tcp.rst | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/docs/tuning-apps/networking/tcp.rst b/docs/tuning-apps/networking/tcp.rst index 3858a24e9eb..e20a8e21d78 100644 --- a/docs/tuning-apps/networking/tcp.rst +++ b/docs/tuning-apps/networking/tcp.rst @@ -256,20 +256,16 @@ not use specific IP networks |mdash| or not use any IP networks at all .. warning:: If you use the ``btl_tcp_if_include`` and ``btl_tcp_if_exclude`` MCA parameters to shape the behavior of the TCP BTL for MPI communications, you may - also need/want to investigate the corresponding MCA - parameters ``oob_tcp_if_include`` and - ``oob_tcp_if_exclude``, which are used to shape non-MPI - TCP-based communication (e.g., communications setup and - coordination during ``MPI_INIT`` and ``MPI_FINALIZE``). - -.. error:: TODO do corresponding OOB TCP params still exist in PMIx? - -Note that Open MPI will still use TCP for control messages, such as -data between ``mpirun`` and the MPI processes, rendezvous information -during ``MPI_INIT``, etc. To disable TCP altogether, you also need to -disable the ``tcp`` component from the OOB framework. - -.. error:: TODO Is this possible in PMIx? I doubt it...? 
+ also need/want to investigate the corresponding PRRTE + parameters that control use of network interfaces by the + runtime (e.g., communications setup and coordination + during :ref:`MPI_Init` and :ref:`MPI_Finalize`) using the + ``prte_info(1)`` and ``pmix_info(1)`` commands. + +Note that the Open MPI runtime uses TCP for control messages, such as +for data exchange between ``mpirun(1)`` and the MPI processes, +rendezvous information during :ref:`MPI_Init`, etc. even if the +``tcp`` BTL component is disabled. ///////////////////////////////////////////////////////////////////////// From 427b5766591697213d6f5520f32bc26a8be2566e Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Mon, 29 Dec 2025 08:00:18 -0500 Subject: [PATCH 09/19] docs: support interspinhx for PMIx and PRTE docs links Use Intersphinx (https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html) for making links out to PMIx and PRTE docs. If we simply always linked against the https/internet PMIx and PRTE docs, Intersphinx makes this very easy. But that's not the Open MPI way! Instead, we want to support linking against the internal (embedded) PMIx and PRTE docs when relevant and possible, mainly to support fully-offline HTML docs (e.g., for those who operated in not-connected-to-the-internet scenarios). As such, there's several cases that need to be handled properly: 1. When building the internal PMIx / PRTE, link to the local instances of those docs (vs. the https/internet instance). Ensure to use relative paths (vs. absolute paths) so that the pre-built HTML docs that we include in OMPI distribution tarballs work, regardless of the --prefix/etc. used at configure time. NOTE: When the Open MPI Sphinx docs are built, we have not yet installed the PMIx / PRTE docs. So create our own (fake) objects.inv inventory file for where the PMIx / PRTE docs *will* be installed so that Intersphinx can do its deep linking properly. 
At least for now, we only care about deep links for pmix_info(1) and prte_info(1), so we can just hard-code those into those inventory files and that's good enough. If the OMPI docs link more deeply into the PMIx / PRTE docs someday (i.e., link to a bunch more things than just pmix_info(1) / prte_info(1)), we might need to revisit this design decision. 2. When building against an external PMIx / PRTE, make a best guess as to where their local HTML doc instance may be (namely: $project_prefix/share/doc/PROJECT). Don't try to handle all the possibilities -- it just gets even more complicated than this already is. If we can't find it, just link out to the https/internet docs. Other miscellaneous small changes: * Added another Python module in docs/requirements.txt (for building the Sphinx inventory file). * Use slightly-more-pythonix dict.get() API calls in docs/conf.py for simplicity. * Updated OMPI PRTE submodule pointer to get a prte_info.1.rst label update that works for both upstream PRTE and the OMPI PRTE fork. 
Signed-off-by: Jeff Squyres --- .gitignore | 1 + 3rd-party/prrte | 2 +- config/ompi_setup_prrte.m4 | 51 +++++++++++++++-- config/opal_config_pmix.m4 | 49 ++++++++++++++++- docs/Makefile.am | 19 ++++++- docs/conf.py | 110 +++++++++++++++++++++++++++++++++++-- docs/requirements.txt | 1 + 7 files changed, 218 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 4e71ce71bce..7ab0b99af7d 100644 --- a/.gitignore +++ b/.gitignore @@ -517,6 +517,7 @@ docs/_static docs/_static/css/custom.css docs/_templates docs/man-openmpi/man3/bindings +docs/*.inv # Common Python virtual environment and cache directory names venv diff --git a/3rd-party/prrte b/3rd-party/prrte index 5ad79eb2850..d4dffd7d9a5 160000 --- a/3rd-party/prrte +++ b/3rd-party/prrte @@ -1 +1 @@ -Subproject commit 5ad79eb285023d1dcca472ccba9de5987b51cc27 +Subproject commit d4dffd7d9a5f36824cebc71fbf5086f73a78fe59 diff --git a/config/ompi_setup_prrte.m4 b/config/ompi_setup_prrte.m4 index 2a56421e146..79a0f35e35e 100644 --- a/config/ompi_setup_prrte.m4 +++ b/config/ompi_setup_prrte.m4 @@ -19,7 +19,7 @@ dnl Copyright (c) 2019-2020 Intel, Inc. All rights reserved. dnl Copyright (c) 2020-2022 Amazon.com, Inc. or its affiliates. All Rights reserved. dnl Copyright (c) 2021 Nanook Consulting. All rights reserved. dnl Copyright (c) 2021-2022 IBM Corporation. All rights reserved. -dnl Copyright (c) 2023-2024 Jeffrey M. Squyres. All rights reserved. +dnl Copyright (c) 2023-2025 Jeffrey M. Squyres. All rights reserved. dnl Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. dnl $COPYRIGHT$ dnl @@ -39,7 +39,8 @@ dnl results of the build. 
AC_DEFUN([OMPI_SETUP_PRRTE],[ AC_REQUIRE([AC_PROG_LN_S]) -OPAL_VAR_SCOPE_PUSH([prrte_setup_internal_happy prrte_setup_external_happy target_rst_dir]) + OPAL_VAR_SCOPE_PUSH([prrte_setup_internal_happy prrte_setup_external_happy target_rst_dir ompi_external_prrte_docs_url]) + ompi_external_prrte_docs_url="https://docs.prrte.org/en/latest/" opal_show_subtitle "Configuring PRRTE" @@ -120,6 +121,8 @@ OPAL_VAR_SCOPE_PUSH([prrte_setup_internal_happy prrte_setup_external_happy targe AC_SUBST(OMPI_PRRTE_RST_CONTENT_DIR) AC_SUBST(OMPI_SCHIZO_OMPI_RST_CONTENT_DIR) + AC_SUBST(OMPI_PRRTE_DOCS_URL_BASE) + AC_SUBST(OMPI_USING_INTERNAL_PRRTE) AM_CONDITIONAL(OMPI_HAVE_PRRTE_RST, [test $OMPI_HAVE_PRRTE_RST -eq 1]) AS_IF([test "$OMPI_USING_INTERNAL_PRRTE" = "1"], @@ -250,8 +253,30 @@ AC_DEFUN([_OMPI_SETUP_PRRTE_INTERNAL], [ [OMPI_HAVE_PRRTE_RST=1 OMPI_PRRTE_RST_CONTENT_DIR="$OMPI_TOP_SRCDIR/3rd-party/prrte/src/docs/prrte-rst-content" OMPI_SCHIZO_OMPI_RST_CONTENT_DIR="$OMPI_TOP_SRCDIR/3rd-party/prrte/src/mca/schizo/ompi" + + # If we're building the OMPI Sphinx docs, and also + # building the internal PRRTE, then we're *also* + # building the internal PRRTE docs. + # + # In this case, the OMPI docs/conf.py will do a + # bunch of processing that is a lot easier to do in + # Python than Bourne shell (e.g., use the convenient + # os.path.relpath() to compute the relative path + # that we need, as well as dynamically create a + # Sphinx link inventory file). Hence, we skip doing + # all that work here and just set a sentinel value + OMPI_PRRTE_DOCS_URL_BASE="../../prrte/html" AC_MSG_RESULT([found])], - [AC_MSG_RESULT([not found])]) + [ # If we are not building the Sphinx docs, default + # to using the external PRRTE docs URL. This is + # actually moot because we won't be building the + # docs, but we might as well be complete in the + # logic / cases. 
+ OMPI_PRRTE_DOCS_URL_BASE=$ompi_external_prrte_docs_url + AC_MSG_RESULT([not found])]) + + AC_MSG_CHECKING([for internal PRRTE docs link URL base]) + AC_MSG_RESULT([$OMPI_PRRTE_DOCS_URL_BASE]) $1], [$2]) @@ -273,7 +298,7 @@ dnl _OMPI_SETUP_PRRTE_EXTERNAL([action if success], [action if not success]) dnl dnl Try to find an external prrte with sufficient version. AC_DEFUN([_OMPI_SETUP_PRRTE_EXTERNAL], [ - OPAL_VAR_SCOPE_PUSH([ompi_prte_min_version ompi_prte_min_num_version setup_prrte_external_happy opal_prrte_CPPFLAGS_save]) + OPAL_VAR_SCOPE_PUSH([ompi_prte_min_version ompi_prte_min_num_version setup_prrte_external_happy opal_prrte_CPPFLAGS_save ompi_prrte_docdir]) opal_prrte_CPPFLAGS_save=$CPPFLAGS @@ -321,6 +346,10 @@ AC_DEFUN([_OMPI_SETUP_PRRTE_EXTERNAL], [ [ # Determine if this external PRRTE has installed the RST # directories that we care about + # In the external case, initially assume we'll use the + # web-based docs + OMPI_PRRTE_DOCS_URL_BASE=$ompi_external_prrte_docs_url + AC_MSG_CHECKING([for external PRRTE RST files]) prrte_install_dir=${with_prrte}/share/prte/rst AS_IF([test -n "$SPHINX_BUILD"], @@ -329,6 +358,17 @@ AC_DEFUN([_OMPI_SETUP_PRRTE_EXTERNAL], [ [OMPI_HAVE_PRRTE_RST=1 OMPI_PRRTE_RST_CONTENT_DIR="$prrte_install_dir/prrte-rst-content" OMPI_SCHIZO_OMPI_RST_CONTENT_DIR="$prrte_install_dir/schizo-ompi-rst-content" + # If the external PRTE docs dir exists where + # a simple heuristic thinks it should be + # (i.e., the default docdir location), use + # it. This will be an absolute path, which + # is fine (because we're building against an + # external PRRTE). If we don't find it, + # we'll fall back to the above-set HTTPS + # internet PRRTE docs URL. 
+ ompi_prrte_docdir="$with_prrte/share/doc/prrte/html" + AS_IF([test -d "$ompi_prrte_docdir"], + [OMPI_PRRTE_DOCS_URL_BASE="$ompi_prrte_docdir"]) AC_MSG_RESULT([found]) ], [ # This version of PRRTE doesn't have installed RST @@ -336,6 +376,9 @@ AC_DEFUN([_OMPI_SETUP_PRRTE_EXTERNAL], [ AC_MSG_RESULT([not found]) ]) ]) + + AC_MSG_CHECKING([for external PRRTE docs link URL base]) + AC_MSG_RESULT([$OMPI_PRRTE_DOCS_URL_BASE]) $1], [$2]) diff --git a/config/opal_config_pmix.m4 b/config/opal_config_pmix.m4 index bea801c335e..0bff14c77b3 100644 --- a/config/opal_config_pmix.m4 +++ b/config/opal_config_pmix.m4 @@ -21,6 +21,7 @@ dnl Copyright (c) 2020 Triad National Security, LLC. All rights dnl reserved. dnl Copyright (c) 2020-2022 Amazon.com, Inc. or its affiliates. All Rights reserved. dnl Copyright (c) 2021 Nanook Consulting. All rights reserved. +dnl Copyright (c) 2025 Jeffrey M. Squyres. All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -57,7 +58,8 @@ dnl other execution tests later in configure (there are sadly dnl some) would fail if the path in LDFLAGS was not added to dnl LD_LIBRARY_PATH. 
AC_DEFUN([OPAL_CONFIG_PMIX], [ - OPAL_VAR_SCOPE_PUSH([external_pmix_happy internal_pmix_happy internal_pmix_args internal_pmix_wrapper_libs internal_pmix_CPPFLAGS opal_pmix_STATIC_LDFLAGS opal_pmix_LIBS opal_pmix_STATIC_LIBS]) + OPAL_VAR_SCOPE_PUSH([external_pmix_happy internal_pmix_happy internal_pmix_args internal_pmix_wrapper_libs internal_pmix_CPPFLAGS opal_pmix_STATIC_LDFLAGS opal_pmix_LIBS opal_pmix_STATIC_LIBS opal_external_pmix_docs_url]) + opal_external_pmix_docs_url="https://docs.openpmix.org/en/latest/" opal_show_subtitle "Configuring PMIx" @@ -154,6 +156,8 @@ AC_DEFUN([OPAL_CONFIG_PMIX], [ AC_DEFINE_UNQUOTED([OPAL_USING_INTERNAL_PMIX], [$OPAL_USING_INTERNAL_PMIX], [Whether or not we are using the internal PMIx]) + AC_SUBST(OPAL_PMIX_DOCS_URL_BASE) + AC_SUBST(OPAL_USING_INTERNAL_PMIX) OPAL_SUMMARY_ADD([Miscellaneous], [pmix], [], [$opal_pmix_mode]) @@ -216,8 +220,22 @@ AC_DEFUN([_OPAL_CONFIG_PMIX_EXTERNAL], [ dnl it will screw up other tests (like the pthread tests) opal_pmix_BUILD_LIBS="${opal_pmix_LIBS}" + # If the external PMIx docs dir exists where + # a simple heuristic thinks it should be + # (i.e., the default docdir location), use + # it. This will be an absolute path, which + # is fine (because we're building against an + # external PMIx). If we don't find it, + # we'll fall back to the HTTPS internet PMIx + # docs URL. + opal_pmix_docdir="$with_pmix/share/doc/pmix/html" + AS_IF([test -d "$opal_pmix_docdir"], + [OPAL_PMIX_DOCS_URL_BASE="$opal_pmix_docdir"], + [OPAL_PMIX_DOCS_URL_BASE=$opal_external_pmix_docs_url]) + $1], - [$2])]) + [$2]) + ]) OPAL_VAR_SCOPE_POP ]) @@ -238,7 +256,7 @@ AC_DEFUN([_OPAL_CONFIG_PMIX_INTERNAL_POST], [ pmix_internal_happy=1 - dnl Don't pull LDFLAGS, because we don't have a good way to avoid + dnl Do not pull LDFLAGS, because we don't have a good way to avoid dnl a -L to our install directory, which can cause some weirdness dnl if there's an old OMPI install there. And it makes filtering dnl redundant flags easier. 
@@ -279,6 +297,31 @@ AC_DEFUN([_OPAL_CONFIG_PMIX_INTERNAL_POST], [ opal_pmix_BUILD_LIBS="$OMPI_TOP_BUILDDIR/3rd-party/openpmix/src/libpmix.la" + AS_IF([test -n "$SPHINX_BUILD"], + [ # If we're building the OMPI Sphinx docs, and also + # building the internal PMIx, then we're *also* + # building the internal PMIx docs. + # + # In this case, the OMPI docs/conf.py will do a + # bunch of processing that is a lot easier to do in + # Python than Bourne shell (e.g., use the convenient + # os.path.relpath() to compute the relative path + # that we need, as well as dynamically create a + # Sphinx link inventory file). Hence, we skip doing + # all that work here and just set a sentinel value + OPAL_PMIX_DOCS_URL_BASE="../../pmix/html" + AC_MSG_RESULT([found])], + [ # If we are not building the Sphinx docs, default + # to using the external PMIx docs URL. This is + # actually moot because we won't be building the + # docs, but we might as well be complete in the + # logic / cases. + OPAL_PMIX_DOCS_URL_BASE=$opal_external_pmix_docs_url + AC_MSG_RESULT([not found])]) + + AC_MSG_CHECKING([for internal PMIx docs link URL base]) + AC_MSG_RESULT([$OPAL_PMIX_DOCS_URL_BASE]) + OPAL_3RDPARTY_SUBDIRS="$OPAL_3RDPARTY_SUBDIRS openpmix" ]) diff --git a/docs/Makefile.am b/docs/Makefile.am index ca620636990..871184eb01d 100644 --- a/docs/Makefile.am +++ b/docs/Makefile.am @@ -1095,8 +1095,22 @@ $(ALL_MAN_BUILT): cp -rpf "$(OMPI_PRRTE_RST_CONTENT_DIR)" "$(builddir)"; \ copied_dir=`basename $(OMPI_PRRTE_RST_CONTENT_DIR)`; \ chmod -R u+w "$(builddir)/$$copied_dir" - $(OMPI_V_SPHINX_HTML) OMPI_TOP_SRCDIR=$(top_srcdir) $(SPHINX_BUILD) -M html "$(builddir)" "$(OUTDIR)" $(SPHINX_OPTS) - $(OMPI_V_SPHINX_MAN) OMPI_TOP_SRCDIR=$(top_srcdir) $(SPHINX_BUILD) -M man "$(builddir)" "$(OUTDIR)" $(SPHINX_OPTS) + $(OMPI_V_SPHINX_HTML) \ + OMPI_TOP_SRCDIR="$(top_srcdir)" \ + OMPI_DOCDIR="$(docdir)" \ + OPAL_PMIX_DOCS_URL_BASE="$(OPAL_PMIX_DOCS_URL_BASE)" \ + 
OPAL_USING_INTERNAL_PMIX="$(OPAL_USING_INTERNAL_PMIX)" \ + OMPI_PRRTE_DOCS_URL_BASE="$(OMPI_PRRTE_DOCS_URL_BASE)" \ + OMPI_USING_INTERNAL_PRRTE="$(OMPI_USING_INTERNAL_PRRTE)" \ + $(SPHINX_BUILD) -M html "$(builddir)" "$(OUTDIR)" $(SPHINX_OPTS) + $(OMPI_V_SPHINX_HTML) \ + OMPI_TOP_SRCDIR="$(top_srcdir)" \ + OMPI_DOCDIR="$(docdir)" \ + OPAL_PMIX_DOCS_URL_BASE="$(OPAL_PMIX_DOCS_URL_BASE)" \ + OPAL_USING_INTERNAL_PMIX="$(OPAL_USING_INTERNAL_PMIX)" \ + OMPI_PRRTE_DOCS_URL_BASE="$(OMPI_PRRTE_DOCS_URL_BASE)" \ + OMPI_USING_INTERNAL_PRRTE="$(OMPI_USING_INTERNAL_PRRTE)" \ + $(SPHINX_BUILD) -M man "$(builddir)" "$(OUTDIR)" $(SPHINX_OPTS) # A useful rule to invoke manually to ensure that all of the external # HTML links we have are valid. Running this rule requires @@ -1117,6 +1131,7 @@ linkcheck: clean-local: rm -rf $(OUTDIR) rm -rf prrte-rst-content schizo-ompi-rst-content + rm -rf ompi-prrte-objects.inv opal-pmix-objects.inv if test "$(srcdir)" != "$(builddir)"; then \ len=`echo "$(srcdir)/" | wc -c`; \ for file in $(RST_SOURCE_FILES) $(IMAGE_SOURCE_FILES) $(TEXT_SOURCE_FILES) $(SPHINX_CONFIG); do \ diff --git a/docs/conf.py b/docs/conf.py index e443d693fd5..b6e85216a5f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -17,6 +17,7 @@ import os import re import datetime +import sphobjinv as soi year = datetime.datetime.now().year @@ -28,9 +29,7 @@ # The docs/Makefile.am will set the env var OMPI_TOP_SRCDIR, because # we might be doing a VPATH build. -ompi_top_srcdir = '..' -if 'OMPI_TOP_SRCDIR' in os.environ: - ompi_top_srcdir = os.environ['OMPI_TOP_SRCDIR'] +ompi_top_srcdir = os.environ.get('OMPI_TOP_SRCDIR', '..') # Read an Open MPI-style VERSION file def read_version_file(path): @@ -126,8 +125,7 @@ def get_tarball_version(path, expr): # If we're building in an RTD environment for a tag or external (i.e., # PR), use the RTD version -- not what we just read from the VERSIONS # file. 
-key = 'READTHEDOCS' -if key in os.environ and os.environ[key] == 'True': +if os.environ.get('READTHEDOCS') == 'True': print("OMPI: found ReadTheDocs build environment") # Tell Jinja2 templates the build is running on Read the Docs @@ -178,8 +176,110 @@ def get_tarball_version(path, expr): 'recommonmark', "sphinx_rtd_theme", "sphinx.ext.extlinks", + "sphinx.ext.intersphinx", ] +########################################################################## + +# Map to external documentation: PMIx and PRRTE + +def _make_intersphinx_mapping(project, name, fallback_base, entries): + # If there is no PROJECT_NAME_DOCS_URL_BASE (e.g., in a ReadTheDocs + # build), then use the fallback_base. + key = f'{project}_{name}_DOCS_URL_BASE'.upper() + docs_url_base = os.environ.get(key, fallback_base).strip() + key = f'{project}_USING_INTERNAL_{name}'.upper() + using_internal = os.environ.get(key, '0').strip() + + if using_internal == '0': + # In this case, we're using some external URL base -- either on the + # filesystem or via https. Just use that directly. + inv_filename = None + else: + # In this case, we're using the internal (embedded) version of + # the project (e.g., PMIX or PRRTE). Two things: + # + # 1. The internal PMIX / PRRTE docs have not yet been installed. + # Hence, we have to build our own objects.inv file to use during + # this Sphinx build. + # 2. We have to use relative links because these links must work + # - in the installed tree + # - in a pre-built tarball of the docs (where we don't know + # the install prefix before building) + # NOTE: We specifically realize that these relative links won't + # work in the build tree (because the install tree and build + # tree will likely have different relative paths). We have + # decided that this is acceptable -- the installed tree is + # the more important case to get right. + + # Use the official Sphinx Object Inventory library to build + # an objects.inv file on the fly. 
We know we only need a few specific + # labels from the internal PMIX / PRRTE docs, so we will just + # build those specific entries. + # + # 1. Initialize an empty inventory. Since this is a fake inventory, + # the name/version values don't matter. + inv = soi.Inventory() + inv.project = f'Open MPI {project}-{name} documentation' + inv.version = '1.2.3' + + # 2. Make the single link that we need + # - name: the unique ID for the link + # - domain: either 'py' or 'std'; we want "std" for a label + # - role: 'class', 'func', 'doc', 'label', etc. + # - uri: the relative path to the HTML page + # - dispname: what shows up in the link text ('-' means same as name) + for label, uri in entries.items(): + inv.objects.append(soi.DataObjStr( + name=label, + domain='std', + role='label', + priority='-1', + uri=uri, + dispname='-' + )) + + # 3. Export to a compressed objects.inv file + text_data = inv.data_file(contract=True) + zlib_data = soi.compress(text_data) + inv_filename = f'{project}-{name}-objects.inv' + soi.writebytes(inv_filename, zlib_data) + + # 4. Finally, figure out: + # - the docdir/html for where OMPI html docs will be installed + # - the docdir/html for where this project_name HTML docs will be installed + # Then compute the relative path between them. This will be + # the URL base that we will use for intersphinx mapping. 
+ + # OMPI HTML docdir + docdir = os.environ.get('OMPI_DOCDIR', '.') + docdir_html = os.path.join(docdir, 'html') + + # This project_name's HTML docdir + docdir_parent = os.path.dirname(docdir) + docdir_name_html = os.path.join(docdir_parent, name, 'html') + + # Compute the relative path between them + docs_url_base = os.path.relpath(docdir_name_html, start=docdir_html) + + global intersphinx_mapping + intersphinx_mapping[name] = (docs_url_base, inv_filename) + +intersphinx_mapping = {} +_make_intersphinx_mapping("opal", "pmix", "https://docs.openpmix.org/en/latest/", { + 'man1-pmix_info' : 'man/man1/pmix_info.1.html#pmix-info', + }) +_make_intersphinx_mapping("ompi", "prrte", "https://docs.prrte.org/en/latest/", { + 'man1-prte_info' : 'man/man1/ompi-prte_info.1.html#man1-prte-info', + }) + +# Sphinx defaults to automatically resolving *unresolved* labels using all your Intersphinx mappings. +# This behavior has unintended side-effects, namely that the documentation's local references can +# suddenly resolve to an external location. +# See also: +# https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html#confval-intersphinx_disabled_reftypes +intersphinx_disabled_reftypes = ["*"] + # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] diff --git a/docs/requirements.txt b/docs/requirements.txt index 001c5f10adf..9fda5a43a4d 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,6 +2,7 @@ sphinx>=4.2.0 recommonmark docutils sphinx-rtd-theme +sphobjinv # These modules are needed for the pympistandard module when you are # running Python 3.6 (they became part of core Python in 3.7). We From 5b799a0b2d2c05888c59a33525c3fd515ab72e32 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Mon, 29 Dec 2025 08:10:10 -0500 Subject: [PATCH 10/19] docs: update pmix_info(1) and prte_info(1) links Per the prior commit, update all OMPI docs RST to properly link to PMIx and PRTE documentation.
Also added a few mpirun(1) links because they were in the vicinity of the pmix_info(1) and prte_info(1) that were being updated. Signed-off-by: Jeff Squyres --- docs/launching-apps/gridengine.rst | 14 +++++++------- docs/launching-apps/lsf.rst | 8 +++++--- docs/launching-apps/pals.rst | 12 +++++++----- docs/launching-apps/tm.rst | 8 ++++---- docs/mca.rst | 4 ++-- docs/tuning-apps/networking/tcp.rst | 3 ++- 6 files changed, 27 insertions(+), 22 deletions(-) diff --git a/docs/launching-apps/gridengine.rst b/docs/launching-apps/gridengine.rst index 462cb26b61e..0fed2ea191b 100644 --- a/docs/launching-apps/gridengine.rst +++ b/docs/launching-apps/gridengine.rst @@ -17,7 +17,7 @@ Verify Grid Engine support command line switch to Open MPI's ``configure`` script. To verify if support for Grid Engine is configured into your Open MPI -installation, run ``prte_info`` as shown below and look for +installation, run :ref:`prte_info(1) ` as shown below and look for ``gridengine``. .. code-block:: @@ -30,8 +30,8 @@ installation, run ``prte_info`` as shown below and look for PMIx and PRRTE details from the end user, but this is one place that Open MPI is unable to hide the fact that PRRTE provides this functionality, not Open MPI. Hence, users need to use the - ``prte_info`` command to check for Grid Engine support (not - ``ompi_info``). + :ref:`prte_info(1) ` command to check for Grid Engine support (not + :ref:`ompi_info(1) `). Launching --------- @@ -40,7 +40,7 @@ When Grid Engine support is included, Open MPI will automatically detect when it is running inside SGE and will just "do the Right Thing." -Specifically, if you execute an ``mpirun`` command in a Grid Engine +Specifically, if you execute an :ref:`mpirun(1) ` command in a Grid Engine job, it will automatically use the Grid Engine mechanisms to launch and kill processes.
There is no need to specify what nodes to run on |mdash| Open MPI will obtain this information directly from Grid @@ -231,13 +231,13 @@ Grid Engine job suspend / resume support ---------------------------------------- To suspend the job, you send a SIGTSTP (not SIGSTOP) signal to -``mpirun``. ``mpirun`` will catch this signal and forward it to the +:ref:`mpirun(1) `. :ref:`mpirun(1) ` will catch this signal and forward it to the ``mpi-hello-world`` as a SIGSTOP signal. To resume the job, you send -a SIGCONT signal to ``mpirun`` which will be caught and forwarded to +a SIGCONT signal to :ref:`mpirun(1) ` which will be caught and forwarded to the ``mpi-hello-world``. By default, this feature is not enabled. This means that both the -SIGTSTP and SIGCONT signals will simply be consumed by the ``mpirun`` +SIGTSTP and SIGCONT signals will simply be consumed by the :ref:`mpirun(1) ` process. To have them forwarded, you have to run the job with ``--mca orte_forward_job_control 1``. Here is an example on Solaris: diff --git a/docs/launching-apps/lsf.rst b/docs/launching-apps/lsf.rst index 7660ca2e2aa..159a85a84a4 100644 --- a/docs/launching-apps/lsf.rst +++ b/docs/launching-apps/lsf.rst @@ -6,7 +6,8 @@ Open MPI supports the LSF resource manager. Verify LSF support ------------------ -The ``prte_info`` command can be used to determine whether or not an +The :ref:`prte_info(1) ` +command can be used to determine whether or not an installed Open MPI includes LSF support: .. code-block:: @@ -27,8 +28,9 @@ installed. PMIx and PRRTE details from the end user, but this is one place that Open MPI is unable to hide the fact that PRRTE provides this functionality, not Open MPI. Hence, users need to use the - ``prte_info`` command to check for LSF support (not - ``ompi_info``). + :ref:`prte_info(1) ` + command to check for LSF support (not + :ref:`ompi_info(1) `). 
Launching --------- diff --git a/docs/launching-apps/pals.rst b/docs/launching-apps/pals.rst index 49e818acb91..aa44bfffce0 100644 --- a/docs/launching-apps/pals.rst +++ b/docs/launching-apps/pals.rst @@ -27,7 +27,8 @@ documentation :doc:`tm`. Verify PALS support ------------------- -The ``prte_info`` command can be used to determine whether or not an +The :ref:`prte_info(1) ` +command can be used to determine whether or not an installed Open MPI includes PALS support: .. code-block:: @@ -49,11 +50,12 @@ Using ``mpirun`` This section assumes there is PALS support in the PRRTE being used for the Open MPI installation. -When ``mpirun`` is launched in a PBS job, ``mpirun`` will +When :ref:`mpirun(1) ` is launched in a PBS job, +:ref:`mpirun(1) ` will automatically utilize the PALS infrastructure for launching and controlling the individual MPI processes. -.. note:: Using ``mpirun`` is the recommended method for launching Open +.. note:: Using :ref:`mpirun(1) ` is the recommended method for launching Open MPI jobs on HPE systems where PALS is available. This is primarily due to limitations in the PMIx server provided in PALS. @@ -75,7 +77,7 @@ Using PALS "direct launch" functionality ---------------------------------------- The HPE PALS 1.5.0 documentation states that it comes pre-built with PMIx support. By default the PALS ``aprun`` launcher does not use PMIx. To use the launcher's +By default the PALS ``aprun(1)`` launcher does not use PMIx. To use the launcher's PMIx capabilities either the command line option ``--pmix=pmix`` needs to be set or the ``ALPS_PMI`` environment variable needs to be set to ``pmix``. @@ -89,4 +91,4 @@ or the ``ALPS_PMI`` environment variable needs to be set to ``pmix``. In these examples, four instances of the application are started, two instances per node. -See the PALS ``aprun`` man page for documentation on how to this command. +See the PALS ``aprun(1)`` man page for documentation on how to use this command.
diff --git a/docs/launching-apps/tm.rst b/docs/launching-apps/tm.rst index a19727ce983..8d3c2ebc134 100644 --- a/docs/launching-apps/tm.rst +++ b/docs/launching-apps/tm.rst @@ -7,7 +7,7 @@ managers. Verify PBS/Torque support ------------------------- -The ``prte_info`` command can be used to determine whether or not an +The :ref:`prte_info(1) ` command can be used to determine whether or not an installed Open MPI includes Torque/PBS Pro support: .. code-block:: @@ -28,8 +28,8 @@ installed. PMIx and PRRTE details from the end user, but this is one place that Open MPI is unable to hide the fact that PRRTE provides this functionality, not Open MPI. Hence, users need to use the - ``prte_info`` command to check for PBS/Torque support (not - ``ompi_info``). + :ref:`prte_info(1) ` command to check for PBS/Torque support (not + :ref:`ompi_info(1) `). Launching --------- @@ -37,7 +37,7 @@ Launching When properly configured, Open MPI obtains both the list of hosts and how many processes to start on each host from Torque / PBS Pro directly. Hence, it is unnecessary to specify the ``--hostfile``, -``--host``, or ``-n`` options to ``mpirun``. Open MPI will use +``--host``, or ``-n`` options to :ref:`mpirun(1) `. Open MPI will use PBS/Torque-native mechanisms to launch and kill processes (``ssh`` is not required). diff --git a/docs/mca.rst b/docs/mca.rst index fbaf7af2950..0c8256a4e55 100644 --- a/docs/mca.rst +++ b/docs/mca.rst @@ -530,8 +530,8 @@ Open MPI has a *large* number of MCA parameters available. Users can use the :ref:`ompi_info(1) ` command to see *all* available MCA parameters. -.. note:: Similarly, you can use the ``pmix_info(1)`` and - ``prte_info(1)`` commands to see all the MCA parameters +.. note:: Similarly, you can use the :ref:`pmix_info(1) ` and + :ref:`prte_info(1) ` commands to see all the MCA parameters available for the PMIx and PRRTE projects, respectively. 
The documentation for these commands are not included in the diff --git a/docs/tuning-apps/networking/tcp.rst b/docs/tuning-apps/networking/tcp.rst index e20a8e21d78..7b87546596c 100644 --- a/docs/tuning-apps/networking/tcp.rst +++ b/docs/tuning-apps/networking/tcp.rst @@ -260,7 +260,8 @@ not use specific IP networks |mdash| or not use any IP networks at all parameters that control use of network interfaces by the runtime (e.g., communications setup and coordination during :ref:`MPI_Init` and :ref:`MPI_Finalize`) using the - ``prte_info(1)`` and ``pmix_info(1)`` commands. + :ref:`prte_info(1) ` + and :ref:`pmix_info(1) ` commands. Note that the Open MPI runtime uses TCP for control messages, such as for data exchange between ``mpirun(1)`` and the MPI processes, From 319b307ca858dffdae0122c490f99b853fec2fea Mon Sep 17 00:00:00 2001 From: Jessie Yang Date: Thu, 25 Jan 2024 14:58:34 -0800 Subject: [PATCH 11/19] coll/tuned: Change the bcast default collective algorithm selection The default algorithm selections were out of date and not performing well. After gathering data using the ompi-collectives-tuning package, new default algorithm decisions are selected for bcast. 
Signed-off-by: Jessie Yang --- ompi/mca/coll/tuned/coll_tuned.h | 1 + .../coll/tuned/coll_tuned_decision_fixed.c | 73 +++++++++++++++++++ ompi/mca/coll/tuned/coll_tuned_module.c | 8 +- 3 files changed, 81 insertions(+), 1 deletion(-) diff --git a/ompi/mca/coll/tuned/coll_tuned.h b/ompi/mca/coll/tuned/coll_tuned.h index bb5ed3f762a..47634caec25 100644 --- a/ompi/mca/coll/tuned/coll_tuned.h +++ b/ompi/mca/coll/tuned/coll_tuned.h @@ -137,6 +137,7 @@ int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_ /* Bcast */ int ompi_coll_tuned_bcast_intra_dec_fixed(BCAST_ARGS); +int ompi_coll_tuned_bcast_intra_disjoint_dec_fixed(BCAST_ARGS); int ompi_coll_tuned_bcast_intra_dec_dynamic(BCAST_ARGS); int ompi_coll_tuned_bcast_intra_do_this(BCAST_ARGS, int algorithm, int faninout, int segsize); int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); diff --git a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c index fa31aef1860..e97993ffe10 100644 --- a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c +++ b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c @@ -654,6 +654,79 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, size_t count, alg, 0, 0); } + +/* + * bcast_intra_dec for inter node communicators + * + * Function: - selects broadcast algorithm to use + * Accepts: - same arguments as MPI_Bcast() + * Returns: - MPI_SUCCESS or error code (passed from the bcast implementation) + */ +int ompi_coll_tuned_bcast_intra_disjoint_dec_fixed(void *buff, size_t count, + struct ompi_datatype_t *datatype, int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + size_t total_dsize, dsize; + int communicator_size, alg; + communicator_size = ompi_comm_size(comm); + + ompi_datatype_type_size(datatype, &dsize); + total_dsize = dsize * (unsigned long)count; + + OPAL_OUTPUT((ompi_coll_tuned_stream, 
"ompi_coll_tuned_bcast_intra_disjoint_dec_fixed" + " root %d rank %d com_size %d", + root, ompi_comm_rank(comm), communicator_size)); + + /** Algorithms: + * {1, "basic_linear"}, + * {2, "chain"}, + * {3, "pipeline"}, + * {4, "split_binary_tree"}, + * {5, "binary_tree"}, + * {6, "binomial"}, + * {7, "knomial"}, + * {8, "scatter_allgather"}, + * {9, "scatter_allgather_ring"}, + */ + if (communicator_size < 4) { + alg = 1; + } else if (communicator_size < 8) { + if (total_dsize < 1048576) { + alg = 1; + } else { + alg = 5; + } + } else if (communicator_size < 16) { + if (total_dsize < 1048576) { + alg = 1; + } else { + alg = 5; + } + } else if (communicator_size < 32) { + if (total_dsize < 262144) { + alg = 1; + } else if (total_dsize < 1048576) { + alg = 7; + } else { + alg = 5; + } + } else { + if (total_dsize < 65536) { + alg = 1; + } else if (total_dsize < 1048576) { + alg = 7; + } else { + alg = 5; + } + } + + return ompi_coll_tuned_bcast_intra_do_this (buff, count, datatype, root, + comm, module, + alg, 0, 0); +} + + /* * reduce_intra_dec * diff --git a/ompi/mca/coll/tuned/coll_tuned_module.c b/ompi/mca/coll/tuned/coll_tuned_module.c index eb4fb125380..20bb4c4a49b 100644 --- a/ompi/mca/coll/tuned/coll_tuned_module.c +++ b/ompi/mca/coll/tuned/coll_tuned_module.c @@ -100,14 +100,20 @@ ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority) /* By default stick with the fixed version of the tuned collectives. Later on, * when the module get enabled, set the correct version based on the availability * of the dynamic rules. + * For some collectives, we distinguish between disjoint communicators to make + * decision specific for inter node communication. 
*/ + if (OMPI_COMM_IS_DISJOINT_SET(comm) && OMPI_COMM_IS_DISJOINT(comm)) { + tuned_module->super.coll_bcast = ompi_coll_tuned_bcast_intra_disjoint_dec_fixed; + } else { + tuned_module->super.coll_bcast = ompi_coll_tuned_bcast_intra_dec_fixed; + } tuned_module->super.coll_allgather = ompi_coll_tuned_allgather_intra_dec_fixed; tuned_module->super.coll_allgatherv = ompi_coll_tuned_allgatherv_intra_dec_fixed; tuned_module->super.coll_allreduce = ompi_coll_tuned_allreduce_intra_dec_fixed; tuned_module->super.coll_alltoall = ompi_coll_tuned_alltoall_intra_dec_fixed; tuned_module->super.coll_alltoallv = ompi_coll_tuned_alltoallv_intra_dec_fixed; tuned_module->super.coll_barrier = ompi_coll_tuned_barrier_intra_dec_fixed; - tuned_module->super.coll_bcast = ompi_coll_tuned_bcast_intra_dec_fixed; tuned_module->super.coll_gather = ompi_coll_tuned_gather_intra_dec_fixed; tuned_module->super.coll_reduce = ompi_coll_tuned_reduce_intra_dec_fixed; tuned_module->super.coll_reduce_scatter = ompi_coll_tuned_reduce_scatter_intra_dec_fixed; From fe641a7a07c37b6b8c91edaed6b663ed750b8693 Mon Sep 17 00:00:00 2001 From: Nithya V S Date: Tue, 13 Jan 2026 11:11:26 +0530 Subject: [PATCH 12/19] coll/acoll: Fixes for coverity deadcode issues Fixes the deadcode path issues from coverity in bcast and reduce. 
Signed-off-by: Nithya V S --- ompi/mca/coll/acoll/coll_acoll_bcast.c | 14 +++----------- ompi/mca/coll/acoll/coll_acoll_reduce.c | 2 +- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/ompi/mca/coll/acoll/coll_acoll_bcast.c b/ompi/mca/coll/acoll/coll_acoll_bcast.c index b0116fb2361..b1eaf03a17a 100644 --- a/ompi/mca/coll/acoll/coll_acoll_bcast.c +++ b/ompi/mca/coll/acoll/coll_acoll_bcast.c @@ -693,7 +693,7 @@ int mca_coll_acoll_bcast(void *buff, size_t count, struct ompi_datatype_t *datat return ompi_coll_base_bcast_intra_knomial(buff, count, datatype, root, comm, module, 0, 4); } } - if ((!subc->initialized || (root != subc->prev_init_root)) && size > 2) { + if (!subc->initialized || (root != subc->prev_init_root)) { err = mca_coll_acoll_comm_split_init(comm, acoll_module, subc, root); if (MPI_SUCCESS != err) { return err; @@ -704,13 +704,8 @@ int mca_coll_acoll_bcast(void *buff, size_t count, struct ompi_datatype_t *datat total_dsize = dsize * count; rank = ompi_comm_rank(comm); sg_cnt = acoll_module->sg_cnt; - if (size > 2) { - num_nodes = subc->num_nodes; - node_size = ompi_comm_size(subc->local_comm); - } else { - num_nodes = 1; - node_size = size; - } + num_nodes = subc->num_nodes; + node_size = ompi_comm_size(subc->local_comm); /* Use knomial for nodes 8 and above and non-large messages */ if (((num_nodes >= 8 && total_dsize <= 65536) @@ -727,9 +722,6 @@ int mca_coll_acoll_bcast(void *buff, size_t count, struct ompi_datatype_t *datat &use_numa, &use_socket, &use_shm, &lin_0, &lin_1, &lin_2, num_nodes, acoll_module, subc); no_sg = (sg_cnt == node_size) ? 
1 : 0; - if (size <= 2) { - no_sg = 1; - } /* Disable shm based bcast if: */ /* - datatype is not a predefined type */ diff --git a/ompi/mca/coll/acoll/coll_acoll_reduce.c b/ompi/mca/coll/acoll/coll_acoll_reduce.c index ec0c07b6e7e..69da3cb49cf 100644 --- a/ompi/mca/coll/acoll/coll_acoll_reduce.c +++ b/ompi/mca/coll/acoll/coll_acoll_reduce.c @@ -63,7 +63,7 @@ static inline int coll_acoll_reduce_topo(const void *sbuf, void *rbuf, size_t co rank = ompi_comm_rank(comm); - int use_socket = 1; + int use_socket = (0 == acoll_module->use_socket) ? 1 : acoll_module->use_socket; tmp_sbuf = (char *) sbuf; if ((MPI_IN_PLACE == sbuf) && (rank == root)) { From 59f8e2ea7c7d07054f205bdc501a2d36e94c2924 Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Tue, 13 Jan 2026 10:35:56 -0600 Subject: [PATCH 13/19] revoke: Fix null dereference, improve debug prints, comment assumptions Signed-off-by: Matthew Whitlock --- ompi/communicator/ft/comm_ft_revoke.c | 10 +++++----- ompi/mca/coll/han/coll_han_subcomms.c | 11 ++++++++--- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/ompi/communicator/ft/comm_ft_revoke.c b/ompi/communicator/ft/comm_ft_revoke.c index 81e0c7ceb98..adfadc5f9e4 100644 --- a/ompi/communicator/ft/comm_ft_revoke.c +++ b/ompi/communicator/ft/comm_ft_revoke.c @@ -55,7 +55,7 @@ int ompi_comm_revoke_internal(ompi_communicator_t* comm) OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, ompi_comm_print_cid(comm), comm->c_epoch )); /* Mark locally revoked */ - if( ompi_comm_revoke_local(comm, NULL) ) { + if( ompi_comm_revoke_local(comm, false) ) { /* Broadcast the 'revoke' signal to all other processes. 
*/ ompi_comm_rbcast_message_t msg; msg.cid = ompi_comm_get_local_cid(comm); @@ -73,15 +73,15 @@ bool ompi_comm_revoke_local(ompi_communicator_t* comm, bool coll_only) { if( comm->comm_revoked || (coll_only && comm->coll_revoked) ) { OPAL_OUTPUT_VERBOSE((9, ompi_ftmpi_output_handle, - "%s %s: comm %s:%d is already %srevoked, nothing to do", + "%s %s: comm %s:%d is already %s revoked, nothing to do", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, ompi_comm_print_cid(comm), comm->c_epoch, - coll_only ? "coll " : "")); + coll_only ? "coll" : "fully")); return false; } OPAL_OUTPUT_VERBOSE((9, ompi_ftmpi_output_handle, - "%s %s: comm %s:%d is marked %srevoked locally", + "%s %s: comm %s:%d is marked %s revoked locally", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, ompi_comm_print_cid(comm), comm->c_epoch, - coll_only ? "coll " : "")); + coll_only ? "coll" : "fully")); /* * Locally revoke the communicator * diff --git a/ompi/mca/coll/han/coll_han_subcomms.c b/ompi/mca/coll/han/coll_han_subcomms.c index a5208f36044..9fcd65dad9b 100644 --- a/ompi/mca/coll/han/coll_han_subcomms.c +++ b/ompi/mca/coll/han/coll_han_subcomms.c @@ -418,6 +418,11 @@ int mca_coll_han_comm_create(struct ompi_communicator_t *comm, int mca_coll_han_revoke_local(ompi_communicator_t *comm, mca_coll_base_module_t *module) { + // Note that this "coll" revokes the subcomms regardless of whether the + // parent comm is "coll" revoked or "fully" revoked, so it is important + // to only use collective tags on communication in these subcomms. Else, + // one should check the impact to the overall revocation process before + // changing these to "fully" revoking the subcomms. 
mca_coll_han_module_t *han_module = (mca_coll_han_module_t*) module; for(int i = 0; i < NB_TOPO_LVL; i++){ if(NULL == han_module->sub_comm[i]) continue; @@ -430,9 +435,9 @@ int mca_coll_han_revoke_local(ompi_communicator_t *comm, } } if(han_module->cached_up_comms != NULL){ - for(int i = 0; i < COLL_HAN_LOW_MODULES; i++){ - if(NULL == han_module->cached_low_comms[i]) continue; - ompi_comm_revoke_local(han_module->cached_low_comms[i], true); + for(int i = 0; i < COLL_HAN_UP_MODULES; i++){ + if(NULL == han_module->cached_up_comms[i]) continue; + ompi_comm_revoke_local(han_module->cached_up_comms[i], true); } } return MPI_SUCCESS; From 1698b45640e9382dc3c1b37b1f068f7b29b83586 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 14 Jan 2026 11:54:42 -0500 Subject: [PATCH 14/19] Use #ifdef with HAVE_* defines. Signed-off-by: George Bosilca --- opal/mca/btl/tcp/btl_tcp_component.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opal/mca/btl/tcp/btl_tcp_component.c b/opal/mca/btl/tcp/btl_tcp_component.c index 9b1252e56d3..6ff668b6f8b 100644 --- a/opal/mca/btl/tcp/btl_tcp_component.c +++ b/opal/mca/btl/tcp/btl_tcp_component.c @@ -59,7 +59,7 @@ #ifdef HAVE_SYS_TIME_H # include #endif -#if HAVE_SYS_UCRED_H +#ifdef HAVE_SYS_UCRED_H # include #endif /* HAVE_SYS_UCRED_H */ #ifdef HAVE_UNISTD_H From 13280e723b52dbca5e7fdf78f95a54885e21387d Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 14 Jan 2026 11:55:03 -0500 Subject: [PATCH 15/19] Fix an #endif comment Signed-off-by: George Bosilca --- opal/mca/btl/smcuda/btl_smcuda_component.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opal/mca/btl/smcuda/btl_smcuda_component.c b/opal/mca/btl/smcuda/btl_smcuda_component.c index 8f633d6b48a..5257070d404 100644 --- a/opal/mca/btl/smcuda/btl_smcuda_component.c +++ b/opal/mca/btl/smcuda/btl_smcuda_component.c @@ -236,7 +236,7 @@ static int smcuda_register(void) if (0 == mca_btl_smcuda.super.btl_accelerator_eager_limit) { 
mca_btl_smcuda.super.btl_accelerator_eager_limit = SIZE_MAX; /* magic number */ } -#endif /* OPAL_CUDA_SUPPORT */ +#endif /* OPAL_CUDA_GDR_SUPPORT */ return mca_btl_smcuda_component_verify(); } From c6fc05da74115cad67e0c8694d54419003c923d4 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Wed, 14 Jan 2026 05:56:50 -0500 Subject: [PATCH 16/19] Enable ASAN for mpi4py in CI Run mpi4py with ASAN, with a separate step that aborts on errors. The existing steps should run to completion even if an error is detected. Signed-off-by: Joseph Schuchart --- .github/workflows/ompi_mpi4py.yaml | 54 +++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ompi_mpi4py.yaml b/.github/workflows/ompi_mpi4py.yaml index 29abbcaf70d..dd8cdaa68df 100644 --- a/.github/workflows/ompi_mpi4py.yaml +++ b/.github/workflows/ompi_mpi4py.yaml @@ -20,18 +20,33 @@ permissions: jobs: test: - runs-on: ubuntu-22.04 + # We need Unbuntu 24.04 (over 22.04) due to a kernel bug, + # see https://github.com/google/sanitizers/issues/856. 
+ runs-on: ubuntu-24.04 timeout-minutes: 30 env: MPI4PY_TEST_SPAWN: true + # disable ODR violation detection until #13469 is fixed + # and don't abort on error by default + ASAN_OPTIONS: verify_asan_link_order=0,detect_odr_violation=0,abort_on_error=0 + # disable leak detection and make sure we do not fail on leaks + LSAN_OPTIONS: detect_leaks=0,exitcode=0 + steps: - name: Configure hostname run: echo 127.0.0.1 `hostname` | sudo tee -a /etc/hosts > /dev/null if: ${{ runner.os == 'Linux' || runner.os == 'macOS' }} + - name: Print kernel version + run: uname -a + + - name: Disable ASLR + run: sudo sysctl -w kernel.randomize_va_space=0 + if: ${{ runner.os == 'Linux' }} + - name: Install depencencies - run: sudo apt-get install -y -q - libnuma-dev + run: sudo apt update && sudo apt-get install -y -q + libnuma-dev libasan8 if: ${{ runner.os == 'Linux' }} - name: Checkout Open MPI @@ -59,7 +74,8 @@ jobs: --disable-oshmem --disable-silent-rules --prefix=/opt/openmpi - LDFLAGS=-Wl,-rpath,/opt/openmpi/lib + CFLAGS="-fno-omit-frame-pointer -g -O1 -fsanitize=address" + LDFLAGS="-Wl,-rpath,/opt/openmpi/lib -fsanitize=address" working-directory: mpi-build - name: Build MPI @@ -145,6 +161,35 @@ jobs: if: ${{ true }} timeout-minutes: 10 + - name: Setting up ASAN environment + # LD_PRELOAD is needed to make sure ASAN is the first thing loaded + # as it will otherwise complain + # Leak detection is currently disabled because of the size of the report. 
+ # The patcher is disabled because ASAN fails if code mmaps data at fixed + # memory addresses, see https://github.com/open-mpi/ompi/issues/12819 + # ODR violation detection is disabled until #13469 is fixed + run: | + echo LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libasan.so.8 >> $GITHUB_ENV + echo ASAN_OPTIONS=detect_odr_violation=0,abort_on_error=1 >> $GITHUB_ENV + echo LSAN_OPTIONS=detect_leaks=0,exitcode=0 >> $GITHUB_ENV + echo OMPI_MCA_memory=^patcher >> $GITHUB_ENV + + - name: Test mpi4py ASAN (np=1) + run: mpiexec -n 1 python test/main.py -v -x TestExcErrhandlerNull + if: ${{ true }} + timeout-minutes: 10 + + - name: Test mpi4py ASAN (np=4) + run: mpiexec -n 4 python test/main.py -v -f -x TestExcErrhandlerNull + if: ${{ true }} + timeout-minutes: 10 + + - name: Disabling ASAN environment + run: | + echo LD_PRELOAD= >> $GITHUB_ENV + echo ASAN_OPTIONS=verify_asan_link_order=0,detect_odr_violation=0,abort_on_error=0 >> $GITHUB_ENV + echo LSAN_OPTIONS=detect_leaks=0,exitcode=0 >> $GITHUB_ENV + - name: Relocate Open MPI installation run: mv /opt/openmpi /opt/ompi - name: Update PATH and set OPAL_PREFIX and LD_LIBRARY_PATH @@ -157,4 +202,3 @@ jobs: run: python test/main.py -v -x TestExcErrhandlerNull if: ${{ true }} timeout-minutes: 10 - From 7f5eea7572f444906b8919a23bbd1de9db730abc Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Wed, 14 Jan 2026 16:31:25 -0500 Subject: [PATCH 17/19] Remove disable of ASLR and apt update Signed-off-by: Joseph Schuchart --- .github/workflows/ompi_mpi4py.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ompi_mpi4py.yaml b/.github/workflows/ompi_mpi4py.yaml index dd8cdaa68df..3f8b57d4736 100644 --- a/.github/workflows/ompi_mpi4py.yaml +++ b/.github/workflows/ompi_mpi4py.yaml @@ -37,15 +37,15 @@ jobs: run: echo 127.0.0.1 `hostname` | sudo tee -a /etc/hosts > /dev/null if: ${{ runner.os == 'Linux' || runner.os == 'macOS' }} - - name: Print kernel version - run: uname -a - - - 
name: Disable ASLR - run: sudo sysctl -w kernel.randomize_va_space=0 - if: ${{ runner.os == 'Linux' }} +# - name: Print kernel version +# run: uname -a +# +# - name: Disable ASLR +# run: sudo sysctl -w kernel.randomize_va_space=0 +# if: ${{ runner.os == 'Linux' }} - name: Install depencencies - run: sudo apt update && sudo apt-get install -y -q + run: sudo apt-get install -y -q libnuma-dev libasan8 if: ${{ runner.os == 'Linux' }} From 1af95bb437e4dc7807768019294c7913654e6a64 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Wed, 14 Jan 2026 17:04:31 -0500 Subject: [PATCH 18/19] Enable ASAN for all mpi4py tests and increase optimizations 30 minutes are not enough to run two extra tests so just enable ASAN for the existing tests. Also test `ompi_info` and `mpicc`. Signed-off-by: Joseph Schuchart --- .github/workflows/ompi_mpi4py.yaml | 52 ++++++++++++------------------ 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/.github/workflows/ompi_mpi4py.yaml b/.github/workflows/ompi_mpi4py.yaml index 3f8b57d4736..0e5939118d2 100644 --- a/.github/workflows/ompi_mpi4py.yaml +++ b/.github/workflows/ompi_mpi4py.yaml @@ -26,10 +26,9 @@ jobs: timeout-minutes: 30 env: MPI4PY_TEST_SPAWN: true - # disable ODR violation detection until #13469 is fixed - # and don't abort on error by default + # disable ASAN while building ASAN_OPTIONS: verify_asan_link_order=0,detect_odr_violation=0,abort_on_error=0 - # disable leak detection and make sure we do not fail on leaks + # disable leak detection LSAN_OPTIONS: detect_leaks=0,exitcode=0 steps: @@ -37,13 +36,6 @@ jobs: run: echo 127.0.0.1 `hostname` | sudo tee -a /etc/hosts > /dev/null if: ${{ runner.os == 'Linux' || runner.os == 'macOS' }} -# - name: Print kernel version -# run: uname -a -# -# - name: Disable ASLR -# run: sudo sysctl -w kernel.randomize_va_space=0 -# if: ${{ runner.os == 'Linux' }} - - name: Install depencencies run: sudo apt-get install -y -q libnuma-dev libasan8 @@ -74,7 +66,7 @@ jobs: --disable-oshmem 
--disable-silent-rules --prefix=/opt/openmpi - CFLAGS="-fno-omit-frame-pointer -g -O1 -fsanitize=address" + CFLAGS="-O2 -fno-omit-frame-pointer -g -fsanitize=address" LDFLAGS="-Wl,-rpath,/opt/openmpi/lib -fsanitize=address" working-directory: mpi-build @@ -131,6 +123,19 @@ jobs: env: CFLAGS: "-O0" + - name: Setting up ASAN environment + # LD_PRELOAD is needed to make sure ASAN is the first thing loaded + # as it will otherwise complain + # Leak detection is currently disabled because of the size of the report. + # The patcher is disabled because ASAN fails if code mmaps data at fixed + # memory addresses, see https://github.com/open-mpi/ompi/issues/12819 + # ODR violation detection is disabled until #13469 is fixed + run: | + echo LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libasan.so.8 >> $GITHUB_ENV + echo ASAN_OPTIONS=detect_odr_violation=0,abort_on_error=1 >> $GITHUB_ENV + echo LSAN_OPTIONS=detect_leaks=0,exitcode=0 >> $GITHUB_ENV + echo OMPI_MCA_memory=^patcher >> $GITHUB_ENV + - name: Test mpi4py (singleton) run: python test/main.py -v -x TestExcErrhandlerNull if: ${{ true }} @@ -161,28 +166,11 @@ jobs: if: ${{ true }} timeout-minutes: 10 - - name: Setting up ASAN environment - # LD_PRELOAD is needed to make sure ASAN is the first thing loaded - # as it will otherwise complain - # Leak detection is currently disabled because of the size of the report. 
- # The patcher is disabled because ASAN fails if code mmaps data at fixed - # memory addresses, see https://github.com/open-mpi/ompi/issues/12819 - # ODR violation detection is disabled until #13469 is fixed - run: | - echo LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libasan.so.8 >> $GITHUB_ENV - echo ASAN_OPTIONS=detect_odr_violation=0,abort_on_error=1 >> $GITHUB_ENV - echo LSAN_OPTIONS=detect_leaks=0,exitcode=0 >> $GITHUB_ENV - echo OMPI_MCA_memory=^patcher >> $GITHUB_ENV - - - name: Test mpi4py ASAN (np=1) - run: mpiexec -n 1 python test/main.py -v -x TestExcErrhandlerNull - if: ${{ true }} - timeout-minutes: 10 + - name: Show MPI (ASAN) + run: ompi_info - - name: Test mpi4py ASAN (np=4) - run: mpiexec -n 4 python test/main.py -v -f -x TestExcErrhandlerNull - if: ${{ true }} - timeout-minutes: 10 + - name: Show MPICC (ASAN) + run: mpicc -show - name: Disabling ASAN environment run: | From b8a4c15874f2664831cd50b1733f890b5f99a146 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Wed, 14 Jan 2026 18:16:09 -0500 Subject: [PATCH 19/19] ASAN: explicitly disable stack-use-after-return check This may reduce overhead, although according to https://github.com/google/sanitizers/wiki/addresssanitizerflags it should be disabled by default. Signed-off-by: Joseph Schuchart --- .github/workflows/ompi_mpi4py.yaml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ompi_mpi4py.yaml b/.github/workflows/ompi_mpi4py.yaml index 0e5939118d2..bce5ed4f1df 100644 --- a/.github/workflows/ompi_mpi4py.yaml +++ b/.github/workflows/ompi_mpi4py.yaml @@ -125,14 +125,16 @@ jobs: - name: Setting up ASAN environment # LD_PRELOAD is needed to make sure ASAN is the first thing loaded - # as it will otherwise complain + # as it will otherwise complain. # Leak detection is currently disabled because of the size of the report. 
# The patcher is disabled because ASAN fails if code mmaps data at fixed - # memory addresses, see https://github.com/open-mpi/ompi/issues/12819 + # memory addresses, see https://github.com/open-mpi/ompi/issues/12819. # ODR violation detection is disabled until #13469 is fixed + # Disabling stack use after return detection to reduce slowdown, per + # https://github.com/llvm/llvm-project/issues/64190. run: | echo LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libasan.so.8 >> $GITHUB_ENV - echo ASAN_OPTIONS=detect_odr_violation=0,abort_on_error=1 >> $GITHUB_ENV + echo ASAN_OPTIONS=detect_odr_violation=0,abort_on_error=1,detect_stack_use_after_return=0 >> $GITHUB_ENV echo LSAN_OPTIONS=detect_leaks=0,exitcode=0 >> $GITHUB_ENV echo OMPI_MCA_memory=^patcher >> $GITHUB_ENV