Skip to content

Commit

Permalink
Merge pull request open-mpi#12823 from bosilca/topic/fix_ob1_segmentation
Browse files Browse the repository at this point in the history

Topic/fix ob1 segmentation with UCT BTL
  • Loading branch information
bosilca authored Sep 30, 2024
2 parents b7a56e9 + 58400ad commit 0e1abb6
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 29 deletions.
2 changes: 1 addition & 1 deletion ompi/mca/pml/ob1/pml_ob1_isend.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ static inline int mca_pml_ob1_send_inline (const void *buf, size_t count,
}

if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
return rc;
return rc;
}

return (int) size;
Expand Down
2 changes: 1 addition & 1 deletion opal/datatype/opal_datatype_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,7 @@ struct opal_datatype_t;
# define OPAL_DATATYPE_SAFEGUARD_POINTER(ACTPTR, LENGTH, INITPTR, PDATA, COUNT) \
{ \
unsigned char *__lower_bound = (INITPTR), *__upper_bound; \
assert(((LENGTH) != 0) && ((COUNT) != 0)); \
assert( (COUNT) != 0 ); \
__lower_bound += (PDATA)->true_lb; \
__upper_bound = (INITPTR) + (PDATA)->true_ub + \
((PDATA)->ub - (PDATA)->lb) * ((COUNT) -1); \
Expand Down
7 changes: 4 additions & 3 deletions opal/datatype/opal_datatype_position.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ static inline void position_single_block(opal_convertor_t *CONVERTOR, unsigned c
}

/**
* Advance the convertors' position according. Update the pointer and the remaining space
* accordingly.
* Advance the convertor's position to account for *COUNT elements. Update
* the pointer and the remaining space accordingly.
*/
static inline void position_predefined_data(opal_convertor_t *CONVERTOR, dt_elem_desc_t *ELEM,
size_t *COUNT, unsigned char **POINTER, size_t *SPACE)
Expand All @@ -82,7 +82,8 @@ static inline void position_predefined_data(opal_convertor_t *CONVERTOR, dt_elem

if (cando_count > *(COUNT)) {
cando_count = *(COUNT);
}
} else if( 0 == cando_count )
return;

if (1 == _elem->blocklen) {
DO_DEBUG(opal_output(0,
Expand Down
14 changes: 0 additions & 14 deletions opal/mca/btl/sm/btl_sm_send.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,18 +73,4 @@ int mca_btl_sm_send(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpo
}

return OPAL_SUCCESS;

#if 0
if (((frag->hdr->flags & MCA_BTL_SM_FLAG_SINGLE_COPY) ||
!(frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) &&
frag->base.des_cbfunc) {
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;

return OPAL_SUCCESS;
}

/* data is gone (from the pml's perspective). frag callback/release will
happen later */
return 1;
#endif
}
30 changes: 20 additions & 10 deletions opal/mca/btl/uct/btl_uct_am.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ mca_btl_base_descriptor_t *mca_btl_uct_alloc(mca_btl_base_module_t *btl,
}

static inline void _mca_btl_uct_send_pack(void *data, void *header, size_t header_size,
opal_convertor_t *convertor, size_t payload_size)
opal_convertor_t *convertor, size_t* payload_size)
{
uint32_t iov_count = 1;
struct iovec iov;
Expand All @@ -64,11 +64,9 @@ static inline void _mca_btl_uct_send_pack(void *data, void *header, size_t heade

/* pack the data into the supplied buffer */
iov.iov_base = (IOVBASE_TYPE *) ((intptr_t) data + header_size);
iov.iov_len = length = payload_size;
iov.iov_len = *payload_size;

(void) opal_convertor_pack(convertor, &iov, &iov_count, &length);

assert(length == payload_size);
(void) opal_convertor_pack(convertor, &iov, &iov_count, payload_size);
}

struct mca_btl_base_descriptor_t *mca_btl_uct_prepare_src(mca_btl_base_module_t *btl,
Expand All @@ -92,7 +90,10 @@ struct mca_btl_base_descriptor_t *mca_btl_uct_prepare_src(mca_btl_base_module_t
}

_mca_btl_uct_send_pack((void *) ((intptr_t) frag->uct_iov.buffer + reserve), NULL, 0,
convertor, *size);
convertor, size);
/* update the length of the fragment according to the convertor packed data */
frag->segments[0].seg_len = reserve + *size;
frag->uct_iov.length = frag->segments[0].seg_len;
} else {
opal_convertor_get_current_pointer(convertor, &data_ptr);
assert(NULL != data_ptr);
Expand Down Expand Up @@ -286,7 +287,7 @@ static size_t mca_btl_uct_sendi_pack(void *data, void *arg)

am_header->value = args->am_header;
_mca_btl_uct_send_pack((void *) ((intptr_t) data + 8), args->header, args->header_size,
args->convertor, args->payload_size);
args->convertor, &args->payload_size);
return args->header_size + args->payload_size + 8;
}

Expand Down Expand Up @@ -329,9 +330,18 @@ int mca_btl_uct_sendi(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpo
} else if (msg_size < (size_t) MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, context->context_id)
.cap.am.max_short) {
int8_t *data = alloca(total_size);
_mca_btl_uct_send_pack(data, header, header_size, convertor, payload_size);
ucs_status = uct_ep_am_short(ep_handle, MCA_BTL_UCT_FRAG, am_header.value, data,
total_size);
size_t packed_payload_size = payload_size;
_mca_btl_uct_send_pack(data, header, header_size, convertor, &packed_payload_size);
if (packed_payload_size != payload_size) {
/* This should never happen, as the packed data should go in a single pack. But
in case it does, fall back to a descriptor allocation and let the caller
send the data.
*/
ucs_status = UCS_ERR_NO_RESOURCE;
} else {
ucs_status = uct_ep_am_short(ep_handle, MCA_BTL_UCT_FRAG, am_header.value, data,
total_size);
}
} else {
ssize_t size;

Expand Down

0 comments on commit 0e1abb6

Please sign in to comment.