Skip to content

Commit

Permalink
Fix segfault for decoupled models (#327) (#328)
Browse files — browse the repository at this point in the history
* Set release flags and clean up response factory map before returning error

* Address comments

* Move the cleanup function to the outside scope

* Delete response factory when response sender goes out of scope
  • Loading branch information
krishung5 authored and mc-nv committed Dec 20, 2023
1 parent f32a352 commit 599a75e
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 10 deletions.
2 changes: 1 addition & 1 deletion src/ipc_message.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ typedef enum PYTHONSTUB_commandtype_enum {
PYTHONSTUB_AutoCompleteResponse,
PYTHONSTUB_LogRequest,
PYTHONSTUB_BLSDecoupledInferPayloadCleanup,
PYTHONSTUB_DecoupledResponseFactoryCleanup,
PYTHONSTUB_BLSDecoupledResponseFactoryCleanup,
PYTHONSTUB_MetricFamilyRequestNew,
PYTHONSTUB_MetricFamilyRequestDelete,
PYTHONSTUB_MetricRequestNew,
Expand Down
2 changes: 1 addition & 1 deletion src/pb_stub.cc
Original file line number Diff line number Diff line change
Expand Up @@ -997,7 +997,7 @@ Stub::ServiceStubToParentRequests()
(utils_msg_payload->command_type ==
PYTHONSTUB_BLSDecoupledInferPayloadCleanup) ||
(utils_msg_payload->command_type ==
PYTHONSTUB_DecoupledResponseFactoryCleanup)) {
PYTHONSTUB_BLSDecoupledResponseFactoryCleanup)) {
SendCleanupId(utils_msg_payload, utils_msg_payload->command_type);
} else if (
utils_msg_payload->command_type == PYTHONSTUB_IsRequestCancelled) {
Expand Down
18 changes: 13 additions & 5 deletions src/python_be.cc
Original file line number Diff line number Diff line change
Expand Up @@ -830,8 +830,8 @@ ModelInstanceState::StubToParentMQMonitor()
break;
}
case PYTHONSTUB_BLSDecoupledInferPayloadCleanup:
case PYTHONSTUB_DecoupledResponseFactoryCleanup: {
ProcessCleanupRequest(message);
case PYTHONSTUB_BLSDecoupledResponseFactoryCleanup: {
ProcessBLSCleanupRequest(message);
break;
}
case PYTHONSTUB_IsRequestCancelled: {
Expand Down Expand Up @@ -928,9 +928,17 @@ ModelInstanceState::ProcessCleanupRequest(
Stub()->ShmPool()->Load<char>(message->Args());
CleanupMessage* cleanup_message_ptr =
reinterpret_cast<CleanupMessage*>(cleanup_request_message.data_.get());

void* id = cleanup_message_ptr->id;
infer_payload_.erase(reinterpret_cast<intptr_t>(id));
intptr_t id = reinterpret_cast<intptr_t>(cleanup_message_ptr->id);
if (message->Command() == PYTHONSTUB_BLSDecoupledInferPayloadCleanup) {
// Remove the InferPayload object from the map.
infer_payload_.erase(id);
} else if (
message->Command() == PYTHONSTUB_BLSDecoupledResponseFactoryCleanup) {
// Delete response factory
std::unique_ptr<
TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
response_factory(reinterpret_cast<TRITONBACKEND_ResponseFactory*>(id));
}

{
bi::scoped_lock<bi::interprocess_mutex> lock{*(message->ResponseMutex())};
Expand Down
5 changes: 3 additions & 2 deletions src/python_be.h
Original file line number Diff line number Diff line change
Expand Up @@ -400,8 +400,9 @@ class ModelInstanceState : public BackendModelInstance {
std::unique_ptr<InferResponse>* infer_response,
bi::managed_external_buffer::handle_t* response_handle);

// Process the decoupled cleanup request for InferPayload and ResponseFactory
void ProcessCleanupRequest(const std::unique_ptr<IPCMessage>& message);
// Process the BLS decoupled cleanup request for InferPayload and
// ResponseFactory
void ProcessBLSCleanupRequest(const std::unique_ptr<IPCMessage>& message);

// Process request cancellation query
void ProcessIsRequestCancelled(const std::unique_ptr<IPCMessage>& message);
Expand Down
2 changes: 1 addition & 1 deletion src/response_sender.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ ResponseSender::~ResponseSender()
std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
stub->EnqueueCleanupId(
reinterpret_cast<void*>(response_factory_address_),
PYTHONSTUB_DecoupledResponseFactoryCleanup);
PYTHONSTUB_BLSDecoupledResponseFactoryCleanup);
}

void
Expand Down

0 comments on commit 599a75e

Please sign in to comment.