Skip to content

Commit

Permalink
Extract sequencer for different id / flag handling
Browse files Browse the repository at this point in the history
  • Loading branch information
GuanLuo committed Oct 18, 2023
1 parent 244619b commit 3360546
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 14 deletions.
6 changes: 4 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ set(
rate_limiter.cc
repo_agent.cc
scheduler_utils.cc
sequence_batch_scheduler.cc
sequence_batch_scheduler/sequence_batch_scheduler.cc
sequence_state.cc
server.cc
shared_library.cc
Expand Down Expand Up @@ -194,8 +194,9 @@ set(
response_allocator.h
scheduler.h
scheduler_utils.h
sequence_batch_scheduler.h
sequence_batch_scheduler/sequence_batch_scheduler.h
sequence_state.h
sequence_batch_scheduler/sequence_utils.h
server.h
server_message.h
shared_library.h
Expand Down Expand Up @@ -279,6 +280,7 @@ target_include_directories(
triton-core
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/../include
${CMAKE_CURRENT_SOURCE_DIR}
${Boost_INCLUDE_DIRS}
${Protobuf_INCLUDE_DIRS}
)
Expand Down
2 changes: 1 addition & 1 deletion src/backend_model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#include "filesystem/api.h"
#include "model_config_utils.h"
#include "numa_utils.h"
#include "sequence_batch_scheduler.h"
#include "sequence_batch_scheduler/sequence_batch_scheduler.h"
#include "sequence_state.h"
#include "server.h"
#include "server_message.h"
Expand Down
1 change: 1 addition & 0 deletions src/infer_request.cc
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ void
InferenceRequest::Release(
std::unique_ptr<InferenceRequest>&& request, const uint32_t release_flags)
{
// lock needed if release passes through all callback.
// Invoke the release callbacks added internally before releasing the
// request to user provided callback.
for (auto it = request->release_callbacks_.rbegin();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,13 @@ SequenceBatchScheduler::Create(

auto& config = model->Config();

// Sequencer
if (config.sequence_batching().generative_sequence()) {
sched->sequencer_.reset(new GenerativeSequencer());
} else {
sched->sequencer_.reset(new Sequencer());
}

// Max sequence idle...
sched->max_sequence_idle_microseconds_ =
config.sequence_batching().max_sequence_idle_microseconds();
Expand Down Expand Up @@ -674,17 +681,8 @@ SequenceBatchScheduler::Enqueue(std::unique_ptr<InferenceRequest>& irequest)
"batcher");
}

// A request must have a correlation ID to be processed correctly by
// this scheduler. A value of 0 (zero) or "" (empty) indicates that the
// request doesn't have a correlation ID.
const InferenceRequest::SequenceId& correlation_id =
irequest->CorrelationId();
if (!correlation_id.InSequence()) {
return Status(
Status::Code::INVALID_ARG,
"inference request to model '" + irequest->ModelName() +
"' must specify a non-zero or non-empty correlation ID");
}
RETURN_IF_ERROR(sequencer_->SetupSequenceRequest(irequest));
const auto& correlation_id = irequest->CorrelationId();

BatcherSequenceSlot* target = nullptr;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include "scheduler.h"
#include "scheduler_utils.h"
#include "sequence_state.h"
#include "sequence_utils.h"
#include "status.h"
#include "triton/common/model_config.h"

Expand Down Expand Up @@ -279,6 +280,8 @@ class SequenceBatchScheduler : public Scheduler {
// Initial state used for implicit state.
std::unordered_map<std::string, SequenceStates::InitialStateData>
initial_state_;

std::unique_ptr<Sequencer> sequencer_;
};

// Base class for a scheduler that implements a particular scheduling
Expand Down
53 changes: 53 additions & 0 deletions src/sequence_batch_scheduler/sequence_utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#pragma once

#include <atomic>
#include <memory>

#include "infer_request.h"
#include "status.h"

namespace triton { namespace core {

// Prepares an incoming inference request for the sequence batch scheduler.
// This base implementation only validates the request and never modifies it;
// derived sequencers may override SetupSequenceRequest() to adjust the request.
class Sequencer {
 public:
  // Sequencers are owned and destroyed through base-class pointers
  // (std::unique_ptr<Sequencer>), so the destructor must be virtual;
  // otherwise deleting a derived sequencer through the base pointer is
  // undefined behavior.
  virtual ~Sequencer() = default;

  // Regular sequencer expects the request is well-prepared by the user when
  // sending inference request.
  //
  // Returns Status::Success when the request's correlation ID reports that it
  // is in a sequence, otherwise an INVALID_ARG status naming the model.
  virtual Status SetupSequenceRequest(
      std::unique_ptr<InferenceRequest>& irequest)
  {
    // A request must have a correlation ID to be processed correctly by
    // this scheduler. A value of 0 (zero) or "" (empty) indicates that the
    // request doesn't have a correlation ID.
    const auto& correlation_id = irequest->CorrelationId();
    if (!correlation_id.InSequence()) {
      return Status(
          Status::Code::INVALID_ARG,
          "inference request to model '" + irequest->ModelName() +
              "' must specify a non-zero or non-empty correlation ID");
    }
    return Status::Success;
  }
};

// Sequencer variant that, instead of rejecting a request that is not
// associated with a sequence, turns it into the start of a new sequence.
class GenerativeSequencer : public Sequencer {
 public:
  // Prepares 'irequest' for the sequence batcher.
  //
  // A request whose correlation ID already reports being in a sequence is
  // left untouched. Otherwise the request is assigned the next value of
  // 'sequence_id_' as its correlation ID and its flags are set to
  // TRITONSERVER_REQUEST_FLAG_SEQUENCE_START. Always returns Status::Success.
  Status SetupSequenceRequest(
      std::unique_ptr<InferenceRequest>& irequest) override
  {
    // Nothing to do when the caller already supplied a correlation ID.
    if (irequest->CorrelationId().InSequence()) {
      return Status::Success;
    }

    // Atomically claim the current counter value and advance it, so each
    // request handled here receives a distinct generated ID.
    const uint64_t assigned_id = sequence_id_.fetch_add(1);
    irequest->SetCorrelationId(InferenceRequest::SequenceId(assigned_id));
    irequest->SetFlags(TRITONSERVER_REQUEST_FLAG_SEQUENCE_START);
    return Status::Success;
  }

  // Next correlation ID to hand out. Starts at 1; the base sequencer's
  // comment describes 0 as meaning "no correlation ID".
  std::atomic<uint64_t> sequence_id_{1};
};

}} // namespace triton::core

0 comments on commit 3360546

Please sign in to comment.