Skip to content

Commit

Permalink
Added documentation to explain the flow better.
Browse files Browse the repository at this point in the history
  • Loading branch information
corey-derochie-amd committed Oct 23, 2024
1 parent f371a36 commit e88e0ec
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions src/misc/msccl/msccl_status.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ static vector<shared_ptr<mscclRankState>> rankStates;
static inline mscclRankState& mscclGetRankState(int rank, int rankCount = -1) {
static thread_local shared_ptr<mscclRankState> threadRankState;

// Calling code can allocate states for the number of ranks at an appropriate time.
// It is assumed that all threads will call this function simultaneously with the
// same rankCount, which would avoid race conditions later in the function.
if (rankCount > 0) {
lock_guard<mutex> lock(rankStatesMutex);
if (rankStates.size() < rankCount) {
Expand All @@ -37,20 +40,24 @@ static inline mscclRankState& mscclGetRankState(int rank, int rankCount = -1) {
}

if (rank < 0 || rank >= rankStates.size()) {
// threadRankState is used when no rank state can be returned (rank<0 or rank not in rankStates)
if (!threadRankState) {
threadRankState.reset(new mscclRankState());
}
return *threadRankState;
}

if (!rankStates[rank]) {
// When no state is yet assigned to a rank, use the current thread's threadRankState.
if (!threadRankState) {
threadRankState.reset(new mscclRankState());
}
rankStates[rank] = threadRankState;
}

if (!threadRankState) {
// Cache this rank's state in threadRankState in case this thread calls with rank<0 later.
// NOTE: When multiple ranks share a thread, only the first rank to call in on that thread is cached here and used for rank<0.
threadRankState = rankStates[rank];
}

Expand Down

0 comments on commit e88e0ec

Please sign in to comment.