Skip to content

Commit

Permalink
Optimizing Dive Vector usage to improve load times
Browse files Browse the repository at this point in the history
  • Loading branch information
Shan-Min Chao authored and shanminchao committed Jan 6, 2025
1 parent 615fa43 commit cdf84f0
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 12 deletions.
69 changes: 58 additions & 11 deletions dive_core/command_hierarchy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2242,6 +2242,9 @@ uint64_t CommandHierarchyCreator::GetChildCount(CommandHierarchy::TopologyType t
//--------------------------------------------------------------------------------------------------
void CommandHierarchyCreator::CreateTopologies()
{
uint64_t total_num_children[CommandHierarchy::kTopologyTypeCount] = {};
uint64_t total_num_shared_children[CommandHierarchy::kTopologyTypeCount] = {};

// A kVulkanCallTopology is a kAllEventTopology without the following:
// kDrawDispatchDmaNode, kSyncNode, kPostambleStateNode, kMarkerNode-kBarrier
auto FilterOut = [&](size_t node_index) {
Expand Down Expand Up @@ -2275,6 +2278,11 @@ void CommandHierarchyCreator::CreateTopologies()

// Go through primary children of a particular node, and only add non-ignored nodes
const DiveVector<uint64_t> &children = m_node_children[src_topology][0][node_index];

// Optionally pre-reserve the maximum size for performance reasons
// This may result in slightly more memory being used
m_node_children[dst_topology][0][node_index].reserve(children.size());

for (size_t child = 0; child < children.size(); ++child)
{
if (!FilterOut(children[child]))
Expand All @@ -2284,6 +2292,14 @@ void CommandHierarchyCreator::CreateTopologies()
// Shared children should remain the same
const DiveVector<uint64_t> &shared = m_node_children[src_topology][1][node_index];
m_node_children[CommandHierarchy::kVulkanCallTopology][1][node_index] = shared;

// Cache # of children
total_num_children[src_topology] += m_node_children[src_topology][0][node_index].size();
total_num_shared_children[src_topology] += m_node_children[src_topology][1][node_index]
.size();
total_num_children[dst_topology] += m_node_children[dst_topology][0][node_index].size();
total_num_shared_children[dst_topology] += m_node_children[dst_topology][1][node_index]
.size();
}

// A kVulkanEventTopology is a kVulkanCallTopology without non-Event Vulkan kMarkerNodes.
Expand All @@ -2302,33 +2318,48 @@ void CommandHierarchyCreator::CreateTopologies()

// Go through primary children of a particular node, and only add non-ignored nodes
const DiveVector<uint64_t> &children = m_node_children[src_topology][0][node_index];
DiveVector<uint64_t> acc_shared;

// Optionally pre-reserve the maximum size for performance reasons
// This may result in slightly more memory being used
m_node_children[dst_topology][0][node_index].reserve(children.size());

DiveVector<uint64_t> acc_shared;
for (size_t child = 0; child < children.size(); ++child)
{
// Accumulate shared packets from the child node
uint64_t child_index = children[child];
const DiveVector<uint64_t> &shared = m_node_children[src_topology][1][child_index];
for (uint32_t i = 0; i < shared.size(); ++i)
acc_shared.push_back(shared[i]);
if (!IsVulkanNonEventNode(child_index))
{
// If it isn't a Vulkan Event node or a Vulkan Non-Event node (ie. a non-Vulkan
// node, such as a normal marker node, a submit node, etc), then throw away the
// previous accumulation. For example, the beginning of a submit sometimes has a
// vkCmdBegin followed by a debug-marker. The PM4 contents of the vkCmdBegin is
// thrown away, since it isn't part of the debug-marker.
if (!IsVulkanEventNode(child_index))
acc_shared.clear();
// node, such as a normal marker node, a submit node, etc), then don't
// accumulate shared nodes. For example, the beginning of a submit sometimes has
// a vkCmdBegin followed by a debug-marker. The PM4 contents of the vkCmdBegin
// is thrown away, since it isn't part of the debug-marker.
if (IsVulkanEventNode(child_index))
{
for (uint32_t i = 0; i < shared.size(); ++i)
acc_shared.push_back(shared[i]);
}

AddChild(dst_topology, node_index, child_index);

if (acc_shared.empty())
m_node_children[dst_topology][1][child_index] = shared;
else
m_node_children[dst_topology][1][child_index] = acc_shared;
acc_shared.clear();
m_node_children[dst_topology][1][child_index] = std::move(acc_shared);
acc_shared.resize(0);
}
else
{
for (uint32_t i = 0; i < shared.size(); ++i)
acc_shared.push_back(shared[i]);
}
}
// Cache # of children
total_num_children[dst_topology] += m_node_children[dst_topology][0][node_index].size();
total_num_shared_children[dst_topology] += m_node_children[dst_topology][1][node_index]
.size();
}

// Convert the m_node_children temporary structure into CommandHierarchy's topologies
Expand All @@ -2337,6 +2368,22 @@ void CommandHierarchyCreator::CreateTopologies()
num_nodes = m_node_children[topology][0].size();
Topology &cur_topology = m_command_hierarchy_ptr->m_topology[topology];
cur_topology.SetNumNodes(num_nodes);

// Optional loop: Pre-reserve to prevent the resize() from allocating memory later
// Note: The number of children for some of the topologies have been determined
// earlier in this function already
if (total_num_children[topology] == 0 && total_num_shared_children[topology] == 0)
{
for (uint64_t node_index = 0; node_index < num_nodes; ++node_index)
{
auto &node_children = m_node_children[topology];
total_num_children[topology] += node_children[0][node_index].size();
total_num_shared_children[topology] += node_children[1][node_index].size();
}
}
cur_topology.m_children_list.reserve(total_num_children[topology]);
cur_topology.m_shared_children_list.reserve(total_num_shared_children[topology]);

for (uint64_t node_index = 0; node_index < num_nodes; ++node_index)
{
DIVE_ASSERT(m_node_children[topology][0].size() == m_node_children[topology][1].size());
Expand Down
1 change: 1 addition & 0 deletions dive_core/stl_replacement.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ template<class Type> class Vector
~Vector();
Type &operator[](uint64_t i) const;
Vector<Type> &operator=(const Vector<Type> &a);
Vector<Type> &operator=(Vector<Type> &&a);
Type *data() const;
Type &front() const;
Type &back() const;
Expand Down
21 changes: 20 additions & 1 deletion dive_core/stl_replacement.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,11 @@ Vector<Type>::Vector(uint64_t size) :
}

//--------------------------------------------------------------------------------------------------
template<class Type> Vector<Type>::Vector(std::initializer_list<Type> a)
template<class Type>
Vector<Type>::Vector(std::initializer_list<Type> a) :
m_buffer(nullptr),
m_reserved(0),
m_size(0)
{
reserve(a.size());
std::copy(a.begin(), a.end(), m_buffer);
Expand Down Expand Up @@ -99,6 +103,21 @@ template<class Type> Vector<Type> &Vector<Type>::operator=(const Vector<Type> &a
return *this;
}

//--------------------------------------------------------------------------------------------------
// Move assignment: takes over the buffer, reserved capacity, and size of `a`.
//
// The previous contents of *this are handed to `a` instead of being overwritten, so the storage
// that *this owned before the assignment is never orphaned. Simply overwriting m_buffer (as an
// earlier version did) leaks that storage whenever the destination is non-empty.
// NOTE(review): this relies on `a` releasing whatever it holds when it is destroyed or reused;
// the destructor is not visible here -- confirm it frees m_buffer.
//
// After the call `a` is in a valid state holding the former contents of *this (empty if *this was
// empty). Callers that want to reuse `a` as an empty vector should shrink it explicitly.
// Self-move-assignment is a no-op.
template<class Type> Vector<Type> &Vector<Type>::operator=(Vector<Type> &&a)
{
    if (&a != this)
    {
        // Remember what this vector currently owns so it can be handed to the source
        auto prev_buffer = m_buffer;
        auto prev_reserved = m_reserved;
        auto prev_size = m_size;

        // Steal the source's storage
        m_buffer = a.m_buffer;
        m_reserved = a.m_reserved;
        m_size = a.m_size;

        // Give the old storage to the source rather than dropping it on the floor
        a.m_buffer = prev_buffer;
        a.m_reserved = prev_reserved;
        a.m_size = prev_size;
    }
    return *this;
}

//--------------------------------------------------------------------------------------------------
template<class Type> Type *Vector<Type>::data() const
{
Expand Down

0 comments on commit cdf84f0

Please sign in to comment.