From 8df0a9d2e473c0c5754214ac1d3ec100b0f33469 Mon Sep 17 00:00:00 2001
From: Jeff Noyle <jeffno@microsoft.com>
Date: Wed, 30 Oct 2024 14:09:36 -0700
Subject: [PATCH] CP: PIX: Implement shader access tracking for
 descriptor-heap-indexed TLAS (#6950) (#6991)

Turns out some drivers DO care. Specifically new hardware from a
particular hardware partner. Original PR comment:



Simple missing case, should have implemented it in the first place. Also
noticed that from a recent change to move to raw buffer writes, the
debug output UAV offset was being set to a non-dword aligned offset. No
drivers/hardware seem to care, but it's a concerning thing to leave
as-is.

(cherry picked from commit 080aeb7199e66e4b0a2b6383fd26a9f8d2cccbf5)
---
 .../DxilDebugInstrumentation.cpp              |  4 +-
 .../DxilShaderAccessTracking.cpp              | 12 ++-
 .../pix/AccessTrackingDXRDynamicTLAS.hlsl     | 76 +++++++++++++++++++
 .../test/HLSLFileCheck/pix/DebugBasic.hlsl    |  2 +-
 4 files changed, 89 insertions(+), 5 deletions(-)
 create mode 100644 tools/clang/test/HLSLFileCheck/pix/AccessTrackingDXRDynamicTLAS.hlsl

diff --git a/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp b/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp
index 21c1199f91..a7d7e72cb4 100644
--- a/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp
+++ b/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp
@@ -795,12 +795,12 @@ void DxilDebugInstrumentation::determineLimitANDAndInitializeCounter(
   auto *PHIForCounterOffset =
       BC.Builder.CreatePHI(Type::getInt32Ty(BC.Ctx), 2, "PIXCounterLocation");
   const uint32_t InterestingCounterOffset =
-      static_cast<uint32_t>(m_UAVSize / 2 - 1);
+      static_cast<uint32_t>(m_UAVSize / 2 - sizeof(uint32_t));
   PHIForCounterOffset->addIncoming(
       BC.HlslOP->GetU32Const(InterestingCounterOffset),
       InterestingInvocationBlock);
   const uint32_t UninterestingCounterOffsetValue =
-      static_cast<uint32_t>(m_UAVSize - 1);
+      static_cast<uint32_t>(m_UAVSize - sizeof(uint32_t));
   PHIForCounterOffset->addIncoming(
       BC.HlslOP->GetU32Const(UninterestingCounterOffsetValue),
       NonInterestingInvocationBlock);
diff --git a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp
index 8299ff87d2..03ab62c50b 100644
--- a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp
+++ b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp
@@ -980,9 +980,17 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) {
           case DXIL::OpCode::BufferUpdateCounter:
             readWrite = ShaderAccessFlags::Counter;
             break;
-          case DXIL::OpCode::TraceRay:
+          case DXIL::OpCode::TraceRay: {
             // Read of AccelerationStructure; doesn't match function attribute
-            // readWrite = ShaderAccessFlags::Read;  // TODO: Support
+            auto res = GetResourceFromHandle(Call->getArgOperand(1), DM);
+            if (res.accessStyle == AccessStyle::None) {
+              continue;
+            }
+            if (EmitResourceAccess(DM, res, Call, HlslOP, Ctx,
+                                   ShaderAccessFlags::Read)) {
+              Modified = true;
+            }
+          }
             continue;
           case DXIL::OpCode::RayQuery_TraceRayInline: {
             // Read of AccelerationStructure; doesn't match function attribute
diff --git a/tools/clang/test/HLSLFileCheck/pix/AccessTrackingDXRDynamicTLAS.hlsl b/tools/clang/test/HLSLFileCheck/pix/AccessTrackingDXRDynamicTLAS.hlsl
new file mode 100644
index 0000000000..5e7a23d60e
--- /dev/null
+++ b/tools/clang/test/HLSLFileCheck/pix/AccessTrackingDXRDynamicTLAS.hlsl
@@ -0,0 +1,76 @@
+// RUN: %dxc -T lib_6_6 %s | %opt -S -hlsl-dxil-pix-shader-access-instrumentation,config=.256;512;1024. | %FileCheck %s
+
+// This file is checking for the correct access tracking for a descriptor-heap-indexed TLAS for TraceRay.
+
+// First advance through the output text to where the handle for heap index 7 is created:
+// CHECK: call %dx.types.Handle @dx.op.createHandleFromHeap(i32 218, i32 7
+
+// The next buffer store should be for this resource.
+// See DxilShaderAccessTracking::EmitResourceAccess for how this index is calculated. 
+// It's the descriptor heap index (7, as seen in the HLSL below) plus 1 (to skip
+// over the "out-of-bounds" entry in the output UAV) times 8 DWORDs per record
+// (the first DWORD for write, the second for read), plus 4 to offset to the "read" record.
+// Read access for descriptor 7 is therefore at (7+1)*8+4 = 68.
+// This is then added to the base address for dynamic writes, which is 256
+// (from the config=.256 in the command-line above), for a total of 324.
+
+// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %[[UAV:[0-9+]]], i32 324,
+
+RWTexture2D<float4> RTOutput : register(u0);
+
+struct PayloadData
+{
+  uint index : INDEX;
+};
+
+struct AttributeData
+{
+  float2 barycentrics;
+};
+
+struct ColorConstant
+{
+  uint3 color;
+};
+
+struct AlphaConstant
+{
+  uint alpha;
+};
+
+
+ConstantBuffer<ColorConstant> color : register(b0);
+ConstantBuffer<AlphaConstant> alpha : register(b1);
+
+[shader("raygeneration")]
+void RayGenMain()
+{
+  uint2 index = DispatchRaysIndex().xy;
+  uint2 dim = DispatchRaysDimensions().xy;
+
+  PayloadData payload;
+  payload.index = index.y * dim.x + index.x;
+
+  RayDesc ray;
+  ray.Origin.x = 2.0 * (index.x + 0.5) / dim.x - 1.0;
+  ray.Origin.y = 1.0 - 2.0 * (index.y + 0.5) / dim.y;
+  ray.Origin.z = 0.0;
+  ray.Direction = float3(0, 0, -1);
+  ray.TMin = 0.01;
+  ray.TMax = 100.0;
+
+  RaytracingAccelerationStructure scene = ResourceDescriptorHeap[7];
+
+  TraceRay(
+    scene, // Acceleration structure
+    0,     // Ray flags
+    0xFF,  // Instance inclusion mask
+    0,     // RayContributionToHitGroupIndex
+    1,     // MultiplierForGeometryContributionToHitGroupIndex
+    0,     // MissShaderIndex
+    ray,
+    payload);
+
+  RTOutput[index] = float4(color.color.r / 255.0f, color.color.g / 255.0f, color.color.b / 255.0f, alpha.alpha / 255.0f);
+}
+
diff --git a/tools/clang/test/HLSLFileCheck/pix/DebugBasic.hlsl b/tools/clang/test/HLSLFileCheck/pix/DebugBasic.hlsl
index 6d6cd5454d..b211e22959 100644
--- a/tools/clang/test/HLSLFileCheck/pix/DebugBasic.hlsl
+++ b/tools/clang/test/HLSLFileCheck/pix/DebugBasic.hlsl
@@ -16,7 +16,7 @@
 // Check for branches-for-interest and AND value and counter location for a UAV size of 128
 // CHECK: br i1 %ComparePos, label %PIXInterestingBlock, label %PIXNonInterestingBlock
 // CHECK: %PIXOffsetOr = phi i32 [ 0, %PIXInterestingBlock ], [ 64, %PIXNonInterestingBlock ]
-// CHECK: %PIXCounterLocation = phi i32 [ 63, %PIXInterestingBlock ], [ 127, %PIXNonInterestingBlock ]
+// CHECK: %PIXCounterLocation = phi i32 [ 60, %PIXInterestingBlock ], [ 124, %PIXNonInterestingBlock ]
 
 // Check the first block header was emitted: (increment, AND + OR)
 // CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle, i32 0