From 8df0a9d2e473c0c5754214ac1d3ec100b0f33469 Mon Sep 17 00:00:00 2001 From: Jeff Noyle <jeffno@microsoft.com> Date: Wed, 30 Oct 2024 14:09:36 -0700 Subject: [PATCH] CP: PIX: Implement shader access tracking for descriptor-heap-indexed TLAS (#6950) (#6991) Turns out some drivers DO care. Specifically new hardware from a particular hardware partner. Original PR comment: Simple missing case, should have implemented it in the first place. Also noticed that from a recent change to move to raw buffer writes, the debug output UAV offset was being set to a non-dword aligned offset. No drivers/hardware seem to care, but it's a concerning thing to leave as-is. (cherry picked from commit 080aeb7199e66e4b0a2b6383fd26a9f8d2cccbf5) --- .../DxilDebugInstrumentation.cpp | 4 +- .../DxilShaderAccessTracking.cpp | 12 ++- .../pix/AccessTrackingDXRDynamicTLAS.hlsl | 76 +++++++++++++++++++ .../test/HLSLFileCheck/pix/DebugBasic.hlsl | 2 +- 4 files changed, 89 insertions(+), 5 deletions(-) create mode 100644 tools/clang/test/HLSLFileCheck/pix/AccessTrackingDXRDynamicTLAS.hlsl diff --git a/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp b/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp index 21c1199f91..a7d7e72cb4 100644 --- a/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp +++ b/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp @@ -795,12 +795,12 @@ void DxilDebugInstrumentation::determineLimitANDAndInitializeCounter( auto *PHIForCounterOffset = BC.Builder.CreatePHI(Type::getInt32Ty(BC.Ctx), 2, "PIXCounterLocation"); const uint32_t InterestingCounterOffset = - static_cast<uint32_t>(m_UAVSize / 2 - 1); + static_cast<uint32_t>(m_UAVSize / 2 - sizeof(uint32_t)); PHIForCounterOffset->addIncoming( BC.HlslOP->GetU32Const(InterestingCounterOffset), InterestingInvocationBlock); const uint32_t UninterestingCounterOffsetValue = - static_cast<uint32_t>(m_UAVSize - 1); + static_cast<uint32_t>(m_UAVSize - sizeof(uint32_t)); PHIForCounterOffset->addIncoming( BC.HlslOP->GetU32Const(UninterestingCounterOffsetValue), NonInterestingInvocationBlock); diff --git a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp index 8299ff87d2..03ab62c50b 100644 --- a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp +++ b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp @@ -980,9 +980,17 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) { case DXIL::OpCode::BufferUpdateCounter: readWrite = ShaderAccessFlags::Counter; break; - case DXIL::OpCode::TraceRay: + case DXIL::OpCode::TraceRay: { // Read of AccelerationStructure; doesn't match function attribute - // readWrite = ShaderAccessFlags::Read; // TODO: Support + auto res = GetResourceFromHandle(Call->getArgOperand(1), DM); + if (res.accessStyle == AccessStyle::None) { + continue; + } + if (EmitResourceAccess(DM, res, Call, HlslOP, Ctx, + ShaderAccessFlags::Read)) { + Modified = true; + } + } continue; case DXIL::OpCode::RayQuery_TraceRayInline: { // Read of AccelerationStructure; doesn't match function attribute diff --git a/tools/clang/test/HLSLFileCheck/pix/AccessTrackingDXRDynamicTLAS.hlsl b/tools/clang/test/HLSLFileCheck/pix/AccessTrackingDXRDynamicTLAS.hlsl new file mode 100644 index 0000000000..5e7a23d60e --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/pix/AccessTrackingDXRDynamicTLAS.hlsl @@ -0,0 +1,76 @@ +// RUN: %dxc -T lib_6_6 %s | %opt -S -hlsl-dxil-pix-shader-access-instrumentation,config=.256;512;1024. | %FileCheck %s + +// This file is checking for the correct access tracking for a descriptor-heap-indexed TLAS for TraceRay. + +// First advance through the output text to where the handle for heap index 7 is created: +// CHECK: call %dx.types.Handle @dx.op.createHandleFromHeap(i32 218, i32 7 + +// The next buffer store should be for this resource. +// See DxilShaderAccessTracking::EmitResourceAccess for how this index is calculated. +// It's the descriptor heap index (7, as seen in the HLSL below) plus 1 (to skip +// over the "out-of-bounds" entry in the output UAV) times 8 DWORDs per record +// (the first DWORD for write, the second for read), plus 4 to offset to the "read" record. +// Read access for descriptor 7 is therefore at (7+1)*8+4 = 68. +// This is then added to the base address for dynamic writes, which is 256 +// (from the config=.256 in the command-line above), for a total of 324. + +// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %[[UAV:[0-9+]]], i32 324, + +RWTexture2D<float4> RTOutput : register(u0); + +struct PayloadData +{ + uint index : INDEX; +}; + +struct AttributeData +{ + float2 barycentrics; +}; + +struct ColorConstant +{ + uint3 color; +}; + +struct AlphaConstant +{ + uint alpha; +}; + + +ConstantBuffer<ColorConstant> color : register(b0); +ConstantBuffer<AlphaConstant> alpha : register(b1); + +[shader("raygeneration")] +void RayGenMain() +{ + uint2 index = DispatchRaysIndex().xy; + uint2 dim = DispatchRaysDimensions().xy; + + PayloadData payload; + payload.index = index.y * dim.x + index.x; + + RayDesc ray; + ray.Origin.x = 2.0 * (index.x + 0.5) / dim.x - 1.0; + ray.Origin.y = 1.0 - 2.0 * (index.y + 0.5) / dim.y; + ray.Origin.z = 0.0; + ray.Direction = float3(0, 0, -1); + ray.TMin = 0.01; + ray.TMax = 100.0; + + RaytracingAccelerationStructure scene = ResourceDescriptorHeap[7]; + + TraceRay( + scene, // Acceleration structure + 0, // Ray flags + 0xFF, // Instance inclusion mask + 0, // RayContributionToHitGroupIndex + 1, // MultiplierForGeometryContributionToHitGroupIndex + 0, // MissShaderIndex + ray, + payload); + + RTOutput[index] = float4(color.color.r / 255.0f, color.color.g / 255.0f, color.color.b / 255.0f, alpha.alpha / 255.0f); +} + diff --git a/tools/clang/test/HLSLFileCheck/pix/DebugBasic.hlsl b/tools/clang/test/HLSLFileCheck/pix/DebugBasic.hlsl index 6d6cd5454d..b211e22959 100644 --- a/tools/clang/test/HLSLFileCheck/pix/DebugBasic.hlsl +++ b/tools/clang/test/HLSLFileCheck/pix/DebugBasic.hlsl @@ -16,7 +16,7 @@ // Check for branches-for-interest and AND value and counter location for a UAV size of 128 // CHECK: br i1 %ComparePos, label %PIXInterestingBlock, label %PIXNonInterestingBlock // CHECK: %PIXOffsetOr = phi i32 [ 0, %PIXInterestingBlock ], [ 64, %PIXNonInterestingBlock ] -// CHECK: %PIXCounterLocation = phi i32 [ 63, %PIXInterestingBlock ], [ 127, %PIXNonInterestingBlock ] +// CHECK: %PIXCounterLocation = phi i32 [ 60, %PIXInterestingBlock ], [ 124, %PIXNonInterestingBlock ] // Check the first block header was emitted: (increment, AND + OR) // CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle, i32 0