diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..921c1ea --- /dev/null +++ b/.clang-format @@ -0,0 +1,54 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +# +# This file is part of the AMD Render Pipeline Shaders SDK which is +# released under the AMD INTERNAL EVALUATION LICENSE. +# +# See file LICENSE.RTF for full license details. + +BasedOnStyle: Google +IndentWidth: 4 +UseTab: Never +ColumnLimit: 120 +Language: Cpp +AccessModifierOffset: -4 +BreakBeforeBraces: Custom +BraceWrapping: + AfterCaseLabel: true + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: true + AfterStruct: true + AfterUnion: true + AfterExternBlock: false + BeforeCatch: true + BeforeElse: true + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +ConstructorInitializerAllOnOneLineOrOnePerLine : false +BreakConstructorInitializers: BeforeComma +DerivePointerAlignment: false +IndentCaseLabels: false +NamespaceIndentation: All +AlignConsecutiveAssignments: true +AlignConsecutiveDeclarations: true +AlignEscapedNewlines: Left +AlignTrailingComments: true +AlignOperands: true +AllowShortFunctionsOnASingleLine: false +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AllowShortBlocksOnASingleLine: false +ReflowComments: false +SortIncludes: false +SortUsingDeclarations: false +BinPackArguments: false +BinPackParameters: false +ExperimentalAutoDetectBinPacking: false +AllowAllParametersOfDeclarationOnNextLine: true +AlignConsecutiveMacros: true +AlignAfterOpenBracket: true diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b9c8538 --- /dev/null +++ b/.gitignore @@ -0,0 +1,27 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+#
+# This file is part of the AMD Render Pipeline Shaders SDK which is
+# released under the AMD INTERNAL EVALUATION LICENSE.
+#
+# See file LICENSE.RTF for full license details.
+
+/bin
+/lib
+/out
+/build
+/generated_projects
+/generated_projects_Arm64
+.vs/
+*.vcxproj.user
+*.vcxproj.filters
+*.csproj.user
+*.suo
+*.VC.db
+*.opendb
+*.db-shm
+*.db-wal
+*.rpsb
+*.json
+/docs/*
+!/docs/assets
+/tests/imported/tmp
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000..734f8ab
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,68 @@
+variables:
+  PackageName: AMDRenderPipelineShaderSDK
+  GIT_SUBMODULE_STRATEGY: normal
+
+stages:
+  - build
+  - test
+
+# x64 Visual Studio 2019 build (Debug and Release) with the Vulkan tests disabled.
+build:
+  tags:
+    - windows
+    - amd64
+    - rps
+  stage: build
+  script:
+    - 'del /q ".\external\catch2\scripts\updateDocumentToC.py"'
+    - 'cmake -S ./ -B ./build -G "Visual Studio 16 2019" -A x64 -DRpsEnableVulkanTests=OFF'
+    - 'cmake --build ./build --config Debug --parallel'
+    - 'cmake --build ./build --config Release --parallel'
+  artifacts:
+    paths:
+      - build
+      - docs
+    exclude:
+      - build/**/CMakeFiles
+      - build/**/*.ilk
+      - build/**/*.obj
+
+# x64 Visual Studio 2017 build (Debug and Release) with the Vulkan tests enabled.
+build-vs2017:
+  tags:
+    - windows
+    - amd64
+  stage: build
+  script:
+    - 'del /q ".\external\catch2\scripts\updateDocumentToC.py"'
+    - 'cmake -S ./ -B ./build_vs2017 -G "Visual Studio 15 2017" -A x64 -DRpsEnableVulkanTests=ON'
+    - 'cmake --build ./build_vs2017 --config Debug --parallel'
+    - 'cmake --build ./build_vs2017 --config Release --parallel'
+  artifacts:
+    paths:
+      - build_vs2017
+      - docs
+    exclude:
+      - build_vs2017/**/CMakeFiles
+      - build_vs2017/**/*.ilk
+      - build_vs2017/**/*.obj
+
+# Runs the Release tests with ctest from inside the ./build directory.
+test:
+  tags:
+    - windows
+    - amd64
+    - rps
+  stage: test
+  script:
+    - 'del /q ".\external\catch2\scripts\updateDocumentToC.py"'
+    - cd build
+    - 'ctest -C Release -V --output-on-failure --output-log rpsTestLog.txt --parallel 8'
+  artifacts:
+    name: "%PackageName%-%CI_COMMIT_TAG%-%CI_COMMIT_REF_NAME%-%CI_COMMIT_SHORT_SHA%"
+    # Upload the log even when tests fail; that is when it is needed most.
+    when: always
+    paths:
+      # ctest is started after `cd build`, so the log is written to ./build;
+      # artifact paths are resolved relative to the project directory.
+      - build/rpsTestLog.txt
diff --git
a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..06e2f45
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,6 @@
+[submodule "external/catch2"]
+	path = external/catch2
+	url = https://github.com/catchorg/Catch2.git
+[submodule "external/imgui"]
+	path = external/imgui
+	url = https://github.com/ocornut/imgui.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..1baac59
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,477 @@
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# This file is part of the AMD Render Pipeline Shaders SDK which is
+# released under the AMD INTERNAL EVALUATION LICENSE.
+#
+# See file LICENSE.RTF for full license details.
+
+cmake_minimum_required(VERSION 3.12.1)
+
+option( RpsBuildTests "Enable unit test targets" ON )
+option( RpsBuildTools "Enable tool targets" ON )
+option( RpsEnableVulkan "Enable Vulkan backend" ON )
+option( RpsEnableImGui "Enable ImGui" ON)
+option( RpsEnableDXAgilitySDK "Enable DX12 Agility SDK" OFF )
+option( RpsWarningsAsErrors "Treat compiler warnings as errors" ON )
+
+if ( "${CMAKE_GENERATOR_PLATFORM}" STREQUAL "" )
+    project( "rps" )
+else ( )
+    message( STATUS "Generator platform: ${CMAKE_GENERATOR_PLATFORM}" )
+    project( "rps_${CMAKE_GENERATOR_PLATFORM}" )
+endif ( )
+
+set( NugetPackagesRoot "${CMAKE_BINARY_DIR}/rps_nuget_packages" )
+set( DXAgilitySDK_VERSION_STRING "1.706.3-preview" )
+set( DXAgilitySDK_VERSION 706 ) # to set D3D12SDKVersion
+set( DXAgilitySDK_INSTALL_DIR "${NugetPackagesRoot}/DXAgilitySDK.${DXAgilitySDK_VERSION_STRING}" )
+set( RpsDXAgilitySDK_DIR "${DXAgilitySDK_INSTALL_DIR}" CACHE STRING "DX12 Agility SDK directory" )
+
+set( RpsRootSolutionFolder "" CACHE STRING "Root IDE solution folder" )
+set( RpsImGui_DIR "${PROJECT_SOURCE_DIR}/external/imgui" CACHE STRING "ImGui source directory" )
+
+# Builds an IDE solution-folder path: RelativeFolder, prefixed with
+# RpsRootSolutionFolder when that is set. The result is stored in the caller's
+# variable named by OutputVar.
+function( BuildFolderProperty RelativeFolder OutputVar )
+    if ( "${RpsRootSolutionFolder}" STREQUAL "" )
+        set(${OutputVar} "${RelativeFolder}" PARENT_SCOPE)
+    else()
+        set(${OutputVar} "${RpsRootSolutionFolder}/${RelativeFolder}" PARENT_SCOPE)
+    endif()
+endfunction()
+
+if ( "${CMAKE_GENERATOR_PLATFORM}" STREQUAL "Arm64" )
+    set( RpsEnableVulkan OFF )
+endif ( )
+
+if ( RpsEnableVulkan )
+    find_package( Vulkan )
+endif ( )
+
+if ( NOT WIN32 )
+    set ( RpsEnableDXAgilitySDK OFF )
+endif()
+
+# JIT only supports win64 for now
+set ( RpsJITSupported FALSE )
+if ( WIN32 AND (CMAKE_SIZEOF_VOID_P EQUAL 8) )
+    set ( RpsJITSupported TRUE )
+endif ( )
+
+if ( RpsEnableDXAgilitySDK )
+
+    set( DX12AgilitySDK_INCLUDE_DIR "${RpsDXAgilitySDK_DIR}/build/native/include" )
+
+    # Fall back to the default install location when a user-supplied directory does not contain the SDK.
+    if ( (NOT "${RpsDXAgilitySDK_DIR}" STREQUAL "${DXAgilitySDK_INSTALL_DIR}") AND (NOT EXISTS "${DX12AgilitySDK_INCLUDE_DIR}") )
+        set( DX12AgilitySDK_DIR "${DXAgilitySDK_INSTALL_DIR}/build/native" )
+        set( DX12AgilitySDK_INCLUDE_DIR "${DX12AgilitySDK_DIR}/include" )
+        message( WARNING "Unable to find DXAgilitySDK at ${RpsDXAgilitySDK_DIR}. Falling back to ${DXAgilitySDK_INSTALL_DIR}" )
+    else()
+        set( DX12AgilitySDK_DIR "${RpsDXAgilitySDK_DIR}/build/native" )
+    endif()
+
+    # Download and install the Agility SDK when it is not already available.
+    if( NOT EXISTS "${DX12AgilitySDK_INCLUDE_DIR}" )
+        message( STATUS "Unable to find DX12AgilitySDK_INCLUDE_DIR at ${DX12AgilitySDK_INCLUDE_DIR}" )
+        message( STATUS "Attempting to install DXAgilitySDK to ${DXAgilitySDK_INSTALL_DIR}" )
+
+        # check internet connection.
+        if( WIN32 )
+            set ( PingCountArg "-n" )
+        else()
+            set ( PingCountArg "-c" )
+        endif()
+        execute_process(
+            COMMAND ping www.amd.com ${PingCountArg} 2 -w 1000
+            ERROR_QUIET
+            OUTPUT_QUIET
+            RESULT_VARIABLE PING_STATUS
+        )
+
+        # download agility sdk.
+        if ( PING_STATUS GREATER 0 )
+            set ( RpsEnableDXAgilitySDK OFF )
+            message( WARNING "Cannot download DXAgilitySDK as no internet connection. Unsetting RpsEnableDXAgilitySDK." )
+        else()
+            set( DXAgilitySDK_DOWNLOAD_URL "https://www.nuget.org/api/v2/package/Microsoft.Direct3D.D3D12/${DXAgilitySDK_VERSION_STRING}" )
+            set( DX12AgilitySDK_ERROR "" )
+            message( STATUS "Downloading DXAgilitySDK from ${DXAgilitySDK_DOWNLOAD_URL} ..." )
+            file( MAKE_DIRECTORY "${DXAgilitySDK_INSTALL_DIR}" )
+            file( DOWNLOAD "${DXAgilitySDK_DOWNLOAD_URL}" "${DXAgilitySDK_INSTALL_DIR}.zip" STATUS DXAgilitySDK_DOWNLOAD_STATUS )
+            list( GET DXAgilitySDK_DOWNLOAD_STATUS 0 DXAgilitySDK_DOWNLOAD_CODE )
+
+            if ( NOT DXAgilitySDK_DOWNLOAD_CODE EQUAL 0 )
+                set( DX12AgilitySDK_ERROR "download failed: ${DXAgilitySDK_DOWNLOAD_STATUS}" )
+            else()
+                # message( VERBOSE ) needs CMake 3.15; STATUS also works with the 3.12 minimum.
+                message( STATUS "Extracting ${DXAgilitySDK_INSTALL_DIR}.zip to ${DXAgilitySDK_INSTALL_DIR} ..." )
+                execute_process(
+                    COMMAND ${CMAKE_COMMAND} "-E" "tar" "xvz" "${DXAgilitySDK_INSTALL_DIR}.zip"
+                    WORKING_DIRECTORY "${DXAgilitySDK_INSTALL_DIR}"
+                    OUTPUT_QUIET
+                    RESULT_VARIABLE DX12AgilitySDK_EXTRACT_RESULT
+                    ERROR_VARIABLE DX12AgilitySDK_EXTRACT_STDERR
+                )
+                # Judge success by the exit code and the extracted files, not by whether stderr is empty.
+                if ( NOT DX12AgilitySDK_EXTRACT_RESULT EQUAL 0 )
+                    set( DX12AgilitySDK_ERROR "extraction failed (${DX12AgilitySDK_EXTRACT_RESULT}): ${DX12AgilitySDK_EXTRACT_STDERR}" )
+                elseif ( NOT EXISTS "${DX12AgilitySDK_INCLUDE_DIR}" )
+                    set( DX12AgilitySDK_ERROR "${DX12AgilitySDK_INCLUDE_DIR} is missing after extraction" )
+                endif()
+            endif()
+
+            message( STATUS "Cleaning ${DXAgilitySDK_INSTALL_DIR}.zip ..." )
+            # remove the downloaded archive
+            file( REMOVE_RECURSE "${DXAgilitySDK_INSTALL_DIR}.zip" )
+
+            if ( "${DX12AgilitySDK_ERROR}" STREQUAL "" )
+                message( STATUS "Successfully installed DXAgilitySDK to ${DXAgilitySDK_INSTALL_DIR}" )
+                message( STATUS "DX12AgilitySDK_INCLUDE_DIR = ${DX12AgilitySDK_INCLUDE_DIR}")
+            else()
+                set ( RpsEnableDXAgilitySDK OFF )
+                message( WARNING "Unsetting RpsEnableDXAgilitySDK. DXAgilitySDK install failed with: ${DX12AgilitySDK_ERROR}" )
+            endif()
+        endif()
+    else()
+        message( STATUS "DX12AgilitySDK_INCLUDE_DIR = ${DX12AgilitySDK_INCLUDE_DIR}" )
+    endif()
+
+endif( )
+
+set_property( GLOBAL PROPERTY USE_FOLDERS ON )
+if ( MSVC )
+    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /UMBCS" )
+    if ( RpsWarningsAsErrors )
+        set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX" )
+    endif ( )
+
+    if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+        set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-private-field" )
+    endif()
+elseif( CMAKE_CXX_COMPILER_ID MATCHES "Clang" )
+    if ( RpsWarningsAsErrors )
+        set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror" )
+    endif ( )
+    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable -Wno-unused-private-field" )
+endif ( )
+
+if (WIN32 AND "$ENV{CXXFLAGS}" MATCHES "-fsanitize=address" )
+    message( STATUS "ASAN enabled. Disabling SEH" )
+    add_definitions( -DCATCH_CONFIG_NO_WINDOWS_SEH )
+endif( )
+
+if ( RpsEnableImGui AND (CMAKE_SIZEOF_VOID_P LESS 8) )
+    add_definitions( -DImTextureID=ImU64 )
+endif( )
+
+include(CheckIncludeFiles)
+
+# Adds -D<DefinitionName> to the directory-level definitions when the header
+# IncludeFileName is found by check_include_files().
+function( CheckIncludeFilesAndAddDefinition IncludeFileName DefinitionName )
+    # check_include_files() caches its answer under the result variable name, so the
+    # name must be unique per check or later calls would reuse the first header's result.
+    check_include_files( "${IncludeFileName}" ${DefinitionName}_INCLUDE_FOUND )
+    if ( ${DefinitionName}_INCLUDE_FOUND )
+        add_definitions( -D${DefinitionName} )
+    endif( )
+endfunction( )
+
+CheckIncludeFilesAndAddDefinition(intrin.h RPS_HAS_INTRIN_H)
+
+# Adds -D<DefinitionName> to the directory-level definitions when
+# tools/cmake_tests/<TryCompileFileName> builds successfully with try_compile().
+function( TryCompileFileAndAddDefinition TryCompileFileName DefinitionName )
+    try_compile( TryCompileSucceeded ${CMAKE_CURRENT_BINARY_DIR}/cmake_try_compile ${CMAKE_CURRENT_SOURCE_DIR}/tools/cmake_tests/${TryCompileFileName} )
+    message( STATUS "${DefinitionName}=${TryCompileSucceeded}" )
+    if ( TryCompileSucceeded )
+        add_definitions( -D${DefinitionName} )
+    endif()
+endfunction( )
+
+TryCompileFileAndAddDefinition( has_bitscan.c RPS_HAS_BITSCAN )
+TryCompileFileAndAddDefinition( has_builtin_clz_ctz.c RPS_HAS_BUILTIN_CLZ_CTZ )
+TryCompileFileAndAddDefinition( has_nodiscard.cpp RPS_HAS_NODISCARD )
+TryCompileFileAndAddDefinition( has_maybe_unused.cpp RPS_HAS_MAYBE_UNUSED )
+TryCompileFileAndAddDefinition( has_popcnt.c RPS_HAS_POPCNT )
+TryCompileFileAndAddDefinition( has_builtin_popcount.c RPS_HAS_BUILTIN_POPCOUNT )
+
+if ( RpsBuildTests )
+    enable_testing()
+endif ( )
+
+# Compile RPSL
+# Registers a build rule that runs rps-hlslc on RpslFileName and produces
+# <name>.rpsl.g.c under <current binary dir>/<TargetName>/<OutDirPrefix>/.
+# The generated file is appended to the caller's GeneratedSources variable.
+function( CompileRpslDxc TargetName RpslFileName GeneratedSources OutDirPrefix )
+    get_filename_component( FileNameWithoutExtension ${RpslFileName} NAME_WE )
+    set( OutDirectory "${CMAKE_CURRENT_BINARY_DIR}/${TargetName}/${OutDirPrefix}/" )
+    set( OutputSource ${OutDirectory}${FileNameWithoutExtension}.rpsl.g.c )
+    # -O0 for the Debug configuration, -O3 for every other configuration.
+    string( APPEND RpsCompileOpts "$<IF:$<CONFIG:Debug>,-O0,-O3>" )
+    # The custom command runs inside OutDirectory, so it has to exist before the build starts.
+    file( MAKE_DIRECTORY "${OutDirectory}" )
+    add_custom_command(
+        OUTPUT ${OutputSource}
+        COMMAND "${PROJECT_SOURCE_DIR}/tools/rps_hlslc/bin/rps-hlslc.exe" "${RpslFileName}" -od "${OutDirectory}" -m ${FileNameWithoutExtension} ${RpsCompileOpts}
+        COMMAND ${CMAKE_COMMAND} -E echo "Compiling RPSL ${RpslFileName} : '${PROJECT_SOURCE_DIR}/tools/rps_hlslc/bin/rps-hlslc.exe ${RpslFileName} -od ${OutDirectory} -m ${FileNameWithoutExtension} ${RpsCompileOpts}'"
+        WORKING_DIRECTORY ${OutDirectory}
+        DEPENDS ${RpslFileName} "${PROJECT_SOURCE_DIR}/tools/rps_hlslc/bin/rps-hlslc.exe" "${PROJECT_SOURCE_DIR}/tools/rps_hlslc/bin/dxcompiler.dll" "${PROJECT_SOURCE_DIR}/tools/rps_hlslc/bin/llvm-cbe.exe"
+        VERBATIM )
+    list( APPEND GeneratedSources "${OutputSource}" )
+    set( GeneratedSources "${GeneratedSources}" PARENT_SCOPE )
+endfunction()
+
+# Copy assets
+# Adds a PRE_BUILD step to TargetName for each file in ShaderFiles that copies
+# the file into the directory of the built target. SrcFolder is not used.
+function( CopyShaders TargetName ShaderFiles SrcFolder )
+    foreach( Shader ${ShaderFiles} )
+        message( STATUS "Copy Shader: ${Shader} => ${TargetName} output directory" )
+        add_custom_command( TARGET ${TargetName} PRE_BUILD
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different "${Shader}" "$<TARGET_FILE_DIR:${TargetName}>" )
+    endforeach()
+endfunction()
+
+# Adds POST_BUILD steps that copy dxil.dll and dxcompiler.dll into the directory
+# of the built TargetName. Only applied when pointers are 8 bytes wide.
+function( CopyDXC TargetName )
+    #x64 only for now
+    if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+        add_custom_command(
+            TARGET ${TargetName} POST_BUILD
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different
+            "${PROJECT_SOURCE_DIR}/external/dxc/x64/dxil.dll"
+            "$<TARGET_FILE_DIR:${TargetName}>" )
+        add_custom_command(
+            TARGET ${TargetName} POST_BUILD
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different
+            "${PROJECT_SOURCE_DIR}/tools/rps_hlslc/bin/dxcompiler.dll"
+            "$<TARGET_FILE_DIR:${TargetName}>" )
+    endif()
+endfunction()
+
+# Adds POST_BUILD steps that copy rps-jit.dll, rps-hlslc.exe and the DXC binaries
+# (via CopyDXC) into the directory of the built TargetName. 8-byte pointers only.
+function( CopyJITCompiler TargetName )
+    #x64 only for now
+    if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+        add_custom_command(
+            TARGET ${TargetName} POST_BUILD
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different
+            "${PROJECT_SOURCE_DIR}/tools/rps_hlslc/bin/rps-jit.dll"
+            "$<TARGET_FILE_DIR:${TargetName}>" )
+        # TODO: For now we need rps-hlslc.exe to compile rpsl to bitcode.
+        # Should allow dxcompiler.dll handle this.
+        add_custom_command(
+            TARGET ${TargetName} POST_BUILD
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different
+            "${PROJECT_SOURCE_DIR}/tools/rps_hlslc/bin/rps-hlslc.exe"
+            "$<TARGET_FILE_DIR:${TargetName}>" )
+        CopyDXC( ${TargetName} )
+    endif()
+endfunction()
+
+# Adds POST_BUILD steps that copy the Agility SDK runtime DLLs into a D3D12/
+# folder next to the built TargetName. Does nothing unless RpsEnableDXAgilitySDK is on.
+function( CopyDX12AgilitySDKBinaries TargetName )
+    if (RpsEnableDXAgilitySDK)
+        # Pick the SDK binary folder for the target architecture. NOTE(review): folder names assumed from the NuGet package layout - confirm.
+        string( TOLOWER "${CMAKE_GENERATOR_PLATFORM}" GeneratorPlatformLower )
+        if ( "${GeneratorPlatformLower}" STREQUAL "arm64" )
+            set( AgilitySDKPlatformName arm64 )
+        elseif ( CMAKE_SIZEOF_VOID_P EQUAL 8 )
+            set( AgilitySDKPlatformName x64 )
+        else ( )
+            set( AgilitySDKPlatformName win32 )
+        endif ( )
+        foreach(BinaryToCopy D3D12Core.dll D3D12SDKLayers.dll)
+            add_custom_command(
+                TARGET ${TargetName} POST_BUILD
+                COMMAND ${CMAKE_COMMAND} -E copy_if_different
+                "${DX12AgilitySDK_DIR}/bin/${AgilitySDKPlatformName}/${BinaryToCopy}"
+                "$<TARGET_FILE_DIR:${TargetName}>/D3D12/${BinaryToCopy}" )
+        endforeach()
+    endif()
+endfunction()
+
+# Adding a library module from the ./src/${ModuleName} folder
+# Globs ModuleFolder recursively. With sources present a LibType library named
+# ModuleName is created; otherwise a custom target ModuleName.headers holds the
+# headers. SrcFolder is the IDE folder, LinkLibs are linked PRIVATE.
+function( AddModule ModuleName ModuleFolder SrcFolder SrcInclude CompileFlags LibType LinkLibs )
+    file( GLOB_RECURSE HeaderFiles
+        "${ModuleFolder}/*.h"
+        "${ModuleFolder}/*.hpp"
+        "${ModuleFolder}/*.inl" )
+    file( GLOB_RECURSE SourceFiles
+        "${ModuleFolder}/*.c"
+        "${ModuleFolder}/*.cpp" )
+    source_group( TREE "${ModuleFolder}/" FILES ${HeaderFiles} ${SourceFiles} )
+
+    if ( SourceFiles )
+        add_library( ${ModuleName} ${LibType} ${HeaderFiles} ${SourceFiles} )
+        target_include_directories( ${ModuleName} PRIVATE ${PROJECT_SOURCE_DIR}/include/ ${PROJECT_SOURCE_DIR}/src/ ${SrcInclude} )
+    else ( )
+        set(ModuleName ${ModuleName}.headers)
+        add_custom_target( ${ModuleName} SOURCES ${HeaderFiles} )
+    endif ( )
+
+    if ( CompileFlags )
+        set_target_properties( ${ModuleName} PROPERTIES COMPILE_FLAGS "${CompileFlags}")
+    endif ( )
+
+    if ( SrcFolder )
+        set_target_properties( ${ModuleName} PROPERTIES FOLDER "${SrcFolder}")
+    endif ( )
+
+    if ( LinkLibs )
+        target_link_libraries( ${ModuleName} PRIVATE ${LinkLibs} )
+    endif ( )
+
+endfunction( )
+
+# Adding a library module from the ./src/${ModuleName} folder without recursion
+# Same as AddModule but globs only ModuleFolder itself. Without sources an
+# INTERFACE library ModuleName plus a ModuleName.headers custom target are created.
+function( AddModuleNoRecurse ModuleName ModuleFolder SrcFolder SrcInclude CompileFlags LibType )
+    file( GLOB HeaderFiles
+        "${ModuleFolder}/*.h"
+        "${ModuleFolder}/*.hpp"
+        "${ModuleFolder}/*.inl" )
+    file( GLOB SourceFiles
+        "${ModuleFolder}/*.c"
+        "${ModuleFolder}/*.cpp" )
+    source_group( TREE "${ModuleFolder}/" FILES ${HeaderFiles} ${SourceFiles} )
+
+    if ( SourceFiles )
+        add_library( ${ModuleName} ${LibType} ${HeaderFiles} ${SourceFiles} )
+        target_include_directories( ${ModuleName} PRIVATE ${PROJECT_SOURCE_DIR}/include/ ${PROJECT_SOURCE_DIR}/src/ ${SrcInclude} )
+        set( PropertyTarget ${ModuleName} )
+    else ( )
+        add_library( ${ModuleName} INTERFACE )
+        add_custom_target( ${ModuleName}.headers SOURCES ${HeaderFiles} )
+        target_include_directories( ${ModuleName} INTERFACE ${PROJECT_SOURCE_DIR}/src/ )
+        # CMake older than 3.19 rejects properties such as FOLDER on INTERFACE
+        # libraries, so the IDE folder is applied to the companion headers target.
+        set( PropertyTarget ${ModuleName}.headers )
+    endif ( )
+
+    if ( CompileFlags AND SourceFiles )
+        set_target_properties( ${ModuleName} PROPERTIES COMPILE_FLAGS "${CompileFlags}")
+    endif ( )
+
+    if ( SrcFolder )
+        set_target_properties( ${PropertyTarget} PROPERTIES FOLDER "${SrcFolder}")
+    endif ( )
+endfunction( )
+
+if(${CMAKE_VERSION} VERSION_LESS "3.16.0")
+    message( WARNING "Current CMake version doesn't support VS_DPI_AWARE. Consider update to 3.16.0 or newer")
+endif()
+
+# Adding a sample project
+# Creates the WIN32 executable AppName from the sources and shader files under SrcFolder.
+# When SrcSharedFolder is set its headers are added and, for app names containing "rpsl", its
+# .rpsl files are compiled via CompileRpslDxc. CompileFlags containing "CONSOLE" selects /SUBSYSTEM:CONSOLE.
+function( AddCppApp AppName AppFolder SrcFolder SrcSharedFolder SrcInclude CompileFlags DependenciesString )
+    file( GLOB_RECURSE SourceFiles
+        "${SrcFolder}/*.h"
+        "${SrcFolder}/*.hpp"
+        "${SrcFolder}/*.inl"
+        "${SrcFolder}/*.c"
+        "${SrcFolder}/*.cpp" )
+    source_group( TREE "${SrcFolder}/" FILES ${SourceFiles} )
+
+    file (GLOB_RECURSE ShaderFiles
+        "${SrcFolder}/*.hlsl"
+        "${SrcFolder}/*.glsl"
+        "${SrcFolder}/*.dxbc"
+        "${SrcFolder}/*.spv"
+        "${SrcFolder}/*.frag"
+        "${SrcFolder}/*.vert" )
+    source_group( TREE "${SrcFolder}/" PREFIX "shaders" FILES ${ShaderFiles} )
+
+    # Start from empty values so same-named variables in the calling scope cannot leak in.
+    set( SourceSharedFiles "" )
+    set( RpslFiles "" )
+    set( GeneratedSources "" )
+    set( LinkFlags "" )
+
+    if ( SrcSharedFolder )
+        file( GLOB_RECURSE SourceSharedFiles
+            "${SrcSharedFolder}/*.h" )
+        source_group( TREE "${SrcSharedFolder}/" PREFIX "shared" FILES ${SourceSharedFiles} )
+
+        if ( "${AppName}" MATCHES "rpsl" )
+            file( GLOB_RECURSE RpslFiles
+                "${SrcSharedFolder}/*.rpsl" )
+            source_group( TREE "${SrcSharedFolder}/" PREFIX "rpsl" FILES ${RpslFiles} )
+        endif( )
+
+        foreach( RpslFileName ${RpslFiles} )
+            message( STATUS "Found Rpsl: " ${RpslFileName} )
+            CompileRpslDxc( ${AppName} ${RpslFileName} "${GeneratedSources}" "Generated")
+        endforeach()
+        source_group( TREE "${CMAKE_CURRENT_BINARY_DIR}/${AppName}/Generated/" PREFIX "rps_generated" FILES ${GeneratedSources} )
+
+    endif( )
+
+    set_source_files_properties( ${ShaderFiles} PROPERTIES VS_COPY_TO_OUT_DIR "Always" VS_TOOL_OVERRIDE "Content" )
+
+    add_executable( ${AppName} WIN32 ${SourceFiles} ${ShaderFiles} ${RpslFiles} ${GeneratedSources} ${SourceSharedFiles} )
+
+    CopyShaders( ${AppName} "${ShaderFiles}" "${SrcFolder}" )
+
+    target_include_directories( ${AppName} PRIVATE
+        ${PROJECT_SOURCE_DIR}/include/
+        ${PROJECT_SOURCE_DIR}/src/
+        ${SrcFolder}/
+        ${SrcInclude}
+    )
+    set( DependencyList ${DependenciesString} )
+    target_link_libraries( ${AppName} ${DependencyList} )
+
+    if ( "${CompileFlags}" MATCHES "CONSOLE" )
+        message( STATUS "Console subsystem app: ${AppName}" )
+        set( LinkFlags /SUBSYSTEM:CONSOLE )
+    endif( )
+
+    set_target_properties( ${AppName} PROPERTIES
+        VS_DEBUGGER_WORKING_DIRECTORY "${SrcFolder}"
+        COMPILE_FLAGS "${CompileFlags}"
+        FOLDER "${AppFolder}"
+        LINK_FLAGS "${LinkFlags}"
+        VS_DPI_AWARE "PerMonitor"
+        RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${AppName}")
+
+endfunction()
+
+# Calls AddCppApp for every name in AppNames. The target is named <AppName><Variant>_<Platform>;
+# sources come from samples/<Platform>/<AppName> and samples/shared/<AppName>.
+function( AddSampleApps AppNames Variant Platform SrcInclude CompileFlags DependenciesString )
+    foreach(AppName ${AppNames})
+        AddCppApp( ${AppName}${Variant}_${Platform} "samples/${Platform}" "${PROJECT_SOURCE_DIR}/samples/${Platform}/${AppName}" "${PROJECT_SOURCE_DIR}/samples/shared/${AppName}" "${SrcInclude}" "${CompileFlags}" "${DependenciesString}" )
+    endforeach()
+endfunction()
+
+set ( FullSource TRUE )
+
+
+# Common for full or limited source
+BuildFolderProperty( "modules" ModuleFolder )
+AddModule( rps ${PROJECT_SOURCE_DIR}/include "${ModuleFolder}" "" "" INTERFACE "" )
+
+add_subdirectory( src )
+add_subdirectory( external )
+
+if ( RpsBuildTests )
+    add_subdirectory( "${CMAKE_CURRENT_SOURCE_DIR}/tests" )
+endif ( )
+
+if ( RpsBuildTools )
+    add_subdirectory( tools )
+endif ( )
diff --git a/Doxyfile b/Doxyfile
new file mode 100644
index 0000000..f5385a6
--- /dev/null
+++ b/Doxyfile
@@ -0,0 +1,219 @@
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# This file is part of the AMD Render Pipeline Shaders SDK which is
+# released under the AMD INTERNAL EVALUATION LICENSE.
+#
+# See file LICENSE.RTF for full license details.
+
+# Doxyfile 1.9.2
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+DOXYFILE_ENCODING = UTF-8
+PROJECT_NAME = "Render Pipeline Shaders"
+PROJECT_NUMBER =
+PROJECT_BRIEF = "A software stack for programming graphics applications using Render Pipeline Shaders."
+PROJECT_LOGO = +OUTPUT_DIRECTORY = ./docs +CREATE_SUBDIRS = NO +ALLOW_UNICODE_NAMES = NO +OUTPUT_LANGUAGE = English +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = NO +FULL_PATH_NAMES = YES +STRIP_FROM_PATH = +STRIP_FROM_INC_PATH = +SHORT_NAMES = NO +JAVADOC_AUTOBRIEF = NO +JAVADOC_BANNER = NO +QT_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = YES +PYTHON_DOCSTRING = YES +INHERIT_DOCS = YES +SEPARATE_MEMBER_PAGES = YES +TAB_SIZE = 4 +OPTIMIZE_OUTPUT_FOR_C = YES +OPTIMIZE_OUTPUT_JAVA = NO +OPTIMIZE_FOR_FORTRAN = NO +OPTIMIZE_OUTPUT_VHDL = NO +OPTIMIZE_OUTPUT_SLICE = NO +MARKDOWN_SUPPORT = YES +TOC_INCLUDE_HEADINGS = 5 +AUTOLINK_SUPPORT = YES +BUILTIN_STL_SUPPORT = NO +CPP_CLI_SUPPORT = NO +SIP_SUPPORT = NO +IDL_PROPERTY_SUPPORT = YES +DISTRIBUTE_GROUP_DOC = NO +GROUP_NESTED_COMPOUNDS = NO +SUBGROUPING = YES +INLINE_GROUPED_CLASSES = NO +INLINE_SIMPLE_STRUCTS = NO +TYPEDEF_HIDES_STRUCT = YES +LOOKUP_CACHE_SIZE = 0 +NUM_PROC_THREADS = 1 +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- +EXTRACT_ALL = YES +EXTRACT_PRIVATE = NO +EXTRACT_PRIV_VIRTUAL = NO +EXTRACT_PACKAGE = NO +EXTRACT_STATIC = YES +EXTRACT_LOCAL_CLASSES = YES +EXTRACT_LOCAL_METHODS = YES +EXTRACT_ANON_NSPACES = NO +RESOLVE_UNNAMED_PARAMS = YES +HIDE_UNDOC_MEMBERS = NO +HIDE_UNDOC_CLASSES = NO +HIDE_FRIEND_COMPOUNDS = NO +HIDE_IN_BODY_DOCS = NO +INTERNAL_DOCS = NO +CASE_SENSE_NAMES = NO +HIDE_SCOPE_NAMES = NO +HIDE_COMPOUND_REFERENCE= NO +SHOW_HEADERFILE = YES +SHOW_INCLUDE_FILES = NO +SHOW_GROUPED_MEMB_INC = NO +FORCE_LOCAL_INCLUDES = NO +INLINE_INFO = YES +SORT_MEMBER_DOCS = NO +SORT_BRIEF_DOCS = NO +SORT_MEMBERS_CTORS_1ST = NO +SORT_GROUP_NAMES = NO 
+SORT_BY_SCOPE_NAME = NO +STRICT_PROTO_MATCHING = NO +GENERATE_TODOLIST = NO +GENERATE_TESTLIST = NO +GENERATE_BUGLIST = NO +GENERATE_DEPRECATEDLIST= NO +MAX_INITIALIZER_LINES = 30 +SHOW_USED_FILES = NO +SHOW_FILES = NO +SHOW_NAMESPACES = NO +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- +QUIET = NO +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_IF_INCOMPLETE_DOC = YES +WARN_NO_PARAMDOC = YES +WARN_AS_ERROR = NO +WARN_FORMAT = "$file:$line: $text" +WARN_LOGFILE = +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- +INPUT = ./include \ + ./tools/rps_hlslc/rpsl +EXCLUDE_PATTERNS = +EXCLUDE = +INPUT_ENCODING = UTF-8 +FILE_PATTERNS = *.h *.hpp +RECURSIVE = YES +EXCLUDE_SYMLINKS = NO +EXCLUDE_SYMBOLS = rps::details \ + rps::details::* +EXAMPLE_RECURSIVE = NO +FILTER_SOURCE_FILES = NO +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- +SOURCE_BROWSER = YES +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = NO +REFERENCES_RELATION = NO +SOURCE_TOOLTIPS = YES +USE_HTAGS = NO +VERBATIM_HEADERS = YES +CLANG_ASSISTED_PARSING = NO +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- +ALPHABETICAL_INDEX = YES +IGNORE_PREFIX = +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output 
+#--------------------------------------------------------------------------- +GENERATE_HTML = YES +HTML_OUTPUT = html +HTML_FILE_EXTENSION = .html +HTML_COLORSTYLE_HUE = 220 +HTML_COLORSTYLE_SAT = 100 +HTML_COLORSTYLE_GAMMA = 80 +HTML_TIMESTAMP = NO +HTML_DYNAMIC_MENUS = YES +HTML_DYNAMIC_SECTIONS = NO +HTML_INDEX_NUM_ENTRIES = 100 +GENERATE_DOCSET = NO +GENERATE_HTMLHELP = NO +GENERATE_QHP = NO +GENERATE_ECLIPSEHELP = NO +DISABLE_INDEX = NO +GENERATE_TREEVIEW = YES +FULL_SIDEBAR = NO +ENUM_VALUES_PER_LINE = 4 +TREEVIEW_WIDTH = 250 +EXT_LINKS_IN_WINDOW = NO +HTML_FORMULA_FORMAT = png +FORMULA_FONTSIZE = 10 +FORMULA_TRANSPARENT = YES +USE_MATHJAX = NO +SEARCHENGINE = YES +SERVER_BASED_SEARCH = NO +EXTERNAL_SEARCH = NO +#--------------------------------------------------------------------------- +# Configuration options related to enablements +#--------------------------------------------------------------------------- +GENERATE_LATEX = NO +GENERATE_RTF = NO +GENERATE_MAN = NO +GENERATE_XML = NO +GENERATE_DOCBOOK = NO +GENERATE_AUTOGEN_DEF = NO +GENERATE_PERLMOD = NO +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = YES +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES = YES +INCLUDE_PATH = ./include +INCLUDE_FILE_PATTERNS = *h *hpp +PREDEFINED = __cplusplus \ + RPS_D3D12_RUNTIME \ + RPS_VK_RUNTIME \ + RPS_D3D12_RUNTIME +SKIP_FUNCTION_MACROS = YES +#--------------------------------------------------------------------------- +# Configuration options related to external references +#--------------------------------------------------------------------------- +ALLEXTERNALS = NO +EXTERNAL_GROUPS = YES +EXTERNAL_PAGES = YES +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool 
+#--------------------------------------------------------------------------- +CLASS_DIAGRAMS = YES +HIDE_UNDOC_RELATIONS = YES +HAVE_DOT = NO +CLASS_GRAPH = YES +DOT_CLEANUP = YES \ No newline at end of file diff --git a/LICENSE.rtf b/LICENSE.rtf new file mode 100644 index 0000000..3a11976 --- /dev/null +++ b/LICENSE.rtf @@ -0,0 +1,3073 @@ +{\rtf1\adeflang1025\ansi\ansicpg1252\uc1\adeff0\deff0\stshfdbch0\stshfloch0\stshfhich0\stshfbi0\deflang4105\deflangfe2052\themelang1033\themelangfe2052\themelangcs0{\fonttbl{\f0\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f1\fbidi \fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Arial;} +{\f2\fbidi \fmodern\fcharset0\fprq1{\*\panose 02070309020205020404}Courier New;}{\f34\fbidi \froman\fcharset0\fprq2{\*\panose 02040503050406030204}Cambria Math;}{\f37\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0502020204030204}Calibri;} +{\f40\fbidi \fswiss\fcharset0\fprq2{\*\panose 020b0604030504040204}Verdana;}{\f44\fbidi \froman\fcharset0\fprq2{\*\panose 00000000000000000000}Cambria;}{\f45\fbidi \fswiss\fcharset0\fprq2{\*\panose 020b0604030504040204}Tahoma;} +{\flomajor\f31500\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fdbmajor\f31501\fbidi \fnil\fcharset134\fprq2{\*\panose 02010600030101010101}SimSun{\*\falt \'cb\'ce\'cc\'e5};} +{\fhimajor\f31502\fbidi \froman\fcharset0\fprq2{\*\panose 00000000000000000000}Cambria;}{\fbimajor\f31503\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;} +{\flominor\f31504\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fdbminor\f31505\fbidi \fnil\fcharset134\fprq2{\*\panose 02010600030101010101}SimSun{\*\falt \'cb\'ce\'cc\'e5};} +{\fhiminor\f31506\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0502020204030204}Calibri;}{\fbiminor\f31507\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f46\fbidi \froman\fcharset238\fprq2 Times New Roman CE;} 
+{\f47\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\f49\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\f50\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\f51\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);} +{\f52\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\f53\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\f54\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\f56\fbidi \fswiss\fcharset238\fprq2 Arial CE;} +{\f57\fbidi \fswiss\fcharset204\fprq2 Arial Cyr;}{\f59\fbidi \fswiss\fcharset161\fprq2 Arial Greek;}{\f60\fbidi \fswiss\fcharset162\fprq2 Arial Tur;}{\f61\fbidi \fswiss\fcharset177\fprq2 Arial (Hebrew);} +{\f62\fbidi \fswiss\fcharset178\fprq2 Arial (Arabic);}{\f63\fbidi \fswiss\fcharset186\fprq2 Arial Baltic;}{\f64\fbidi \fswiss\fcharset163\fprq2 Arial (Vietnamese);}{\f66\fbidi \fmodern\fcharset238\fprq1 Courier New CE;} +{\f67\fbidi \fmodern\fcharset204\fprq1 Courier New Cyr;}{\f69\fbidi \fmodern\fcharset161\fprq1 Courier New Greek;}{\f70\fbidi \fmodern\fcharset162\fprq1 Courier New Tur;}{\f71\fbidi \fmodern\fcharset177\fprq1 Courier New (Hebrew);} +{\f72\fbidi \fmodern\fcharset178\fprq1 Courier New (Arabic);}{\f73\fbidi \fmodern\fcharset186\fprq1 Courier New Baltic;}{\f74\fbidi \fmodern\fcharset163\fprq1 Courier New (Vietnamese);}{\f416\fbidi \fswiss\fcharset238\fprq2 Calibri CE;} +{\f417\fbidi \fswiss\fcharset204\fprq2 Calibri Cyr;}{\f419\fbidi \fswiss\fcharset161\fprq2 Calibri Greek;}{\f420\fbidi \fswiss\fcharset162\fprq2 Calibri Tur;}{\f421\fbidi \fswiss\fcharset177\fprq2 Calibri (Hebrew);} +{\f422\fbidi \fswiss\fcharset178\fprq2 Calibri (Arabic);}{\f423\fbidi \fswiss\fcharset186\fprq2 Calibri Baltic;}{\f424\fbidi \fswiss\fcharset163\fprq2 Calibri (Vietnamese);}{\f446\fbidi \fswiss\fcharset238\fprq2 Verdana CE;} +{\f447\fbidi \fswiss\fcharset204\fprq2 Verdana Cyr;}{\f449\fbidi \fswiss\fcharset161\fprq2 Verdana Greek;}{\f450\fbidi \fswiss\fcharset162\fprq2 Verdana 
Tur;}{\f453\fbidi \fswiss\fcharset186\fprq2 Verdana Baltic;} +{\f454\fbidi \fswiss\fcharset163\fprq2 Verdana (Vietnamese);}{\f486\fbidi \froman\fcharset238\fprq2 Cambria CE;}{\f487\fbidi \froman\fcharset204\fprq2 Cambria Cyr;}{\f489\fbidi \froman\fcharset161\fprq2 Cambria Greek;} +{\f490\fbidi \froman\fcharset162\fprq2 Cambria Tur;}{\f493\fbidi \froman\fcharset186\fprq2 Cambria Baltic;}{\f494\fbidi \froman\fcharset163\fprq2 Cambria (Vietnamese);}{\f496\fbidi \fswiss\fcharset238\fprq2 Tahoma CE;} +{\f497\fbidi \fswiss\fcharset204\fprq2 Tahoma Cyr;}{\f499\fbidi \fswiss\fcharset161\fprq2 Tahoma Greek;}{\f500\fbidi \fswiss\fcharset162\fprq2 Tahoma Tur;}{\f501\fbidi \fswiss\fcharset177\fprq2 Tahoma (Hebrew);} +{\f502\fbidi \fswiss\fcharset178\fprq2 Tahoma (Arabic);}{\f503\fbidi \fswiss\fcharset186\fprq2 Tahoma Baltic;}{\f504\fbidi \fswiss\fcharset163\fprq2 Tahoma (Vietnamese);}{\f505\fbidi \fswiss\fcharset222\fprq2 Tahoma (Thai);} +{\flomajor\f31508\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\flomajor\f31509\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\flomajor\f31511\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;} +{\flomajor\f31512\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\flomajor\f31513\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\flomajor\f31514\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);} +{\flomajor\f31515\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\flomajor\f31516\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fdbmajor\f31520\fbidi \fnil\fcharset0\fprq2 SimSun Western{\*\falt \'cb\'ce\'cc\'e5};} +{\fhimajor\f31528\fbidi \froman\fcharset238\fprq2 Cambria CE;}{\fhimajor\f31529\fbidi \froman\fcharset204\fprq2 Cambria Cyr;}{\fhimajor\f31531\fbidi \froman\fcharset161\fprq2 Cambria Greek;}{\fhimajor\f31532\fbidi \froman\fcharset162\fprq2 Cambria Tur;} +{\fhimajor\f31535\fbidi \froman\fcharset186\fprq2 Cambria Baltic;}{\fhimajor\f31536\fbidi \froman\fcharset163\fprq2 
Cambria (Vietnamese);}{\fbimajor\f31538\fbidi \froman\fcharset238\fprq2 Times New Roman CE;} +{\fbimajor\f31539\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fbimajor\f31541\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fbimajor\f31542\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;} +{\fbimajor\f31543\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fbimajor\f31544\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fbimajor\f31545\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;} +{\fbimajor\f31546\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\flominor\f31548\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\flominor\f31549\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;} +{\flominor\f31551\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\flominor\f31552\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\flominor\f31553\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);} +{\flominor\f31554\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\flominor\f31555\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\flominor\f31556\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);} +{\fdbminor\f31560\fbidi \fnil\fcharset0\fprq2 SimSun Western{\*\falt \'cb\'ce\'cc\'e5};}{\fhiminor\f31568\fbidi \fswiss\fcharset238\fprq2 Calibri CE;}{\fhiminor\f31569\fbidi \fswiss\fcharset204\fprq2 Calibri Cyr;} +{\fhiminor\f31571\fbidi \fswiss\fcharset161\fprq2 Calibri Greek;}{\fhiminor\f31572\fbidi \fswiss\fcharset162\fprq2 Calibri Tur;}{\fhiminor\f31573\fbidi \fswiss\fcharset177\fprq2 Calibri (Hebrew);} +{\fhiminor\f31574\fbidi \fswiss\fcharset178\fprq2 Calibri (Arabic);}{\fhiminor\f31575\fbidi \fswiss\fcharset186\fprq2 Calibri Baltic;}{\fhiminor\f31576\fbidi \fswiss\fcharset163\fprq2 Calibri (Vietnamese);} +{\fbiminor\f31578\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fbiminor\f31579\fbidi \froman\fcharset204\fprq2 Times New Roman 
Cyr;}{\fbiminor\f31581\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;} +{\fbiminor\f31582\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fbiminor\f31583\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fbiminor\f31584\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);} +{\fbiminor\f31585\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fbiminor\f31586\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}}{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0; +\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128; +\red192\green192\blue192;\red0\green0\blue0;\red0\green0\blue0;}{\*\defchp \lang1033\langfe1033\langnp1033\langfenp1033 }{\*\defpap \ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 }\noqfpromote {\stylesheet{ +\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \snext0 \sqformat \spriority0 \styrsid686994 Normal;}{ +\s1\qc \li0\ri0\keepn\widctlpar\wrapdefault\aspalpha\aspnum\faauto\outlinelevel0\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \b\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +\sbasedon0 \snext0 \sqformat \spriority0 \styrsid5135223 heading 1;}{\s2\ql \li0\ri0\keepn\widctlpar\wrapdefault\aspalpha\aspnum\faauto\outlinelevel1\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \ab\af0\afs20\alang1025 \ltrch\fcs0 +\b\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon0 \snext0 \sqformat \spriority0 \styrsid5135223 heading 2;}{\s3\ql \li0\ri0\sb240\sa60\keepn\widctlpar\wrapdefault\aspalpha\aspnum\faauto\outlinelevel2\adjustright\rin0\lin0\itap0 
+\rtlch\fcs1 \ab\af0\afs26\alang1025 \ltrch\fcs0 \b\f44\fs26\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon0 \snext0 \slink21 \sunhideused \sqformat \spriority0 \styrsid16649630 heading 3;}{ +\s8\ql \li0\ri0\sb240\sa60\widctlpar\wrapdefault\aspalpha\aspnum\faauto\outlinelevel7\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \ai\af0\afs24\alang1025 \ltrch\fcs0 \i\f37\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +\sbasedon0 \snext0 \slink20 \ssemihidden \sunhideused \sqformat \spriority0 \styrsid686994 heading 8;}{\*\cs10 \additive \ssemihidden \sunhideused \spriority1 Default Paragraph Font;}{\* +\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\trcbpat1\trcfpat1\tblind0\tblindtype3\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv +\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \snext11 \ssemihidden \sunhideused Normal Table;}{ +\s15\ql \li0\ri0\widctlpar\tqc\tx4320\tqr\tx8640\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +\sbasedon0 \snext15 \spriority0 \styrsid5135223 footer;}{\*\cs16 \additive \rtlch\fcs1 \af0 \ltrch\fcs0 \sbasedon10 \spriority0 \styrsid5135223 page number;}{\s17\ql \li0\ri0\widctlpar +\tqc\tx4320\tqr\tx8640\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon0 \snext17 \spriority0 \styrsid5135223 header;}{ +\s18\qc \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \b\f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon0 \snext18 \spriority0 \styrsid5135223 Body Text;} +{\s19\qj 
\fi720\li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon0 \snext19 \spriority0 \styrsid5135223 +Body Text Indent;}{\*\cs20 \additive \rtlch\fcs1 \ai\af0\afs24 \ltrch\fcs0 \i\f37\fs24 \sbasedon10 \slink8 \slocked \ssemihidden \spriority0 \styrsid686994 Heading 8 Char;}{\*\cs21 \additive \rtlch\fcs1 \ab\af0\afs26 \ltrch\fcs0 \b\f44\fs26 +\sbasedon10 \slink3 \slocked \spriority0 \styrsid16649630 Heading 3 Char;}{\*\ts22\tsrowd\trbrdrt\brdrs\brdrw10\brdrcf1 \trbrdrl\brdrs\brdrw10\brdrcf1 \trbrdrb\brdrs\brdrw10\brdrcf1 \trbrdrr\brdrs\brdrw10\brdrcf1 \trbrdrh\brdrs\brdrw10\brdrcf1 \trbrdrv +\brdrs\brdrw10\brdrcf1 \trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\trcbpat1\trcfpat1\tblind0\tblindtype3\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv +\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon11 \snext22 \spriority0 \styrsid11231210 Table Grid;}{\* +\cs23 \additive \rtlch\fcs1 \af0\afs16 \ltrch\fcs0 \fs16 \sbasedon10 \spriority0 \styrsid4410928 annotation reference;}{\s24\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 +\f1\fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon0 \snext24 \slink25 \spriority0 \styrsid4410928 annotation text;}{\*\cs25 \additive \rtlch\fcs1 \af0 \ltrch\fcs0 \f1 \sbasedon10 \slink24 \slocked \spriority0 \styrsid4410928 +Comment Text Char;}{\s26\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \ab\af0\afs20\alang1025 \ltrch\fcs0 \b\f1\fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +\sbasedon24 \snext24 \slink27 \spriority0 \styrsid4410928 annotation 
subject;}{\*\cs27 \additive \rtlch\fcs1 \ab\af0 \ltrch\fcs0 \b\f1 \sbasedon25 \slink26 \slocked \spriority0 \styrsid4410928 Comment Subject Char;}{ +\s28\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af45\afs16\alang1025 \ltrch\fcs0 \f45\fs16\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon0 \snext28 \slink29 \spriority0 \styrsid4410928 +Balloon Text;}{\*\cs29 \additive \rtlch\fcs1 \af45\afs16 \ltrch\fcs0 \f45\fs16 \sbasedon10 \slink28 \slocked \spriority0 \styrsid4410928 Balloon Text Char;}{\s30\ql \li0\ri0\nowidctlpar\wrapdefault\faauto\rin0\lin0\itap0 \rtlch\fcs1 \af40\afs24\alang1025 +\ltrch\fcs0 \f40\fs24\cf1\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \snext30 \spriority0 \styrsid8922804 Default;}{\*\cs31 \additive \ul\cf2 \spriority0 \styrsid8922804 Hyperlink;}{ +\s32\ql \li0\ri0\nowidctlpar\wrapdefault\hyphpar0\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f2\fs20\lang1033\langfe1025\cgrid\langnp1033\langfenp1025 \sbasedon0 \snext32 \styrsid13713571 +WW-Plain Text;}{\s33\ql \li720\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin720\itap0\contextualspace \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +\sbasedon0 \snext33 \sqformat \spriority34 \styrsid13713571 List Paragraph;}{\s34\ql \li0\ri0\sb100\sa100\sbauto1\saauto1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 +\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \sbasedon0 \snext34 \styrsid12284823 Normal (Web);}{\s35\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 +\f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \snext35 \shidden \ssemihidden \styrsid14361568 
Revision;}}{\*\listtable{\list\listtemplateid-962804864{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0 +\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \b0\i0\f1\fs18\expnd0\expndtw0\up0\charscalex100\fbias0 \fi720\li0\lin0 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levellegal +\levelspace0\levelindent0{\leveltext\'03\'00.\'01;}{\levelnumbers\'01\'03;}\rtlch\fcs1 \af0 \ltrch\fcs0 \b0\i0\f1\fs18\expnd0\expndtw0\up0\charscalex100\fbias0 \fi1440\li0\jclisttab\tx1530\lin0 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0 +\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0{\leveltext\'05\'00.\'01.\'02;}{\levelnumbers\'01\'03\'05;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-720\li2160\jclisttab\tx2160\lin2160 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0 +\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0{\leveltext\'07\'00.\'01.\'02.\'03;}{\levelnumbers\'01\'03\'05\'07;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-720\li2520\jclisttab\tx2520\lin2520 }{\listlevel\levelnfc0\levelnfcn0\leveljc0 +\leveljcn0\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0{\leveltext\'09\'00.\'01.\'02.\'03.\'04;}{\levelnumbers\'01\'03\'05\'07\'09;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-720\li2880\jclisttab\tx2880\lin2880 }{\listlevel\levelnfc0 +\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0{\leveltext\'0b\'00.\'01.\'02.\'03.\'04.\'05;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-1080\li3600 +\jclisttab\tx3600\lin3600 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0{\leveltext\'0d\'00.\'01.\'02.\'03.\'04.\'05.\'06;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\rtlch\fcs1 \af0 +\ltrch\fcs0 \fbias0 \fi-1080\li3960\jclisttab\tx3960\lin3960 
}{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0{\leveltext\'0f\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07;}{\levelnumbers +\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-1440\li4680\jclisttab\tx4680\lin4680 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0{\leveltext +\'11\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-1440\li5040\jclisttab\tx5040\lin5040 }{\listname ;}\listid50619426}{\list\listtemplateid67698719{\listlevel +\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li360\lin360 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0 +\levelstartat1\levelspace0\levelindent0{\leveltext\'04\'00.\'01.;}{\levelnumbers\'01\'03;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-432\li792\lin792 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext +\'06\'00.\'01.\'02.;}{\levelnumbers\'01\'03\'05;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-504\li1224\lin1224 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext +\'08\'00.\'01.\'02.\'03.;}{\levelnumbers\'01\'03\'05\'07;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-648\li1728\lin1728 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext +\'0a\'00.\'01.\'02.\'03.\'04.;}{\levelnumbers\'01\'03\'05\'07\'09;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-792\li2232\lin2232 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext +\'0c\'00.\'01.\'02.\'03.\'04.\'05.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-936\li2736\lin2736 
}{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext +\'0e\'00.\'01.\'02.\'03.\'04.\'05.\'06.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-1080\li3240\lin3240 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext +\'10\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-1224\li3744\lin3744 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0 +{\leveltext\'12\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08.;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-1440\li4320\lin4320 }{\listname ;}\listid102699155}{\list\listtemplateid-302462038\listsimple{\listlevel +\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \b0\i0\fbias0 \fi-360\li360\jclisttab\tx360\lin360 }{\listname ;}\listid361634945} +{\list\listtemplateid-283865918\listhybrid{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\leveltemplateid67698711\'02\'00);}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 +\fi-360\li720\lin720 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698713\'02\'01.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 +\fi-360\li1440\lin1440 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698715\'02\'02.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 +\fi-180\li2160\lin2160 
}{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698703\'02\'03.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 +\fi-360\li2880\lin2880 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698713\'02\'04.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 +\fi-360\li3600\lin3600 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698715\'02\'05.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 +\fi-180\li4320\lin4320 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698703\'02\'06.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 +\fi-360\li5040\lin5040 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698713\'02\'07.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 +\fi-360\li5760\lin5760 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698715\'02\'08.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 +\fi-180\li6480\lin6480 }{\listname ;}\listid378865547}{\list\listtemplateid-485841040{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 +\ltrch\fcs0 \b\i0\f40\fs18\expnd0\expndtw0\up0\charscalex100\fbias0 \fi0\li0\lin0 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0{\leveltext\'03\'00.\'01;}{\levelnumbers\'01\'03;} +\rtlch\fcs1 \af0 \ltrch\fcs0 \b\i0\f40\fs18\expnd0\expndtw0\up0\charscalex100\fbias0 
\fi720\li0\jclisttab\tx-31680\lin0 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0{\leveltext +\'05\'00.\'01.\'02;}{\levelnumbers\'01\'03\'05;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-720\li2160\jclisttab\tx2160\lin2160 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0{\leveltext +\'07\'00.\'01.\'02.\'03;}{\levelnumbers\'01\'03\'05\'07;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-720\li2520\jclisttab\tx2520\lin2520 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0 +{\leveltext\'09\'00.\'01.\'02.\'03.\'04;}{\levelnumbers\'01\'03\'05\'07\'09;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-720\li2880\jclisttab\tx2880\lin2880 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levellegal +\levelspace0\levelindent0{\leveltext\'0b\'00.\'01.\'02.\'03.\'04.\'05;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-1080\li3600\jclisttab\tx3600\lin3600 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0 +\levelstartat1\levellegal\levelspace0\levelindent0{\leveltext\'0d\'00.\'01.\'02.\'03.\'04.\'05.\'06;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-1080\li3960\jclisttab\tx3960\lin3960 }{\listlevel\levelnfc0 +\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0{\leveltext\'0f\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-1440\li4680 +\jclisttab\tx4680\lin4680 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0{\leveltext\'11\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;} +\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 
\fi-1440\li5040\jclisttab\tx5040\lin5040 }{\listname ;}\listid453905382}{\list\listtemplateid554217218\listhybrid{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0 +{\leveltext\leveltemplateid1285320390\'02\'00.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-720\li1080\lin1080 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0 +{\leveltext\leveltemplateid67698713\'02\'01.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li1440\lin1440 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'02.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li2160\lin2160 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698703\'02\'03.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li2880\lin2880 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698713\'02\'04.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li3600\lin3600 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'05.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li4320\lin4320 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698703\'02\'06.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li5040\lin5040 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698713\'02\'07.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 
\fi-360\li5760\lin5760 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'08.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li6480\lin6480 }{\listname ;}\listid497770244}{\list\listtemplateid-171024214\listhybrid{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0 +\levelstartat1\levelspace0\levelindent0{\leveltext\leveltemplateid-337448548\'02\'00);}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \b\fbias0 \fi-360\li1440\lin1440 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1 +\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698713\'02\'01.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li2160\lin2160 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative +\levelspace0\levelindent0{\leveltext\leveltemplateid67698715\'02\'02.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li2880\lin2880 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0 +\levelindent0{\leveltext\leveltemplateid67698703\'02\'03.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li3600\lin3600 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0 +{\leveltext\leveltemplateid67698713\'02\'04.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li4320\lin4320 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'05.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li5040\lin5040 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698703\'02\'06.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li5760\lin5760 
}{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698713\'02\'07.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li6480\lin6480 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'08.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li7200\lin7200 }{\listname ;}\listid542517863}{\list\listtemplateid1612480300\listhybrid{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0 +\levelstartat1\levelspace0\levelindent0{\leveltext\leveltemplateid649487386\'02\'00);}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \b0\fbias0 \fi-360\li720\lin720 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1 +\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698713\'02\'01.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li1440\lin1440 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative +\levelspace0\levelindent0{\leveltext\leveltemplateid67698715\'02\'02.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li2160\lin2160 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0 +\levelindent0{\leveltext\leveltemplateid67698703\'02\'03.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li2880\lin2880 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0 +{\leveltext\leveltemplateid67698713\'02\'04.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li3600\lin3600 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'05.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li4320\lin4320 
}{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698703\'02\'06.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li5040\lin5040 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698713\'02\'07.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li5760\lin5760 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'08.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li6480\lin6480 }{\listname ;}\listid542669416}{\list\listtemplateid-1680186868\listhybrid{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0 +\levelstartat1\levelspace0\levelindent0{\leveltext\leveltemplateid-1938120284\'03(\'00);}{\levelnumbers\'02;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-360\li1080\lin1080 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1 +\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698713\'02\'01.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li1800\lin1800 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative +\levelspace0\levelindent0{\leveltext\leveltemplateid67698715\'02\'02.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li2520\lin2520 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0 +\levelindent0{\leveltext\leveltemplateid67698703\'02\'03.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li3240\lin3240 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0 +{\leveltext\leveltemplateid67698713\'02\'04.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li3960\lin3960 
}{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'05.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li4680\lin4680 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698703\'02\'06.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li5400\lin5400 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698713\'02\'07.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li6120\lin6120 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'08.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li6840\lin6840 }{\listname ;}\listid589702026}{\list\listtemplateid2046570720\listhybrid{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0 +\levelstartat3\levelspace0\levelindent0{\leveltext\leveltemplateid67698711\'02\'00);}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-360\li720\lin720 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative +\levelspace0\levelindent0{\leveltext\leveltemplateid67698713\'02\'01.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li1440\lin1440 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0 +\levelindent0{\leveltext\leveltemplateid67698715\'02\'02.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li2160\lin2160 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0 +{\leveltext\leveltemplateid67698703\'02\'03.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li2880\lin2880 
}{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698713\'02\'04.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li3600\lin3600 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'05.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li4320\lin4320 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698703\'02\'06.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li5040\lin5040 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698713\'02\'07.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li5760\lin5760 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'08.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li6480\lin6480 }{\listname ;}\listid645277662}{\list\listtemplateid-1245256206{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1 +\levelspace0\levelindent0{\leveltext\'01\'00;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-720\li720\lin720 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext +\'03\'00.\'01;}{\levelnumbers\'01\'03;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-720\li720\lin720 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'05\'00.\'01.\'02;}{\levelnumbers +\'01\'03\'05;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-720\li720\lin720 
}{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'07\'00.\'01.\'02.\'03;}{\levelnumbers\'01\'03\'05\'07;} +\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-1080\li1080\lin1080 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'09\'00.\'01.\'02.\'03.\'04;}{\levelnumbers\'01\'03\'05\'07\'09;}\rtlch\fcs1 +\af0 \ltrch\fcs0 \fbias0 \fi-1080\li1080\lin1080 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0b\'00.\'01.\'02.\'03.\'04.\'05;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\rtlch\fcs1 +\af0 \ltrch\fcs0 \fbias0 \fi-1440\li1440\lin1440 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0d\'00.\'01.\'02.\'03.\'04.\'05.\'06;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;} +\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-1440\li1440\lin1440 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'0f\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07;}{\levelnumbers +\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-1800\li1800\lin1800 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext +\'11\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-2160\li2160\lin2160 }{\listname ;}\listid1028063777}{\list\listtemplateid-1773998230\listhybrid{\listlevel +\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\leveltemplateid1047434724\'02\'00);}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \b\fbias0 \fi-360\li720\lin720 }{\listlevel\levelnfc4\levelnfcn4 
+\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698713\'02\'01.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li1440\lin1440 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2 +\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698715\'02\'02.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li2160\lin2160 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0 +\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698703\'02\'03.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li2880\lin2880 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1 +\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698713\'02\'04.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li3600\lin3600 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative +\levelspace0\levelindent0{\leveltext\leveltemplateid67698715\'02\'05.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li4320\lin4320 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0 +\levelindent0{\leveltext\leveltemplateid67698703\'02\'06.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li5040\lin5040 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0 +{\leveltext\leveltemplateid67698713\'02\'07.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li5760\lin5760 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'08.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li6480\lin6480 }{\listname 
;}\listid1053625041}{\list\listtemplateid-1680186868\listhybrid{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0 +\levelstartat1\levelspace0\levelindent0{\leveltext\leveltemplateid-1938120284\'03(\'00);}{\levelnumbers\'02;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-360\li8640\lin8640 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1 +\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698713\'02\'01.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li9360\lin9360 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative +\levelspace0\levelindent0{\leveltext\leveltemplateid67698715\'02\'02.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li10080\lin10080 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0 +\levelindent0{\leveltext\leveltemplateid67698703\'02\'03.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li10800\lin10800 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0 +{\leveltext\leveltemplateid67698713\'02\'04.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li11520\lin11520 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'05.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li12240\lin12240 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698703\'02\'06.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li12960\lin12960 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698713\'02\'07.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li13680\lin13680 
}{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'08.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li14400\lin14400 }{\listname ;}\listid1206524817}{\list\listtemplateid-171024214\listhybrid{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0 +\levelstartat1\levelspace0\levelindent0{\leveltext\leveltemplateid-337448548\'02\'00);}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \b\fbias0 \fi-360\li1440\lin1440 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1 +\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698713\'02\'01.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li2160\lin2160 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative +\levelspace0\levelindent0{\leveltext\leveltemplateid67698715\'02\'02.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li2880\lin2880 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0 +\levelindent0{\leveltext\leveltemplateid67698703\'02\'03.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li3600\lin3600 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0 +{\leveltext\leveltemplateid67698713\'02\'04.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li4320\lin4320 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'05.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li5040\lin5040 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698703\'02\'06.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li5760\lin5760 
}{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698713\'02\'07.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li6480\lin6480 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'08.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li7200\lin7200 }{\listname ;}\listid1335961019}{\list\listtemplateid726575542{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1 +\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-360\li720\lin720 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat4\levellegal\levelspace0\levelindent0{\leveltext +\'03\'00.\'01;}{\levelnumbers\'01\'03;}\rtlch\fcs1 \af0 \ltrch\fcs0 \b\fbias0 \fi-720\li1080\lin1080 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0{\leveltext +\'05\'00.\'01.\'02;}{\levelnumbers\'01\'03\'05;}\rtlch\fcs1 \af0 \ltrch\fcs0 \b\fbias0 \fi-720\li1080\lin1080 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0{\leveltext +\'07\'00.\'01.\'02.\'03;}{\levelnumbers\'01\'03\'05\'07;}\rtlch\fcs1 \af0 \ltrch\fcs0 \b\fbias0 \fi-1080\li1440\lin1440 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0{\leveltext +\'09\'00.\'01.\'02.\'03.\'04;}{\levelnumbers\'01\'03\'05\'07\'09;}\rtlch\fcs1 \af0 \ltrch\fcs0 \b\fbias0 \fi-1080\li1440\lin1440 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0{\leveltext +\'0b\'00.\'01.\'02.\'03.\'04.\'05;}{\levelnumbers\'01\'03\'05\'07\'09\'0b;}\rtlch\fcs1 \af0 \ltrch\fcs0 \b\fbias0 \fi-1440\li1800\lin1800 
}{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0 +{\leveltext\'0d\'00.\'01.\'02.\'03.\'04.\'05.\'06;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d;}\rtlch\fcs1 \af0 \ltrch\fcs0 \b\fbias0 \fi-1440\li1800\lin1800 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levellegal +\levelspace0\levelindent0{\leveltext\'0f\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f;}\rtlch\fcs1 \af0 \ltrch\fcs0 \b\fbias0 \fi-1800\li2160\lin2160 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0 +\levelfollow0\levelstartat1\levellegal\levelspace0\levelindent0{\leveltext\'11\'00.\'01.\'02.\'03.\'04.\'05.\'06.\'07.\'08;}{\levelnumbers\'01\'03\'05\'07\'09\'0b\'0d\'0f\'11;}\rtlch\fcs1 \af0 \ltrch\fcs0 \b\fbias0 \fi-2160\li2520\lin2520 }{\listname +;}\listid1433161003}{\list\listtemplateid-682191340\listhybrid{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\leveltemplateid114427236\'02\'00);}{\levelnumbers\'01;}\rtlch\fcs1 \af0\afs18 +\ltrch\fcs0 \b\f40\fs18\fbias0 \fi-360\li720\lin720 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698713\'02\'01.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 +\ltrch\fcs0 \fi-360\li1440\lin1440 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698715\'02\'02.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 +\fi-180\li2160\lin2160 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698703\'02\'03.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 +\fi-360\li2880\lin2880 
}{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698713\'02\'04.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 +\fi-360\li3600\lin3600 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698715\'02\'05.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 +\fi-180\li4320\lin4320 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698703\'02\'06.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 +\fi-360\li5040\lin5040 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698713\'02\'07.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 +\fi-360\li5760\lin5760 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698715\'02\'08.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 +\fi-180\li6480\lin6480 }{\listname ;}\listid1480997992}{\list\listtemplateid-211784922\listhybrid{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\leveltemplateid67698711 +\'02\'00);}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-360\li2880\lin2880 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698713 +\'02\'01.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li3600\lin3600 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698715 +\'02\'02.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li4320\lin4320 
}{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698703 +\'02\'03.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li5040\lin5040 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698713 +\'02\'04.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li5760\lin5760 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698715 +\'02\'05.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li6480\lin6480 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698703 +\'02\'06.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li7200\lin7200 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698713 +\'02\'07.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li7920\lin7920 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698715 +\'02\'08.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li8640\lin8640 }{\listname ;}\listid1548373150}{\list\listtemplateid-171024214\listhybrid{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0 +\levelindent0{\leveltext\leveltemplateid-337448548\'02\'00);}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \b\fbias0 \fi-360\li1440\lin1440 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0 +\levelindent0{\leveltext\leveltemplateid67698713\'02\'01.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li2160\lin2160 
}{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0 +{\leveltext\leveltemplateid67698715\'02\'02.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li2880\lin2880 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698703\'02\'03.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li3600\lin3600 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698713\'02\'04.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li4320\lin4320 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'05.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li5040\lin5040 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698703\'02\'06.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li5760\lin5760 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698713\'02\'07.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li6480\lin6480 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'08.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li7200\lin7200 }{\listname ;}\listid1809542631}{\list\listtemplateid-1301510768\listhybrid{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0 +\levelstartat1\levelspace0\levelindent0{\leveltext\leveltemplateid897730020\'03(\'00);}{\levelnumbers\'02;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fbias0 \fi-360\li720\lin720 
}{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1 +\lvltentative\levelspace0\levelindent0{\leveltext\leveltemplateid67698713\'02\'01.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li1440\lin1440 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative +\levelspace0\levelindent0{\leveltext\leveltemplateid67698715\'02\'02.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li2160\lin2160 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0 +\levelindent0{\leveltext\leveltemplateid67698703\'02\'03.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li2880\lin2880 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0 +{\leveltext\leveltemplateid67698713\'02\'04.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li3600\lin3600 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'05.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li4320\lin4320 }{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698703\'02\'06.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li5040\lin5040 }{\listlevel\levelnfc4\levelnfcn4\leveljc0\leveljcn0\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698713\'02\'07.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-360\li5760\lin5760 }{\listlevel\levelnfc2\levelnfcn2\leveljc2\leveljcn2\levelfollow0\levelstartat1\lvltentative\levelspace0\levelindent0{\leveltext +\leveltemplateid67698715\'02\'08.;}{\levelnumbers\'01;}\rtlch\fcs1 \af0 \ltrch\fcs0 \fi-180\li6480\lin6480 }{\listname 
;}\listid1987011196}}{\*\listoverridetable{\listoverride\listid50619426\listoverridecount0\ls1}{\listoverride\listid453905382 +\listoverridecount0\ls2}{\listoverride\listid1809542631\listoverridecount0\ls3}{\listoverride\listid589702026\listoverridecount0\ls4}{\listoverride\listid361634945\listoverridecount0\ls5}{\listoverride\listid1206524817\listoverridecount0\ls6} +{\listoverride\listid542669416\listoverridecount0\ls7}{\listoverride\listid1028063777\listoverridecount0\ls8}{\listoverride\listid1987011196\listoverridecount0\ls9}{\listoverride\listid1548373150\listoverridecount0\ls10}{\listoverride\listid378865547 +\listoverridecount0\ls11}{\listoverride\listid645277662\listoverridecount0\ls12}{\listoverride\listid1335961019\listoverridecount0\ls13}{\listoverride\listid542517863\listoverridecount0\ls14}{\listoverride\listid453905382\listoverridecount9{\lfolevel +\listoverridestartat\levelstartat1}{\lfolevel\listoverridestartat\levelstartat1}{\lfolevel\listoverridestartat\levelstartat1}{\lfolevel\listoverridestartat\levelstartat1}{\lfolevel\listoverridestartat\levelstartat1}{\lfolevel\listoverridestartat +\levelstartat1}{\lfolevel\listoverridestartat\levelstartat1}{\lfolevel\listoverridestartat\levelstartat1}{\lfolevel\listoverridestartat\levelstartat1}\ls15}{\listoverride\listid1480997992\listoverridecount0\ls16}{\listoverride\listid497770244 +\listoverridecount0\ls17}{\listoverride\listid1433161003\listoverridecount0\ls18}{\listoverride\listid102699155\listoverridecount0\ls19}{\listoverride\listid1053625041\listoverridecount0\ls20}}{\*\pgptbl {\pgp\ipgp0\itap0\li0\ri0\sb0\sa0}{\pgp\ipgp0\itap0 +\li0\ri0\sb0\sa0}}{\*\rsidtbl \rsid964\rsid5944\rsid13820\rsid24827\rsid89560\rsid96791\rsid151197\rsid160198\rsid279929\rsid332559\rsid405089\rsid414678\rsid478916\rsid551236\rsid590552\rsid666131\rsid686994\rsid729164\rsid788444\rsid797272\rsid868435 
+\rsid872988\rsid986640\rsid991543\rsid1064329\rsid1136288\rsid1184735\rsid1208049\rsid1250427\rsid1258595\rsid1261359\rsid1321178\rsid1342061\rsid1455539\rsid1515488\rsid1539944\rsid1540012\rsid1588624\rsid1592157\rsid1643388\rsid1665872\rsid1710064 +\rsid1838427\rsid1843025\rsid1846361\rsid1848488\rsid2062675\rsid2168500\rsid2191226\rsid2250570\rsid2373004\rsid2386328\rsid2494064\rsid2497632\rsid2514201\rsid2753227\rsid2757773\rsid2775539\rsid2961932\rsid2972289\rsid3024266\rsid3041664\rsid3084267 +\rsid3243428\rsid3478944\rsid3487979\rsid3554556\rsid3616660\rsid3636095\rsid3691870\rsid3754991\rsid3760314\rsid3877651\rsid3893489\rsid4007548\rsid4090161\rsid4143008\rsid4196095\rsid4217816\rsid4261473\rsid4266728\rsid4272710\rsid4339337\rsid4350897 +\rsid4410928\rsid4474123\rsid4526390\rsid4544504\rsid4655441\rsid4656132\rsid4684829\rsid4785366\rsid4787507\rsid4923456\rsid4937582\rsid4944853\rsid4997005\rsid5057400\rsid5128662\rsid5135223\rsid5189795\rsid5377694\rsid5391622\rsid5456293\rsid5599293 +\rsid5636316\rsid5640508\rsid5641658\rsid5653987\rsid5705132\rsid5841004\rsid5914192\rsid5975919\rsid6031693\rsid6048369\rsid6052992\rsid6115309\rsid6192003\rsid6309293\rsid6371233\rsid6389549\rsid6431678\rsid6440714\rsid6492038\rsid6512339\rsid6515263 +\rsid6517011\rsid6561333\rsid6565827\rsid6583375\rsid6649761\rsid6828884\rsid6890305\rsid6902399\rsid6959017\rsid7088543\rsid7156589\rsid7223845\rsid7290315\rsid7306026\rsid7357891\rsid7475935\rsid7476016\rsid7486145\rsid7552467\rsid7626014\rsid7629024 +\rsid7688258\rsid7741098\rsid7743764\rsid7804261\rsid7811321\rsid7817112\rsid7871077\rsid7872888\rsid7885279\rsid7896249\rsid7930761\rsid8221668\rsid8283643\rsid8399023\rsid8418159\rsid8524243\rsid8531935\rsid8585328\rsid8739334\rsid8739694\rsid8744339 
+\rsid8789564\rsid8812944\rsid8851842\rsid8855407\rsid8869717\rsid8913080\rsid8922804\rsid8981302\rsid9059579\rsid9074333\rsid9137639\rsid9192520\rsid9329671\rsid9396936\rsid9442413\rsid9460959\rsid9526050\rsid9589947\rsid9718056\rsid9765372\rsid9858274 +\rsid9909461\rsid9984772\rsid10037268\rsid10040187\rsid10049382\rsid10106677\rsid10110036\rsid10161061\rsid10178495\rsid10296154\rsid10427296\rsid10436123\rsid10501631\rsid10579505\rsid10622710\rsid10646580\rsid10765994\rsid10769166\rsid10842362 +\rsid10886337\rsid10973546\rsid11077217\rsid11099350\rsid11172672\rsid11231210\rsid11233100\rsid11360165\rsid11602297\rsid11683287\rsid11744864\rsid11802299\rsid12007934\rsid12017668\rsid12153489\rsid12284823\rsid12325322\rsid12352403\rsid12394240 +\rsid12596707\rsid12599662\rsid12601731\rsid12612730\rsid12658752\rsid12724517\rsid12737859\rsid12789636\rsid12846829\rsid12942843\rsid12986674\rsid12992617\rsid12996043\rsid13002867\rsid13055895\rsid13198047\rsid13270131\rsid13331806\rsid13516301 +\rsid13531971\rsid13656454\rsid13702009\rsid13703289\rsid13713571\rsid13775719\rsid13786860\rsid13839664\rsid13913753\rsid13920398\rsid13923957\rsid13979741\rsid13982238\rsid14052604\rsid14171619\rsid14175700\rsid14311703\rsid14319156\rsid14356857 +\rsid14361568\rsid14382697\rsid14489021\rsid14572538\rsid14630660\rsid14681671\rsid14704993\rsid14755191\rsid14880983\rsid14889927\rsid14962135\rsid14972662\rsid15076895\rsid15100911\rsid15168791\rsid15231079\rsid15296256\rsid15358346\rsid15424423 +\rsid15533868\rsid15535258\rsid15537240\rsid15603023\rsid15665657\rsid15667693\rsid15669559\rsid15761125\rsid15885021\rsid15941974\rsid15944795\rsid15995044\rsid16001025\rsid16058675\rsid16141392\rsid16192851\rsid16198517\rsid16319078\rsid16400425 +\rsid16472867\rsid16583280\rsid16649630}{\mmathPr\mmathFont34\mbrkBin0\mbrkBinSub0\msmallFrac0\mdispDef1\mlMargin0\mrMargin0\mdefJc1\mwrapIndent1440\mintLim0\mnaryLim1}{\info{\title NON-DISCLOSURE AND LIMITED USE AGREEMENT}{\author Light, Paige} 
+{\operator Zhuo Chen}{\creatim\yr2022\mo12\dy7\min7}{\revtim\yr2022\mo12\dy7\min7}{\printim\yr2013\mo2\dy12\hr14\min26}{\version2}{\edmins1}{\nofpages4}{\nofwords2299}{\nofchars13105}{\*\company Advanced Micro Devices}{\nofcharsws15374}{\vern57}} +{\*\userprops {\propname _NewReviewCycle}\proptype30{\staticval }{\propname MSIP_Label_4342314e-0df4-4b58-84bf-38bed6170a0f_Enabled}\proptype30{\staticval true}{\propname MSIP_Label_4342314e-0df4-4b58-84bf-38bed6170a0f_SetDate}\proptype30{\staticval 2022- +12-07T05:07:17Z}{\propname MSIP_Label_4342314e-0df4-4b58-84bf-38bed6170a0f_Method}\proptype30{\staticval Standard}{\propname MSIP_Label_4342314e-0df4-4b58-84bf-38bed6170a0f_Name}\proptype30{\staticval General}{\propname MSIP_Label_4342314e-0df4-4b58-84bf- +38bed6170a0f_SiteId}\proptype30{\staticval 3dd8961f-e488-4e60-8e11-a82d994e183d}{\propname MSIP_Label_4342314e-0df4-4b58-84bf-38bed6170a0f_ActionId}\proptype30{\staticval 8cbd1668-3944-43f5-ab25-dd7d052d9f2d}{\propname MSIP_Label_4342314e-0df4-4b58-84bf-3 +8bed6170a0f_ContentBits}\proptype30{\staticval 1}}{\*\xmlnstbl {\xmlns1 http://schemas.microsoft.com/office/word/2003/wordml}}\paperw12240\paperh15840\margl864\margr864\margt1152\margb1152\gutter0\ltrsect +\widowctrl\ftnbj\aenddoc\revisions\trackmoves0\trackformatting1\donotembedsysfont0\relyonvml0\donotembedlingdata0\grfdocevents0\validatexml1\showplaceholdtext0\ignoremixedcontent0\saveinvalidxml0\showxmlerrors1 +\noxlattoyen\expshrtn\noultrlspc\dntblnsbdb\nospaceforul\formshade\horzdoc\dgmargin\dghspace90\dgvspace180\dghorigin864\dgvorigin1152\dghshow0\dgvshow0 +\jexpand\viewkind1\viewscale100\pgbrdrhead\pgbrdrfoot\splytwnine\ftnlytwnine\htmautsp\nolnhtadjtbl\useltbaln\alntblind\lytcalctblwd\lyttblrtgr\lnbrkrule\nobrkwrptbl\snaptogridincell\allowfieldendsel\wrppunct 
+\asianbrkrule\nojkernpunct\rsidroot12942843\newtblstyruls\nogrowautofit\usenormstyforlist\noindnmbrts\felnbrelev\nocxsptable\indrlsweleven\noafcnsttbl\afelev\utinl\hwelev\spltpgpar\notcvasp\notbrkcnstfrctbl\notvatxbx\krnprsnet\cachedcolbal +\nouicompat \fet0{\*\wgrffmtfilter 013f}\nofeaturethrottle1\ilfomacatclnup0{\*\ftnsep \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 +\f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af0 \ltrch\fcs0 \insrsid3691870 \chftnsep +\par }}{\*\ftnsepc \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af0 +\ltrch\fcs0 \insrsid3691870 \chftnsepc +\par }}{\*\aftnsep \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af0 +\ltrch\fcs0 \insrsid3691870 \chftnsep +\par }}{\*\aftnsepc \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af0 +\ltrch\fcs0 \insrsid3691870 \chftnsepc +\par }}\ltrpar \sectd \ltrsect\psz1\linex0\endnhere\sectlinegrid360\sectdefaultcl\sectrsid11683287\sftnbj {\headerl \ltrpar \pard\plain \ltrpar\s17\ql \li0\ri0\widctlpar\tqc\tx4320\tqr\tx8640\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 +\rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af0 \ltrch\fcs0 \insrsid13786860 +\par }}{\headerr \ltrpar \pard\plain \ltrpar\qc 
\fi720\li720\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin720\itap0\pararsid12284823 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 +\f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af0 \ltrch\fcs0 \lang1024\langfe1024\noproof\insrsid13786860 +{\shp{\*\shpinst\shpleft0\shptop300\shpright12240\shpbottom697\shpfhdr0\shpbxpage\shpbxignore\shpbypage\shpbyignore\shpwr3\shpwrk0\shpfblwtxt0\shpz0\shplid1025{\sp{\sn shapeType}{\sv 202}}{\sp{\sn fFlipH}{\sv 0}}{\sp{\sn fFlipV}{\sv 0}} +{\sp{\sn lTxid}{\sv 65536}}{\sp{\sn dxTextLeft}{\sv 254000}}{\sp{\sn dyTextTop}{\sv 0}}{\sp{\sn dyTextBottom}{\sv 0}}{\sp{\sn fRotateText}{\sv 0}}{\sp{\sn fFitShapeToText}{\sv 0}} +{\sp{\sn fFilled}{\sv 0}}{\sp{\sn fNoFillHitTest}{\sv 1}}{\sp{\sn lineWidth}{\sv 6350}}{\sp{\sn fLine}{\sv 0}}{\sp{\sn wzName}{\sv MSIPCM570e44558e880388e024255c}}{\sp{\sn wzDescription}{\sv \'7b"HashCode":-549612842, +"Height":792.0,"Width":612.0,"Placement":"Header","Index":"Primary","Section":1,"Top":0.0,"Left":0.0\'7d}}{\sp{\sn posrelh}{\sv 1}}{\sp{\sn posrelv}{\sv 1}}{\sp{\sn metroBlob}{\sv {\*\svb +504b030414000600080000002100b6833892fe000000e1010000130000005b436f6e74656e745f54797065735d2e786d6c9491414ec3301045f748dcc1f216254ebb400825e982b44b40a81c60644f128b646c794c686f8f93b61b449158da33ffbf27bbdc1cc6414c18d83aaae42a2fa440d2ce58ea2af9bedf650f527004 +323038c24a1e91e5a6bebd29f7478f2c529ab8927d8cfe5129d63d8ec0b9f34869d2ba30424cc7d0290ffa033a54eba2b857da51448a599c3b645d36d8c2e710c5f690ae4f26010796e2e9b438b32a09de0f56434ca66a22f383929d09794a2e3bdc5bcf774943aa5f09f3e43ae09c7b494f13ac41f10a213ec398349409ac +70ed1aa7f3bf3b66c99133d7b65663de04de2ea98bd3b56ee3be28e0f4dff226c5de70bab4abe583ea6f000000ffff0300504b03041400060008000000210038fd21ffd6000000940100000b0000005f72656c732f2e72656c73a490c16ac3300c86ef83bd83d17d719ac318a34e2fa3d06be91ec0d88a631a5b4632d9faf6 
+3383c1327adb51bfd0f7897f7ff84c8b5a91255236b0eb7a50981df9988381f7cbf1e90594549bbd5d28a3811b0a1cc6c787fd19175bdb91ccb1886a942c06e65acbabd6e2664c563a2a98db66224eb6b691832ed65d6d403df4fdb3e6df0c18374c75f206f8e40750975b69e63fec141d93d0543b4749d33445778faa3d7d +e433ae8d62396035e059be43c6b56bcf81beefddfdd31bd89639ba23db846fe4b67e1ca8653f7abde972fc020000ffff0300504b0304140006000800000021002cd2d9c917020000250400000e0000006472732f65326f446f632e786d6cac534d6fdb300cbd0fd87f10745fec7849d31a718aac458601415b201d7a566429 +36208b9aa4c4ce7efd28d94e866ea761179922697ebcf7b4bcef1a454ec2ba1a7441a7939412a13994b53e14f4fbebe6d32d25ce335d32055a14f42c1cbd5f7dfcb06c4d2e32a84095c2122ca25dde9a8256de9b3c491caf44c3dc048cd01894601be6f16a0f4969598bd51b9564697a93b4604b63810be7d0fbd807e92ad6 +975270ff2ca5139ea882e26c3e9e369efb7026ab25cb0f9699aae6c318ec1fa66858adb1e9a5d423f38c1c6dfd47a9a6e6161c483fe1d0242065cd45dc01b799a6efb6d955cc88b80b82e3cc0526f7ffcaf2a7d3cebc58e2bb2fd021810190d6b8dca133ecd349db842f4e4a308e109e2fb089ce138ecec56291cd520c718c +65f32cbd9b8732c9f56f639dff2aa021c128a8455a225aecb475be4f1d5342330d9b5aa9488dd2a42de8cde7791a7fb844b0b8d2d8e33a6bb07cb7ef8605f6509e712f0b3de5cef04d8dcdb7ccf9176691639c1775eb9ff1900ab0090c162515d89f7ff3877c841ea394b4a89982ba1f47660525ea9b4652b239c21054166f +68d868dc4d6733bcec47af3e360f807a9ce2d3303c9a21d7abd194169a37d4f53ab4c310d31c9b16d48fe683ef258cef828bf53a26a19e0cf35bbd333c940e38064c5fbb3766cd00bc47ca9e609415cbdfe1dfe7f60cac8f1e641dc909c8f6700e80a31623bdc3bb0962fffd1eb3aeaf7bf50b0000ffff0300504b03041400 +0600080000002100e78bd454dc000000070100000f0000006472732f646f776e7265762e786d6c4c8fc14ec3400c44ef48fcc3ca48dce886800a843855958a03520fd0f2019bac4902596f94dda6e9dfe39ecac9638d35f39caf66d7ab89c6d07946b85f24a0886b6f3b6e10bef66f77cfa042346c4def99104e1460555c5f 
+e526b3fec89f34ed62a32484436610da18874ceb50b7e44c58f88158bc6f3f3a13651d1b6d47739470d7eb344996da998ea5a13503952dd5bfbb834328cb0fbb3fc566cb9b9f6eae6cf53ed56e40bcbd99d7afa022cdf1720c677c418742982a7f601b548f208f44848744e6d94dd3475115c2f2e5097491ebfffcc51f0000 +00ffff0300504b01022d0014000600080000002100b6833892fe000000e10100001300000000000000000000000000000000005b436f6e74656e745f54797065735d2e786d6c504b01022d001400060008000000210038fd21ffd6000000940100000b000000000000000000000000002f0100005f72656c732f2e72656c73 +504b01022d00140006000800000021002cd2d9c917020000250400000e000000000000000000000000002e0200006472732f65326f446f632e786d6c504b01022d0014000600080000002100e78bd454dc000000070100000f00000000000000000000000000710400006472732f646f776e7265762e786d6c504b05060000 +000004000400f30000007a0500000000}}}{\sp{\sn dhgt}{\sv 251659264}}{\sp{\sn fLayoutInCell}{\sv 0}}{\sp{\sn fAllowOverlap}{\sv 1}}{\sp{\sn fBehindDocument}{\sv 0}}{\sp{\sn fHidden}{\sv 0}}{\sp{\sn fLayoutInCell}{\sv 0}}{\shptxt \ltrpar \pard\plain \ltrpar +\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid13786860 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af1 \ltrch\fcs0 +\fs20\cf2\insrsid13786860\charrsid13786860 [AMD Official Use Only - General] +\par }}}{\shprslt{\*\do\dobxpage\dobypage\dodhgt8192\dptxbx\dptxlrtb{\dptxbxtext\ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid13786860 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 +\f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af1 \ltrch\fcs0 \fs20\cf2\insrsid13786860\charrsid13786860 [AMD Official Use Only - General] +\par }}\dpx0\dpy300\dpxsize12240\dpysize397\dpfillfgcr255\dpfillfgcg255\dpfillfgcb255\dpfillbgcr255\dpfillbgcg255\dpfillbgcb255\dpfillpat0\dplinehollow}}}}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid12284823 \tab \tab \tab \tab 
\tab \tab \tab \tab \tab +\tab }{\rtlch\fcs1 \af1 \ltrch\fcs0 \fs20\lang1024\langfe1024\noproof\insrsid3691870\charrsid3691870 {\*\shppict{\pict{\*\picprop\shplid1025{\sp{\sn shapeType}{\sv 75}}{\sp{\sn fFlipH}{\sv 0}} +{\sp{\sn fFlipV}{\sv 0}}{\sp{\sn fLockRotation}{\sv 0}}{\sp{\sn fLockAspectRatio}{\sv 1}}{\sp{\sn fLockPosition}{\sv 0}}{\sp{\sn fLockAgainstSelect}{\sv 0}} +{\sp{\sn fLockCropping}{\sv 0}}{\sp{\sn fLockVerticies}{\sv 0}}{\sp{\sn fLockAgainstGrouping}{\sv 0}}{\sp{\sn pictureGray}{\sv 0}}{\sp{\sn pictureBiLevel}{\sv 0}}{\sp{\sn fFilled}{\sv 0}} +{\sp{\sn fLine}{\sv 0}}{\sp{\sn wzName}{\sv Picture 1}}{\sp{\sn dhgt}{\sv 251658240}}{\sp{\sn fHidden}{\sv 0}}{\sp{\sn fLayoutInCell}{\sv 1}}}\picscalex50\picscaley48\piccropl0\piccropr0\piccropt0\piccropb0 +\picw4978\pich1143\picwgoal2822\pichgoal648\pngblip\bliptag1255513328{\*\blipuid 4ad59cf05f6a5d6754968f8ac0529726}89504e470d0a1a0a0000000d4948445200000310000000b4080000000021f795440000000467414d410000b1889598f4a6000000097048597300003d8500003d +84011ef07cd10000131749444154789ced9ddb79e33812854ffb7300ec0ce8089a1d415311ac1c41d311b41d41cb11b827027122b037027122b0360233036b22 +f03e58be49bc5401554091c6ffb03b634300ccd121501714be3c2191d0e2a28e3d032e27b127909833eb2af60cb87c492b444291dd621b7b0a3cd20a91d024db +14b1a7c0230922a1cad414910491d065628a483644429bddf736f614e8a41522a14d769bc59e029d2488843ac5663a8af0df3271632f8f6a0fa7598cb729eeb5 +46ffc8f56abccdeaf7e0af178dc84c802203803c07f003a550a73cb68b5d9471f99cfa76b0bb637ee0afe1af8132db8b758861ea558851881c44024a94f89185 +b5748bcd6414f1e409fbfb95f98ed8cb8634fea5daf8dc99ac863b29b94f9647b9ba7d08f0285eb8d5fd6bc4f0b621fee27e6057fb0ee9c71ffdf1b7e7ea43f8 +d3accecfceceaf9b40c32d83accc02780adf614b9e8bbc713aa0bd97818dd604f63ce6b479c45d215ec8aadb47e507f2cc3414e1bb42b01708a06d3cc7f4e55c +37bd66b76855fb1766579f7f3daf03ecf0ab4928c253106c931a00aefdc6f466a76be05d4c2c9d0dc0ddc5d7c51f754d4c42119e82b873798a4deb37a837aa8a 
+b8707947c4a7b93abb6895c7a82ae50104f01484c38e09f197086c2fd4baae6bb5ae95d9d5670b65314fe078849f20b66ebb83103bd661eeb41451eb492d00cd +f999eecec9be22fc04e1b640b87f4e8efa8f4ab7db2b956ec3d15e9da9aedff615e1e3487b741d5427384775bb3eb35698c1032b2dc586dbf5985ce3d1c4ffb3 +6878ad10b5eb076307e700e04ade19b43b8fbe1794a0bd104ba3eae0d6f6f1082f41b8ef7ca29bd52aa77da7767eb8976671de6af56dfcc0908f203cdca7d183 +73007617c2eff30906207ab9d333256c2bc247107f7b7cd6c012812d215d9cc1752dda5d6c566a0177d38af010849721103d38070887234c657c4bd07cd78a4a +583e42e72188da6b600b4b046ab95928c6fa62b13bd77212e4768fd07908c22f98103f380700ab5aa823e1ed9711eebe373a1ddb3d54ea2e08df4d4ffce01c00 +5c3422ddccc4e17a44bba8753a36ab087741f898d400a0132a6623920b3eb18c6f0e174a7bdb8217460d86b320bc636b168273107ab72b04f9ccb052328e0a9b +c9e0ce82a8bd8736615603ad7f2ef8f48abe73a89592e56d1e8f701684bf096021380708f887a69bf14da3f94c8a701584441cc1c812e19b0bae964a6e06adaa +4a1615e12a085f931a30129c033c5ff1330c401ca1a688954ab73e380aa2ad2506b7b244f818010226c804d052c4ef4aa55b0f1c0521b1405809ce011e6ea2b9 +06200ed13af964eec090a3206a99d16d04e7e01148502e696307adb3b1d614e12688bb56667423c13938bfe98502dd53e09328c2add8b1cc8e09d8d595504fde +6c170e45080394c524301cf3fd074083c67f98fa47e5df4907378ea52a94703916fb2036ba68594bcf64808a3da0a7d750ea4c35f1e9dc5e927bec267b603f21 +128fa68e47b8fc052bb9e137820fd6373b66e40b7a84ef55136105f1fc886e961e132e980f888a2945b8fc01b9dcf0a5e073f54e175bb386bbf7cdd78c2088a7 +a7a7c7b5bb26b4ae12b0a40887e98b96fa7f907bacfef993f78cd1fcff23c611c4d3d3d3e33a779cf22d7b2ce28c5c27248e8b9749caa40660283807b0ca6628 +14ed0846563d6c4aa74f4ad76578c1cea1520741b4a2476ded04e7c072be4e3ce3bbdc3cb8ec9c765a57c1983930e42008d105c250700e6024625cd5aaf30840 +7ebbc9f99f6a1ae979ecb1a2080741d4b233b0139c03c8290a4aa561c3523eacf85f42b55446238ae00b821aa5a6fe79464ecebd400ac8ce25e3fbf73ddb33d0 +aa197d368ed0f10541dd315d529fb529b39a940b3e9f8ceffcfe92fb11bd7af926ee65640b826c52fffc45edb1e1ce4197d12b8094afe40acb0dd7bfb3d3abf8 
+6fe1c0105b10541bb8cc2bea9336b6448cd5689d951e802577ef5eebf9d70c28822d889ad8ee2750119b9a3939b76724177c6e19df6c6b56f152187e3ed9102e +33e00a821a36c8960075cf646e89180c47cc2fe39bab086b2f3051b882a09ad4cb0cc84b626353c13900d8f6c79f6c647ccb5230d3b2acbdc024610a826c00ff +7afd1f0aa6827300d0f4f991eaa95f22d709b3f8b0b91798204c4150bfb9790100cb9cd8dc5e98ab27f236f94b157b286e58cdcdbdc0e4600aa226b6dbaf0d15 +b1b9b1e01cd0939ba1557c223e152b1ea17b776f54788220af95fbc4b1c99ad5e874be8a5fc265889b92d178a77495ca36fe56812708b2499d3fff7f56113f60 +2d380774e5824f38e37b1cd6a64967cfb45dfcabd22f079620c85fdbffbcfcc34f6ad7069788a3f5604e972a1e53ac188db7adc20cb4aa2ab3600982fa5e785b +184a6a429345dff681c530fd8cef617ee78cc60a7ba6dac4869425889ad8eeddd193295b111f7d4ab3c8f81e849337217c28067a759f9870044136a9dfa96049 +75709bf46dbf8b3af44626e64359d2db8aef998ce8812508ea5b217fb74f229bd5367ddbaf71e981d8f57cf8cd682bbc67b2a2078e207851eace7f19c2e68e64 +9f0b3eb30cd71e384b84ec9ec98c1e388220eff23f9c5e272734190cce017bd7d2e7d003e3ed25bc67b2a3078620c8c19883848d297b5eb1d7c2bc1dae6f9073 +6d00896ab12f18d203431077d497e48102aa9cf8398bc13900bbc56ef40cdd6c6094a6f9476c504b7a6008821c84387ca81575049b4b04b66775ec290483b167 +6aa4c6bcb2a407ba20c835cbabc31f90f74c16837300f039ec07000c8b0f68851ecb852d6f0a591064afe8d1f73fafa81f0db544949562e739bb8e8525fe33de +e4854664406b777c53054136a98be3640df212112c3877a3576dda4e955227421b11d6f4401684ab490d00654efc6cb0e05ce652c591c6ada1caee0ee439b9a9 +84e7cd9c1ec882207f55ab8e9f9123a0c1b6936aeff175a9d36f304a72cbc67f307b7aa00a826c5277e62e91139ac205e70ad14b2e5eb9ac54ba0d08c388687d +c732a807aa20dc4d6aa0c313db4b38cf6ba95113abe21d4db60863c7d77a0e65510f4441904dea9eaf3e79cf143038c73b454c827954df24397d2fe9694498d4 +0351106493baeafe31ddbd1d303877530977c82ce66214fa12e175de73b7a87d3eae064d107e3ba6819f1f11323827ec7c9db8c3f505fa43f15921768bc6e3d3 +8a90044136a93b8210cf900b1f875c22849daf1377b8bef08ddcd2236a64f7863e9220c80b447f2a0c79c31ef2e49ce83b7df20ed73d39b965e33c865d3d9004 
+41afc2d3ef4d22ef99829e9c1374be4edfe11a0ec37a200982fcd21ed818e564cf6bd05c2f31e7eb0c1cae7b188ba6e3626e590f244190dfd943411d726271d8 +9373d54aa41b1bf7a389c0b084dcbed7a6f5401104d9f133b80ad0139ac21e8bf85d0974526c043af924d8d6034510e4e3e4c3bb22f21211f8e4dcdadf3994ad +67e1700d82713d100441dfc20c7fe54d7a5e0160e3ad08ff1e2c916b76befd6e5b0f0441d4d4ae8a7cf0d7f484a6c027e7bcdfef026b8c2572c5beb7c3d7f719 +605c10024108daefdf08bc44785a00ab4a661ad383ed659ac0fd1aa382a0bfaec75680a2a4f614baaca5978fa8e2d4bb9b17ff63b69f801ec6054136a9c76d04 +9bc139c0cbf93a2387ab3653d0c3a820e826f5f8c912ba591dbc1083b3f3f5533b5ce9694fc044f4302a889ada1125145d513b0b5fd6d2d130fedc0e57d6df3e +0d3d8c0ac2eb2cf52166cd6ab8ba4ee7e5707d46c72f3a113d8c09826e52530c047a4253f8b2964eeffa99395c9f51f9e24e450f6382209bd4b4cc0cba591d7e +8928f8e7ddc4cfdccd1613d7c7911816047d2f4ffbaad3ab4b47286bc93e11ad702a7ba6d8b83e8ec4b020c81604350c6dd98ae0a670abd4ed884f436f9a13db +992aef3dc2b0206a6a37d4ca4b15b5c32877ceb10ef92855769a1239add994f4302c88bb96da0dd538a05f3917e5ce39c631d09994143846dcc934293d0c0b82 +6c52d3cbccd0f74c51aaa4930b052856878d0c3d1f83f6aca6a5874141b4e4b3d41579bcdeba1c4744b9738efcde57ac1f1e19fa0a417a5413d3c3a020e8174d +d2dda9b6cd6a72b1b1f93a5c77744150de0953d3c3a0206a6a27f4e3a19c84a63877ce919caf3376b8324c08c27fc9c9e9614810f2263500cef62ace9d7304e7 +eb4c1dae0058d7a08ce7f6d9ba3e8ec48020c83b26fa593880b3678a74e7dca8f375d60e57c915c2d8f57124fa054137a9c9d73f0060ddeb17e95ad211e7eb6c +1dae005871b9311bc26679ef11fa054137a91977b9f29ac708ce0123ced7f93a5c01d6dda2d9c87be14fed379538f40ba2a67691333d90f484a628c1398cac01 +f375b80200795b307ef99657b5fc68f40a826e523317088e591d6b0f3ae07c5d5701e7111efabe8053e26f429cf6fd82fe64fefe2f734cfaaabcab2b66df4214 +ebf3ee5f5455d07984a665d8d43ff4a611913e41d04d6aa51356cf5c578a9d0fb15c777a0c973376b802ac1d13e3bed229d1b765622c9d9ac409ce013d4bc1ec +4b6c308cb679ee987a0561c5831cc9f30a607d1c5dc9667189dc00754b6f5b6a4d222e3d8288e5f03c2252700ee838303d7b3db0f605bc1a3493a1471046764c 
+88b9441c7dff675952e03d4dc368cc4a4f980edd8288b7753f22e25a75a0888e3dd4cce0bc7cca99ae96dd82881410eb24e25c3ed8d03377b882b9408c176a9c +26dd82a8c34e629098e6fd3b2febdc1dae0058a9a9735d2e3b0561c6a406229d9c7be1755998bdc315f8d3321a8f5c06325d3a0373764c6a2062700e00d6d973 +dc71d619ae00801dcb7dc13a023325ba0461c8a40680b629238e3e9beb76c738676d0be6ba63eadc325932a981989ed74fc475c369cd39343c2dba0451879ec4 +081183739f8666c56a3edb1d5397204c99d400d212a1cfae27b9b787bcd29986013a0461cba4062c4a7466ec98c5b9d94760a6c3b1208c99d400ec593533837b +993aa320e9e4381684c52f9f95dcdb99c2d4032ed57cd0ab2f12704e751c722488a871b03e4c4e6a2eecbe738f781937a95f93ce5c82a94782b833b95f4f66b5 +1a5beefa802ad79887186fa7de2b07451c09c2e28ec9a661330f1ab61e32db37d5bfaf02e1a08843416c354f487b9096081daef997bf5de60af3106355bdffb7 +8a9d727328089b0b440acee9b0fdbe627f2637bd405407b35b724f391e0862e763a0ab92960879aed9e634dc2cd5601cef91b897cb1e08c2a6490da4e09c3cf5 +d9cae15365293c0d49ba6c06a6220e046175c704d3539b22f5d945ebf239cb0b44b70dcd53c447415835a981149c9364e72a07ac72d18988d2e7532a1e18d521 +3e0ac2f25b3805e7a4b83bffea280794862dea7e1f6bb6a12be28320ec9ad44032ab45d8dd5d7c3d77fecf9c19be2a6628e6c050c48713730c93fa2127371de6 +aca5b68c7b726e06b4cd3f9e7b62c30769876370d9861a7ffc2008fa8e49eec4d4af2b72d3bf4aa9413f190db6ff369cbb817a5895de5d6831561425db5cd5a4 +8ede0b82f1fa904befaae882b86b73b161e7c4a2ff575b4967f5d2ae01315e14255bd34e82be17047d81e05db338dc555593db5e5bf6f9c5a309338ce14a3c24 +cf2a4d11ef8c6a8649cdbb667118c66263376cf809307cd72431d240bafce99d2018b160c984f892ee12db59760bcf1cc3774d92236febcbf136ef0441ffb631 +6ed625c038a05b4b8e9b60c071e507861189be19dff5bd0982914f5a915b5260ecbfda5a74e4049579e8817240e24d108c621bb247083947d6d39e290a73d103 +4111af82606446302e9a26c1d8336d1bd9a11314e6a3877145bc0aa2a6f7297d3500c724494b44788afbf9e801a8ee073ff22a084610a2e2ce610cc61241bf4f +3e21c4d2ae7f295b3bb8828745f4220886492d5ff899b3074b297e81b9b41b7f70dcca0d2ae245100c935aa18e61456f9a82736159dbbd0fc0d9b41952c45e10 
+0c933a57d850323496827321c9efabd853e8c5c3d42ffa3fba17444def4ca3d02dc72ca915c64f74b3e29c350b8c97ebab5f117b41305ebb2a77c730221b2938 +178ae2de6e7aabaf2bb8f7e3cf82e098d4b9cf3cfae09caf487ba630acec7a5b0542237d1d3c0b8261522bdd4fcc7817a5e05c084c2f0f12a1c26c5376fdf804 +6099d45a57037012cad312a14ebeb6bc3cc884ceb34dd5f1d31380f50debea4302ce91a3149c53265f3f54b1e730c85a46ad5d07244e0096e346ed6a00ce029d +82739a9897c3db0510de1d55473f3a01eb8d5ba82da49c84a6149cd3c3be1c6827df5cbb3a41c4c46fd7ae53704e8bead6bc1c24f5d011883f015a46dd2ac9b9 +1c769dd3dbd65a93f8d414378f629b113d44f5005c1ea4839fb01608c9e2024754f4a62938274e56dddfeb5da62887b01e8e0e489cd830a9b99da73d93287975 +fb20e4b951465c0f878a38e598d482e5983ac897f4bddb3695b51463f98351f824320a7a00aaece2cd4b73cad93155f29379cf2f863193ca5a8a50943fec5b0d +6fa8e80158e66f37ed9d724c6ae5fb89cbbc25b74d652d7d298b6fb2e584f4f978a1a220c5e65511a78c05422f08b18751f83895b574a744f94de3548b368717 +2a0af2a688d39afe298d93101fa8aee911b7bb9b09b8442c911528f12dcb26a884675cae6127f3aa8853c628ea8f32db7885a08b0d751c9f5146f9f983d8301f +fef58d503c3e1f196722a8ea01281e9e6f90f8f2a43a4ce2f372bd92ec4d590f00768b2d8e2f6e4f242ca2af877d4e791244620204d0c35e11491009fb04d1c3f391a1d3f16689445c02e901c8d66985489867ec424549d6491009e384bddbeeff558f8cb838e6391b0000000049454e44ae426082}}{\nonshppict +{\pict\picscalex50\picscaley48\piccropl0\piccropr0\piccropt0\piccropb0\picw4978\pich1143\picwgoal2822\pichgoal648\wmetafile8\bliptag1255513328\blipupi400{\*\blipuid 4ad59cf05f6a5d6754968f8ac0529726} +010009000003ea1501000000c115010000000400000003010800050000000b0200000000050000000c0242001b01030000001e00040000000701040004000000 +07010400c1150100410b2000cc00b40010030000000041001a01000000002800000010030000b400000001000800000000000000000000000000000000000000 +00000000000000000000ffffff0060606000efefef003030300020202000cfcfcf0010101000bfbfbf0080808000404040009f9f9f0070707000afafaf00dfdf 
+df00505050008f8f8f00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000040301010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000020101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010201010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000002010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010103020001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000201010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010103040000010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000020101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010103040000000101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000201010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010103040000000008080808080808080808 +08080808080808080808080808080808080808080808080808080808080808080803010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010106080808080808080808080808080808080808080808080808080808080808080808080808080808080808080301010101010101010101010101010101 +01030808080808080808080808080808080808080808080808080808080808080808080808080808080806010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010e080808080808080808080808080808080808080808080808080808080808080808080808080808080e0101010101010101010101010101 +01010101010101010101010101010101060808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808 +0808080808080808080808080808080808080808080808080e010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000020101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010103 +040000000000050000000000000000000000000000000000000000000000000000000000000000000000000000000000000b0101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010103070000000000000000000000000000000000000000000000000000000000000000000000000000000000000703 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 
+0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000070a0a09090d0806010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010607000000000000090000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00040101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101100000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000201010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000a0a090d060101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00020101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010607000000000000000e00000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000006010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010500000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000e010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000a0c0b080101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010101010101010106070000000000000000010f0000000000000000 
+000000000000000000000000000000000000000000000000000000000000000000000c0101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000040c0d010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000b01010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010607000000 +000000000000010d0000000000000000000000000000000000000000000000000000000000000000000000000000000000000701010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010f00000000000000000000000000000000000000000000000000000000000000000000000000000000000000080101 
+010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000f10060101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000b0101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010d0700000000000000000000010105000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000d010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010e00000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000004010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 
+0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000005020d03010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000b01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010b0000000000000000000000000101090000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000f0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000b0101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000070206010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000b0101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010101010b0000000000000000000000000001010e00000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000003010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +05000000000000000000000000000000000000000000000000000000000000000000000000000000000000050101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00041003010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000b010101010101010101010101010101010101010101010101 
+010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010b0000000000000000 +0000000000000101010f000000000000000000000000000000000000000000000000000000000000000000000000000000000000100101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010101010101010101010b000000000000000000000000000000000000000000000000000000000000000000000000000000000000000901010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000020d010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000b01010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101100000000000000000000000000000000101010d0000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000040101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+010101010101010101010101010101010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000070301010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000050b010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000b010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010102000000000000000000000000000000000101010105000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000006010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010e0000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000201010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000050b010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000b01010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010102000000000000000000000000000000000001010101090000000000 +000000000000000000000000000000000000000000000000000000000000000000000000000c0101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010200 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000e010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000050b01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000000000000000000000d0101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010102000000000000000000000000 +00000000000001010101030000000000000000000000000000000000000000000000000000000000000000000000000000000000000701010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010101010101010103070000000000000000000000000000000000000000000000000000000000000000000000000000000000000f010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000050b01010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000706010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010200000000000000000000000000000000000000010101010102000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000b010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010b00000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000008010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000040601010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000007060101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101030a00000000000000000000000000000000000000000101010101080000000000000000000000000000000000000000 
+000000000000000000000000000000000000000000000f0101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010105000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000004010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +09010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000706010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101030400000000000000000000000000000000000000000001010101010105000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000e010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010108000000 +000000000000000000000000000000000000000000000000000000000000000000000000000000000b0101010101010101010101010101010101010101010101 
+0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000406010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000070601010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101030400000000000000000000000000000000 +00000000000001010101010110000000000000000000000000000000000000000000000000000000000000000000000000000000000000100101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010102000000000000000000000000000000000000000000000000000000000000000000000000000000000000050101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007 +06010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101030400 +00000000000000000000000000000000000000000000010101010101030000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000050101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010103000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000090101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000050601010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000070601010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010106040000000000000000000000000000000000000000000000000101010101010102000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000006010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000070301010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000b010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000007060101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010106070000000000000000000000000000000000000000000000000001010101010101080000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000020101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010500000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000201010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000c01010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000706010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010106070000000000000000000000000000000000000000 +00000000000001010101010101010500000000000000000000000000000000000000000000000000000000000000000000000000000000000701010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+010101010101010101010d000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000000000000000f0301010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000007060101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010106070000000000 +00000000000000000000000000000000000000000000010101010101010110000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000b010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000f01010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000040e0101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000403010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010607000000000000000000000000000000000000000000000000000000000101010101010101030000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000f0101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010e00000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000008010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000070601010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000040301010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010b00000000000000000000000000000000000000000000000000000000000001010101010101010102 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000e010101010101010101010101010101010101010101 +0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010c000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000004010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000007060101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004030101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010b00000000000000000000000000000000000000000000000000 +00000000000001010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010103070000000000000000000000000000000000000000000000000000000000000000000000000000000000000b0101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000706010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000040301010101010101010101010101010101010101010101010101010101010101010101010101010101010101010b00000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010500000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000050101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010101010101010101010101010101010101010b000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000005010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000070601 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000004030101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010b0000000000000000000000000000000000000000000000000000000000000000000101010101010101010110000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000008010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101040000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000706010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000403010101010101010101010101010101010101010101 +010101010101010101010101010101010101010c0000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +03070000000000000000000000000000000000000000000000000000000000000000000000000000000000020101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000070301010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 
+010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000070e01010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000040301010101 +01010101010101010101010101010101010101010101010101010101010101010101020000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010200000000000000000000000000000000000000000000000000000000000000000000000000000000000703010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101020000000000000000000000000000000000000000000000000000000000000000000000000000000000000201010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004030101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000403010101010101010101010101010101010101010101010101010101010101010101010101020000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010106000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000b010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101030000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000e01010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +000000000000000000000000000000000000000000000000000a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a070000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000f0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000040301010101010101010101010101010101010101010101010101010101010101010101 
+02000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010400000000000000000000000000 +000000000000000000000000000000000000000000000000000000000a0101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101011000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000f01010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101080810090a0a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f010101010101010101010101010101010101 +01010101010101010101010101010302000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010b0000000000000000000000000000000000000000000000000000000000000000000000000000000000000e010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010105000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000008010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +0101010101010101010101010101010101010101010101010101010101010101010e0b0204000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000801010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000201 +01010101010101010101010101010101010101010101010101010101010304000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010103070000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005010101010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +0000000000000000000101010101010101010101010101010101010101010101010101010101010101010101010101010101010101030d020700000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007030101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000002010101010101010101010101010101010101010101010101010101010304000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010200000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0b010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 
+0101010101010101010d0f0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000a0101010101010101010101010101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000201010101010101010101010101010101010101010101010101010304000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010106000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000007010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010101030b0500000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000020101010101010101010101010101 +01010101010101010101030400000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101040000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010b0500000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000e01010101010101010101010101010101010101010101010101 +01010101010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000201010101010101010101010101010101010101010101060700000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000301010101010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0b05000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000020101010101010101010101010101010101010101060700000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010103070000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000201 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 
+000000000000000000000a0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000080101010101010101010101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000002010101010101010101010101010101010101060700000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010200000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000e01010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 
+0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010604000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000501010101010101010101010101010101010101010101010101010101010101010101010101010101 +09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000201010101010101010101 +01010101010106070000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101010600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010900000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101010101010101010101 
+01010101010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000901010101010101010101010101010d070000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101040000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008010101010101010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010807000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000070e010101010101 +01010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000b0101010101010101010101010b000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000010101010101010101010101010101010b00000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010604000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000002010101010101010101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000000000000000b010101010101010101010b000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010103070000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+0000000000000000000000000b010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101030400000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000e01010101010101010101010101010101010101010101010101010101010101010101010101 +09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000b010101010101 +01010b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +010101010101010c0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000007010101010101010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101040000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101 +01010101010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000d0101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010e00000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101030400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000070301010101 +01010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000101010101010101010101010101010101010a0000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000301010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010103040000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000001001010101010101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010b00000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000020101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010307000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000005010101010101010101010101010101010101010101010101010101010101010101010101 +09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101010101010107000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000e01010101010101010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000b01010101010101010101010101010101010101 +01010101010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000010101010101010101010101010101010101010c0000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101 +01010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010e00000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000e0101010101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101010101010101010101010a0000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000501010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010c0a0a0a0a0a0a030101010101010101010101010101010101010101010101010101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010307000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000001001010101010101010101010101010101010101010101010101010101010101010101 
+09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +010101010101010101010b0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000b010101010101010101010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010102000000000000000403010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004010101010101010101010101010101010101 +01010101010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000001010101010101010101010101010101010101010107000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000007010101010101010101010101010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +0101010101010101010101010101010101010101010101010101010101010101010101010b0000000000000000000f0101010101010101010101010101010101 +01010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e01 +01010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000101010101010101010101010101010101010101010c0000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a010101010101010101010101010101010101010101010101010101010101010101010101010101010d000000000000000000000002 +01010101010101010101010101010101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000001001010101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010e00000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000030101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010101010101010101 +010106070000000000000000000000000b0101010101010101010101010101010101010101010101010101010101010101010101010101010900000000000000 
+00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101010101010101010101010101 +09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +0101010101010101010101010a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000020101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 +0101010101010101010101010101010101030700000000000000000000000000000d010101010101010101010101010101010101010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000301010101010101010101010101010101 +01010101010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000010101010101010101010101010101010101010101010b0000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000e01010101010101010101010101010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010101010101010101010101010101010101010304000000000000000000000000000000070601010101010101010101010101 +01010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000b +01010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010107000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a01010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a010101010101010101010101010101010101010101010101010101010101010101010101010f000000000000000000000000000000 +00000703010101010101010101010101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010b00000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000c010101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101090000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000801010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010101010101010102 +00000000000000000000000000000000000000040301010101010101010101010101010101010101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101050000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000040101010101010101010101010101010101010101010101010101010101010101 +09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +010101010101010101010101010e0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000501010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 
+01010101010101010101010101010b0000000000000000000000000000000000000000000f010101010101010101010101010101010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101100000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101010101010101 +01010101010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000101010101010101010101010101010101010101010101010f000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000040a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a04000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000b010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010101010101010101010101010101010d00000000000000000000000000000000000000000000000201010101010101010101 +01010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 
+00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010e00000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +08010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010d0000000000000000000000000000000000 +00000000000000000000000000000000000000000000000010010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000000007010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010101010101010101010101010101010101010101010101010101010607000000000000000000000000000000000000 +0000000000000b010101010101010101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101040000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010109000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010105 +00000000000000000000000000000000000000000000000000000000000000000000000000000000050101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101050000000000000000000000000000000000000000000000000000000000000000000000000000 +00000009010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010101010e07000000 +00000000000000000000000000000000000000000000000b01010101010101010101010101010101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101011000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000f01010101010101010101010101010101010101010101010101010101010101 +09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 
+01010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000000006010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010101010d0000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000030101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 +01010101010101010101030400000000000000000000000000000000000000000000000000000007060101010101010101010101010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010e00000000000000000000000000000000000000000000000000000000000000000000000000000000000007010101010101010101010101010101 +01010101010101010101010101010101100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000010101010101010101010101010101010101010101010101010e0000000000000000000000000000000000000000000000000000000000000000 +00000000000000000002010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010a00000000000000 +00000000000000000000000000000000000000000000000000000000000000000000020101010101010101010101010101010101010101010101010101010101 
+010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000070e010101010101 +01010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101070000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00030101010101010101010101010101010101010101010101010101010101010102000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000001010101010101010101010101010101010101010101010101010f000000000000000000000000000000 +00000000000000000000000000000000000000000000000000070301010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010e0000000000000000000000000000000000000000000000000000000000000000000000000000000000000e01010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010101010101010101010101010101010101010101010101010200000000000000000000000000000000000000000000 +00000000000000000004030101010101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 
+0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010f00000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000801010101010101010101010101010101010101010101010101010101010101010200000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +0d00000000000000000000000000000000000000000000000000000000000000000000000000000000000b010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010c00000000000000000000000000000000000000000000000000000000000000000000000000000000 +000a0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101011000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010109000000000000000000 
+0000000000000000000000000000000000000000000000000000000000000000000d010101010101010101010101010101010101010101010101010101010101 +0101010c000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +010101010101010101010101010101010105000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010107000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000801010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 +010101010101010b0000000000000000000000000000000000000000000000000000000000000000000000020101010101010101010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +01010101010101010101010101010101010101010b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000e0101010101010101010101010101010101010101010101010101010101010101010101010101010101010b000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000501010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010101010101010101060700000000000000000000000000000000000000000000000000000000000000000000000010010101 +01010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101030000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000c01010101010101010101010101010101010101010101010101010101010101010101010b0000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101010e0000000000000000000000000000 +00000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101010101010101 
+0101010101010400000000000000000000000000000000000000000000000000000000000000000000000000000000000b010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a01010101010101010101010101010101010101010101010101010e0700000000000000000000000000000000000000000000000000 +000000000000000000000000000b0101010101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010104000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000a0101010101010101010101010101010101010101010101010101010101010101010101010b000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +01010f00000000000000000000000000000000000000000000000000000000000000000000000000000000050101010101010101010101010101010101010101 +01010101010101010101010101010101010101010106000000000000000000000000000000000000000000000000000000000000000000000000000000000007 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101030400000000000000000000 
+00000000000000000000000000000000000000000000000000000000000706010101010101010101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101020000000000000000 +00000000000000000000000000000000000000000000000000000000000000000004010101010101010101010101010101010101010101010101010101010101 +010101010101010b0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +0101010101010101010101010101010101010d000000000000000000000000000000000000000000000000000000000000000000000000000000000008010101 +01010101010101010101010101010101010101010101010101010101010101010101010101020000000000000000000000000000000000000000000000000000 +00000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 +010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000000070e01010101010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101100000000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101 +0101010101010101010101010101010101010101010101010b000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010500000000000000000000000000000000000000000000000000000000 +00000000000000000000000002010101010101010101010101010101010101010101010101010101010101010101010101010101030000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000030101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010101010101020000000000000000000000000000000000000000000000000000000000000000000000000000000000000004 +03010101010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010800000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000101010101010101010101010101010101010101010101010101010101010101010101010101010b00000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010109000000000000000000000000 +00000000000000000000000000000000000000000000000000000000070301010101010101010101010101010101010101010101010101010101010101010101 +01010101100000000000000000000000000000000000000000000000000000000000000000000000000000000000020101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010101010101010101010101010101010101100000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000a0101010101010101010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000e0101010101010101010101010101010101010101010101010101010101010101010101010101010b0000000000 +00000000000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 
+0101010300000000000000000000000000000000000000000000000000000000000000000000000000000000000b010101010101010101010101010101010101 +01010101010101010101010101010101010101010500000000000000000000000000000000000000000000000000000000000000000000000000000000000e01 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101010b0000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000002010101010101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000080101010101010101010101010101010101010101010101010101010101 +01010101010101010101010b00000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +010101010101010101010101010101010101010102000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101 +01010101010101010101010101010101010101010101010101010101010101010101010d00000000000000000000000000000000000000000000000000000000 +000000000000000000000000000a0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101 +06070000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001001010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010000000000000000000000000000000000000000000000000000000000000000000000000000000000000801010101010101010101010101 +010101010101010101010101010101010101010101010101010101010b0700000000000000000000000000000000000000000000000000000000000000000000 +00000000090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000e0101010101010101010101010101010101010101010101010101010101010101010101010f000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000801010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010101010106070000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000b010101010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000801010101010101010101010101010101010101010101010101010101010101010101010101010101010106070000000000000000000000000000000000 +00000000000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010500000000000000000000 +00000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101010101 +01010e00000000000000000000000000000000000000000000000000000000000000000000000000000000000501010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010101010101010101010101010103040000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000601010101010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101070000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000008010101010101010101010101010101010101010101010101010101010101010101010101010101010101010607 +00000000000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +01010101011000000000000000000000000000000000000000000000000000000000000000000000000000000000050101010101010101010101010101010101 +0101010101010101010101010101010101010c00000000000000000000000000000000000000000000000000000000000000000000000000000000000b010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101040000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000007060101010101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010a00000000000000 +00000000000000000000000000000000000000000000000000000000000000000000080101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010106070000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +0101010101010101010101010101010101010101010300000000000000000000000000000000000000000000000000000000000000000000000000000000000d +01010101010101010101010101010101010101010101010101010101010101010101070000000000000000000000000000000000000000000000000000000000 +00000000000000000000000701010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101020000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000403010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010a00000000000000000000000000000000000000000000000000000000000000000000000000000000000d01010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010607000000000000000000000000000000000000000000000000000000000000 +00000000090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010101010200000000000000000000000000000000000000000000000000 +000000000000000000000000000000020101010101010101010101010101010101010101010101010101010101010101010b0000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000004010101010101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010101010101010101010101010101010101010a000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101060700000000000000000000000000 +00000000000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010108000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000301010101010101010101010101010101010101010101010101010101010101 +010a0000000000000000000000000000000000000000000000000000000000000000000000000000000000030101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a010101010101010101010101010101010b000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000201010101010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010101010a0000000000000000000000000000000000000000000000 +00000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010607000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +0101010101010105000000000000000000000000000000000000000000000000000000000000000000000000000000000b010101010101010101010101010101 +01010101010101010101010101010101060000000000000000000000000000000000000000000000000000000000000000000000000000000000020101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010101010101010106000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010a00000000000000 +000000000000000000000000000000000000000000000000000000000000000000000d0101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101060700000000000000000000000000000000000000000000000000000000000000090101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101010101010101010101010101010101010101011000000000000000000000000000000000000000000000000000000000000000000000000000000000 +04010101010101010101010101010101010101010101010101010101010101010200000000000000000000000000000000000000000000000000000000000000 +000000000000000000000e0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010106070000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000b010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010a00000000000000000000000000000000000000000000000000000000000000000000000000000000000801010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010106070000000000000000000000000000000000000000000000000000 
+00000000090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010101010103000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000e01010101010101010101010101010101010101010101010101010101010307000000000000000000000000000000 +000000000000000000000000000000000000000000000000000a0101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010101010304000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000060101010101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010105000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000801010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010605000000000000000000 +00000000000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010200000000000000 
+0000000000000000000000000000000000000000000000000000000000000000000c010101010101010101010101010101010101010101010101010101010110 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000801010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010101010101010104000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000706010101010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000008010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010103040000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +01010101010101010800000000000000000000000000000000000000000000000000000000000000000000000000000000050101010101010101010101010101 +01010101010101010101010101010105000000000000000000000000000000000000000000000000000000000000000000000000000000000501010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000a01010101010101010101010102000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000040301010101010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000080101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010304000000000000000000000000000000000000000000000000000000090101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101010101010101010101010101010101010101010101050000000000000000000000000000000000000000000000000000000000000000000000000000 +00000d01010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000b01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010109000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 
+010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010000000000000000000000000000000000000000000000000000000000000000000000000000000000000801010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101030400000000000000000000000000000000000000000000 +00000000090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010101010101011000000000000000000000000000000000000000000000 +000000000000000000000000000000000000020101010101010101010101010101010101010101010101010101010f0000000000000000000000000000000000 +00000000000000000000000000000000000000000000000701010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01010101010b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000020101010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010600000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000301010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010103040000000000 +00000000000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010103070000000000 +000000000000000000000000000000000000000000000000000000000000000000000003010101010101010101010101010101010101010101010101010e0000 +00000000000000000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010101010800000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000c010101010101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101030400000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +0101010101010101010102000000000000000000000000000000000000000000000000000000000000000000000000000000000b010101010101010101010101 +01010101010101010101010101090000000000000000000000000000000000000000000000000000000000000000000000000000000000030101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010101010607000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000b01010101010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010103040000000000000000000000000000000000000000000000090101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101010101010101010101010101010101010101010101010600000000000000000000000000000000000000000000000000000000000000000000000000 +00000004010101010101010101010101010101010101010101010101010700000000000000000000000000000000000000000000000000000000000000000000 
+00000000000002010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010305000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +08010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010b00000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010304000000000000000000000000000000000000 +00000000090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010101010101010101040000000000000000000000000000000000000000 +00000000000000000000000000000000000000000601010101010101010101010101010101010101010101010b00000000000000000000000000000000000000 +000000000000000000000000000000000000000000000e0101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101 +01030400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000070601010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010c00000000000000000000000000000000000000000000000000000000000000000000000000000000 +000f0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101030400 +00000000000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010b00000000 +0000000000000000000000000000000000000000000000000000000000000000000000000c01010101010101010101010101010101010101010101010a000000 +000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000a0101010101010200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000005030101010101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 
+0101010101010101010101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010101010101010101030a000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +01010101010101010101010307000000000000000000000000000000000000000000000000000000000000000000000000000000070101010101010101010101 +01010101010101010101010e00000000000000000000000000000000000000000000000000000000000000000000000000000000000801010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01010101010c00000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000403010101010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101070000000000000000 +00000000000000000000000000000000000000000000000000000000000000000010010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010200000000000000000000000000000000000000090101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101010101010101010101010101010101010101010101010101020000000000000000000000000000000000000000000000000000000000000000000000 +00000000000d01010101010101010101010101010101010101010102000000000000000000000000000000000000000000000000000000000000000000000000 +00000000050101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010b00000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000002010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010e000000000000000000000000000000000000000000000000000000000000000000000000000000000000080101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101020000000000000000000000000000 +00000000090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010101010101010101010600000000000000000000000000000000000000 
+0000000000000000000000000000000000000000000f010101010101010101010101010101010101010103070000000000000000000000000000000000000000 +00000000000000000000000000000000000000000b01010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010800 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000c01010109000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010101010d0000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000e0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010200000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101040000 +000000000000000000000000000000000000000000000000000000000000000000000000000003010101010101010101010101010101010101010b0000000000 +00000000000000000000000000000000000000000000000000000000000000000000000701010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 
+000000000000000000000a0101060700000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000000000b0101090000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010200000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101020000000000000000000000000000000009010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +010101010101010101010101010b0000000000000000000000000000000000000000000000000000000000000000000000000000000010010101010101010101 +01010101010101010101050000000000000000000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000a01030500000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008010900000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 
+0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010105000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010102000000000000000000000000000000090101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101010101010101010101010101010101010101010101010101010307000000000000000000000000000000000000000000000000000000000000000000 +00000000000004010101010101010101010101010101010101080000000000000000000000000000000000000000000000000000000000000000000000000000 +00000003010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000a030400000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000007060900000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010e0000000000000000000000000000000000000000000000000000000000000000000000000000000000000c010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010200000000000000000000 +00000000090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101020000000000000000000000000000000000 +00000000000000000000000000000000000000000000000601010101010101010101010101010101010200000000000000000000000000000000000000000000 +00000000000000000000000000000000000002010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0200000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000050c000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010110000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0d010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010102000000000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010600 +0000000000000000000000000000000000000000000000000000000000000000000000000000000c010101010101010101010101010101010e00000000000000 +000000000000000000000000000000000000000000000000000000000000000000000e0101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000030101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010200000000000000000000000009010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +01010101010101010101010101010104000000000000000000000000000000000000000000000000000000000000000000000000000000070101010101010101 +010101010101010109000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000007000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010e0000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000050101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010101010c0000000000000000000000090101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +010101010101010101010101010101010101010101010101010101010101010b0000000000000000000000000000000000000000000000000000000000000000 +00000000000000000d01010101010101010101010101010105000000000000000000000000000000000000000000000000000000000000000000000000000000 +00080101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000004030200000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010c0000000000000000000000000000000000000000000000000000000000000000000000000000000000000201010101010101010101010101010101 +0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010b000000000000 +00000000090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010307000000000000000000000000000000 +0000000000000000000000000000000000000000000000000f01010101010101010101010101010b000000000000000000000000000000000000000000000000 +00000000000000000000000000000000050101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000000000000070301010f0000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+0101010101010101010101010101010101010101050000000000000000000000000000000000000000000000000000000000000000000000000000000000000b +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010b0000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +02000000000000000000000000000000000000000000000000000000000000000000000000000000000e010101010101010101010101010a0000000000000000 +00000000000000000000000000000000000000000000000000000000000000000b01010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007060101010304000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010d00000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000e010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010101010101010101010101010101010101010101010b000000000000000009010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010e0000000000000000000000000000000000000000000000000000000000000000000000000000000010010101010101 +0101010101010e000000000000000000000000000000000000000000000000000000000000000000000000000000000701010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000d010101010103070000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010a000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000004010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010b00000000000000090101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +0101010101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000000000000000000000000000000000 
+000000000000000000050101010101010101010101010c0000000000000000000000000000000000000000000000000000000000000000000000000000000009 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000b010101010101010607000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010800000000000000000000000000000000000000000000000000000000000000000000000000000000000000100101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010b0000 +00000000090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101010b0000000000000000000000000000 +00000000000000000000000000000000000000000000000000000601010101010101010101030700000000000000000000000000000000000000000000000000 +00000000000000000000000000000003010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+0000000000000000000000000000000000000000000000000000000000000000000000000c010101010101010101080000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010101010101010101010101010a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e01 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010b00000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010700000000000000000000000000000000000000000000000000000000000000000000000000000002010101010101010101010b00000000000000000000 +00000000000000000000000000000000000000000000000000000000000002010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000201010101010101010101010b +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000040101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010101010101010b0000000009010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +0101010101010101010101010101010101010c000000000000000000000000000000000000000000000000000000000000000000000000000000070101010101 +010101010104000000000000000000000000000000000000000000000000000000000000000000000000000000000e0101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000004030101010101010101010101010c0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101050000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000001001010101010101010101010101010101010101010101010101010101010101010101 
+0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010b000000090101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +010101010101010101010101010101010101010101010101010101010101010101010e0000000000000000000000000000000000000000000000000000000000 +00000000000000000000000b010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000000000a0101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000403010101010101010101010101010102000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0900000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010b0700090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+0000000000000101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010a000000000000000000000000 +0000000000000000000000000000000000000000000000000000000f010101010101010102000000000000000000000000000000000000000000000000000000 +00000000000000000000000000080101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000706010101010101010101010101010101010400000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010106000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010106070901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010b000000000000000000000000000000000000000000000000000000000000000000000000000000000e01010101010103000000000000000000000000 +00000000000000000000000000000000000000000000000000000000050101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000006010101010101010101010101010101 +01010304000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000b010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010610010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010700000000000000000000000000000000000000000000000000000000000000000000000000000010010101 +01010109000000000000000000000000000000000000000000000000000000000000000000000000000000000b01010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000b +01010101010101010101010101010101010101060700000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010c0000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000007030101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +0101010101010101010101010101010101010101010101010101010101010101010101010c000000000000000000000000000000000000000000000000000000 +00000000000000000000000005010101010101050000000000000000000000000000000000000000000000000000000000000000000000000000000701010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000001001010101010101010101010101010101010101010106070000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010b0000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010e0000000000000000000000 +000000000000000000000000000000000000000000000000000000000006010101010d0000000000000000000000000000000000000000000000000000000000 +00000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000201010101010101010101010101010101010101010101010b00000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010106070000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000301010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010a00000000000000000000000000000000000000000000000000000000000000000000000000000002010101010a00000000000000000000000000 +00000000000000000000000000000000000000000000000000000003010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101 +01010101010110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010101010101010101010101010101010101010101010e070000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000020101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +0101010101010101010101010101010101010101010b000000000000000000000000000000000000000000000000000000000000000000000000000000070301 
+010e0000000000000000000000000000000000000000000000000000000000000000000000000000000002010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000403010101 +01010101010101010101010101010101010101010101010200000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010106070000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000e01010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010700000000000000000000000000000000000000000000000000 +00000000000000000000000000000b01010c000000000000000000000000000000000000000000000000000000000000000000000000000000000e0101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000070e010101010101010101010101010101010101010101010101010101010a0000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010106070000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000a01010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010c000000000000000000 +0000000000000000000000000000000000000000000000000000000000000a010107000000000000000000000000000000000000000000000000000000000000 +0000000000000000000a010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000070601010101010101010101010101010101010101010101010101010101010304000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 
+00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010607000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000006010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010e000000000000000000000000000000000000000000000000000000000000000000000000000000000e0b000000000000000000000000000000 +00000000000000000000000000000000000000000000000000080101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000d01010101010101010101010101010101010101010101 +01010101010101010103070000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010d07000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000201010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010b00000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010f00000000000000000000000000000000000000000000000000000000000000000000000000000009 +04000000000000000000000000000000000000000000000000000000000000000000000000000000050101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000b01010101010101 +01010101010101010101010101010101010101010101010101010607000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010c00000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000703010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010b00000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 
+0101010101010101010101010101010101010101010101010101010101010101010101010101010d000000000000000000000000000000000000000000000000 +00000000000000000000000000000007000000000000000000000000000000000000000000000000000000000000000000000000000000000b01010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000201010101010101010101010101010101010101010101010101010101010101010101010d0000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010e0400000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010d0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010500000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000070101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000002010101010101010101010101010101010101010101010101010101010101010101010101010b000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010109070000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101060700000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000040301010101010101010101010101010101010101010101010101 +010101010101010101010101010c0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010805000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000008010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101060700000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +0101010101010101010101010101010101010101010101010e000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000003010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000050301010101010101010101 +01010101010101010101010101010101010101010101010101010101010102000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010101010101010e0f00000000000000000000000000000000000000000000000000000000000000 
+0000000000000000000000000000000000000000000a010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101060700000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010f00000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000007060101010101010101010101010101010101010101010101010101010101010101010101010101010101030400000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080a0000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000007030101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101060700 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010d000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000e010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000060101010101010101010101010101010101010101010101010101010101010101010101010101010101010103 +04000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101030b05000000 +000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000d0101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010103050000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101050000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+0000000000000000000000000000000000000000000a010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000b0101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101060700000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101030b0f000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000c010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010103040000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000080101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010106000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010108100f0000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000004010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010103040000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +0101010101010101010101010101010101010101010101010101010101010101010101010101010101010e000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000050101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +02010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010b00000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101 
+01010101010101010101010101010101010101010101010101010101010101080d0c0a0000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000070e0101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010103040000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010f00000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000b0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000004010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +000000000000000000080808080808080808080808080808080808080808080808080808080808090909090a0a00000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000b0101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010304000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010d00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000070101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000004030101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010200000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +02010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010102000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101050000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000070e0101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000002010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010102000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010900000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007060101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010304000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000002010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010102000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010e000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000201010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000b010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010e070000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004030101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000e010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000b010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010607000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004030101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010900000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010800000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000002010101010101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010101010d0000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000403010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010b00000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101050000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000080101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0108000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f010101010101 +0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010b000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000403010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010b00000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101011000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+05010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000403010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000a03010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010b0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000b0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000503010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010102000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000201010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010b0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101020000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000070101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000706010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101030400000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101060000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010800000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000901010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010103050000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000b0101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101060700000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +05000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000b01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101060700000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000005080101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101060700 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000201010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000901010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010106000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f030101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010106070000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010300000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000e010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000201010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010b00000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00070d01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010101010101010101010101010101010101010101010e070000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101020000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000401010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +09000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 
+010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000a0e01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010103040000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000080101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00040301010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010103040000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010105000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000005010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000070601010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101040000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f0e0101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010304000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101100000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000b0101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000070601010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010304000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a08010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010304000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010300000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000070101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000b01010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010e070000000000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000050b0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010101010101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010102000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000901010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010800000000000000000000000000000000000000000000000000000000000000000000000000000000000000001001 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010607000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000709010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010102000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101060000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000301010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000020101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010b0000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000051003010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010104000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000201010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000f0101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010b000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a0d0101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010200000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010b0000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000e010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01080000000000000000000000000000000000000000000000000000000000000000000000000000000000040301010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101020000000000000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +07020e01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010200000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+0101010103070000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010800000000000000000000000000000000000000000000000000000000000000000000000000000000070301010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010101010101010101010101010101010101010f000000000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000a10030101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010b00000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010102000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000008010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 
+00000000000000000007060101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101030400000000000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000f0b0301010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010b0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101060000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000005010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000000000000080101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010103050000000000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 
+0a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000007020d03010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010b0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010104000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000000000000000b0101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010800000000000000000000000000000000000000000000000000000000000000000000000000000b0101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101060700000000000000000000000000000000000000000000000000000000000000000000000000090101010101010101010101010101 +010101010101010101010101010101010a0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000050f0908010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+010101010101010101010101010101010101010101010101010101010101010b0000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +0101010101010b000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000070101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000000c0101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000000000000000 +00090101010101010101010101010101010101010101010101010101010101010a00000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a +090d0e01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010d0000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101030700000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000090101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010108000000000000000000000000000000000000000000000000000000000000 +00000000000002010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010b00000000000000000000000000000000000000 +0000000000000000000000000000000000090101010101010101010101010101010101010101010101010101010101010a000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000a0a090908060101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101060700 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010102000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000301010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101080000000000000000000000000000 +00000000000000000000000000000000000000000004010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101090000 +00000000000000000000000000000000000000000000000000000000000000000009010101010101010101010101010101010101010101010101010101010101 +0a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000040a0a0a0a0a02090909090808010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010106070000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010106070000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010106070000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010103050000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010304000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010304000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000001010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010304000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010304000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 
+01010101010200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101 +01010101010101010101010101010101010101010200000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000040000002701ffff030000000000}}}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid13703289\charrsid10049382 +\par }}{\footerl \ltrpar \pard\plain \ltrpar\s15\ql \li0\ri0\widctlpar\tqc\tx4320\tqr\tx8640\pvpara\phmrg\posxc\posy0\wraparound\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 +\fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\field{\*\fldinst {\rtlch\fcs1 \af0 \ltrch\fcs0 \cs16\insrsid13820 PAGE }}{\fldrslt {\rtlch\fcs1 \af0 \ltrch\fcs0 \cs16\lang1024\langfe1024\noproof\insrsid13820 1}}}\sectd \ltrsect +\linex0\endnhere\sectdefaultcl\sftnbj {\rtlch\fcs1 \af0 \ltrch\fcs0 \cs16\insrsid13820 +\par }\pard \ltrpar\s15\ql \li0\ri0\widctlpar\tqc\tx4320\tqr\tx8640\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 {\rtlch\fcs1 \af0 \ltrch\fcs0 \insrsid13820 +\par }}{\footerr \ltrpar \pard\plain \ltrpar\s15\ql \li0\ri0\widctlpar\tqc\tx4320\tqr\tx8640\pvpara\phmrg\posxc\posy0\wraparound\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 +\fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\field{\*\fldinst {\rtlch\fcs1 \af1\afs16 \ltrch\fcs0 \cs16\f40\fs16\insrsid13820\charrsid11077217 PAGE }}{\fldrslt {\rtlch\fcs1 \af1\afs16 \ltrch\fcs0 +\cs16\f40\fs16\lang1024\langfe1024\noproof\insrsid8221668 2}}}\sectd \ltrsect\linex0\endnhere\sectdefaultcl\sftnbj {\rtlch\fcs1 \af1\afs16 \ltrch\fcs0 \cs16\f40\fs16\insrsid13820\charrsid11077217 +\par }\pard \ltrpar\s15\ql \li0\ri0\widctlpar\tqc\tx4320\tqr\tx8640\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid13713571 {\rtlch\fcs1 \ab\af1 \ltrch\fcs0 \b\f1\insrsid13820\charrsid6309293 +\par }}{\headerf \ltrpar \pard\plain \ltrpar\s17\ql \li0\ri0\widctlpar\tqc\tx4320\tqr\tx8640\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 
\af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 +{\rtlch\fcs1 \af0 \ltrch\fcs0 \insrsid13786860 +\par }}{\footerf \ltrpar \pard\plain \ltrpar\s15\ql \li0\ri0\widctlpar\tqc\tx4320\tqr\tx8640\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \fs20\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 { +\rtlch\fcs1 \af0 \ltrch\fcs0 \insrsid13786860 +\par }}{\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta )}} +{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl8 +\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}\pard\plain \ltrpar\qc \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid12284823 +\rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \b\f40\cf1\insrsid14361568 ADVANCED MICRO DEVICES, INC. 
+\par INTERNAL EVALUATION LICENSE +\par }\pard \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid332559 {\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \b\f40\cf1\insrsid332559 +\par Software: AMD Beta Render Pipeline Shaders S}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \b\f40\cf1\insrsid14361568 DK}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \b\f40\cf1\insrsid332559 \line +\par }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \b\f40\insrsid12325322\charrsid12325322 BETA PROGRAM: }{\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 \f40\insrsid12325322\charrsid14361568 +THIS IS A BETA PROGRAM AND THE SOFTWARE YOU ARE RECEIVING MAY NOT BE THE FINAL RELEASE. IT MAY NOT WORK THE WAY +THE FINAL VERSION WILL. CERTAIN FEATURES MAY BE MISSING OR DISABLED. ANY AND ALL USE OF THE SOFTWARE AND DOCUMENTATION IS AT YOUR SOLE RISK AND ADVANCED MICRO DEVICES, INC (\'93}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \b\f40\insrsid12325322\charrsid14361568 +AMD}{\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 \f40\insrsid12325322\charrsid14361568 \'94) IS UNDER NO OBLIGATION TO PROVIDE TECHNICAL SUPPORT, UPDATES, BUG FIXES, OR +MAINTENANCE OF ANY KIND FOR THE SOFTWARE OR DOCUMENTATION. AMD MAKES NO REPRESENTATIONS THAT A FINAL VERSION OF THE SOFTWARE OR DOCUMENTATION WILL BE RELEASED OR REPLACE THE VERSION YOU HAVE RECEIVED AS PART OF THE BETA PROGRAM. AMD MAY CHANGE THE SOFTWAR +E, INCLUDING ITS FEATURES AND FUNCTIONALITIES, FOR THE FINAL RELEASE.}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \b\f40\insrsid12325322 +\par +\par }{\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 \f40\insrsid12284823 THIS IS A LEGAL AGREEMENT (\'93}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \b\f40\insrsid12284823\charrsid7820227 AGREEMENT}{\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 \f40\insrsid12284823 \'94 +) BETWEEN YOU AND }{\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 \f40\insrsid14361568 AMD. 
}{\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 \f40\insrsid12284823 YOUR USE OF THE AMD SOFTWARE IDENTIFIED }{\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 \f40\insrsid14361568 ABOVE}{ +\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 \f40\insrsid12284823 AND RELATED DOCUMENTATION (AS DEFINED BELOW), ARE SUBJECT TO THE FOLLOWING TERMS AND CONDITIONS. }{\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 \f40\insrsid12284823\charrsid7820227 +IF YOU ARE ENTERING INTO THIS AGREEMENT ON BEHALF OF A COMPANY OR OTHER LEGAL ENTITY, YOU REPRESENT THAT YOU HAVE THE LEGAL AUTHORITY TO BIND THE ENTITY TO THIS AGREEMENT, IN WHICH CASE \'93YOU\'94 WILL MEAN THE ENTITY YOU REPRESENT.}{\rtlch\fcs1 +\ab\af0\afs18 \ltrch\fcs0 \f40\insrsid12284823 +\par }\pard\plain \ltrpar\s34\ql \li0\ri0\sb100\sa100\sbauto1\saauto1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid12284823 \cbpat8 \rtlch\fcs1 \af0\afs24\alang1025 \ltrch\fcs0 +\fs24\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 \f40\fs18\insrsid12284823 DO NOT USE THIS }{\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 \f40\fs18\insrsid14361568 SOFTWARE}{\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 +\f40\fs18\insrsid12284823 OR ANY OF THE DOCUMENTATION UNTIL YOU HAVE CAREFULLY READ AND AGREED TO THE FOLLOWING TERMS AND CONDITIONS. }{\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 \f40\fs18\insrsid12284823\charrsid7820227 }{\rtlch\fcs1 \ab\af0\afs18 +\ltrch\fcs0 \f40\fs18\insrsid12284823 IF YOU DO NOT AGREE TO THE TERMS OF THIS AGREEMENT, DO NOT INSTALL, COPY, OR USE THE }{\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 \f40\fs18\insrsid14361568 SOFTWARE}{\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 +\f40\fs18\insrsid12284823 OR ANY DOCUMENTATION. 
BY INSTALLING, COPYING}{\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 \f40\fs18\insrsid14361568 ,}{\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 \f40\fs18\insrsid12284823 OR USING THE }{\rtlch\fcs1 \ab\af0\afs18 +\ltrch\fcs0 \f40\fs18\insrsid14361568 SOFTWARE}{\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 \f40\fs18\insrsid12284823 OR DOCUMENTATION, YOU ARE EXPRESSLY AGREEING TO ALL OF THE TERMS AND CONDITIONS OF THIS AGREEMENT. }{\rtlch\fcs1 \ab\af0\afs18 \ltrch\fcs0 +\f40\fs18\insrsid12284823\charrsid7820227 }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid4143008\charrsid1258595 +\par {\listtext\pard\plain\ltrpar \s33 \rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\f40\fs18\lang1033\langfe1033\langnp1033\insrsid3084267\charrsid14889927 \hich\af40\dbch\af0\loch\f40 1.\tab}}\pard\plain \ltrpar +\s33\qj \fi-720\li720\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\ls18\adjustright\rin0\lin720\itap0\pararsid14889927\contextualspace \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 { +\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\f40\insrsid3084267\charrsid14889927 DEFINITIONS}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\f40\insrsid14889927\charrsid14889927 . 
+\par }\pard \ltrpar\s33\qj \li1080\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin1080\itap0\pararsid14889927 {\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid14889927\charrsid1258595 }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 +\f40\insrsid3084267\charrsid1258595 +\par }\pard\plain \ltrpar\s30\qj \li0\ri0\nowidctlpar\tx720\wrapdefault\faauto\rin0\lin0\itap0\pararsid14889927 \rtlch\fcs1 \af40\afs24\alang1025 \ltrch\fcs0 \f40\fs24\cf1\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 +\b\fs18\cf0\insrsid6115309\charrsid14889927 1.1}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\fs18\insrsid16001025\charrsid1258595 }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\fs18\insrsid14489021\charrsid1258595 \tab }{\rtlch\fcs1 \ab\af40\afs18 \ltrch\fcs0 +\b\fs18\insrsid16001025\charrsid1258595 \'93Derivative Works\'94}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid16001025\charrsid1258595 means any work, revision, modification or adaptation}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid151197 + made to or}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid16001025\charrsid1258595 derived from}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid12284823 }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid16001025\charrsid1258595 the }{\rtlch\fcs1 +\af40\afs18 \ltrch\fcs0 \fs18\insrsid14361568 Software}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid16001025\charrsid1258595 or any work that incorporates the }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid14361568 Software}{\rtlch\fcs1 +\af40\afs18 \ltrch\fcs0 \fs18\insrsid16001025\charrsid1258595 in whole or in part. 
}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid4143008\charrsid1258595 +\par }\pard\plain \ltrpar\s33\qj \li720\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin720\itap0\pararsid12658752\contextualspace \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 { +\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid6115309\charrsid1258595 +\par }\pard\plain \ltrpar\qj \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid12658752 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af37\afs18 +\ltrch\fcs0 \b\f40\insrsid6115309\charrsid14889927 1.}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\f40\insrsid13270131\charrsid14889927 2}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid6115309\charrsid1258595 \tab }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 +\f40\insrsid4143008\charrsid1258595 \'93}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\f40\insrsid4143008\charrsid1258595 Documentation}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid4143008\charrsid1258595 \'94 + means documentation, associated, included, or provided in connection with the }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid14361568 Software}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid4143008\charrsid1258595 , or any portion thereof}{ +\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid151197 , including but not limited to information provided online, electronically, or as install scripts}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid4143008\charrsid1258595 .}{\rtlch\fcs1 \af37\afs18 +\ltrch\fcs0 \f40\insrsid6890305\charrsid1258595 +\par }\pard \ltrpar\qj \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid14889927 {\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid6115309\charrsid1258595 +\par }\pard \ltrpar\qj \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid12658752 
{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid6115309\charrsid14889927 1.}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 +\b\f40\insrsid13270131\charrsid14889927 3}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid6115309\charrsid1258595 \tab }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid10106677\charrsid1258595 \'93Free Software License}{\rtlch\fcs1 \af1\afs18 +\ltrch\fcs0 \f40\insrsid10106677\charrsid1258595 \'94 means an open source or other license that requires, as a condition of use, }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid10106677\charrsid1258595 +modification or distribution, that any resulting software must be (a) disclosed or distributed in source}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid10106677\charrsid1258595 code form; (b) licensed for the purpose of making derivative works; or (c +) redistributable at no charge. }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid4143008\charrsid1258595 +\par }\pard \ltrpar\qj \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid14889927 {\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid6115309\charrsid1258595 +\par }\pard \ltrpar\qj \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid12284823 {\rtlch\fcs1 \ab\af1\afs18 \ltrch\fcs0 \b\f40\insrsid12284823\charrsid12284823 1.4.}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 +\f40\insrsid12284823 \tab }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid4143008\charrsid12284823 \'93}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid4143008\charrsid12284823 Intellectual Property Rights}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 +\f40\insrsid4143008\charrsid12284823 \'94 means all copyrights, trademarks, trade secrets, patents, mask works, and all related, similar, or other intellectual property rights recognized in any jurisdiction worldwide, including all applications}{ +\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid14889927\charrsid12284823 and }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 
\f40\insrsid4143008\charrsid12284823 registrations with respect thereto. }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid12284823 +\par }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid12284823\charrsid12284823 +\par }{\rtlch\fcs1 \ab\af1\afs18 \ltrch\fcs0 \b\f40\insrsid12284823\charrsid12284823 1.5}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid12284823\charrsid12284823 \tab }{\rtlch\fcs1 \ab\af37\afs18 \ltrch\fcs0 \b\f40\insrsid12284823\charrsid12284823 \'93}{ +\rtlch\fcs1 \ab\af37\afs18 \ltrch\fcs0 \b\f40\insrsid14361568 Software}{\rtlch\fcs1 \ab\af37\afs18 \ltrch\fcs0 \b\f40\insrsid12284823\charrsid12284823 \'94}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid12284823\charrsid12284823 means the }{ +\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid14361568 software identified above}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid12284823\charrsid12284823 , including (a) Documentation, (b) Sample Code, (c) tools and utilities, and (d) header files. } +{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid12284823\charrsid12284823 +\par }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \insrsid6115309\charrsid1258595 +\par }\pard \ltrpar\qj \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid12658752 {\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid14361568\charrsid1258595 +\par }\pard \ltrpar\qj \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid4143008 {\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid6115309\charrsid14889927 1.}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid12284823 7} +{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid6115309\charrsid1258595 \tab }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid12284823\charrsid10049382 \'93Object Code\'94 }{\rtlch\fcs1 \af0 \ltrch\fcs0 \f40\insrsid4474123\charrsid4474123 +means machine readable computer programming code files, which is not in a human readable form.}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid4143008\charrsid1258595 +\par 
}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid6115309\charrsid1258595 +\par }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid6115309\charrsid14889927 1.}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid12284823 8}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid6115309\charrsid1258595 \tab }{\rtlch\fcs1 \af1\afs18 +\ltrch\fcs0 \b\f40\insrsid12284823\charrsid1258595 \'93Sample Code\'94}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid12284823\charrsid1258595 means the }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid2753227 header files and }{\rtlch\fcs1 \af1\afs18 +\ltrch\fcs0 \f40\insrsid12284823\charrsid1258595 Source Code identified within the SDK as sample code.}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid10049382 +\par +\par }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid15533868 1.}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid12284823 9\tab }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid12284823\charrsid1258595 \'93}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 +\b\f40\insrsid12284823\charrsid1258595 Source Code}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid12284823\charrsid1258595 \'94 + means computer programming code in the human readable form and related system level documentation, including all associated comments, symbols and any procedural code such as job control language.}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 +\b\f40\insrsid6115309\charrsid10049382 +\par }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid12986674\charrsid1258595 +\par }\pard \ltrpar\qj \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid6115309 {\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid6115309\charrsid14889927 1.}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid12284823 10 +}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid6115309\charrsid1258595 \tab }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid4143008\charrsid1258595 }{\rtlch\fcs1 \ab\af1\afs18 \ltrch\fcs0 \b\f40\insrsid12284823\charrsid12284823 \'93Your}{\rtlch\fcs1 
+\af1\afs18 \ltrch\fcs0 \b\f40\insrsid12284823\charrsid1258595 }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\insrsid14361568 Products}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid12284823\charrsid1258595 \'94 means any software developed or modified by } +{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid12284823 You}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid12284823\charrsid1258595 using the }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid14361568 Software}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 +\f40\insrsid12284823\charrsid1258595 , and which may include }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid12284823 any Sample Code}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid12284823\charrsid1258595 and/or }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 +\f40\insrsid12284823 D}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid12284823\charrsid1258595 erivative }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid12284823 W}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid12284823\charrsid1258595 orks.}{ +\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid6115309\charrsid1258595 +\par }\pard \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 {\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\f40\insrsid10049382 +\par }\pard \ltrpar\qj \li0\ri0\nowidctlpar\tx360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid12986674 {\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\f40\insrsid15944795\charrsid1258595 2}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 +\b\f40\insrsid3760314\charrsid1258595 .}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\f40\insrsid872988\charrsid1258595 \tab }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\f40\insrsid3084267\charrsid1258595 \tab LICENSE}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 +\f40\insrsid6115309\charrsid1258595 . 
Subject to the terms and conditions of this Agreement, AMD hereby grants }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid12284823 You}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid6115309\charrsid1258595 + a non-exclusive, royalty-free, revocable, non-transferable, non-assigna}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid872988\charrsid1258595 ble limited copyright license to:}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid12986674\charrsid1258595 + +\par }\pard \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 {\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\f40\insrsid10049382 +\par }\pard \ltrpar\qj \fi-360\li360\ri0\nowidctlpar\tx360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin360\itap0\pararsid14361568 {\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\f40\insrsid12352403\charrsid1258595 \tab }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 +\b\f40\insrsid12986674\charrsid1258595 a)\tab }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid13531971\charrsid1258595 install, use and copy the }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid14361568 Software}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 +\f40\insrsid12284823 internally}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid14361568 to evaluate and test the Software in a non-production environment}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid12284823 ; }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 +\f40\insrsid14361568\charrsid1258595 and}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid12986674\charrsid1258595 +\par }\pard \ltrpar\qj \li0\ri0\nowidctlpar\tx360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid16198517 {\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \b\f40\insrsid12352403\charrsid1258595 \tab }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 +\b\f40\insrsid12986674\charrsid1258595 b)}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid12986674\charrsid1258595 \tab }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid13531971\charrsid1258595 create Derivative Works}{\rtlch\fcs1 \af0\afs18 
\ltrch\fcs0 +\f40\insrsid9137639\charrsid1258595 }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid3877651 solely for}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid797272 internal use in furtherance of}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid3877651 + the Licensed }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid8283643 Purpose; }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid13531971\charrsid1258595 +\par }\pard\plain \ltrpar\s30\qj \li0\ri0\widctlpar\wrapdefault\faauto\rin0\lin0\itap0\pararsid15761125 \rtlch\fcs1 \af40\afs24\alang1025 \ltrch\fcs0 \f40\fs24\cf1\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 +\fs18\insrsid4785366\charrsid1258595 +\par }\pard \ltrpar\s30\qj \li0\ri0\widctlpar\wrapdefault\faauto\rin0\lin0\itap0\pararsid4785366 {\rtlch\fcs1 \ab\af40\afs18 \ltrch\fcs0 \b\fs18\insrsid14361568 3}{\rtlch\fcs1 \ab\af40\afs18 \ltrch\fcs0 \b\fs18\insrsid4785366\charrsid1258595 .\tab +RESTRICTIONS. }{\rtlch\fcs1 \ab\af40\afs18 \ltrch\fcs0 \fs18\insrsid4785366\charrsid1258595 Except for the limited }{\rtlch\fcs1 \ab\af40\afs18 \ltrch\fcs0 \fs18\insrsid788444 copyright }{\rtlch\fcs1 \ab\af40\afs18 \ltrch\fcs0 +\fs18\insrsid4785366\charrsid1258595 license expressly granted in Section 2 herein, }{\rtlch\fcs1 \ab\af40\afs18 \ltrch\fcs0 \fs18\insrsid3877651 You have}{\rtlch\fcs1 \ab\af40\afs18 \ltrch\fcs0 \fs18\insrsid4785366\charrsid1258595 no other}{\rtlch\fcs1 +\ab\af40\afs18 \ltrch\fcs0 \fs18\insrsid1208049\charrsid1258595 rights in the }{\rtlch\fcs1 \ab\af40\afs18 \ltrch\fcs0 \fs18\insrsid14361568 Software}{\rtlch\fcs1 \ab\af40\afs18 \ltrch\fcs0 \fs18\insrsid4785366\charrsid1258595 +, whether express, implied, arising by estoppel or otherwise. 
Further }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid4785366\charrsid1258595 restrictions }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid3877651 regarding Your}{\rtlch\fcs1 +\af40\afs18 \ltrch\fcs0 \fs18\insrsid1208049\charrsid1258595 use of the }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid14361568 Software}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid4785366\charrsid1258595 + are set forth below. Except as expressly authorized herein, }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid3877651 You}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid4785366\charrsid1258595 may not: }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 +\fs18\insrsid12352403\charrsid1258595 \tab +\par +\par {\listtext\pard\plain\ltrpar \s33 \rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \b\f40\fs18\lang1033\langfe1033\langnp1033\insrsid4785366\charrsid1258595 \hich\af40\dbch\af0\loch\f40 a)\tab}}\pard\plain \ltrpar\s33\qj \fi-360\li720\ri0\nowidctlpar +\tx360\wrapdefault\aspalpha\aspnum\faauto\ls16\adjustright\rin0\lin720\itap0\pararsid12352403\contextualspace \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 +\f40\insrsid4785366\charrsid1258595 modify or create derivativ}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid1208049\charrsid1258595 e works of the }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid14361568 Software}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 +\f40\insrsid4785366\charrsid1258595 ;}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid12352403\charrsid1258595 +\par {\listtext\pard\plain\ltrpar \s30 \rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \b\f40\fs18\cf1\lang1033\langfe1033\langnp1033\insrsid4785366\charrsid1258595 \hich\af40\dbch\af0\loch\f40 b)\tab}}\pard\plain \ltrpar +\s30\qj \fi-360\li720\ri0\widctlpar\wrapdefault\faauto\ls16\rin0\lin720\itap0\pararsid12352403 \rtlch\fcs1 \af40\afs24\alang1025 \ltrch\fcs0 \f40\fs24\cf1\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af40\afs18 
\ltrch\fcs0 +\fs18\insrsid4785366\charrsid1258595 distribute, publish, display, sublicense, assign or otherwise transfer}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid1208049\charrsid1258595 the }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid14361568 Software +}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid4785366\charrsid1258595 ;}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid12352403\charrsid1258595 +\par {\listtext\pard\plain\ltrpar \s30 \rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \b\f40\fs18\cf1\lang1033\langfe1033\langnp1033\insrsid4785366\charrsid1258595 \hich\af40\dbch\af0\loch\f40 c)\tab}}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 +\fs18\insrsid4785366\charrsid1258595 decompile, reverse engineer, disassemble or otherwise reduce the }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid14361568 Software}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid4785366\charrsid1258595 + in Object Code to a human-perceivable form (except as }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid151197 expressly }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid4785366\charrsid1258595 allowed by applicable law}{\rtlch\fcs1 \af40\afs18 +\ltrch\fcs0 \fs18\insrsid151197 but then only to the limited extent of such law}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid4785366\charrsid1258595 ); +\par {\listtext\pard\plain\ltrpar \s30 \rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \b\f40\fs18\cf1\lang1033\langfe1033\langnp1033\insrsid4785366\charrsid1258595 \hich\af40\dbch\af0\loch\f40 d)\tab}alter or remove any copyright, trademark or patent no}{\rtlch\fcs1 +\af40\afs18 \ltrch\fcs0 \fs18\insrsid1208049\charrsid1258595 tice(s) in the }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid14361568 Software}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid8812944\charrsid1258595 ;}{\rtlch\fcs1 \af40\afs18 +\ltrch\fcs0 \fs18\insrsid4785366\charrsid1258595 +\par {\listtext\pard\plain\ltrpar \s30 \rtlch\fcs1 \af40\afs18 \ltrch\fcs0 
\b\f40\fs18\cf1\lang1033\langfe1033\langnp1033\insrsid4785366\charrsid1258595 \hich\af40\dbch\af0\loch\f40 e)\tab}use the }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid14361568 +Software}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid4785366\charrsid1258595 to: (i) develop inventions directly derived from Confidential Information to seek patent protection; (ii) assist in the analysis of }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 +\fs18\insrsid4474123 Your}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid4785366\charrsid1258595 patents and patent applications; or (iii) modify }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid4474123 Your}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 +\fs18\insrsid4785366\charrsid1258595 existing patents or patent applicat}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid8812944\charrsid1258595 ions; }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid1208049\charrsid1258595 +\par {\listtext\pard\plain\ltrpar \s30 \rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\fs18\cf1\lang1033\langfe1033\langnp1033\insrsid1208049\charrsid1258595 \hich\af40\dbch\af0\loch\f40 f)\tab}}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 +\fs18\insrsid1208049\charrsid1258595 use the }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid14361568 Software}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid1208049\charrsid1258595 in way that requires that }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 +\fs18\insrsid11233100 the }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid14361568 Software}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid11233100 or any portion thereof}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid1208049\charrsid1258595 + be licensed under a Free Software License}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid14361568 ; and/}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid14361568\charrsid1258595 or}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid1208049 +\par {\listtext\pard\plain\ltrpar \s30 \rtlch\fcs1 \ab\af40\afs18 \ltrch\fcs0 
\b\f40\fs18\cf1\lang1033\langfe1033\langnp1033\insrsid14361568\charrsid4141791 \hich\af40\dbch\af0\loch\f40 g)\tab}}{\rtlch\fcs1 \ab\af40\afs18 \ltrch\fcs0 +\fs18\insrsid14361568\charrsid4141791 use the Software in high-risk contexts, including but not limited to in surgical implants, applications intended to support or sustain life, weapons, + nuclear applications, or in other contexts where failure of the Licensed Software could directly cause personal injury, death, or severe property or environmental damage.}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid14361568\charrsid1258595 +\par }\pard \ltrpar\s30\qj \li0\ri0\widctlpar\wrapdefault\faauto\rin0\lin0\itap0\pararsid4266728 {\rtlch\fcs1 \ab\af40\afs18 \ltrch\fcs0 \b\fs18\insrsid12352403\charrsid1258595 \tab }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid4656132\charrsid1258595 + +\par }\pard \ltrpar\s30\qj \li0\ri0\widctlpar\wrapdefault\faauto\rin0\lin0\itap0\pararsid1455539 {\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\fs18\expnd0\expndtw-3\insrsid4266728 4}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 +\fs18\expnd0\expndtw-3\insrsid1455539\charrsid1258595 .\tab }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\fs18\expnd0\expndtw-3\insrsid1455539\charrsid1258595 OWNERSHIP}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\fs18\expnd0\expndtw-3\insrsid4266728 +; THIRD PARTY MATERIALS}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\expnd0\expndtw-3\insrsid1455539\charrsid1258595 . }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\expnd0\expndtw-3\insrsid10765994\charrsid10765994 +The Software, including all Intellectual Property Rights therein, is and remains the sole and exclusive property of AMD or its licensors, and You shall have no right, title or interest therein except as expressly set forth in this Agreement. 
}{ +\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\expnd0\expndtw-3\insrsid10765994\charrsid13775719 Except as expressly provided in }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\expnd0\expndtw-3\insrsid10765994 Section 2}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 +\fs18\expnd0\expndtw-3\insrsid10765994\charrsid13775719 , AMD does not grant, by implication, estoppels or otherwise under any patents, trademarks, copyrights, mask works, trade secret information, intellectual property, license or similar material. }{ +\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\expnd0\expndtw-3\insrsid10765994\charrsid10765994 For the avoidance of doubt, nothin +g in this Agreement shall require AMD to generally release the Licensed Software. The Software may include third party technologies for which You must obtain licenses from parties other than AMD (collectively, \'93}{\rtlch\fcs1 \ab\af40\afs18 \ltrch\fcs0 +\b\fs18\expnd0\expndtw-3\insrsid10765994\charrsid4266728 Third Party Materials}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\expnd0\expndtw-3\insrsid10765994\charrsid10765994 \'94). These Third Party Ma +terials are not licensed as part of the Software licensed under this Agreement and are used at Your sole discretion. You are solely responsible for obtaining all applicable Intellectual Property Rights to use the Third Party Materials. In addition, the So +ftware may include code or content subject to an open source or similar license (\'93}{\rtlch\fcs1 \ab\af40\afs18 \ltrch\fcs0 \b\fs18\expnd0\expndtw-3\insrsid10765994\charrsid4266728 Open Source Component}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 +\fs18\expnd0\expndtw-3\insrsid10765994\charrsid10765994 \'94). 
To the extent that an Open Source Component is subject to a license that is inconsistent with this Agreement, then such Open Source Component shall be + governed solely by the applicable license.}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\expnd0\expndtw-3\insrsid1455539\charrsid1258595 +\par }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\ul\expnd0\expndtw-3\insrsid1455539\charrsid1258595 +\par }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\fs18\insrsid4266728 5}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\fs18\insrsid1455539\charrsid1258595 .\tab FEEDBACK. }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid3877651 You have}{\rtlch\fcs1 \af37\afs18 +\ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 no obligation to give AMD any suggestions, comments or other feedback (\'93}{\rtlch\fcs1 \ab\af37\afs18 \ltrch\fcs0 \b\fs18\insrsid1455539\charrsid4266728 Feedback}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 +\fs18\insrsid1455539\charrsid1258595 \'94) relating to the }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid14361568 Software}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 +. However, AMD may use and include any Feedback that it receives from }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid3877651 You}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 to improve the }{\rtlch\fcs1 \af37\afs18 +\ltrch\fcs0 \fs18\insrsid14361568 Software}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 or other AMD products, software and technologies. 
Accordingly, for any Feedback }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 +\fs18\insrsid3877651 You}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 provide to AMD, }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid3877651 You grant}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 +\fs18\insrsid1455539\charrsid1258595 AMD and its affiliates and subsidiaries a worldwide, non-exclusive, irrevocable, royalty-free, fully paid up, perpetual + license to, directly or indirectly, use, reproduce, license, sublicense, distribute, make, have made, sell and otherwise commercialize the Feedback in the }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid14361568 Software}{\rtlch\fcs1 \af37\afs18 +\ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 or other AMD products}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid10765994 or}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 technologies. }{\rtlch\fcs1 \af37\afs18 +\ltrch\fcs0 \fs18\insrsid3877651 You}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 further agree}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid3877651 }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 +\fs18\insrsid1455539\charrsid1258595 not to provide any Feedback that (a) }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid3877651 You know}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 + is subject to any Intellectual Property Rights of any third party or (b) is subject to license terms which seek to require any products incorporating or derived from such Feedback, or other AMD Intellectual Property, to be licensed to or otherw +ise shared with any third party.}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\fs18\insrsid1455539\charrsid1258595 +\par +\par }\pard\plain \ltrpar\qj \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid1455539 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 
\af37\afs18 +\ltrch\fcs0 \b\f40\insrsid4266728 6}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\f40\insrsid1455539\charrsid1258595 .\tab SUPPORT AND UPDATES. }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 +AMD is under no obligation to provide any kind of support under this Agreement. }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 AMD may, in its sole discretion, provide to }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid3877651 +You}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 updates to the }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid14361568 Software}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 +, and such updates will be covered as }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid14361568 Software}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 under this Agreement.\~ +\par }\pard\plain \ltrpar\s30\qj \li0\ri0\widctlpar\wrapdefault\faauto\rin0\lin0\itap0\pararsid1455539 \rtlch\fcs1 \af40\afs24\alang1025 \ltrch\fcs0 \f40\fs24\cf1\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 +\fs18\cf0\insrsid1455539\charrsid1258595 +\par }\pard\plain \ltrpar\s1\ql \li0\ri0\sa120\keep\keepn\widctlpar\tx360\wrapdefault\aspalpha\aspnum\faauto\outlinelevel0\adjustright\rin0\lin0\itap0\pararsid1455539 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 +\b\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid4266728 7}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 .\tab }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid5636316 +\tab }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 WARRANTY DISCLAIMER, LIMITATION OF LIABILITY AND INDEMNIFICATION}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b0\f40\insrsid1455539\charrsid1258595 .}{\rtlch\fcs1 \af37\afs18 +\ltrch\fcs0 \b0\f40\expnd0\expndtw-3\cf1\insrsid1455539\charrsid1258595 +\par 
}\pard\plain \ltrpar\qj \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid7872888 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af37\afs18 +\ltrch\fcs0 \b\f40\insrsid4266728 7}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\f40\insrsid1455539\charrsid1258595 .1\tab Disclaimer OF Warranty.}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 +\f40\insrsid1455539\charrsid1258595 THE }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid14361568 SOFTWARE}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid14889927 ARE}{\rtlch\fcs1 +\af0\afs18 \ltrch\fcs0 \f40\insrsid14889927\charrsid1258595 }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 +PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND. AMD DISCLAIMS ALL WARRANTIES, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, THAT THE }{ +\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid14361568 SOFTWARE}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 WILL RUN UNINTERRUPTED OR ERROR-FREE OR }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid14889927 THOSE}{\rtlch\fcs1 +\af0\afs18 \ltrch\fcs0 \f40\insrsid14889927\charrsid1258595 }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 ARISING FROM CUSTOM OF TRADE OR COURSE OF USAGE. THE ENTIRE RISK ASSOCIATED WITH THE USE OF THE }{\rtlch\fcs1 +\af0\afs18 \ltrch\fcs0 \f40\insrsid14361568 SOFTWARE}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 IS ASSUMED BY }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid3877651 YOU}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 +\f40\insrsid1455539\charrsid1258595 . 
Some jurisdictions do not allow the exclusion of implied warranties, so the above exclusion may not apply }{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 \f40\insrsid3877651 to You}{\rtlch\fcs1 \af0\afs18 \ltrch\fcs0 +\f40\insrsid1455539\charrsid1258595 . +\par }\pard\plain \ltrpar\s30\qj \li0\ri0\widctlpar\wrapdefault\faauto\rin0\lin0\itap0\pararsid1455539 \rtlch\fcs1 \af40\afs24\alang1025 \ltrch\fcs0 \f40\fs24\cf1\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 +\fs18\insrsid1455539\charrsid1258595 +\par }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\fs18\insrsid4266728 7}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\fs18\insrsid1455539\charrsid1258595 .2}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 \tab }{\rtlch\fcs1 \ab\af40\afs18 +\ltrch\fcs0 \b\fs18\insrsid1455539\charrsid1258595 Limitation of Liability}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 . AMD AND ITS LICENSORS WILL NOT, UNDER ANY CIRCUMSTANCES BE LIABLE TO }{\rtlch\fcs1 \af40\afs18 +\ltrch\fcs0 \fs18\insrsid3877651 YOU }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 FOR ANY PUNITIVE, DIRECT, INCIDENTAL, INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING FROM USE OF THE }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 +\fs18\insrsid14361568 SOFTWARE}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 OR THIS AGREEMENT EVEN IF AMD AND ITS LICENSORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. In no event shall AMD's total liability }{ +\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid14889927 to }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid3877651 You }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 +for all damages, losses, and causes of action (whether in contract, tort (including negligence) or otherwise) exceed the amount of $100 USD. 
}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid4937582 +\par +\par }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \b\fs18\insrsid4266728 7}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \b\fs18\insrsid4937582\charrsid1588624 .3\tab Indemnification. }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid3877651 You }{\rtlch\fcs1 \af40\afs18 +\ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 agree}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid3877651 }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 +to defend, indemnify and hold harmless AMD and its licensors, and any of + their directors, officers, employees, affiliates or agents from and against any and all loss, damage, liability and other expenses (including reasonable attorneys' fees), resulting from}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid8739694 a)}{ +\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid4266728 a claim or allegation that the Derivative Works violate a third party Intellectual Property Rights}{\rtlch\fcs1 +\af40\afs18 \ltrch\fcs0 \fs18\insrsid8739694 , }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid3877651 b) }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 violation of the terms and conditions of this Agreement by }{ +\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid3877651 You}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 . 
+\par +\par }\pard \ltrpar\s30\qj \li0\ri0\nowidctlpar\tx720\wrapdefault\faauto\rin0\lin0\itap0\pararsid1455539 {\rtlch\fcs1 \ab\af37\afs18 \ltrch\fcs0 \b\fs18\insrsid10765994\charrsid1258595 1}{\rtlch\fcs1 \ab\af37\afs18 \ltrch\fcs0 \b\fs18\insrsid10765994 0}{ +\rtlch\fcs1 \ab\af37\afs18 \ltrch\fcs0 \b\fs18\insrsid1455539\charrsid1258595 .\tab CONFIDENTIALITY.}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\expnd0\expndtw-3\insrsid1455539\charrsid1258595 }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid3877651 You} +{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 shall protect the }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid14361568 Software}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 + and any information related thereto (collectively, \'93Confidential Information\'94 +) by using the same degree of care, but no less than a reasonable degree of care, to prevent the unauthorized use, dissemination or publication of the Confidential Information as }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid4474123 You}{\rtlch\fcs1 +\af1\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 use to protect }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid4474123 Your}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 + own confidential information of a like nature. 
}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid4474123 You }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 +shall not disclose any Confidential Information disclosed hereunder to any third party and shall limit disclosure of Confidential Information to only those of }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid4474123 Your}{\rtlch\fcs1 \af1\afs18 +\ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 employees and contractors with a need to know and who are bound by confidentiality obligations with }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid4474123 You }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 +\fs18\insrsid1455539\charrsid1258595 at least as restrictive as those contained in this Agreement. }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid4474123 You}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 + shall be responsible for }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid4474123 Your}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 employees' and contractors' adherence to the terms of this Agreement. }{\rtlch\fcs1 +\af40\afs18 \ltrch\fcs0 \fs18\expnd0\expndtw-3\insrsid4474123 You}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\expnd0\expndtw-3\insrsid1455539\charrsid1258595 + may disclose Confidential Information in accordance with a judicial or other governmental order, provided that }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\expnd0\expndtw-3\insrsid4474123 You}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 +\fs18\expnd0\expndtw-3\insrsid1455539\charrsid1258595 either (a) give AMD reasonable notice prior to such disclosure to allow AMD a reasonable opportunity to seek a protective order or equiva +lent or (b) obtain written assurance from the applicable judicial or governmental entity that it will afford the Confidential Information the highest level of protection afforded under applicable law or regulation. 
+\par }\pard \ltrpar\s30\qj \li0\ri0\widctlpar\wrapdefault\faauto\rin0\lin0\itap0\pararsid1455539 {\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 +\par }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\fs18\insrsid10765994\charrsid1258595 1}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\fs18\insrsid10765994 1}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \b\fs18\insrsid1455539\charrsid1258595 .\tab TERMINATION AND SURVIVAL}{ +\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 . AMD may terminate the Agreement immediately upon the breach by }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid4474123 You }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 +\fs18\insrsid1455539\charrsid1258595 or any sublicensee of any of the terms of the Agreement. }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid4474123 You}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 + may terminate the Agreement upon written notice to AMD}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid4937582 and destruction of the }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid14361568 Software}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 +\fs18\insrsid4937582 }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid4474123 You}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid4937582 accessed hereunder}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 +. 
The termination of this Agreement shall: (i) immediately result in the termination of all rights granted }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid4474123 to You}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 }{ +\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid4474123 in Section 2, including the right to install, use, and }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 distribute the }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 +\fs18\insrsid14361568 Software}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid11233100 and }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid4474123 create and distribute }{ +\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid11233100 Derivative Works }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 through }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 multiple + tiers of distribution}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 ; and (ii) have no effect on any sublicenses previously granted by }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid4474123 You}{\rtlch\fcs1 \af37\afs18 +\ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 to end users under }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid14889927 Section}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid14889927\charrsid1258595 }{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 +\fs18\insrsid1455539\charrsid1258595 2}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid14889927 (c)}{\rtlch\fcs1 \af37\afs18 \ltrch\fcs0 \fs18\insrsid4937582 and which are compliant with all terms and conditions of this Agreement}{\rtlch\fcs1 +\af37\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 , which sublicenses shall survive in accordance with their terms. 
}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 Upon termination or expiration of this Agreement, }{ +\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid4474123 You}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 will cease using and destroy or return to AMD all copies of the }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 +\fs18\insrsid11077217 Confidential Information, including but not limited to the }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid14361568 Software}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 .}{\rtlch\fcs1 \af40\afs18 +\ltrch\fcs0 \b\fs18\insrsid1455539\charrsid1258595 }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \fs18\insrsid14889927\charrsid1258595 Upon termination or expiration of this Agreement, all provisions survive except for Section 2.}{\rtlch\fcs1 \af40\afs18 +\ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 +\par }\pard\plain \ltrpar\qj \li0\ri0\widctlpar\wrapdefault\faauto\rin0\lin0\itap0\pararsid1455539 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \ab\af1\afs18 \ltrch\fcs0 +\b\f40\cgrid0\insrsid7872888\charrsid1258595 +\par }{\rtlch\fcs1 \ab\af1\afs18 \ltrch\fcs0 \b\f40\cgrid0\insrsid1455539\charrsid1258595 1}{\rtlch\fcs1 \ab\af1\afs18 \ltrch\fcs0 \b\f40\cgrid0\insrsid10765994 2}{\rtlch\fcs1 \ab\af1\afs18 \ltrch\fcs0 \b\f40\cgrid0\insrsid1455539\charrsid1258595 .\tab +EXPORT RESTRICTIONS}{\rtlch\fcs1 \ab\af1\afs18 \ltrch\fcs0 \f40\cgrid0\insrsid1455539\charrsid1258595 . }{\rtlch\fcs1 \ab\af1\afs18 \ltrch\fcs0 \b\f40\cgrid0\insrsid4474123 }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\cgrid0\insrsid4474123 You}{\rtlch\fcs1 +\ab\af1\afs18 \ltrch\fcs0 \f40\cgrid0\insrsid1455539\charrsid1258595 shall adhere to all applicable U.S., European, and other export laws, }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 +including but not limited to the U.S. 
Export Administration Regulations (\'93EAR\'94) (15 C.F.R Sections 730-774), and E.U. Council Regulation (EC) No 428/2009 of\~5\~May\~2009. Further, pursuant to Section 740.6 of the EAR, }{\rtlch\fcs1 \af1\afs18 +\ltrch\fcs0 \f40\insrsid4474123 You}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 hereby }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid4474123 certify }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 +that, except pursuant to a license granted by the United States Department of Commerce Bureau of Industry and Security or as otherwise permitted pursuant to a License Exception under the EAR, }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid4474123 You }{ +\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 will not (1) export, re-export or release to a national of a country in Country +Groups D:1, E:1 or E:2 any restricted technology, software, or source code it receives from AMD, or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced direct product is subject to natio +nal security controls as identified on the Commerce Control List (currently found in Supplement 1 to Part 774 of EAR). For the most current Country Group listings, or for additional information about the EAR or }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 +\f40\insrsid4474123 Your}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 obligations under those regulations, please refer to the U.S. Bureau of Industry and Security\rquote s website at }{\field\fldedit{\*\fldinst {\rtlch\fcs1 +\af0 \ltrch\fcs0 \insrsid3691870 HYPERLINK "http://www.bis.doc.gov/" }}{\fldrslt {\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \cs31\f40\ul\cf2\insrsid1455539\charrsid1258595 http://www.bis.doc.gov/}}}\sectd \ltrsect +\psz1\linex0\endnhere\sectlinegrid360\sectdefaultcl\sectrsid11683287\sftnbj {\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 . 
+\par +\par }\pard\plain \ltrpar\s30\qj \li0\ri0\widctlpar\wrapdefault\faauto\rin0\lin0\itap0\pararsid7872888 \rtlch\fcs1 \af40\afs24\alang1025 \ltrch\fcs0 \f40\fs24\cf1\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \ab\af40\afs18 \ltrch\fcs0 +\b\fs18\insrsid1455539\charrsid1258595 1}{\rtlch\fcs1 \ab\af40\afs18 \ltrch\fcs0 \b\fs18\insrsid10765994 3}{\rtlch\fcs1 \ab\af40\afs18 \ltrch\fcs0 \b\fs18\insrsid1455539\charrsid1258595 .\tab GOVERNMENT END USERS}{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 +\fs18\insrsid1455539\charrsid1258595 . }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 The }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid14361568 Software}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 +\fs18\insrsid1455539\charrsid1258595 }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid11077217 are}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid11077217\charrsid1258595 }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 +provided with \'93RESTRICTED RIGHTS.\'94 Use, duplication or disclosure by the Government is subject to restrictions as set forth in FAR 52.227-14 and DFAR 252.227-7013, et seq., or its successor. 
Use of the }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 +\fs18\insrsid14361568 Software}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 by the Government constitutes acknowledgment of AMD\rquote s proprietary rights in it.}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 +\fs18\insrsid7872888\charrsid1258595 +\par }{\rtlch\fcs1 \af40\afs18 \ltrch\fcs0 \b\fs18\insrsid1455539\charrsid1258595 }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \fs18\insrsid1455539\charrsid1258595 +\par }\pard\plain \ltrpar\qj \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid15944795 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af1\afs18 +\ltrch\fcs0 \b\f40\cf1\insrsid1455539\charrsid1258595 1}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\cf1\insrsid10765994 4}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \b\f40\cf1\insrsid1455539\charrsid1258595 .\tab GOVERNING LAW.}{\rtlch\fcs1 \ab\af1\afs18 +\ltrch\fcs0 \b\f40\insrsid1455539\charrsid1258595 }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\cf1\insrsid15944795\charrsid1258595 +This Agreement is made under and shall be construed according to the laws of the State of Texas, excluding conflicts of law rules. Each party submits to the jurisdiction of the state and federal c +ourts of Travis County and the Western District of Texas for the purposes of this Agreement. 
}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\cf1\insrsid4474123 You }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\cf1\insrsid15944795\charrsid1258595 acknowledge that } +{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\cf1\insrsid4474123 Your}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\cf1\insrsid15944795\charrsid1258595 breach of this Agreement may cause irreparable damage and }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 +\f40\cf1\insrsid4474123 You agree }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\cf1\insrsid15944795\charrsid1258595 that AMD shall be entitled to seek injunctive relief under this Agre +ement, as well as such further relief as may be granted by a court of competent jurisdiction.}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 +\par }\pard\plain \ltrpar\s30\qj \li0\ri0\widctlpar\wrapdefault\faauto\rin0\lin0\itap0\pararsid1455539 \rtlch\fcs1 \af40\afs24\alang1025 \ltrch\fcs0 \f40\fs24\cf1\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 +\fs18\insrsid1455539\charrsid1258595 +\par }\pard\plain \ltrpar\qj \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid4266728 \rtlch\fcs1 \af0\afs20\alang1025 \ltrch\fcs0 \f1\fs18\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \ab\af1\afs18 +\ltrch\fcs0 \b\f40\insrsid1455539\charrsid1258595 1}{\rtlch\fcs1 \ab\af1\afs18 \ltrch\fcs0 \b\f40\insrsid10765994 5}{\rtlch\fcs1 \ab\af1\afs18 \ltrch\fcs0 \b\f40\insrsid1455539\charrsid1258595 . \tab GENERAL PROVISIONS}{\rtlch\fcs1 \ab\af1\afs18 +\ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 . 
}{\rtlch\fcs1 \ab\af1\afs18 \ltrch\fcs0 \b\f40\insrsid1455539\charrsid1258595 }{\rtlch\fcs1 \ab\af1\afs18 \ltrch\fcs0 \f40\insrsid4474123 You}{\rtlch\fcs1 \ab\af1\afs18 \ltrch\fcs0 +\f40\insrsid1455539\charrsid1258595 }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 +may not assign this Agreement without the prior written consent of AMD and any assignment without such consent will be null and void. This Agreement may be executed in multiple counterparts, each of which shall constitute a signed original. Any facsimil +e or electronic image of this Agreement or writing referenced herein shall be valid and acceptable for all purposes as if it were an original. The }{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid5641658 P}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 +\f40\insrsid1455539\charrsid1258595 arties do not intend that any agency or partnership relationship be created between them by this Agreement. + Each provision of this Agreement shall be interpreted in such a manner as to be effective and valid under applicable law. However, in the event that any provision of this Agreement becomes or is declared unenforceable by any court of competent jurisdict +ion, such provision shall be deemed deleted and the remainder of this Agreement shall remain in full force and effect. This Agreement sets forth the entire agreement and understanding between the Parties with respect to the }{\rtlch\fcs1 \af1\afs18 +\ltrch\fcs0 \f40\insrsid14361568 Software}{\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid1455539\charrsid1258595 and supersedes and merges al +l prior oral and written agreements, discussions and understandings between them regarding the subject matter of this Agreement. No waiver or modification of any provision of this Agreement shall be binding unless made in writing and signed by an authori +zed representative of each Party. 
+\par }\pard \ltrpar\qc \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid4937582 {\rtlch\fcs1 \af1\afs18 \ltrch\fcs0 \f40\insrsid4474123 +\par }\pard \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid11360165 {\rtlch\fcs1 \ab\af1\afs18 \ltrch\fcs0 \b\f40\insrsid11360165\charrsid11360165 +\par }{\*\themedata 504b030414000600080000002100e9de0fbfff0000001c020000130000005b436f6e74656e745f54797065735d2e786d6cac91cb4ec3301045f748fc83e52d4a +9cb2400825e982c78ec7a27cc0c8992416c9d8b2a755fbf74cd25442a820166c2cd933f79e3be372bd1f07b5c3989ca74aaff2422b24eb1b475da5df374fd9ad +5689811a183c61a50f98f4babebc2837878049899a52a57be670674cb23d8e90721f90a4d2fa3802cb35762680fd800ecd7551dc18eb899138e3c943d7e503b6 +b01d583deee5f99824e290b4ba3f364eac4a430883b3c092d4eca8f946c916422ecab927f52ea42b89a1cd59c254f919b0e85e6535d135a8de20f20b8c12c3b0 +0c895fcf6720192de6bf3b9e89ecdbd6596cbcdd8eb28e7c365ecc4ec1ff1460f53fe813d3cc7f5b7f020000ffff0300504b030414000600080000002100a5d6 +a7e7c0000000360100000b0000005f72656c732f2e72656c73848fcf6ac3300c87ef85bd83d17d51d2c31825762fa590432fa37d00e1287f68221bdb1bebdb4f +c7060abb0884a4eff7a93dfeae8bf9e194e720169aaa06c3e2433fcb68e1763dbf7f82c985a4a725085b787086a37bdbb55fbc50d1a33ccd311ba548b6309512 +0f88d94fbc52ae4264d1c910d24a45db3462247fa791715fd71f989e19e0364cd3f51652d73760ae8fa8c9ffb3c330cc9e4fc17faf2ce545046e37944c69e462 +a1a82fe353bd90a865aad41ed0b5b8f9d6fd010000ffff0300504b0304140006000800000021006b799616830000008a0000001c0000007468656d652f746865 +6d652f7468656d654d616e616765722e786d6c0ccc4d0ac3201040e17da17790d93763bb284562b2cbaebbf600439c1a41c7a0d29fdbd7e5e38337cedf14d59b +4b0d592c9c070d8a65cd2e88b7f07c2ca71ba8da481cc52c6ce1c715e6e97818c9b48d13df49c873517d23d59085adb5dd20d6b52bd521ef2cdd5eb9246a3d8b +4757e8d3f729e245eb2b260a0238fd010000ffff0300504b03041400060008000000210096b5ade296060000501b0000160000007468656d652f7468656d652f 
+7468656d65312e786d6cec594f6fdb3614bf0fd87720746f6327761a07758ad8b19b2d4d1bc46e871e698996d850a240d2497d1bdae38001c3ba618715d86d87 +615b8116d8a5fb34d93a6c1dd0afb0475292c5585e9236d88aad3e2412f9e3fbff1e1fa9abd7eec70c1d1221294fda5efd72cd4324f1794093b0eddd1ef62fad +79482a9c0498f184b4bd2991deb58df7dfbb8ad755446282607d22d771db8b944ad79796a40fc3585ee62949606ecc458c15bc8a702910f808e8c66c69b9565b +5d8a314d3c94e018c8de1a8fa94fd05093f43672e23d06af89927ac06762a049136785c10607758d9053d965021d62d6f6804fc08f86e4bef210c352c144dbab +999fb7b4717509af678b985ab0b6b4ae6f7ed9ba6c4170b06c788a705430adf71bad2b5b057d03606a1ed7ebf5babd7a41cf00b0ef83a6569632cd467faddec9 +699640f6719e76b7d6ac355c7c89feca9cccad4ea7d36c65b258a206641f1b73f8b5da6a6373d9c11b90c537e7f08dce66b7bbeae00dc8e257e7f0fd2badd586 +8b37a088d1e4600ead1ddaef67d40bc898b3ed4af81ac0d76a197c86826828a24bb318f3442d8ab518dfe3a20f000d6458d104a9694ac6d88728eee2782428d6 +0cf03ac1a5193be4cbb921cd0b495fd054b5bd0f530c1931a3f7eaf9f7af9e3f45c70f9e1d3ff8e9f8e1c3e3073f5a42ceaa6d9c84e5552fbffdeccfc71fa33f +9e7ef3f2d117d57859c6fffac327bffcfc793510d26726ce8b2f9ffcf6ecc98baf3efdfdbb4715f04d814765f890c644a29be408edf3181433567125272371be +15c308d3f28acd249438c19a4b05fd9e8a1cf4cd296699771c393ac4b5e01d01e5a30a787d72cf1178108989a2159c77a2d801ee72ce3a5c545a6147f32a9979 +3849c26ae66252c6ed637c58c5bb8b13c7bfbd490a75330f4b47f16e441c31f7184e140e494214d273fc80900aedee52ead87597fa824b3e56e82e451d4c2b4d +32a423279a668bb6690c7e9956e90cfe766cb37b077538abd27a8b1cba48c80acc2a841f12e698f13a9e281c57911ce298950d7e03aba84ac8c154f8655c4f2a +f074481847bd804859b5e696007d4b4edfc150b12addbecba6b18b148a1e54d1bc81392f23b7f84137c2715a851dd0242a633f900710a218ed715505dfe56e86 +e877f0034e16bafb0e258ebb4faf06b769e888340b103d3311da9750aa9d0a1cd3e4efca31a3508f6d0c5c5c398602f8e2ebc71591f5b616e24dd893aa3261fb +44f95d843b5974bb5c04f4edafb95b7892ec1108f3f98de75dc97d5772bdff7cc95d94cf672db4b3da0a6557f70db629362d72bcb0431e53c6066acac80d699a 
+6409fb44d08741bdce9c0e4971624a2378cceaba830b05366b90e0ea23aaa241845368b0eb9e2612ca8c742851ca251ceccc70256d8d87265dd96361531f186c +3d9058edf2c00eafe8e1fc5c509031bb4d680e9f39a3154de0accc56ae644441edd76156d7429d995bdd88664a9dc3ad50197c38af1a0c16d684060441db0256 +5e85f3b9660d0713cc48a0ed6ef7dedc2dc60b17e92219e180643ed27acffba86e9c94c78ab90980d8a9f0913ee49d62b512b79626fb06dccee2a432bbc60276 +b9f7dec44b7904cfbca4f3f6443ab2a49c9c2c41476dafd55c6e7ac8c769db1bc399161ee314bc2e75cf8759081743be1236ec4f4d6693e5336fb672c5dc24a8 +c33585b5fb9cc24e1d4885545b58463634cc5416022cd19cacfccb4d30eb45296023fd35a458598360f8d7a4003bbaae25e331f155d9d9a5116d3bfb9a95523e +51440ca2e0088dd844ec6370bf0e55d027a012ae264c45d02f708fa6ad6da6dce29c255df9f6cae0ec38666984b372ab5334cf640b37795cc860de4ae2816e95 +b21be5ceaf8a49f90b52a51cc6ff3355f47e0237052b81f6800fd7b802239daf6d8f0b1571a8426944fdbe80c6c1d40e8816b88b8569082ab84c36ff0539d4ff +6dce591a26ade1c0a7f669880485fd484582903d284b26fa4e2156cff62e4b9265844c4495c495a9157b440e091bea1ab8aaf7760f4510eaa69a6465c0e04ec6 +9ffb9e65d028d44d4e39df9c1a52ecbd3607fee9cec7263328e5d661d3d0e4f62f44acd855ed7ab33cdf7bcb8ae889599bd5c8b3029895b6825696f6af29c239 +b75a5bb1e6345e6ee6c28117e73586c1a2214ae1be07e93fb0ff51e133fb65426fa843be0fb515c187064d0cc206a2fa926d3c902e907670048d931db4c1a449 +59d366ad93b65abe595f70a75bf03d616c2dd959fc7d4e6317cd99cbcec9c58b34766661c7d6766ca1a9c1b327531486c6f941c638c67cd22a7f75e2a37be0e8 +2db8df9f30254d30c1372581a1f51c983c80e4b71ccdd28dbf000000ffff0300504b0304140006000800000021000dd1909fb60000001b010000270000007468 +656d652f7468656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73848f4d0ac2301484f78277086f6fd3ba109126dd88d0add40384e4 +350d363f2451eced0dae2c082e8761be9969bb979dc9136332de3168aa1a083ae995719ac16db8ec8e4052164e89d93b64b060828e6f37ed1567914b284d2624 +52282e3198720e274a939cd08a54f980ae38a38f56e422a3a641c8bbd048f7757da0f19b017cc524bd62107bd5001996509affb3fd381a89672f1f165dfe5141 
+73d9850528a2c6cce0239baa4c04ca5bbabac4df000000ffff0300504b01022d0014000600080000002100e9de0fbfff0000001c020000130000000000000000 +0000000000000000005b436f6e74656e745f54797065735d2e786d6c504b01022d0014000600080000002100a5d6a7e7c0000000360100000b00000000000000 +000000000000300100005f72656c732f2e72656c73504b01022d00140006000800000021006b799616830000008a0000001c0000000000000000000000000019 +0200007468656d652f7468656d652f7468656d654d616e616765722e786d6c504b01022d001400060008000000210096b5ade296060000501b00001600000000 +000000000000000000d60200007468656d652f7468656d652f7468656d65312e786d6c504b01022d00140006000800000021000dd1909fb60000001b01000027 +00000000000000000000000000a00900007468656d652f7468656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73504b050600000000050005005d0100009b0a00000000} +{\*\colorschememapping 3c3f786d6c2076657273696f6e3d22312e302220656e636f64696e673d225554462d3822207374616e64616c6f6e653d22796573223f3e0d0a3c613a636c724d +617020786d6c6e733a613d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f64726177696e676d6c2f323030362f6d6169 +6e22206267313d226c743122207478313d22646b3122206267323d226c743222207478323d22646b322220616363656e74313d22616363656e74312220616363 +656e74323d22616363656e74322220616363656e74333d22616363656e74332220616363656e74343d22616363656e74342220616363656e74353d22616363656e74352220616363656e74363d22616363656e74362220686c696e6b3d22686c696e6b2220666f6c486c696e6b3d22666f6c486c696e6b222f3e} +{\*\latentstyles\lsdstimax376\lsdlockeddef0\lsdsemihiddendef0\lsdunhideuseddef0\lsdqformatdef0\lsdprioritydef0{\lsdlockedexcept \lsdqformat1 \lsdlocked0 Normal;\lsdqformat1 \lsdlocked0 heading 1;\lsdqformat1 \lsdlocked0 heading 2; +\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdlocked0 heading 3;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdlocked0 heading 4;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdlocked0 heading 5; +\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdlocked0 heading 6;\lsdsemihidden1 
\lsdunhideused1 \lsdqformat1 \lsdlocked0 heading 7;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdlocked0 heading 8; +\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdlocked0 heading 9;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 3; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 6;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 7; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 8;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index 9;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 toc 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 toc 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 toc 3; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 toc 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 toc 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 toc 6;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 toc 7;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 toc 8; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 toc 9;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Normal Indent;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 footnote text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 annotation text; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 header;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 footer;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 index heading;\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdlocked0 caption; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 table of figures;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 envelope address;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 envelope return;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 footnote reference; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 annotation reference;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 line number;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 page number;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 endnote 
reference; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 endnote text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 table of authorities;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 macro;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 toa heading; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List 3; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Bullet 5; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Number 5; +\lsdqformat1 \lsdlocked0 Title;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Closing;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Signature;\lsdsemihidden1 \lsdunhideused1 \lsdpriority1 \lsdlocked0 Default Paragraph Font; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text Indent;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue 2; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 List Continue 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Message Header; +\lsdqformat1 \lsdlocked0 Subtitle;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text First Indent 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Note Heading;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text 2; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Body Text Indent 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 
Body Text Indent 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Block Text; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Hyperlink;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 FollowedHyperlink;\lsdqformat1 \lsdlocked0 Strong;\lsdqformat1 \lsdlocked0 Emphasis;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Document Map; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Plain Text;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 E-mail Signature;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Top of Form;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Bottom of Form; +\lsdsemihidden1 \lsdunhideused1 \lsdpriority99 \lsdlocked0 Normal (Web);\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Acronym;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Address;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Cite; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Code;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Definition;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Keyboard;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Preformatted; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Sample;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Typewriter;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 HTML Variable;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Normal Table; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 annotation subject;\lsdsemihidden1 \lsdunhideused1 \lsdpriority99 \lsdlocked0 No List;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Outline List 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Outline List 2; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Outline List 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Simple 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Simple 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Simple 3; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Classic 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Classic 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Classic 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Classic 4; 
+\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Colorful 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Colorful 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Colorful 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Columns 1; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Columns 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Columns 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Columns 4;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Columns 5; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 4; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 6;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 7;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Grid 8; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 4; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 5;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 6;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 7;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table List 8; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table 3D effects 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table 3D effects 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table 3D effects 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Contemporary; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Elegant;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Professional;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Subtle 1;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Subtle 2; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Web 1;\lsdsemihidden1 \lsdunhideused1 
\lsdlocked0 Table Web 2;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Web 3;\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Balloon Text; +\lsdsemihidden1 \lsdunhideused1 \lsdlocked0 Table Theme;\lsdsemihidden1 \lsdpriority99 \lsdlocked0 Placeholder Text;\lsdqformat1 \lsdpriority1 \lsdlocked0 No Spacing;\lsdpriority60 \lsdlocked0 Light Shading;\lsdpriority61 \lsdlocked0 Light List; +\lsdpriority62 \lsdlocked0 Light Grid;\lsdpriority63 \lsdlocked0 Medium Shading 1;\lsdpriority64 \lsdlocked0 Medium Shading 2;\lsdpriority65 \lsdlocked0 Medium List 1;\lsdpriority66 \lsdlocked0 Medium List 2;\lsdpriority67 \lsdlocked0 Medium Grid 1; +\lsdpriority68 \lsdlocked0 Medium Grid 2;\lsdpriority69 \lsdlocked0 Medium Grid 3;\lsdpriority70 \lsdlocked0 Dark List;\lsdpriority71 \lsdlocked0 Colorful Shading;\lsdpriority72 \lsdlocked0 Colorful List;\lsdpriority73 \lsdlocked0 Colorful Grid; +\lsdpriority60 \lsdlocked0 Light Shading Accent 1;\lsdpriority61 \lsdlocked0 Light List Accent 1;\lsdpriority62 \lsdlocked0 Light Grid Accent 1;\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 1;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 1; +\lsdpriority65 \lsdlocked0 Medium List 1 Accent 1;\lsdsemihidden1 \lsdpriority99 \lsdlocked0 Revision;\lsdqformat1 \lsdpriority34 \lsdlocked0 List Paragraph;\lsdqformat1 \lsdpriority29 \lsdlocked0 Quote; +\lsdqformat1 \lsdpriority30 \lsdlocked0 Intense Quote;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 1;\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 1;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 1;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 1; +\lsdpriority70 \lsdlocked0 Dark List Accent 1;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 1;\lsdpriority72 \lsdlocked0 Colorful List Accent 1;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 1;\lsdpriority60 \lsdlocked0 Light Shading Accent 2; +\lsdpriority61 \lsdlocked0 Light List Accent 2;\lsdpriority62 \lsdlocked0 Light Grid Accent 2;\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 
2;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 2;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 2; +\lsdpriority66 \lsdlocked0 Medium List 2 Accent 2;\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 2;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 2;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 2;\lsdpriority70 \lsdlocked0 Dark List Accent 2; +\lsdpriority71 \lsdlocked0 Colorful Shading Accent 2;\lsdpriority72 \lsdlocked0 Colorful List Accent 2;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 2;\lsdpriority60 \lsdlocked0 Light Shading Accent 3;\lsdpriority61 \lsdlocked0 Light List Accent 3; +\lsdpriority62 \lsdlocked0 Light Grid Accent 3;\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 3;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 3;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 3;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 3; +\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 3;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 3;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 3;\lsdpriority70 \lsdlocked0 Dark List Accent 3;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 3; +\lsdpriority72 \lsdlocked0 Colorful List Accent 3;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 3;\lsdpriority60 \lsdlocked0 Light Shading Accent 4;\lsdpriority61 \lsdlocked0 Light List Accent 4;\lsdpriority62 \lsdlocked0 Light Grid Accent 4; +\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 4;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 4;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 4;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 4; +\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 4;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 4;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 4;\lsdpriority70 \lsdlocked0 Dark List Accent 4;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 4; +\lsdpriority72 \lsdlocked0 Colorful List Accent 4;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 4;\lsdpriority60 \lsdlocked0 Light Shading Accent 5;\lsdpriority61 \lsdlocked0 
Light List Accent 5;\lsdpriority62 \lsdlocked0 Light Grid Accent 5; +\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 5;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 5;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 5;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 5; +\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 5;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 5;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 5;\lsdpriority70 \lsdlocked0 Dark List Accent 5;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 5; +\lsdpriority72 \lsdlocked0 Colorful List Accent 5;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 5;\lsdpriority60 \lsdlocked0 Light Shading Accent 6;\lsdpriority61 \lsdlocked0 Light List Accent 6;\lsdpriority62 \lsdlocked0 Light Grid Accent 6; +\lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 6;\lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 6;\lsdpriority65 \lsdlocked0 Medium List 1 Accent 6;\lsdpriority66 \lsdlocked0 Medium List 2 Accent 6; +\lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 6;\lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 6;\lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 6;\lsdpriority70 \lsdlocked0 Dark List Accent 6;\lsdpriority71 \lsdlocked0 Colorful Shading Accent 6; +\lsdpriority72 \lsdlocked0 Colorful List Accent 6;\lsdpriority73 \lsdlocked0 Colorful Grid Accent 6;\lsdqformat1 \lsdpriority19 \lsdlocked0 Subtle Emphasis;\lsdqformat1 \lsdpriority21 \lsdlocked0 Intense Emphasis; +\lsdqformat1 \lsdpriority31 \lsdlocked0 Subtle Reference;\lsdqformat1 \lsdpriority32 \lsdlocked0 Intense Reference;\lsdqformat1 \lsdpriority33 \lsdlocked0 Book Title;\lsdsemihidden1 \lsdunhideused1 \lsdpriority37 \lsdlocked0 Bibliography; +\lsdsemihidden1 \lsdunhideused1 \lsdqformat1 \lsdpriority39 \lsdlocked0 TOC Heading;\lsdpriority41 \lsdlocked0 Plain Table 1;\lsdpriority42 \lsdlocked0 Plain Table 2;\lsdpriority43 \lsdlocked0 Plain Table 3;\lsdpriority44 \lsdlocked0 Plain Table 4; +\lsdpriority45 \lsdlocked0 Plain Table 5;\lsdpriority40 
\lsdlocked0 Grid Table Light;\lsdpriority46 \lsdlocked0 Grid Table 1 Light;\lsdpriority47 \lsdlocked0 Grid Table 2;\lsdpriority48 \lsdlocked0 Grid Table 3;\lsdpriority49 \lsdlocked0 Grid Table 4; +\lsdpriority50 \lsdlocked0 Grid Table 5 Dark;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 1;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 1; +\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 1;\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 1;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 1;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 1; +\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 1;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 2;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 2;\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 2; +\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 2;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 2;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 2;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 2; +\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 3;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 3;\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 3;\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 3; +\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 3;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 3;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 3;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 4; +\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 4;\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 4;\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 4;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 4; +\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 4;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 4;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 5;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 5; +\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 
5;\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 5;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 5;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 5; +\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 5;\lsdpriority46 \lsdlocked0 Grid Table 1 Light Accent 6;\lsdpriority47 \lsdlocked0 Grid Table 2 Accent 6;\lsdpriority48 \lsdlocked0 Grid Table 3 Accent 6; +\lsdpriority49 \lsdlocked0 Grid Table 4 Accent 6;\lsdpriority50 \lsdlocked0 Grid Table 5 Dark Accent 6;\lsdpriority51 \lsdlocked0 Grid Table 6 Colorful Accent 6;\lsdpriority52 \lsdlocked0 Grid Table 7 Colorful Accent 6; +\lsdpriority46 \lsdlocked0 List Table 1 Light;\lsdpriority47 \lsdlocked0 List Table 2;\lsdpriority48 \lsdlocked0 List Table 3;\lsdpriority49 \lsdlocked0 List Table 4;\lsdpriority50 \lsdlocked0 List Table 5 Dark; +\lsdpriority51 \lsdlocked0 List Table 6 Colorful;\lsdpriority52 \lsdlocked0 List Table 7 Colorful;\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 1;\lsdpriority47 \lsdlocked0 List Table 2 Accent 1;\lsdpriority48 \lsdlocked0 List Table 3 Accent 1; +\lsdpriority49 \lsdlocked0 List Table 4 Accent 1;\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 1;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 1;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 1; +\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 2;\lsdpriority47 \lsdlocked0 List Table 2 Accent 2;\lsdpriority48 \lsdlocked0 List Table 3 Accent 2;\lsdpriority49 \lsdlocked0 List Table 4 Accent 2; +\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 2;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 2;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 2;\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 3; +\lsdpriority47 \lsdlocked0 List Table 2 Accent 3;\lsdpriority48 \lsdlocked0 List Table 3 Accent 3;\lsdpriority49 \lsdlocked0 List Table 4 Accent 3;\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 3; +\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 3;\lsdpriority52 
\lsdlocked0 List Table 7 Colorful Accent 3;\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 4;\lsdpriority47 \lsdlocked0 List Table 2 Accent 4; +\lsdpriority48 \lsdlocked0 List Table 3 Accent 4;\lsdpriority49 \lsdlocked0 List Table 4 Accent 4;\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 4;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 4; +\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 4;\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 5;\lsdpriority47 \lsdlocked0 List Table 2 Accent 5;\lsdpriority48 \lsdlocked0 List Table 3 Accent 5; +\lsdpriority49 \lsdlocked0 List Table 4 Accent 5;\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 5;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 5;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 5; +\lsdpriority46 \lsdlocked0 List Table 1 Light Accent 6;\lsdpriority47 \lsdlocked0 List Table 2 Accent 6;\lsdpriority48 \lsdlocked0 List Table 3 Accent 6;\lsdpriority49 \lsdlocked0 List Table 4 Accent 6; +\lsdpriority50 \lsdlocked0 List Table 5 Dark Accent 6;\lsdpriority51 \lsdlocked0 List Table 6 Colorful Accent 6;\lsdpriority52 \lsdlocked0 List Table 7 Colorful Accent 6;\lsdsemihidden1 \lsdunhideused1 \lsdpriority99 \lsdlocked0 Mention; +\lsdsemihidden1 \lsdunhideused1 \lsdpriority99 \lsdlocked0 Smart Hyperlink;\lsdsemihidden1 \lsdunhideused1 \lsdpriority99 \lsdlocked0 Hashtag;\lsdsemihidden1 \lsdunhideused1 \lsdpriority99 \lsdlocked0 Unresolved Mention; +\lsdsemihidden1 \lsdunhideused1 \lsdpriority99 \lsdlocked0 Smart Link;}}{\*\datastore 0105000002000000180000004d73786d6c322e534158584d4c5265616465722e362e3000000000000000000000100000 +d0cf11e0a1b11ae1000000000000000000000000000000003e000300feff0900060000000000000000000000010000000100000000000000001000000200000001000000feffffff0000000000000000ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff 
+ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +fffffffffffffffffdffffff04000000feffffff05000000feffffff06000000feffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff 
+ffffffffffffffffffffffffffffffff52006f006f007400200045006e00740072007900000000000000000000000000000000000000000000000000000000000000000000000000000000000000000016000500ffffffffffffffff010000000c6ad98892f1d411a65f0040963251e5000000000000000000000000c04c +2cd4f909d9010300000000050000000000004d0073006f004400610074006100530074006f0072006500000000000000000000000000000000000000000000000000000000000000000000000000000000001a000101ffffffffffffffff020000000000000000000000000000000000000000000000c04c2cd4f909d901 +c04c2cd4f909d901000000000000000000000000c4004100dc004d005500cf00c600c000df00d4004300d100d40047005400d30053004a00430043005700c0003d003d000000000000000000000000000000000032000101ffffffff05000000030000000000000000000000000000000000000000000000c04c2cd4f909 +d901c04c2cd4f909d9010000000000000000000000004900740065006d0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a000201ffffffff04000000ffffffff000000000000000000000000000000000000000000000000 +00000000000000000000000000000000d800000000000000010000000200000003000000feffffff0500000006000000070000000800000009000000feffffff0b0000000c0000000d000000feffffff0f00000010000000110000001200000013000000feffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff 
+ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff3c623a536f75726365732053656c65637465645374796c653d225c4150412e58534c22205374796c654e616d653d224150412220786d6c6e733a623d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f7267 +2f6f6666696365446f63756d656e742f323030362f6269626c696f6772617068792220786d6c6e733d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f6f6666696365446f63756d656e742f323030362f6269626c696f677261706879223e3c2f623a536f75726365733e00000000 +0000000000000000000000000000000000000000000000000000000000000000000000003c3f786d6c2076657273696f6e3d22312e302220656e636f64696e673d225554462d3822207374616e64616c6f6e653d226e6f223f3e0d0a3c64733a6461746173746f72654974656d2064733a6974656d49443d227b35323043 +304639302d413046392d343046462d423144302d3634463334383930383235417d2220786d6c6e733a64733d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f6f6666696365446f63756d656e742f323030362f637573746f6d586d6c223e3c64733a736368656d61526566733e3c +64733a736368656d615265662064733a7572693d22687474703a2f2f736368656d61732e6f70656e500072006f007000650072007400690065007300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000016000200ffffffffffffffffffffffff000000000000 +000000000000000000000000000000000000000000000000000000000000040000005501000000000000c700c300dd0050005500d100490054005900c400de0046004500dc00d2003500520056004a00ce00d200c0003d003d000000000000000000000000000000000032000100ffffffffffffffff0600000000000000 
+00000000000000000000000000000000c04c2cd4f909d901c04c2cd4f909d9010000000000000000000000004900740065006d0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a000201ffffffff07000000ffffffff0000 +000000000000000000000000000000000000000000000000000000000000000000000a000000d800000000000000500072006f007000650072007400690065007300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000016000200ffffffffffffffffffffffff +0000000000000000000000000000000000000000000000000000000000000000000000000e0000005501000000000000786d6c666f726d6174732e6f72672f6f6666696365446f63756d656e742f323030362f6269626c696f677261706879222f3e3c2f64733a736368656d61526566733e3c2f64733a6461746173746f +72654974656d3e000000000000000000000000000000000000000000000000000000000000000000000000000000000000003c623a536f75726365732053656c65637465645374796c653d225c4150412e58534c22205374796c654e616d653d224150412220786d6c6e733a623d22687474703a2f2f736368656d61732e +6f70656e786d6c666f726d6174732e6f72672f6f6666696365446f63756d656e742f323030362f6269626c696f6772617068792220786d6c6e733d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f6f6666696365446f63756d656e742f323030362f6269626c696f677261706879 +223e3c2f623a536f75726365733e000000000000000000000000000000000000000000000000000000000000000000000000000000003c3f786d6c2076657273696f6e3d22312e302220656e636f64696e673d225554462d3822207374616e64616c6f6e653d226e6f223f3e0d0a3c64733a6461746173746f7265497465 +6d2064733a6974656d49443d227b35333446334639452d313331322d344636322d383531332d4343394634353532364543417d2220786d6c6e733a64733d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f6f6666696365446f63756d656e742f323030362f637573746f6d586d6c 
+223e3c64733a736368656d61526566733e3c64733a736368656d615265662064733a7572693d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f6f6666696365446f63756d656e742f323030362f6269626c696f677261706879222f3e3c2f64733a736368656d61526566733e3c2f +64733a6461746173746f72654974656d3e00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000105000000000000}} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..dac57a6 --- /dev/null +++ b/README.md @@ -0,0 +1,95 @@ +# AMD Render Pipeline Shaders (RPS) SDK + +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +The AMD Render Pipeline Shaders (RPS) SDK is released under the AMD INTERNAL EVALUATION LICENSE. Please see file [LICENSE.RTF](LICENSE.rtf) for full license details. + +## Introduction + +The AMD Render Pipeline Shaders (RPS) SDK is a comprehensive and extensible Render Graph framework. + +Since the debut of explicit graphics APIs (Direct3D 12 and Vulkan®), Render Graphs (or Frame Graphs) have been proposed as an elegant and efficient solution to various problems such as generating resource barriers, managing transient memory and scheduling GPU workload. In practice, it is non-trivial to implement a render graph system from scratch, and it requires effort to keep it optimal according to hardware, API and content changes. 
Porting effects and techniques between different render graph systems may also consume extra engineering time. + +![image](./docs/assets/rps_purpose.svg) + +The RPS SDK intends to make Render Graphs more easily accessible and to provide a generally optimal barrier generator and (aliasing) memory scheduler. RPS also tries to simplify Render Graph construction by extending HLSL with attributes and intrinsics to create a domain-specific language for render graph programming. This enables opt-in applications to program Render Graphs implicitly via a high-level, declarative programming model, allowing users to focus on the render pipeline logic rather than node configuration details. We call the extended language "Render Pipeline Shader Language", or RPSL. + +The SDK has a compiler-like architecture. It includes a frontend used to specify resources and a node sequence, a runtime compiler which compiles the linear node sequence into a graph and schedules it, and a runtime backend that converts the scheduled render graph into graphics API commands. + +![image](./docs/assets/sdk_structure.svg) + +The RPS SDK is built with controllability and extensibility in mind. The HLSL language extension and API provide a rich set of controls for customizing its behavior. The frontend, the runtime render graph compiler, and the backend can all be extended and customized. + +## Content Structure +Notable directories and files of the SDK: +- [./include](./include/): Main public API headers. +- [./src](./src): Main implementation sources and C++ class definitions. +- [./tools/rps_hlslc/bin](./tools/rps_hlslc/bin): The RPSL compiler toolchain binaries. +- [./tools/rps_hlslc/rpsl/rpsl.h](./tools/rps_hlslc/rpsl/rpsl.h): The RPSL language documentation embedded in a header file. +- [./tools/rps_visualizer](./tools/rps_visualizer): A library to visualize RPS render graph resources and memory layout. Please refer to [README.md](./tools/rps_visualizer/README.md) for more details. 
+- [./tools/rps_explorer](./tools/rps_explorer): A tool to load and test RPSL files. +- [./tests/console](./tests/console) and [./tests/gui](./tests/gui): Test cases. They currently also serve as samples. + +## Build + +### Prerequisites +- OS: Windows 10 21H2 or later. +- Compiler: A C++11 compliant compiler. Tested compilers include Visual C++ toolset v141 or later (VS2017+) and Clang 15.0.1. +- CMake: CMake 3.12.1 or later. + +### Build +The SDK uses CMake. There are a few ways to start: +- Using the default build batch file [./build.bat](./build.bat). +- Manually configuring and building with CMake: + ```bash + cmake -S ./ -B ./build -A x64 + cmake --build ./build --config RelWithDebInfo + ``` + +### Running Tests & RPSL Explorer +After a successful build: +- By default (or when CMake option RpsBuildTests is ON), all tests can be executed by running ctest from the build folder: + ```bash + cd build + ctest -C RelWithDebInfo + ``` +- By default, the RPSL Explorer binaries can be found at `./build/tools/rpsl_explorer/<config>/`. Run `rpsl_explorer.exe` and open an RPSL file to start hacking! + +## Documentation + +The main sources of documentation for the RPS public APIs are inlined Doxygen-style comments, which can be built into HTML documents if Doxygen is installed (tested with Doxygen version 1.9.2): +```bash +doxygen +``` + +## Integration + +If the parent project uses CMake, RPS can be added as a subdirectory, e.g.: +```cmake +set(RpsRootSolutionFolder "rps") +add_subdirectory(libs/AMD/rps) +``` + +RPS can also be integrated as pre-built static libraries, or you may include the RPS sources directly. + +The main header to include is [./include/rps/rps.h](./include/rps/rps.h). A few macros (`RPS_D3D12_RUNTIME`, `RPS_VK_RUNTIME`, `RPS_D3D11_RUNTIME`) can be used to selectively enable default runtime backends. + +RPSL files can be compiled into C source code with the provided RPS-HLSLC tool chain at development time. 
The generated .C file can be integrated into the engine or application sources. Typical usage: +```bash +rps-hlslc.exe <rpsl_file_name> [-od <output_directory>] [-m <module_name>] ... +``` +Please see [./tools/rps_hlslc/README.md](./tools/rps_hlslc/README.md) for usage details. + +Currently, the RPS SDK is released under the AMD Internal Evaluation License. We welcome everyone to try it out and appreciate any feedback! +If you are interested in using it in a future engine or title product, please contact an AMD Alliance Manager or Developer Technology engineer. + +## Third-Party Software +- [Catch2](https://github.com/catchorg/Catch2) +- [Dear ImGui](https://github.com/ocornut/imgui) +- [Microsoft DirectX Shader Compiler](https://github.com/microsoft/DirectXShaderCompiler) +- [LLVM](https://llvm.org/) +- [LLVM-CBE (C Backend)](https://github.com/JuliaHubOSS/llvm-cbe) + +## Attribution +- AMD, the AMD Arrow logo, Radeon, Crossfire, and combinations thereof are either registered trademarks or trademarks of Advanced Micro Devices, Inc. in the United States and/or other countries. +- Microsoft, DirectX, Visual Studio, and Windows are either registered trademarks or trademarks of Microsoft Corporation in the United States and/or other countries. diff --git a/build.bat b/build.bat new file mode 100644 index 0000000..eae1ec2 --- /dev/null +++ b/build.bat @@ -0,0 +1,10 @@ +@REM Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +@REM +@REM This file is part of the AMD Render Pipeline Shaders SDK which is +@REM released under the AMD INTERNAL EVALUATION LICENSE. +@REM +@REM See file LICENSE.RTF for full license details. 
+ +cmake -S ./ -B ./build -A x64 +cmake --build ./build --config RelWithDebInfo + diff --git a/docs/assets/rps_purpose.svg b/docs/assets/rps_purpose.svg new file mode 100644 index 0000000..822d196 --- /dev/null +++ b/docs/assets/rps_purpose.svg @@ -0,0 +1 @@ +Game EngineGraphics API & RuntimeDriversHardwareEffects, Passes, Frame LogicRHITransient Memory & ResourcesBarrier SchedulingGPU Work SchedulingResource BindingStreaming…PSO ManagingGame EngineGraphics API & RuntimeDriversHardwareEffects, Passes, Frame LogicResource BindingStreaming…PSO ManagingRHIRPS SDK \ No newline at end of file diff --git a/docs/assets/sdk_structure.svg b/docs/assets/sdk_structure.svg new file mode 100644 index 0000000..f506b40 --- /dev/null +++ b/docs/assets/sdk_structure.svg @@ -0,0 +1 @@ +API / RHI BackendRuntime PipelineCustom RHIDX12VulkanRPSL RuntimeRender GraphRuntime CompilerCommonBackend APICustom PassesRender Graph BuilderVariablesCommand Recording CallbacksFrontendRPSLOffline / JITCompilerC++ APIRPSModuleCustom Frontend/.CBuild CallbacksC-APICommon PassesBackend Passes \ No newline at end of file diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt new file mode 100644 index 0000000..ceb6d62 --- /dev/null +++ b/external/CMakeLists.txt @@ -0,0 +1,58 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +# +# This file is part of the AMD Render Pipeline Shaders SDK which is +# released under the AMD INTERNAL EVALUATION LICENSE. +# +# See file LICENSE.RTF for full license details. 
+
+if ( RpsEnableImGui )  # Name the variable directly: `${...}` is expanded first and its value is then evaluated a second time by if().
+    BuildFolderProperty( "external" AppFolder )  # Helper defined elsewhere; presumably stores the IDE folder for "external" in AppFolder - TODO confirm.
+
+    set (ImGuiSources  # Core Dear ImGui sources, all located under RpsImGui_DIR.
+        ${RpsImGui_DIR}/imgui.cpp
+        ${RpsImGui_DIR}/imgui.h
+        ${RpsImGui_DIR}/imgui_draw.cpp
+        ${RpsImGui_DIR}/imgui_internal.h
+        ${RpsImGui_DIR}/imgui_tables.cpp
+        ${RpsImGui_DIR}/imgui_widgets.cpp)
+
+    set(ImGuiWin32BackendSources  # Win32 platform backend.
+        ${RpsImGui_DIR}/backends/imgui_impl_win32.cpp
+        ${RpsImGui_DIR}/backends/imgui_impl_win32.h)
+
+    set(ImGuiVkBackendSources  # Vulkan renderer backend; only built when Vulkan_FOUND is true (see below).
+        ${RpsImGui_DIR}/backends/imgui_impl_vulkan.cpp
+        ${RpsImGui_DIR}/backends/imgui_impl_vulkan.h)
+
+    set(ImGuiDX12BackendSources  # Direct3D 12 renderer backend.
+        ${RpsImGui_DIR}/backends/imgui_impl_dx12.cpp
+        ${RpsImGui_DIR}/backends/imgui_impl_dx12.h)
+
+    set(ImGuiDX11BackendSources  # Direct3D 11 renderer backend.
+        ${RpsImGui_DIR}/backends/imgui_impl_dx11.cpp
+        ${RpsImGui_DIR}/backends/imgui_impl_dx11.h)
+
+    add_library(ImGui STATIC ${ImGuiSources})  # Every target below is a static library grouped under "${AppFolder}/imgui".
+    set_target_properties( ImGui PROPERTIES FOLDER "${AppFolder}/imgui")
+    target_include_directories(ImGui PUBLIC ${RpsImGui_DIR})  # PUBLIC: consumers inherit the ImGui include path.
+
+    add_library(ImGuiWin32Backend STATIC ${ImGuiWin32BackendSources})
+    set_target_properties( ImGuiWin32Backend PROPERTIES FOLDER "${AppFolder}/imgui")
+    target_include_directories(ImGuiWin32Backend PUBLIC ${RpsImGui_DIR})
+
+    if(Vulkan_FOUND)  # Bare variable name; false when Vulkan was not located or never searched for.
+        add_library(ImGuiVkBackend STATIC ${ImGuiVkBackendSources})
+        set_target_properties( ImGuiVkBackend PROPERTIES FOLDER "${AppFolder}/imgui")
+        target_include_directories(ImGuiVkBackend PUBLIC
+            ${Vulkan_INCLUDE_DIRS}
+            ${RpsImGui_DIR})
+    endif()
+
+    add_library(ImGuiDX12Backend STATIC ${ImGuiDX12BackendSources})
+    set_target_properties( ImGuiDX12Backend PROPERTIES FOLDER "${AppFolder}/imgui")
+    target_include_directories(ImGuiDX12Backend PUBLIC ${RpsImGui_DIR})
+
+    add_library(ImGuiDX11Backend STATIC ${ImGuiDX11BackendSources})
+    set_target_properties( ImGuiDX11Backend PROPERTIES FOLDER "${AppFolder}/imgui")
+    target_include_directories(ImGuiDX11Backend PUBLIC ${RpsImGui_DIR})
+endif()
\ No newline at end of file
diff --git a/external/catch2 b/external/catch2
new file
mode 160000 index 0000000..216713a --- /dev/null +++ b/external/catch2 @@ -0,0 +1 @@ +Subproject commit 216713a4066b79d9803d374f261ccb30c0fb451f diff --git a/external/dxc/README.txt b/external/dxc/README.txt new file mode 100644 index 0000000..236e558 --- /dev/null +++ b/external/dxc/README.txt @@ -0,0 +1,3 @@ +!! Official DXIL binary + +Oct 2020 - v1.5.2010 release \ No newline at end of file diff --git a/external/dxc/x64/.gitattributes b/external/dxc/x64/.gitattributes new file mode 100644 index 0000000..dd2ae13 --- /dev/null +++ b/external/dxc/x64/.gitattributes @@ -0,0 +1 @@ +dxil.dll filter=lfs diff=lfs merge=lfs -text diff --git a/external/dxc/x64/dxil.dll b/external/dxc/x64/dxil.dll new file mode 100644 index 0000000..fff6170 --- /dev/null +++ b/external/dxc/x64/dxil.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5fd5b101cf62bb439fb489e0dd78cc98110154716c5a30b4afef095fe31cf37 +size 1405216 diff --git a/external/imgui b/external/imgui new file mode 160000 index 0000000..9aae45e --- /dev/null +++ b/external/imgui @@ -0,0 +1 @@ +Subproject commit 9aae45eb4a05a5a1f96be1ef37eb503a12ceb889 diff --git a/include/rps/core/rps_api.h b/include/rps/core/rps_api.h new file mode 100644 index 0000000..1dc6dc2 --- /dev/null +++ b/include/rps/core/rps_api.h @@ -0,0 +1,583 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_API_H_ +#define _RPS_API_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stddef.h> +#include <stdint.h> +#include <stdarg.h> + +/// @defgroup Core Core Components +/// @{ + +#include "rps/core/rps_result.h" + +// +// Definitions +// + +/// @defgroup Basic Basic Types and Definitions +/// @{ + +/// @brief Macro for defining a handle type to an internal implementation structure. 
+/// +/// A handle is a pointer to a type that is prefixed with _T. E.g. a handle called RpsDevice should have +/// a matching structure available called RpsDevice_T. +#define RPS_DEFINE_HANDLE(TypeName) typedef struct TypeName##_T* TypeName; + +// Defines a bool() operator for checking if an opaque handle is NULL (C++ only; expands to nothing in C). +#ifdef __cplusplus +#define RPS_OPAQUE_HANDLE_METHODS() \ + operator bool() const \ + { \ + return ptr != nullptr; \ + } +#else +#define RPS_OPAQUE_HANDLE_METHODS() +#endif //__cplusplus + +/// @brief Macro for declaring an opaque handle type. +/// +/// This macro is useful for forward declarations of such handle types in headers and not strictly required. May only +/// be used in conjunction with RPS_DEFINE_OPAQUE_HANDLE. +#define RPS_DECLARE_OPAQUE_HANDLE(TypeName) typedef struct TypeName##_T TypeName; + +/// @brief Macro to define an opaque handle type. +/// +/// An opaque handle is a pointer to an unspecified type that may vary depending on the runtime implementation. This +/// type can be different for e.g. a Vulkan and a DX12 backend. In addition to the pointer itself, the handle provides +/// additional methods for C++ compilation units. +#define RPS_DEFINE_OPAQUE_HANDLE(TypeName) \ + typedef struct TypeName##_T \ + { \ + void* ptr; \ + RPS_OPAQUE_HANDLE_METHODS() \ + } TypeName; + +/// @brief A macro for defining a mapping between an opaque handle type and an underlying implementation type. +/// +/// Includes conversions from and to the given object type. The bodies use only syntax valid in both C and C++. +#define RPS_IMPL_OPAQUE_HANDLE(Name, HandleType, ObjectType) \ + static inline ObjectType* rps##Name##FromHandle(HandleType hdl) \ + { \ + return (ObjectType*)(hdl.ptr); \ + } \ + static inline HandleType rps##Name##ToHandle(ObjectType* ptr) \ + { \ + HandleType result = {ptr}; return result; \ + } + +/// @brief Constant for an invalid RPS object handle value. +#define RPS_NULL_HANDLE 0 + +/// @brief Boolean value type. +typedef int32_t RpsBool; + +/// @brief Constant for a boolean value holding a value of true. 
For use with RpsBool. +#define RPS_TRUE (1) + +/// @brief Constant for a boolean value holding a value of false. For use with RpsBool. +#define RPS_FALSE (0) + +/// @brief Type for holding up to 16 bitflags. +typedef uint16_t RpsFlags16; + +/// @brief Type for holding up to 32 bitflags. +typedef uint32_t RpsFlags32; + +/// @brief Type for holding up to 64 bitflags. +typedef uint64_t RpsFlags64; + +/// @brief Type for general 32-bit index values. +typedef uint32_t RpsIndex32; + +/// @brief Constant for an invalid unsigned 32-bit index value. +#define RPS_INDEX_NONE_U32 (UINT32_MAX) + +/// @brief Constant for an invalid signed 32-bit index value. +#define RPS_INDEX_NONE_I32 (-1) + +/// @brief Constant for the maximum length of names supported by RPS APIs (including the terminating '\0'). +#define RPS_NAME_MAX_LEN (256) + +/// @} end defgroup Basic + +// +// Types +// + +/// @defgroup RpsAllocator RpsAllocator +/// @{ + +/// @brief Signature of functions for allocating memory. +/// +/// @param pContext Context for memory allocation. +/// @param size Size of the desired allocation in bytes. +/// @param alignment Minimum alignment requirement of the desired allocation in bytes. +/// +/// @returns Pointer to the allocated memory. +typedef void* (*PFN_rpsAlloc)(void* pContext, size_t size, size_t alignment); + +/// @brief Signature of functions for reallocating memory. +/// +/// @param pUserContext Context for memory allocation. +/// @param oldBuffer Address of the memory allocation to be reallocated. +/// Must not be NULL for oldSize != 0. +/// @param oldSize Size of the old memory allocation in bytes. +/// @param newSize Desired size of the allocation in bytes. +/// @param alignment Minimum alignment requirement of the desired allocation in bytes. +/// +/// @returns Pointer to the (re-)allocated memory. 
+typedef void* (*PFN_rpsRealloc)(void* pUserContext, void* oldBuffer, size_t oldSize, size_t newSize, size_t alignment); + +/// @brief Signature of functions for freeing allocated memory. +/// +/// @param pUserContext Context for memory allocation. +/// @param buffer Address of the memory allocation to be freed. +typedef void (*PFN_rpsFree)(void* pUserContext, void* buffer); + +/// @brief Memory allocator interface. +typedef struct RpsAllocator +{ + PFN_rpsAlloc pfnAlloc; ///< Pointer to a function for allocating memory. + PFN_rpsFree pfnFree; ///< Pointer to a function for releasing memory. + PFN_rpsRealloc pfnRealloc; ///< Pointer to a function for reallocating memory. + void* pContext; ///< Context to be passed to the allocator functions. +} RpsAllocator; + +/// @brief Requirements for a single memory allocation. +typedef struct RpsAllocInfo +{ + size_t size; ///< Size of the allocation in bytes. + size_t alignment; ///< Minimum alignment requirement of the allocation in bytes. +} RpsAllocInfo; + +/// @} end defgroup RpsAllocator + +/// @defgroup RpsPrinter RpsPrinter +/// @{ + +/// @brief Signature of functions for printing with variadic arguments. +/// +/// @param pContext Context for printing. See RpsPrinter. +/// @param format Format string for the print operation matching the C99 specification for printf. +/// @param ... List of arguments matching the requirements of the contents of format. +typedef void (*PFN_rpsPrintf)(void* pContext, const char* format, ...); + +/// @brief Signature of functions for printing with a variable argument list. +/// +/// @param pContext Context for printing. See RpsPrinter. +/// @param format Format string for the print operation matching the C99 specification for printf. +/// @param vl Variable argument list matching the requirements of the contents of +/// format. +typedef void (*PFN_rpsVPrintf)(void* pContext, const char* format, va_list vl); + +/// @brief Printer interface. 
+typedef struct RpsPrinter +{ + PFN_rpsPrintf pfnPrintf; ///< Pointer to a function for printing with variadic arguments. + PFN_rpsVPrintf pfnVPrintf; ///< Pointer to a function for printing with a variable argument list. + void* pContext; ///< Context to be passed to the print functions. +} RpsPrinter; + +/// @} end defgroup RpsPrinter + +/// @brief Signature of functions for generating random integers uniformly distributed on the closed interval +/// [minValue, maxValue]. +/// +/// @param pContext Context for generating random numbers. +/// @param minValue Minimum output value. +/// @param maxValue Maximum output value. +/// +/// @returns Generated uniform random value. +typedef int32_t (*PFN_rpsRandomUniformInt)(void* pContext, int32_t minValue, int32_t maxValue); + +/// @brief Random number generator interface. +typedef struct RpsRandomNumberGenerator +{ + PFN_rpsRandomUniformInt pfnRandomUniformInt; ///< Pointer to a function for generating random uniform integers. + void* pContext; ///< Context to be passed to the generator function. +} RpsRandomNumberGenerator; + +// +// RpsDevice +// + +/// @defgroup RpsDevice RpsDevice +/// @{ + +/// @brief Handle type for RPS device objects. +/// +/// The RPS device is used as the main state object for the RPS runtime API. It provides a central location for data +/// and callbacks of the rest of the software stack. +RPS_DEFINE_HANDLE(RpsDevice); + +/// @brief Signature of functions for destroying device objects. +/// +/// @param hDevice Handle to the RpsDevice object to destroy. +typedef void (*PFN_rpsDeviceOnDestroy)(RpsDevice hDevice); + +/// @brief Creation parameters for an RpsDevice. +typedef struct RpsDeviceCreateInfo +{ + RpsAllocator allocator; ///< Default allocator to be usable for all memory allocations which do + /// not specify a separate allocator. + RpsPrinter printer; ///< Default printer to be usable for all printing operations which do + /// not specify a separate printer. 
+ RpsAllocInfo privateDataAllocInfo; ///< Allocation info for user controlled data which is part of the + /// device. + PFN_rpsDeviceOnDestroy pfnDeviceOnDestroy; ///< Pointer to a function for eventual destruction of the device. +} RpsDeviceCreateInfo; + +/// @brief Creates a device object. +/// +/// @param pCreateInfo Pointer to creation parameters. Passing NULL initializes the device with default +/// parameters. +/// @param pHDevice Pointer in which the device object is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsDeviceCreate(const RpsDeviceCreateInfo* pCreateInfo, RpsDevice* pHDevice); + +/// @brief Signature of functions for creating device objects. +/// +/// @param pCreateInfo Pointer to creation parameters. Passing NULL initializes the device with +/// default parameters. +/// @param pHDevice Pointer in which the device object is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +typedef RpsResult (*PFN_rpsDeviceCreate)(const RpsDeviceCreateInfo* pCreateInfo, RpsDevice* pHDevice); + +/// @brief Destroys an RPS device object. +/// +/// @param hDevice Handle to the RpsDevice. +void rpsDeviceDestroy(RpsDevice hDevice); + +/// @brief Accesses user data of an RPS device. +/// +/// This user is normally not the API user but RPS itself. The device private data is created at device allocation, +/// based on privateDataAllocInfo and the device itself will never touch the data until it is destroyed. +/// +/// @param hDevice Handle to the RpsDevice. +/// +/// @returns Pointer to the user controlled data if a handle != RPS_NULL_HANDLE is passed, +/// NULL otherwise. +void* rpsDeviceGetPrivateData(RpsDevice hDevice); + +/// @brief Sets the global debug printer which is used for diagnostic purposes when no device context is available. +/// +/// @param pPrinter Pointer to the printer to set. 
+void rpsSetGlobalDebugPrinter(const RpsPrinter* pPrinter); + +/// @brief Gets the global debug printer. Takes no arguments. +/// +/// @returns Pointer to the current debug printer. +const RpsPrinter* rpsGetGlobalDebugPrinter(void); + +/// @} end defgroup RpsDevice + +/// @brief Parameters of a type used in the RPS library. +typedef struct RpsTypeInfo +{ + uint16_t size; ///< Size of a single instance in bytes. + uint16_t id; ///< Unique identifier of the type. +} RpsTypeInfo; + +/// @brief Integer type ids for all built-in types, e.g. integers and floating point types. +typedef enum RpsBuiltInTypeIds +{ + RPS_TYPE_OPAQUE = 0, ///< General type with specified size. + RPS_TYPE_BUILT_IN_BOOL, ///< 32-bit boolean type. + RPS_TYPE_BUILT_IN_INT8, ///< 8-bit signed integer type. + RPS_TYPE_BUILT_IN_UINT8, ///< 8-bit unsigned integer type. + RPS_TYPE_BUILT_IN_INT16, ///< 16-bit signed integer type. + RPS_TYPE_BUILT_IN_UINT16, ///< 16-bit unsigned integer type. + RPS_TYPE_BUILT_IN_INT32, ///< 32-bit signed integer type. + RPS_TYPE_BUILT_IN_UINT32, ///< 32-bit unsigned integer type. + RPS_TYPE_BUILT_IN_INT64, ///< 64-bit signed integer type. + RPS_TYPE_BUILT_IN_UINT64, ///< 64-bit unsigned integer type. + RPS_TYPE_BUILT_IN_FLOAT32, ///< 32-bit floating point type. + RPS_TYPE_BUILT_IN_FLOAT64, ///< 64-bit floating point type. + RPS_TYPE_BUILT_IN_MAX_VALUE, ///< Number of built-in types. + RPS_TYPE_RUNTIME_DEFINED_BEGIN = 64, ///< Starting value of the type id range reserved for runtime defined types. + RPS_TYPE_USER_DEFINED_BEGIN = 256, ///< Starting value of the type id range reserved for user defined types. +} RpsBuiltInTypeIds; + +/// @brief Integer type ids for any kind of type. +typedef uint32_t RpsTypeId; + +/// @brief Creates an RpsTypeInfo structure from only a size parameter. +/// +/// @param size Size in bytes. +/// +/// @returns RpsTypeInfo created from the size. 
+static inline RpsTypeInfo rpsTypeInfoInitFromSize(size_t size) +{ + const RpsTypeInfo result = {(uint16_t)size, (uint16_t)RPS_TYPE_OPAQUE}; + return result; +} + +/// @brief Creates an RpsTypeInfo structure with size and type ID. +/// +/// @param size Size of the type in bytes. The value is cast to uint16_t. +/// @param typeId Identifier of the type. Can be one of the @ref RpsBuiltInTypeIds values or a value defined by the runtime or user. The value is cast to uint16_t. +/// +/// @returns RpsTypeInfo created from the size and the type ID. +static inline RpsTypeInfo rpsTypeInfoInitFromSizeAndTypeID(size_t size, RpsTypeId typeId) +{ + const RpsTypeInfo result = {(uint16_t)size, (uint16_t)typeId}; + return result; +} + +/// @brief Initializes a general type info structure from a type. +/// +/// @param TypeName Name of the type. +/// +/// @returns RpsTypeInfo of the type name with RPS_TYPE_OPAQUE as ID. +#define rpsTypeInfoInitFromType(TypeName) rpsTypeInfoInitFromSize(sizeof(TypeName)) + +/// @brief Initializes a general type info structure from the type and a type ID. +/// +/// @param TypeName Name of the type to generate an info structure for. +/// @param TypeID ID of the type. +/// +/// @returns RpsTypeInfo of the type name and ID. +#define rpsTypeInfoInitFromTypeAndID(TypeName, TypeID) rpsTypeInfoInitFromSizeAndTypeID(sizeof(TypeName), TypeID) + +/// @brief Type for render graph node declaration identifiers. +typedef uint32_t RpsNodeDeclId; + +/// @brief Type for function parameter identifiers. +typedef uint32_t RpsParamId; + +/// @brief Type for render graph node identifiers. +typedef uint32_t RpsNodeId; + +/// @brief Constant for an invalid render graph node declaration ID. +#define RPS_NODEDECL_ID_INVALID RPS_INDEX_NONE_U32 + +/// @brief Constant for an invalid render graph node parameter ID. +#define RPS_PARAM_ID_INVALID RPS_INDEX_NONE_U32 + +/// @brief Transparent handle type for a general render graph variable. +typedef void* RpsVariable; + +/// @brief Transparent handle type for a general, immutable render graph variable. 
+typedef const void* RpsConstant; + +/// @brief Bitflags for subgraph properties. +typedef enum RpsSubgraphFlagBits +{ + RPS_SUBGRAPH_FLAG_NONE = 0, ///< No subgraph properties. + RPS_SUBGRAPH_FLAG_ATOMIC = 1 << 0, ///< The subgraph is atomic, so external nodes may not be reordered + /// in-between nodes belonging to this subgraph. + RPS_SUBGRAPH_FLAG_SEQUENTIAL = 1 << 1, ///< The subgraph is sequential: the relative order of its nodes should be + /// preserved. +} RpsSubgraphFlagBits; + +/// @brief Bitmask type for RpsSubgraphFlagBits. +typedef RpsFlags32 RpsSubgraphFlags; + +// +// Debug +// + +/// @defgroup Debug Debug +/// @{ + +/// @brief Type for files represented by an RPSL internal integer identifier. +typedef RpsIndex32 RpsSourceFileId; + +/// @brief Parameters of a source code location. +typedef struct RpsSourceLocation +{ + RpsSourceFileId file; ///< Identifier for a file. + uint32_t line; ///< Line number within the file. +} RpsSourceLocation; + +/// @brief Type for RPSL debug information. +typedef struct RpsDebugInfo RpsDebugInfo; + +/// @} end defgroup Debug + +/// @} end defgroup Core + +// +// RPSL +// + +/// @defgroup RPSLRuntime RPSL Runtime +/// @{ + +/// @brief Bitmask type for an internal entry flags type. +typedef RpsFlags32 RpslEntryCallFlags; + +/// @brief Signature of functions for executing RPSL callbacks. +/// +/// @param numArgs Number of arguments used for the callback. +/// @param ppArgs Pointer to an array of const void* const with numArgs elements to use for the callback. +/// @param flags Flags for the type of entry. +typedef void (*PFN_RpslEntry)(uint32_t numArgs, const void* const* ppArgs, RpslEntryCallFlags flags); + +/// @brief Type for RPSL entry point declarations. +/// +/// An RpslEntry is defined by an export function entry in RPSL and usually statically linked or dynamically loaded into +/// the application. It contains the signature info and the function entry point. 
Users can use the subsequent macros to +/// define such an entry point for usage in their application when binding nodes. +typedef const struct RpsRpslEntry_T* RpsRpslEntry; + +/// @brief Macro for creating a variable name matching the name of the entry point of an RPSL module. +/// +/// @param ModuleName Name of the module. +/// @param EntryName Name of the entry point. +/// +/// @returns Concatenation of a prefix with these two names as a unique identifier. +#define RPS_ENTRY_REF(ModuleName, EntryName) rpsl_M_##ModuleName##_E_##EntryName + +/// @brief Macro for defining a unique string identifier to an entry point of an RPSL module. +/// +/// @param ModuleName Name of the module. +/// @param EntryName Name of the entry point. +/// +/// @returns Concatenation of a prefix with these two names as a unique identifier into a +/// string. +#define RPS_ENTRY_NAME(ModuleName, EntryName) "rpsl_M_" #ModuleName "_E_" #EntryName + +/// @brief String constant for the name of the entry table. +#define RPS_ENTRY_TABLE_NAME "rpsl_M_entry_tbl" + +/// @brief String constant for the name of the module ID. +#define RPS_MODULE_ID_NAME "rpsl_M_module_id" + +/// @brief Macro for defining a C++ declaration of an RPSL entry point. +/// +/// @param ModuleName Name of the module. +/// @param EntryName Name of the entry point. +/// +/// @returns C++ variable declaration of the module with external C linkage. +#define RPS_DECLARE_RPSL_ENTRY(ModuleName, EntryName) extern "C" RpsRpslEntry RPS_ENTRY_REF(ModuleName, EntryName); + +/// @brief Type for a dispatch table of an RPSL process. +typedef struct ___rpsl_runtime_procs ___rpsl_runtime_procs; + +/// @brief Signature of functions for initializing RPSL processes from a DLL. +/// +/// @param pProcs Pointer to the RPSL dispatch table (see ___rpsl_runtime_procs). +/// @param sizeofProcs Presumably the size of the dispatch table structure in bytes - TODO confirm. +/// +/// @returns 32-bit integer status; its meaning is not documented in this header - TODO confirm. +typedef int32_t (*PFN_rpslDynLibInit)(const ___rpsl_runtime_procs* pProcs, uint32_t sizeofProcs); + +/// @brief Initializes an RPSL DLL module. 
+/// +/// The user can create an RPSL DLL module by linking rpsl code with rps_rpsl_host_dll.c. After this DLL is loaded, the +/// user must get the address of the `___rps_dyn_lib_init` entry point and call rpsRpslDynamicLibraryInit with this +/// entry point address as the parameter. This initializes the RPSL runtime callbacks for the DLL. +/// +/// @param pfn_dynLibInit Address of "___rps_dyn_lib_init" entry point of the RPSL DLL module. +/// +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsRpslDynamicLibraryInit(PFN_rpslDynLibInit pfn_dynLibInit); + +/// @brief Generates an RPSL entry name. +/// +/// Generates the name from the given module and entry names, so that it matches the symbol name generated by the RPSL +/// compiler for this entry. This name can be used to retrieve the RpslEntry address from a dynamically loaded library +/// using e.g. GetProcAddress or dlsym. +/// +/// @param pBuf Pointer in which the name is returned. Must not be NULL. +/// @param bufSize Size of the buffer in bytes. +/// @param moduleName Null terminated string with the name of the module. +/// @param entryName Null terminated string with the name of the entry point. +/// +/// @returns Pointer to the buffer if its size is large enough, otherwise NULL. +const char* rpsMakeRpslEntryName(char* pBuf, size_t bufSize, const char* moduleName, const char* entryName); + +// +// RPSL-JIT +// + +/// @defgroup RPSLJIT RPSL JIT +/// @{ + +/// Handle type for JIT loaded modules. +RPS_DEFINE_HANDLE(RpsJITModule); + +// Currently RPSL-JIT module is provided as a separated DLL on Windows only, mainly for tooling purpose. +// Below function prototypes and proc names are provided to enable using the DLL. + +/// @brief Signature of functions for initializing the JIT compiler. +/// +/// @param argc Number of program arguments. +/// @param args Pointer to an array of const char* with argc elements. Must not be NULL for argc != 0. 
+/// +/// @returns Result code of the operation. See RpsResult for more info. +typedef int32_t (*PFN_RpsJITStartup)(int32_t argc, const char** args); + +/// @brief Signature of functions for shutting down the JIT compiler. +typedef void (*PFN_RpsJITShutdown)(void); + +/// @brief Signature of functions for loading RPSL modules with the JIT compiler. +/// +/// @param name Null terminated string with the name of the module. +/// @param pJITModule Pointer in which a handle to the loaded module is returned. +/// +/// @returns Result code of the operation. See RpsResult for more info. +typedef int32_t (*PFN_RpsJITLoad)(const char* name, RpsJITModule* pJITModule); + +/// @brief Signature of functions for unloading a JIT loaded module. +/// +/// @param hJITModule Handle to the JIT module. +typedef void (*PFN_RpsJITUnload)(RpsJITModule hJITModule); + +/// @brief Signature of functions for getting an RPSL entry point from a module. +/// +/// @param hJITModule Handle to the RPSL module. Must not be RPS_NULL_HANDLE. +/// @param symbolName Null terminated string with the name of the entry point. +/// @param pEntryPoint Pointer to the entry point as a 64-bit unsigned integer. +/// +/// @returns Result code of the operation. See RpsResult for more info. 
+typedef int32_t (*PFN_RpsJITGetEntryPoint)(RpsJITModule hJITModule, const char* symbolName, uint64_t* pEntryPoint); + +#define RPS_JIT_PROC_NAME_STARTUP "RpsJITStartup" +#define RPS_JIT_PROC_NAME_SHUTDOWN "RpsJITShutdown" +#define RPS_JIT_PROC_NAME_LOAD "RpsJITLoad" +#define RPS_JIT_PROC_NAME_UNLOAD "RpsJITUnload" +#define RPS_JIT_PROC_NAME_GETENTRYPOINT "RpsJITGetEntryPoint" + +/// @} end defgroup RPSLJIT + +/// @} end defgroup RPSLRuntime + +#ifdef __cplusplus +} +#endif // __cplusplus + +#ifdef __cplusplus + +#define RPS_CLASS_NO_COPY(ClassName) \ +private: \ + ClassName(const ClassName&) = delete; \ + ClassName& operator=(const ClassName&) = delete; + +#define RPS_CLASS_NO_MOVE(ClassName) \ +private: \ + ClassName(ClassName&&) = delete; \ + ClassName& operator=(ClassName&&) = delete; + +#define RPS_CLASS_NO_COPY_MOVE(ClassName) \ + RPS_CLASS_NO_COPY(ClassName) \ + RPS_CLASS_NO_MOVE(ClassName) + +#endif //__cplusplus + +#endif //_RPS_API_H_ diff --git a/include/rps/core/rps_cmd_callback_wrapper.hpp b/include/rps/core/rps_cmd_callback_wrapper.hpp new file mode 100644 index 0000000..62cf0ed --- /dev/null +++ b/include/rps/core/rps_cmd_callback_wrapper.hpp @@ -0,0 +1,424 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef RPS_CALLBACK_WRAPPER_H +#define RPS_CALLBACK_WRAPPER_H + +#include +#include + +namespace rps +{ + /// @brief Place holder for an unused argument of a callback function. + /// + /// It can be used to skip parameter marshalling during command node callbacks, + /// while keeping the parameter ordinals match between the callback functions and node declarations. 
+ /// For example for node declaration: + /// ``` + /// node foo( rtv param0, srv param1 ); + /// ``` + /// If the callback function does not need to bind the render target param0 explicitly, it can be declared as: + /// ``` + /// void FooCallback( const RpsCmdCallbackContext* pContext, rps::UnusedArg unusedParam0, D3D12_CPU_DESCRIPTOR_HANDLE usedParam1 ); + /// ``` + /// So that the runtime will ignore unusedParam0, while still pass usedParam1 to the callback. + /// + /// @ingroup RpsRenderGraphCommandRecording + struct UnusedArg + { + }; + + namespace details + { + template + struct CommandArgUnwrapper + { + }; + + // Value types or const ref types + template + struct CommandArgUnwrapper::value>::type> + { + using ValueT = typename std::remove_cv::type>::type; + + const ValueT& operator()(const RpsCmdCallbackContext* pContext) + { + return *static_cast(pContext->ppArgs[Index]); + } + }; + + // Const pointer types + template + struct CommandArgUnwrapper< + Index, + T, + typename std::enable_if::value && + std::is_const::type>::value>::type> + { + T operator()(const RpsCmdCallbackContext* pContext) + { + return static_cast(pContext->ppArgs[Index]); + } + }; + + // Skipping unused args + template + struct CommandArgUnwrapper + { + rps::UnusedArg operator()(const RpsCmdCallbackContext* pContext) + { + return {}; + } + }; + + // Converting RpsBool to bool + template + struct CommandArgUnwrapper + { + bool operator()(const RpsCmdCallbackContext* pContext) + { + const RpsBool value = *static_cast(pContext->ppArgs[Index]); + return !!value; + } + }; + +#if __cplusplus >= 201402L + + // Non-recursive argument unwrapping with index_sequence + template + struct FunctionWrapper + { + template + static TRet Call(const RpsCmdCallbackContext* pContext, + TRet (*pFn)(const RpsCmdCallbackContext*, TArgs...), + std::index_sequence) + { + return pFn(pContext, CommandArgUnwrapper()(pContext)...); + } + + template + static TRet Call(const RpsCmdCallbackContext* pContext, + TClass* 
pThis, + TRet (TClass::*pFn)(const RpsCmdCallbackContext*, TArgs...), + std::index_sequence) + { + return (pThis->*pFn)(pContext, CommandArgUnwrapper()(pContext)...); + } + + template + static TRet Call(const RpsCmdCallbackContext* pContext, + std::function fn, + std::index_sequence) + { + return fn(pContext, CommandArgUnwrapper()(pContext)...); + } + + template + static TRet Call(const RpsCmdCallbackContext* pContext, + TRet (*pFn)(const RpsCmdCallbackContext&, TArgs...), + std::index_sequence) + { + return pFn(*pContext, CommandArgUnwrapper()(pContext)...); + } + + template + static TRet Call(const RpsCmdCallbackContext* pContext, + TClass* pThis, + TRet (TClass::*pFn)(const RpsCmdCallbackContext&, TArgs...), + std::index_sequence) + { + return (pThis->*pFn)(*pContext, CommandArgUnwrapper()(pContext)...); + } + + template + static TRet Call(const RpsCmdCallbackContext* pContext, + std::function fn, + std::index_sequence) + { + return fn(*pContext, CommandArgUnwrapper()(pContext)...); + } + }; + + template + TRet WrappedFunction(const RpsCmdCallbackContext* pContext, TRet (*pFn)(const RpsCmdCallbackContext*, TArgs...)) + { + return FunctionWrapper::template Call<>(pContext, pFn, std::index_sequence_for{}); + } + + template + TRet WrappedFunction(const RpsCmdCallbackContext* pContext, + TClass* pThis, + TRet (TClass::*pFn)(const RpsCmdCallbackContext*, TArgs...)) + { + return FunctionWrapper::template Call( + pContext, pThis, pFn, std::index_sequence_for{}); + } + + template + TRet WrappedFunction(const RpsCmdCallbackContext* pContext, + std::function fn) + { + return FunctionWrapper::template Call<>(pContext, fn, std::index_sequence_for{}); + } + + template + TRet WrappedFunction(const RpsCmdCallbackContext* pContext, TRet (*pFn)(const RpsCmdCallbackContext&, TArgs...)) + { + return FunctionWrapper::template Call<>(pContext, pFn, std::index_sequence_for{}); + } + + template + TRet WrappedFunction(const RpsCmdCallbackContext* pContext, + TClass* pThis, + TRet 
(TClass::*pFn)(const RpsCmdCallbackContext&, TArgs...)) + { + return FunctionWrapper::template Call( + pContext, pThis, pFn, std::index_sequence_for{}); + } + + template + TRet WrappedFunction(const RpsCmdCallbackContext* pContext, + std::function fn) + { + return FunctionWrapper::template Call<>(pContext, fn, std::index_sequence_for{}); + } + +#else //#if __cplusplus >= 201402L + + // Recursive argument unwrapping + template + struct FunctionWrapperRecursive + { + template + static TRet Wrapped(const RpsCmdCallbackContext* pContext, + TRet (*pFn)(const RpsCmdCallbackContext*, TArgs...), + TUnwrappedArgs&&... unwrappedArgs) + { + using TupleType = std::tuple; + return FunctionWrapperRecursive::Wrapped( + pContext, + pFn, + CommandArgUnwrapper::type>()(pContext), + unwrappedArgs...); + } + + template + static TRet Wrapped(const RpsCmdCallbackContext* pContext, + TClass* pThis, + TRet (TClass::*pFn)(const RpsCmdCallbackContext*, TArgs...), + TUnwrappedArgs&&... unwrappedArgs) + { + using TupleType = std::tuple; + return FunctionWrapperRecursive::Wrapped( + pContext, + pThis, + pFn, + CommandArgUnwrapper::type>()(pContext), + unwrappedArgs...); + } + + template + static TRet Wrapped(const RpsCmdCallbackContext* pContext, + std::function func, + TUnwrappedArgs&&... unwrappedArgs) + { + using TupleType = std::tuple; + return FunctionWrapperRecursive::Wrapped( + pContext, + func, + CommandArgUnwrapper::type>()(pContext), + unwrappedArgs...); + } + + template + static TRet Wrapped(const RpsCmdCallbackContext* pContext, + TRet (*pFn)(const RpsCmdCallbackContext&, TArgs...), + TUnwrappedArgs&&... unwrappedArgs) + { + using TupleType = std::tuple; + return FunctionWrapperRecursive::Wrapped( + pContext, + pFn, + CommandArgUnwrapper::type>()(pContext), + unwrappedArgs...); + } + + template + static TRet Wrapped(const RpsCmdCallbackContext* pContext, + TClass* pThis, + TRet (TClass::*pFn)(const RpsCmdCallbackContext&, TArgs...), + TUnwrappedArgs&&... 
unwrappedArgs) + { + using TupleType = std::tuple; + return FunctionWrapperRecursive::Wrapped( + pContext, + pThis, + pFn, + CommandArgUnwrapper::type>()(pContext), + unwrappedArgs...); + } + + template + static TRet Wrapped(const RpsCmdCallbackContext* pContext, + std::function func, + TUnwrappedArgs&&... unwrappedArgs) + { + using TupleType = std::tuple; + return FunctionWrapperRecursive::Wrapped( + pContext, + func, + CommandArgUnwrapper::type>()(pContext), + unwrappedArgs...); + } + }; + + template <> + struct FunctionWrapperRecursive<0> + { + template + static TRet Wrapped(const RpsCmdCallbackContext* pContext, + TRet (*pFn)(const RpsCmdCallbackContext*, TArgs...), + TUnwrappedArgs&&... unwrappedArgs) + { + return pFn(pContext, unwrappedArgs...); + } + + template + static TRet Wrapped(const RpsCmdCallbackContext* pContext, + TClass* pThis, + TRet (TClass::*pFn)(const RpsCmdCallbackContext*, TArgs...), + TUnwrappedArgs&&... unwrappedArgs) + { + return (pThis->*pFn)(pContext, unwrappedArgs...); + } + + template + static TRet Wrapped(const RpsCmdCallbackContext* pContext, + std::function func, + TUnwrappedArgs&&... unwrappedArgs) + { + return func(pContext, unwrappedArgs...); + } + + template + static TRet Wrapped(const RpsCmdCallbackContext* pContext, + TRet (*pFn)(const RpsCmdCallbackContext&, TArgs...), + TUnwrappedArgs&&... unwrappedArgs) + { + return pFn(*pContext, unwrappedArgs...); + } + + template + static TRet Wrapped(const RpsCmdCallbackContext* pContext, + TClass* pThis, + TRet (TClass::*pFn)(const RpsCmdCallbackContext&, TArgs...), + TUnwrappedArgs&&... unwrappedArgs) + { + return (pThis->*pFn)(*pContext, unwrappedArgs...); + } + + template + static TRet Wrapped(const RpsCmdCallbackContext* pContext, + std::function func, + TUnwrappedArgs&&... 
unwrappedArgs) + { + return func(*pContext, unwrappedArgs...); + } + }; + + template + TRet WrappedFunction(const RpsCmdCallbackContext* pContext, TRet (*pFn)(const RpsCmdCallbackContext*, TArgs...)) + { + return FunctionWrapperRecursive::Wrapped(pContext, pFn); + } + + template + TRet WrappedFunction(const RpsCmdCallbackContext* pContext, + TClass* pThis, + TRet (TClass::*pFn)(const RpsCmdCallbackContext*, TArgs...)) + { + return FunctionWrapperRecursive::Wrapped(pContext, pThis, pFn); + } + + template + TRet WrappedFunction(const RpsCmdCallbackContext* pContext, + std::function fn) + { + return FunctionWrapperRecursive::Wrapped(pContext, fn); + } + + template + TRet WrappedFunction(const RpsCmdCallbackContext* pContext, TRet (*pFn)(const RpsCmdCallbackContext&, TArgs...)) + { + return FunctionWrapperRecursive::Wrapped(pContext, pFn); + } + + template + TRet WrappedFunction(const RpsCmdCallbackContext* pContext, + TClass* pThis, + TRet (TClass::*pFn)(const RpsCmdCallbackContext&, TArgs...)) + { + return FunctionWrapperRecursive::Wrapped(pContext, pThis, pFn); + } + + template + TRet WrappedFunction(const RpsCmdCallbackContext* pContext, + std::function fn) + { + return FunctionWrapperRecursive::Wrapped(pContext, fn); + } +#endif //#if __cplusplus >= 201402L + + template + void WrappedFunction(const RpsCmdCallbackContext* pContext, TClass* pThis, std::nullptr_t n) + { + } + + template + struct MemberNodeCallbackContext + { + TObject* target; + TFunc method; + + MemberNodeCallbackContext(TObject* inTarget, TFunc inFunc) + : target(inTarget) + , method(inFunc) + { + } + + static void Callback(const RpsCmdCallbackContext* pContext) + { + auto pThis = static_cast*>(pContext->pCmdCallbackContext); + + details::WrappedFunction(pContext, pThis->target, pThis->method); + } + }; + + template + struct NonMemberNodeCallbackContext + { + TFunc func; + + NonMemberNodeCallbackContext(TFunc inFunc) + : func(inFunc) + { + } + + static void Callback(const RpsCmdCallbackContext* 
pContext) + { + auto pThis = static_cast*>(pContext->pCmdCallbackContext); + + details::WrappedFunction(pContext, pThis->func); + } + }; + + } // namespace details + +} // namespace rps + +#endif //RPS_CALLBACK_WRAPPER_H + diff --git a/include/rps/core/rps_result.h b/include/rps/core/rps_result.h new file mode 100644 index 0000000..2ee0dd2 --- /dev/null +++ b/include/rps/core/rps_result.h @@ -0,0 +1,135 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_ERRORCODES_H_ +#define _RPS_ERRORCODES_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/// @brief Result and error codes used by operations of the RPS library. +/// +/// @ingroup Basic +typedef enum RpsResult +{ + /// Successful completion. + RPS_OK = 0, + + /// Failure due to an unspecified error. + RPS_ERROR_UNSPECIFIED = -1, + + /// Failure due to an unrecognized command. + RPS_ERROR_UNRECOGNIZED_COMMAND = -2, + + /// Failure due to invalid arguments. + RPS_ERROR_INVALID_ARGUMENTS = -3, + + /// Failure due to invalid data. + RPS_ERROR_INVALID_DATA = -4, + + /// Failure due to an invalid operation. + RPS_ERROR_INVALID_OPERATION = -5, + + /// Failure due to running out of memory. + RPS_ERROR_OUT_OF_MEMORY = -6, + + /// Failure due to not being able to find the specified file. + RPS_ERROR_FILE_NOT_FOUND = -7, + + /// Failure due to an invalid file format. + RPS_ERROR_INVALID_FILE_FORMAT = -8, + + /// Failure due to the file format version being too old. + RPS_ERROR_UNSUPPORTED_VERSION_TOO_OLD = -9, + + /// Failure due to the file format version being too new. + RPS_ERROR_UNSUPPORTED_VERSION_TOO_NEW = -10, + + /// Failure due to an unknown node. + RPS_ERROR_UNKNOWN_NODE = -11, + + /// Failure due to an index being out of its valid bounds. 
+ RPS_ERROR_INDEX_OUT_OF_BOUNDS = -12, + + /// Failure due to a command being already finalized. + RPS_ERROR_COMMAND_ALREADY_FINAL = -13, + + /// Failure due to a data layout mismatch between runtime and shader. + RPS_ERROR_INTEROP_DATA_LAYOUT_MISMATCH = -14, + + /// Failure due to a key not being found. + RPS_ERROR_KEY_NOT_FOUND = -15, + + /// Failure due to a key value being duplicated where it is required to be unique. + RPS_ERROR_KEY_DUPLICATED = -16, + + /// Failure due to a feature not being implemented yet. + RPS_ERROR_NOT_IMPLEMENTED = -17, + + /// Failure due to an integer overflow. + RPS_ERROR_INTEGER_OVERFLOW = -18, + + /// Failure due to exclusive ranges overlapping. + RPS_ERROR_RANGE_OVERLAPPING = -19, + + /// Failure due to rpsRenderPipelineValidate finding an invalid pipeline configuration. More details are provided + /// via output of the device print function. + RPS_ERROR_VALIDATION_FAILED = -20, + + /// Failure due to a compiled RPSL shader program being ill formed. Normally indicates a compiler error. + RPS_ERROR_INVALID_PROGRAM = -21, + + /// Failure due to an RPSL module being incompatible with the current runtime. + RPS_ERROR_UNSUPPORTED_MODULE_VERSION = -22, + + /// Failure due to a failed type safety check. + RPS_ERROR_TYPE_MISMATCH = -23, + + /// Failure due to a feature not being supported. + RPS_ERROR_NOT_SUPPORTED = -24, + + /// Failure due to failed a runtime API without direct mapping of the API error code. + RPS_ERROR_RUNTIME_API_ERROR = -25, + + /// Failure due to an RPS library internal error. + RPS_ERROR_INTERNAL_ERROR = -26, + + /// Number of unique RPS result codes. + RPS_RESULT_CODE_COUNT = 27, +} RpsResult; + +/// @brief Checks if the RpsResult code indicates a success. +/// +/// @param R Result code. +/// +/// @returns true if RPS_OK, false otherwise. +/// +/// @ingroup Basic +#define RPS_SUCCEEDED(R) ((R) >= 0) + +/// @brief Checks if the RpsResult code indicates a failure. +/// +/// @param R Result code. 
+/// +/// @returns false if RPS_OK, true otherwise. +/// +/// @ingroup Basic +#define RPS_FAILED(R) ((R) < 0) + +/// @brief Gets the name string of a result code. +/// +/// @param result Result code. +/// +/// @returns Null terminated string with the result name. +const char* rpsResultGetName(RpsResult result); + +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif // #ifndef _RPS_ERRORCODES_H_ diff --git a/include/rps/frontend/rps_builder.h b/include/rps/frontend/rps_builder.h new file mode 100644 index 0000000..62d5507 --- /dev/null +++ b/include/rps/frontend/rps_builder.h @@ -0,0 +1,420 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_BUILDER_HPP_ +#define _RPS_BUILDER_HPP_ + +#include "rps/runtime/common/rps_runtime.h" + + + +#ifdef __cplusplus + +namespace rps +{ + namespace details + { + template + struct TypeIdGetter + { + static constexpr RpsTypeId value = 0; + }; + + // clang-format off +#define RPS_DEFINE_BUILT_IN_TYPE_ID(TypeName, Value) \ + template <> \ + struct TypeIdGetter \ + { \ + static constexpr RpsTypeId value = Value; \ + }; + + RPS_DEFINE_BUILT_IN_TYPE_ID(RpsImageView, RPS_TYPE_IMAGE_VIEW); + RPS_DEFINE_BUILT_IN_TYPE_ID(rps::ImageView, RPS_TYPE_IMAGE_VIEW); + RPS_DEFINE_BUILT_IN_TYPE_ID(RpsBufferView, RPS_TYPE_BUFFER_VIEW); + RPS_DEFINE_BUILT_IN_TYPE_ID(rps::BufferView, RPS_TYPE_BUFFER_VIEW); + +#undef RPS_DEFINE_BUILT_IN_TYPE_ID + // clang-format on + + } // namespace details + + struct ParameterDesc : public RpsParameterDesc + { + public: + ParameterDesc(RpsTypeInfo typeInfo, + const ParamAttrList* inAttrs = nullptr, + const char* inName = nullptr, + RpsParameterFlags inFlags = RPS_PARAMETER_FLAG_NONE, + uint32_t inArraySize = 0) + : RpsParameterDesc{typeInfo, inArraySize, inAttrs, inName, inFlags} + { + } + + 
ParameterDesc(size_t elementSize, + const ParamAttrList* inAttrs = nullptr, + const char* inName = nullptr, + RpsParameterFlags inFlags = RPS_PARAMETER_FLAG_NONE, + uint32_t inArraySize = 0) + : RpsParameterDesc{{static_cast(elementSize), 0}, inArraySize, inAttrs, inName, inFlags} + { + } + + template + static ParameterDesc Make(const ParamAttrList* inAttrs, + const char* inName = nullptr, + RpsParameterFlags inFlags = RPS_PARAMETER_FLAG_NONE, + uint32_t inArraySize = 0) + { + return ParameterDesc(GetTypeInfo(), inAttrs, inName, inFlags, inArraySize); + } + + template + static ParameterDesc Make(const ParamAttrList& inAttrs, + const char* inName = nullptr, + RpsParameterFlags inFlags = RPS_PARAMETER_FLAG_NONE, + uint32_t inArraySize = 0) + { + return ParameterDesc(GetTypeInfo(), &inAttrs, inName, inFlags, inArraySize); + } + + template + static ParameterDesc Make(const char* inName = nullptr, + RpsParameterFlags inFlags = RPS_PARAMETER_FLAG_NONE, + uint32_t inArraySize = 0) + { + return ParameterDesc(GetTypeInfo(), nullptr, inName, inFlags, inArraySize); + } + + private: + template + static constexpr RpsTypeInfo GetTypeInfo() + { + return RpsTypeInfo{sizeof(T), rps::details::TypeIdGetter::value}; + } + }; + + class RenderGraphBuilder; + + class RenderGraphBuilderRef + { + public: + RenderGraphBuilderRef(RpsRenderGraphBuilder builder); + + ~RenderGraphBuilderRef() + { + } + + public: + template + struct TNodeArg + { + using ValueType = T; + + TNodeArg(T& inValue, + const ParamAttrList* inAttrs = nullptr, + RpsParameterFlags inFlags = RPS_PARAMETER_FLAG_NONE) + : value(inValue) + , attrs(inAttrs) + , flags(inFlags) + { + } + + T* operator&() const + { + return &value; + } + + T& value; + const ParamAttrList* attrs; + RpsParameterFlags flags; + }; + + private: + struct NodeArgHelper + { + template + static const ParamAttrList* GetAttrList(const TNodeArg& arg) + { + return arg.attrs; + } + + template + static const ParamAttrList* GetAttrList(const T& arg) + { + return 
nullptr; + } + + template + static RpsParameterFlags GetFlag(const TNodeArg& arg) + { + return arg.flags; + } + + template + static RpsParameterFlags GetFlag(const T& arg) + { + return RPS_PARAMETER_FLAG_NONE; + } + }; + + template + struct NodeArgTypeHelper + { + using Type = T; + }; + + template + struct NodeArgTypeHelper> + { + using Type = T; + }; + + public: + void* AllocateData(size_t size, size_t alignment) const; + + template + T* New(TArgs&&... args) + { + static_assert(std::is_trivially_destructible::value, "Type must be trivially destructible."); + void* pMemory = AllocateData(sizeof(T), alignof(T)); + return pMemory ? (new (pMemory) T(args...)) : nullptr; + } + + RpsNodeDeclId DeclNode(const RpsNodeDesc& nodeDesc) const; + + template + RpsNodeDeclId DeclNode(const char* name, RpsNodeDeclFlags flags, TParams... params) const + { + RpsParameterDesc paramDescs[] = { + params..., + }; + + RpsNodeDesc nodeDesc = {}; + + nodeDesc.numParams = sizeof...(params); + nodeDesc.pParamDescs = paramDescs; + nodeDesc.flags = flags; + nodeDesc.name = name; + + return DeclNode(nodeDesc); + } + + RpsNodeId AddNode(RpsNodeDeclId nodeDeclId, + uint32_t tag, + PFN_rpsCmdCallback callback, + void* callbackUserContext, + std::initializer_list args); + + template + TNodeArg MakeNodeArg(TArg& value, TAttrs... attrs) + { + auto* pAttrList = New(attrs...); + return TNodeArg(value, pAttrList, RPS_PARAMETER_FLAG_NONE); + } + + template + RpsNodeId AddNode(TNodeFunc nodeFunc, uint32_t tag, const char* name, TArgs&&... 
args) + { + RpsParameterDesc paramDescs[] = { + ParameterDesc::Make::Type>( + NodeArgHelper::GetAttrList(args), nullptr, NodeArgHelper::GetFlag(args))..., + }; + + RpsNodeDesc nodeDesc = {}; + nodeDesc.numParams = uint32_t(sizeof...(TArgs)); + nodeDesc.pParamDescs = paramDescs; + nodeDesc.name = name; + + const RpsNodeDeclId nodeDeclId = DeclNode(nodeDesc); + + using ContextType = rps::details::NonMemberNodeCallbackContext; + static_assert(std::is_trivially_destructible::value, ""); + + ContextType* callbackUserContext = New(nodeFunc); + + return AddNode(nodeDeclId, tag, &ContextType::Callback, callbackUserContext, {&args...}); + } + + template + RpsNodeId AddNode(TTarget* pTarget, TNodeFunc nodeFunc, uint32_t tag, const char* name, TArgs&&... args) + { + RpsParameterDesc paramDescs[] = { + ParameterDesc::Make::Type>( + NodeArgHelper::GetAttrList(args), nullptr, NodeArgHelper::GetFlag(args))..., + }; + + RpsNodeDesc nodeDesc = {}; + nodeDesc.numParams = uint32_t(sizeof...(TArgs)); + nodeDesc.pParamDescs = paramDescs; + nodeDesc.name = name; + + const RpsNodeDeclId nodeDeclId = DeclNode(nodeDesc); + + using ContextType = rps::details::MemberNodeCallbackContext; + static_assert(std::is_trivially_destructible::value, ""); + + ContextType* callbackUserContext = New(pTarget, nodeFunc); + + return AddNode(nodeDeclId, tag, &ContextType::Callback, callbackUserContext, {&args...}); + } + + RpsResourceId GetParamResourceId(RpsParamId paramId, uint32_t arrayIndex = 0) const; + RpsResult DeclareResource(uint32_t localResourceId, + RpsVariable hDescVar, + const char* name, + RpsResourceId* pOutResId); + + RpsVariable GetParamVariable(RpsParamId paramId, size_t* pSize = nullptr) const; + + template + T* GetParamVariable(RpsParamId paramId) const + { + size_t varSize = 0; + auto result = GetParamVariable(paramId, &varSize); + + return (varSize == sizeof(T)) ? 
static_cast(result) : nullptr; + } + + template + RpsResult SetParamVariable(RpsParamId paramId, const T& value) + { + auto pData = GetParamVariable(paramId); + if (pData) + { + *pData = value; + return RPS_OK; + } + return RPS_ERROR_INDEX_OUT_OF_BOUNDS; + } + + private: + RenderGraphBuilder& m_builder; + RpsResult m_result = RPS_OK; + }; + +} // namespace rps + +#endif // __cplusplus + +/// @defgroup Frontend Frontend +/// @{ + +/// @defgroup RpsRenderGraphHelpers RpsRenderGraph Helpers +/// @{ + +// Render Graph Helpers + +/// @brief Allocates memory from a render graph builder and initializes it by copying data from an existing buffer. +/// +/// @param hRenderGraphBuilder Handle to the render graph builder. Must not be RPS_NULL_HANDLE. +/// @param size Size of the data in bytes. +/// @param pCopyFrom Pointer to the data. Must not be NULL for size != 0. +/// +/// @returns Pointer to the allocated memory where the data was copied to if the +/// allocation was successful, NULL otherwise. Only valid until the next +/// render graph update. +static inline void* rpsRenderGraphAllocAndCopyFrom(RpsRenderGraphBuilder hRenderGraphBuilder, + size_t size, + const void* pCopyFrom) + +{ + void* pResult = rpsRenderGraphAllocateData(hRenderGraphBuilder, size); + if (pResult && (size != 0)) + { + memcpy(pResult, pCopyFrom, size); + } + return pResult; +} + +/// @brief Allocates memory from a render graph builder and zeroes it. +/// +/// @param hRenderGraphBuilder Handle to the render graph builder. Must not be RPS_NULL_HANDLE. +/// @param size Size of the data in bytes. +/// +/// @returns Pointer to the allocated memory if the allocation was successful, +/// NULL otherwise. Only valid until the next render graph update. 
+static inline void* rpsRenderGraphAllocAndZero(RpsRenderGraphBuilder hRenderGraphBuilder, size_t size) +{ + void* pResult = rpsRenderGraphAllocateData(hRenderGraphBuilder, size); + if (pResult) + { + memset(pResult, 0, size); + } + return pResult; +} + +/// @brief Allocates an object from a render graph builder. +/// +/// @param hRenderGraphBuilder Handle to the render graph builder. Must not be RPS_NULL_HANDLE. +/// @param TypeName Type name of the object. +/// +/// @returns Pointer to the allocated object if the allocation was successful, NULL +/// otherwise. +#define rpsRenderGraphAllocateDataOfType(hRenderGraphBuilder, TypeName) \ + ((TypeName*)rpsRenderGraphAllocateData(hRenderGraphBuilder, sizeof(TypeName))) + +/// @brief Allocates an array of objects in a render graph. +/// +/// @param hRenderGraphBuilder Handle to the render graph builder. Must not be RPS_NULL_HANDLE. +/// @param TypeName Type name of the objects. +/// @param NumElements Number of elements in the array. +/// +/// @returns Pointer to the allocated array of TypeName if the allocation was +/// successful, NULL otherwise. +#define rpsRenderGraphAllocateArrayOfType(hRenderGraphBuilder, TypeName, NumElements) \ + ((TypeName*)rpsRenderGraphAllocateData(hRenderGraphBuilder, sizeof(TypeName) * (NumElements))) + +/// @brief Allocates an object with zeroed memory in a render graph builder. +/// +/// @param hRenderGraphBuilder Handle to the render graph builder. Must not be RPS_NULL_HANDLE. +/// @param TypeName Type name of the object. +/// +/// @returns Pointer to the allocated object if the allocation was successful, NULL +/// otherwise. +#define rpsRenderGraphAllocateDataOfTypeZeroed(hRenderGraphBuilder, TypeName) \ + ((TypeName*)rpsRenderGraphAllocAndZero(hRenderGraphBuilder, sizeof(TypeName))) + +/// @brief Allocates an array of objects with zeroed memory in a render graph builder. +/// +/// @param hRenderGraphBuilder Handle to the render graph builder. Must not be RPS_NULL_HANDLE. 
+/// @param TypeName Type name of the objects. +/// @param NumElements Number of elements in the array. +/// +/// @returns Pointer to the allocated array of TypeName if the allocation was successful, +/// NULL otherwise. +#define rpsRenderGraphAllocateArrayOfTypeZeroed(hRenderGraphBuilder, TypeName, NumElements) \ + ((TypeName*)rpsRenderGraphAllocAndZero(hRenderGraphBuilder, sizeof(TypeName) * (NumElements))) + +/// @brief Allocates memory in a render graph and copies an object into it. +/// +/// @param hRenderGraphBuilder Handle to the render graph builder. Must not be RPS_NULL_HANDLE. +/// @param TypeName Type name of the object. +/// @param CopyFrom Pointer to the object to copy. Must not be NULL. +/// +/// @returns Pointer to the allocated object if the allocation was successful, NULL +/// otherwise. +#define rpsRenderGraphAllocateDataOfTypeAndCopyFrom(hRenderGraphBuilder, TypeName, CopyFrom) \ + ((TypeName*)rpsRenderGraphAllocAndCopyFrom(hRenderGraphBuilder, sizeof(TypeName), CopyFrom)) + +/// @brief Allocates memory in a render graph and copies an array of objects into it. +/// +/// +/// @param hRenderGraphBuilder Handle to the render graph builder. Must not be RPS_NULL_HANDLE. +/// @param TypeName Type name of the objects. +/// @param NumElements Number of elements in the array. +/// @param CopyFrom Pointer to the array of TypeName to copy. Must not be NULL if NumElements != 0. +/// +/// @returns Pointer to the allocated array of TypeName if the allocation was +/// successful, NULL otherwise. 
+#define rpsRenderGraphAllocateArrayOfTypeAndCopyFrom(hRenderGraphBuilder, TypeName, NumElements, CopyFrom) \ + ((TypeName*)rpsRenderGraphAllocAndCopyFrom(hRenderGraphBuilder, sizeof(TypeName) * (NumElements), CopyFrom)) + +/// @} end addtogroup RpsRenderGraphHelpers + +/// @} end addtogroup Frontend + +#endif //_RPS_BUILDER_HPP_ diff --git a/include/rps/rps.h b/include/rps/rps.h new file mode 100644 index 0000000..cc4aa15 --- /dev/null +++ b/include/rps/rps.h @@ -0,0 +1,33 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_H_ +#define _RPS_H_ + +#include "core/rps_api.h" + +#include "frontend/rps_builder.h" + +#ifndef RPS_NO_RUNTIME + +#include "rps/runtime/common/rps_runtime.h" + +#if RPS_D3D12_RUNTIME +#include "rps/runtime/d3d12/rps_d3d12_runtime.h" +#endif + +#if RPS_VK_RUNTIME +#include "rps/runtime/vk/rps_vk_runtime.h" +#endif + +#if RPS_D3D11_RUNTIME +#include "rps/runtime/d3d11/rps_d3d11_runtime.h" +#endif + +#endif //RPS_NO_RUNTIME + +#endif//_RPS_H_ diff --git a/include/rps/runtime/common/rps_access.h b/include/rps/runtime/common/rps_access.h new file mode 100644 index 0000000..401685a --- /dev/null +++ b/include/rps/runtime/common/rps_access.h @@ -0,0 +1,566 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#ifndef _RPS_ACCESS_H_ +#define _RPS_ACCESS_H_ + +#ifndef RPSL_COMPILER_BUILD + +#include "rps/core/rps_api.h" +#include "rps/runtime/common/rps_format.h" +#include "rps/runtime/common/rps_resource.h" + +#endif //RPSL_COMPILER_BUILD + +/// @addtogroup RpsRenderGraphRuntime +/// @{ + +//--------------------------------------------------------------------------------------- +// Resource Access +//--------------------------------------------------------------------------------------- + +/// @defgroup RpsAccessAttr RpsAccessAttr +/// @{ + +/// @brief Bitflags for resource (view) access attributes. +/// +/// If specified for a node parameter, it indicates the required resource layout and synchronizations before entering +/// and after exiting the node. +typedef enum RpsAccessFlagBits +{ + // Basic access flags + RPS_ACCESS_UNKNOWN = 0, ///< Unknown access. + RPS_ACCESS_INDIRECT_ARGS_BIT = 1 << 0, ///< Accessible as an indirect argument buffer. + RPS_ACCESS_INDEX_BUFFER_BIT = 1 << 1, ///< Accessible as an index buffer. + RPS_ACCESS_VERTEX_BUFFER_BIT = 1 << 2, ///< Accessible as a vertex buffer. + RPS_ACCESS_CONSTANT_BUFFER_BIT = 1 << 3, ///< Accessible as a constant buffer. + RPS_ACCESS_SHADER_RESOURCE_BIT = 1 << 4, ///< Accessible as a shader resource (readonly) view. + RPS_ACCESS_UNORDERED_ACCESS_BIT = 1 << 5, ///< Accessible as a unordered access (shader readwrite) view. + RPS_ACCESS_SHADING_RATE_BIT = 1 << 6, ///< Accessible as a shading rate image in a Variable Rate Shading + /// (VRS) pass. + RPS_ACCESS_RENDER_TARGET_BIT = 1 << 7, ///< Accessible as a render target view. + RPS_ACCESS_DEPTH_READ_BIT = 1 << 8, ///< Accessible as a readonly depth view. + RPS_ACCESS_DEPTH_WRITE_BIT = 1 << 9, ///< Accessible as a writable depth view. + RPS_ACCESS_STENCIL_READ_BIT = 1 << 10, ///< Accessible as a readonly stencil view. + RPS_ACCESS_STENCIL_WRITE_BIT = 1 << 11, ///< Accessible as a writable stencil view. 
+ RPS_ACCESS_STREAM_OUT_BIT = 1 << 12, ///< Accessible for write as a stream out buffer. + RPS_ACCESS_COPY_SRC_BIT = 1 << 13, ///< Accessible as a copy source. + RPS_ACCESS_COPY_DEST_BIT = 1 << 14, ///< Accessible as a copy target. + RPS_ACCESS_RESOLVE_SRC_BIT = 1 << 15, ///< Accessible as a resolve source. + RPS_ACCESS_RESOLVE_DEST_BIT = 1 << 16, ///< Accessible as a resolve target. + RPS_ACCESS_RAYTRACING_AS_BUILD_BIT = 1 << 17, ///< Accessible for write (build) as a raytracing acceleration + /// structure. + RPS_ACCESS_RAYTRACING_AS_READ_BIT = 1 << 18, ///< Accessible for read as a raytracing acceleration structure. + RPS_ACCESS_PRESENT_BIT = 1 << 19, ///< Accessible as a present source. + RPS_ACCESS_CPU_READ_BIT = 1 << 20, ///< Accessible for reads by the CPU. + RPS_ACCESS_CPU_WRITE_BIT = 1 << 21, ///< Accessible for writes by the CPU. + + // Additional decorator flags not for standalone use but instead for combination with basic access flags. + + /// Access can be used by a render pass attachment (as render target or depth stencil). Used to distinguish clear-only + /// accesses (which may use special clear commands) and render target / depth stencil view accesses. + RPS_ACCESS_RENDER_PASS = 1 << 25, + + /// Initial state when entering the node. This allows a view to have a different state at entering and exiting, + /// in case the node implementation needs to perform a transition but does not want to transition it back to the + /// original state. Not implemented yet. + RPS_ACCESS_BEFORE_BIT = 1 << 26, + + /// Final state when exiting the node. This allows a view to have a different state at entering and exiting, + /// in case the node implementation needs to perform a transition but does not want to transition it back to the + /// original state. Not implemented yet. + RPS_ACCESS_AFTER_BIT = 1 << 27, + + /// View is cleared before the current access. Usually used together with other basic access flags. 
+ RPS_ACCESS_CLEAR_BIT = 1 << 28, + + /// Access does not read existing data so it can be discarded. + RPS_ACCESS_DISCARD_OLD_DATA_BIT = 1 << 29, + + /// Access does not care about the ordering with regard to other accesses which also have the + /// RPS_ACCESS_RELAXED_ORDER_BIT flag. + RPS_ACCESS_RELAXED_ORDER_BIT = 1 << 30, + + /// Access does not need a resource view to be created, (e.g. via + /// ID3D12GraphicsCommandList::CopyResource). + RPS_ACCESS_NO_VIEW_BIT = 1 << 31, + + // Aliases + + /// Accessible as a predication buffer. + RPS_ACCESS_PREDICATION_BIT = RPS_ACCESS_INDIRECT_ARGS_BIT, + + /// Depth read write access. + RPS_ACCESS_DEPTH = RPS_ACCESS_DEPTH_READ_BIT | RPS_ACCESS_DEPTH_WRITE_BIT, + + /// Stencil read write access. + RPS_ACCESS_STENCIL = RPS_ACCESS_STENCIL_READ_BIT | RPS_ACCESS_STENCIL_WRITE_BIT, + + /// Depth / Stencil read access. + RPS_ACCESS_DEPTH_STENCIL_READ = RPS_ACCESS_DEPTH_READ_BIT | RPS_ACCESS_STENCIL_READ_BIT, + + /// Depth / Stencil write access. + RPS_ACCESS_DEPTH_STENCIL_WRITE = RPS_ACCESS_DEPTH_WRITE_BIT | RPS_ACCESS_STENCIL_WRITE_BIT, + + /// Depth / Stencil read write access. + RPS_ACCESS_DEPTH_STENCIL = RPS_ACCESS_DEPTH_STENCIL_READ | RPS_ACCESS_DEPTH_STENCIL_WRITE, + + /// Bitwise OR of all possible GPU writeable access flags. + RPS_ACCESS_ALL_GPU_WRITE = RPS_ACCESS_RENDER_TARGET_BIT | RPS_ACCESS_UNORDERED_ACCESS_BIT | + RPS_ACCESS_DEPTH_WRITE_BIT | RPS_ACCESS_STENCIL_WRITE_BIT | RPS_ACCESS_STREAM_OUT_BIT | + RPS_ACCESS_COPY_DEST_BIT | RPS_ACCESS_RESOLVE_DEST_BIT | + RPS_ACCESS_RAYTRACING_AS_BUILD_BIT, + + /// Bitwise OR of all possible GPU readonly access flags. 
+ RPS_ACCESS_ALL_GPU_READONLY = + RPS_ACCESS_INDIRECT_ARGS_BIT | RPS_ACCESS_INDEX_BUFFER_BIT | RPS_ACCESS_VERTEX_BUFFER_BIT | + RPS_ACCESS_CONSTANT_BUFFER_BIT | RPS_ACCESS_SHADER_RESOURCE_BIT | RPS_ACCESS_SHADING_RATE_BIT | + RPS_ACCESS_DEPTH_READ_BIT | RPS_ACCESS_STENCIL_READ_BIT | RPS_ACCESS_COPY_SRC_BIT | RPS_ACCESS_RESOLVE_SRC_BIT | + RPS_ACCESS_RAYTRACING_AS_READ_BIT | RPS_ACCESS_PRESENT_BIT, + + /// Bitwise OR of all possible GPU access flags. + RPS_ACCESS_ALL_GPU = RPS_ACCESS_ALL_GPU_WRITE | RPS_ACCESS_ALL_GPU_READONLY, + + /// Bitwise OR of all possible CPU access flags. + RPS_ACCESS_ALL_CPU = RPS_ACCESS_CPU_READ_BIT | RPS_ACCESS_CPU_WRITE_BIT, + + /// Bitwise OR of all GPU / CPU access, excluding decorator flags such as RPS_ACCESS_RELAXED_ORDER_BIT and RPS_ACCESS_NO_VIEW_BIT. + RPS_ACCESS_ALL_ACCESS_MASK = RPS_ACCESS_ALL_GPU | RPS_ACCESS_ALL_CPU, + +} RpsAccessFlagBits; + +/// @brief Bitmask type for RpsAccessFlagBits. +typedef RpsFlags32 RpsAccessFlags; + +/// @brief Bitflags for shader stages. +typedef enum RpsShaderStageBits +{ + RPS_SHADER_STAGE_NONE = 0, ///< No shader stages. + RPS_SHADER_STAGE_VS = 1 << 0, ///< Vertex shader stage. + RPS_SHADER_STAGE_PS = 1 << 1, ///< Pixel shader stage. + RPS_SHADER_STAGE_GS = 1 << 2, ///< Geometry shader stage. + RPS_SHADER_STAGE_CS = 1 << 3, ///< Compute shader stage. + RPS_SHADER_STAGE_HS = 1 << 4, ///< Hull shader stage. + RPS_SHADER_STAGE_DS = 1 << 5, ///< Domain shader stage. + RPS_SHADER_STAGE_RAYTRACING = 1 << 6, ///< Raytracing shader stage. + RPS_SHADER_STAGE_AS = 1 << 7, ///< Amplification shader stage. + RPS_SHADER_STAGE_MS = 1 << 8, ///< Mesh shader stage. + RPS_SHADER_STAGE_ALL = (1u << 9) - 1, ///< All shader stages. +} RpsShaderStageBits; + +/// @brief Bitmask type for RpsShaderStageBits. +typedef RpsFlags32 RpsShaderStageFlags; + +/// @brief Resource access attribute. +typedef struct RpsAccessAttr +{ + RpsAccessFlags accessFlags; ///< Access flags. 
+ RpsShaderStageFlags accessStages; ///< Shader stages allowed for access if applicable. +} RpsAccessAttr; + +/// @} end defgroup RpsAccessAttr + +/// @defgroup RpsSemanticAttr RpsSemanticAttr +/// @{ + +/// @brief Graphics resource and argument data usage semantics. +typedef enum RpsSemantic +{ + RPS_SEMANTIC_UNSPECIFIED = 0, ///< No semantics. + + // Shaders: + RPS_SEMANTIC_VERTEX_SHADER, ///< Reserved for future use. + RPS_SEMANTIC_PIXEL_SHADER, ///< Reserved for future use. + RPS_SEMANTIC_GEOMETRY_SHADER, ///< Reserved for future use. + RPS_SEMANTIC_COMPUTE_SHADER, ///< Reserved for future use. + RPS_SEMANTIC_HULL_SHADER, ///< Reserved for future use. + RPS_SEMANTIC_DOMAIN_SHADER, ///< Reserved for future use. + RPS_SEMANTIC_RAYTRACING_PIPELINE, ///< Reserved for future use. + RPS_SEMANTIC_AMPLIFICATION_SHADER, ///< Reserved for future use. + RPS_SEMANTIC_MESH_SHADER, ///< Reserved for future use. + + // States: + RPS_SEMANTIC_VERTEX_LAYOUT, ///< Reserved for future use. + RPS_SEMANTIC_STREAM_OUT_LAYOUT, ///< Reserved for future use. + RPS_SEMANTIC_STREAM_OUT_DESC, ///< Reserved for future use. + RPS_SEMANTIC_BLEND_STATE, ///< Reserved for future use. + RPS_SEMANTIC_RENDER_TARGET_BLEND, ///< Reserved for future use. + RPS_SEMANTIC_DEPTH_STENCIL_STATE, ///< Reserved for future use. + RPS_SEMANTIC_RASTERIZER_STATE, ///< Reserved for future use. + RPS_SEMANTIC_DYNAMIC_STATE_BEGIN, ///< Start of the dynamic state semantic enumeration values. + + /// Usage as a viewport. The data type must be RpsViewport. + RPS_SEMANTIC_VIEWPORT = RPS_SEMANTIC_DYNAMIC_STATE_BEGIN, + + /// Usage as a scissor rectangle. The data type must be RpsRect. + RPS_SEMANTIC_SCISSOR, + + /// Usage as primitive topology. The data must be one of the values specified by RpsPrimitiveTopology. + RPS_SEMANTIC_PRIMITIVE_TOPOLOGY, + + /// Reserved for future use. + RPS_SEMANTIC_PATCH_CONTROL_POINTS, + + /// Reserved for future use. 
+ RPS_SEMANTIC_PRIMITIVE_STRIP_CUT_INDEX, + + /// Reserved for future use. + RPS_SEMANTIC_BLEND_FACTOR, + + /// Reserved for future use. + RPS_SEMANTIC_STENCIL_REF, + + /// Reserved for future use. + RPS_SEMANTIC_DEPTH_BOUNDS, + + /// Reserved for future use. + RPS_SEMANTIC_SAMPLE_LOCATION, + + /// Reserved for future use. + RPS_SEMANTIC_SHADING_RATE, + + /// Usage as a color clear value. The data type must be float[4]. + RPS_SEMANTIC_COLOR_CLEAR_VALUE, + + /// Usage as a depth clear value. The data type must be float. + RPS_SEMANTIC_DEPTH_CLEAR_VALUE, + + /// Usage as a stencil clear value. The data type must be uint32_t, only the lower 8 bits will be used. + RPS_SEMANTIC_STENCIL_CLEAR_VALUE, + + // Resource bindings: + + /// Start of the resource binding enumeration values. + RPS_SEMANTIC_RESOURCE_BINDING_BEGIN, + + /// Bound as a vertex buffer. The semantic index indicates the vertex buffer binding slot. + RPS_SEMANTIC_VERTEX_BUFFER = RPS_SEMANTIC_RESOURCE_BINDING_BEGIN, + + /// Bound as an index buffer. + RPS_SEMANTIC_INDEX_BUFFER, + + /// Bound as an indirect argument buffer. + RPS_SEMANTIC_INDIRECT_ARGS, + + /// Bound for write as a stream out buffer. The semantic index indicates the stream out buffer binding slot. + RPS_SEMANTIC_STREAM_OUT_BUFFER, + + /// Bound as an indirect count buffer. + RPS_SEMANTIC_INDIRECT_COUNT, + + /// Bound as a render target view. The semantic index indicates the render target slot. + RPS_SEMANTIC_RENDER_TARGET, + + /// Bound as a depth stencil view. + RPS_SEMANTIC_DEPTH_STENCIL_TARGET, + + /// Bound as a shading rate image in a Variable Rate Shading (VRS) pass. + RPS_SEMANTIC_SHADING_RATE_IMAGE, + + /// Bound as a resolve target. The semantic index indicates the render + /// target slot of the resolve source. + RPS_SEMANTIC_RESOLVE_TARGET, + + /// User defined resource view binding.
This is intended for shader resource views and unordered access views where + /// resources are bound to programmable shaders instead of fixed function binding points. + RPS_SEMANTIC_USER_RESOURCE_BINDING, + + // !! KEEP RPS_SEMANTIC_USER_RESOURCE_BINDING THE LAST ELEMENT !! + + RPS_SEMANTIC_COUNT, ///< Number of defined semantics. +} RpsSemantic; + +/// @brief Constant for a semantic index value indicating the actual semantic index should occur immediately following +/// the previously defined semantic of the same type. +#define RPS_SEMANTIC_INDEX_APPEND (UINT32_MAX) + +/// @brief Graph entry and node parameter semantic attribute. +typedef struct RpsSemanticAttr +{ + RpsSemantic semantic; ///< Semantic type. + uint32_t semanticIndex; ///< Index of the semantic if required. The meaning of the semantic index differs depending + /// on the semantic. See RpsSemantic for more info. +} RpsSemanticAttr; + +/// @} end defgroup RpsSemanticAttr + +/// @defgroup RpsResourceView RpsResourceView +/// @{ + +/// @brief Bitflags for resource view properties. +typedef enum RpsResourceViewFlagBits +{ + RPS_RESOURCE_VIEW_FLAG_NONE = 0, ///< No special resource view flags. + RPS_RESOURCE_VIEW_FLAG_CUBEMAP_BIT = 1 << 0, ///< Resource view is used as a cubemap. +} RpsResourceViewFlagBits; + +/// @brief Bitmask for RpsResourceViewFlagBits. +typedef RpsFlags32 RpsResourceViewFlags; + +/// @brief General resource view. +/// +/// All shared parameters between buffer and image views. +typedef struct RpsResourceView +{ + RpsResourceId resourceId; ///< Resource ID of the viewed resource. RPS_RESOURCE_ID_INVALID indicates a null + /// resource view. + RpsFormat viewFormat; ///< Format of the view. RPS_FORMAT_UNKNOWN indicates the format should be inherited from + /// the resource or the view does not require a format (e.g. structured buffers). + uint32_t temporalLayer; ///< Temporal layer of the view. + RpsResourceViewFlags flags; ///< Flags for additional view properties.
+} RpsResourceView; + +/// @brief Image resource view. +typedef struct RpsImageView +{ + RpsResourceView base; ///< Base resource view properties. + RpsSubresourceRange subresourceRange; ///< Subresource range of the view. + float minLodClamp; ///< Min LOD clamp value of the texture view. + uint32_t componentMapping; ///< 32-bit value for the color component (RGBA channel) mapping of the view. +} RpsImageView; + +/// @defgroup RpsResourceViewComponentMapping RpsResourceViewComponentMapping +/// @{ + +/// @brief Resource components or value to map to for component mapping. +typedef enum RpsResourceViewComponentMapping +{ + RPS_RESOURCE_VIEW_COMPONENT_MAPPING_R = 0, ///< Red component. + RPS_RESOURCE_VIEW_COMPONENT_MAPPING_G = 1, ///< Green component. + RPS_RESOURCE_VIEW_COMPONENT_MAPPING_B = 2, ///< Blue component. + RPS_RESOURCE_VIEW_COMPONENT_MAPPING_A = 3, ///< Alpha component + RPS_RESOURCE_VIEW_COMPONENT_MAPPING_ZERO = 4, ///< Constant value of 0. + RPS_RESOURCE_VIEW_COMPONENT_MAPPING_ONE = 5, ///< Constant value of 1. + + /// Specifies the default component mapping (where R, G, B, A components map to R, G, B, A without swizzling). + /// Note this is using 1 byte each channel, different from DX12 default. + RPS_RESOURCE_VIEW_COMPONENT_MAPPING_DEFAULT = + RPS_RESOURCE_VIEW_COMPONENT_MAPPING_R | (RPS_RESOURCE_VIEW_COMPONENT_MAPPING_G << 8) | + (RPS_RESOURCE_VIEW_COMPONENT_MAPPING_B << 16) | (RPS_RESOURCE_VIEW_COMPONENT_MAPPING_A << 24), +} RpsResourceViewComponentMapping; + +/// @brief Macro for encoding a set of component mappings as a 32-bit color value. +/// +/// @param R Red channel value. +/// @param G Green channel value. +/// @param B Blue channel value. +/// @param A Alpha channel value. +/// +/// @returns Encoded 32-bit value. 
+#define RPS_IMAGE_VIEW_MAKE_COMPONENT_MAPPING(R, G, B, A) \ + (((R)&0xFF) | (((G)&0xFF) << 8) | (((B)&0xFF) << 16) | (((A)&0xFF) << 24)) + +/// @brief Macro for decoding the red channel of a 32-bit component mapping encoded in the layout defined by +/// RPS_IMAGE_VIEW_MAKE_COMPONENT_MAPPING. +/// +/// @param Value Encoded 32-bit value. +/// +/// @returns Decoded red channel. +#define RPS_IMAGE_VIEW_GET_COMPONENT_MAPPING_CHANNEL_R(Value) ((RpsResourceViewComponentMapping)((Value)&0xFF)) +/// @brief Macro for decoding the green channel of a 32-bit component mapping encoded in the layout defined by +/// RPS_IMAGE_VIEW_MAKE_COMPONENT_MAPPING. +/// +/// @param Value Encoded 32-bit value. +/// +/// @returns Decoded green channel. +#define RPS_IMAGE_VIEW_GET_COMPONENT_MAPPING_CHANNEL_G(Value) ((RpsResourceViewComponentMapping)(((Value) >> 8) & 0xFF)) +/// @brief Macro for decoding the blue channel of a 32-bit component mapping encoded in the layout defined by +/// RPS_IMAGE_VIEW_MAKE_COMPONENT_MAPPING. +/// +/// @param Value Encoded 32-bit value. +/// +/// @returns Decoded blue channel. +#define RPS_IMAGE_VIEW_GET_COMPONENT_MAPPING_CHANNEL_B(Value) \ + ((RpsResourceViewComponentMapping)(((Value) >> 16) & 0xFF)) +/// @brief Macro for decoding the alpha channel of a 32-bit component mapping encoded in the layout defined by +/// RPS_IMAGE_VIEW_MAKE_COMPONENT_MAPPING. +/// +/// @param Value Encoded 32-bit value. +/// +/// @returns Decoded alpha channel. +#define RPS_IMAGE_VIEW_GET_COMPONENT_MAPPING_CHANNEL_A(Value) \ + ((RpsResourceViewComponentMapping)(((Value) >> 24) & 0xFF)) +/// @brief Macro for decoding a channel of a 32-bit component mapping encoded in the layout defined by +/// RPS_IMAGE_VIEW_MAKE_COMPONENT_MAPPING. +/// +/// @param Value Encoded 32-bit value. +/// @param Channel Channel to decode. Has to be either R, G, B or A. +/// +/// @returns Decoded value of the requested channel.
+#define RPS_IMAGE_VIEW_GET_COMPONENT_MAPPING_CHANNEL(Value, Channel) \ + RPS_CONCATENATE_DIRECT(RPS_IMAGE_VIEW_GET_COMPONENT_MAPPING_CHANNEL_, Channel)(Value) + +/// @} end defgroup RpsResourceViewComponentMapping + +/// @brief Constant for a buffer range value indicating the entire remaining size of the buffer. +#define RPS_BUFFER_WHOLE_SIZE UINT64_MAX + +/// @brief Buffer resource view. +typedef struct RpsBufferView +{ + RpsResourceView base; ///< Base view properties. + uint64_t offset; ///< Offset of the buffer range in bytes. + uint64_t sizeInBytes; ///< Size of the buffer range in bytes. + uint32_t stride; ///< Stride of a structured buffer view. If the API does not support altering per-view + /// buffer stride (e.g. DX11), the stride applies to the whole buffer resource. +} RpsBufferView; + +/// @brief Enumeration of runtime defined built-in type IDs. +typedef enum RpsRuntimeBuiltInTypeIds +{ + RPS_TYPE_IMAGE_VIEW = RPS_TYPE_RUNTIME_DEFINED_BEGIN, ///< Type ID of RpsImageView. + RPS_TYPE_BUFFER_VIEW, ///< Type ID of RpsBufferView. +} RpsRuntimeBuiltInTypeIds; + +/// @} end defgroup RpsResourceView + +#ifndef RPSL_COMPILER_BUILD + +#ifdef __cplusplus + +namespace rps +{ + /// @brief C++ helper type for RpsAccessAttr. + /// + /// @ingroup RpsAccessAttr + struct AccessAttr : public RpsAccessAttr + { + constexpr AccessAttr(RpsAccessFlags accessFlags = RPS_ACCESS_UNKNOWN, + RpsShaderStageFlags shaderStages = RPS_SHADER_STAGE_NONE) + : RpsAccessAttr{accessFlags, shaderStages} + { + } + constexpr AccessAttr(const RpsAccessAttr& attr) + : RpsAccessAttr{attr} + { + } + + AccessAttr& operator|=(const AccessAttr& rhs); + AccessAttr& operator&=(const AccessAttr& rhs); + + void Print(const RpsPrinter& printer) const; + }; + + /// @brief C++ helper type for RpsSemanticAttr.
+ struct SemanticAttr : public RpsSemanticAttr + { + constexpr SemanticAttr(RpsSemantic semantic, uint32_t semanticIndex = 0) + : RpsSemanticAttr{semantic, semanticIndex} + { + } + constexpr SemanticAttr(const RpsSemanticAttr& attr) + : RpsSemanticAttr{attr} + { + } + + void Print(const RpsPrinter& printer) const; + }; + + /// @addtogroup RpsAccessAttr + /// @{ + + /// @brief Per field bitwise-OR operator for RpsAccessAttr. + inline RpsAccessAttr operator|(const RpsAccessAttr& lhs, const RpsAccessAttr& rhs) + { + return RpsAccessAttr{lhs.accessFlags | rhs.accessFlags, lhs.accessStages | rhs.accessStages}; + } + + /// @brief Per field bitwise-AND operator for RpsAccessAttr. + inline RpsAccessAttr operator&(const RpsAccessAttr& lhs, const RpsAccessAttr& rhs) + { + return RpsAccessAttr{lhs.accessFlags & rhs.accessFlags, lhs.accessStages & rhs.accessStages}; + } + + /// @brief Returns if two RpsAccessAttr structures are equal. + inline bool operator==(const RpsAccessAttr& lhs, const RpsAccessAttr& rhs) + { + return (lhs.accessFlags == rhs.accessFlags) && (lhs.accessStages == rhs.accessStages); + } + + /// @brief Returns if two RpsAccessAttr structures are not equal. + inline bool operator!=(const RpsAccessAttr& lhs, const RpsAccessAttr& rhs) + { + return !(lhs == rhs); + } + + inline AccessAttr& AccessAttr::operator|=(const AccessAttr& rhs) + { + *this = *this | rhs; + return *this; + } + + inline AccessAttr& AccessAttr::operator&=(const AccessAttr& rhs) + { + *this = *this & rhs; + return *this; + } + + /// @} end addtogroup RpsAccessAttr + + /// @addtogroup RpsResourceView + /// @{ + + /// @brief C++ helper type for RpsImageView. 
+ struct ImageView : public RpsImageView + { + ImageView() + : ImageView(RPS_RESOURCE_ID_INVALID) + { + } + + ImageView(RpsResourceId inResId, + RpsFormat inFormat = RPS_FORMAT_UNKNOWN, + uint32_t inTemporalLayer = 0, + RpsResourceViewFlags inFlags = RPS_RESOURCE_VIEW_FLAG_NONE, + SubresourceRange inSubResRange = {}) + { + base.resourceId = inResId; + base.viewFormat = inFormat; + base.temporalLayer = inTemporalLayer; + base.flags = inFlags; + subresourceRange = inSubResRange; + minLodClamp = 0.0f; + componentMapping = RPS_RESOURCE_VIEW_COMPONENT_MAPPING_DEFAULT; + } + }; + + /// @brief C++ helper type for RpsBufferView. The stride argument is 32-bit wide to match RpsBufferView::stride. + struct BufferView : public RpsBufferView + { + BufferView() + : BufferView(RPS_RESOURCE_ID_INVALID) + { + } + + BufferView(RpsResourceId inResId, + RpsFormat inFormat = RPS_FORMAT_UNKNOWN, + uint64_t inOffset = 0, + uint64_t inSizeInBytes = UINT64_MAX, + uint32_t inStride = 0, + uint32_t inTemporalLayer = 0) + { + base.resourceId = inResId; + base.viewFormat = inFormat; + base.temporalLayer = inTemporalLayer; + base.flags = RPS_RESOURCE_VIEW_FLAG_NONE; + offset = inOffset; + sizeInBytes = inSizeInBytes; + stride = inStride; + } + }; + + /// @} end addtogroup RpsResourceView + +} // namespace rps + +#endif //__cplusplus + +#endif //RPSL_COMPILER_BUILD + +/// @} end addtogroup RpsRenderGraphRuntime + +#endif //_RPS_ACCESS_H_ diff --git a/include/rps/runtime/common/rps_format.h b/include/rps/runtime/common/rps_format.h new file mode 100644 index 0000000..a7a9c6e --- /dev/null +++ b/include/rps/runtime/common/rps_format.h @@ -0,0 +1,354 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details.
+ +#ifndef _RPS_FORMAT_H_ +#define _RPS_FORMAT_H_ + +#include "rps/core/rps_api.h" + +/// @addtogroup RpsRenderGraphRuntimeResources +/// @{ + +/// @defgroup RpsFormat RpsFormat +/// @{ + +/// @brief Supported RPS formats. +typedef enum RpsFormat +{ + RPS_FORMAT_UNKNOWN, ///< Unknown format. + RPS_FORMAT_R32G32B32A32_TYPELESS, ///< 4-channel RGBA format with each channel being a typeless 32-bit value. + RPS_FORMAT_R32G32B32A32_FLOAT, ///< 4-channel RGBA format with each channel being a 32-bit IEEE 754 floating + /// point value. + RPS_FORMAT_R32G32B32A32_UINT, ///< 4-channel RGBA format with each channel being a 32-bit unsigned integer. + RPS_FORMAT_R32G32B32A32_SINT, ///< 4-channel RGBA format with each channel being a 32-bit signed integer. + RPS_FORMAT_R32G32B32_TYPELESS, ///< 3-channel RGB format with each channel being a typeless 32-bit value. + RPS_FORMAT_R32G32B32_FLOAT, ///< 3-channel RGB format with each channel being a 32-bit IEEE 754 floating + /// point value. + RPS_FORMAT_R32G32B32_UINT, ///< 3-channel RGB format with each channel being a 32-bit unsigned integer. + RPS_FORMAT_R32G32B32_SINT, ///< 3-channel RGB format with each channel being a 32-bit signed integer. + + RPS_FORMAT_R16G16B16A16_TYPELESS, ///< 4-channel RGBA format with each channel being a typeless 16-bit value. + RPS_FORMAT_R16G16B16A16_FLOAT, ///< 4-channel RGBA format with each channel being a 16-bit floating point + /// value. + RPS_FORMAT_R16G16B16A16_UNORM, ///< 4-channel RGBA format with each channel being a normalized, 16-bit unsigned + /// integer. + RPS_FORMAT_R16G16B16A16_UINT, ///< 4-channel RGBA format with each channel being a 16-bit unsigned integer. + RPS_FORMAT_R16G16B16A16_SNORM, ///< 4-channel RGBA format with each channel being a normalized, 16-bit signed + /// integer. + + /// 4-channel RGBA format with each channel being a 16-bit signed integer. + RPS_FORMAT_R16G16B16A16_SINT, + + /// 2-channel RG format with each channel being a typeless 32-bit value. 
+ RPS_FORMAT_R32G32_TYPELESS, + + /// 2-channel RG format with each channel being a 32-bit IEEE 754 floating point value. + RPS_FORMAT_R32G32_FLOAT, + + /// 2-channel RG format with each channel being a 32-bit unsigned integer. + RPS_FORMAT_R32G32_UINT, + + /// 2-channel RG format with each channel being a 32-bit signed integer. + RPS_FORMAT_R32G32_SINT, + + /// 2-channel RG format with the first channel being a typeless 32-bit value, the second channel a typeless 8-bit + /// value and 24 unused bits at the end. + RPS_FORMAT_R32G8X24_TYPELESS, + + /// 2-channel RG format with the first channel being a 32-bit depth value, the second one a 8-bit unsigned integer + /// value and 24 unused bits at the end. + RPS_FORMAT_D32_FLOAT_S8X24_UINT, + + /// Single channel R format with the channel being a typeless 32-bit IEEE 754 floating point value and additional + /// sets of 8 and 24 unused bits afterwards. + RPS_FORMAT_R32_FLOAT_X8X24_TYPELESS, + + /// Single channel R format with 32 unused bits, the channel being an 8-bit unsigned integer value and 24 unused + /// bits at the end. + RPS_FORMAT_X32_TYPELESS_G8X24_UINT, + + /// 4-channel RGBA format with the RGB channels being typeless 10-bit values and the A channel being a typeless + /// 2-bit value. + RPS_FORMAT_R10G10B10A2_TYPELESS, + + /// 4-channel RGBA format with the RGB channels being 10-bit normalized, unsigned integer values and the A channel + /// being a 2-bit normalized, unsigned integer value. + RPS_FORMAT_R10G10B10A2_UNORM, + + /// 4-channel RGBA format with the RGB channels being 10-bit unsigned integer values and the A channel being a 2-bit + /// unsigned integer value. + RPS_FORMAT_R10G10B10A2_UINT, + + /// 3-channel RGB format with the RG channels being 11-bit floating point values and the B channel being a 10-bit + /// floating point value. + RPS_FORMAT_R11G11B10_FLOAT, + + RPS_FORMAT_R8G8B8A8_TYPELESS, ///< 4-channel RGBA format with all channels being typeless 8-bit values. 
+ RPS_FORMAT_R8G8B8A8_UNORM, ///< 4-channel RGBA format with all channels being normalized 8-bit unsigned + /// integers. + RPS_FORMAT_R8G8B8A8_UNORM_SRGB, ///< 4-channel RGBA format with all channels being normalized 8-bit unsigned integer + /// SRGB values. + RPS_FORMAT_R8G8B8A8_UINT, ///< 4-channel RGBA format with all channels being 8-bit unsigned integers. + RPS_FORMAT_R8G8B8A8_SNORM, ///< 4-channel RGBA format with all channels being normalized, 8-bit signed + /// integers. + RPS_FORMAT_R8G8B8A8_SINT, ///< 4-channel RGBA format with all channels being 8-bit signed integers. + + RPS_FORMAT_R16G16_TYPELESS, ///< 2-channel RG format with each channel being a typeless 16-bit value. + RPS_FORMAT_R16G16_FLOAT, ///< 2-channel RG format with each channel being a 16-bit IEEE 754 floating point value. + RPS_FORMAT_R16G16_UNORM, ///< 2-channel RG format with each channel being a normalized, 16-bit unsigned integer. + RPS_FORMAT_R16G16_UINT, ///< 2-channel RG format with each channel being a 16-bit unsigned integer. + RPS_FORMAT_R16G16_SNORM, ///< 2-channel RG format with each channel being a normalized, 16-bit signed integer + /// value. + RPS_FORMAT_R16G16_SINT, ///< 2-channel RG format with each channel being a 16-bit signed integer. + + RPS_FORMAT_R32_TYPELESS, ///< Single channel R format with the channel being a typeless 32-bit value. + RPS_FORMAT_D32_FLOAT, ///< Single channel R format with the channel being a 32-bit IEEE 754 floating point depth + /// value. + RPS_FORMAT_R32_FLOAT, ///< Single channel R format with the channel being a 32-bit IEEE 754 floating point + /// value. + RPS_FORMAT_R32_UINT, ///< Single channel R format with the channel being a 32-bit unsigned integer. + RPS_FORMAT_R32_SINT, ///< Single channel R format with the channel being a 32-bit signed integer. + + /// 2-channel RG format with the first channel being a typeless 24-bit value and the second one a typeless 8-bit + /// value. 
+ RPS_FORMAT_R24G8_TYPELESS, + + /// 2-channel RG format with the first channel being a normalized, 24-bit unsigned integer depth value and the + /// second one an 8-bit unsigned integer stencil value. + RPS_FORMAT_D24_UNORM_S8_UINT, + + /// 2-channel RG format with the first channel being a normalized, 24-bit unsigned integer value and the second one + /// a typeless 8-bit value. + RPS_FORMAT_R24_UNORM_X8_TYPELESS, + + /// Single channel R format with 24 unused bits with the channel being an 8-bit unsigned integer. + RPS_FORMAT_X24_TYPELESS_G8_UINT, + + RPS_FORMAT_R8G8_TYPELESS, ///< 2-channel RG format with each channel being a typeless 8-bit value. + RPS_FORMAT_R8G8_UNORM, ///< 2-channel RG format with each channel being a normalized, 8-bit unsigned integer. + RPS_FORMAT_R8G8_UINT, ///< 2-channel RG format with each channel being an 8-bit unsigned integer. + RPS_FORMAT_R8G8_SNORM, ///< 2-channel RG format with each channel being a normalized, 8-bit signed integer. + RPS_FORMAT_R8G8_SINT, ///< 2-channel RG format with each channel being an 8-bit signed integer. + + RPS_FORMAT_R16_TYPELESS, ///< Single channel R format with the channel being a typeless 16-bit value. + RPS_FORMAT_R16_FLOAT, ///< Single channel R format with the channel being a 16-bit IEEE 754 floating point + /// value. + RPS_FORMAT_D16_UNORM, ///< Single channel R format with the channel being a normalized, 16-bit unsigned integer + /// depth value. + RPS_FORMAT_R16_UNORM, ///< Single channel R format with the channel being a normalized, 16-bit unsigned integer. + RPS_FORMAT_R16_UINT, ///< Single channel R format with the channel being a 16-bit unsigned integer. + RPS_FORMAT_R16_SNORM, ///< Single channel R format with the channel being a normalized, 16-bit signed integer. + RPS_FORMAT_R16_SINT, ///< Single channel R format with the channel being a 16-bit signed integer. + + RPS_FORMAT_R8_TYPELESS, ///< Single channel R format with the channel being a typeless 8-bit value.
+ RPS_FORMAT_R8_UNORM, ///< Single channel R format with the channel being a normalized, 8-bit unsigned integer. + RPS_FORMAT_R8_UINT, ///< Single channel R format with the channel being an 8-bit unsigned integer. + RPS_FORMAT_R8_SNORM, ///< Single channel R format with the channel being a normalized, 8-bit signed integer. + RPS_FORMAT_R8_SINT, ///< Single channel R format with the channel being an 8-bit signed integer. + RPS_FORMAT_A8_UNORM, ///< Single channel A format with the channel being a normalized, 8-bit unsigned integer. + + RPS_FORMAT_R1_UNORM, ///< Single channel R format with the channel being a 1-bit unsigned integer. + + /// 4-channel RGB format with the first three channels being a 9-bit mantissa. Together with the 5-bit exponent that + /// is shared for all three channels they form three 9-bit mantissa + 5-bit exponent floating point values. + RPS_FORMAT_R9G9B9E5_SHAREDEXP, + + /// 4-channel RGB format with each channel being a normalized, 8-bit unsigned integer. Each block of 32 bits + /// describes the RGB values for a pair of pixels that always share one R and B value but have separate G values. + RPS_FORMAT_R8G8_B8G8_UNORM, + + /// 4-channel RGB format with each channel being a normalized, 8-bit unsigned integer. Each block of 32 bits + /// describes the RGB values for a pair of pixels that always share one R and B value but have separate G values. + RPS_FORMAT_G8R8_G8B8_UNORM, + + /// 4-channel block compressed format with the first channel being a typeless 5-bit value, the second one a + /// typeless, 6-bit value, the third one a typeless, 5-bit value and the last one a typeless, 0-bit or 1-bit value. + RPS_FORMAT_BC1_TYPELESS, + + /// 4-channel block compressed format with the first channel being a normalized, 5-bit unsigned integer, the second + /// one a normalized, 6-bit unsigned integer, the third one a normalized, 5-bit unsigned integer and the last one a + /// normalized, 0-bit or 1-bit unsigned integer.
+ RPS_FORMAT_BC1_UNORM, + + /// 4-channel block compressed format with the first channel being a normalized, 5-bit unsigned integer SRGB value, + /// the second one a normalized, 6-bit unsigned integer SRGB value, the third one a normalized, 5-bit unsigned + /// integer SRGB value and the last one a normalized, 0-bit or 1-bit unsigned integer SRGB value. + RPS_FORMAT_BC1_UNORM_SRGB, + + /// 4-channel block compressed format with the first channel being a typeless 5-bit value, the second one a + /// typeless, 6-bit value, the third one a typeless, 5-bit value and the last one a typeless, 4-bit value. + RPS_FORMAT_BC2_TYPELESS, + + /// 4-channel block compressed format with the first channel being a normalized, 5-bit unsigned integer, the second + /// one a normalized, 6-bit unsigned integer, the third one a normalized, 5-bit unsigned integer and the last one a + /// normalized, 4-bit unsigned integer. + RPS_FORMAT_BC2_UNORM, + + /// 4-channel block compressed format with the first channel being a normalized, 5-bit unsigned integer SRGB value, + /// the second one a normalized, 6-bit unsigned integer SRGB value, the third one a normalized, 5-bit unsigned + /// integer SRGB value and the last one a normalized, 4-bit unsigned integer SRGB value. + RPS_FORMAT_BC2_UNORM_SRGB, + + /// 4-channel block compressed format with the first channel being a typeless 5-bit value, the second one a + /// typeless, 6-bit value, the third one a typeless, 5-bit value and the last one a typeless, 8-bit value. + RPS_FORMAT_BC3_TYPELESS, + + /// 4-channel block compressed format with the first channel being a normalized, 5-bit unsigned integer, the second + /// one a normalized, 6-bit unsigned integer, the third one a normalized, 5-bit unsigned integer and the last one a + /// normalized, 8-bit unsigned integer.
+ RPS_FORMAT_BC3_UNORM, + + /// 4-channel block compressed format with the first channel being a normalized, 5-bit unsigned integer SRGB value, + /// the second one a normalized, 6-bit unsigned integer SRGB value, the third one a normalized, 5-bit unsigned + /// integer SRGB value and the last one a normalized, 8-bit unsigned integer SRGB value. + RPS_FORMAT_BC3_UNORM_SRGB, + + /// Single channel block compressed format with the channel being a typeless 8-bit value. + RPS_FORMAT_BC4_TYPELESS, + + /// Single channel block compressed format with the channel being a normalized, 8-bit unsigned integer value. + RPS_FORMAT_BC4_UNORM, + + /// Single channel block compressed format with the channel being a normalized, 8-bit signed integer value. + RPS_FORMAT_BC4_SNORM, + + /// 2-channel block compressed format with each channel being a typeless 8-bit value. + RPS_FORMAT_BC5_TYPELESS, + + /// 2-channel block compressed format with each channel being a normalized, 8-bit unsigned integer value. + RPS_FORMAT_BC5_UNORM, + + /// 2-channel block compressed format with each channel being a normalized, 8-bit signed integer value. + RPS_FORMAT_BC5_SNORM, + + /// 3-channel BGR format with the first channel being a normalized, 5-bit unsigned integer, the second one a + /// normalized, 6-bit unsigned integer and the third one a normalized, 5-bit unsigned integer. + RPS_FORMAT_B5G6R5_UNORM, + + /// 4-channel BGRA format with the first three channels being a normalized, 5-bit unsigned integer and the last one + /// a normalized, 1-bit unsigned integer. + RPS_FORMAT_B5G5R5A1_UNORM, + + /// 4-channel BGRA format with each channel being a normalized, 8-bit unsigned integer. + RPS_FORMAT_B8G8R8A8_UNORM, + + /// 3-channel BGR format with each channel being a normalized, 8-bit unsigned integer value and 8 unused bits at the + /// end.
+ RPS_FORMAT_B8G8R8X8_UNORM, + + /// 4-channel RGB 2.8-biased fixed-point format with the first three channels being a normalized, 10-bit + /// unsigned integer and the last one a normalized 2-bit unsigned integer. + RPS_FORMAT_R10G10B10_XR_BIAS_A2_UNORM, + + RPS_FORMAT_B8G8R8A8_TYPELESS, ///< 4-channel BGRA format with each channel being a typeless 8-bit value. + RPS_FORMAT_B8G8R8A8_UNORM_SRGB, ///< 4-channel BGRA format with each channel being a normalized, 8-bit unsigned + /// integer SRGB value. + RPS_FORMAT_B8G8R8X8_TYPELESS, ///< 3-channel BGR format with each channel being a typeless 8-bit value and 8 + /// unused bits at the end. + RPS_FORMAT_B8G8R8X8_UNORM_SRGB, ///< 3-channel BGR format with each channel being a normalized, 8-bit unsigned + /// integer SRGB value and 8 unused bits at the end. + + /// 3-channel block compressed HDR format with each channel being a typeless 16-bit value. + RPS_FORMAT_BC6H_TYPELESS, + + /// 3-channel block compressed HDR format with each channel being a 16-bit unsigned "half" floating point value. + RPS_FORMAT_BC6H_UF16, + + /// 3-channel block compressed HDR format with each channel being a 16-bit signed "half" floating point value. + RPS_FORMAT_BC6H_SF16, + + /// 3-channel or 4-channel block compressed format with the first three channels being a typeless, 4-7-bit value and + /// the last one an optional, typeless 0-8-bit value. + RPS_FORMAT_BC7_TYPELESS, + + /// 3-channel or 4-channel block compressed format with the first three channels being a normalized, 4-7-bit + /// unsigned integer and the last one an optional, normalized, 0-8-bit unsigned integer. + RPS_FORMAT_BC7_UNORM, + + /// 3-channel or 4-channel block compressed format with the first three channels being a normalized, 4-7-bit + /// unsigned integer SRGB value and the last one an optional, normalized, 0-8-bit unsigned integer SRGB value. + RPS_FORMAT_BC7_UNORM_SRGB, + + RPS_FORMAT_AYUV, ///< 4-channel video resource format with each channel being an 8-bit value.
+    RPS_FORMAT_Y410,           ///< 4-channel video resource format with each of the first three channels being a
+                               /// 10-bit value and the last one a 2-bit value.
+    RPS_FORMAT_Y416,           ///< 4-channel video resource format with each channel being a 16-bit value.
+    RPS_FORMAT_NV12,           ///< 2-channel video resource format with each channel being an 8-bit value.
+    RPS_FORMAT_P010,           ///< 2-channel video resource format with each channel being a 16-bit value.
+    RPS_FORMAT_P016,           ///< 2-channel video resource format with each channel being a 16-bit value.
+    RPS_FORMAT_420_OPAQUE,     ///< Video resource format with opaque layout.
+    RPS_FORMAT_YUY2,           ///< 4-channel video resource format with each channel being an 8-bit value.
+    RPS_FORMAT_Y210,           ///< 4-channel video resource format with each channel being a 16-bit value.
+    RPS_FORMAT_Y216,           ///< 4-channel video resource format with each channel being a 16-bit value.
+    RPS_FORMAT_NV11,           ///< 2-channel video resource format with each channel being an 8-bit value.
+    RPS_FORMAT_AI44,           ///< 4-bit palettized video resource format.
+    RPS_FORMAT_IA44,           ///< 4-bit palettized video resource format.
+    RPS_FORMAT_P8,             ///< RGB video resource format with 8-bit palettization.
+    RPS_FORMAT_A8P8,           ///< RGB video resource format with 8-bit palettization.
+    RPS_FORMAT_B4G4R4A4_UNORM, ///< 4-channel BGRA format with each channel being a normalized 4-bit unsigned integer.
+
+    RPS_FORMAT_COUNT, ///< Number of formats available in RpsFormat.
+} RpsFormat;
+
+/// @brief Returns whether a format is block compressed.
+///
+/// All block compressed formats start with the prefix RPS_FORMAT_BC.
+///
+/// @param format                   Format to check.
+///
+/// @returns                        RPS_TRUE if the format is block compressed, RPS_FALSE otherwise.
+RpsBool rpsFormatIsBlockCompressed(RpsFormat format);
+
+/// @brief Returns whether a format has a depth or a stencil component.
+///
+/// @param format                   Format to check.
+///
+/// @returns                        RPS_TRUE if the format has a depth or a stencil component, RPS_FALSE otherwise.
+RpsBool rpsFormatHasDepthStencil(RpsFormat format);
+
+/// @brief Returns whether a format has a depth component.
+///
+/// @param format                   Format to check.
+///
+/// @returns                        RPS_TRUE if the format has a depth component, RPS_FALSE otherwise.
+RpsBool rpsFormatHasDepth(RpsFormat format);
+
+/// @brief Returns whether a format has a stencil component.
+///
+/// @param format                   Format to check.
+///
+/// @returns                        RPS_TRUE if the format has a stencil component, RPS_FALSE otherwise.
+RpsBool rpsFormatHasStencil(RpsFormat format);
+
+/// @brief Returns whether a format has only a depth component and no stencil component.
+///
+/// @param format                   Format to check.
+///
+/// @returns                        RPS_TRUE if the format has a depth component and no stencil component, RPS_FALSE otherwise.
+RpsBool rpsFormatIsDepthOnly(RpsFormat format);
+
+/// @brief Returns the single element byte size for a format.
+///
+/// For most formats one element is one pixel. This is different for block compressed formats, e.g.
+/// RPS_FORMAT_BC1_UNORM. The byte size of one block will be returned for these instead.
+///
+/// @param format                   Format to check.
+///
+/// @returns                        0 if the format does not support element-wise usage, size in bytes of a single element otherwise.
+uint32_t rpsGetFormatElementBytes(RpsFormat format);
+
+/// @brief Gets the name string of a format.
+///
+/// @param format                   Format to get its name for.
+///
+/// @returns                        Null-terminated string with the format name.
+const char* rpsFormatGetName(RpsFormat format);
+
+/// @} end defgroup RpsFormat
+/// @} end addtogroup RpsRenderGraphRuntimeResource
+
+#endif  // #ifndef _RPS_FORMAT_H_
diff --git a/include/rps/runtime/common/rps_render_states.h b/include/rps/runtime/common/rps_render_states.h
new file mode 100644
index 0000000..a7dfd48
--- /dev/null
+++ b/include/rps/runtime/common/rps_render_states.h
@@ -0,0 +1,88 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_RENDER_STATES_H_ +#define _RPS_RENDER_STATES_H_ + +#include "rps/core/rps_api.h" + +/// @addtogroup RpsRenderGraphRuntime +/// @{ + +/// Screen region to render to. +typedef struct RpsViewport +{ + float x; ///< Left offset of the viewport. + float y; ///< Top offset of the viewport. + float width; ///< Width of the viewport. + float height; ///< Height of the viewport. + float minZ; ///< Minimum Z value of the viewport. + float maxZ; ///< Maximum Z value of the viewport. +} RpsViewport; + +/// Rectangular geometrical figure. +typedef struct RpsRect +{ + int32_t x; ///< X coordinate of the left edge of the rectangle. + int32_t y; ///< Y coordinate of the top edge of the rectangle. + int32_t width; ///< Width of the rectangle. + int32_t height; ///< Height of the rectangle. +} RpsRect; + +/// @brief Primitive topology types. +/// +/// This maps to common API primitive topology enumerations +/// such as `D3D_PRIMITIVE_TOPOLOGY` and `VkPrimitiveTopology`. +typedef enum RpsPrimitiveTopology +{ + RPS_PRIMITIVE_TOPOLOGY_UNDEFINED = 0, ///< Undefined topology. + RPS_PRIMITIVE_TOPOLOGY_POINTLIST = 1, ///< Point list topology. + RPS_PRIMITIVE_TOPOLOGY_LINELIST = 2, ///< Line list topology. + RPS_PRIMITIVE_TOPOLOGY_LINESTRIP = 3, ///< Line strip topology. + RPS_PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4, ///< Triangle list topology. + RPS_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5, ///< Triangle strip topology. + RPS_PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10, ///< Line list with adjacency topology. + RPS_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11, ///< Line strip with adjacency topology. + RPS_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12, ///< Triangle list with adjacency topology. + RPS_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13, ///< Triangle strip with adjacency topology. 
+ RPS_PRIMITIVE_TOPOLOGY_PATCHLIST = 14, ///< Patch list. The number of control points in the patch list + /// is specified separately. + RPS_PRIMITIVE_TOPOLOGY_FORCE_INT32 = 0x7FFFFFFF, ///< Forces the enumeration to be int32 type. Do not use!!! +} RpsPrimitiveTopology; + +/// @brief Resolve mode types for built-in resolve node. +/// +/// Support of the modes is subject to the API backend used. +typedef enum RpsResolveMode +{ + RPS_RESOLVE_MODE_AVERAGE = 0, ///< Resolve operation outputs the average value of all MSAA samples. + RPS_RESOLVE_MODE_MIN, ///< Resolve operation outputs the minimum value of all MSAA samples. + RPS_RESOLVE_MODE_MAX, ///< Resolve operation outputs the maximum value of all MSAA samples. + RPS_RESOLVE_MODE_ENCODE_SAMPLER_FEEDBACK, ///< Encoding sampler feedback map (DX12 only). + RPS_RESOLVE_MODE_DECODE_SAMPLER_FEEDBACK, ///< Decoding sampler feedback map (DX12 only). + + RPS_RESOLVE_MODE_FORCE_INT32 = 0x7FFFFFFF, +} RpsResolveMode; + +/// @} end addtogroup RpsRenderGraphRuntime + +/// @brief Screen regions to render to. +/// +/// @ingroup RpsRenderGraphCommandRecording +typedef struct RpsCmdViewportInfo +{ + RpsRect defaultRenderArea; ///< Default render area of the node. Usually deduced from the bound render + /// target dimensions. + uint32_t numViewports; ///< Number of viewports used by the node. + uint32_t numScissorRects; ///< Number of scissor rectangles used by the node. + const RpsViewport* pViewports; ///< Pointer to an array of const RpsViewport* with numViewports + /// elements. Must not be NULL if numViewports != 0. + const RpsRect* pScissorRects; ///< Pointer to an array of const RpsRect* with numScissorRects + /// elements. Must not be NULL if numScissorRects != 0. 
+} RpsCmdViewportInfo; + +#endif //_RPS_RENDER_STATES_H_ diff --git a/include/rps/runtime/common/rps_resource.h b/include/rps/runtime/common/rps_resource.h new file mode 100644 index 0000000..572f3eb --- /dev/null +++ b/include/rps/runtime/common/rps_resource.h @@ -0,0 +1,326 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_RESOURCE_H_ +#define _RPS_RESOURCE_H_ + +#include "rps/core/rps_api.h" +#include "rps/runtime/common/rps_format.h" + +/// @addtogroup RpsRenderGraphRuntimeResources +/// @{ + +//--------------------------------------------------------------------------------------- +// Resource +//--------------------------------------------------------------------------------------- + +/// @brief Constant for an invalid resource ID. +#define RPS_RESOURCE_ID_INVALID RPS_INDEX_NONE_U32 + +/// @brief Type for resource identifiers. +typedef uint32_t RpsResourceId; + +/// @brief Resource types used by RPS resources. +typedef enum RpsResourceType +{ + RPS_RESOURCE_TYPE_UNKNOWN = 0, ///< Resource type is unknown / invalid. + RPS_RESOURCE_TYPE_BUFFER, ///< A buffer resource type. + RPS_RESOURCE_TYPE_IMAGE_1D, ///< A 1D image resource type. + RPS_RESOURCE_TYPE_IMAGE_2D, ///< A 2D image resource type. + RPS_RESOURCE_TYPE_IMAGE_3D, ///< A 3D image resource type. + RPS_RESOURCE_TYPE_COUNT, ///< Count of defined resource type values. +} RpsResourceType; + +/// @brief Bitflags for special properties of a resource. +typedef enum RpsResourceFlagBits +{ + RPS_RESOURCE_FLAG_NONE = 0, ///< No special properties. + RPS_RESOURCE_FLAG_CUBEMAP_COMPATIBLE_BIT = (1 << 1), ///< Supports cubemap views. + RPS_RESOURCE_FLAG_ROWMAJOR_IMAGE_BIT = (1 << 2), ///< Uses rowmajor image layout. 
+    RPS_RESOURCE_FLAG_PREFER_GPU_LOCAL_CPU_VISIBLE_BIT = (1 << 3),  ///< Preferred to be in GPU-local CPU-visible heap
+                                                                    /// if available.
+    RPS_RESOURCE_FLAG_PREFER_DEDICATED_ALLOCATION_BIT  = (1 << 4),  ///< Preferred to be in dedicated allocation or as
+                                                                    /// committed resource.
+    RPS_RESOURCE_FLAG_PERSISTENT_BIT                   = (1 << 15), ///< Resource data is persistent from frame to
+                                                                    /// frame.
+} RpsResourceFlagBits;
+
+/// @brief Bitmask type for RpsResourceFlagBits.
+typedef RpsFlags32 RpsResourceFlags;
+
+/// @brief Bitflags for used aspects of an image resource.
+typedef enum RpsImageAspectUsageFlagBits
+{
+    RPS_IMAGE_ASPECT_UNKNOWN  = 0,                      ///< Image aspect usage is unknown.
+    RPS_IMAGE_ASPECT_COLOR    = 1 << 0,                 ///< The color aspect is used.
+    RPS_IMAGE_ASPECT_DEPTH    = 1 << 1,                 ///< The depth aspect is used.
+    RPS_IMAGE_ASPECT_STENCIL  = 1 << 2,                 ///< The stencil aspect is used.
+    RPS_IMAGE_ASPECT_METADATA = 1 << 3,                 ///< The metadata aspect is used.
+    RPS_IMAGE_ASPECT_DEFAULT  = RPS_IMAGE_ASPECT_COLOR, ///< Default image aspect usage (the color aspect).
+} RpsImageAspectUsageFlagBits;
+
+/// @brief Bitmask type for RpsImageAspectUsageFlagBits.
+typedef RpsFlags32 RpsImageAspectUsageFlags;
+
+/// @brief RGBA color value to use for clearing a resource.
+///
+/// Depending on the underlying format of a resource, an appropriately
+/// typed member of this union should be used.
+typedef union RpsClearColorValue
+{
+    float    float32[4]; ///< 4-tuple of IEEE 754 floating point values representing an RGBA clear color.
+    int32_t  int32[4];   ///< 4-tuple of signed integers representing an RGBA clear color.
+    uint32_t uint32[4];  ///< 4-tuple of unsigned integers representing an RGBA clear color.
+} RpsClearColorValue;
+
+/// @brief Bitflags for the way a resource should be cleared.
+typedef enum RpsClearFlags
+{
+    RPS_CLEAR_FLAG_NONE,              ///< No clear flags are specified. (Not a valid use case).
+    RPS_CLEAR_FLAG_COLOR    = 1 << 0, ///< Clears the color aspect of a render target view.
+ RPS_CLEAR_FLAG_DEPTH = 1 << 1, ///< Clears the depth aspect of a depth stencil view. + RPS_CLEAR_FLAG_STENCIL = 1 << 2, ///< Clears the stencil aspect of a depth stencil view. + RPS_CLEAR_FLAG_UAVFLOAT = 1 << 3, ///< Clears the UAV with floating point data. + RPS_CLEAR_FLAG_UAVUINT = 1 << 4, ///< Clears the UAV with integer data. +} RpsClearFlags; + +/// @brief Parameters for clearing a depth stencil resource. +typedef struct RpsClearDepthStencilValue +{ + float depth; ///< Clear value for the depth aspect. + uint32_t stencil; ///< Clear value for the stencil aspect. +} RpsClearDepthStencilValue; + +/// @brief General value a resource can be cleared to. +/// +/// Depending on the context and target resource view format, an appropriately +/// typed member of this union should be used. +typedef union RpsClearValue +{ + RpsClearColorValue color; ///< Clear value for a color resource. + RpsClearDepthStencilValue depthStencil; ///< Clear value for a depth stencil resource. +} RpsClearValue; + +/// @brief Parameters for clearing a resource. +typedef struct RpsClearInfo +{ + RpsFormat format; ///< Format of the resource view to use for clearing. + RpsClearValue value; ///< Clear value. +} RpsClearInfo; + +/// @brief Parameters for a resource description. +typedef struct RpsResourceDesc +{ + RpsResourceType type; ///< Resource type. + uint32_t temporalLayers; ///< Number of temporal layers the resource consists of. + RpsResourceFlags flags; ///< Resource flags for special properties. + + union + { + struct + { + uint32_t width; ///< Width of an image resource. + uint32_t height; ///< Height of an image resource. + union + { + uint32_t depth; ///< Depth of a 3D image resource. + uint32_t arrayLayers; ///< Number of array layers for a non-3D image resource. + }; + uint32_t mipLevels; ///< Number of mipmap levels. + RpsFormat format; ///< Platform independent format to be interpreted by the runtime. + uint32_t sampleCount; ///< Number of MSAA samples of an image. 
+ } image; + struct + { + uint32_t sizeInBytesLo; ///< Lower 32 bits of the size of a buffer resource in bytes. + uint32_t sizeInBytesHi; ///< Higher 32 bits of the size of a buffer resource in bytes. + } buffer; + }; +} RpsResourceDesc; + +/// @brief Subsection of a resource from the graphics API perspective. +typedef struct RpsSubresourceRange +{ + uint16_t baseMipLevel; ///< First mipmapping level accessible in the range. + uint16_t mipLevels; ///< Number of mipmap levels in the range. + uint32_t baseArrayLayer; ///< First layer accessible in the range. + uint32_t arrayLayers; ///< Number of array layers accessible in the range. +} RpsSubresourceRange; + +/// @brief Constant for maximum number of temporal layers a resource may have. +#define RPS_RESOURCE_MAX_TEMPORAL_LAYERS (256) + +/// @brief Constant for the maximum number of simultaneous bound render targets supported by RPS. +#define RPS_MAX_SIMULTANEOUS_RENDER_TARGET_COUNT (8) + +/// @brief Output resources for writing results of a graphics node. +typedef struct RpsCmdRenderTargetInfo +{ + uint32_t numRenderTargets; ///< Number of render targets used by the node. + uint32_t numSamples; ///< Number of MSAA samples. + RpsFormat depthStencilFormat; ///< Depth stencil format or RPS_FORMAT_UNKNOWN if no depth buffer is bound. + + /// Array of render target formats with one format for each of the numRenderTargets render targets. + RpsFormat renderTargetFormats[RPS_MAX_SIMULTANEOUS_RENDER_TARGET_COUNT]; +} RpsCmdRenderTargetInfo; + +#ifdef __cplusplus + +namespace rps +{ + /// @brief A C++ helper type for RpsResourceDesc. 
+    struct ResourceDesc : public RpsResourceDesc
+    {
+        /// @brief Creates a description of type RPS_RESOURCE_TYPE_UNKNOWN and format RPS_FORMAT_UNKNOWN with a
+        ///        width and height of 0.
+        ///
+        /// All remaining members take the default values of the general constructor.
+        ResourceDesc()
+            : ResourceDesc(RPS_RESOURCE_TYPE_UNKNOWN, RPS_FORMAT_UNKNOWN, 0, 0)
+        {
+        }
+
+        /// @brief Creates a copy of a plain RpsResourceDesc.
+        ///
+        /// @param desc                     Description to copy.
+        ResourceDesc(const RpsResourceDesc& desc)
+            : RpsResourceDesc(desc)
+        {
+        }
+
+        /// @brief Creates a description from individual values.
+        ///
+        /// For RPS_RESOURCE_TYPE_BUFFER only inWidth, inTemporalLayers and inFlags are stored; all other arguments
+        /// are ignored. For every other type the image members are written and inWidth is truncated to 32 bits.
+        ///
+        /// @param inType                   Resource type.
+        /// @param inFormat                 Format of an image resource.
+        /// @param inWidth                  Width of an image resource or size in bytes of a buffer resource.
+        /// @param inHeight                 Height of an image resource.
+        /// @param inDepthOrArrayLayers     Depth for RPS_RESOURCE_TYPE_IMAGE_3D, number of array layers for any
+        ///                                 other image type.
+        /// @param inMipLevels              Number of mipmap levels of an image resource.
+        /// @param inSampleCount            Number of MSAA samples of an image resource.
+        /// @param inTemporalLayers         Number of temporal layers of the resource.
+        /// @param inFlags                  Resource flags for special properties.
+        ResourceDesc(RpsResourceType  inType,
+                     RpsFormat        inFormat,
+                     uint64_t         inWidth,
+                     uint32_t         inHeight             = 1,
+                     uint32_t         inDepthOrArrayLayers = 1,
+                     uint32_t         inMipLevels          = 1,
+                     uint32_t         inSampleCount        = 1,
+                     uint32_t         inTemporalLayers     = 1,
+                     RpsResourceFlags inFlags              = RPS_RESOURCE_FLAG_NONE)
+            // Value-initialize the C base so that union members which are not written below (e.g. the image-only
+            // fields of a buffer description) hold zero instead of indeterminate values.
+            : RpsResourceDesc()
+        {
+            type           = inType;
+            temporalLayers = inTemporalLayers;
+            flags          = inFlags;
+
+            if (inType == RPS_RESOURCE_TYPE_BUFFER)
+            {
+                // The 64-bit buffer size is stored as two 32-bit halves.
+                buffer.sizeInBytesLo = uint32_t(inWidth & UINT32_MAX);
+                buffer.sizeInBytesHi = uint32_t(inWidth >> 32u);
+            }
+            else
+            {
+                image.width  = uint32_t(inWidth);
+                image.height = inHeight;
+
+                // depth and arrayLayers share storage in RpsResourceDesc; pick the member matching the type.
+                if (inType == RPS_RESOURCE_TYPE_IMAGE_3D)
+                    image.depth = inDepthOrArrayLayers;
+                else
+                    image.arrayLayers = inDepthOrArrayLayers;
+
+                image.mipLevels   = inMipLevels;
+                image.format      = inFormat;
+                image.sampleCount = inSampleCount;
+            }
+        }
+
+        /// @brief Checks if the described resource is a buffer.
+        ///
+        /// @returns                        true if the type is RPS_RESOURCE_TYPE_BUFFER, false otherwise.
+        bool IsBuffer() const
+        {
+            return type == RPS_RESOURCE_TYPE_BUFFER;
+        }
+
+        /// @brief Checks if the described resource is an image (texture).
+        ///
+        /// @returns                        true if the type is one of the 1D, 2D or 3D image types, false otherwise.
+        bool IsImage() const
+        {
+            return (type == RPS_RESOURCE_TYPE_IMAGE_1D) || (type == RPS_RESOURCE_TYPE_IMAGE_2D) ||
+                   (type == RPS_RESOURCE_TYPE_IMAGE_3D);
+        }
+
+        /// @brief Creates a resource description for a buffer resource.
+        ///
+        /// @param inSizeInBytes            Size of the buffer in bytes.
+        /// @param inTemporalLayers         Number of temporal layers of the buffer.
+        /// @param inFlags                  Resource flags for special properties.
+        ///
+        /// @returns                        Description of the buffer resource.
+        static ResourceDesc Buffer(uint64_t         inSizeInBytes,
+                                   uint32_t         inTemporalLayers = 1,
+                                   RpsResourceFlags inFlags          = RPS_RESOURCE_FLAG_NONE)
+        {
+            // Forward inTemporalLayers and inFlags explicitly. The image-only arguments in between are ignored for
+            // buffers and only spelled out to reach the trailing parameters.
+            return ResourceDesc(
+                RPS_RESOURCE_TYPE_BUFFER, RPS_FORMAT_UNKNOWN, inSizeInBytes, 1, 1, 1, 1, inTemporalLayers, inFlags);
+        }
+
+        /// @brief Creates a resource description structure for a 1D Texture resource.
+        ///
+        /// Note that inMipLevels precedes inArrayLayers here, which is the opposite order of Image2D.
+        ///
+        /// @param inFormat                 Format of the image.
+        /// @param inWidth                  Width of the image.
+        /// @param inMipLevels              Number of mipmap levels.
+        /// @param inArrayLayers            Number of array layers.
+        /// @param inTemporalLayers         Number of temporal layers of the image.
+        /// @param inFlags                  Resource flags for special properties.
+        ///
+        /// @returns                        Description of the 1D image resource with a height and sample count of 1.
+        static ResourceDesc Image1D(RpsFormat        inFormat,
+                                    uint32_t         inWidth,
+                                    uint32_t         inMipLevels      = 1,
+                                    uint32_t         inArrayLayers    = 1,
+                                    uint32_t         inTemporalLayers = 1,
+                                    RpsResourceFlags inFlags          = RPS_RESOURCE_FLAG_NONE)
+        {
+            return ResourceDesc(RPS_RESOURCE_TYPE_IMAGE_1D,
+                                inFormat,
+                                inWidth,
+                                1,
+                                inArrayLayers,
+                                inMipLevels,
+                                1,
+                                inTemporalLayers,
+                                inFlags);
+        }
+
+        /// @brief Creates a resource description for a 2D Texture resource.
+        ///
+        /// @param inFormat                 Format of the image.
+        /// @param inWidth                  Width of the image.
+        /// @param inHeight                 Height of the image.
+        /// @param inArrayLayers            Number of array layers.
+        /// @param inMipLevels              Number of mipmap levels.
+        /// @param inSampleCount            Number of MSAA samples.
+        /// @param inTemporalLayers         Number of temporal layers of the image.
+        /// @param inFlags                  Resource flags for special properties.
+        ///
+        /// @returns                        Description of the 2D image resource.
+        static ResourceDesc Image2D(RpsFormat        inFormat,
+                                    uint32_t         inWidth,
+                                    uint32_t         inHeight,
+                                    uint32_t         inArrayLayers    = 1,
+                                    uint32_t         inMipLevels      = 1,
+                                    uint32_t         inSampleCount    = 1,
+                                    uint32_t         inTemporalLayers = 1,
+                                    RpsResourceFlags inFlags          = RPS_RESOURCE_FLAG_NONE)
+        {
+            return ResourceDesc(RPS_RESOURCE_TYPE_IMAGE_2D,
+                                inFormat,
+                                inWidth,
+                                inHeight,
+                                inArrayLayers,
+                                inMipLevels,
+                                inSampleCount,
+                                inTemporalLayers,
+                                inFlags);
+        }
+
+        /// @brief Creates a resource description for a 3D Texture resource.
+        ///
+        /// @param inFormat                 Format of the image.
+        /// @param inWidth                  Width of the image.
+        /// @param inHeight                 Height of the image.
+        /// @param inDepth                  Depth of the image.
+        /// @param inMipLevels              Number of mipmap levels.
+        /// @param inTemporalLayers         Number of temporal layers of the image.
+        /// @param inFlags                  Resource flags for special properties.
+        ///
+        /// @returns                        Description of the 3D image resource with a sample count of 1.
+        static ResourceDesc Image3D(RpsFormat        inFormat,
+                                    uint32_t         inWidth,
+                                    uint32_t         inHeight,
+                                    uint32_t         inDepth,
+                                    uint32_t         inMipLevels      = 1,
+                                    uint32_t         inTemporalLayers = 1,
+                                    RpsResourceFlags inFlags          = RPS_RESOURCE_FLAG_NONE)
+        {
+            return ResourceDesc(RPS_RESOURCE_TYPE_IMAGE_3D,
+                                inFormat,
+                                inWidth,
+                                inHeight,
+                                inDepth,
+                                inMipLevels,
+                                1,
+                                inTemporalLayers,
+                                inFlags);
+        }
+    };
+
+    /// @brief C++ helper type for RpsSubresourceRange.
+ struct SubresourceRange : public RpsSubresourceRange + { + SubresourceRange(uint16_t inBaseMip = 0, + uint16_t inMipLevels = 1, + uint32_t inBaseArrayLayer = 0, + uint32_t inArrayLayers = 1) + { + baseArrayLayer = inBaseArrayLayer; + arrayLayers = inArrayLayers; + baseMipLevel = inBaseMip; + mipLevels = inMipLevels; + } + }; + +} // namespace rps + +#endif //__cplusplus + +/// @} end addtogroup RpsRenderGraphRuntimeResources + +#endif //_RPS_RESOURCE_H_ diff --git a/include/rps/runtime/common/rps_runtime.h b/include/rps/runtime/common/rps_runtime.h new file mode 100644 index 0000000..8affd39 --- /dev/null +++ b/include/rps/runtime/common/rps_runtime.h @@ -0,0 +1,1665 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef RPS_RUNTIME_H +#define RPS_RUNTIME_H + +#include "rps/runtime/common/rps_format.h" +#include "rps/runtime/common/rps_resource.h" +#include "rps/runtime/common/rps_access.h" +#include "rps/runtime/common/rps_render_states.h" + +/// @defgroup Runtime Runtime +/// @{ + +/// @defgroup RpsRuntimeDevice RpsRuntimeDevice +/// @defgroup RpsVKRuntimeDevice RpsVKRuntimeDevice +/// @defgroup RpsD3D12RuntimeDevice RpsD3D12RuntimeDevice +/// @defgroup RpsD3D11RuntimeDevice RpsD3D11RuntimeDevice +/// @defgroup RpsRenderGraphRuntime RpsRenderGraph Runtime +/// @defgroup RpsRenderGraphRuntimeResources RpsRenderGraph Runtime Resources +/// @defgroup RpsRenderGraphCommandRecording RpsRenderGraph Command Recording +/// @defgroup RpsSubprogram RpsSubprogram + +#ifdef __cplusplus +extern "C" { +#endif //__cplusplus + +/// @brief Handle type for RPS runtime device objects. +/// +/// @ingroup RpsRuntimeDevice +RPS_DEFINE_HANDLE(RpsRuntimeDevice); + +/// @addtogroup RpsRenderGraphRuntime +/// @{ + +/// @brief Handle type for RPS render graph objects. 
+RPS_DEFINE_HANDLE(RpsRenderGraph); + +/// @brief Handle type for the render graph builder objects. +RPS_DEFINE_HANDLE(RpsRenderGraphBuilder); + +/// @brief Handle type for RPS render graph phase objects. +RPS_DEFINE_HANDLE(RpsRenderGraphPhase); + +/// @brief Handle type for RPS subprogram objects. +/// +/// Can be used as either main entry or a node implementation in a render graph. +RPS_DEFINE_HANDLE(RpsSubprogram); + +/// @} end addtogroup RpsRenderGraphRuntime + +/// @addtogroup RpsRenderGraphRuntimeResources +/// @{ + +/// @brief Opaque handle type for RPS runtime heap objects. +RPS_DEFINE_OPAQUE_HANDLE(RpsRuntimeHeap); + +/// @brief Opaque handle type for RPS runtime resource objects. +RPS_DEFINE_OPAQUE_HANDLE(RpsRuntimeResource); + +/// @brief Opaque handle type for runtime command buffer objects. +RPS_DEFINE_OPAQUE_HANDLE(RpsRuntimeCommandBuffer); + +/// @} end addtogroup RpsRenderGraphRuntimeResources + +/// @addtogroup RpsRenderGraphRuntime +/// @{ + +/// @defgroup RpsParamAttr RpsParamAttr +/// @{ + +/// @brief Function parameter attribute. +typedef struct RpsParamAttr +{ + RpsAccessAttr access; ///< Access attribute of the parameter. + RpsSemanticAttr semantic; ///< Semantic attribute of the parameter. +} RpsParamAttr; + +/// @brief Handle type for an object describing a number of render graph node parameter attributes. +RPS_DEFINE_HANDLE(RpsParamAttrList); + +/// @} end defgroup RpsParamAttr + +/// @brief Handle type for an object describing a number of render graph node attributes. +RPS_DEFINE_HANDLE(RpsNodeAttrList); + +/// @brief Bitflags for scheduling behavior. +typedef enum RpsScheduleFlagBits +{ + /// No schedule flag bits are specified. Default options are used. When used as + /// RpsRenderGraphUpdateInfo::scheduleFlags, the RpsRenderGraphCreateInfo::scheduleInfo::scheduleFlags specified + /// at render graph creation time are used instead. + RPS_SCHEDULE_UNSPECIFIED = (0), + + /// Command nodes are kept in the program order. 
+ RPS_SCHEDULE_KEEP_PROGRAM_ORDER_BIT = (1 << 0), + + /// Schedules in favor of reducing total GPU memory usage. Possible strategies include minimizing transient resource + /// lifetimes and agressive aliasing. This may increase the number of barriers generated. + RPS_SCHEDULE_PREFER_MEMORY_SAVING_BIT = (1 << 1), + + /// Schedules commands randomly (without changing program logic). Mostly useful for testing purposes. Applications + /// should normally avoid using this flag for end-user scenarios. If RPS_SCHEDULE_KEEP_PROGRAM_ORDER_BIT is set, + /// this flag will have no effect. + RPS_SCHEDULE_RANDOM_ORDER_BIT = (1 << 2), + + /// Avoids alternating between graphics and compute work on the same queue. This can help for some architectures + /// where switching between graphics and compute produces extra overhead. + RPS_SCHEDULE_MINIMIZE_COMPUTE_GFX_SWITCH_BIT = (1 << 3), + + /// Disables dead code elimination optimization. By default, RPS removes nodes that have no visible effect (Not + /// contributing to modification of external, temporal, persistent or CPU resources). This flag disables this + /// optimization. + RPS_SCHEDULE_DISABLE_DEAD_CODE_ELIMINATION_BIT = (1 << 4), + + /// Disables work pipelining based on the workload type. + RPS_SCHEDULE_WORKLOAD_TYPE_PIPELINING_DISABLE_BIT = (1 << 5), + + /// Performs aggressive work pipelining based on the workload type. If + /// RPS_SCHEDULE_WORKLOAD_TYPE_PIPELINING_DISABLE_BIT is set, this flag will have not effect. + RPS_SCHEDULE_WORKLOAD_TYPE_PIPELINING_AGGRESSIVE_BIT = (1 << 6), + + /// Includes split barriers where appropriate. + RPS_SCHEDULE_ALLOW_SPLIT_BARRIERS_BIT = (1 << 16), + + // Reserved for future use: + + /// Reserved for future use. Avoids rescheduling if possible and uses the existing schedule instead. + RPS_SCHEDULE_AVOID_RESCHEDULE_BIT = (1 << 17), + + /// Reserved for future use. Allows work to overlap between multiple frames. 
+ RPS_SCHEDULE_ALLOW_FRAME_OVERLAP_BIT = (1 << 21), + + /// Reserved for future use. Tries to use render pass transitions instead of standalone transition nodes when + /// possible. If RPS_SCHEDULE_DISABLE_RENDERPASS_TRANSITIONS_BIT is set, this flag will have no effect. + RPS_SCHEDULE_PREFER_RENDERPASS_TRANSITIONS_BIT = (1 << 22), + + /// Reserved for future use. Uses standalone transition nodes instead of render pass transitions. + RPS_SCHEDULE_DISABLE_RENDERPASS_TRANSITIONS_BIT = (1 << 23), + + // End reserved for future use. + + /// Uses default options. This is identical to RPS_SCHEDULE_UNSPECIFIED in most cases, except when used as + /// RpsRenderGraphUpdateInfo::scheduleFlags, instead using the default options regardless of + /// RpsRenderGraphCreateInfo::scheduleInfo::scheduleFlags. This default behavior is a baseline set of criteria used + /// for scheduling to which these flags can add additional ones. + RPS_SCHEDULE_DEFAULT = (1 << 30), + + /// Pioritizes application performance over a lower memory footprint. + RPS_SCHEDULE_DEFAULT_PERFORMANCE = RPS_SCHEDULE_DEFAULT, + + /// Prioritizes a lower memory footprint over performance. + RPS_SCHEDULE_DEFAULT_MEMORY = RPS_SCHEDULE_PREFER_MEMORY_SAVING_BIT, +} RpsScheduleFlagBits; + +/// @brief Bitmask type for RpsScheduleFlagBits. +typedef RpsFlags32 RpsScheduleFlags; + +/// @brief Bitflags for enabling diagnostic systems. +typedef enum RpsDiagnosticFlagBits +{ + RPS_DIAGNOSTIC_NONE = 0, ///< No diagnostic mode enabled. + RPS_DIAGNOSTIC_ENABLE_PRE_SCHEDULE_DUMP = 1 << 0, ///< Dumps the resources and commands of a render graph + /// before optimization through the scheduler. + RPS_DIAGNOSTIC_ENABLE_POST_SCHEDULE_DUMP = 1 << 1, ///< Dumps the commands of the render graph after + /// optimization through the scheduler. + RPS_DIAGNOSTIC_ENABLE_DAG_DUMP = 1 << 2, ///< Dumps the directed acyclic graph of nodes defined + /// by the render graph in graphviz format. 
+ RPS_DIAGNOSTIC_ENABLE_SOURCE_LOCATION = 1 << 3, ///< Inserts source code location debug data for + /// resource definitions and node calls. + RPS_DIAGNOSTIC_ENABLE_RUNTIME_DEBUG_NAMES = 1 << 4, ///< Sets resource names as debug names in the graphics + /// API in use. + + RPS_DIAGNOSTIC_ENABLE_ALL = (1 << 5) - 1, ///< Enable all flags. +} RpsDiagnosticFlagBits; + +/// @brief Bitmask type for RpsDiagnosticFlagBits. +typedef RpsFlags32 RpsDiagnosticFlags; + +/// @brief Bitflags for special render graph properties. +typedef enum RpsRenderGraphFlagBits +{ + RPS_RENDER_GRAPH_FLAG_NONE = 0, ///< No special properties. + RPS_RENDER_GRAPH_DISALLOW_UNBOUND_NODES_BIT = 1 << 0, ///< Disallows unbound nodes if no default callback is set. + RPS_RENDER_GRAPH_NO_GPU_MEMORY_ALIASING = 1 << 1, ///< Disables GPU memory aliasing. +} RpsRenderGraphFlagBits; + +/// @brief Bitmask type for RpsRenderGraphFlagBits. +typedef RpsFlags32 RpsRenderGraphFlags; + +/// @brief Constant for the maximum number of hardware queues in use by RPS. +#define RPS_MAX_QUEUES (8) + +/// @brief Bitflags for properties of a render graph node declaration. +/// +/// These are e.g. required queue type, async preferences, etc. All usages of a node will use the same set of +/// properties the node was declared with. +typedef enum RpsNodeDeclFlagBits +{ + RPS_NODE_DECL_FLAG_NONE = 0, ///< No node declaration properties. + RPS_NODE_DECL_GRAPHICS_BIT = 1 << 0, ///< Node requires a queue with graphics capabilites. + RPS_NODE_DECL_COMPUTE_BIT = 1 << 1, ///< Node requires a queue with compute capabilities. + RPS_NODE_DECL_COPY_BIT = 1 << 2, ///< Node requires a queue with copy capabilities. + RPS_NODE_DECL_PREFER_RENDER_PASS = 1 << 3, ///< Node prefers to be executed as a render pass if the API backend + /// supports it. + RPS_NODE_DECL_PREFER_ASYNC = 1 << 4, ///< Node prefers to be executed asynchronously. +} RpsNodeDeclFlagBits; + +/// @brief Bitmask type for RpsNodeDeclFlagBits. 
+typedef RpsFlags32 RpsNodeDeclFlags;
+
+/// @brief Bitflags for decorating node parameters.
+typedef enum RpsParameterFlagBits
+{
+    RPS_PARAMETER_FLAG_NONE         = 0,      ///< No bit flags.
+    RPS_PARAMETER_FLAG_OUT_BIT      = 1 << 0, ///< Node parameter is an output parameter.
+    RPS_PARAMETER_FLAG_OPTIONAL_BIT = 1 << 1, ///< Node parameter is optional.
+    RPS_PARAMETER_FLAG_RESOURCE_BIT = 1 << 2, ///< Node parameter is an RPS resource.
+} RpsParameterFlagBits;
+
+// NOTE(review): RpsNodeDeclFlags above and RpsCmdCallbackFlags below are declared as RpsFlags32, while this
+// bitmask type is declared as uint32_t. Presumably both name the same type - confirm against the definition of
+// RpsFlags32 before unifying.
+/// @brief Bitmask type for RpsParameterFlagBits.
+typedef uint32_t RpsParameterFlags;
+
+/// @brief Bitflags for command callback properties.
+///
+/// Different calls of the same node may use different sets of properties.
+typedef enum RpsCmdCallbackFlagBits
+{
+    /// No callback properties.
+    RPS_CMD_CALLBACK_FLAG_NONE = 0,
+
+    /// The command callback will record command buffers in a multi-threaded way. This may change the render pass
+    /// setup behavior as required by some graphics APIs.
+    RPS_CMD_CALLBACK_MULTI_THREADED_BIT = 1 << 0,
+
+    /// Skips default render target / depth stencil buffer setup, even if any were specified in the node parameter
+    /// semantics.
+    RPS_CMD_CALLBACK_CUSTOM_RENDER_TARGETS_BIT = 1 << 1,
+
+    /// Skips viewport and scissor rect setup during command node setup. Used when the command callback will do the
+    /// setup instead.
+    RPS_CMD_CALLBACK_CUSTOM_VIEWPORT_BIT = 1 << 2,
+
+    /// Skips render state & resource binding setup other than render targets (including depth stencil buffer) and
+    /// viewport (including scissor rects).
+    RPS_CMD_CALLBACK_CUSTOM_STATE_SETUP_BIT = 1 << 3,
+} RpsCmdCallbackFlagBits;
+
+/// @brief Bitmask type for RpsCmdCallbackFlagBits.
+typedef RpsFlags32 RpsCmdCallbackFlags;
+
+/// @brief Type for command callback contexts.
+typedef struct RpsCmdCallbackContext RpsCmdCallbackContext;
+
+/// @brief Signature of render graph node callbacks.
+///
+/// @param pContext                 Context for the command callback.
+typedef void (*PFN_rpsCmdCallback)(const RpsCmdCallbackContext* pContext);
+
+/// @brief Command callback with usage parameters.
+typedef struct RpsCmdCallback
+{
+    PFN_rpsCmdCallback  pfnCallback;  ///< Pointer to a callback function.
+    void*               pUserContext; ///< User context to be passed to the callback.
+    RpsCmdCallbackFlags flags;        ///< Flags for the callback.
+} RpsCmdCallback;
+
+/// @brief Parameters for describing a node call parameter.
+typedef struct RpsParameterDesc
+{
+    RpsTypeInfo       typeInfo;  ///< Type info of the parameter.
+    uint32_t          arraySize; ///< Number of array elements for this parameter. 0 indicates not an array (single
+                                 /// element). UINT32_MAX indicates an unbounded array.
+    RpsConstant       attr;      ///< Pointer to a runtime defined structure with attributes of the parameter.
+    const char*       name;      ///< Null-terminated string with the name of the parameter.
+    RpsParameterFlags flags;     ///< Parameter type flags.
+} RpsParameterDesc;
+
+/// @brief Parameters for describing a render graph node.
+typedef struct RpsNodeDesc
+{
+    RpsNodeDeclFlags        flags;       ///< Flags for the type of render graph node.
+    uint32_t                numParams;   ///< Number of parameters used in the callback.
+    const RpsParameterDesc* pParamDescs; ///< Pointer to an array of const RpsParameterDesc with
+                                         /// numParams parameter descriptions. Must not be NULL if numParams != 0.
+    const char*             name;        ///< Null-terminated string with the name of the callback.
+} RpsNodeDesc;
+
+/// @brief Parameters for describing a render graph signature.
+typedef struct RpsRenderGraphSignatureDesc
+{
+    uint32_t                numParams;            ///< Number of parameters in the signature.
+    uint32_t                numNodeDescs;         ///< Number of node descriptions in the signature.
+    uint32_t                maxExternalResources; ///< Number of resources in the parameters of the signature. Array parameters
+                                                  /// contribute with their size towards this number.
+    const RpsParameterDesc* pParamDescs;          ///< Pointer to an array of const RpsParameterDesc with
+                                                  /// numParams parameters for the signature. Must not be NULL if
+                                                  /// numParams != 0.
+    const RpsNodeDesc*      pNodeDescs;           ///< Pointer to an array of const RpsNodeDesc with numNodeDescs
+                                                  /// node descriptions for the signature. Must not be NULL if numNodeDescs != 0.
+    const char*             name;                 ///< Null-terminated string with the name of the render graph.
+} RpsRenderGraphSignatureDesc;
+
+/// @brief Reports an error from a command callback context.
+///
+/// @param pContext                 Pointer to the context. Must be the primary context (passed to the command
+///                                 callback as argument). This function fails if the context is a secondary
+///                                 context (created via rpsCmdCloneContext). Must not be NULL.
+///
+/// @param errorCode                Error code for the type of error to report. For errorCode == RPS_OK, this
+///                                 function does nothing.
+///
+/// @returns                        Result code of the operation. See RpsResult for more info.
+RpsResult rpsCmdCallbackReportError(const RpsCmdCallbackContext* pContext, RpsResult errorCode);
+
+/// @brief Bitflags for node instance properties.
+typedef enum RpsNodeFlagBits
+{
+    RPS_NODE_FLAG_NONE    = 0,      ///< No node instance properties.
+    RPS_NODE_PREFER_ASYNC = 1 << 1, ///< Node prefers to be executed asynchronously.
+} RpsNodeFlagBits;
+
+/// @brief Bitmask type for RpsNodeFlagBits of properties for a render graph node instance.
+///
+/// While RpsNodeDeclFlags apply to all instances which share the same node declaration, RpsNodeFlags apply to one
+/// specific node instance.
+typedef RpsFlags32 RpsNodeFlags;
+
+/// @brief Signature of functions for render graph building.
+///
+/// @param hBuilder                 Handle to the render graph builder to use.
+/// @param pArgs                    Pointer to an array of RpsConstant with numArgs constant
+///                                 arguments to use for building. Must not be NULL if numArgs != 0.
+/// @param numArgs                  Number of constant arguments to use for building.
+/// +/// @returns Result code of the operation. See RpsResult for more info. +typedef RpsResult (*PFN_rpsRenderGraphBuild)(RpsRenderGraphBuilder hBuilder, + const RpsConstant* pArgs, + uint32_t numArgs); + +/// @brief Parameters for updating a render graph. +/// +/// @relates RpsRenderGraph +typedef struct RpsRenderGraphUpdateInfo +{ + /// Index of the current frame to be recorded after the update. + uint64_t frameIndex; + + /// Index of the last frame that finished executing on the GPU. Used for resource lifetime management. + uint64_t gpuCompletedFrameIndex; + + /// Flags for scheduling behavior. Used for overriding flags specified at creation. + RpsScheduleFlags scheduleFlags; + + /// Flags for enabling diagnostics systems during the render graph update. + RpsDiagnosticFlags diagnosticFlags; + + /// Number of arguments to pass to the entry. Passing less than the number of values from the render graph entry + /// updates only the first numArgs arguments and does not touch any other argument. + uint32_t numArgs; + + /// Pointer to an array of RpsConstant with numArgs constant arguments to pass to the entry. + /// Must not be NULL if numArgs != 0. + const RpsConstant* ppArgs; + + /// Pointer to an array of const RpsRuntimeResource* const with pointers to externally managed + /// resources used in the render graph. Resource arguments in ppArgs have a corresponding runtime resource (or + /// array) in ppArgResources, at the same index. If e.g. {&backBufferResourceDescription, value} is passed for + /// ppArgs, ppArgResources[0] would have to point to the corresponding RpsRuntimeResource of the + /// backbuffer. + const RpsRuntimeResource* const* ppArgResources; + + /// Pointer to a function for starting the render graph building process. + PFN_rpsRenderGraphBuild pfnBuildCallback; + + /// Pointer to a random number generator. 
+ const RpsRandomNumberGenerator* pRandomNumberGenerator; +} RpsRenderGraphUpdateInfo; + +/// @brief Constant for the maximum number of supported frames which can be queued on the GPU simultaneously. +#define RPS_MAX_QUEUED_FRAMES (16) + +/// @brief Special frame index value, when passed as RpsRenderGraphUpdateInfo::gpuCompletedFrameIndex, +/// indicates that no frames are known to have finished executing on the GPU yet. + +#define RPS_GPU_COMPLETED_FRAME_INDEX_NONE (UINT64_MAX) + +/// @brief Signature of functions for executing a render graph phase. +/// +/// @param hRenderGraph Handle to the render graph to execute the phase for. +/// @param pUpdateInfo Pointer to update parameters. +/// @param phase Handle to the render graph phase object. +/// +/// @returns Result code of the operation. See RpsResult for more info. +typedef RpsResult (*PFN_rpsRenderGraphPhaseRun)(RpsRenderGraph hRenderGraph, + const RpsRenderGraphUpdateInfo* pUpdateInfo, + RpsRenderGraphPhase phase); + +/// @brief Signature of functions for destroying a render graph phase object. +/// +/// @param phase Handle to the render graph phase object. +/// +/// @returns Result code of the operation. See RpsResult for more info. +typedef RpsResult (*PFN_rpsRenderGraphPhaseDestroy)(RpsRenderGraphPhase phase); + +/// @brief Parameters of a render graph processing phase. +typedef struct RpsRenderGraphPhaseInfo +{ + RpsRenderGraphPhase hPhase; ///< Handle to the render graph phase object. + PFN_rpsRenderGraphPhaseRun pfnRun; ///< Pointer to a function for executing the render graph phase. + PFN_rpsRenderGraphPhaseDestroy pfnDestroy; ///< Pointer to a function for destroying the render graph phase. +} RpsRenderGraphPhaseInfo; + +/// @} end addtogroup RpsRenderGraphRuntime + +/// @addtogroup RpsRenderGraphRuntimeResources +/// @{ + +/// @brief Parameters of a memory type. +typedef struct RpsMemoryTypeInfo +{ + uint64_t defaultHeapSize; ///< Default size for creating a heap of this type. 
+ uint32_t minAlignment; ///< Minimum alignment for heaps of this memory type in bytes. +} RpsMemoryTypeInfo; + +/// @brief Required parameters for a GPU memory allocation. +typedef struct RpsGpuMemoryRequirement +{ + uint64_t size; ///< Size of the allocation in bytes. + uint32_t alignment; ///< Minimum alignment required by the allocation in bytes. + RpsIndex32 memoryTypeIndex; ///< Index for the type of memory the allocation should be created from. API backend + /// specific. +} RpsGpuMemoryRequirement; + +/// @brief Type for heap identifiers. +/// +/// Internally, these are used as simple indices. +typedef RpsIndex32 RpsHeapId; + +/// @brief Parameters of a resource placement inside a heap. +typedef struct RpsHeapPlacement +{ + RpsHeapId heapId; ///< ID of the heap in the render graph. + uint64_t offset; ///< Offset of the resource placement inside the heap in bytes. +} RpsHeapPlacement; + +/// @brief Parameters of a runtime resource. +typedef struct RpsRuntimeResourceInfo +{ + RpsRuntimeResource hResource; ///< Handle to the resource created by the API backend. + RpsResourceDesc resourceDesc; ///< Resource description. + uint32_t numSubresources; ///< Number of subresources in the entire resource. + RpsSubresourceRange fullRange; ///< Range spanning all subresources. + RpsHeapId heapId; ///< ID of the heap its memory is placed in. + RpsGpuMemoryRequirement allocInfo; ///< Allocation parameters of the resource. +} RpsRuntimeResourceInfo; + +/// @brief Bitflags for queue capabilities. +typedef enum RpsQueueFlagBits +{ + RPS_QUEUE_FLAG_NONE = 0, ///< No capabilities. + RPS_QUEUE_FLAG_GRAPHICS = 1 << 0, ///< Graphics capabilities. + RPS_QUEUE_FLAG_COMPUTE = 1 << 1, ///< Compute capabilities. + RPS_QUEUE_FLAG_COPY = 1 << 2, ///< Copy capabilities. +} RpsQueueFlagBits; + +/// @brief Bitmask type for RpsQueueFlagBits. 
+typedef RpsFlags32 RpsQueueFlags; + +/// @} end addtogroup RpsRenderGraphRuntimeResources + +#include "rps/runtime/common/rps_runtime_callbacks.h" + +/// @addtogroup RpsRuntimeDevice +/// @{ + +/// @brief Parameters for creating a runtime device. +typedef struct RpsRuntimeDeviceCreateInfo +{ + void* pUserContext; ///< User defined context to be passed to the callback functions. + RpsRuntimeCallbacks callbacks; ///< Callback functions. +} RpsRuntimeDeviceCreateInfo; + +/// @brief Parameters for creating a dummy runtime device. +/// +/// A NullRuntimeDevice is a default implementation of the RuntimeDevice interface without any real GPU +/// device associated. +typedef struct RpsNullRuntimeDeviceCreateInfo +{ + const RpsDeviceCreateInfo* pDeviceCreateInfo; ///< Pointer to parameters for creating the core device + /// associated with the runtime device. Passing NULL uses + /// default parameters for creation instead. + const RpsRuntimeDeviceCreateInfo* pRuntimeCreateInfo; ///< Pointer to parameters for creating the runtime device. + /// Passing NULL uses default parameters for creation + /// instead. +} RpsNullRuntimeDeviceCreateInfo; + +/// @brief Creates a dummy runtime. +/// +/// For more info see RpsNullRuntimeDeviceCreateInfo. +/// +/// @param pCreateInfo Pointer to parameters for creating a dummy runtime. Passing +/// NULL uses default parameters for creation instead. +/// @param phDevice Pointer to a handle in which the device is returned. Must +/// not be NULL. +/// +/// @returns Result code of the operation. See RpsResult +/// for more info. +RpsResult rpsNullRuntimeDeviceCreate(const RpsNullRuntimeDeviceCreateInfo* pCreateInfo, RpsDevice* phDevice); + +/// @} end addtogroup RpsRuntimeDevice + +/// @addtogroup RpsSubprogram +/// @{ + +/// @brief Parameters for creating an RPS program. +typedef struct RpsProgramCreateInfo +{ + /// Pointer to signature parameters for the program entry.
If hRpslEntryPoint is specified, this parameter will be + /// ignored and the signature will be taken from the RpslEntry definition. + /// Must not be NULL if hRpslEntryPoint == RPS_NULL_HANDLE. + const RpsRenderGraphSignatureDesc* pSignatureDesc; + + /// Handle to the program entry point. + RpsRpslEntry hRpslEntryPoint; + + /// Default node callback. Used when a node is called for which no implementation is bound. + RpsCmdCallback defaultNodeCallback; +} RpsProgramCreateInfo; + +/// @brief Gets the signature description of an RPSL entry point. +/// +/// @param hRpslEntry Handle to the RPSL entry point. +/// @param pDesc Pointer in which the signature description is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsRpslEntryGetSignatureDesc(RpsRpslEntry hRpslEntry, RpsRenderGraphSignatureDesc* pDesc); + +/// @brief Creates a subprogram. +/// +/// @param hDevice Handle to the device to use for creation. +/// @param pCreateInfo Pointer to the creation parameters. Must not be NULL. +/// @param phProgram Pointer to a handle in which the subprogram is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsProgramCreate(RpsDevice hDevice, const RpsProgramCreateInfo* pCreateInfo, RpsSubprogram* phProgram); + +/// @brief Destroys a subprogram instance. +/// +/// @param hProgram Handle to the subprogram object. +void rpsProgramDestroy(RpsSubprogram hProgram); + +/// @brief Binds a command node callback to a node declaration specified by name. +/// +/// Node instances generated from the program with the specified node declaration will call the same callback during +/// render graph command recording. +/// +/// @param hProgram Handle to the program to bind a node for. +/// @param name Null terminated string with the name of the node. +/// @param pCallback Pointer to callback parameters. Passing NULL uses default callback parameters. 
+/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsProgramBindNodeCallback(RpsSubprogram hProgram, const char* name, const RpsCmdCallback* pCallback); + +/// @brief Binds a subprogram to a node declaration specified by name. +/// +/// The subprogram will be executed during render graph update as if inlined into the parent program. +/// During render graph command recording, node instances generated from the subprogram will call the subprogram node +/// callback bindings. Subprograms can be nested recursively. +/// +/// @param hProgram Handle to the program to bind the subprogram to. +/// @param name Null terminated string with the name of the node. +/// @param hSubprogram Handle to the subprogram to bind. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsProgramBindNodeSubprogram(RpsSubprogram hProgram, const char* name, RpsSubprogram hSubprogram); + +/// @} end addtogroup RpsSubprogram + +/// @addtogroup RpsRenderGraphRuntime +/// @{ + +/// @brief Parameters for creating a render graph. +typedef struct RpsRenderGraphCreateInfo +{ + struct + { + RpsScheduleFlags scheduleFlags; ///< Flags for scheduling behavior. + uint32_t numQueues; ///< Number of queues available to the render graph. If 0, RPS assumes there + /// is 1 graphics queue. + const RpsQueueFlags* pQueueInfos; ///< Pointer to an array of RpsQueueFlags with numQueues queue + /// flags. Must not be NULL if numQueues != 0. + } scheduleInfo; + + struct + { + uint32_t numHeaps; ///< Number of memory heaps available to the render graph. + const uint32_t* heapBudgetMiBs; ///< Pointer to an array of uint32_t with numHeaps memory sizes as + /// limits on the amount of memory to be used. Must not be NULL if + /// numHeaps != 0. + } memoryInfo; + + RpsProgramCreateInfo mainEntryCreateInfo; ///< Creation parameters for the main entry RPS program. + RpsRenderGraphFlags renderGraphFlags; ///< Flags for render graph properties.
+ + /// Number of render graph phase objects used by the render graph. + uint32_t numPhases; + + /// Pointer to an array of const RpsRenderGraphPhaseInfo with numPhases render graph phase objects + /// used by the render graph. If NULL, RPS uses the runtime specified default pipeline to process the render graph. + const RpsRenderGraphPhaseInfo* pPhases; + +} RpsRenderGraphCreateInfo; + +/// @brief Creates a render graph. +/// +/// @param hDevice Handle to the device to use for creation. Must not be RPS_NULL_HANDLE. +/// @param pCreateInfo Pointer to creation parameters. Must not be NULL. +/// @param phRenderGraph Pointer to a handle in which the render graph is returned. +/// Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsRenderGraphCreate(RpsDevice hDevice, + const RpsRenderGraphCreateInfo* pCreateInfo, + RpsRenderGraph* phRenderGraph); + +/// @brief Updates a render graph. +/// +/// @param hRenderGraph Handle to the render graph to update. Must not be RPS_NULL_HANDLE. +/// @param pUpdateInfo Pointer to update parameters. Must not be NULL. +/// +/// @returns RpsResult indicating potential errors during the execution. See +/// RpsResult for more info. +RpsResult rpsRenderGraphUpdate(RpsRenderGraph hRenderGraph, const RpsRenderGraphUpdateInfo* pUpdateInfo); + +/// @brief Destroys a render graph. +/// +/// @param hRenderGraph Handle to the render graph object to destroy. +void rpsRenderGraphDestroy(RpsRenderGraph hRenderGraph); + +/// @brief Allocates memory from a render graph builder. +/// +/// @param hRenderGraphBuilder Handle to the render graph builder. Must not be RPS_NULL_HANDLE. +/// @param size Required size of the allocation. +/// +/// @returns Pointer to the allocated memory if the allocation was successful, NULL otherwise. +/// Only valid until the next render graph update.
+void* rpsRenderGraphAllocateData(RpsRenderGraphBuilder hRenderGraphBuilder, size_t size); + +/// @brief Allocates memory from a render graph builder with alignment requirements. +/// +/// @param hRenderGraphBuilder Handle to the render graph builder. Must not be RPS_NULL_HANDLE. +/// @param size Required size of the allocation. +/// @param alignment Minimum alignment requirement of the allocation in bytes. +/// +/// @returns Pointer to the allocated memory if allocation was successful, NULL otherwise. +/// Only valid until the next render graph update. +void* rpsRenderGraphAllocateDataAligned(RpsRenderGraphBuilder hRenderGraphBuilder, size_t size, size_t alignment); + +/// @brief Declare an on-demand node type during the render graph construction. +/// +/// Normally, node declarations are specified in the RenderGraphSignature ahead of time. This function allows +/// additional node declarations to be added. Note: The lifetime of the dynamic node declaration is temporary +/// and it is only valid until the next render graph update. +/// +/// @param hRenderGraphBuilder Handle to the render graph builder. Must not be RPS_NULL_HANDLE. +/// @param pNodeDesc Pointer to a node description. Passing NULL for the name of the description +/// registers the node as a fallback for calling unknown nodes. +/// +/// @returns ID of the node declaration if successful, RPS_NODEDECL_ID_INVALID otherwise. +RpsNodeDeclId rpsRenderGraphDeclareDynamicNode(RpsRenderGraphBuilder hRenderGraphBuilder, const RpsNodeDesc* pNodeDesc); + +/// @brief Gets a variable from the render graph builder by its ID. +/// +/// @param hRenderGraphBuilder Handle to the render graph builder to get the variable from. +/// @param paramId Index of the parameter. +/// +/// @returns Variable identified by its ID. +RpsVariable rpsRenderGraphGetParamVariable(RpsRenderGraphBuilder hRenderGraphBuilder, RpsParamId paramId); + +/// @brief Gets the resource ID of a resource parameter by the parameter ID. 
+/// +/// Resource parameters have RPS_PARAMETER_FLAG_RESOURCE_BIT set and Resources are expected to be provided externally +/// to the render graph. +/// +/// @param hRenderGraphBuilder Handle to the render graph builder. Must not be RPS_NULL_HANDLE. +/// @param paramId Index of the parameter. +/// +/// @returns ID of the resource identified by its parameter ID. +RpsResourceId rpsRenderGraphGetParamResourceId(RpsRenderGraphBuilder hRenderGraphBuilder, RpsParamId paramId); + +/// @brief Declare a render graph managed resource. +/// +/// @param hRenderGraphBuilder Handle to the render graph builder. Must not be RPS_NULL_HANDLE. +/// @param name Null terminated string with the name of the resource. +/// @param localId Subprogram local ID of the resource. +/// @param arg Variable for the ID of the resource +/// +/// @returns ID of the declared resource. +RpsResourceId rpsRenderGraphDeclareResource(RpsRenderGraphBuilder hRenderGraphBuilder, + const char* name, + RpsResourceId localId, + RpsVariable arg); + +// Nodes + +/// @brief Adds a render graph node to a render graph. +/// +/// @param hRenderGraphBuilder Handle to the render graph builder. Must not be RPS_NULL_HANDLE. +/// @param nodeDeclId Node declaration ID. +/// @param userTag User controlled tag for associations with a node call. Is accessible through +/// RpsCmdCallbackContext. +/// @param callback Pointer to the callback function. +/// @param pCallbackUserContext Pointer to a user controlled structure to be passed to the callback. +/// @param pArgs Pointer to the parameters used for the callback. +/// Must not be NULL if numArgs != 0. +/// @param numArgs Number of parameters used for the callback. +/// +/// @returns ID of the command node. 
+RpsNodeId rpsRenderGraphAddNode(RpsRenderGraphBuilder hRenderGraphBuilder, + RpsNodeDeclId nodeDeclId, + uint32_t userTag, + PFN_rpsCmdCallback callback, + void* pCallbackUserContext, + const RpsVariable* pArgs, + uint32_t numArgs); + +/// @brief Gets the runtime resource info from a resource ID. +/// +/// Can be used to retrieve information such as the API resource handle, resource description and subresource info. +/// +/// @param hRenderGraph Handle to the render graph to get the resource info from. Must not be +/// RPS_NULL_HANDLE. +/// @param resourceId The index to the resource to get. This can be the index returned by +/// rpsRenderGraphDeclareResource or rpsRenderGraphGetParamResourceId. +/// @param temporalLayerIndex The temporal layer index. Ignored for non-temporal resource. +/// @param pResourceInfo Pointer in which the runtime resource info is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsRenderGraphGetResourceInfo(RpsRenderGraph hRenderGraph, + RpsResourceId resourceId, + uint32_t temporalLayerIndex, + RpsRuntimeResourceInfo* pResourceInfo); + +/// @brief Gets the runtime resource info of an output parameter. +/// +/// @param hRenderGraph Handle to the render graph to get the resource info from. +/// @param paramId Index of the resource parameter. Must be an output resource parameter +/// of a render graph entry (Declared as 'out [...] texture / buffer' in +/// RPSL or with +/// (RPS_PARAMETER_FLAG_OUT_BIT | RPS_PARAMETER_FLAG_RESOURCE_BIT) set. +/// @param arrayOffset Offset of the first parameters for array parameters. Must be 0 +/// otherwise. +/// @param numResources Number of resources to get infos for. +/// @param pResourceInfos Pointer to an array of RpsRuntimeResourceInfo in which the +/// numResources resource infos are returned. Must not be NULL if +/// numResources != 0. +/// +/// @returns Result code of the operation. See RpsResult for more info. 
+RpsResult rpsRenderGraphGetOutputParameterResourceInfos(RpsRenderGraph hRenderGraph, + RpsParamId paramId, + uint32_t arrayOffset, + uint32_t numResources, + RpsRuntimeResourceInfo* pResourceInfos); + +/// @brief Gets the main entry of a render graph. +/// +/// @param hRenderGraph Handle to the render graph. Must not be RPS_NULL_HANDLE. +/// +/// @returns Handle to the main entry subprogram of the render graph. +RpsSubprogram rpsRenderGraphGetMainEntry(RpsRenderGraph hRenderGraph); + +/// @} end addtogroup RpsRenderGraphRuntime + +/// @addtogroup RpsRenderGraphCommandRecording +/// @{ + +/// @defgroup RpsRenderGraphCommandRecordingVK Vulkan +/// @defgroup RpsRenderGraphCommandRecordingD3D11 D3D11 +/// @defgroup RpsRenderGraphCommandRecordingD3D12 D3D12 + +/// @brief Parameters of a batch of commands to be recorded by the graphics API in use. +/// +/// These commands are the result of scheduling and have to be executed on the same queue. +typedef struct RpsCommandBatch +{ + uint32_t queueIndex; ///< Index of the queue to submit the current batch to. + uint32_t waitFencesBegin; ///< Offset of the range of fence IDs into the + /// RpsRenderGraphBatchLayout::pWaitFenceIndices array to wait for before submitting. + uint32_t numWaitFences; ///< Number of fence IDs to wait for before submitting. + uint32_t signalFenceIndex; ///< Index of the fence to signal after submitting. + uint32_t cmdBegin; ///< Index of the first runtime command in the batch. + uint32_t numCmds; ///< Number of runtime commands in the batch. +} RpsCommandBatch; + +/// @brief Parameters of the command batch layout of a render graph. +typedef struct RpsRenderGraphBatchLayout +{ + uint32_t numCmdBatches; ///< Number of command batches. + uint32_t numFenceSignals; ///< Number of fence signals in the pipeline. + const RpsCommandBatch* pCmdBatches; ///< Pointer to an array of const RpsCommandBatch with + /// numCmdBatches command batch parameters. Must not be NULL.
+ const uint32_t* pWaitFenceIndices; ///< Pointer to an array of const uint32_t with numFenceSignals + /// wait fence indices. Must not be NULL if numFenceSignals != 0. Each + /// batch can wait for a range of fence IDs in this array, defined by its + /// waitFencesBegin and numWaitFences. +} RpsRenderGraphBatchLayout; + +/// @brief Bitflags for recording commands. +typedef enum RpsRecordCommandFlagBits +{ + RPS_RECORD_COMMAND_FLAG_NONE = 0, ///< No recording options. + RPS_RECORD_COMMAND_FLAG_ENABLE_COMMAND_DEBUG_MARKERS = 1 << 0, ///< Enables per-command debug markers during + /// command recording. +} RpsRecordCommandFlagBits; + +/// @brief Bitmask type for RpsRecordCommandFlagBits. +typedef RpsFlags32 RpsRecordCommandFlags; + +/// @brief Parameters for recording commands using a processed render graph. +typedef struct RpsRenderGraphRecordCommandInfo +{ + RpsRuntimeCommandBuffer hCmdBuffer; ///< Handle to the runtime command buffer object. + void* pUserContext; ///< User defined context to be passed to the callbacks during recording. + /// Passing NULL uses a default context instead. + uint64_t frameIndex; ///< Index of the frame to record commands for. + uint32_t cmdBeginIndex; ///< Index of the first command to be recorded. + uint32_t numCmds; ///< Number of commands to record. + RpsRecordCommandFlags flags; ///< Flags for specifying recording behavior. +} RpsRenderGraphRecordCommandInfo; + +/// @brief Gets the command batch layout of a render graph. +/// +/// @param hRenderGraph Handle to the render graph. Must not be RPS_NULL_HANDLE. +/// @param pBatchLayout Pointer to return the batch layout in. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsRenderGraphGetBatchLayout(RpsRenderGraph hRenderGraph, RpsRenderGraphBatchLayout* pBatchLayout); + +/// @brief Records graphics API commands from a processed render graph. +/// +/// @param hRenderGraph Handle to the render graph. Must not be RPS_NULL_HANDLE.
+/// @param pRecordInfo Pointer to recording parameters. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsRenderGraphRecordCommands(RpsRenderGraph hRenderGraph, const RpsRenderGraphRecordCommandInfo* pRecordInfo); + +/// @brief Constant for an invalid command ID. +#define RPS_CMD_ID_INVALID RPS_INDEX_NONE_U32 + +/// @brief Diagnostic information for a command of the render graph command stream. +typedef struct RpsCmdDiagnosticInfo +{ + uint32_t cmdIndex; ///< Index of the command in the scheduled command stream. Also used for + /// resource lifetimes. + RpsBool isTransition; ///< Indicator for transition commands. + union + { + //TODO Add more struct members if required by a tool. + struct + { + uint32_t dummy; ///< Dummy value to avoid an empty struct. + } cmd; + + struct + { + RpsAccessAttr prevAccess; ///< Access before the current transition. + RpsAccessAttr nextAccess; ///< Access after the current transition. + RpsSubresourceRange range; ///< Access range for the transition. + uint32_t resourceIndex; ///< Index of the resource to transition. + } transition; + }; + +} RpsCmdDiagnosticInfo; + +/// @brief Diagnostic information for a resource. +typedef struct RpsResourceDiagnosticInfo +{ + const char* name; ///< Null terminated string with the name of the resource. + uint32_t temporalChildIndex; ///< Index to the first temporal child of the temporal parent. Only for + /// use in temporal parent resources. + RpsBool isExternal; ///< Indicator for external resources. + RpsResourceDesc desc; ///< Description of the resource. + RpsClearValue clearValue; ///< Clear value of the resource. + RpsAccessAttr allAccesses; ///< Combination of all accesses of the resource throughout the frame. + RpsAccessAttr initialAccess; ///< Initial access of the resource. + uint32_t lifetimeBegin; ///< Index of the first command to access the resource. 
+ uint32_t lifetimeEnd; ///< Index of the last command to access the resource. + RpsGpuMemoryRequirement allocRequirement; ///< Allocation requirements for the memory of the resource. + RpsHeapPlacement allocPlacement; ///< Allocation placement for the memory of the resource. + RpsRuntimeResource hRuntimeResource; ///< Handle to the backend specific resource. +} RpsResourceDiagnosticInfo; + +/// @brief Diagnostic information for a heap. +typedef struct RpsHeapDiagnosticInfo +{ + uint64_t size; ///< Total size of the heap. May be 0 if the heap is not created in the backend + /// yet. + uint64_t usedSize; ///< Amount of memory allocated from the heap. + uint64_t maxUsedSize; ///< Maximum amount of memory ever allocated from the heap. + uint32_t alignment; ///< Alignment of the heap in bytes. + uint32_t memoryTypeIndex; ///< Index of the backend specific memory type of the heap. + RpsRuntimeHeap hRuntimeHeap; ///< Handle to the backend specific heap implementation. +} RpsHeapDiagnosticInfo; + +/// @brief Diagnostic information for parts of a render graph. +typedef struct RpsRenderGraphDiagnosticInfo +{ + uint32_t numResourceInfos; ///< Number of resource infos. + uint32_t numCommandInfos; ///< Number of command infos. + uint32_t numHeapInfos; ///< Number of heap infos. + + /// Pointer to an array of RpsResourceDiagnosticInfo with numResourceInfos resource infos. + const RpsResourceDiagnosticInfo* pResourceDiagInfos; + + /// Pointer to an array of RpsCmdDiagnosticInfo with numCommandInfos command infos. + const RpsCmdDiagnosticInfo* pCmdDiagInfos; + + /// Pointer to an array of RpsHeapDiagnosticInfo with numHeapInfos heap infos. + const RpsHeapDiagnosticInfo* pHeapDiagInfos; +} RpsRenderGraphDiagnosticInfo; + +/// @brief Bitflags for diagnostic info modes. +typedef enum RpsRenderGraphDiagnosticInfoFlagBits +{ + RPS_RENDER_GRAPH_DIAGNOSTIC_INFO_DEFAULT = 0, ///< Diagnostic info is taken from the latest frame.
+ RPS_RENDER_GRAPH_DIAGNOSTIC_INFO_USE_CACHED_BIT = (1u << 0), ///< The previously cached diagnostic info is returned + ///< if not called for the first time. +} RpsRenderGraphDiagnosticInfoFlagBits; + +/// @brief Bitmask type for RpsRenderGraphDiagnosticInfoFlagBits. +typedef RpsFlags32 RpsRenderGraphDiagnosticInfoFlags; + +/// @brief Gets diagnostic information from a render graph. +/// +/// Diagnostic information is intended to be consumed by tools related to RPS, e.g. the visualizer tool set. +/// +/// @param hRenderGraph Handle to the render graph. Must not be RPS_NULL_HANDLE. +/// @param pDiagInfo Pointer in which the diagnostic information is returned. Must not be NULL. +/// @param diagnosticFlags Flags for the diagnostic mode. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsRenderGraphGetDiagnosticInfo(RpsRenderGraph hRenderGraph, + RpsRenderGraphDiagnosticInfo* pDiagInfo, + RpsRenderGraphDiagnosticInfoFlags diagnosticFlags); + +/// @brief Parameters of a command callback context. +typedef struct RpsCmdCallbackContext +{ + /// Handle to the command buffer for command recording. + RpsRuntimeCommandBuffer hCommandBuffer; + + /// User context passed as RpsRenderGraphRecordCommandInfo::pUserContext. Can vary per rpsRenderGraphRecordCommands + /// call and can e.g. be used as per-thread context if doing multi-threaded recording. + void* pUserRecordContext; + + /// User context specified with the command node callback function, for example via a rpsProgramBindNode call. Can + /// vary per callback. + void* pCmdCallbackContext; + + /// Pointer to an array of void* const with numArgs pointers to arguments to use for the callback. + /// Must not be NULL if numArgs != 0. + void* const* ppArgs; + + /// Number of arguments defined for the callback. + uint32_t numArgs; + + /// User defined tag for associations with a specific node. Can be set by passing a value to + /// rpsCmdCallNode. 
+ uint32_t userTag; +} RpsCmdCallbackContext; + +/// @brief Parameters for accessing a resource. +typedef struct RpsResourceAccessInfo +{ + RpsResourceId resourceId; ///< ID of the resource to access. + RpsSubresourceRange range; ///< Subresource range to access. + RpsAccessAttr access; ///< Attributes for access type and shader stages. + RpsFormat viewFormat; ///< Format to use for accessing. +} RpsResourceAccessInfo; + +/// @brief Parameters of a graphics node render target. +typedef struct RpsCmdRenderTargetInfo RpsCmdRenderTargetInfo; + +/// @brief Parameters of a graphics node viewport. +typedef struct RpsCmdViewportInfo RpsCmdViewportInfo; + +/// @brief Gets the render targets parameters from the current recording context. +/// +/// Must only be called from a graphics node callback. +/// +/// @param pContext Pointer to the current recording context. Must not be NULL. +/// @param pRenderTargetInfo Pointer in which the render target parameters are returned. +/// Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsCmdGetRenderTargetsInfo(const RpsCmdCallbackContext* pContext, RpsCmdRenderTargetInfo* pRenderTargetInfo); + +/// @brief Gets the viewport info from the current recording context. +/// +/// Must only be called from a graphics node callback. +/// +/// @param pContext Pointer to the current recording context. Must not be NULL +/// @param pViewportInfo Pointer in which the viewport parameters are returned to. +/// Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsCmdGetViewportInfo(const RpsCmdCallbackContext* pContext, RpsCmdViewportInfo* pViewportInfo); + +/// @brief Parameters for explicitly beginning a render pass from a command callback. +/// +/// Intended for controlling RenderPass suspend / resume & secondary command buffer behaviors. 
+typedef struct RpsCmdRenderPassBeginInfo +{ + RpsRuntimeRenderPassFlags flags; ///< Flags for render pass properties. +} RpsCmdRenderPassBeginInfo; + +/// @brief Clones a command callback context to create a secondary context and assigns it a new command buffer. +/// +/// The cloned context inherits states from the context being cloned, such as current command info and command +/// arguments. The typical use case is multi-threaded command recording from within a node callback. +/// Must be synchronized by the caller if called from multiple threads. The created context pointer is valid until the +/// next render graph update. +/// +/// @param pContext Pointer to the current command callback context. +/// @param hCmdBufferForDerivedContext Handle to the command buffer to be associated with the new context. +/// @param ppDerivedContext Pointer in which a pointer to the cloned command callback context is +/// returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsCmdCloneContext(const RpsCmdCallbackContext* pContext, + RpsRuntimeCommandBuffer hCmdBufferForDerivedContext, + const RpsCmdCallbackContext** ppDerivedContext); + +/// @brief Begins a rasterization rendering pass. +/// +/// This may begin e.g. a VkRenderPass or set up render targets and viewport/scissor rect states for APIs that do not +/// support render pass objects. Usually used for multi-threaded rendering from within a command callback. +/// +/// @param pContext Pointer to the current command callback context. Must not be NULL. +/// @param flags Flags for the render pass behavior. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsCmdBeginRenderPass(const RpsCmdCallbackContext* pContext, RpsRuntimeRenderPassFlags flags); + +/// @brief Ends a rasterization rendering pass. +/// +/// Must be paired with rpsCmdBeginRenderPass. Usually used for multi-threaded rendering from within a command callback.
+/// +/// @param pContext Pointer to the current command callback context. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsCmdEndRenderPass(const RpsCmdCallbackContext* pContext); + +/// @brief Sets a new command buffer to be used for command recording. +/// +/// RPS does not keep track of previously used command buffers. It is the responsibility of the application to track and +/// submit them in order accordingly. Usually used for multi-threaded rendering. +/// +/// @param pContext Pointer to the current command callback context. Must not be NULL. +/// @param hCmdBuffer Handle to the new command buffer. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsCmdSetCommandBuffer(const RpsCmdCallbackContext* pContext, RpsRuntimeCommandBuffer hCmdBuffer); + +/// @brief Gets the name of the current cmd node. +/// +/// @param pContext Pointer to the current command callback context. Must not be NULL. +/// @param ppNodeName Pointer in which a null terminated string with the name of the node +/// is returned. Must not be NULL. +/// @param pNodeNameLength Pointer in which the node name length is returned. Can be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsCmdGetNodeName(const RpsCmdCallbackContext* pContext, const char** ppNodeName, size_t* pNodeNameLength); + +/// @brief Gets the description of a node argument. +/// +/// @param pContext Pointer to the current command callback context. Must not be NULL. +/// @param paramID Index of the parameter in the current command node callback. +/// @param pDesc Pointer in which the parameter description is returned. +/// Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. 
+RpsResult rpsCmdGetParamDesc(const RpsCmdCallbackContext* pContext, RpsParamId paramID, RpsParameterDesc* pDesc); + +/// @brief Gets an array of resource descriptions of a resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the resource descriptions from. Must be a resource +/// array argument if numDescs > 1. +/// @param srcArrayOffset Offset to the first resource description to get. +/// @param pResourceDescs Pointer to an array of RpsResourceDesc in which the numDescs +/// resource descriptions are returned. Must not be NULL if numDescs != 0. +/// @param numDescs Number of resource descriptions to get. Requires srcArrayOffset + numDescs to +/// be less than the number of elements in the node argument. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsCmdGetArgResourceDescArray(const RpsCmdCallbackContext* pContext, + RpsParamId argIndex, + uint32_t srcArrayOffset, + RpsResourceDesc* pResourceDescs, + uint32_t numDescs); + +/// @brief Gets the resource description from a node argument. +/// +/// @param pContext Pointer to the current command callback context. Must not be NULL. +/// @param argIndex Index of the parameter in the current command node callback. +/// @param pResourceDesc Pointer in which the resource description is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsCmdGetArgResourceDesc(const RpsCmdCallbackContext* pContext, + RpsParamId argIndex, + RpsResourceDesc* pResourceDesc); + +/// @brief Gets an array of runtime resources from a resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the runtime resource from. Must be a resource array +/// argument if numResources > 1. +/// @param srcArrayOffset Offset to the first runtime resource to get. 
+/// @param pRuntimeResources Pointer to an array of RpsRuntimeResource in which the +/// numResources runtime resources are returned. +/// Must not be NULL if numResources != 0. +/// @param numResources Number of runtime resources to get. Requires srcArrayOffset + numResources to +/// be less than the number of elements in the node argument. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsCmdGetArgRuntimeResourceArray(const RpsCmdCallbackContext* pContext, + RpsParamId argIndex, + uint32_t srcArrayOffset, + RpsRuntimeResource* pRuntimeResources, + uint32_t numResources); + +/// @brief Gets the runtime resource from a resource node argument. +/// +/// @param pContext Pointer to the current command callback context. Must not be NULL. +/// @param argIndex Index of the parameter in the current command node callback. +/// @param pRuntimeResource Pointer in which the runtime resource is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsCmdGetArgRuntimeResource(const RpsCmdCallbackContext* pContext, + RpsParamId argIndex, + RpsRuntimeResource* pRuntimeResource); + +/// @brief Gets an array of resource access infos from a resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the resource access infos from. Must be a resource +/// array argument if numAccessess > 1. +/// @param srcArrayOffset Offset to the first resource access info to get. +/// @param pResourceAccessInfos Pointer to an array of RpsResourceAccessInfo in which the +/// numAccessess resource access infos are returned. +/// Must not be NULL if numAccessess != 0. +/// @param numAccessess Number of resource access infos to get. Requires srcArrayOffset + numAccessess +/// to be less than the number of elements in the node argument. +/// +/// @returns Result code of the operation. See RpsResult for more info. 
+RpsResult rpsCmdGetArgResourceAccessInfoArray(const RpsCmdCallbackContext* pContext, + RpsParamId argIndex, + uint32_t srcArrayOffset, + RpsResourceAccessInfo* pResourceAccessInfos, + uint32_t numAccessess); + +/// @brief Gets the resource access info from a resource node argument. +/// +/// @param pContext Pointer to the current command callback context. Must not be NULL. +/// @param argIndex Index of the parameter in the current command node callback. +/// @param pResourceAccessInfo Pointer in which the resource access info is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsCmdGetArgResourceAccessInfo(const RpsCmdCallbackContext* pContext, + RpsParamId argIndex, + RpsResourceAccessInfo* pResourceAccessInfo); + +/// Get the argument pointer from a node argument. +/// +/// @param pContext Pointer to the current command callback context. Must not be NULL. +/// @param argIndex Index of the parameter. +/// +/// @returns Pointer to the argument. +static inline RpsVariable rpsCmdGetArg(const RpsCmdCallbackContext* pContext, uint32_t argIndex) +{ + return pContext->ppArgs[argIndex]; +} + +/// @brief Signature of functions for acquiring command buffers in a simplified execution mode. +/// +/// Reserved for future use. +typedef RpsResult (*PFN_rpsAcquireRuntimeCommandBuffer)(void* pUserContext, + uint32_t queueIndex, + uint32_t numCmdBuffers, + RpsRuntimeCommandBuffer* pCmdBuffers, + uint32_t* pCmdBufferIdentifiers); + +/// @brief Signature of functions for submitting command buffers in a simplified execution mode. +/// +/// Reserved for future use. +typedef RpsResult (*PFN_rpsSubmitRuntimeCommandBuffer)(void* pUserContext, + uint32_t queueIndex, + const RpsRuntimeCommandBuffer* pRuntimeCmdBufs, + uint32_t numRuntimeCmdBufs, + uint32_t waitId, + uint32_t signalId); + +/// @brief Parameters for executing a render graph. 
+typedef struct RpsRenderGraphExecuteInfo +{ + void* pUserContext; ///< Pointer to a user defined context to be passed + /// to the callbacks. + PFN_rpsAcquireRuntimeCommandBuffer pfnAcquireRuntimeCmdBufCb; ///< Pointer to a function to acquire command + /// buffers. + PFN_rpsSubmitRuntimeCommandBuffer pfnSubmitRuntimeCmdBufCb; ///< Pointer to a function to submit command + /// buffers. +} RpsRenderGraphExecuteInfo; + +/// @brief Executes a render graph. +/// +/// @param hRenderGraph Handle to the render graph. Must not be RPS_NULL_HANDLE. +/// @param pExecuteInfo Pointer to render graph execution parameters. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for +/// more info. +RpsResult rpsRenderGraphExecute(RpsRenderGraph hRenderGraph, const RpsRenderGraphExecuteInfo* pExecuteInfo); + +/// @} end addtogroup RpsRenderGraphCommandRecording + +#ifdef __cplusplus +} +#endif //__cplusplus + +/// @addtogroup RpsParamAttr +/// @{ + +/// @brief Initializes a parameter attribute. +/// +/// @param pAttr Pointer to the parameter attribute. Must not be NULL. +/// @param accessFlags Flags for accessing the parameter. +/// @param shaderStageFlags Flags for shader stages of the parameter access. +/// @param semantic Semantic of the parameter. +/// @param semanticIndex Semantic index of the parameter. +/// +/// @returns Pointer to the initialized parameter attribute. +static inline const RpsParamAttr* rpsInitParamAttr(RpsParamAttr* pAttr, + RpsAccessFlags accessFlags, + RpsShaderStageFlags shaderStageFlags, + RpsSemantic semantic, + uint32_t semanticIndex) +{ + pAttr->access.accessFlags = accessFlags; + pAttr->access.accessStages = shaderStageFlags; + pAttr->semantic.semantic = semantic; + pAttr->semantic.semanticIndex = semanticIndex; + + return pAttr; +} + +/// @brief Initializes a parameter attribute with only the access attribute being specified. +/// +/// @param pAttr Pointer to the parameter attribute. Must not be NULL. 
+/// @param accessFlags Flags for accessing the parameter. +/// @param shaderStageFlags Flags for shader stages of the parameter access. +/// +/// @returns Pointer to the initialized parameter attribute. +static inline const RpsParamAttr* rpsInitParamAttrAccess(RpsParamAttr* pAttr, + RpsAccessFlags accessFlags, + RpsShaderStageFlags shaderStageFlags) +{ + return rpsInitParamAttr(pAttr, accessFlags, shaderStageFlags, RPS_SEMANTIC_UNSPECIFIED, 0); +} + +/// @brief Initializes a parameter attribute with only the semantic attribute being specified. +/// +/// @param pAttr Pointer to the parameter attribute. Must not be NULL. +/// @param semantic Semantic of the parameter. +/// @param semanticIndex Semantic index of the parameter. +/// +/// @returns Pointer to the initialized parameter attribute. +static inline const RpsParamAttr* rpsInitParamAttrSemantic(RpsParamAttr* pAttr, + RpsSemantic semantic, + uint32_t semanticIndex) +{ + return rpsInitParamAttr(pAttr, RPS_ACCESS_UNKNOWN, RPS_SHADER_STAGE_NONE, semantic, semanticIndex); +} + +/// @} end addtogroup RpsParamAttr + +#ifdef __cplusplus + +#include + +#include "rps/core/rps_cmd_callback_wrapper.hpp" + +/// @addtogroup RpsRenderGraphRuntime +/// @{ + +/// @brief Declares an on-demand node type during the render graph construction. +/// +/// Normally, node declarations are specified in the RenderGraphSignature ahead of time. This function allows additional +/// node declarations to be added. Note: The lifetime of the dynamic node declaration is temporary and it is only until +/// the next render graph update. +/// +/// @param hBuilder Handle to the RenderGraphBuilder. Must not be RPS_NULL_HANDLE. +/// @param name Null terminated string with the name of the node declaration. Passing NULL +/// registers the node as a fallback for calling unknown nodes. +/// @param flags Flags of the node declaration. +/// @param pParamDescs Pointer to an array of const RpsParameterDesc with numParams parameter +/// descriptions. 
Must not be NULL. +/// @param numParams Number of parameters the node has. +/// +/// @returns ID of the node declaration if successful, RPS_NODEDECL_ID_INVALID otherwise. +static inline RpsNodeDeclId rpsRenderGraphDeclareDynamicNode(RpsRenderGraphBuilder hBuilder, + const char* name, + RpsNodeDeclFlags flags, + const RpsParameterDesc* pParamDescs, + uint32_t numParams) +{ + RpsNodeDesc nodeDesc = {}; + nodeDesc.flags = flags; + nodeDesc.numParams = numParams; + nodeDesc.pParamDescs = pParamDescs; + nodeDesc.name = name; + + return rpsRenderGraphDeclareDynamicNode(hBuilder, &nodeDesc); +} + +/// @brief Declares an on-demand node type during the render graph construction. +/// +/// Normally, node declarations are specified in the RenderGraphSignature ahead of time. This function allows additional +/// node declarations to be added. Note: The lifetime of the dynamic node declaration is temporary and it is only valid until +/// the next render graph update. +/// +/// @param hRenderGraphBuilder Handle to the RenderGraphBuilder. Must not be RPS_NULL_HANDLE. +/// @param name Null terminated string with the name of the node declaration. Passing NULL +/// registers the node as a fallback for calling unknown nodes. +/// @param flags Flags of the node declaration. +/// @param paramDescs Initializer list of parameter descriptions for the node parameters. +/// +/// @returns ID of the node declaration if successful, RPS_NODEDECL_ID_INVALID otherwise. +static inline RpsNodeDeclId rpsRenderGraphDeclareDynamicNode(RpsRenderGraphBuilder hRenderGraphBuilder, + const char* name, + RpsNodeDeclFlags flags, + std::initializer_list paramDescs) +{ + return rpsRenderGraphDeclareDynamicNode( + hRenderGraphBuilder, name, flags, paramDescs.begin(), uint32_t(paramDescs.size())); +} + +/// @brief Adds a render graph node to a render graph. +/// +/// @param hRenderGraphBuilder Handle to the render graph builder. Must not be RPS_NULL_HANDLE. 
+/// @param nodeDeclId Declaration ID of the node generated by rpsRenderGraphDeclareDynamicNode. +/// @param userTag User controlled tag for associations with a node call. Is accessible through +/// RpsCmdCallbackContext. +/// @param callback Pointer to the callback function. Passing NULL uses the default one +/// registered for the empty string name instead. +/// @param pCallbackUserContext Pointer to a user controlled structure to be passed to the callback. May be +/// NULL. +/// @param args Initializer list of RPS variables for the node parameters. +/// +/// @returns ID of the command node. +static inline RpsNodeId rpsRenderGraphAddNode(RpsRenderGraphBuilder hRenderGraphBuilder, + RpsNodeDeclId nodeDeclId, + uint32_t userTag, + PFN_rpsCmdCallback callback, + void* pCallbackUserContext, + std::initializer_list args) +{ + return rpsRenderGraphAddNode( + hRenderGraphBuilder, nodeDeclId, userTag, callback, pCallbackUserContext, args.begin(), uint32_t(args.size())); +} + +/// @brief Allocates memory for an object from a render graph. +/// +/// The lifetime of the memory lasts until the next call to rpsRenderGraphUpdate. +/// +/// @tparam T Type of object. Note: This function only allocates memory. It is the application's +/// responsibility to call constructors and destructors when needed. +/// @param hRenderGraphBuilder Handle to the render graph builder. Must not be RPS_NULL_HANDLE. +/// +/// @returns Pointer to the allocated object if successful, NULL otherwise. Only valid until the +/// next render graph update. +template +static inline T* rpsRenderGraphAllocateData(RpsRenderGraphBuilder hRenderGraphBuilder) +{ + return static_cast(rpsRenderGraphAllocateDataAligned(hRenderGraphBuilder, sizeof(T), alignof(T))); +} + +/// @} end addtogroup RpsRenderGraphRuntime + +/// @brief Gets a node argument by the argument index. +/// +/// Must only be called from a command node callback. +/// The function doesn't check if the type casting is valid. 
Users can call rpsCmdGetParamDesc to query the type info and +/// array size of the parameter before calling this function, to ensure the cast is safe. +/// +/// @tparam T Type to cast the node argument to. The function does not perform validation for the type +/// cast. +/// @param pContext Pointer to the context to get the description from. Must not be NULL. +/// @param paramId Index of the argument. +/// +/// @returns Pointer to a const type T object. nullptr if the index is out of range. +/// +/// @ingroup RpsRenderGraphCommandRecording +template +const T* rpsCmdGetArg(const RpsCmdCallbackContext* pContext, RpsParamId paramId) +{ + return (paramId < pContext->numArgs) ? static_cast(pContext->ppArgs[paramId]) : nullptr; +} + +/// @brief Gets a node argument by the argument index. +/// +/// Must only be called from a command node callback. +/// +/// @tparam T Type to cast the node argument to. The function does not perform validation for the type +/// cast. +/// @tparam Index Index of the argument. +/// @param pContext Pointer to the context to get the description from. Must not be NULL. +/// +/// @returns Pointer to a const type T object. nullptr if the index is out of range. +/// +/// @ingroup RpsRenderGraphCommandRecording +template +const T* rpsCmdGetArg(const RpsCmdCallbackContext* pContext) +{ + return rpsCmdGetArg(pContext, Index); +} + +namespace rps +{ + // TODO! For now, a ParamAttrList is identical to an RpsParamAttr (containing one access & one semantic attr). + // Need to extend ParamAttrList to be a real list of attributes to properly support features like Before/After + // access. + + template + const T* GetCmdArg(const RpsCmdCallbackContext* pContext, uint32_t index) + { + return (index < pContext->numArgs) ? 
static_cast(pContext->ppArgs[index]) : nullptr; + } + + template + const T* GetCmdArg(const RpsCmdCallbackContext* pContext) + { + return GetCmdArg(pContext, Index); + } + + /// @brief A list of attributes a render graph node parameter can have. + struct ParamAttrList : public RpsParamAttr + { + /// @brief Constructor with parameters only for constructing a set of access attributes. + /// + /// @param accessFlags Access flags of the access attribute. + /// @param shaderStages Shader stages of the access attribute. + constexpr ParamAttrList(RpsAccessFlags accessFlags, RpsShaderStageFlags shaderStages = RPS_SHADER_STAGE_NONE) + : RpsParamAttr{{accessFlags, shaderStages}, {RPS_SEMANTIC_UNSPECIFIED, 0}} + { + } + + /// @brief Constructor with parameters only for constructing a set of access attributes. + /// + /// @param inAccess Access attribute to copy from. + constexpr ParamAttrList(RpsAccessAttr inAccess) + : RpsParamAttr{inAccess, {RPS_SEMANTIC_UNSPECIFIED, 0}} + { + } + + /// @brief Constructor with parameters only for constructing a semantic attribute. + /// + /// @param semantic Semantic for the semantic attribute. + /// @param semanticIndex Index at which to define this semantic. + constexpr ParamAttrList(RpsSemantic semantic, uint32_t semanticIndex = 0) + : RpsParamAttr{{RPS_ACCESS_UNKNOWN, RPS_SHADER_STAGE_NONE}, {semantic, semanticIndex}} + { + } + + /// @brief Constructor with parameters only for constructing a semantic attribute. + /// + /// @param inSemantic Semantic attribute to copy from. + constexpr ParamAttrList(RpsSemanticAttr inSemantic) + : RpsParamAttr{{RPS_ACCESS_UNKNOWN, RPS_SHADER_STAGE_NONE}, {inSemantic}} + { + } + + /// @brief Constructor for a default attribute. + constexpr ParamAttrList() + : RpsParamAttr{{RPS_ACCESS_UNKNOWN, RPS_SHADER_STAGE_NONE}, {RPS_SEMANTIC_UNSPECIFIED, 0}} + { + } + + static RpsParamAttrList ToHandle(ParamAttrList* pAttrList); + }; + + /// @brief Creates a list of parameter attributes. 
+ /// + /// @param allocator Reference to an object for allocating objects with a New(TArgs... args) + /// call. + /// @param args Parameter pack for constructing of an object. + /// + /// @returns Constructed parameters attribute list. + template + RpsParamAttrList MakeParamAttrList(TAllocator& allocator, TArgs... args) + { + return ParamAttrList::ToHandle(allocator.template New(args...)); + } + + namespace details + { + + // RpsRuntimeResource handle + template + struct CommandArgUnwrapper + { + RpsRuntimeResource operator()(const RpsCmdCallbackContext* pContext) + { + RpsRuntimeResource resource = {}; + const RpsResult result = rpsCmdGetArgRuntimeResource(pContext, Index, &resource); + if (RPS_FAILED(result)) + { + rpsCmdCallbackReportError(pContext, result); + } + return resource; + } + }; + + // RpsResourceDesc info + template + struct CommandArgUnwrapper + { + RpsResourceDesc operator()(const RpsCmdCallbackContext* pContext) + { + RpsResourceDesc desc = {}; + const RpsResult result = rpsCmdGetArgResourceDesc(pContext, Index, &desc); + if (RPS_FAILED(result)) + { + rpsCmdCallbackReportError(pContext, result); + } + return desc; + } + }; + + // RpsResourceAccess info + template + struct CommandArgUnwrapper + { + RpsResourceAccessInfo operator()(const RpsCmdCallbackContext* pContext) + { + RpsResourceAccessInfo access = {}; + const RpsResult result = rpsCmdGetArgResourceAccessInfo(pContext, Index, &access); + if (RPS_FAILED(result)) + { + rpsCmdCallbackReportError(pContext, result); + } + return access; + } + }; + + RpsResult ProgramGetBindingSlot(RpsSubprogram hProgram, + const char* name, + size_t size, + RpsCmdCallback** ppCallback); + } // namespace details +} // namespace rps + +/// @addtogroup RpsSubprogram +/// @{ + +/// @brief Binds a command callback implementation to a node type in an rps program. +/// +/// This can be used to bind a C++ member function as the command callback. +/// +/// @param hProgram Handle to the subprogram to bind the node. 
Must not be RPS_NULL_HANDLE. +/// @param name Null terminated string with the name of the node to bind to. +/// @param cmdCallback Pointer to a function to be bound to the node as the command recording callback. +/// @param pCallbackContext User defined context to be passed back to the user as parameter of the callback. +/// @param flags Flags for the callback behavior. +/// +/// @returns Result code of the operation. See RpsResult for more info. +template , + typename = typename std::enable_if::value && + std::is_member_function_pointer::value>::type> +RpsResult rpsProgramBindNode(RpsSubprogram hProgram, + const char* name, + TFunc cmdCallback, + TTarget* pCallbackContext, + RpsCmdCallbackFlags flags = RPS_CMD_CALLBACK_FLAG_NONE) +{ + RpsCmdCallback* pSlot = {}; + RpsResult result = rps::details::ProgramGetBindingSlot(hProgram, name, sizeof(TContext), &pSlot); + + if (RPS_SUCCEEDED(result)) + { + new (pSlot->pUserContext) TContext(pCallbackContext, cmdCallback); + pSlot->pfnCallback = TContext::Callback; + pSlot->flags = flags; + } + + return result; +} + +/// @brief Binds a command callback implementation to a node type in an rps program. +/// +/// @param hProgram Handle to the subprogram to bind the node. Must not be RPS_NULL_HANDLE. +/// @param name Null terminated string with the name of the node to bind to. +/// @param cmdCallback Pointer to a function to be bound to the node as the command recording callback. +/// @param flags Flags for the callback behavior. +/// +/// @returns Result code of the operation. See RpsResult for more info. 
+template , + typename = typename std::enable_if::value && + !std::is_member_function_pointer::value>::type> +RpsResult rpsProgramBindNode(RpsSubprogram hProgram, + const char* name, + TFunc cmdCallback, + RpsCmdCallbackFlags flags = RPS_CMD_CALLBACK_FLAG_NONE) +{ + RpsCmdCallback* pSlot = {}; + RpsResult result = rps::details::ProgramGetBindingSlot(hProgram, name, sizeof(TContext), &pSlot); + + if (RPS_SUCCEEDED(result)) + { + new (pSlot->pUserContext) TContext(cmdCallback); + pSlot->pfnCallback = TContext::Callback; + pSlot->flags = flags; + } + + return result; +} + +/// @brief Binds a command callback implementation to a node type in an rps program. +/// +/// @param hProgram Handle to the subprogram to bind the node. Must not be RPS_NULL_HANDLE. +/// @param name Null terminated string with the name of the node to bind to. +/// @param pfnCmdCallback Function pointer of type PFN_rpsCmdCallback to be bound to the node as the command +/// recording callback. +/// @param pCallbackContext User defined context to be passed back when the callback is called. +/// @param flags Flags for the callback behavior. +/// +/// @returns Result code of the operation. See RpsResult for more info. 
+static inline RpsResult rpsProgramBindNode(RpsSubprogram hProgram, + const char* name, + PFN_rpsCmdCallback pfnCmdCallback, + void* pCallbackContext = nullptr, + RpsCmdCallbackFlags flags = RPS_CMD_CALLBACK_FLAG_NONE) +{ + RpsCmdCallback callbackInfo = {}; + callbackInfo.pfnCallback = pfnCmdCallback; + callbackInfo.pUserContext = pCallbackContext; + callbackInfo.flags = flags; + + return rpsProgramBindNodeCallback(hProgram, name, &callbackInfo); +} + +/// @} end addtogroup RpsSubprogram + +#endif //__cplusplus + +/// @} end defgroup Runntime + +#endif //RPS_RUNTIME_H diff --git a/include/rps/runtime/common/rps_runtime_callbacks.h b/include/rps/runtime/common/rps_runtime_callbacks.h new file mode 100644 index 0000000..ac72996 --- /dev/null +++ b/include/rps/runtime/common/rps_runtime_callbacks.h @@ -0,0 +1,231 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef RPS_RUNTIME_CALLBACKS_H +#define RPS_RUNTIME_CALLBACKS_H + +#include "rps/runtime/common/rps_format.h" +#include "rps/runtime/common/rps_resource.h" +#include "rps/runtime/common/rps_access.h" +#include "rps/runtime/common/rps_render_states.h" + +#ifdef __cplusplus +extern "C" { +#endif //__cplusplus + +RPS_DECLARE_OPAQUE_HANDLE(RpsRuntimeHeap); +RPS_DECLARE_OPAQUE_HANDLE(RpsRuntimeResource); + +/// @addtogroup RpsRuntimeDevice +/// @{ + +/// @brief Parameters for creating a custom GPU memory heap. +typedef struct RpsRuntimeOpCreateHeapArgs +{ + uint32_t memoryTypeIndex; ///< Index of the type of memory the allocation should be created from. API + /// backend specific. + size_t size; ///< Size of the heap in bytes. + size_t alignment; ///< Alignment of the heap in bytes. + const char* debugName; ///< Null terminated string with the debug name of the heap. Passing NULL sets no name. 
+ RpsRuntimeHeap* phHeap; ///< Pointer to a handle in which the heap is returned. Must not be NULL. +} RpsRuntimeOpCreateHeapArgs; + +/// @brief Parameters for destroying an array of runtime GPU memory heaps. +typedef struct RpsRuntimeOpDestroyHeapArgs +{ + uint32_t numHeaps; ///< Number of heaps to destroy. + RpsRuntimeHeap* phRtHeaps; ///< Pointer to an array of RpsRuntimeHeap with numHeaps heap handles to + /// destroy. Must not be NULL if numHeaps != 0. +} RpsRuntimeOpDestroyHeapArgs; + +/// @brief Parameters for creating a runtime resource. +typedef struct RpsRuntimeOpCreateResourceArgs +{ + RpsResourceId resourceId; ///< ID of the resource declaration. + RpsResourceDesc desc; ///< Resource description. + RpsVariable originalDesc; ///< Unmodified resource description as originally defined by the user. + RpsClearValue clearValue; ///< Default value for clearing the resource. + RpsGpuMemoryRequirement allocRequirement; ///< Allocation requirements. + RpsHeapPlacement allocPlacement; ///< Allocation placement parameters. + RpsAccessAttr allAccesses; ///< Combined accesses of the resource. + RpsAccessAttr initialAccess; ///< Initial access of the resource in a frame. + uint32_t numMutableFormats; ///< Number of mutable formats the resource can be used with. + RpsFormat* mutableFormats; ///< Pointer to an array of RpsFormat with numMutableFormats + /// formats the resource can be used with. Must not be NULL if + /// numMutableFormats != 0. + RpsBool bBufferFormattedWrite; ///< Indicator for a formatted texel buffer (maps to + /// VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT). + RpsBool bBufferFormattedRead; ///< Indicator for a formatted texel buffer (maps to + /// VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT). + RpsRuntimeResource* phRuntimeResource; ///< Pointer to a handle to the runtime resource to be returned. +} RpsRuntimeOpCreateResourceArgs; + +/// @brief Parameters for destroying an array of runtime resources. 
+typedef struct RpsRuntimeOpDestroyResourceArgs +{ + RpsResourceType type; ///< Type of the resources. + uint32_t numResources; ///< Number of resources. + const RpsRuntimeResource* phRuntimeResources; ///< Pointer to an array of RpsRuntimeResource with + /// numResources resource handles. + /// Must not be NULL if numResources != 0. +} RpsRuntimeOpDestroyResourceArgs; + +/// @brief Parameters for creating the user defined resources associated with a node. +/// +/// These can be e.g. descriptor set allocations, PSOs or custom viewports. +typedef struct RpsRuntimeOpCreateNodeUserResourcesArgs +{ + void* pUserContext; ///< User defined context to pass to the node. + void* const* ppArgs; ///< Pointer to an array of void* with numArgs arguments to be passed to the + /// node. + uint32_t numArgs; ///< Number of arguments of the node. + uint32_t nodeTag; ///< User defined node tag to be used for node associations. +} RpsRuntimeOpCreateNodeUserResourcesArgs; + +/// @brief Debug marker modes. +typedef enum RpsRuntimeDebugMarkerMode +{ + RPS_RUNTIME_DEBUG_MARKER_BEGIN, ///< Beginning of a marker region. + RPS_RUNTIME_DEBUG_MARKER_LABEL, ///< Standalone label. + RPS_RUNTIME_DEBUG_MARKER_END, ///< End of a marker region. +} RpsRuntimeDebugMarkerMode; + +/// @brief Bitflags for render pass behavior. +typedef enum RpsRuntimeRenderPassFlagBits +{ + RPS_RUNTIME_RENDER_PASS_FLAG_NONE = 0, ///< No special behavior. + RPS_RUNTIME_RENDER_PASS_SUSPENDING = 1 << 0, ///< Suspending render pass in D3D12. + RPS_RUNTIME_RENDER_PASS_RESUMING = 1 << 1, ///< Resuming render pass in D3D12. + RPS_RUNTIME_RENDER_PASS_EXECUTE_SECONDARY_COMMAND_BUFFERS = 1 << 2, ///< Render pass executes secondary command + /// buffers. + RPS_RUNTIME_RENDER_PASS_SECONDARY_COMMAND_BUFFER = 1 << 3, ///< Current render pass is on a secondary + /// command buffer. +} RpsRuntimeRenderPassFlagBits; + +/// @brief Bitmask type for RpsRuntimeRenderPassFlagBits. 
+typedef RpsFlags32 RpsRuntimeRenderPassFlags; + +/// @brief Parameters for recording a debug marker command. +typedef struct RpsRuntimeOpRecordDebugMarkerArgs +{ + RpsRuntimeCommandBuffer hCommandBuffer; ///< Handle to the runtime command buffer to record the command for. + /// Must not be RPS_NULL_HANDLE. + void* pUserRecordContext; ///< User context passed as RpsRenderGraphRecordCommandInfo::pUserContext. + RpsRuntimeDebugMarkerMode mode; ///< Marker mode. + const char* text; ///< String payload of the marker. Ignored for + /// RPS_RUNTIME_DEBUG_MARKER_END. +} RpsRuntimeOpRecordDebugMarkerArgs; + +/// @brief Parameters for setting a resource debug name. +typedef struct RpsRuntimeOpSetDebugNameArgs +{ + RpsRuntimeResource hResource; ///< Handle to the runtime resource. Only supports resource objects at the moment. + /// Must not be RPS_NULL_HANDLE. + RpsResourceType resourceType; ///< Resource type. + const char* name; ///< Null terminated string with the resource name. +} RpsRuntimeOpSetDebugNameArgs; + +/// @brief Signature of functions for defining an array of render graph phases. +/// +/// @param pUserContext User defined context. +/// @param hRenderGraph Handle to the render graph to build the phases for. +/// Must not be RPS_NULL_HANDLE. +/// @param ppPhaseInfo Pointer to an array of const RpsRenderGraphPhaseInfo* in which +/// *pNumPhases render graph phase objects are returned. Must not be NULL. +/// @param pNumPhases Pointer to write the number of created phases to. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +typedef RpsResult (*PFN_rpsRuntimeDeviceBuildRenderGraphPhases)(void* pUserContext, + RpsRenderGraph hRenderGraph, + const RpsRenderGraphPhaseInfo** ppPhaseInfo, + uint32_t* pNumPhases); + +/// @brief Signature of functions for destroying a runtime device. +/// +/// @param pUserContext User defined context. 
+typedef void (*PFN_rpsRuntimeDeviceDestroy)(void* pUserContext); + +/// @brief Signature of functions for creating a runtime heap. +/// +/// @param pUserContext User defined context. +/// @param pArgs Pointer to heap creation parameters. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +typedef RpsResult (*PFN_rpsRuntimeCreateHeap)(void* pUserContext, const RpsRuntimeOpCreateHeapArgs* pArgs); + +/// @brief Signature of functions for destroying runtime heaps. +/// +/// @param pUserContext User defined context. +/// @param pArgs Pointer to heap destruction parameters. Must not be NULL. +typedef void (*PFN_rpsRuntimeDestroyHeap)(void* pUserContext, const RpsRuntimeOpDestroyHeapArgs* pArgs); + +/// @brief Signature of functions for creating a runtime resource. +/// +/// @param pUserContext User defined context. +/// @param pArgs Pointer to resource creation parameters. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +typedef RpsResult (*PFN_rpsRuntimeCreateResource)(void* pUserContext, const RpsRuntimeOpCreateResourceArgs* pArgs); + +/// @brief Signature of functions for destroying runtime resources. +/// +/// @param pUserContext User defined context. +/// @param pArgs Pointer to resource destruction parameters. Must not be NULL. +typedef void (*PFN_rpsRuntimeDestroyResource)(void* pUserContext, const RpsRuntimeOpDestroyResourceArgs* pArgs); + +/// @brief Creates the user defined resources associated with a node. +/// +/// These can be e.g. descriptor set allocations, PSOs or custom viewports. +/// +/// @param pUserContext User defined context. +/// @param pArgs Pointer to node arg creation parameters. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. 
+typedef RpsResult (*PFN_rpsRuntimeOpCreateNodeUserResources)(void* pUserContext, + const RpsRuntimeOpCreateNodeUserResourcesArgs* pArgs); + +/// @brief Destroys the user defined resources associated with a node. +/// +/// These can be e.g. descriptor set allocations, PSOs or custom viewports. +/// +/// @param pUserContext User defined context. +typedef void (*PFN_rpsRuntimeOpDestroyNodeUserResources)(void* pUserContext); + +/// @brief Signature of functions for recording runtime debug markers. +/// +/// @param pUserContext User defined context. +/// @param pArgs Pointer to debug marker parameters. Must not be NULL. +typedef void (*PFN_rpsRuntimeOpRecordDebugMarker)(void* pUserContext, const RpsRuntimeOpRecordDebugMarkerArgs* pArgs); + +/// @brief Signature of functions for setting runtime debug names. +/// +/// @param pUserContext User defined context. +/// @param pArgs Pointer to debug name parameters. Must not be NULL. +typedef void (*PFN_rpsRuntimeOpSetDebugName)(void* pUserContext, const RpsRuntimeOpSetDebugNameArgs* pArgs); + +/// @brief Callback functions of a runtime. +typedef struct RpsRuntimeCallbacks +{ + PFN_rpsRuntimeDeviceBuildRenderGraphPhases pfnBuildRenderGraphPhases; ///< Render graph phase build callback. + PFN_rpsRuntimeDeviceDestroy pfnDestroyRuntime; ///< Runtime destruction callback. + PFN_rpsRuntimeCreateHeap pfnCreateHeap; ///< Heap creation callback. + PFN_rpsRuntimeDestroyHeap pfnDestroyHeap; ///< Heap destruction callback. + PFN_rpsRuntimeCreateResource pfnCreateResource; ///< Resource creation callback. + PFN_rpsRuntimeDestroyResource pfnDestroyResource; ///< Resource destruction callback. + PFN_rpsRuntimeOpCreateNodeUserResources pfnCreateNodeResources; ///< Node resource creation callback. + PFN_rpsRuntimeOpDestroyNodeUserResources pfnDestroyNodeResources; ///< Node resource destruction callback. + PFN_rpsRuntimeOpRecordDebugMarker pfnRecordDebugMarker; ///< Debug marker recording callback.
+ PFN_rpsRuntimeOpSetDebugName pfnSetDebugName; ///< Debug name setting callback. +} RpsRuntimeCallbacks; + +/// @} end addtogroup RpsRuntimeDevice + +#ifdef __cplusplus +} +#endif //__cplusplus + +#endif //RPS_RUNTIME_CALLBACKS_H diff --git a/include/rps/runtime/d3d11/rps_d3d11_runtime.h b/include/rps/runtime/d3d11/rps_d3d11_runtime.h new file mode 100644 index 0000000..7e0643b --- /dev/null +++ b/include/rps/runtime/d3d11/rps_d3d11_runtime.h @@ -0,0 +1,347 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_RHI_D3D11_H_ +#define _RPS_RHI_D3D11_H_ + +#include "rps/core/rps_api.h" +#include "rps/runtime/common/rps_runtime.h" +#include "rps/runtime/d3d_common/rps_d3d_common.h" + +#include + +/// @addtogroup RpsD3D11RuntimeDevice +/// @{ + +/// @brief Bitflags for D3D11 runtime behavior. +/// +/// For future use. +typedef enum RpsD3D11RuntimeFlagBits +{ + RPS_D3D11_RUNTIME_FLAG_NONE = 0, ///< No runtime flags. +} RpsD3D11RuntimeFlagBits; + +/// @brief Bitmask type for RpsD3D11RuntimeFlagBits. +typedef uint32_t RpsD3D11RuntimeFlags; + +/// @brief Creation parameters for an RPS device with D3D11 backend. +typedef struct RpsD3D11RuntimeDeviceCreateInfo +{ + const RpsDeviceCreateInfo* pDeviceCreateInfo; ///< Pointer to general RPS device creation parameters. + /// Passing NULL uses default parameters instead. + const RpsRuntimeDeviceCreateInfo* pRuntimeCreateInfo; ///< Pointer to general RPS runtime creation info. Passing + /// NULL uses default parameters instead. + ID3D11Device* pD3D11Device; ///< Pointer to the D3D11 device to use for the runtime. + /// Must not be NULL. + RpsD3D11RuntimeFlags flags; ///< D3D11 runtime flags. 
+} RpsD3D11RuntimeDeviceCreateInfo; + +/// @} end addtogroup RpsD3D11RuntimeDevice + +#ifdef __cplusplus +extern "C" { +#endif //__cplusplus + +/// @brief Creates a D3D11 runtime device. +/// +/// @param pCreateInfo Pointer to creation parameters. Must not be NULL. +/// @param phDevice Pointer to a handle in which the device is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +/// @ingroup RpsD3D11RuntimeDevice +RpsResult rpsD3D11RuntimeDeviceCreate(const RpsD3D11RuntimeDeviceCreateInfo* pCreateInfo, RpsDevice* phDevice); + +/// @addtogroup RpsRenderGraphCommandRecordingD3D11 +/// @{ + +/// @brief Mapping between RpsRuntimeCommandBuffer and ID3D11DeviceContext*. +RPS_IMPL_OPAQUE_HANDLE(D3D11DeviceContext, RpsRuntimeCommandBuffer, ID3D11DeviceContext); + +/// @brief Mapping between RpsRuntimeResource and ID3D11Resource*. +RPS_IMPL_OPAQUE_HANDLE(D3D11Resource, RpsRuntimeResource, ID3D11Resource); + +/// @brief Gets an array of resource pointers from a resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the resource pointers from. Must be a resource +/// array argument if numResources > 1. +/// @param srcArrayOffset Offset to the first resource pointer to get. +/// @param ppResources Pointer to an array of ID3D11Resource* in which the numResources +/// resource pointers are returned. Must not be NULL if numResources != 0. +/// @param numResources Number of resource pointers to get. Requires srcArrayOffset + numResources to be +/// less than the number of elements in the node argument. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsD3D11GetCmdArgResourceArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + ID3D11Resource** ppResources, + uint32_t numResources); + +/// @brief Get a resource pointer from a resource node argument. 
+/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the resource pointer from. Must be a resource +/// argument. +/// @param ppResource Pointer in which the resource pointer is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsD3D11GetCmdArgResource(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + ID3D11Resource** ppResource); + +/// @brief Gets an array of render target view pointers from a resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the render target view pointers from. Must be a +/// resource array argument if numRTVs > 1. +/// @param srcArrayOffset Offset to the first render target view pointer to get. +/// @param ppRTVs Pointer to an array of ID3D11RenderTargetView* in which the +/// numRTVs render target view pointers are returned. Must not be NULL if +/// numRTVs != 0. +/// @param numRTVs Number of render target view pointers to get. Requires srcArrayOffset + numRTVs +/// to be less than the number of elements in the node argument. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsD3D11GetCmdArgRTVArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + ID3D11RenderTargetView** ppRTVs, + uint32_t numRTVs); + +/// @brief Get a render target view pointer from a resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the render target view from. Must be a +/// resource argument. +/// @param ppRTV Pointer in which the render target view pointer is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. 
+RpsResult rpsD3D11GetCmdArgRTV(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + ID3D11RenderTargetView** ppRTV); + +/// @brief Gets an array of depth stencil view pointers from a depth stencil image node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the depth stencil views from. Must be a depth +/// stencil image array argument if numDSVs > 1. +/// @param srcArrayOffset Offset to the first depth stencil view pointer to get. +/// @param ppDSVs Pointer to an array of ID3D11DepthStencilView* in which the +/// numDSVs depth stencil view pointers are returned. Must not be NULL if +/// numDSVs != 0. +/// @param numDSVs Number of depth stencil view pointers to get. Requires +/// srcArrayOffset + numDSVs to be less than the number of elements in the node +/// argument. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsD3D11GetCmdArgDSVArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + ID3D11DepthStencilView** ppDSVs, + uint32_t numDSVs); + +/// @brief Get a depth stencil view pointer from a depth stencil image node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the depth stencil view from. Must be a depth +/// stencil image argument. +/// @param ppDSV Pointer in which the depth stencil view pointer is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsD3D11GetCmdArgDSV(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + ID3D11DepthStencilView** ppDSV); + +/// @brief Gets an array of shader resource view pointers from a resource array node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the shader resource views from. 
Must be a resource +/// array argument if numSRVs > 1. +/// @param srcArrayOffset Offset to the first shader resource view to get. +/// @param ppSRVs Pointer to an array of ID3D11ShaderResourceView* in which the +/// numSRVs shader resource view pointers are returned. Must not be NULL if +/// numSRVs != 0. +/// @param numSRVs Number of shader resource view pointers to get. Requires +/// srcArrayOffset + numSRVs to be less than the number of elements in the node +/// argument. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsD3D11GetCmdArgSRVArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + ID3D11ShaderResourceView** ppSRVs, + uint32_t numSRVs); + +/// @brief Get a shader resource view pointer from a resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the shader resource view from. Must be a resource +/// argument. +/// @param ppSRV Pointer in which the shader resource view pointer is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsD3D11GetCmdArgSRV(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + ID3D11ShaderResourceView** ppSRV); + +/// @brief Gets an array of unordered access view pointers from a resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the unordered access views from. Must be a resource +/// array argument if numUAVs > 1. +/// @param srcArrayOffset Offset to the first unordered access view pointer to get. +/// @param ppUAVs Pointer to an array of ID3D11UnorderedAccessView* in which the +/// numUAVs unordered access view pointers are returned. Must not be NULL if +/// numUAVs != 0. +/// @param numUAVs Number of unordered access view pointers to get.
Requires +/// srcArrayOffset + numUAVs to be less than the number of elements in the node +/// argument. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsD3D11GetCmdArgUAVArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + ID3D11UnorderedAccessView** ppUAVs, + uint32_t numUAVs); + +/// @brief Get an unordered access view pointer from a resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the unordered access view from. Must be a resource +/// argument. +/// @param ppUAV Pointer in which the unordered access view pointer is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsD3D11GetCmdArgUAV(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + ID3D11UnorderedAccessView** ppUAV); + +/// @} end addtogroup RpsRenderGraphCommandRecordingD3D11 + +#ifdef __cplusplus +} +#endif //__cplusplus + +#ifdef __cplusplus + +#include "rps/core/rps_cmd_callback_wrapper.hpp" + +namespace rps +{ + namespace details + { + template + struct CommandArgUnwrapper + { + ID3D11Resource* operator()(const RpsCmdCallbackContext* pContext) + { + ID3D11Resource* resource = nullptr; + RpsResult result = rpsD3D11GetCmdArgResource(pContext, Index, &resource); + if (RPS_FAILED(result)) + { + rpsCmdCallbackReportError(pContext, result); + } + return resource; + } + }; + + template + struct CommandArgUnwrapper + { + ID3D11Texture1D* operator()(const RpsCmdCallbackContext* pContext) + { + ID3D11Resource* resource = nullptr; + RpsResult result = rpsD3D11GetCmdArgResource(pContext, Index, &resource); + if (RPS_FAILED(result)) + { + rpsCmdCallbackReportError(pContext, result); + } + return static_cast(resource); + } + }; + + template + struct CommandArgUnwrapper + { + ID3D11Texture2D* operator()(const RpsCmdCallbackContext* pContext) + { + ID3D11Resource* 
resource = nullptr; + RpsResult result = rpsD3D11GetCmdArgResource(pContext, Index, &resource); + if (RPS_FAILED(result)) + { + rpsCmdCallbackReportError(pContext, result); + } + return static_cast(resource); + } + }; + + template + struct CommandArgUnwrapper + { + ID3D11Texture3D* operator()(const RpsCmdCallbackContext* pContext) + { + ID3D11Resource* resource = nullptr; + RpsResult result = rpsD3D11GetCmdArgResource(pContext, Index, &resource); + if (RPS_FAILED(result)) + { + rpsCmdCallbackReportError(pContext, result); + } + return static_cast(resource); + } + }; + + template + struct CommandArgUnwrapper + { + ID3D11Buffer* operator()(const RpsCmdCallbackContext* pContext) + { + ID3D11Resource* resource = nullptr; + RpsResult result = rpsD3D11GetCmdArgResource(pContext, Index, &resource); + if (RPS_FAILED(result)) + { + rpsCmdCallbackReportError(pContext, result); + } + return static_cast(resource); + } + }; + + template + struct CommandArgUnwrapper + { + ID3D11ShaderResourceView* operator()(const RpsCmdCallbackContext* pContext) + { + ID3D11ShaderResourceView* pSRV = nullptr; /* Null-initialized, matching the resource unwrappers above. */ + RpsResult result = rpsD3D11GetCmdArgSRV(pContext, Index, &pSRV); + if (RPS_FAILED(result)) + { + rpsCmdCallbackReportError(pContext, result); + } + return pSRV; + } + }; + + template + struct CommandArgUnwrapper + { + ID3D11UnorderedAccessView* operator()(const RpsCmdCallbackContext* pContext) + { + ID3D11UnorderedAccessView* pUAV = nullptr; /* Null-initialized, matching the resource unwrappers above. */ + RpsResult result = rpsD3D11GetCmdArgUAV(pContext, Index, &pUAV); /* Output goes to pUAV, the value returned below. */ + if (RPS_FAILED(result)) + { + rpsCmdCallbackReportError(pContext, result); + } + return pUAV; + } + }; + } // namespace details +} // namespace rps + +#endif //__cplusplus + +#endif //_RPS_RHI_D3D11_H_ diff --git a/include/rps/runtime/d3d12/rps_d3d12_runtime.h b/include/rps/runtime/d3d12/rps_d3d12_runtime.h new file mode 100644 index 0000000..8633ceb --- /dev/null +++ b/include/rps/runtime/d3d12/rps_d3d12_runtime.h @@ -0,0 +1,250 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc.
All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_RHI_D3D12_H_ +#define _RPS_RHI_D3D12_H_ + +#include "rps/core/rps_api.h" +#include "rps/runtime/common/rps_runtime.h" +#include "rps/runtime/d3d_common/rps_d3d_common.h" + +#include + +#if D3D12_SDK_VERSION >= 606 +#define RPS_D3D12_MSAA_UAV_SUPPORT 1 +#define RPS_D3D12_ENHANCED_BARRIER_SUPPORT 1 +#endif + +#if D3D12_SDK_VERSION >= 600 +#define RPS_D3D12_FEATURE_D3D12_OPTIONS12_DEFINED 1 +#endif + +/// @addtogroup RpsD3D12RuntimeDevice +/// @{ + +/// @brief Bitflags for D3D12 runtime behavior. +typedef enum RpsD3D12RuntimeFlagBits +{ + RPS_D3D12_RUNTIME_FLAG_NONE = 0, ///< No flags. + RPS_D3D12_RUNTIME_FLAG_FORCE_RESOURCE_HEAP_TIER1 = 1 << 0, ///< Forces the runtime to behave as if the D3D12 device + /// only supports D3D12_RESOURCE_HEAP_TIER_1. + RPS_D3D12_RUNTIME_FLAG_PREFER_RENDER_PASS = 1 << 1, ///< Prefers using render passes. + RPS_D3D12_RUNTIME_FLAG_PREFER_ENHANCED_BARRIERS = 1 << 2, ///< Prefers using enhanced barriers. +} RpsD3D12RuntimeFlagBits; + +/// @brief Bitmask type for RpsD3D12RuntimeFlagBits +typedef uint32_t RpsD3D12RuntimeFlags; + +/// @brief Creation parameters for an RPS device with a d3d12 backend. +typedef struct RpsD3D12RuntimeDeviceCreateInfo +{ + const RpsDeviceCreateInfo* pDeviceCreateInfo; ///< Pointer to general RPS device creation parameters. + /// Passing NULL uses default parameters instead. + const RpsRuntimeDeviceCreateInfo* pRuntimeCreateInfo; ///< Pointer to general RPS runtime creation info. Passing + /// NULL uses default parameters instead. + ID3D12Device* pD3D12Device; ///< Pointer to the D3D12 device to use for the runtime. + /// Must not be NULL. + RpsD3D12RuntimeFlags flags; ///< D3D12 runtime flags. +} RpsD3D12RuntimeDeviceCreateInfo; + +/// @brief Indices for D3D12 heap/memory types.
+typedef enum RpsD3D12HeapTypeIndex +{ + RPS_D3D12_HEAP_TYPE_INDEX_UPLOAD = 0, ///< Maps to D3D12_HEAP_TYPE_UPLOAD. + RPS_D3D12_HEAP_TYPE_INDEX_READBACK, ///< Maps to D3D12_HEAP_TYPE_READBACK. + RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT, ///< Maps to D3D12_HEAP_TYPE_DEFAULT. + RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT_MSAA, ///< Maps to D3D12_HEAP_TYPE_DEFAULT with MSAA support. + RPS_D3D12_HEAP_TYPE_COUNT_TIER_2, ///< Number of heap types for D3D12_RESOURCE_HEAP_TIER_2. + + /// Maps to D3D12_HEAP_TYPE_DEFAULT with the D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES flag set. + RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT_TIER_1_RT_DS_TEXTURE = RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT, + + /// Maps to D3D12_HEAP_TYPE_DEFAULT with the D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES flag set and MSAA support. + RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT_TIER_1_RT_DS_TEXTURE_MSAA = RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT_MSAA, + + /// Maps to D3D12_HEAP_TYPE_DEFAULT with the D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS flag set. + RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT_TIER_1_BUFFER, + + /// Maps to D3D12_HEAP_TYPE_DEFAULT with the D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES flag set. + RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT_TIER_1_NON_RT_DS_TEXTURE, + + /// Number of heap types for D3D12_RESOURCE_HEAP_TIER_1. + RPS_D3D12_HEAP_TYPE_COUNT_TIER_1, + + ///Total number of heap types. + RPS_D3D12_HEAP_TYPE_COUNT_MAX = RPS_D3D12_HEAP_TYPE_COUNT_TIER_1, +} RpsD3D12HeapTypeIndex; + +/// @} end addtogroup RpsD3D12RuntimeDevice + +#ifdef __cplusplus +extern "C" { +#endif //__cplusplus + +/// @brief Creates a D3D12 runtime device. +/// +/// @param pCreateInfo Pointer to creation parameters. Must not be NULL. +/// @param phDevice Pointer to a handle in which the device is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. 
+/// @ingroup RpsD3D12RuntimeDevice +RpsResult rpsD3D12RuntimeDeviceCreate(const RpsD3D12RuntimeDeviceCreateInfo* pCreateInfo, RpsDevice* phDevice); + +/// @addtogroup RpsRenderGraphCommandRecordingD3D12 +/// @{ + +/// @brief Mapping between RpsRuntimeCommandBuffer and ID3D12GraphicsCommandList*. +RPS_IMPL_OPAQUE_HANDLE(D3D12CommandList, RpsRuntimeCommandBuffer, ID3D12GraphicsCommandList); + +/// @brief Mapping between RpsRuntimeCommandBuffer and ID3D12GraphicsCommandList1*. +RPS_IMPL_OPAQUE_HANDLE(D3D12CommandList1, RpsRuntimeCommandBuffer, ID3D12GraphicsCommandList1); + +/// @brief Mapping between RpsRuntimeResource and ID3D12Resource*. +RPS_IMPL_OPAQUE_HANDLE(D3D12Resource, RpsRuntimeResource, ID3D12Resource); + +/// @brief Mapping between RpsRuntimeHeap and ID3D12Heap*. +RPS_IMPL_OPAQUE_HANDLE(D3D12Heap, RpsRuntimeHeap, ID3D12Heap); + +/// @brief Helper function for converting a D3D12_RESOURCE_DESC structure to an RpsResourceDesc structure. +/// +/// @param pD3D12Desc A pointer to the input D3D12_RESOURCE_DESC structure. +/// @param pRpsDesc A pointer to the output RpsResourceDesc structure. +/// +/// @return Result code of the operation. See RpsResult for more info. +RpsResult rpsD3D12ResourceDescToRps(const D3D12_RESOURCE_DESC* pD3D12Desc, RpsResourceDesc* pRpsDesc); + +/// @brief Gets an array of resource pointers from a resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the resource pointers from. Must be a resource +/// array argument if numResources > 1. +/// @param srcArrayOffset Offset to the first resource pointer to get. +/// @param ppResources Pointer to an array of ID3D12Resource* in which the numResources +/// resource pointers are returned. Must not be NULL if numResources != 0. +/// @param numResources Number of resources to get. Requires srcArrayOffset + numResources to be less +/// than the number of elements in the node argument. 
+/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsD3D12GetCmdArgResourceArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + ID3D12Resource** ppResources, + uint32_t numResources); + +/// @brief Get a resource from a resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the resource pointer from. +/// @param ppResource Pointer in which the resource pointer is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsD3D12GetCmdArgResource(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + ID3D12Resource** ppResource); + +/// @brief Gets an array of D3D12 CPU descriptor handles from a resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the descriptor handle from. Must be a resource +/// array argument if numHandles > 1. +/// @param srcArrayOffset Offset to the first descriptor handle to get. +/// @param pHandles Pointer to an array of D3D12_CPU_DESCRIPTOR_HANDLE in which the +/// descriptor handles are returned. Must not be NULL if numHandles != 0. +/// @param numHandles Number of descriptor handles to get. Requires srcArrayOffset + numHandles to be +/// less than the number of elements in the node argument. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsD3D12GetCmdArgDescriptorArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + D3D12_CPU_DESCRIPTOR_HANDLE* pHandles, + uint32_t numHandles); + +/// @brief Get a CPU descriptor handle from a resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the descriptor handle from. Must be a resource +/// argument.
+/// @param pHandle Pointer in which the descriptor handle is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsD3D12GetCmdArgDescriptor(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + D3D12_CPU_DESCRIPTOR_HANDLE* pHandle); + +/// @brief Copies an array of D3D12 CPU descriptor handles from a resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to copy the descriptor content from. Must be a resource +/// array argument if numHandles > 1. +/// @param srcArrayOffset Offset to the first descriptor handle to copy. +/// @param numHandles Number of descriptor handles to copy. Requires srcArrayOffset + numHandles to be +/// less than the number of elements in the node argument. +/// @param singleHandleToArray If true, pDstHandles points only to a single destination handle which refers to a +/// continuous range of descriptors in a descriptor heap. If false, pDstHandles +/// points to an array of descriptor handles possibly referring to independent +/// locations or heaps. +/// @param pDstHandles Pointer to an array of descriptors to copy to. +/// Must not be NULL if numHandles != 0. +/// +/// @returns Result code of the operation. See RpsResult for more info. 
+RpsResult rpsD3D12CopyCmdArgDescriptors(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + uint32_t numHandles, + RpsBool singleHandleToArray, + D3D12_CPU_DESCRIPTOR_HANDLE* pDstHandles); + +/// @} end addtogroup RpsRenderGraphCommandRecordingD3D12 + +#ifdef __cplusplus +} +#endif //__cplusplus + +#ifdef __cplusplus + +#include "rps/core/rps_cmd_callback_wrapper.hpp" + +namespace rps +{ + namespace details + { + template + struct CommandArgUnwrapper + { + ID3D12Resource* operator()(const RpsCmdCallbackContext* pContext) + { + ID3D12Resource* resource = nullptr; + RpsResult result = rpsD3D12GetCmdArgResource(pContext, Index, &resource); + if (RPS_FAILED(result)) + { + rpsCmdCallbackReportError(pContext, result); + } + return resource; + } + }; + + template + struct CommandArgUnwrapper + { + D3D12_CPU_DESCRIPTOR_HANDLE operator()(const RpsCmdCallbackContext* pContext) + { + D3D12_CPU_DESCRIPTOR_HANDLE resourceView = {}; /* Value-initialized, like the null-initialized resource pointer above. */ + RpsResult result = rpsD3D12GetCmdArgDescriptor(pContext, Index, &resourceView); + if (RPS_FAILED(result)) + { + rpsCmdCallbackReportError(pContext, result); + } + return resourceView; + } + }; + } // namespace details +} // namespace rps + +#endif //__cplusplus + +#endif //_RPS_RHI_D3D12_H_ diff --git a/include/rps/runtime/d3d_common/rps_d3d_common.h b/include/rps/runtime/d3d_common/rps_d3d_common.h new file mode 100644 index 0000000..c5b532d --- /dev/null +++ b/include/rps/runtime/d3d_common/rps_d3d_common.h @@ -0,0 +1,48 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details.
+ +#ifndef _RPS_D3D_COMMON_H_ +#define _RPS_D3D_COMMON_H_ + +#include + +#include "rps/runtime/common/rps_format.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/// @addtogroup RpsFormat +/// @{ + +/// @brief Converts an RPS format to a DXGI format. +/// +/// @param rpsFormat RPS format to convert. +/// +/// @returns Converted DXGI format. +static inline DXGI_FORMAT rpsFormatToDXGI(RpsFormat rpsFormat) +{ + return (DXGI_FORMAT)rpsFormat; +} + +/// @brief Converts a DXGI format to an RPS format. +/// +/// @param dxgiFormat DXGI format to convert. +/// +/// @returns Converted RPS format. +static inline RpsFormat rpsFormatFromDXGI(DXGI_FORMAT dxgiFormat) +{ + return (RpsFormat)dxgiFormat; +} + +/// @} end addtogroup RpsFormat + +#ifdef __cplusplus +} +#endif + +#endif //_RPS_D3D_COMMON_H_ diff --git a/include/rps/runtime/vk/rps_vk_runtime.h b/include/rps/runtime/vk/rps_vk_runtime.h new file mode 100644 index 0000000..e3e8c8b --- /dev/null +++ b/include/rps/runtime/vk/rps_vk_runtime.h @@ -0,0 +1,408 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef RPS_VK_RUNTIME_H +#define RPS_VK_RUNTIME_H + +#include "rps/core/rps_api.h" +#include "rps/runtime/common/rps_runtime.h" + +#include + +/// @addtogroup RpsVKRuntimeDevice +/// @{ + +/// @brief Bitflags for VK runtime behavior. +typedef enum RpsVKRuntimeFlagBits +{ + RPS_VK_RUNTIME_FLAG_NONE = 0, ///< No special behavior. + RPS_VK_RUNTIME_FLAG_PREFER_RENDER_PASS = 1 << 1, ///< Prefers using render passes. + RPS_VK_RUNTIME_FLAG_DONT_FLIP_VIEWPORT = 1 << 2, ///< Disables viewport flipping. By default RPS flips the viewport + /// when the automatic viewport setup is enabled for a command + /// node to match the D3D12 backend behavior. 
+} RpsVKRuntimeFlagBits; + +/// @brief Bitmask type for RpsVKRuntimeFlagBits +typedef uint32_t RpsVKRuntimeFlags; + +/// @brief Creation parameters for an RPS device with a Vulkan backend. +typedef struct RpsVKRuntimeDeviceCreateInfo +{ + const RpsDeviceCreateInfo* pDeviceCreateInfo; ///< Pointer to general RPS device creation parameters. + /// Passing NULL uses default parameters instead. + const RpsRuntimeDeviceCreateInfo* pRuntimeCreateInfo; ///< Pointer to general RPS runtime creation info. Passing + /// NULL uses default parameters instead. + VkDevice hVkDevice; ///< Handle to the VK device to use for the runtime. Must not + /// be VK_NULL_HANDLE. + VkPhysicalDevice hVkPhysicalDevice; ///< Handle to the VK physical device to use for the runtime. + /// Must not be VK_NULL_HANDLE. + RpsVKRuntimeFlags flags; ///< VK runtime flags. +} RpsVKRuntimeDeviceCreateInfo; + +/// @} end addtogroup RpsVKRuntimeDevice + +#ifdef __cplusplus +extern "C" { +#endif //__cplusplus + +/// @brief Creates a VK runtime device. +/// +/// @param pCreateInfo Pointer to creation parameters. Must not be NULL. +/// @param phDevice Pointer to a handle in which the device is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +/// @ingroup RpsVKRuntimeDevice +RpsResult rpsVKRuntimeDeviceCreate(const RpsVKRuntimeDeviceCreateInfo* pCreateInfo, RpsDevice* phDevice); + +/// @addtogroup RpsRenderGraphCommandRecordingVK +/// @{ + +/// @brief Mapping between RpsRuntimeCommandBuffer and VkCommandBuffer. +RPS_IMPL_OPAQUE_HANDLE(VKCommandBuffer, RpsRuntimeCommandBuffer, VkCommandBuffer_T); + +/// @brief Mapping between RpsRuntimeResource and VkImage. +RPS_IMPL_OPAQUE_HANDLE(VKImage, RpsRuntimeResource, VkImage_T); + +/// @brief Mapping between RpsRuntimeResource and VkBuffer. +RPS_IMPL_OPAQUE_HANDLE(VKBuffer, RpsRuntimeResource, VkBuffer_T); + +/// @brief Mapping between RpsRuntimeHeap and VkDeviceMemory. 
+RPS_IMPL_OPAQUE_HANDLE(VKMemory, RpsRuntimeHeap, VkDeviceMemory_T); + +/// @brief Gets an array of VK image view handles from an image resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the image view handles from. Must be an image +/// resource array argument if numImageViews > 1. +/// @param srcArrayOffset Offset to the first image view handle to get. +/// @param pImageViews Pointer to an array of VkImageView in which the numImageViews +/// image view handles are returned. Must not be NULL if numImageViews != 0. +/// @param numImageViews Number of image view handles to get. Requires srcArrayOffset + numImageViews to +/// be less than the number of elements in the node argument. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsVKGetCmdArgImageViewArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + VkImageView* pImageViews, + uint32_t numImageViews); + +/// @brief Gets a VK image view handle from an image resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the image view from. Must be an image resource +/// argument. +/// @param pImageView Pointer in which the image view handle is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsVKGetCmdArgImageView(const RpsCmdCallbackContext* pContext, uint32_t argIndex, VkImageView* pImageView); + +/// @brief Parameters of a VK image view info. +typedef struct RpsVkImageViewInfo +{ + VkImageView hImageView; ///< Handle to the image view. + VkImageLayout layout; ///< Layout of the viewed image. +} RpsVkImageViewInfo; + +/// @brief Gets an array of VK image view infos from an image resource node argument. +/// +/// @param pContext Callback context of the current command.
+/// @param argIndex Index of the argument to get the image view infos from. Must be an image +/// resource array argument if numImageViewInfos > 1. +/// @param srcArrayOffset Offset to the first image view info to get. +/// @param pImageViewInfos Pointer to an array of RpsVkImageViewInfo in which the +/// numImageViewInfos image view infos are returned. Must not be NULL if +/// numImageViewInfos != 0. +/// @param numImageViewInfos Number of image view infos to get. Requires srcArrayOffset + numImageViewInfos +/// to be less than the number of elements in the node argument. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsVKGetCmdArgImageViewInfoArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + RpsVkImageViewInfo* pImageViewInfos, + uint32_t numImageViewInfos); + +/// @brief Gets a VK image view info from an image view node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the image view info from. Must be an image resource +/// argument. +/// @param pImageViewInfo Pointer in which the image view info is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsVKGetCmdArgImageViewInfo(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + RpsVkImageViewInfo* pImageViewInfo); + +/// @brief Gets an array of VK image handles from an image resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the image view infos from. Must be an image +/// resource array argument if numImages > 1. +/// @param srcArrayOffset Offset to the first image handle to get. +/// @param pImages Pointer to an array of VkImage in which the numImages image +/// handles are returned. Must not be NULL if numImages != 0. +/// @param numImages Number of image handles to get. 
Requires srcArrayOffset + numImages to be +/// less than the number of elements in the node argument. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsVKGetCmdArgImageArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + VkImage* pImages, + uint32_t numImages); + +/// @brief Gets a VK image handle from an image resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the image handle info from. Must be an image +/// resource argument. +/// @param pImage Pointer in which the image handle is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsVKGetCmdArgImage(const RpsCmdCallbackContext* pContext, uint32_t argIndex, VkImage* pImage); + +/// @brief Gets an array of VK buffer view handles from a buffer resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the buffer view handle from. Must be a buffer +/// resource array argument if numBufferViews > 1. +/// @param srcArrayOffset Offset to the first buffer view handle to get. +/// @param pBufferViews Pointer to an array of VkBufferView in which the numBufferViews +/// buffer view handles are returned. Must not be NULL if numBufferViews != 0. +/// @param numBufferViews Number of buffer view handles to get. Requires srcArrayOffset + numBufferViews +/// to be less than the number of elements in the node argument. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsVKGetCmdArgBufferViewArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + VkBufferView* pBufferViews, + uint32_t numBufferViews); + +/// @brief Gets a VK buffer view handle from an buffer resource node argument. 
+/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the buffer view handle from. Must be a buffer +/// resource argument. +/// @param pBufferViews Pointer in which the buffer view handle is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsVKGetCmdArgBufferView(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + VkBufferView* pBufferViews); + +/// @brief Gets an array of VK buffer handles from a buffer resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the image view infos from. Must be a buffer +/// resource array argument if numBuffers > 1. +/// @param srcArrayOffset Offset to the first buffer handle to get. +/// @param pBuffers Pointer to an array of VkBuffer in which the numBuffers buffer +/// handles are returned. Must not be NULL if numBuffers != 0. +/// @param numBuffers Number of buffer handles to get. Requires srcArrayOffset + numBuffers to be +/// less than the number of elements in the node argument. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsVKGetCmdArgBufferArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + VkBuffer* pBuffers, + uint32_t numBuffers); + +/// @brief Gets a VK buffer handle from a buffer resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the buffer handle from. Must be a buffer resource +/// argument. +/// @param pBuffer Pointer in which the buffer handle is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsVKGetCmdArgBuffer(const RpsCmdCallbackContext* pContext, uint32_t argIndex, VkBuffer* pBuffer); + +/// @brief Parameters of a VK memory range. 
+typedef struct RpsVkDeviceMemoryRange +{ + VkDeviceMemory hMemory; ///< Handle to the device memory. + size_t offset; ///< Offset into the device memory in bytes. + size_t size; ///< Size of the range in bytes. +} RpsVkDeviceMemoryRange; + +/// @brief Gets an array of VK memory ranges from a resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the memory ranges from. Must be a resource array +/// argument if numRanges > 1. +/// @param srcArrayOffset Offset to the first memory range to get. Pointer to an array of +/// RpsVkDeviceMemoryRange in which the numRanges memory ranges are +/// returned. Must not be NULL if numRanges != 0. +/// @param pMemoryRanges Pointer to an array of RpsVkDeviceMemoryRange in which the +/// numRanges memory ranges are returned. Must not be NULL if numRanges != 0. +/// @param numRanges Number of memory ranges to get. Requires srcArrayOffset + numRanges to be +/// less than the number of elements in the node argument. +/// +/// @returns Result code of the operation. See RpsResult for more info. + +RpsResult rpsVKGetCmdArgGpuMemoryArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + RpsVkDeviceMemoryRange* pMemoryRanges, + uint32_t numRanges); + +/// @brief Gets a VK memory range from a resource node argument. +/// +/// @param pContext Callback context of the current command. +/// @param argIndex Index of the argument to get the memory range from. Must be a resource argument. +/// @param pMemoryRange Pointer in which the memory range is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsVKGetCmdArgGpuMemory(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + RpsVkDeviceMemoryRange* pMemoryRange); + +/// @brief Gets the render pass handle of the current node. +/// +/// @param pContext Callback context of the current command. 
+/// @param pRenderPass Pointer in which the render pass handle is returned. Must not be NULL. +/// +/// @returns Result code of the operation. See RpsResult for more info. +RpsResult rpsVKGetCmdRenderPass(const RpsCmdCallbackContext* pContext, VkRenderPass* pRenderPass); + +/// @} end addtogroup RpsRenderGraphCommandRecordingVK + +/// @addtogroup RpsFormat +/// @{ + +/// @brief Converts an RPS format to a VK format. +/// +/// @param rpsFormat RPS format to convert. +/// +/// @returns Converted VK format. +VkFormat rpsFormatToVK(RpsFormat rpsFormat); + +/// @brief Converts a VK format to an RPS format. +/// +/// @param vkFormat VK format to convert. +/// +/// @returns Converted RPS format. +RpsFormat rpsFormatFromVK(VkFormat vkFormat); + +/// @} end addtogroup RpsFormat + +#ifdef __cplusplus +} +#endif //__cplusplus + +#ifdef __cplusplus + +#include "rps/core/rps_cmd_callback_wrapper.hpp" + +namespace rps +{ + namespace details + { + template + struct CommandArgUnwrapper + { + VkImage operator()(const RpsCmdCallbackContext* pContext) + { + VkImage image = VK_NULL_HANDLE; + RpsResult result = rpsVKGetCmdArgImage(pContext, Index, &image); + if (RPS_FAILED(result)) + { + rpsCmdCallbackReportError(pContext, result); + } + return image; + } + }; + + template + struct CommandArgUnwrapper + { + VkBuffer operator()(const RpsCmdCallbackContext* pContext) + { + VkBuffer buffer = VK_NULL_HANDLE; + RpsResult result = rpsVKGetCmdArgBuffer(pContext, Index, &buffer); + if (RPS_FAILED(result)) + { + rpsCmdCallbackReportError(pContext, result); + } + return buffer; + } + }; + + template + struct CommandArgUnwrapper + { + VkImageView operator()(const RpsCmdCallbackContext* pContext) + { + VkImageView imageView; + RpsResult result = rpsVKGetCmdArgImageView(pContext, Index, &imageView); + if (RPS_FAILED(result)) + { + rpsCmdCallbackReportError(pContext, result); + } + return imageView; + } + }; + + template + struct CommandArgUnwrapper + { + VkBufferView operator()(const 
RpsCmdCallbackContext* pContext) + { + VkBufferView bufferView; + RpsResult result = rpsVKGetCmdArgBufferView(pContext, Index, &bufferView); + if (RPS_FAILED(result)) + { + rpsCmdCallbackReportError(pContext, result); + } + return bufferView; + } + }; + + template + struct CommandArgUnwrapper + { + RpsVkDeviceMemoryRange operator()(const RpsCmdCallbackContext* pContext) + { + RpsVkDeviceMemoryRange memoryRange; + RpsResult result = rpsVKGetCmdArgGpuMemory(pContext, Index, &memoryRange); + + if (RPS_FAILED(result)) + { + rpsCmdCallbackReportError(pContext, result); + } + + return memoryRange; + } + }; + + template + struct CommandArgUnwrapper + { + RpsVkImageViewInfo operator()(const RpsCmdCallbackContext* pContext) + { + RpsVkImageViewInfo imageViewInfo; + RpsResult result = rpsVKGetCmdArgImageViewInfo(pContext, Index, &imageViewInfo); + + if (RPS_FAILED(result)) + { + rpsCmdCallbackReportError(pContext, result); + } + + return imageViewInfo; + } + }; + + } // namespace details +} // namespace rps + +#endif //__cplusplus + +#endif //RPS_VK_RUNTIME_H diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..6daba40 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,21 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +# +# This file is part of the AMD Render Pipeline Shaders SDK which is +# released under the AMD INTERNAL EVALUATION LICENSE. +# +# See file LICENSE.RTF for full license details. 
+
+BuildFolderProperty( "modules" AppFolder )
+
+AddModule( rps_core "${CMAKE_CURRENT_SOURCE_DIR}/core" "${AppFolder}" "" "" STATIC "" )
+AddModule( rps_frontend "${CMAKE_CURRENT_SOURCE_DIR}/frontend" "${AppFolder}" "" "" STATIC "rps_core" )
+AddModule( rps_runtime "${CMAKE_CURRENT_SOURCE_DIR}/runtime/common" "${AppFolder}" "" "" STATIC "rps_core" )
+AddModule( rps_runtime_d3d12 "${CMAKE_CURRENT_SOURCE_DIR}/runtime/d3d12" "${AppFolder}" "" "" STATIC "rps_core;rps_runtime" )
+if (RpsEnableDXAgilitySDK)
+    target_include_directories( rps_runtime_d3d12 PUBLIC ${DX12AgilitySDK_INCLUDE_DIR} )
+    target_compile_definitions( rps_runtime_d3d12 PUBLIC RPS_DX12_AGILITY_SDK_VER=${DXAgilitySDK_VERSION} )
+endif()
+AddModule( rps_runtime_d3d11 "${CMAKE_CURRENT_SOURCE_DIR}/runtime/d3d11" "${AppFolder}" "" "" STATIC "rps_core;rps_runtime" )
+if (Vulkan_FOUND)
+    AddModule( rps_runtime_vk "${CMAKE_CURRENT_SOURCE_DIR}/runtime/vk" "${AppFolder}" "${Vulkan_INCLUDE_DIRS};" "" STATIC "rps_core;rps_runtime" )
+endif( )
diff --git a/src/core/rps_core.cpp b/src/core/rps_core.cpp
new file mode 100644
index 0000000..e10cc2e
--- /dev/null
+++ b/src/core/rps_core.cpp
@@ -0,0 +1,39 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+
+#include "core/rps_core.hpp"
+#include
+
+// The context-less global debug printer
+static RpsPrinter g_rpsGlobalDebugPrinter;
+
+void rpsSetGlobalDebugPrinter(const RpsPrinter* pPrinter)
+{
+    g_rpsGlobalDebugPrinter = pPrinter ? *pPrinter : RpsPrinter{};  // NULL resets to an empty printer.
+}
+
+const RpsPrinter* rpsGetGlobalDebugPrinter()
+{
+    return &g_rpsGlobalDebugPrinter;
+}
+
+void rpsDiagLog(const char* fmt, ...)
+{
+    va_list vargs;
+    va_start(vargs, fmt);
+
+    if (g_rpsGlobalDebugPrinter.pfnVPrintf)
+    {
+        g_rpsGlobalDebugPrinter.pfnVPrintf(g_rpsGlobalDebugPrinter.pContext, fmt, vargs);
+    }
+    else
+    {
+        vfprintf(stderr, fmt, vargs);  // No global printer installed: fall back to stderr.
+    }
+
+    va_end(vargs);
+}
diff --git a/src/core/rps_core.hpp b/src/core/rps_core.hpp
new file mode 100644
index 0000000..76c3e28
--- /dev/null
+++ b/src/core/rps_core.hpp
@@ -0,0 +1,358 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+
+#ifndef _RPS_CORE_H_
+#define _RPS_CORE_H_
+
+/// @defgroup Core
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#ifdef RPS_HAS_INTRIN_H
+#include
+#endif
+
+#ifndef RPS_ASSERT
+#include
+#endif
+
+#include "rps/core/rps_api.h"
+
+#if !defined(RPS_DEBUG) && !defined(NDEBUG)
+/// A preprocessor symbol indicating that RPS is compiled in a debug configuration.
+///
+/// @ingroup Core
+#define RPS_DEBUG (1)
+#endif
+
+#define RPS_CONCATENATE_DIRECT(A, B) A##B
+#define RPS_CONCATENATE_INDIRECT(A, B) RPS_CONCATENATE_DIRECT(A, B)
+
+#define RPS_STRINGIFY_DIRECT(X) #X
+#define RPS_STRINGIFY(X) RPS_STRINGIFY_DIRECT(X)
+
+#define RPS_ALIGNOF(x) alignof(x)
+
+// CountOf C array
+#ifdef __cplusplus
+extern "C++" template
+constexpr size_t RPS_COUNTOF(T const (&)[Count]) noexcept
+{
+    return Count;
+}
+#else
+#define RPS_COUNTOF(arr) (sizeof(arr) / sizeof(arr[0]))
+#endif
+
+#define RPS_UNUSED(x) \
+    do \
+    { \
+        (void)sizeof(!(x)); \
+    } while (0)
+
+#if RPS_HAS_NODISCARD
+#define RPS_NO_DISCARD [[nodiscard]]
+#else
+#define RPS_NO_DISCARD
+#endif
+
+#if RPS_HAS_MAYBE_UNUSED
+#define RPS_MAYBE_UNUSED [[maybe_unused]]
+#else
+#define RPS_MAYBE_UNUSED
+#endif
+
+#if __cplusplus >= 202002L
+#define RPS_CONSTEXPR_20 constexpr
+#else
+#define RPS_CONSTEXPR_20 inline
+#endif
+
+#if __cplusplus >= 201703L
+#define RPS_CONSTEXPR_17 constexpr
+#else
+#define RPS_CONSTEXPR_17 inline
+#endif
+
+#if __cplusplus >= 201402L
+#define RPS_CONSTEXPR_14 constexpr
+#else
+#define RPS_CONSTEXPR_14
+#endif
+
+/// An assertion macro that is used throughout the RPS codebase.
+///
+/// @ingroup Core
+#ifndef RPS_ASSERT
+#if RPS_DEBUG
+#define RPS_ASSERT(x) assert(x)
+#else
+#define RPS_ASSERT(x) RPS_UNUSED(x)
+#endif
+#endif // #ifndef RPS_ASSERT
+
+#ifdef __cplusplus
+#define RPS_STATIC_ASSERT(Expr, Msg) static_assert(Expr, Msg)
+#define RPS_STATIC_ASSERT_STANDALONE(Expr, Msg, Prefix) static_assert(Expr, Msg)
+#else
+#define RPS_STATIC_ASSERT(Expr, Msg) \
+    do \
+    { \
+        typedef char RPS_CONCATENATE_INDIRECT(__RPS_STATIC_ASSERT_, __LINE__)[(Expr) ? 1 : -1]; \
+        (void)sizeof(RPS_CONCATENATE_INDIRECT(__RPS_STATIC_ASSERT_, __LINE__)); \
+    } while (0)
+#define RPS_STATIC_ASSERT_STANDALONE(Expr, Msg, Postfix) \
+    static void RPS_CONCATENATE_INDIRECT(_RPS_STATIC_ASSERT_SCOPE_FUNC_##Postfix##_, __LINE__)(void) \
+    { \
+        RPS_STATIC_ASSERT(Expr, Msg); \
+    }
+#endif
+
+// Helper to get struct pointer from field pointer
+#define RPS_STRUCT_PTR_FROM_FIELD(StructType, FieldName, FieldPtr) \
+    ((StructType*)(((uint8_t*)(FieldPtr)) - offsetof(StructType, FieldName)))
+
+/// A helper function to assert a result code does not indicate an error.
+///
+/// @param result The result code to check; if this is != RPS_OK the assertion will fail.
+///
+/// @ingroup Core
+static inline void RPS_ASSERT_OK(RpsResult result)
+{
+    RPS_ASSERT(result == RPS_OK);
+}
+
+#define RPS_TODO(...) RPS_ASSERT(RPS_FALSE && "RPS TODO!" __VA_ARGS__)
+
+#define RPS_TODO_RETURN(Result, ...) \
+    do \
+    { \
+        RPS_TODO(__VA_ARGS__); \
+        return (Result); \
+    } while (0)
+
+#define RPS_TODO_RETURN_NOT_IMPLEMENTED() RPS_TODO_RETURN(RPS_ERROR_NOT_IMPLEMENTED, "Not Implemented!")
+
+void rpsDiagLog(const char* fmt, ...);
+
+#ifndef RPS_DIAG
+/// A macro to output debug info about an expression through rpsDiagLog.
+///
+/// The expression itself, its file and line of occurrence will be passed to rpsDiagLog.
+/// Can be overridden with any behaviour by defining RPS_DIAG before including this header.
+///
+/// @param Expr The expression to print.
+///
+/// @ingroup Core
+#define RPS_DIAG(Expr) rpsDiagLog("\nRPS_DIAG: '%s' @%s line %d.\n", Expr, __FILE__, __LINE__)
+#endif //RPS_DIAG
+
+#ifndef RPS_DIAG_MSG
+#define RPS_DIAG_MSG(Expr, Fmt, ...) \
+    rpsDiagLog("\nRPS_DIAG: '%s':\n '" Fmt "', @%s line %d.\n", Expr, __VA_ARGS__, __FILE__, __LINE__)
+#endif //RPS_DIAG_MSG
+
+#ifndef RPS_DIAG_RESULT_CODE
+#define RPS_DIAG_RESULT_CODE(Expr, Err) \
+    rpsDiagLog("\nRPS_DIAG: '%s' Result = %s(%d) @%s line %d.\n", Expr, rpsResultGetName(Err), Err, __FILE__, __LINE__)
+#endif //RPS_DIAG_RESULT_CODE
+
+/// A macro to return an error from the current function if an expression indicates this error.
+///
+/// If the result of the expression is != RPS_OK debug info will be printed for it before returning the result of the expression.
+///
+/// @param Expr The expression to check.
+///
+/// @ingroup Core
+#define RPS_V_RETURN(Expr) \
+    do \
+    { \
+        RpsResult _RPS_RESULT_TEMP__ = Expr; \
+        if (_RPS_RESULT_TEMP__ != RPS_OK) \
+        { \
+            RPS_DIAG_RESULT_CODE((#Expr), _RPS_RESULT_TEMP__); \
+            return _RPS_RESULT_TEMP__; \
+        } \
+    } while (0)
+
+/// A macro to assign the result of an expression to a variable if that result indicates an error.
+///
+/// If the result of the expression is != RPS_OK the assignee will be assigned this result and debug info will be printed for the expression.
+///
+/// @param Assignee The object to assign the result to in case of an error.
+/// @param Expr The expression to check
+///
+/// @ingroup Core
+#define RPS_ASSIGN_IF_ERROR(Assignee, Expr) \
+    do \
+    { \
+        RpsResult _RPS_RESULT_TEMP__ = Expr; \
+        if (_RPS_RESULT_TEMP__ != RPS_OK) \
+        { \
+            Assignee = _RPS_RESULT_TEMP__; \
+            RPS_DIAG_RESULT_CODE((#Expr), _RPS_RESULT_TEMP__); \
+        } \
+    } while (0)
+
+/// A macro to return a specific error code if a given condition is satisfied.
+///
+/// If the given condition is RPS_TRUE debug info will be printed and the specified error code will be returned from the current function.
+///
+/// @param Cond The condition to check.
+/// @param ErrorRet The value to return in case the condition is satisfied.
+///
+/// @ingroup Core
+#define RPS_RETURN_ERROR_IF(Cond, ErrorRet) \
+    do \
+    { \
+        if (Cond) \
+        { \
+            RPS_DIAG((#Cond)); \
+            return ErrorRet; \
+        } \
+    } while (0)
+
+#define RPS_RETURN_ERROR_IF_MSG(Cond, ErrorRet, ...) \
+    do \
+    { \
+        if (Cond) \
+        { \
+            RPS_DIAG_MSG((#Cond), __VA_ARGS__); \
+            return ErrorRet; \
+        } \
+    } while (0)
+
+/// A macro to assign a specific error code to a variable if a given condition is satisfied.
+///
+/// If the given condition is RPS_TRUE debug info will be printed and the specified error code will be assigned to Assignee; the current function does not return.
+///
+/// @param Cond The condition to check.
+/// @param ErrCode The error code to assign to Assignee in case the condition is satisfied.
+///
+/// @ingroup Core
+#define RPS_SET_ERROR_IF(Assignee, Cond, ErrCode) \
+    do \
+    { \
+        if (Cond) \
+        { \
+            RPS_DIAG((#Cond)); \
+            (Assignee) = (ErrCode); \
+        } \
+    } while (0)
+
+/// A macro to return RPS_OK if a given condition is satisfied.
+///
+/// @param Cond The condition to check.
+///
+/// @ingroup Core
+#define RPS_RETURN_OK_IF(Cond) \
+    do \
+    { \
+        if (Cond) \
+        { \
+            return RPS_OK; \
+        } \
+    } while (0)
+
+/// A macro to return RPS_ERROR_INVALID_ARGUMENTS if the arguments of a function call do not satisfy a condition.
+///
+/// @param Cond The condition to check.
+///
+/// @ingroup Core
+#define RPS_CHECK_ARGS(Cond) \
+    do \
+    { \
+        if (!(Cond)) \
+        { \
+            RPS_DIAG((#Cond)); \
+            return RPS_ERROR_INVALID_ARGUMENTS; \
+        } \
+    } while (0)
+
+/// A macro to return RPS_ERROR_OUT_OF_MEMORY if a pointer returned from a memory allocation is NULL.
+///
+/// @param Ptr The pointer to check.
+///
+/// @ingroup Core
+#define RPS_CHECK_ALLOC(Ptr) \
+    do \
+    { \
+        if (!(Ptr)) \
+        { \
+            RPS_DIAG((#Ptr)); \
+            return RPS_ERROR_OUT_OF_MEMORY; \
+        } \
+    } while (0)
+
+namespace rps
+{
+    template
+    struct RpsHandleTrait
+    {
+    };
+
+    template
+    constexpr typename RpsHandleTrait::impl_type* FromHandle(T* pHandle)
+    {
+        return reinterpret_cast::impl_type*>(pHandle);
+    }
+
+    template
+    constexpr const typename RpsHandleTrait::impl_type* FromHandle(const T* pHandle)
+    {
+        return reinterpret_cast::impl_type*>(pHandle);
+    }
+
+    template
+    constexpr typename RpsHandleTrait::impl_type** FromHandle(T** ppHandle)
+    {
+        return reinterpret_cast::impl_type**>(ppHandle);
+    }
+
+    template
+    constexpr typename RpsHandleTrait::handle_type* ToHandle(T* pImpl)
+    {
+        return reinterpret_cast::handle_type*>(pImpl);
+    }
+
+    template
+    constexpr const typename RpsHandleTrait::handle_type* ToHandle(const T* pImpl)
+    {
+        return reinterpret_cast::handle_type*>(pImpl);
+    }
+
+    template
+    constexpr T* FromOpaqueHandle(H hdl)
+    {
+        return static_cast(hdl.ptr);
+    }
+
+} // namespace rps
+
+#define RPS_ASSOCIATE_HANDLE(Type) \
+    template <> \
+    struct RpsHandleTrait \
+    { \
+        typedef rps::Type impl_type; \
+    }; \
+    template <> \
+    struct RpsHandleTrait \
+    { \
+        typedef Rps##Type##_T handle_type; \
+    };
+
+#endif // #ifndef _RPS_CORE_H_
diff --git a/src/core/rps_device.cpp b/src/core/rps_device.cpp
new file mode 100644
index 0000000..f3c0905
--- /dev/null
+++ b/src/core/rps_device.cpp
@@ -0,0 +1,176 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+
+#include "core/rps_device.hpp"
+#include "core/rps_util.hpp"
+
+namespace rps
+{
+    void* rpsDefaultMalloc(void* pContext, size_t size, size_t alignment)
+    {
+#ifdef _MSC_VER
+        return _aligned_malloc(size, alignment);
+#else
+        return aligned_alloc(alignment, rpsAlignUp(size, alignment));  // size is rounded up to a multiple of alignment
+#endif
+    }
+
+    void rpsDefaultFree(void* pContext, void* ptr)
+    {
+#ifdef _MSC_VER
+        return _aligned_free(ptr);
+#else
+        return free(ptr);
+#endif
+    }
+
+    void* rpsDefaultRealloc(void* pContext, void* pOldBuffer, size_t oldSize, size_t newSize, size_t alignment);
+
+    // Used when user Allocator provides Alloc/Free but not Realloc
+    void* rpsFallbackRealloc(
+        const RpsAllocator* pAllocator, void* pOldBuffer, size_t oldSize, size_t newSize, size_t alignment)
+    {
+        if (newSize <= oldSize)
+        {
+            return pOldBuffer;  // Shrinking (or same size): keep the existing block.
+        }
+
+        // Grow through the allocator's own Alloc/Free only. The old block is released with pfnFree, so it
+        // must not be handed to the CRT realloc(): a custom allocator's memory need not be CRT heap memory.
+        void* pNewBuffer = pAllocator->pfnAlloc(pAllocator->pContext, newSize, alignment);
+
+        if (!pNewBuffer)
+        {
+            return nullptr;  // Like realloc(): on failure the old block stays valid and owned by the caller.
+        }
+
+        if (pOldBuffer)
+        {
+            memcpy(pNewBuffer, pOldBuffer, oldSize);  // oldSize < newSize is guaranteed at this point.
+            pAllocator->pfnFree(pAllocator->pContext, pOldBuffer);
+        }
+        return pNewBuffer;
+    }
+
+    static const RpsAllocator s_DefaultAllocator = {
+        &rpsDefaultMalloc,   // pfnAlloc;
+        &rpsDefaultFree,     // pfnFree;
+        &rpsDefaultRealloc,  // pfnRealloc;
+        nullptr,             // pContext;
+    };
+
+    void* rpsDefaultRealloc(void* pContext, void* pOldBuffer, size_t oldSize, size_t newSize, size_t alignment)
+    {
+#ifdef _MSC_VER
+        return _aligned_realloc(pOldBuffer, newSize, alignment);
+#else
+        return rpsFallbackRealloc(&s_DefaultAllocator, pOldBuffer, oldSize, newSize, alignment);
+#endif
+    }
+
+    void rpsDefaultPrint(void* pUserContext, const char* format, ...)
+    {
+        va_list vl;
+        va_start(vl, format);
+        vprintf(format, vl);
+        va_end(vl);
+    }
+
+    void rpsDefaultVPrint(void* pUserContext, const char* format, va_list vl)
+    {
+        vprintf(format, vl);
+    }
+
+    static const RpsPrinter s_DefaultPrinter = {
+        &rpsDefaultPrint,   // pfnPrintf;
+        &rpsDefaultVPrint,  // pfnVPrintf;
+        nullptr,            // pContext;
+    };
+
+    RpsResult Device::Create(const RpsDeviceCreateInfo* pCreateInfo, Device** ppDevice)
+    {
+        RPS_CHECK_ARGS(ppDevice);
+
+        RpsDeviceCreateInfo createInfo = pCreateInfo ? *pCreateInfo : RpsDeviceCreateInfo{};
+
+        if (!createInfo.allocator.pfnAlloc)
+        {
+            createInfo.allocator = s_DefaultAllocator;
+        }
+
+        if (!createInfo.printer.pfnPrintf)
+        {
+            createInfo.printer = s_DefaultPrinter;
+        }
+
+        Device* pDevice;
+        void* pPrivateData;
+
+        void* pMemory = AllocateCompound(
+            createInfo.allocator, &pDevice, CompoundEntry(&pPrivateData, createInfo.privateDataAllocInfo));
+
+        RPS_CHECK_ALLOC(pMemory);
+
+        RPS_ASSERT(pMemory == pDevice);
+
+        new (pMemory) Device(createInfo);
+
+        pDevice->m_pPrivateData = (createInfo.privateDataAllocInfo.size > 0) ? pPrivateData : nullptr;
+
+        *ppDevice = pDevice;
+
+        return RPS_OK;
+    }
+
+    void Device::Destroy()
+    {
+        RpsAllocator allocator = Allocator();  // Copy first: the member is gone once the destructor has run.
+
+        this->~Device();
+
+        allocator.pfnFree(allocator.pContext, this);
+    }
+
+    Device::Device(const RpsDeviceCreateInfo& createInfo)
+        : m_allocator(createInfo.allocator)
+        , m_printer(createInfo.printer)
+        , m_pfnOnDestory(createInfo.pfnDeviceOnDestroy)
+        , m_pPrivateData(nullptr)
+    {
+    }
+
+    Device::~Device()
+    {
+        if (m_pfnOnDestory)
+        {
+            m_pfnOnDestory(ToHandle(this));
+        }
+    }
+
+} // namespace rps
+
+extern "C" {
+
+RpsResult rpsDeviceCreate(const RpsDeviceCreateInfo* pCreateInfo, RpsDevice* pDevice)
+{
+    return rps::Device::Create(pCreateInfo, rps::FromHandle(pDevice));
+}
+
+void rpsDeviceDestroy(RpsDevice device)
+{
+    if (device != RPS_NULL_HANDLE)
+    {
+        rps::FromHandle(device)->Destroy();
+    }
+}
+
+void* rpsDeviceGetPrivateData(RpsDevice hDevice)
+{
+    return hDevice ? rps::FromHandle(hDevice)->GetPrivateData() : nullptr;
+}
+
+} // extern "C"
diff --git a/src/core/rps_device.hpp b/src/core/rps_device.hpp
new file mode 100644
index 0000000..985ecac
--- /dev/null
+++ b/src/core/rps_device.hpp
@@ -0,0 +1,95 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+
+#ifndef _RPS_DEVICE_INTERNAL_H_
+#define _RPS_DEVICE_INTERNAL_H_
+
+#include "core/rps_util.hpp"
+
+namespace rps
+{
+    class Device  // Implementation object behind the RpsDevice handle: owns the allocator and printer.
+    {
+        RPS_CLASS_NO_COPY_MOVE(Device);
+
+    private:
+        Device(const RpsDeviceCreateInfo& createInfo);  // Private: instances are built by Create() only.
+        ~Device();                                      // Invokes the pfnDeviceOnDestroy callback, if one was given.
+
+    public:
+        static RpsResult Create(const RpsDeviceCreateInfo* pCreateInfo, Device** ppDevice);  // NULL info = defaults.
+        void Destroy();  // Runs the destructor, then frees the device memory with the device's own allocator.
+
+        void* GetPrivateData() const  // nullptr when no private data size was requested at creation.
+        {
+            return m_pPrivateData;
+        }
+
+        void Print(const char* format, va_list vl) const
+        {
+            m_printer.pfnVPrintf(m_printer.pContext, format, vl);  // NOTE(review): Create() only checks pfnPrintf; confirm pfnVPrintf is always set.
+        }
+
+        template
+        void Print(const char* format, TArgs... args) const
+        {
+            m_printer.pfnPrintf(m_printer.pContext, format, args...);
+        }
+
+        void* Allocate(const AllocInfo& allocInfo) const  // Convenience overload taking size and alignment as a pair.
+        {
+            return Allocate(allocInfo.size, allocInfo.alignment);
+        }
+
+        void* Allocate(size_t sizeInBytes, size_t alignment) const
+        {
+            return m_allocator.pfnAlloc(m_allocator.pContext, sizeInBytes, alignment);
+        }
+
+        void* AllocateZeroed(size_t sizeInBytes, size_t alignment) const
+        {
+            void* pMemory = Allocate(sizeInBytes, alignment);
+            if (pMemory)  // Allocation failure is passed through as nullptr.
+            {
+                memset(pMemory, 0, sizeInBytes);
+            }
+            return pMemory;
+        }
+
+        void* Reallocate(void* originalBuffer, size_t originalSize, size_t newSize, size_t alignment) const
+        {
+            return m_allocator.pfnRealloc(m_allocator.pContext, originalBuffer, originalSize, newSize, alignment);  // NOTE(review): called unconditionally; confirm user allocators must supply pfnRealloc.
+        }
+
+        void Free(void* buffer) const
+        {
+            m_allocator.pfnFree(m_allocator.pContext, buffer);
+        }
+
+        // TODO:
+        const RpsAllocator& Allocator() const
+        {
+            return m_allocator;
+        }
+
+        const RpsPrinter& Printer() const
+        {
+            return m_printer;
+        }
+
+    private:
+        const RpsAllocator m_allocator;         // Allocator copied from the create info (defaults if none given).
+        const RpsPrinter m_printer;             // Printer copied from the create info (defaults if none given).
+        PFN_rpsDeviceOnDestroy m_pfnOnDestory;  // Optional destroy callback. NOTE(review): "Destory" is a typo; a rename must also touch rps_device.cpp.
+        void* m_pPrivateData;                   // Private data block allocated together with the device, or nullptr.
+    };
+
+    RPS_ASSOCIATE_HANDLE(Device);
+
+} // namespace rps
+
+#endif //_RPS_DEVICE_INTERNAL_H_
diff --git a/src/core/rps_graph.cpp b/src/core/rps_graph.cpp
new file mode 100644
index 0000000..00d527c
--- /dev/null
+++ b/src/core/rps_graph.cpp
@@ -0,0 +1,89 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#include "core/rps_graph.hpp" +#include "core/rps_device.hpp" +#include "core/rps_util.hpp" + +namespace rps +{ + Graph::Graph(const Device& device, Arena& arena) + : m_nodes(&arena) + , m_edges(&arena) + , m_subgraphs(&arena) + , m_edgeListPool(m_edges) + { + } + + NodeId Graph::AddNode(int32_t cmdId) + { + NodeId nodeId = NodeId(m_nodes.size()); + m_nodes.emplace_back(cmdId); + return nodeId; + } + + NodeId Graph::CloneNode(NodeId srcNodeId, int32_t cmdId) + { + NodeId newNodeId = AddNode(cmdId); + + RPS_RETURN_ERROR_IF(newNodeId == RPS_INDEX_NONE_U32, newNodeId); + + const Node* pSrcNode = GetNode(srcNodeId); + Node* pNewNode = GetNode(newNodeId); + + pNewNode->barrierScope = pSrcNode->barrierScope; + pNewNode->subgraph = pSrcNode->subgraph; + + const size_t edgeCapacity = m_edges.size() + (pSrcNode->inEdges.size() + pSrcNode->outEdges.size()) * 2; + m_edges.reserve(edgeCapacity); + + RPS_RETURN_ERROR_IF(m_edges.capacity() < edgeCapacity, RPS_INDEX_NONE_U32); + + for (auto e : pSrcNode->inEdges.Get(m_edges)) + { + AddEdge(e.src, newNodeId); + } + + for (auto e : pSrcNode->outEdges.Get(m_edges)) + { + AddEdge(newNodeId, e.dst); + } + + return newNodeId; + } + + void Graph::AddToEdgeList(Span& edgeList, Edge newEdge) + { + m_edgeListPool.push_to_span(edgeList, newEdge); + } + + bool Graph::IsParentSubgraphOf(uint32_t parentSubgraphId, uint32_t childSubgraphId) const + { + if ((parentSubgraphId == RPS_INDEX_NONE_U32) || (childSubgraphId == RPS_INDEX_NONE_U32)) + { + return false; + } + + uint32_t currIdx = childSubgraphId; + + while (parentSubgraphId < currIdx) + { + currIdx = m_subgraphs[currIdx].parentSubgraph; + } + + return currIdx == parentSubgraphId; + } + + void Graph::Reset() + { + 
m_nodes.reset(); + m_edges.reset(); + m_subgraphs.reset(); + m_edgeListPool.reset(); + } + +} // namespace rps diff --git a/src/core/rps_graph.hpp b/src/core/rps_graph.hpp new file mode 100644 index 0000000..a7b25e1 --- /dev/null +++ b/src/core/rps_graph.hpp @@ -0,0 +1,155 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_GRAPH_H_ +#define _RPS_GRAPH_H_ + +#include "rps_core.hpp" +#include "rps_util.hpp" +#include "rps_device.hpp" + +namespace rps +{ + using NodeId = uint32_t; + using SubgraphId = uint32_t; + + struct Edge + { + NodeId src; + NodeId dst; + }; + + struct Node + { + Span inEdges; + Span outEdges; + int32_t cmdId; + uint32_t subgraph = RPS_INDEX_NONE_U32; + uint32_t barrierScope = 0; + + Node() + { + } + + Node(int32_t inCmdId) + : cmdId(inCmdId) + { + } + + bool IsTransition() const + { + return cmdId < 0; + } + + uint32_t GetTransitionId() const + { + RPS_ASSERT(IsTransition()); + return uint32_t(-cmdId); + } + + uint32_t GetCmdId() const + { + RPS_ASSERT(!IsTransition()); + return uint32_t(cmdId); + } + }; + + struct Subgraph + { + uint32_t parentSubgraph; + RpsSubgraphFlags flags; + NodeId beginNode; + NodeId endNode; + + Subgraph() = default; + + Subgraph(uint32_t inParentSubgraph, RpsSubgraphFlags inFlags, NodeId inBeginNode) + : parentSubgraph(inParentSubgraph) + , flags(inFlags) + , beginNode(inBeginNode) + , endNode(inBeginNode) + { + } + + bool IsAtomic() const + { + return rpsAnyBitsSet(flags, RPS_SUBGRAPH_FLAG_ATOMIC); + } + + bool IsSequential() const + { + return rpsAnyBitsSet(flags, RPS_SUBGRAPH_FLAG_SEQUENTIAL); + } + }; + + class Graph + { + public: + Graph(const Device& device, Arena& arena); + + Node* GetNode(NodeId nodeId) + { + return &m_nodes[nodeId]; + } + const Node* GetNode(NodeId nodeId) const + { + 
return &m_nodes[nodeId]; + } + + ConstArrayRef GetNodes() const + { + return m_nodes.range_all(); + } + + ConstArrayRef GetEdges() const + { + return m_edges.range_all(); + } + + ConstArrayRef GetSubgraphs() const + { + return m_subgraphs.range_all(); + } + + NodeId AddNode(int32_t cmdId); + NodeId CloneNode(NodeId srcNode, int32_t cmdId); + + void AddEdge(NodeId fromNode, NodeId toNode) + { + AddToEdgeList(m_nodes[fromNode].outEdges, Edge{fromNode, toNode}); + AddToEdgeList(m_nodes[toNode].inEdges, Edge{fromNode, toNode}); + } + + SubgraphId AddSubgraph(uint32_t parentId, RpsSubgraphFlags flags, NodeId beginNode) + { + SubgraphId resultId = SubgraphId(m_subgraphs.size()); + m_subgraphs.push_back(Subgraph{parentId, flags, beginNode}); + return ((resultId + 1) == m_subgraphs.size()) ? resultId : RPS_INDEX_NONE_U32; + } + + Subgraph* GetSubgraph(SubgraphId subgraphId) + { + return &m_subgraphs[subgraphId]; + } + + bool IsParentSubgraphOf(uint32_t parentSubgraphId, uint32_t childSubgraphId) const; + + void Reset(); + + private: + void AddToEdgeList(Span& edgeList, Edge newEdge); + + private: + ArenaVector m_nodes; + ArenaVector m_edges; + ArenaVector m_subgraphs; + + SpanPool> m_edgeListPool; + }; +} + +#endif //_RPS_GRAPH_H_ diff --git a/src/core/rps_persistent_index_generator.hpp b/src/core/rps_persistent_index_generator.hpp new file mode 100644 index 0000000..01d8811 --- /dev/null +++ b/src/core/rps_persistent_index_generator.hpp @@ -0,0 +1,275 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#ifndef RPS_PERSISTENT_ID_GENERATOR_HPP +#define RPS_PERSISTENT_ID_GENERATOR_HPP + +#include "core/rps_core.hpp" +#include "core/rps_util.hpp" + +namespace rps +{ + template + class PersistentIdGenerator + { + struct BlockInfo + { + uint32_t numResources[NumResourceKinds]; + uint32_t localIndex; + uint32_t numChildren; + + BlockInfo() + : localIndex(RPS_INDEX_NONE_U32) + , numChildren(0) + { + } + + bool IsInitialized() + { + return localIndex != RPS_INDEX_NONE_U32; + } + }; + + struct BlockInstance + { + uint32_t isReached : 1; + uint32_t blockId : 31; + uint32_t nextIteration; + uint32_t offsets[NumResourceKinds]; + + BlockInstance() + : isReached(RPS_FALSE) + , blockId(0) + , nextIteration(RPS_INDEX_NONE_U32) + , offsets{} + { + } + }; + + public: + PersistentIdGenerator(Arena& allocator) + : m_numIndicesTotal{} + , m_blocks(&allocator) + , m_blockStack(&allocator) + , m_blockInstances(&allocator) + { + } + + RpsResult EnterFunction(uint32_t blockId, + ConstArrayRef resourceCounts, + uint32_t localLoopIndex, + uint32_t numChildren) + { + RPS_ASSERT(m_blockStack.empty()); // Currently everything is inlined. + + InitBlockInfo(blockId, resourceCounts, localLoopIndex, numChildren); + + RPS_ASSERT(m_blockInstances.empty() || (m_blockInstances.front().blockId == 0)); + + uint32_t rootBlockInstanceId = 0; + + if (m_blockInstances.empty()) + { + rootBlockInstanceId = AllocBlockInstances(1 + numChildren); + RPS_RETURN_ERROR_IF(rootBlockInstanceId == RPS_INDEX_NONE_U32, RPS_ERROR_OUT_OF_MEMORY); + + RPS_ASSERT(rootBlockInstanceId == 0); // Currently everything is inlined. 
+ } + + InitBlockInstance(blockId, rootBlockInstanceId); + + m_currentBlockInstanceId = rootBlockInstanceId; + + return RPS_OK; + } + + RpsResult EnterLoop(uint32_t blockId, + ConstArrayRef resourceCounts, + uint32_t localLoopIndex, + uint32_t numChildren) + { + RPS_V_RETURN(InitBlockInfo(blockId, resourceCounts, localLoopIndex, numChildren)); + + RPS_CHECK_ALLOC(m_blockStack.push_back(m_currentBlockInstanceId)); + + return RPS_OK; + } + + RpsResult ExitLoop(uint32_t blockId) + { + const uint32_t parentBlockInstance = m_blockStack.back(); + + m_blockStack.pop_back(); + + m_currentBlockInstanceId = parentBlockInstance; + + return RPS_OK; + } + + RpsResult LoopIteration(uint32_t blockId) + { + RPS_ASSERT(!m_blockStack.empty()); + + const uint32_t parentId = m_blockStack.back(); + const bool bFirstIteration = (parentId == m_currentBlockInstanceId); + + const BlockInfo& currBlockInfo = m_blocks[blockId]; + + uint32_t currBlockInstanceId = RPS_INDEX_NONE_U32; + uint32_t prevBlockInstanceId = m_currentBlockInstanceId; + + BlockInstance* pPrevBlock = &m_blockInstances[prevBlockInstanceId]; + + if (bFirstIteration) + { + prevBlockInstanceId = m_currentBlockInstanceId + 1 + currBlockInfo.localIndex; + pPrevBlock = &m_blockInstances[prevBlockInstanceId]; + } + else + { + RPS_RETURN_ERROR_IF(blockId != pPrevBlock->blockId, RPS_ERROR_INVALID_PROGRAM); + } + + if (pPrevBlock->nextIteration == UINT32_MAX) + { + uint32_t newRangeOffset = AllocBlockInstances(1 + currBlockInfo.numChildren); + RPS_RETURN_ERROR_IF(newRangeOffset == RPS_INDEX_NONE_U32, RPS_ERROR_OUT_OF_MEMORY); + + pPrevBlock = &m_blockInstances[prevBlockInstanceId]; + pPrevBlock->nextIteration = newRangeOffset; + } + + currBlockInstanceId = pPrevBlock->nextIteration; + + InitBlockInstance(blockId, currBlockInstanceId); + m_currentBlockInstanceId = currBlockInstanceId; + + return RPS_OK; + } + + void Reset() + { + std::fill(std::begin(m_numIndicesTotal), std::end(m_numIndicesTotal), 0); + + m_blocks.reset(); + 
m_blockStack.reset(); + m_blockInstances.reset(); + + m_currentBlockInstanceId = RPS_INDEX_NONE_U32; + } + + void Clear() + { + std::fill(std::begin(m_numIndicesTotal), std::end(m_numIndicesTotal), 0); + + m_blocks.clear(); + m_blockStack.clear(); + m_blockInstances.clear(); + + m_currentBlockInstanceId = RPS_INDEX_NONE_U32; + } + + template + TResult Generate(uint32_t localIndex) + { + const BlockInstance& currBlockInstance = m_blockInstances[m_currentBlockInstanceId]; + if (localIndex >= m_blocks[currBlockInstance.blockId].numResources[IndexKind]) + { + return MakeResult(RPS_INDEX_NONE_U32, RPS_ERROR_INVALID_PROGRAM); + } + + const uint32_t resourceIdx = localIndex + currBlockInstance.offsets[IndexKind]; + return resourceIdx; + } + + private: + RpsResult InitBlockInfo(uint32_t blockId, + ConstArrayRef resourceCounts, + uint32_t localLoopIndex, + uint32_t numChildren) + { + if (blockId >= m_blocks.size()) + { + m_blocks.resize(blockId + 1, BlockInfo()); + } + + auto& blockInfo = m_blocks[blockId]; + + if (!blockInfo.IsInitialized()) + { + std::copy(resourceCounts.begin(), resourceCounts.end(), blockInfo.numResources); + blockInfo.localIndex = localLoopIndex; + blockInfo.numChildren = numChildren; + } + else + { + RPS_RETURN_ERROR_IF(!std::equal(resourceCounts.begin(), resourceCounts.end(), blockInfo.numResources) || + (blockInfo.localIndex != localLoopIndex) || + (blockInfo.numChildren != numChildren), + RPS_ERROR_INVALID_PROGRAM); + } + + return RPS_OK; + } + + uint32_t AllocBlockInstances(uint32_t count) + { + const uint32_t offset = uint32_t(m_blockInstances.size()); + + if (m_blockInstances.resize(m_blockInstances.size() + count, BlockInstance())) + { + return offset; + } + + return RPS_INDEX_NONE_U32; + } + + RpsResult InitBlockInstance(uint32_t blockId, uint32_t instanceId) + { + const BlockInfo& blockInfo = m_blocks[blockId]; + BlockInstance& blockInstance = m_blockInstances[instanceId]; + + if (!blockInstance.isReached) + { + blockInstance.isReached = 
RPS_TRUE; + blockInstance.blockId = blockId; + blockInstance.nextIteration = UINT32_MAX; + + for (uint32_t iResourceKind = 0; iResourceKind < NumResourceKinds; iResourceKind++) + { + blockInstance.offsets[iResourceKind] = m_numIndicesTotal[iResourceKind]; + m_numIndicesTotal[iResourceKind] += blockInfo.numResources[iResourceKind]; + } + } + else + { + RPS_RETURN_ERROR_IF((blockInstance.blockId != blockId), RPS_ERROR_INVALID_PROGRAM); + + for (uint32_t iResourceKind = 0; iResourceKind < NumResourceKinds; iResourceKind++) + { + RPS_RETURN_ERROR_IF((blockInstance.offsets[iResourceKind] + blockInfo.numResources[iResourceKind]) > + m_numIndicesTotal[iResourceKind], + RPS_ERROR_INVALID_PROGRAM); + } + } + + return RPS_OK; + } + + private: + uint32_t m_numIndicesTotal[NumResourceKinds]; + + ArenaVector m_blocks; + ArenaVector m_blockStack; + ArenaVector m_blockInstances; + + uint32_t m_currentBlockInstanceId = RPS_INDEX_NONE_U32; + }; + +} // namespace rps + +#endif //RPS_PERSISTENT_ID_GENERATOR_HPP diff --git a/src/core/rps_result.cpp b/src/core/rps_result.cpp new file mode 100644 index 0000000..89b83f4 --- /dev/null +++ b/src/core/rps_result.cpp @@ -0,0 +1,56 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#include "rps/core/rps_result.h" +#include "core/rps_core.hpp" + +#define RPS_DEFINE_RESULT_INFO( Name ) { #Name, Name } + +const char* rpsResultGetName(RpsResult result) +{ + static constexpr struct { + const char* name; + RpsResult value; + } resultInfo[] = { + RPS_DEFINE_RESULT_INFO(RPS_ERROR_INTERNAL_ERROR), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_RUNTIME_API_ERROR), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_NOT_SUPPORTED), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_TYPE_MISMATCH), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_UNSUPPORTED_MODULE_VERSION), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_INVALID_PROGRAM), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_VALIDATION_FAILED), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_RANGE_OVERLAPPING), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_INTEGER_OVERFLOW), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_NOT_IMPLEMENTED), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_KEY_DUPLICATED), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_KEY_NOT_FOUND), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_INTEROP_DATA_LAYOUT_MISMATCH), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_COMMAND_ALREADY_FINAL), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_INDEX_OUT_OF_BOUNDS), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_UNKNOWN_NODE), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_UNSUPPORTED_VERSION_TOO_NEW), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_UNSUPPORTED_VERSION_TOO_OLD), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_INVALID_FILE_FORMAT), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_FILE_NOT_FOUND), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_OUT_OF_MEMORY), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_INVALID_OPERATION), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_INVALID_DATA), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_INVALID_ARGUMENTS), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_UNRECOGNIZED_COMMAND), + RPS_DEFINE_RESULT_INFO(RPS_ERROR_UNSPECIFIED), + RPS_DEFINE_RESULT_INFO(RPS_OK) + }; + static_assert(RPS_COUNTOF(resultInfo) == RPS_RESULT_CODE_COUNT, "RpsResult name table needs update."); + for (size_t i = 0; i < RPS_COUNTOF(resultInfo); i++) { + if (resultInfo[i].value == result) { + return resultInfo[i].name; + } + } + return ""; +} + 
+#undef RPS_DEFINE_RESULT_INFO
\ No newline at end of file
diff --git a/src/core/rps_util.hpp b/src/core/rps_util.hpp
new file mode 100644
index 0000000..8df373f
--- /dev/null
+++ b/src/core/rps_util.hpp
@@ -0,0 +1,2709 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+
+#ifndef _RPS_UTIL_HPP_
+#define _RPS_UTIL_HPP_
+
+#include "rps_core.hpp"
+
+// NOTE(review): several `template` headers in this file lost their parameter lists in this copy
+// of the patch (angle-bracket text is missing) - confirm against the original file.
+
+// Returns the smaller of the two values (lhs when they compare equal).
+template
+static constexpr T rpsMin(const T lhs, const T rhs)
+{
+    return (lhs < rhs) ? lhs : rhs;
+}
+
+// Returns the larger of the two values (lhs when they compare equal).
+template
+static constexpr T rpsMax(const T lhs, const T rhs)
+{
+    return (lhs < rhs) ? rhs : lhs;
+}
+
+// Clamps val into [minVal, maxVal].
+template
+static constexpr T rpsClamp(const T val, const T minVal, const T maxVal)
+{
+    return rpsMin(rpsMax(val, minVal), maxVal);
+}
+
+// Returns the number of set bits in value. Uses a compiler intrinsic when one is advertised,
+// otherwise a parallel bit-summing fallback.
+// NOTE(review): this branch tests `#elif RPS_HAS_BUILTIN_POP_COUNT` by value while the other
+// helpers use `defined(...)` - confirm how rps_core.hpp defines the macro.
+static inline uint32_t rpsCountBits(uint32_t value)
+{
+#ifdef RPS_HAS_POPCNT
+    return __popcnt(value);
+#elif RPS_HAS_BUILTIN_POP_COUNT
+    return (uint32_t)__builtin_popcount(value);
+#else
+    // Sum adjacent 1-, 2-, 4-, 8- and 16-bit fields in turn.
+    uint32_t a = (value & 0x55555555u) + ((value >> 1u) & 0x55555555u);
+    uint32_t b = (a & 0x33333333u) + ((a >> 2u) & 0x33333333u);
+    uint32_t c = (b & 0x0F0F0F0Fu) + ((b >> 4u) & 0x0F0F0F0Fu);
+    uint32_t d = (c & 0x00FF00FFu) + ((c >> 8u) & 0x00FF00FFu);
+    uint32_t e = (d & 0xFFFFu) + (d >> 16u);
+    return e;
+#endif
+}
+
+// Returns the position of the highest set bit counted from the most significant bit
+// (i.e. the number of leading zero bits); returns 32 when value is 0.
+static inline uint32_t rpsFirstBitHigh(uint32_t value)
+{
+#ifdef RPS_HAS_BITSCAN
+    unsigned long idx;
+    (void)_BitScanReverse(&idx, value);
+    return value ? (31 - idx) : 32;
+#elif defined(RPS_HAS_BUILTIN_CLZ_CTZ)
+    return value ? __builtin_clz(value) : 32;
+#else
+    for (uint32_t i = 0; i < 32; i++)
+    {
+        if (value & 0x80000000u)
+            return i;
+        value = value << 1u;
+    }
+    return 32;
+#endif
+}
+
+// 64-bit overload of rpsFirstBitHigh; returns 64 when value is 0.
+static inline uint32_t rpsFirstBitHigh(uint64_t value)
+{
+#ifdef RPS_HAS_BITSCAN
+    unsigned long idx;
+    (void)_BitScanReverse64(&idx, value);
+    return value ? (63 - idx) : 64;
+#elif defined(RPS_HAS_BUILTIN_CLZ_CTZ)
+    return value ? __builtin_clzll(value) : 64;
+#else
+    for (uint32_t i = 0; i < 64; i++)
+    {
+        if (value & 0x8000'0000'0000'0000u)
+            return i;
+        value = value << 1u;
+    }
+    return 64;
+#endif
+}
+
+// Returns the index of the lowest set bit counted from the least significant bit;
+// returns 32 when value is 0.
+static inline uint32_t rpsFirstBitLow(uint32_t value)
+{
+#ifdef RPS_HAS_BITSCAN
+    unsigned long idx;
+    (void)_BitScanForward(&idx, value);
+    return value ? idx : 32;
+#elif defined(RPS_HAS_BUILTIN_CLZ_CTZ)
+    return value ? __builtin_ctz(value) : 32;
+#else
+    for (uint32_t i = 0; i < 32; i++)
+    {
+        if (value & 1u)
+            return i;
+        value = value >> 1u;
+    }
+    return 32;
+#endif
+}
+
+// 64-bit overload of rpsFirstBitLow; returns 64 when value is 0.
+static inline uint32_t rpsFirstBitLow(uint64_t value)
+{
+#ifdef RPS_HAS_BITSCAN
+    unsigned long idx;
+    (void)_BitScanForward64(&idx, value);
+    return value ? idx : 64;
+#elif defined(RPS_HAS_BUILTIN_CLZ_CTZ)
+    return value ? __builtin_ctzll(value) : 64;
+#else
+    for (uint32_t i = 0; i < 64; i++)
+    {
+        if (value & 1ull)
+            return i;
+        value = value >> 1u;
+    }
+    return 64;
+#endif
+}
+
+// Reverses the bit order of a 32-bit value by swapping progressively larger bit groups.
+static constexpr uint32_t rpsReverseBits32(uint32_t v)
+{
+    v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1);
+    v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2);
+    v = ((v >> 4) & 0x0f0f0f0fu) | ((v & 0x0f0f0f0fu) << 4);
+    v = ((v >> 8) & 0x00ff00ffu) | ((v & 0x00ff00ffu) << 8);
+    return ((v >> 16) & 0xffffu) | ((v & 0xffffu) << 16);
+}
+
+// Rounds v up to the next power of two (a power of two is returned unchanged).
+// Because of unsigned wrap-around, 0 maps to 0, as does any v above 2^31.
+static constexpr uint32_t rpsRoundUpToPowerOfTwo(uint32_t v)
+{
+    v--;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v++;
+
+    return v;
+}
+
+// 64-bit overload of rpsRoundUpToPowerOfTwo; 0 and values above 2^63 map to 0.
+static constexpr uint64_t rpsRoundUpToPowerOfTwo(uint64_t v)
+{
+    v--;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v |= v >> 32;
+    v++;
+
+    return v;
+}
+
+// True when v has at most one bit set; note that this also returns true for v == 0.
+template ::value && std::is_unsigned::value>::type>
+static constexpr bool rpsIsPowerOfTwo(T v)
+{
+    return (v & (v - 1u)) == 0;
+}
+
+// Integer division rounding up. divisor must be non-zero.
+template ::value>::type>
+static constexpr T rpsDivRoundUp(T dividend, T divisor)
+{
+    return (dividend + divisor - 1) / divisor;
+}
+
+// Rounds offset up to a multiple of alignment; an alignment of 0 returns offset unchanged.
+// The mask arithmetic is only correct for power-of-two alignments.
+template ::value>::type>
+static constexpr T rpsAlignUp(T offset, T alignment)
+{
+    return alignment ? (offset + (alignment - T(1))) & ~(alignment - T(1)) : offset;
+}
+
+// Pointer flavour of rpsAlignUp.
+static inline void* rpsAlignUpPtr(void* ptr, size_t alignment)
+{
+    return alignment ? reinterpret_cast(((uintptr_t)ptr + (alignment - (size_t)1)) & ~(alignment - (size_t)1))
+                     : ptr;
+}
+
+// Const-pointer flavour of rpsAlignUp.
+static inline const void* rpsAlignUpConstPtr(const void* ptr, size_t alignment)
+{
+    return alignment ? reinterpret_cast(((uintptr_t)ptr + (alignment - (size_t)1)) & ~(alignment - (size_t)1))
+                     : ptr;
+}
+
+// Number of bytes that must be added to ptr to reach the next alignment boundary
+// (0 when already aligned or when alignment is 0).
+static inline size_t rpsPaddingSize(void* ptr, size_t alignment)
+{
+    if (alignment == 0)
+        return 0;
+
+    const uintptr_t mask = uintptr_t(alignment - size_t(1));
+    return size_t((alignment - (uintptr_t(ptr) & mask)) & mask);
+}
+
+// True when ptr is a multiple of alignment; an alignment of 0 always passes.
+static constexpr RpsBool rpsIsPointerAlignedTo(const void* ptr, size_t alignment)
+{
+    return (alignment == 0) || (0 == (((uintptr_t)ptr) & ((uintptr_t)alignment - 1)));
+}
+
+// Byte size of elementCount elements, computed in 32 bits and rounded up to a multiple of 8.
+static constexpr uint32_t rpsDataSize(const size_t elementSize, const uint32_t elementCount)
+{
+    return rpsAlignUp((uint32_t)elementSize * elementCount, 8u);
+}
+
+// Advances ptr by size bytes.
+static constexpr void* rpsBytePtrInc(void* ptr, size_t size)
+{
+    return (uint8_t*)ptr + size;
+}
+
+// Advances a const pointer by size bytes.
+static constexpr const void* rpsBytePtrInc(const void* ptr, size_t size)
+{
+    return (const uint8_t*)ptr + size;
+}
+
+// Advances ptr by size bytes and casts the result to T*.
+template
+static constexpr T* rpsBytePtrInc(void* ptr, size_t size)
+{
+    return static_cast(rpsBytePtrInc(ptr, size));
+}
+
+// True when every bit of `bits` is set in `value`.
+template ::value || std::is_enum::value) &&
+                                  (std::is_integral::value || std::is_enum::value)>::type>
+constexpr bool rpsAllBitsSet(T value, U bits)
+{
+    return (((value) & static_cast(bits)) == static_cast(bits));
+}
+
+// True when at least one bit of `bits` is set in `value`.
+template ::value || std::is_enum::value) &&
+                                  (std::is_integral::value || std::is_enum::value)>::type>
+constexpr bool rpsAnyBitsSet(T value, U bits)
+{
+    return ((value) & static_cast(bits)) != T(0);
+}
+
+namespace rps
+{
+
+#if RPS_DEBUG && !RPS_DISABLE_MEMORY_DEBUG_FILL
+#define RPS_DEBUG_FILL_MEMORY_ON_ALLOC(Ptr, Size) ((void)((Ptr) ? memset((Ptr), 0xBE, (Size)) : nullptr)) +#define RPS_DEBUG_FILL_MEMORY_ON_POOL_ALLOC(Ptr, Size) ((void)((Ptr) ? memset((Ptr), 0xB0, (Size)) : nullptr)) +#define RPS_DEBUG_FILL_MEMORY_ON_POOL_FREE(Ptr, Size) ((void)((Ptr) ? memset((Ptr), 0xF0, (Size)) : nullptr)) +#define RPS_DEBUG_FILL_MEMORY_ON_FREE(Ptr, Size) ((void)((Ptr) ? memset((Ptr), 0xFE, (Size)) : nullptr)) +#else //RPS_DEBUG +#define RPS_DEBUG_FILL_MEMORY_ON_ALLOC(Ptr, Size) +#define RPS_DEBUG_FILL_MEMORY_ON_POOL_ALLOC(Ptr, Size) +#define RPS_DEBUG_FILL_MEMORY_ON_POOL_FREE(Ptr, Size) +#define RPS_DEBUG_FILL_MEMORY_ON_FREE(Ptr, Size) +#endif //RPS_DEBUG + + struct AllocInfo : public RpsAllocInfo + { + public: + AllocInfo() + : AllocInfo(0, 0) + { + } + + AllocInfo(size_t size_, size_t alignment_) + : RpsAllocInfo{size_, alignment_} + { + } + + AllocInfo(const RpsAllocInfo& allocInfo) + : RpsAllocInfo{allocInfo} + { + } + + size_t Append(const AllocInfo& allocInfo) + { + return Append(allocInfo.size, allocInfo.alignment); + } + + size_t Append(size_t size_, size_t alignment_) + { + const size_t offset = rpsAlignUp(size, alignment_); + size = offset + size_; + alignment = rpsMax(alignment, alignment_); + + return offset; + } + + size_t Append(size_t elementSize, size_t numElements, size_t alignment_) + { + return Append(elementSize * numElements, alignment_); + } + + template + size_t Append(size_t count = 1) + { + return Append(sizeof(T) * count, alignof(T)); + } + + template + static AllocInfo FromType(size_t count = 1) + { + return AllocInfo(sizeof(T) * count, alignof(T)); + } + }; + + template + class ArrayRef + { + public: + typedef T value_type; + typedef T* iterator; + typedef const T* const_iterator; + + typedef std::reverse_iterator reverse_iterator; + typedef std::reverse_iterator const_reverse_iterator; + + constexpr ArrayRef() = default; + + ArrayRef(T* pData, SizeType size) + : m_pData(pData) + , m_Size(size) + { + } + + template 
+ ArrayRef(T (&arr)[N]) + : m_pData(arr) + , m_Size(N) + { + } + + template ::type> + ArrayRef(std::initializer_list initList) + : ArrayRef(initList.begin(), SizeType(initList.size())) + { + } + + ArrayRef(const ArrayRef& other) = default; + ArrayRef& operator=(const ArrayRef& other) = default; + + template + operator ArrayRef() const + { + return ArrayRef{ + data(), + static_cast(size()), + }; + } + + template < + typename U, + typename = typename std::enable_if::value && !std::is_same::value>::type> + ArrayRef(const ArrayRef& otherNonConst) + : m_pData(otherNonConst.data()) + , m_Size(otherNonConst.size()) + { + } + + template < + typename U, + typename = typename std::enable_if::value && !std::is_same::value>::type> + ArrayRef& operator=(const ArrayRef& otherNonConst) + { + Set(otherNonConst.data(), otherNonConst.size()); + return *this; + } + + void Set(T* pData, SizeType size) + { + m_pData = pData; + m_Size = size; + } + + template + void Set(T (&pArray)[N]) + { + Set(pArray, N); + } + + T& operator[](SizeType index) + { + RPS_ASSERT(index < size()); + return m_pData[index]; + } + + const T& operator[](SizeType index) const + { + RPS_ASSERT(index < size()); + return m_pData[index]; + } + + void clear() + { + Set(nullptr, 0); + } + + bool empty() const + { + return size() == 0; + } + + T* data() const + { + return m_pData; + } + + SizeType size() const + { + return m_Size; + } + + iterator begin() const + { + return data(); + } + + iterator end() const + { + return data() + size(); + } + + const_iterator cbegin() const + { + return data(); + } + + const_iterator cend() const + { + return data() + size(); + } + + reverse_iterator rbegin() const + { + return reverse_iterator(end()); + } + + reverse_iterator rend() const + { + return reverse_iterator(begin()); + } + + const_reverse_iterator crbegin() const + { + return const_reverse_iterator(cend()); + } + + const_reverse_iterator crend() const + { + return const_reverse_iterator(cbegin()); + } + + const T& 
front() const + { + return *begin(); + } + + const T& back() const + { + return *rbegin(); + } + + T& front() + { + return *begin(); + } + + T& back() + { + return *rbegin(); + } + + ArrayRef range(SizeType offset, SizeType count) + { + RPS_ASSERT(offset + count <= size()); + return {data() + offset, count}; + } + + ArrayRef range(SizeType offset) + { + RPS_ASSERT(offset <= size()); + return {data() + offset, size() - rpsMin(size(), offset)}; + } + + ArrayRef range(SizeType offset, SizeType count) const + { + RPS_ASSERT(offset + count <= size()); + return {data() + offset, count}; + } + + ArrayRef range(SizeType offset) const + { + RPS_ASSERT(offset <= size()); + return {data() + offset, size() - rpsMin(size(), offset)}; + } + + private: + T* m_pData = nullptr; + SizeType m_Size = 0; + }; + + template + using ConstArrayRef = ArrayRef; + + template + class GeneralAllocator; + + template > + class Vector + { + RPS_CLASS_NO_COPY(Vector); + + public: + typedef T value_type; + typedef T* iterator; + typedef const T* const_iterator; + + typedef std::reverse_iterator reverse_iterator; + typedef std::reverse_iterator const_reverse_iterator; + + static constexpr bool ElementTrivialCopyable = std::is_trivially_copyable::value; + static constexpr bool ElementTrivialDestructible = std::is_trivially_destructible::value; + + Vector() = default; + + Vector(size_t count, const AllocatorT& allocator) + : m_Allocator(allocator) + , m_Count(count) + , m_Capacity(count) + { + m_pArray = AllocNoConstruct(count); + ConstructElements(m_pArray, count); + } + + Vector(const AllocatorT& allocator) + : Vector(0, allocator) + { + } + + Vector(size_t count, const T& value, const AllocatorT& allocator) + : Vector(count, allocator) + { + } + + Vector(ConstArrayRef values, const AllocatorT& allocator) + : Vector(values.size(), allocator) + { + if (!values.empty() && data()) + { + std::copy(values.begin(), values.end(), data()); + } + } + + protected: + Vector(size_t count, size_t capacity, T* 
pInitialInplaceData, const AllocatorT& allocator) + : m_Allocator(allocator) + , m_Count(count) + , m_Capacity(rpsMax(count, capacity)) + { + m_pArray = (count > capacity) ? AllocNoConstruct(count) : pInitialInplaceData; + ConstructElements(m_pArray, count); + } + + public: + ~Vector() + { + CleanUp(); + } + + Vector(Vector&& other) = default; + + Vector& operator=(Vector&& other) = default; + + Vector Clone() const + { + return Vector(range_all(), *m_Allocator); + } + + template + void emplace_back(TArgs&&... args) + { + ResizeNoConstruct(m_Count + 1); + ConstructElements(&m_pArray[m_Count - 1], 1, std::forward(args)...); + } + + bool empty() const + { + return m_Count == 0; + } + + size_t size() const + { + return m_Count; + } + + size_t capacity() const + { + return m_Capacity; + } + + T* data() + { + return m_pArray; + } + + T& front() + { + RPS_ASSERT(m_Count > 0); + return m_pArray[0]; + } + + T& back() + { + RPS_ASSERT(m_Count > 0); + return m_pArray[m_Count - 1]; + } + + const T* data() const + { + return m_pArray; + } + + const T& front() const + { + RPS_ASSERT(m_Count > 0); + return m_pArray[0]; + } + + const T& back() const + { + RPS_ASSERT(m_Count > 0); + return m_pArray[m_Count - 1]; + } + + iterator begin() + { + return data(); + } + iterator end() + { + return data() + size(); + } + const_iterator cbegin() const + { + return data(); + } + const_iterator cend() const + { + return data() + size(); + } + const_iterator begin() const + { + return cbegin(); + } + const_iterator end() const + { + return cend(); + } + + reverse_iterator rbegin() + { + return reverse_iterator(end()); + } + + reverse_iterator rend() + { + return reverse_iterator(begin()); + } + + const_reverse_iterator rbegin() const + { + return const_reverse_iterator(end()); + } + + const_reverse_iterator rend() const + { + return const_reverse_iterator(begin()); + } + + const_reverse_iterator crbegin() const + { + return const_reverse_iterator(cend()); + } + + const_reverse_iterator 
crend() const + { + return const_reverse_iterator(cbegin()); + } + + ArrayRef range(size_t startIndex, size_t count) + { + return ArrayRef(data() + startIndex, count); + } + + ConstArrayRef range(size_t startIndex, size_t count) const + { + return ConstArrayRef(data() + startIndex, count); + } + + ArrayRef range_all() + { + return ArrayRef(data(), size()); + } + + ConstArrayRef range_all() const + { + return crange_all(); + } + + ConstArrayRef crange_all() const + { + return ConstArrayRef(data(), size()); + } + + void pop_front() + { + RPS_ASSERT(m_Count > 0); + remove(0); + } + void pop_back() + { + RPS_ASSERT(m_Count > 0); + resize(size() - 1); + } + void push_front(const T& src) + { + insert(0, src); + } + void push_front(T&& src) + { + insert(0, std::forward(src)); + } + + bool push_back(const T& src) + { + const size_t newIndex = size(); + if (!ResizeNoConstruct(newIndex + 1)) + return false; + + ConstructElements(&m_pArray[newIndex], 1, src); + return true; + } + + bool push_back(T&& src) + { + const size_t newIndex = size(); + if (!ResizeNoConstruct(newIndex + 1)) + return false; + + ConstructElements(&m_pArray[newIndex], 1, std::move(src)); + return true; + } + + bool reserve(size_t newCapacity) + { + if (newCapacity > m_Capacity) + { + T* const newArray = newCapacity ? 
m_Allocator.allocate(newCapacity) : nullptr; + + if (newCapacity && !newArray) + { + RPS_DIAG("Allocation failed"); + return false; + } + + if (m_Count != 0) + { + std::copy(m_pArray, m_pArray + m_Count, newArray); + } + const size_t prevCount = m_Count; + CleanUp(); + Init(newArray, newCapacity, prevCount); + } + + return true; + } + + bool reserve_additional(size_t sizeIncrease) + { + return reserve(size() + sizeIncrease); + } + + T* grow(size_t growCount) + { + if (resize(m_Count + growCount)) + { + return &m_pArray[m_Count - growCount]; + } + return nullptr; + } + + T* grow(size_t growCount, const T& fill) + { + size_t oldCount = m_Count; + if (!ResizeNoConstruct(oldCount + growCount)) + { + return nullptr; + } + + ConstructElements(m_pArray + oldCount, growCount, fill); + + return m_pArray + oldCount; + } + + bool resize(size_t newCount, const T& fill) + { + size_t oldCount = size(); + bool result = ResizeNoConstruct(newCount); + + if (result && (newCount > oldCount)) + { + ConstructElements(m_pArray + oldCount, newCount - oldCount, fill); + } + + return result; + } + + bool resize(size_t newCount) + { + size_t oldCount = m_Count; + + if (!ResizeNoConstruct(newCount)) + return false; + + if (oldCount < newCount) + { + ConstructElements(m_pArray + oldCount, newCount - oldCount); + } + + return true; + } + + void clear() + { + resize(0); + } + + void reset() + { + clear(); + CleanUp(); + } + + void reset(const AllocatorT& newAllocator) + { + clear(); + CleanUp(); + + m_Allocator = newAllocator; + } + + bool reset_keep_capacity(const AllocatorT& newAllocator) + { + const auto oldCapacity = capacity(); + reset(newAllocator); + return reserve(oldCapacity); + } + + bool shrink_to_fit() + { + if (m_Capacity > m_Count) + { + T* newArray = nullptr; + if (m_Count > 0) + { + newArray = m_Allocator.allocate(m_Count); + + if (!newArray) + { + RPS_DIAG("Allocation failed"); + return false; + } + + if (newArray) + { + std::copy(m_pArray, m_pArray + m_Count, newArray); + } + 
}
+                const size_t currCount = m_Count;
+                CleanUp();
+                Init(newArray, currCount, currCount);
+            }
+
+            return true;
+        }
+
+        // Inserts a copy of src before position index (index == size() appends).
+        // Returns false if the storage could not be grown.
+        // NOTE(review): ResizeNoConstruct may reallocate, so src presumably must not refer to an
+        // element of this vector - confirm against callers.
+        bool insert(size_t index, const T& src)
+        {
+            RPS_ASSERT(index <= m_Count);
+            const size_t oldCount = size();
+            if (!ResizeNoConstruct(oldCount + 1))
+                return false;
+
+            if (index < oldCount)
+            {
+                // Copy construct the last element
+                ConstructElements(m_pArray + oldCount, 1, m_pArray[oldCount - 1]);
+                // Move the rest by 1
+                std::move_backward(m_pArray + index, m_pArray + oldCount - 1, m_pArray + oldCount);
+                m_pArray[index] = src;
+            }
+            else
+            {
+                // Appending: the new slot is unconstructed storage, so construct in place.
+                ConstructElements(m_pArray + oldCount, 1, src);
+            }
+
+            return true;
+        }
+
+        // Rvalue overload of insert(): same layout handling, but src is moved into place.
+        bool insert(size_t index, T&& src)
+        {
+            RPS_ASSERT(index <= m_Count);
+            const size_t oldCount = size();
+            if (!ResizeNoConstruct(oldCount + 1))
+                return false;
+
+            if (index < oldCount)
+            {
+                // Move construct the last element
+                ConstructElements(m_pArray + oldCount, 1, std::forward(m_pArray[oldCount - 1]));
+                // Move the rest by 1
+                std::move_backward(m_pArray + index, m_pArray + oldCount - 1, m_pArray + oldCount);
+                m_pArray[index] = std::move(src);
+            }
+            else
+            {
+                ConstructElements(m_pArray + oldCount, 1, std::forward(src));
+            }
+
+            return true;
+        }
+
+        // Inserts numSrcs elements before position index.
+        // When pSrcs is null the existing elements are still shifted, but nothing is written into the gap.
+        // Returns false if the storage could not be grown.
+        bool insert(size_t index, const T* pSrcs, size_t numSrcs)
+        {
+            RPS_ASSERT(index <= m_Count);
+            const size_t oldCount = size();
+            if (!ResizeNoConstruct(oldCount + numSrcs))
+                return false;
+
+            if (index < oldCount)
+            {
+                // Slots at or beyond oldCount are unconstructed; existing elements that land there
+                // must be constructed, the remainder can be move-assigned.
+                const size_t constructStart = rpsMax(oldCount, index + numSrcs);
+                const size_t numToMoveConstruct = oldCount + numSrcs - constructStart;
+
+                // Construct tail new elements
+                for (auto iDst = rbegin(), dstEnd = rbegin() + numToMoveConstruct, iSrc = rbegin() + numSrcs;
+                     iDst != dstEnd;
+                     ++iDst, ++iSrc)
+                {
+                    ConstructElements(&*iDst, 1, *iSrc);
+                }
+
+                std::move_backward(begin() + index, begin() + constructStart - numSrcs, begin() + constructStart);
+            }
+
+            if (pSrcs)
+            {
+                // The first numCopy destination slots already hold constructed elements and are assigned to.
+                const size_t numCopy = rpsMin(oldCount - index, numSrcs);
+
+                std::copy(pSrcs, 
pSrcs + numCopy, begin() + index);
+
+                // The remaining destination slots are unconstructed storage: construct in place.
+                auto iSrc = pSrcs + numCopy;
+                for (auto iDst = begin() + index + numCopy, dstEnd = begin() + index + numSrcs; iDst != dstEnd;
+                     ++iDst, ++iSrc)
+                {
+                    ConstructElements(iDst, 1, *iSrc);
+                }
+            }
+
+            return true;
+        }
+
+        // Returns a pointer to element index, growing the vector to index + 1 elements if needed.
+        // Returns nullptr if growing failed.
+        T* get_or_grow(size_t index)
+        {
+            if ((index >= size()) && !resize(index + 1))
+            {
+                return nullptr;
+            }
+
+            return data() + index;
+        }
+
+        // Same as get_or_grow(index), but newly added elements are constructed from fill.
+        T* get_or_grow(size_t index, const T& fill)
+        {
+            if ((index >= size()) && !resize(index + 1, fill))
+            {
+                return nullptr;
+            }
+
+            return data() + index;
+        }
+
+        // Removes element index, keeping the order of the remaining elements.
+        void remove(size_t index)
+        {
+            RPS_ASSERT(index < m_Count);
+            const size_t oldCount = size();
+            if (index + 1 < oldCount)
+            {
+                // Shift the tail down by one; the vacated last slot is destroyed by resize().
+                std::move(m_pArray + (index + 1), m_pArray + oldCount, m_pArray + index);
+            }
+            resize(oldCount - 1);
+        }
+
+        // Removes element index by swapping it with the last element; element order is not preserved.
+        void remove_unordered(size_t index)
+        {
+            RPS_ASSERT(index < m_Count);
+            const size_t oldCount = size();
+            if (index < oldCount - 1)
+            {
+                std::swap(m_pArray[index], m_pArray[oldCount - 1]);
+            }
+            resize(oldCount - 1);
+        }
+
+        // Element access; the index is only checked through RPS_ASSERT.
+        T& operator[](size_t index)
+        {
+            RPS_ASSERT(index < m_Count);
+            return m_pArray[index];
+        }
+
+        const T& operator[](size_t index) const
+        {
+            RPS_ASSERT(index < m_Count);
+            return m_pArray[index];
+        }
+
+    private:
+        // Adopts pArray as the storage with the given capacity and element count.
+        void Init(T* pArray, size_t capacity, size_t count)
+        {
+            m_pArray = pArray;
+            m_Count = count;
+            m_Capacity = capacity;
+        }
+
+        // Destroys the m_Count live elements, hands the storage back to the allocator and
+        // returns the vector to the empty state.
+        void CleanUp()
+        {
+            if (m_pArray)
+            {
+                DestructElements(m_pArray, m_Count);
+
+                m_Allocator.deallocate(m_pArray, m_Capacity);
+            }
+            m_pArray = nullptr;
+            m_Count = 0;
+            m_Capacity = 0;
+        }
+
+        // Sets the element count to newCount without constructing added elements:
+        // shrinking destroys the removed tail, growing past the capacity reallocates.
+        // Callers are responsible for constructing the slots in [old count, newCount).
+        bool ResizeNoConstruct(size_t newCount)
+        {
+            if (newCount < m_Count)
+            {
+                DestructElements(m_pArray + newCount, m_Count - newCount);
+            }
+
+            size_t newCapacity = m_Capacity;
+            if (newCount > m_Capacity)
+            {
+                // Grow by 1.5x with a minimum capacity of 8, or to newCount if that is larger.
+                newCapacity = rpsMax(newCount, rpsMax(m_Capacity * 3 / 2, (size_t)8));
+            }
+
+            if (newCapacity != m_Capacity)
+            {
+                T* const newArray = newCapacity ? 
m_Allocator.allocate(newCapacity) : nullptr; + + if (newCapacity && !newArray) + { + RPS_DIAG("Allocation failed"); + return false; + } + + const size_t elementsToCopy = rpsMin(m_Count, newCount); + if (elementsToCopy != 0) + { + auto iSrc = m_pArray; + for (auto iDst = newArray, dstEnd = newArray + elementsToCopy; iDst != dstEnd; ++iDst, ++iSrc) + { + ConstructElements(iDst, 1, std::forward(*iSrc)); + } + } + + CleanUp(); + Init(newArray, newCapacity, newCount); + } + + m_Count = newCount; + return true; + } + + T* AllocNoConstruct(size_t count) + { + return count ? m_Allocator.allocate(count) : nullptr; + } + + template + void ConstructElements(T* pElements, size_t count, Args&&... args) + { + std::for_each(pElements, pElements + count, [&](auto& elem) { + std::allocator_traits::construct(m_Allocator, &elem, std::forward(args)...); + }); + } + + void DestructElements(T* pElements, size_t count) + { + if (!ElementTrivialDestructible) + { + std::for_each(pElements, pElements + count, [&](auto& elem) { + std::allocator_traits::destroy(m_Allocator, &elem); + }); + } + } + + void CopyElements(T* pDst, const T* pSrc, size_t count) + { + std::copy(pSrc, pSrc + count, pDst); + } + + private: + AllocatorT m_Allocator = {}; + T* m_pArray = nullptr; + size_t m_Count = 0; + size_t m_Capacity = 0; + }; + + // Bit Vector + template > + class BitVector + { + public: + static constexpr uint32_t ELEMENT_NUM_BITS = sizeof(TElement) * (CHAR_BIT); + + public: + struct BitIndex + { + uint32_t element; + uint32_t bit; + }; + + BitVector() + { + } + + BitVector(const TAllocator& allocator) + : m_bitVector(allocator) + { + } + + void reset(const TAllocator& allocator) + { + m_bitVector.reset(allocator); + } + + const Vector& GetVector() const + { + return m_bitVector; + } + + size_t size() const + { + return m_BitSize; + } + + bool Resize(size_t numBits) + { + if (m_bitVector.resize((numBits + ELEMENT_NUM_BITS - 1) / ELEMENT_NUM_BITS)) + { + m_BitSize = numBits; + return true; + } + + 
return false;
+        }
+
+        // Resizes to numBits bits; when growing, the newly added bits are set to bSet.
+        bool Resize(size_t numBits, bool bSet)
+        {
+            const size_t oldBits = m_BitSize;
+
+            if (!Resize(numBits))
+                return false;
+
+            // Only a growing resize adds bits to initialise; shrinking must not touch the storage.
+            if (numBits > oldBits)
+            {
+                Fill(oldBits, numBits, bSet);
+            }
+
+            return true;
+        }
+
+        // Sets every bit of every storage word to bSet.
+        void Fill(bool bSet)
+        {
+            std::fill(m_bitVector.begin(), m_bitVector.end(), bSet ? ~TElement(0) : 0);
+        }
+
+        // Sets the bits in [beginBitIndex, endBitIndex) to bSet; an empty or inverted range is a no-op.
+        // Bits outside the range are preserved, including when both ends fall inside the same word.
+        void Fill(size_t beginBitIndex, size_t endBitIndex, bool bSet)
+        {
+            if (beginBitIndex >= endBitIndex)
+                return;
+
+            const size_t firstElem = beginBitIndex / ELEMENT_NUM_BITS;
+            const size_t lastElem = (endBitIndex - 1) / ELEMENT_NUM_BITS;  // word holding the last bit
+            const size_t beginSubElemBits = beginBitIndex % ELEMENT_NUM_BITS;
+            const size_t endSubElemBits = endBitIndex % ELEMENT_NUM_BITS;
+
+            for (size_t iElem = firstElem; iElem <= lastElem; ++iElem)
+            {
+                // Start from "all bits" and trim the mask on the first / last word of the range.
+                TElement fillMask = TElement(~TElement(0));
+
+                if ((iElem == firstElem) && (beginSubElemBits != 0))
+                {
+                    // Keep the bits below the start of the range.
+                    const TElement keepMask = TElement((TElement(1) << beginSubElemBits) - 1);
+                    fillMask = TElement(fillMask & ~keepMask);
+                }
+
+                if ((iElem == lastElem) && (endSubElemBits != 0))
+                {
+                    // Keep the bits at and above the end of the range.
+                    const TElement belowEndMask = TElement((TElement(1) << endSubElemBits) - 1);
+                    fillMask = TElement(fillMask & belowEndMask);
+                }
+
+                TElement& elem = m_bitVector[iElem];
+                elem = bSet ? TElement(elem | fillMask) : TElement(elem & ~fillMask);
+            }
+        }
+
+        // Returns the value of bit index.
+        constexpr bool GetBit(size_t index) const
+        {
+            RPS_ASSERT(index < m_BitSize);
+
+            const size_t elementIdx = index / ELEMENT_NUM_BITS;
+            const size_t bitIndex = index % ELEMENT_NUM_BITS;
+
+            return !!(m_bitVector[elementIdx] & (TElement(1) << bitIndex));
+        }
+
+        // Sets bit index to newValue and returns its previous value.
+        constexpr bool ExchangeBit(size_t index, bool newValue)
+        {
+            RPS_ASSERT(index < m_BitSize);
+
+            const size_t elementIdx = index / ELEMENT_NUM_BITS;
+            const size_t bitIndex = index % ELEMENT_NUM_BITS;
+
+            TElement& dstMask = m_bitVector[elementIdx];
+            TElement mask = (TElement(1) << bitIndex);
+
+            bool oldValue = !!(dstMask & mask);
+            dstMask = newValue ? 
(dstMask | mask) : (dstMask & ~mask);
+
+            return oldValue;
+        }
+
+        // Sets bit index to value.
+        void SetBit(size_t index, bool value)
+        {
+            RPS_ASSERT(index < m_BitSize);
+
+            const size_t elementIdx = index / ELEMENT_NUM_BITS;
+            const size_t bitIndex = index % ELEMENT_NUM_BITS;
+
+            TElement& dstMask = m_bitVector[elementIdx];
+            TElement mask = (TElement(1) << bitIndex);
+            dstMask = value ? (dstMask | mask) : (dstMask & ~mask);
+        }
+
+        // Sets the bit addressed by an (element, bit) pair to value.
+        void SetBit(BitIndex index, bool value)
+        {
+            // BitIndex is a struct and cannot be compared with m_BitSize directly: validate the
+            // in-word position and the flattened bit position instead.
+            RPS_ASSERT(index.bit < ELEMENT_NUM_BITS);
+            RPS_ASSERT((size_t(index.element) * ELEMENT_NUM_BITS + index.bit) < m_BitSize);
+
+            TElement& dstMask = m_bitVector[index.element];
+            TElement mask = (TElement(1) << index.bit);
+            dstMask = value ? (dstMask | mask) : (dstMask & ~mask);
+        }
+
+        // Scans the storage words from startElement for the first non-zero word and returns its
+        // index together with rpsFirstBitLow() of that word.
+        // Returns {RPS_INDEX_NONE_U32, RPS_INDEX_NONE_U32} when every remaining word is zero.
+        BitIndex FindFirstBitLow(size_t startElement) const
+        {
+            for (size_t i = startElement, end = m_bitVector.size(); i < end; i++)
+            {
+                if (m_bitVector[i])
+                {
+                    // Explicit conversion: size_t -> uint32_t is a narrowing conversion, which is not
+                    // permitted implicitly inside a braced initializer.
+                    return BitIndex{uint32_t(i), rpsFirstBitLow(m_bitVector[i])};
+                }
+            }
+
+            return BitIndex{RPS_INDEX_NONE_U32, RPS_INDEX_NONE_U32};
+        }
+
+    private:
+        Vector m_bitVector;
+        size_t m_BitSize = 0;
+    };
+
+    template 
+    constexpr uint32_t BitVector::ELEMENT_NUM_BITS;
+
+    // Non-owning (pointer, length) reference to a character range.
+    struct StrRef
+    {
+        const char* str;
+        size_t len;
+
+        constexpr StrRef()
+            : str(nullptr)
+            , len(0)
+        {
+        }
+
+        // Wraps a C string; a null pointer yields an empty reference.
+        constexpr StrRef(const char* cStr)
+            : str(cStr)
+            , len(cStr ? strlen(cStr) : 0)
+        {
+        }
+
+        constexpr StrRef(const char* inStr, size_t inLen)
+            : str(inStr)
+            , len(inLen)
+        {
+        }
+
+        // Wraps a character array, excluding its last character (the terminator of a literal).
+        template 
+        static constexpr StrRef From(const char (&charArray)[Len])
+        {
+            return StrRef{charArray, Len - 1};
+        }
+
+        bool empty() const
+        {
+            return len == 0;
+        }
+
+        // True when the reference is non-empty.
+        operator bool() const
+        {
+            return !empty();
+        }
+
+        // Prints the referenced characters through the printer's printf callback.
+        void Print(const RpsPrinter& printer) const
+        {
+            // The '*' precision of "%.*s" consumes an int argument, so len must be converted.
+            printer.pfnPrintf(printer.pContext, "%.*s", int(len), str);
+        }
+
+        // Copies the string into dstBuf as a terminated C string, truncating to bufSize - 1 characters.
+        // Returns true only if the whole string fitted.
+        bool ToCStr(char* dstBuf, size_t bufSize) const
+        {
+            size_t copyLen = (bufSize > 0) ? 
rpsMin(bufSize - 1, len) : 0; + + if (copyLen) + { + memcpy(dstBuf, str, copyLen); + } + + if (bufSize > 0) + { + dstBuf[copyLen] = '\0'; + } + + return (copyLen == len); + } + + bool operator==(const StrRef& other) const + { + return (len == other.len) && (0 == strncmp(str, other.str, len)); + } + + bool operator!=(const StrRef& other) const + { + return !(*this == other); + } + }; + + // TODO: + // - add more string types for append and construct. + // - add ability to resize over current terminating behaviour. + // - add a str() function returning StrRef, or allow implicit cast to StrRef. + template + class StrBuilder + { + char m_buf[Capacity]; + size_t m_spaceLeft; // includes the terminator of the existing string + + public: + constexpr StrBuilder() + : m_buf{} + , m_spaceLeft(Capacity) + { + } + + explicit StrBuilder(const char* str) + : StrBuilder() + { + Append(str); + } + + StrBuilder& Reset() + { + m_buf[0] = '\0'; + m_spaceLeft = Capacity; + + return *this; + } + + StrBuilder& AppendFormatV(const char* fmt, va_list vl) + { + if (m_spaceLeft <= 1) + return *this; + + const int written = vsnprintf(m_buf + Length(), m_spaceLeft, fmt, vl); + + if (written >= 0) + { + m_spaceLeft = (size_t(written) > (m_spaceLeft - 1)) ? 1 : (m_spaceLeft - written); + } + + return *this; + } + + StrBuilder& AppendFormat(const char* fmt, ...) + { + if (m_spaceLeft <= 1) + return *this; + + va_list vl; + va_start(vl, fmt); + AppendFormatV(fmt, vl); + + va_end(vl); + + return *this; + } + + StrBuilder& Append(StrRef strRef) + { + if (m_spaceLeft <= 1) + return *this; + + const size_t copyLen = rpsMin(m_spaceLeft - 1, strRef.len); + memcpy(m_buf + Length(), strRef.str, copyLen); + m_buf[Length() + copyLen] = '\0'; + m_spaceLeft = (strRef.len > (m_spaceLeft - 1)) ? 
1 : (m_spaceLeft - strRef.len);
+
+            return *this;
+        }
+
+        // Removes up to lengthToPop characters from the end of the string.
+        StrBuilder& PopBack(size_t lengthToPop)
+        {
+            // Clamp to the current length first so the addition below cannot wrap around.
+            m_spaceLeft += rpsMin(lengthToPop, Length());
+            m_buf[Length()] = '\0';
+            return *this;
+        }
+
+        StrBuilder& operator+=(const char* str)
+        {
+            return Append(str);
+        }
+
+        const char* c_str() const
+        {
+            return m_buf;
+        }
+
+        // Returns a non-owning reference to the current contents.
+        StrRef GetStr() const
+        {
+            return StrRef{m_buf, Length()};
+        }
+
+        // Current string length, excluding the terminator.
+        size_t Length() const
+        {
+            return Capacity - m_spaceLeft;
+        }
+
+        // Returns an RpsPrinter whose callbacks append to this builder.
+        // The printer stores a pointer to this object.
+        RpsPrinter AsPrinter()
+        {
+            RpsPrinter printer = {};
+
+            printer.pContext = this;
+            printer.pfnPrintf = &PrintfCb;
+            printer.pfnVPrintf = &VPrintfCb;
+
+            return printer;
+        }
+
+        StrBuilder& operator=(const StrBuilder& other)
+        {
+            // Reset() empties the buffer, so on self-assignment there would be nothing left to copy.
+            if (this != &other)
+            {
+                Reset().Append(other.GetStr());
+            }
+            return *this;
+        }
+
+    private:
+        typedef StrBuilder Self;
+
+        // RpsPrinter printf callback: pUserContext is the builder to append to.
+        static void PrintfCb(void* pUserContext, const char* format, ...)
+        {
+            Self* pThis = static_cast(pUserContext);
+            if (pThis)
+            {
+                va_list vl;
+                va_start(vl, format);
+                pThis->AppendFormatV(format, vl);
+                va_end(vl);
+            }
+        }
+
+        // RpsPrinter vprintf callback: pUserContext is the builder to append to.
+        static void VPrintfCb(void* pUserContext, const char* format, va_list vl)
+        {
+            Self* pThis = static_cast(pUserContext);
+            if (pThis)
+            {
+                pThis->AppendFormatV(format, vl);
+            }
+        }
+    };
+
+    // An (offset, count) pair addressing a contiguous range inside a separately stored collection.
+    template 
+    class Span
+    {
+    public:
+        using size_type = SizeType;
+
+        constexpr Span()
+            : Span(0, 0)
+        {
+        }
+
+        constexpr Span(SizeType offset, SizeType count)
+            : m_offset(offset)
+            , m_count(count)
+        {
+        }
+
+        // Resolves the span against a collection: begin() + offset, count elements.
+        template 
+        ArrayRef Get(TCollection& collection) const
+        {
+            return ArrayRef{collection.begin() + m_offset, m_count};
+        }
+
+        template 
+        ConstArrayRef GetConstRef(TCollection& collection) const
+        {
+            return ConstArrayRef{collection.begin() + m_offset, m_count};
+        }
+
+        template 
+        ConstArrayRef Get(const TCollection& collection) const
+        {
+            return ConstArrayRef{collection.cbegin() + m_offset, m_count};
+        }
+
+        void SetRange(SizeType offset, SizeType count)
+        {
+            m_offset = offset;
+            m_count = count;
+        }
+
+        void SetCount(SizeType count)
+        {
+            m_count = count;
+        }
+
+        // Sets the count so that the span ends at endIndex (exclusive).
+        void SetEnd(SizeType endIndex)
+        {
+            RPS_ASSERT(endIndex >= m_offset);
+            SetCount(endIndex - m_offset);
+        }
+
+        SizeType GetBegin() const
+        {
+            return m_offset;
+        }
+
+        // One past the last index covered by the span.
+        SizeType GetEnd() const
+        {
+            return m_offset + m_count;
+        }
+
+        SizeType size() const
+        {
+            return m_count;
+        }
+
+        bool empty() const
+        {
+            return m_count == 0;
+        }
+
+    private:
+        SizeType m_offset;
+        SizeType m_count;
+    };
+
+    // Hands out spans of a shared container and recycles freed spans through per-size free lists.
+    // The free-list link is stored inside the first element of a freed span.
+    template ,
+              typename =
+                  typename std::enable_if= sizeof(uint32_t) && std::is_trivially_destructible::value &&
+                                          std::is_trivially_copyable::value>::type>
+    class SpanPool
+    {
+    public:
+        SpanPool(TContainer& container)
+            : m_container(container)
+        {
+            reset();
+        }
+
+        // Marks every free list as empty (UINT32_MAX is the "no entry" value); the container is not touched.
+        void reset()
+        {
+            std::fill(std::begin(m_freeLists), std::end(m_freeLists), UINT32_MAX);
+        }
+
+        // Allocates a span for count elements; the resulting span size is count rounded up to a power of two.
+        // A count of zero yields the empty span {0, 0}.
+        void alloc_span(Span& span, uint32_t count)
+        {
+            if (count != 0)
+                span = AllocSpan(rpsRoundUpToPowerOfTwo(count));
+            else
+                span = {0, 0};
+        }
+
+        // Appends newElement to span, moving the span to a slot of twice the size when it is full.
+        void push_to_span(Span& span, const T& newElement)
+        {
+            // Implicit capacity is the next pow of 2
+            if (rpsIsPowerOfTwo(span.size()))
+            {
+                uint32_t newSize = span.size() ? 
(span.size() << 1) : 1; + + auto newSpan = AllocSpan(newSize); + auto oldData = span.Get(m_container); + std::copy(oldData.begin(), oldData.end(), newSpan.Get(m_container).begin()); + + free_span(span); + + span = {newSpan.GetBegin(), oldData.size()}; + } + + span.SetCount(span.size() + 1); + span.Get(m_container).back() = newElement; + } + + void free_span(Span& span) + { + if (!span.empty()) + { + const uint32_t freeListIndex = rpsFirstBitLow(span.size()); + + *reinterpret_cast(&span.Get(m_container)[0]) = m_freeLists[freeListIndex]; + + m_freeLists[freeListIndex] = span.GetBegin(); + } + span = {0, 0}; + } + + private: + Span AllocSpan(uint32_t size) + { + const uint32_t freeListIndex = rpsFirstBitLow(size); + const uint32_t freeLoc = m_freeLists[freeListIndex]; + + if (freeLoc != UINT32_MAX) + { + const uint32_t nextFree = *reinterpret_cast(&m_container[freeLoc]); + m_freeLists[freeListIndex] = nextFree; + return {freeLoc, size}; + } + else if (m_container.grow(size)) + { + return {uint32_t(m_container.size() - size), size}; + } + return {0, 0}; + } + + private: + TContainer& m_container; + uint32_t m_freeLists[32]; + }; + + class Arena + { + RPS_CLASS_NO_COPY_MOVE(Arena); + + struct Block + { + struct Block* pNext; + size_t size; + }; + + static const size_t DEFAULT_BLOCK_SIZE = 65500; + static const size_t DEFAULT_ALIGNMENT = alignof(max_align_t); + + size_t m_blockSize = DEFAULT_BLOCK_SIZE; + Block* m_pBlocks = {}; + void* m_pCurrBufferPos = {}; + size_t m_currBufferSize = 0; + Block* m_pFreeBlocks = {}; + const RpsAllocator m_allocCbs = {}; + + public: + struct CheckPoint + { + void* pBlock; + size_t remainingSize; + }; + + public: + Arena(const RpsAllocator& parentAllocator, size_t defaultBlockSize = 0) + : m_blockSize(defaultBlockSize ? 
defaultBlockSize : DEFAULT_BLOCK_SIZE)
+            , m_allocCbs(parentAllocator)
+        {
+        }
+
+        // Returns both the in-use and the recycled blocks to the parent allocator.
+        ~Arena()
+        {
+            FreeBlockList(m_pBlocks);
+            FreeBlockList(m_pFreeBlocks);
+        }
+
+        // Allocates size bytes with DEFAULT_ALIGNMENT.
+        void* Alloc(size_t size)
+        {
+            return AlignedAlloc(size, DEFAULT_ALIGNMENT);
+        }
+
+        // Bump-allocates size bytes at the requested alignment, starting a new block when the
+        // current one cannot hold the request. Returns nullptr if a new block could not be obtained.
+        void* AlignedAlloc(size_t size, size_t alignment)
+        {
+            size_t paddingSize = rpsPaddingSize(m_pCurrBufferPos, alignment);
+
+            if (paddingSize + size > m_currBufferSize)
+            {
+                // Request 'alignment' extra bytes so the padding is guaranteed to fit in the new block.
+                if (RPS_SUCCEEDED(AllocBlock(size + alignment)))
+                {
+                    paddingSize = rpsPaddingSize(m_pCurrBufferPos, alignment);
+                }
+                else
+                {
+                    return nullptr;
+                }
+            }
+
+            void* pAllocated = rpsBytePtrInc(m_pCurrBufferPos, paddingSize);
+
+            RPS_DEBUG_FILL_MEMORY_ON_POOL_ALLOC(pAllocated, size);
+
+            m_pCurrBufferPos = rpsBytePtrInc(pAllocated, size);
+            m_currBufferSize -= paddingSize + size;
+
+            return pAllocated;
+        }
+
+        // Reallocates with DEFAULT_ALIGNMENT.
+        void* Realloc(void* ptr, size_t oldSize, size_t newSize)
+        {
+            return AlignedRealloc(ptr, oldSize, newSize, DEFAULT_ALIGNMENT);
+        }
+
+        // Resizes an allocation. The most recent allocation is resized in place when the block has
+        // room; a shrinking request on any other allocation keeps the old buffer; otherwise a new
+        // buffer is allocated and the old contents are copied over.
+        void* AlignedRealloc(void* pOldBuffer, size_t oldSize, size_t newSize, size_t alignment)
+        {
+            RPS_ASSERT(rpsIsPointerAlignedTo(pOldBuffer, alignment));
+
+            // ptr is the last allocation, try extending without realloc
+            if (pOldBuffer && (rpsBytePtrInc(pOldBuffer, oldSize) == m_pCurrBufferPos) &&
+                (newSize <= (oldSize + m_currBufferSize)))
+            {
+                RPS_DEBUG_FILL_MEMORY_ON_POOL_ALLOC(rpsBytePtrInc(pOldBuffer, oldSize),
+                                                    (newSize > oldSize) ? (newSize - oldSize) : 0);
+                RPS_DEBUG_FILL_MEMORY_ON_POOL_FREE(rpsBytePtrInc(pOldBuffer, newSize),
+                                                   (oldSize > newSize) ? (oldSize - newSize) : 0);
+
+                m_pCurrBufferPos = rpsBytePtrInc(pOldBuffer, newSize);
+                m_currBufferSize = m_currBufferSize + oldSize - newSize;
+
+                return pOldBuffer;
+            }
+            else if (newSize < oldSize)
+            {
+                // The released range is [newSize, oldSize) of the old buffer. Starting the fill at
+                // oldSize would write past the end of the allocation into the following one.
+                RPS_DEBUG_FILL_MEMORY_ON_POOL_FREE(rpsBytePtrInc(pOldBuffer, newSize), oldSize - newSize);
+
+                // Not the last allocation, but can reuse old buffer for now. 
+                return pOldBuffer;
+            }
+
+            // AlignedAlloc already applies the pool-alloc debug fill to the new range.
+            void* pNewBuffer = AlignedAlloc(newSize, alignment);
+
+            if (pOldBuffer && pNewBuffer)
+            {
+                memcpy(pNewBuffer, pOldBuffer, oldSize);
+
+                // The old range is abandoned from here on. It is marked only after its contents have
+                // been copied, and only if the copy happened, so a failed reallocation leaves the
+                // caller's data intact.
+                RPS_DEBUG_FILL_MEMORY_ON_POOL_FREE(pOldBuffer, oldSize);
+            }
+
+            return pNewBuffer;
+        }
+
+        // Allocates size zero-initialised bytes with DEFAULT_ALIGNMENT.
+        void* AllocZeroed(size_t size)
+        {
+            return AlignedAllocZeroed(size, DEFAULT_ALIGNMENT);
+        }
+
+        // Allocates size zero-initialised bytes at the requested alignment; nullptr on failure.
+        void* AlignedAllocZeroed(size_t size, size_t alignment)
+        {
+            void* pAllocated = AlignedAlloc(size, alignment);
+            if (pAllocated)
+            {
+                memset(pAllocated, 0, size);
+            }
+            return pAllocated;
+        }
+
+        // Captures the current allocation position for a later ResetToCheckPoint().
+        CheckPoint GetCheckPoint() const
+        {
+            return CheckPoint{m_pBlocks, m_currBufferSize};
+        }
+
+        // Rewinds the arena to checkPoint: blocks started after it are moved to the free-block list
+        // and the bump position inside the check point's block is restored.
+        void ResetToCheckPoint(const CheckPoint& checkPoint)
+        {
+            Block* pFreeBlocks = m_pFreeBlocks;
+            Block* pBlock = m_pBlocks;
+
+            while ((pBlock != nullptr) && (pBlock != checkPoint.pBlock))
+            {
+                Block* pNextBlock = pBlock->pNext;
+
+                RPS_DEBUG_FILL_MEMORY_ON_POOL_FREE(rpsBytePtrInc(pBlock, sizeof(Block)),
+                                                   (pBlock->size - sizeof(Block)));
+
+                pBlock->pNext = pFreeBlocks;
+                pFreeBlocks = pBlock;
+                pBlock = pNextBlock;
+            }
+
+            m_pFreeBlocks = pFreeBlocks;
+            m_pBlocks = pBlock;
+
+            if (pBlock)
+            {
+                m_pCurrBufferPos = rpsBytePtrInc(pBlock, pBlock->size - checkPoint.remainingSize);
+                m_currBufferSize = checkPoint.remainingSize;
+
+                RPS_DEBUG_FILL_MEMORY_ON_POOL_FREE(m_pCurrBufferPos, m_currBufferSize);
+            }
+            else
+            {
+                m_pCurrBufferPos = nullptr;
+                m_currBufferSize = 0;
+            }
+        }
+
+        // Rewinds to the empty state; every block is kept on the free-block list for reuse.
+        void Reset()
+        {
+            ResetToCheckPoint({nullptr, 0});
+        }
+
+        // Copies a C string into the arena; a null pointer yields an empty StrRef.
+        StrRef StoreCStr(const char* s)
+        {
+            if (!s)
+            {
+                return StrRef{};
+            }
+
+            const size_t len = strlen(s);
+            return StoreStr(StrRef{s, len});
+        }
+
+        // Copies s into the arena with a terminating '\0' appended and returns a reference to the copy.
+        // An empty input is returned unchanged; an empty StrRef is returned if the allocation fails.
+        StrRef StoreStr(const StrRef& s)
+        {
+            if (s.empty())
+            {
+                return s;
+            }
+
+            char* const pDst = static_cast(AlignedAlloc(s.len + 1, 1));
+
+            if (!pDst)
+            {
+                // Never hand out a null pointer paired with a non-zero length.
+                return StrRef{};
+            }
+
+            memcpy(pDst, s.str, s.len);
+            pDst[s.len] = '\0';
+
+            return StrRef{pDst, s.len};
+        }
+
+        // Copies size bytes from pData into the arena with DEFAULT_ALIGNMENT.
+        const void* StoreData(const void* 
pData, size_t size)
+        {
+            return StoreDataAligned(pData, size, DEFAULT_ALIGNMENT);
+        }
+
+        // Copies size bytes from pData into the arena at the requested alignment.
+        // Returns the copy, or nullptr if the allocation failed.
+        const void* StoreDataAligned(const void* pData, size_t size, size_t alignment)
+        {
+            // Honour the caller's alignment instead of always using DEFAULT_ALIGNMENT.
+            char* pDst = (char*)AlignedAlloc(size, alignment);
+
+            // Skip the copy for empty payloads so a null pData is never handed to memcpy.
+            if (pDst && (size != 0))
+            {
+                memcpy(pDst, pData, size);
+            }
+
+            return pDst;
+        }
+
+        // True when recycled blocks are waiting on the free-block list.
+        bool HasFreeBlocks() const
+        {
+            return m_pFreeBlocks != nullptr;
+        }
+
+        // Allocates storage for one T without running a constructor.
+        template ::value>::type>
+        T* New()
+        {
+            return static_cast(AlignedAlloc(sizeof(T), alignof(T)));
+        }
+
+        // Allocates raw storage sized and aligned for one T.
+        template 
+        void* Alloc()
+        {
+            return AlignedAlloc(sizeof(T), alignof(T));
+        }
+
+        // Allocates storage for count elements without running constructors.
+        // Returns an empty array for count == 0 or on allocation failure.
+        template ::value>::type>
+        ArrayRef NewArray(size_t count)
+        {
+            if (count > 0)
+            {
+                T* p = static_cast(AlignedAlloc(count * sizeof(T), alignof(T)));
+                return {p, p ? count : 0};
+            }
+
+            return {nullptr, 0};
+        }
+
+        // Same as NewArray(count), with the storage zero-filled.
+        template ::value>::type>
+        ArrayRef NewArrayZeroed(size_t count)
+        {
+            auto result = NewArray(count);
+            if (!result.empty())
+            {
+                memset(result.data(), 0, result.size() * sizeof(T));
+            }
+            return result;
+        }
+
+        // Allocates and constructs one T from args. Returns nullptr if the allocation failed.
+        template 
+        T* New(TCtorArgs&&... args)
+        {
+            void* pBuffer = AlignedAlloc(sizeof(T), alignof(T));
+            if (!pBuffer)
+            {
+                // Constructing through a null pointer is undefined; report the failure instead.
+                return nullptr;
+            }
+            new (pBuffer) T(args...);
+            return static_cast(pBuffer);
+        }
+
+        // Runs the destructor of *p; the memory itself stays owned by the arena. Null is ignored.
+        template 
+        void Delete(T* p)
+        {
+            if (p)
+            {
+                p->~T();
+            }
+        }
+
+        // Allocates count elements and constructs each of them from args.
+        // Returns an empty array if the allocation failed.
+        template 
+        ArrayRef NewArray(
+            typename std::enable_if::value && !std::is_trivial::value,
+                                    size_t>::type count,
+            TCtorArgs&&... args)
+        {
+            T* pBuffer = static_cast(AlignedAlloc(count * sizeof(T), alignof(T)));
+            if (!pBuffer)
+            {
+                // Without this check the loop below would construct objects at addresses derived from null.
+                return {nullptr, 0};
+            }
+            for (T *pIter = pBuffer, *pEnd = (pBuffer + count); pIter != pEnd; ++pIter)
+            {
+                new (pIter) T(args...);
+            }
+            return {pBuffer, pBuffer ? count : 0};
+        }
+
+        // Allocates count elements and calls ctor(index, pElement) for each slot so the caller can
+        // construct it. ctor is not invoked when the allocation failed.
+        template 
+        ArrayRef NewArray(
+            TCtor ctor,
+            typename std::enable_if::value && !std::is_trivial::value,
+                                    size_t>::type count)
+        {
+            T* pBuffer = static_cast(AlignedAlloc(count * sizeof(T), alignof(T)));
+            if (pBuffer)
+            {
+                for (size_t idx = 0; idx < count; ++idx)
+                {
+                    ctor(idx, pBuffer + idx);
+                }
+            }
+            return {pBuffer, pBuffer ? 
count : 0};
+        }
+
+        // Exposes the arena through the RpsAllocator callback interface.
+        // The returned struct stores a pointer to this arena.
+        RpsAllocator AsRpsAllocator()
+        {
+            RpsAllocator result;
+            result.pfnAlloc = &AllocatorAllocCb;
+            result.pfnFree = &AllocatorFreeCb;
+            result.pfnRealloc = &AllocatorReallocCb;
+            result.pContext = this;
+
+            return result;
+        }
+
+    private:
+        // RpsAllocator callbacks: pUserContext is the Arena installed by AsRpsAllocator().
+        static void* AllocatorAllocCb(void* pUserContext, size_t size, size_t alignment)
+        {
+            return static_cast(pUserContext)->AlignedAlloc(size, alignment);
+        }
+
+        static void* AllocatorReallocCb(
+            void* pUserContext, void* oldBuffer, size_t oldSize, size_t newSize, size_t alignment)
+        {
+            return static_cast(pUserContext)->AlignedRealloc(oldBuffer, oldSize, newSize, alignment);
+        }
+
+        // Intentionally empty: the arena does not free individual allocations.
+        static void AllocatorFreeCb(void* pUserContext, void* buffer)
+        {
+        }
+
+    private:
+        // Makes a block with at least minSize usable bytes the current block, reusing the head of the
+        // free-block list when it is large enough and asking the parent allocator otherwise.
+        RpsResult AllocBlock(size_t minSize)
+        {
+            const size_t requiredBlockSize = minSize + sizeof(Block);
+
+            // An oversized request raises the block size used for all later blocks as well.
+            m_blockSize = rpsMax(m_blockSize, requiredBlockSize);
+
+            Block* pNewBlock = nullptr;
+
+            // Only the head of the free list is examined.
+            if (m_pFreeBlocks && (m_pFreeBlocks->size >= requiredBlockSize))
+            {
+                pNewBlock = m_pFreeBlocks;
+                m_pFreeBlocks = pNewBlock->pNext;
+            }
+            else
+            {
+                void* pNewBuffer = m_allocCbs.pfnAlloc(m_allocCbs.pContext, m_blockSize, alignof(Block));
+
+                RPS_CHECK_ALLOC(pNewBuffer);
+
+                RPS_DEBUG_FILL_MEMORY_ON_ALLOC(pNewBuffer, m_blockSize);
+
+                pNewBlock = static_cast(pNewBuffer);
+                pNewBlock->size = m_blockSize;
+            }
+
+            // Push the block onto the in-use list and start allocating right after its header.
+            pNewBlock->pNext = m_pBlocks;
+            m_pBlocks = pNewBlock;
+
+            m_pCurrBufferPos = rpsBytePtrInc(pNewBlock, sizeof(Block));
+            m_currBufferSize = pNewBlock->size - sizeof(Block);
+
+            return RPS_OK;
+        }
+
+        // Returns every block of the list to the parent allocator.
+        void FreeBlockList(Block* pBlockList)
+        {
+            Block* pNext = nullptr;
+            for (Block* pBlock = pBlockList; pBlock != NULL; pBlock = pNext)
+            {
+                // Read the link before the block is handed back.
+                pNext = pBlock->pNext;
+
+                RPS_DEBUG_FILL_MEMORY_ON_FREE(pBlock, pBlock->size);
+
+                m_allocCbs.pfnFree(m_allocCbs.pContext, pBlock);
+            }
+        }
+    };
+
+    // Scope guard: records the arena's check point on construction and rewinds to it on destruction.
+    class RPS_NO_DISCARD ArenaCheckPoint
+    {
+        RPS_CLASS_NO_COPY_MOVE(ArenaCheckPoint);
+
+        Arena* m_arena = nullptr;
+        Arena::CheckPoint m_checkpoint = {};
+
+    public: 
+        ArenaCheckPoint(Arena& arena)
+            : m_arena(&arena)
+            , m_checkpoint(m_arena->GetCheckPoint())
+        {
+        }
+
+        ~ArenaCheckPoint()
+        {
+            if (m_arena)
+            {
+                m_arena->ResetToCheckPoint(m_checkpoint);
+            }
+        }
+    };
+
+    // Allocator adapter that serves allocations from an Arena; deallocate() is a no-op.
+    template 
+    class ArenaAllocator
+    {
+    public:
+        using value_type = T;
+
+        ArenaAllocator() = default;
+
+        template 
+        ArenaAllocator(ArenaAllocator const& other) noexcept
+            : m_pArena(other.m_pArena)
+        {
+        }
+
+        ArenaAllocator(Arena* pArena)
+            : m_pArena(pArena)
+        {
+        }
+
+        // Allocates storage for n objects of T from the arena; m_pArena must be non-null.
+        value_type* allocate(size_t n)
+        {
+            return static_cast(m_pArena->AlignedAlloc(n * sizeof(T), alignof(T)));
+        }
+
+        // The hint is ignored.
+        value_type* allocate(size_t n, const void* hint)
+        {
+            return allocate(n);
+        }
+
+        // Intentionally empty: arena memory is reclaimed in bulk.
+        void deallocate(value_type* p, size_t n)
+        {
+        }
+
+        // Allocators compare equal when they refer to the same arena.
+        template 
+        bool operator==(ArenaAllocator const& rhs) const noexcept
+        {
+            return m_pArena == rhs.m_pArena;
+        }
+
+        template 
+        bool operator!=(ArenaAllocator const& rhs) const noexcept
+        {
+            return !(*this == rhs);
+        }
+
+    private:
+        Arena* m_pArena = {};
+    };
+
+    // Allocator adapter that forwards to a set of RpsAllocator callbacks.
+    template 
+    class GeneralAllocator
+    {
+    public:
+        using value_type = T;
+
+        GeneralAllocator() = default;
+
+        template 
+        GeneralAllocator(GeneralAllocator const& other) noexcept
+            : m_pCallbacks(other.m_pCallbacks)
+        {
+        }
+
+        // Only the pointer is stored; the RpsAllocator must outlive this adapter.
+        GeneralAllocator(const RpsAllocator* pCallbacks)
+            : m_pCallbacks(pCallbacks)
+        {
+        }
+
+        template 
+        GeneralAllocator& operator=(GeneralAllocator const& other) noexcept
+        {
+            m_pCallbacks = other.m_pCallbacks;
+            return *this;
+        }
+
+        // Allocates storage for n objects of T through pfnAlloc; m_pCallbacks must be non-null.
+        value_type* allocate(size_t n)
+        {
+            return static_cast(m_pCallbacks->pfnAlloc(m_pCallbacks->pContext, n * sizeof(T), alignof(T)));
+        }
+
+        // The hint is ignored.
+        value_type* allocate(size_t n, const void* hint)
+        {
+            return allocate(n);
+        }
+
+        // Releases p through pfnFree; n is not used.
+        void deallocate(value_type* p, size_t n)
+        {
+            m_pCallbacks->pfnFree(m_pCallbacks->pContext, p);
+        }
+
+        // Allocators compare equal when they point at the same callback struct.
+        template 
+        bool operator==(GeneralAllocator const& rhs) const noexcept
+        {
+            return m_pCallbacks == rhs.m_pCallbacks;
+        }
+
+        template 
+        bool operator!=(GeneralAllocator const& rhs) const noexcept
+        {
+            return 
!(*this == rhs);
+        }
+
+    private:
+        const RpsAllocator* m_pCallbacks = {};
+    };
+
+    template 
+    using ArenaVector = Vector>;
+
+    template 
+    using ArenaBitVector = BitVector>;
+
+    // Allocator used by InplaceVector: allocations go to the dynamic allocator, and deallocate()
+    // skips the in-place array so that it is never handed to the dynamic allocator.
+    template 
+    class InplaceAllocator
+    {
+    public:
+        using value_type = T;
+
+        InplaceAllocator() = default;
+
+        template 
+        InplaceAllocator(InplaceAllocator const& other) noexcept
+            : m_dynamicAllocator(other.m_dynamicAllocator)
+            , m_pStaticArray(other.m_pStaticArray)
+        {
+        }
+
+        template 
+        InplaceAllocator(const TDynamicAllocator& dynamicAllocator, const T* pInplaceArray)
+            : m_dynamicAllocator(dynamicAllocator)
+            , m_pStaticArray(pInplaceArray)
+        {
+        }
+
+        template 
+        InplaceAllocator& operator=(InplaceAllocator const& other) noexcept
+        {
+            m_dynamicAllocator = other.m_dynamicAllocator;
+            m_pStaticArray = other.m_pStaticArray;
+            return *this;
+        }
+
+        // Always allocates from the dynamic allocator; the in-place array is never handed out here.
+        value_type* allocate(size_t n)
+        {
+            return m_dynamicAllocator.allocate(n);
+        }
+
+        // The hint is ignored.
+        value_type* allocate(size_t n, const void* hint)
+        {
+            return allocate(n);
+        }
+
+        // Frees p unless it is the in-place array.
+        void deallocate(value_type* p, size_t n)
+        {
+            if (p != m_pStaticArray)
+            {
+                m_dynamicAllocator.deallocate(p, n);
+            }
+        }
+
+        template 
+        bool operator==(InplaceAllocator const& rhs) const noexcept
+        {
+            return (m_dynamicAllocator == rhs.m_dynamicAllocator) && (m_pStaticArray == rhs.m_pStaticArray);
+        }
+
+        template 
+        bool operator!=(InplaceAllocator const& rhs) const noexcept
+        {
+            return !(*this == rhs);
+        }
+
+    private:
+        GeneralAllocator m_dynamicAllocator;
+        const T* m_pStaticArray = nullptr;
+    };
+
+    // Vector that starts out on a buffer embedded in the object (m_staticData, InplaceSize elements).
+    template >
+    class InplaceVector : public Vector>
+    {
+        RPS_CLASS_NO_COPY(InplaceVector);
+
+    public:
+        InplaceVector() = default;
+
+        // Passes the embedded buffer to the base Vector as storage with capacity InplaceSize.
+        // NOTE(review): how the base constructor treats count > InplaceSize is not visible here - confirm.
+        InplaceVector(size_t count, const AllocatorT& allocator)
+            : Vector>(count,
+                                              InplaceSize,
+                                              reinterpret_cast(m_staticData),
+                                              InplaceAllocator(allocator, reinterpret_cast(m_staticData)))
+        {
+        }
+
+        InplaceVector(const AllocatorT& allocator)
+            : InplaceVector(0, allocator)
+        {
+        }
+
+        InplaceVector(size_t count, const T& value, const AllocatorT& 
allocator) + : InplaceVector(count, allocator) + { + } + + InplaceVector(ConstArrayRef values, const AllocatorT& allocator) + : InplaceVector(values.size(), allocator) + { + } + + private: + alignas(T) uint8_t m_staticData[InplaceSize * sizeof(T)]; + }; + + namespace details + { + template + union FreeListPoolSlot + { + T value; + uint32_t nextFree; + + FreeListPoolSlot() + { + new (&value) T(); + } + }; + } // namespace details + + template >, + typename = typename std::enable_if::value, T>::type> + class FreeListPool + { + public: + FreeListPool() = default; + + FreeListPool(const AllocatorT& allocator) + : m_vector(allocator) + { + } + + uint32_t AllocSlot(T** ppValue = nullptr) + { + uint32_t result = m_freeList; + + if (result != UINT32_MAX) + { + if (ppValue) + { + *ppValue = &m_vector[result].value; + } + m_freeList = m_vector[result].nextFree; + + new (&m_vector[result].value) T(); + } + else + { + size_t slotIndex = m_vector.size(); + auto* pSlot = m_vector.grow(1); + + if (pSlot) + { + result = uint32_t(slotIndex); + + if (ppValue) + { + *ppValue = &pSlot->value; + } + } + } + + return result; + } + + void FreeSlot(uint32_t slot) + { + m_vector[slot].nextFree = m_freeList; + m_freeList = slot; + } + + T* GetSlot(uint32_t slot) + { + return &m_vector[slot].value; + } + + const T* GetSlot(uint32_t slot) const + { + return &m_vector[slot].value; + } + + void Reset(const AllocatorT& allocator) + { + m_vector.reset(allocator); + m_freeList = UINT32_MAX; + } + + private: + Vector, AllocatorT> m_vector; + + uint32_t m_freeList = UINT32_MAX; + }; + + template + using ArenaFreeListPool = rps::FreeListPool>>; + + namespace details + { + template + struct FieldInfo + { + }; + + template + struct FieldInfo + { + T** ppData = nullptr; + size_t offset = 0; + + void Init(AllocInfo& allocInfo, T** pPtr) + { + ppData = pPtr; + offset = allocInfo.Append(); + } + + void Resolve(void* pMemory) + { + *ppData = static_cast(rpsBytePtrInc(pMemory, offset)); + } + }; + + template 
+        struct FieldInfo>
+        {
+            T** ppData = nullptr;
+            size_t offset = 0;
+
+            // arg.first is the pointer to fill in, arg.second describes the storage to reserve.
+            void Init(AllocInfo& allocInfo, std::pair arg)
+            {
+                ppData = arg.first;
+                offset = allocInfo.Append(arg.second);
+            }
+
+            void Resolve(void* pMemory)
+            {
+                *ppData = rpsBytePtrInc(pMemory, offset);
+            }
+        };
+
+        template 
+        struct FieldInfo*, TSize>>
+        {
+            ArrayRef* pArray = nullptr;
+            size_t offset = 0;
+            size_t count = 0;
+
+            // arg.first is the ArrayRef to fill in, arg.second the element count to reserve.
+            void Init(AllocInfo& allocInfo, std::pair*, TSize> arg)
+            {
+                pArray = arg.first;
+                count = arg.second;
+                offset = allocInfo.Append(arg.second);
+            }
+
+            void Resolve(void* pMemory)
+            {
+                pArray->Set(rpsBytePtrInc(pMemory, offset), count);
+            }
+        };
+
+        // Accumulates the layout of several fields into one AllocInfo and later resolves every
+        // field's pointer against a single memory block.
+        template 
+        struct CompoundAllocInfo : AllocInfo
+        {
+            std::tuple...> fields;
+
+            CompoundAllocInfo(TArgs... args)
+            {
+                Init<0>(args...);
+            }
+
+            // Writes every field's address, computed from pMemory and the recorded offsets.
+            void Resolve(void* pMemory)
+            {
+                Resolve<0, TArgs...>(pMemory);
+            }
+
+        private:
+            // Recursive step: initialise the current field, then recurse into the remaining arguments.
+            template 
+            void Init(TArg0 arg0, TArgX... argx)
+            {
+                std::get(fields).Init(*this, arg0);
+
+                Init(argx...);
+            }
+
+            // Recursion terminator.
+            template 
+            void Init()
+            {
+            }
+
+            // Recursive step: resolve the current field, then recurse into the remaining fields.
+            template 
+            void Resolve(void* pMemory)
+            {
+                std::get(fields).Resolve(pMemory);
+
+                Resolve(pMemory);
+            }
+
+            // Recursion terminator.
+            template 
+            void Resolve(void* pMemory)
+            {
+            }
+        };
+    }  // namespace details
+
+    // Describes one object entry of a compound allocation, using AllocInfo::FromType() for its layout.
+    template 
+    std::pair CompoundEntry(T** ppEntry)
+    {
+        return std::make_pair(ppEntry, AllocInfo::FromType());
+    }
+
+    // Describes one entry of a compound allocation with an explicit AllocInfo.
+    template 
+    std::pair CompoundEntry(T** ppEntry, const AllocInfo& allocInfo)
+    {
+        return std::make_pair(ppEntry, allocInfo);
+    }
+
+    // Describes an array entry of a compound allocation holding count elements.
+    template 
+    std::pair*, TSize> CompoundEntry(ArrayRef* pArray, size_t count = 1)
+    {
+        return std::make_pair(pArray, count);
+    }
+
+    // Allocates one block large enough for all described entries and points each entry at its part of it.
+    // Returns the block (to be released as a whole), or nullptr if the allocation failed.
+    template 
+    void* AllocateCompound(const RpsAllocator& allocator, TArgs... 
args)
+    {
+        details::CompoundAllocInfo layout(args...);
+
+        void* pMemory = allocator.pfnAlloc(allocator.pContext, layout.size, layout.alignment);
+
+        // On failure the entries' pointers are left untouched.
+        if (pMemory)
+        {
+            layout.Resolve(pMemory);
+        }
+
+        return pMemory;
+    }
+
+    // Allocates memory with the size and alignment described by allocInfo.
+    static inline void* Allocate(const RpsAllocator& allocator, const AllocInfo& allocInfo)
+    {
+        return allocator.pfnAlloc(allocator.pContext, allocInfo.size, allocInfo.alignment);
+    }
+
+    // Releases memory through the allocator's pfnFree callback.
+    static inline void Free(const RpsAllocator& allocator, void* pMemory)
+    {
+        allocator.pfnFree(allocator.pContext, pMemory);
+    }
+
+    // Pairs a result code with a payload. Default-constructed results carry RPS_ERROR_UNSPECIFIED.
+    template 
+    struct TResult
+    {
+        RpsResult code = RPS_ERROR_UNSPECIFIED;
+        T data = {};
+
+        TResult()
+        {
+        }
+
+        TResult(RpsResult inCode, T inData = {})
+            : code(inCode)
+            , data(inData)
+        {
+        }
+
+        // Constructing from a bare value marks the result as RPS_OK.
+        TResult(T value)
+            : code(RPS_OK)
+            , data(value)
+        {
+        }
+
+        bool IsSucceeded() const
+        {
+            return RPS_SUCCEEDED(code);
+        }
+
+        // True when the code indicates success.
+        operator bool() const
+        {
+            return IsSucceeded();
+        }
+
+        // Implicit access to the payload; the code is not checked.
+        operator const T&() const
+        {
+            return data;
+        }
+
+        RpsResult Result() const
+        {
+            return code;
+        }
+
+        // Returns a result with the same code and the payload converted by static_cast.
+        template 
+        TResult StaticCast() const
+        {
+            return TResult(code, static_cast(data));
+        }
+    };
+
+    // Builds a TResult from a value and an explicit result code.
+    template 
+    static inline TResult MakeResult(T value, RpsResult errorCode)
+    {
+        return TResult(errorCode, value);
+    }
+
+    // Associates a printable name with a value; consumed by the PrinterRef helpers.
+    template 
+    struct NameValuePair
+    {
+        StrRef name;
+        TValue value;
+    };
+
+// Initializer for a NameValuePair whose name is the stringized value expression.
+#define RPS_INIT_NAME_VALUE_PAIR(Value) \
+    {                                   \
+        StrRef(#Value), Value           \
+    }
+
+// As above, with the value formed by pasting ValuePrefix and Name; only Name is stringized.
+#define RPS_INIT_NAME_VALUE_PAIR_PREFIXED(ValuePrefix, Name) \
+    {                                                        \
+        StrRef(#Name), ValuePrefix##Name                     \
+    }
+
+// As above, with ValuePostfix pasted after Name as well.
+#define RPS_INIT_NAME_VALUE_PAIR_PREFIXED_POSTFIXED(ValuePrefix, Name, ValuePostfix) \
+    {                                                                                \
+        StrRef(#Name), ValuePrefix##Name##ValuePostfix                               \
+    }
+
+    // Convenience wrapper around an RpsPrinter. Only a reference is stored, so the printer must
+    // outlive the wrapper.
+    class PrinterRef
+    {
+    public:
+        PrinterRef(const RpsPrinter& printer)
+            : m_printer(printer)
+        {
+        }
+
+        operator const RpsPrinter&() const
+        {
+            return m_printer;
+        }
+
+        // printf-style output; returns *this so calls can be chained.
+        template 
+        const PrinterRef& operator()(const char* fmt, T... 
args) const + { + m_printer.pfnPrintf(m_printer.pContext, fmt, args...); + return *this; + } + + // NOTE(review): "%.*s" takes an int precision argument - confirm that StrRef::len has a matching type. + const PrinterRef& operator()(const StrRef str) const + { + m_printer.pfnPrintf(m_printer.pContext, "%.*s", str.len, str.str); + return *this; + } + + template + const PrinterRef& PrintFlags(TValue value, + const NameValuePair (&names)[Count], + StrRef separator = " | ", + StrRef defaultString = "NONE") const + { + return PrintFlags(value, ConstArrayRef>(names), separator, defaultString); + } + + /// @brief Prints the name of every entry in names that shares bits with value, joined by separator. + /// An entry whose value equals value exactly is printed and ends the search. + /// defaultString is printed only when no entry matched at all. + template + const PrinterRef& PrintFlags(TValue value, + ConstArrayRef> names, + StrRef separator = " | ", + StrRef defaultString = "NONE") const + { + bool bFirstMatch = true; + + for (auto& nameValuePair : names) + { + if (value == nameValuePair.value) + { + // An exact match is still a match: separate it from previously printed names and + // clear bFirstMatch so that defaultString is not appended after the name. + if (!bFirstMatch) + { + (*this)(separator); + } + (*this)(nameValuePair.name); + + bFirstMatch = false; + break; + } + else if (value & nameValuePair.value) + { + if (!bFirstMatch) + { + (*this)(separator); + } + (*this)(nameValuePair.name); + + bFirstMatch = false; + } + } + + if (bFirstMatch) + { + (*this)(defaultString); + } + + return *this; + } + + template + const PrinterRef& PrintValueName(TValue value, const NameValuePair (&names)[Count]) const + { + return PrintValueName(value, ConstArrayRef>(names)); + } + + template + const PrinterRef& PrintValueName(TValue value, ConstArrayRef> names) const + { + for (auto& nameValuePair : names) + { + if (value == nameValuePair.value) + { + (*this)(nameValuePair.name); + break; + } + } + + return *this; + } + + private: + const RpsPrinter& m_printer; + }; + + /// @brief Util for setting a pointer to a given value temporarily, and restore it when going out of scope with RAII + template ::value>::type> + struct ScopedContext + { + ScopedContext(T* pPrev, T curr) + : m_pPrev(pPrev) + , m_prevValue(*pPrev) + { + *pPrev = curr; + } + + ~ScopedContext() + { + *m_pPrev = m_prevValue; + } + + private: + T* const m_pPrev; + T m_prevValue; + + RPS_CLASS_NO_COPY_MOVE(ScopedContext); + }; + +} // namespace rps + +#endif // _RPS_UTIL_HPP_ diff --git 
a/src/frontend/rps_builder.cpp b/src/frontend/rps_builder.cpp new file mode 100644 index 0000000..5f5a248 --- /dev/null +++ b/src/frontend/rps_builder.cpp @@ -0,0 +1,137 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#include "rps/frontend/rps_builder.h" + +#include "core/rps_util.hpp" +#include "core/rps_device.hpp" +#include "runtime/common/rps_cmd_buf.hpp" +#include "runtime/common/rps_subprogram.hpp" +#include "runtime/common/rps_render_graph.hpp" +#include "runtime/common/rps_render_graph_builder.hpp" + +namespace rps +{ + RenderGraphBuilderRef::RenderGraphBuilderRef(RpsRenderGraphBuilder builder) + : m_builder(*FromHandle(builder)) + { + } + + void* RenderGraphBuilderRef::AllocateData(size_t size, size_t alignment) const + { + return m_builder.AllocateData(size, alignment); + } + + RpsNodeDeclId RenderGraphBuilderRef::DeclNode(const RpsNodeDesc& nodeDesc) const + { + return m_builder.DeclareDynamicNode(&nodeDesc); + } + + RpsNodeId RenderGraphBuilderRef::AddNode(RpsNodeDeclId nodeDeclId, + uint32_t tag, + PFN_rpsCmdCallback callback, + void* callbackUserContext, + std::initializer_list args) + { + RpsNodeId nodeId = RPS_CMD_ID_INVALID; + + RPS_RETURN_ERROR_IF( + RPS_FAILED(m_builder.AddCmdNode( + nodeDeclId, tag, RpsCmdCallback{callback, callbackUserContext}, args.begin(), uint32_t(args.size()), &nodeId)), + RPS_CMD_ID_INVALID); + + RPS_ASSERT((nodeId != RPS_CMD_ID_INVALID) && "invalid RenderGraphBuilder::AddCmdNode impl"); + + return nodeId; + } + + RpsResourceId RenderGraphBuilderRef::GetParamResourceId(RpsParamId paramId, uint32_t arrayIndex) const + { + return m_builder.GetParamResourceId(paramId, arrayIndex); + } + + RpsResult RenderGraphBuilderRef::DeclareResource(uint32_t localResourceId, + RpsVariable hDescVar, + const char* name, + 
RpsResourceId* pOutResId) + { + return m_builder.DeclareResource(localResourceId, hDescVar, name, pOutResId); + } + + RpsVariable RenderGraphBuilderRef::GetParamVariable(RpsParamId paramId, size_t* pSize) const + { + return m_builder.GetParamVariable(paramId, pSize); + } + + +} // namespace rps + +void* rpsRenderGraphAllocateData(RpsRenderGraphBuilder builder, size_t size) +{ + return rpsRenderGraphAllocateDataAligned(builder, size, alignof(std::max_align_t)); +} + +void* rpsRenderGraphAllocateDataAligned(RpsRenderGraphBuilder builder, size_t size, size_t alignment) +{ + return builder ? rps::FromHandle(builder)->AllocateData(size, alignment) : nullptr; +} + +RpsNodeDeclId rpsRenderGraphDeclareDynamicNode(RpsRenderGraphBuilder builder, const RpsNodeDesc* pNodeDesc) +{ + RPS_CHECK_ARGS(builder); + return rps::FromHandle(builder)->DeclareDynamicNode(pNodeDesc); +} + +RpsVariable rpsRenderGraphGetParamVariable(RpsRenderGraphBuilder builder, RpsParamId paramId) +{ + RPS_RETURN_ERROR_IF(builder == RPS_NULL_HANDLE, nullptr); + + return rps::FromHandle(builder)->GetParamVariable(paramId); +} + +RpsResourceId rpsRenderGraphGetParamResourceId(RpsRenderGraphBuilder builder, RpsParamId paramId) +{ + RPS_RETURN_ERROR_IF(builder == RPS_NULL_HANDLE, RPS_RESOURCE_ID_INVALID); + + return rps::FromHandle(builder)->GetParamResourceId(paramId); +} + +RpsResourceId rpsRenderGraphDeclareResource(RpsRenderGraphBuilder builder, + const char* name, + RpsResourceId localId, + RpsVariable hDescVar) +{ + RPS_RETURN_ERROR_IF(builder == RPS_NULL_HANDLE, RPS_RESOURCE_ID_INVALID); + RPS_RETURN_ERROR_IF(localId == RPS_RESOURCE_ID_INVALID, RPS_RESOURCE_ID_INVALID); + + RpsResourceId resId; + RPS_RETURN_ERROR_IF(RPS_FAILED(rps::FromHandle(builder)->DeclareResource(localId, hDescVar, name, &resId)), + RPS_RESOURCE_ID_INVALID); + + return resId; +} + +RpsNodeId rpsRenderGraphAddNode(RpsRenderGraphBuilder builder, + RpsNodeDeclId nodeDeclId, + uint32_t tag, + PFN_rpsCmdCallback callback, + void* 
callbackContext, + const RpsVariable* pArgs, + uint32_t numArgs) +{ + RPS_RETURN_ERROR_IF(builder == RPS_NULL_HANDLE, RPS_CMD_ID_INVALID); + + RpsNodeId nodeId = RPS_CMD_ID_INVALID; + + RPS_RETURN_ERROR_IF( + RPS_FAILED(rps::FromHandle(builder)->AddCmdNode(nodeDeclId, tag, {callback, callbackContext}, pArgs, numArgs, &nodeId)), + RPS_CMD_ID_INVALID); + + RPS_ASSERT((nodeId != RPS_CMD_ID_INVALID) && "invalid RenderGraphBuilder::AddCmdNode impl"); + + return nodeId; +} diff --git a/src/runtime/common/phases/rps_access_dag_build.hpp b/src/runtime/common/phases/rps_access_dag_build.hpp new file mode 100644 index 0000000..60c01c0 --- /dev/null +++ b/src/runtime/common/phases/rps_access_dag_build.hpp @@ -0,0 +1,527 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_CMD_DAG_BUILD_HPP_ +#define _RPS_CMD_DAG_BUILD_HPP_ + +#include "runtime/common/rps_render_graph.hpp" + +namespace rps +{ + class AccessDAGBuilderPass : public IRenderGraphPhase + { + public: + AccessDAGBuilderPass(RenderGraph& renderGraph) + : m_renderGraph(renderGraph) + , m_pRuntimeDevice(RuntimeDevice::Get(renderGraph.GetDevice())) + , m_transitions(renderGraph.GetTransitions()) + , m_nodeRefListPool(m_nodeRefLists) + { + } + + virtual RpsResult Run(RenderGraphUpdateContext& context) override final + { + RPS_ASSERT(&m_renderGraph == &context.renderGraph); + + const auto cmds = m_renderGraph.GetCmdInfos().crange_all(); + auto& graph = m_renderGraph.GetGraph(); + + auto& resourceInsts = m_renderGraph.GetResourceInstances(); + + ArenaCheckPoint arenaCheckpoint{context.scratchArena}; + + m_resourceStates.reset_keep_capacity(&context.scratchArena); + m_subResStates.reset_keep_capacity(&context.scratchArena); + m_nodeRefLists.reset_keep_capacity(&context.scratchArena); + 
m_nodeRefListPool.reset(); + + RPS_V_RETURN(InitResourceStates()); + + // Fail early if the reserved entry cannot be allocated, instead of writing to m_transitions[0] unchecked. + RPS_CHECK_ALLOC(m_transitions.resize(1)); // 0 is reserved for INVALID_TRANSITION + m_transitions[0] = TransitionInfo{ + { + RPS_RESOURCE_ID_INVALID, + SubresourceRangePacked(1, 0, 1, 0, 1), + {RPS_ACCESS_UNKNOWN, RPS_SHADER_STAGE_NONE}, + RPS_FORMAT_UNKNOWN, + }, + RPS_INDEX_NONE_U32, + RPS_INDEX_NONE_U32, + }; + + // For each node, process accesses in two passes, first writeable then readonly accesses. + // This is to allow write accesses to take precedence over read in case of overlapping R/W ranges. + static constexpr uint32_t WriteAccessDAGPass = 0; + static constexpr uint32_t ReadAccessDAGPass = 1; + static constexpr uint32_t NumAccessDAGPasses = 2; + + for (uint32_t iCmd = 0, numCmds = uint32_t(cmds.size()); iCmd < numCmds; ++iCmd) + { + const NodeId nodeId = iCmd; // TODO + + auto cmdAccesses = m_renderGraph.GetCmdAccesses(iCmd); + + for (uint32_t iPass = 0; iPass < NumAccessDAGPasses; iPass++) + { + // TODO: Bitmask R/W accesses per node for common cases where access count is limited. 
+ for (const CmdAccessInfo& currAccess : cmdAccesses) + { + if (currAccess.resourceId == RPS_RESOURCE_ID_INVALID) + continue; + + if (((iPass == WriteAccessDAGPass) && + !(currAccess.access.accessFlags & RPS_ACCESS_ALL_GPU_WRITE)) || + ((iPass == ReadAccessDAGPass) && + (currAccess.access.accessFlags & RPS_ACCESS_ALL_GPU_WRITE))) + { + continue; + } + + ResourceInstance& resInfo = resourceInsts[currAccess.resourceId]; + + const bool isSingleSubresource = (resInfo.numSubResources == 1); + + // Previous states + ResourceState& resState = m_resourceStates[currAccess.resourceId]; + auto subResStates = resState.subResStates.Get(m_subResStates); + + // Set initial state + if ((resInfo.initialAccess.accessFlags == RPS_ACCESS_UNKNOWN) && + ((!isSingleSubresource && subResStates[0].access.accessorNodes.empty()) || + resState.access.accessorNodes.empty())) + { + RPS_ASSERT((isSingleSubresource && subResStates.empty()) || (subResStates.size() == 1)); + resInfo.initialAccess = currAccess.access; + } + + // Buffers and single-subresource images: simple path + + if (isSingleSubresource) + { + RPS_V_RETURN(ProcessTransition(graph, + nodeId, + resState.access, + currAccess.access, + currAccess.resourceId, + currAccess.range)); + } + else + { + // Check each subresource range in previous access ranges + auto prevRanges = resState.subResStates.Get(m_subResStates); + + for (SubresourceState& prevSubResState : prevRanges) + { + SubresourceRangePacked overlapRange; + SubresourceRangePacked remainingRanges[SubresourceRangePacked::MAX_CLIP_COMPLEMENTS]; + uint32_t numRemainingRanges = 0; + + // Try to clip current access range against existing range + if (SubresourceRangePacked::Clip(prevSubResState.range, + currAccess.range, + remainingRanges, + &numRemainingRanges, + &overlapRange)) + { + RPS_ASSERT(resState.subResStates.size() + numRemainingRanges <= + resInfo.numSubResources); + + const uint32_t newRangeOffset = resState.subResStates.GetEnd(); + 
resState.subResStates.SetRange(resState.subResStates.GetBegin(), + resState.subResStates.size() + numRemainingRanges); + + for (uint32_t iRemaining = 0; iRemaining < numRemainingRanges; iRemaining++) + { + SubresourceState& newAccessState = m_subResStates[newRangeOffset + iRemaining]; + + newAccessState.range = remainingRanges[iRemaining]; + + RPS_V_RETURN(CloneReferenceList(prevSubResState.access.accessorNodes, + newAccessState.access.accessorNodes)); + + uint32_t newTransition = RenderGraph::INVALID_TRANSITION; + if (prevSubResState.access.lastTransition != RenderGraph::INVALID_TRANSITION) + { + RPS_V_RETURN(CloneTransition( + graph, prevSubResState.access.lastTransition, newTransition)); + m_transitions[newTransition].access.range = remainingRanges[iRemaining]; + } + + newAccessState.access.lastTransition = newTransition; + } + + // Replace previous transition node's access range to the overlapping region. + if (prevSubResState.access.lastTransition != RenderGraph::INVALID_TRANSITION) + { + auto& access = m_transitions[prevSubResState.access.lastTransition].access; + + access.range = overlapRange; + FilterAccessByRange(access.access, overlapRange); + } + + prevSubResState.range = overlapRange; + + auto filteredCurrentAccess = currAccess.access; + FilterAccessByRange(filteredCurrentAccess, overlapRange); + + // Propagate failures, same as the single-subresource path does for this call. + RPS_V_RETURN(ProcessTransition(graph, + nodeId, + prevSubResState.access, + filteredCurrentAccess, + currAccess.resourceId, + overlapRange)); + } + } + } + } + } + } + + auto& finalAccesses = context.renderGraph.GetResourceFinalAccesses(); + finalAccesses.resize(0); + + // Collect final states + for (uint32_t iRes = 0, numRes = uint32_t(resourceInsts.size()); iRes < numRes; iRes++) + { + ResourceInstance& resInfo = resourceInsts[iRes]; + + if (resInfo.IsTemporalParent()) + continue; + + if (resInfo.numSubResources > 1) + { + auto subResRangesRef = m_resourceStates[iRes].subResStates.Get(m_subResStates); + + resInfo.finalAccesses.SetRange(uint32_t(finalAccesses.size()), 
subResRangesRef.size()); + + auto* pRanges = finalAccesses.grow(subResRangesRef.size()); + // Checked like the other grow() calls in this class: a null result means the allocation failed. + RPS_RETURN_ERROR_IF(!pRanges && !subResRangesRef.empty(), RPS_ERROR_OUT_OF_MEMORY); + + for (uint32_t iRange = 0; iRange < subResRangesRef.size(); iRange++) + { + pRanges[iRange].range = subResRangesRef[iRange].range; + pRanges[iRange].prevTransition = subResRangesRef[iRange].access.lastTransition; + } + } + else + { + resInfo.finalAccesses.SetRange(uint32_t(finalAccesses.size()), 1); + auto* pRange = finalAccesses.grow(1); + RPS_RETURN_ERROR_IF(!pRange, RPS_ERROR_OUT_OF_MEMORY); + pRange->range = resInfo.fullSubresourceRange; + pRange->prevTransition = m_resourceStates[iRes].access.lastTransition; + } + } + + // Save high watermark scale by 1.5 for next frame reservation size + m_transitionCountWatermark = uint32_t(m_transitions.size()); + m_transitionCountWatermark = m_transitionCountWatermark + (m_transitionCountWatermark >> 1); + + return RPS_OK; + } + + private: + struct AccessState + { + Span accessorNodes; + uint32_t lastTransition; + }; + + struct SubresourceState + { + AccessState access; + SubresourceRangePacked range; + }; + + struct ResourceState + { + AccessState access; + Span subResStates; + }; + + public: + ConstArrayRef GetTransitionInfos() const + { + return m_transitions.range_all(); + } + + private: + RpsResult InitResourceStates() + { + auto resourceInstances = m_renderGraph.GetResourceInstances().range_all(); + + RPS_RETURN_ERROR_IF(!m_resourceStates.resize(resourceInstances.size(), {}), RPS_ERROR_OUT_OF_MEMORY); + + const uint32_t numRes = uint32_t(resourceInstances.size()); + + uint32_t totalSubResources = 0; + + for (uint32_t iRes = 0; iRes < numRes; iRes++) + { + const auto& resInstance = resourceInstances[iRes]; + if (!resInstance.IsTemporalParent() && (resInstance.numSubResources > 1)) + { + SubresourceState* pSubResStates = m_subResStates.grow(resInstance.numSubResources, {}); + RPS_RETURN_ERROR_IF(!pSubResStates, RPS_ERROR_OUT_OF_MEMORY); + + pSubResStates[0].range = resInstance.fullSubresourceRange; + + // Init size to 1: a single range covering the full resource. 
SubResStates grows as subresource ranges are accessed: + m_resourceStates[iRes].subResStates.SetRange(totalSubResources, 1); + totalSubResources += resInstance.numSubResources; + } + } + + return RPS_OK; + } + + bool IsFullResource(const SubresourceRangePacked& range, const ResourceInstance& resourceDesc) + { + return range.GetNumSubresources() == resourceDesc.numSubResources; + } + + RpsResult CloneReferenceList(const Span src, Span& dst) + { + m_nodeRefListPool.alloc_span(dst, src.size()); + + if (!src.empty()) + { + if (dst.GetEnd() > m_nodeRefLists.size()) + { + RPS_CHECK_ALLOC(m_nodeRefLists.resize(dst.GetEnd())); + } + + auto srcRange = src.Get(m_nodeRefLists); + auto dstRange = dst.Get(m_nodeRefLists); + std::copy(srcRange.begin(), srcRange.end(), dstRange.begin()); + } + + return RPS_OK; + } + + RpsResult CloneTransition(Graph& graph, uint32_t srcTransition, uint32_t& dstTransition) + { + const TransitionInfo& srcTrans = m_transitions[srcTransition]; + int32_t newTransitionId = int32_t(m_transitions.size()); + + const NodeId newTransNodeId = graph.CloneNode(srcTrans.nodeId, -newTransitionId); + RPS_RETURN_ERROR_IF(newTransNodeId == RPS_INDEX_NONE_U32, RPS_ERROR_UNSPECIFIED); + + TransitionInfo* pNewTrans = m_transitions.grow(1); + RPS_RETURN_ERROR_IF(!pNewTrans, RPS_ERROR_OUT_OF_MEMORY); + + *pNewTrans = srcTrans; + pNewTrans->nodeId = newTransNodeId; + + dstTransition = newTransitionId; + + return RPS_OK; + } + + RpsResult AddNewTransition(Graph& graph, + NodeId currNodeId, + AccessState& accessorInfo, + const RpsAccessAttr& newAccess, + uint32_t resourceId, + const SubresourceRangePacked& range) + { + uint32_t newTransitionId = uint32_t(m_transitions.size()); + + TransitionInfo* pNewTrans = m_transitions.grow(1); + RPS_RETURN_ERROR_IF(!pNewTrans, RPS_ERROR_OUT_OF_MEMORY); + + const NodeId newTransNodeId = graph.AddNode(-int32_t(newTransitionId)); + + pNewTrans->access.resourceId = resourceId; + pNewTrans->access.range = range; + pNewTrans->access.access = 
newAccess; + pNewTrans->nodeId = newTransNodeId; + pNewTrans->prevTransition = accessorInfo.lastTransition; + + Node* pNewTransNode = graph.GetNode(newTransNodeId); + const Node* pCurrNode = graph.GetNode(currNodeId); + pNewTransNode->subgraph = pCurrNode->subgraph; + pNewTransNode->barrierScope = pCurrNode->barrierScope; + + // Add edges for existing accessors -> new transition node + // TODO: If previous accessors are ordered, only add edge from the last node + for (auto accessorNodeId : accessorInfo.accessorNodes.Get(m_nodeRefLists)) + { + graph.AddEdge(accessorNodeId, newTransNodeId); + // RPS_V_RETURN(result); + } + + m_nodeRefListPool.free_span(accessorInfo.accessorNodes); + accessorInfo.lastTransition = newTransitionId; + + return RPS_OK; + } + + RpsResult ProcessTransition(Graph& graph, + NodeId currNodeId, + AccessState& accessState, + const RpsAccessAttr& newAccess, + uint32_t resourceId, + const SubresourceRangePacked& range) + { + const RpsAccessAttr beforeAccess = m_transitions[accessState.lastTransition].access.access; + + AccessTransitionInfo transitionInfo; + + if ((accessState.lastTransition == RenderGraph::INVALID_TRANSITION) || + NeedTransition(beforeAccess, newAccess, transitionInfo)) + { + // New Transition + NodeId lastAccessor = accessState.accessorNodes.empty() + ? RPS_INDEX_NONE_U32 + : accessState.accessorNodes.Get(m_nodeRefLists).back(); + + if (lastAccessor != currNodeId) + { + AddNewTransition(graph, currNodeId, accessState, newAccess, resourceId, range); + } + else + { + // This normally indicates an application error where the same subresource has multiple incompatible accesses on the same node: + + // If current node has both write and read access, write takes precedence. + // TODO: Add a flag to control if to allow R/W accesses to coexist at all. 
+ if (newAccess.accessFlags & RPS_ACCESS_ALL_GPU_WRITE) + { + RPS_TODO("Handle before/after & overlapped access with new write access."); + } + } + } + else + { + // No Transition + if (transitionInfo.bMergedAccessStates) + { + // We should have splitted (cloned) the previou transition node if any, and patched its range to the current one. + RPS_ASSERT(m_transitions[accessState.lastTransition].access.range == range); + m_transitions[accessState.lastTransition].access.access = transitionInfo.mergedAccess; + } + + if (transitionInfo.bKeepOrdering && !accessState.accessorNodes.empty()) + { + NodeId lastAccessor = accessState.accessorNodes.Get(m_nodeRefLists).back(); + + // TODO: Validate all existing accessors are in order. + if (lastAccessor != currNodeId) + { + graph.AddEdge(lastAccessor, currNodeId); + } + } + } + + graph.AddEdge(m_transitions[accessState.lastTransition].nodeId, currNodeId); + AddNodeReference(accessState.accessorNodes, currNodeId); + + return RPS_OK; + } + + void AddNodeReference(Span& refNodes, NodeId newRef) + { + if (refNodes.empty() || (refNodes.Get(m_nodeRefLists).back() != newRef)) + { + m_nodeRefListPool.push_to_span(refNodes, newRef); + } + } + + void FilterAccessByRange(RpsAccessAttr& access, const SubresourceRangePacked& range) + { + if (access.accessFlags & RPS_ACCESS_DEPTH_STENCIL) + { + const auto aspectUsage = m_pRuntimeDevice->GetImageAspectUsages(range.aspectMask); + + if (!(aspectUsage & RPS_IMAGE_ASPECT_DEPTH)) + { + access.accessFlags &= ~(RPS_ACCESS_DEPTH_READ_BIT | RPS_ACCESS_DEPTH_WRITE_BIT); + + // Any SRV states should be for depth plane, removing: + if (access.accessFlags & RPS_ACCESS_STENCIL_WRITE_BIT) + { + access.accessFlags &= ~RPS_ACCESS_SHADER_RESOURCE_BIT; + access.accessStages = RPS_SHADER_STAGE_NONE; + } + } + + if (!(aspectUsage & RPS_IMAGE_ASPECT_STENCIL)) + { + access.accessFlags &= ~(RPS_ACCESS_STENCIL_READ_BIT | RPS_ACCESS_STENCIL_WRITE_BIT); + + // Any SRV states should be for stencil plane, removing: + if 
(access.accessFlags & RPS_ACCESS_DEPTH_WRITE_BIT) + { + access.accessFlags &= ~RPS_ACCESS_SHADER_RESOURCE_BIT; + access.accessStages = RPS_SHADER_STAGE_NONE; + } + } + } + } + + static bool IsReadOnly(const RpsAccessAttr& access) + { + return !rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_ALL_GPU_WRITE | RPS_ACCESS_CPU_WRITE_BIT); + } + + inline bool NeedTransition(const RpsAccessAttr& before, + const RpsAccessAttr& after, + AccessTransitionInfo& accessTransInfo) const + { + // Coarse filter before calling runtime dependent CalculateAccessTransition: + // TODO: Get this from runtime device + constexpr RpsAccessFlags c_runtimeDependentAccessFlags = RPS_ACCESS_COPY_SRC_BIT | + RPS_ACCESS_COPY_DEST_BIT | RPS_ACCESS_CLEAR_BIT | + RPS_ACCESS_RENDER_PASS | RPS_ACCESS_DEPTH_STENCIL; + + accessTransInfo = {}; + + if (rpsAnyBitsSet((before.accessFlags | after.accessFlags), c_runtimeDependentAccessFlags) && + m_pRuntimeDevice->CalculateAccessTransition(before, after, accessTransInfo)) + { + return accessTransInfo.bTransition; + } + + if (IsReadOnly(before) && IsReadOnly(after)) + { + accessTransInfo.bMergedAccessStates = before != after; + accessTransInfo.mergedAccess = before | after; + + return false; + } + else + { + const bool relaxOrdering = + rpsAnyBitsSet(before.accessFlags & after.accessFlags, RPS_ACCESS_RELAXED_ORDER_BIT); + + accessTransInfo.bKeepOrdering = !relaxOrdering; + + const bool isUav = rpsAnyBitsSet(before.accessFlags, RPS_ACCESS_UNORDERED_ACCESS_BIT); + + // By default UAV to UAV access needs a sync, unless RELAXED_ORDER is specified on both access. 
+ accessTransInfo.bTransition = (before != after) || (isUav && !relaxOrdering); + + return accessTransInfo.bTransition; + } + } + + private: + RenderGraph& m_renderGraph; + RuntimeDevice* m_pRuntimeDevice = nullptr; + + ArenaVector& m_transitions; + + ArenaVector m_resourceStates; + ArenaVector m_subResStates; + + ArenaVector m_nodeRefLists; + SpanPool> m_nodeRefListPool; + + uint32_t m_transitionCountWatermark = 0; + }; +} + +#endif //_RPS_CMD_DAG_BUILD_HPP_ diff --git a/src/runtime/common/phases/rps_cmd_dag_print.hpp b/src/runtime/common/phases/rps_cmd_dag_print.hpp new file mode 100644 index 0000000..6377f24 --- /dev/null +++ b/src/runtime/common/phases/rps_cmd_dag_print.hpp @@ -0,0 +1,274 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_CMD_DAG_PRINT_HPP_ +#define _RPS_CMD_DAG_PRINT_HPP_ + +#include "runtime/common/rps_render_graph.hpp" + +namespace rps +{ + class DAGPrintPhase : public IRenderGraphPhase + { + struct SubgraphInfo + { + uint32_t firstChild; + uint32_t lastChild; + uint32_t nextSibling; + }; + + public: + DAGPrintPhase(RenderGraph& renderGraph) + : m_renderGraph(renderGraph) + , m_graph(m_renderGraph.GetGraph()) + { + } + + virtual RpsResult Run(RenderGraphUpdateContext& context) override final + { + RPS_RETURN_OK_IF(!rpsAnyBitsSet(context.pUpdateInfo->diagnosticFlags, RPS_DIAGNOSTIC_ENABLE_DAG_DUMP)); + + const auto cmds = m_renderGraph.GetCmdInfos().range_all(); + const auto nodes = m_graph.GetNodes(); + + ArenaCheckPoint arenaCheckpoint{context.scratchArena}; + + m_subgraphInfos.reset_keep_capacity(&context.scratchArena); + + // TODO: Expose print functionality separately / allow specify printer + PrinterRef printer(m_renderGraph.GetDevice().Printer()); + + printer("\ndigraph G {\n"); + printer(" graph [ size = 
\"128,64\" ];\n"); + printer(" edge [ style = bold ];\n"); + printer( + " node [ shape = polygon, sides = 4, color = magenta, style = filled, orientation = \"45.0\" ];"); + + // Transition m_nodes. + for (uint32_t i = uint32_t(cmds.size()), iEnd = uint32_t(nodes.size()); i < iEnd; i++) + { + const uint32_t iTrans = i - uint32_t(cmds.size()); + if ((iTrans & 0x7) == 0) + { + printer("\n "); + } + + printer(" "); + m_renderGraph.PrintTransitionNodeName(printer, i); + } + + printer(";\n"); + + uint32_t printedNodes = 0; + + constexpr RpsNodeDeclFlags QueueTypeFlags = + RPS_NODE_DECL_GRAPHICS_BIT | RPS_NODE_DECL_COMPUTE_BIT | RPS_NODE_DECL_COPY_BIT; + + printer(" node [ shape = circle, color = cyan, style = filled ];\n"); + printedNodes += + PrintNodes(printer, [=](auto flags) { return (flags & QueueTypeFlags) == RPS_NODE_DECL_GRAPHICS_BIT; }); + printer(" node [ shape = circle, color = orange, style = filled ];\n"); + printedNodes += + PrintNodes(printer, [=](auto flags) { return (flags & QueueTypeFlags) == RPS_NODE_DECL_COMPUTE_BIT; }); + printer(" node [ shape = circle, color = lime, style = filled ];\n"); + printedNodes += + PrintNodes(printer, [=](auto flags) { return (flags & QueueTypeFlags) == RPS_NODE_DECL_COPY_BIT; }); + + printer(" node [ shape = circle, color = gray, style = filled ];\n"); + printedNodes += PrintNodes(printer, [=](auto flags) { return rpsCountBits(flags & QueueTypeFlags) != 1; }); + + RPS_ASSERT(printedNodes == cmds.size()); + + PrintSubgraphs(printer); + + PrintEdges(printer); + + printer("}\n"); + + return RPS_OK; + } + + private: + template + uint32_t PrintNodes(PrinterRef printer, TFilter filter) + { + const auto cmds = m_renderGraph.GetCmdInfos().range_all(); + + uint32_t count = 0; + + // Command m_nodes. 
+ for (uint32_t i = 0; i < cmds.size(); i++) + { + auto& nodeDecl = *m_renderGraph.GetCmdInfo(i)->pNodeDecl; + + if (filter(nodeDecl.flags)) + { + count++; + + printer(" "); + m_renderGraph.PrintCmdNodeName(printer, i); + + if ((count & 0x7) == 0x7) + { + printer("\n "); + } + } + } + + if (count > 0) + { + printer(";\n"); + } + + return count; + } + + void PrintSubgraphs(PrinterRef printer) + { + const auto subgraphs = m_graph.GetSubgraphs(); + + if (subgraphs.empty()) + { + return; + } + + m_subgraphInfos.resize(subgraphs.size()); + + for (uint32_t i = 0; i < subgraphs.size(); i++) + { + auto& subgraphInfo = m_subgraphInfos[i]; + + subgraphInfo.firstChild = RPS_INDEX_NONE_U32; + subgraphInfo.lastChild = RPS_INDEX_NONE_U32; + subgraphInfo.nextSibling = RPS_INDEX_NONE_U32; + + const Subgraph& subgraph = subgraphs[i]; + + if (subgraph.parentSubgraph != RPS_INDEX_NONE_U32) + { + RPS_ASSERT(subgraph.parentSubgraph < i); + + SubgraphInfo& parentInfo = m_subgraphInfos[subgraph.parentSubgraph]; + + if (parentInfo.firstChild == RPS_INDEX_NONE_U32) + { + parentInfo.firstChild = parentInfo.lastChild = i; + } + else + { + m_subgraphInfos[parentInfo.lastChild].nextSibling = i; + parentInfo.lastChild = i; + } + } + } + + for (uint32_t i = 0; i < subgraphs.size(); i++) + { + if (subgraphs[i].parentSubgraph != RPS_INDEX_NONE_U32) + { + continue; // Handled by parent + } + + PrintSubgraphRecursive(printer, i, 1); + } + } + + void PrintSubgraphRecursive(PrinterRef printer, uint32_t sgIdx, uint32_t depth) + { + const Subgraph& subgraph = m_graph.GetSubgraphs()[sgIdx]; + const SubgraphInfo& subgraphInfo = m_subgraphInfos[sgIdx]; + const auto transitions = m_renderGraph.GetTransitions().crange_all(); + + printer("%*csubgraph cluster_%d { style=\"rounded%s\"", + depth * 4, + ' ', + sgIdx, + subgraph.IsAtomic() ? 
"" : ",dashed"); + + for (uint32_t i = subgraph.beginNode; i <= subgraph.endNode; i++) + { + printer(" "); + PrintNodeName(printer, i); + } + + // TODO: Don't always iterate all transition m_nodes. + // Skip transitons[0] since it's reserved for invalid transition. + RPS_ASSERT(transitions.empty() || (transitions[0].nodeId == RPS_NODEDECL_ID_INVALID)); + + for (uint32_t transIdx = 1; transIdx < transitions.size(); transIdx++) + { + const Node* pN = m_graph.GetNode(transitions[transIdx].nodeId); + if (pN->subgraph == sgIdx) + { + printer(" "); + m_renderGraph.PrintTransitionNodeName(printer, transitions[transIdx].nodeId); + } + } + + RpsBool bNeedNewLine = (subgraphInfo.firstChild != RPS_INDEX_NONE_U32); + + if (bNeedNewLine) + { + printer("\n"); + } + + for (uint32_t childIdx = subgraphInfo.firstChild; childIdx != RPS_INDEX_NONE_U32; + childIdx = m_subgraphInfos[childIdx].nextSibling) + { + PrintSubgraphRecursive(printer, childIdx, depth + 1); + } + + if (bNeedNewLine) + { + printer("%*c};\n", depth * 4, ' '); + } + else + { + printer(" };\n"); + } + } + + void PrintEdges(PrinterRef printer) + { + const auto nodes = m_graph.GetNodes(); + const auto edges = m_graph.GetEdges(); + + for (uint32_t iNode = 0, numNodes = uint32_t(nodes.size()); iNode < numNodes; iNode++) + { + auto inEdges = nodes[iNode].inEdges.GetConstRef(edges); + for (auto e : inEdges) + { + printer(" "); + PrintNodeName(printer, e.src); + printer(" -> "); + PrintNodeName(printer, e.dst); + printer(";\n"); + } + } + } + + void PrintNodeName(PrinterRef printer, uint32_t nodeId) + { + if (m_graph.GetNode(nodeId)->IsTransition()) + { + m_renderGraph.PrintTransitionNodeName(printer, nodeId); + } + else + { + m_renderGraph.PrintCmdNodeName(printer, nodeId); + } + } + + private: + const RenderGraph& m_renderGraph; + const Graph& m_graph; + ArenaVector m_subgraphInfos; + }; + +} // namespace rps + +#endif //_RPS_CMD_DAG_PRINT_HPP_ diff --git a/src/runtime/common/phases/rps_cmd_print.hpp 
b/src/runtime/common/phases/rps_cmd_print.hpp new file mode 100644 index 0000000..260fb1f --- /dev/null +++ b/src/runtime/common/phases/rps_cmd_print.hpp @@ -0,0 +1,168 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef RPS_CMD_PRINT_HPP +#define RPS_CMD_PRINT_HPP + +#include "runtime/common/rps_render_graph.hpp" + +namespace rps +{ + class CmdDebugPrintPhase : public IRenderGraphPhase + { + public: + virtual RpsResult Run(RenderGraphUpdateContext& context) override final + { + RPS_RETURN_OK_IF( + !rpsAnyBitsSet(context.pUpdateInfo->diagnosticFlags, RPS_DIAGNOSTIC_ENABLE_PRE_SCHEDULE_DUMP)); + + RenderGraph& renderGraph = context.renderGraph; + + const auto cmds = renderGraph.GetCmdInfos().crange_all(); + const auto resDecls = renderGraph.GetBuilder().GetResourceDecls(); + + PrinterRef printer(context.renderGraph.GetDevice().Printer()); + + printer("\nResource Declarations:"); + + for (uint32_t iRes = 0; iRes < resDecls.size(); iRes++) + { + printer("\n%4d : ", iRes); + + PrintResourceDecl(printer, resDecls[iRes]); + } + + printer("\nCommands:"); + + for (uint32_t iCmd = 0; iCmd < cmds.size(); iCmd++) + { + printer("\n%4d : ", iCmd); + + PrintCmdInfo(context, printer, iCmd); + } + + printer("\n"); + + return RPS_OK; + } + + static void PrintResourceDecl(PrinterRef printer, const ResourceDecl& resourceDecl) + { + printer("%.*s", resourceDecl.name.len, resourceDecl.name.str); + + const auto* pResDesc = static_cast(resourceDecl.desc); + + if (pResDesc) + { + const ResourceDescPacked resDesc{*pResDesc}; + + printer("\n "); + resDesc.Print(printer); + } + else + { + printer(""); + } + } + + static void PrintCmdInfo(RenderGraphUpdateContext& context, PrinterRef printer, NodeId nodeId) + { + const auto& renderGraph = context.renderGraph; + + 
renderGraph.PrintCmdNodeName(printer, nodeId); + + const CmdInfo& cmdInfo = *renderGraph.GetCmdInfo(nodeId); + const NodeDeclInfo& nodeDeclInfo = *cmdInfo.pNodeDecl; + const auto cmdAccesses = renderGraph.GetCmdAccesses(nodeId); + + printer("( "); + + bool bFirstArg = true; + for (uint32_t iParam = 0, numParams = uint32_t(nodeDeclInfo.params.size()); iParam < numParams; iParam++) + { + auto& paramDecl = nodeDeclInfo.params[iParam]; + + if (paramDecl.access.accessFlags == RPS_ACCESS_UNKNOWN) + continue; + + const auto cmdAccessRange = cmdAccesses.range(paramDecl.accessOffset, paramDecl.numElements); + + printer("%s\n ", (bFirstArg) ? "" : ","); // Can't use iParam == 0 since it can be skipped + bFirstArg = false; + + if (paramDecl.name) + { + printer("%.*s [", paramDecl.name.len, paramDecl.name.str); + } + else + { + printer("param_%u [", iParam); + } + + bool bFirstAccess = true; + + for (auto& access : cmdAccessRange) + { + if (access.resourceId == RPS_RESOURCE_ID_INVALID) + continue; + + printer(bFirstAccess ? 
"" : ", "); + bFirstAccess = false; + + PrintResourceReference(context, printer, access.resourceId, access.range); + + printer(" : ("); + AccessAttr(access.access).Print(printer); + printer(")"); + } + + printer("]"); + + SemanticAttr(paramDecl.semantic, paramDecl.baseSemanticIndex).Print(printer); + } + printer(" )"); + } + + // Prints the referenced resource's declared name (or "resource(<declId>)" when it has none), followed by + // the temporal layer index for temporal slices and the subresource range for multi-subresource resources. + static void PrintResourceReference(RenderGraphUpdateContext& context, + PrinterRef printer, + uint32_t resourceId, + const SubresourceRangePacked& range) + { + const auto& resInstance = context.renderGraph.GetResourceInstance(resourceId); + const auto resName = context.renderGraph.GetBuilder().GetResourceDecls()[resInstance.resourceDeclId].name; + + if (resName) + { + printer(resName); + } + else + { + printer("resource(%u)", resInstance.resourceDeclId); + } + + if (resInstance.isTemporalSlice) + { + auto& temporalParentRes = context.renderGraph.GetResourceInstance(resInstance.resourceDeclId); + RPS_ASSERT(temporalParentRes.IsTemporalParent()); + // The value printed below is a layer index, so it must be strictly less than the layer count. + RPS_ASSERT( + (resourceId >= temporalParentRes.temporalLayerOffset) && + (resourceId < temporalParentRes.temporalLayerOffset + temporalParentRes.desc.temporalLayers)); + + printer(", temporal_layer %u", resourceId - temporalParentRes.temporalLayerOffset); + } + + if (resInstance.numSubResources > 1) + { + printer(", "); + range.Print(printer); + } + } + }; + +} // namespace rps + +#endif //RPS_CMD_PRINT_HPP diff --git a/src/runtime/common/phases/rps_dag_build.h b/src/runtime/common/phases/rps_dag_build.h new file mode 100644 index 0000000..9cbafd4 --- /dev/null +++ b/src/runtime/common/phases/rps_dag_build.h @@ -0,0 +1,101 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+
+#ifndef RPS_DAG_BUILD_H
+#define RPS_DAG_BUILD_H
+
+#include "core/rps_graph.hpp"
+#include "runtime/common/rps_cmd_buf.hpp"
+#include "runtime/common/rps_render_graph.hpp"
+
+namespace rps
+{
+    // Render graph phase that creates one graph node per recorded command, assigns each
+    // node to its enclosing subgraph and scheduler-barrier scope, chains nodes inside
+    // sequential subgraphs, and finally adds the explicitly declared dependencies as edges.
+    class DAGBuilderPass final : public IRenderGraphPhase
+    {
+    public:
+        // Builds the nodes, subgraphs and edges described above.
+        // Returns RPS_ERROR_INDEX_OUT_OF_BOUNDS when a subgraph-end command is met while
+        // no subgraph is open; RPS_OK otherwise.
+        virtual RpsResult Run(RenderGraphUpdateContext& context) override final
+        {
+            auto&      renderGraph = context.renderGraph;
+            auto&      graph       = renderGraph.GetGraph();
+            const auto cmds        = renderGraph.GetCmdInfos().crange_all();
+
+            ArenaCheckPoint arenaCheckpoint{context.scratchArena};
+
+            // Ids of the currently open subgraphs, innermost last.
+            ArenaVector subgraphStack(&context.scratchArena);
+
+            // Number of open subgraphs for which IsSequential() is true.
+            uint32_t numSequentialSubgraphOnStack = 0;
+            // Number of scheduler barrier commands seen so far; used as the barrier scope index.
+            uint32_t schBarrierCount              = 0;
+
+            for (uint32_t iCmd = 0, numCmds = uint32_t(cmds.size()); iCmd < numCmds; ++iCmd)
+            {
+                auto& cmd = cmds[iCmd];
+
+                NodeId newNodeId = graph.AddNode(iCmd);
+                RPS_ASSERT(iCmd == newNodeId);  // TODO - Assuming all cmd nodes are added to graph first of all.
+
+                Node* pNode = graph.GetNode(newNodeId);
+
+                // Handle subgraphs
+                // If we are currently in a subgraph:
+                if (!subgraphStack.empty())
+                {
+                    pNode->subgraph = subgraphStack.back();
+
+                    // Inside any sequential subgraph, order this node after the previous one.
+                    if (numSequentialSubgraphOnStack > 0)
+                    {
+                        RPS_ASSERT(newNodeId > 0);
+                        graph.AddEdge(newNodeId - 1, newNodeId);
+                    }
+                }
+
+                if (cmd.nodeDeclIndex == RPS_BUILTIN_NODE_SUBGRAPH_BEGIN)
+                {
+                    const uint32_t parent = subgraphStack.empty() ? RPS_INDEX_NONE_U32 : subgraphStack.back();
+
+                    const SubgraphId newSubgraphId = graph.AddSubgraph(parent, cmd.subgraphFlags, newNodeId);
+                    Subgraph*        pSubgraph     = graph.GetSubgraph(newSubgraphId);
+
+                    if (pSubgraph->IsSequential())
+                    {
+                        numSequentialSubgraphOnStack++;
+                    }
+
+                    subgraphStack.push_back(newSubgraphId);
+                }
+                else if (cmd.nodeDeclIndex == RPS_BUILTIN_NODE_SUBGRAPH_END)
+                {
+                    // An end marker without a matching begin marker is reported as an error.
+                    RPS_RETURN_ERROR_IF(subgraphStack.empty(), RPS_ERROR_INDEX_OUT_OF_BOUNDS);
+
+                    Subgraph* pSubgraph = graph.GetSubgraph(subgraphStack.back());
+
+                    pSubgraph->endNode = newNodeId;
+
+                    if (pSubgraph->IsSequential())
+                    {
+                        RPS_ASSERT(numSequentialSubgraphOnStack > 0);
+                        numSequentialSubgraphOnStack--;
+                    }
+
+                    subgraphStack.pop_back();
+                }
+                else if (cmd.nodeDeclIndex == RPS_BUILTIN_NODE_SCHEDULER_BARRIER)
+                {
+                    schBarrierCount++;
+                }
+
+                // The barrier node itself already belongs to the new scope.
+                pNode->barrierScope = schBarrierCount;
+            }
+
+            // Apply explicit dependencies
+            auto explicitDeps = renderGraph.GetBuilder().GetExplicitDependencies();
+            for (auto dep : explicitDeps)
+            {
+                RPS_ASSERT(dep.before < dep.after);
+                graph.AddEdge(dep.before, dep.after);
+            }
+
+            return RPS_OK;
+        }
+    };
+} // namespace rps
+
+#endif // RPS_DAG_BUILD_H
diff --git a/src/runtime/common/phases/rps_dag_schedule.hpp b/src/runtime/common/phases/rps_dag_schedule.hpp
new file mode 100644
index 0000000..65db3bd
--- /dev/null
+++ b/src/runtime/common/phases/rps_dag_schedule.hpp
@@ -0,0 +1,1170 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+ +#ifndef _RPS_CMD_DAG_SCHEDULE_HPP_ +#define _RPS_CMD_DAG_SCHEDULE_HPP_ + +#include "rps/runtime/common/rps_runtime.h" + +#include "runtime/common/rps_render_graph.hpp" +#include "runtime/common/phases/rps_access_dag_build.hpp" + +#include + +namespace rps +{ + class DAGSchedulePass : public IRenderGraphPhase + { + struct ScheduleFlags + { + bool bUseAsync; + bool bEliminateDeadNodes; + bool bUseSplitBarrier; + bool bPreferMemorySaving; + bool bMinimizeGfxCompSwitch; + bool bWorkloadTypePipeliningDisabled; + bool bWorkloadTypePipeliningAggressive; + bool bForceProgramOrder; + bool bRandomOrder; + + explicit ScheduleFlags(uint32_t numQueues = 1, RpsScheduleFlags flags = RPS_SCHEDULE_DEFAULT) + { + bUseAsync = !!(numQueues > 1); + bEliminateDeadNodes = !(flags & RPS_SCHEDULE_DISABLE_DEAD_CODE_ELIMINATION_BIT); + bUseSplitBarrier = !!(flags & RPS_SCHEDULE_ALLOW_SPLIT_BARRIERS_BIT); + bPreferMemorySaving = !!(flags & RPS_SCHEDULE_PREFER_MEMORY_SAVING_BIT); + + bWorkloadTypePipeliningDisabled = !!(flags & RPS_SCHEDULE_WORKLOAD_TYPE_PIPELINING_DISABLE_BIT); + bWorkloadTypePipeliningAggressive = !bWorkloadTypePipeliningDisabled && + !!(flags & RPS_SCHEDULE_WORKLOAD_TYPE_PIPELINING_AGGRESSIVE_BIT); + + bMinimizeGfxCompSwitch = !!(flags & RPS_SCHEDULE_MINIMIZE_COMPUTE_GFX_SWITCH_BIT); + bForceProgramOrder = !!(flags & RPS_SCHEDULE_KEEP_PROGRAM_ORDER_BIT); + bRandomOrder = !bForceProgramOrder && !!(flags & RPS_SCHEDULE_RANDOM_ORDER_BIT); + } + }; + + struct TargetInfo + { + const RpsQueueFlags* pQueueFlags; + uint32_t numQueues; + ScheduleFlags options; + }; + + struct ReadyNodeInfo + { + uint32_t nodeId; + uint32_t depNodeId; + uint32_t schBarrierScopeIdx; + float timeReady; + }; + + struct NodeSchedulingInfo + { + Span resourceRefs; + Span accessFootprints; + uint32_t validQueueMask : 8; + uint32_t preferredQueueMask : 8; + RpsNodeDeclFlags workloadTypeMask : 8; + uint32_t canBeEliminated : 1; + }; + + struct ResourceSchedulingInfo + { + uint64_t aliasableSize; + 
uint32_t totalUserNodesCount; + uint32_t scheduledUserNodesCount; + uint32_t mostRecentRefNodeId; + Span subResourceInfoRange; + }; + + struct SubgraphSchedulingInfo + { + uint32_t atomicParentId; + uint32_t atomicSubgraphId; + }; + + struct NodeQueueInfo + { + uint32_t queueIndex : 4; + uint32_t batchIndex : 28; + uint32_t scheduledIndex; + }; + + struct BatchInfo + { + uint32_t queueIndex : 8; + uint32_t bSignal : 1; + uint32_t bHasCmdNode : 1; + uint32_t runtimeCmdBegin; + uint32_t runtimeCmdEnd; + Span waitForBatchIndices; + + BatchInfo(uint32_t queueIndex = RPS_MAX_QUEUES, + uint32_t cmdBegin = RPS_INDEX_NONE_U32, + uint32_t cmdEnd = RPS_INDEX_NONE_U32) + : queueIndex(queueIndex) + , bSignal(false) + , bHasCmdNode(false) + , runtimeCmdBegin(cmdBegin) + , runtimeCmdEnd(cmdEnd) + { + } + }; + + public: + DAGSchedulePass(RenderGraph& renderGraph) + : m_renderGraph(renderGraph) + , graph(m_renderGraph.GetGraph()) + { + m_targetInfo.numQueues = m_renderGraph.GetCreateInfo().scheduleInfo.numQueues; + m_targetInfo.pQueueFlags = m_renderGraph.GetCreateInfo().scheduleInfo.pQueueInfos; + + if (m_targetInfo.numQueues == 0) + { + static const RpsQueueFlags defaultQueueInfos[] = {RPS_QUEUE_FLAG_GRAPHICS}; + + m_targetInfo.numQueues = 1; + m_targetInfo.pQueueFlags = defaultQueueInfos; + } + } + + virtual RpsResult Run(RenderGraphUpdateContext& context) override final + { + RPS_RETURN_OK_IF(context.renderGraph.GetCmdInfos().empty()); + + nodes = graph.GetNodes(); + subgraphs = graph.GetSubgraphs(); + resources = m_renderGraph.GetResourceInstances().range_all(); + cmdInfos = m_renderGraph.GetCmdInfos().range_all(); + + const RpsRenderGraphCreateInfo& renderGraphCreateInfo = context.renderGraph.GetCreateInfo(); + + m_targetInfo.options = ScheduleFlags(renderGraphCreateInfo.scheduleInfo.numQueues, + (context.pUpdateInfo->scheduleFlags != RPS_SCHEDULE_UNSPECIFIED) + ? 
context.pUpdateInfo->scheduleFlags + : renderGraphCreateInfo.scheduleInfo.scheduleFlags); + + const ScheduleFlags flags = m_targetInfo.options; + + RPS_CHECK_ARGS(flags.bRandomOrder ? (context.pUpdateInfo->pRandomNumberGenerator != nullptr) : true); + + const RpsRandomNumberGenerator randGen = + flags.bRandomOrder ? *context.pUpdateInfo->pRandomNumberGenerator : RpsRandomNumberGenerator{}; + + Arena& scratchArena = context.scratchArena; + ArenaCheckPoint arenaCheckpoint{scratchArena}; + + readyNodes = scratchArena.NewArray(nodes.size()); + nodeSchInfos = scratchArena.NewArray(nodes.size()); + resourceSchInfos = scratchArena.NewArray(resources.size()); + subgraphSchInfos = scratchArena.NewArray(subgraphs.size()); + nodeReadyDeps = scratchArena.NewArray(nodes.size()); + nodeAtomicSubgraphIndices = scratchArena.NewArray(nodes.size()); + scheduledNodes = scratchArena.NewArray(nodes.size()); + + ArrayRef nodeQueueInfos; + if (flags.bUseAsync) + { + nodeQueueInfos = scratchArena.NewArray(nodes.size()); + } + + nodeResourceRefs.reset(&scratchArena); + + // Node scoring: + // [31] : Scope bit + // [30] : Queue bit + // [29] : Barrier Batching High + // [29 : 16] : Memory Saving + // [16] : Barrier Batching Low + // [15] : Work Type Grouping + // [8] : Work Type Interleave + // [0 : 15] : Program Order + // Note WorkType grouping bit can be shifted within the Program Order bit range, + // and BarrierBatching bit can be shifted within the Memory Saving bit range. + // This allows us to select to be able to interpolate between ordering preferences. + // For example, if the schedule option prefers barrier batching over memroy saving, + // we can put barrier batching bit on the highest bit in the memory saving score range, + // and vice versa. 
+ + static constexpr uint32_t ScopeScoreBit = (1u << 31); + static constexpr uint32_t QueueScoreBit = (1u << 30); + static constexpr uint32_t BarrierScoreHighBit = (1u << 29); + static constexpr uint32_t BarrierScoreLowBit = (1u << 16); + static constexpr uint32_t MemoryScoreShift = (16u); + static constexpr uint32_t WorkTypeGroupingBit = (1u << 15); + static constexpr uint32_t WorkTypeInterleaveBit = (1u << 8); + static constexpr uint32_t MemoryScoreMax = (0x1fff); + static constexpr uint32_t ProgramOrderScoreMax = (0xffff); + + const uint32_t numTotalCmdNodes = uint32_t(cmdInfos.size()); + const uint32_t maxCmdNodeId = numTotalCmdNodes - 1; + const uint32_t barrierBatchingBit = flags.bPreferMemorySaving ? BarrierScoreLowBit : BarrierScoreHighBit; + const uint32_t memoryAllocScoreShift = flags.bPreferMemorySaving ? 17 : 16; + + // Initialize node and resource scheduling info + InitNodeResourceSchedulingInfos(); + + // (Atomic) subgraph info + const bool bHasAnyAtomicSubgraphs = InitAtomicSubgraphTopology(); + + if (flags.bForceProgramOrder) + { + EnforceProgramOrder(); + } + + uint32_t numScheduled = 0; + uint32_t numEliminated = 0; + uint32_t numReadyCmdNodes = 0; + uint32_t numReadyTransNodes = 0; + + // Starting from command nodes only as transition nodes are expected to have out edges. 
+ for (int32_t iNode = int32_t(numTotalCmdNodes) - 1; iNode >= 0; iNode--) + { + const Node& node = nodes[iNode]; + + if (node.outEdges.empty() || (node.outEdges.size() == nodeReadyDeps[iNode])) + { + readyNodes[numReadyCmdNodes].nodeId = iNode; + readyNodes[numReadyCmdNodes].depNodeId = RPS_INDEX_NONE_U32; + readyNodes[numReadyCmdNodes].schBarrierScopeIdx = node.barrierScope; + readyNodes[numReadyCmdNodes].timeReady = 0.0f; + + numReadyCmdNodes++; + } + } + + RPS_RETURN_OK_IF(numReadyCmdNodes == 0); + + uint32_t lastNodeId = readyNodes[0].nodeId; + uint32_t lastCmdNodeId = lastNodeId; + + uint32_t lastBarrierScopeIdx = nodes[lastNodeId].barrierScope; + uint32_t lastAtomicSubgraphId = RPS_INDEX_NONE_U32; + uint32_t atomicSGStackDepth = 0; + + // TODO: Should be per queue + float currentTime = 0.0f; + + bool lastNodeIsSplitCandidate = false; + + uint32_t currQueueIndex = RPS_INDEX_NONE_U32; + uint32_t currQueueMask = 0; + + uint32_t numBatches = 0; + + uint32_t firstUsedGfxQueue = RPS_INDEX_NONE_U32; + + while ((numReadyCmdNodes + numReadyTransNodes) > 0) + { + // Score and pick one node to schedule + uint32_t highScore = 0; + uint32_t highScoreIndex = UINT32_MAX; + + const bool lastNodeIsTransition = IsTransitionNode(lastNodeId); + + lastCmdNodeId = lastNodeIsTransition ? lastCmdNodeId : lastNodeId; + + for (uint32_t iRN = 0; iRN < (numReadyCmdNodes + numReadyTransNodes); iRN++) + { + const uint32_t currNodeId = readyNodes[iRN].nodeId; + + const NodeSchedulingInfo& nodeInfo = nodeSchInfos[currNodeId]; + + const bool currNodeIsTransition = IsTransitionNode(currNodeId); + + if (nodeInfo.canBeEliminated) + { + highScoreIndex = iRN; + break; + } + + // Score nodes + + // Barrier batching scoring + uint32_t barrierBatchingScore = + ((lastNodeIsTransition == currNodeIsTransition) ? barrierBatchingBit : 0); + + // Program order scoring + const uint32_t nodeOrder = flags.bRandomOrder + ? 
randGen.pfnRandomUniformInt(randGen.pContext, 0, maxCmdNodeId) + : currNodeId; + + const uint32_t programOrderScore = currNodeIsTransition ? 0 : nodeOrder; + + // Workload type scoring + uint32_t pipelineWorkTypeScore = 0; + + if (!(currNodeIsTransition || lastNodeIsTransition)) + { + const bool workloadTypeEq = + (nodeSchInfos[lastCmdNodeId].workloadTypeMask == nodeInfo.workloadTypeMask); + + if (flags.bMinimizeGfxCompSwitch) + pipelineWorkTypeScore = workloadTypeEq ? WorkTypeGroupingBit : 0; + else if (!flags.bWorkloadTypePipeliningDisabled) + pipelineWorkTypeScore = workloadTypeEq ? 0 : WorkTypeGroupingBit; + } + else if (lastNodeIsTransition && flags.bWorkloadTypePipeliningAggressive) + { + // TODO: Check the benefit / effect of this. Add a flag for control? + pipelineWorkTypeScore = + (nodeInfo.workloadTypeMask & RPS_NODE_DECL_GRAPHICS_BIT) ? WorkTypeInterleaveBit : 0; + } + + // Memory allocation scoring + // TODO: Should be per heap + uint64_t allocAliasableMemSize = 0; + uint64_t freeAliasableMemSize = 0; + uint64_t usingAliasableMemSize = 0; + uint64_t immediateReuseMemSize = 0; + for (uint32_t resIdx : nodeInfo.resourceRefs.Get(nodeResourceRefs)) + { + ResourceSchedulingInfo& resSchInfo = resourceSchInfos[resIdx]; + + if (resSchInfo.scheduledUserNodesCount == 0) // First access to a resource + { + allocAliasableMemSize += resSchInfo.aliasableSize; + } + + if ((resSchInfo.scheduledUserNodesCount + 1) == + resSchInfo.totalUserNodesCount) // Last access to a resource + { + freeAliasableMemSize += resSchInfo.aliasableSize; + } + + if (resSchInfo.mostRecentRefNodeId == lastCmdNodeId) + { + immediateReuseMemSize += resSchInfo.aliasableSize; //TODO - should not limit to aliasable. 
+ } + + usingAliasableMemSize += resSchInfo.aliasableSize; + } + + // Prioritize nodes with smaller new allocations + uint32_t memorySavingScore = uint32_t( + (rpsClamp((maxNodeMemorySize - allocAliasableMemSize) >> 16, 0ull, MemoryScoreMax) + + rpsClamp(freeAliasableMemSize >> 16, 0ull, MemoryScoreMax)) + << memoryAllocScoreShift); + + // Maximize memory saving score if the transition node is the initial access of a resource and + // is ready to be batched into a transition batch. + const bool bIsInitialTransitionNode = currNodeIsTransition && (freeAliasableMemSize != 0); + if (bIsInitialTransitionNode && lastNodeIsTransition) + { + memorySavingScore = MemoryScoreMax << memoryAllocScoreShift; + } + + memorySavingScore = flags.bRandomOrder ? 0 : memorySavingScore; + + uint32_t memoryReuseScore = + uint32_t(rpsClamp((immediateReuseMemSize >> 19), 0ull, 0xFFFull) + << memoryAllocScoreShift); // TODO - use its own shift. + + // Queue scoring: + + uint32_t queueScore = QueueScoreBit; + + if (flags.bUseAsync) + { + // Prioritize independent node between workload on another queue and its immediate dependent node. + + // Penalize nodes who want a queue switch. + if (0 == (nodeInfo.preferredQueueMask & currQueueMask)) + { + // Candidate require a queue switch, and is an immediate dependent node of the previously scheduled command, + // raise penalty. + if ((0 == (nodeInfo.validQueueMask & currQueueMask)) && + (readyNodes[iRN].depNodeId == lastCmdNodeId)) + { + queueScore = 0; + } + + queueScore = (QueueScoreBit >> 1); + } + } + + // Schedule barriers & Atomic subgraph should have highest priority. + uint32_t scopeScore = ScopeScoreBit; // This is the highest bit + + // Penalize nodes in an earlier barrier scope. + const uint32_t currBarrierScopeIdx = readyNodes[iRN].schBarrierScopeIdx; + if (currBarrierScopeIdx < lastBarrierScopeIdx) + { + scopeScore = 0; + } + + // Penalize nodes in another atomic subgraph. 
+ uint32_t currAtomicSubgraphId = nodeAtomicSubgraphIndices[currNodeId]; + if (bHasAnyAtomicSubgraphs && (currAtomicSubgraphId != lastAtomicSubgraphId) && + ((currAtomicSubgraphId == RPS_INDEX_NONE_U32) || + (lastAtomicSubgraphId != subgraphSchInfos[currAtomicSubgraphId].atomicParentId))) + { + scopeScore = 0; + } + + // Individual score categories should already be in their own bit range, + // OR them together as the final score. + const uint32_t currScore = rpsMax((scopeScore | queueScore | barrierBatchingScore | + memorySavingScore | programOrderScore | pipelineWorkTypeScore), + 1u); + + if (highScore < currScore) + { + highScore = currScore; + highScoreIndex = iRN; + } + } + + RPS_ASSERT(highScoreIndex != UINT32_MAX); + + // Move winner from ready list to scheduled list + const uint32_t scheduledNodeId = readyNodes[highScoreIndex].nodeId; + const Node& scheduledNode = nodes[scheduledNodeId]; + const NodeSchedulingInfo& scheduledNodeInfo = nodeSchInfos[scheduledNodeId]; + + lastBarrierScopeIdx = readyNodes[highScoreIndex].schBarrierScopeIdx; + + const bool scheduledNodeIsTransition = IsTransitionNode(scheduledNodeId); + + lastNodeIsSplitCandidate = + (scheduledNodeIsTransition && (readyNodes[highScoreIndex].depNodeId != lastCmdNodeId)); + + if (scheduledNodeIsTransition) + numReadyTransNodes--; + else + numReadyCmdNodes--; + + readyNodes[highScoreIndex] = readyNodes[(numReadyCmdNodes + numReadyTransNodes)]; + + // Eliminate nodes if allowed. + // Also skip subgraph/subroutine markers. + const bool bEliminate = + nodeSchInfos[scheduledNodeId].canBeEliminated || + (!scheduledNodeIsTransition && cmdInfos[scheduledNode.GetCmdId()].IsNodeDeclBuiltIn()); + + if (!bEliminate) + { + scheduledNodes[numScheduled] = scheduledNodeId; + numScheduled++; + + // Scheduled node prefer a queue switch: + if (flags.bUseAsync) + { + // First scheduled node, queue is not determined yet, should set bRequireAnotherQueue. 
+ RPS_ASSERT((currQueueIndex != RPS_INDEX_NONE_U32) || (currQueueMask == 0)); + + const bool bRequireQueueSwitch = (0 == (scheduledNodeInfo.validQueueMask & currQueueMask)); + const bool bPreferQueueSwitch = (0 == (scheduledNodeInfo.preferredQueueMask & currQueueMask)); + const bool bImmediateDependent = (readyNodes[highScoreIndex].depNodeId == lastCmdNodeId); + + // Only actually do queue switch if: + // Current queue is not compatible with the candidate at all, + // or, candidate is not an immediate dependent. + + const bool bSwitchQueue = bRequireQueueSwitch || (bPreferQueueSwitch && !bImmediateDependent); + + if (bSwitchQueue) + { + numBatches++; + + currQueueIndex = rpsFirstBitLow(scheduledNodeInfo.preferredQueueMask); + currQueueMask = 1u << currQueueIndex; + + if (m_targetInfo.pQueueFlags[currQueueIndex] & RPS_QUEUE_FLAG_GRAPHICS) + { + // Scheduling in reversed order, so last visited = first used in frame. + firstUsedGfxQueue = currQueueIndex; + } + } + + nodeQueueInfos[scheduledNodeId].queueIndex = currQueueIndex; + nodeQueueInfos[scheduledNodeId].scheduledIndex = (numScheduled - 1); + } + } + else + { + numEliminated++; + } + + lastAtomicSubgraphId = nodeAtomicSubgraphIndices[scheduledNodeId]; + + if (bHasAnyAtomicSubgraphs && (lastAtomicSubgraphId != RPS_INDEX_NONE_U32)) + { + if (scheduledNodeId == subgraphs[lastAtomicSubgraphId].beginNode) + { + lastAtomicSubgraphId = subgraphSchInfos[lastAtomicSubgraphId].atomicParentId; + } + } + + // Add new ready nodes to ready list + const Node& newScheduledNode = nodes[scheduledNodeId]; + + for (auto& inEdge : newScheduledNode.inEdges.Get(graph.GetEdges())) + { + uint32_t srcNodeId = inEdge.src; + const Node& srcNode = nodes[srcNodeId]; + + int32_t outInputReadyCount = ++nodeReadyDeps[srcNodeId]; + + if (srcNode.outEdges.size() == outInputReadyCount) + { + ReadyNodeInfo* pNewReadyNode = &readyNodes[(numReadyCmdNodes + numReadyTransNodes)]; + pNewReadyNode->nodeId = srcNodeId; + pNewReadyNode->depNodeId = 
scheduledNodeId; + pNewReadyNode->schBarrierScopeIdx = srcNode.barrierScope; + pNewReadyNode->timeReady = currentTime; + + if (IsTransitionNode(srcNodeId)) + { + numReadyTransNodes++; + } + else + { + numReadyCmdNodes++; + } + } + } + + for (uint32_t resIdx : scheduledNodeInfo.resourceRefs.Get(nodeResourceRefs)) + { + ResourceSchedulingInfo& resSchInfo = resourceSchInfos[resIdx]; + resSchInfo.scheduledUserNodesCount++; + resSchInfo.mostRecentRefNodeId = scheduledNodeId; + RPS_ASSERT(resSchInfo.scheduledUserNodesCount <= resSchInfo.totalUserNodesCount); + } + + lastNodeId = scheduledNodeId; + + // TODO: Per queue, use actual time estimation. + static const uint32_t RPS_DEFAULT_COMMAND_MAKESPAN = 1; + currentTime += RPS_DEFAULT_COMMAND_MAKESPAN; + } + + RPS_ASSERT((numScheduled + numEliminated) == nodes.size()); + RPS_RETURN_ERROR_IF((numScheduled + numEliminated) != nodes.size(), RPS_ERROR_INTERNAL_ERROR); + + auto& runtimeCmds = context.renderGraph.GetRuntimeCmdInfos(); + runtimeCmds.clear(); + runtimeCmds.reserve(numScheduled + 2); + + auto appendRuntimeCmdFromNodeId = [&](NodeId nodeId) { + const auto& node = nodes[nodeId]; + const uint32_t cmdId = node.IsTransition() ? 
node.GetTransitionId() : node.GetCmdId(); + runtimeCmds.push_back({cmdId, node.IsTransition()}); + }; + + auto& cmdBatches = context.renderGraph.GetCmdBatches(); + auto& cmdBatchWaitIndices = context.renderGraph.GetCmdBatchWaitFenceIds(); + uint32_t numWaitFenceIdCount = 0; + + if (flags.bUseAsync && (numBatches > 1)) + { + // Analyze cross queue synchronization + + auto batchInfos = ArenaVector{&scratchArena}; + auto waitIndicesStorage = ArenaVector{&scratchArena}; + auto batchWaitListPool = SpanPool>{waitIndicesStorage}; + + batchInfos.reserve(numBatches + 1); + waitIndicesStorage.reserve(numBatches + 1); + + auto createNewBatch = [&](uint32_t queueIndex) { + const uint32_t currRuntimeCmdId = uint32_t(runtimeCmds.size()); + + if (!batchInfos.empty()) + { + batchInfos.back().runtimeCmdEnd = currRuntimeCmdId; + } + + batchInfos.emplace_back(BatchInfo{queueIndex, currRuntimeCmdId, currRuntimeCmdId}); + + batchWaitListPool.alloc_span(batchInfos.back().waitForBatchIndices, 0); + + currQueueIndex = queueIndex; + }; + + // TODO: Aliasing handling assumes preamble/postamble to be graphics atm. + // Forcing these to graphics queue for now! + // (If no graphics queue is found, fall back to queue 0). + firstUsedGfxQueue = (firstUsedGfxQueue != RPS_INDEX_NONE_U32) ? 
firstUsedGfxQueue : 0; + createNewBatch(firstUsedGfxQueue); + + runtimeCmds.push_back(RuntimeCmdInfo{CMD_ID_PREAMBLE, true}); + + int32_t perQueueLastWaitBatch[RPS_MAX_QUEUES * RPS_MAX_QUEUES]; + std::fill(std::begin(perQueueLastWaitBatch), std::end(perQueueLastWaitBatch), -1); + + for (int32_t iScheduledIdx = numScheduled - 1; iScheduledIdx >= 0; --iScheduledIdx) + { + const NodeId nodeId = scheduledNodes[iScheduledIdx]; + const Node& currNode = nodes[nodeId]; + + NodeQueueInfo& currNodeInfo = nodeQueueInfos[nodeId]; + RPS_ASSERT(currNodeInfo.scheduledIndex == uint32_t(iScheduledIdx)); + + bool bAddingBatchDependency = false; + + int32_t srcDependBatchIndices[RPS_MAX_QUEUES]; + std::fill(std::begin(srcDependBatchIndices), std::end(srcDependBatchIndices), -1); + + ArrayRef currQueueLastWaitBatches = { + perQueueLastWaitBatch + currNodeInfo.queueIndex * RPS_MAX_QUEUES, RPS_MAX_QUEUES}; + + for (auto& inEdge : currNode.inEdges.Get(graph.GetEdges())) + { + const NodeQueueInfo& srcNodeInfo = nodeQueueInfos[inEdge.src]; + + RPS_ASSERT(srcNodeInfo.batchIndex < batchInfos.size()); + RPS_ASSERT(batchInfos[srcNodeInfo.batchIndex].queueIndex == srcNodeInfo.queueIndex); + + const int32_t iSrcBatchIdx = int32_t(srcNodeInfo.batchIndex); + + if ((currNodeInfo.queueIndex != srcNodeInfo.queueIndex) && + (currQueueLastWaitBatches[srcNodeInfo.queueIndex] < iSrcBatchIdx)) + { + // TODO: Recursively check if any indirect dependency already exists. + + currQueueLastWaitBatches[srcNodeInfo.queueIndex] = iSrcBatchIdx; + + RPS_ASSERT(srcDependBatchIndices[srcNodeInfo.queueIndex] < iSrcBatchIdx); + srcDependBatchIndices[srcNodeInfo.queueIndex] = iSrcBatchIdx; + + batchInfos[srcNodeInfo.batchIndex].bSignal = true; + + bAddingBatchDependency = true; + } + } + + // Create new batch if: + // - Switch queue + // - Adding new dependency, and if current batch is not transition-only. 
+ if ((currNodeInfo.queueIndex != currQueueIndex) || + (bAddingBatchDependency && batchInfos.back().bHasCmdNode)) + { + createNewBatch(currNodeInfo.queueIndex); + } + + currNodeInfo.batchIndex = uint32_t(batchInfos.size() - 1); + + batchInfos.back().bHasCmdNode |= !currNode.IsTransition(); + + appendRuntimeCmdFromNodeId(scheduledNodes[iScheduledIdx]); + + if (bAddingBatchDependency) + { + BatchInfo& currBatch = batchInfos.back(); + + for (uint32_t iQ = 0; iQ < RPS_MAX_QUEUES; iQ++) + { + const int32_t srcDepBatchIdx = srcDependBatchIndices[iQ]; + if (srcDepBatchIdx != -1) + { + RPS_ASSERT(iQ != currNodeInfo.queueIndex); + RPS_ASSERT(batchInfos[srcDepBatchIdx].bSignal); + RPS_ASSERT(batchInfos[srcDepBatchIdx].queueIndex == iQ); + + batchWaitListPool.push_to_span(currBatch.waitForBatchIndices, uint32_t(srcDepBatchIdx)); + numWaitFenceIdCount++; + } + } + } + } + + if (!(m_targetInfo.pQueueFlags[currQueueIndex] & RPS_QUEUE_FLAG_GRAPHICS)) + { + createNewBatch(firstUsedGfxQueue); + } + + runtimeCmds.push_back(RuntimeCmdInfo{CMD_ID_POSTAMBLE, true}); + + batchInfos.back().runtimeCmdEnd = uint32_t(runtimeCmds.size()); + + cmdBatches.resize(batchInfos.size()); + cmdBatchWaitIndices.reserve(numWaitFenceIdCount); + + uint32_t fenceIndex = 0; + + for (uint32_t iBatch = 0; iBatch < batchInfos.size(); iBatch++) + { + const BatchInfo& batchInfo = batchInfos[iBatch]; + RpsCommandBatch& cmdBatch = cmdBatches[iBatch]; + + cmdBatch.cmdBegin = batchInfo.runtimeCmdBegin; + cmdBatch.numCmds = batchInfo.runtimeCmdEnd - batchInfo.runtimeCmdBegin; + cmdBatch.queueIndex = batchInfo.queueIndex; + + cmdBatch.signalFenceIndex = batchInfo.bSignal ? fenceIndex : RPS_INDEX_NONE_U32; + fenceIndex += batchInfo.bSignal ? 
1 : 0; + + cmdBatch.waitFencesBegin = uint32_t(cmdBatchWaitIndices.size()); + cmdBatch.numWaitFences = batchInfo.waitForBatchIndices.size(); + + auto waitForBatchIndicesRange = batchInfo.waitForBatchIndices.Get(waitIndicesStorage); + + for (auto waitBatchIndex : waitForBatchIndicesRange) + { + RPS_ASSERT(waitBatchIndex < iBatch); + RPS_ASSERT(cmdBatches[waitBatchIndex].signalFenceIndex != RPS_INDEX_NONE_U32); + + cmdBatchWaitIndices.push_back(cmdBatches[waitBatchIndex].signalFenceIndex); + } + } + + RPS_ASSERT(cmdBatchWaitIndices.size() == numWaitFenceIdCount); + } + else + { + runtimeCmds.push_back(RuntimeCmdInfo{CMD_ID_PREAMBLE, true}); + + for (int32_t iScheduledIdx = numScheduled - 1; iScheduledIdx >= 0; --iScheduledIdx) + { + appendRuntimeCmdFromNodeId(scheduledNodes[iScheduledIdx]); + } + + runtimeCmds.push_back(RuntimeCmdInfo{CMD_ID_POSTAMBLE, true}); + + cmdBatches.resize(1, CommandBatch{}); + cmdBatches.back().cmdBegin = 0; + cmdBatches.back().numCmds = uint32_t(runtimeCmds.size()); + } + + return RPS_OK; + } + + private: + void InitNodeResourceSchedulingInfos() + { + const bool bEliminateDeadNodes = m_targetInfo.options.bEliminateDeadNodes; + + std::fill(nodeReadyDeps.begin(), nodeReadyDeps.end(), 0); + + // Initialize per-resource scheduling info + for (uint32_t iRes = 0, numRes = uint32_t(resources.size()); iRes < numRes; iRes++) + { + auto& resSchInfo = resourceSchInfos[iRes]; + + resSchInfo.aliasableSize = resources[iRes].allocRequirement.size; + resSchInfo.scheduledUserNodesCount = 0; + resSchInfo.totalUserNodesCount = 0; + } + + // Initialize per-node scheduling info + maxNodeMemorySize = 0; + + constexpr RpsQueueFlags defaultQueueFlags[] = {RPS_QUEUE_FLAG_GRAPHICS}; + + // Initialize queue info + // TODO: If no need to change queue layout per schedule, this can be done statically. 
+ + uint32_t gfxQueueMask = 0; + uint32_t validCompQueueMask = 0; + uint32_t validCopyQueueMask = (1u << m_targetInfo.numQueues) - 1; + uint32_t allQueueMask = validCopyQueueMask; + uint32_t asyncCompQueueMask = 0; + uint32_t asyncCopyQueueMask = 0; + + if (m_targetInfo.numQueues > 0) + { + for (uint32_t iQueue = 0; iQueue < m_targetInfo.numQueues; iQueue++) + { + if (m_targetInfo.pQueueFlags[iQueue] & RPS_QUEUE_FLAG_GRAPHICS) + gfxQueueMask |= (1u << iQueue); + + if (m_targetInfo.pQueueFlags[iQueue] & (RPS_QUEUE_FLAG_COMPUTE | RPS_QUEUE_FLAG_GRAPHICS)) + validCompQueueMask |= (1u << iQueue); + else + asyncCopyQueueMask |= (1u << iQueue); + + if ((m_targetInfo.pQueueFlags[iQueue] & RPS_QUEUE_FLAG_COMPUTE) && + !(m_targetInfo.pQueueFlags[iQueue] & RPS_QUEUE_FLAG_GRAPHICS)) + asyncCompQueueMask |= (1u << iQueue); + } + } + else + { + gfxQueueMask = 1; + } + + asyncCompQueueMask = (asyncCompQueueMask == 0) ? validCompQueueMask : asyncCompQueueMask; + asyncCopyQueueMask = (asyncCopyQueueMask == 0) ? asyncCompQueueMask : asyncCopyQueueMask; + + constexpr RpsNodeDeclFlags AllNodeDeclWorkloadTypeMask = + RPS_NODE_DECL_GRAPHICS_BIT | RPS_NODE_DECL_COMPUTE_BIT | RPS_NODE_DECL_COPY_BIT; + + // Starting from command nodes only as transition nodes are expected to have out edges. + for (uint32_t iNode = 0; iNode < cmdInfos.size(); iNode++) + { + auto cmdAcceses = m_renderGraph.GetCmdAccesses(iNode); + NodeSchedulingInfo& nodeResInfo = nodeSchInfos[iNode]; + + // TODO: Can we infer queue requirement only based on resource accesses & remove graphics/compute/copy specifiers? + uint32_t queueMask = gfxQueueMask; + uint32_t preferredQueueMask = gfxQueueMask; + + const CmdInfo* const pCmdInfo = m_renderGraph.GetCmdInfo(iNode); + const NodeDeclInfo* pNodeDecl = pCmdInfo->pNodeDecl; + + if (!pCmdInfo->IsNodeDeclBuiltIn()) + { + if (pNodeDecl->flags & RPS_NODE_DECL_COMPUTE_BIT) + { + queueMask |= validCompQueueMask; + preferredQueueMask = pCmdInfo->bPreferAsync ? 
asyncCompQueueMask : preferredQueueMask; + } + + if (pNodeDecl->flags & RPS_NODE_DECL_COPY_BIT) + { + queueMask |= validCopyQueueMask; + preferredQueueMask = pCmdInfo->bPreferAsync ? asyncCopyQueueMask : preferredQueueMask; + } + + nodeResInfo.validQueueMask = queueMask; + nodeResInfo.preferredQueueMask = preferredQueueMask; + nodeResInfo.workloadTypeMask = (pNodeDecl->flags & AllNodeDeclWorkloadTypeMask); + } + else + { + // Built-in nodes such as subgraph/subprogram begin/end. + // TODO: Anything to do here? Deduce from contained nodes? + nodeResInfo.validQueueMask = allQueueMask; + nodeResInfo.preferredQueueMask = allQueueMask; + nodeResInfo.workloadTypeMask = AllNodeDeclWorkloadTypeMask; + } + + RPS_ASSERT(nodeResInfo.preferredQueueMask != 0); + RPS_ASSERT(nodeResInfo.validQueueMask != 0); + RPS_ASSERT(nodeResInfo.workloadTypeMask != 0); + RPS_ASSERT((nodeResInfo.preferredQueueMask & nodeResInfo.validQueueMask) == + nodeResInfo.preferredQueueMask); + + nodeResInfo.resourceRefs.SetRange(uint32_t(nodeResourceRefs.size()), 0); + nodeResourceRefs.reserve(nodeResourceRefs.size() + cmdAcceses.size()); + + uint64_t nodeAliasableMemSize = 0; + + bool bMustKeep = false; + + for (uint32_t iAccess = 0, numAccesses = cmdAcceses.size(); iAccess < numAccesses; iAccess++) + { + const CmdAccessInfo& access = cmdAcceses[iAccess]; + + if (access.resourceId == RPS_RESOURCE_ID_INVALID) + continue; + + if (std::find(nodeResourceRefs.begin() + nodeResInfo.resourceRefs.GetBegin(), + nodeResourceRefs.end(), + access.resourceId) == nodeResourceRefs.end()) + { + nodeResourceRefs.push_back(access.resourceId); + + resourceSchInfos[access.resourceId].totalUserNodesCount++; + nodeAliasableMemSize += resourceSchInfos[access.resourceId].aliasableSize; + } + + auto& resInfo = resources[access.resourceId]; + + const bool bHasGPUWrite = rpsAnyBitsSet(access.access.accessFlags, RPS_ACCESS_ALL_GPU_WRITE); + const bool bIsPersistentRes = rpsAnyBitsSet(resInfo.desc.flags, 
RPS_RESOURCE_FLAG_PERSISTENT_BIT); + const bool bIsExternalRes = resInfo.isExternal; + const bool bHasCPUAccess = rpsAnyBitsSet(access.access.accessFlags, RPS_ACCESS_ALL_CPU); + + if ((bHasGPUWrite && (bIsPersistentRes || bIsExternalRes)) || bHasCPUAccess) + { + bMustKeep = true; + } + } + + nodeResInfo.resourceRefs.SetCount(uint32_t(nodeResourceRefs.size()) - + nodeResInfo.resourceRefs.GetBegin()); + + maxNodeMemorySize = rpsMax(maxNodeMemorySize, nodeAliasableMemSize); + + nodeResInfo.canBeEliminated = bEliminateDeadNodes && nodes[iNode].outEdges.empty() && !bMustKeep; + } + + // Transition nodes: + + auto transitions = m_renderGraph.GetTransitions().range_all(); + nodeResourceRefs.reserve_additional(transitions.size()); + + ArrayRef transNodeSchInfos = nodeSchInfos.range(uint32_t(cmdInfos.size())); + + for (uint32_t iTrans = 1, numTrans = uint32_t(transitions.size()); iTrans < numTrans; iTrans++) + { + NodeSchedulingInfo& nodeResInfo = transNodeSchInfos[iTrans - 1]; + const auto& transitionInfo = transitions[iTrans]; + + nodeResInfo.resourceRefs.SetRange(uint32_t(nodeResourceRefs.size()), 1); + + if (m_targetInfo.options.bUseAsync) + { + RpsNodeDeclFlags nodeQueueFlag = + GetRequiredQueueFlagsFromAccessAttr(RPS_NODE_DECL_FLAG_NONE, transitionInfo.access.access); + + if (transitionInfo.prevTransition != RenderGraph::INVALID_TRANSITION) + { + static_assert((RPS_NODE_DECL_GRAPHICS_BIT < RPS_NODE_DECL_COMPUTE_BIT) && + (RPS_NODE_DECL_COMPUTE_BIT < RPS_NODE_DECL_COPY_BIT), + "Unexpected Node Decl flag ordering"); + + nodeQueueFlag = rpsMin( + nodeQueueFlag, + GetRequiredQueueFlagsFromAccessAttr( + RPS_NODE_DECL_FLAG_NONE, transitions[transitionInfo.prevTransition].access.access)); + } + + uint32_t queueMask = gfxQueueMask; + + if (nodeQueueFlag & RPS_NODE_DECL_COMPUTE_BIT) + { + queueMask |= validCompQueueMask; + } + + if (nodeQueueFlag & RPS_NODE_DECL_COPY_BIT) + { + queueMask |= validCopyQueueMask; + } + + nodeResInfo.workloadTypeMask = nodeQueueFlag; + 
nodeResInfo.validQueueMask = queueMask; + nodeResInfo.preferredQueueMask = queueMask; + } + + RPS_ASSERT(transitionInfo.access.resourceId != RPS_RESOURCE_ID_INVALID); + + resourceSchInfos[transitionInfo.access.resourceId].totalUserNodesCount++; + + nodeResourceRefs.push_back(transitionInfo.access.resourceId); + + RPS_ASSERT(maxNodeMemorySize >= resourceSchInfos[transitionInfo.access.resourceId].aliasableSize); + } + }

        // Records, for every subgraph, its nearest atomic ancestor and its enclosing atomic subgraph,
        // then tags each node inside an atomic subgraph with that subgraph's index and adds the
        // begin/end ordering edges for it.
        //
        // Returns true if there is at least one atomic subgraph, false otherwise (in which case the
        // per-node indices are left untouched).
        bool InitAtomicSubgraphTopology()
        {
            uint32_t atomicSubgraphCount = 0;

            for (uint32_t iSG = 0; iSG < subgraphs.size(); iSG++)
            {
                auto& sg     = subgraphs[iSG];
                auto& sgInfo = subgraphSchInfos[iSG];

                sgInfo.atomicParentId = RPS_INDEX_NONE_U32;

                atomicSubgraphCount += (sg.IsAtomic() ? 1 : 0);

                // Walk up the parent chain until the first atomic ancestor (if any).
                for (uint32_t parentId = sg.parentSubgraph; parentId != RPS_INDEX_NONE_U32;
                     parentId          = subgraphs[parentId].parentSubgraph)
                {
                    if (subgraphs[parentId].IsAtomic())
                    {
                        sgInfo.atomicParentId = parentId;
                        break;
                    }
                }

                // An atomic subgraph encloses itself; otherwise inherit the nearest atomic ancestor.
                sgInfo.atomicSubgraphId = sg.IsAtomic() ? iSG : sgInfo.atomicParentId;

                if (sg.IsAtomic() && sgInfo.atomicParentId != RPS_INDEX_NONE_U32)
                {
                    const Subgraph& parentAtomicSG = subgraphs[sgInfo.atomicParentId];
                    graph.AddEdge(parentAtomicSG.beginNode, sg.beginNode);
                    // NOTE(review): this edge starts at sg.beginNode rather than sg.endNode - confirm intended.
                    graph.AddEdge(sg.beginNode, parentAtomicSG.endNode);
                }
            }

            if (atomicSubgraphCount == 0)
            {
                return false;
            }

            std::fill(nodeAtomicSubgraphIndices.begin(), nodeAtomicSubgraphIndices.end(), RPS_INDEX_NONE_U32);

            for (uint32_t iSG = 0; iSG < subgraphs.size(); iSG++)
            {
                const Subgraph& sg = subgraphs[iSG];

                if (!sg.IsAtomic())
                {
                    continue;
                }

                nodeAtomicSubgraphIndices[sg.beginNode] = iSG;

                for (uint32_t iNode = sg.beginNode + 1; iNode < sg.endNode; iNode++)
                {
                    const Node& node = nodes[iNode];

                    const uint32_t currAtomicSGId = subgraphSchInfos[node.subgraph].atomicSubgraphId;

                    if (currAtomicSGId != iSG)
                    {
                        // Current node is in a nested atomic subgraph, skip the nested subgraph range.
                        // The loop increment then steps past the nested end node as well.
                        RPS_ASSERT((currAtomicSGId != RPS_INDEX_NONE_U32) && (currAtomicSGId > iSG));
                        iNode = subgraphs[currAtomicSGId].endNode;
                        RPS_ASSERT(iNode < sg.endNode);
                        continue;
                    }

                    nodeAtomicSubgraphIndices[iNode] = currAtomicSGId;

                    AddAtomicSubgraphEdges(currAtomicSGId, sg, node, iNode);
                }

                nodeAtomicSubgraphIndices[sg.endNode] = iSG;
            }

            auto transitions = m_renderGraph.GetTransitions().range_all();

            // Transition index 0 is skipped, matching the other transition loops in this class.
            for (uint32_t iTrans = 1, numTrans = uint32_t(transitions.size()); iTrans < numTrans; iTrans++)
            {
                const auto& transitionInfo = transitions[iTrans];

                const Node& node = nodes[transitionInfo.nodeId];

                if (node.subgraph == RPS_INDEX_NONE_U32)
                {
                    continue;
                }

                const uint32_t currAtomicSGId = subgraphSchInfos[node.subgraph].atomicSubgraphId;

                if (currAtomicSGId != RPS_INDEX_NONE_U32)
                {
                    nodeAtomicSubgraphIndices[transitionInfo.nodeId] = currAtomicSGId;

                    AddAtomicSubgraphEdges(currAtomicSGId, subgraphs[currAtomicSGId], node, transitionInfo.nodeId);
                }
            }

            return true;
        }

        // Chains consecutive command nodes (0 -> 1 -> ... -> N-1) with graph edges.
        // Does nothing when there are fewer than two command nodes.
        void EnforceProgramOrder()
        {
            const uint32_t numSrcNodes = (cmdInfos.size() > 1) ? uint32_t(cmdInfos.size() - 1) : 0;

            for (uint32_t iSrcNode = 0; iSrcNode < numSrcNodes; iSrcNode++)
            {
                graph.AddEdge(iSrcNode, iSrcNode + 1);
            }
        }

        // Adds the edges that tie `node` (index `nodeId`) to the begin/end nodes of its atomic
        // subgraph `sg` (index `currAtomicSGId`):
        //  - For each in/out edge whose other end lies in a different atomic subgraph for which
        //    graph.IsParentSubgraphOf(currAtomicSGId, other) is false, the edge is mirrored onto the
        //    begin (for in edges) or end (for out edges) node of the outermost atomic subgraph found
        //    by walking up from currAtomicSGId.
        //  - `sg.beginNode -> nodeId` is added if the node has no in edges or any in edge was mirrored;
        //    `nodeId -> sg.endNode` likewise for out edges.
        //
        // NOTE(review): edges are added to `graph` while iterating views obtained from
        // graph.GetEdges(); this relies on AddEdge not invalidating them - confirm.
        void AddAtomicSubgraphEdges(uint32_t currAtomicSGId, const Subgraph& sg, const Node& node, uint32_t nodeId)
        {
            // Atomic subgraph id of the subgraph owning `otherNodeId`, or RPS_INDEX_NONE_U32 if it has none.
            const auto atomicSubgraphOfNode = [&](uint32_t otherNodeId) -> uint32_t {
                const uint32_t otherSubgraph = nodes[otherNodeId].subgraph;
                return (otherSubgraph == RPS_INDEX_NONE_U32) ? RPS_INDEX_NONE_U32
                                                             : subgraphSchInfos[otherSubgraph].atomicSubgraphId;
            };

            // Walks the atomic-parent chain from currAtomicSGId and returns the last atomic subgraph
            // visited before one that IsParentSubgraphOf(otherAtomicSGId) (or the chain end) is reached.
            const auto outermostAtomicSGIndex = [&](uint32_t otherAtomicSGId) -> uint32_t {
                uint32_t outermostAtomicSGIdx = currAtomicSGId;
                for (uint32_t parentAtomicId = currAtomicSGId;
                     (parentAtomicId != RPS_INDEX_NONE_U32) &&
                     !graph.IsParentSubgraphOf(parentAtomicId, otherAtomicSGId);
                     parentAtomicId = subgraphSchInfos[parentAtomicId].atomicParentId)
                {
                    outermostAtomicSGIdx = parentAtomicId;
                }
                return outermostAtomicSGIdx;
            };

            bool addEdgeFromSGBegin = node.inEdges.empty();

            for (const Edge& inEdge : node.inEdges.Get(graph.GetEdges()))
            {
                const uint32_t srcNode       = inEdge.src;
                const uint32_t srcAtomicSGId = atomicSubgraphOfNode(srcNode);

                if ((currAtomicSGId == srcAtomicSGId) || graph.IsParentSubgraphOf(currAtomicSGId, srcAtomicSGId))
                {
                    continue;
                }

                addEdgeFromSGBegin = true;

                const Subgraph& outermostAtomicSG = subgraphs[outermostAtomicSGIndex(srcAtomicSGId)];
                if (srcNode != outermostAtomicSG.beginNode)
                {
                    graph.AddEdge(srcNode, outermostAtomicSG.beginNode);
                }
            }

            if (addEdgeFromSGBegin)
            {
                graph.AddEdge(sg.beginNode, nodeId);
            }

            bool addEdgeToSGEnd = node.outEdges.empty();

            for (const Edge& outEdge : node.outEdges.Get(graph.GetEdges()))
            {
                const uint32_t dstNode       = outEdge.dst;
                const uint32_t dstAtomicSGId = atomicSubgraphOfNode(dstNode);

                if ((currAtomicSGId == dstAtomicSGId) || graph.IsParentSubgraphOf(currAtomicSGId, dstAtomicSGId))
                {
                    continue;
                }

                addEdgeToSGEnd = true;

                const Subgraph& outermostAtomicSG = subgraphs[outermostAtomicSGIndex(dstAtomicSGId)];
                if (dstNode != outermostAtomicSG.endNode)
                {
                    graph.AddEdge(outermostAtomicSG.endNode, dstNode);
                }
            }

            if (addEdgeToSGEnd)
            {
                graph.AddEdge(nodeId, sg.endNode);
            }
        }

        // True if `id` is at or past the number of command nodes, i.e. it indexes a transition node.
        bool IsTransitionNode(NodeId id) const
        {
            return id >= cmdInfos.size();
        }

+ private: + RenderGraph& m_renderGraph; + TargetInfo m_targetInfo; + + Graph& graph; + ConstArrayRef nodes; + ConstArrayRef subgraphs; + ConstArrayRef resources; + ConstArrayRef cmdInfos; + + ArrayRef scheduledNodes; + ArrayRef readyNodes; + ArrayRef nodeSchInfos; + ArrayRef resourceSchInfos; + ArrayRef subgraphSchInfos; + ArrayRef nodeReadyDeps; + ArrayRef nodeAtomicSubgraphIndices; + ArenaVector nodeResourceRefs; + + uint64_t maxNodeMemorySize = 0; + }; +} // namespace rps + +#endif //_RPS_CMD_DAG_SCHEDULE_HPP_ diff --git a/src/runtime/common/phases/rps_memory_schedule.hpp b/src/runtime/common/phases/rps_memory_schedule.hpp new file mode 100644 index 0000000..54fcf71 --- /dev/null +++ b/src/runtime/common/phases/rps_memory_schedule.hpp @@ -0,0 +1,783 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details.
+ +#ifndef _RPS_MEMORY_SCHEDULE_H_ +#define _RPS_MEMORY_SCHEDULE_H_ + +#include "runtime/common/rps_render_graph.hpp" + +#include +#include + +namespace rps +{ + class MemorySchedulePhase : public IRenderGraphPhase + { + RenderGraphUpdateContext* m_pContext = nullptr; + + public: + MemorySchedulePhase(RenderGraph& renderGraph) + { + } + + virtual RpsResult Run(RenderGraphUpdateContext& context) override final + { + m_pContext = &context; + + CollectResourceLifeTimes(context); + + RPS_V_RETURN(CalculateResourcePlacements(context)); + + RPS_V_RETURN(CalculateResourceAliasing(context)); + + return RPS_OK; + } + + private: + + void CollectResourceLifeTimes(RenderGraphUpdateContext& context) + { + auto& resourceInstances = context.renderGraph.GetResourceInstances(); + const auto& transitions = context.renderGraph.GetTransitions(); + const auto nodes = context.renderGraph.GetGraph().GetNodes(); + const auto runtimeCmds = context.renderGraph.GetRuntimeCmdInfos().range_all(); + + const uint32_t lastCmdId = runtimeCmds.empty() ? 0 : uint32_t(runtimeCmds.size() - 1); + + for (auto& resInst : resourceInstances) + { + const bool bIsPersistent = + resInst.isExternal || rpsAnyBitsSet(resInst.desc.flags, RPS_RESOURCE_FLAG_PERSISTENT_BIT); + + resInst.lifetimeBegin = bIsPersistent ? 0 : UINT32_MAX; + resInst.lifetimeEnd = bIsPersistent ? 
lastCmdId : 0; + } + + if (runtimeCmds.empty()) + { + return; + } + + auto fnUpdateAccessRange = [&](uint32_t resourceIndex, uint32_t runtimeCmdIdx) { + auto& resInst = resourceInstances[resourceIndex]; + resInst.lifetimeBegin = rpsMin(resInst.lifetimeBegin, runtimeCmdIdx); + resInst.lifetimeEnd = rpsMax(resInst.lifetimeEnd, runtimeCmdIdx); + }; + + for (uint32_t runtimeCmdIdx = 1; runtimeCmdIdx < (runtimeCmds.size() - 1); runtimeCmdIdx++) + { + const auto& runtimeCmd = runtimeCmds[runtimeCmdIdx]; + if (runtimeCmd.isTransition) + { + const auto& transitionInfo = transitions[runtimeCmd.GetTransitionId()]; + fnUpdateAccessRange(transitionInfo.access.resourceId, runtimeCmdIdx); + } + else + { + auto paramInfos = context.renderGraph.GetCmdAccesses(runtimeCmd.GetCmdId()); + + for (auto& paramInfo : paramInfos) + { + if (paramInfo.resourceId != RPS_RESOURCE_ID_INVALID) + { + fnUpdateAccessRange(paramInfo.resourceId, runtimeCmdIdx); + } + } + } + } + } + + void InsertToSortedAllocationList(ArenaVector& allocatedIndices, + uint32_t resIndex, + ConstArrayRef resources) + { + auto upperBound = + std::upper_bound(allocatedIndices.begin(), allocatedIndices.end(), resIndex, [&](auto a, auto b) { + auto& resA = resources[a]; + auto& resB = resources[b]; + + if (resA.allocPlacement.heapId < resB.allocPlacement.heapId) + return true; + else if (resA.allocPlacement.heapId > resB.allocPlacement.heapId) + return false; + else if (resA.allocPlacement.offset < resB.allocPlacement.offset) + return true; + else if (resA.allocPlacement.offset > resB.allocPlacement.offset) + return false; + else if (resA.allocPlacement.offset + resA.allocRequirement.size < + resB.allocPlacement.offset + resB.allocRequirement.size) + return true; + else + return false; + }); + + allocatedIndices.insert(upperBound - allocatedIndices.begin(), resIndex); + } + + RpsResult CalculateResourcePlacements(RenderGraphUpdateContext& context) + { + ArenaCheckPoint arenaCheckpoint{context.scratchArena}; + + auto& heaps 
= context.renderGraph.GetHeapInfos(); + + const bool bUseAliasing = !rpsAnyBitsSet(context.renderGraph.GetCreateInfo().renderGraphFlags, + RPS_RENDER_GRAPH_NO_GPU_MEMORY_ALIASING); + + auto resourceInstances = context.renderGraph.GetResourceInstances().range_all(); + + ArenaVector sortedResourceIndices(&context.scratchArena); + sortedResourceIndices.reserve(resourceInstances.size()); + + for (uint32_t iRes = 0; iRes < resourceInstances.size(); iRes++) + { + auto& resInst = resourceInstances[iRes]; + + if ((!resInst.isExternal) && (resInst.allocRequirement.size > 0) && + (resInst.lifetimeBegin <= resInst.lifetimeEnd)) + { + sortedResourceIndices.push_back(iRes); + } + } + + // Reset currently used size mark. + // TODO: Allow external allocations to occuply spaces. + for (auto& heap : heaps) + { + heap.usedSize = 0; + } + + // TODO: Precompute comparison key + std::sort(sortedResourceIndices.begin(), sortedResourceIndices.end(), [&](uint32_t a, uint32_t b) { + const auto& resA = resourceInstances[a]; + const auto& resB = resourceInstances[b]; + + // memory type + uint32_t memTypeA = resA.allocRequirement.memoryTypeIndex; + uint32_t memTypeB = resB.allocRequirement.memoryTypeIndex; + + if (memTypeA < memTypeB) + return true; + else if (memTypeA > memTypeB) + return false; + + // For the same heap type, put pre-allocated (reused) resources ahead, + // so they occupy the existing allocation before we try allocate anything else. 
+ if (!resA.isPendingCreate && resB.isPendingCreate) + return true; + else if (resA.isPendingCreate && !resB.isPendingCreate) + return false; + + uint64_t sizeA = + rpsAlignUp(resA.allocRequirement.size, uint64_t(rpsMax(1u, resA.allocRequirement.alignment))); + uint64_t sizeB = + rpsAlignUp(resB.allocRequirement.size, uint64_t(rpsMax(1u, resB.allocRequirement.alignment))); + + // Sort size in decreasing order + if (sizeA > sizeB) + return true; + else if (sizeA < sizeB) + return false; + + uint32_t cmdIndexA = resA.lifetimeBegin; + uint32_t cmdIndexB = resB.lifetimeBegin; + + return (cmdIndexA < cmdIndexB); + }); + + // Ordered index list by (heap, offset, end), cleared when switching heap type + ArenaVector allocatedIndices(&context.scratchArena); + allocatedIndices.reserve(sortedResourceIndices.size()); + + bool bLastResPreallocated = false; + + // For each resource in sorted list, try allocate in a 2d rectangle ( width = cmd index span, height = size ) + uint32_t currHeapMemType = UINT32_MAX; + for (size_t iIndex = 0, endIndex = sortedResourceIndices.size(); iIndex < endIndex; iIndex++) + { + const uint32_t iRes = sortedResourceIndices[iIndex]; + auto& currRes = resourceInstances[iRes]; + + RPS_ASSERT(currRes.allocRequirement.size > 0); + + // Switch heap if heap type changes + if (currHeapMemType != currRes.allocRequirement.memoryTypeIndex) + { + currHeapMemType = currRes.allocRequirement.memoryTypeIndex; + allocatedIndices.clear(); + } + + // Existing allocation should have a valid resource handle and heap placement. + RPS_ASSERT((!currRes.isPendingCreate) == + (currRes.hRuntimeResource && (currRes.allocPlacement.heapId != RPS_INDEX_NONE_U32))); + + // Insert existing allocations into the allocatedIndices list and update the heap infos accordingly, + // in order to hold their placements. 
+ if (!currRes.isPendingCreate) + { + HeapInfo* pHeap = &heaps[currRes.allocPlacement.heapId]; + + RPS_ASSERT(pHeap->memTypeIndex == currRes.allocRequirement.memoryTypeIndex); + RPS_ASSERT(pHeap->alignment >= currRes.allocRequirement.alignment); + RPS_ASSERT(pHeap->size >= (currRes.allocPlacement.offset + currRes.allocRequirement.size)); + RPS_ASSERT(bLastResPreallocated || allocatedIndices.empty()); + + InsertToSortedAllocationList(allocatedIndices, iRes, resourceInstances); + + pHeap->usedSize = + rpsMax(pHeap->usedSize, currRes.allocPlacement.offset + currRes.allocRequirement.size); + pHeap->maxUsedSize = rpsMax(pHeap->maxUsedSize, pHeap->usedSize); + + bLastResPreallocated = true; + + continue; + } + + bLastResPreallocated = false; + + // Search for a valid range, for each existing resource allocated with current heap type: + uint32_t currHeapIndex = UINT32_MAX; + + uint64_t prevRangeEndAligned = 0; + uint64_t fitness = UINT64_MAX; // Smaller is better + RpsHeapPlacement rangeCandidate; + + // TODO: Allow choose first fit / best fit + + if (bUseAliasing) + { + // Filter allocated resources by command index range + for (uint32_t iAllocated = 0, numAllocated = uint32_t(allocatedIndices.size()); + iAllocated < numAllocated; + iAllocated++) + { + const uint32_t iResAllocated = allocatedIndices[iAllocated]; + const auto& allocatedRes = resourceInstances[iResAllocated]; + + RPS_ASSERT(allocatedRes.allocRequirement.memoryTypeIndex == currHeapMemType); + + // Before moving on to new heap, check any space left in current heap. 
+ if (allocatedRes.allocPlacement.heapId != currHeapIndex) + { + if (currHeapIndex != UINT32_MAX) + { + const auto& currHeap = heaps[currHeapIndex]; + + // Check any space left in previous heap from last allocation to its top + CheckReusableSpaceInHeap(prevRangeEndAligned, + currHeap.size, + currRes.allocRequirement, + currHeapIndex, + &fitness, + &rangeCandidate); + + // Size fits perfectly, no more search + if (fitness == 0) + break; + } + + // Switch to next heap, reset states + prevRangeEndAligned = 0; + currHeapIndex = allocatedRes.allocPlacement.heapId; + } + + // If lifetimes overlap: + if ((allocatedRes.lifetimeBegin <= currRes.lifetimeEnd) && + (currRes.lifetimeBegin <= allocatedRes.lifetimeEnd)) + { + // Only check if there is a gap between previous range end and current allocated resource start + if (prevRangeEndAligned < allocatedRes.allocPlacement.offset) + { + const auto& currHeap = heaps[currHeapIndex]; + + CheckReusableSpaceInHeap(prevRangeEndAligned, + allocatedRes.allocPlacement.offset, + currRes.allocRequirement, + currHeapIndex, + &fitness, + &rangeCandidate); + + // Size fit perfectly, no more search + if (fitness == 0) + break; + } + + const uint64_t allocatedEnd = + allocatedRes.allocPlacement.offset + allocatedRes.allocRequirement.size; + + prevRangeEndAligned = + rpsMax(prevRangeEndAligned, + rpsAlignUp(allocatedEnd, uint64_t(currRes.allocRequirement.alignment))); + } + } + } + else + { + // Not using aliasing. Check last allocation for current heap + if (!allocatedIndices.empty()) + { + // Only check last allocation for now. Can probably look through all allocated heaps with same type and scrape any space from top to limit + const auto& allocatedRes = resourceInstances[allocatedIndices.back()]; + + RPS_ASSERT(allocatedRes.allocRequirement.memoryTypeIndex == currHeapMemType); + + // Before moving on to new heap, check any space left in current heap. 
+ currHeapIndex = allocatedRes.allocPlacement.heapId; + RPS_ASSERT(prevRangeEndAligned == 0); + } + + if (currHeapIndex != UINT32_MAX) + { + prevRangeEndAligned = rpsMax( + prevRangeEndAligned, + rpsAlignUp(heaps[currHeapIndex].usedSize, uint64_t(currRes.allocRequirement.alignment))); + } + } + + // Did not find valid space, try top of heap + if ((fitness == UINT64_MAX) && (currHeapIndex != UINT32_MAX)) + { + // Check any space left in previous heap from last allocation to its top + const auto& currHeap = heaps[currHeapIndex]; + + CheckReusableSpaceInHeap(prevRangeEndAligned, + currHeap.size, + currRes.allocRequirement, + currHeapIndex, + &fitness, + &rangeCandidate); + } + + // Did not find valid space, try grab an unused existing heap / create a new heap + if (fitness == UINT64_MAX) + { + const uint32_t newHeapIdx = FindOrCreateFreeHeap( + currHeapMemType, currRes.allocRequirement.size, currRes.allocRequirement.alignment); + + RPS_ASSERT(newHeapIdx != UINT32_MAX); // TODO + + prevRangeEndAligned = 0; + currHeapIndex = newHeapIdx; + + const auto& currHeap = heaps[currHeapIndex]; + + CheckReusableSpaceInHeap(prevRangeEndAligned, + currHeap.size, + currRes.allocRequirement, + currHeapIndex, + &fitness, + &rangeCandidate); + } + + RPS_RETURN_ERROR_IF(fitness == UINT64_MAX, RPS_ERROR_OUT_OF_MEMORY); + + auto& selectedHeap = heaps[rangeCandidate.heapId]; + + // Adjust alignment if RtHeap is not created yet. 
+ if (!selectedHeap.hRuntimeHeap) + { + selectedHeap.alignment = rpsMax(currRes.allocRequirement.alignment, selectedHeap.alignment); + } + + RPS_ASSERT(selectedHeap.alignment >= currRes.allocRequirement.alignment); + + currRes.allocPlacement = rangeCandidate; + + // Increase heap top if needed + selectedHeap.usedSize = + rpsMax(selectedHeap.usedSize, rangeCandidate.offset + currRes.allocRequirement.size); + selectedHeap.maxUsedSize = rpsMax(selectedHeap.maxUsedSize, selectedHeap.usedSize); + + // Insert current range to allocatedIndices sorted + InsertToSortedAllocationList(allocatedIndices, iRes, resourceInstances); + } + + return RPS_OK; + } + + void CheckReusableSpaceInHeap(uint64_t spaceBegin, + uint64_t spaceEnd, + const RpsGpuMemoryRequirement memRequirement, + uint32_t heapIndex, + uint64_t* pFitness, + RpsHeapPlacement* pCandidate) + { + auto& heaps = m_pContext->renderGraph.GetHeapInfos(); + + auto& heapInfo = heaps[heapIndex]; + + if ((heapInfo.hRuntimeHeap) && (heapInfo.alignment < memRequirement.alignment)) + { + // Fail if runtime heap already exists but heap alignment is smaller than resource required alignment. + return; + } + + uint64_t newRangeEnd = spaceBegin + memRequirement.size; + + // Check if requiredSize can fit the space: + if (newRangeEnd <= spaceEnd) + { + uint64_t newFitness = spaceEnd - newRangeEnd; + + if (newFitness < *pFitness) + { + pCandidate->heapId = heapIndex; + pCandidate->offset = spaceBegin; + + *pFitness = newFitness; + } + } + } + + uint32_t FindOrCreateFreeHeap(uint32_t memoryTypeIndex, uint64_t minSize, uint32_t minAlignment) + { + auto& heaps = m_pContext->renderGraph.GetHeapInfos(); + + for (size_t heapIdx = 0, numHeaps = heaps.size(); heapIdx < numHeaps; heapIdx++) + { + const auto& heap = heaps[heapIdx]; + // TODO: If allocation minSize > default heap size, we allocate a heap just fit the size. + // Need to make sure this heap is not grabbed by other smaller allocations, or check for a better solution. 
+ if ((heap.memTypeIndex == memoryTypeIndex) && (heap.usedSize == 0) && (minSize <= heap.size) && + (minAlignment <= heap.alignment)) + { + RPS_ASSERT(heap.hRuntimeHeap); + return uint32_t(heapIdx); + } + } + + return AddNewHeap(memoryTypeIndex, minSize, minAlignment); + } + + uint32_t AddNewHeap(uint32_t memoryTypeIndex, uint64_t minSize, uint32_t alignment) + { + auto& heaps = m_pContext->renderGraph.GetHeapInfos(); + + uint32_t newHeapIdx = 0; + + for (const size_t numHeaps = heaps.size(); newHeapIdx < numHeaps; newHeapIdx++) + { + if (heaps[newHeapIdx].memTypeIndex == UINT32_MAX) + { + break; + } + } + + if (newHeapIdx == heaps.size()) + { + heaps.emplace_back(); + } + + const auto& memTypeInfo = m_pContext->renderGraph.GetMemoryTypes()[memoryTypeIndex]; + + auto& newHeap = heaps[newHeapIdx]; + newHeap = {}; + + newHeap.memTypeIndex = memoryTypeIndex; + newHeap.index = newHeapIdx; + newHeap.size = UINT64_MAX; + newHeap.alignment = rpsMax(alignment, memTypeInfo.minAlignment); + + if (memTypeInfo.defaultHeapSize > 0) + { + newHeap.size = rpsMax(minSize, memTypeInfo.defaultHeapSize); + } + + return newHeapIdx; + } + + struct HeapRangeUsage + { + uint64_t size; + uint64_t heapOffset; + uint32_t heapIndex; + uint32_t resourceIndex; + }; + + // Foreach command, update resource usage ranges & find alias + RpsResult CalculateResourceAliasing(RenderGraphUpdateContext& context) + { + const bool bUseAliasing = !rpsAnyBitsSet(context.renderGraph.GetCreateInfo().renderGraphFlags, + RPS_RENDER_GRAPH_NO_GPU_MEMORY_ALIASING); + + auto resourceInstances = context.renderGraph.GetResourceInstances().range_all(); + auto scheduledCmds = context.renderGraph.GetRuntimeCmdInfos().range_all(); + + RPS_RETURN_OK_IF(resourceInstances.empty() || scheduledCmds.empty()); + + ArenaCheckPoint arenaCheckpoint{context.scratchArena}; + + ArenaVector heapRangeUsages(&context.scratchArena); + heapRangeUsages.reserve(resourceInstances.size()); + + ArenaBitVector<> 
aliasingSrcBitMask(&context.scratchArena); + aliasingSrcBitMask.Resize(uint32_t(resourceInstances.size())); + aliasingSrcBitMask.Fill(false); + + ArenaVector pendingAliasingSrcs(&context.scratchArena); + pendingAliasingSrcs.reserve(resourceInstances.size()); + + // TODO: Only check the resources referenced by current cmd? Do all resources starting at a cmd referenced by the cmd? + ArenaVector resourceIdxSortedByLifetimeStart(resourceInstances.size(), &context.scratchArena); + std::iota(resourceIdxSortedByLifetimeStart.begin(), resourceIdxSortedByLifetimeStart.end(), 0); + std::sort(resourceIdxSortedByLifetimeStart.begin(), + resourceIdxSortedByLifetimeStart.end(), + [&](uint32_t i, uint32_t j) { + return resourceInstances[i].lifetimeBegin < resourceInstances[j].lifetimeBegin; + }); + + auto& aliasingInfos = context.renderGraph.GetResourceAliasingInfos(); + aliasingInfos.clear(); + + uint32_t numCmdExtraAliasingInfos = 0; + + if (bUseAliasing) + { + uint32_t resIdxSorted = 0; + + uint32_t numAliasingRes = 0; + uint32_t numDeactivatedRes = 0; + + for (uint32_t iCmd = 0, numNodes = uint32_t(scheduledCmds.size()); iCmd < numNodes; iCmd++) + { + RuntimeCmdInfo& runtimeCmd = scheduledCmds[iCmd]; + runtimeCmd.aliasingInfos.SetRange(uint32_t(aliasingInfos.size()), 0); + + // For each resource starting at cmd, find overlapping with previous ranges + for (; (resIdxSorted < resourceIdxSortedByLifetimeStart.size()); resIdxSorted++) + { + const uint32_t resIndex = resourceIdxSortedByLifetimeStart[resIdxSorted]; + auto& resInst = resourceInstances[resIndex]; + + // Move on to the next cmd + if (resInst.lifetimeBegin != iCmd) + break; + + if (resInst.isExternal) + continue; + + RPS_ASSERT((resInst.allocRequirement.size > 0) || (resInst.HasNoAccess())); + RPS_ASSERT(resInst.lifetimeBegin != UINT32_MAX); + + RPS_ASSERT(runtimeCmd.isTransition); + + HeapRangeUsage currentResourceRange; + currentResourceRange.heapIndex = resInst.allocPlacement.heapId; + 
currentResourceRange.heapOffset = resInst.allocPlacement.offset; + currentResourceRange.size = resInst.allocRequirement.size; + currentResourceRange.resourceIndex = resIndex; + + HeapRangeUsage complementParts[2]; + + uint32_t initialNumActiveRanges = uint32_t(heapRangeUsages.size()); + + resInst.isAliased = false; + + for (uint32_t iRange = 0; + iRange < rpsMin(initialNumActiveRanges, uint32_t(heapRangeUsages.size())); + iRange++) + { + if (heapRangeUsages[iRange].heapIndex != currentResourceRange.heapIndex) + { + continue; + } + + uint32_t clipResultMask = 0; + + if (HeapRangeClip( + ¤tResourceRange, &heapRangeUsages[iRange], complementParts, clipResultMask)) + { + const uint32_t currAliasingInfoIdx = uint32_t(aliasingInfos.size()); + + const uint32_t srcResourceIdx = heapRangeUsages[iRange].resourceIndex; + + ResourceAliasingInfo* pAliasingInfo = aliasingInfos.grow(1); + + pAliasingInfo->srcResourceIndex = srcResourceIdx; + pAliasingInfo->dstResourceIndex = resIndex; + pAliasingInfo->srcDeactivating = RPS_FALSE; + pAliasingInfo->dstActivating = RPS_FALSE; + // dstActivating Will be set on the last aliasing info where current resIndex is dst. + + // First time seen src as aliasing src, deactivate + bool bFirstTimeAsSrc = !aliasingSrcBitMask.ExchangeBit(srcResourceIdx, true); + if (bFirstTimeAsSrc) + { + pAliasingInfo->srcDeactivating = RPS_TRUE; + numDeactivatedRes++; + } + + auto& srcResInfo = resourceInstances[srcResourceIdx]; + + if (!srcResInfo.isAliased) + { + // Src resource is not aliased yet. May need to initialize it before first access. 
+ pendingAliasingSrcs.push_back(srcResourceIdx); + srcResInfo.isAliased = true; + + numAliasingRes++; + } + + resInst.isAliased = true; + + if (clipResultMask & 0x1) + { + heapRangeUsages[iRange] = complementParts[0]; + + if (clipResultMask & 0x2) + { + heapRangeUsages.push_back(complementParts[1]); + } + } + else if (clipResultMask == 0x2) + { + heapRangeUsages[iRange] = complementParts[1]; + } + else // fully overlap + { + heapRangeUsages[iRange] = heapRangeUsages.back(); + heapRangeUsages.pop_back(); + + iRange--; + } + } + } + + heapRangeUsages.push_back(currentResourceRange); + + if (resInst.isAliased) + { + RPS_ASSERT(!aliasingInfos.empty()); + RPS_ASSERT(aliasingInfos.back().dstResourceIndex == resIndex); + + aliasingInfos.back().dstActivating = RPS_TRUE; + numAliasingRes++; + } + } + + // Update Cmd Aliasing info + runtimeCmd.aliasingInfos.SetCount( + uint32_t(aliasingInfos.size() - runtimeCmd.aliasingInfos.GetBegin())); + + // TODO: Only need to iterate through transitions? + RPS_ASSERT(runtimeCmd.isTransition || runtimeCmd.aliasingInfos.empty()); + } + + RPS_ASSERT(numAliasingRes <= resourceInstances.size()); + RPS_ASSERT(numDeactivatedRes <= numAliasingRes); + + + // Preamble: + // For aliasing source-only resources. May need to initialize them before first access. 
+ // Adding to the first node's aliasing info list + + uint32_t preambleAliasingInfoOffset = uint32_t(aliasingInfos.size()); + + for (auto pendingAliasingSrcRes : pendingAliasingSrcs) + { + ResourceAliasingInfo* pPreambleAliasingInfo = aliasingInfos.grow(1); + pPreambleAliasingInfo->srcResourceIndex = RPS_RESOURCE_ID_INVALID; + pPreambleAliasingInfo->dstResourceIndex = pendingAliasingSrcRes; + pPreambleAliasingInfo->srcDeactivating = RPS_FALSE; + pPreambleAliasingInfo->dstActivating = RPS_TRUE; + } + + RPS_ASSERT(scheduledCmds[0].cmdId == CMD_ID_PREAMBLE); + scheduledCmds[0].aliasingInfos.SetRange(preambleAliasingInfoOffset, + uint32_t(pendingAliasingSrcs.size())); + + // Postamble: + // Resources that are aliased but no successor, so havn't been deactivated at the end of frame. + // Insert aliasingInfo at the end. + // purely for generating DX12 transition barrier to raise them to RT/DS states for discard next frame. + // Check per resource aliasing info, mark pre-discard and post-discard transitions + + uint32_t postambleAliasingInfoOffset = uint32_t(aliasingInfos.size()); + + uint32_t numAliasingResCounted = 0; + for (uint32_t iRes = 0, numRes = uint32_t(resourceInstances.size()); iRes < numRes; iRes++) + { + const auto& resInst = resourceInstances[iRes]; + + // Persistent resource shouldn't be aliased. + RPS_ASSERT( + (resInst.isExternal || rpsAnyBitsSet(resInst.desc.flags, RPS_RESOURCE_FLAG_PERSISTENT_BIT)) + ? !resInst.isAliased + : true); + + if (resInst.lifetimeBegin == UINT32_MAX) + { + continue; + } + + if (resInst.isAliased && !aliasingSrcBitMask.GetBit(iRes)) + { + ResourceAliasingInfo* pFrameEndAliasingInfo = aliasingInfos.grow(1); + pFrameEndAliasingInfo->srcResourceIndex = iRes; + pFrameEndAliasingInfo->dstResourceIndex = RPS_RESOURCE_ID_INVALID; + pFrameEndAliasingInfo->srcDeactivating = RPS_TRUE; + pFrameEndAliasingInfo->dstActivating = RPS_FALSE; + + numDeactivatedRes++; + } + + numAliasingResCounted += resInst.isAliased ? 
1 : 0; + } + + RPS_ASSERT(scheduledCmds.back().cmdId == CMD_ID_POSTAMBLE); + scheduledCmds.back().aliasingInfos.SetRange( + postambleAliasingInfoOffset, uint32_t(aliasingInfos.size() - postambleAliasingInfoOffset)); + + RPS_ASSERT(numAliasingRes == numAliasingResCounted); + } + + return RPS_OK; + } + + // Clip rhs with lhs, returns if clipping (intersection) happened, and output a bit mask of complement ranges: + // Bit 0: complements from rhs, that's less than lhs + // Bit 1: complements from rhs, that's greater than lhs + bool HeapRangeClip(const HeapRangeUsage* lhs, + const HeapRangeUsage* rhs, + HeapRangeUsage rhsComplements[2], + uint32_t& complementMask) + { + complementMask = 0; + + RPS_ASSERT(lhs->heapIndex == rhs->heapIndex); + + const uint64_t lhsEnd = (lhs->heapOffset + lhs->size); + const uint64_t rhsEnd = (rhs->heapOffset + rhs->size); + + if ((lhsEnd > rhs->heapOffset) && (lhs->heapOffset < rhsEnd)) + { + if (lhs->heapOffset > rhs->heapOffset) + { + rhsComplements[0].heapIndex = rhs->heapIndex; + rhsComplements[0].heapOffset = rhs->heapOffset; + rhsComplements[0].size = lhs->heapOffset - rhs->heapOffset; + rhsComplements[0].resourceIndex = rhs->resourceIndex; + complementMask |= 0x1; + } + + if (lhsEnd < rhsEnd) + { + rhsComplements[1].heapIndex = lhs->heapIndex; + rhsComplements[1].heapOffset = lhsEnd; + rhsComplements[1].size = rhsEnd - lhsEnd; + rhsComplements[1].resourceIndex = rhs->resourceIndex; + complementMask |= 0x2; + } + + return true; + } + + return false; + } + }; +} + +#endif //_RPS_MEMORY_SCHEDULE_H_ diff --git a/src/runtime/common/phases/rps_pre_process.hpp b/src/runtime/common/phases/rps_pre_process.hpp new file mode 100644 index 0000000..3ee150e --- /dev/null +++ b/src/runtime/common/phases/rps_pre_process.hpp @@ -0,0 +1,856 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. 
+// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_PRE_PROCESS_H_ +#define _RPS_PRE_PROCESS_H_ + +#include "rps/runtime/common/rps_render_states.h" + +#include "runtime/common/rps_render_graph.hpp" +#include "runtime/common/rps_runtime_device.hpp" + +#include "core/rps_util.hpp" + +namespace rps +{ + class PreProcessPhase : public IRenderGraphPhase + { + public: + virtual RpsResult Run(RenderGraphUpdateContext& context) override final + { + m_pRuntimeDevice = RuntimeDevice::Get(context.renderGraph.GetDevice()); + m_pRuntimeBackend = context.renderGraph.GetRuntimeBackend(); + + ArenaCheckPoint arenaCheckpoint{context.scratchArena}; + + RPS_V_RETURN(CollectResourceAllAccesses(context)); + + RPS_V_RETURN(InitResourceInstances(context)); + + InitParamResources(context); + + RPS_V_RETURN(InitCmdAccessInfos(context)); + + // Initialize resource alloc info after access flags is known + auto& resInstances = context.renderGraph.GetResourceInstances(); + RPS_V_RETURN(m_pRuntimeDevice->InitializeResourceAllocInfos(resInstances.range_all())); + + m_pRuntimeBackend = nullptr; + m_pRuntimeDevice = nullptr; + m_resourceAllAccesses = {}; + + return RPS_OK; + } + + protected: + + RpsResult CollectResourceAllAccesses(RenderGraphUpdateContext& context) + { + auto& renderGraph = context.renderGraph; + auto cmdInfos = renderGraph.GetCmdInfos().crange_all(); + auto& cmdAccesses = renderGraph.GetCmdAccessInfos(); + + const auto resDecls = renderGraph.GetBuilder().GetResourceDecls(); + + m_resourceAllAccesses = context.scratchArena.NewArray(resDecls.size()); + std::fill(m_resourceAllAccesses.begin(), m_resourceAllAccesses.end(), AccessAttr{}); + + const uint64_t frameIndex = context.pUpdateInfo->frameIndex; + + for (uint32_t iCmd = 0; iCmd < cmdInfos.size(); iCmd++) + { + auto& cmdInfo = cmdInfos[iCmd]; + + if (!cmdInfo.pNodeDecl || (cmdInfo.pNodeDecl->numAccesses == 0)) + continue; + + auto& cmdDef = *cmdInfo.pCmdDecl; + const auto nodeParams = 
cmdInfo.pNodeDecl->params; + + for (uint32_t iParam = 0; iParam < nodeParams.size(); iParam++) + { + const auto& paramDecl = nodeParams[iParam]; + + if (paramDecl.access.accessFlags == RPS_ACCESS_UNKNOWN) + continue; + + const uint32_t numViews = nodeParams[iParam].GetNumElements(); + const size_t elementSize = nodeParams[iParam].GetElementSize(); + const void* pViewData = cmdDef.args[iParam]; + + for (uint32_t iElement = 0; iElement < numViews; iElement++) + { + auto pView = static_cast(pViewData); + + if (!pView || (pView->resourceId == RPS_RESOURCE_ID_INVALID)) + continue; + + RPS_RETURN_ERROR_IF(pView->resourceId > resDecls.size(), RPS_ERROR_INVALID_PROGRAM); + + const uint32_t resInstanceId = pView->resourceId; + + // No need to resolve temporal layer here, as we want to merge allAccess from all temporal layers + + m_resourceAllAccesses[resInstanceId] = m_resourceAllAccesses[resInstanceId] | paramDecl.access; + + pViewData = rpsBytePtrInc(pViewData, elementSize); + } + } + } + + auto& signature = context.renderGraph.GetSignature(); + + // TODO: Make sure input array size matches GetNumParamResources + const uint32_t numParamResToUpdate = signature.GetMaxExternalResourceCount(); + + const RpsRuntimeResource* const* ppExternalArgResources = context.pUpdateInfo->ppArgResources; + + // Propagate out resource access to source resource. 
+ for (uint32_t iRes = 0; iRes < numParamResToUpdate; iRes++) + { + const auto paramId = signature.GetResourceParamId(iRes); + const ParamDecl& paramDecl = signature.GetParamDecls()[paramId]; + + if (paramDecl.IsOutputResource()) + { + // Assign the initialAccess to the source resource: + auto outputParamResIds = context.renderGraph.GetBuilder().GetOutputParamResourceIds(paramId); + + for (uint32_t iElem = 0; iElem < outputParamResIds.size(); iElem++) + { + if (outputParamResIds[iElem] != RPS_RESOURCE_ID_INVALID) + { + RPS_ASSERT(outputParamResIds[iElem] < resDecls.size()); + + const uint32_t srcResourceIdx = outputParamResIds[iElem]; + + m_resourceAllAccesses[srcResourceIdx] = + m_resourceAllAccesses[srcResourceIdx] | paramDecl.access; + } + } + } + } + + return RPS_OK; + } + + bool UpdateResourceDesc(ResourceInstance& instance, RpsVariable pDescVar) + { + const auto* pResDesc = static_cast(pDescVar); + auto newDesc = ResourceDescPacked(*pResDesc); + CanonicalizeMipLevels(newDesc); + + newDesc.flags |= instance.desc.flags; + const bool bDescUpdated = (instance.desc != newDesc); + instance.desc = newDesc; + + return bDescUpdated; + // TODO: Make sure temporal layer count can't be changed + } + + RpsResult InitResourceInstances(RenderGraphUpdateContext& context) + { + auto resDecls = context.renderGraph.GetBuilder().GetResourceDecls(); + auto& resInstances = context.renderGraph.GetResourceInstances(); + auto* pRuntimeBackend = context.renderGraph.GetRuntimeBackend(); + + const uint32_t numParamResources = context.renderGraph.GetSignature().GetMaxExternalResourceCount(); + + resInstances.resize(rpsMax(resInstances.size(), size_t(resDecls.size()))); + + uint32_t pendingResStart = 0; + uint32_t pendingResCount = 0; + + auto deactivateResourceInstance = [pRuntimeBackend](ResourceInstance& resInstance) { + if (resInstance.hRuntimeResource) + { + pRuntimeBackend->DestroyRuntimeResourceDeferred(resInstance); + } + resInstance.desc.type = RPS_RESOURCE_TYPE_UNKNOWN; + 
resInstance.isAliased = false; + resInstance.isPendingCreate = false; + resInstance.isPendingInit = false; + resInstance.allocPlacement = {RPS_INDEX_NONE_U32, 0}; + }; + + // Initialize resource instances + for (uint32_t iRes = 0; iRes < resDecls.size(); iRes++) + { + const auto& resDecl = resDecls[iRes]; + auto* pResInstance = &resInstances[iRes]; + + if (!resDecl.desc) + { + if (pendingResCount > 0) + { + RPS_V_RETURN(m_pRuntimeDevice->InitializeSubresourceInfos( + resInstances.range(pendingResStart, pendingResCount))); + pendingResCount = 0; + } + pendingResStart = iRes + 1; + + if (pResInstance->IsTemporalParent()) + { + RPS_ASSERT(pResInstance->temporalLayerOffset + pResInstance->desc.temporalLayers <= + resInstances.size()); + + for (auto temporalSlice = resInstances.begin() + pResInstance->temporalLayerOffset, + temporalSliceEnd = resInstances.begin() + pResInstance->temporalLayerOffset + + pResInstance->desc.temporalLayers; + temporalSlice != temporalSliceEnd; + ++temporalSlice) + { + deactivateResourceInstance(*temporalSlice); + } + } + + deactivateResourceInstance(*pResInstance); + + continue; + } + else if (pResInstance->IsTemporalParent() && (pResInstance->temporalLayerOffset < resDecls.size())) + { + // Temporal slice range overlapping with current resource desc index range, + // indicating some resource slots previously occupied by these temporal slices + // need to be cleared for newly declared resources. Move them to the end of instance list. 
+ + const uint32_t newTemporalLayerOffset = rpsMax(resDecls.size(), uint32_t(resInstances.size())); + RPS_CHECK_ALLOC( + resInstances.resize(newTemporalLayerOffset + pResInstance->desc.temporalLayers, {})); + + // Array might have reallocated + pResInstance = &resInstances[iRes]; + + for (auto src = resInstances.begin() + pResInstance->temporalLayerOffset, + srcEnd = src + pResInstance->desc.temporalLayers, + dst = resInstances.begin() + newTemporalLayerOffset; + src != srcEnd; + ++src, ++dst) + { + std::swap(*src, *dst); + } + + pResInstance->temporalLayerOffset = newTemporalLayerOffset; + } + + pendingResCount++; + + if (pResInstance->resourceDeclId == RPS_INDEX_NONE_U32) + { + pResInstance->resourceDeclId = iRes; + } + + bool bDescUpdated = UpdateResourceDesc(*pResInstance, resDecl.desc); + + const AccessAttr mergedAllAccess = pResInstance->allAccesses | m_resourceAllAccesses[iRes]; + + if (pResInstance->allAccesses != mergedAllAccess) + { + pResInstance->allAccesses = mergedAllAccess; + bDescUpdated = true; + } + + SetRuntimeResourcePendingCreate(*pResInstance, bDescUpdated && (iRes >= numParamResources)); + pResInstance->isPendingInit = false; + + // Handle temporal resources: + const bool isTemporalResource = pResInstance->desc.temporalLayers > 1; + if (isTemporalResource) + { + RPS_V_RETURN(InitTemporalSlices(context, resInstances, iRes)); + } + } + + for (const RpsResourceId outResId : context.renderGraph.GetBuilder().GetOutputParamResourceIds()) + { + if (outResId != RPS_RESOURCE_ID_INVALID) + { + auto& resInstance = resInstances[outResId]; + + // TODO: Add a "retained" keyword to indicate the resource can out live the render graph + // & adjust allocation strategy accordingly. + + // No need to handle temporal slices because temporal resources are forced to persistent already. 
+ RPS_ASSERT(!resInstance.IsTemporalParent() || + rpsAllBitsSet(resInstance.desc.flags, RPS_RESOURCE_FLAG_PERSISTENT_BIT)); + + resInstance.desc.flags |= RPS_RESOURCE_FLAG_PERSISTENT_BIT; + } + } + + if (pendingResCount > 0) + { + RPS_V_RETURN( + m_pRuntimeDevice->InitializeSubresourceInfos(resInstances.range(pendingResStart, pendingResCount))); + } + + return RPS_OK; + } + + inline RpsResult InitTemporalSlices(RenderGraphUpdateContext& context, + ArenaVector& resInstances, + uint32_t resIndex) + { + ResourceInstance* pResInstance = &resInstances[resIndex]; + + const uint32_t numTemporalLayers = pResInstance->desc.temporalLayers; + + if (pResInstance->temporalLayerOffset == RPS_INDEX_NONE_U32) + { + // First-time seeing this temporal resource, temporal slices are not allocated yet: + + uint32_t temporalLayerOffset = uint32_t(resInstances.size()); + + ResourceInstance tempResInstCopy = *pResInstance; + tempResInstCopy.isTemporalSlice = true; + // Force persistent flag for temporal resources + tempResInstCopy.desc.flags |= RPS_RESOURCE_FLAG_PERSISTENT_BIT; + + auto* pTemporalLayers = resInstances.grow(numTemporalLayers, tempResInstCopy); + pResInstance = &resInstances[resIndex]; + + RPS_RETURN_ERROR_IF(!pTemporalLayers, RPS_ERROR_OUT_OF_MEMORY); + + // Mark the parent resource as a pointer to the temporal layers only. 
+ pResInstance->temporalLayerOffset = temporalLayerOffset; + RPS_ASSERT(pResInstance->IsTemporalParent()); + + RPS_V_RETURN(m_pRuntimeDevice->InitializeSubresourceInfos( + resInstances.range(temporalLayerOffset, numTemporalLayers))); + } + else + { + RPS_ASSERT(pResInstance->temporalLayerOffset + numTemporalLayers <= resInstances.size()); + + const uint32_t currTemporalLayerOffset = + pResInstance->temporalLayerOffset + context.pUpdateInfo->frameIndex % numTemporalLayers; + + auto& temporalSlice = resInstances[currTemporalLayerOffset]; + + RPS_ASSERT(temporalSlice.resourceDeclId == resIndex); + RPS_ASSERT(temporalSlice.isTemporalSlice); + RPS_ASSERT(temporalSlice.isExternal == pResInstance->isExternal); + + temporalSlice.desc = pResInstance->desc; + // Force persistent flag for temporal resources + temporalSlice.desc.flags |= RPS_RESOURCE_FLAG_PERSISTENT_BIT; + + temporalSlice.fullSubresourceRange = pResInstance->fullSubresourceRange; + temporalSlice.numSubResources = pResInstance->numSubResources; + temporalSlice.allAccesses = pResInstance->allAccesses; + temporalSlice.initialAccess = pResInstance->initialAccess; + temporalSlice.finalAccesses = pResInstance->finalAccesses; + + // TODO: Should compare current resDesc vs this temporal slice to determine dirty bit + SetRuntimeResourcePendingCreate(temporalSlice, pResInstance->isPendingCreate); + } + + return RPS_OK; + } + + inline void InitParamResources(RenderGraphUpdateContext& context) + { + auto& resInstances = context.renderGraph.GetResourceInstances(); + auto& signature = context.renderGraph.GetSignature(); + + // TODO: Make sure input array size matches GetNumParamResources + const uint32_t numParamResToUpdate = signature.GetMaxExternalResourceCount(); + + const RpsRuntimeResource* const* ppExternalArgResources = context.pUpdateInfo->ppArgResources; + + for (uint32_t iRes = 0; iRes < numParamResToUpdate; iRes++) + { + auto& resInstance = resInstances[iRes]; + + // Initialize param resource states + const auto 
paramId = signature.GetResourceParamId(iRes); + const ParamDecl& paramDecl = signature.GetParamDecls()[paramId]; + + resInstance.initialAccess = paramDecl.access; + + // Skip out resource at input. + // TODO: May need to handle inout? + if (paramDecl.IsOutputResource()) + { + // Assign the initialAccess to the source resource: + auto outputParamResIds = context.renderGraph.GetBuilder().GetOutputParamResourceIds(paramId); + + for (uint32_t iElem = 0; iElem < outputParamResIds.size(); iElem++) + { + if (outputParamResIds[iElem] != RPS_RESOURCE_ID_INVALID) + { + RPS_ASSERT(outputParamResIds[iElem] < resInstances.size()); + + auto& sourceResInstance = resInstances[outputParamResIds[iElem]]; + + // TODO: Check if this works with temporal resources. + sourceResInstance.initialAccess = paramDecl.access; + } + } + + continue; + } + + resInstance.isExternal = true; + + // Copy param resource handles etc. + + const RpsRuntimeResource* pExternResArray = + ppExternalArgResources ? ppExternalArgResources[iRes] : nullptr; + + if (resInstance.desc.temporalLayers == 1) + { + resInstance.hRuntimeResource = pExternResArray ? pExternResArray[0] : resInstance.hRuntimeResource; + } + else + { + for (uint32_t iT = 0; iT < resInstance.desc.temporalLayers; iT++) + { + ResourceInstance& temporalSlice = resInstances[resInstance.temporalLayerOffset + iT]; + temporalSlice.isExternal = true; + temporalSlice.initialAccess = resInstance.initialAccess; + temporalSlice.hRuntimeResource = + pExternResArray ? 
pExternResArray[iT] : temporalSlice.hRuntimeResource; + } + } + } + } + + inline RpsResult InitCmdAccessInfos(RenderGraphUpdateContext& context) + { + // Preprocess cmd nodes + auto& renderGraph = context.renderGraph; + auto& cmdInfos = renderGraph.GetCmdInfos(); + auto& cmdAccesses = renderGraph.GetCmdAccessInfos(); + + const auto resDecls = renderGraph.GetBuilder().GetResourceDecls(); + auto resInstancesRef = renderGraph.GetResourceInstances().range_all(); + + ArenaCheckPoint arenaCheckpoint{context.scratchArena}; + + RPS_ASSERT(cmdAccesses.empty()); + + uint32_t totalParamAccesses = 0; + + static const CmdAccessInfo invalidCmdAccess = { + RPS_RESOURCE_ID_INVALID, + }; + + for (uint32_t iCmd = 0, numCmds = uint32_t(cmdInfos.size()); iCmd < numCmds; iCmd++) + { + auto& cmdInfo = cmdInfos[iCmd]; + + if (!cmdInfo.pNodeDecl || (cmdInfo.pNodeDecl->numAccesses == 0)) + continue; + + auto& cmdDef = *cmdInfo.pCmdDecl; + auto& nodeDecl = *cmdInfo.pNodeDecl; + + // TODO: Check if it's worth it to make cmdAccess sparse. 
+ const uint32_t cmdAccessOffset = uint32_t(cmdAccesses.size()); + auto* pCurrCmdAccesses = cmdAccesses.grow(nodeDecl.numAccesses, invalidCmdAccess); + RPS_CHECK_ALLOC(pCurrCmdAccesses); + + cmdInfo.accesses.SetRange(cmdAccessOffset, nodeDecl.numAccesses); + + for (uint32_t iParam = 0, numParams = uint32_t(nodeDecl.params.size()); iParam < numParams; iParam++) + { + const auto& paramDecl = nodeDecl.params[iParam]; + + if (paramDecl.access.accessFlags == RPS_ACCESS_UNKNOWN) + continue; + + const uint32_t numViews = nodeDecl.params[iParam].GetNumElements(); + const size_t elementSize = nodeDecl.params[iParam].GetElementSize(); + const void* pViewData = cmdDef.args[iParam]; + + for (uint32_t iElement = 0; iElement < numViews; iElement++) + { + auto pView = static_cast(pViewData); + + if (!pView || (pView->resourceId == RPS_RESOURCE_ID_INVALID)) + continue; + + RPS_RETURN_ERROR_IF(pView->resourceId > resDecls.size(), RPS_ERROR_INVALID_PROGRAM); + + RPS_V_RETURN(InitCmdAccessInfo(context.pUpdateInfo->frameIndex, + pCurrCmdAccesses[paramDecl.accessOffset + iElement], + iParam, + paramDecl, + *pView, + resInstancesRef)); + + pViewData = rpsBytePtrInc(pViewData, elementSize); + } + } + + if (nodeDecl.pRenderPassInfo) + { + const auto& rpInfo = *nodeDecl.pRenderPassInfo; + + // TODO: Using scratch arena for now. 
+ // TODO: Generate common viewports to share at compile time + cmdInfo.pRenderPassInfo = context.frameArena.New(); + *cmdInfo.pRenderPassInfo = {}; + auto& renderTargetInfo = cmdInfo.pRenderPassInfo->renderTargetInfo; + auto& viewportInfo = cmdInfo.pRenderPassInfo->viewportInfo; + + uint32_t clearRTMask = rpInfo.renderTargetClearMask; + auto clearValueRefs = rpInfo.GetRenderTargetClearValueRefs(); + auto clearTargetRefs = rpInfo.GetRenderTargetRefs(); + + RPS_ASSERT(clearValueRefs.size() == rpsCountBits(clearRTMask)); + + uint32_t numSamples = 1; + uint32_t minTargetDim[2] = {UINT32_MAX, UINT32_MAX}; + + auto updateRTDimInfo = [&](const ResourceInstance& resInfo, + const CmdAccessInfo& accessInfo) -> RpsResult { + if (resInfo.desc.IsImage()) + { + const uint32_t mipWidth = + GetMipLevelDimension(resInfo.desc.image.width, accessInfo.range.baseMipLevel); + const uint32_t mipHeight = + GetMipLevelDimension(resInfo.desc.image.height, accessInfo.range.baseMipLevel); + minTargetDim[0] = rpsMin(minTargetDim[0], mipWidth); + minTargetDim[1] = rpsMin(minTargetDim[1], mipHeight); + numSamples = rpsMax(numSamples, resInfo.desc.GetSampleCount()); + } + else if (resInfo.desc.IsBuffer()) + { + const uint32_t elementSize = rpsGetFormatElementBytes(accessInfo.viewFormat); + RPS_RETURN_ERROR_IF(elementSize == 0, RPS_ERROR_INVALID_ARGUMENTS); + const uint64_t numElements = resInfo.desc.GetBufferSize() / elementSize; + RPS_RETURN_ERROR_IF(numElements > UINT32_MAX, RPS_ERROR_INTEGER_OVERFLOW); + minTargetDim[0] = rpsMin(minTargetDim[0], uint32_t(numElements)); + minTargetDim[1] = rpsMin(minTargetDim[1], uint32_t(1)); + } + + return RPS_OK; + }; + + for (uint32_t iRT = 0, rtIndex = 0, clearValueIndex = 0; + iRT < RPS_MAX_SIMULTANEOUS_RENDER_TARGET_COUNT; + iRT++) + { + const uint32_t rtSlotBit = 1u << iRT; + + if (rpInfo.renderTargetsMask & rtSlotBit) + { + const uint32_t accessIndex = + nodeDecl.params[clearTargetRefs[rtIndex].paramId].accessOffset + + 
clearTargetRefs[rtIndex].arrayOffset; + + RPS_ASSERT(accessIndex < nodeDecl.numAccesses); + auto& accessInfo = pCurrCmdAccesses[accessIndex]; + + if (accessInfo.resourceId != RPS_RESOURCE_ID_INVALID) + { + // TODO: Flag if clear value is already set. + RPS_ASSERT(accessInfo.access.accessFlags & RPS_ACCESS_RENDER_TARGET_BIT); + + if (!rpInfo.clearOnly) + { + accessInfo.access.accessFlags |= RPS_ACCESS_RENDER_PASS; + } + + renderTargetInfo.numRenderTargets = iRT + 1; + renderTargetInfo.renderTargetFormats[iRT] = accessInfo.viewFormat; + + auto& resInfo = resInstancesRef[accessInfo.resourceId]; + + RPS_V_RETURN(updateRTDimInfo(resInfo, accessInfo)); + + if (rpInfo.renderTargetClearMask & rtSlotBit) + { + accessInfo.access.accessFlags |= RPS_ACCESS_CLEAR_BIT; + + auto& clearValueRef = clearValueRefs[clearValueIndex]; + + RpsClearInfo clearValue = {}; + clearValue.format = accessInfo.viewFormat; + + clearValue.value.color = static_cast( + cmdDef.args[clearValueRef.paramId])[clearValueRef.arrayOffset]; + + context.renderGraph.SetResourceClearValue(resInfo, clearValue); + + clearValueIndex++; + } + } + + rtIndex++; + } + } + + if (rpInfo.depthStencilTargetMask != 0) + { + const auto pDepthStencilRef = rpInfo.GetDepthStencilRef(); + + const uint32_t accessIndex = + nodeDecl.params[pDepthStencilRef->paramId].accessOffset + pDepthStencilRef->arrayOffset; + + RPS_ASSERT(accessIndex < nodeDecl.numAccesses); + auto& accessInfo = pCurrCmdAccesses[accessIndex]; + + if (accessInfo.resourceId != RPS_RESOURCE_ID_INVALID) + { + renderTargetInfo.depthStencilFormat = accessInfo.viewFormat; + + auto& resInfo = resInstancesRef[accessInfo.resourceId]; + + RPS_V_RETURN(updateRTDimInfo(resInfo, accessInfo)); + + if (rpInfo.clearDepth || rpInfo.clearStencil) + { + RpsClearInfo clearValue = {}; + clearValue.format = accessInfo.viewFormat; + + if (!rpInfo.clearOnly) + { + accessInfo.access.accessFlags |= RPS_ACCESS_RENDER_PASS; + } + + if (rpInfo.clearDepth) + { + const auto 
pClearDepthValueRef = rpInfo.GetDepthClearValueRef(); + + clearValue.value.depthStencil.depth = + static_cast(cmdDef.args[pClearDepthValueRef->paramId])[0]; + + accessInfo.access.accessFlags |= RPS_ACCESS_CLEAR_BIT; + } + + if (rpInfo.clearStencil) + { + const auto pClearStencilValueRef = rpInfo.GetStencilClearValueRef(); + + clearValue.value.depthStencil.stencil = + static_cast(cmdDef.args[pClearStencilValueRef->paramId])[0]; + + accessInfo.access.accessFlags |= RPS_ACCESS_CLEAR_BIT; + } + + context.renderGraph.SetResourceClearValue(resInfo, clearValue); + } + } + } + + auto viewportRefs = rpInfo.GetViewportRefs(); + if (viewportRefs.empty()) + { + viewportInfo.numViewports = 1; + viewportInfo.pViewports = context.frameArena.New( + RpsViewport{0, 0, float(minTargetDim[0]), float(minTargetDim[1]), 0.0f, 1.0f}); + } + else + { + bool singleParam = true; // Reference cmd args directly + RpsParamId lastParamId = viewportRefs[0].paramId; + + for (auto& viewportRef : viewportRefs) + { + auto& paramDecl = nodeDecl.params[viewportRef.paramId]; + const uint32_t viewportIndex = paramDecl.baseSemanticIndex + viewportRef.arrayOffset; + + viewportInfo.numViewports = rpsMax(viewportInfo.numViewports, viewportIndex + 1); + + singleParam |= (viewportRef.paramId != lastParamId); + } + + if (singleParam) + { + viewportInfo.pViewports = + static_cast(cmdDef.args[viewportRefs[0].paramId]); + } + else + { + auto viewports = context.frameArena.NewArrayZeroed(viewportInfo.numViewports); + RPS_CHECK_ALLOC(viewports.data()); + + viewportInfo.pViewports = viewports.data(); + + for (auto& viewportRef : viewportRefs) + { + auto& paramAccessInfo = nodeDecl.params[viewportRef.paramId]; + const uint32_t viewportIndex = + paramAccessInfo.baseSemanticIndex + viewportRef.arrayOffset; + + viewports[viewportIndex] = static_cast( + cmdDef.args[viewportRef.paramId])[viewportRef.arrayOffset]; + } + } + } + + auto scissorRefs = rpInfo.GetScissorRefs(); + if (scissorRefs.empty()) + { + 
viewportInfo.numScissorRects = 1; + viewportInfo.pScissorRects = context.frameArena.New( + RpsRect{0, 0, int32_t(minTargetDim[0]), int32_t(minTargetDim[1])}); + } + else + { + bool singleParam = true; // Reference cmd args directly + const RpsParamId lastParamId = scissorRefs[0].paramId; + + for (auto& scissorRef : scissorRefs) + { + auto& paramAccessInfo = nodeDecl.params[scissorRef.paramId]; + const uint32_t scissorIndex = paramAccessInfo.baseSemanticIndex + scissorRef.arrayOffset; + + viewportInfo.numScissorRects = rpsMax(viewportInfo.numScissorRects, scissorIndex + 1); + + singleParam |= (scissorRef.paramId != lastParamId); + } + + if (singleParam) + { + viewportInfo.pScissorRects = + static_cast(cmdDef.args[scissorRefs[0].paramId]); + } + else + { + auto scissorRects = + context.frameArena.NewArrayZeroed(viewportInfo.numScissorRects); + RPS_CHECK_ALLOC(scissorRects.data()); + + viewportInfo.pScissorRects = scissorRects.data(); + + for (auto& scissorRef : scissorRefs) + { + auto& paramAccessInfo = nodeDecl.params[scissorRef.paramId]; + const uint32_t viewportIndex = + paramAccessInfo.baseSemanticIndex + scissorRef.arrayOffset; + + scissorRects[viewportIndex] = static_cast( + cmdDef.args[scissorRef.paramId])[scissorRef.arrayOffset]; + } + } + } + + viewportInfo.defaultRenderArea = RpsRect{0, 0, int32_t(minTargetDim[0]), int32_t(minTargetDim[1])}; + + renderTargetInfo.numSamples = numSamples; + } + } + + // TODO: Combine access flags for temporal slices + + return RPS_OK; + } + + inline RpsResult InitCmdAccessInfo(uint64_t frameIndex, + CmdAccessInfo& accessInfo, + uint32_t paramId, + const NodeParamDecl& paramAccessInfo, + const RpsResourceView& view, + ArrayRef resInstances) + { + RPS_ASSERT(view.resourceId != RPS_RESOURCE_ID_INVALID); + + uint32_t resInstanceId = view.resourceId; + + // Resolve temporal layer + if (resInstances[resInstanceId].desc.temporalLayers > 1) + { + resInstanceId = resInstances[resInstanceId].temporalLayerOffset + + uint32_t(frameIndex 
% resInstances[resInstanceId].desc.temporalLayers); + } + + auto& resInstance = resInstances[resInstanceId]; + RPS_ASSERT(!resInstance.IsTemporalParent()); + + RPS_ASSERT(paramAccessInfo.access.accessFlags != RPS_ACCESS_UNKNOWN); + + accessInfo.resourceId = resInstanceId; + + bool bPendingRecreate = false; + + if (resInstance.desc.IsImage()) + { + auto pImageView = reinterpret_cast(&view); + m_pRuntimeDevice->GetSubresourceRangeFromImageView( + accessInfo.range, resInstance, paramAccessInfo.access, *pImageView); + + if ((view.viewFormat != RPS_FORMAT_UNKNOWN) && (view.viewFormat != resInstance.desc.image.format)) + { + bPendingRecreate |= !resInstance.isMutableFormat; + resInstance.isMutableFormat = true; + } + + if (rpsAnyBitsSet(view.flags, RPS_RESOURCE_VIEW_FLAG_CUBEMAP_BIT)) + { + // TODO: If recreation is needed is a per-API property. + bPendingRecreate |= !(resInstance.desc.flags & RPS_RESOURCE_FLAG_CUBEMAP_COMPATIBLE_BIT); + resInstance.desc.flags |= RPS_RESOURCE_FLAG_CUBEMAP_COMPATIBLE_BIT; + } + } + else if (resInstance.desc.IsBuffer()) + { + accessInfo.range = SubresourceRangePacked(1, 0, 1, 0, 1); + + if (view.viewFormat != RPS_FORMAT_UNKNOWN) + { + if (rpsAnyBitsSet(paramAccessInfo.access.accessFlags, RPS_ACCESS_ALL_GPU_WRITE)) + { + bPendingRecreate |= !resInstance.bBufferFormattedWrite; + resInstance.bBufferFormattedWrite = true; + } + else + { + bPendingRecreate |= !resInstance.bBufferFormattedRead; + resInstance.bBufferFormattedRead = true; + } + } + } + else + { + // Invalid resource type + return RPS_ERROR_INVALID_DATA; + } + + accessInfo.access = paramAccessInfo.access; + accessInfo.viewFormat = + (view.viewFormat != RPS_FORMAT_UNKNOWN) + ? view.viewFormat + : (resInstance.desc.IsImage() ? resInstance.desc.image.format : RPS_FORMAT_UNKNOWN); + accessInfo.pViewInfo = &view; + + // TODO: Consider propagate temporal resource slice access back to parent and all siblings. 
+ + SetRuntimeResourcePendingCreate(resInstance, bPendingRecreate); + + return RPS_OK; + } + + inline void SetRuntimeResourcePendingCreate(ResourceInstance& resourceInstance, bool bPendingCreate) + { + if (bPendingCreate && !resourceInstance.isPendingCreate && !resourceInstance.isExternal) + { + if (resourceInstance.hRuntimeResource) + { + m_pRuntimeBackend->DestroyRuntimeResourceDeferred(resourceInstance); + RPS_ASSERT( + !resourceInstance.hRuntimeResource && + "Bad DestroyRuntimeResourceDeferred implementation - expect hRuntimeResource t= be cleared"); + } + + resourceInstance.allocPlacement = {RPS_INDEX_NONE_U32, 0}; + resourceInstance.isPendingCreate = true; + } + } + + private: + RuntimeDevice* m_pRuntimeDevice = nullptr; + RuntimeBackend* m_pRuntimeBackend = nullptr; + + ArrayRef m_resourceAllAccesses = {}; + }; +} // namespace rps + +#endif //_RPS_PRE_PROCESS_H_ diff --git a/src/runtime/common/phases/rps_schedule_print.hpp b/src/runtime/common/phases/rps_schedule_print.hpp new file mode 100644 index 0000000..36bef9b --- /dev/null +++ b/src/runtime/common/phases/rps_schedule_print.hpp @@ -0,0 +1,132 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#ifndef _RPS_SCHEDULE_PRINT_HPP_ +#define _RPS_SCHEDULE_PRINT_HPP_ + +#include "rps/runtime/common/rps_runtime.h" + +#include "runtime/common/rps_render_graph.hpp" +#include "runtime/common/phases/rps_cmd_print.hpp" + +namespace rps +{ + class ScheduleDebugPrintPhase : public IRenderGraphPhase + { + public: + virtual RpsResult Run(RenderGraphUpdateContext& context) override final + { + RPS_RETURN_OK_IF( + !rpsAnyBitsSet(context.pUpdateInfo->diagnosticFlags, RPS_DIAGNOSTIC_ENABLE_POST_SCHEDULE_DUMP)); + + RenderGraph& renderGraph = context.renderGraph; + + auto runtimeCmds = renderGraph.GetRuntimeCmdInfos().crange_all(); + auto cmdBatches = renderGraph.GetCmdBatches().crange_all(); + auto cmds = renderGraph.GetCmdInfos().crange_all(); + + PrinterRef printer(context.renderGraph.GetDevice().Printer()); + + printer("\nSchedule:"); + + for (uint32_t iBatch = 0; iBatch < cmdBatches.size(); iBatch++) + { + const RpsCommandBatch& batchInfo = cmdBatches[iBatch]; + + printer("\n Batch %u Queue %u:", iBatch, batchInfo.queueIndex); + + if (batchInfo.signalFenceIndex != RPS_INDEX_NONE_U32) + { + printer("\n Signal : %u", batchInfo.signalFenceIndex); + } + + if (batchInfo.numWaitFences > 0) + { + printer("\n Wait : [ "); + + auto waitFenceIds = renderGraph.GetCmdBatchWaitFenceIds().crange_all(); + + for (uint32_t i = 0; i < batchInfo.numWaitFences; i++) + { + printer("%s%u", (i == 0) ? 
"" : ", ", waitFenceIds[batchInfo.waitFencesBegin + i]); + } + printer(" ]"); + } + + for (uint32_t iCmd = batchInfo.waitFencesBegin, cmdEnd = batchInfo.waitFencesBegin + batchInfo.numCmds; + iCmd < cmdEnd; + iCmd++) + { + const RuntimeCmdInfo& runtimeCmd = runtimeCmds[iCmd]; + + printer("\n %4d : ", iCmd); + + if (runtimeCmd.isTransition) + { + // TODO: Move transitions out + PrintTransitionInfo(context, printer, runtimeCmd.GetTransitionId()); + } + else + { + CmdDebugPrintPhase::PrintCmdInfo(context, printer, runtimeCmd.GetCmdId()); + } + } + } + + printer("\n"); + + return RPS_OK; + } + + private: + void PrintTransitionInfo(RenderGraphUpdateContext& context, PrinterRef printer, uint32_t transitionId) const + { + const auto& renderGraph = context.renderGraph; + + if (PrintBuiltInCmdMarker(context, printer, transitionId)) + { + return; + } + + const auto& transInfo = renderGraph.GetTransitionInfo(transitionId); + + renderGraph.PrintTransitionNodeName(printer, transInfo.nodeId); + + printer(" <"); + CmdDebugPrintPhase::PrintResourceReference(context, printer, transInfo.access.resourceId, transInfo.access.range); + printer("> : "); + + const auto& prevTrans = renderGraph.GetTransitionInfo(transInfo.prevTransition); + + printer("("); + AccessAttr(prevTrans.access.access).Print(printer); + printer(")"); + + printer(" => ("); + AccessAttr(transInfo.access.access).Print(printer); + printer(")"); + } + + bool PrintBuiltInCmdMarker(RenderGraphUpdateContext& context, PrinterRef printer, uint32_t transitionId) const + { + switch (transitionId) + { + case CMD_ID_PREAMBLE: + printer(""); + break; + case CMD_ID_POSTAMBLE: + printer(""); + break; + default: + return false; + } + return true; + } + }; +} // namespace rps + +#endif //_RPS_SCHEDULE_PRINT_HPP_ diff --git a/src/runtime/common/rps_access.cpp b/src/runtime/common/rps_access.cpp new file mode 100644 index 0000000..c2a68f6 --- /dev/null +++ b/src/runtime/common/rps_access.cpp @@ -0,0 +1,194 @@ +// Copyright (c) 2022 
Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#include "rps/runtime/common/rps_access.h" +#include "core/rps_util.hpp" + +namespace rps +{ + void AccessAttr::Print(const RpsPrinter& printer) const + { + PrinterRef print(printer); + + static const struct + { + RpsAccessFlagBits flags; + StrRef name; + } c_accessNames[] = { + {RPS_ACCESS_INDIRECT_ARGS_BIT, "indirect_arg"}, + {RPS_ACCESS_INDEX_BUFFER_BIT, "ib"}, + {RPS_ACCESS_VERTEX_BUFFER_BIT, "vb"}, + {RPS_ACCESS_CONSTANT_BUFFER_BIT, "cb"}, + // {RPS_ACCESS_SHADER_RESOURCE_BIT, ""}, + {RPS_ACCESS_SHADING_RATE_BIT, "shading_rate"}, + {RPS_ACCESS_RENDER_TARGET_BIT, "color"}, + {RPS_ACCESS_DEPTH_READ_BIT, "depth_read"}, + {RPS_ACCESS_DEPTH_WRITE_BIT, "depth_write"}, + {RPS_ACCESS_STENCIL_READ_BIT, "stencil_read"}, + {RPS_ACCESS_STENCIL_WRITE_BIT, "stencil_write"}, + // {RPS_ACCESS_UNORDERED_ACCESS_BIT, ""}, + {RPS_ACCESS_STREAM_OUT_BIT, "stream_out"}, + {RPS_ACCESS_COPY_SRC_BIT, "copy_src"}, + {RPS_ACCESS_COPY_DEST_BIT, "copy_dst"}, + {RPS_ACCESS_RESOLVE_SRC_BIT, "resolve_src"}, + {RPS_ACCESS_RESOLVE_DEST_BIT, "resolve_dst"}, + {RPS_ACCESS_RAYTRACING_AS_BUILD_BIT, "rtas_build"}, + {RPS_ACCESS_RAYTRACING_AS_READ_BIT, "rtas_read"}, + {RPS_ACCESS_PRESENT_BIT, "present"}, + {RPS_ACCESS_CPU_READ_BIT, "cpu_read"}, + {RPS_ACCESS_CPU_WRITE_BIT, "cpu_write"}, + + // Special flags + {RPS_ACCESS_RENDER_PASS, "render_pass"}, + {RPS_ACCESS_BEFORE_BIT, "before"}, + {RPS_ACCESS_AFTER_BIT, "after"}, + {RPS_ACCESS_CLEAR_BIT, "clear"}, + {RPS_ACCESS_DISCARD_OLD_DATA_BIT, "discard"}, + {RPS_ACCESS_RELAXED_ORDER_BIT, "relaxed"}, + {RPS_ACCESS_NO_VIEW_BIT, "no_view"}, + }; + + static const struct + { + RpsShaderStageBits stage; + const StrRef name; + } c_shaderStageNames[] = { + {RPS_SHADER_STAGE_VS, "vs"}, + {RPS_SHADER_STAGE_PS, "ps"}, + 
{RPS_SHADER_STAGE_GS, "gs"}, + {RPS_SHADER_STAGE_CS, "cs"}, + {RPS_SHADER_STAGE_HS, "hs"}, + {RPS_SHADER_STAGE_DS, "ds"}, + {RPS_SHADER_STAGE_RAYTRACING, "rt"}, + {RPS_SHADER_STAGE_AS, "as"}, + {RPS_SHADER_STAGE_MS, "ms"}, + }; + + // Prints the names of all stages set in accessStages as a comma separated list. + auto printShaderStages = [&]() { + uint32_t accessStagesMask = accessStages; + for (auto i = std::begin(c_shaderStageNames), e = std::end(c_shaderStageNames); i != e; ++i) + { + if (i->stage & accessStagesMask) + { + print("%s%.*s", (accessStagesMask == accessStages) ? "" : ", ", i->name.len, i->name.str); + accessStagesMask &= ~i->stage; + } + } + + if (accessStagesMask != 0) + { + // Remaining bits have no entry in c_shaderStageNames. A separator is only needed if a named stage + // was printed before, i.e. if the mask no longer equals the original accessStages value. + print("%sRpsShaderStageBits(0x%x)", (accessStagesMask == accessStages) ? "" : ", ", accessStagesMask); + } + }; + + if (accessFlags != RPS_ACCESS_UNKNOWN) + { + uint32_t accessFlagsMask = accessFlags; + + for (auto i = std::begin(c_accessNames), e = std::end(c_accessNames); i != e; ++i) + { + if (i->flags & accessFlagsMask) + { + print("%s%.*s", (accessFlagsMask == accessFlags) ? "" : ", ", i->name.len, i->name.str); + accessFlagsMask &= ~i->flags; + } + } + + if (rpsAnyBitsSet(accessFlags, RPS_ACCESS_UNORDERED_ACCESS_BIT)) + { + print((accessFlagsMask == accessFlags) ? "" : ", "); + accessFlagsMask &= ~RPS_ACCESS_UNORDERED_ACCESS_BIT; + + print("uav("); + printShaderStages(); + print(")"); + } + + if (rpsAnyBitsSet(accessFlags, RPS_ACCESS_SHADER_RESOURCE_BIT)) + { + print((accessFlagsMask == accessFlags) ? "" : ", "); + accessFlagsMask &= ~RPS_ACCESS_SHADER_RESOURCE_BIT; + + print("srv("); + printShaderStages(); + print(")"); + } + + if (accessFlagsMask != 0) + { + print("%sRpsAccessFlags(0x%x)", (accessFlagsMask == accessFlags) ?
"" : ", ", accessFlagsMask); + } + } + else + { + print("*"); + } + } + + void SemanticAttr::Print(const RpsPrinter& printer) const + { + // clang-format off + static const StrRef c_semanticNames[] = { + "", //RPS_SEMANTIC_UNSPECIFIED = 0, + + "VertexShader", //RPS_SEMANTIC_VERTEX_SHADER, + "PixelShader", //RPS_SEMANTIC_PIXEL_SHADER, + "GeometryShader", //RPS_SEMANTIC_GEOMETRY_SHADER, + "ComputeShader", //RPS_SEMANTIC_COMPUTE_SHADER, + "HullShader", //RPS_SEMANTIC_HULL_SHADER, + "DomainShader", //RPS_SEMANTIC_DOMAIN_SHADER, + "RaytracingPipeline", //RPS_SEMANTIC_RAYTRACING_PIPELINE, + "AmplificationShader", //RPS_SEMANTIC_AMPLIFICATION_SHADER, + "MeshShader", //RPS_SEMANTIC_MESHS_HADER, + + "VertexLayout", //RPS_SEMANTIC_VERTEX_LAYOUT, + "StreamOutLayout", //RPS_SEMANTIC_STREAM_OUT_LAYOUT, + "StreamOutDesc", //RPS_SEMANTIC_STREAM_OUT_DESC, + "BlendState", //RPS_SEMANTIC_BLEND_STATE, + "RenderTargetBlend", //RPS_SEMANTIC_RENDER_TARGET_BLEND, + "DepthStencil", //RPS_SEMANTIC_DEPTH_STENCIL_STATE, + "RasterizerState", //RPS_SEMANTIC_RASTERIZER_STATE, + + "SV_Viewport", //RPS_SEMANTIC_VIEWPORT = RPS_SEMANTIC_DYNAMIC_STATE_BEGIN, + "SV_ScissorRect", //RPS_SEMANTIC_SCISSOR, + "SV_PrimitiveTopology", //RPS_SEMANTIC_PRIMITIVE_TOPOLOGY, + "SV_PatchControlPoints", //RPS_SEMANTIC_PATCH_CONTROL_POINTS, + "SV_PrimitiveStripCutIndex",//RPS_SEMANTIC_PRIMITIVE_STRIP_CUT_INDEX, + "SV_BlendFactor", //RPS_SEMANTIC_BLEND_FACTOR, + "SV_StencilRef", //RPS_SEMANTIC_STENCIL_REF, + "SV_DepthBounds", //RPS_SEMANTIC_DEPTH_BOUNDS, + "SV_SampleLocation", //RPS_SEMANTIC_SAMPLE_LOCATION, + "SV_ShadingRate", //RPS_SEMANTIC_SHADING_RATE, + "SV_ClearColor", //RPS_SEMANTIC_COLOR_CLEAR_VALUE, + "SV_ClearDepth", //RPS_SEMANTIC_DEPTH_CLEAR_VALUE, + "SV_ClearStencil", //RPS_SEMANTIC_STENCIL_CLEAR_VALUE, + + "SV_VertexBuffer", //RPS_SEMANTIC_VERTEX_BUFFER = RPS_SEMANTIC_RESOURCE_BINDING_BEGIN, + "SV_IndexBuffer", //RPS_SEMANTIC_INDEX_BUFFER, + "SV_IndirectArgs", //RPS_SEMANTIC_INDIRECT_ARGS, + 
"SV_IndirectCount", //RPS_SEMANTIC_INDIRECT_COUNT, + "SV_StreamOutBuffer", //RPS_SEMANTIC_STREAM_OUT_BUFFER, + "SV_Target", //RPS_SEMANTIC_RENDER_TARGET, + "SV_DepthStencil", //RPS_SEMANTIC_DEPTH_STENCIL_TARGET, + "SV_ShadingRateImage", //RPS_SEMANTIC_SHADING_RATE_IMAGE, + "SV_ResolveTarget", //RPS_SEMANTIC_RESOLVE_TARGET, + }; + // clang-format on + + static_assert(RPS_COUNTOF(c_semanticNames) == RPS_SEMANTIC_USER_RESOURCE_BINDING, "SemanticName needs update"); + + auto print = PrinterRef(printer); + + if (semantic != RPS_SEMANTIC_UNSPECIFIED) + { + if (size_t(semantic) < RPS_COUNTOF(c_semanticNames)) + print(" : %.*s[%u]", c_semanticNames[semantic].len, c_semanticNames[semantic].str, semanticIndex); + else + print(" : [%u]", semantic, semanticIndex); + } + } +} // namespace rps diff --git a/src/runtime/common/rps_cmd_buf.hpp b/src/runtime/common/rps_cmd_buf.hpp new file mode 100644 index 0000000..1ce2036 --- /dev/null +++ b/src/runtime/common/rps_cmd_buf.hpp @@ -0,0 +1,48 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_CMD_BUF_HPP_ +#define _RPS_CMD_BUF_HPP_ + +#include "core/rps_core.hpp" +#include "core/rps_util.hpp" +#include "core/rps_device.hpp" + +#include "rps/runtime/common/rps_runtime.h" + +namespace rps +{ + enum BuiltInNodeDeclIds + { + RPS_BUILTIN_NODE_INVALID = -1, + RPS_BUILTIN_NODE_SCHEDULER_BARRIER = -2, ///< Built-in node to mark a scheduler barrier. + RPS_BUILTIN_NODE_SUBGRAPH_BEGIN = -3, ///< Built-in node to mark beginning of a subgraph. + RPS_BUILTIN_NODE_SUBGRAPH_END = -4, ///< Built-in node to mark ending of a subgraph. 
+ RPS_BUILTIN_NODE_BEGIN_SUBROUTINE = -5, + RPS_BUILTIN_NODE_END_SUBROUTINE = -6, + + RPS_BUILTIN_NODE_FORCE_INT32 = INT32_MIN, + }; + + struct Cmd + { + RpsNodeDeclId nodeDeclId = RPS_NODEDECL_ID_INVALID; + uint32_t programInstanceId = RPS_INDEX_NONE_U32; + uint32_t tag = 0; + ArrayRef args; + RpsCmdCallback callback; + }; + + struct NodeDependency + { + RpsNodeId before; + RpsNodeId after; + }; + +} // namespace rps + +#endif // _RPS_CMD_BUF_HPP_ diff --git a/src/runtime/common/rps_format.cpp b/src/runtime/common/rps_format.cpp new file mode 100644 index 0000000..65fe78d --- /dev/null +++ b/src/runtime/common/rps_format.cpp @@ -0,0 +1,317 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#include "rps/runtime/common/rps_format.h" +#include "core/rps_util.hpp" + +uint32_t rpsGetFormatElementBytes(RpsFormat format) +{ + static const uint32_t s_Sizes[RPS_FORMAT_COUNT] = { + 0, // RPS_FORMAT_UNKNOWN, + 16, // RPS_FORMAT_R32G32B32A32_TYPELESS, + 16, // RPS_FORMAT_R32G32B32A32_FLOAT, + 16, // RPS_FORMAT_R32G32B32A32_UINT, + 16, // RPS_FORMAT_R32G32B32A32_SINT, + 12, // RPS_FORMAT_R32G32B32_TYPELESS, + 12, // RPS_FORMAT_R32G32B32_FLOAT, + 12, // RPS_FORMAT_R32G32B32_UINT, + 12, // RPS_FORMAT_R32G32B32_SINT, + 8, // RPS_FORMAT_R16G16B16A16_TYPELESS, + 8, // RPS_FORMAT_R16G16B16A16_FLOAT, + 8, // RPS_FORMAT_R16G16B16A16_UNORM, + 8, // RPS_FORMAT_R16G16B16A16_UINT, + 8, // RPS_FORMAT_R16G16B16A16_SNORM, + 8, // RPS_FORMAT_R16G16B16A16_SINT, + 8, // RPS_FORMAT_R32G32_TYPELESS, + 8, // RPS_FORMAT_R32G32_FLOAT, + 8, // RPS_FORMAT_R32G32_UINT, + 8, // RPS_FORMAT_R32G32_SINT, + 8, // RPS_FORMAT_R32G8X24_TYPELESS, + 8, // RPS_FORMAT_D32_FLOAT_S8X24_UINT, + 8, // RPS_FORMAT_R32_FLOAT_X8X24_TYPELESS, + 8, // RPS_FORMAT_X32_TYPELESS_G8X24_UINT, + 4, // 
RPS_FORMAT_R10G10B10A2_TYPELESS, + 4, // RPS_FORMAT_R10G10B10A2_UNORM, + 4, // RPS_FORMAT_R10G10B10A2_UINT, + 4, // RPS_FORMAT_R11G11B10_FLOAT, + 4, // RPS_FORMAT_R8G8B8A8_TYPELESS, + 4, // RPS_FORMAT_R8G8B8A8_UNORM, + 4, // RPS_FORMAT_R8G8B8A8_UNORM_SRGB, + 4, // RPS_FORMAT_R8G8B8A8_UINT, + 4, // RPS_FORMAT_R8G8B8A8_SNORM, + 4, // RPS_FORMAT_R8G8B8A8_SINT, + 4, // RPS_FORMAT_R16G16_TYPELESS, + 4, // RPS_FORMAT_R16G16_FLOAT, + 4, // RPS_FORMAT_R16G16_UNORM, + 4, // RPS_FORMAT_R16G16_UINT, + 4, // RPS_FORMAT_R16G16_SNORM, + 4, // RPS_FORMAT_R16G16_SINT, + 4, // RPS_FORMAT_R32_TYPELESS, + 4, // RPS_FORMAT_D32_FLOAT, + 4, // RPS_FORMAT_R32_FLOAT, + 4, // RPS_FORMAT_R32_UINT, + 4, // RPS_FORMAT_R32_SINT, + 4, // RPS_FORMAT_R24G8_TYPELESS, + 4, // RPS_FORMAT_D24_UNORM_S8_UINT, + 4, // RPS_FORMAT_R24_UNORM_X8_TYPELESS, + 4, // RPS_FORMAT_X24_TYPELESS_G8_UINT, + 2, // RPS_FORMAT_R8G8_TYPELESS, + 2, // RPS_FORMAT_R8G8_UNORM, + 2, // RPS_FORMAT_R8G8_UINT, + 2, // RPS_FORMAT_R8G8_SNORM, + 2, // RPS_FORMAT_R8G8_SINT, + 2, // RPS_FORMAT_R16_TYPELESS, + 2, // RPS_FORMAT_R16_FLOAT, + 2, // RPS_FORMAT_D16_UNORM, + 2, // RPS_FORMAT_R16_UNORM, + 2, // RPS_FORMAT_R16_UINT, + 2, // RPS_FORMAT_R16_SNORM, + 2, // RPS_FORMAT_R16_SINT, + 1, // RPS_FORMAT_R8_TYPELESS, + 1, // RPS_FORMAT_R8_UNORM, + 1, // RPS_FORMAT_R8_UINT, + 1, // RPS_FORMAT_R8_SNORM, + 1, // RPS_FORMAT_R8_SINT, + 1, // RPS_FORMAT_A8_UNORM, + 0, // RPS_FORMAT_R1_UNORM, + 4, // RPS_FORMAT_R9G9B9E5_SHAREDEXP, + 2, // RPS_FORMAT_R8G8_B8G8_UNORM, + 2, // RPS_FORMAT_G8R8_G8B8_UNORM, + 8, // RPS_FORMAT_BC1_TYPELESS, + 8, // RPS_FORMAT_BC1_UNORM, + 8, // RPS_FORMAT_BC1_UNORM_SRGB, + 16, // RPS_FORMAT_BC2_TYPELESS, + 16, // RPS_FORMAT_BC2_UNORM, + 16, // RPS_FORMAT_BC2_UNORM_SRGB, + 16, // RPS_FORMAT_BC3_TYPELESS, + 16, // RPS_FORMAT_BC3_UNORM, + 16, // RPS_FORMAT_BC3_UNORM_SRGB, + 8, // RPS_FORMAT_BC4_TYPELESS, + 8, // RPS_FORMAT_BC4_UNORM, + 8, // RPS_FORMAT_BC4_SNORM, + 16, // RPS_FORMAT_BC5_TYPELESS, + 16, // 
RPS_FORMAT_BC5_UNORM, + 16, // RPS_FORMAT_BC5_SNORM, + 2, // RPS_FORMAT_B5G6R5_UNORM, + 2, // RPS_FORMAT_B5G5R5A1_UNORM, + 4, // RPS_FORMAT_B8G8R8A8_UNORM, + 4, // RPS_FORMAT_B8G8R8X8_UNORM, + 4, // RPS_FORMAT_R10G10B10_XR_BIAS_A2_UNORM, + 4, // RPS_FORMAT_B8G8R8A8_TYPELESS, + 4, // RPS_FORMAT_B8G8R8A8_UNORM_SRGB, + 4, // RPS_FORMAT_B8G8R8X8_TYPELESS, + 4, // RPS_FORMAT_B8G8R8X8_UNORM_SRGB, + 16, // RPS_FORMAT_BC6H_TYPELESS, + 16, // RPS_FORMAT_BC6H_UF16, + 16, // RPS_FORMAT_BC6H_SF16, + 16, // RPS_FORMAT_BC7_TYPELESS, + 16, // RPS_FORMAT_BC7_UNORM, + 16, // RPS_FORMAT_BC7_UNORM_SRGB, + 0, // RPS_FORMAT_AYUV, + 0, // RPS_FORMAT_Y410, + 0, // RPS_FORMAT_Y416, + 0, // RPS_FORMAT_NV12, + 0, // RPS_FORMAT_P010, + 0, // RPS_FORMAT_P016, + 0, // RPS_FORMAT_420_OPAQUE, + 0, // RPS_FORMAT_YUY2, + 0, // RPS_FORMAT_Y210, + 0, // RPS_FORMAT_Y216, + 0, // RPS_FORMAT_NV11, + 0, // RPS_FORMAT_AI44, + 0, // RPS_FORMAT_IA44, + 0, // RPS_FORMAT_P8, + 0, // RPS_FORMAT_A8P8, + 2, // RPS_FORMAT_B4G4R4A4_UNORM, + // + // RPS_FORMAT_COUNT, + }; + + return (format < RPS_FORMAT_COUNT) ? 
s_Sizes[format] : 0; +} + +RpsBool rpsFormatIsBlockCompressed(RpsFormat format) +{ + return (((format >= RPS_FORMAT_BC1_TYPELESS) && (format <= RPS_FORMAT_BC5_SNORM)) || + ((format >= RPS_FORMAT_BC6H_TYPELESS) && (format <= RPS_FORMAT_BC7_UNORM_SRGB))); +} + +const char* rpsFormatGetName(RpsFormat format) +{ + static constexpr const char* fmtNames[] = { + "UNKNOWN", + "R32G32B32A32_TYPELESS", + "R32G32B32A32_FLOAT", + "R32G32B32A32_UINT", + "R32G32B32A32_SINT", + "R32G32B32_TYPELESS", + "R32G32B32_FLOAT", + "R32G32B32_UINT", + "R32G32B32_SINT", + "R16G16B16A16_TYPELESS", + "R16G16B16A16_FLOAT", + "R16G16B16A16_UNORM", + "R16G16B16A16_UINT", + "R16G16B16A16_SNORM", + "R16G16B16A16_SINT", + "R32G32_TYPELESS", + "R32G32_FLOAT", + "R32G32_UINT", + "R32G32_SINT", + "R32G8X24_TYPELESS", + "D32_FLOAT_S8X24_UINT", + "R32_FLOAT_X8X24_TYPELESS", + "X32_TYPELESS_G8X24_UINT", + "R10G10B10A2_TYPELESS", + "R10G10B10A2_UNORM", + "R10G10B10A2_UINT", + "R11G11B10_FLOAT", + "R8G8B8A8_TYPELESS", + "R8G8B8A8_UNORM", + "R8G8B8A8_UNORM_SRGB", + "R8G8B8A8_UINT", + "R8G8B8A8_SNORM", + "R8G8B8A8_SINT", + "R16G16_TYPELESS", + "R16G16_FLOAT", + "R16G16_UNORM", + "R16G16_UINT", + "R16G16_SNORM", + "R16G16_SINT", + "R32_TYPELESS", + "D32_FLOAT", + "R32_FLOAT", + "R32_UINT", + "R32_SINT", + "R24G8_TYPELESS", + "D24_UNORM_S8_UINT", + "R24_UNORM_X8_TYPELESS", + "X24_TYPELESS_G8_UINT", + "R8G8_TYPELESS", + "R8G8_UNORM", + "R8G8_UINT", + "R8G8_SNORM", + "R8G8_SINT", + "R16_TYPELESS", + "R16_FLOAT", + "D16_UNORM", + "R16_UNORM", + "R16_UINT", + "R16_SNORM", + "R16_SINT", + "R8_TYPELESS", + "R8_UNORM", + "R8_UINT", + "R8_SNORM", + "R8_SINT", + "A8_UNORM", + "R1_UNORM", + "R9G9B9E5_SHAREDEXP", + "R8G8_B8G8_UNORM", + "G8R8_G8B8_UNORM", + "BC1_TYPELESS", + "BC1_UNORM", + "BC1_UNORM_SRGB", + "BC2_TYPELESS", + "BC2_UNORM", + "BC2_UNORM_SRGB", + "BC3_TYPELESS", + "BC3_UNORM", + "BC3_UNORM_SRGB", + "BC4_TYPELESS", + "BC4_UNORM", + "BC4_SNORM", + "BC5_TYPELESS", + "BC5_UNORM", + "BC5_SNORM", + 
"B5G6R5_UNORM", + "B5G5R5A1_UNORM", + "B8G8R8A8_UNORM", + "B8G8R8X8_UNORM", + "R10G10B10_XR_BIAS_A2_UNORM", + "B8G8R8A8_TYPELESS", + "B8G8R8A8_UNORM_SRGB", + "B8G8R8X8_TYPELESS", + "B8G8R8X8_UNORM_SRGB", + "BC6H_TYPELESS", + "BC6H_UF16", + "BC6H_SF16", + "BC7_TYPELESS", + "BC7_UNORM", + "BC7_UNORM_SRGB", + "AYUV", + "Y410", + "Y416", + "NV12", + "P010", + "P016", + "420_OPAQUE", + "YUY2", + "Y210", + "Y216", + "NV11", + "AI44", + "IA44", + "P8", + "A8P8", + "B4G4R4A4_UNORM", + }; + + static_assert(RPS_COUNTOF(fmtNames) == RPS_FORMAT_COUNT, "Format name table needs update."); + + return fmtNames[(format < RPS_FORMAT_COUNT) ? format : RPS_FORMAT_UNKNOWN]; +} + +RpsBool rpsFormatHasDepth(RpsFormat format) +{ + switch (format) + { + case RPS_FORMAT_D32_FLOAT: + case RPS_FORMAT_D16_UNORM: + case RPS_FORMAT_R32G8X24_TYPELESS: + case RPS_FORMAT_D32_FLOAT_S8X24_UINT: + case RPS_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case RPS_FORMAT_X32_TYPELESS_G8X24_UINT: + case RPS_FORMAT_R24G8_TYPELESS: + case RPS_FORMAT_D24_UNORM_S8_UINT: + case RPS_FORMAT_R24_UNORM_X8_TYPELESS: + case RPS_FORMAT_X24_TYPELESS_G8_UINT: + return RPS_TRUE; + default: + break; + } + return RPS_FALSE; +} + +RpsBool rpsFormatHasStencil(RpsFormat format) +{ + switch (format) + { + case RPS_FORMAT_R32G8X24_TYPELESS: + case RPS_FORMAT_D32_FLOAT_S8X24_UINT: + case RPS_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case RPS_FORMAT_X32_TYPELESS_G8X24_UINT: + case RPS_FORMAT_R24G8_TYPELESS: + case RPS_FORMAT_D24_UNORM_S8_UINT: + case RPS_FORMAT_R24_UNORM_X8_TYPELESS: + case RPS_FORMAT_X24_TYPELESS_G8_UINT: + return RPS_TRUE; + default: + break; + } + return RPS_FALSE; +} + +RpsBool rpsFormatHasDepthStencil(RpsFormat format) +{ + return rpsFormatHasDepth(format); +} + +RpsBool rpsFormatIsDepthOnly(RpsFormat format) +{ + return rpsFormatHasDepth(format) && !rpsFormatHasStencil(format); +} diff --git a/src/runtime/common/rps_null_runtime_backend.cpp b/src/runtime/common/rps_null_runtime_backend.cpp new file mode 100644 index 
0000000..d4b00b4 --- /dev/null +++ b/src/runtime/common/rps_null_runtime_backend.cpp @@ -0,0 +1,70 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#include "runtime/common/rps_render_graph.hpp" + +#include "runtime/common/rps_render_graph.hpp" + +namespace rps +{ + RpsResult NullRuntimeBackend::CreateHeaps(const RenderGraphUpdateContext& context, ArrayRef heaps) + { + for (HeapInfo& heapInfo : heaps) + { + if (heapInfo.hRuntimeHeap == RPS_NULL_HANDLE) + { + //Set dummy heap handle + ++m_heapCounter; + heapInfo.hRuntimeHeap = rpsNullRuntimeHeapToHandle(reinterpret_cast(m_heapCounter)); + } + } + + return RPS_OK; + } + + RpsResult NullRuntimeBackend::RecordCommands(const RenderGraph& renderGraph, + const RpsRenderGraphRecordCommandInfo& recordInfo) const + { + //Fallback cmd recording for null runtime and missing runtimes + RuntimeCmdCallbackContext cmdCbCtx{nullptr, recordInfo}; + + for (uint32_t rtCmdId = recordInfo.cmdBeginIndex, + rtCmdIdEnd = uint32_t(recordInfo.cmdBeginIndex + recordInfo.numCmds); + rtCmdId < rtCmdIdEnd; + rtCmdId++) + { + const RuntimeCmdInfo& runtimeCmdInfo = renderGraph.GetRuntimeCmdInfos()[rtCmdId]; + + if (runtimeCmdInfo.isTransition) + continue; + + const CmdInfo& cmdInfo = renderGraph.GetCmdInfos()[runtimeCmdInfo.cmdId]; + const Cmd& cmd = *cmdInfo.pCmdDecl; + + if (cmd.callback.pfnCallback) + { + cmdCbCtx.pCmdCallbackContext = cmd.callback.pUserContext; + cmdCbCtx.ppArgs = cmd.args.data(); + cmdCbCtx.numArgs = uint32_t(cmd.args.size()); + cmdCbCtx.userTag = cmd.tag; + cmdCbCtx.pCmd = &cmd; + cmdCbCtx.cmdId = runtimeCmdInfo.cmdId; + + cmd.callback.pfnCallback(&cmdCbCtx); + + RPS_V_RETURN(cmdCbCtx.result); + } + } + + return RPS_OK; + } + + void NullRuntimeBackend::DestroyRuntimeResourceDeferred(ResourceInstance& resource) + { 
+ } + +} // namespace rps diff --git a/src/runtime/common/rps_null_runtime_device.cpp b/src/runtime/common/rps_null_runtime_device.cpp new file mode 100644 index 0000000..2e94f13 --- /dev/null +++ b/src/runtime/common/rps_null_runtime_device.cpp @@ -0,0 +1,314 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#include "core/rps_util.hpp" +#include "core/rps_device.hpp" + +#include "runtime/common/rps_runtime_device.hpp" +#include "runtime/common/rps_runtime_util.hpp" + +#include "runtime/common/phases/rps_pre_process.hpp" +#include "runtime/common/phases/rps_dag_build.h" +#include "runtime/common/phases/rps_access_dag_build.hpp" +#include "runtime/common/phases/rps_cmd_print.hpp" +#include "runtime/common/phases/rps_cmd_dag_print.hpp" +#include "runtime/common/phases/rps_dag_schedule.hpp" +#include "runtime/common/phases/rps_memory_schedule.hpp" +#include "runtime/common/phases/rps_schedule_print.hpp" + +namespace rps +{ + // Generic, Null-Runtime implementations. + // Actual runtime device implementations should query runtime APIs to get these information. 
+ static uint32_t GetFormatPlaneCount(RpsFormat format) + { + switch (format) + { + case RPS_FORMAT_R32G8X24_TYPELESS: + case RPS_FORMAT_D32_FLOAT_S8X24_UINT: + case RPS_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case RPS_FORMAT_X32_TYPELESS_G8X24_UINT: + case RPS_FORMAT_R24G8_TYPELESS: + case RPS_FORMAT_D24_UNORM_S8_UINT: + case RPS_FORMAT_R24_UNORM_X8_TYPELESS: + case RPS_FORMAT_X24_TYPELESS_G8_UINT: + return 2; + default: + return 1; + } + } + + static uint32_t GetFormatAspectMask(RpsFormat format) + { + switch (format) + { + case RPS_FORMAT_R32G8X24_TYPELESS: + case RPS_FORMAT_D32_FLOAT_S8X24_UINT: + case RPS_FORMAT_R24G8_TYPELESS: + case RPS_FORMAT_D24_UNORM_S8_UINT: + return 0x3; + case RPS_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case RPS_FORMAT_R24_UNORM_X8_TYPELESS: + return 0x1; + case RPS_FORMAT_X32_TYPELESS_G8X24_UINT: + case RPS_FORMAT_X24_TYPELESS_G8_UINT: + return 0x2; + default: + break; + } + return 0x1; + } + + static uint32_t CalcSubresourceCount(const ResourceDescPacked& desc) + { + return desc.IsBuffer() ? 1 + : (((desc.type == RPS_RESOURCE_TYPE_IMAGE_3D) ? 1 : desc.image.arrayLayers) * + desc.image.mipLevels * GetFormatPlaneCount(desc.image.format)); + } + + static uint32_t GetResourceAspectMask(const ResourceDescPacked& resDesc) + { + return resDesc.IsBuffer() ? 1 : GetFormatAspectMask(resDesc.image.format); + } + + RPS_MAYBE_UNUSED + static uint32_t GetViewAspectMask(const ResourceDescPacked& resDesc, const RpsImageView& imageView) + { + RPS_ASSERT(!resDesc.IsBuffer()); + + const RpsFormat viewForamt = + (imageView.base.viewFormat != RPS_FORMAT_UNKNOWN) ? 
imageView.base.viewFormat : resDesc.image.format; + + return GetFormatAspectMask(viewForamt); + } + + static uint32_t GetFormatElementBytes(RpsFormat format) + { + static const uint32_t s_Sizes[RPS_FORMAT_COUNT] = { + 0, // RPS_FORMAT_UNKNOWN, + 16, // RPS_FORMAT_R32G32B32A32_TYPELESS, + 16, // RPS_FORMAT_R32G32B32A32_FLOAT, + 16, // RPS_FORMAT_R32G32B32A32_UINT, + 16, // RPS_FORMAT_R32G32B32A32_SINT, + 12, // RPS_FORMAT_R32G32B32_TYPELESS, + 12, // RPS_FORMAT_R32G32B32_FLOAT, + 12, // RPS_FORMAT_R32G32B32_UINT, + 12, // RPS_FORMAT_R32G32B32_SINT, + 8, // RPS_FORMAT_R16G16B16A16_TYPELESS, + 8, // RPS_FORMAT_R16G16B16A16_FLOAT, + 8, // RPS_FORMAT_R16G16B16A16_UNORM, + 8, // RPS_FORMAT_R16G16B16A16_UINT, + 8, // RPS_FORMAT_R16G16B16A16_SNORM, + 8, // RPS_FORMAT_R16G16B16A16_SINT, + 8, // RPS_FORMAT_R32G32_TYPELESS, + 8, // RPS_FORMAT_R32G32_FLOAT, + 8, // RPS_FORMAT_R32G32_UINT, + 8, // RPS_FORMAT_R32G32_SINT, + 8, // RPS_FORMAT_R32G8X24_TYPELESS, + 8, // RPS_FORMAT_D32_FLOAT_S8X24_UINT, + 8, // RPS_FORMAT_R32_FLOAT_X8X24_TYPELESS, + 8, // RPS_FORMAT_X32_TYPELESS_G8X24_UINT, + 4, // RPS_FORMAT_R10G10B10A2_TYPELESS, + 4, // RPS_FORMAT_R10G10B10A2_UNORM, + 4, // RPS_FORMAT_R10G10B10A2_UINT, + 4, // RPS_FORMAT_R11G11B10_FLOAT, + 4, // RPS_FORMAT_R8G8B8A8_TYPELESS, + 4, // RPS_FORMAT_R8G8B8A8_UNORM, + 4, // RPS_FORMAT_R8G8B8A8_UNORM_SRGB, + 4, // RPS_FORMAT_R8G8B8A8_UINT, + 4, // RPS_FORMAT_R8G8B8A8_SNORM, + 4, // RPS_FORMAT_R8G8B8A8_SINT, + 4, // RPS_FORMAT_R16G16_TYPELESS, + 4, // RPS_FORMAT_R16G16_FLOAT, + 4, // RPS_FORMAT_R16G16_UNORM, + 4, // RPS_FORMAT_R16G16_UINT, + 4, // RPS_FORMAT_R16G16_SNORM, + 4, // RPS_FORMAT_R16G16_SINT, + 4, // RPS_FORMAT_R32_TYPELESS, + 4, // RPS_FORMAT_D32_FLOAT, + 4, // RPS_FORMAT_R32_FLOAT, + 4, // RPS_FORMAT_R32_UINT, + 4, // RPS_FORMAT_R32_SINT, + 4, // RPS_FORMAT_R24G8_TYPELESS, + 4, // RPS_FORMAT_D24_UNORM_S8_UINT, + 4, // RPS_FORMAT_R24_UNORM_X8_TYPELESS, + 4, // RPS_FORMAT_X24_TYPELESS_G8_UINT, + 2, // RPS_FORMAT_R8G8_TYPELESS, + 
2, // RPS_FORMAT_R8G8_UNORM, + 2, // RPS_FORMAT_R8G8_UINT, + 2, // RPS_FORMAT_R8G8_SNORM, + 2, // RPS_FORMAT_R8G8_SINT, + 2, // RPS_FORMAT_R16_TYPELESS, + 2, // RPS_FORMAT_R16_FLOAT, + 2, // RPS_FORMAT_D16_UNORM, + 2, // RPS_FORMAT_R16_UNORM, + 2, // RPS_FORMAT_R16_UINT, + 2, // RPS_FORMAT_R16_SNORM, + 2, // RPS_FORMAT_R16_SINT, + 1, // RPS_FORMAT_R8_TYPELESS, + 1, // RPS_FORMAT_R8_UNORM, + 1, // RPS_FORMAT_R8_UINT, + 1, // RPS_FORMAT_R8_SNORM, + 1, // RPS_FORMAT_R8_SINT, + 1, // RPS_FORMAT_A8_UNORM, + 0, // RPS_FORMAT_R1_UNORM, + 4, // RPS_FORMAT_R9G9B9E5_SHAREDEXP, + 2, // RPS_FORMAT_R8G8_B8G8_UNORM, + 2, // RPS_FORMAT_G8R8_G8B8_UNORM, + 8, // RPS_FORMAT_BC1_TYPELESS, + 8, // RPS_FORMAT_BC1_UNORM, + 8, // RPS_FORMAT_BC1_UNORM_SRGB, + 16, // RPS_FORMAT_BC2_TYPELESS, + 16, // RPS_FORMAT_BC2_UNORM, + 16, // RPS_FORMAT_BC2_UNORM_SRGB, + 16, // RPS_FORMAT_BC3_TYPELESS, + 16, // RPS_FORMAT_BC3_UNORM, + 16, // RPS_FORMAT_BC3_UNORM_SRGB, + 8, // RPS_FORMAT_BC4_TYPELESS, + 8, // RPS_FORMAT_BC4_UNORM, + 8, // RPS_FORMAT_BC4_SNORM, + 16, // RPS_FORMAT_BC5_TYPELESS, + 16, // RPS_FORMAT_BC5_UNORM, + 16, // RPS_FORMAT_BC5_SNORM, + 2, // RPS_FORMAT_B5G6R5_UNORM, + 2, // RPS_FORMAT_B5G5R5A1_UNORM, + 4, // RPS_FORMAT_B8G8R8A8_UNORM, + 4, // RPS_FORMAT_B8G8R8X8_UNORM, + 4, // RPS_FORMAT_R10G10B10_XR_BIAS_A2_UNORM, + 4, // RPS_FORMAT_B8G8R8A8_TYPELESS, + 4, // RPS_FORMAT_B8G8R8A8_UNORM_SRGB, + 4, // RPS_FORMAT_B8G8R8X8_TYPELESS, + 4, // RPS_FORMAT_B8G8R8X8_UNORM_SRGB, + 16, // RPS_FORMAT_BC6H_TYPELESS, + 16, // RPS_FORMAT_BC6H_UF16, + 16, // RPS_FORMAT_BC6H_SF16, + 16, // RPS_FORMAT_BC7_TYPELESS, + 16, // RPS_FORMAT_BC7_UNORM, + 16, // RPS_FORMAT_BC7_UNORM_SRGB, + 0, // RPS_FORMAT_AYUV, + 0, // RPS_FORMAT_Y410, + 0, // RPS_FORMAT_Y416, + 0, // RPS_FORMAT_NV12, + 0, // RPS_FORMAT_P010, + 0, // RPS_FORMAT_P016, + 0, // RPS_FORMAT_420_OPAQUE, + 0, // RPS_FORMAT_YUY2, + 0, // RPS_FORMAT_Y210, + 0, // RPS_FORMAT_Y216, + 0, // RPS_FORMAT_NV11, + 0, // RPS_FORMAT_AI44, + 0, // 
RPS_FORMAT_IA44, + 0, // RPS_FORMAT_P8, + 0, // RPS_FORMAT_A8P8, + 2, // RPS_FORMAT_B4G4R4A4_UNORM, + // + // RPS_FORMAT_COUNT, + }; + + return (format < RPS_FORMAT_COUNT) ? s_Sizes[format] : 0; + } + + // Estimates the allocation size in bytes of a resource for the null runtime. + // Buffers report their buffer size, images report the sum of the estimated sizes of all mip levels, + // any other resource type reports 0. + static uint64_t EstimateAllocationSize(const ResourceDescPacked& resDesc) + { + if (resDesc.IsBuffer()) + { + return resDesc.GetBufferSize(); + } + else if (resDesc.IsImage()) + { + const uint64_t depthOrArraySlices = + (resDesc.type == RPS_RESOURCE_TYPE_IMAGE_3D) ? resDesc.image.depth : resDesc.image.arrayLayers; + + const uint64_t mip0SizeInBytes = (uint64_t(resDesc.image.width) * resDesc.image.height * depthOrArraySlices * + GetFormatElementBytes(resDesc.image.format)); + + // Rough estimate: mip 0 is counted once and each further mip level adds a quarter of the previous level. + // The level index is capped at 31 so the shift amount stays below the 64-bit width. + uint64_t sizeInBytes = mip0SizeInBytes; + + for (uint32_t i = 1; (i < resDesc.image.mipLevels) && (i < 32); i++) + { + sizeInBytes += (mip0SizeInBytes >> (i << 1)); + } + + return sizeInBytes; + } + + return 0ull; + } + + RpsResult NullRuntimeDevice::BuildDefaultRenderGraphPhases(RenderGraph& renderGraph) + { + RPS_V_RETURN(renderGraph.ReservePhases(6)); + RPS_V_RETURN(renderGraph.AddPhase()); + RPS_V_RETURN(renderGraph.AddPhase()); + RPS_V_RETURN(renderGraph.AddPhase()); + RPS_V_RETURN(renderGraph.AddPhase(renderGraph)); + RPS_V_RETURN(renderGraph.AddPhase(renderGraph)); + RPS_V_RETURN(renderGraph.AddPhase(renderGraph)); + RPS_V_RETURN(renderGraph.AddPhase(renderGraph)); + RPS_V_RETURN(renderGraph.AddPhase()); + //A NullRuntime backend will be added by the render graph automatically because no backend is set + + return RPS_OK; + } + + RpsResult NullRuntimeDevice::InitializeSubresourceInfos(ArrayRef resInstances) + { + for (auto& resInstance : resInstances) + { + GetFullSubresourceRange( + resInstance.fullSubresourceRange, resInstance.desc, GetResourceAspectMask(resInstance.desc)); + + resInstance.numSubResources = CalcSubresourceCount(resInstance.desc); + } + + return RPS_OK; + } + + RpsResult NullRuntimeDevice::InitializeResourceAllocInfos(ArrayRef resInstances) + { + for (auto& resInst : resInstances) + { + resInst.allocRequirement.size =
EstimateAllocationSize(resInst.desc); + resInst.allocRequirement.alignment = 0; + resInst.allocRequirement.memoryTypeIndex = 0; + resInst.hRuntimeResource = {RPS_NULL_HANDLE}; + } + + return RPS_OK; + } + + RpsResult NullRuntimeDevice::GetSubresourceRangeFromImageView(SubresourceRangePacked& outRange, + const ResourceInstance& resourceInfo, + const RpsAccessAttr& accessAttr, + const RpsImageView& imageView) + { + // TODO: Filter aspect by format + view + access flags + // Filter access flags by format + view ? + const uint32_t aspectMask = GetFormatAspectMask(imageView.base.viewFormat); + outRange = SubresourceRangePacked(aspectMask, imageView.subresourceRange, resourceInfo.desc); + + return RPS_OK; + } + + RpsImageAspectUsageFlags NullRuntimeDevice::GetImageAspectUsages(uint32_t aspectMask) const + { + return ((aspectMask & 1) ? (RPS_IMAGE_ASPECT_COLOR | RPS_IMAGE_ASPECT_DEPTH) : RPS_IMAGE_ASPECT_UNKNOWN) | + ((aspectMask & 2) ? RPS_IMAGE_ASPECT_STENCIL : RPS_IMAGE_ASPECT_UNKNOWN); + } + + ConstArrayRef NullRuntimeDevice::GetMemoryTypeInfos() const + { + //Create a dummy memory type for memory scheduling + static RpsMemoryTypeInfo dummyMemType = {0, 1}; + return {&dummyMemType, 1}; + } + +} // namespace rps + +RpsResult rpsNullRuntimeDeviceCreate(const RpsNullRuntimeDeviceCreateInfo* pCreateInfo, RpsDevice* phDevice) +{ + return rps::RuntimeDevice::Create( + phDevice, (pCreateInfo && pCreateInfo->pDeviceCreateInfo) ? pCreateInfo->pDeviceCreateInfo : nullptr); +} diff --git a/src/runtime/common/rps_render_graph.cpp b/src/runtime/common/rps_render_graph.cpp new file mode 100644 index 0000000..3f8420e --- /dev/null +++ b/src/runtime/common/rps_render_graph.cpp @@ -0,0 +1,668 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#include "rps/runtime/common/rps_render_states.h" + +#include "runtime/common/rps_render_graph.hpp" +#include "runtime/common/rps_runtime_device.hpp" +#include "runtime/common/rps_rpsl_host.hpp" +#include "runtime/common/rps_subprogram.hpp" + +namespace rps +{ + RpsResult RenderGraph::Create(Device& device, + const RpsRenderGraphCreateInfo* pCreateInfo, + RenderGraph** ppRenderGraph) + { + RPS_CHECK_ARGS(ppRenderGraph); + RPS_CHECK_ARGS(!pCreateInfo || ((pCreateInfo->numPhases == 0) == (pCreateInfo->pPhases == nullptr))); + + auto allocInfo = AllocInfo::FromType(); + + void* pMemory = device.Allocate(allocInfo); + RPS_CHECK_ALLOC(pMemory); + + *ppRenderGraph = new (pMemory) RenderGraph(device, pCreateInfo ? *pCreateInfo : RpsRenderGraphCreateInfo{}); + + if (pCreateInfo) + { + (*ppRenderGraph)->OnInit(*pCreateInfo); + } + + auto pRuntimeDevice = RuntimeDevice::Get(device); + if (pRuntimeDevice) + { + if ((*ppRenderGraph)->m_createInfo.numPhases == 0) + { + RPS_V_RETURN(pRuntimeDevice->BuildDefaultRenderGraphPhases(**ppRenderGraph)); + } + } + + if ((*ppRenderGraph)->m_pBackend == nullptr) + { + RPS_V_RETURN((*ppRenderGraph)->AddPhase(**ppRenderGraph)); + } + + // TODO: Apply user settings + (*ppRenderGraph)->m_memoryTypes = pRuntimeDevice->GetMemoryTypeInfos(); + + return RPS_OK; + } + + void RenderGraph::Destroy() + { + const Device& device = GetDevice(); + + OnDestroy(); + + this->~RenderGraph(); + + device.Free(this); + } + + RenderGraph::RenderGraph(const Device& device, const RpsRenderGraphCreateInfo& createInfo) + : m_device(device) + , m_createInfo(createInfo) + , m_persistentArena(device.Allocator()) + , m_frameArena(device.Allocator()) + , m_scratchArena(device.Allocator()) + , m_graph(device, m_frameArena) + , m_phases(0, &m_persistentArena) + , m_resourceCache(0, &m_persistentArena) + , m_programInstances(0, &m_persistentArena) + , m_cmds(0, &m_frameArena) + , m_cmdAccesses(0, &m_frameArena) + , m_transitions(0, &m_frameArena) + , 
m_resourceFinalAccesses(0, &m_persistentArena) + , m_runtimeCmdInfos(0, &m_frameArena) + , m_cmdBatches(0, &m_frameArena) + , m_cmdBatchWaitFenceIds(0, &m_frameArena) + , m_aliasingInfos(0, &m_frameArena) + , m_heaps(0, &m_persistentArena) + , m_resourceClearValues(&m_persistentArena) + , m_builder(*this, m_persistentArena, m_frameArena) + , m_diagInfoArena(device.Allocator()) + { + m_createInfo.mainEntryCreateInfo.pSignatureDesc = nullptr; + + m_diagData.resourceInfos.reset(&m_diagInfoArena); + m_diagData.cmdInfos.reset(&m_diagInfoArena); + m_diagData.heapInfos.reset(&m_diagInfoArena); + } + + RpsResult RenderGraph::OnInit(const RpsRenderGraphCreateInfo& createInfo) + { + RPS_ASSERT(m_pMainEntry == nullptr); + + if (createInfo.scheduleInfo.numQueues > 0) + { + RPS_CHECK_ARGS(createInfo.scheduleInfo.pQueueInfos); + + auto queueInfosCopy = m_persistentArena.NewArray(createInfo.scheduleInfo.numQueues); + RPS_CHECK_ALLOC(!queueInfosCopy.empty()); + + m_createInfo.scheduleInfo.pQueueInfos = queueInfosCopy.data(); + std::copy(createInfo.scheduleInfo.pQueueInfos, + createInfo.scheduleInfo.pQueueInfos + createInfo.scheduleInfo.numQueues, + queueInfosCopy.begin()); + } + + RPS_V_RETURN(Subprogram::Create(m_device, &createInfo.mainEntryCreateInfo, &m_pMainEntry)); + + m_pSignature = m_pMainEntry->GetSignature(); + + // TODO: Clean up + m_programInstances.push_back(m_persistentArena.New(m_pMainEntry, m_persistentArena)); + + return m_builder.Init(m_pSignature, m_persistentArena, m_programInstances.back()); + } + + void RenderGraph::OnDestroy() + { + for (auto pPhase : m_phases) + { + pPhase->Destroy(); + } + + if (m_pMainEntry) + { + m_pMainEntry->Destroy(); + m_pMainEntry = nullptr; + } + } + + ProgramInstance* RenderGraph::GetOrCreateProgramInstance(Subprogram* pSubprogram, uint32_t& globalProgramInstanceId) + { + if (globalProgramInstanceId == RPS_INDEX_NONE_U32) + { + const uint32_t newProgramId = uint32_t(m_programInstances.size()); + + if 
(!m_programInstances.push_back(m_persistentArena.New(pSubprogram, m_persistentArena))) + { + return nullptr; + } + + globalProgramInstanceId = newProgramId; + } + + RPS_ASSERT(globalProgramInstanceId < m_programInstances.size()); + + // In case the node was re-bound to a new program + const auto pResult = m_programInstances[globalProgramInstanceId]; + if (pResult->m_pProgram != pSubprogram) + { + pResult->Reset(pSubprogram); + } + + return pResult; + } + + RpsResult RenderGraph::Update(const RpsRenderGraphUpdateInfo& updateInfo) + { + m_status = UpdateImpl(updateInfo); + return m_status; + } + + RpsResult RenderGraph::UpdateImpl(const RpsRenderGraphUpdateInfo& updateInfo) + { + m_frameArena.Reset(); + m_cmds.reset_keep_capacity(&m_frameArena); + m_cmdAccesses.reset_keep_capacity(&m_frameArena); + m_transitions.reset_keep_capacity(&m_frameArena); + m_runtimeCmdInfos.reset_keep_capacity(&m_frameArena); + m_cmdBatches.reset_keep_capacity(&m_frameArena); + m_cmdBatchWaitFenceIds.reset_keep_capacity(&m_frameArena); + m_aliasingInfos.reset_keep_capacity(&m_frameArena); + + ArenaCheckPoint arenaCheckpoint{m_scratchArena}; + + m_graph.Reset(); + + const RenderGraphSignature* const pSignature = m_pMainEntry->GetSignature(); + + ArrayRef paramPtrs = + m_scratchArena.NewArray(pSignature->GetParamDecls().size()); + + for (uint32_t iParam = 0; iParam < paramPtrs.size(); iParam++) + { + auto paramVar = m_builder.GetParamVariable(iParam); + paramPtrs[iParam] = paramVar; + + if ((iParam < updateInfo.numArgs) && updateInfo.ppArgs[iParam]) + { + memcpy(paramVar, updateInfo.ppArgs[iParam], pSignature->GetParamDecl(iParam).GetSize()); + } + } + + { + RPS_V_RETURN(m_builder.Begin()); + + RpsResult buildResult = RPS_OK; + + if (updateInfo.pfnBuildCallback) + { + buildResult = + updateInfo.pfnBuildCallback(rps::ToHandle(&m_builder), paramPtrs.data(), paramPtrs.size()); + } + else + { + RpslExecuteInfo rpslExecInfo = {m_pMainEntry, paramPtrs.data(), paramPtrs.size()}; + RpslHost 
rpslHost(&m_builder); + buildResult = rpslHost.Execute(rpslExecInfo); + } + + if (RPS_FAILED(buildResult)) + { + m_builder.SetBuildError(buildResult); + } + + RPS_V_RETURN(m_builder.End()); + } + + RenderGraphUpdateContext updateContext = { + &updateInfo, *this, RuntimeDevice::Get(m_device), m_frameArena, m_scratchArena}; + + for (auto& phase : m_phases) + { + RPS_V_RETURN(phase->Run(updateContext)); + } + + return RPS_OK; + } + + RpsResult RenderGraph::RecordCommands(const RpsRenderGraphRecordCommandInfo& recordInfo) const + { + RPS_RETURN_ERROR_IF(RPS_FAILED(m_status), RPS_ERROR_INVALID_OPERATION); + + return m_pBackend->RecordCommands(*this, recordInfo); + } + + RpsResult RenderGraph::GetDiagnosticInfo(RpsRenderGraphDiagnosticInfo& diagInfos, + RpsRenderGraphDiagnosticInfoFlags diagnosticFlags) + { + const bool bFirst = + m_diagData.resourceInfos.empty() && m_diagData.cmdInfos.empty() && m_diagData.heapInfos.empty(); + const bool bReturnCached = !!(diagnosticFlags & RPS_RENDER_GRAPH_DIAGNOSTIC_INFO_USE_CACHED_BIT); + + //Resize diag cache for non cached usage and first time + if (!bReturnCached || bFirst) + { + RPS_V_RETURN(UpdateDiagCache()); + } + + diagInfos.numResourceInfos = uint32_t(m_diagData.resourceInfos.size()); + diagInfos.numHeapInfos = uint32_t(m_diagData.heapInfos.size()); + diagInfos.numCommandInfos = uint32_t(m_diagData.cmdInfos.size()); + diagInfos.pResourceDiagInfos = m_diagData.resourceInfos.data(); + diagInfos.pCmdDiagInfos = m_diagData.cmdInfos.data(); + diagInfos.pHeapDiagInfos = m_diagData.heapInfos.data(); + + return RPS_OK; + } + + RpsResult RenderGraph::GetCmdRenderTargetInfo(RpsNodeId cmdId, RpsCmdRenderTargetInfo& renderTargetInfo) const + { + RPS_RETURN_ERROR_IF(cmdId >= m_cmds.size(), RPS_ERROR_INVALID_ARGUMENTS); + RPS_RETURN_ERROR_IF(m_cmds[cmdId].pRenderPassInfo == nullptr, RPS_ERROR_INVALID_OPERATION); + + renderTargetInfo = m_cmds[cmdId].pRenderPassInfo->renderTargetInfo; + return RPS_OK; + } + + RpsResult 
RenderGraph::GetCmdViewportInfo(RpsNodeId cmdId, RpsCmdViewportInfo& viewportInfo) const + { + RPS_RETURN_ERROR_IF(cmdId >= m_cmds.size(), RPS_ERROR_INVALID_ARGUMENTS); + RPS_RETURN_ERROR_IF(m_cmds[cmdId].pRenderPassInfo == nullptr, RPS_ERROR_INVALID_OPERATION); + + viewportInfo = m_cmds[cmdId].pRenderPassInfo->viewportInfo; + return RPS_OK; + } + + static void GetRuntimeResourceInfoFromResourceInstance(const ResourceInstance& resourceInstance, + RpsRuntimeResourceInfo& outResInfo) + { + outResInfo.hResource = resourceInstance.hRuntimeResource; + resourceInstance.desc.Get(outResInfo.resourceDesc); + outResInfo.numSubresources = resourceInstance.numSubResources; + resourceInstance.fullSubresourceRange.Get(outResInfo.fullRange); + outResInfo.heapId = resourceInstance.allocPlacement.heapId; + outResInfo.allocInfo = resourceInstance.allocRequirement; + } + + RpsResult RenderGraph::GetRuntimeResourceInfo(RpsResourceId resourceId, + uint32_t temporalLayerIndex, + RpsRuntimeResourceInfo* pResourceInfo) const + { + RPS_CHECK_ARGS(pResourceInfo); + + RPS_CHECK_ARGS(resourceId < GetResourceInstances().size()); + + const auto* pResourceInstance = &GetResourceInstance(resourceId); + + if (pResourceInstance->IsTemporalParent()) + { + RPS_CHECK_ARGS(temporalLayerIndex != RPS_INDEX_NONE_U32); + RPS_RETURN_ERROR_IF(temporalLayerIndex >= pResourceInstance->desc.temporalLayers, + RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + pResourceInstance = &GetResourceInstance(pResourceInstance->temporalLayerOffset + temporalLayerIndex); + } + + GetRuntimeResourceInfoFromResourceInstance(*pResourceInstance, *pResourceInfo); + + return RPS_OK; + } + + RpsResult RenderGraph::GetOutputParameterRuntimeResourceInfos(RpsParamId paramId, + uint32_t arrayOffset, + uint32_t resourceCount, + RpsRuntimeResourceInfo* pResourceInfos) const + { + RPS_CHECK_ARGS(paramId < GetSignature().GetParamDecls().size()); + + const auto resourceIds = m_builder.GetOutputParamResourceIds(paramId); + + RPS_CHECK_ARGS(arrayOffset < 
resourceIds.size()); + RPS_CHECK_ARGS((arrayOffset + resourceCount) <= resourceIds.size()); + + const auto numResourceInstances = GetResourceInstances().size(); + const auto resourceIdsToGet = resourceIds.range(arrayOffset, resourceCount); + + for (uint32_t i = 0; i < resourceCount; i++) + { + if (resourceIdsToGet[i] < numResourceInstances) + { + const auto& resourceInstance = GetResourceInstance(resourceIdsToGet[i]); + + // TODO: Need to handle temporal slice translation + RPS_RETURN_ERROR_IF(resourceInstance.IsTemporalParent(), RPS_ERROR_NOT_IMPLEMENTED); + + GetRuntimeResourceInfoFromResourceInstance(resourceInstance, pResourceInfos[i]); + } + else + { + pResourceInfos[i] = {}; + } + } + + return RPS_OK; + } + +} // namespace rps + +RpsResult rpsRenderGraphCreate(RpsDevice hDevice, + const RpsRenderGraphCreateInfo* pCreateInfo, + RpsRenderGraph* phRenderGraph) +{ + RPS_CHECK_ARGS(hDevice != RPS_NULL_HANDLE); + + return rps::RenderGraph::Create(*rps::FromHandle(hDevice), pCreateInfo, rps::FromHandle(phRenderGraph)); +} + +RpsResult rpsRenderGraphUpdate(RpsRenderGraph hRenderGraph, const RpsRenderGraphUpdateInfo* pUpdateInfo) +{ + RPS_CHECK_ARGS(hRenderGraph != RPS_NULL_HANDLE); + RPS_CHECK_ARGS(pUpdateInfo != nullptr); + RPS_CHECK_ARGS((pUpdateInfo->gpuCompletedFrameIndex + 1) <= pUpdateInfo->frameIndex); + + auto pRenderGraph = rps::FromHandle(hRenderGraph); + + return pRenderGraph->Update(*pUpdateInfo); +} + +void rpsRenderGraphDestroy(RpsRenderGraph hRenderGraph) +{ + if (hRenderGraph != RPS_NULL_HANDLE) + { + rps::FromHandle(hRenderGraph)->Destroy(); + } +} + +RpsResult rpsRenderGraphGetResourceInfo(RpsRenderGraph hRenderGraph, + RpsResourceId resourceId, + uint32_t temporalLayerIndex, + RpsRuntimeResourceInfo* pResourceInfo) +{ + RPS_CHECK_ARGS(hRenderGraph != RPS_NULL_HANDLE); + return rps::FromHandle(hRenderGraph)->GetRuntimeResourceInfo(resourceId, temporalLayerIndex, pResourceInfo); +} + +RpsResult 
rpsRenderGraphGetOutputParameterResourceInfos(RpsRenderGraph hRenderGraph, + RpsParamId paramId, + uint32_t arrayOffset, + uint32_t resourceCount, + RpsRuntimeResourceInfo* pResourceInfos) +{ + RPS_CHECK_ARGS(hRenderGraph != RPS_NULL_HANDLE); + return rps::FromHandle(hRenderGraph) + ->GetOutputParameterRuntimeResourceInfos(paramId, arrayOffset, resourceCount, pResourceInfos); +} + +RpsSubprogram rpsRenderGraphGetMainEntry(RpsRenderGraph hRenderGraph) +{ + RPS_RETURN_ERROR_IF(hRenderGraph == RPS_NULL_HANDLE, RPS_NULL_HANDLE); + return rps::ToHandle(rps::FromHandle(hRenderGraph)->GetMainEntry()); +} + +RpsResult rpsRenderGraphGetBatchLayout(RpsRenderGraph hRenderGraph, RpsRenderGraphBatchLayout* pBatchLayout) +{ + RPS_CHECK_ARGS(hRenderGraph != RPS_NULL_HANDLE); + RPS_CHECK_ARGS(pBatchLayout != nullptr); + + return rps::FromHandle(hRenderGraph)->GetBatchLayout(*pBatchLayout); +} + +RpsResult rpsRenderGraphRecordCommands(RpsRenderGraph hRenderGraph, const RpsRenderGraphRecordCommandInfo* pRecordRange) +{ + RPS_CHECK_ARGS(hRenderGraph != RPS_NULL_HANDLE); + RPS_CHECK_ARGS(pRecordRange != nullptr); + + return rps::FromHandle(hRenderGraph)->RecordCommands(*pRecordRange); +} + +RpsResult rpsRenderGraphGetDiagnosticInfo(RpsRenderGraph hRenderGraph, + RpsRenderGraphDiagnosticInfo* pInfo, + RpsRenderGraphDiagnosticInfoFlags diagnosticFlags) +{ + RPS_CHECK_ARGS(hRenderGraph); + RPS_CHECK_ARGS(pInfo); + + return rps::FromHandle(hRenderGraph)->GetDiagnosticInfo(*pInfo, diagnosticFlags); +} + +RpsResult rpsCmdCallbackReportError(const RpsCmdCallbackContext* pContext, RpsResult errorCode) +{ + RPS_CHECK_ARGS(pContext); + RPS_RETURN_OK_IF(errorCode == RPS_OK); + + auto pBackendContext = rps::RuntimeCmdCallbackContext::GetMutable(pContext); + + RPS_RETURN_ERROR_IF(!pBackendContext->bIsPrimaryContext, RPS_ERROR_INVALID_OPERATION); + RPS_RETURN_ERROR_IF(pBackendContext->result != RPS_OK, RPS_ERROR_INVALID_OPERATION); + + pBackendContext->result = errorCode; + + return RPS_OK; +} + 
+RpsResult rpsCmdGetRenderTargetsInfo(const RpsCmdCallbackContext* pContext, RpsCmdRenderTargetInfo* pRenderTargetInfo) +{ + RPS_CHECK_ARGS(pContext && pRenderTargetInfo); + + auto pBackendContext = rps::RuntimeCmdCallbackContext::Get(pContext); + return pBackendContext->pRenderGraph->GetCmdRenderTargetInfo(pBackendContext->cmdId, *pRenderTargetInfo); +} + +RpsResult rpsCmdGetViewportInfo(const RpsCmdCallbackContext* pContext, RpsCmdViewportInfo* pViewportInfo) +{ + RPS_CHECK_ARGS(pContext && pViewportInfo); + + auto pBackendContext = rps::RuntimeCmdCallbackContext::Get(pContext); + return pBackendContext->pRenderGraph->GetCmdViewportInfo(pBackendContext->cmdId, *pViewportInfo); +} + +RpsResult rpsCmdGetNodeName(const RpsCmdCallbackContext* pContext, const char** ppNodeName, size_t* pNodeNameLength) +{ + RPS_CHECK_ARGS(pContext && ppNodeName); + + auto pBackendContext = rps::RuntimeCmdCallbackContext::Get(pContext); + + auto& nodeDecl = *pBackendContext->pCmdInfo->pNodeDecl; + + *ppNodeName = nodeDecl.name.str; + + if (pNodeNameLength) + { + *pNodeNameLength = nodeDecl.name.len; + } + + return RPS_OK; +} + +RpsResult rpsCmdGetParamDesc(const RpsCmdCallbackContext* pContext, RpsParamId paramId, RpsParameterDesc* pDesc) +{ + RPS_CHECK_ARGS(pContext && pDesc); + + auto pBackendContext = rps::RuntimeCmdCallbackContext::Get(pContext); + + auto& nodeDecl = *pBackendContext->pCmdInfo->pNodeDecl; + + RPS_RETURN_ERROR_IF(paramId >= nodeDecl.params.size(), RPS_ERROR_INDEX_OUT_OF_BOUNDS); + nodeDecl.params[paramId].GetDesc(pDesc); + + return RPS_OK; +} + +RpsResult rpsCmdGetArgResourceDescArray(const RpsCmdCallbackContext* pContext, + RpsParamId argIndex, + uint32_t srcArrayOffset, + RpsResourceDesc* pResourceDesc, + uint32_t numDescs) +{ + RPS_CHECK_ARGS(pContext && pResourceDesc); + + const auto* pBackendContext = rps::RuntimeCmdCallbackContext::Get(pContext); + + RPS_RETURN_ERROR_IF(argIndex >= pBackendContext->pNodeDeclInfo->params.size(), RPS_ERROR_INDEX_OUT_OF_BOUNDS); 
+ + const auto& paramAccessInfo = pBackendContext->pNodeDeclInfo->params[argIndex]; + + RPS_RETURN_ERROR_IF(!paramAccessInfo.IsResource(), RPS_ERROR_TYPE_MISMATCH); + RPS_RETURN_ERROR_IF(srcArrayOffset + numDescs > paramAccessInfo.numElements, RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + auto cmdAccesses = pBackendContext->pRenderGraph->GetCmdAccesses(pBackendContext->cmdId); + + for (uint32_t descIndex = 0; descIndex < numDescs; ++descIndex) + { + const auto& accessInfo = cmdAccesses[paramAccessInfo.accessOffset + srcArrayOffset + descIndex]; + const uint32_t resId = accessInfo.resourceId; + + if (resId != RPS_RESOURCE_ID_INVALID) + { + pBackendContext->pRenderGraph->GetResourceInstance(resId).desc.Get(pResourceDesc[descIndex]); + } + else + { + *pResourceDesc = rps::ResourceDesc(); + } + } + + return RPS_OK; +} + +RpsResult rpsCmdGetArgResourceDesc(const RpsCmdCallbackContext* pContext, + RpsParamId argIndex, + RpsResourceDesc* pResourceDesc) +{ + return rpsCmdGetArgResourceDescArray(pContext, argIndex, 0, pResourceDesc, 1); +} + +RpsResult rpsCmdGetArgRuntimeResourceArray(const RpsCmdCallbackContext* pContext, + RpsParamId argIndex, + uint32_t srcArrayOffset, + RpsRuntimeResource* pRuntimeResources, + uint32_t numResources) +{ + RPS_CHECK_ARGS(pContext && pRuntimeResources); + + const auto* pBackendContext = rps::RuntimeCmdCallbackContext::Get(pContext); + + RPS_RETURN_ERROR_IF(argIndex >= pBackendContext->pNodeDeclInfo->params.size(), RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + const auto& paramAccessInfo = pBackendContext->pNodeDeclInfo->params[argIndex]; + + RPS_RETURN_ERROR_IF(!paramAccessInfo.IsResource(), RPS_ERROR_TYPE_MISMATCH); + RPS_RETURN_ERROR_IF(srcArrayOffset + numResources > paramAccessInfo.numElements, RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + auto cmdAccesses = pBackendContext->pRenderGraph->GetCmdAccesses(pBackendContext->cmdId); + + for (uint32_t resourceIndex = 0; resourceIndex < numResources; ++resourceIndex) + { + const auto& accessInfo = 
cmdAccesses[paramAccessInfo.accessOffset + srcArrayOffset + resourceIndex]; + const uint32_t resId = accessInfo.resourceId; + + pRuntimeResources[resourceIndex] = + resId != RPS_RESOURCE_ID_INVALID + ? pBackendContext->pRenderGraph->GetResourceInstance(resId).hRuntimeResource + : RpsRuntimeResource(); + } + + return RPS_OK; +} + +RpsResult rpsCmdGetArgRuntimeResource(const RpsCmdCallbackContext* pContext, + RpsParamId argIndex, + RpsRuntimeResource* pRuntimeResource) +{ + return rpsCmdGetArgRuntimeResourceArray(pContext, argIndex, 0, pRuntimeResource, 1); +} + +RpsResult rpsCmdGetArgResourceAccessInfoArray(const RpsCmdCallbackContext* pContext, + RpsParamId argIndex, + uint32_t srcArrayOffset, + RpsResourceAccessInfo* pResourceAccessInfos, + uint32_t numAccessess) +{ + RPS_CHECK_ARGS(pContext && pResourceAccessInfos); + + const auto* pBackendContext = rps::RuntimeCmdCallbackContext::Get(pContext); + + RPS_RETURN_ERROR_IF(argIndex >= pBackendContext->pNodeDeclInfo->params.size(), RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + const auto& paramAccessInfo = pBackendContext->pNodeDeclInfo->params[argIndex]; + + RPS_RETURN_ERROR_IF(!paramAccessInfo.IsResource(), RPS_ERROR_TYPE_MISMATCH); + RPS_RETURN_ERROR_IF(srcArrayOffset + numAccessess > paramAccessInfo.numElements, RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + auto cmdAccesses = pBackendContext->pRenderGraph->GetCmdAccesses(pBackendContext->cmdId); + + for (uint32_t accessIndex = 0; accessIndex < numAccessess; ++accessIndex) + { + const auto& accessInfo = cmdAccesses[paramAccessInfo.accessOffset + srcArrayOffset + accessIndex]; + const uint32_t resId = accessInfo.resourceId; + + if (resId != RPS_RESOURCE_ID_INVALID) + { + accessInfo.Get(pResourceAccessInfos[accessIndex]); + } + else + { + pResourceAccessInfos[accessIndex] = {}; + } + } + + return RPS_OK; +} + +RpsResult rpsCmdGetArgResourceAccessInfo(const RpsCmdCallbackContext* pContext, + RpsParamId argIndex, + RpsResourceAccessInfo* pResourceAccessInfo) +{ + return 
rpsCmdGetArgResourceAccessInfoArray(pContext, argIndex, 0, pResourceAccessInfo, 1); +} + +RpsResult rpsCmdCloneContext(const RpsCmdCallbackContext* pContext, + RpsRuntimeCommandBuffer hCmdBufferForDerivedContext, + const RpsCmdCallbackContext** ppDerivedContext) +{ + RPS_CHECK_ARGS(pContext && ppDerivedContext); + + auto pBackendContext = rps::RuntimeCmdCallbackContext::Get(pContext); + + return pBackendContext->pRenderGraph->GetRuntimeBackend()->CloneContext( + *pBackendContext, hCmdBufferForDerivedContext, ppDerivedContext); +} + +RpsResult rpsCmdBeginRenderPass(const RpsCmdCallbackContext* pContext, RpsRuntimeRenderPassFlags flags) +{ + RPS_CHECK_ARGS(pContext); + + auto pBackendContext = rps::RuntimeCmdCallbackContext::GetMutable(pContext); + pBackendContext->renderPassFlags = flags; + + return pBackendContext->pRenderGraph->GetRuntimeBackend()->RecordCmdRenderPassBegin(*pBackendContext); +} + +RpsResult rpsCmdEndRenderPass(const RpsCmdCallbackContext* pContext) +{ + RPS_CHECK_ARGS(pContext); + + auto pBackendContext = rps::RuntimeCmdCallbackContext::Get(pContext); + + return pBackendContext->pRenderGraph->GetRuntimeBackend()->RecordCmdRenderPassEnd(*pBackendContext); +} + +RpsResult rpsCmdSetCommandBuffer(const RpsCmdCallbackContext* pContext, RpsRuntimeCommandBuffer hCmdBuffer) +{ + RPS_CHECK_ARGS(pContext); + + auto pBackendContext = rps::RuntimeCmdCallbackContext::GetMutable(pContext); + + pBackendContext->hCommandBuffer = hCmdBuffer; + + return RPS_OK; +} diff --git a/src/runtime/common/rps_render_graph.hpp b/src/runtime/common/rps_render_graph.hpp new file mode 100644 index 0000000..0683d64 --- /dev/null +++ b/src/runtime/common/rps_render_graph.hpp @@ -0,0 +1,887 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#ifndef RPS_RENDER_GRAPH_H +#define RPS_RENDER_GRAPH_H + +#include "core/rps_core.hpp" +#include "core/rps_util.hpp" +#include "core/rps_graph.hpp" + +#include "core/rps_persistent_index_generator.hpp" + +#include "rps/runtime/common/rps_runtime.h" +#include "runtime/common/rps_cmd_buf.hpp" +#include "runtime/common/rps_render_graph_resource.hpp" +#include "runtime/common/rps_render_graph_signature.hpp" +#include "runtime/common/rps_render_graph_builder.hpp" +#include "runtime/common/rps_subprogram.hpp" + +namespace rps +{ + class RenderGraph; + class RenderGraphBuilder; + class RuntimeDevice; + class RuntimeBackend; + class RpslHost; + + struct RuntimeCmdCallbackContext; + + struct ResourceAliasingInfo + { + uint32_t srcResourceIndex; + uint32_t dstResourceIndex; + RpsBool srcDeactivating : 1; + RpsBool dstActivating : 1; + }; + + struct FinalAccessInfo + { + uint32_t prevTransition; + SubresourceRangePacked range; + }; + + struct ResourceInstance + { + uint32_t resourceDeclId = RPS_INDEX_NONE_U32; + uint32_t temporalLayerOffset = RPS_INDEX_NONE_U32; + ResourceDescPacked desc = {}; + SubresourceRangePacked fullSubresourceRange; + uint32_t numSubResources = 0; + uint32_t clearValueId = RPS_INDEX_NONE_U32; + RpsAccessAttr allAccesses = {}; + RpsAccessAttr prevAllAccesses = {}; + RpsAccessAttr initialAccess = {}; + Span finalAccesses; + uint32_t lifetimeBegin = UINT32_MAX; + uint32_t lifetimeEnd = UINT32_MAX; + bool isTemporalSlice : 1; + bool isFirstTemporalSlice : 1; + bool isExternal : 1; + bool isAliased : 1; + bool isPendingCreate : 1; + bool isPendingInit : 1; + bool isMutableFormat : 1; + bool bBufferFormattedWrite : 1; + bool bBufferFormattedRead : 1; + RpsGpuMemoryRequirement allocRequirement = {0, 0, RPS_INDEX_NONE_U32}; + RpsHeapPlacement allocPlacement = {RPS_INDEX_NONE_U32, 0}; + RpsRuntimeResource hRuntimeResource = {}; + + ResourceInstance() + : isTemporalSlice(false) + , isFirstTemporalSlice(false) + , isExternal(false) + , isAliased(false) + , 
isPendingCreate(false) + , isPendingInit(false) + , isMutableFormat(false) + , bBufferFormattedWrite(false) + , bBufferFormattedRead(false) + { + } + + bool IsTemporalParent() const + { + return temporalLayerOffset != RPS_INDEX_NONE_U32; + } + + bool HasNoAccess() const + { + return (allAccesses.accessFlags == RPS_ACCESS_UNKNOWN); + } + + }; + + struct CmdAccessInfo + { + uint32_t resourceId; + SubresourceRangePacked range; + AccessAttr access; + RpsFormat viewFormat; + const RpsResourceView* pViewInfo; + + void Get(RpsResourceAccessInfo& accessInfo) const + { + accessInfo.resourceId = resourceId; + range.Get(accessInfo.range); + accessInfo.access = access; + accessInfo.viewFormat = viewFormat; + } + }; + + struct CmdRenderPassInfo + { + RpsCmdViewportInfo viewportInfo; + RpsCmdRenderTargetInfo renderTargetInfo; + }; + + class ProgramInstance + { + RPS_CLASS_NO_COPY_MOVE(ProgramInstance); + + public: + ProgramInstance(const Subprogram* pProgram, Arena& persistentArena) + : m_pProgram(pProgram) + , m_resourceIds(&persistentArena) + , m_cmdIds(&persistentArena) + , m_persistentIndexGenerator(persistentArena) + { + } + + void Reset(const Subprogram* pProgram) + { + m_pProgram = pProgram; + m_cmdIds.clear(); + m_resourceIds.clear(); + m_persistentIndexGenerator.Clear(); + } + + const Subprogram* m_pProgram = nullptr; + ArenaVector m_resourceIds; + ArenaVector m_cmdIds; + + enum PersistentIndexKinds + { + PERSISTENT_INDEX_KIND_RESOURCE_ID, + PERSISTENT_INDEX_KIND_NODE_ID, + PERSISTENT_INDEX_KIND_COUNT, + }; + + PersistentIdGenerator m_persistentIndexGenerator; + }; + + struct CmdInfo + { + uint32_t nodeDeclIndex; + union + { + struct + { + uint32_t cmdDeclIndex : 31; + bool bPreferAsync : 1; + }; + uint32_t subgraphFlags; + }; + const NodeDeclInfo* pNodeDecl; + const Cmd* pCmdDecl; + Span accesses; + CmdRenderPassInfo* pRenderPassInfo; + + static bool IsNodeDeclIdBuiltIn(RpsNodeDeclId nodeDeclId) + { + return int32_t(nodeDeclId) < 0; + } + + bool IsNodeDeclBuiltIn() 
const + { + return CmdInfo::IsNodeDeclIdBuiltIn(nodeDeclIndex); + } + }; + + struct TransitionInfo + { + CmdAccessInfo access; + NodeId nodeId; + uint32_t prevTransition; + }; + + struct RenderGraphUpdateContext + { + const RpsRenderGraphUpdateInfo* pUpdateInfo; + RenderGraph& renderGraph; + RuntimeDevice* pRuntimeDevice; + Arena& frameArena; + Arena& scratchArena; + }; + + static constexpr uint32_t CMD_ID_PREAMBLE = 0x7FFFFFFE; + static constexpr uint32_t CMD_ID_POSTAMBLE = 0x7FFFFFFF; + + struct RuntimeCmdInfo + { + uint32_t cmdId : 31; + uint32_t isTransition : 1; + Span aliasingInfos = {}; + + RuntimeCmdInfo() + : RuntimeCmdInfo(0, false) + { + } + + RuntimeCmdInfo(uint32_t inCmdId, bool inIsTransition) + : cmdId(inCmdId) + , isTransition(inIsTransition ? 1 : 0) + { + } + + uint32_t GetTransitionId() const + { + return isTransition ? cmdId : RPS_INDEX_NONE_U32; + } + + uint32_t GetCmdId() const + { + return isTransition ? RPS_INDEX_NONE_U32 : cmdId; + } + + bool HasTransitionInfo() const + { + return GetTransitionId() < uint32_t(CMD_ID_PREAMBLE); + } + }; + + struct CommandBatch : public RpsCommandBatch + { + CommandBatch(uint32_t inQueueIndex = 0, + uint32_t inCmdBegin = 0, + uint32_t inNumCmds = 0, + uint32_t inWaitFencesBegin = 0, + uint32_t inWaitFencesCount = 0, + uint32_t inSignalFenceId = RPS_INDEX_NONE_U32) + { + queueIndex = inQueueIndex; + waitFencesBegin = inWaitFencesBegin; + numWaitFences = inWaitFencesCount; + signalFenceIndex = inSignalFenceId; + cmdBegin = inCmdBegin; + numCmds = inNumCmds; + }; + }; + + struct HeapInfo + { + uint32_t memTypeIndex; + uint32_t index; + uint64_t size; + uint32_t alignment; + uint64_t usedSize; + uint64_t maxUsedSize; + + RpsRuntimeHeap hRuntimeHeap; + }; + + class IRenderGraphPhase + { + RPS_CLASS_NO_COPY_MOVE(IRenderGraphPhase); + + protected: + IRenderGraphPhase() = default; + + public: + virtual ~IRenderGraphPhase() = default; + + virtual RpsResult Run(RenderGraphUpdateContext& context) = 0; + + virtual 
RuntimeBackend* AsRuntimeBackend() + { + return nullptr; + } + + void Destroy() + { + OnDestroy(); + + this->~IRenderGraphPhase(); + } + + protected: + virtual void OnDestroy() + { + } + }; + + struct RuntimeCmd + { + RpsNodeId cmdId; + + RuntimeCmd(RpsNodeId cmdIdIn = RPS_CMD_ID_INVALID) + : cmdId(cmdIdIn) + { + } + }; + + class RuntimeBackend : public IRenderGraphPhase + { + protected: + RuntimeBackend(RenderGraph& renderGraph) + : m_renderGraph(renderGraph) + { + } + + public: + virtual ~RuntimeBackend() + { + } + + virtual RpsResult Run(RenderGraphUpdateContext& context) override; + + virtual RuntimeBackend* AsRuntimeBackend() override final + { + return this; + } + + virtual RpsResult RecordCommands(const RenderGraph& renderGraph, + const RpsRenderGraphRecordCommandInfo& recordInfo) const = 0; + + virtual RpsResult RecordCmdRenderPassBegin(const RuntimeCmdCallbackContext& context) const + { + return RPS_OK; + } + + virtual RpsResult RecordCmdRenderPassEnd(const RuntimeCmdCallbackContext& context) const + { + return RPS_OK; + } + + virtual RpsResult RecordCmdFixedFunctionBindingsAndDynamicStates(const RuntimeCmdCallbackContext& context) const + { + return RPS_OK; + } + + RpsResult CloneContext(const RuntimeCmdCallbackContext& context, + RpsRuntimeCommandBuffer hNewCmdBuffer, + const RpsCmdCallbackContext** ppNewContext) const; + + virtual void DestroyRuntimeResourceDeferred(ResourceInstance& resource) = 0; + + RenderGraph& GetRenderGraph() const + { + return m_renderGraph; + } + + static RpsResult GetCmdArgResourceInfos(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayIndex, + const ResourceInstance** ppResources, + uint32_t count); + + protected: + virtual RpsResult UpdateFrame(const RenderGraphUpdateContext& context) + { + return RPS_OK; + } + + virtual RpsResult CreateHeaps(const RenderGraphUpdateContext& context, ArrayRef heaps) + { + return RPS_OK; + } + + virtual void DestroyHeaps(ArrayRef heaps) + { + } + + virtual RpsResult 
CreateResources(const RenderGraphUpdateContext& context, ArrayRef resources) + { + return RPS_OK; + } + + virtual void DestroyResources(ArrayRef resources) + { + } + + virtual RpsResult CreateCommandResources(const RenderGraphUpdateContext& context) + { + return RPS_OK; + } + + virtual void DestroyCommandResources() + { + } + + virtual void RecordDebugMarker(const RuntimeCmdCallbackContext& context, + RpsRuntimeDebugMarkerMode mode, + StrRef name) const; + + RpsResult RecordCommand(RuntimeCmdCallbackContext& context, const RuntimeCmd& runtimeCmd) const; + + private: + RpsResult RecordCmdBegin(const RuntimeCmdCallbackContext& context) const; + RpsResult RecordCmdEnd(const RuntimeCmdCallbackContext& context) const; + + protected: + virtual void OnDestroy() override; + + static uint64_t GetNumQueuedFrames(const RenderGraphUpdateContext& context) + { + // Returns the number of queued frames based on the current frame index and known completed frame index. + // If gpuCompletedFrameIndex is RPS_GPU_COMPLETED_FRAME_INDEX_NONE (UINT64_MAX), it indicates no frames + // are known to have completed yet and it returns the current frame index. 
+ return context.pUpdateInfo->frameIndex - (context.pUpdateInfo->gpuCompletedFrameIndex + 1); + } + + private: + RenderGraph& m_renderGraph; + }; + + class NullRuntimeBackend : public RuntimeBackend + { + public: + NullRuntimeBackend(RenderGraph& renderGraph) + : RuntimeBackend(renderGraph) + { + } + + virtual RpsResult CreateHeaps(const RenderGraphUpdateContext& context, ArrayRef heaps) override final; + + virtual RpsResult RecordCommands(const RenderGraph& renderGraph, + const RpsRenderGraphRecordCommandInfo& recordInfo) const override final; + + virtual void DestroyRuntimeResourceDeferred(ResourceInstance& resource) override final; + + private: + uint64_t m_heapCounter = 0; + }; + + struct RuntimeCmdCallbackContext final : public RpsCmdCallbackContext + { + const RuntimeBackend* pBackend; + const RenderGraph* pRenderGraph; + const NodeDeclInfo* pNodeDeclInfo = nullptr; + const CmdInfo* pCmdInfo = nullptr; + const Cmd* pCmd = nullptr; + const RuntimeCmd* pRuntimeCmd = nullptr; + uint32_t cmdId = RPS_CMD_ID_INVALID; + RpsResult result = RPS_OK; + RpsRecordCommandFlags recordFlags; + RpsRuntimeRenderPassFlags renderPassFlags : 30; + bool bIsPrimaryContext : 1; + bool bIsCmdBeginEnd : 1; + + RuntimeCmdCallbackContext(const RuntimeBackend* pBackendIn, const RpsRenderGraphRecordCommandInfo& recordInfo) + : RpsCmdCallbackContext{recordInfo.hCmdBuffer, recordInfo.pUserContext} + , pBackend(pBackendIn) + , pRenderGraph(pBackendIn ? &pBackendIn->GetRenderGraph() : nullptr) + , result(RPS_OK) + , recordFlags(recordInfo.flags) + , renderPassFlags(RPS_RUNTIME_RENDER_PASS_FLAG_NONE) + , bIsPrimaryContext(true) + , bIsCmdBeginEnd(false) + { + static_assert(std::is_trivially_copyable::value, + "RuntimeCmdCallbackContext must be trivially copyable for CloneContext to work"); + } + + static const RuntimeCmdCallbackContext* Get(const RpsCmdCallbackContext* pContext) + { + return static_cast(pContext); + } + + // Get a modifiable pointer to RuntimeCmdCallbackContext. 
+ // The contexts are created within the RPS library on stack and + // passed to command callbacks as a const pointer to prevent user from modifying it. + // A context is expected to be accessed single threaded. + // In a few limited cases we need to modify it from within the record API calls: + // (If you use this function please add to the list below) + // - rpsCmdCallbackReportError, to set the error status of current context. + // - rpsCmdBeginRenderPass, to setup flags for both Begin and End render pass operations. + // - rpsCmdSetCommandBuffer, to setup a new command buffer for subsequent recording. + static RuntimeCmdCallbackContext* GetMutable(const RpsCmdCallbackContext* pContext) + { + return const_cast(Get(pContext)); + } + + static const RuntimeCmdCallbackContext& Get(const RpsCmdCallbackContext& context) + { + return *Get(&context); + } + + template + const T* GetBackend() const + { + return static_cast(pBackend); + } + + template + const T* GetRuntimeCmd() const + { + return static_cast(pRuntimeCmd); + } + }; + + class RenderGraph + { + public: + static RpsResult Create(Device& device, + const RpsRenderGraphCreateInfo* pCreateInfo, + RenderGraph** ppRenderGraph); + void Destroy(); + + RenderGraphBuilder* BeginBuild(); + + RpsResult Update(const RpsRenderGraphUpdateInfo& updateInfo); + + RpsResult RecordCommands(const RpsRenderGraphRecordCommandInfo& recordInfo) const; + + RpsResult GetDiagnosticInfo(RpsRenderGraphDiagnosticInfo& diagInfos, RpsRenderGraphDiagnosticInfoFlags diagnosticFlags); + + static constexpr uint32_t INVALID_TRANSITION = 0; + + private: + RenderGraph(const Device& device, const RpsRenderGraphCreateInfo& createInfo); + + ~RenderGraph() + { + } + + RpsResult OnInit(const RpsRenderGraphCreateInfo& createInfo); + + void OnDestroy(); + + RpsResult UpdateImpl(const RpsRenderGraphUpdateInfo& buildInfo); + + RpsResult UpdateDiagCache(); + + RpsResult AllocateDiagnosticInfo(); + + void GatherDiagnosticInfo(); + + void 
GatherResourceDiagnosticInfo(RpsResourceDiagnosticInfo& dst, + const ResourceInstance& src, + uint32_t resIndex); + + void GatherCmdDiagnosticInfo(RpsCmdDiagnosticInfo& dst, const rps::RuntimeCmdInfo& src, uint32_t cmdIndex); + + void GatherHeapDiagnosticInfo(RpsHeapDiagnosticInfo& dst, const rps::HeapInfo& src); + + public: + + const Device& GetDevice() const + { + return m_device; + } + + const RpsRenderGraphCreateInfo& GetCreateInfo() const + { + return m_createInfo; + } + + Subprogram* GetMainEntry() const + { + return m_pMainEntry; + } + + ProgramInstance* GetOrCreateProgramInstance(Subprogram* pSubprogram, uint32_t& globalProgramInstanceId); + + const RenderGraphBuilder& GetBuilder() const + { + return m_builder; + } + + Graph& GetGraph() + { + return m_graph; + } + + const Graph& GetGraph() const + { + return m_graph; + } + + const ResourceInstance& GetResourceInstance(RpsResourceId resourceId) const + { + return m_resourceCache[resourceId]; + } + + const ArenaVector& GetResourceInstances() const + { + return m_resourceCache; + } + + ArenaVector& GetResourceInstances() + { + return m_resourceCache; + } + + void SetResourceClearValue(ResourceInstance& resourceInfo, const RpsClearInfo& clearInfo) + { + RpsClearInfo* pClearInfo = nullptr; + if (resourceInfo.clearValueId == UINT32_MAX) + { + resourceInfo.clearValueId = m_resourceClearValues.AllocSlot(&pClearInfo); + } + else + { + pClearInfo = m_resourceClearValues.GetSlot(resourceInfo.clearValueId); + } + + if (pClearInfo) + { + *pClearInfo = clearInfo; + } + } + + const RpsClearInfo& GetResourceClearValue(uint32_t slot) const + { + return *m_resourceClearValues.GetSlot(slot); + } + + ArenaVector& GetResourceFinalAccesses() + { + return m_resourceFinalAccesses; + } + + const ArenaVector& GetResourceFinalAccesses() const + { + return m_resourceFinalAccesses; + } + + ArenaVector& GetHeapInfos() + { + return m_heaps; + } + + const ArenaVector& GetHeapInfos() const + { + return m_heaps; + } + + const CmdInfo* 
GetCmdInfo(RpsNodeId cmdId) const + { + return &m_cmds[cmdId]; + } + + const ArenaVector& GetCmdInfos() const + { + return m_cmds; + } + + ArenaVector& GetCmdInfos() + { + return m_cmds; + } + + ArenaVector& GetCmdAccessInfos() + { + return m_cmdAccesses; + } + + ConstArrayRef GetCmdAccessInfos() const + { + return m_cmdAccesses.range_all(); + } + + ConstArrayRef GetCmdAccesses(RpsNodeId cmdId) const + { + return m_cmds[cmdId].accesses.Get(m_cmdAccesses); + } + + RpsResult GetCmdRenderTargetInfo(RpsNodeId cmdId, RpsCmdRenderTargetInfo& renderTargetInfo) const; + + RpsResult GetCmdViewportInfo(RpsNodeId, RpsCmdViewportInfo& viewportInfo) const; + + const RenderGraphSignature& GetSignature() const + { + return *m_pSignature; + } + + ArenaVector& GetRuntimeCmdInfos() + { + return m_runtimeCmdInfos; + } + + ConstArrayRef GetRuntimeCmdInfos() const + { + return m_runtimeCmdInfos.range_all(); + } + + RpsResult GetRuntimeResourceInfo(RpsResourceId resourceId, + uint32_t temporalLayerIndex, + RpsRuntimeResourceInfo* pResourceInfo) const; + + RpsResult GetOutputParameterRuntimeResourceInfos(RpsParamId paramId, + uint32_t arrayOffset, + uint32_t resourceCount, + RpsRuntimeResourceInfo* pResourceInfos) const; + + const TransitionInfo& GetTransitionInfo(uint32_t transitionId) const + { + return m_transitions[transitionId]; + } + + const ArenaVector& GetTransitions() const + { + return m_transitions; + } + + ArenaVector& GetTransitions() + { + return m_transitions; + } + + ConstArrayRef GetMemoryTypes() const + { + return m_memoryTypes; + } + + ArenaVector& GetResourceAliasingInfos() + { + return m_aliasingInfos; + } + + const ArenaVector& GetResourceAliasingInfos() const + { + return m_aliasingInfos; + } + + ArenaVector& GetCmdBatches() + { + return m_cmdBatches; + } + + ArenaVector& GetCmdBatchWaitFenceIds() + { + return m_cmdBatchWaitFenceIds; + } + + RpsResult GetBatchLayout(RpsRenderGraphBatchLayout& batchLayout) const + { + batchLayout.numFenceSignals = 
uint32_t(m_cmdBatchWaitFenceIds.size()); + batchLayout.numCmdBatches = uint32_t(m_cmdBatches.size()); + batchLayout.pCmdBatches = m_cmdBatches.empty() ? nullptr : m_cmdBatches.data(); + batchLayout.pWaitFenceIndices = m_cmdBatchWaitFenceIds.data(); + + return RPS_OK; + } + + RpsResult ReservePhases(uint32_t numPhases) + { + return m_phases.reserve(numPhases) ? RPS_OK : RPS_ERROR_OUT_OF_MEMORY; + } + + template + RpsResult AddPhase(TArgs&&... args) + { + T* pPhase = m_persistentArena.New(args...); + RPS_CHECK_ALLOC(pPhase); + + return AddPhase(pPhase); + } + + RpsResult AddPhase(IRenderGraphPhase* pPhase) + { + RPS_RETURN_ERROR_IF(!m_phases.push_back(pPhase), RPS_ERROR_OUT_OF_MEMORY); + + if (m_pBackend == nullptr) + { + m_pBackend = pPhase->AsRuntimeBackend(); + } + + return RPS_OK; + } + + RuntimeBackend* GetRuntimeBackend() const + { + return m_pBackend; + } + + template + T* FrameAlloc() + { + return static_cast(m_frameArena.AlignedAlloc(sizeof(T), alignof(T))); + } + + public: + void PrintCmdNodeName(PrinterRef printer, NodeId id) const + { + RPS_ASSERT(id < m_cmds.size()); + RPS_ASSERT(m_graph.GetNodes().empty() || (id == m_graph.GetNode(id)->GetCmdId())); + + auto pNodeDecl = m_cmds[id].pNodeDecl; + if (pNodeDecl) + { + printer("%.*s_%d", int(pNodeDecl->name.len), pNodeDecl->name.str, id); // precision (".*") limits the read to name.len chars; "%*s" only set a minimum field width + } + else + { + printer("n_%d", id); + } + } + + void PrintTransitionNodeName(PrinterRef printer, NodeId id) const + { + RPS_ASSERT(id >= m_cmds.size()); + + printer("t_%d", m_graph.GetNode(id)->GetTransitionId()); + } + + private: + const Device& m_device; + RpsRenderGraphCreateInfo m_createInfo; + Arena m_persistentArena; + Arena m_frameArena; + Arena m_scratchArena; + Graph m_graph; + RpsResult m_status = RPS_OK; + + const RenderGraphSignature* m_pSignature = nullptr; + Subprogram* m_pMainEntry = nullptr; + + ConstArrayRef m_memoryTypes; + + ArenaVector m_phases; + ArenaVector m_resourceCache; + ArenaVector m_programInstances; + ArenaVector m_cmds; + ArenaVector
m_cmdAccesses; + ArenaVector m_transitions; + ArenaVector m_resourceFinalAccesses; + + RuntimeBackend* m_pBackend = nullptr; + + ArenaVector m_runtimeCmdInfos; + ArenaVector m_cmdBatches; + ArenaVector m_cmdBatchWaitFenceIds; + ArenaVector m_aliasingInfos; + ArenaVector m_heaps; + + ArenaFreeListPool m_resourceClearValues; + + RenderGraphBuilder m_builder; + + //Diagnostics cache + struct + { + ArenaVector resourceInfos; + ArenaVector cmdInfos; + ArenaVector heapInfos; + } m_diagData; + + Arena m_diagInfoArena; + + friend class RenderGraphBuilder; + }; + + RPS_ASSOCIATE_HANDLE(RenderGraph); + + class RenderGraphPhaseWrapper final : public IRenderGraphPhase + { + public: + RenderGraphPhaseWrapper(const RpsRenderGraphPhaseInfo& phaseInfo) + : m_phaseInfo(phaseInfo) + { + } + + virtual RpsResult Run(RenderGraphUpdateContext& context) override final + { + return m_phaseInfo.pfnRun(ToHandle(&context.renderGraph), context.pUpdateInfo, m_phaseInfo.hPhase); + } + + virtual ~RenderGraphPhaseWrapper() + { + m_phaseInfo.pfnDestroy(m_phaseInfo.hPhase); + } + + private: + RpsRenderGraphPhaseInfo m_phaseInfo; + }; + +} // namespace rps + +RPS_IMPL_OPAQUE_HANDLE(NullRuntimeHeap, RpsRuntimeHeap, void); + +#endif //RPS_RENDER_GRAPH_H diff --git a/src/runtime/common/rps_render_graph_builder.cpp b/src/runtime/common/rps_render_graph_builder.cpp new file mode 100644 index 0000000..0acaacb --- /dev/null +++ b/src/runtime/common/rps_render_graph_builder.cpp @@ -0,0 +1,461 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#include "runtime/common/rps_render_graph_builder.hpp" +#include "runtime/common/rps_render_graph.hpp" +#include "runtime/common/rps_rpsl_host.hpp" + +namespace rps +{ + + RpsResult RenderGraphBuilder::Init(const RenderGraphSignature* pSignature, + Arena& persistentArena, + ProgramInstance* pRootProgramInstance) + { + const auto paramDecls = pSignature->GetParamDecls(); + m_paramData = persistentArena.NewArray(paramDecls.size()); + RPS_CHECK_ALLOC(m_paramData.size() == paramDecls.size()); + + uint32_t maxNumOutResources = 0; + for (uint32_t i = 0; i < paramDecls.size(); i++) + { + // TODO: Using elementSize as alignment for now + m_paramData[i].data = + persistentArena.AlignedAllocZeroed(paramDecls[i].GetSize(), paramDecls[i].GetElementSize()); + RPS_CHECK_ALLOC(m_paramData[i].data); + + if (paramDecls[i].IsOutputResource()) + { + RPS_ASSERT(!paramDecls[i].IsUnboundedArray() && "TODO"); + + m_paramData[i].outputResourceIds.SetRange(maxNumOutResources, paramDecls[i].GetNumElements()); + maxNumOutResources += paramDecls[i].GetNumElements(); + } + } + + m_outputResourceIds = persistentArena.NewArray(maxNumOutResources); + RPS_CHECK_ALLOC(m_outputResourceIds.size() == maxNumOutResources); + + m_pCurrProgram = pRootProgramInstance; + + m_dynamicNodeDeclIdBegin = uint32_t(pSignature->GetNodeDecls().size()); + + return RPS_OK; + } + + RpsResourceId RenderGraphBuilder::GetParamResourceId(RpsParamId paramId, uint32_t arrayIndex) const + { + RPS_RETURN_ERROR_IF(paramId >= m_paramData.size(), RPS_INDEX_NONE_U32); + RPS_RETURN_ERROR_IF(m_paramData[paramId].resources.empty(), RPS_INDEX_NONE_U32); + RPS_RETURN_ERROR_IF(m_paramData[paramId].resources.size() <= arrayIndex, RPS_INDEX_NONE_U32); + + return m_paramData[paramId].resources.GetBegin() + arrayIndex; + } + + TResult RenderGraphBuilder::GetParamVariable(RpsParamId paramId, size_t* pVariableSize) const + { + RPS_RETURN_ERROR_IF(paramId >= m_paramData.size(), RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + if (pVariableSize) + { + 
*pVariableSize = + m_renderGraph.GetSignature().GetParamDecls()[paramId].GetElementSize(); // TODO: Handle array + } + + return m_paramData[paramId].data; + } + + RpsResult RenderGraphBuilder::Begin() + { + RPS_RETURN_ERROR_IF(m_state == State::Building, RPS_ERROR_INVALID_OPERATION); + + m_state = State::Building; + m_buildStatus = RPS_OK; + + m_explicitDependencies.reset_keep_capacity(&m_cmdArena); + + m_dynamicNodeDecls.reset_keep_capacity(&m_cmdArena); + + m_resourceDecls.reset_keep_capacity(&m_cmdArena); + m_resourceDecls.resize(m_renderGraph.GetSignature().GetMaxExternalResourceCount(), {}); + + uint32_t resOffset = 0; + auto paramDecls = m_renderGraph.GetSignature().GetParamDecls(); + + for (uint32_t iParam = 0; iParam < paramDecls.size(); iParam++) + { + const auto& paramDecl = paramDecls[iParam]; + + if (paramDecl.IsResource()) + { + auto resSlots = m_resourceDecls.range(resOffset, paramDecl.GetNumElements()); + auto& paramData = m_paramData[iParam]; + + if (!resSlots.empty()) + { + paramData.resources = {resOffset, uint32_t(resSlots.size())}; + + RpsVariable pResDescVar = paramData.data; + for (uint32_t iRes = 0, numRes = uint32_t(resSlots.size()); iRes < numRes; iRes++) + { + auto& resSlot = resSlots[iRes]; + + resSlot.desc = pResDescVar; + resSlot.name = paramDecl.name; + + pResDescVar = rpsBytePtrInc(pResDescVar, paramDecl.typeInfo.size); + + resOffset++; + } + } + } + } + + std::fill(m_outputResourceIds.begin(), m_outputResourceIds.end(), RPS_RESOURCE_ID_INVALID); + + RPS_ASSERT(resOffset == m_renderGraph.GetSignature().GetMaxExternalResourceCount()); + + return RPS_OK; + } + + RpsResult RenderGraphBuilder::End() + { + RPS_RETURN_ERROR_IF(m_state != State::Building, RPS_ERROR_INVALID_OPERATION); + + RpsResult result = m_buildStatus; + m_buildStatus = RPS_OK; + m_state = State::Closed; + + auto& cmdInfos = m_renderGraph.GetCmdInfos(); + for (auto cmdIter = cmdInfos.begin(), cmdEnd = cmdInfos.end(); cmdIter != cmdEnd; ++cmdIter) + { + cmdIter->pCmdDecl = 
cmdIter->IsNodeDeclBuiltIn() ? nullptr : m_cmdNodes.GetSlot(cmdIter->cmdDeclIndex); + } + + return result; + } + + void* RenderGraphBuilder::AllocateData(size_t size, size_t alignment) + { + RPS_RETURN_ERROR_IF(m_state != State::Building, nullptr); + + return m_cmdArena.AlignedAlloc(size, alignment); + } + + RpsVariable RenderGraphBuilder::DeclareVariable(size_t size, size_t alignment, const void* pInitData) + { + void* pVarData = AllocateData(size, rpsMax(alignof(uint32_t), alignment)); + if (pVarData && pInitData) + { + memcpy(pVarData, pInitData, size); + } + return pVarData; + } + + RpsNodeDeclId RenderGraphBuilder::DeclareDynamicNode(const RpsNodeDesc* pNodeDesc) + { + RPS_RETURN_ERROR_IF(pNodeDesc == nullptr, RPS_NODEDECL_ID_INVALID); + + auto* pNewNodeDecl = m_cmdArena.New(); + RPS_RETURN_ERROR_IF(pNewNodeDecl == nullptr, RPS_NODEDECL_ID_INVALID); + + RPS_RETURN_ERROR_IF(RPS_FAILED(RenderGraphSignature::InitNodeDecl(m_cmdArena, *pNodeDesc, *pNewNodeDecl)), + RPS_NODEDECL_ID_INVALID); + + return m_dynamicNodeDecls.push_back(pNewNodeDecl) + ? 
(m_dynamicNodeDeclIdBegin + RpsNodeDeclId(m_dynamicNodeDecls.size() - 1)) + : RPS_NODEDECL_ID_INVALID; + } + + RpsResult RenderGraphBuilder::DeclareResource(uint32_t localResourceId, + RpsVariable hDescVar, + StrRef name, + RpsResourceId* pOutResId) + { + uint32_t resourceId = GetOrAllocResourceSlot(localResourceId); + + RPS_ASSERT(resourceId >= m_renderGraph.GetSignature().GetMaxExternalResourceCount()); + + if (resourceId >= m_resourceDecls.size()) + { + m_resourceDecls.resize(resourceId + 1, {}); + } + + auto& resDecl = m_resourceDecls[resourceId]; + resDecl.desc = hDescVar; + resDecl.name = m_cmdArena.StoreStr(name); + + *pOutResId = resourceId; + + return RPS_OK; + } + + RpsResult RenderGraphBuilder::SetResourceName(RpsResourceId resourceId, StrRef name) + { + // Bounds check only: the earlier "(id >= external count) || (id < size)" test also passed ids past the end of m_resourceDecls. + RPS_CHECK_ARGS(resourceId < m_resourceDecls.size()); + + auto& resDecl = m_resourceDecls[resourceId]; + resDecl.name = m_cmdArena.StoreStr(name); + + return RPS_OK; + } + + static TResult GetBuiltInCmdNodeInfo(BuiltInNodeDeclIds nodeDeclId) + { + static constexpr struct + { + BuiltInNodeDeclIds id; + const NodeDeclInfo declInfo; + } c_builtInNodeNames[] = { + {RPS_BUILTIN_NODE_INVALID, {StrRef::From("invalid")}}, + {RPS_BUILTIN_NODE_SCHEDULER_BARRIER, {StrRef::From("scheduler_barrier")}}, + {RPS_BUILTIN_NODE_SUBGRAPH_BEGIN, {StrRef::From("subgraph_begin")}}, + {RPS_BUILTIN_NODE_SUBGRAPH_END, {StrRef::From("subgraph_end")}}, + {RPS_BUILTIN_NODE_BEGIN_SUBROUTINE, {StrRef::From("subroutine_begin")}}, + {RPS_BUILTIN_NODE_END_SUBROUTINE, {StrRef::From("subroutine_end")}}, + }; + + const uint32_t tableIdx = uint32_t(-(int32_t(nodeDeclId) + 1)); + + RPS_CHECK_ARGS(tableIdx < RPS_COUNTOF(c_builtInNodeNames)); + RPS_RETURN_ERROR_IF(c_builtInNodeNames[tableIdx].id != nodeDeclId, RPS_ERROR_INTERNAL_ERROR); + + return &c_builtInNodeNames[tableIdx].declInfo; + } + + RpsResult RenderGraphBuilder::AddNode(RpslHost* pRpslHost, +
RpsNodeDeclId localNodeDeclId, + ArrayRef args, + RpsNodeFlags callFlags, + uint32_t nodeLocalId, + RpsNodeId* pOutCmdId) + { + const Subprogram* pCurrProgram = m_pCurrProgram->m_pProgram; + + auto& nodeImpl = pCurrProgram->GetNodeImpl(localNodeDeclId); + + if (nodeImpl.type == Subprogram::RpslNodeImpl::Type::RpslEntry) + { + RPS_ASSERT(nodeImpl.pSubprogram->GetEntry()); + + // Dummy BeginSubroutine node + RpsNodeId beginSubroutine = RPS_CMD_ID_INVALID; + + auto subRoutineNodeDecl = GetBuiltInCmdNodeInfo(RPS_BUILTIN_NODE_BEGIN_SUBROUTINE); + RPS_V_RETURN(subRoutineNodeDecl.Result()); + + RPS_V_RETURN(AddCmdNode( + RPS_BUILTIN_NODE_BEGIN_SUBROUTINE, subRoutineNodeDecl, nodeLocalId, {}, nullptr, 0, &beginSubroutine)); + RPS_ASSERT((beginSubroutine != RPS_CMD_ID_INVALID) && "invalid RenderGraphBuilder::AddCmdNode impl"); + + // TODO: using CmdId as global persistent ProgramInstanceId for now. + // TODO: need to version nodeImpl.pSubprogram in case it's recreated at the same address. + auto pSubprogramInstanceId = &m_cmdNodes.GetSlot(beginSubroutine)->programInstanceId; + ProgramInstance* pSubprogramInstance = + m_renderGraph.GetOrCreateProgramInstance(nodeImpl.pSubprogram, *pSubprogramInstanceId); + + if (pCurrProgram->GetEntry()) + { + // Fast path, both caller and callee are RPSL functions, + // call the function directly without extra context setup. + + ScopedContext programContext(&m_pCurrProgram, pSubprogramInstance); + + (nodeImpl.pSubprogram->GetEntry()->pfnEntry)( + uint32_t(args.size()), args.data(), RPSL_ENTRY_CALL_SUBPROGRAM); + } + else + { + ScopedContext programContext(&m_pCurrProgram, pSubprogramInstance); + + RpslExecuteInfo callInfo = {}; + callInfo.pProgram = nodeImpl.pSubprogram; + callInfo.ppArgs = args.data(); + callInfo.numArgs = uint32_t(args.size()); + + // Temp - Remove pRpslHost param when making RpslHost local context. 
+ RPS_V_RETURN(pRpslHost->Execute(callInfo)); + } + + *pOutCmdId = beginSubroutine; + } + else + { + auto pNodeDecl = pCurrProgram->GetSignature()->GetNodeDecl(localNodeDeclId); // TODO: Handle dynamic nodes + RPS_ASSERT(pNodeDecl->params.size() == args.size()); + + for (uint32_t iParam = 0, numParams = uint32_t(args.size()); iParam < numParams; iParam++) + { + auto& paramDecl = pNodeDecl->params[iParam]; + + const size_t alignment = rpsMin(paramDecl.GetElementSize(), alignof(std::max_align_t)); // TODO + const size_t paramSize = paramDecl.GetSize(); + + const void* pSrc = args[iParam]; + + args[iParam] = AllocateData(paramSize, alignment); + + // TODO: Check if we can pass the pointer to be filled on RPSL side. + memcpy(args[iParam], pSrc, paramSize); + } + + const auto& callback = ((nodeImpl.type == Subprogram::RpslNodeImpl::Type::Callback) && + (nodeImpl.callback.pfnCallback != nullptr)) + ? nodeImpl.callback + : pCurrProgram->GetDefaultNodeCallback(); + + RPS_V_RETURN(AddCmdNode( + localNodeDeclId, pNodeDecl, nodeLocalId, callback, args.data(), uint32_t(args.size()), pOutCmdId, callFlags)); + RPS_ASSERT((*pOutCmdId != RPS_CMD_ID_INVALID) && "invalid RenderGraphBuilder::AddCmdNode impl"); + } + + return RPS_OK; + } + + RpsNodeId RenderGraphBuilder::GetOrAllocCmdSlot(uint32_t localNodeId) + { + auto pCmdId = m_pCurrProgram->m_cmdIds.get_or_grow(localNodeId, RPS_CMD_ID_INVALID); + RPS_RETURN_ERROR_IF(!pCmdId, RPS_CMD_ID_INVALID); + + if (*pCmdId == RPS_CMD_ID_INVALID) + { + *pCmdId = AllocCmdSlot(); + } + + return *pCmdId; + } + + uint32_t RenderGraphBuilder::GetOrAllocResourceSlot(uint32_t localResourceId) + { + auto pResId = m_pCurrProgram->m_resourceIds.get_or_grow(localResourceId, RPS_RESOURCE_ID_INVALID); + RPS_RETURN_ERROR_IF(!pResId, RPS_RESOURCE_ID_INVALID); + + if (*pResId == RPS_RESOURCE_ID_INVALID) + { + *pResId = AllocResourceSlot(); + } + + return *pResId; + } + + uint32_t RenderGraphBuilder::AllocResourceSlot() + { + return 
m_resourceDeclSlots.AllocSlot() + m_renderGraph.GetSignature().GetMaxExternalResourceCount(); + } + + RpsResult RenderGraphBuilder::AddCmdNode(RpsNodeDeclId nodeDeclId, + uint32_t localNodeId, + const RpsCmdCallback& callback, + const RpsVariable* pArgs, + uint32_t numArgs, + RpsNodeId* pOutCmdId) + { + auto pNodeDecl = (nodeDeclId < m_dynamicNodeDeclIdBegin) + ? m_pCurrProgram->m_pProgram->GetSignature()->GetNodeDecl(nodeDeclId) + : m_dynamicNodeDecls[nodeDeclId - m_dynamicNodeDeclIdBegin]; + + RPS_CHECK_ARGS(pNodeDecl->params.size() == numArgs); + + return AddCmdNode(nodeDeclId, pNodeDecl, localNodeId, callback, pArgs, numArgs, pOutCmdId); + } + + RpsResult RenderGraphBuilder::AddCmdNode(RpsNodeDeclId nodeDeclId, + const NodeDeclInfo* pNodeDecl, + uint32_t localNodeId, + const RpsCmdCallback& callback, + const RpsVariable* pArgs, + uint32_t numArgs, + RpsNodeId* pOutCmdId, + RpsNodeFlags flags) + { + if (!CmdInfo::IsNodeDeclIdBuiltIn(nodeDeclId) && + !!(m_renderGraph.GetCreateInfo().renderGraphFlags & RPS_RENDER_GRAPH_DISALLOW_UNBOUND_NODES_BIT) && + !callback.pfnCallback) + { + return RPS_ERROR_UNRECOGNIZED_COMMAND; + } + + const uint32_t currCmdSlot = GetOrAllocCmdSlot(localNodeId); + RPS_RETURN_ERROR_IF(currCmdSlot == RPS_CMD_ID_INVALID, RPS_ERROR_OUT_OF_MEMORY); // GetOrAllocCmdSlot reports failure with RPS_CMD_ID_INVALID; do not look up that slot + Cmd* pOp = m_cmdNodes.GetSlot(currCmdSlot); + pOp->nodeDeclId = nodeDeclId; + pOp->callback = callback; + pOp->args = m_cmdArena.NewArray(numArgs); + std::copy(pArgs, pArgs + numArgs, pOp->args.begin()); + + auto& cmdInfos = m_renderGraph.GetCmdInfos(); + + const uint32_t currNodeIdx = uint32_t(cmdInfos.size()); + CmdInfo* pCmdInfo = cmdInfos.grow(1, {}); + RPS_CHECK_ALLOC(pCmdInfo); // same check AddBuiltInCmdNode applies to the result of grow() + pCmdInfo->nodeDeclIndex = nodeDeclId; + pCmdInfo->cmdDeclIndex = currCmdSlot; + pCmdInfo->bPreferAsync = + !!(flags & RPS_NODE_PREFER_ASYNC) || (pNodeDecl && !!(pNodeDecl->flags & RPS_NODE_DECL_PREFER_ASYNC)); + pCmdInfo->pNodeDecl = pNodeDecl; + + *pOutCmdId = currNodeIdx; + + return RPS_OK; + } + + RpsResult RenderGraphBuilder::ScheduleBarrier() + { + return AddBuiltInCmdNode(RPS_BUILTIN_NODE_SCHEDULER_BARRIER).Result();
+ } + + RpsResult RenderGraphBuilder::BeginSubgraph(RpsSubgraphFlags flags) + { + TResult cmdInfoResult = AddBuiltInCmdNode(RPS_BUILTIN_NODE_SUBGRAPH_BEGIN); + RPS_V_RETURN(cmdInfoResult.Result()); + + cmdInfoResult.data->subgraphFlags = flags; + return RPS_OK; + } + + RpsResult RenderGraphBuilder::EndSubgraph() + { + return AddBuiltInCmdNode(RPS_BUILTIN_NODE_SUBGRAPH_END).Result(); + } + + TResult RenderGraphBuilder::AddBuiltInCmdNode(BuiltInNodeDeclIds nodeDeclId) + { + auto pNodeDeclInfo = GetBuiltInCmdNodeInfo(nodeDeclId); + RPS_V_RETURN(pNodeDeclInfo.Result()); + + CmdInfo* pCmdInfo = m_renderGraph.GetCmdInfos().grow(1, {}); + RPS_CHECK_ALLOC(pCmdInfo); + + pCmdInfo->nodeDeclIndex = nodeDeclId; + pCmdInfo->pNodeDecl = pNodeDeclInfo; + + return pCmdInfo; + } + + void RenderGraphBuilder::AddDependency(RpsNodeId before, RpsNodeId after) + { + m_explicitDependencies.emplace_back(NodeDependency{before, after}); + } + + RpsResult RenderGraphBuilder::SetOutputParamResourceView(RpsParamId paramId, const RpsResourceView* pViews) + { + const ParamDecl& paramDecl = GetRenderGraph().GetSignature().GetParamDecl(paramId); + RPS_RETURN_ERROR_IF(!paramDecl.IsOutputResource(), RPS_ERROR_INVALID_PROGRAM); + + auto outResIdRangeRef = m_paramData[paramId].outputResourceIds.Get(m_outputResourceIds); + + for (uint32_t iElem = 0; iElem < paramDecl.GetNumElements(); iElem++) + { + // TODO: Ignoring view info currently, only taking the resource info. + // Need to investigate passing on view info as well. + outResIdRangeRef[iElem] = pViews[iElem].resourceId; + } + + return RPS_OK; + } +} // namespace rps diff --git a/src/runtime/common/rps_render_graph_builder.hpp b/src/runtime/common/rps_render_graph_builder.hpp new file mode 100644 index 0000000..b74e417 --- /dev/null +++ b/src/runtime/common/rps_render_graph_builder.hpp @@ -0,0 +1,221 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef RPS_RENDER_GRAPH_BUILDER_H +#define RPS_RENDER_GRAPH_BUILDER_H + +#include "core/rps_core.hpp" +#include "core/rps_util.hpp" +#include "runtime/common/rps_cmd_buf.hpp" +#include "runtime/common/rps_render_graph_signature.hpp" +#include "runtime/common/rps_render_graph_resource.hpp" + +namespace rps +{ + class RenderGraph; + class RpslHost; + class ProgramInstance; + class RenderGraphSignature; + struct CmdInfo; + + class RenderGraphBuilder + { + friend class RenderGraph; + RPS_CLASS_NO_COPY_MOVE(RenderGraphBuilder); + + RenderGraphBuilder(RenderGraph& renderGraph, Arena& persistentArena, Arena& frameArena) + : m_renderGraph(renderGraph) + , m_cmdArena(frameArena) + , m_resourceDecls(&m_cmdArena) + , m_resourceDeclSlots(&persistentArena) + , m_cmdNodes(&persistentArena) + , m_explicitDependencies(&m_cmdArena) + , m_dynamicNodeDecls(&m_cmdArena) + { + } + + RpsResult Init(const RenderGraphSignature* pSignature, + Arena& persistentArena, + ProgramInstance* pRootProgramInstance); + + public: + ProgramInstance* GetCurrentProgram() const + { + return m_pCurrProgram; + } + + const RenderGraph& GetRenderGraph() const + { + return m_renderGraph; + } + + Cmd* GetCmdDecl(RpsNodeId cmdId) + { + return m_cmdNodes.GetSlot(cmdId); + } + + const Cmd* GetCmdDecl(RpsNodeId cmdId) const + { + return m_cmdNodes.GetSlot(cmdId); + } + + ConstArrayRef GetResourceDecls() const + { + return m_resourceDecls.range_all(); + } + + ConstArrayRef GetOutputParamResourceIds() const + { + return m_outputResourceIds; + } + + ConstArrayRef GetOutputParamResourceIds(RpsParamId paramId) const + { + return m_paramData[paramId].outputResourceIds.Get(m_outputResourceIds); + } + + ConstArrayRef GetExplicitDependencies() const + { + return m_explicitDependencies.range_all(); + } + + TResult 
GetParamVariable(RpsParamId paramId, size_t* pSize = nullptr) const; + + template + TResult GetParamVariable(RpsParamId paramId) const + { + size_t varSize = 0; + auto result = GetParamVariable(paramId, &varSize); + + RPS_RETURN_ERROR_IF(varSize != sizeof(T), RPS_ERROR_TYPE_MISMATCH); + + return result.StaticCast(); + } + + template + RpsResult SetParamVariable(RpsParamId paramId, const T& value) + { + auto pData = GetParamVariable(paramId); + if (pData) + { + *pData = value; + return RPS_OK; + } + return RPS_ERROR_INDEX_OUT_OF_BOUNDS; + } + + RpsResourceId GetParamResourceId(RpsParamId paramId, uint32_t arrayIndex = 0) const; + + RpsResult Begin(); + RpsResult End(); + void* AllocateData(size_t size, size_t alignment); + RpsVariable DeclareVariable(size_t size, size_t alignment, const void* pData = nullptr); + RpsNodeDeclId DeclareDynamicNode(const RpsNodeDesc* pNodeDesc); + RpsResult DeclareResource(uint32_t localResourceId, + RpsVariable hDescVar, + StrRef name, + RpsResourceId* pOutResId); + RpsResult SetResourceName(RpsResourceId resourceId, StrRef name); + RpsResult AddCmdNode(RpsNodeDeclId nodeDeclId, + uint32_t tag, + const RpsCmdCallback& callback, + const RpsVariable* pArgs, + uint32_t numArgs, + RpsNodeId* pOutCmdId); + RpsResult AddNode(RpslHost* pRpslHost, + RpsNodeDeclId localNodeDeclId, + ArrayRef args, + RpsNodeFlags callFlags, + uint32_t nodeLocalId, + RpsNodeId* pOutCmdId); + RpsResult ScheduleBarrier(); + RpsResult BeginSubgraph(RpsSubgraphFlags flags); + RpsResult EndSubgraph(); + void AddDependency(RpsNodeId before, RpsNodeId after); + RpsResult SetOutputParamResourceView(RpsParamId paramId, const RpsResourceView* pViews); + + RpsResult Print(const RpsPrinter* pPrinter); + + private: + void SetBuildError(RpsResult errorCode) + { + RPS_ASSERT(m_state == State::Building); + + m_state = State::Error; + m_buildStatus = errorCode; + } + + uint32_t GetOrAllocCmdSlot(uint32_t localNodeId); + uint32_t GetOrAllocResourceSlot(uint32_t localResourceId); + 
+ uint32_t AllocCmdSlot() + { + return m_cmdNodes.AllocSlot(); + } + + void FreeCmdSlot(uint32_t cmdId) + { + m_cmdNodes.FreeSlot(cmdId); + } + + RpsResult AddCmdNode(RpsNodeDeclId nodeDeclId, + const NodeDeclInfo* pNodeDecl, + uint32_t tag, + const RpsCmdCallback& callback, + const RpsVariable* pArgs, + uint32_t numArgs, + RpsNodeId* pOutCmdId, + RpsNodeFlags flags = RPS_NODE_FLAG_NONE); + + TResult AddBuiltInCmdNode(BuiltInNodeDeclIds nodeDeclId); + + uint32_t AllocResourceSlot(); + + void FreeResourceSlot(uint32_t resourceId) + { + m_resourceDeclSlots.FreeSlot(resourceId); + } + + public: + struct RenderGraphArgInfo + { + RpsVariable data; + Span resources; + Span outputResourceIds; + }; + + private: + enum class State + { + Created, + Closed, + Building, + Error, + }; + + RenderGraph& m_renderGraph; + Arena& m_cmdArena; + State m_state = State::Created; + RpsResult m_buildStatus = RPS_OK; + + ArrayRef m_paramData; + ArrayRef m_outputResourceIds; + ArenaVector m_resourceDecls; + ArenaFreeListPool m_resourceDeclSlots; + ArenaFreeListPool m_cmdNodes; + ArenaVector m_explicitDependencies; + + ArenaVector m_dynamicNodeDecls; + uint32_t m_dynamicNodeDeclIdBegin = 0; + + ProgramInstance* m_pCurrProgram = nullptr; + }; + + RPS_ASSOCIATE_HANDLE(RenderGraphBuilder); +} + +#endif //RPS_RENDER_GRAPH_BUILDER_H diff --git a/src/runtime/common/rps_render_graph_diagnostics.cpp b/src/runtime/common/rps_render_graph_diagnostics.cpp new file mode 100644 index 0000000..b4062b0 --- /dev/null +++ b/src/runtime/common/rps_render_graph_diagnostics.cpp @@ -0,0 +1,107 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#include "runtime/common/rps_render_graph.hpp" + +namespace rps +{ + + void RenderGraph::GatherResourceDiagnosticInfo(RpsResourceDiagnosticInfo& dst, + const ResourceInstance& src, + uint32_t resIndex) + { + const StrRef name = (src.resourceDeclId == RPS_INDEX_NONE_U32) + ? StrRef() + : GetBuilder().GetResourceDecls()[src.resourceDeclId].name; + + dst.name = m_diagInfoArena.StoreStr(name).str; + dst.temporalChildIndex = src.temporalLayerOffset; + dst.isExternal = src.isExternal; + src.desc.Get(dst.desc); + dst.allAccesses = src.allAccesses; + dst.initialAccess = src.initialAccess; + dst.lifetimeBegin = src.lifetimeBegin; + dst.lifetimeEnd = src.lifetimeEnd; + dst.allocRequirement = src.allocRequirement; + dst.allocPlacement = src.allocPlacement; + dst.hRuntimeResource = src.hRuntimeResource; + } + + void RenderGraph::GatherCmdDiagnosticInfo(RpsCmdDiagnosticInfo& dst, const RuntimeCmdInfo& src, uint32_t cmdIndex) + { + dst = {}; + + dst.cmdIndex = cmdIndex; + dst.isTransition = src.isTransition; + + if (src.HasTransitionInfo()) + { + const TransitionInfo& transInfo = m_transitions[src.cmdId]; + dst.transition.prevAccess = (transInfo.prevTransition != RenderGraph::INVALID_TRANSITION) + ? m_transitions[transInfo.prevTransition].access.access + : GetResourceInstance(transInfo.access.resourceId).initialAccess; + dst.transition.nextAccess = transInfo.access.access; + transInfo.access.range.Get(dst.transition.range); + dst.transition.resourceIndex = transInfo.access.resourceId; + } + } + + void RenderGraph::GatherHeapDiagnosticInfo(RpsHeapDiagnosticInfo& dst, + const HeapInfo& src) + { + dst.size = (src.size == UINT64_MAX) ? 
src.maxUsedSize : src.size; + dst.usedSize = src.usedSize; + dst.maxUsedSize = src.maxUsedSize; + dst.alignment = src.alignment; + dst.memoryTypeIndex = src.memTypeIndex; + dst.hRuntimeHeap = src.hRuntimeHeap; + } + + RpsResult RenderGraph::UpdateDiagCache() + { + m_diagInfoArena.Reset(); + m_diagData.resourceInfos.reset(&m_diagInfoArena); + m_diagData.cmdInfos.reset(&m_diagInfoArena); + m_diagData.heapInfos.reset(&m_diagInfoArena); + + RPS_CHECK_ALLOC(m_diagData.resourceInfos.resize(m_resourceCache.size())); + RPS_CHECK_ALLOC(m_diagData.cmdInfos.resize(m_runtimeCmdInfos.size())); + RPS_CHECK_ALLOC(m_diagData.heapInfos.resize(m_heaps.size())); + + //Resource Infos + const uint32_t numResources = uint32_t(m_resourceCache.size()); + for (uint32_t resIndex = 0; resIndex < numResources; ++resIndex) + { + RpsResourceDiagnosticInfo& writeResourceInfo = m_diagData.resourceInfos[resIndex]; + const ResourceInstance& resInstance = m_resourceCache[resIndex]; + + GatherResourceDiagnosticInfo(writeResourceInfo, resInstance, resIndex); + } + + //Command Infos + const uint32_t numTotalCmds = uint32_t(m_runtimeCmdInfos.size()); + for (uint32_t rtCmdIndex = 0; rtCmdIndex < numTotalCmds; ++rtCmdIndex) + { + const RuntimeCmdInfo& rtCmdInfo = m_runtimeCmdInfos[rtCmdIndex]; + RpsCmdDiagnosticInfo& writeCmdInfo = m_diagData.cmdInfos[rtCmdIndex]; + + GatherCmdDiagnosticInfo(writeCmdInfo, rtCmdInfo, rtCmdIndex); + } + + //Heap Infos + const uint32_t numHeaps = uint32_t(m_heaps.size()); + for (uint32_t heapIndex = 0; heapIndex < numHeaps; ++heapIndex) + { + RpsHeapDiagnosticInfo& writeHeapInfo = m_diagData.heapInfos[heapIndex]; + const HeapInfo& heapInfo = m_heaps[heapIndex]; + + GatherHeapDiagnosticInfo(writeHeapInfo, heapInfo); + } + + return RPS_OK; + } +} // namespace rps diff --git a/src/runtime/common/rps_render_graph_resource.hpp b/src/runtime/common/rps_render_graph_resource.hpp new file mode 100644 index 0000000..e00df8a --- /dev/null +++ 
b/src/runtime/common/rps_render_graph_resource.hpp @@ -0,0 +1,455 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef RPS_RENDER_GRAPH_RESOURCE_H +#define RPS_RENDER_GRAPH_RESOURCE_H + +#include "core/rps_core.hpp" +#include "core/rps_util.hpp" + +#include "runtime/common/rps_cmd_buf.hpp" + +#include "rps/runtime/common/rps_runtime.h" + +namespace rps +{ + struct ResourceDecl + { + RpsVariable desc; + StrRef name; + }; + + struct ResourceDescPacked + { + RpsResourceType type : 8; ///< An enumeration indicating the type (and dimension) of the resource. + uint32_t temporalLayers : 8; ///< The number of frames of temporal data. + RpsResourceFlags flags : 16; ///< A collection of RpsResourceFlagBits values. + + union + { + struct + { + uint32_t width; ///< The width of an image, or low 32 bits of the byte size of a buffer. + uint32_t height; ///< The height of an image, or high 32 bits of the byte size of a buffer. + union + { + uint32_t depth; ///< The depth of a 3D image. + uint32_t arrayLayers; ///< The number of array layers for a non-3D image. + }; + uint32_t mipLevels : 8; ///< The number of mipmap levels. + RpsFormat format : 8; ///< A platform independent format to be interpreted by the runtime. + uint32_t sampleCount : 8; ///< The number of MSAA samples of an image. + } image; + struct + { + uint32_t sizeInBytesLo; + uint32_t sizeInBytesHi; + } buffer; + }; + + ResourceDescPacked() + : type(RPS_RESOURCE_TYPE_UNKNOWN) + , temporalLayers(0) + , flags(RPS_RESOURCE_FLAG_NONE) + , image{} + { + } + + ResourceDescPacked(const RpsResourceDesc& desc) + { + type = desc.type; + temporalLayers = (desc.temporalLayers == 0) ?
1 : desc.temporalLayers; + flags = desc.flags; + + if (desc.type == RPS_RESOURCE_TYPE_BUFFER) + { + buffer.sizeInBytesLo = desc.buffer.sizeInBytesLo; + buffer.sizeInBytesHi = desc.buffer.sizeInBytesHi; + } + else + { + image.width = desc.image.width; + image.height = desc.image.height; + + if (desc.type == RPS_RESOURCE_TYPE_IMAGE_3D) + { + image.depth = desc.image.depth; + } + else + { + image.arrayLayers = desc.image.arrayLayers; + } + + image.mipLevels = desc.image.mipLevels; + image.format = desc.image.format; + image.sampleCount = desc.image.sampleCount; + } + } + + bool operator==(const ResourceDescPacked& rhs) const + { + return (type == rhs.type) && (temporalLayers == rhs.temporalLayers) && (flags == rhs.flags) && + (IsImage() ? ((image.width == rhs.image.width) && (image.height == rhs.image.height) && + (image.depth == rhs.image.depth) && (image.mipLevels == rhs.image.mipLevels) && + (image.format == rhs.image.format) && (image.sampleCount == rhs.image.sampleCount)) + : ((buffer.sizeInBytesLo == rhs.buffer.sizeInBytesLo) && + (buffer.sizeInBytesHi == rhs.buffer.sizeInBytesHi))); + } + + bool operator!=(const ResourceDescPacked& rhs) const + { + return !(*this == rhs); + } + + bool IsBuffer() const + { + return type == RPS_RESOURCE_TYPE_BUFFER; + } + + bool IsImage() const + { + return (type == RPS_RESOURCE_TYPE_IMAGE_1D) || (type == RPS_RESOURCE_TYPE_IMAGE_2D) || + (type == RPS_RESOURCE_TYPE_IMAGE_3D); + } + + uint64_t GetBufferSize() const + { + RPS_ASSERT(IsBuffer()); + return (uint64_t(buffer.sizeInBytesHi) << 32u) | buffer.sizeInBytesLo; + } + + void SetBufferSize(uint64_t newSize) + { + RPS_ASSERT(IsBuffer()); + buffer.sizeInBytesLo = uint32_t(newSize & UINT32_MAX); + buffer.sizeInBytesHi = uint32_t(newSize >> 32u); + } + + RpsFormat GetFormat() const + { + return IsBuffer() ? RPS_FORMAT_UNKNOWN : image.format; + } + + uint32_t GetSampleCount() const + { + return IsBuffer() ? 
1 : image.sampleCount; + } + + uint32_t GetImageDepth() const + { + return (type == RPS_RESOURCE_TYPE_IMAGE_3D) ? image.depth : 1; + } + + uint32_t GetImageArrayLayers() const + { + return ((type == RPS_RESOURCE_TYPE_IMAGE_1D) || (type == RPS_RESOURCE_TYPE_IMAGE_2D)) ? image.arrayLayers + : 1; + } + + void Get(RpsResourceDesc& unpacked) const + { + unpacked.type = type; + unpacked.temporalLayers = temporalLayers; + unpacked.flags = flags; + + if (type == RPS_RESOURCE_TYPE_BUFFER) + { + unpacked.buffer.sizeInBytesLo = buffer.sizeInBytesLo; + unpacked.buffer.sizeInBytesHi = buffer.sizeInBytesHi; + } + else + { + unpacked.image.width = image.width; + unpacked.image.height = image.height; + + if (type == RPS_RESOURCE_TYPE_IMAGE_3D) + { + unpacked.image.depth = image.depth; + } + else + { + unpacked.image.arrayLayers = image.arrayLayers; + } + + unpacked.image.mipLevels = image.mipLevels; + unpacked.image.format = image.format; + unpacked.image.sampleCount = image.sampleCount; + } + } + + PrinterRef Print(PrinterRef printer) const + { + static constexpr const char* typeNames[] = {"unknown", "buffer", "tex1D", "tex2D", "tex3D"}; + + static_assert(RPS_COUNTOF(typeNames) == RPS_RESOURCE_TYPE_COUNT, + "ResourceDescPacked::Print - typeNames need update"); + + printer("type : %s", typeNames[(type < RPS_RESOURCE_TYPE_COUNT) ? 
type : RPS_RESOURCE_TYPE_UNKNOWN]); + + switch (type) + { + case RPS_RESOURCE_TYPE_BUFFER: + printer(", num_bytes : 0x%p", uintptr_t(GetBufferSize())); + break; + case RPS_RESOURCE_TYPE_IMAGE_1D: + printer("( %u x 1 ), fmt : %s, array : %u, mip : %u", + image.width, + rpsFormatGetName(image.format), + image.arrayLayers, + image.mipLevels); + break; + case RPS_RESOURCE_TYPE_IMAGE_2D: + printer("( %u x %u ), fmt : %s, array : %u, mip : %u, samples : %u", + image.width, + image.height, + rpsFormatGetName(image.format), + image.arrayLayers, + image.mipLevels, + image.sampleCount); + break; + case RPS_RESOURCE_TYPE_IMAGE_3D: + printer("( %u x %u x %u ), fmt : %s, mip : %u", + image.width, + image.height, + image.depth, + rpsFormatGetName(image.format), + image.mipLevels); + break; + default: + break; + } + + static const NameValuePair resFlagNames[] = { + RPS_INIT_NAME_VALUE_PAIR_PREFIXED_POSTFIXED(RPS_RESOURCE_FLAG_, CUBEMAP_COMPATIBLE, _BIT), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED_POSTFIXED(RPS_RESOURCE_FLAG_, ROWMAJOR_IMAGE, _BIT), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED_POSTFIXED(RPS_RESOURCE_FLAG_, PREFER_GPU_LOCAL_CPU_VISIBLE, _BIT), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED_POSTFIXED(RPS_RESOURCE_FLAG_, PREFER_DEDICATED_ALLOCATION, _BIT), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED_POSTFIXED(RPS_RESOURCE_FLAG_, PERSISTENT, _BIT), + }; + + if (flags != RPS_RESOURCE_FLAG_NONE) + { + printer(", flags("); + printer.PrintFlags(flags, resFlagNames); + printer(")"); + } + + return printer; + } + }; + + static_assert(sizeof(ResourceDescPacked) == sizeof(uint32_t) * 5, "Unexpected packing of ResourceDescPacked"); + + struct SubresourceRangePacked + { + uint32_t aspectMask : 8; ///< The aspect mask. + uint32_t baseArrayLayer : 24; ///< The first layer accessible to the view. + uint32_t arrayLayerEnd : 22; ///< The number of layers. + uint32_t baseMipLevel : 5; ///< The base mipmapping level for the resource access. 
+        uint32_t mipLevelEnd : 5;  ///< One past the last mip level accessible to the view (exclusive end).
+
+        /// Default range: aspect mask 1, array layers [0, 1), mip levels [0, 1).
+        SubresourceRangePacked()
+            : SubresourceRangePacked(1, 0, 1, 0, 1)
+        {
+        }
+
+        /// Packs an API range given as (base, count) pairs into (base, exclusive end) pairs.
+        /// The counts are used as-is; no sentinel value is resolved by this overload.
+        constexpr SubresourceRangePacked(uint32_t aspectMask, const RpsSubresourceRange& range)
+            : SubresourceRangePacked(aspectMask,
+                                     range.baseArrayLayer,
+                                     range.baseArrayLayer + range.arrayLayers,
+                                     range.baseMipLevel,
+                                     range.baseMipLevel + range.mipLevels)
+        {
+        }
+
+        /// Initializes every field directly. Both "End" arguments are exclusive upper bounds and
+        /// must be strictly greater than their matching base (asserted below).
+        constexpr SubresourceRangePacked(uint32_t inAspectMask,
+                                         uint32_t inBaseArrayLayer,
+                                         uint32_t inArrayLayerEnd,
+                                         uint32_t inBaseMipLevel,
+                                         uint32_t inMipLevelEnd)
+            : aspectMask(inAspectMask)
+            , baseArrayLayer(inBaseArrayLayer)
+            , arrayLayerEnd(inArrayLayerEnd)
+            , baseMipLevel(inBaseMipLevel)
+            , mipLevelEnd(inMipLevelEnd)
+        {
+            RPS_ASSERT(arrayLayerEnd > baseArrayLayer);
+            RPS_ASSERT(mipLevelEnd > baseMipLevel);
+        }
+
+        /// Packs an API range, resolving the UINT32_MAX (array layers) / UINT16_MAX (mip levels)
+        /// count sentinels against the resource description.
+        ///
+        /// The packed fields store exclusive END indices, so when a sentinel is given the end is
+        /// the resource's total layer / mip count. Passing (total - base) here would yield the
+        /// remaining COUNT instead, which is too small (or even <= base) for any non-zero base.
+        ///
+        /// NOTE(review): the sentinels are presumed to mean "all remaining layers / mips" - confirm
+        /// against the public RpsSubresourceRange documentation.
+        constexpr SubresourceRangePacked(uint32_t                   aspectMask,
+                                         const RpsSubresourceRange& range,
+                                         const ResourceDescPacked&  resourceDesc)
+            : SubresourceRangePacked(
+                  aspectMask,
+                  range.baseArrayLayer,
+                  (range.arrayLayers != UINT32_MAX) ? (range.baseArrayLayer + range.arrayLayers)
+                                                    : resourceDesc.GetImageArrayLayers(),
+                  range.baseMipLevel,
+                  (range.mipLevels != UINT16_MAX) ? (range.baseMipLevel + range.mipLevels)
+                                                  : resourceDesc.image.mipLevels)
+        {
+        }
+
+        /// Unpacks into the API form, converting the exclusive ends back into counts.
+        void Get(RpsSubresourceRange& unpacked) const
+        {
+            unpacked.baseArrayLayer = baseArrayLayer;
+            unpacked.arrayLayers    = GetArrayLayerCount();
+            unpacked.baseMipLevel   = baseMipLevel;
+            unpacked.mipLevels      = GetMipLevelCount();
+        }
+
+        uint32_t GetArrayLayerCount() const
+        {
+            return arrayLayerEnd - baseArrayLayer;
+        }
+
+        uint32_t GetMipLevelCount() const
+        {
+            return mipLevelEnd - baseMipLevel;
+        }
+
+        /// Number of set aspect bits times the array layer count times the mip level count.
+        uint32_t GetNumSubresources() const
+        {
+            return rpsCountBits(aspectMask) * GetArrayLayerCount() * GetMipLevelCount();
+        }
+
+        /// Prints the range with inclusive upper bounds (end - 1).
+        void Print(PrinterRef printer) const
+        {
+            printer("plane_mask 0x%x, array[ %u - %u ], mips[ %u - %u ]",
+                    aspectMask,
+                    baseArrayLayer,
+                    arrayLayerEnd - 1,
+                    baseMipLevel,
+                    mipLevelEnd - 1);
+        }
+
+        bool operator==(const SubresourceRangePacked& rhs) const
+        {
+            return (aspectMask == rhs.aspectMask) && (baseArrayLayer == rhs.baseArrayLayer) &&
+                   (arrayLayerEnd == rhs.arrayLayerEnd) && (baseMipLevel == rhs.baseMipLevel) &&
+                   (mipLevelEnd == rhs.mipLevelEnd);
+        }
+
+        bool operator!=(const SubresourceRangePacked& rhs) const
+        {
+            return !(*this == rhs);
+        }
+
+        /// True when the two ranges share at least one aspect bit AND their half-open array layer
+        /// intervals overlap AND their half-open mip level intervals overlap.
+        static bool Intersect(const SubresourceRangePacked& lhs, const SubresourceRangePacked& rhs)
+        {
+            return !((lhs.baseMipLevel >= rhs.mipLevelEnd) || (rhs.baseMipLevel >= lhs.mipLevelEnd) ||
+                     (lhs.baseArrayLayer >= rhs.arrayLayerEnd) || (rhs.baseArrayLayer >= lhs.arrayLayerEnd) ||
+                     ((lhs.aspectMask & rhs.aspectMask) == 0));
+        }
+
+        enum
+        {
+            // Max remaining ranges is 5 from the 2.5D clipping - 1 for aspects, 2 for array layers and 2 for mips.
+ MAX_CLIP_COMPLEMENTS = 5, + }; + + static bool Clip(const SubresourceRangePacked& lhs, + const SubresourceRangePacked& rhs, + SubresourceRangePacked outComplements[MAX_CLIP_COMPLEMENTS], + uint32_t* pNumOutComplement, + SubresourceRangePacked* pOutUnionPart) + { + if (!Intersect(lhs, rhs)) + { + return false; + } + + uint32_t numComplements = 0; + + // + // Planes + // + + // Adding planes not included in other range to complement list + uint32_t complementPlaneMask = (lhs.aspectMask & (~rhs.aspectMask)); + if (complementPlaneMask != 0) + { + outComplements[numComplements++] = SubresourceRangePacked( + complementPlaneMask, lhs.baseArrayLayer, lhs.arrayLayerEnd, lhs.baseMipLevel, lhs.mipLevelEnd); + } + + const uint32_t intersectPlaneMask = (lhs.aspectMask & rhs.aspectMask); + + // + // Array Slices + // + uint32_t intersectbaseArrayLayer = lhs.baseArrayLayer; + uint32_t intersectArrayLayerEnd = lhs.arrayLayerEnd; + + if (rhs.baseArrayLayer > lhs.baseArrayLayer) + { + outComplements[numComplements++] = SubresourceRangePacked( + intersectPlaneMask, lhs.baseArrayLayer, rhs.baseArrayLayer, lhs.baseMipLevel, lhs.mipLevelEnd); + + intersectbaseArrayLayer = rhs.baseArrayLayer; + } + + if (intersectArrayLayerEnd > rhs.arrayLayerEnd) + { + outComplements[numComplements++] = SubresourceRangePacked( + intersectPlaneMask, rhs.arrayLayerEnd, lhs.arrayLayerEnd, lhs.baseMipLevel, lhs.mipLevelEnd); + + intersectArrayLayerEnd = rhs.arrayLayerEnd; + } + + // + // Mips + // + uint32_t intersectBaseMip = lhs.baseMipLevel; + uint32_t intersectMipUpperBound = lhs.mipLevelEnd; + + if (rhs.baseMipLevel > lhs.baseMipLevel) + { + outComplements[numComplements++] = SubresourceRangePacked(intersectPlaneMask, + intersectbaseArrayLayer, + intersectArrayLayerEnd, + lhs.baseMipLevel, + rhs.baseMipLevel); + + intersectBaseMip = rhs.baseMipLevel; + } + + if (lhs.mipLevelEnd > rhs.mipLevelEnd) + { + outComplements[numComplements++] = SubresourceRangePacked(intersectPlaneMask, + 
intersectbaseArrayLayer, + intersectArrayLayerEnd, + rhs.mipLevelEnd, + lhs.mipLevelEnd); + + intersectMipUpperBound = rhs.mipLevelEnd; + } + + // + // Union + // + if (pOutUnionPart) + { + *pOutUnionPart = SubresourceRangePacked(intersectPlaneMask, + intersectbaseArrayLayer, + intersectArrayLayerEnd, + intersectBaseMip, + intersectMipUpperBound); + } + + if (pNumOutComplement) + { + *pNumOutComplement = numComplements; + } + + return true; + } + }; +} // namespace rps + +#endif //RPS_RENDER_GRAPH_RESOURCE_H diff --git a/src/runtime/common/rps_render_graph_signature.hpp b/src/runtime/common/rps_render_graph_signature.hpp new file mode 100644 index 0000000..39e848c --- /dev/null +++ b/src/runtime/common/rps_render_graph_signature.hpp @@ -0,0 +1,960 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#ifndef RPS_RENDER_GRAPH_SIGNATURE_HPP +#define RPS_RENDER_GRAPH_SIGNATURE_HPP + + + +#include "core/rps_core.hpp" +#include "core/rps_util.hpp" +#include "runtime/common/rps_cmd_buf.hpp" + +#include "rps/runtime/common/rps_runtime.h" + +namespace rps +{ + static constexpr bool IsFixedFunctionResourceBindingSemantic(RpsSemantic semantic) + { + return (semantic >= RPS_SEMANTIC_RESOURCE_BINDING_BEGIN) && (semantic < RPS_SEMANTIC_USER_RESOURCE_BINDING); + } + + static constexpr bool IsDynamicRenderStateSemantic(RpsSemantic semantic) + { + return (semantic >= RPS_SEMANTIC_DYNAMIC_STATE_BEGIN) && (semantic < RPS_SEMANTIC_RESOURCE_BINDING_BEGIN); + } + + static inline RpsAccessAttr GetAccessAttrFromParamAttrList(RpsConstant attrListConst) + { + if (!attrListConst) + { + return {}; + } + + const ParamAttrList& attrList = *static_cast(attrListConst); + + RpsAccessAttr accessAttr = attrList.access; + + // Infer access from semantics: + if (IsFixedFunctionResourceBindingSemantic(attrList.semantic.semantic)) + { + static const RpsAccessFlags c_semanticToAccessMap[] = { + RPS_ACCESS_VERTEX_BUFFER_BIT, // RPS_SEMANTIC_VERTEX_BUFFER + RPS_ACCESS_INDEX_BUFFER_BIT, // RPS_SEMANTIC_INDEX_BUFFER + RPS_ACCESS_INDIRECT_ARGS_BIT, // RPS_SEMANTIC_INDIRECT_ARGS + RPS_ACCESS_INDIRECT_ARGS_BIT, // RPS_SEMANTIC_INDIRECT_COUNT + RPS_ACCESS_STREAM_OUT_BIT, // RPS_SEMANTIC_STREAM_OUT_BUFFER + RPS_ACCESS_RENDER_TARGET_BIT, // RPS_SEMANTIC_RENDER_TARGET + RPS_ACCESS_DEPTH_WRITE_BIT | RPS_ACCESS_STENCIL_WRITE_BIT, // RPS_SEMANTIC_DEPTH_STENCIL_TARGET + RPS_ACCESS_SHADING_RATE_BIT, // RPS_SEMANTIC_SHADING_RATE_IMAGE + RPS_ACCESS_RENDER_TARGET_BIT | RPS_ACCESS_RESOLVE_DEST_BIT, // RPS_SEMANTIC_RESOLVE_TARGET + }; + + + RpsAccessFlags assumedAccessFlagsFromSemantic = + c_semanticToAccessMap[attrList.semantic.semantic - RPS_SEMANTIC_RESOURCE_BINDING_BEGIN]; + + // Avoid adding additional write access if readonly access is explicitly specified for depth/stencil: + // If no access attributes are 
explicitly specified, RPS_SEMANTIC_DEPTH_STENCIL_TARGET / SV_DepthStencil + // semantic will implicitly set (RPS_ACCESS_DEPTH_WRITE_BIT | RPS_ACCESS_STENCIL_WRITE_BIT) access. + // But if user explicitly specified any readonly access, we need to avoid adding the WRITE access bit + // to the same image aspect. + if (accessAttr.accessFlags & RPS_ACCESS_DEPTH_READ_BIT) + assumedAccessFlagsFromSemantic &= ~RPS_ACCESS_DEPTH_WRITE_BIT; + + if (accessAttr.accessFlags & RPS_ACCESS_STENCIL_READ_BIT) + assumedAccessFlagsFromSemantic &= ~RPS_ACCESS_STENCIL_WRITE_BIT; + + accessAttr.accessFlags |= assumedAccessFlagsFromSemantic; + + accessAttr.accessStages = RPS_SHADER_STAGE_NONE; + } + else if (attrList.semantic.semantic >= RPS_SEMANTIC_USER_RESOURCE_BINDING) + { + RPS_TODO("Translate bindings to access flags"); + } + + return accessAttr; + } + + static inline RpsSemanticAttr GetSemanticAttrFromParamAttrList(RpsConstant attrListConst) + { + if (!attrListConst) + { + return {}; + } + + return static_cast(attrListConst)->semantic; + } + + static inline RpsNodeDeclFlags GetRequiredQueueFlagsFromAccessAttr(const RpsNodeDeclFlags nodeDeclFlag, + const RpsAccessAttr& access) + { + static constexpr RpsAccessFlags GfxComputeOnlyAccessMask = + (RPS_ACCESS_ALL_GPU & ~(RPS_ACCESS_COPY_SRC_BIT | RPS_ACCESS_COPY_DEST_BIT)); + + static constexpr RpsAccessFlags GfxOnlyAccessMask = + (RPS_ACCESS_ALL_GPU & ~(RPS_ACCESS_INDIRECT_ARGS_BIT | RPS_ACCESS_CONSTANT_BUFFER_BIT | + RPS_ACCESS_RAYTRACING_AS_BUILD_BIT | RPS_ACCESS_RAYTRACING_AS_READ_BIT)); + + static constexpr RpsShaderStageFlags GfxShaderStages = (RPS_SHADER_STAGE_ALL & (~RPS_SHADER_STAGE_CS)); + + static constexpr RpsAccessFlags ShaderStageDependentAccessMask = + RPS_ACCESS_SHADER_RESOURCE_BIT | RPS_ACCESS_UNORDERED_ACCESS_BIT; + + if ((access.accessFlags & RPS_ACCESS_ALL_GPU) == 0) + { + return RPS_NODE_DECL_FLAG_NONE; + } + + if ((access.accessFlags & GfxComputeOnlyAccessMask) == 0) + { + return RPS_NODE_DECL_COPY_BIT; + } + + // 
For SRV | UAV access, the queue requirements depend on the shader stages. + // Also if user specified a compute node type but included GFX shader stages in the shader stage mask, + // we still treat it as compute node. This is because legacy "uav" access includes both CS & PS stages. + if (((access.accessFlags & GfxOnlyAccessMask) == 0) || + ((access.accessFlags & ShaderStageDependentAccessMask) && + (!(access.accessStages & GfxShaderStages) || (nodeDeclFlag & RPS_NODE_DECL_COMPUTE_BIT)))) + { + return RPS_NODE_DECL_COMPUTE_BIT; + } + + return RPS_NODE_DECL_GRAPHICS_BIT; + } + + struct ParamDecl + { + StrRef name = {}; + RpsTypeInfo typeInfo = {}; + uint32_t numElements = {}; + RpsParameterFlags flags : 30; + uint32_t isArray : 1; + uint32_t isUnboundedArray : 1; + RpsAccessAttr access = {}; + + ParamDecl() + : flags(RPS_PARAMETER_FLAG_NONE) + , isArray(RPS_FALSE) + , isUnboundedArray(RPS_FALSE) + { + } + + ParamDecl(Arena& allocator, const RpsParameterDesc& desc) + : name(allocator.StoreCStr(desc.name)) + , typeInfo(desc.typeInfo) + , numElements((desc.arraySize == UINT32_MAX) ? 0 : ((desc.arraySize == 0) ? 
1 : desc.arraySize)) + , flags(desc.flags) + , isArray(desc.arraySize != 0) + , isUnboundedArray(desc.arraySize == UINT32_MAX) + , access(GetAccessAttrFromParamAttrList(desc.attr)) + { + } + + bool IsOptional() const + { + return rpsAnyBitsSet(flags, RPS_PARAMETER_FLAG_OPTIONAL_BIT); + } + + bool IsResource() const + { + return rpsAnyBitsSet(flags, RPS_PARAMETER_FLAG_RESOURCE_BIT); + } + + bool IsOutputResource() const + { + return rpsAllBitsSet(flags, RPS_PARAMETER_FLAG_RESOURCE_BIT | RPS_PARAMETER_FLAG_OUT_BIT); + } + + bool IsArray() const + { + return isArray; + } + + bool IsUnboundedArray() const + { + return isUnboundedArray; + } + + uint32_t GetNumElements() const + { + return numElements; + } + + size_t GetElementSize() const + { + return typeInfo.size; + } + + size_t GetSize() const + { + return numElements * GetElementSize(); + } + + void GetDesc(RpsParameterDesc* pDesc) const + { + pDesc->typeInfo = typeInfo; + pDesc->arraySize = IsUnboundedArray() ? UINT32_MAX : (IsArray() ? 
GetNumElements() : 0); + pDesc->attr = nullptr; + pDesc->name = name.str; // TODO - Make sure this is null terminated + pDesc->flags = flags; + } + }; + + struct NodeParamDecl : public ParamDecl + { + RpsSemantic semantic = {}; + uint32_t baseSemanticIndex = 0; + uint32_t accessOffset = 0; + + NodeParamDecl() + { + } + + NodeParamDecl(Arena& allocator, const RpsParameterDesc& desc, uint32_t* pNumAccessesInNode) + : ParamDecl(allocator, desc) + { + if (desc.attr) + { + const RpsAccessAttr accessAttr = GetAccessAttrFromParamAttrList(desc.attr); + const RpsSemanticAttr semanticAttr = GetSemanticAttrFromParamAttrList(desc.attr); + + access = accessAttr; + semantic = semanticAttr.semantic; + accessOffset = *pNumAccessesInNode; + baseSemanticIndex = semanticAttr.semanticIndex; + + if (accessAttr.accessFlags != RPS_ACCESS_UNKNOWN) + { + *pNumAccessesInNode += GetNumElements(); + } + } + } + }; + + struct ParamSemanticsKindInfo + { + RpsSemantic semantic; + Span params; + }; + + struct ParamElementRef + { + RpsParamId paramId; + uint32_t arrayOffset; + }; + + struct NodeDeclRenderPassInfo + { + ParamElementRef* paramRefs; + uint32_t numParamRefs : 8; + uint32_t renderTargetsMask : 8; + uint32_t resolveTargetsMask : 8; + uint32_t renderTargetClearMask : 8; + uint32_t depthStencilTargetMask : 1; + uint32_t clearDepth : 1; + uint32_t clearStencil : 1; + uint32_t resolveDepthStencil : 1; + uint32_t resolveTargetRefs : 4; + uint32_t clearOnly : 1; + uint32_t clearValueRefs : 7; + uint32_t viewportRefs : 8; + uint32_t scissorRectRefs : 8; + + uint32_t GetRenderTargetsCount() const + { + return rpsCountBits(renderTargetsMask); + } + + uint32_t GetRenderTargetsSlotCount() const + { + return 32 - rpsFirstBitHigh(renderTargetsMask); + } + + uint32_t GetResolveTargetCount() const + { + return rpsCountBits(resolveTargetsMask); + } + + ArrayRef GetRenderTargetRefs() const + { + return {paramRefs, GetRenderTargetsCount()}; + } + + ParamElementRef* GetDepthStencilRef() const + { + return 
depthStencilTargetMask ? paramRefs + GetRenderTargetsCount() : nullptr; + } + + ArrayRef GetResolveTargetRefs() const + { + return {paramRefs + resolveTargetRefs, GetResolveTargetCount()}; + } + + uint32_t GetRenderTargetClearCount() const + { + return rpsCountBits(renderTargetClearMask); + } + + ArrayRef GetRenderTargetClearValueRefs() const + { + return {paramRefs + clearValueRefs, GetRenderTargetClearCount()}; + } + + ParamElementRef* GetDepthClearValueRef() const + { + return clearDepth ? (paramRefs + clearValueRefs + GetRenderTargetClearCount()) : nullptr; + } + + ParamElementRef* GetStencilClearValueRef() const + { + return clearStencil ? (paramRefs + clearValueRefs + GetRenderTargetClearCount() + (clearDepth ? 1 : 0)) + : nullptr; + } + + ArrayRef GetViewportRefs() const + { + return {paramRefs + viewportRefs, size_t(scissorRectRefs - viewportRefs)}; + } + + ArrayRef GetScissorRefs() const + { + return {paramRefs + scissorRectRefs, size_t(clearValueRefs - scissorRectRefs)}; + } + }; + + struct NodeDeclInfo + { + StrRef name; + ArrayRef params; + RpsNodeDeclFlags flags; + ArrayRef semanticKinds; + ArrayRef semanticParamTable; + Span dynamicStates; + Span fixedFunctionBindings; + uint32_t numAccesses; + NodeDeclRenderPassInfo* pRenderPassInfo; + + bool MaybeGraphicsNode() const + { + // Default to gfx node if no queue-flags set. 
+            return !((flags & RPS_NODE_DECL_COMPUTE_BIT) || (flags & RPS_NODE_DECL_COPY_BIT));
+        }
+    };
+
+    // NOTE(review): template argument lists (e.g. on Alloc / ArrayRef / ConstArrayRef) appear to have
+    // been stripped from this copy of the source; they are reproduced here exactly as found.
+    class RenderGraphSignature
+    {
+        RPS_CLASS_NO_COPY_MOVE(RenderGraphSignature);
+
+        RenderGraphSignature(Arena& allocator)
+            : m_allocator(allocator)
+        {
+        }
+
+    public:
+        /// Allocates a RenderGraphSignature from the arena, constructs it in place and initializes
+        /// it from the descriptor.
+        ///
+        /// On success *ppSignature receives the new object. If Init fails, the object is destroyed,
+        /// *ppSignature is left untouched and the Init result is returned.
+        static RpsResult Create(Arena&                             allocator,
+                                const RpsRenderGraphSignatureDesc* pSignatureDesc,
+                                RenderGraphSignature**             ppSignature)
+        {
+            RPS_CHECK_ARGS(pSignatureDesc);
+            RPS_CHECK_ARGS(ppSignature);
+
+            void* pMem = allocator.Alloc();
+
+            // Validate the raw allocation BEFORE constructing into it: placement-new on a null
+            // pointer is undefined behaviour, and checking the pointer it returns is too late
+            // because the constructor has already run by then.
+            RPS_CHECK_ALLOC(pMem);
+
+            RenderGraphSignature* pSignature = new (pMem) RenderGraphSignature(allocator);
+
+            RpsResult result = pSignature->Init(*pSignatureDesc);
+
+            if (RPS_SUCCEEDED(result))
+            {
+                *ppSignature = pSignature;
+            }
+            else
+            {
+                pSignature->Destroy();
+            }
+
+            return result;
+        }
+
+        /// Runs the destructor in place; no memory is released here.
+        void Destroy()
+        {
+            // Nothing to do atm.
+            this->~RenderGraphSignature();
+        }
+
+        ConstArrayRef GetNodeDecls() const
+        {
+            return m_nodeDecls;
+        }
+
+        /// Returns the node declaration for the id, or nullptr when the id is out of range.
+        const NodeDeclInfo* GetNodeDecl(RpsNodeDeclId nodeDeclId) const
+        {
+            return (nodeDeclId < m_nodeDecls.size()) ? &m_nodeDecls[nodeDeclId] : nullptr;
+        }
+
+        /// Returns the index of the first node declaration whose name equals `name`, or
+        /// RPS_INDEX_NONE_U32 when none matches. The stored name is read at index name.len,
+        /// so it is assumed to be null-terminated - TODO confirm for all producers of NodeDeclInfo.
+        uint32_t FindNodeDeclIndexByName(const StrRef name) const
+        {
+            auto iter = std::find_if(m_nodeDecls.begin(), m_nodeDecls.end(), [&](const auto& nodeDecl) {
+                return ((0 == strncmp(nodeDecl.name.str, name.str, name.len)) && (nodeDecl.name.str[name.len] == '\0'));
+            });
+            return (iter != m_nodeDecls.end()) ? uint32_t(iter - m_nodeDecls.begin()) : RPS_INDEX_NONE_U32;
+        }
+
+        ConstArrayRef GetParamDecls() const
+        {
+            return m_paramDecls;
+        }
+
+        /// Unchecked access: paramId must be a valid index into the parameter declarations.
+        const ParamDecl& GetParamDecl(RpsParamId paramId) const
+        {
+            return m_paramDecls[paramId];
+        }
+
+        uint32_t GetMaxExternalResourceCount() const
+        {
+            return m_maxExternalResources;
+        }
+
+        // TODO: Remove when external resource access is provided by caller
+        RpsParamId GetResourceParamId(RpsResourceId resourceId) const
+        {
+            return (resourceId < m_externalResourceParamIds.size()) ? 
m_externalResourceParamIds[resourceId] + : RPS_PARAM_ID_INVALID; + } + + static RpsResult InitNodeDecl(Arena& allocator, const RpsNodeDesc& nodeDesc, NodeDeclInfo& nodeDecl) + { + auto rpsAllocator = allocator.AsRpsAllocator(); + + SortedParamSemanticList semanticList{&rpsAllocator}; + + return InitNodeDecl(allocator, nodeDesc, nodeDecl, semanticList); + } + + private: + RpsResult Init(const RpsRenderGraphSignatureDesc& signatureDesc) + { + RPS_V_RETURN(InitParams(signatureDesc)); + RPS_V_RETURN(InitNodeDeclInfos(signatureDesc)); + + return RPS_OK; + } + + RpsResult InitParams(const RpsRenderGraphSignatureDesc& signatureDesc) + { + if (signatureDesc.numParams) + { + m_paramDecls = m_allocator.NewArray( + [&](size_t idx, ParamDecl* ptr) { + new (ptr) ParamDecl(m_allocator, signatureDesc.pParamDescs[idx]); + }, + signatureDesc.numParams); + + m_maxExternalResources = signatureDesc.maxExternalResources; + + if ((signatureDesc.maxExternalResources == 0) || + (signatureDesc.maxExternalResources == UINT32_MAX)) + { + uint32_t resCount = 0; + for (uint32_t iParam = 0; iParam < signatureDesc.numParams; iParam++) + { + const auto& paramDecl = m_paramDecls[iParam]; + + const uint32_t currResCount = paramDecl.IsResource() ? paramDecl.GetNumElements() : 0; + + resCount += currResCount; + } + + m_maxExternalResources = resCount; + } + + if (m_maxExternalResources != 0) + { + m_externalResourceParamIds = m_allocator.NewArray(m_maxExternalResources); + + uint32_t resCount = 0; + for (uint32_t iParam = 0; iParam < signatureDesc.numParams; iParam++) + { + const auto& paramDecl = m_paramDecls[iParam]; + + const uint32_t currResCount = paramDecl.IsResource() ? 
paramDecl.GetNumElements() : 0; + + std::fill(m_externalResourceParamIds.begin() + resCount, + m_externalResourceParamIds.begin() + resCount + currResCount, + iParam); + + resCount += currResCount; + } + + RPS_ASSERT(m_maxExternalResources == resCount); + } + } + + return RPS_OK; + } + + struct ParamSemanticKey + { + uint32_t paramIndex; + RpsSemantic semantic; + uint32_t semanticIndex; + }; + + using SortedParamSemanticList = InplaceVector; + + RpsResult InitNodeDeclInfos(const RpsRenderGraphSignatureDesc& signatureDesc) + { + // TODO: For RPSL path, most of the below logic can be done in offline compiler. + + auto nodeDescs = ConstArrayRef{signatureDesc.pNodeDescs, signatureDesc.numNodeDescs}; + m_nodeDecls = m_allocator.NewArray(nodeDescs.size()); + + auto rpsAllocator = m_allocator.AsRpsAllocator(); + + SortedParamSemanticList sortedSemantics(&rpsAllocator); + + for (uint32_t iNodeDecl = 0; iNodeDecl < nodeDescs.size(); iNodeDecl++) + { + auto& nodeDesc = nodeDescs[iNodeDecl]; + auto& nodeDecl = m_nodeDecls[iNodeDecl]; + + RPS_V_RETURN(InitNodeDecl(m_allocator, nodeDesc, nodeDecl, sortedSemantics)); + } + + return RPS_OK; + } + + static inline RpsNodeDeclFlags CalcNodeDeclFlags(RpsNodeDeclFlags inFlags, RpsNodeDeclFlags requiredQueueFlags) + { + static constexpr uint32_t AllNodeQueueTypeMask = + RPS_NODE_DECL_GRAPHICS_BIT | RPS_NODE_DECL_COMPUTE_BIT | RPS_NODE_DECL_COPY_BIT; + + const RpsNodeDeclFlags combinedQueueFlags = (inFlags | requiredQueueFlags) & AllNodeQueueTypeMask; + + RpsNodeDeclFlags maxRequiredQueueFlag = RPS_NODE_DECL_FLAG_NONE; + + if (combinedQueueFlags & RPS_NODE_DECL_GRAPHICS_BIT) + maxRequiredQueueFlag = RPS_NODE_DECL_GRAPHICS_BIT; + else if (combinedQueueFlags & RPS_NODE_DECL_COMPUTE_BIT) + maxRequiredQueueFlag = RPS_NODE_DECL_COMPUTE_BIT; + else if (combinedQueueFlags & RPS_NODE_DECL_COPY_BIT) + maxRequiredQueueFlag = RPS_NODE_DECL_COPY_BIT; + + return (inFlags & (~AllNodeQueueTypeMask)) | maxRequiredQueueFlag; + } + + static RpsResult 
InitNodeDecl(Arena& allocator, + const RpsNodeDesc& nodeDesc, + NodeDeclInfo& nodeDecl, + SortedParamSemanticList& sortedSemantics) + { + sortedSemantics.clear(); + + nodeDecl = {}; + + uint32_t numAccessesInNode = 0; + + RpsNodeDeclFlags requiredQueueFlags = RPS_NODE_DECL_FLAG_NONE; + + nodeDecl.params = allocator.NewArray( + [&](size_t idx, NodeParamDecl* ptr) { + new (ptr) NodeParamDecl(allocator, nodeDesc.pParamDescs[idx], &numAccessesInNode); + + if (ptr->semantic != RPS_SEMANTIC_UNSPECIFIED) + { + sortedSemantics.push_back( + ParamSemanticKey{uint32_t(idx), ptr->semantic, ptr->baseSemanticIndex}); + } + + requiredQueueFlags |= GetRequiredQueueFlagsFromAccessAttr(nodeDesc.flags, ptr->access); + }, + nodeDesc.numParams); + + nodeDecl.name = allocator.StoreCStr(nodeDesc.name); + nodeDecl.flags = CalcNodeDeclFlags(nodeDesc.flags, requiredQueueFlags); + nodeDecl.numAccesses = numAccessesInNode; + + const bool bMaybeGraphicsNode = nodeDecl.MaybeGraphicsNode(); + + uint32_t renderTargetsInfoOffset = RPS_INDEX_NONE_U32; + bool bHasRenderPassInfo = false; + + if (!sortedSemantics.empty()) + { + // First sort by semantic and param index + std::sort(sortedSemantics.begin(), sortedSemantics.end(), [&](const auto& lhs, const auto& rhs) { + return (lhs.semantic == rhs.semantic) ? (lhs.paramIndex < rhs.paramIndex) + : (lhs.semantic < rhs.semantic); + }); + + // Handle RPS_SEMANTIC_INDEX_APPEND. + // TODO: Skip this for RPSL path. 
+ uint32_t numSemanticKinds = 0; + RpsSemantic prevSemantic = RPS_SEMANTIC_UNSPECIFIED; + uint32_t nextSemanticIndex = 0; + + for (auto iter = sortedSemantics.begin(), end = sortedSemantics.end(); iter != end; ++iter) + { + if (iter->semantic != prevSemantic) + { + prevSemantic = iter->semantic; + nextSemanticIndex = 0; + numSemanticKinds++; + } + + if (iter->semanticIndex == RPS_SEMANTIC_INDEX_APPEND) + { + iter->semanticIndex = nextSemanticIndex; + } + + nextSemanticIndex = iter->semanticIndex + nodeDecl.params[iter->paramIndex].GetNumElements(); + } + + // Sort again by semantic and semantic index for backend consumption. + std::sort(sortedSemantics.begin(), sortedSemantics.end(), [&](const auto& lhs, const auto& rhs) { + return (lhs.semantic == rhs.semantic) ? (lhs.semanticIndex < rhs.semanticIndex) + : (lhs.semantic < rhs.semantic); + }); + + // TODO: Using scratch arena for now. + nodeDecl.semanticKinds = allocator.NewArray(numSemanticKinds); + nodeDecl.semanticParamTable = allocator.NewArray(sortedSemantics.size()); + + numSemanticKinds = 0; + uint32_t semanticParamRangeBegin = 0; + + for (uint32_t i = 0, numSemantics = uint32_t(sortedSemantics.size()); i < numSemantics; i++) + { + const auto& currParam = sortedSemantics[i]; + + if (((i + 1) == numSemantics) || (sortedSemantics[i + 1].semantic != currParam.semantic)) + { + auto& currSemanticKind = nodeDecl.semanticKinds[numSemanticKinds]; + + currSemanticKind.semantic = currParam.semantic; + currSemanticKind.params.SetRange(semanticParamRangeBegin, 1 + i - semanticParamRangeBegin); + + if (bMaybeGraphicsNode) + { + if (IsDynamicRenderStateSemantic(currParam.semantic)) + { + if (nodeDecl.dynamicStates.empty()) + nodeDecl.dynamicStates.SetRange(numSemanticKinds, 1); + else + nodeDecl.dynamicStates.SetEnd(numSemanticKinds + 1); + + constexpr uint32_t renderPassInfoStateMask = + (1u << RPS_SEMANTIC_VIEWPORT) | (1u << RPS_SEMANTIC_SCISSOR) | + (1u << RPS_SEMANTIC_COLOR_CLEAR_VALUE) | (1u << 
RPS_SEMANTIC_DEPTH_CLEAR_VALUE) | + (1u << RPS_SEMANTIC_STENCIL_CLEAR_VALUE); + + if ((1u << currParam.semantic) & renderPassInfoStateMask) + { + bHasRenderPassInfo = true; + } + } + + if (IsFixedFunctionResourceBindingSemantic(currParam.semantic)) + { + if (nodeDecl.fixedFunctionBindings.empty()) + nodeDecl.fixedFunctionBindings.SetRange(numSemanticKinds, 1); + else + nodeDecl.fixedFunctionBindings.SetEnd(numSemanticKinds + 1); + + if ((renderTargetsInfoOffset == RPS_INDEX_NONE_U32) && + ((currParam.semantic == RPS_SEMANTIC_RENDER_TARGET) || + (currParam.semantic == RPS_SEMANTIC_DEPTH_STENCIL_TARGET) || + (currParam.semantic == RPS_SEMANTIC_RESOLVE_TARGET))) + { + renderTargetsInfoOffset = numSemanticKinds; + bHasRenderPassInfo = true; + } + } + } + + numSemanticKinds++; + semanticParamRangeBegin = i + 1; + } + else if ((currParam.semanticIndex + nodeDecl.params[currParam.paramIndex].GetNumElements()) > + sortedSemantics[i + 1].semanticIndex) + { + RPS_DIAG("Semantic index range overlap"); + return RPS_ERROR_INVALID_PROGRAM; + } + + nodeDecl.semanticParamTable[i] = currParam.paramIndex; + } + + RPS_ASSERT(numSemanticKinds == nodeDecl.semanticKinds.size()); + } + + if (bHasRenderPassInfo) + { + NodeDeclRenderPassInfo* pRpInfo = {}; + RPS_V_RETURN(GatherNodeRenderPassInfo(allocator, nodeDecl, renderTargetsInfoOffset, &pRpInfo)); + + nodeDecl.pRenderPassInfo = pRpInfo; + } + + return RPS_OK; + } + + static inline RpsResult GatherNodeRenderPassInfo(Arena& allocator, + const NodeDeclInfo& nodeDeclInfo, + uint32_t renderTargetsInfoOffset, + NodeDeclRenderPassInfo** ppOutRenderPassInfo) + { + NodeDeclRenderPassInfo rpInfo = {}; + ParamElementRef paramRefs[128]; + uint32_t paramRefOffset = 0; + + auto appendParamRef = [&](RpsParamId paramId, uint32_t elementIndex) { + RPS_RETURN_ERROR_IF(paramRefOffset >= RPS_COUNTOF(paramRefs), RPS_ERROR_INDEX_OUT_OF_BOUNDS); + paramRefs[paramRefOffset] = {paramId, elementIndex}; + paramRefOffset++; + return RPS_OK; + }; + + // Gather 
RT/DS/Resolve targets + if (renderTargetsInfoOffset == RPS_INDEX_NONE_U32) + { + // Special case for clear-only nodes (No SV_Target... semantics, only access flags) + + for (uint32_t paramId = 0, numParams = uint32_t(nodeDeclInfo.params.size()); paramId < numParams; + paramId++) + { + const auto& paramAccess = nodeDeclInfo.params[paramId]; + + if ((paramAccess.access.accessFlags & RPS_ACCESS_CLEAR_BIT) && + (paramAccess.access.accessFlags & + (RPS_ACCESS_RENDER_TARGET_BIT | RPS_ACCESS_DEPTH_STENCIL_WRITE)) && + (paramAccess.baseSemanticIndex == 0) && (paramAccess.GetNumElements() == 1)) + { + rpInfo.clearOnly = RPS_TRUE; + + if (paramAccess.access.accessFlags & RPS_ACCESS_RENDER_TARGET_BIT) + { + rpInfo.renderTargetsMask = 1; + } + else + { + RPS_ASSERT(paramAccess.access.accessFlags & + (RPS_ACCESS_RENDER_TARGET_BIT | RPS_ACCESS_DEPTH_STENCIL_WRITE)); + + rpInfo.depthStencilTargetMask = 1; + } + + RPS_V_RETURN(appendParamRef(paramId, 0)); + + break; + } + } + } + else + { + for (uint32_t i = renderTargetsInfoOffset; i < nodeDeclInfo.semanticKinds.size(); i++) + { + const auto& semanticKind = nodeDeclInfo.semanticKinds[i]; + + if (semanticKind.semantic == RPS_SEMANTIC_RENDER_TARGET) + { + auto params = semanticKind.params.Get(nodeDeclInfo.semanticParamTable); + + for (auto paramId : params) + { + auto& paramDecl = nodeDeclInfo.params[paramId]; + RPS_RETURN_ERROR_IF(paramDecl.baseSemanticIndex + paramDecl.GetNumElements() > + RPS_MAX_SIMULTANEOUS_RENDER_TARGET_COUNT, + RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + for (uint32_t iElem = 0; iElem < paramDecl.GetNumElements(); iElem++) + { + uint32_t semanticIndex = iElem + paramDecl.baseSemanticIndex; + if (semanticIndex < RPS_MAX_SIMULTANEOUS_RENDER_TARGET_COUNT) + { + rpInfo.renderTargetsMask |= (1u << semanticIndex); + RPS_V_RETURN(appendParamRef(paramId, iElem)); + } + } + } + } + else if (semanticKind.semantic == RPS_SEMANTIC_DEPTH_STENCIL_TARGET) + { + auto params = 
semanticKind.params.Get(nodeDeclInfo.semanticParamTable); + RPS_ASSERT(params.size() == 1); + + const auto& paramDecl = nodeDeclInfo.params[params.front()]; + RPS_ASSERT(paramDecl.numElements == 1); + + rpInfo.depthStencilTargetMask = 1; + RPS_V_RETURN(appendParamRef(params.front(), 0)); + } + else if (semanticKind.semantic == RPS_SEMANTIC_RESOLVE_TARGET) + { + auto params = semanticKind.params.Get(nodeDeclInfo.semanticParamTable); + + RPS_RETURN_ERROR_IF(paramRefOffset > 0xF, RPS_ERROR_INTEGER_OVERFLOW); + rpInfo.resolveTargetRefs = paramRefOffset; + + for (auto paramId : params) + { + const auto& paramDecl = nodeDeclInfo.params[paramId]; + RPS_RETURN_ERROR_IF(paramDecl.baseSemanticIndex + paramDecl.numElements > + RPS_MAX_SIMULTANEOUS_RENDER_TARGET_COUNT, + RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + for (uint32_t iElem = 0; iElem < paramDecl.numElements; iElem++) + { + const uint32_t semanticIndex = iElem + paramDecl.baseSemanticIndex; + if (semanticIndex < RPS_MAX_SIMULTANEOUS_RENDER_TARGET_COUNT) + { + rpInfo.resolveTargetsMask |= (1u << semanticIndex); + RPS_V_RETURN(appendParamRef(paramId, iElem)); + } + } + } + } + } + } + + RPS_RETURN_ERROR_IF(rpInfo.resolveTargetsMask & ~rpInfo.renderTargetsMask, RPS_ERROR_INVALID_PROGRAM); + RPS_RETURN_ERROR_IF(!rpInfo.depthStencilTargetMask && (rpInfo.clearDepth || rpInfo.clearStencil), + RPS_ERROR_INVALID_PROGRAM); + + // Gather Viewports / Scissor Rects / Clear Values + auto dynStates = nodeDeclInfo.dynamicStates.Get(nodeDeclInfo.semanticKinds); + + RPS_RETURN_ERROR_IF(rpInfo.renderTargetClearMask & ~rpInfo.renderTargetsMask, RPS_ERROR_INVALID_PROGRAM); + + rpInfo.viewportRefs = paramRefOffset; + rpInfo.scissorRectRefs = paramRefOffset; + + uint32_t numViewports = 0; + + for (auto iter = dynStates.begin(); iter != dynStates.end(); ++iter) + { + auto params = iter->params.Get(nodeDeclInfo.semanticParamTable); + + if (iter->semantic == RPS_SEMANTIC_COLOR_CLEAR_VALUE) + { + for (auto paramId : params) + { + auto& paramDecl = 
nodeDeclInfo.params[paramId]; + RPS_RETURN_ERROR_IF(paramDecl.baseSemanticIndex + paramDecl.numElements > + RPS_MAX_SIMULTANEOUS_RENDER_TARGET_COUNT, + RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + for (uint32_t iElement = 0; iElement < paramDecl.numElements; iElement++) + { + const uint32_t iRT = paramDecl.baseSemanticIndex + iElement; + + rpInfo.renderTargetClearMask |= (1u << iRT); + RPS_V_RETURN(appendParamRef(paramId, iElement)); + } + } + } + else if (iter->semantic == RPS_SEMANTIC_DEPTH_CLEAR_VALUE) + { + RPS_ASSERT(params.size() == 1); + + auto& paramAccessInfo = nodeDeclInfo.params[params.front()]; + RPS_ASSERT(paramAccessInfo.numElements == 1); + + rpInfo.clearDepth = RPS_TRUE; + RPS_V_RETURN(appendParamRef(params.front(), 0)); + } + else if (iter->semantic == RPS_SEMANTIC_STENCIL_CLEAR_VALUE) + { + RPS_ASSERT(params.size() == 1); + + auto& paramAccessInfo = nodeDeclInfo.params[params.front()]; + RPS_ASSERT(paramAccessInfo.numElements == 1); + + rpInfo.clearStencil = RPS_TRUE; + RPS_V_RETURN(appendParamRef(params.front(), 0)); + } + else if (iter->semantic == RPS_SEMANTIC_VIEWPORT) + { + rpInfo.viewportRefs = paramRefOffset; + + for (auto paramId : params) + { + auto& paramAccessInfo = nodeDeclInfo.params[paramId]; + + for (uint32_t iElement = 0; iElement < paramAccessInfo.numElements; iElement++) + { + RPS_V_RETURN(appendParamRef(paramId, iElement)); + } + + numViewports += paramAccessInfo.numElements; + } + + rpInfo.scissorRectRefs = paramRefOffset; + } + else if (iter->semantic == RPS_SEMANTIC_SCISSOR) + { + rpInfo.scissorRectRefs = paramRefOffset; + + for (auto paramId : params) + { + auto& paramAccessInfo = nodeDeclInfo.params[paramId]; + + for (uint32_t iElement = 0; iElement < paramAccessInfo.numElements; iElement++) + { + RPS_V_RETURN(appendParamRef(paramId, iElement)); + } + } + } + } + + rpInfo.clearValueRefs = paramRefOffset - (rpsCountBits(rpInfo.renderTargetClearMask) + + (rpInfo.clearDepth ? 1 : 0) + (rpInfo.clearStencil ? 
1 : 0)); + rpInfo.numParamRefs = paramRefOffset; + + if (rpInfo.numParamRefs > 0) + { + auto pRenderPassInfo = allocator.New(rpInfo); + RPS_CHECK_ALLOC(pRenderPassInfo); + + pRenderPassInfo->paramRefs = allocator.NewArray(rpInfo.numParamRefs).data(); + RPS_CHECK_ALLOC(pRenderPassInfo->paramRefs); + + std::copy(paramRefs, paramRefs + paramRefOffset, pRenderPassInfo->paramRefs); + + *ppOutRenderPassInfo = pRenderPassInfo; + } + + return RPS_OK; + } + + private: + Arena& m_allocator; + ArrayRef m_nodeDecls; + ArrayRef m_paramDecls; + // TODO: Assuming 1:1 external resource to param element mapping: + ArrayRef m_externalResourceParamIds; + uint32_t m_maxExternalResources = 0; + uint32_t m_totalParamDataBufferSize = 0; + }; + + + // TODO: Move to somewhere proper + RPS_ASSOCIATE_HANDLE(ParamAttrList); + + inline RpsParamAttrList ParamAttrList::ToHandle(ParamAttrList* pAttrList) + { + return rps::ToHandle(pAttrList); + } + +} // namespace rps + +#endif //RPS_RENDER_GRAPH_SIGNATURE_HPP diff --git a/src/runtime/common/rps_rpsl_host.c b/src/runtime/common/rps_rpsl_host.c new file mode 100644 index 0000000..cddce17 --- /dev/null +++ b/src/runtime/common/rps_rpsl_host.c @@ -0,0 +1,499 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#include +#include + +#include "rps/core/rps_api.h" + +#include "runtime/common/rps_rpsl_host.h" + +extern RpsResult RpslHostBlockMarker(uint32_t markerType, + uint32_t blockIndex, + uint32_t resourceCount, + uint32_t nodeCount, + uint32_t localLoopIndex, + uint32_t numChildren, + uint32_t parentId); + +extern RpsResult RpslHostCallNode(uint32_t nodeDeclId, + uint32_t numArgs, + void** ppArgs, + uint32_t nodeCallFlags, + uint32_t localNodeId, + uint32_t* pCmdIdOut); + +extern RpsResult RpslHostNodeDependencies(uint32_t numDeps, const uint32_t* pDeps, uint32_t dstNodeId); + +extern RpsResult RpslHostDescribeHandle(void* pOutData, + uint32_t dataSize, + const uint32_t* inHandle, + uint32_t describeOp); + +extern RpsResult RpslHostCreateResource(uint32_t type, + uint32_t flags, + uint32_t format, + uint32_t width, + uint32_t height, + uint32_t depthOrArraySize, + uint32_t mipLevels, + uint32_t sampleCount, + uint32_t sampleQuality, + uint32_t temporalLayers, + uint32_t id, + uint32_t* pOutResourceId); + +RpsResult RpslHostNameResource(uint32_t resourceHdl, const char* name, uint32_t nameLength); + +RpsResult RpslNotifyOutParamResources(uint32_t paramId, const void* pViews); + +RpsResult RpslSchedulerMarker(uint32_t opCode, uint32_t flags, const char* name, uint32_t nameLength); + +void RpslNotifyAbort(RpsResult result); + +// Thread local +#ifdef __cplusplus +#define RPS_THREAD_LOCAL thread_local +#else // #ifdef __cplusplus +#ifdef _MSC_VER +#define RPS_THREAD_LOCAL __declspec(thread) +#else +#define RPS_THREAD_LOCAL __thread +#endif // #ifdef _MSC_VER +#endif // #ifdef __cplusplus + +RPS_THREAD_LOCAL jmp_buf* tls_pJmpBuf = NULL; + +static inline void RpslAbortIfFail(RpsResult result) +{ + if (RPS_FAILED(result)) + { + RpslNotifyAbort(result); + longjmp(*tls_pJmpBuf, result); + } +} + +RpsResult RpslHostCallEntry(PFN_RpslEntry pfnEntry, uint32_t numArgs, const void* const* ppArgs) +{ + RpsResult result = RPS_OK; + + jmp_buf jmpBuf; + + jmp_buf* pPrevJmpBuf = 
tls_pJmpBuf; + tls_pJmpBuf = &jmpBuf; + + if ((result = setjmp(jmpBuf)) == 0) + { + pfnEntry(numArgs, ppArgs, RPSL_ENTRY_CALL_DEFAULT); + } + + tls_pJmpBuf = pPrevJmpBuf; + + return result; +} + +void ___rpsl_abort(uint32_t errorCode) +{ + RpslAbortIfFail(errorCode); +} + +uint32_t ___rpsl_node_call( + uint32_t nodeDeclId, uint32_t numArgs, void** ppArgs, uint32_t nodeCallFlags, uint32_t nodeId) +{ + uint32_t cmdId; + RpslAbortIfFail(RpslHostCallNode(nodeDeclId, numArgs, ppArgs, nodeCallFlags, nodeId, &cmdId)); + + return cmdId; +} + +void ___rpsl_node_dependencies(uint32_t numDeps, const uint32_t* pDeps, uint32_t dstNodeId) +{ + RpslAbortIfFail(RpslHostNodeDependencies(numDeps, pDeps, dstNodeId)); +} + +void ___rpsl_block_marker(uint32_t markerType, + uint32_t blockIndex, + uint32_t resourceCount, + uint32_t nodeCount, + uint32_t localLoopIndex, + uint32_t numChildren, + uint32_t parentId) +{ + RpslAbortIfFail( + RpslHostBlockMarker(markerType, blockIndex, resourceCount, nodeCount, localLoopIndex, numChildren, parentId)); +} + +void ___rpsl_scheduler_marker(uint32_t opCode, uint32_t flags, const char* name, uint32_t nameLength) +{ + RpslAbortIfFail(RpslSchedulerMarker(opCode, flags, name, nameLength)); +} + +void ___rpsl_describe_handle(void* pOutData, uint32_t dataSize, const uint32_t* inHandle, uint32_t describeOp) +{ + RpslAbortIfFail(RpslHostDescribeHandle(pOutData, dataSize, inHandle, describeOp)); +} + +uint32_t ___rpsl_create_resource(uint32_t type, + uint32_t flags, + uint32_t format, + uint32_t width, + uint32_t height, + uint32_t depthOrArraySize, + uint32_t mipLevels, + uint32_t sampleCount, + uint32_t sampleQuality, + uint32_t temporalLayers, + uint32_t id) +{ + uint32_t resourceId; + RpslAbortIfFail(RpslHostCreateResource(type, + flags, + format, + width, + height, + depthOrArraySize, + mipLevels, + sampleCount, + sampleQuality, + temporalLayers, + id, + &resourceId)); + return resourceId; +} + +void ___rpsl_name_resource(uint32_t resourceHdl, const 
char* name, uint32_t nameLength) +{ + RpslAbortIfFail(RpslHostNameResource(resourceHdl, name, nameLength)); +} + +void ___rpsl_notify_out_param_resources(uint32_t paramId, const void* pViews) +{ + RpslAbortIfFail(RpslNotifyOutParamResources(paramId, pViews)); +} + +#define RPS_SHADER_HOST 1 + +#include "rps_rpsl_host_dll.c" + +// DXIL Intrinsics +// TODO: Generate from hctdb +enum DXILOpCode +{ + // Binary float + FMax = 35, // returns a if a >= b, else b + FMin = 36, // returns a if a < b, else b + + // Binary int with two outputs + IMul = 41, // multiply of 32-bit operands to produce the correct full 64-bit result. + + // Binary int + IMax = 37, // IMax(a,b) returns a if a > b, else b + IMin = 38, // IMin(a,b) returns a if a < b, else b + + // Binary uint with carry or borrow + UAddc = 44, // unsigned add of 32-bit operand with the carry + USubb = 45, // unsigned subtract of 32-bit operands with the borrow + + // Binary uint with two outputs + UDiv = 43, // unsigned divide of the 32-bit operand src0 by the 32-bit operand src1. + UMul = 42, // multiply of 32-bit operands to produce the correct full 64-bit result. + + // Binary uint + UMax = 39, // unsigned integer maximum. UMax(a,b) = a > b ? a : b + UMin = 40, // unsigned integer minimum. UMin(a,b) = a < b ? 
a : b + + // Bitcasts with different sizes + BitcastF16toI16 = 125, // bitcast between different sizes + BitcastF32toI32 = 127, // bitcast between different sizes + BitcastF64toI64 = 129, // bitcast between different sizes + BitcastI16toF16 = 124, // bitcast between different sizes + BitcastI32toF32 = 126, // bitcast between different sizes + BitcastI64toF64 = 128, // bitcast between different sizes + + // Dot product with accumulate + Dot2AddHalf = 162, // 2D half dot product with accumulate to float + Dot4AddI8Packed = 163, // signed dot product of 4 x i8 vectors packed into i32, with accumulate to i32 + Dot4AddU8Packed = 164, // unsigned dot product of 4 x u8 vectors packed into i32, with accumulate to i32 + + // Dot + Dot2 = 54, // Two-dimensional vector dot-product + Dot3 = 55, // Three-dimensional vector dot-product + Dot4 = 56, // Four-dimensional vector dot-product + + // Double precision + LegacyDoubleToFloat = 132, // legacy fuction to convert double to float + LegacyDoubleToSInt32 = 133, // legacy fuction to convert double to int32 + LegacyDoubleToUInt32 = 134, // legacy fuction to convert double to uint32 + MakeDouble = 101, // creates a double value + SplitDouble = 102, // splits a double into low and high parts + + // Legacy floating-point + LegacyF16ToF32 = 131, // legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision) + LegacyF32ToF16 = 130, // legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision) + + // Packing intrinsics + Pack4x8 = 220, // packs vector of 4 signed or unsigned values into a packed datatype, drops or clamps unused bits + + // Quaternary + Bfi = 53, // Given a bit range from the LSB of a number, places that number of bits in another number at any offset + + // Tertiary float + FMad = 46, // floating point multiply & add + Fma = 47, // fused multiply-add + + // Tertiary int + IMad = 48, // Signed integer multiply & add + Ibfe = 51, // Integer bitfield extract + 
Msad = 50, // masked Sum of Absolute Differences. + + // Tertiary uint + UMad = 49, // Unsigned integer multiply & add + Ubfe = 52, // Unsigned integer bitfield extract + + // Unary float - rounding + Round_ne = 26, // floating-point round to integral float. + Round_ni = 27, // floating-point round to integral float. + Round_pi = 28, // floating-point round to integral float. + Round_z = 29, // floating-point round to integral float. + + // Unary float + Acos = + 15, // Returns the arccosine of the specified value. Input should be a floating-point value within the range of -1 to 1. + Asin = + 16, // Returns the arccosine of the specified value. Input should be a floating-point value within the range of -1 to 1 + Atan = 17, // Returns the arctangent of the specified value. The return value is within the range of -PI/2 to PI/2. + Cos = 12, // returns cosine(theta) for theta in radians. + Exp = 21, // returns 2^exponent + FAbs = 6, // returns the absolute value of the input value. + Frc = 22, // extract fracitonal component. + Hcos = 18, // returns the hyperbolic cosine of the specified value. + Hsin = 19, // returns the hyperbolic sine of the specified value. + Htan = 20, // returns the hyperbolic tangent of the specified value. + IsFinite = 10, // Returns true if x is finite, false otherwise. + IsInf = 9, // Returns true if x is +INF or -INF, false otherwise. + IsNaN = 8, // Returns true if x is NAN or QNAN, false otherwise. + IsNormal = 11, // returns IsNormal + Log = 23, // returns log base 2. + Rsqrt = 25, // returns reciprocal square root (1 / sqrt(src) + Saturate = 7, // clamps the result of a single or double precision floating point value to [0.0f...1.0f] + Sin = 13, // returns sine(theta) for theta in radians. + Sqrt = 24, // returns square root + Tan = 14, // returns tan(theta) for theta in radians. + + // Unary int + Bfrev = 30, // Reverses the order of the bits. + Countbits = 31, // Counts the number of bits in the input integer. 
+ FirstbitLo = + 32, // Returns the location of the first set bit starting from the lowest order bit and working upward. + FirstbitSHi = 34, // Returns the location of the first set bit from the highest order bit based on the sign. + + // Unary uint + FirstbitHi = + 33, // Returns the location of the first set bit starting from the highest order bit and working downward. + + // Unpacking intrinsics + Unpack4x8 = 219, // unpacks 4 8-bit signed or unsigned values into int32 or int16 vector +}; + +uint32_t ___rpsl_dxop_binary_i32(uint32_t op, uint32_t a, uint32_t b) +{ + switch (op) + { + case IMax: + return (int32_t)a > (int32_t)b ? a : b; + case IMin: + return (int32_t)a < (int32_t)b ? a : b; + case UMax: + return a > b ? a : b; + case UMin: + return a < b ? a : b; + + default: + break; + } + + ___rpsl_abort(RPS_ERROR_NOT_IMPLEMENTED); + return 0; +} + +uint32_t RpslHostReverseBits32(uint32_t value); +uint32_t RpslHostCountBits(uint32_t value); +uint32_t RpslHostFirstBitLow(uint32_t value); +uint32_t RpslHostFirstBitHigh(uint32_t value); + +uint32_t ___rpsl_dxop_unary_i32(uint32_t op, uint32_t a) +{ + switch (op) + { + case Bfrev: + return RpslHostReverseBits32(a); + case Countbits: + return RpslHostCountBits(a); + case FirstbitLo: + return RpslHostFirstBitLow(a); + case FirstbitSHi: + return 31 - RpslHostFirstBitHigh(((int32_t)a < 0) ? ~a : a); + case FirstbitHi: + { + uint32_t fbh = RpslHostFirstBitHigh(a); + return fbh == 32 ? 
0 : (31 - fbh);
+    }
+
+    default:
+        break;
+    }
+
+    ___rpsl_abort(RPS_ERROR_NOT_IMPLEMENTED);
+    return 0;
+}
+
+// Evaluates a three-operand 32-bit integer DXIL op.
+//
+// op       DXILOpCode selecting the operation.
+// a, b, c  operands, passed as raw 32-bit patterns.
+//
+// IMad and UMad return a * b + c. Ubfe, Ibfe, Msad and any other opcode call
+// ___rpsl_abort(RPS_ERROR_NOT_IMPLEMENTED); 0 is returned if that call returns.
+uint32_t ___rpsl_dxop_tertiary_i32(uint32_t op, uint32_t a, uint32_t b, uint32_t c)
+{
+    switch (op)
+    {
+    case IMad:
+    case UMad:
+        // Both variants are evaluated in unsigned arithmetic. The low 32 bits of a
+        // two's-complement multiply-add do not depend on signedness, and unsigned
+        // wrap-around is well defined in C whereas signed overflow is not.
+        return a * b + c;
+    case Ubfe:
+    case Ibfe:
+    case Msad:
+    default:
+        break;
+    }
+
+    ___rpsl_abort(RPS_ERROR_NOT_IMPLEMENTED);
+    return 0;
+}
+
+// Evaluates a two-operand float DXIL op.
+//
+// FMax returns a if a >= b, else b. FMin returns a if a < b, else b.
+// Any other opcode calls ___rpsl_abort(RPS_ERROR_NOT_IMPLEMENTED); 0 is returned
+// if that call returns.
+float ___rpsl_dxop_binary_f32(uint32_t op, float a, float b)
+{
+    switch (op)
+    {
+    case FMax:
+        return a >= b ? a : b;
+    case FMin:
+        return a < b ? a : b;
+    default:
+        break;
+    }
+
+    ___rpsl_abort(RPS_ERROR_NOT_IMPLEMENTED);
+    return 0;
+}
+
+// Evaluates a one-operand float DXIL op (trigonometric, hyperbolic, exp/log,
+// square root, saturate and the four rounding variants).
+//
+// op  DXILOpCode selecting the operation.
+// a   operand.
+//
+// Any opcode not listed below calls ___rpsl_abort(RPS_ERROR_NOT_IMPLEMENTED);
+// 0 is returned if that call returns.
+float ___rpsl_dxop_unary_f32(uint32_t op, float a)
+{
+    switch (op)
+    {
+    case Acos:
+        return acosf(a);
+    case Asin:
+        return asinf(a);
+    case Atan:
+        return atanf(a);
+    case Cos:
+        return cosf(a);
+    case Exp:
+        // Base-2 exponential, per the DXILOpCode comment ("returns 2^exponent").
+        return exp2f(a);
+    case FAbs:
+        return fabsf(a);
+    case Frc:
+        return a - floorf(a);
+    case Hcos:
+        return coshf(a);
+    case Hsin:
+        return sinhf(a);
+    case Htan:
+        return tanhf(a);
+    case Log:
+        // Base-2 logarithm, per the DXILOpCode comment ("returns log base 2") and
+        // as the inverse of Exp above. The natural logarithm is not what is wanted.
+        return log2f(a);
+    case Rsqrt:
+        return 1 / sqrtf(a);
+    case Saturate:
+        return fminf(fmaxf(a, 0.0f), 1.0f);
+    case Sin:
+        return sinf(a);
+    case Sqrt:
+        return sqrtf(a);
+    case Tan:
+        return tanf(a);
+
+    case Round_ne:
+        // Round to the nearest integer, ties to even. rintf() honours the current
+        // rounding mode; this relies on the default FE_TONEAREST being in effect.
+        return rintf(a);
+    case Round_ni:
+        return floorf(a);
+    case Round_pi:
+        return ceilf(a);
+    case Round_z:
+        return truncf(a);
+
+    default:
+        break;
+    }
+
+    ___rpsl_abort(RPS_ERROR_NOT_IMPLEMENTED);
+    return 0;
+}
+
+// Evaluates a float classification DXIL op (IsFinite, IsInf, IsNaN, IsNormal).
+//
+// Returns 1 when the predicate holds for a and 0 otherwise. Any other opcode
+// calls ___rpsl_abort(RPS_ERROR_NOT_IMPLEMENTED); 0 is returned if that call
+// returns.
+uint8_t ___rpsl_dxop_isSpecialFloat_f32(uint32_t op, float a)
+{
+    // The <math.h> classification macros only promise "nonzero" for true, so each
+    // result is collapsed to 0/1 before it is narrowed to uint8_t.
+    switch (op)
+    {
+    case IsFinite:
+        return isfinite(a) ? 1 : 0;
+    case IsInf:
+        return isinf(a) ? 1 : 0;
+    case IsNaN:
+        return isnan(a) ? 1 : 0;
+    case IsNormal:
+        return isnormal(a) ? 1 : 0;
+    default:
+        break;
+    }
+
+    ___rpsl_abort(RPS_ERROR_NOT_IMPLEMENTED);
+    return 0;
+}
+
+float ___rpsl_dxop_tertiary_f32(uint32_t op, float a, float b, float c)
+{
+    switch (op)
+    {
+    case FMad:
+        return a * b + c;
+    case Fma:
+        return fmaf(a, b, c);
+    default:
+        break;
+    }
+ + ___rpsl_abort(RPS_ERROR_NOT_IMPLEMENTED); + return 0; +} + +RpsResult rpsRpslDynamicLibraryInit(PFN_rpslDynLibInit pfn_dynLibInit) +{ + ___rpsl_runtime_procs procs; + memset(&procs, 0, sizeof(procs)); + + procs.pfn_rpsl_abort = &___rpsl_abort; + procs.pfn_rpsl_node_call = &___rpsl_node_call; + procs.pfn_rpsl_node_dependencies = &___rpsl_node_dependencies; + procs.pfn_rpsl_block_marker = &___rpsl_block_marker; + procs.pfn_rpsl_scheduler_marker = &___rpsl_scheduler_marker; + procs.pfn_rpsl_describe_handle = &___rpsl_describe_handle; + procs.pfn_rpsl_create_resource = &___rpsl_create_resource; + procs.pfn_rpsl_name_resource = &___rpsl_name_resource; + procs.pfn_rpsl_notify_out_param_resources = &___rpsl_notify_out_param_resources; + procs.pfn_rpsl_dxop_unary_i32 = &___rpsl_dxop_unary_i32; + procs.pfn_rpsl_dxop_binary_i32 = &___rpsl_dxop_binary_i32; + procs.pfn_rpsl_dxop_tertiary_i32 = &___rpsl_dxop_tertiary_i32; + procs.pfn_rpsl_dxop_unary_f32 = &___rpsl_dxop_unary_f32; + procs.pfn_rpsl_dxop_binary_f32 = &___rpsl_dxop_binary_f32; + procs.pfn_rpsl_dxop_tertiary_f32 = &___rpsl_dxop_tertiary_f32; + procs.pfn_rpsl_dxop_isSpecialFloat_f32 = &___rpsl_dxop_isSpecialFloat_f32; + + pfn_dynLibInit(&procs, sizeof(procs)); + + return RPS_OK; +} diff --git a/src/runtime/common/rps_rpsl_host.cpp b/src/runtime/common/rps_rpsl_host.cpp new file mode 100644 index 0000000..0102540 --- /dev/null +++ b/src/runtime/common/rps_rpsl_host.cpp @@ -0,0 +1,407 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#include "runtime/common/rps_rpsl_host.hpp" +#include "runtime/common/rps_subprogram.hpp" +#include "runtime/common/rps_render_graph.hpp" + +extern "C" RpsResult RpslHostCallEntry(PFN_RpslEntry func, uint32_t numArgs, const void* const* ppArgs); + +namespace rps +{ + static thread_local RpslHost* s_pRpslContext = nullptr; + + RpsResult RpslHost::Execute(const RpslExecuteInfo& execInfo) + { + RPS_CHECK_ARGS(execInfo.pProgram); + + ScopedContext context{&s_pRpslContext, this}; + + auto* pSignature = execInfo.pProgram->GetSignature(); + RPS_CHECK_ARGS(execInfo.numArgs == pSignature->GetParamDecls().size()); + + Subprogram* const pCurrProgram = execInfo.pProgram; + + if (pCurrProgram->GetEntry()) + { + RPS_STATIC_ASSERT(RPS_OK == 0, "RPS_OK must be 0"); + RPS_STATIC_ASSERT(!RPS_FAILED(0), "RPS_FAILED no longer maps 0 as a success code"); + RPS_V_RETURN(RpslHostCallEntry(pCurrProgram->GetEntry()->pfnEntry, execInfo.numArgs, execInfo.ppArgs)); + } + else + { + // TODO: handle pre-built cmdBuf + RPS_TODO_RETURN_NOT_IMPLEMENTED(); // Mixing non-RPSL program + } + + return RPS_OK; + } + + RpsResult RpslHost::RpslCallNode(RpsNodeDeclId localNodeDeclId, + ArrayRef args, + uint32_t callFlags, + uint32_t nodeLocalId, + RpsNodeId* pOutCmdId) + { + const uint32_t stableLocalNodeId = + m_pGraphBuilder->GetCurrentProgram() + ->m_persistentIndexGenerator.Generate(nodeLocalId); + + return m_pGraphBuilder->AddNode(this, localNodeDeclId, args, callFlags, stableLocalNodeId, pOutCmdId); + } + + void RpslHost::AddDependencies(ConstArrayRef dependencies, RpsNodeId dstNode) + { + for (auto dep : dependencies) + { + if (dep != RPS_INDEX_NONE_U32) + { + m_pGraphBuilder->AddDependency(dep, dstNode); + } + } + } + + enum + { + RPS_MARKER_FUNCTION_INFO = 0, + RPS_MARKER_LOOP_BEGIN, + RPS_MARKER_LOOP_ITERATION, + RPS_MARKER_LOOP_END, + RPS_MARKER_BASIC_BLOCK_BEGIN, + RPS_MARKER_BASIC_BLOCK_END, + }; + + RpsResult RpslHost::BlockMarker(uint32_t markerType, + uint32_t blockIndex, + ConstArrayRef 
resourceCounts, + uint32_t localLoopIndex, + uint32_t numChildren, + uint32_t parentId) + { + RpsResult result = RPS_OK; + + auto& indexGen = m_pGraphBuilder->GetCurrentProgram()->m_persistentIndexGenerator; + + switch (markerType) + { + case RPS_MARKER_FUNCTION_INFO: + RPS_ASSERT(parentId == UINT32_MAX); + result = indexGen.EnterFunction(blockIndex, resourceCounts, localLoopIndex, numChildren); + break; + case RPS_MARKER_LOOP_BEGIN: + result = indexGen.EnterLoop(blockIndex, resourceCounts, localLoopIndex, numChildren); + break; + case RPS_MARKER_LOOP_END: + result = indexGen.ExitLoop(blockIndex); + break; + case RPS_MARKER_LOOP_ITERATION: + result = indexGen.LoopIteration(blockIndex); + break; + default: + break; + } + + return result; + } + + RpsResult RpslHost::SchedulerMarker(SchedulerMarkerOpCodes opCode, + RpsSubgraphFlags flags, + const char* name, + uint32_t nameLength) + { + static constexpr BuiltInNodeDeclIds s_schedulerMarkerOpCodesToNodeDeclIds[] = { + RPS_BUILTIN_NODE_SCHEDULER_BARRIER, // BARRIER + RPS_BUILTIN_NODE_SUBGRAPH_BEGIN, // SUBGRAPH_BEGIN + RPS_BUILTIN_NODE_SUBGRAPH_END, // SUBGRAPH_END + }; + + static_assert(RPS_COUNTOF(s_schedulerMarkerOpCodesToNodeDeclIds) == uint32_t(SchedulerMarkerOpCodes::MaxValue), + "s_schedulerMarkerOpCodesToNodeDeclIds outdated."); + + RPS_RETURN_ERROR_IF(uint32_t(opCode) >= uint32_t(SchedulerMarkerOpCodes::MaxValue), RPS_ERROR_INVALID_PROGRAM); + + switch (opCode) + { + case SchedulerMarkerOpCodes::BARRIER: + return m_pGraphBuilder->ScheduleBarrier(); + case SchedulerMarkerOpCodes::SUBGRAPH_BEGIN: + //TODO: Do we need to create stable Ids for subgraphs? 
+ return m_pGraphBuilder->BeginSubgraph(flags); + case SchedulerMarkerOpCodes::SUBGRAPH_END: + return m_pGraphBuilder->EndSubgraph(); + default: + break; + } + + return RPS_ERROR_INVALID_PROGRAM; + } + + RpsResult RpslHost::RpslDeclareResource(uint32_t type, + uint32_t flags, + uint32_t format, + uint32_t width, + uint32_t height, + uint32_t depthOrArraySize, + uint32_t mipLevels, + uint32_t sampleCount, + uint32_t sampleQuality, + uint32_t temporalLayers, + uint32_t id, + uint32_t* pOutResourceId) + { + auto* pVariable = m_pGraphBuilder->AllocateData(sizeof(RpsResourceDesc), alignof(RpsResourceDesc)); + RPS_CHECK_ALLOC(pVariable); + + auto* pResDesc = static_cast(pVariable); + + pResDesc->type = RpsResourceType(type); + pResDesc->temporalLayers = temporalLayers; + pResDesc->flags = flags; + + if (type != RPS_RESOURCE_TYPE_BUFFER) + { + pResDesc->image.width = width; + pResDesc->image.height = height; + pResDesc->image.depth = depthOrArraySize; + pResDesc->image.mipLevels = mipLevels; + pResDesc->image.format = RpsFormat(format); + pResDesc->image.sampleCount = sampleCount; + + RPS_ASSERT((sampleQuality == 0) && "TODO!"); + } + else + { + pResDesc->buffer.sizeInBytesLo = width; + pResDesc->buffer.sizeInBytesHi = height; + } + + const uint32_t stableResId = + m_pGraphBuilder->GetCurrentProgram() + ->m_persistentIndexGenerator.Generate(id); + + RPS_V_RETURN(m_pGraphBuilder->DeclareResource(stableResId, pResDesc, {}, pOutResourceId)); + + return RPS_OK; + } + + RpsResult RpslHost::NotifyOutParamResources(uint32_t paramId, const void* pData) + { + auto pView = static_cast(pData); + + return GetRenderGraphBuilder()->SetOutputParamResourceView(paramId, pView); + } + +} // namespace rps + +#define USING_RPSL_CONTEXT(Ctx) rps::RpslHost* Ctx = rps::s_pRpslContext; + +extern "C" { + +RpsResult RpslHostBlockMarker(uint32_t markerType, + uint32_t blockIndex, + uint32_t resourceCount, + uint32_t nodeCount, + uint32_t localLoopIndex, + uint32_t numChildren, + uint32_t parentId) 
+{ + USING_RPSL_CONTEXT(pCtx); + + return pCtx->BlockMarker(markerType, blockIndex, {resourceCount, nodeCount}, localLoopIndex, numChildren, parentId); +} + +RpsResult RpslSchedulerMarker(uint32_t opCode, uint32_t flags, const char* name, uint32_t nameLength) +{ + USING_RPSL_CONTEXT(pCtx); + + return pCtx->SchedulerMarker(rps::SchedulerMarkerOpCodes(opCode), flags, name, nameLength); +} + +RpsResult RpslHostCallNode(uint32_t nodeDeclId, + uint32_t numArgs, + void** ppArgs, + uint32_t nodeCallFlags, + uint32_t localNodeId, + uint32_t* pCmdIdOut) +{ + USING_RPSL_CONTEXT(pCtx); + + *pCmdIdOut = RPS_CMD_ID_INVALID; + return pCtx->RpslCallNode(nodeDeclId, {ppArgs, numArgs}, nodeCallFlags, localNodeId, pCmdIdOut); +} + +RpsResult RpslHostNodeDependencies(uint32_t numDeps, const uint32_t* pDeps, uint32_t dstNodeId) +{ + USING_RPSL_CONTEXT(pCtx); + + pCtx->AddDependencies({pDeps, numDeps}, dstNodeId); + + return RPS_OK; +} + +RpsResult RpslHostDescribeHandle(void* pOutData, uint32_t dataSize, const uint32_t* inHandle, uint32_t describeOp) +{ + USING_RPSL_CONTEXT(pCtx); + + if (dataSize != sizeof(RpsResourceDesc)) + { + return RPS_ERROR_INVALID_PROGRAM; + } + + auto& resDecl = pCtx->GetRenderGraphBuilder()->GetResourceDecls()[*inHandle]; + memcpy(pOutData, resDecl.desc, dataSize); + + return RPS_OK; +} + +RpsResult RpslHostCreateResource(uint32_t type, + uint32_t flags, + uint32_t format, + uint32_t width, + uint32_t height, + uint32_t depthOrArraySize, + uint32_t mipLevels, + uint32_t sampleCount, + uint32_t sampleQuality, + uint32_t temporalLayers, + uint32_t id, + uint32_t* pOutResourceId) +{ + USING_RPSL_CONTEXT(pCtx); + + return pCtx->RpslDeclareResource(type, + flags, + format, + width, + height, + depthOrArraySize, + mipLevels, + sampleCount, + sampleQuality, + temporalLayers, + id, + pOutResourceId); +} + +RpsResult RpslHostNameResource(uint32_t resourceHdl, const char* name, uint32_t nameLength) +{ + USING_RPSL_CONTEXT(pCtx); + + return 
pCtx->GetRenderGraphBuilder()->SetResourceName(resourceHdl, rps::StrRef(name, nameLength)); +} + +void RpslNotifyAbort(RpsResult result) +{ +#if RPS_DEBUG + static volatile RpsResult breakHere; + breakHere = result; + + static volatile RpsBool bAssertOnRpslError = RPS_TRUE; + RPS_ASSERT(bAssertOnRpslError); +#endif //RPS_DEBUG +} + +RpsResult RpslNotifyOutParamResources(uint32_t paramId, const void* pViews) +{ + USING_RPSL_CONTEXT(pCtx); + + return pCtx->NotifyOutParamResources(paramId, pViews); +} + +uint32_t RpslHostReverseBits32(uint32_t value) +{ + return rpsReverseBits32(value); +} + +uint32_t RpslHostCountBits(uint32_t value) +{ + return rpsCountBits(value); +} + +uint32_t RpslHostFirstBitLow(uint32_t value) +{ + return rpsFirstBitLow(value); +} + +uint32_t RpslHostFirstBitHigh(uint32_t value) +{ + return rpsFirstBitHigh(value); +} + +} // extern "C" + +const char* rpsMakeRpslEntryName(char* pBuf, size_t bufSize, const char* moduleName, const char* entryName) +{ + static constexpr char modulePrefix[] = "rpsl_M_"; + static constexpr char entryPrefix[] = "_E_"; + + if (!moduleName || !entryName) + return nullptr; + + size_t moduleNameLen = strlen(moduleName); + size_t entryNameLen = strlen(entryName); + + if ((sizeof(modulePrefix) + sizeof(entryPrefix) - 1 + moduleNameLen + entryNameLen) > bufSize) + return nullptr; + + char* pDst = pBuf; + memcpy(pDst, "rpsl_M_", sizeof(modulePrefix) - 1); + pDst += sizeof(modulePrefix) - 1; + memcpy(pDst, moduleName, moduleNameLen); + pDst += moduleNameLen; + memcpy(pDst, entryPrefix, sizeof(entryPrefix) - 1); + pDst += sizeof(entryPrefix) - 1; + memcpy(pDst, entryName, entryNameLen + 1); + + return pBuf; +} + +RpsResult rpsRpslEntryGetSignatureDesc(RpsRpslEntry hRpslEntry, RpsRenderGraphSignatureDesc* pDesc) +{ + RPS_CHECK_ARGS(hRpslEntry != RPS_NULL_HANDLE); + RPS_CHECK_ARGS(pDesc != nullptr); + + auto pEntry = rps::FromHandle(hRpslEntry); + + // TODO: Move to compiler. 
+ uint32_t maxParamResources = 0; + + for (auto paramIter = pEntry->pParamDescs, paramEnd = pEntry->pParamDescs + pEntry->numParams; + paramIter != paramEnd; + ++paramIter) + { + if (paramIter->flags & RPS_PARAMETER_FLAG_RESOURCE_BIT) + { + maxParamResources += + ((paramIter->arraySize == UINT32_MAX) ? 0 : ((paramIter->arraySize == 0) ? 1 : paramIter->arraySize)); + } + } + + pDesc->numParams = pEntry->numParams; + pDesc->numNodeDescs = pEntry->numNodeDecls; + pDesc->maxExternalResources = maxParamResources; + pDesc->pParamDescs = pEntry->pParamDescs; + pDesc->pNodeDescs = pEntry->pNodeDecls; + pDesc->name = pEntry->name; + + return RPS_OK; +} + +namespace rps +{ + namespace details + { + RpsResult ProgramGetBindingSlot(RpsSubprogram hProgram, + const char* name, + size_t size, + RpsCmdCallback** ppCallback) + { + RPS_CHECK_ARGS(hProgram); + return rps::FromHandle(hProgram)->BindDeferred(name, size, ppCallback); + } + } // namespace details +} // namespace rps diff --git a/src/runtime/common/rps_rpsl_host.h b/src/runtime/common/rps_rpsl_host.h new file mode 100644 index 0000000..9918e7f --- /dev/null +++ b/src/runtime/common/rps_rpsl_host.h @@ -0,0 +1,19 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_RPSL_HOST_H_ +#define _RPS_RPSL_HOST_H_ + +/// @brief Bitflags for the type of entry calls. +enum RpslEntryCallFlagBits +{ + RPSL_ENTRY_CALL_DEFAULT = 0, ///< Default entry call. + RPSL_ENTRY_CALL_SUBPROGRAM = 1 << 0, ///< The current entry call is used to execute a subprogram for a node in a + /// parent subprogram. 
+}; + +#endif //_RPSL_HOST_H_ diff --git a/src/runtime/common/rps_rpsl_host.hpp b/src/runtime/common/rps_rpsl_host.hpp new file mode 100644 index 0000000..8c5da97 --- /dev/null +++ b/src/runtime/common/rps_rpsl_host.hpp @@ -0,0 +1,103 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_RPSL_HOST_HPP_ +#define _RPS_RPSL_HOST_HPP_ + +#include "rps/core/rps_api.h" +#include "rps/runtime/common/rps_runtime.h" + +#include "core/rps_core.hpp" +#include "core/rps_persistent_index_generator.hpp" +#include "runtime/common/rps_cmd_buf.hpp" +#include "runtime/common/rps_rpsl_host.h" + +namespace rps +{ + class Subprogram; + class RenderGraphBuilder; + + struct RpslExecuteInfo + { + Subprogram* pProgram; + const void* const* ppArgs; + uint32_t numArgs; + }; + + enum class SchedulerMarkerOpCodes + { + BARRIER = 0, + SUBGRAPH_BEGIN, + SUBGRAPH_END, + MaxValue, + }; + + class RpslHost + { + RPS_CLASS_NO_COPY_MOVE(RpslHost); + + public: + RpslHost(RenderGraphBuilder* pBuilder) + : m_pGraphBuilder(pBuilder) + { + } + + RpsResult Execute(const RpslExecuteInfo& execInfo); + + public: + RpsResult RpslCallNode(RpsNodeDeclId nodeDeclId, + ArrayRef args, + uint32_t callFlags, + uint32_t nodeLocalId, + RpsNodeId* pNodeId); + + RpsResult RpslDeclareResource(uint32_t type, + uint32_t flags, + uint32_t format, + uint32_t width, + uint32_t height, + uint32_t depthOrArraySize, + uint32_t mipLevels, + uint32_t sampleCount, + uint32_t sampleQuality, + uint32_t temporalLayers, + uint32_t id, + uint32_t* pOutId); + + void AddDependencies(ConstArrayRef dependencies, RpsNodeId dstNode); + + RpsResult BlockMarker(uint32_t markerType, + uint32_t blockIndex, + ConstArrayRef resourceCounts, + uint32_t localLoopIndex, + uint32_t numChildren, + uint32_t parentId); + + RpsResult 
SchedulerMarker(SchedulerMarkerOpCodes opCode, + RpsSubgraphFlags flags, + const char* name, + uint32_t nameLength); + + RpsResult NotifyOutParamResources(uint32_t paramId, const void* pViews); + + RenderGraphBuilder* GetRenderGraphBuilder() const + { + return m_pGraphBuilder; + } + + RpsResult ExecuteProgram(const RpslExecuteInfo& execInfo); + + private: + void UpdateNodeDecls(); + + private: + RenderGraphBuilder* m_pGraphBuilder = nullptr; + }; + +} // namespace rps + +#endif //_RPSL_HOST_HPP_ diff --git a/src/runtime/common/rps_rpsl_host_dll.c b/src/runtime/common/rps_rpsl_host_dll.c new file mode 100644 index 0000000..2b5625b --- /dev/null +++ b/src/runtime/common/rps_rpsl_host_dll.c @@ -0,0 +1,223 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#if defined(RPS_SHADER_GUEST) + +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; + +#endif //RPS_SHADER_GUEST + +#define RPS_RPSL_INTERFACE_DECL \ + RPSL_RETURN() + +#if defined(RPS_SHADER_GUEST) || defined(RPS_SHADER_HOST) + +typedef void (*PFN_rpsl_abort) (uint32_t result); +typedef uint32_t (*PFN_rpsl_node_call) (uint32_t nodeDeclId, uint32_t numArgs, void** ppArgs, uint32_t nodeCallFlags, uint32_t nodeId); +typedef void (*PFN_rpsl_node_dependencies) (uint32_t numDeps, const uint32_t* pDeps, uint32_t dstNodeId); +typedef void (*PFN_rpsl_block_marker) (uint32_t markerType, + uint32_t blockIndex, + uint32_t resourceCount, + uint32_t nodeCount, + uint32_t localLoopIndex, + uint32_t numChildren, + uint32_t parentId); +typedef void (*PFN_rpsl_scheduler_marker) (uint32_t opCode, uint32_t flags, const char* name, uint32_t nameLength); +typedef void (*PFN_rpsl_describe_handle) (void* pDstDesc, uint32_t bufferSize, const uint32_t* pHandle, uint32_t describeOp); +typedef uint32_t 
(*PFN_rpsl_create_resource) (uint32_t type, + uint32_t flags, + uint32_t format, + uint32_t width, + uint32_t height, + uint32_t depthOrArraySize, + uint32_t mipLevels, + uint32_t sampleCount, + uint32_t sampleQuality, + uint32_t temporalLayers, + uint32_t id); +typedef void (*PFN_rpsl_name_resource) (uint32_t resourceHdl, const char* name, uint32_t nameLength); +typedef void (*PFN_rpsl_notify_out_param_resources) (uint32_t paramId, const void* pViews); + +typedef uint32_t (*PFN_rpsl_dxop_unary_i32) (uint32_t op, uint32_t a); +typedef uint32_t (*PFN_rpsl_dxop_binary_i32) (uint32_t op, uint32_t a, uint32_t b); +typedef uint32_t (*PFN_rpsl_dxop_tertiary_i32) (uint32_t op, uint32_t a, uint32_t b, uint32_t c); +typedef float (*PFN_rpsl_dxop_unary_f32) (uint32_t op, float a); +typedef float (*PFN_rpsl_dxop_binary_f32) (uint32_t op, float a, float b); +typedef float (*PFN_rpsl_dxop_tertiary_f32) (uint32_t op, float a, float b, float c); +typedef uint8_t (*PFN_rpsl_dxop_isSpecialFloat_f32) (uint32_t op, float a); + +typedef struct ___rpsl_runtime_procs +{ + PFN_rpsl_abort pfn_rpsl_abort; + PFN_rpsl_node_call pfn_rpsl_node_call; + PFN_rpsl_node_dependencies pfn_rpsl_node_dependencies; + PFN_rpsl_block_marker pfn_rpsl_block_marker; + PFN_rpsl_scheduler_marker pfn_rpsl_scheduler_marker; + PFN_rpsl_describe_handle pfn_rpsl_describe_handle; + PFN_rpsl_create_resource pfn_rpsl_create_resource; + PFN_rpsl_name_resource pfn_rpsl_name_resource; + PFN_rpsl_notify_out_param_resources pfn_rpsl_notify_out_param_resources; + PFN_rpsl_dxop_unary_i32 pfn_rpsl_dxop_unary_i32; + PFN_rpsl_dxop_binary_i32 pfn_rpsl_dxop_binary_i32; + PFN_rpsl_dxop_tertiary_i32 pfn_rpsl_dxop_tertiary_i32; + PFN_rpsl_dxop_unary_f32 pfn_rpsl_dxop_unary_f32; + PFN_rpsl_dxop_binary_f32 pfn_rpsl_dxop_binary_f32; + PFN_rpsl_dxop_tertiary_f32 pfn_rpsl_dxop_tertiary_f32; + PFN_rpsl_dxop_isSpecialFloat_f32 pfn_rpsl_dxop_isSpecialFloat_f32; +} ___rpsl_runtime_procs; + +typedef int (*PFN_rps_dyn_lib_init)(const 
___rpsl_runtime_procs* pProcs, uint32_t sizeofProcs); + +#endif // defined(RPS_SHADER_GUEST) || defined(RPS_SHADER_HOST) + +#if defined(RPS_SHADER_GUEST) + +static ___rpsl_runtime_procs s_rpslRuntimeProcs; + +#define RPS_TRAMPOLINE_IMPL_RET(RetType, FuncName, ParamsList, Args) \ + RetType FuncName ParamsList \ + { \ + return (*s_rpslRuntimeProcs.pfn##FuncName)Args; \ + } + +#define RPS_TRAMPOLINE_IMPL(FuncName, ParamsList, Args) \ + void FuncName ParamsList \ + { \ + (*s_rpslRuntimeProcs.pfn##FuncName) Args; \ + } + + +void ___rpsl_abort(uint32_t result) +{ + (*s_rpslRuntimeProcs.pfn_rpsl_abort)(result); +} + +uint32_t ___rpsl_node_call( + uint32_t nodeDeclId, uint32_t numArgs, void** ppArgs, uint32_t nodeCallFlags, uint32_t nodeId) +{ + return (*s_rpslRuntimeProcs.pfn_rpsl_node_call)(nodeDeclId, numArgs, ppArgs, nodeCallFlags, nodeId); +} + +void ___rpsl_node_dependencies(uint32_t numDeps, const uint32_t* pDeps, uint32_t dstNodeId) +{ + (*s_rpslRuntimeProcs.pfn_rpsl_node_dependencies)(numDeps, pDeps, dstNodeId); +} + +void ___rpsl_block_marker(uint32_t markerType, + uint32_t blockIndex, + uint32_t resourceCount, + uint32_t nodeCount, + uint32_t localLoopIndex, + uint32_t numChildren, + uint32_t parentId) +{ + (*s_rpslRuntimeProcs.pfn_rpsl_block_marker)(markerType, blockIndex, resourceCount, nodeCount, localLoopIndex, numChildren, parentId); +} + +void ___rpsl_scheduler_marker(uint32_t opCode, uint32_t flags, const char* name, uint32_t nameLength) +{ + (*s_rpslRuntimeProcs.pfn_rpsl_scheduler_marker)(opCode, flags, name, nameLength); +} + +void ___rpsl_describe_handle(void* pOutData, uint32_t dataSize, uint32_t* inHandle, uint32_t describeOp) +{ + (*s_rpslRuntimeProcs.pfn_rpsl_describe_handle)( pOutData, dataSize, inHandle, describeOp); +} + +uint32_t ___rpsl_create_resource(uint32_t type, + uint32_t flags, + uint32_t format, + uint32_t width, + uint32_t height, + uint32_t depthOrArraySize, + uint32_t mipLevels, + uint32_t sampleCount, + uint32_t sampleQuality, + 
uint32_t temporalLayers, + uint32_t id) +{ + return (*s_rpslRuntimeProcs.pfn_rpsl_create_resource)( + type, flags, format, width, height, depthOrArraySize, mipLevels, sampleCount, sampleQuality, temporalLayers, id); +} + +void ___rpsl_name_resource(uint32_t resourceHdl, const char* name, uint32_t nameLength) +{ + (*s_rpslRuntimeProcs.pfn_rpsl_name_resource)(resourceHdl, name, nameLength); +} + +void ___rpsl_notify_out_param_resources(uint32_t paramId, const void* pViews) +{ + (*s_rpslRuntimeProcs.pfn_rpsl_notify_out_param_resources)(paramId, pViews); +} + +uint32_t ___rpsl_dxop_unary_i32(uint32_t op, uint32_t a) +{ + return (*s_rpslRuntimeProcs.pfn_rpsl_dxop_unary_i32)(op, a); +} + +uint32_t ___rpsl_dxop_binary_i32(uint32_t op, uint32_t a, uint32_t b) +{ + return (*s_rpslRuntimeProcs.pfn_rpsl_dxop_binary_i32)(op, a, b); +} + +uint32_t ___rpsl_dxop_tertiary_i32(uint32_t op, uint32_t a, uint32_t b, uint32_t c) +{ + return (*s_rpslRuntimeProcs.pfn_rpsl_dxop_tertiary_i32)(op, a, b, c); +} + +float ___rpsl_dxop_unary_f32(uint32_t op, float a) +{ + return (*s_rpslRuntimeProcs.pfn_rpsl_dxop_unary_f32)(op, a); +} + +float ___rpsl_dxop_binary_f32(uint32_t op, float a, float b) +{ + return (*s_rpslRuntimeProcs.pfn_rpsl_dxop_binary_f32)(op, a, b); +} + +float ___rpsl_dxop_tertiary_f32(uint32_t op, float a, float b, float c) +{ + return (*s_rpslRuntimeProcs.pfn_rpsl_dxop_tertiary_f32)(op, a, b, c); +} + +uint8_t ___rpsl_dxop_isSpecialFloat_f32(uint32_t op, float a) +{ + return (*s_rpslRuntimeProcs.pfn_rpsl_dxop_isSpecialFloat_f32)(op, a); +} + +int __declspec(dllexport) ___rps_dyn_lib_init(const ___rpsl_runtime_procs* pProcs, uint32_t sizeofProcs) +{ + if (sizeof(___rpsl_runtime_procs) != sizeofProcs) + { + return -1; + } + + s_rpslRuntimeProcs.pfn_rpsl_abort = pProcs->pfn_rpsl_abort; + s_rpslRuntimeProcs.pfn_rpsl_node_call = pProcs->pfn_rpsl_node_call; + s_rpslRuntimeProcs.pfn_rpsl_node_dependencies = pProcs->pfn_rpsl_node_dependencies; + 
s_rpslRuntimeProcs.pfn_rpsl_block_marker = pProcs->pfn_rpsl_block_marker; + s_rpslRuntimeProcs.pfn_rpsl_scheduler_marker = pProcs->pfn_rpsl_scheduler_marker; + s_rpslRuntimeProcs.pfn_rpsl_describe_handle = pProcs->pfn_rpsl_describe_handle; + s_rpslRuntimeProcs.pfn_rpsl_create_resource = pProcs->pfn_rpsl_create_resource; + s_rpslRuntimeProcs.pfn_rpsl_name_resource = pProcs->pfn_rpsl_name_resource; + s_rpslRuntimeProcs.pfn_rpsl_notify_out_param_resources = pProcs->pfn_rpsl_notify_out_param_resources; + s_rpslRuntimeProcs.pfn_rpsl_dxop_unary_i32 = pProcs->pfn_rpsl_dxop_unary_i32 ; + s_rpslRuntimeProcs.pfn_rpsl_dxop_binary_i32 = pProcs->pfn_rpsl_dxop_binary_i32; + s_rpslRuntimeProcs.pfn_rpsl_dxop_tertiary_i32 = pProcs->pfn_rpsl_dxop_tertiary_i32; + s_rpslRuntimeProcs.pfn_rpsl_dxop_unary_f32 = pProcs->pfn_rpsl_dxop_unary_f32 ; + s_rpslRuntimeProcs.pfn_rpsl_dxop_binary_f32 = pProcs->pfn_rpsl_dxop_binary_f32; + s_rpslRuntimeProcs.pfn_rpsl_dxop_tertiary_f32 = pProcs->pfn_rpsl_dxop_tertiary_f32; + s_rpslRuntimeProcs.pfn_rpsl_dxop_isSpecialFloat_f32 = pProcs->pfn_rpsl_dxop_isSpecialFloat_f32; + + return 0; +} + +#endif //RPS_SHADER_GUEST + +// clang-format off + diff --git a/src/runtime/common/rps_runtime_backend.cpp b/src/runtime/common/rps_runtime_backend.cpp new file mode 100644 index 0000000..8fc152a --- /dev/null +++ b/src/runtime/common/rps_runtime_backend.cpp @@ -0,0 +1,173 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#include "runtime/common/rps_render_graph.hpp" +#include "runtime/common/rps_render_graph_signature.hpp" +#include "runtime/common/rps_runtime_device.hpp" + +namespace rps +{ + // Updates the frame, then creates the heaps, the resource range starting at GetMaxExternalResourceCount(), and the command resources. + RpsResult RuntimeBackend::Run(RenderGraphUpdateContext& context) + { + RPS_V_RETURN(UpdateFrame(context)); + + RPS_ASSERT(&context.renderGraph == &m_renderGraph); + + RPS_V_RETURN(CreateHeaps(context, m_renderGraph.GetHeapInfos().range_all())); + + auto& resources = m_renderGraph.GetResourceInstances(); + const uint32_t maxExternResources = m_renderGraph.GetSignature().GetMaxExternalResourceCount(); + RPS_ASSERT(maxExternResources <= resources.size()); + + RPS_V_RETURN( + CreateResources(context, resources.range(maxExternResources, resources.size() - maxExternResources))); + + RPS_V_RETURN(CreateCommandResources(context)); + + return RPS_OK; + } + + // Destroys the command resources, then every resource instance, then every heap. + void RuntimeBackend::OnDestroy() + { + DestroyCommandResources(); + + DestroyResources(m_renderGraph.GetResourceInstances().range_all()); + + DestroyHeaps(m_renderGraph.GetHeapInfos().range_all()); + } + + // Copies `context` into a frame allocation, points the copy at hNewCmdBuffer and clears its primary-context flag. + RpsResult RuntimeBackend::CloneContext(const RuntimeCmdCallbackContext& context, + RpsRuntimeCommandBuffer hNewCmdBuffer, + const RpsCmdCallbackContext** ppNewContext) const + { + auto pNewContext = m_renderGraph.FrameAlloc(); + *pNewContext = context; + + pNewContext->hCommandBuffer = hNewCmdBuffer; + pNewContext->bIsPrimaryContext = false; + + *ppNewContext = pNewContext; + + return RPS_OK; + } + + // Fills ppResources[0..count) with the resource instances accessed by elements [srcArrayIndex, srcArrayIndex + count) + // of argument argIndex; an element whose resource id is RPS_RESOURCE_ID_INVALID yields nullptr. + RpsResult RuntimeBackend::GetCmdArgResourceInfos(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayIndex, + const ResourceInstance** ppResources, + uint32_t count) + { + RPS_CHECK_ARGS(pContext && ppResources); + + auto pBackendContext = rps::RuntimeCmdCallbackContext::Get(pContext); + + RPS_RETURN_ERROR_IF(argIndex >= pBackendContext->pNodeDeclInfo->params.size(), RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + auto& paramInfo = pBackendContext->pNodeDeclInfo->params[argIndex]; + 
// Widen to 64 bits so srcArrayIndex + count cannot wrap around and pass the bounds check. + RPS_RETURN_ERROR_IF((uint64_t(srcArrayIndex) + count) > paramInfo.numElements, RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + const auto cmdAccessInfos = + pBackendContext->pCmdInfo->accesses.Get(pBackendContext->pRenderGraph->GetCmdAccessInfos()); + + for (uint32_t i = 0; i < count; i++) + { + auto& accessInfo = cmdAccessInfos[paramInfo.accessOffset + srcArrayIndex + i]; + ppResources[i] = (accessInfo.resourceId != RPS_RESOURCE_ID_INVALID) + ? &pBackendContext->pRenderGraph->GetResourceInstance(accessInfo.resourceId) + : nullptr; + } + + return RPS_OK; + } + + // Calls the runtime's pfnRecordDebugMarker callback, if one is set and debug markers are enabled in context.recordFlags. + void RuntimeBackend::RecordDebugMarker(const RuntimeCmdCallbackContext& context, + RpsRuntimeDebugMarkerMode mode, + StrRef name) const + { + if (context.recordFlags & RPS_RECORD_COMMAND_FLAG_ENABLE_COMMAND_DEBUG_MARKERS) + { + const auto& runtimeCreateInfo = RuntimeDevice::Get(m_renderGraph.GetDevice())->GetCreateInfo(); + auto pfnRecordDebugMarker = runtimeCreateInfo.callbacks.pfnRecordDebugMarker; + + if (pfnRecordDebugMarker) + { + RpsRuntimeOpRecordDebugMarkerArgs markerArgs = {}; + markerArgs.hCommandBuffer = context.hCommandBuffer; + markerArgs.pUserRecordContext = context.pUserRecordContext; + markerArgs.mode = mode; + markerArgs.text = name.str; + + pfnRecordDebugMarker(runtimeCreateInfo.pUserContext, &markerArgs); + } + } + } + + // Records one runtime command: RecordCmdBegin, the node callback (if one is bound), then RecordCmdEnd. + // Commands whose cmdId is RPS_CMD_ID_INVALID are skipped. + RpsResult RuntimeBackend::RecordCommand(RuntimeCmdCallbackContext& context, const RuntimeCmd& runtimeCmd) const + { + if (runtimeCmd.cmdId != RPS_CMD_ID_INVALID) + { + auto pCmdInfo = context.pRenderGraph->GetCmdInfo(runtimeCmd.cmdId); + auto pCmd = pCmdInfo->pCmdDecl; + + context.pNodeDeclInfo = pCmdInfo->pNodeDecl; + context.pCmdInfo = pCmdInfo; + context.pCmd = pCmd; + context.pRuntimeCmd = &runtimeCmd; + context.cmdId = runtimeCmd.cmdId; + + context.bIsCmdBeginEnd = true; + RPS_V_RETURN(RecordCmdBegin(context)); + context.bIsCmdBeginEnd = false; + + if (pCmd->callback.pfnCallback) + { + context.pCmdCallbackContext = pCmd->callback.pUserContext; + context.ppArgs = pCmd->args.data(); + 
context.numArgs = uint32_t(pCmd->args.size()); + context.userTag = pCmd->tag; + + pCmd->callback.pfnCallback(&context); + + RPS_V_RETURN(context.result); + } + + context.bIsCmdBeginEnd = true; + RPS_V_RETURN(RecordCmdEnd(context)); + context.bIsCmdBeginEnd = false; + } + + return RPS_OK; + } + + // Opens the debug marker named after the node declaration and, for nodes that may be graphics nodes, + // begins the render pass and applies fixed-function bindings and dynamic states. + RpsResult RuntimeBackend::RecordCmdBegin(const RuntimeCmdCallbackContext& context) const + { + RecordDebugMarker(context, RPS_RUNTIME_DEBUG_MARKER_BEGIN, context.pNodeDeclInfo->name.str); + + // Default render state setup for graphics nodes + if (context.pNodeDeclInfo->MaybeGraphicsNode()) + { + RPS_V_RETURN(RecordCmdRenderPassBegin(context)); + + RPS_V_RETURN(RecordCmdFixedFunctionBindingsAndDynamicStates(context)); + } + + return RPS_OK; + } + + // Ends the render pass for nodes that may be graphics nodes, then closes the debug marker. + RpsResult RuntimeBackend::RecordCmdEnd(const RuntimeCmdCallbackContext& context) const + { + if (context.pNodeDeclInfo->MaybeGraphicsNode()) + { + RPS_V_RETURN(RecordCmdRenderPassEnd(context)); + } + + RecordDebugMarker(context, RPS_RUNTIME_DEBUG_MARKER_END, nullptr); + + return RPS_OK; + } + +} // namespace rps diff --git a/src/runtime/common/rps_runtime_device.cpp b/src/runtime/common/rps_runtime_device.cpp new file mode 100644 index 0000000..9da6c0c --- /dev/null +++ b/src/runtime/common/rps_runtime_device.cpp @@ -0,0 +1,13 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#include "rps/runtime/common/rps_runtime.h" +#include "runtime/common/rps_render_graph.hpp" + +RPS_STATIC_ASSERT_STANDALONE(RPS_SEMANTIC_USER_RESOURCE_BINDING + 1 == RPS_SEMANTIC_COUNT, + "RPS_SEMANTIC_USER_RESOURCE_BINDING must be the last valid element of RpsSemantic", + RpsSemantic); diff --git a/src/runtime/common/rps_runtime_device.hpp b/src/runtime/common/rps_runtime_device.hpp new file mode 100644 index 0000000..1ba28e5 --- /dev/null +++ b/src/runtime/common/rps_runtime_device.hpp @@ -0,0 +1,225 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_RUNTIME_DEVICE_H_ +#define _RPS_RUNTIME_DEVICE_H_ + +#include "rps/core/rps_api.h" +#include "rps/runtime/common/rps_runtime.h" + +#include "core/rps_device.hpp" +#include "runtime/common/rps_render_graph.hpp" + +namespace rps +{ + struct BuiltInNodeInfo + { + StrRef name; + RpsCmdCallback callbackInfo; + }; + + struct AccessTransitionInfo + { + bool bTransition; + bool bMergedAccessStates; + bool bKeepOrdering; + RpsAccessAttr mergedAccess; + }; + + // Base class for runtime devices. An instance is constructed in the private data of its Device (see Create) + // and looked up from there (see Get). + class RuntimeDevice + { + protected: + RuntimeDevice(Device* pDevice, const RpsRuntimeDeviceCreateInfo* pRuntimeCreateInfo) + : m_pDevice(pDevice) + , m_createInfo{pRuntimeCreateInfo ? 
*pRuntimeCreateInfo : RpsRuntimeDeviceCreateInfo{}} + { + } + + public: + virtual ~RuntimeDevice() + { + if (m_createInfo.callbacks.pfnDestroyRuntime) + { + m_createInfo.callbacks.pfnDestroyRuntime(m_createInfo.pUserContext); + } + } + + virtual RpsResult Init() + { + return RPS_OK; + } + + virtual RpsResult BuildDefaultRenderGraphPhases(RenderGraph& renderGraph) = 0; + virtual RpsResult InitializeSubresourceInfos(ArrayRef resInstances) = 0; + virtual RpsResult InitializeResourceAllocInfos(ArrayRef resInstances) = 0; + virtual RpsResult GetSubresourceRangeFromImageView(SubresourceRangePacked& outRange, + const ResourceInstance& resourceInfo, + const RpsAccessAttr& accessAttr, + const RpsImageView& imageView) = 0; + + virtual ConstArrayRef GetMemoryTypeInfos() const + { + return {}; + } + + virtual RpsResult DescribeMemoryType(uint32_t memoryTypeIndex, PrinterRef printer) const + { + return RPS_OK; + } + + virtual RpsResult UpdateHeaps(ArrayRef heaps) const + { + return RPS_OK; + } + + virtual void DestroyHeaps(ArrayRef heaps) const + { + } + + virtual ConstArrayRef GetBuiltInNodes() const + { + return {}; + } + + virtual bool CalculateAccessTransition(const RpsAccessAttr& beforeAccess, + const RpsAccessAttr& afterAccess, + AccessTransitionInfo& results) const + { + return false; + } + + virtual RpsImageAspectUsageFlags GetImageAspectUsages(uint32_t aspectMask) const + { + return RPS_IMAGE_ASPECT_UNKNOWN; + } + + public: + static RuntimeDevice* Get(const Device& device) + { + return static_cast(device.GetPrivateData()); + } + + template::value>::type> + static T* Get(const Device& device) + { + return static_cast(Get(device)); + } + + // Creates an RpsDevice whose private data holds a T constructed in place, then calls Init() on it. + // If Init() fails the device is destroyed, *phDevice is reset to RPS_NULL_HANDLE and the failure code is returned. + template + static RpsResult Create(RpsDevice* phDevice, + const RpsDeviceCreateInfo* pDeviceCreateInfo, + TRuntimeCreateArgs... runtimeCreateArgs) + { + RPS_CHECK_ARGS(phDevice); + + RpsDeviceCreateInfo deviceCreateInfo = pDeviceCreateInfo ? 
*pDeviceCreateInfo : RpsDeviceCreateInfo{}; + + AllocInfo privateDataAllocInfo = rps::AllocInfo::FromType(); + + deviceCreateInfo.pfnDeviceOnDestroy = &OnDestroy; + deviceCreateInfo.privateDataAllocInfo = privateDataAllocInfo; + + RPS_V_RETURN(rpsDeviceCreate(&deviceCreateInfo, phDevice)); + + void* pPrivateData = rpsDeviceGetPrivateData(*phDevice); + + T* pRuntimeDevice = new (pPrivateData) T(FromHandle(*phDevice), runtimeCreateArgs...); + + RpsResult result = pRuntimeDevice->Init(); + if (RPS_FAILED(result)) + { + rpsDeviceDestroy(*phDevice); + *phDevice = RPS_NULL_HANDLE; + } + + // Propagate Init()'s status; returning RPS_OK here would report success alongside a null handle. + return result; + } + + Device& GetDevice() const + { + return *m_pDevice; + } + + const RpsRuntimeDeviceCreateInfo& GetCreateInfo() const + { + return m_createInfo; + } + + private: + + static void OnDestroy(RpsDevice device) + { + Get(*FromHandle(device))->~RuntimeDevice(); + } + + // Asks the user callback pfnBuildRenderGraphPhases for phases and adds them to renderGraph. + // Phases from the first failure onward are released through their pfnDestroy. + RpsResult BuildUserDefinedRenderGraphPhases(RenderGraph& renderGraph) + { + if (!m_createInfo.callbacks.pfnBuildRenderGraphPhases) + { + return RPS_OK; + } + + const RpsRenderGraphPhaseInfo* pPhases = {}; + uint32_t numPhases = 0; + + RPS_V_RETURN(m_createInfo.callbacks.pfnBuildRenderGraphPhases( + m_createInfo.pUserContext, ToHandle(&renderGraph), &pPhases, &numPhases)); + + RpsResult result = RPS_OK; + uint32_t phaseIndex = 0; + + result = renderGraph.ReservePhases(numPhases); + + if (RPS_SUCCEEDED(result)) + { + for (; phaseIndex < numPhases; phaseIndex++) + { + result = renderGraph.AddPhase(pPhases[phaseIndex]); + + if (RPS_FAILED(result)) + break; + } + } + + RPS_ASSERT(RPS_SUCCEEDED(result) || (phaseIndex != numPhases)); + + for (; phaseIndex < numPhases; phaseIndex++) + { + pPhases[phaseIndex].pfnDestroy(pPhases[phaseIndex].hPhase); + } + + return result; + } + + private: + Device* const m_pDevice = nullptr; + const RpsRuntimeDeviceCreateInfo m_createInfo = {}; + }; + + class NullRuntimeDevice final : public RuntimeDevice + { + public: + NullRuntimeDevice(Device* pDevice) + : RuntimeDevice(pDevice, 
nullptr) + { + } + + virtual RpsResult BuildDefaultRenderGraphPhases(RenderGraph& renderGraph) override final; + virtual RpsResult InitializeSubresourceInfos(ArrayRef resInstances) override final; + virtual RpsResult InitializeResourceAllocInfos(ArrayRef resInstances) override final; + virtual RpsResult GetSubresourceRangeFromImageView(SubresourceRangePacked& outRange, + const ResourceInstance& resourceInfo, + const RpsAccessAttr& accessAttr, + const RpsImageView& imageView) override final; + virtual RpsImageAspectUsageFlags GetImageAspectUsages(uint32_t aspectMask) const override final; + virtual ConstArrayRef GetMemoryTypeInfos() const override final; + + }; + +} // namespace rps + +#endif //_RPS_RUNTIME_DEVICE_H_ diff --git a/src/runtime/common/rps_runtime_util.hpp b/src/runtime/common/rps_runtime_util.hpp new file mode 100644 index 0000000..82726d8 --- /dev/null +++ b/src/runtime/common/rps_runtime_util.hpp @@ -0,0 +1,115 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#ifndef _RPS_RUNTIME_UTILS_H_ +#define _RPS_RUNTIME_UTILS_H_ + +#include "rps/runtime/common/rps_runtime.h" +#include "runtime/common/rps_render_graph_resource.hpp" + +namespace rps +{ +#define RPS_V_REPORT_AND_RETURN(Context, Expr) \ + do \ + { \ + RpsResult _RPS_RESULT_TEMP__ = Expr; \ + if (_RPS_RESULT_TEMP__ != RPS_OK) \ + { \ + RPS_DIAG_RESULT_CODE((#Expr), _RPS_RESULT_TEMP__); \ + rpsCmdCallbackReportError(Context, _RPS_RESULT_TEMP__); \ + } \ + } while (0) + + static inline void CanonicalizeMipLevels(ResourceDescPacked& resDesc) + { + if (!resDesc.IsImage()) + { + return; + } + + if (resDesc.image.sampleCount > 1) + { + resDesc.image.mipLevels = 1; + } + + if (resDesc.image.mipLevels == 0) + { + uint32_t w = resDesc.image.width; + uint32_t h = resDesc.image.height; + uint32_t d = (resDesc.type == RPS_RESOURCE_TYPE_IMAGE_3D) ? resDesc.image.depth : 1; + uint32_t mips = 1; + + while ((w > 1) || (h > 1) || (d > 1)) + { + mips++; + w = w >> 1; + h = h >> 1; + d = d >> 1; + } + + resDesc.image.mipLevels = mips; + } + } + + static inline uint32_t GetMipLevelDimension(uint32_t mostDetailedMipDim, uint32_t mipLevel) + { + return rpsMax(1u, mostDetailedMipDim >> mipLevel); + } + + static inline void GetFullSubresourceRange(SubresourceRangePacked& subResRange, + const ResourceDescPacked& resDesc, + uint32_t aspectMask) + { + subResRange.aspectMask = 1; + subResRange.baseArrayLayer = 0; + subResRange.arrayLayerEnd = 1; + subResRange.baseMipLevel = 0; + subResRange.mipLevelEnd = 1; + + if (resDesc.IsImage()) + { + subResRange.mipLevelEnd = resDesc.image.mipLevels; + + if (resDesc.type != RPS_RESOURCE_TYPE_IMAGE_3D) + { + subResRange.arrayLayerEnd = resDesc.image.arrayLayers; + } + + subResRange.aspectMask = aspectMask; + } + } + + static inline bool IsDepthStencilReadWriteTransition(RpsAccessFlags before, RpsAccessFlags after) + { + RpsAccessFlags beforeDepth = (before & RPS_ACCESS_DEPTH); + RpsAccessFlags beforeStencil = (before & RPS_ACCESS_STENCIL); + 
RpsAccessFlags afterDepth = (after & RPS_ACCESS_DEPTH); + RpsAccessFlags afterStencil = (after & RPS_ACCESS_STENCIL); + + return (beforeDepth && afterDepth && + ((beforeDepth & RPS_ACCESS_DEPTH_WRITE_BIT) != (afterDepth & RPS_ACCESS_DEPTH_WRITE_BIT))) || + (beforeStencil && afterStencil && + ((beforeStencil & RPS_ACCESS_STENCIL_WRITE_BIT) != (afterStencil & RPS_ACCESS_STENCIL_WRITE_BIT))); + } + + static inline bool IsResourceTypeValid(RpsResourceType type) + { + return (type != RPS_RESOURCE_TYPE_UNKNOWN) && (int32_t(type) < RPS_RESOURCE_TYPE_COUNT); + } + + static inline uint64_t GetBufferViewBytes(const RpsBufferView* pBufView, const ResourceDescPacked& resourceDesc) + { + const uint64_t bufferViewBytes = (pBufView->sizeInBytes != RPS_BUFFER_WHOLE_SIZE) + ? pBufView->sizeInBytes + : (resourceDesc.GetBufferSize() - pBufView->offset); + + return bufferViewBytes; + } + +} // namespace rps + +#endif //_RPS_RUNTIME_UTILS_H_ diff --git a/src/runtime/common/rps_subprogram.cpp b/src/runtime/common/rps_subprogram.cpp new file mode 100644 index 0000000..eb7b475 --- /dev/null +++ b/src/runtime/common/rps_subprogram.cpp @@ -0,0 +1,104 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#include "runtime/common/rps_subprogram.hpp" +#include "runtime/common/rps_runtime_device.hpp" +#include "runtime/common/rps_rpsl_host.hpp" + +namespace rps +{ + RpsResult Subprogram::Create(const Device& device, const RpsProgramCreateInfo* pCreateInfo, Subprogram** ppInstance) + { + RPS_CHECK_ARGS(pCreateInfo); + RPS_CHECK_ARGS(ppInstance); + + auto allocInfo = AllocInfo::FromType(); + void* pMemory = Allocate(device.Allocator(), allocInfo); + + RPS_CHECK_ALLOC(pMemory); + auto pInstance = new (pMemory) + Subprogram(device, FromHandle(pCreateInfo->hRpslEntryPoint), pCreateInfo->defaultNodeCallback); + + *ppInstance = pInstance; + + return pInstance->Init(pCreateInfo); + } + + RpsResult Subprogram::Init(const RpsProgramCreateInfo* pCreateInfo) + { + RPS_RETURN_ERROR_IF(m_pSignature != nullptr, RPS_ERROR_INVALID_OPERATION); + + auto pSignatureDesc = pCreateInfo->pSignatureDesc; + + RpsRenderGraphSignatureDesc signatureDescTmp; + if (pCreateInfo->hRpslEntryPoint) + { + RPS_V_RETURN(rpsRpslEntryGetSignatureDesc(pCreateInfo->hRpslEntryPoint, &signatureDescTmp)); + pSignatureDesc = &signatureDescTmp; + } + + RenderGraphSignature* pSignature = nullptr; + + if (pSignatureDesc) + { + RPS_V_RETURN(RenderGraphSignature::Create(m_arena, pSignatureDesc, &pSignature)); + m_pSignature = pSignature; + m_nodeImpls = m_arena.NewArray(pSignature->GetNodeDecls().size()); + + auto pRuntimeDevice = RuntimeDevice::Get(m_device); + if (pRuntimeDevice) + { + // TODO: Only bind built-in at top level + auto builtInNodes = pRuntimeDevice->GetBuiltInNodes(); + for (auto& nodeInfo : builtInNodes) + { + auto nodeDeclId = m_pSignature->FindNodeDeclIndexByName(nodeInfo.name); + if (nodeDeclId != RPS_INDEX_NONE_U32) + { + RPS_V_RETURN(Bind(nodeDeclId, nodeInfo.callbackInfo)); + } + } + } + } + + return RPS_OK; + } + +} // namespace rps + +RpsResult rpsProgramCreate(RpsDevice hDevice, const RpsProgramCreateInfo* pCreateInfo, RpsSubprogram* phRpslInstance) +{ + RPS_CHECK_ARGS(hDevice); + 
RPS_CHECK_ARGS(pCreateInfo); + RPS_CHECK_ARGS(phRpslInstance); + + auto pDevice = rps::FromHandle(hDevice); + + return rps::Subprogram::Create(*pDevice, pCreateInfo, rps::FromHandle(phRpslInstance)); +} + +void rpsProgramDestroy(RpsSubprogram hRpslInstance) +{ + if (hRpslInstance) + { + rps::FromHandle(hRpslInstance)->Destroy(); + } +} + +RpsResult rpsProgramBindNodeCallback(RpsSubprogram hRpslInstance, const char* name, const RpsCmdCallback* pCallback) +{ + RPS_CHECK_ARGS(hRpslInstance); + + return rps::FromHandle(hRpslInstance)->Bind(name, pCallback ? *pCallback : RpsCmdCallback{}); +} + +RpsResult rpsProgramBindNodeSubprogram(RpsSubprogram hRpslInstance, const char* name, RpsSubprogram hSubprogram) +{ + RPS_CHECK_ARGS(hRpslInstance); + + return rps::FromHandle(hRpslInstance)->Bind(name, rps::FromHandle(hSubprogram)); +} diff --git a/src/runtime/common/rps_subprogram.hpp b/src/runtime/common/rps_subprogram.hpp new file mode 100644 index 0000000..482d012 --- /dev/null +++ b/src/runtime/common/rps_subprogram.hpp @@ -0,0 +1,225 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#ifndef RPS_SUBPROGRAM_HPP +#define RPS_SUBPROGRAM_HPP + +#include "runtime/common/rps_render_graph_signature.hpp" + +namespace rps +{ + class RenderGraph; + class ProgramInstance; + class RpslHost; + + struct RpslEntry + { + const char* name; + PFN_RpslEntry pfnEntry; + const RpsParameterDesc* pParamDescs; + const RpsNodeDesc* pNodeDecls; + uint32_t numParams; + uint32_t numNodeDecls; + }; + + RPS_ASSOCIATE_HANDLE(RpslEntry); + + // A program: an optional RPSL entry point, its signature, and one implementation slot per node declaration + // (a callback or another Subprogram), plus a default callback. + class Subprogram + { + RPS_CLASS_NO_COPY_MOVE(Subprogram); + + public: + struct RpslNodeImpl + { + enum class Type + { + Unknown, + RpslEntry, + Callback, + }; + + // Which member is active is recorded in `type` by Set(). + union + { + Subprogram* pSubprogram; + RpsCmdCallback callback; + }; + + void* pBuffer = nullptr; + uint32_t bufferSize = 0; + + Type type = Type::Unknown; + + RpslNodeImpl() = default; + + void Set(const RpsCmdCallback& inCallback) + { + callback = inCallback; + type = Type::Callback; + } + + void Set(Subprogram* pSubprogramIn) + { + pSubprogram = pSubprogramIn; + type = Type::RpslEntry; + } + + private: + RPS_CLASS_NO_COPY_MOVE(RpslNodeImpl); + }; + + private: + Subprogram(const Device& device, const RpslEntry* pRpslEntry, const RpsCmdCallback& defaultCmdCallback) + : m_device(device) + , m_arena(device.Allocator()) + , m_pEntry(pRpslEntry) + { + m_defaultNodeImpl.Set(defaultCmdCallback); + } + + ~Subprogram() + { + } + + public: + static RpsResult Create(const Device& device, const RpsProgramCreateInfo* pCreateInfo, Subprogram** ppInstance); + + // Runs the destructor, then returns the memory to the device allocator. + // The device pointer is captured first because m_device is a member of the object being destroyed. + void Destroy() + { + const Device* pDevice = &m_device; + this->~Subprogram(); + rps::Free(pDevice->Allocator(), this); + } + + Arena& GetArena() + { + return m_arena; + } + + const RpslEntry* GetEntry() const + { + return m_pEntry; + } + + const RenderGraphSignature* GetSignature() const + { + return m_pSignature; + } + + const RpslNodeImpl& GetNodeImpl(uint32_t localNodeDeclId) const + { + return m_nodeImpls[localNodeDeclId]; + } + + const RpsCmdCallback& GetDefaultNodeCallback() const + { + return m_defaultNodeImpl.callback; + 
} + + RpsResult BindDefaultCallback(const RpsCmdCallback& callback) + { + m_defaultNodeImpl.callback = callback; + return RPS_OK; + } + + // Binds another program to the node declaration with the given name. + RpsResult Bind(const StrRef name, Subprogram* pRpslEntry) + { + const uint32_t nodeDeclId = m_pSignature->FindNodeDeclIndexByName(name); + return Bind(nodeDeclId, pRpslEntry); + } + + // Binds a callback to the named node declaration; an empty name replaces the default callback. + RpsResult Bind(StrRef name, const RpsCmdCallback& callback) + { + if (name.empty()) + { + return BindDefaultCallback(callback); + } + else + { + const uint32_t nodeDeclId = m_pSignature->FindNodeDeclIndexByName(name); + RPS_RETURN_ERROR_IF(nodeDeclId == RPS_INDEX_NONE_U32, RPS_ERROR_UNKNOWN_NODE); + + return Bind(nodeDeclId, callback); + } + } + + RpsResult Bind(uint32_t nodeDeclId, Subprogram* pRpslEntry) + { + RPS_CHECK_ARGS(nodeDeclId < m_nodeImpls.size()); + + m_nodeImpls[nodeDeclId].Set(pRpslEntry); + + return RPS_OK; + } + + RpsResult Bind(uint32_t nodeDeclId, const RpsCmdCallback& callback) + { + RPS_CHECK_ARGS(nodeDeclId < m_nodeImpls.size()); + + m_nodeImpls[nodeDeclId].Set(callback); + + return RPS_OK; + } + + // Reserves a context buffer of contextSize bytes for the named node (or for the default callback when the + // name is empty) and returns a pointer to the callback slot for the caller to fill in. + RpsResult BindDeferred(StrRef name, size_t contextSize, RpsCmdCallback** ppCallback) + { + // Validate up front: the default-callback path below writes through ppCallback. + RPS_CHECK_ARGS(ppCallback != nullptr); + + if (name.empty()) + { + RPS_V_RETURN(InitNodeImplForContext(m_defaultNodeImpl, contextSize)); + *ppCallback = &m_defaultNodeImpl.callback; + return RPS_OK; + } + + uint32_t nodeDeclId = m_pSignature->FindNodeDeclIndexByName(name); + return BindDeferred(nodeDeclId, contextSize, ppCallback); + } + + RpsResult BindDeferred(uint32_t nodeDeclId, size_t contextSize, RpsCmdCallback** ppCallback) + { + RPS_CHECK_ARGS(ppCallback != nullptr); + RPS_CHECK_ARGS(nodeDeclId < m_nodeImpls.size()); + + auto& nodeImpl = m_nodeImpls[nodeDeclId]; + RPS_V_RETURN(InitNodeImplForContext(nodeImpl, contextSize)); + + *ppCallback = &nodeImpl.callback; + + return RPS_OK; + } + + private: + // Makes sure nodeImpl has an arena buffer of at least contextSize bytes, then resets it to a callback + // with a null function pointer that carries that buffer. + RpsResult InitNodeImplForContext(Subprogram::RpslNodeImpl& nodeImpl, size_t contextSize) + { + if ((nodeImpl.pBuffer == nullptr) || (nodeImpl.bufferSize < contextSize)) + { + 
nodeImpl.bufferSize = uint32_t(contextSize); + nodeImpl.pBuffer = m_arena.Alloc(contextSize); + RPS_CHECK_ALLOC(nodeImpl.pBuffer); + } + + nodeImpl.Set(RpsCmdCallback{PFN_rpsCmdCallback(nullptr), nodeImpl.pBuffer, RPS_CMD_CALLBACK_FLAG_NONE}); + + return RPS_OK; + } + + private: + RpsResult Init(const RpsProgramCreateInfo* pCreateInfo); + + private: + const Device& m_device; + Arena m_arena; + const RenderGraphSignature* m_pSignature = nullptr; + const RpslEntry* const m_pEntry; + + ArrayRef m_nodeImpls; + RpslNodeImpl m_defaultNodeImpl; + }; + + RPS_ASSOCIATE_HANDLE(Subprogram); + +} // namespace rps + +#endif //RPS_SUBPROGRAM_HPP diff --git a/src/runtime/d3d11/rps_d3d11_built_in_nodes.cpp b/src/runtime/d3d11/rps_d3d11_built_in_nodes.cpp new file mode 100644 index 0000000..f7cde9b --- /dev/null +++ b/src/runtime/d3d11/rps_d3d11_built_in_nodes.cpp @@ -0,0 +1,426 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#include "rps/runtime/d3d_common/rps_d3d_common.h" +#include "rps/runtime/common/rps_render_states.h" + +#include "runtime/common/rps_runtime_util.hpp" + +#include "runtime/d3d11/rps_d3d11_runtime_device.hpp" +#include "runtime/d3d11/rps_d3d11_runtime_backend.hpp" +#include "runtime/d3d11/rps_d3d11_util.hpp" + +namespace rps +{ + + static constexpr bool NoRegions = false; + static constexpr bool WithRegions = true; + + // template + // graphics node clear_color_regions( [writeonly(clear)] texture t, float4 data, uint numRects, int4 rects[MaxRects] ); + // template + // graphics node clear_depth_stencil_regions( [writeonly(clear)] texture t, RPS_CLEAR_FLAGS option, float d, uint s, uint numRects, int4 rects[MaxRects] ); + // template + // compute node clear_texture_regions( [writeonly(clear)] texture t, uint4 data, uint numRects, int4 rects[MaxRects] ); + + // graphics node clear_color ( [writeonly(clear)] texture t, float4 data ); + // graphics node clear_depth_stencil ( [writeonly(clear)] texture t, RPS_CLEAR_FLAGS option, float d, uint s ); + // compute node clear_texture ( [writeonly(clear)] texture t, uint4 data ); + // copy node clear_buffer ( [writeonly(clear)] buffer b, uint4 data ); + // copy node copy_texture ( [writeonly(copy)] texture dst, uint3 dstOffset, [readonly(copy)] texture src, uint3 srcOffset, uint3 extent ); + // copy node copy_buffer ( [writeonly(copy)] buffer dst, uint64_t dstOffset, [readonly(copy)] buffer src, uint64_t srcOffset, uint64_t size ); + // copy node copy_texture_to_buffer ( [writeonly(copy)] buffer dst, uint64_t dstByteOffset, uint rowPitch, uint3 bufferImageSize, uint3 dstOffset, [readonly(copy)] texture src, uint3 srcOffset, uint3 extent ); + // copy node copy_buffer_to_texture ( [writeonly(copy)] texture dst, uint3 dstOffset, [readonly(copy)] buffer src, uint64_t srcByteOffset, uint rowPitch, uint3 bufferImageSize, uint3 srcOffset, uint3 extent ); + // graphics node resolve ( [writeonly(resolve)] texture dst, uint2 
dstOffset, [readonly(resolve)] texture src, uint2 srcOffset, uint2 extent, RPS_RESOLVE_MODE resolveMode ); + + template + void D3D11BuiltInClearColorImpl(const RpsCmdCallbackContext* pContext) + { + } + + void D3D11BuiltInClearColorRegions(const RpsCmdCallbackContext* pContext) + { + ID3D11DeviceContext* pD3DDC = rpsD3D11DeviceContextFromHandle(pContext->hCommandBuffer); + ScopedComPtr pD3DDC1; + + // TODO + HRESULT hr = pD3DDC->QueryInterface(pD3DDC1.ReleaseAndGetAddressOf()); + + RPS_ASSERT(pContext->numArgs == 4); + + ID3D11RenderTargetView* pRTV; + RPS_V_REPORT_AND_RETURN(pContext, rpsD3D11GetCmdArgRTV(pContext, 0, &pRTV)); + + auto pClearValue = rpsCmdGetArg(pContext); + uint32_t numRects = *rpsCmdGetArg(pContext); + const D3D11_RECT* pRects = rpsCmdGetArg(pContext); + + static_assert(sizeof(RpsRect) == sizeof(D3D11_RECT), + "Assumption 'sizeof(RpsRect) == sizeof(D3D11_RECT)' is no longer true."); + + pD3DDC1->ClearView(pRTV, pClearValue->color.float32, pRects, numRects); + } + + void D3D11BuiltInClearColor(const RpsCmdCallbackContext* pContext) + { + ID3D11DeviceContext* pD3DDC = rpsD3D11DeviceContextFromHandle(pContext->hCommandBuffer); + + RPS_ASSERT(pContext->numArgs == 2); + + ID3D11RenderTargetView* pRTV; + rpsD3D11GetCmdArgRTV(pContext, 0, &pRTV); + + auto pClearValue = rpsCmdGetArg(pContext); + + static_assert(sizeof(RpsRect) == sizeof(D3D11_RECT), + "Assumption 'sizeof(RpsRect) == sizeof(D3D11_RECT)' is no longer true."); + + pD3DDC->ClearRenderTargetView(pRTV, pClearValue->color.float32); + } + + void D3D11BuiltInClearDepthStencil(const RpsCmdCallbackContext* pContext) + { + ID3D11DeviceContext* pD3DDC = rpsD3D11DeviceContextFromHandle(pContext->hCommandBuffer); + + RPS_ASSERT(pContext->numArgs == 4); + + auto pClearFlags = rpsCmdGetArg(pContext); + auto pDepthValue = rpsCmdGetArg(pContext); + auto pStencilValue = rpsCmdGetArg(pContext); + + uint32_t numRects = 0; + const D3D11_RECT* pRects = nullptr; + + static_assert(sizeof(RpsRect) == 
sizeof(D3D11_RECT), + "Assumption 'sizeof(RpsRect) == sizeof(D3D11_RECT)' is no longer true."); + + ID3D11DepthStencilView* pDSV; + RPS_V_REPORT_AND_RETURN(pContext, rpsD3D11GetCmdArgDSV(pContext, 0, &pDSV)); + + const uint32_t d3dClearFlags = (((*pClearFlags) & RPS_CLEAR_FLAG_DEPTH) ? D3D11_CLEAR_DEPTH : 0) | + (((*pClearFlags) & RPS_CLEAR_FLAG_STENCIL) ? D3D11_CLEAR_STENCIL : 0); + + pD3DDC->ClearDepthStencilView(pDSV, d3dClearFlags, *pDepthValue, *pStencilValue); + } + + void D3D11BuiltInClearDepthStencilRegions(const RpsCmdCallbackContext* pContext) + { + ID3D11DeviceContext* pD3DDC = rpsD3D11DeviceContextFromHandle(pContext->hCommandBuffer); + ScopedComPtr pD3DDC1; + + // TODO + HRESULT hr = pD3DDC->QueryInterface(pD3DDC1.ReleaseAndGetAddressOf()); + + RPS_ASSERT(pContext->numArgs == 6); + + auto pClearFlags = rpsCmdGetArg(pContext); + auto pDepthValue = rpsCmdGetArg(pContext); + auto pStencilValue = rpsCmdGetArg(pContext); + uint32_t numRects = *rpsCmdGetArg(pContext); + const D3D11_RECT* pRects = rpsCmdGetArg(pContext); + + static_assert(sizeof(RpsRect) == sizeof(D3D11_RECT), + "Assumption 'sizeof(RpsRect) == sizeof(D3D11_RECT)' is no longer true."); + + ID3D11DepthStencilView* pDSV; + RPS_V_REPORT_AND_RETURN(pContext, rpsD3D11GetCmdArgDSV(pContext, 0, &pDSV)); + + D3D11_DEPTH_STENCIL_VIEW_DESC dsvDesc = {}; + pDSV->GetDesc(&dsvDesc); + + if (((*pClearFlags) & RPS_CLEAR_FLAG_STENCIL) || (dsvDesc.Format == DXGI_FORMAT_D32_FLOAT_S8X24_UINT) || + (dsvDesc.Format == DXGI_FORMAT_D24_UNORM_S8_UINT)) + { + RPS_V_REPORT_AND_RETURN(pContext, RPS_ERROR_NOT_SUPPORTED); + } + + float depth = *pDepthValue; + float clearValues[4] = {depth, depth, depth, depth}; + + pD3DDC1->ClearView(pDSV, clearValues, pRects, numRects); + } + + void D3D11BuiltInClearTextureUAV(const RpsCmdCallbackContext* pContext) + { + RPS_TODO(); + } + void D3D11BuiltInClearTextureUAVRegions(const RpsCmdCallbackContext* pContext) + { + RPS_TODO(); + } + void D3D11BuiltInClearBufferUAV(const 
RpsCmdCallbackContext* pContext) + { + RPS_TODO(); + } + + // copy node copy_texture ( [writeonly(copy)] texture dst, uint3 dstOffset, [readonly(copy)] texture src, uint3 srcOffset, uint3 extent ); + void D3D11BuiltInCopyTexture(const RpsCmdCallbackContext* pContext) + { + ID3D11DeviceContext* pD3DDC = rpsD3D11DeviceContextFromHandle(pContext->hCommandBuffer); + + auto* pBackend = rps::D3D11RuntimeBackend::Get(pContext); + + auto* pRuntimeDevice = D3D11RuntimeDevice::Get(pBackend->GetRenderGraph().GetDevice()); + + RPS_ASSERT(pContext->numArgs == 5); + + const ResourceInstance *pDstResource, *pSrcResource; + RPS_V_REPORT_AND_RETURN(pContext, + rps::D3D11RuntimeBackend::GetCmdArgResourceInfos(pContext, 0, 0, &pDstResource, 1)); + RPS_V_REPORT_AND_RETURN(pContext, + rps::D3D11RuntimeBackend::GetCmdArgResourceInfos(pContext, 2, 0, &pSrcResource, 1)); + + auto pDstView = rpsCmdGetArg(pContext); + auto pDstOffset = *rpsCmdGetArg(pContext); + auto pSrcView = rpsCmdGetArg(pContext); + auto pSrcOffset = *rpsCmdGetArg(pContext); + auto pExtent = *rpsCmdGetArg(pContext); + + ID3D11Resource* pDstD3DResource = rpsD3D11ResourceFromHandle(pDstResource->hRuntimeResource); + ID3D11Resource* pSrcD3DResource = rpsD3D11ResourceFromHandle(pSrcResource->hRuntimeResource); + + RpsFormat srcFmt = (pSrcView->base.viewFormat != RPS_FORMAT_UNKNOWN) ? pSrcView->base.viewFormat + : pSrcResource->desc.image.format; + RpsFormat dstFmt = (pDstView->base.viewFormat != RPS_FORMAT_UNKNOWN) ? 
pDstView->base.viewFormat + : pDstResource->desc.image.format; + + uint32_t srcMipDim[3] = { + GetMipLevelDimension(pSrcResource->desc.image.width, pSrcView->subresourceRange.baseMipLevel), + GetMipLevelDimension(pSrcResource->desc.image.height, pSrcView->subresourceRange.baseMipLevel), + GetMipLevelDimension(pSrcResource->desc.GetImageDepth(), pSrcView->subresourceRange.baseMipLevel), + }; + + D3D11_BOX box; + box.left = pSrcOffset[0]; + box.top = pSrcOffset[1]; + box.front = pSrcOffset[2]; + box.right = (pExtent[0] == UINT32_MAX) ? srcMipDim[0] : (pSrcOffset[0] + pExtent[0]); + box.bottom = (pExtent[1] == UINT32_MAX) ? srcMipDim[1] : (pSrcOffset[1] + pExtent[1]); + box.back = (pExtent[2] == UINT32_MAX) ? srcMipDim[2] : (pSrcOffset[2] + pExtent[2]); + + uint32_t dstOffset[3] = {pDstOffset[0], pDstOffset[1], pDstOffset[2]}; + + const bool isFullSubresource = + (pSrcOffset[0] == 0) && (pSrcOffset[1] == 0) && (pSrcOffset[2] == 0) && + ((box.right == srcMipDim[0]) && (box.bottom == srcMipDim[1]) && (box.back == srcMipDim[2])); + + const uint32_t mipLevels = rpsMin(pSrcView->subresourceRange.mipLevels, pDstView->subresourceRange.mipLevels); + + const uint32_t arrayLayers = + rpsMin(pSrcView->subresourceRange.arrayLayers, pDstView->subresourceRange.arrayLayers); + + for (uint32_t iMip = 0; iMip < mipLevels; iMip++) + { + const uint32_t srcMip = pSrcView->subresourceRange.baseMipLevel + iMip; + const uint32_t dstMip = pDstView->subresourceRange.baseMipLevel + iMip; + + for (uint32_t iArrayLayer = 0; iArrayLayer < arrayLayers; iArrayLayer++) + { + uint32_t srcSubresourceIndex = + D3D11CalcSubresource(srcMip, + pSrcView->subresourceRange.baseArrayLayer + iArrayLayer, + pSrcResource->desc.image.mipLevels); + uint32_t dstSubresourceIndex = + D3D11CalcSubresource(dstMip, + pDstView->subresourceRange.baseArrayLayer + iArrayLayer, + pDstResource->desc.image.mipLevels); + + pD3DDC->CopySubresourceRegion(pDstD3DResource, + dstSubresourceIndex, + dstOffset[0], + dstOffset[1], + 
dstOffset[2], + pSrcD3DResource, + srcSubresourceIndex, + &box); + } + + box.left = box.left >> 1; + box.right = box.right >> 1; + box.front = box.front >> 1; + box.right = box.right >> 1; + box.bottom = box.bottom >> 1; + box.back = box.back >> 1; + + dstOffset[0] = dstOffset[0] >> 1; + dstOffset[1] = dstOffset[1] >> 1; + dstOffset[2] = dstOffset[2] >> 1; + } + } + + static constexpr bool TextureToBuffer = true; + static constexpr bool BufferToTexture = false; + + void D3D11BuiltInCopyBuffer(const RpsCmdCallbackContext* pContext) + { + ID3D11DeviceContext* pD3DDC = rpsD3D11DeviceContextFromHandle(pContext->hCommandBuffer); + + auto* pBackend = rps::D3D11RuntimeBackend::Get(pContext); + auto* pRuntimeDevice = D3D11RuntimeDevice::Get(pBackend->GetRenderGraph().GetDevice()); + + const ResourceInstance *pDstResource, *pSrcResource; + RPS_V_REPORT_AND_RETURN(pContext, + rps::D3D11RuntimeBackend::GetCmdArgResourceInfos(pContext, 0, 0, &pDstResource, 1)); + RPS_V_REPORT_AND_RETURN(pContext, + rps::D3D11RuntimeBackend::GetCmdArgResourceInfos(pContext, 2, 0, &pSrcResource, 1)); + + const auto* pDstView = rpsCmdGetArg(pContext); + uint64_t dstOffset = *rpsCmdGetArg(pContext); + const auto* pSrcView = rpsCmdGetArg(pContext); + uint64_t srcOffset = *rpsCmdGetArg(pContext); + uint64_t copySize = *rpsCmdGetArg(pContext); + + if ((srcOffset > UINT32_MAX) || (dstOffset > UINT32_MAX)) + { + RPS_V_REPORT_AND_RETURN(pContext, RPS_ERROR_NOT_SUPPORTED); + } + + const uint64_t dstTotalSize = pDstResource->desc.GetBufferSize(); + const uint64_t srcTotalSize = pSrcResource->desc.GetBufferSize(); + + if ((dstOffset == 0) && (srcOffset == 0) && (dstTotalSize == srcTotalSize) && + ((copySize == UINT64_MAX) || (copySize == srcTotalSize))) + { + pD3DDC->CopyResource(rpsD3D11ResourceFromHandle(pDstResource->hRuntimeResource), + rpsD3D11ResourceFromHandle(pSrcResource->hRuntimeResource)); + } + else + { + D3D11_BOX box = {}; + box.left = UINT(srcOffset); + box.top = 0; + box.front = 0; + 
box.right = (copySize == UINT64_MAX) ? uint32_t(srcTotalSize - srcOffset) : uint32_t(copySize); + box.bottom = 1; + box.back = 1; + + pD3DDC->CopySubresourceRegion(rpsD3D11ResourceFromHandle(pDstResource->hRuntimeResource), + 0, + UINT(dstOffset), + 1, + 1, + rpsD3D11ResourceFromHandle(pSrcResource->hRuntimeResource), + 0, + &box); + } + } + + // copy node copy_texture_to_buffer ( [writeonly(copy)] buffer dst, uint64_t dstByteOffset, uint rowPitch, uint3 bufferImageSize, uint3 dstOffset, [readonly(copy)] texture src, uint3 srcOffset, uint3 extent ); + // copy node copy_buffer_to_texture ( [writeonly(copy)] texture dst, uint3 dstOffset, [readonly(copy)] buffer src, uint64_t srcByteOffset, uint rowPitch, uint3 bufferImageSize, uint3 srcOffset, uint3 extent ); + // graphics node resolve ( [writeonly(resolve)] texture dst, uint2 dstOffset, [readonly(resolve)] texture src, uint2 srcOffset, uint2 extent, RPS_RESOLVE_MODE resolveMode ); + void D3D11BuiltInCopyTextureToBuffer(const RpsCmdCallbackContext* pContext) + { + RPS_V_REPORT_AND_RETURN(pContext, RPS_ERROR_NOT_IMPLEMENTED); + } + + void D3D11BuiltInCopyBufferToTexture(const RpsCmdCallbackContext* pContext) + { + RPS_V_REPORT_AND_RETURN(pContext, RPS_ERROR_NOT_IMPLEMENTED); + } + + void D3D11BuiltInResolve(const RpsCmdCallbackContext* pContext) + { + ID3D11DeviceContext* pD3DDC = rpsD3D11DeviceContextFromHandle(pContext->hCommandBuffer); + + auto* pBackend = rps::D3D11RuntimeBackend::Get(pContext); + + auto* pRuntimeDevice = D3D11RuntimeDevice::Get(pBackend->GetRenderGraph().GetDevice()); + + RPS_ASSERT(pContext->numArgs == 6); + + const ResourceInstance *pDstResource, *pSrcResource; + RPS_V_REPORT_AND_RETURN(pContext, + rps::D3D11RuntimeBackend::GetCmdArgResourceInfos(pContext, 0, 0, &pDstResource, 1)); + RPS_V_REPORT_AND_RETURN(pContext, + rps::D3D11RuntimeBackend::GetCmdArgResourceInfos(pContext, 2, 0, &pSrcResource, 1)); + + auto pDstView = rpsCmdGetArg(pContext); + auto pDstOffset = *rpsCmdGetArg(pContext); + 
auto pSrcView = rpsCmdGetArg(pContext); + auto pSrcOffset = *rpsCmdGetArg(pContext); + auto pExtent = *rpsCmdGetArg(pContext); + auto resolveMode = *rpsCmdGetArg(pContext); + + RPS_ASSERT(!pDstResource->desc.IsBuffer()); + RPS_ASSERT(!pSrcResource->desc.IsBuffer()); + + // TODO: Precalculate these flags at shader loading time. + const bool isFullSubresource = ((pDstOffset[0] == 0) && (pDstOffset[1] == 0)) && + ((pSrcOffset[0] == 0) && (pSrcOffset[1] == 0)) && + ((pExtent[0] == UINT32_MAX) && (pExtent[1] == UINT32_MAX)); + + if (!isFullSubresource || (resolveMode != RPS_RESOLVE_MODE_AVERAGE)) + { + RPS_V_REPORT_AND_RETURN(pContext, RPS_ERROR_NOT_SUPPORTED); + } + + if (pSrcResource->desc.GetSampleCount() < pDstResource->desc.GetSampleCount()) + { + rpsCmdCallbackReportError(pContext, RPS_ERROR_INVALID_OPERATION); + return; + } + + RpsFormat srcFmt = (pSrcView->base.viewFormat != RPS_FORMAT_UNKNOWN) ? pSrcView->base.viewFormat + : pSrcResource->desc.image.format; + RpsFormat dstFmt = (pDstView->base.viewFormat != RPS_FORMAT_UNKNOWN) ? pDstView->base.viewFormat + : pDstResource->desc.image.format; + + uint32_t srcAspectMask = pRuntimeDevice->GetFormatPlaneMask(srcFmt); + uint32_t dstAspectMask = pRuntimeDevice->GetFormatPlaneMask(dstFmt); + + const uint32_t mipLevels = + (pSrcResource->desc.GetSampleCount() > 1) + ? 1 + : rpsMin(pDstView->subresourceRange.mipLevels, pDstView->subresourceRange.mipLevels); + const uint32_t arrayLayers = + rpsMin(pDstView->subresourceRange.arrayLayers, pSrcView->subresourceRange.arrayLayers); + + while ((srcAspectMask != 0) && (dstAspectMask != 0)) + { + const uint32_t srcPlane = (srcAspectMask & 1u) ? 0 : 1; + srcAspectMask &= ~(1 << srcPlane); + const uint32_t dstPlane = (dstAspectMask & 1u) ? 
0 : 1; + dstAspectMask &= ~(1 << dstPlane); + + for (uint32_t iMip = 0; iMip < mipLevels; iMip++) + { + const uint32_t srcMip = pSrcView->subresourceRange.baseMipLevel + iMip; + const uint32_t dstMip = pDstView->subresourceRange.baseMipLevel + iMip; + + D3D11_RECT srcRect; + srcRect.left = pSrcOffset[0] >> iMip; + srcRect.top = pSrcOffset[1] >> iMip; + srcRect.right = rpsMax(1u, (pSrcOffset[0] + pExtent[0]) >> iMip); + srcRect.bottom = rpsMax(1u, (pSrcOffset[1] + pExtent[1]) >> iMip); + + UINT dstOffset[2] = { + (pDstOffset[0] >> iMip), + (pDstOffset[1] >> iMip), + }; + + for (uint32_t iArrayLayer = 0; iArrayLayer < arrayLayers; iArrayLayer++) + { + const uint32_t srcSubresourceIndex = + D3D11CalcSubresource(srcMip, + pSrcView->subresourceRange.baseArrayLayer + iArrayLayer, + pSrcResource->desc.image.mipLevels); + + const uint32_t dstSubresourceIndex = + D3D11CalcSubresource(dstMip, + pDstView->subresourceRange.baseArrayLayer + iArrayLayer, + pDstResource->desc.image.mipLevels); + + pD3DDC->ResolveSubresource(rpsD3D11ResourceFromHandle(pDstResource->hRuntimeResource), + dstSubresourceIndex, + rpsD3D11ResourceFromHandle(pSrcResource->hRuntimeResource), + srcSubresourceIndex, + rpsFormatToDXGI(dstFmt)); + } + } + } + } +} // namespace rps diff --git a/src/runtime/d3d11/rps_d3d11_runtime_backend.cpp b/src/runtime/d3d11/rps_d3d11_runtime_backend.cpp new file mode 100644 index 0000000..0a7204e --- /dev/null +++ b/src/runtime/d3d11/rps_d3d11_runtime_backend.cpp @@ -0,0 +1,721 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#include "rps/runtime/d3d11/rps_d3d11_runtime.h" +#include "rps/runtime/common/rps_render_states.h" + +#include "runtime/common/rps_render_graph.hpp" +#include "runtime/common/rps_runtime_util.hpp" +#include "runtime/d3d11/rps_d3d11_runtime_backend.hpp" +#include "runtime/d3d11/rps_d3d11_runtime_device.hpp" +#include "runtime/d3d11/rps_d3d11_util.hpp" + +namespace rps +{ + RpsResult D3D11RuntimeBackend::CreateCommandResources(const RenderGraphUpdateContext& context) + { + const auto& graph = context.renderGraph.GetGraph(); + auto& runtimeCmds = context.renderGraph.GetRuntimeCmdInfos(); + const auto cmdInfos = context.renderGraph.GetCmdInfos().range_all(); + + ArenaCheckPoint arenaCheckpoint{context.scratchArena}; + + uint32_t numGraphicsCmds = 0; + + for (uint32_t iCmd = 0, numCmds = uint32_t(runtimeCmds.size()); iCmd < numCmds; iCmd++) + { + const auto& runtimeCmd = runtimeCmds[iCmd]; + + if (runtimeCmd.isTransition) + { + continue; + } + + auto pNewRuntimeCmd = m_runtimeCmds.grow(1); + + pNewRuntimeCmd->cmdId = runtimeCmd.cmdId; + pNewRuntimeCmd->resourceBindingInfo = RPS_INDEX_NONE_U32; + } + + // Create Views + + ArenaVector srvs(&context.scratchArena); + ArenaVector uavs(&context.scratchArena); + ArenaVector rtvs(&context.scratchArena); + ArenaVector dsvs(&context.scratchArena); + + srvs.reserve(context.renderGraph.GetCmdAccessInfos().size()); + uavs.reserve(context.renderGraph.GetCmdAccessInfos().size()); + rtvs.reserve(context.renderGraph.GetCmdAccessInfos().size()); + + const auto& cmdAccesses = context.renderGraph.GetCmdAccessInfos(); + + for (auto& runtimeCmd : m_runtimeCmds) + { + if (runtimeCmd.cmdId == RPS_CMD_ID_INVALID) + continue; + + auto& cmdInfo = cmdInfos[runtimeCmd.cmdId]; + auto& nodeDeclInfo = *cmdInfo.pNodeDecl; + + const uint32_t accessOffset = cmdInfo.accesses.GetBegin(); + + for (uint32_t accessIdx = 0, accessCount = cmdInfo.accesses.size(); accessIdx < accessCount; accessIdx++) + { + const uint32_t globalAccessIdx = accessOffset + 
accessIdx; + auto& access = cmdAccesses[globalAccessIdx]; + + if (!rpsAnyBitsSet(access.access.accessFlags, RPS_ACCESS_NO_VIEW_BIT)) + { + if (rpsAnyBitsSet(access.access.accessFlags, RPS_ACCESS_SHADER_RESOURCE_BIT)) + { + srvs.push_back(globalAccessIdx); + } + else if (rpsAnyBitsSet(access.access.accessFlags, RPS_ACCESS_UNORDERED_ACCESS_BIT)) + { + uavs.push_back(globalAccessIdx); + } + else if (rpsAnyBitsSet(access.access.accessFlags, RPS_ACCESS_RENDER_TARGET_BIT)) + { + rtvs.push_back(globalAccessIdx); + } + else if (rpsAnyBitsSet(access.access.accessFlags, RPS_ACCESS_DEPTH_STENCIL)) + { + dsvs.push_back(globalAccessIdx); + } + } + } + } + + m_views.resize(cmdAccesses.size(), nullptr); + + RPS_V_RETURN(CreateResourceViews(context, ViewType::SRV, srvs.range_all())); + RPS_V_RETURN(CreateResourceViews(context, ViewType::UAV, uavs.range_all())); + RPS_V_RETURN(CreateResourceViews(context, ViewType::RTV, rtvs.range_all())); + RPS_V_RETURN(CreateResourceViews(context, ViewType::DSV, dsvs.range_all())); + + // TODO: Multi queue + auto& cmdBatches = context.renderGraph.GetCmdBatches(); + if (!m_runtimeCmds.empty()) + { + cmdBatches.resize(1, CommandBatch{}); + cmdBatches[0].cmdBegin = 0; + cmdBatches[0].numCmds = uint32_t(m_runtimeCmds.size()); + } + + return RPS_OK; + } + + void D3D11RuntimeBackend::OnDestroy() + { + for (auto& frameResource : m_frameResources) + { + frameResource.DestroyDeviceResources(); + } + + m_frameResources.clear(); + + std::for_each(m_views.begin(), m_views.end(), [](auto pView) { SafeRelease(pView); }); + + m_views.clear(); + + RuntimeBackend::OnDestroy(); + } + + RpsResult D3D11RuntimeBackend::UpdateFrame(const RenderGraphUpdateContext& context) + { + m_currentResourceFrame = + m_frameResources.empty() ? 
0 : (m_currentResourceFrame + 1) % uint32_t(m_frameResources.size()); + + if (m_frameResources.size() <= GetNumQueuedFrames(context)) + { + RPS_RETURN_ERROR_IF(m_frameResources.size() > RPS_MAX_QUEUED_FRAMES, RPS_ERROR_INVALID_OPERATION); + + RPS_CHECK_ALLOC(m_frameResources.insert(m_currentResourceFrame, FrameResources{})); + m_frameResources[m_currentResourceFrame].Reset(m_persistentPool); + } + else + { + // TODO - Recycle + m_frameResources[m_currentResourceFrame].DestroyDeviceResources(); + std::swap(m_pendingReleaseResources, m_frameResources[m_currentResourceFrame].pendingResources); + } + + // TODO + auto& pendingRes = m_frameResources[m_currentResourceFrame].pendingResources; + pendingRes.reserve(pendingRes.size() + m_views.size()); + std::for_each(m_views.begin(), m_views.end(), [&](auto pView) { + if (pView) + { + pendingRes.push_back(pView); + } + }); + + // TODO: + m_runtimeCmds.reset(&context.frameArena); + m_views.clear(); + + return RPS_OK; + } + + RpsResult D3D11RuntimeBackend::CreateHeaps(const RenderGraphUpdateContext& context, ArrayRef heaps) + { + return RPS_OK; + } + + void D3D11RuntimeBackend::DestroyHeaps(ArrayRef heaps) + { + } + + RpsResult D3D11RuntimeBackend::CreateResources(const RenderGraphUpdateContext& context, + ArrayRef resInstances) + { + ID3D11Device* pD3DDevice = m_device.GetD3DDevice(); + auto resourceDecls = GetRenderGraph().GetBuilder().GetResourceDecls(); + + const bool bEnableDebugNames = + !!(context.pUpdateInfo->diagnosticFlags & RPS_DIAGNOSTIC_ENABLE_RUNTIME_DEBUG_NAMES); + + uint32_t temporalSlice = RPS_INDEX_NONE_U32; + + // Create resources + for (uint32_t iRes = 0, numRes = uint32_t(resInstances.size()); iRes < numRes; iRes++) + { + auto& resInfo = resInstances[iRes]; + + if (resInfo.isExternal) + { + continue; + } + + temporalSlice = + resInfo.isFirstTemporalSlice ? 0 : (resInfo.isTemporalSlice ? 
(temporalSlice + 1) : RPS_INDEX_NONE_U32); + + if (resInfo.isPendingCreate) + { + if (resInfo.hRuntimeResource) + { + m_frameResources[m_currentResourceFrame].pendingResources.push_back( + D3D11RuntimeDevice::FromHandle(resInfo.hRuntimeResource)); + resInfo.hRuntimeResource = {}; + } + + RPS_ASSERT(resInfo.allocPlacement.heapId == RPS_INDEX_NONE_U32); + + ID3D11Resource* pD3DRes; + CreateD3D11ResourceDesc(pD3DDevice, resInfo, &pD3DRes); + + resInfo.hRuntimeResource = rpsD3D11ResourceToHandle(pD3DRes); + resInfo.isPendingCreate = false; + + if (bEnableDebugNames) + { + SetResourceDebugName(pD3DRes, resourceDecls[resInfo.resourceDeclId].name, temporalSlice); + } + } + } + + return RPS_OK; + } + + void D3D11RuntimeBackend::DestroyResources(ArrayRef resources) + { + for (auto& resInfo : resources) + { + if (!resInfo.isExternal && resInfo.hRuntimeResource) + { + rpsD3D11ResourceFromHandle(resInfo.hRuntimeResource)->Release(); + } + } + } + + void D3D11RuntimeBackend::SetResourceDebugName(ID3D11DeviceChild* pObject, StrRef name, uint32_t index) + { + if (!pObject || name.empty()) + { + return; + } + + if (index != RPS_INDEX_NONE_U32) + { + char buf[RPS_NAME_MAX_LEN]; + snprintf(buf, RPS_NAME_MAX_LEN, "%s[%u]", name.str, index); + + pObject->SetPrivateData(WKPDID_D3DDebugObjectName, uint32_t(strlen(buf)), buf); + } + else + { + pObject->SetPrivateData(WKPDID_D3DDebugObjectName, uint32_t(name.len), name.str); + } + } + + RpsResult D3D11RuntimeBackend::RecordCommands(const RenderGraph& renderGraph, + const RpsRenderGraphRecordCommandInfo& recordInfo) const + { + RuntimeCmdCallbackContext cmdCbCtx{this, recordInfo}; + + for (auto cmdIter = m_runtimeCmds.cbegin() + recordInfo.cmdBeginIndex, cmdEnd = cmdIter + recordInfo.numCmds; + cmdIter != cmdEnd; + ++cmdIter) + { + RecordCommand(cmdCbCtx, *cmdIter); + } + + return RPS_OK; + } + + void D3D11RuntimeBackend::DestroyRuntimeResourceDeferred(ResourceInstance& resource) + { + if (resource.hRuntimeResource) + { + 
m_pendingReleaseResources.push_back(D3D11RuntimeDevice::FromHandle(resource.hRuntimeResource)); + resource.hRuntimeResource = {}; + } + } + + RpsResult D3D11RuntimeBackend::RecordCmdRenderPassBegin(const RuntimeCmdCallbackContext& context) const + { + auto& renderGraph = *context.pRenderGraph; + auto& cmd = *context.pCmd; + auto* pCmdInfo = context.pCmdInfo; + const auto& nodeDeclInfo = *pCmdInfo->pNodeDecl; + + RPS_RETURN_ERROR_IF(!nodeDeclInfo.MaybeGraphicsNode(), RPS_ERROR_INVALID_OPERATION); + + auto pD3DDeviceContext = GetD3DDeviceContext(context); + + const bool bBindRenderTargets = !rpsAnyBitsSet(cmd.callback.flags, RPS_CMD_CALLBACK_CUSTOM_RENDER_TARGETS_BIT); + const bool bSetViewportScissors = !rpsAnyBitsSet(cmd.callback.flags, RPS_CMD_CALLBACK_CUSTOM_VIEWPORT_BIT); + + // Need to skip clears if it's render pass resume + const bool bIsRenderPassResuming = rpsAnyBitsSet(context.renderPassFlags, RPS_RUNTIME_RENDER_PASS_RESUMING); + + auto cmdViewRange = m_views.range(pCmdInfo->accesses.GetBegin(), pCmdInfo->accesses.size()); + + uint32_t numRtvs = 0; + + ID3D11RenderTargetView* rtvs[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT]; + ID3D11DepthStencilView* dsv = {0}; + + D3D11_RECT d3dScissorRects[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE] = {}; + + if ((nodeDeclInfo.pRenderPassInfo) && !(nodeDeclInfo.pRenderPassInfo->clearOnly) && + (bBindRenderTargets || !bIsRenderPassResuming)) + { + auto& rpInfo = *nodeDeclInfo.pRenderPassInfo; + + auto clearColorValueRefs = rpInfo.GetRenderTargetClearValueRefs(); + uint32_t clearColorValueIndex = 0; + + for (auto& rtParamRef : rpInfo.GetRenderTargetRefs()) + { + auto& paramAccessInfo = nodeDeclInfo.params[rtParamRef.paramId]; + + const uint32_t rtvSlot = paramAccessInfo.baseSemanticIndex + rtParamRef.arrayOffset; + + numRtvs = rpsMax(numRtvs, rtvSlot + 1); + + rtvs[rtvSlot] = static_cast( + cmdViewRange[paramAccessInfo.accessOffset + rtParamRef.arrayOffset]); + + if ((!bIsRenderPassResuming) && 
(rpInfo.renderTargetClearMask & (1u << rtvSlot))) + { + auto clearValueRef = clearColorValueRefs[clearColorValueIndex]; + + auto pClearColor = static_cast( + cmd.args[clearValueRef.paramId])[clearValueRef.arrayOffset] + .float32; + + // TODO: Sub-rect clear implemented separatedly + pD3DDeviceContext->ClearRenderTargetView(rtvs[rtvSlot], pClearColor); + + clearColorValueIndex++; + } + } + + if (rpInfo.depthStencilTargetMask) + { + auto& paramAccessInfo = nodeDeclInfo.params[rpInfo.GetDepthStencilRef()->paramId]; + RPS_ASSERT(paramAccessInfo.numElements == 1); + + dsv = static_cast(cmdViewRange[paramAccessInfo.accessOffset]); + + if ((!bIsRenderPassResuming) && (rpInfo.clearDepth || rpInfo.clearStencil)) + { + float depthClearValue = 0.0f; + uint32_t stencilClearValue = 0; + uint32_t clearFlag = {}; + + if (rpInfo.clearDepth) + { + auto pClearValueRef = rpInfo.GetDepthClearValueRef(); + depthClearValue = static_cast(cmd.args[pClearValueRef->paramId])[0]; + clearFlag |= D3D11_CLEAR_DEPTH; + } + + if (rpInfo.clearStencil) + { + auto pClearValueRef = rpInfo.GetStencilClearValueRef(); + stencilClearValue = static_cast(cmd.args[pClearValueRef->paramId])[0]; + clearFlag |= D3D11_CLEAR_STENCIL; + } + + pD3DDeviceContext->ClearDepthStencilView(dsv, clearFlag, depthClearValue, UINT8(stencilClearValue)); + } + } + + if (bBindRenderTargets && ((numRtvs > 0) || (dsv != nullptr))) + { + pD3DDeviceContext->OMSetRenderTargets(numRtvs, rtvs, dsv); + } + + if (bSetViewportScissors) + { + auto& cmdRPInfo = *pCmdInfo->pRenderPassInfo; + + RPS_STATIC_ASSERT(sizeof(D3D11_VIEWPORT) == sizeof(RpsViewport), + "RpsViewport / D3D12_VIEWPORT size mismatch"); + + pD3DDeviceContext->RSSetViewports( + cmdRPInfo.viewportInfo.numViewports, + reinterpret_cast(cmdRPInfo.viewportInfo.pViewports)); + + RPS_RETURN_ERROR_IF(cmdRPInfo.viewportInfo.numScissorRects > RPS_COUNTOF(d3dScissorRects), + RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + for (uint32_t iScissor = 0; iScissor < 
cmdRPInfo.viewportInfo.numScissorRects; iScissor++) + { + auto& rect = cmdRPInfo.viewportInfo.pScissorRects[iScissor]; + d3dScissorRects[iScissor] = D3D11_RECT{ + rect.x, + rect.y, + rect.x + rect.width, + rect.y + rect.height, + }; + } + + pD3DDeviceContext->RSSetScissorRects(cmdRPInfo.viewportInfo.numScissorRects, d3dScissorRects); + } + } + + return RPS_OK; + } + + RpsResult D3D11RuntimeBackend::RecordCmdRenderPassEnd(const RuntimeCmdCallbackContext& context) const + { + auto& renderGraph = *context.pRenderGraph; + auto* pCmdInfo = context.pCmdInfo; + auto cmdAccesses = pCmdInfo->accesses.Get(renderGraph.GetCmdAccessInfos()); + auto resInstances = renderGraph.GetResourceInstances().range_all(); + + auto pD3DDeviceContext = GetD3DDeviceContext(context); + + // TODO: Add a pass to split graphics cmd to separated clear / resolve cmds + + const auto& nodeDeclInfo = *pCmdInfo->pNodeDecl; + if ((nodeDeclInfo.pRenderPassInfo) && (nodeDeclInfo.pRenderPassInfo->resolveTargetsMask != 0)) + { + auto rpInfo = *nodeDeclInfo.pRenderPassInfo; + auto resolveDsts = rpInfo.GetResolveTargetRefs(); + auto resolveSrcs = rpInfo.GetRenderTargetRefs(); + + uint32_t srcMask = rpInfo.renderTargetsMask; + uint32_t dstMask = rpInfo.resolveTargetsMask; + uint32_t srcIndex = 0; + uint32_t dstIndex = 0; + + while (dstMask != 0) + { + uint32_t nextRTMask = (1u << rpsFirstBitLow(srcMask)); + srcMask &= ~nextRTMask; + + if (dstMask & nextRTMask) + { + dstMask &= ~nextRTMask; + + auto& dstParamAccessInfo = nodeDeclInfo.params[resolveDsts[dstIndex].paramId]; + auto& dstAccessInfo = cmdAccesses[dstParamAccessInfo.accessOffset]; + auto& dstResInfo = resInstances[dstAccessInfo.resourceId]; + auto pD3DResDst = rpsD3D11ResourceFromHandle(dstResInfo.hRuntimeResource); + + auto& srcParamAccessInfo = nodeDeclInfo.params[resolveSrcs[srcIndex].paramId]; + auto& srcAccessInfo = cmdAccesses[srcParamAccessInfo.accessOffset]; + auto& srcResInfo = resInstances[srcAccessInfo.resourceId]; + auto pD3DResSrc = 
rpsD3D11ResourceFromHandle(srcResInfo.hRuntimeResource); + + RPS_ASSERT(dstAccessInfo.range.GetNumSubresources() == srcAccessInfo.range.GetNumSubresources()); + RPS_ASSERT(dstAccessInfo.range.aspectMask == 1); + RPS_ASSERT(dstAccessInfo.range.GetMipLevelCount() == 1); + + auto format = rpsFormatToDXGI(dstAccessInfo.viewFormat); + + for (uint32_t iArray = 0; iArray < dstAccessInfo.range.GetArrayLayerCount(); iArray++) + { + uint32_t dstSubRes = D3D11CalcSubresource(dstAccessInfo.range.baseMipLevel, + iArray + dstAccessInfo.range.baseArrayLayer, + dstResInfo.desc.image.mipLevels); + uint32_t srcSubRes = D3D11CalcSubresource(srcAccessInfo.range.baseMipLevel, + iArray + srcAccessInfo.range.baseArrayLayer, + srcResInfo.desc.image.mipLevels); + + pD3DDeviceContext->ResolveSubresource(pD3DResDst, dstSubRes, pD3DResSrc, srcSubRes, format); + } + + dstIndex++; + } + + srcIndex++; + } + } + + return RPS_OK; + } + + RpsResult D3D11RuntimeBackend::RecordCmdFixedFunctionBindingsAndDynamicStates( + const RuntimeCmdCallbackContext& context) const + { + const auto& nodeDeclInfo = *context.pCmdInfo->pNodeDecl; + + auto fixedFuncBindings = nodeDeclInfo.fixedFunctionBindings.Get(nodeDeclInfo.semanticKinds); + auto dynamicStates = nodeDeclInfo.dynamicStates.Get(nodeDeclInfo.semanticKinds); + + for (auto& binding : fixedFuncBindings) + { + auto paramIndices = binding.params.Get(nodeDeclInfo.semanticParamTable); + + switch (binding.semantic) + { + case RPS_SEMANTIC_VERTEX_BUFFER: + break; + case RPS_SEMANTIC_INDEX_BUFFER: + break; + case RPS_SEMANTIC_INDIRECT_ARGS: + break; + case RPS_SEMANTIC_INDIRECT_COUNT: + break; + case RPS_SEMANTIC_STREAM_OUT_BUFFER: + break; + case RPS_SEMANTIC_SHADING_RATE_IMAGE: + break; + case RPS_SEMANTIC_RENDER_TARGET: + case RPS_SEMANTIC_DEPTH_STENCIL_TARGET: + case RPS_SEMANTIC_RESOLVE_TARGET: + default: + break; + } + } + + for (auto& dynamicState : dynamicStates) + { + switch (dynamicState.semantic) + { + case RPS_SEMANTIC_PRIMITIVE_TOPOLOGY: + break; + 
case RPS_SEMANTIC_PATCH_CONTROL_POINTS: + break; + case RPS_SEMANTIC_PRIMITIVE_STRIP_CUT_INDEX: + break; + case RPS_SEMANTIC_BLEND_FACTOR: + break; + case RPS_SEMANTIC_STENCIL_REF: + break; + case RPS_SEMANTIC_DEPTH_BOUNDS: + break; + case RPS_SEMANTIC_SAMPLE_LOCATION: + break; + case RPS_SEMANTIC_SHADING_RATE: + break; + case RPS_SEMANTIC_COLOR_CLEAR_VALUE: + case RPS_SEMANTIC_DEPTH_CLEAR_VALUE: + case RPS_SEMANTIC_STENCIL_CLEAR_VALUE: + case RPS_SEMANTIC_VIEWPORT: + case RPS_SEMANTIC_SCISSOR: + default: + break; + } + } + + return RPS_OK; + } + + RpsResult D3D11RuntimeBackend::GetCmdArgResources(const RuntimeCmdCallbackContext& context, + uint32_t argIndex, + uint32_t srcArrayIndex, + ID3D11Resource** ppResources, + uint32_t count) const + { + RPS_RETURN_ERROR_IF(argIndex >= context.pNodeDeclInfo->params.size(), RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + auto& paramAccessInfo = context.pNodeDeclInfo->params[argIndex]; + RPS_RETURN_ERROR_IF(srcArrayIndex + count > paramAccessInfo.numElements, RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + auto cmdAccessInfos = context.pCmdInfo->accesses.Get(context.pRenderGraph->GetCmdAccessInfos()); + + RPS_ASSERT((paramAccessInfo.accessOffset + paramAccessInfo.numElements) <= cmdAccessInfos.size()); + + for (uint32_t i = 0; i < count; i++) + { + auto& accessInfo = cmdAccessInfos[paramAccessInfo.accessOffset + srcArrayIndex + i]; + ppResources[i] = + (accessInfo.resourceId != RPS_RESOURCE_ID_INVALID) + ? 
rpsD3D11ResourceFromHandle( + context.pRenderGraph->GetResourceInstance(accessInfo.resourceId).hRuntimeResource) + : nullptr; + } + + return RPS_OK; + } + + RpsResult D3D11RuntimeBackend::GetCmdArgViews(const RuntimeCmdCallbackContext& context, + uint32_t argIndex, + uint32_t srcArrayIndex, + ID3D11View** ppViews, + uint32_t count) const + { + RPS_RETURN_ERROR_IF(argIndex >= context.pNodeDeclInfo->params.size(), RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + auto& paramAccessInfo = context.pNodeDeclInfo->params[argIndex]; + RPS_RETURN_ERROR_IF(srcArrayIndex + count > paramAccessInfo.numElements, RPS_ERROR_INDEX_OUT_OF_BOUNDS); + RPS_RETURN_ERROR_IF(paramAccessInfo.access.accessFlags & RPS_ACCESS_NO_VIEW_BIT, RPS_ERROR_INVALID_OPERATION); + + auto cmdViewRange = m_views.range(context.pCmdInfo->accesses.GetBegin(), context.pCmdInfo->accesses.size()); + + RPS_ASSERT((paramAccessInfo.accessOffset + paramAccessInfo.numElements) <= cmdViewRange.size()); + + // Assuming all elements in the same parameter have the same access + for (uint32_t i = 0; i < count; i++) + { + ppViews[i] = cmdViewRange[paramAccessInfo.accessOffset + srcArrayIndex + i]; + } + + return RPS_OK; + } + + RpsResult D3D11RuntimeBackend::GetCmdArgResources(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayIndex, + ID3D11Resource** ppResources, + uint32_t count) + { + RPS_CHECK_ARGS(pContext && ppResources); + auto pBackendContext = rps::RuntimeCmdCallbackContext::Get(pContext); + + return pBackendContext->GetBackend()->GetCmdArgResources( + *pBackendContext, argIndex, srcArrayIndex, ppResources, count); + } + + RpsResult D3D11RuntimeBackend::GetCmdArgViews(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayIndex, + ID3D11View** ppViews, + uint32_t count) + { + RPS_CHECK_ARGS(pContext && ppViews); + + auto pBackendContext = rps::RuntimeCmdCallbackContext::Get(pContext); + + return pBackendContext->GetBackend()->GetCmdArgViews( + *pBackendContext, argIndex, 
srcArrayIndex, ppViews, count); + } + + const D3D11RuntimeBackend* D3D11RuntimeBackend::Get(const RpsCmdCallbackContext* pContext) + { + auto pBackendContext = rps::RuntimeCmdCallbackContext::Get(pContext); + return pBackendContext->GetBackend(); + } + +} // namespace rps + +RpsResult rpsD3D11GetCmdArgResourceArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + ID3D11Resource** pResources, + uint32_t count) +{ + return rps::D3D11RuntimeBackend::GetCmdArgResources(pContext, argIndex, srcArrayOffset, pResources, count); +} + +RpsResult rpsD3D11GetCmdArgResource(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + ID3D11Resource** pResources) +{ + return rpsD3D11GetCmdArgResourceArray(pContext, argIndex, 0, pResources, 1); +} + +RpsResult rpsD3D11GetCmdArgViewArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + ID3D11View** ppViews, + uint32_t count) +{ + return rps::D3D11RuntimeBackend::GetCmdArgViews(pContext, argIndex, srcArrayOffset, ppViews, count); +} + +RpsResult rpsD3D11GetCmdArgView(const RpsCmdCallbackContext* pContext, uint32_t argIndex, ID3D11View** ppView) +{ + return rps::D3D11RuntimeBackend::GetCmdArgViews(pContext, argIndex, 0, ppView, 1); +} + +RpsResult rpsD3D11GetCmdArgRTVArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + ID3D11RenderTargetView** pRTVs, + uint32_t count) +{ + return rps::D3D11RuntimeBackend::GetCmdArgViews(pContext, argIndex, srcArrayOffset, pRTVs, count); +} + +RpsResult rpsD3D11GetCmdArgRTV(const RpsCmdCallbackContext* pContext, uint32_t argIndex, ID3D11RenderTargetView** pRTV) +{ + return rps::D3D11RuntimeBackend::GetCmdArgViews(pContext, argIndex, 0, pRTV, 1); +} + +RpsResult rpsD3D11GetCmdArgDSVArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + ID3D11DepthStencilView** pDSVs, + uint32_t count) +{ + return 
rps::D3D11RuntimeBackend::GetCmdArgViews(pContext, argIndex, srcArrayOffset, pDSVs, count); +} + +RpsResult rpsD3D11GetCmdArgDSV(const RpsCmdCallbackContext* pContext, uint32_t argIndex, ID3D11DepthStencilView** pDSV) +{ + return rps::D3D11RuntimeBackend::GetCmdArgViews(pContext, argIndex, 0, pDSV, 1); +} + +RpsResult rpsD3D11GetCmdArgSRVArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + ID3D11ShaderResourceView** pSRVs, + uint32_t count) +{ + return rps::D3D11RuntimeBackend::GetCmdArgViews(pContext, argIndex, srcArrayOffset, pSRVs, count); +} + +RpsResult rpsD3D11GetCmdArgSRV(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + ID3D11ShaderResourceView** pSRV) +{ + return rps::D3D11RuntimeBackend::GetCmdArgViews(pContext, argIndex, 0, pSRV, 1); +} + +RpsResult rpsD3D11GetCmdArgUAVArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + ID3D11UnorderedAccessView** pUAVs, + uint32_t count) +{ + return rps::D3D11RuntimeBackend::GetCmdArgViews(pContext, argIndex, srcArrayOffset, pUAVs, count); +} + +RpsResult rpsD3D11GetCmdArgUAV(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + ID3D11UnorderedAccessView** pUAV) +{ + return rps::D3D11RuntimeBackend::GetCmdArgViews(pContext, argIndex, 0, pUAV, 1); +} diff --git a/src/runtime/d3d11/rps_d3d11_runtime_backend.hpp b/src/runtime/d3d11/rps_d3d11_runtime_backend.hpp new file mode 100644 index 0000000..8c73bf8 --- /dev/null +++ b/src/runtime/d3d11/rps_d3d11_runtime_backend.hpp @@ -0,0 +1,161 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#ifndef _RPS_D3D11_CMD_H_ +#define _RPS_D3D11_CMD_H_ + +#include "runtime/common/rps_render_graph.hpp" +#include "runtime/d3d_common/rps_d3d_common_util.hpp" +#include "runtime/d3d11/rps_d3d11_runtime_device.hpp" + +namespace rps +{ + class D3D11RuntimeBackend : public RuntimeBackend + { + private: + struct D3D11RuntimeCmd : public RuntimeCmd + { + uint32_t resourceBindingInfo; + + D3D11RuntimeCmd() = default; + + D3D11RuntimeCmd(uint32_t inCmdId, uint32_t inResourceBindingInfo) + : RuntimeCmd(inCmdId) + , resourceBindingInfo(inResourceBindingInfo) + { + } + }; + + enum class ViewType + { + RTV, + DSV, + SRV, + UAV, + }; + + public: + D3D11RuntimeBackend(D3D11RuntimeDevice& device, RenderGraph& renderGraph) + : RuntimeBackend(renderGraph) + , m_device(device) + , m_persistentPool(device.GetDevice().Allocator()) + , m_views(&m_persistentPool) + , m_pendingReleaseResources(&m_persistentPool) + , m_frameResources(&m_persistentPool) + { + } + + virtual RpsResult RecordCommands(const RenderGraph& renderGraph, + const RpsRenderGraphRecordCommandInfo& recordInfo) const override final; + + virtual RpsResult RecordCmdRenderPassBegin(const RuntimeCmdCallbackContext& context) const override final; + + virtual RpsResult RecordCmdRenderPassEnd(const RuntimeCmdCallbackContext& context) const override final; + + virtual RpsResult RecordCmdFixedFunctionBindingsAndDynamicStates( + const RuntimeCmdCallbackContext& context) const override final; + + virtual void DestroyRuntimeResourceDeferred(ResourceInstance& resource) override; + + RpsResult GetCmdArgResources(const RuntimeCmdCallbackContext& context, + uint32_t argIndex, + uint32_t srcArrayIndex, + ID3D11Resource** ppResources, + uint32_t count) const; + + RpsResult GetCmdArgViews(const RuntimeCmdCallbackContext& context, + uint32_t argIndex, + uint32_t srcArrayIndex, + ID3D11View** ppViews, + uint32_t count) const; + + static RpsResult GetCmdArgResources(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t 
srcArrayIndex, + ID3D11Resource** ppResources, + uint32_t count); + + static RpsResult GetCmdArgViews(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayIndex, + ID3D11View** ppViews, + uint32_t count); + + template + static RpsResult GetCmdArgViews(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayIndex, + TView** ppViews, + uint32_t count) + { + RPS_V_RETURN( + GetCmdArgViews(pContext, argIndex, srcArrayIndex, reinterpret_cast(ppViews), count)); + + for (uint32_t i = 0; i < count; i++) + { + ppViews[i] = static_cast(reinterpret_cast(ppViews)[i]); + } + + return RPS_OK; + } + + static const D3D11RuntimeBackend* Get(const RpsCmdCallbackContext* pContext); + + static ID3D11DeviceContext* GetD3DDeviceContext(const RuntimeCmdCallbackContext& context) + { + return rpsD3D11DeviceContextFromHandle(context.hCommandBuffer); + } + + protected: + virtual RpsResult UpdateFrame(const RenderGraphUpdateContext& context) override final; + virtual RpsResult CreateHeaps(const RenderGraphUpdateContext& context, ArrayRef heaps) override final; + virtual void DestroyHeaps(ArrayRef heaps) override final; + virtual RpsResult CreateResources(const RenderGraphUpdateContext& context, + ArrayRef resources) override final; + virtual void DestroyResources(ArrayRef resources) override final; + virtual RpsResult CreateCommandResources(const RenderGraphUpdateContext& context) override final; + virtual void OnDestroy() override final; + + private: + + void SetResourceDebugName(ID3D11DeviceChild* pObject, StrRef name, uint32_t index); + + RPS_NO_DISCARD + RpsResult CreateResourceViews(const RenderGraphUpdateContext& context, + ViewType viewType, + ConstArrayRef accessIndices); + + private: + D3D11RuntimeDevice& m_device; + Arena m_persistentPool; + + ArenaVector m_runtimeCmds; + ArenaVector m_views; + + struct FrameResources + { + ArenaVector pendingResources; + + void Reset(Arena& arena) + { + pendingResources.reset(&arena); + } + + void 
DestroyDeviceResources() + { + std::for_each(pendingResources.begin(), pendingResources.end(), [&](auto& i) { i->Release(); }); + pendingResources.clear(); + } + }; + + ArenaVector m_pendingReleaseResources; + ArenaVector m_frameResources; + uint32_t m_currentResourceFrame = 0; + }; +} // namespace rps + +#endif //_RPS_D3D11_CMD_H_ diff --git a/src/runtime/d3d11/rps_d3d11_runtime_backend_views.cpp b/src/runtime/d3d11/rps_d3d11_runtime_backend_views.cpp new file mode 100644 index 0000000..8eea4b8 --- /dev/null +++ b/src/runtime/d3d11/rps_d3d11_runtime_backend_views.cpp @@ -0,0 +1,531 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#include "runtime/d3d11/rps_d3d11_runtime_backend.hpp" + +#include "runtime/common/rps_runtime_util.hpp" + +namespace rps +{ + + RpsFormat GetD3D11SRVFormat(const CmdAccessInfo& accessInfo) + { + RpsFormat viewFormat = accessInfo.viewFormat; + + switch (viewFormat) + { + case RPS_FORMAT_D32_FLOAT: + viewFormat = RPS_FORMAT_R32_FLOAT; + break; + case RPS_FORMAT_D16_UNORM: + viewFormat = RPS_FORMAT_R16_UNORM; + break; + case RPS_FORMAT_D24_UNORM_S8_UINT: + viewFormat = RPS_FORMAT_R24_UNORM_X8_TYPELESS; + break; + case RPS_FORMAT_D32_FLOAT_S8X24_UINT: + viewFormat = RPS_FORMAT_R32_FLOAT_X8X24_TYPELESS; + break; + default: + break; + } + + return viewFormat; + } + + RpsResult InitD3D11RTVDesc(D3D11_RENDER_TARGET_VIEW_DESC* pRTVDesc, + const CmdAccessInfo& accessInfo, + const ResourceInstance& resource) + { + const auto& resDesc = resource.desc; + + RPS_ASSERT(resDesc.IsImage() || (accessInfo.viewFormat != RPS_FORMAT_UNKNOWN)); + + pRTVDesc->Format = rpsFormatToDXGI(accessInfo.viewFormat); + + RPS_ASSERT(rpsCountBits(accessInfo.range.aspectMask) == 1); + + if (resource.desc.type == RPS_RESOURCE_TYPE_IMAGE_2D) + { + if 
(resource.desc.image.arrayLayers <= 1) + { + if (resource.desc.image.sampleCount <= 1) + { + pRTVDesc->ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D; + pRTVDesc->Texture2D.MipSlice = accessInfo.range.baseMipLevel; + } + else + { + pRTVDesc->ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2DMS; + } + } + else + { + if (resDesc.image.sampleCount <= 1) + { + pRTVDesc->ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2DARRAY; + pRTVDesc->Texture2DArray.MipSlice = accessInfo.range.baseMipLevel; + pRTVDesc->Texture2DArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pRTVDesc->Texture2DArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + } + else + { + pRTVDesc->ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2DMSARRAY; + pRTVDesc->Texture2DMSArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pRTVDesc->Texture2DMSArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + } + } + } + else if (resDesc.type == RPS_RESOURCE_TYPE_IMAGE_3D) + { + //TODO: No WSlice info here. Using full-resource for now + + pRTVDesc->ViewDimension = D3D11_RTV_DIMENSION_TEXTURE3D; + pRTVDesc->Texture3D.MipSlice = accessInfo.range.baseMipLevel; + pRTVDesc->Texture3D.FirstWSlice = 0; + pRTVDesc->Texture3D.WSize = resDesc.image.depth; + } + else if (resDesc.type == RPS_RESOURCE_TYPE_IMAGE_1D) + { + if (resDesc.image.arrayLayers <= 1) + { + pRTVDesc->ViewDimension = D3D11_RTV_DIMENSION_TEXTURE1D; + pRTVDesc->Texture1D.MipSlice = accessInfo.range.baseMipLevel; + } + else + { + pRTVDesc->ViewDimension = D3D11_RTV_DIMENSION_TEXTURE1DARRAY; + pRTVDesc->Texture1DArray.MipSlice = accessInfo.range.baseMipLevel; + pRTVDesc->Texture1DArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pRTVDesc->Texture1DArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + } + } + else if (resDesc.type == RPS_RESOURCE_TYPE_BUFFER) + { + auto pBufView = reinterpret_cast(accessInfo.pViewInfo); + const uint32_t elementSize = rpsGetFormatElementBytes(accessInfo.viewFormat); + RPS_RETURN_ERROR_IF(elementSize == 
0, RPS_ERROR_INVALID_ARGUMENTS);
+
+            const uint64_t bufViewBytes = GetBufferViewBytes(pBufView, resource.desc);
+            const uint64_t numElements  = bufViewBytes / elementSize;
+            const uint64_t firstElement = pBufView->offset / elementSize;
+            RPS_RETURN_ERROR_IF(numElements > UINT32_MAX, RPS_ERROR_INTEGER_OVERFLOW);
+            RPS_RETURN_ERROR_IF(firstElement > UINT32_MAX, RPS_ERROR_INTEGER_OVERFLOW);
+
+            // Every other resource type sets ViewDimension. Without it a buffer RTV keeps whatever the
+            // previous call left in the desc, which CreateResourceViews reuses across loop iterations.
+            pRTVDesc->ViewDimension       = D3D11_RTV_DIMENSION_BUFFER;
+            pRTVDesc->Buffer.FirstElement = uint32_t(firstElement);
+            pRTVDesc->Buffer.NumElements  = uint32_t(numElements);
+        }
+        else
+        {
+            return RPS_ERROR_INVALID_OPERATION;
+        }
+
+        return RPS_OK;
+    }
+
+    // Fills *pDSVDesc for a depth-stencil view of `resource` as described by `accessInfo`.
+    // Only image resources are accepted (RPS_ERROR_INVALID_OPERATION otherwise). The read-only depth /
+    // stencil flags are set when the access reads the respective aspect without writing it.
+    RpsResult InitD3D11DSVDesc(D3D11_DEPTH_STENCIL_VIEW_DESC* pDSVDesc,
+                               const CmdAccessInfo& accessInfo,
+                               const ResourceInstance& resource)
+    {
+        RPS_RETURN_ERROR_IF(!resource.desc.IsImage(), RPS_ERROR_INVALID_OPERATION);
+
+        // TODO: Add actual view Format info to Access.
+        const RpsFormat viewFormat = accessInfo.viewFormat;
+        pDSVDesc->Format = rpsFormatToDXGI(viewFormat);
+
+        pDSVDesc->Flags = 0;
+
+        if ((accessInfo.access.accessFlags & RPS_ACCESS_DEPTH_READ_BIT) &&
+            !(accessInfo.access.accessFlags & RPS_ACCESS_DEPTH_WRITE_BIT))
+        {
+            pDSVDesc->Flags |= D3D11_DSV_READ_ONLY_DEPTH;
+        }
+
+        if (rpsFormatHasStencil(viewFormat) && (accessInfo.access.accessFlags & RPS_ACCESS_STENCIL_READ_BIT) &&
+            !(accessInfo.access.accessFlags & RPS_ACCESS_STENCIL_WRITE_BIT))
+        {
+            pDSVDesc->Flags |= D3D11_DSV_READ_ONLY_STENCIL;
+        }
+
+        if (resource.desc.type == RPS_RESOURCE_TYPE_IMAGE_2D)
+        {
+            if (resource.desc.image.arrayLayers <= 1)
+            {
+                if (resource.desc.image.sampleCount <= 1)
+                {
+                    pDSVDesc->ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2D;
+                    pDSVDesc->Texture2D.MipSlice = accessInfo.range.baseMipLevel;
+                }
+                else
+                {
+                    pDSVDesc->ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2DMS;
+                }
+            }
+            else
+            {
+                if (resource.desc.image.sampleCount <= 1)
+                {
+                    pDSVDesc->ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2DARRAY;
+                    pDSVDesc->Texture2DArray.MipSlice = accessInfo.range.baseMipLevel;
+
pDSVDesc->Texture2DArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pDSVDesc->Texture2DArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + } + else + { + pDSVDesc->ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2DMSARRAY; + pDSVDesc->Texture2DMSArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pDSVDesc->Texture2DMSArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + } + } + } + else if (resource.desc.type == RPS_RESOURCE_TYPE_IMAGE_1D) + { + if (resource.desc.image.arrayLayers <= 1) + { + pDSVDesc->ViewDimension = D3D11_DSV_DIMENSION_TEXTURE1D; + pDSVDesc->Texture1D.MipSlice = accessInfo.range.baseMipLevel; + } + else + { + pDSVDesc->ViewDimension = D3D11_DSV_DIMENSION_TEXTURE1DARRAY; + pDSVDesc->Texture1DArray.MipSlice = accessInfo.range.baseMipLevel; + pDSVDesc->Texture1DArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pDSVDesc->Texture1DArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + } + } + else + { + return RPS_ERROR_INVALID_OPERATION; + } + + return RPS_OK; + } + + RpsResult InitD3D11SRVDesc(D3D11RuntimeDevice& device, + D3D11_SHADER_RESOURCE_VIEW_DESC* pSRVDesc, + const CmdAccessInfo& accessInfo, + const ResourceInstance& resource) + { + RPS_RETURN_ERROR_IF(!IsResourceTypeValid(resource.desc.type), RPS_ERROR_INVALID_OPERATION); + + pSRVDesc->Format = rpsFormatToDXGI(GetD3D11SRVFormat(accessInfo)); + + if (resource.desc.IsBuffer()) + { + auto pBufView = reinterpret_cast(accessInfo.pViewInfo); + + if (!(accessInfo.access.accessFlags & RPS_ACCESS_RAYTRACING_AS_READ_BIT)) + { + uint32_t elementSize = rpsGetFormatElementBytes(accessInfo.viewFormat); + + if (elementSize == 0) + { + elementSize = pBufView->stride; + } + if (elementSize == 0) + { + RPS_ASSERT(accessInfo.viewFormat == RPS_FORMAT_UNKNOWN); + elementSize = 4; //TODO: RAW + } + + const uint64_t bufViewBytes = GetBufferViewBytes(pBufView, resource.desc); + const uint64_t numElements = bufViewBytes / elementSize; + const uint64_t firstElement = 
pBufView->offset / elementSize;
+                RPS_RETURN_ERROR_IF(firstElement > UINT32_MAX, RPS_ERROR_INTEGER_OVERFLOW);
+                RPS_RETURN_ERROR_IF(numElements > UINT32_MAX, RPS_ERROR_INTEGER_OVERFLOW);
+
+                // BufferEx shares storage with the texture members of the desc union, and CreateResourceViews
+                // reuses one desc for every view. Clear Flags so a typed/structured buffer view does not
+                // inherit a stale RAW bit or array-slice value from a previous view.
+                pSRVDesc->ViewDimension         = D3D11_SRV_DIMENSION_BUFFEREX;
+                pSRVDesc->BufferEx.FirstElement = uint32_t(firstElement);
+                pSRVDesc->BufferEx.NumElements  = uint32_t(numElements);
+                pSRVDesc->BufferEx.Flags        = 0;
+
+                if (((accessInfo.viewFormat == RPS_FORMAT_UNKNOWN) ||
+                     (accessInfo.viewFormat == RPS_FORMAT_R32_TYPELESS)) &&
+                    (pBufView->stride == 0))
+                {
+                    pSRVDesc->BufferEx.Flags = D3D11_BUFFEREX_SRV_FLAG_RAW;
+                    pSRVDesc->Format = DXGI_FORMAT_R32_TYPELESS;
+                }
+            }
+            else
+            {
+                // Accesses flagged RPS_ACCESS_RAYTRACING_AS_READ_BIT have no SRV implementation here.
+                // The former RPS_ASSERT("NoImpl") could never fire (a string literal is always truthy),
+                // so the function returned RPS_OK with an unfilled desc. Fail explicitly instead.
+                RPS_TODO_RETURN_NOT_IMPLEMENTED();
+            }
+        }
+        else
+        {
+            auto pImageView = reinterpret_cast(accessInfo.pViewInfo);
+
+            if (resource.desc.type == RPS_RESOURCE_TYPE_IMAGE_2D)
+            {
+                if (resource.desc.image.sampleCount > 1)
+                {
+                    if (resource.desc.image.arrayLayers <= 1)
+                    {
+                        pSRVDesc->ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DMS;
+                    }
+                    else
+                    {
+                        RPS_ASSERT(!(accessInfo.pViewInfo->flags & RPS_RESOURCE_VIEW_FLAG_CUBEMAP_BIT));
+                        {
+                            pSRVDesc->ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DMSARRAY;
+                            pSRVDesc->Texture2DMSArray.FirstArraySlice = accessInfo.range.baseArrayLayer;
+                            pSRVDesc->Texture2DMSArray.ArraySize = accessInfo.range.GetArrayLayerCount();
+                        }
+                    }
+                }
+                else if (resource.desc.image.arrayLayers <= 1)
+                {
+                    pSRVDesc->ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
+                    pSRVDesc->Texture2D.MostDetailedMip = accessInfo.range.baseMipLevel;
+                    pSRVDesc->Texture2D.MipLevels = accessInfo.range.GetMipLevelCount();
+                }
+                else
+                {
+                    if (!(accessInfo.pViewInfo->flags & RPS_RESOURCE_VIEW_FLAG_CUBEMAP_BIT))
+                    {
+                        pSRVDesc->ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY;
+                        pSRVDesc->Texture2DArray.MostDetailedMip = accessInfo.range.baseMipLevel;
+                        pSRVDesc->Texture2DArray.MipLevels = accessInfo.range.GetMipLevelCount();
+                        pSRVDesc->Texture2DArray.FirstArraySlice = accessInfo.range.baseArrayLayer;
+                        pSRVDesc->Texture2DArray.ArraySize =
accessInfo.range.GetArrayLayerCount(); + } + else if ((accessInfo.range.GetArrayLayerCount() > 6) || (accessInfo.range.baseArrayLayer > 0)) + { + pSRVDesc->ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBEARRAY; + pSRVDesc->TextureCubeArray.MostDetailedMip = accessInfo.range.baseMipLevel; + pSRVDesc->TextureCubeArray.MipLevels = accessInfo.range.GetMipLevelCount(); + pSRVDesc->TextureCubeArray.First2DArrayFace = accessInfo.range.baseArrayLayer; + pSRVDesc->TextureCubeArray.NumCubes = accessInfo.range.GetArrayLayerCount() / 6; + } + else + { + RPS_ASSERT(accessInfo.range.GetArrayLayerCount() == 6); + pSRVDesc->ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBE; + pSRVDesc->TextureCube.MostDetailedMip = accessInfo.range.baseMipLevel; + pSRVDesc->TextureCube.MipLevels = accessInfo.range.GetMipLevelCount(); + } + } + } + else if (resource.desc.type == RPS_RESOURCE_TYPE_IMAGE_3D) + { + pSRVDesc->ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; + pSRVDesc->Texture3D.MostDetailedMip = accessInfo.range.baseMipLevel; + pSRVDesc->Texture3D.MipLevels = accessInfo.range.GetMipLevelCount(); + } + else if (resource.desc.type == RPS_RESOURCE_TYPE_IMAGE_1D) + { + if (resource.desc.image.arrayLayers <= 1) + { + pSRVDesc->ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; + pSRVDesc->Texture1D.MostDetailedMip = accessInfo.range.baseMipLevel; + pSRVDesc->Texture1D.MipLevels = accessInfo.range.GetMipLevelCount(); + } + else + { + pSRVDesc->ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1DARRAY; + pSRVDesc->Texture1DArray.MostDetailedMip = accessInfo.range.baseMipLevel; + pSRVDesc->Texture1DArray.MipLevels = accessInfo.range.GetMipLevelCount(); + pSRVDesc->Texture1DArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pSRVDesc->Texture1DArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + } + } + else + { + return RPS_ERROR_INVALID_OPERATION; + } + } + + return RPS_OK; + } + + RpsResult InitD3D11UAVDesc(D3D11RuntimeDevice& device, + D3D11_UNORDERED_ACCESS_VIEW_DESC* pUAVDesc, + const 
CmdAccessInfo& accessInfo, + const ResourceInstance& resource) + + { + const RpsFormat viewFormat = accessInfo.viewFormat; + + pUAVDesc->Format = rpsFormatToDXGI(viewFormat); + + if (resource.desc.IsBuffer()) + { + auto pBufView = reinterpret_cast(accessInfo.pViewInfo); + + uint32_t elementSize = rpsGetFormatElementBytes(viewFormat); + + if (elementSize == 0) + { + elementSize = pBufView->stride; + } + if (elementSize == 0) + { + elementSize = 4; //TODO: RAW + } + + const uint64_t bufViewBytes = GetBufferViewBytes(pBufView, resource.desc); + const uint64_t numElements = bufViewBytes / elementSize; + const uint64_t firstElement = pBufView->offset / elementSize; + RPS_RETURN_ERROR_IF(firstElement > UINT32_MAX, RPS_ERROR_INTEGER_OVERFLOW); + RPS_RETURN_ERROR_IF(numElements > UINT32_MAX, RPS_ERROR_INTEGER_OVERFLOW); + + pUAVDesc->ViewDimension = D3D11_UAV_DIMENSION_BUFFER; + pUAVDesc->Buffer.FirstElement = uint32_t(firstElement); + pUAVDesc->Buffer.NumElements = uint32_t(numElements); + pUAVDesc->Buffer.Flags = 0; + + if (((pUAVDesc->Format == DXGI_FORMAT_UNKNOWN) || (pUAVDesc->Format == DXGI_FORMAT_R32_TYPELESS)) && + (pBufView->stride == 0)) + { + pUAVDesc->Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW; + pUAVDesc->Format = DXGI_FORMAT_R32_TYPELESS; + } + + // TODO: Append/Counter + } + else + { + auto pImageView = reinterpret_cast(accessInfo.pViewInfo); + + RPS_ASSERT(resource.desc.image.sampleCount == 1); + + if (resource.desc.type == RPS_RESOURCE_TYPE_IMAGE_2D) + { + if (resource.desc.image.arrayLayers <= 1) + { + pUAVDesc->ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; + pUAVDesc->Texture2D.MipSlice = accessInfo.range.baseMipLevel; + } + else + { + pUAVDesc->ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2DARRAY; + pUAVDesc->Texture2DArray.MipSlice = accessInfo.range.baseMipLevel; + pUAVDesc->Texture2DArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pUAVDesc->Texture2DArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + } + } + else if (resource.desc.type 
== RPS_RESOURCE_TYPE_IMAGE_3D) + { + pUAVDesc->ViewDimension = D3D11_UAV_DIMENSION_TEXTURE3D; + pUAVDesc->Texture3D.MipSlice = accessInfo.range.baseMipLevel; + pUAVDesc->Texture3D.FirstWSlice = 0; + pUAVDesc->Texture3D.WSize = resource.desc.image.depth; // TODO - W range + } + else if (resource.desc.type == RPS_RESOURCE_TYPE_IMAGE_1D) + { + if (resource.desc.image.arrayLayers <= 1) + { + pUAVDesc->ViewDimension = D3D11_UAV_DIMENSION_TEXTURE1D; + pUAVDesc->Texture1D.MipSlice = accessInfo.range.baseMipLevel; + } + else + { + pUAVDesc->ViewDimension = D3D11_UAV_DIMENSION_TEXTURE1DARRAY; + pUAVDesc->Texture1DArray.MipSlice = accessInfo.range.baseMipLevel; + pUAVDesc->Texture1DArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pUAVDesc->Texture1DArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + } + } + else + { + return RPS_ERROR_INVALID_OPERATION; + } + } + return RPS_OK; + } + + RpsResult D3D11RuntimeBackend::CreateResourceViews(const RenderGraphUpdateContext& context, + ViewType type, + ConstArrayRef accessIndices) + { + RPS_RETURN_OK_IF(accessIndices.empty()); + + auto& cmdAccesses = context.renderGraph.GetCmdAccessInfos(); + + auto resourceInstances = context.renderGraph.GetResourceInstances().range_all(); + auto pD3DDevice = m_device.GetD3DDevice(); + + if (type == ViewType::RTV) + { + D3D11_RENDER_TARGET_VIEW_DESC rtvDesc = {}; + ID3D11RenderTargetView* pD3DRTV = {}; + + for (auto accessIndex : accessIndices) + { + auto& access = cmdAccesses[accessIndex]; + + const auto& resource = resourceInstances[access.resourceId]; + auto* pD3DRes = D3D11RuntimeDevice::FromHandle(resource.hRuntimeResource); + + RPS_V_RETURN(InitD3D11RTVDesc(&rtvDesc, access, resource)); + RPS_V_RETURN(HRESULTToRps(pD3DDevice->CreateRenderTargetView(pD3DRes, &rtvDesc, &pD3DRTV))); + + m_views[accessIndex] = pD3DRTV; + } + } + else if (type == ViewType::DSV) + { + D3D11_DEPTH_STENCIL_VIEW_DESC dsvDesc = {}; + ID3D11DepthStencilView* pD3DDSV = {}; + + for (auto accessIndex : 
accessIndices)
+            {
+                auto& access = cmdAccesses[accessIndex];
+
+                const auto& resource = resourceInstances[access.resourceId];
+                auto* pD3DRes = D3D11RuntimeDevice::FromHandle(resource.hRuntimeResource);
+
+                RPS_V_RETURN(InitD3D11DSVDesc(&dsvDesc, access, resource));
+                RPS_V_RETURN(HRESULTToRps(pD3DDevice->CreateDepthStencilView(pD3DRes, &dsvDesc, &pD3DDSV)));
+
+                m_views[accessIndex] = pD3DDSV;
+            }
+        }
+        else if (type == ViewType::SRV)
+        {
+            D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
+            ID3D11ShaderResourceView* pD3DSRV = {};
+
+            for (auto accessIndex : accessIndices)
+            {
+                auto& access = cmdAccesses[accessIndex];
+
+                const auto& resource = resourceInstances[access.resourceId];
+                auto* pD3DRes = D3D11RuntimeDevice::FromHandle(resource.hRuntimeResource);
+
+                RPS_V_RETURN(InitD3D11SRVDesc(m_device, &srvDesc, access, resource));
+                RPS_V_RETURN(HRESULTToRps(pD3DDevice->CreateShaderResourceView(pD3DRes, &srvDesc, &pD3DSRV)));
+
+                m_views[accessIndex] = pD3DSRV;
+            }
+        }
+        else if (type == ViewType::UAV)
+        {
+            D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
+            ID3D11UnorderedAccessView* pD3DUAV = {};
+
+            for (auto accessIndex : accessIndices)
+            {
+                auto& access = cmdAccesses[accessIndex];
+
+                const auto& resource = resourceInstances[access.resourceId];
+                auto* pD3DRes = D3D11RuntimeDevice::FromHandle(resource.hRuntimeResource);
+
+                RPS_V_RETURN(InitD3D11UAVDesc(m_device, &uavDesc, access, resource));
+                // Check the HRESULT like the RTV/DSV/SRV paths do. If the failure were ignored, pD3DUAV would
+                // still hold the previous iteration's view (or null) and that would be stored in m_views.
+                RPS_V_RETURN(HRESULTToRps(pD3DDevice->CreateUnorderedAccessView(pD3DRes, &uavDesc, &pD3DUAV)));
+
+                m_views[accessIndex] = pD3DUAV;
+            }
+        }
+        else
+        {
+            RPS_TODO_RETURN_NOT_IMPLEMENTED();
+        }
+
+        return RPS_OK;
+    }
+} // namespace rps
diff --git a/src/runtime/d3d11/rps_d3d11_runtime_device.cpp b/src/runtime/d3d11/rps_d3d11_runtime_device.cpp
new file mode 100644
index 0000000..e7baae6
--- /dev/null
+++ b/src/runtime/d3d11/rps_d3d11_runtime_device.cpp
@@ -0,0 +1,167 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#include "rps/runtime/d3d_common/rps_d3d_common.h" + +#include "runtime/common/rps_runtime_util.hpp" +#include "runtime/common/phases/rps_pre_process.hpp" +#include "runtime/common/phases/rps_dag_build.h" +#include "runtime/common/phases/rps_access_dag_build.hpp" +#include "runtime/common/phases/rps_cmd_print.hpp" +#include "runtime/common/phases/rps_cmd_dag_print.hpp" +#include "runtime/common/phases/rps_dag_schedule.hpp" +#include "runtime/common/phases/rps_schedule_print.hpp" +#include "runtime/common/phases/rps_memory_schedule.hpp" + +#include "runtime/d3d11/rps_d3d11_runtime_device.hpp" +#include "runtime/d3d11/rps_d3d11_runtime_backend.hpp" +#include "runtime/d3d11/rps_d3d11_util.hpp" + +namespace rps +{ + D3D11RuntimeDevice::D3D11RuntimeDevice(Device* pDevice, const RpsD3D11RuntimeDeviceCreateInfo* pCreateInfo) + : RuntimeDevice(pDevice, pCreateInfo->pRuntimeCreateInfo) + , m_pD3DDevice(pCreateInfo->pD3D11Device) + , m_flags(pCreateInfo->flags) + { + m_pD3DDevice->AddRef(); + } + + RpsResult D3D11RuntimeDevice::Init() + { + return RPS_OK; + } + + D3D11RuntimeDevice::~D3D11RuntimeDevice() + { + SafeRelease(m_pD3DDevice); + } + + RpsResult D3D11RuntimeDevice::BuildDefaultRenderGraphPhases(RenderGraph& renderGraph) + { + RPS_V_RETURN(renderGraph.ReservePhases(8)); + RPS_V_RETURN(renderGraph.AddPhase()); + RPS_V_RETURN(renderGraph.AddPhase()); + RPS_V_RETURN(renderGraph.AddPhase()); + RPS_V_RETURN(renderGraph.AddPhase(renderGraph)); + RPS_V_RETURN(renderGraph.AddPhase(renderGraph)); + RPS_V_RETURN(renderGraph.AddPhase(renderGraph)); + RPS_V_RETURN(renderGraph.AddPhase()); + RPS_V_RETURN(renderGraph.AddPhase(*this, renderGraph)); + + return RPS_OK; + } + + RpsResult D3D11RuntimeDevice::InitializeSubresourceInfos(ArrayRef resInstances) + { + for (auto& resInstance : resInstances) 
+ { + GetFullSubresourceRange( + resInstance.fullSubresourceRange, resInstance.desc, GetResourcePlaneMask(resInstance.desc)); + + resInstance.numSubResources = GetSubresourceCount(resInstance.desc); + } + + return RPS_OK; + } + + RpsResult D3D11RuntimeDevice::InitializeResourceAllocInfos(ArrayRef resInstances) + { + return RPS_OK; + } + + RpsResult D3D11RuntimeDevice::GetSubresourceRangeFromImageView(SubresourceRangePacked& outRange, + const ResourceInstance& resourceInfo, + const RpsAccessAttr& accessAttr, + const RpsImageView& imageView) + { + uint32_t viewPlaneMask = (imageView.base.viewFormat == RPS_FORMAT_UNKNOWN) + ? UINT32_MAX + : GetFormatPlaneMask(imageView.base.viewFormat); + uint32_t planeMask = GetResourcePlaneMask(resourceInfo.desc) & viewPlaneMask; + outRange = SubresourceRangePacked(planeMask, imageView.subresourceRange, resourceInfo.desc); + return RPS_OK; + } + + ConstArrayRef D3D11RuntimeDevice::GetMemoryTypeInfos() const + { + return {}; + } + + uint32_t D3D11RuntimeDevice::GetSubresourceCount(const ResourceDescPacked& resDesc) const + { + return resDesc.IsBuffer() ? 1 + : (((resDesc.type == RPS_RESOURCE_TYPE_IMAGE_3D) ? 1 : resDesc.image.arrayLayers) * + resDesc.image.mipLevels); + } + + uint32_t D3D11RuntimeDevice::GetResourcePlaneMask(const ResourceDescPacked& resDesc) const + { + return resDesc.IsBuffer() ? 
1u : GetFormatPlaneMask(resDesc.image.format); + } + + void D3D11BuiltInClearColorRegions(const RpsCmdCallbackContext* pContext); + void D3D11BuiltInClearColor(const RpsCmdCallbackContext* pContext); + void D3D11BuiltInClearDepthStencil(const RpsCmdCallbackContext* pContext); + void D3D11BuiltInClearDepthStencilRegions(const RpsCmdCallbackContext* pContext); + void D3D11BuiltInClearTextureUAV(const RpsCmdCallbackContext* pContext); + void D3D11BuiltInClearTextureUAVRegions(const RpsCmdCallbackContext* pContext); + void D3D11BuiltInClearBufferUAV(const RpsCmdCallbackContext* pContext); + void D3D11BuiltInCopyTexture(const RpsCmdCallbackContext* pContext); + void D3D11BuiltInCopyBuffer(const RpsCmdCallbackContext* pContext); + void D3D11BuiltInCopyTextureToBuffer(const RpsCmdCallbackContext* pContext); + void D3D11BuiltInCopyBufferToTexture(const RpsCmdCallbackContext* pContext); + void D3D11BuiltInResolve(const RpsCmdCallbackContext* pContext); + + ConstArrayRef D3D11RuntimeDevice::GetBuiltInNodes() const + { + static const BuiltInNodeInfo c_builtInNodes[] = { + {"clear_color", {&D3D11BuiltInClearColor, nullptr}}, + {"clear_color_regions", {&D3D11BuiltInClearColorRegions, nullptr}}, + {"clear_depth_stencil", {&D3D11BuiltInClearDepthStencil, nullptr}}, + {"clear_depth_stencil_regions", {&D3D11BuiltInClearDepthStencilRegions, nullptr}}, + {"clear_texture", {&D3D11BuiltInClearTextureUAV, nullptr}}, + {"clear_texture_regions", {&D3D11BuiltInClearTextureUAVRegions, nullptr}}, + {"clear_buffer", {&D3D11BuiltInClearBufferUAV,nullptr}}, + {"copy_texture", {&D3D11BuiltInCopyTexture,nullptr}}, + {"copy_buffer", {&D3D11BuiltInCopyBuffer,nullptr}}, + {"copy_texture_to_buffer", {&D3D11BuiltInCopyTextureToBuffer,nullptr}}, + {"copy_buffer_to_texture", {&D3D11BuiltInCopyBufferToTexture,nullptr}}, + {"resolve", {&D3D11BuiltInResolve,nullptr}}, + }; + + return c_builtInNodes; + } + + bool D3D11RuntimeDevice::CalculateAccessTransition(const RpsAccessAttr& beforeAccess, + const 
RpsAccessAttr& afterAccess, + AccessTransitionInfo& results) const + { + if (rpsAnyBitsSet(beforeAccess.accessFlags | afterAccess.accessFlags, RPS_ACCESS_CLEAR_BIT) && + ((beforeAccess.accessFlags & ~RPS_ACCESS_CLEAR_BIT) == (afterAccess.accessFlags & ~RPS_ACCESS_CLEAR_BIT))) + { + results.bKeepOrdering = true; + results.bMergedAccessStates = true; + results.bTransition = false; + results.mergedAccess = beforeAccess | afterAccess; + + return true; + } + + return false; + } +} // namespace rps + +RpsResult rpsD3D11RuntimeDeviceCreate(const RpsD3D11RuntimeDeviceCreateInfo* pCreateInfo, RpsDevice* phDevice) +{ + RPS_CHECK_ARGS(pCreateInfo && pCreateInfo->pD3D11Device); + + RpsResult result = + rps::RuntimeDevice::Create(phDevice, pCreateInfo->pDeviceCreateInfo, pCreateInfo); + + return result; +} diff --git a/src/runtime/d3d11/rps_d3d11_runtime_device.hpp b/src/runtime/d3d11/rps_d3d11_runtime_device.hpp new file mode 100644 index 0000000..1608b67 --- /dev/null +++ b/src/runtime/d3d11/rps_d3d11_runtime_device.hpp @@ -0,0 +1,86 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#ifndef _RPS_DEVICE_D3D11_H_ +#define _RPS_DEVICE_D3D11_H_ + +#include "rps/runtime/d3d11/rps_d3d11_runtime.h" + +#include "runtime/common/rps_runtime_device.hpp" + +namespace rps +{ + class D3D11RuntimeDevice final : public RuntimeDevice + { + public: + D3D11RuntimeDevice(Device* pDevice, const RpsD3D11RuntimeDeviceCreateInfo* pCreateInfo); + virtual ~D3D11RuntimeDevice(); + + virtual RpsResult Init() override; + virtual RpsResult BuildDefaultRenderGraphPhases(RenderGraph& renderGraph) override final; + virtual RpsResult InitializeSubresourceInfos(ArrayRef resInstances) override final; + virtual RpsResult InitializeResourceAllocInfos(ArrayRef resInstances) override final; + virtual RpsResult GetSubresourceRangeFromImageView(SubresourceRangePacked& outRange, + const ResourceInstance& resourceInfo, + const RpsAccessAttr& accessAttr, + const RpsImageView& imageView) override final; + virtual ConstArrayRef GetMemoryTypeInfos() const override final; + + virtual ConstArrayRef GetBuiltInNodes() const override final; + + virtual bool CalculateAccessTransition(const RpsAccessAttr& beforeAccess, + const RpsAccessAttr& afterAccess, + AccessTransitionInfo& results) const override final; + + virtual RpsImageAspectUsageFlags GetImageAspectUsages(uint32_t aspectMask) const override final + { + return ((aspectMask & 1) ? (RPS_IMAGE_ASPECT_COLOR | RPS_IMAGE_ASPECT_DEPTH) : RPS_IMAGE_ASPECT_UNKNOWN) | + ((aspectMask & 2) ? 
RPS_IMAGE_ASPECT_STENCIL : RPS_IMAGE_ASPECT_UNKNOWN); + } + + static ID3D11Resource* FromHandle(RpsRuntimeResource hRuntimeResource) + { + return static_cast(hRuntimeResource.ptr); + } + + static RpsRuntimeResource ToHandle(ID3D11Resource* pD3DResource) + { + return RpsRuntimeResource{pD3DResource}; + } + + public: + ID3D11Device* GetD3DDevice() const + { + return m_pD3DDevice; + } + + uint32_t GetFormatPlaneMask(RpsFormat format) const + { + return 1; + } + + uint32_t GetFormatPlaneIndex(RpsFormat format) const + { + return 0; + } + + private: + uint32_t GetSubresourceCount(const ResourceDescPacked& resDesc) const; + uint32_t GetResourcePlaneMask(const ResourceDescPacked& resDesc) const; + + uint32_t GetFormatPlaneCount(RpsFormat format) const + { + return 1; + } + + private: + ID3D11Device* m_pD3DDevice = nullptr; + RpsD3D11RuntimeFlags m_flags = {}; + }; +} // namespace rps + +#endif //_RPS_DEVICE_D3D11_H_ diff --git a/src/runtime/d3d11/rps_d3d11_util.hpp b/src/runtime/d3d11/rps_d3d11_util.hpp new file mode 100644 index 0000000..afe5ba0 --- /dev/null +++ b/src/runtime/d3d11/rps_d3d11_util.hpp @@ -0,0 +1,203 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#ifndef _RPS_D3D11_UTILS_H_ +#define _RPS_D3D11_UTILS_H_ + +#include "rps/runtime/d3d_common/rps_d3d_common.h" +#include "rps/runtime/d3d11/rps_d3d11_runtime.h" + +namespace rps +{ + static inline D3D11_BIND_FLAG GetD3D11BindFlags(const RpsAccessAttr& access) + { + uint32_t result = 0; + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_UNORDERED_ACCESS_BIT)) + result |= D3D11_BIND_UNORDERED_ACCESS; + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_RENDER_TARGET_BIT)) + result |= D3D11_BIND_RENDER_TARGET; + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_DEPTH_STENCIL)) + result |= D3D11_BIND_DEPTH_STENCIL; + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_SHADER_RESOURCE_BIT)) + result |= D3D11_BIND_SHADER_RESOURCE; + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_VERTEX_BUFFER_BIT)) + result |= D3D11_BIND_VERTEX_BUFFER; + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_INDEX_BUFFER_BIT)) + result |= D3D11_BIND_INDEX_BUFFER; + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_CONSTANT_BUFFER_BIT)) + result |= D3D11_BIND_CONSTANT_BUFFER; + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_STREAM_OUT_BIT)) + result |= D3D11_BIND_STREAM_OUTPUT; + + return D3D11_BIND_FLAG(result); + } + + static inline D3D11_CPU_ACCESS_FLAG GetD3D11CpuAccessFlags(const RpsAccessAttr& access) + { + uint32_t flags = 0; + + if (access.accessFlags & RPS_ACCESS_CPU_READ_BIT) + flags |= D3D11_CPU_ACCESS_READ; + + if (access.accessFlags & RPS_ACCESS_CPU_WRITE_BIT) + flags |= D3D11_CPU_ACCESS_WRITE; + + return D3D11_CPU_ACCESS_FLAG(flags); + } + + static inline D3D11_RESOURCE_MISC_FLAG GetD3D11ResourceMiscFlags(const ResourceInstance& resInfo) + { + uint32_t flags = 0; + + //D3D11_RESOURCE_MISC_GENERATE_MIPS = 0x1L, + //D3D11_RESOURCE_MISC_SHARED = 0x2L, + + if (resInfo.desc.flags & RPS_RESOURCE_FLAG_CUBEMAP_COMPATIBLE_BIT) + flags |= D3D11_RESOURCE_MISC_TEXTURECUBE; + + if (resInfo.allAccesses.accessFlags & RPS_ACCESS_INDIRECT_ARGS_BIT) + flags |= 
D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS; + + //D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS = 0x20L, + //D3D11_RESOURCE_MISC_BUFFER_STRUCTURED = 0x40L, + + return D3D11_RESOURCE_MISC_FLAG(flags); + } + + static inline D3D11_USAGE GetD3D11Usage(const ResourceInstance& resourceInstance) + { + if (rpsAnyBitsSet(resourceInstance.allAccesses.accessFlags, RPS_ACCESS_CPU_READ_BIT)) + { + return D3D11_USAGE_STAGING; + } + else if (rpsAnyBitsSet(resourceInstance.allAccesses.accessFlags, RPS_ACCESS_CPU_WRITE_BIT)) + { + return D3D11_USAGE_STAGING; + } + + return D3D11_USAGE_DEFAULT; + } + + static inline RpsResult CreateD3D11ResourceDesc(ID3D11Device* pDevice, + const ResourceInstance& resInfo, + ID3D11Resource** ppResource) + { + auto& rpsDesc = resInfo.desc; + + const auto d3dUsage = GetD3D11Usage(resInfo); + const auto d3dBindFlags = GetD3D11BindFlags(resInfo.allAccesses); + const auto d3dCpuAccessFlags = GetD3D11CpuAccessFlags(resInfo.allAccesses); + const auto d3dMiscFlags = GetD3D11ResourceMiscFlags(resInfo); + + HRESULT hr = E_NOTIMPL; + + switch (resInfo.desc.type) + { + case RPS_RESOURCE_TYPE_BUFFER: + { + ID3D11Buffer* pBuffer; + D3D11_BUFFER_DESC bufDesc; + + uint64_t bufSize = rpsDesc.GetBufferSize(); + RPS_RETURN_ERROR_IF(bufSize > UINT32_MAX, RPS_ERROR_NOT_SUPPORTED); + + bufDesc.ByteWidth = UINT(bufSize); + bufDesc.Usage = d3dUsage; + bufDesc.BindFlags = d3dBindFlags; + bufDesc.CPUAccessFlags = d3dCpuAccessFlags; + bufDesc.MiscFlags = d3dMiscFlags; + bufDesc.StructureByteStride = 0; // TODO + + hr = pDevice->CreateBuffer(&bufDesc, nullptr, &pBuffer); + + *ppResource = pBuffer; + } + break; + case RPS_RESOURCE_TYPE_IMAGE_2D: + { + ID3D11Texture2D* pTex2D; + D3D11_TEXTURE2D_DESC tex2DDesc; + + tex2DDesc.Width = rpsDesc.image.width; + tex2DDesc.Height = rpsDesc.image.height; + tex2DDesc.MipLevels = rpsDesc.image.mipLevels; + tex2DDesc.ArraySize = rpsDesc.GetImageArrayLayers(); + tex2DDesc.Format = rpsFormatToDXGI(rpsDesc.GetFormat()); + tex2DDesc.SampleDesc.Count = 
rpsDesc.image.sampleCount; + tex2DDesc.SampleDesc.Quality = 0; // TODO + tex2DDesc.Usage = d3dUsage; + tex2DDesc.BindFlags = d3dBindFlags; + tex2DDesc.CPUAccessFlags = d3dCpuAccessFlags; + tex2DDesc.MiscFlags = d3dMiscFlags; + + hr = pDevice->CreateTexture2D(&tex2DDesc, nullptr, &pTex2D); + + *ppResource = pTex2D; + } + break; + case RPS_RESOURCE_TYPE_IMAGE_3D: + { + ID3D11Texture3D* pTex3D; + D3D11_TEXTURE3D_DESC tex3DDesc; + + tex3DDesc.Width = rpsDesc.image.width; + tex3DDesc.Height = rpsDesc.image.height; + tex3DDesc.Depth = rpsDesc.GetImageDepth(); + tex3DDesc.MipLevels = rpsDesc.image.mipLevels; + tex3DDesc.Format = rpsFormatToDXGI(rpsDesc.GetFormat()); + tex3DDesc.Usage = d3dUsage; + tex3DDesc.BindFlags = d3dBindFlags; + tex3DDesc.CPUAccessFlags = d3dCpuAccessFlags; + tex3DDesc.MiscFlags = d3dMiscFlags; + + hr = pDevice->CreateTexture3D(&tex3DDesc, nullptr, &pTex3D); + + *ppResource = pTex3D; + } + break; + case RPS_RESOURCE_TYPE_IMAGE_1D: + { + ID3D11Texture1D* pTex1D; + D3D11_TEXTURE1D_DESC tex1DDesc; + + tex1DDesc.Width = rpsDesc.image.width; + tex1DDesc.MipLevels = rpsDesc.image.mipLevels; + tex1DDesc.ArraySize = rpsDesc.GetImageArrayLayers(); + tex1DDesc.Format = rpsFormatToDXGI(rpsDesc.GetFormat()); + tex1DDesc.Usage = d3dUsage; + tex1DDesc.BindFlags = d3dBindFlags; + tex1DDesc.CPUAccessFlags = d3dCpuAccessFlags; + tex1DDesc.MiscFlags = d3dMiscFlags; + + hr = pDevice->CreateTexture1D(&tex1DDesc, nullptr, &pTex1D); + + *ppResource = pTex1D; + } + break; + default: + break; + } + + if (FAILED(hr)) + { + *ppResource = nullptr; + } + + return HRESULTToRps(hr); + } + +} // namespace rps + +#endif //_RPS_D3D11_UTILS_H_ diff --git a/src/runtime/d3d12/rps_d3d12_barrier.hpp b/src/runtime/d3d12/rps_d3d12_barrier.hpp new file mode 100644 index 0000000..b524ac1 --- /dev/null +++ b/src/runtime/d3d12/rps_d3d12_barrier.hpp @@ -0,0 +1,451 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef RPS_D3D12_BARRIER_H +#define RPS_D3D12_BARRIER_H + +#include "runtime/d3d12/rps_d3d12_runtime_device.hpp" +#include "runtime/d3d12/rps_d3d12_util.hpp" + +namespace rps +{ + class D3D12BarrierBuilder + { + public: + virtual ~D3D12BarrierBuilder() + { + } + + virtual bool MayNeedPlacedResourceInitState() const + { + return false; + } + + virtual void EnsurePlacedResourceInitState(ResourceInstance& resInfo) const + { + } + + virtual void UpdateFrame(const RenderGraphUpdateContext& context) = 0; + + virtual uint32_t CreateBarrierBatch(const RenderGraphUpdateContext& context, + Span transitionRange) = 0; + + virtual void RecordBarrierBatch(ID3D12GraphicsCommandList* pD3DCmdList, uint32_t barrierBatch) = 0; + }; + + class D3D12ConventionalBarrierBuilder : public D3D12BarrierBuilder + { + struct BarrierBatch + { + Span earlyBarriers; + Span discardResources; + Span lateBarriers; + }; + + public: + + D3D12ConventionalBarrierBuilder(const D3D12RuntimeDevice& device) + : m_d3dRuntimeDevice(device) + { + } + + static bool NeedPlacedResourceInit(const ResourceInstance& resInfo) + { + return rpsAnyBitsSet(resInfo.allAccesses.accessFlags, + RPS_ACCESS_RENDER_TARGET_BIT | RPS_ACCESS_DEPTH_STENCIL); + } + + static bool IsStateCompatibleForPlacedResourceInit(RpsAccessFlags accessFlags) + { + // TODO: Add full resource copy. 
+ return rpsAnyBitsSet(accessFlags, RPS_ACCESS_RENDER_TARGET_BIT | RPS_ACCESS_DEPTH_STENCIL_WRITE); + } + + virtual bool MayNeedPlacedResourceInitState() const override final + { + return true; + } + + virtual void EnsurePlacedResourceInitState(ResourceInstance& resInfo) const override final + { + if (NeedPlacedResourceInit(resInfo) && + !IsStateCompatibleForPlacedResourceInit(resInfo.initialAccess.accessFlags)) + { + if (rpsAnyBitsSet(resInfo.allAccesses.accessFlags, RPS_ACCESS_DEPTH_STENCIL)) + { + resInfo.initialAccess.accessFlags = RPS_ACCESS_DEPTH_STENCIL_WRITE; + } + else if (rpsAnyBitsSet(resInfo.allAccesses.accessFlags, RPS_ACCESS_RENDER_TARGET_BIT)) + { + resInfo.initialAccess.accessFlags = RPS_ACCESS_RENDER_TARGET_BIT; + } + } + } + + virtual void UpdateFrame(const RenderGraphUpdateContext& context) override final + { + m_barriers.reset_keep_capacity(&context.frameArena); + m_barrierBatches.reset_keep_capacity(&context.frameArena); + m_discardResources.reset_keep_capacity(&context.frameArena); + + m_deactivatedResMask.reset(&context.scratchArena); + m_deactivatedResMask.Resize(uint32_t(context.renderGraph.GetResourceInstances().size())); + m_deactivatedResMask.Fill(false); + } + + virtual uint32_t CreateBarrierBatch(const RenderGraphUpdateContext& context, + Span transitionRange) override final + { + auto& aliasingInfos = context.renderGraph.GetResourceAliasingInfos(); + auto& resourceInstances = context.renderGraph.GetResourceInstances(); + auto& transitions = context.renderGraph.GetTransitions(); + + ArenaCheckPoint arenaCheckpoint{context.scratchArena}; + + ArenaVector lateCmds(&context.scratchArena); + + auto transitionRangeCmds = transitionRange.Get(context.renderGraph.GetRuntimeCmdInfos()); + + BarrierBatch currBatch = {}; + + currBatch.earlyBarriers.SetRange(uint32_t(m_barriers.size()), 0); + currBatch.discardResources.SetRange(uint32_t(m_discardResources.size()), 0); + + for (uint32_t idx = 0; idx < transitionRangeCmds.size(); idx++) + { + auto& 
cmd = transitionRangeCmds[idx]; + RPS_ASSERT(cmd.isTransition); + + for (auto& aliasing : cmd.aliasingInfos.Get(aliasingInfos)) + { + if (aliasing.dstActivating && (aliasing.dstResourceIndex != RPS_RESOURCE_ID_INVALID)) + { + auto& dstResInfo = resourceInstances[aliasing.dstResourceIndex]; + + if (NeedPlacedResourceInit(dstResInfo)) + { + // TODO: Make sure it's full resource clear. + if (!rpsAnyBitsSet(dstResInfo.initialAccess.accessFlags, RPS_ACCESS_CLEAR_BIT) && + !rpsAllBitsSet(dstResInfo.initialAccess.accessFlags, + RPS_ACCESS_COPY_DEST_BIT | RPS_ACCESS_DISCARD_OLD_DATA_BIT)) + { + m_discardResources.push_back(D3D12RuntimeDevice::FromHandle( + resourceInstances[aliasing.dstResourceIndex].hRuntimeResource)); + } + } + } + + if (aliasing.srcDeactivating && (aliasing.srcResourceIndex != RPS_RESOURCE_ID_INVALID)) + { + auto& srcResInfo = resourceInstances[aliasing.srcResourceIndex]; + + m_deactivatedResMask.SetBit(aliasing.srcResourceIndex, true); + + // Before deactivating resource, transition it to the initial state. + // For placed resource that needs init, we have made sure initialAccess + // is compatible with the states required for init. + for (auto& finalAccess : + srcResInfo.finalAccesses.Get(context.renderGraph.GetResourceFinalAccesses())) + { + if (finalAccess.prevTransition != RenderGraph::INVALID_TRANSITION) + { + AppendBarrier(D3D12RuntimeDevice::FromHandle(srcResInfo.hRuntimeResource), + transitions[finalAccess.prevTransition].access.access, + srcResInfo.initialAccess, + srcResInfo, + finalAccess.range); + } + } + } + } + + if (cmd.cmdId < CMD_ID_PREAMBLE) + { + const auto& currTrans = transitions[cmd.cmdId]; + const auto& resInstance = resourceInstances[currTrans.access.resourceId]; + + if (resInstance.isAliased && (resInstance.lifetimeBegin >= transitionRange.GetBegin())) + { + lateCmds.push_back(idx); + } + else + { + const auto& prevAccess = (currTrans.prevTransition != RenderGraph::INVALID_TRANSITION) + ? 
transitions[currTrans.prevTransition].access.access + : resInstance.initialAccess; + + AppendBarrier(D3D12RuntimeDevice::FromHandle(resInstance.hRuntimeResource), + prevAccess, + currTrans.access.access, + resInstance, + currTrans.access.range); + } + } + else if (cmd.cmdId == CMD_ID_POSTAMBLE) + { + // At frame end, transit non-deactivated resource states to initial states + for (uint32_t iRes = 0, numRes = uint32_t(resourceInstances.size()); iRes < numRes; iRes++) + { + auto& resInstance = resourceInstances[iRes]; + + if (!resInstance.isAliased || !m_deactivatedResMask.GetBit(iRes)) + { + for (auto& finalAccess : + resInstance.finalAccesses.Get(context.renderGraph.GetResourceFinalAccesses())) + { + if (finalAccess.prevTransition != RenderGraph::INVALID_TRANSITION) + { + AppendBarrier(D3D12RuntimeDevice::FromHandle(resInstance.hRuntimeResource), + transitions[finalAccess.prevTransition].access.access, + resInstance.initialAccess, + resInstance, + finalAccess.range); + } + } + } + } + + m_deactivatedResMask = {}; + } + } + + currBatch.earlyBarriers.SetEnd(uint32_t(m_barriers.size())); + currBatch.discardResources.SetEnd(uint32_t(m_discardResources.size())); + + currBatch.lateBarriers.SetRange(uint32_t(m_barriers.size()), 0); + + for (auto lateCmdIdx : lateCmds) + { + auto& cmd = transitionRangeCmds[lateCmdIdx]; + RPS_ASSERT(cmd.isTransition); + + const auto& currTrans = transitions[cmd.cmdId]; + const auto& resInstance = resourceInstances[currTrans.access.resourceId]; + + const auto& prevAccess = (currTrans.prevTransition != RenderGraph::INVALID_TRANSITION) + ? 
transitions[currTrans.prevTransition].access.access + : resInstance.initialAccess; + + AppendBarrier(D3D12RuntimeDevice::FromHandle(resInstance.hRuntimeResource), + prevAccess, + currTrans.access.access, + resInstance, + currTrans.access.range); + } + + currBatch.lateBarriers.SetEnd(uint32_t(m_barriers.size())); + + uint32_t batchId = RPS_INDEX_NONE_U32; + + if (!currBatch.earlyBarriers.empty() || !currBatch.discardResources.empty() || + !currBatch.lateBarriers.empty()) + { + batchId = uint32_t(m_barrierBatches.size()); + m_barrierBatches.push_back(currBatch); + } + + return batchId; + } + + virtual void RecordBarrierBatch(ID3D12GraphicsCommandList* pD3DCmdList, uint32_t barrierBatch) override final + { + const auto& batch = m_barrierBatches[barrierBatch]; + + if (!batch.earlyBarriers.empty()) + { + auto barriers = batch.earlyBarriers.GetConstRef(m_barriers); + pD3DCmdList->ResourceBarrier(barriers.size(), barriers.data()); + } + + auto discardResources = batch.discardResources.Get(m_discardResources); + + for (auto pRes : discardResources) + { + pD3DCmdList->DiscardResource(pRes, nullptr); + } + + if (!batch.lateBarriers.empty()) + { + auto barriers = batch.lateBarriers.GetConstRef(m_barriers); + pD3DCmdList->ResourceBarrier(barriers.size(), barriers.data()); + } + } + + static D3D12_RESOURCE_STATES CalcD3D12State(const RpsAccessAttr& access) + { + const auto accessFlags = access.accessFlags & RPS_ACCESS_ALL_ACCESS_MASK; + + if (rpsAnyBitsSet(accessFlags, RPS_ACCESS_DEPTH_STENCIL_WRITE)) + return D3D12_RESOURCE_STATE_DEPTH_WRITE; + + switch (accessFlags) + { + case RPS_ACCESS_RENDER_TARGET_BIT: + return D3D12_RESOURCE_STATE_RENDER_TARGET; + case RPS_ACCESS_UNORDERED_ACCESS_BIT: + return D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + case RPS_ACCESS_STREAM_OUT_BIT: + return D3D12_RESOURCE_STATE_STREAM_OUT; + case RPS_ACCESS_COPY_DEST_BIT: + return D3D12_RESOURCE_STATE_COPY_DEST; + case RPS_ACCESS_RESOLVE_DEST_BIT: + case RPS_ACCESS_RESOLVE_DEST_BIT | 
RPS_ACCESS_RENDER_TARGET_BIT: + return D3D12_RESOURCE_STATE_RESOLVE_DEST; + case RPS_ACCESS_RAYTRACING_AS_BUILD_BIT: + return D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + case RPS_ACCESS_RAYTRACING_AS_READ_BIT: + return D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + case RPS_ACCESS_CPU_READ_BIT: + return D3D12_RESOURCE_STATE_COPY_DEST; + case RPS_ACCESS_CPU_WRITE_BIT: + return D3D12_RESOURCE_STATE_GENERIC_READ; + case RPS_ACCESS_PRESENT_BIT: + return D3D12_RESOURCE_STATE_PRESENT; + default: + break; + }; + + static constexpr struct + { + RpsAccessFlagBits rpsFlag; + D3D12_RESOURCE_STATES d3dState; + } rpsToD3D12ResStates[] = { + {RPS_ACCESS_INDIRECT_ARGS_BIT, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT}, + {RPS_ACCESS_INDEX_BUFFER_BIT, D3D12_RESOURCE_STATE_INDEX_BUFFER}, + {RPS_ACCESS_VERTEX_BUFFER_BIT, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER}, + {RPS_ACCESS_CONSTANT_BUFFER_BIT, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER}, + {RPS_ACCESS_SHADING_RATE_BIT, D3D12_RESOURCE_STATE_SHADING_RATE_SOURCE}, + {RPS_ACCESS_DEPTH_READ_BIT, D3D12_RESOURCE_STATE_DEPTH_READ}, + {RPS_ACCESS_STENCIL_READ_BIT, D3D12_RESOURCE_STATE_DEPTH_READ}, + {RPS_ACCESS_COPY_SRC_BIT, D3D12_RESOURCE_STATE_COPY_SOURCE}, + {RPS_ACCESS_RESOLVE_SRC_BIT, D3D12_RESOURCE_STATE_RESOLVE_SOURCE}, + }; + + D3D12_RESOURCE_STATES readStates = static_cast(0); + + for (auto& mapEntry : rpsToD3D12ResStates) + { + if (rpsAnyBitsSet(accessFlags, mapEntry.rpsFlag)) + { + readStates |= mapEntry.d3dState; + } + } + + if (rpsAnyBitsSet(accessFlags, RPS_ACCESS_SHADER_RESOURCE_BIT)) + { + if (rpsAnyBitsSet(access.accessStages, RPS_SHADER_STAGE_PS)) + readStates |= D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + + if (rpsAnyBitsSet(access.accessStages, (~RPS_SHADER_STAGE_PS) & RPS_SHADER_STAGE_ALL)) + readStates |= D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + } + + return readStates; + } + + static bool IsUploadOrReadbackResource(const D3D12RuntimeDevice& device, const ResourceInstance& resInfo) + { + if 
(resInfo.isExternal) + return false; // TODO + + auto& heapInfo = device.GetD3D12HeapTypeInfo(resInfo.allocRequirement.memoryTypeIndex); + return (heapInfo.type == D3D12_HEAP_TYPE_READBACK) || (heapInfo.type == D3D12_HEAP_TYPE_UPLOAD); + } + + static D3D12_RESOURCE_STATES CalcResourceInitState(const D3D12RuntimeDevice& device, + const ResourceInstance& resInfo) + { + auto& heapInfo = device.GetD3D12HeapTypeInfo(resInfo.allocRequirement.memoryTypeIndex); + + if (heapInfo.type == D3D12_HEAP_TYPE_READBACK) + return D3D12_RESOURCE_STATE_COPY_DEST; + else if (heapInfo.type == D3D12_HEAP_TYPE_UPLOAD) + return D3D12_RESOURCE_STATE_GENERIC_READ; + + return resInfo.desc.IsImage() ? D3D12ConventionalBarrierBuilder::CalcD3D12State(resInfo.initialAccess) + : D3D12_RESOURCE_STATE_COMMON; + } + + private: + void AppendBarrier(ID3D12Resource* pResource, + const RpsAccessAttr& prevAccess, + const RpsAccessAttr& currAccess, + const ResourceInstance& resInfo, + const SubresourceRangePacked range) + { + auto stateBefore = CalcD3D12State(prevAccess); + auto stateAfter = CalcD3D12State(currAccess); + + if (IsUploadOrReadbackResource(m_d3dRuntimeDevice, resInfo)) + { + return; + } + + if (stateBefore != stateAfter) + { + const bool isFullRes = (resInfo.numSubResources == 1) || (resInfo.fullSubresourceRange == range); + + auto* pBarriers = m_barriers.grow(isFullRes ? 1 : range.GetNumSubresources()); + + pBarriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + pBarriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + pBarriers[0].Transition.pResource = pResource; + pBarriers[0].Transition.StateBefore = stateBefore; + pBarriers[0].Transition.StateAfter = stateAfter; + pBarriers[0].Transition.Subresource = isFullRes ? 
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES : 0; + + if (!isFullRes) + { + RPS_ASSERT(resInfo.desc.IsImage()); + + uint32_t planeMask = range.aspectMask; + uint32_t idx = 0; + + for (uint32_t iPlane = 0; planeMask != 0; planeMask = planeMask >> 1, iPlane++) + { + if (planeMask & 1) + { + for (uint32_t iArray = range.baseArrayLayer; iArray < range.arrayLayerEnd; iArray++) + { + for (uint32_t iMip = range.baseMipLevel; iMip < range.mipLevelEnd; iMip++) + { + pBarriers[idx] = pBarriers[0]; + pBarriers[idx].Transition.Subresource = + D3D12CalcSubresource(iMip, + iArray, + iPlane, + resInfo.desc.image.mipLevels, + resInfo.desc.image.arrayLayers); + + idx++; + } + } + } + } + } + } + else if ((stateBefore == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) && + (stateAfter == D3D12_RESOURCE_STATE_UNORDERED_ACCESS)) + { + auto* pBarrier = m_barriers.grow(1); + + pBarrier->Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + pBarrier->Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + pBarrier->UAV.pResource = pResource; + } + } + + private: + const D3D12RuntimeDevice& m_d3dRuntimeDevice; + ArenaVector m_barrierBatches; + ArenaVector m_barriers; + ArenaVector m_discardResources; + ArenaBitVector<> m_deactivatedResMask; + }; +} // namespace rps + +#endif //RPS_D3D12_BARRIER_H diff --git a/src/runtime/d3d12/rps_d3d12_built_in_nodes.cpp b/src/runtime/d3d12/rps_d3d12_built_in_nodes.cpp new file mode 100644 index 0000000..06946db --- /dev/null +++ b/src/runtime/d3d12/rps_d3d12_built_in_nodes.cpp @@ -0,0 +1,546 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#include "rps/runtime/common/rps_render_states.h" +#include "rps/runtime/d3d_common/rps_d3d_common.h" + +#include "runtime/common/rps_runtime_util.hpp" +#include "runtime/d3d12/rps_d3d12_runtime_device.hpp" +#include "runtime/d3d12/rps_d3d12_runtime_backend.hpp" +#include "runtime/d3d12/rps_d3d12_util.hpp" + +namespace rps +{ + + static constexpr bool NoRegions = false; + static constexpr bool WithRegions = true; + + // template + // graphics node clear_color_regions( [writeonly(clear)] texture t, float4 data, uint numRects, int4 rects[MaxRects] ); + // template + // graphics node clear_depth_stencil_regions( [writeonly(clear)] texture t, RPS_CLEAR_FLAGS option, float d, uint s, uint numRects, int4 rects[MaxRects] ); + // template + // compute node clear_texture_regions( [writeonly(clear)] texture t, uint4 data, uint numRects, int4 rects[MaxRects] ); + + // graphics node clear_color ( [writeonly(clear)] texture t, float4 data ); + // graphics node clear_depth_stencil ( [writeonly(clear)] texture t, RPS_CLEAR_FLAGS option, float d, uint s ); + // compute node clear_texture ( [writeonly(clear)] texture t, uint4 data ); + // copy node clear_buffer ( [writeonly(clear)] buffer b, uint4 data ); + // copy node copy_texture ( [writeonly(copy)] texture dst, uint3 dstOffset, [readonly(copy)] texture src, uint3 srcOffset, uint3 extent ); + // copy node copy_buffer ( [writeonly(copy)] buffer dst, uint64_t dstOffset, [readonly(copy)] buffer src, uint64_t srcOffset, uint64_t size ); + // copy node copy_texture_to_buffer ( [writeonly(copy)] buffer dst, uint64_t dstByteOffset, uint rowPitch, uint3 bufferImageSize, uint3 dstOffset, [readonly(copy)] texture src, uint3 srcOffset, uint3 extent ); + // copy node copy_buffer_to_texture ( [writeonly(copy)] texture dst, uint3 dstOffset, [readonly(copy)] buffer src, uint64_t srcByteOffset, uint rowPitch, uint3 bufferImageSize, uint3 srcOffset, uint3 extent ); + // graphics node resolve ( [writeonly(resolve)] texture dst, uint2 
dstOffset, [readonly(resolve)] texture src, uint2 srcOffset, uint2 extent, RPS_RESOLVE_MODE resolveMode ); + + template + void D3D12BuiltInClearColorImpl(const RpsCmdCallbackContext* pContext) + { + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + RPS_ASSERT(pContext->numArgs > 1); + + auto pClearValue = rpsCmdGetArg(pContext); + uint32_t numRects = 0; + const D3D12_RECT* pRects = nullptr; + + static_assert(sizeof(RpsRect) == sizeof(D3D12_RECT), + "Assumption 'sizeof(RpsRect) == sizeof(D3D12_RECT)' is no longer true."); + + if (HasRegions) + { + RPS_ASSERT(pContext->numArgs == 4); + numRects = *rpsCmdGetArg(pContext); + pRects = rpsCmdGetArg(pContext); + } + + D3D12_CPU_DESCRIPTOR_HANDLE rtvHdl; + rpsD3D12GetCmdArgDescriptor(pContext, 0, &rtvHdl); + + pCmdList->ClearRenderTargetView(rtvHdl, pClearValue->color.float32, numRects, pRects); + } + + void D3D12BuiltInClearColorRegions(const RpsCmdCallbackContext* pContext) + { + D3D12BuiltInClearColorImpl(pContext); + } + + void D3D12BuiltInClearColor(const RpsCmdCallbackContext* pContext) + { + D3D12BuiltInClearColorImpl(pContext); + } + + template + void D3D12BuiltInClearDepthStencilImpl(const RpsCmdCallbackContext* pContext) + { + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + RPS_ASSERT(pContext->numArgs > 1); + + auto pClearFlags = rpsCmdGetArg(pContext); + auto pDepthValue = rpsCmdGetArg(pContext); + auto pStencilValue = rpsCmdGetArg(pContext); + + uint32_t numRects = 0; + const D3D12_RECT* pRects = nullptr; + + static_assert(sizeof(RpsRect) == sizeof(D3D12_RECT), + "Assumption 'sizeof(RpsRect) == sizeof(D3D12_RECT)' is no longer true."); + + if (HasRegions) + { + RPS_ASSERT(pContext->numArgs == 6); + numRects = *rpsCmdGetArg(pContext); + pRects = rpsCmdGetArg(pContext); + } + + D3D12_CPU_DESCRIPTOR_HANDLE dsvHdl; + RPS_V_REPORT_AND_RETURN(pContext, rpsD3D12GetCmdArgDescriptor(pContext, 0, &dsvHdl)); + + const 
D3D12_CLEAR_FLAGS d3dClearFlags = + (((*pClearFlags) & RPS_CLEAR_FLAG_DEPTH) ? D3D12_CLEAR_FLAG_DEPTH : D3D12_CLEAR_FLAGS(0)) | + (((*pClearFlags) & RPS_CLEAR_FLAG_STENCIL) ? D3D12_CLEAR_FLAG_STENCIL : D3D12_CLEAR_FLAGS(0)); + + pCmdList->ClearDepthStencilView(dsvHdl, d3dClearFlags, *pDepthValue, *pStencilValue, numRects, pRects); + } + + void D3D12BuiltInClearDepthStencil(const RpsCmdCallbackContext* pContext) + { + D3D12BuiltInClearDepthStencilImpl(pContext); + } + + void D3D12BuiltInClearDepthStencilRegions(const RpsCmdCallbackContext* pContext) + { + D3D12BuiltInClearDepthStencilImpl(pContext); + } + + void D3D12BuiltInClearTextureUAV(const RpsCmdCallbackContext* pContext) + { + RPS_TODO(); + } + void D3D12BuiltInClearTextureUAVRegions(const RpsCmdCallbackContext* pContext) + { + RPS_TODO(); + } + void D3D12BuiltInClearBufferUAV(const RpsCmdCallbackContext* pContext) + { + RPS_TODO(); + } + + // copy node copy_texture ( [writeonly(copy)] texture dst, uint3 dstOffset, [readonly(copy)] texture src, uint3 srcOffset, uint3 extent ); + void D3D12BuiltInCopyTexture(const RpsCmdCallbackContext* pContext) + { + auto* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + auto* pBackend = rps::D3D12RuntimeBackend::Get(pContext); + + auto* pRuntimeDevice = D3D12RuntimeDevice::Get(pBackend->GetRenderGraph().GetDevice()); + + RPS_ASSERT(pContext->numArgs == 5); + + const ResourceInstance *pDstResource, *pSrcResource; + RPS_V_REPORT_AND_RETURN(pContext, + rps::D3D12RuntimeBackend::GetCmdArgResourceInfos(pContext, 0, 0, &pDstResource, 1)); + RPS_V_REPORT_AND_RETURN(pContext, + rps::D3D12RuntimeBackend::GetCmdArgResourceInfos(pContext, 2, 0, &pSrcResource, 1)); + + auto pDstView = rpsCmdGetArg(pContext); + auto pDstOffset = *rpsCmdGetArg(pContext); + auto pSrcView = rpsCmdGetArg(pContext); + auto pSrcOffset = *rpsCmdGetArg(pContext); + auto pExtent = *rpsCmdGetArg(pContext); + + D3D12_TEXTURE_COPY_LOCATION dstLocation; + dstLocation.Type = 
D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstLocation.pResource = rpsD3D12ResourceFromHandle(pDstResource->hRuntimeResource); + + D3D12_TEXTURE_COPY_LOCATION srcLocation; + srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcLocation.pResource = rpsD3D12ResourceFromHandle(pSrcResource->hRuntimeResource); + + RpsFormat srcFmt = (pSrcView->base.viewFormat != RPS_FORMAT_UNKNOWN) ? pSrcView->base.viewFormat + : pSrcResource->desc.image.format; + RpsFormat dstFmt = (pDstView->base.viewFormat != RPS_FORMAT_UNKNOWN) ? pDstView->base.viewFormat + : pDstResource->desc.image.format; + + uint32_t srcMipDim[3] = { + GetMipLevelDimension(pSrcResource->desc.image.width, pSrcView->subresourceRange.baseMipLevel), + GetMipLevelDimension(pSrcResource->desc.image.height, pSrcView->subresourceRange.baseMipLevel), + GetMipLevelDimension(pSrcResource->desc.GetImageDepth(), pSrcView->subresourceRange.baseMipLevel), + }; + + D3D12_BOX box; + box.left = pSrcOffset[0]; + box.top = pSrcOffset[1]; + box.front = pSrcOffset[2]; + box.right = (pExtent[0] == UINT32_MAX) ? srcMipDim[0] : (pSrcOffset[0] + pExtent[0]); + box.bottom = (pExtent[1] == UINT32_MAX) ? srcMipDim[1] : (pSrcOffset[1] + pExtent[1]); + box.back = (pExtent[2] == UINT32_MAX) ? 
srcMipDim[2] : (pSrcOffset[2] + pExtent[2]); + + uint32_t dstOffset[3] = {pDstOffset[0], pDstOffset[1], pDstOffset[2]}; + + const bool isFullSubresource = + (pSrcOffset[0] == 0) && (pSrcOffset[1] == 0) && (pSrcOffset[2] == 0) && + ((box.right == srcMipDim[0]) && (box.bottom == srcMipDim[1]) && (box.back == srcMipDim[2])); + + uint32_t srcAspectMask = pRuntimeDevice->GetFormatPlaneMask(srcFmt); + uint32_t dstAspectMask = pRuntimeDevice->GetFormatPlaneMask(dstFmt); + + const uint32_t mipLevels = rpsMin(pSrcView->subresourceRange.mipLevels, pDstView->subresourceRange.mipLevels); + + const uint32_t numArrayLayers = + rpsMin(pSrcView->subresourceRange.arrayLayers, pDstView->subresourceRange.arrayLayers); + + while ((srcAspectMask != 0) && (dstAspectMask != 0)) + { + const uint32_t srcPlane = (srcAspectMask & 1u) ? 0 : 1; + srcAspectMask &= ~(1 << srcPlane); + const uint32_t dstPlane = (dstAspectMask & 1u) ? 0 : 1; + dstAspectMask &= ~(1 << dstPlane); + + for (uint32_t iMip = 0; iMip < mipLevels; iMip++) + { + const uint32_t srcMip = pSrcView->subresourceRange.baseMipLevel + iMip; + const uint32_t dstMip = pDstView->subresourceRange.baseMipLevel + iMip; + + for (uint32_t iArrayLayer = 0; iArrayLayer < numArrayLayers; iArrayLayer++) + { + srcLocation.SubresourceIndex = + D3D12CalcSubresource(srcMip, + pSrcView->subresourceRange.baseArrayLayer + iArrayLayer, + srcPlane, + pSrcResource->desc.image.mipLevels, + pSrcResource->desc.GetImageArrayLayers()); + dstLocation.SubresourceIndex = + D3D12CalcSubresource(dstMip, + pDstView->subresourceRange.baseArrayLayer + iArrayLayer, + dstPlane, + pDstResource->desc.image.mipLevels, + pDstResource->desc.GetImageArrayLayers()); + + pCmdList->CopyTextureRegion(&dstLocation, + dstOffset[0], + dstOffset[1], + dstOffset[2], + &srcLocation, + isFullSubresource ? 
nullptr : &box);
+                }
+
+                // Step the source box down to the next mip level: each of the six box members
+                // (left, top, front, right, bottom, back) is halved exactly once per mip.
+                box.left   = box.left >> 1;
+                box.top    = box.top >> 1;
+                box.front  = box.front >> 1;
+                box.right  = box.right >> 1;
+                box.bottom = box.bottom >> 1;
+                box.back   = box.back >> 1;
+
+                // The destination offset follows the same per-mip halving.
+                dstOffset[0] = dstOffset[0] >> 1;
+                dstOffset[1] = dstOffset[1] >> 1;
+                dstOffset[2] = dstOffset[2] >> 1;
+            }
+        }
+    }
+
+    static constexpr bool TextureToBuffer = true;
+    static constexpr bool BufferToTexture = false;
+
+    // Built-in node callback that records a buffer-to-buffer copy on the D3D12 command list.
+    //
+    // The destination and source resources are looked up through GetCmdArgResourceInfos (indices 0
+    // and 2); the byte offsets and the copy size are read from the command arguments.
+    //
+    // A copy size of UINT64_MAX means "copy up to the end of the buffers":
+    //  - if both offsets are zero and both buffers have the same total size (or the size is given
+    //    explicitly and equals the source size), a whole-resource CopyResource is recorded;
+    //  - otherwise CopyBufferRegion is recorded, with UINT64_MAX first resolved to the number of
+    //    bytes that remain past the offsets in both the source and the destination.
+    void D3D12BuiltInCopyBuffer(const RpsCmdCallbackContext* pContext)
+    {
+        auto* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer);
+        auto* pBackend = rps::D3D12RuntimeBackend::Get(pContext);
+
+        auto* pRuntimeDevice = D3D12RuntimeDevice::Get(pBackend->GetRenderGraph().GetDevice());
+
+        const ResourceInstance *pDstResource, *pSrcResource;
+        RPS_V_REPORT_AND_RETURN(pContext,
+                                rps::D3D12RuntimeBackend::GetCmdArgResourceInfos(pContext, 0, 0, &pDstResource, 1));
+        RPS_V_REPORT_AND_RETURN(pContext,
+                                rps::D3D12RuntimeBackend::GetCmdArgResourceInfos(pContext, 2, 0, &pSrcResource, 1));
+
+        const auto* pDstView  = rpsCmdGetArg(pContext);
+        uint64_t    dstOffset = *rpsCmdGetArg(pContext);
+        const auto* pSrcView  = rpsCmdGetArg(pContext);
+        uint64_t    srcOffset = *rpsCmdGetArg(pContext);
+        uint64_t    copySize  = *rpsCmdGetArg(pContext);
+
+        const uint64_t dstTotalSize = pDstResource->desc.GetBufferSize();
+        const uint64_t srcTotalSize = pSrcResource->desc.GetBufferSize();
+
+        // UINT64_MAX is the sentinel for "no explicit size".
+        const bool copyToEnd = (copySize == UINT64_MAX);
+
+        if ((dstOffset == 0) && (srcOffset == 0) && (dstTotalSize == srcTotalSize) &&
+            (copyToEnd || (copySize == srcTotalSize)))
+        {
+            pCmdList->CopyResource(rpsD3D12ResourceFromHandle(pDstResource->hRuntimeResource),
+                                   rpsD3D12ResourceFromHandle(pSrcResource->hRuntimeResource));
+        }
+        else
+        {
+            if (copyToEnd)
+            {
+                // The sentinel must never reach CopyBufferRegion as a byte count. Resolve it to the
+                // largest range that fits in both buffers; the guarded subtractions avoid unsigned
+                // wrap-around when an offset lies past the end of its buffer.
+                const uint64_t srcRemaining = (srcOffset < srcTotalSize) ? (srcTotalSize - srcOffset) : 0;
+                const uint64_t dstRemaining = (dstOffset < dstTotalSize) ? (dstTotalSize - dstOffset) : 0;
+
+                copySize = (srcRemaining < dstRemaining) ? srcRemaining : dstRemaining;
+            }
+
+            pCmdList->CopyBufferRegion(rpsD3D12ResourceFromHandle(pDstResource->hRuntimeResource),
+                                       dstOffset,
+                                       rpsD3D12ResourceFromHandle(pSrcResource->hRuntimeResource),
+                                       srcOffset,
+                                       copySize);
+        }
+    }
+
+    template
+    void D3D12BuiltInCopyTextureBufferCommon(const
RpsCmdCallbackContext* pContext) + { + auto* pCmdList = rpsD3D12CommandList1FromHandle(pContext->hCommandBuffer); + auto* pBackend = rps::D3D12RuntimeBackend::Get(pContext); + + auto* pRuntimeDevice = D3D12RuntimeDevice::Get(pBackend->GetRenderGraph().GetDevice()); + + const ResourceInstance *pTextureResource, *pBufferResource; + RPS_V_REPORT_AND_RETURN( + pContext, + rps::D3D12RuntimeBackend::GetCmdArgResourceInfos(pContext, TextureArgIdx, 0, &pTextureResource, 1)); + RPS_V_REPORT_AND_RETURN( + pContext, rps::D3D12RuntimeBackend::GetCmdArgResourceInfos(pContext, BufferArgIdx, 0, &pBufferResource, 1)); + + auto* pBufferView = rpsCmdGetArg(pContext); + auto* pTextureView = rpsCmdGetArg(pContext); + auto bufferByteOffset = *rpsCmdGetArg(pContext); + auto bufferRowPitch = *rpsCmdGetArg(pContext); + auto pBufferImageSize = *rpsCmdGetArg(pContext); + auto pBufferImgOffset = *rpsCmdGetArg(pContext); + auto pTextureOffset = *rpsCmdGetArg(pContext); + auto pExtent = *rpsCmdGetArg(pContext); + + D3D12_TEXTURE_COPY_LOCATION textureLocation; + textureLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + + D3D12_TEXTURE_COPY_LOCATION bufferLocation; + bufferLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + + uint32_t texMipDim[3] = { + GetMipLevelDimension(pTextureResource->desc.image.width, pTextureView->subresourceRange.baseMipLevel), + GetMipLevelDimension(pTextureResource->desc.image.height, pTextureView->subresourceRange.baseMipLevel), + GetMipLevelDimension(pTextureResource->desc.GetImageDepth(), pTextureView->subresourceRange.baseMipLevel), + }; + + const uint32_t* pSrcOffset = SourceIsTexture ? pTextureOffset : pBufferImgOffset; + + uint32_t boxEnd[3] = { + (pExtent[0] == UINT32_MAX) ? texMipDim[0] : (pSrcOffset[0] + pExtent[0]), + (pExtent[1] == UINT32_MAX) ? texMipDim[1] : (pSrcOffset[1] + pExtent[1]), + (pExtent[2] == UINT32_MAX) ? 
texMipDim[2] : (pSrcOffset[2] + pExtent[2]),
+        };
+
+        // Passing a null source box copies the entire source subresource, so that shortcut is only
+        // valid when the region starts at the origin and also ends at the mip level's full dimensions.
+        const bool isFullSubresource =
+            ((pSrcOffset[0] == 0) && (pSrcOffset[1] == 0) && (pSrcOffset[2] == 0)) &&
+            ((boxEnd[0] == texMipDim[0]) && (boxEnd[1] == texMipDim[1]) && (boxEnd[2] == texMipDim[2]));
+        const bool onlyAllowFullSubresource = (pTextureResource->allAccesses.accessFlags & RPS_ACCESS_DEPTH_STENCIL) ||
+                                              (pTextureResource->desc.GetSampleCount() > 1);
+
+        // An explicit view format takes precedence over the resource's own format.
+        const RpsFormat viewFormat = (pTextureView->base.viewFormat != RPS_FORMAT_UNKNOWN)
+                                         ? pTextureView->base.viewFormat
+                                         : pTextureResource->desc.image.format;
+        const uint32_t  planeMask  = pRuntimeDevice->GetFormatPlaneMask(viewFormat);
+
+        // Exactly one plane is expected to be selected: mask 1 maps to plane 0, mask 2 to plane 1.
+        RPS_ASSERT((planeMask == 1) || (planeMask == 2));
+
+        textureLocation.pResource        = rpsD3D12ResourceFromHandle(pTextureResource->hRuntimeResource);
+        textureLocation.SubresourceIndex = D3D12CalcSubresource(pTextureView->subresourceRange.baseMipLevel,
+                                                                pTextureView->subresourceRange.baseArrayLayer,
+                                                                ((planeMask == 0x1) ? 0 : 1),
+                                                                pTextureResource->desc.image.mipLevels,
+                                                                pTextureResource->desc.GetImageArrayLayers());
+
+        // Describe the buffer side as a placed footprint built from the node arguments.
+        bufferLocation.pResource                          = rpsD3D12ResourceFromHandle(pBufferResource->hRuntimeResource);
+        bufferLocation.PlacedFootprint.Offset             = bufferByteOffset;
+        bufferLocation.PlacedFootprint.Footprint.Format   = rpsFormatToDXGI(viewFormat);
+        bufferLocation.PlacedFootprint.Footprint.Width    = pBufferImageSize[0];
+        bufferLocation.PlacedFootprint.Footprint.Height   = pBufferImageSize[1];
+        bufferLocation.PlacedFootprint.Footprint.Depth    = pBufferImageSize[2];
+        bufferLocation.PlacedFootprint.Footprint.RowPitch = bufferRowPitch;
+
+        if (SourceIsTexture)
+        {
+            // Texture -> buffer: the box selects the texture region; the buffer image offset is the destination.
+            D3D12_BOX box = {pTextureOffset[0], pTextureOffset[1], pTextureOffset[2], boxEnd[0], boxEnd[1], boxEnd[2]};
+
+            pCmdList->CopyTextureRegion(&bufferLocation,
+                                        pBufferImgOffset[0],
+                                        pBufferImgOffset[1],
+                                        pBufferImgOffset[2],
+                                        &textureLocation,
+                                        isFullSubresource ?
nullptr : &box); + } + else + { + D3D12_BOX box = { + pBufferImgOffset[0], pBufferImgOffset[1], pBufferImgOffset[2], boxEnd[0], boxEnd[1], boxEnd[2]}; + + pCmdList->CopyTextureRegion(&textureLocation, + pTextureOffset[0], + pTextureOffset[1], + pTextureOffset[2], + &bufferLocation, + isFullSubresource ? nullptr : &box); + } + } + + // copy node copy_texture_to_buffer ( [writeonly(copy)] buffer dst, uint64_t dstByteOffset, uint rowPitch, uint3 bufferImageSize, uint3 dstOffset, [readonly(copy)] texture src, uint3 srcOffset, uint3 extent ); + // copy node copy_buffer_to_texture ( [writeonly(copy)] texture dst, uint3 dstOffset, [readonly(copy)] buffer src, uint64_t srcByteOffset, uint rowPitch, uint3 bufferImageSize, uint3 srcOffset, uint3 extent ); + // graphics node resolve ( [writeonly(resolve)] texture dst, uint2 dstOffset, [readonly(resolve)] texture src, uint2 srcOffset, uint2 extent, RPS_RESOLVE_MODE resolveMode ); + void D3D12BuiltInCopyTextureToBuffer(const RpsCmdCallbackContext* pContext) + { + enum + { + BUFFER_DST = 0, + BUFFER_BYTE_OFFSET, + ROW_PITCH, + BUFFER_IMAGE_SIZE, + BUFFER_IMAGE_OFFSET, + TEXTURE_SRC, + TEXTURE_OFFSET, + EXTENT, + }; + + D3D12BuiltInCopyTextureBufferCommon(pContext); + } + void D3D12BuiltInCopyBufferToTexture(const RpsCmdCallbackContext* pContext) + { + enum + { + TEXTURE_DST = 0, + TEXTURE_OFFSET, + BUFFER_SRC, + BUFFER_BYTE_OFFSET, + ROW_PITCH, + BUFFER_IMAGE_SIZE, + BUFFER_IMAGE_OFFSET, + EXTENT, + }; + + D3D12BuiltInCopyTextureBufferCommon(pContext); + } + + void D3D12BuiltInResolve(const RpsCmdCallbackContext* pContext) + { + auto* pCmdList = rpsD3D12CommandList1FromHandle(pContext->hCommandBuffer); + + auto* pBackend = rps::D3D12RuntimeBackend::Get(pContext); + + auto* pRuntimeDevice = D3D12RuntimeDevice::Get(pBackend->GetRenderGraph().GetDevice()); + + RPS_ASSERT(pContext->numArgs == 6); + + const ResourceInstance *pDstResource, *pSrcResource; + RPS_V_REPORT_AND_RETURN(pContext, + 
rps::D3D12RuntimeBackend::GetCmdArgResourceInfos(pContext, 0, 0, &pDstResource, 1));
+        RPS_V_REPORT_AND_RETURN(pContext,
+                                rps::D3D12RuntimeBackend::GetCmdArgResourceInfos(pContext, 2, 0, &pSrcResource, 1));
+
+        auto pDstView    = rpsCmdGetArg(pContext);
+        auto pDstOffset  = *rpsCmdGetArg(pContext);
+        auto pSrcView    = rpsCmdGetArg(pContext);
+        auto pSrcOffset  = *rpsCmdGetArg(pContext);
+        auto pExtent     = *rpsCmdGetArg(pContext);
+        auto resolveMode = *rpsCmdGetArg(pContext);
+
+        RPS_ASSERT(!pDstResource->desc.IsBuffer());
+        RPS_ASSERT(!pSrcResource->desc.IsBuffer());
+
+        // TODO: Precalculate these flags at shader loading time.
+        // A full-subresource resolve has zero offsets on both sides and the UINT32_MAX extent
+        // sentinel in both dimensions; in that case no source rectangle is passed to D3D12.
+        const bool isFullSubresource = ((pDstOffset[0] == 0) && (pDstOffset[1] == 0)) &&
+                                       ((pSrcOffset[0] == 0) && (pSrcOffset[1] == 0)) &&
+                                       ((pExtent[0] == UINT32_MAX) && (pExtent[1] == UINT32_MAX));
+
+        // The source must have at least as many samples as the destination.
+        if (pSrcResource->desc.GetSampleCount() < pDstResource->desc.GetSampleCount())
+        {
+            rpsCmdCallbackReportError(pContext, RPS_ERROR_INVALID_OPERATION);
+            return;
+        }
+
+        // An explicit view format takes precedence over the resource's own format.
+        RpsFormat srcFmt = (pSrcView->base.viewFormat != RPS_FORMAT_UNKNOWN) ? pSrcView->base.viewFormat
+                                                                             : pSrcResource->desc.image.format;
+        RpsFormat dstFmt = (pDstView->base.viewFormat != RPS_FORMAT_UNKNOWN) ? pDstView->base.viewFormat
+                                                                             : pDstResource->desc.image.format;
+
+        uint32_t srcAspectMask = pRuntimeDevice->GetFormatPlaneMask(srcFmt);
+        uint32_t dstAspectMask = pRuntimeDevice->GetFormatPlaneMask(dstFmt);
+
+        // Only the mips / array layers covered by BOTH views are resolved. A multisampled source
+        // is limited to a single mip level.
+        const uint32_t mipLevelCount =
+            (pSrcResource->desc.GetSampleCount() > 1)
+                ? 1
+                : rpsMin(pDstView->subresourceRange.mipLevels, pSrcView->subresourceRange.mipLevels);
+        const uint32_t arrayLayerCount =
+            rpsMin(pDstView->subresourceRange.arrayLayers, pSrcView->subresourceRange.arrayLayers);
+
+        // Walk the planes selected on both sides, pairing them up lowest plane first.
+        while ((srcAspectMask != 0) && (dstAspectMask != 0))
+        {
+            const uint32_t srcPlane = (srcAspectMask & 1u) ? 0 : 1;
+            srcAspectMask &= ~(1 << srcPlane);
+            const uint32_t dstPlane = (dstAspectMask & 1u) ?
0 : 1; + dstAspectMask &= ~(1 << dstPlane); + + for (uint32_t iMip = 0; iMip < mipLevelCount; iMip++) + { + const uint32_t srcMip = pSrcView->subresourceRange.baseMipLevel + iMip; + const uint32_t dstMip = pDstView->subresourceRange.baseMipLevel + iMip; + + D3D12_RECT srcRect; + srcRect.left = pSrcOffset[0] >> iMip; + srcRect.top = pSrcOffset[1] >> iMip; + srcRect.right = rpsMax(1u, (pSrcOffset[0] + pExtent[0]) >> iMip); + srcRect.bottom = rpsMax(1u, (pSrcOffset[1] + pExtent[1]) >> iMip); + + UINT dstOffset[2] = { + (pDstOffset[0] >> iMip), + (pDstOffset[1] >> iMip), + }; + + for (uint32_t iArrayLayer = 0; iArrayLayer < arrayLayerCount; iArrayLayer++) + { + const uint32_t srcSubresourceIndex = + D3D12CalcSubresource(srcMip, + pSrcView->subresourceRange.baseArrayLayer + iArrayLayer, + srcPlane, + pSrcResource->desc.image.mipLevels, + pSrcResource->desc.GetImageArrayLayers()); + + const uint32_t dstSubresourceIndex = + D3D12CalcSubresource(dstMip, + pDstView->subresourceRange.baseArrayLayer + iArrayLayer, + dstPlane, + pDstResource->desc.image.mipLevels, + pDstResource->desc.GetImageArrayLayers()); + + pCmdList->ResolveSubresourceRegion(rpsD3D12ResourceFromHandle(pDstResource->hRuntimeResource), + dstSubresourceIndex, + dstOffset[0], + dstOffset[1], + rpsD3D12ResourceFromHandle(pSrcResource->hRuntimeResource), + srcSubresourceIndex, + isFullSubresource ? NULL : &srcRect, + rpsFormatToDXGI(dstFmt), + D3D12GetResolveMode(resolveMode)); + } + } + } + } +} // namespace rps diff --git a/src/runtime/d3d12/rps_d3d12_enhanced_barrier.hpp b/src/runtime/d3d12/rps_d3d12_enhanced_barrier.hpp new file mode 100644 index 0000000..b998d2a --- /dev/null +++ b/src/runtime/d3d12/rps_d3d12_enhanced_barrier.hpp @@ -0,0 +1,635 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#ifndef RPS_D3D12_ENHANCED_BARRIER_H +#define RPS_D3D12_ENHANCED_BARRIER_H + +#if RPS_D3D12_ENHANCED_BARRIER_SUPPORT + +#include "core/rps_util.hpp" +#include "runtime/d3d12/rps_d3d12_barrier.hpp" + +namespace rps +{ + class D3D12EnhancedBarrierBuilder : public D3D12BarrierBuilder + { + public: + D3D12EnhancedBarrierBuilder(const RuntimeDevice& runtimeDevice) + : m_runtimeDevice(runtimeDevice) + { + } + + virtual void UpdateFrame(const RenderGraphUpdateContext& context) override final + { + m_barrierBatches.reset_keep_capacity(&context.frameArena); + m_textureBarriers.reset_keep_capacity(&context.frameArena); + m_bufferBarriers.reset_keep_capacity(&context.frameArena); + m_globalBarriers.reset_keep_capacity(&context.frameArena); + } + + virtual uint32_t CreateBarrierBatch(const RenderGraphUpdateContext& context, + Span transitionRange) override final + { + RPS_ASSERT(!transitionRange.empty()); + + auto& aliasingInfos = context.renderGraph.GetResourceAliasingInfos(); + auto& resourceInstances = context.renderGraph.GetResourceInstances(); + auto& transitions = context.renderGraph.GetTransitions(); + + auto transitionRangeCmds = transitionRange.Get(context.renderGraph.GetRuntimeCmdInfos()); + + BarrierBatch currBatch = {}; + currBatch.offsets[D3D12_BARRIER_TYPE_GLOBAL] = uint32_t(m_globalBarriers.size()); + currBatch.offsets[D3D12_BARRIER_TYPE_TEXTURE] = uint32_t(m_textureBarriers.size()); + currBatch.offsets[D3D12_BARRIER_TYPE_BUFFER] = uint32_t(m_bufferBarriers.size()); + + constexpr RpsAccessAttr noAccess = {RPS_ACCESS_UNKNOWN, RPS_SHADER_STAGE_NONE}; + + // At preamble, initialize the layout for pending, non-aliased resources. + // For aliased resources this is included in the cmd.aliasingInfos so skipping here. + if (transitionRangeCmds[0].cmdId == CMD_ID_PREAMBLE) + { + // Non-initialized + // TODO: Add pendingInit resource indices to a separated list. 
+ for (auto& resInst : resourceInstances) + { + if (resInst.isPendingInit && !resInst.isAliased) + { + const auto aliasDstAccessInfo = CalcD3D12AccessInfo(resInst.initialAccess); + AppendBarrier(resInst, noAccess, resInst.initialAccess, true, resInst.fullSubresourceRange); + } + } + } + + for (uint32_t idx = 0; idx < transitionRangeCmds.size(); idx++) + { + auto& cmd = transitionRangeCmds[idx]; + RPS_ASSERT(cmd.isTransition); + + // Process aliasing: + for (auto& aliasing : cmd.aliasingInfos.Get(aliasingInfos)) + { + // The src shouldn't be accessed by current transiton batch. + RPS_ASSERT((aliasing.srcResourceIndex == RPS_INDEX_NONE_U32) || + (resourceInstances[aliasing.srcResourceIndex].lifetimeEnd < + (transitionRange.GetBegin() + idx))); + + if (aliasing.srcDeactivating && (aliasing.srcResourceIndex != RPS_INDEX_NONE_U32)) + { + const auto& srcResInfo = resourceInstances[aliasing.srcResourceIndex]; + + auto srcFinalAccesses = + srcResInfo.finalAccesses.Get(context.renderGraph.GetResourceFinalAccesses()); + + // TODO: Early out / conservative sync if too many final accesses + for (auto& srcFinalAccess : srcFinalAccesses) + { + auto& srcAccess = (srcFinalAccess.prevTransition != RenderGraph::INVALID_TRANSITION) + ? transitions[srcFinalAccess.prevTransition].access.access + : srcResInfo.initialAccess; + + AppendBarrier(srcResInfo, srcAccess, noAccess, false, srcFinalAccess.range); + } + } + + if (aliasing.dstActivating && (aliasing.dstResourceIndex != RPS_INDEX_NONE_U32)) + { + const auto& dstResInfo = resourceInstances[aliasing.dstResourceIndex]; + const auto aliasDstAccessInfo = CalcD3D12AccessInfo(dstResInfo.initialAccess); + + AppendBarrier( + dstResInfo, noAccess, dstResInfo.initialAccess, true, dstResInfo.fullSubresourceRange); + + // TODO: Whole resource already in initial layout, can skip the first access barrier. 
+ } + } + + if (cmd.cmdId < CMD_ID_PREAMBLE) + { + // Process transitons: + + const auto& currTrans = transitions[cmd.cmdId]; + const auto& resInstance = resourceInstances[currTrans.access.resourceId]; + + const auto& prevAccess = (currTrans.prevTransition != RenderGraph::INVALID_TRANSITION) + ? transitions[currTrans.prevTransition].access.access + : resInstance.initialAccess; + + AppendBarrier(resInstance, prevAccess, currTrans.access.access, false, currTrans.access.range); + } + else if (cmd.cmdId == CMD_ID_POSTAMBLE) + { + // At frame end, transit non-deactivated resource states to initial states + // TODO: Extract non-aliased resource list ahead of time. + for (uint32_t iRes = 0, numRes = uint32_t(resourceInstances.size()); iRes < numRes; iRes++) + { + auto& resInstance = resourceInstances[iRes]; + + if (!resInstance.isAliased) + { + for (auto& finalAccess : + resInstance.finalAccesses.Get(context.renderGraph.GetResourceFinalAccesses())) + { + if (finalAccess.prevTransition != RenderGraph::INVALID_TRANSITION) + { + AppendBarrier( + resInstance, + transitions[finalAccess.prevTransition].access.access, + // TODO: For non-external resource, set no access/sync + initial layout? 
+ resInstance.initialAccess, + false, + transitions[finalAccess.prevTransition].access.range); + } + } + } + } + } + } + + currBatch.count[D3D12_BARRIER_TYPE_GLOBAL] = + uint32_t(m_globalBarriers.size()) - currBatch.offsets[D3D12_BARRIER_TYPE_GLOBAL]; + currBatch.count[D3D12_BARRIER_TYPE_TEXTURE] = + uint32_t(m_textureBarriers.size()) - currBatch.offsets[D3D12_BARRIER_TYPE_TEXTURE]; + currBatch.count[D3D12_BARRIER_TYPE_BUFFER] = + uint32_t(m_bufferBarriers.size()) - currBatch.offsets[D3D12_BARRIER_TYPE_BUFFER]; + + uint32_t batchId = RPS_INDEX_NONE_U32; + + if ((currBatch.count[D3D12_BARRIER_TYPE_GLOBAL] > 0) || (currBatch.count[D3D12_BARRIER_TYPE_TEXTURE] > 0) || + (currBatch.count[D3D12_BARRIER_TYPE_BUFFER] > 0)) + { + batchId = uint32_t(m_barrierBatches.size()); + m_barrierBatches.push_back(currBatch); + } + + return batchId; + } + + virtual void RecordBarrierBatch(ID3D12GraphicsCommandList* pD3DCmdList, uint32_t barrierBatch) override final + { + const auto& batch = m_barrierBatches[barrierBatch]; + + if (batch.empty()) + return; + + D3D12_BARRIER_GROUP barrierGroups[D3D12BarrierTypeCount]; + uint32_t numGroups = 0; + + if (batch.count[D3D12_BARRIER_TYPE_GLOBAL] > 0) + { + barrierGroups[numGroups] = {D3D12_BARRIER_TYPE_GLOBAL, batch.count[D3D12_BARRIER_TYPE_GLOBAL]}; + barrierGroups[numGroups].pGlobalBarriers = &m_globalBarriers[batch.offsets[D3D12_BARRIER_TYPE_GLOBAL]]; + numGroups++; + } + + if (batch.count[D3D12_BARRIER_TYPE_TEXTURE] > 0) + { + barrierGroups[numGroups] = {D3D12_BARRIER_TYPE_TEXTURE, batch.count[D3D12_BARRIER_TYPE_TEXTURE]}; + barrierGroups[numGroups].pTextureBarriers = + &m_textureBarriers[batch.offsets[D3D12_BARRIER_TYPE_TEXTURE]]; + numGroups++; + } + + if (batch.count[D3D12_BARRIER_TYPE_BUFFER] > 0) + { + barrierGroups[numGroups] = {D3D12_BARRIER_TYPE_BUFFER, batch.count[D3D12_BARRIER_TYPE_BUFFER]}; + barrierGroups[numGroups].pBufferBarriers = &m_bufferBarriers[batch.offsets[D3D12_BARRIER_TYPE_BUFFER]]; + numGroups++; + } + +#if 
RPS_DX12_ENHANCED_BARRIER_DEBUG_DUMP + static const StrRef barrierGroupTypeNames[] = {"Global", "Texture", "Buffer"}; + + static const NameValuePair layoutNames[] = { + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_UNDEFINED), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_COMMON), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_PRESENT), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_GENERIC_READ), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_RENDER_TARGET), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_UNORDERED_ACCESS), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_DEPTH_STENCIL_WRITE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_DEPTH_STENCIL_READ), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_SHADER_RESOURCE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_COPY_SOURCE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_COPY_DEST), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_RESOLVE_SOURCE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_RESOLVE_DEST), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_SHADING_RATE_SOURCE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_VIDEO_DECODE_READ), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_VIDEO_DECODE_WRITE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_VIDEO_PROCESS_READ), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_VIDEO_PROCESS_WRITE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_VIDEO_ENCODE_READ), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_VIDEO_ENCODE_WRITE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_DIRECT_QUEUE_COMMON), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_DIRECT_QUEUE_GENERIC_READ), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS), + 
RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_DIRECT_QUEUE_COPY_SOURCE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_DIRECT_QUEUE_COPY_DEST), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_COMPUTE_QUEUE_COMMON), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_COMPUTE_QUEUE_GENERIC_READ), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_COMPUTE_QUEUE_COPY_SOURCE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_COMPUTE_QUEUE_COPY_DEST), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, LAYOUT_VIDEO_QUEUE_COMMON), + }; + + static const NameValuePair syncNames[] = { + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_NONE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_ALL), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_DRAW), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_INPUT_ASSEMBLER), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_VERTEX_SHADING), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_PIXEL_SHADING), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_DEPTH_STENCIL), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_RENDER_TARGET), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_COMPUTE_SHADING), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_RAYTRACING), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_COPY), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_RESOLVE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_EXECUTE_INDIRECT), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_PREDICATION), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_ALL_SHADING), + 
RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_NON_PIXEL_SHADING), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, + SYNC_EMIT_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_VIDEO_DECODE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_VIDEO_PROCESS), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_VIDEO_ENCODE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, SYNC_SPLIT), + }; + + static const NameValuePair accessNames[] = { + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_COMMON), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_VERTEX_BUFFER), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_CONSTANT_BUFFER), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_INDEX_BUFFER), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_RENDER_TARGET), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_UNORDERED_ACCESS), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_DEPTH_STENCIL_WRITE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_DEPTH_STENCIL_READ), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_SHADER_RESOURCE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_STREAM_OUTPUT), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_INDIRECT_ARGUMENT), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_PREDICATION), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_COPY_DEST), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_COPY_SOURCE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_RESOLVE_DEST), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_RESOLVE_SOURCE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, 
ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_SHADING_RATE_SOURCE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_VIDEO_DECODE_READ), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_VIDEO_DECODE_WRITE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_VIDEO_PROCESS_READ), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_VIDEO_PROCESS_WRITE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_VIDEO_ENCODE_READ), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_VIDEO_ENCODE_WRITE), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_BARRIER_, ACCESS_NO_ACCESS), + }; + + PrinterRef printer = m_runtimeDevice.GetDevice().Printer(); + + printer("\n\nBarrierBatch %u", barrierBatch); + + for (uint32_t iGroup = 0; iGroup < numGroups; iGroup++) + { + const auto& group = barrierGroups[iGroup]; + + printer("\n Group %u : ", iGroup); + printer(barrierGroupTypeNames[group.Type]); + + if (group.Type == D3D12_BARRIER_TYPE_TEXTURE) + { + for (uint32_t iBarrier = 0; iBarrier < group.NumBarriers; iBarrier++) + { + auto& barrier = group.pTextureBarriers[iBarrier]; + printer("\n [%u] :", iBarrier); + printer("\n SyncBefore : "); + printer.PrintFlags(barrier.SyncBefore, syncNames); + printer("\n SyncAfter : "); + printer.PrintFlags(barrier.SyncAfter, syncNames); + printer("\n AccessBefore : "); + printer.PrintFlags(barrier.AccessBefore, accessNames); + printer("\n AccessAfter : "); + printer.PrintFlags(barrier.AccessAfter, accessNames); + printer("\n LayoutBefore : "); + printer.PrintValueName(barrier.LayoutBefore, layoutNames); + printer("\n LayoutAfter : "); + printer.PrintValueName(barrier.LayoutAfter, layoutNames); + printer("\n pResource : %p", barrier.pResource); + printer("\n Subresources : Mips [ %u, %u ), Array [ %u, %u ), Plane[ %u, %u )", + 
barrier.Subresources.IndexOrFirstMipLevel, + barrier.Subresources.IndexOrFirstMipLevel + barrier.Subresources.NumMipLevels, + barrier.Subresources.FirstArraySlice, + barrier.Subresources.FirstArraySlice + barrier.Subresources.NumArraySlices, + barrier.Subresources.FirstPlane, + barrier.Subresources.FirstPlane + barrier.Subresources.NumPlanes); + + if (barrier.Flags & D3D12_TEXTURE_BARRIER_FLAG_DISCARD) + printer("\n Discard: true"); + } + } + } +#endif + + ID3D12GraphicsCommandList7* pD3DCmdList7 = static_cast(pD3DCmdList); + pD3DCmdList7->Barrier(numGroups, barrierGroups); + } + + private: + struct D3D12EnhancedBarrierAccessInfo + { + D3D12_BARRIER_ACCESS access; + D3D12_BARRIER_LAYOUT layout; + D3D12_BARRIER_SYNC sync; + }; + + static D3D12_BARRIER_SYNC GetD3D12SyncFlagsForShaderStages(RpsShaderStageFlags shaderStages) + { + D3D12_BARRIER_SYNC result = D3D12_BARRIER_SYNC_NONE; + + constexpr RpsShaderStageFlags allVertexShadingStages = RPS_SHADER_STAGE_VS | RPS_SHADER_STAGE_HS | + RPS_SHADER_STAGE_DS | RPS_SHADER_STAGE_GS | + RPS_SHADER_STAGE_AS | RPS_SHADER_STAGE_MS; + + static constexpr struct + { + RpsShaderStageFlags rpsStages; + D3D12_BARRIER_SYNC d3dSync; + } stageToSyncMap[] = { + {allVertexShadingStages, D3D12_BARRIER_SYNC_VERTEX_SHADING}, + {RPS_SHADER_STAGE_PS, D3D12_BARRIER_SYNC_PIXEL_SHADING}, + {RPS_SHADER_STAGE_CS, D3D12_BARRIER_SYNC_COMPUTE_SHADING}, + {RPS_SHADER_STAGE_RAYTRACING, D3D12_BARRIER_SYNC_RAYTRACING}, + }; + + for (auto i = std::begin(stageToSyncMap), e = std::end(stageToSyncMap); i != e; ++i) + { + if (shaderStages & i->rpsStages) + { + result |= i->d3dSync; + } + } + + return result; + } + + static D3D12EnhancedBarrierAccessInfo CalcD3D12AccessInfo(const RpsAccessAttr& access) + { + if (access.accessFlags == RPS_ACCESS_UNKNOWN) + { + return D3D12EnhancedBarrierAccessInfo{ + D3D12_BARRIER_ACCESS_NO_ACCESS, D3D12_BARRIER_LAYOUT_UNDEFINED, D3D12_BARRIER_SYNC_NONE}; + } + + const bool isWriteOnly = rpsAnyBitsSet(access.accessFlags, 
RPS_ACCESS_DISCARD_OLD_DATA_BIT);
+
+            // GPU-write accesses are handled first, one early return per access kind. Each returns
+            // the { access, layout, sync } triple used to fill the enhanced barrier.
+            if (access.accessFlags & RPS_ACCESS_RENDER_TARGET_BIT)
+            {
+                return D3D12EnhancedBarrierAccessInfo{D3D12_BARRIER_ACCESS_RENDER_TARGET,
+                                                      D3D12_BARRIER_LAYOUT_RENDER_TARGET,
+                                                      D3D12_BARRIER_SYNC_RENDER_TARGET};
+            }
+
+            if (access.accessFlags & RPS_ACCESS_DEPTH_STENCIL)
+            {
+                if (access.accessFlags & RPS_ACCESS_DEPTH_STENCIL_WRITE)
+                    return D3D12EnhancedBarrierAccessInfo{D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE,
+                                                          D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE,
+                                                          D3D12_BARRIER_SYNC_DEPTH_STENCIL};
+                else
+                    return D3D12EnhancedBarrierAccessInfo{D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ,
+                                                          D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ,
+                                                          D3D12_BARRIER_SYNC_DEPTH_STENCIL};
+            }
+
+            // TODO: Queue types
+
+            if (access.accessFlags & RPS_ACCESS_UNORDERED_ACCESS_BIT)
+            {
+
+                // UAV clears use a dedicated sync scope; other UAV accesses sync on the shader
+                // stages recorded in the access attribute.
+                if (access.accessFlags & RPS_ACCESS_CLEAR_BIT)
+                {
+                    return D3D12EnhancedBarrierAccessInfo{D3D12_BARRIER_ACCESS_UNORDERED_ACCESS,
+                                                          D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS,
+                                                          D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW};
+                }
+
+                return D3D12EnhancedBarrierAccessInfo{D3D12_BARRIER_ACCESS_UNORDERED_ACCESS,
+                                                      D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS,
+                                                      GetD3D12SyncFlagsForShaderStages(access.accessStages)};
+            }
+
+            if (access.accessFlags & RPS_ACCESS_COPY_DEST_BIT)
+            {
+                // TODO: Check self copy
+
+                return D3D12EnhancedBarrierAccessInfo{
+                    D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_LAYOUT_COPY_DEST, D3D12_BARRIER_SYNC_COPY};
+            }
+
+            if (access.accessFlags & RPS_ACCESS_RESOLVE_DEST_BIT)
+            {
+                return D3D12EnhancedBarrierAccessInfo{
+                    D3D12_BARRIER_ACCESS_RESOLVE_DEST, D3D12_BARRIER_LAYOUT_RESOLVE_DEST, D3D12_BARRIER_SYNC_RESOLVE};
+            }
+
+            // TODO: Handle AS COPY / EmitPostBuildInfo
+            // An acceleration structure build writes the acceleration structure, so it pairs the
+            // AS-write access bit with the AS-build sync scope.
+            if (access.accessFlags & RPS_ACCESS_RAYTRACING_AS_BUILD_BIT)
+            {
+                return D3D12EnhancedBarrierAccessInfo{D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE,
+                                                      D3D12_BARRIER_LAYOUT_COMMON,
+                                                      D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE};
+            }
+
+            if (access.accessFlags & RPS_ACCESS_STREAM_OUT_BIT)
+            {
+                return
D3D12EnhancedBarrierAccessInfo{ + D3D12_BARRIER_ACCESS_STREAM_OUTPUT, D3D12_BARRIER_LAYOUT_COMMON, D3D12_BARRIER_SYNC_VERTEX_SHADING}; + } + + // At this point we should have handled all GPU-write accesses. + RPS_ASSERT(!(access.accessFlags & RPS_ACCESS_ALL_GPU_WRITE)); + + D3D12EnhancedBarrierAccessInfo result = {}; + + result.sync = GetD3D12SyncFlagsForShaderStages(access.accessStages); + + // clang-format off + static constexpr struct + { + RpsAccessFlags rpsAccess; + D3D12_BARRIER_ACCESS d3dAccess; + D3D12_BARRIER_LAYOUT d3dLayout; + D3D12_BARRIER_SYNC sync; + } readAccessMap[] = { + {RPS_ACCESS_SHADER_RESOURCE_BIT, D3D12_BARRIER_ACCESS_SHADER_RESOURCE, D3D12_BARRIER_LAYOUT_SHADER_RESOURCE, D3D12_BARRIER_SYNC_NONE}, // Sync comes from ShaderStage flags + {RPS_ACCESS_COPY_SRC_BIT, D3D12_BARRIER_ACCESS_COPY_SOURCE, D3D12_BARRIER_LAYOUT_COPY_SOURCE, D3D12_BARRIER_SYNC_COPY}, + {RPS_ACCESS_RESOLVE_SRC_BIT, D3D12_BARRIER_ACCESS_RESOLVE_SOURCE, D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE, D3D12_BARRIER_SYNC_RESOLVE}, + {RPS_ACCESS_SHADING_RATE_BIT, D3D12_BARRIER_ACCESS_SHADING_RATE_SOURCE, D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE, D3D12_BARRIER_SYNC_PIXEL_SHADING}, + {RPS_ACCESS_INDIRECT_ARGS_BIT, D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT, D3D12_BARRIER_LAYOUT_COMMON, D3D12_BARRIER_SYNC_EXECUTE_INDIRECT}, + {RPS_ACCESS_INDEX_BUFFER_BIT, D3D12_BARRIER_ACCESS_INDEX_BUFFER, D3D12_BARRIER_LAYOUT_COMMON, D3D12_BARRIER_SYNC_INPUT_ASSEMBLER}, + {RPS_ACCESS_VERTEX_BUFFER_BIT, D3D12_BARRIER_ACCESS_VERTEX_BUFFER, D3D12_BARRIER_LAYOUT_COMMON, D3D12_BARRIER_SYNC_VERTEX_SHADING}, + {RPS_ACCESS_CONSTANT_BUFFER_BIT, D3D12_BARRIER_ACCESS_CONSTANT_BUFFER, D3D12_BARRIER_LAYOUT_COMMON, D3D12_BARRIER_SYNC_NONE}, + {RPS_ACCESS_PRESENT_BIT, D3D12_BARRIER_ACCESS_COMMON, D3D12_BARRIER_LAYOUT_COMMON, D3D12_BARRIER_SYNC_ALL}, //SyncBefore bits D3D12_BARRIER_SYNC_NONE are incompatible with AccessBefore bits D3D12_BARRIER_ACCESS_COMMON in texture barrier at group [0], index [2]. 
[ STATE_SETTING ERROR #1331: INCOMPATIBLE_BARRIER_VALUES] + {RPS_ACCESS_RAYTRACING_AS_READ_BIT, D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ, D3D12_BARRIER_LAYOUT_COMMON, D3D12_BARRIER_SYNC_RAYTRACING}, // TODO: Does this include DXR1.1 shaders? + }; + // clang-format on + + // TODO: Skip buffer-only accesses for images + // TODO: Avoid / warn on generic/common images + bool bFoundAccess = false; + for (auto i = std::begin(readAccessMap), e = std::end(readAccessMap); i != e; ++i) + { + if (access.accessFlags & i->rpsAccess) + { + result.access |= i->d3dAccess; + result.sync |= i->sync; + + result.layout = (result.layout == D3D12_BARRIER_LAYOUT_COMMON) ? i->d3dLayout + : D3D12_BARRIER_LAYOUT_GENERIC_READ; + + bFoundAccess = true; + } + } + + if (!bFoundAccess) + { + result = {D3D12_BARRIER_ACCESS_NO_ACCESS, D3D12_BARRIER_LAYOUT_COMMON, D3D12_BARRIER_SYNC_NONE}; + } + + return result; + } + + D3D12_BARRIER_SUBRESOURCE_RANGE GetD3D12SubresourceRange(const SubresourceRangePacked& range) + { + D3D12_BARRIER_SUBRESOURCE_RANGE result; + + // Expecting d3d12 aspect mask to be 1,2,3 for now. + RPS_ASSERT((range.aspectMask >= 1) && (range.aspectMask <= 3)); + + result.IndexOrFirstMipLevel = range.baseMipLevel; + result.NumMipLevels = range.GetMipLevelCount(); + result.FirstArraySlice = range.baseArrayLayer; + result.NumArraySlices = range.GetArrayLayerCount(); + result.FirstPlane = (range.aspectMask & 1) ? 0 : 1; + result.NumPlanes = (range.aspectMask == 3) ? 
2 : 1; + + return result; + } + + static bool ResourceMayNeedPlacedResourceInit(const ResourceInstance& resInfo) + { + return (resInfo.isAliased || resInfo.isPendingInit) && + rpsAnyBitsSet( + resInfo.allAccesses.accessFlags, + (RPS_ACCESS_RENDER_TARGET_BIT | RPS_ACCESS_DEPTH_STENCIL | RPS_ACCESS_UNORDERED_ACCESS_BIT)) && + !rpsAnyBitsSet(resInfo.initialAccess.accessFlags, RPS_ACCESS_CLEAR_BIT) && + !rpsAllBitsSet(resInfo.initialAccess.accessFlags, + RPS_ACCESS_COPY_DEST_BIT | RPS_ACCESS_DISCARD_OLD_DATA_BIT); + } + + void AppendBarrier(const ResourceInstance& resInfo, + const RpsAccessAttr& prevAccess, + const RpsAccessAttr& currAccess, + bool bDiscard, + const SubresourceRangePacked range) + { + // TODO: Make a texture-only version of CalcD3D12AccessInfo + auto beforeAccessInfo = CalcD3D12AccessInfo(prevAccess); + auto afterAccessInfo = CalcD3D12AccessInfo(currAccess); + + beforeAccessInfo.sync = beforeAccessInfo.sync; + afterAccessInfo.sync = afterAccessInfo.sync; + beforeAccessInfo.access = beforeAccessInfo.access; + afterAccessInfo.access = afterAccessInfo.access; + + if ((beforeAccessInfo.access == D3D12_BARRIER_ACCESS_UNORDERED_ACCESS) && + (afterAccessInfo.access == D3D12_BARRIER_ACCESS_UNORDERED_ACCESS) && + (prevAccess.accessFlags & RPS_ACCESS_RELAXED_ORDER_BIT) && + (currAccess.accessFlags & RPS_ACCESS_RELAXED_ORDER_BIT)) + { + return; + } + + if ((beforeAccessInfo.access == afterAccessInfo.access) && + ((beforeAccessInfo.access == D3D12_BARRIER_ACCESS_RENDER_TARGET) || + (beforeAccessInfo.access == D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE))) + { + return; + } + + if (resInfo.desc.IsImage()) + { + D3D12_TEXTURE_BARRIER* pBarrier = m_textureBarriers.grow(1); + + pBarrier->SyncBefore = beforeAccessInfo.sync; + pBarrier->SyncAfter = afterAccessInfo.sync; + pBarrier->AccessBefore = beforeAccessInfo.access; + pBarrier->AccessAfter = afterAccessInfo.access; + pBarrier->LayoutBefore = beforeAccessInfo.layout; + pBarrier->LayoutAfter = afterAccessInfo.layout; + 
pBarrier->pResource = rpsD3D12ResourceFromHandle(resInfo.hRuntimeResource); + pBarrier->Subresources = GetD3D12SubresourceRange(range); + pBarrier->Flags = D3D12_TEXTURE_BARRIER_FLAG_NONE; + + if (bDiscard && ResourceMayNeedPlacedResourceInit(resInfo)) + { + pBarrier->Subresources = {UINT32_MAX}; + pBarrier->Flags = D3D12_TEXTURE_BARRIER_FLAG_DISCARD; + } + } + else if (resInfo.desc.IsBuffer()) + { + D3D12_BUFFER_BARRIER* pBarrier = m_bufferBarriers.grow(1); + + pBarrier->SyncBefore = beforeAccessInfo.sync; + pBarrier->SyncAfter = afterAccessInfo.sync; + pBarrier->AccessBefore = beforeAccessInfo.access; + pBarrier->AccessAfter = afterAccessInfo.access; + pBarrier->pResource = rpsD3D12ResourceFromHandle(resInfo.hRuntimeResource); + pBarrier->Offset = 0; + pBarrier->Size = UINT64_MAX; + } + else + { + RPS_TODO(); + } + } + + private: + static constexpr uint32_t D3D12BarrierTypeCount = D3D12_BARRIER_TYPE_BUFFER + 1; + + struct BarrierBatch + { + uint32_t offsets[D3D12BarrierTypeCount]; + uint32_t count[D3D12BarrierTypeCount]; + + bool empty() const + { + return (count[D3D12_BARRIER_TYPE_GLOBAL] == 0) && (count[D3D12_BARRIER_TYPE_TEXTURE] == 0) && + (count[D3D12_BARRIER_TYPE_BUFFER] == 0); + } + }; + + const RuntimeDevice& m_runtimeDevice; + + ArenaVector m_barrierBatches; + ArenaVector m_textureBarriers; + ArenaVector m_bufferBarriers; + ArenaVector m_globalBarriers; + }; +} + +#endif //RPS_D3D12_ENHANCED_BARRIER_SUPPORT + +#endif //RPS_D3D12_ENHANCED_BARRIER_H diff --git a/src/runtime/d3d12/rps_d3d12_runtime_backend.cpp b/src/runtime/d3d12/rps_d3d12_runtime_backend.cpp new file mode 100644 index 0000000..4f62041 --- /dev/null +++ b/src/runtime/d3d12/rps_d3d12_runtime_backend.cpp @@ -0,0 +1,982 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#include "rps/runtime/common/rps_render_states.h" +#include "rps/runtime/d3d12/rps_d3d12_runtime.h" + +#include "runtime/common/rps_render_graph.hpp" +#include "runtime/common/rps_runtime_util.hpp" +#include "runtime/d3d12/rps_d3d12_runtime_backend.hpp" +#include "runtime/d3d12/rps_d3d12_runtime_device.hpp" +#include "runtime/d3d12/rps_d3d12_util.hpp" + +namespace rps +{ + RpsFormat GetD3D12DSVFormat(RpsFormat viewFormat); + + RpsResult D3D12RuntimeBackend::CreateCommandResources(const RenderGraphUpdateContext& context) + { + auto& renderGraph = context.renderGraph; + + const auto& graph = renderGraph.GetGraph(); + auto& runtimeCmds = renderGraph.GetRuntimeCmdInfos(); + auto cmdBatches = renderGraph.GetCmdBatches().range_all(); + + ArenaCheckPoint arenaCheckpoint{context.scratchArena}; + + uint32_t numGraphicsCmds = 0; + Span transitionRange = {}; + + auto flushBarrierBatch = [&]() { + if (!transitionRange.empty()) + { + uint32_t batchId = m_pBarriers->CreateBarrierBatch(context, transitionRange); + transitionRange = {}; + + if (batchId != RPS_INDEX_NONE_U32) + { + auto pNewRuntimeCmd = m_runtimeCmds.grow(1); + + pNewRuntimeCmd->cmdId = RPS_CMD_ID_INVALID; + pNewRuntimeCmd->barrierBatchId = batchId; + } + } + }; + + for (uint32_t iBatch = 0; iBatch < cmdBatches.size(); iBatch++) + { + RpsCommandBatch& batchInfo = cmdBatches[iBatch]; + + const uint32_t backendCmdBegin = uint32_t(m_runtimeCmds.size()); + + for (uint32_t iCmd = batchInfo.cmdBegin, numCmds = batchInfo.cmdBegin + batchInfo.numCmds; iCmd < numCmds; + iCmd++) + { + const auto& runtimeCmd = runtimeCmds[iCmd]; + + if (runtimeCmd.isTransition) + { + if (transitionRange.GetEnd() != iCmd) + { + transitionRange.SetRange(iCmd, 0); + } + transitionRange.SetCount(transitionRange.size() + 1); + } + else + { + flushBarrierBatch(); + + auto pNewRuntimeCmd = m_runtimeCmds.grow(1); + + pNewRuntimeCmd->cmdId = runtimeCmd.cmdId; + pNewRuntimeCmd->barrierBatchId = RPS_INDEX_NONE_U32; + 
pNewRuntimeCmd->resourceBindingInfo = RPS_INDEX_NONE_U32; + } + } + + flushBarrierBatch(); + + batchInfo.cmdBegin = backendCmdBegin; + batchInfo.numCmds = uint32_t(m_runtimeCmds.size()) - backendCmdBegin; + + // TODO: Avoid per-backend runtime command reorganize. + } + + // Create Views + + ArenaVector cbvSrvUavs(&context.scratchArena); + ArenaVector rtvs(&context.scratchArena); + ArenaVector dsvs(&context.scratchArena); + + cbvSrvUavs.reserve(context.renderGraph.GetCmdAccessInfos().size()); + rtvs.reserve(context.renderGraph.GetCmdAccessInfos().size()); + + const auto& cmdAccesses = context.renderGraph.GetCmdAccessInfos(); + + for (auto& runtimeCmd : m_runtimeCmds) + { + if (runtimeCmd.cmdId == RPS_CMD_ID_INVALID) + continue; + + auto* pCmdInfo = context.renderGraph.GetCmdInfo(runtimeCmd.cmdId); + auto& cmd = *pCmdInfo->pCmdDecl; + auto& nodeDeclInfo = *pCmdInfo->pNodeDecl; + + const uint32_t accessOffset = pCmdInfo->accesses.GetBegin(); + + for (uint32_t accessIdx = 0, accessCount = pCmdInfo->accesses.size(); accessIdx < accessCount; accessIdx++) + { + const uint32_t globalAccessIdx = accessOffset + accessIdx; + auto& access = cmdAccesses[globalAccessIdx]; + + if (!rpsAnyBitsSet(access.access.accessFlags, RPS_ACCESS_NO_VIEW_BIT)) + { + if (rpsAnyBitsSet(access.access.accessFlags, + RPS_ACCESS_CONSTANT_BUFFER_BIT | RPS_ACCESS_UNORDERED_ACCESS_BIT | + RPS_ACCESS_SHADER_RESOURCE_BIT)) + { + cbvSrvUavs.push_back(globalAccessIdx); + } + else if (rpsAnyBitsSet(access.access.accessFlags, RPS_ACCESS_RENDER_TARGET_BIT)) + { + rtvs.push_back(globalAccessIdx); + } + else if (rpsAnyBitsSet(access.access.accessFlags, RPS_ACCESS_DEPTH_STENCIL)) + { + dsvs.push_back(globalAccessIdx); + } + } + } + } + + m_accessToDescriptorMap.resize(cmdAccesses.size(), RPS_INDEX_NONE_U32); + + RPS_V_RETURN(CreateResourceViews(context, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, cbvSrvUavs.range_all())); + RPS_V_RETURN(CreateResourceViews(context, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, rtvs.range_all())); 
+ RPS_V_RETURN(CreateResourceViews(context, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, dsvs.range_all())); + + return RPS_OK; + } + + RpsResult D3D12RuntimeBackend::UpdateFrame(const RenderGraphUpdateContext& context) + { + m_currentResourceFrame = + m_frameResources.empty() ? 0 : (m_currentResourceFrame + 1) % uint32_t(m_frameResources.size()); + + if (m_frameResources.size() <= GetNumQueuedFrames(context)) + { + RPS_RETURN_ERROR_IF(m_frameResources.size() > RPS_MAX_QUEUED_FRAMES, RPS_ERROR_INVALID_OPERATION); + + RPS_CHECK_ALLOC(m_frameResources.insert(m_currentResourceFrame, FrameResources{})); + m_frameResources[m_currentResourceFrame].Reset(m_persistentPool); + } + else + { + // TODO - Recycle + m_frameResources[m_currentResourceFrame].DestroyDeviceResources(); + std::swap(m_pendingReleaseResources, m_frameResources[m_currentResourceFrame].pendingResources); + } + + // TODO: + m_runtimeCmds.reset(&context.frameArena); + m_accessToDescriptorMap.reset(&context.frameArena); + + m_pBarriers->UpdateFrame(context); + + return RPS_OK; + } + + RpsResult D3D12RuntimeBackend::CreateHeaps(const RenderGraphUpdateContext& context, ArrayRef heaps) + { + auto pD3DDevice = m_device.GetD3DDevice(); + const bool bEnableDebugNames = + !!(context.pUpdateInfo->diagnosticFlags & RPS_DIAGNOSTIC_ENABLE_RUNTIME_DEBUG_NAMES); + + for (auto& heapInfo : heaps) + { + // TODO: + heapInfo.size = (heapInfo.size == UINT64_MAX) ? 
heapInfo.maxUsedSize : heapInfo.size; + + if (heapInfo.hRuntimeHeap || !heapInfo.size) + continue; + + auto& heapTypeInfo = m_device.GetD3D12HeapTypeInfo(heapInfo.memTypeIndex); + + D3D12_HEAP_DESC heapDesc = {}; + heapDesc.Alignment = heapInfo.alignment; + heapDesc.SizeInBytes = heapInfo.size; + heapDesc.Flags = heapTypeInfo.heapFlags; + heapDesc.Properties.Type = heapTypeInfo.type; + + ID3D12Heap* pD3DHeap = nullptr; + RPS_V_RETURN(HRESULTToRps(pD3DDevice->CreateHeap(&heapDesc, IID_PPV_ARGS(&pD3DHeap)))); + + if (bEnableDebugNames) + SetHeapDebugName(pD3DHeap, heapDesc, heapInfo.index); + + heapInfo.hRuntimeHeap = rpsD3D12HeapToHandle(pD3DHeap); + } + + return RPS_OK; + } + + void D3D12RuntimeBackend::DestroyHeaps(ArrayRef heaps) + { + for (auto& heapInfo : heaps) + { + if (heapInfo.hRuntimeHeap) + { + ID3D12Heap* pD3DHeap = rpsD3D12HeapFromHandle(heapInfo.hRuntimeHeap); + heapInfo.hRuntimeHeap = {}; + + pD3DHeap->Release(); + } + } + } + + RpsResult D3D12RuntimeBackend::CreateResources(const RenderGraphUpdateContext& context, + ArrayRef resInstances) + { + auto& heaps = GetRenderGraph().GetHeapInfos(); + auto resourceDecls = GetRenderGraph().GetBuilder().GetResourceDecls(); + + const bool bNeedsPlacedResourceInitState = !m_device.GetEnhancedBarrierEnabled(); + const bool bEnableDebugNames = + !!(context.pUpdateInfo->diagnosticFlags & RPS_DIAGNOSTIC_ENABLE_RUNTIME_DEBUG_NAMES); + +#if RPS_D3D12_ENHANCED_BARRIER_SUPPORT + ScopedComPtr pD3DDevice10; + m_device.GetD3DDevice()->QueryInterface(pD3DDevice10.ReleaseAndGetAddressOf()); +#endif //RPS_D3D12_ENHANCED_BARRIER_SUPPORT + + uint32_t temporalSlice = RPS_INDEX_NONE_U32; + + // Create resources + for (uint32_t iRes = 0, numRes = uint32_t(resInstances.size()); iRes < numRes; iRes++) + { + auto& resInfo = resInstances[iRes]; + + if (resInfo.isExternal) + { + continue; + } + + if (bNeedsPlacedResourceInitState && resInfo.isAliased && + (resInfo.initialAccess.accessFlags != RPS_ACCESS_UNKNOWN)) + { + // Force an 
initial state compatible with PlacedResource initialization + m_pBarriers->EnsurePlacedResourceInitState(resInfo); + } + + temporalSlice = + resInfo.isFirstTemporalSlice ? 0 : (resInfo.isTemporalSlice ? (temporalSlice + 1) : RPS_INDEX_NONE_U32); + + if (resInfo.isPendingCreate) + { + RPS_ASSERT(!resInfo.hRuntimeResource); + + if (resInfo.allocPlacement.heapId != RPS_INDEX_NONE_U32) + { + auto pD3DHeap = rpsD3D12HeapFromHandle(heaps[resInfo.allocPlacement.heapId].hRuntimeHeap); + ID3D12Resource* pD3DRes = nullptr; + + D3D12_CLEAR_VALUE clearValue = {}; + + bool bHasClearValue = ((resInfo.allAccesses.accessFlags & + (RPS_ACCESS_RENDER_TARGET_BIT | RPS_ACCESS_DEPTH_STENCIL)) && + (resInfo.clearValueId != RPS_INDEX_NONE_U32) && !resInfo.desc.IsBuffer()); + + if (bHasClearValue) + { + auto& rpsClearValue = GetRenderGraph().GetResourceClearValue(resInfo.clearValueId); + + if (resInfo.allAccesses.accessFlags & RPS_ACCESS_RENDER_TARGET_BIT) + { + clearValue.Format = rpsFormatToDXGI(rpsClearValue.format); + memcpy(clearValue.Color, rpsClearValue.value.color.float32, sizeof(float) * 4); + } + else + { + clearValue.Format = rpsFormatToDXGI(GetD3D12DSVFormat(rpsClearValue.format)); + clearValue.DepthStencil.Depth = rpsClearValue.value.depthStencil.depth; + clearValue.DepthStencil.Stencil = UINT8(rpsClearValue.value.depthStencil.stencil); + } + } + +#if RPS_D3D12_ENHANCED_BARRIER_SUPPORT + if (m_device.GetEnhancedBarrierEnabled()) + { + + D3D12_RESOURCE_DESC1 d3dResDesc = {}; + CalcD3D12ResourceDesc(&d3dResDesc, resInfo); + + // TODO: Castable formats + + RPS_V_RETURN( + HRESULTToRps(pD3DDevice10->CreatePlacedResource2(pD3DHeap, + resInfo.allocPlacement.offset, + &d3dResDesc, + D3D12_BARRIER_LAYOUT_UNDEFINED, + bHasClearValue ? 
&clearValue : nullptr, + 0, + nullptr, + IID_PPV_ARGS(&pD3DRes)))); + } + else +#endif //RPS_D3D12_ENHANCED_BARRIER_SUPPORT + { + D3D12_RESOURCE_DESC d3dResDesc = {}; + CalcD3D12ResourceDesc(&d3dResDesc, resInfo); + + const auto d3dInitState = + D3D12ConventionalBarrierBuilder::CalcResourceInitState(m_device, resInfo); + + RPS_V_RETURN(HRESULTToRps( + m_device.GetD3DDevice()->CreatePlacedResource(pD3DHeap, + resInfo.allocPlacement.offset, + &d3dResDesc, + d3dInitState, + bHasClearValue ? &clearValue : nullptr, + IID_PPV_ARGS(&pD3DRes)))); + } + + resInfo.hRuntimeResource = rpsD3D12ResourceToHandle(pD3DRes); + resInfo.isPendingInit = true; + + if (bEnableDebugNames) + { + SetResourceDebugName(pD3DRes, resourceDecls[resInfo.resourceDeclId].name, temporalSlice); + } + } + resInfo.isPendingCreate = false; + } + else + { + RPS_ASSERT(!resInfo.isExternal); + resInfo.isPendingInit = resInfo.isAliased; + } + } + + return RPS_OK; + } + + void D3D12RuntimeBackend::DestroyResources(ArrayRef resources) + { + for (auto& resInfo : resources) + { + if (!resInfo.isExternal && resInfo.hRuntimeResource) + { + rpsD3D12ResourceFromHandle(resInfo.hRuntimeResource)->Release(); + } + } + } + + void D3D12RuntimeBackend::OnDestroy() + { + for (auto& frameResource : m_frameResources) + { + frameResource.DestroyDeviceResources(); + } + + m_frameResources.clear(); + + RuntimeBackend::OnDestroy(); + } + + RpsResult D3D12RuntimeBackend::RecordCommands(const RenderGraph& renderGraph, + const RpsRenderGraphRecordCommandInfo& recordInfo) const + { + RuntimeCmdCallbackContext cmdCbCtx{this, recordInfo}; + + for (auto cmdIter = m_runtimeCmds.cbegin() + recordInfo.cmdBeginIndex, cmdEnd = cmdIter + recordInfo.numCmds; + cmdIter != cmdEnd; + ++cmdIter) + { + auto& runtimeCmd = *cmdIter; + + if (runtimeCmd.barrierBatchId != RPS_INDEX_NONE_U32) + { + m_pBarriers->RecordBarrierBatch(GetContextD3DCmdList(cmdCbCtx), runtimeCmd.barrierBatchId); + } + + RecordCommand(cmdCbCtx, runtimeCmd); + } + + return 
RPS_OK; + } + + void D3D12RuntimeBackend::DestroyRuntimeResourceDeferred(ResourceInstance& resource) + { + if (resource.hRuntimeResource) + { + m_pendingReleaseResources.push_back(D3D12RuntimeDevice::FromHandle(resource.hRuntimeResource)); + resource.hRuntimeResource = {}; + } + } + + RpsResult D3D12RuntimeBackend::RecordCmdRenderPassBegin(const RuntimeCmdCallbackContext& context) const + { + auto& cmd = *context.pCmd; + auto* pCmdInfo = context.pCmdInfo; + auto& nodeDeclInfo = *pCmdInfo->pNodeDecl; + auto pD3DCmdList = GetContextD3DCmdList(context); + + RPS_RETURN_ERROR_IF(!nodeDeclInfo.MaybeGraphicsNode(), RPS_ERROR_INVALID_OPERATION); + + const bool bBindRenderTargets = !rpsAnyBitsSet(cmd.callback.flags, RPS_CMD_CALLBACK_CUSTOM_RENDER_TARGETS_BIT); + const bool bSetViewportScissors = !rpsAnyBitsSet(cmd.callback.flags, RPS_CMD_CALLBACK_CUSTOM_VIEWPORT_BIT); + + // Need to skip clears if it's render pass resume + const bool bIsRenderPassResuming = rpsAnyBitsSet(context.renderPassFlags, RPS_RUNTIME_RENDER_PASS_RESUMING); + + auto descriptorIndices = + m_accessToDescriptorMap.range(pCmdInfo->accesses.GetBegin(), pCmdInfo->accesses.size()); + + uint32_t numRtvs = 0; + + D3D12_CPU_DESCRIPTOR_HANDLE rtvs[D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT]; + D3D12_CPU_DESCRIPTOR_HANDLE dsv = {0}; + + if ((nodeDeclInfo.pRenderPassInfo) && !(nodeDeclInfo.pRenderPassInfo->clearOnly) && + (bBindRenderTargets || !bIsRenderPassResuming)) + { + auto& rpInfo = *nodeDeclInfo.pRenderPassInfo; + + auto clearColorValueRefs = rpInfo.GetRenderTargetClearValueRefs(); + uint32_t clearColorValueIndex = 0; + + for (auto& rtParamRef : rpInfo.GetRenderTargetRefs()) + { + auto& paramAccessInfo = nodeDeclInfo.params[rtParamRef.paramId]; + auto descriptorIndicesRange = + descriptorIndices.range(paramAccessInfo.accessOffset, paramAccessInfo.numElements); + + const uint32_t rtvSlot = paramAccessInfo.baseSemanticIndex + rtParamRef.arrayOffset; + + numRtvs = rpsMax(numRtvs, rtvSlot + 1); + + rtvs[rtvSlot] = 
m_cpuDescriptorHeaps[D3D12_DESCRIPTOR_HEAP_TYPE_RTV].Get( + descriptorIndicesRange[rtParamRef.arrayOffset]); + + if ((!bIsRenderPassResuming) && (rpInfo.renderTargetClearMask & (1u << rtvSlot))) + { + auto clearValueRef = clearColorValueRefs[clearColorValueIndex]; + + auto pClearColor = static_cast( + cmd.args[clearValueRef.paramId])[clearValueRef.arrayOffset] + .float32; + + // TODO: Sub-rect clear implemented separatedly + pD3DCmdList->ClearRenderTargetView(rtvs[rtvSlot], pClearColor, 0, nullptr); + + clearColorValueIndex++; + } + } + + if (rpInfo.depthStencilTargetMask) + { + auto& paramAccessInfo = nodeDeclInfo.params[rpInfo.GetDepthStencilRef()->paramId]; + RPS_ASSERT(paramAccessInfo.numElements == 1); + + dsv = m_cpuDescriptorHeaps[D3D12_DESCRIPTOR_HEAP_TYPE_DSV].Get( + descriptorIndices[paramAccessInfo.accessOffset]); + + if ((!bIsRenderPassResuming) && (rpInfo.clearDepth || rpInfo.clearStencil)) + { + float depthClearValue = 0.0f; + uint32_t stencilClearValue = 0; + D3D12_CLEAR_FLAGS clearFlag = {}; + + if (rpInfo.clearDepth) + { + auto pClearValueRef = rpInfo.GetDepthClearValueRef(); + depthClearValue = static_cast(cmd.args[pClearValueRef->paramId])[0]; + clearFlag |= D3D12_CLEAR_FLAG_DEPTH; + } + + if (rpInfo.clearStencil) + { + auto pClearValueRef = rpInfo.GetStencilClearValueRef(); + stencilClearValue = static_cast(cmd.args[pClearValueRef->paramId])[0]; + clearFlag |= D3D12_CLEAR_FLAG_STENCIL; + } + + pD3DCmdList->ClearDepthStencilView( + dsv, clearFlag, depthClearValue, UINT8(stencilClearValue), 0, nullptr); + } + } + + if (bBindRenderTargets && ((numRtvs > 0) || (dsv.ptr != 0))) + { + pD3DCmdList->OMSetRenderTargets(numRtvs, rtvs, FALSE, (dsv.ptr != 0) ? 
&dsv : nullptr); + } + + if (bSetViewportScissors) + { + auto& cmdRPInfo = *pCmdInfo->pRenderPassInfo; + + RPS_STATIC_ASSERT(sizeof(D3D12_VIEWPORT) == sizeof(RpsViewport), + "RpsViewport / D3D12_VIEWPORT size mismatch"); + + pD3DCmdList->RSSetViewports(cmdRPInfo.viewportInfo.numViewports, + reinterpret_cast(cmdRPInfo.viewportInfo.pViewports)); + + D3D12_RECT d3dScissorRects[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE] = {}; + + RPS_RETURN_ERROR_IF(cmdRPInfo.viewportInfo.numScissorRects > RPS_COUNTOF(d3dScissorRects), + RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + for (uint32_t iScissor = 0; iScissor < cmdRPInfo.viewportInfo.numScissorRects; iScissor++) + { + auto& rect = cmdRPInfo.viewportInfo.pScissorRects[iScissor]; + d3dScissorRects[iScissor] = D3D12_RECT{ + rect.x, + rect.y, + rect.x + rect.width, + rect.y + rect.height, + }; + } + + pD3DCmdList->RSSetScissorRects(cmdRPInfo.viewportInfo.numScissorRects, d3dScissorRects); + } + } + + return RPS_OK; + } + + RpsResult D3D12RuntimeBackend::RecordCmdRenderPassEnd(const RuntimeCmdCallbackContext& context) const + { + // TODO: Add a pass to split graphics cmd to separated clear / resolve cmds + + auto& renderGraph = *context.pRenderGraph; + auto* pCmdInfo = context.pCmdInfo; + auto cmdAccesses = pCmdInfo->accesses.Get(renderGraph.GetCmdAccessInfos()); + auto resInstances = renderGraph.GetResourceInstances().range_all(); + auto pD3DCmdList = GetContextD3DCmdList(context); + + // Skip resolve if it's render pass suspending + const bool bIsRenderPassSuspending = rpsAnyBitsSet(context.renderPassFlags, RPS_RUNTIME_RENDER_PASS_SUSPENDING); + + const auto& nodeDeclInfo = *pCmdInfo->pNodeDecl; + + if (!bIsRenderPassSuspending && (nodeDeclInfo.pRenderPassInfo) && + (nodeDeclInfo.pRenderPassInfo->resolveTargetsMask != 0)) + { + auto rpInfo = *nodeDeclInfo.pRenderPassInfo; + auto resolveDsts = rpInfo.GetResolveTargetRefs(); + auto resolveSrcs = rpInfo.GetRenderTargetRefs(); + + uint32_t srcMask = rpInfo.renderTargetsMask; + 
uint32_t dstMask = rpInfo.resolveTargetsMask; + uint32_t srcIndex = 0; + uint32_t dstIndex = 0; + + static constexpr uint32_t RESOLVE_BATCH = 128; + D3D12_RESOURCE_BARRIER srcBarriers[RESOLVE_BATCH]; + struct + { + ID3D12Resource* pDst; + uint32_t dstSubResource; + DXGI_FORMAT format; + } resolveInfos[RESOLVE_BATCH]; + uint32_t numResolvesBatched = 0; + + auto flushResolveBatch = [&]() { + if (numResolvesBatched > 0) + { + pD3DCmdList->ResourceBarrier(numResolvesBatched, srcBarriers); + + for (uint32_t i = 0; i < numResolvesBatched; i++) + { + pD3DCmdList->ResolveSubresource(resolveInfos[i].pDst, + resolveInfos[i].dstSubResource, + srcBarriers[i].Transition.pResource, + srcBarriers[i].Transition.Subresource, + resolveInfos[i].format); + + std::swap(srcBarriers[i].Transition.StateBefore, srcBarriers[i].Transition.StateAfter); + } + + pD3DCmdList->ResourceBarrier(numResolvesBatched, srcBarriers); + + numResolvesBatched = 0; + } + }; + + while (dstMask != 0) + { + uint32_t nextRTMask = (1u << rpsFirstBitLow(srcMask)); + srcMask &= ~nextRTMask; + + if (dstMask & nextRTMask) + { + dstMask &= ~nextRTMask; + + auto& dstParamAccessInfo = nodeDeclInfo.params[resolveDsts[dstIndex].paramId]; + auto& dstAccessInfo = cmdAccesses[dstParamAccessInfo.accessOffset]; + auto& dstResInfo = resInstances[dstAccessInfo.resourceId]; + auto pD3DResDst = rpsD3D12ResourceFromHandle(dstResInfo.hRuntimeResource); + + auto& srcParamAccessInfo = nodeDeclInfo.params[resolveSrcs[srcIndex].paramId]; + auto& srcAccessInfo = cmdAccesses[srcParamAccessInfo.accessOffset]; + auto& srcResInfo = resInstances[srcAccessInfo.resourceId]; + auto pD3DResSrc = rpsD3D12ResourceFromHandle(srcResInfo.hRuntimeResource); + + RPS_ASSERT(dstAccessInfo.range.GetNumSubresources() == srcAccessInfo.range.GetNumSubresources()); + RPS_ASSERT(dstAccessInfo.range.aspectMask == 1); + RPS_ASSERT(dstAccessInfo.range.GetMipLevelCount() == 1); + + auto format = rpsFormatToDXGI(dstAccessInfo.viewFormat); + + for (uint32_t iArray = 
0; iArray < dstAccessInfo.range.GetArrayLayerCount(); iArray++) + { + uint32_t dstSubRes = D3D12CalcSubresource(dstAccessInfo.range.baseMipLevel, + iArray + dstAccessInfo.range.baseArrayLayer, + 0, + dstResInfo.desc.image.mipLevels, + dstResInfo.desc.image.arrayLayers); + uint32_t srcSubRes = D3D12CalcSubresource(srcAccessInfo.range.baseMipLevel, + iArray + srcAccessInfo.range.baseArrayLayer, + 0, + srcResInfo.desc.image.mipLevels, + srcResInfo.desc.image.arrayLayers); + + if (numResolvesBatched == RESOLVE_BATCH) + { + flushResolveBatch(); + } + + auto& barrier = srcBarriers[numResolvesBatched]; + barrier = {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION}; + barrier.Transition.pResource = pD3DResSrc; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_RESOLVE_SOURCE; + barrier.Transition.Subresource = srcSubRes; + + auto& resolve = resolveInfos[numResolvesBatched]; + resolve.pDst = pD3DResDst; + resolve.dstSubResource = dstSubRes; + resolve.format = format; + + numResolvesBatched++; + } + + dstIndex++; + } + + srcIndex++; + } + + flushResolveBatch(); + } + + return RPS_OK; + } + + RpsResult D3D12RuntimeBackend::RecordCmdFixedFunctionBindingsAndDynamicStates( + const RuntimeCmdCallbackContext& context) const + { + RPS_RETURN_OK_IF(rpsAnyBitsSet(context.pCmd->callback.flags, RPS_CMD_CALLBACK_CUSTOM_STATE_SETUP_BIT)); + + auto pD3DCmdList = GetContextD3DCmdList(context); + + // TODO: Setup common states + const auto pCmdInfo = context.pCmdInfo; + const auto& nodeDeclInfo = *context.pNodeDeclInfo; + + auto descriptorIndices = + m_accessToDescriptorMap.range(pCmdInfo->accesses.GetBegin(), pCmdInfo->accesses.size()); + + auto fixedFuncBindings = nodeDeclInfo.fixedFunctionBindings.Get(nodeDeclInfo.semanticKinds); + auto dynamicStates = nodeDeclInfo.dynamicStates.Get(nodeDeclInfo.semanticKinds); + + const auto pCmdList = pD3DCmdList; + + for (auto& binding : fixedFuncBindings) + { + auto paramIndices = 
binding.params.Get(nodeDeclInfo.semanticParamTable); + + switch (binding.semantic) + { + case RPS_SEMANTIC_VERTEX_BUFFER: + break; + case RPS_SEMANTIC_INDEX_BUFFER: + break; + case RPS_SEMANTIC_INDIRECT_ARGS: + break; + case RPS_SEMANTIC_INDIRECT_COUNT: + break; + case RPS_SEMANTIC_STREAM_OUT_BUFFER: + break; + case RPS_SEMANTIC_SHADING_RATE_IMAGE: + break; + case RPS_SEMANTIC_RENDER_TARGET: + case RPS_SEMANTIC_DEPTH_STENCIL_TARGET: + case RPS_SEMANTIC_RESOLVE_TARGET: + default: + break; + } + } + + for (auto& dynamicState : dynamicStates) + { + auto paramIndices = dynamicState.params.Get(nodeDeclInfo.semanticParamTable); + + switch (dynamicState.semantic) + { + case RPS_SEMANTIC_PRIMITIVE_TOPOLOGY: + break; + case RPS_SEMANTIC_PATCH_CONTROL_POINTS: + break; + case RPS_SEMANTIC_PRIMITIVE_STRIP_CUT_INDEX: + break; + case RPS_SEMANTIC_BLEND_FACTOR: + break; + case RPS_SEMANTIC_STENCIL_REF: + break; + case RPS_SEMANTIC_DEPTH_BOUNDS: + break; + case RPS_SEMANTIC_SAMPLE_LOCATION: + break; + case RPS_SEMANTIC_SHADING_RATE: + break; + case RPS_SEMANTIC_COLOR_CLEAR_VALUE: + case RPS_SEMANTIC_DEPTH_CLEAR_VALUE: + case RPS_SEMANTIC_STENCIL_CLEAR_VALUE: + case RPS_SEMANTIC_VIEWPORT: + case RPS_SEMANTIC_SCISSOR: + default: + break; + } + } + + return RPS_OK; + } + + RpsResult D3D12RuntimeBackend::GetCmdArgResources(const RuntimeCmdCallbackContext& context, + uint32_t argIndex, + uint32_t srcArrayIndex, + ID3D12Resource** ppResources, + uint32_t count) const + { + RPS_RETURN_ERROR_IF(argIndex >= context.pNodeDeclInfo->params.size(), RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + auto& paramAccessInfo = context.pNodeDeclInfo->params[argIndex]; + RPS_RETURN_ERROR_IF(srcArrayIndex + count > paramAccessInfo.numElements, RPS_ERROR_INDEX_OUT_OF_BOUNDS); + RPS_RETURN_ERROR_IF(!paramAccessInfo.IsResource(), RPS_ERROR_TYPE_MISMATCH); + + auto cmdAccessInfos = context.pCmdInfo->accesses.Get(context.pRenderGraph->GetCmdAccessInfos()); + RPS_ASSERT((paramAccessInfo.accessOffset + 
paramAccessInfo.numElements) <= cmdAccessInfos.size()); + + for (uint32_t i = 0; i < count; i++) + { + auto& accessInfo = cmdAccessInfos[paramAccessInfo.accessOffset + srcArrayIndex + i]; + ppResources[i] = + (accessInfo.resourceId != RPS_RESOURCE_ID_INVALID) + ? rpsD3D12ResourceFromHandle( + context.pRenderGraph->GetResourceInstance(accessInfo.resourceId).hRuntimeResource) + : nullptr; + } + + return RPS_OK; + } + + static constexpr RpsAccessFlags AccessFlagsMaybeCbvSrvUav = + RPS_ACCESS_CONSTANT_BUFFER_BIT | RPS_ACCESS_SHADER_RESOURCE_BIT | RPS_ACCESS_UNORDERED_ACCESS_BIT | + RPS_ACCESS_RAYTRACING_AS_READ_BIT; + + RpsResult D3D12RuntimeBackend::GetCmdArgDescriptors(const RuntimeCmdCallbackContext& context, + uint32_t argIndex, + uint32_t srcArrayIndex, + D3D12_CPU_DESCRIPTOR_HANDLE* pDescriptors, + uint32_t count) const + { + RPS_RETURN_ERROR_IF(argIndex >= context.pNodeDeclInfo->params.size(), RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + auto& paramAccessInfo = context.pNodeDeclInfo->params[argIndex]; + RPS_RETURN_ERROR_IF(srcArrayIndex + count > paramAccessInfo.numElements, RPS_ERROR_INDEX_OUT_OF_BOUNDS); + RPS_RETURN_ERROR_IF(paramAccessInfo.access.accessFlags & RPS_ACCESS_NO_VIEW_BIT, RPS_ERROR_INVALID_OPERATION); + + auto descriptorIndices = + m_accessToDescriptorMap.range(context.pCmdInfo->accesses.GetBegin(), context.pCmdInfo->accesses.size()); + + RPS_ASSERT((paramAccessInfo.accessOffset + paramAccessInfo.numElements) <= descriptorIndices.size()); + + // TODO: Bake descriptor type in indices? + const auto descriptorHeapType = + rpsAnyBitsSet(paramAccessInfo.access.accessFlags, AccessFlagsMaybeCbvSrvUav) + ? D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV + : (rpsAnyBitsSet(paramAccessInfo.access.accessFlags, RPS_ACCESS_RENDER_TARGET_BIT) + ? D3D12_DESCRIPTOR_HEAP_TYPE_RTV + : (rpsAnyBitsSet(paramAccessInfo.access.accessFlags, RPS_ACCESS_DEPTH_STENCIL) + ? 
D3D12_DESCRIPTOR_HEAP_TYPE_DSV + : D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES)); + + RPS_RETURN_ERROR_IF(descriptorHeapType == D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES, RPS_ERROR_INVALID_OPERATION); + + // Assuming all elements in the same parameter have the same access + + for (uint32_t i = 0; i < count; i++) + { + const uint32_t descriptorIndex = descriptorIndices[paramAccessInfo.accessOffset + srcArrayIndex + i]; + + pDescriptors[i] = (descriptorIndex != RPS_INDEX_NONE_U32) + ? m_cpuDescriptorHeaps[descriptorHeapType].Get(descriptorIndex) + : D3D12_CPU_DESCRIPTOR_HANDLE{}; + } + + return RPS_OK; + } + + RpsResult D3D12RuntimeBackend::GetCmdArgResources(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayIndex, + ID3D12Resource** ppResources, + uint32_t count) + { + RPS_CHECK_ARGS(pContext && ppResources); + auto pBackendContext = rps::RuntimeCmdCallbackContext::Get(pContext); + + return pBackendContext->GetBackend()->GetCmdArgResources( + *pBackendContext, argIndex, srcArrayIndex, ppResources, count); + } + + RpsResult D3D12RuntimeBackend::GetCmdArgDescriptors(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayIndex, + D3D12_CPU_DESCRIPTOR_HANDLE* pDescriptors, + uint32_t count) + { + RPS_CHECK_ARGS(pContext && pDescriptors); + + auto pBackendContext = rps::RuntimeCmdCallbackContext::Get(pContext); + + return pBackendContext->GetBackend()->GetCmdArgDescriptors( + *pBackendContext, argIndex, srcArrayIndex, pDescriptors, count); + } + + RpsResult D3D12RuntimeBackend::CopyCmdArgDescriptors(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayIndex, + uint32_t count, + RpsBool singleHandleToArray, + D3D12_CPU_DESCRIPTOR_HANDLE* pDstHandles) + { + RPS_CHECK_ARGS(pContext && pDstHandles); + + auto pBackendContext = rps::RuntimeCmdCallbackContext::Get(pContext); + auto pBackend = pBackendContext->GetBackend(); + auto& device = pBackend->m_device; + + auto& paramAccessInfo = 
pBackendContext->pNodeDeclInfo->params[argIndex]; + RPS_RETURN_ERROR_IF(srcArrayIndex + count > paramAccessInfo.numElements, RPS_ERROR_INDEX_OUT_OF_BOUNDS); + RPS_RETURN_ERROR_IF(paramAccessInfo.access.accessFlags & RPS_ACCESS_NO_VIEW_BIT, RPS_ERROR_INVALID_OPERATION); + + // Only expect copying CBV_SRV_UAV here. + RPS_RETURN_ERROR_IF(!rpsAnyBitsSet(paramAccessInfo.access.accessFlags, AccessFlagsMaybeCbvSrvUav), + RPS_ERROR_INVALID_OPERATION); + + constexpr auto heapType = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + + const auto descriptorSize = device.GetDescriptorSize(heapType); + + auto descriptorIndices = pBackend->m_accessToDescriptorMap.range(pBackendContext->pCmdInfo->accesses.GetBegin(), + pBackendContext->pCmdInfo->accesses.size()); + + const uint32_t baseIndexOffset = paramAccessInfo.accessOffset + srcArrayIndex; + + RPS_ASSERT((baseIndexOffset + count) <= descriptorIndices.size()); + + if (singleHandleToArray) + { + D3D12_CPU_DESCRIPTOR_HANDLE dstHdl = *pDstHandles; + D3D12_CPU_DESCRIPTOR_HANDLE srcHdl = {}; + + uint32_t lastDescriptorIndex = UINT32_MAX - 1; + uint32_t numToCopy = 0; + + for (uint32_t i = 0; i < count; i++) + { + uint32_t descriptorIndex = descriptorIndices[baseIndexOffset + i]; + + if ((lastDescriptorIndex + 1) != descriptorIndex) + { + if (numToCopy) + { + device.GetD3DDevice()->CopyDescriptorsSimple(numToCopy, dstHdl, srcHdl, heapType); + } + + srcHdl = pBackend->m_cpuDescriptorHeaps[heapType].Get(descriptorIndex); + dstHdl.ptr += descriptorSize * numToCopy; + + numToCopy = 0; + } + + lastDescriptorIndex = descriptorIndex; + numToCopy++; + } + + if (numToCopy) + { + device.GetD3DDevice()->CopyDescriptorsSimple(numToCopy, dstHdl, srcHdl, heapType); + } + } + else + { + D3D12_CPU_DESCRIPTOR_HANDLE dstHdl = *pDstHandles; + + for (uint32_t i = 0; i < count; i++) + { + uint32_t descriptorIndex = descriptorIndices[baseIndexOffset + i]; + + D3D12_CPU_DESCRIPTOR_HANDLE srcHdl = pBackend->m_cpuDescriptorHeaps[heapType].Get(descriptorIndex); + + 
device.GetD3DDevice()->CopyDescriptorsSimple(1, dstHdl, srcHdl, heapType); + + dstHdl.ptr += descriptorSize; + } + } + return RPS_OK; + } + + const D3D12RuntimeBackend* D3D12RuntimeBackend::Get(const RpsCmdCallbackContext* pContext) + { + auto pBackendContext = rps::RuntimeCmdCallbackContext::Get(pContext); + return pBackendContext->GetBackend(); + } + +} // namespace rps + +RpsResult rpsD3D12GetCmdArgResourceArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + ID3D12Resource** pResources, + uint32_t count) +{ + return rps::D3D12RuntimeBackend::GetCmdArgResources(pContext, argIndex, srcArrayOffset, pResources, count); +} + +RpsResult rpsD3D12GetCmdArgResource(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + ID3D12Resource** pResources) +{ + return rpsD3D12GetCmdArgResourceArray(pContext, argIndex, 0, pResources, 1); +} + +RpsResult rpsD3D12GetCmdArgDescriptorArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + D3D12_CPU_DESCRIPTOR_HANDLE* pHandles, + uint32_t count) +{ + return rps::D3D12RuntimeBackend::GetCmdArgDescriptors(pContext, argIndex, srcArrayOffset, pHandles, count); +} + +RpsResult rpsD3D12GetCmdArgDescriptor(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + D3D12_CPU_DESCRIPTOR_HANDLE* pHandles) +{ + return rps::D3D12RuntimeBackend::GetCmdArgDescriptors(pContext, argIndex, 0, pHandles, 1); +} + +RpsResult rpsD3D12CopyCmdArgDescriptors(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + uint32_t count, + RpsBool singleHandleToArray, + D3D12_CPU_DESCRIPTOR_HANDLE* pDstHandles) +{ + return rps::D3D12RuntimeBackend::CopyCmdArgDescriptors( + pContext, argIndex, srcArrayOffset, count, singleHandleToArray, pDstHandles); +} + +RpsResult rpsD3D12ResourceDescToRps(const D3D12_RESOURCE_DESC* pD3D12Desc, RpsResourceDesc* pRpsDesc) +{ + RPS_CHECK_ARGS(pD3D12Desc && pRpsDesc); + rps::D3D12ResourceDescToRps(pRpsDesc, 
pD3D12Desc); + return RPS_OK; +} diff --git a/src/runtime/d3d12/rps_d3d12_runtime_backend.hpp b/src/runtime/d3d12/rps_d3d12_runtime_backend.hpp new file mode 100644 index 0000000..5d1001e --- /dev/null +++ b/src/runtime/d3d12/rps_d3d12_runtime_backend.hpp @@ -0,0 +1,253 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_D3D12_CMD_H_ +#define _RPS_D3D12_CMD_H_ + +#include "rps/runtime/d3d_common/rps_d3d_common.h" + +#include "runtime/common/rps_render_graph.hpp" +#include "runtime/d3d12/rps_d3d12_runtime_device.hpp" +#include "runtime/d3d12/rps_d3d12_barrier.hpp" +#include "runtime/d3d12/rps_d3d12_enhanced_barrier.hpp" + +namespace rps +{ + class D3D12RuntimeBackend : public RuntimeBackend + { + private: + struct D3D12RuntimeCmd : public RuntimeCmd + { + uint32_t barrierBatchId; + uint32_t resourceBindingInfo; + + D3D12RuntimeCmd() = default; + + D3D12RuntimeCmd(uint32_t inCmdId, uint32_t inBarrierBatchId, uint32_t inResourceBindingInfo) + : RuntimeCmd(inCmdId) + , barrierBatchId(inBarrierBatchId) + , resourceBindingInfo(inResourceBindingInfo) + { + } + }; + + public: + D3D12RuntimeBackend(D3D12RuntimeDevice& device, RenderGraph& renderGraph) + : RuntimeBackend(renderGraph) + , m_device(device) + , m_persistentPool(device.GetDevice().Allocator()) + , m_pendingReleaseResources(&m_persistentPool) + , m_frameResources(&m_persistentPool) + { + for (uint32_t iType = 0; iType < RPS_COUNTOF(m_cpuDescriptorHeaps); iType++) + { + m_cpuDescriptorHeaps[iType].descriptorIncSize = + m_device.GetDescriptorSize(D3D12_DESCRIPTOR_HEAP_TYPE(iType)); + } + +#if RPS_D3D12_ENHANCED_BARRIER_SUPPORT + if (device.GetEnhancedBarrierEnabled()) + { + m_pBarriers = m_persistentPool.New(device); + } + else +#endif //RPS_D3D12_ENHANCED_BARRIER_SUPPORT + { + m_pBarriers = 
m_persistentPool.New(device); + } + } + + virtual RpsResult RecordCommands(const RenderGraph& renderGraph, + const RpsRenderGraphRecordCommandInfo& recordInfo) const override final; + + virtual RpsResult RecordCmdRenderPassBegin(const RuntimeCmdCallbackContext& context) const override final; + + virtual RpsResult RecordCmdRenderPassEnd(const RuntimeCmdCallbackContext& context) const override final; + + virtual RpsResult RecordCmdFixedFunctionBindingsAndDynamicStates( + const RuntimeCmdCallbackContext& context) const override final; + + virtual void DestroyRuntimeResourceDeferred(ResourceInstance& resource) override final; + + RpsResult GetCmdArgResources(const RuntimeCmdCallbackContext& context, + uint32_t argIndex, + uint32_t srcArrayIndex, + ID3D12Resource** ppResources, + uint32_t count) const; + + RpsResult GetCmdArgDescriptors(const RuntimeCmdCallbackContext& context, + uint32_t argIndex, + uint32_t srcArrayIndex, + D3D12_CPU_DESCRIPTOR_HANDLE* pDescriptors, + uint32_t count) const; + + static RpsResult GetCmdArgResources(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayIndex, + ID3D12Resource** ppResources, + uint32_t count); + + static RpsResult GetCmdArgDescriptors(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayIndex, + D3D12_CPU_DESCRIPTOR_HANDLE* pDescriptors, + uint32_t count); + + static RpsResult CopyCmdArgDescriptors(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + uint32_t count, + RpsBool singleHandleToArray, + D3D12_CPU_DESCRIPTOR_HANDLE* pDstHandles); + + static const D3D12RuntimeBackend* Get(const RpsCmdCallbackContext* pContext); + + static ID3D12GraphicsCommandList* GetContextD3DCmdList(const RuntimeCmdCallbackContext& context) + { + return rpsD3D12CommandListFromHandle(context.hCommandBuffer); + } + + protected: + virtual RpsResult UpdateFrame(const RenderGraphUpdateContext& context) override final; + virtual RpsResult CreateHeaps(const 
RenderGraphUpdateContext& context, ArrayRef heaps) override final; + virtual void DestroyHeaps(ArrayRef heaps) override final; + virtual RpsResult CreateResources(const RenderGraphUpdateContext& context, + ArrayRef resources) override final; + virtual void DestroyResources(ArrayRef resources) override final; + virtual RpsResult CreateCommandResources(const RenderGraphUpdateContext& context) override final; + virtual void OnDestroy() override final; + + private: + RPS_NO_DISCARD + RpsResult CreateResourceViews(const RenderGraphUpdateContext& context, + D3D12_DESCRIPTOR_HEAP_TYPE type, + ConstArrayRef accessIndices); + + static void SetResourceDebugName(ID3D12Object* pObject, StrRef name, uint32_t index); + static void SetHeapDebugName(ID3D12Heap* pHeap, const D3D12_HEAP_DESC &heapDesc, uint32_t index); + static void SetDescriptorHeapDebugName(ID3D12DescriptorHeap* pHeap, const D3D12_DESCRIPTOR_HEAP_DESC &heapDesc, uint32_t index); + + private: + D3D12RuntimeDevice& m_device; + Arena m_persistentPool; + + ArenaVector m_runtimeCmds; + ArenaVector m_accessToDescriptorMap; + + D3D12BarrierBuilder* m_pBarriers = {}; + + struct DescriptorHeap + { + static constexpr uint32_t DescriptorHeapAllocGranularity = 64; + + ID3D12DescriptorHeap* pHeap = nullptr; + uint32_t capacity = 0; + uint32_t descriptorIncSize = 0; + + ~DescriptorHeap() + { + SafeRelease(pHeap); + } + + RpsResult Reserve(const RenderGraphUpdateContext& context, + ID3D12Device* pDevice, D3D12_DESCRIPTOR_HEAP_TYPE type, uint32_t count) + { + // Reserve 1 element for default NULL descriptor. 
+ const uint32_t allocCount = + rpsDivRoundUp(count + 1, DescriptorHeapAllocGranularity) * DescriptorHeapAllocGranularity; + + if (capacity < allocCount) + { + SafeRelease(pHeap); + + D3D12_DESCRIPTOR_HEAP_DESC desc = {}; + desc.NodeMask = 1; + desc.NumDescriptors = allocCount; + desc.Type = type; + + HRESULT hr = pDevice->CreateDescriptorHeap(&desc, IID_PPV_ARGS(&pHeap)); + if (FAILED(hr)) + { + capacity = 0; + return HRESULTToRps(hr); + } + capacity = allocCount; + // generate debug names + const bool bEnableDebugNames = + !!(context.pUpdateInfo->diagnosticFlags & RPS_DIAGNOSTIC_ENABLE_RUNTIME_DEBUG_NAMES); + if (bEnableDebugNames) + { + SetDescriptorHeapDebugName(pHeap, desc, RPS_INDEX_NONE_U32); + } + + const uint32_t descriptorSize = pDevice->GetDescriptorHandleIncrementSize(type); + const auto defaultNullHandle = D3D12_CPU_DESCRIPTOR_HANDLE{ + pHeap->GetCPUDescriptorHandleForHeapStart().ptr + descriptorSize * (capacity - 1)}; + + switch (type) + { + case D3D12_DESCRIPTOR_HEAP_TYPE_RTV: + { + D3D12_RENDER_TARGET_VIEW_DESC nullRtvDesc = {}; + // As of AgilitySDK 706, the SDK layer doesn't seem to care about + // the format/dimension of the null RTV. 
+ nullRtvDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + nullRtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + pDevice->CreateRenderTargetView(nullptr, &nullRtvDesc, defaultNullHandle); + } + break; + case D3D12_DESCRIPTOR_HEAP_TYPE_DSV: + { + D3D12_DEPTH_STENCIL_VIEW_DESC nullDsvDesc = {}; + nullDsvDesc.Format = DXGI_FORMAT_D32_FLOAT_S8X24_UINT; + nullDsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; + pDevice->CreateDepthStencilView(nullptr, &nullDsvDesc, defaultNullHandle); + } + break; + default: + break; + } + } + return RPS_OK; + } + + D3D12_CPU_DESCRIPTOR_HANDLE Get(uint32_t offset) const + { + RPS_ASSERT(capacity > 0); + + D3D12_CPU_DESCRIPTOR_HANDLE result = pHeap->GetCPUDescriptorHandleForHeapStart(); + result.ptr += descriptorIncSize * rpsMin(offset, capacity - 1); + return result; + } + }; + + DescriptorHeap m_cpuDescriptorHeaps[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES]; + + struct FrameResources + { + ArenaVector pendingResources; + + void Reset(Arena& arena) + { + pendingResources.reset(&arena); + } + + void DestroyDeviceResources() + { + std::for_each(pendingResources.begin(), pendingResources.end(), [&](auto& i) { i->Release(); }); + pendingResources.clear(); + } + }; + + ArenaVector m_pendingReleaseResources; + ArenaVector m_frameResources; + uint32_t m_currentResourceFrame = 0; + + }; +} // namespace rps + +#endif //_RPS_D3D12_CMD_H_ diff --git a/src/runtime/d3d12/rps_d3d12_runtime_backend_debug.cpp b/src/runtime/d3d12/rps_d3d12_runtime_backend_debug.cpp new file mode 100644 index 0000000..3ef752b --- /dev/null +++ b/src/runtime/d3d12/rps_d3d12_runtime_backend_debug.cpp @@ -0,0 +1,135 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#include "runtime/d3d12/rps_d3d12_runtime_backend.hpp" + +namespace rps +{ + + void SetObjectDebugName(ID3D12Object* pObject, const char *str, uint32_t globalIndex) + { + if ((pObject == nullptr) || (str == nullptr)) + return; + + WCHAR buf[RPS_NAME_MAX_LEN]; + + if (globalIndex != RPS_INDEX_NONE_U32) + { + swprintf(buf, RPS_NAME_MAX_LEN, L"%S_%u", str, globalIndex); + } + else + { + swprintf(buf, RPS_NAME_MAX_LEN, L"%S", str); + } + + pObject->SetName(buf); + } + + void D3D12RuntimeBackend::SetHeapDebugName( + ID3D12Heap* pHeap, const D3D12_HEAP_DESC &heapDesc, uint32_t index) + { + if (!pHeap) return; + + auto buf = StrBuilder<>("rps_heap_"); + + switch(heapDesc.Properties.Type) { + case D3D12_HEAP_TYPE_CUSTOM: + switch(heapDesc.Properties.MemoryPoolPreference) + { + case D3D12_MEMORY_POOL_L0: + buf.Append("custom_L0"); + break; + case D3D12_MEMORY_POOL_L1: + buf.Append("custom_L1"); + break; + default: + // bad config. cannot have custom heap and unknown mem pool. + RPS_ASSERT(false && "Invalid MemoryPoolPreference value for custom heap type"); + buf.Append("custom_unknown"); + break; + } + switch(heapDesc.Properties.CPUPageProperty) + { + case D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE: + buf.Append("_na"); + break; + case D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE: + buf.Append("_wc"); + break; + case D3D12_CPU_PAGE_PROPERTY_WRITE_BACK: + buf.Append("_wb"); + break; + default: + // bad config. cannot have custom heap and unknown page prop. 
+ RPS_ASSERT(false && "Invalid CPUPageProperty value for custom heap type"); + buf.Append("_unknown"); + break; + } + break; + case D3D12_HEAP_TYPE_READBACK: + buf.Append("readback"); + break; + case D3D12_HEAP_TYPE_UPLOAD: + buf.Append("upload"); + break; + case D3D12_HEAP_TYPE_DEFAULT: + buf.Append("default"); + break; + default: + RPS_ASSERT(false && "Invalid heap type"); + buf.Append("unknown"); + break; + } + + SetObjectDebugName(pHeap, buf.c_str(), index); + } + + void D3D12RuntimeBackend::SetDescriptorHeapDebugName( + ID3D12DescriptorHeap* pHeap, const D3D12_DESCRIPTOR_HEAP_DESC &heapDesc, uint32_t index) + { + if (!pHeap) return; + + const char *descHeapNames[] = + { + "rps_descriptor_heap_cbv_srv_uav", + "rps_descriptor_heap_sampler", + "rps_descriptor_heap_rtv", + "rps_descriptor_heap_dsv", + "rps_descriptor_heap_unknown" + }; + + static_assert(D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES == RPS_COUNTOF(descHeapNames) - 1, + "SetDescriptorHeapDebugName needs update."); + + size_t idx = rpsMin((size_t)heapDesc.Type, RPS_COUNTOF(descHeapNames) - 1); + RPS_ASSERT(idx != (RPS_COUNTOF(descHeapNames) - 1) && "Invalid descriptor heap type"); + + SetObjectDebugName(pHeap, descHeapNames[idx], index); + } + + void D3D12RuntimeBackend::SetResourceDebugName(ID3D12Object* pObject, StrRef name, uint32_t index) + { + if (!pObject || name.empty()) + { + return; + } + + char buf[RPS_NAME_MAX_LEN]; + + if (index != RPS_INDEX_NONE_U32) + { + snprintf(buf, RPS_NAME_MAX_LEN, "%s[%u]", name.str, index); + } + else + { + snprintf(buf, RPS_NAME_MAX_LEN, "%s", name.str); + } + + SetObjectDebugName(pObject, buf, RPS_INDEX_NONE_U32); + } + +} \ No newline at end of file diff --git a/src/runtime/d3d12/rps_d3d12_runtime_backend_views.cpp b/src/runtime/d3d12/rps_d3d12_runtime_backend_views.cpp new file mode 100644 index 0000000..9942ea3 --- /dev/null +++ b/src/runtime/d3d12/rps_d3d12_runtime_backend_views.cpp @@ -0,0 +1,665 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. 
All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#include "runtime/d3d12/rps_d3d12_runtime_backend.hpp" + +#include "runtime/common/rps_render_graph.hpp" +#include "runtime/common/rps_runtime_util.hpp" + +namespace rps +{ + + RpsFormat GetD3D12SRVFormat(const CmdAccessInfo& accessInfo) + { + RpsFormat viewFormat = accessInfo.viewFormat; + + switch (viewFormat) + { + case RPS_FORMAT_D32_FLOAT: + viewFormat = RPS_FORMAT_R32_FLOAT; + break; + case RPS_FORMAT_D16_UNORM: + viewFormat = RPS_FORMAT_R16_UNORM; + break; + case RPS_FORMAT_D24_UNORM_S8_UINT: + case RPS_FORMAT_R24G8_TYPELESS: + viewFormat = RPS_FORMAT_R24_UNORM_X8_TYPELESS; + break; + case RPS_FORMAT_D32_FLOAT_S8X24_UINT: + case RPS_FORMAT_R32G8X24_TYPELESS: + viewFormat = RPS_FORMAT_R32_FLOAT_X8X24_TYPELESS; + break; + default: + break; + } + + return viewFormat; + } + + RpsFormat GetD3D12DSVFormat(RpsFormat viewFormat) + { + switch (viewFormat) + { + case RPS_FORMAT_R32G8X24_TYPELESS: + viewFormat = RPS_FORMAT_D32_FLOAT_S8X24_UINT; + break; + case RPS_FORMAT_R24G8_TYPELESS: + viewFormat = RPS_FORMAT_D24_UNORM_S8_UINT; + break; + case RPS_FORMAT_R32_TYPELESS: + viewFormat = RPS_FORMAT_D32_FLOAT; + break; + case RPS_FORMAT_R16_TYPELESS: + viewFormat = RPS_FORMAT_D16_UNORM; + break; + default: + break; + } + + return viewFormat; + } + + static inline uint32_t GetD3D12ComponentMapping(uint32_t rpsMapping) + { + static_assert((uint32_t(RPS_RESOURCE_VIEW_COMPONENT_MAPPING_R) == + uint32_t(D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0)) && + (uint32_t(RPS_RESOURCE_VIEW_COMPONENT_MAPPING_G) == + uint32_t(D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1)) && + (uint32_t(RPS_RESOURCE_VIEW_COMPONENT_MAPPING_B) == + uint32_t(D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2)) && + (uint32_t(RPS_RESOURCE_VIEW_COMPONENT_MAPPING_A) == + 
uint32_t(D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3)) && + (uint32_t(RPS_RESOURCE_VIEW_COMPONENT_MAPPING_ZERO) == + uint32_t(D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0)) && + (uint32_t(RPS_RESOURCE_VIEW_COMPONENT_MAPPING_ONE) == + uint32_t(D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1)), + "Unexpected D3D12_SHADER_COMPONENT_MAPPING value"); + + return D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING((RPS_IMAGE_VIEW_GET_COMPONENT_MAPPING_CHANNEL_R(rpsMapping)), + (RPS_IMAGE_VIEW_GET_COMPONENT_MAPPING_CHANNEL_G(rpsMapping)), + (RPS_IMAGE_VIEW_GET_COMPONENT_MAPPING_CHANNEL_B(rpsMapping)), + (RPS_IMAGE_VIEW_GET_COMPONENT_MAPPING_CHANNEL_A(rpsMapping))); + } + + RpsResult InitD3D12RTVDesc(D3D12_RENDER_TARGET_VIEW_DESC* pRTVDesc, + const CmdAccessInfo& accessInfo, + const ResourceInstance& resource) + { + const auto& resDesc = resource.desc; + + RPS_ASSERT(resDesc.IsImage() || (accessInfo.viewFormat != RPS_FORMAT_UNKNOWN)); + + pRTVDesc->Format = rpsFormatToDXGI(accessInfo.viewFormat); + + RPS_ASSERT(rpsCountBits(accessInfo.range.aspectMask) == 1); + + if (resource.desc.type == RPS_RESOURCE_TYPE_IMAGE_2D) + { + if (resource.desc.image.arrayLayers <= 1) + { + if (resource.desc.image.sampleCount <= 1) + { + pRTVDesc->ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + pRTVDesc->Texture2D.MipSlice = accessInfo.range.baseMipLevel; + pRTVDesc->Texture2D.PlaneSlice = 0; + } + else + { + pRTVDesc->ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS; + } + } + else + { + if (resDesc.image.sampleCount <= 1) + { + pRTVDesc->ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + pRTVDesc->Texture2DArray.MipSlice = accessInfo.range.baseMipLevel; + pRTVDesc->Texture2DArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pRTVDesc->Texture2DArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + pRTVDesc->Texture2DArray.PlaneSlice = 0; + } + else + { + pRTVDesc->ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY; + pRTVDesc->Texture2DMSArray.FirstArraySlice = 
accessInfo.range.baseArrayLayer; + pRTVDesc->Texture2DMSArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + } + } + } + else if (resDesc.type == RPS_RESOURCE_TYPE_IMAGE_3D) + { + //TODO: No WSlice info here. Using full-resource for now + + pRTVDesc->ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D; + pRTVDesc->Texture3D.MipSlice = accessInfo.range.baseMipLevel; + pRTVDesc->Texture3D.FirstWSlice = 0; + pRTVDesc->Texture3D.WSize = resDesc.image.depth; + } + else if (resDesc.type == RPS_RESOURCE_TYPE_IMAGE_1D) + { + if (resDesc.image.arrayLayers <= 1) + { + pRTVDesc->ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1D; + pRTVDesc->Texture1D.MipSlice = accessInfo.range.baseMipLevel; + } + else + { + pRTVDesc->ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1DARRAY; + pRTVDesc->Texture1DArray.MipSlice = accessInfo.range.baseMipLevel; + pRTVDesc->Texture1DArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pRTVDesc->Texture1DArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + } + } + else if (resDesc.type == RPS_RESOURCE_TYPE_BUFFER) + { + auto pBufView = reinterpret_cast(accessInfo.pViewInfo); + const uint32_t elementSize = rpsGetFormatElementBytes(accessInfo.viewFormat); + const uint64_t bufViewBytes = GetBufferViewBytes(pBufView, resource.desc); + + RPS_RETURN_ERROR_IF(elementSize == 0, RPS_ERROR_INVALID_ARGUMENTS); + + const uint64_t numElements = bufViewBytes / elementSize; + RPS_RETURN_ERROR_IF(numElements > UINT32_MAX, RPS_ERROR_INTEGER_OVERFLOW); + RPS_RETURN_ERROR_IF(numElements > D3D12_REQ_RENDER_TO_BUFFER_WINDOW_WIDTH, RPS_ERROR_INVALID_ARGUMENTS); + + pRTVDesc->ViewDimension = D3D12_RTV_DIMENSION_BUFFER; + pRTVDesc->Buffer.FirstElement = pBufView->offset / elementSize; + pRTVDesc->Buffer.NumElements = uint32_t(numElements); + } + else + { + return RPS_ERROR_INVALID_OPERATION; + } + + return RPS_OK; + } + + RpsResult InitD3D12DSVDesc(D3D12_DEPTH_STENCIL_VIEW_DESC* pDSVDesc, + const CmdAccessInfo& accessInfo, + const ResourceInstance& resource) + { + 
RPS_RETURN_ERROR_IF(!resource.desc.IsImage(), RPS_ERROR_INVALID_OPERATION); + + // TODO: Add actual view Format info to Access. + const RpsFormat dsvFormat = GetD3D12DSVFormat(accessInfo.viewFormat); + pDSVDesc->Format = rpsFormatToDXGI(dsvFormat); + + pDSVDesc->Flags = D3D12_DSV_FLAG_NONE; + + // Check depth plane is READONLY: + if ((accessInfo.access.accessFlags & RPS_ACCESS_DEPTH_READ_BIT) && + !(accessInfo.access.accessFlags & RPS_ACCESS_DEPTH_WRITE_BIT)) + { + pDSVDesc->Flags |= D3D12_DSV_FLAG_READ_ONLY_DEPTH; + } + + // Check stencil plane is READONLY: + if (rpsFormatHasStencil(dsvFormat) && (accessInfo.access.accessFlags & RPS_ACCESS_STENCIL_READ_BIT) && + !(accessInfo.access.accessFlags & RPS_ACCESS_STENCIL_WRITE_BIT)) + { + pDSVDesc->Flags |= D3D12_DSV_FLAG_READ_ONLY_STENCIL; + } + + if (resource.desc.type == RPS_RESOURCE_TYPE_IMAGE_2D) + { + if (resource.desc.image.arrayLayers <= 1) + { + if (resource.desc.image.sampleCount <= 1) + { + pDSVDesc->ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; + pDSVDesc->Texture2D.MipSlice = accessInfo.range.baseMipLevel; + } + else + { + pDSVDesc->ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMS; + } + } + else + { + if (resource.desc.image.sampleCount <= 1) + { + pDSVDesc->ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DARRAY; + pDSVDesc->Texture2DArray.MipSlice = accessInfo.range.baseMipLevel; + pDSVDesc->Texture2DArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pDSVDesc->Texture2DArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + } + else + { + pDSVDesc->ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY; + pDSVDesc->Texture2DMSArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pDSVDesc->Texture2DMSArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + } + } + } + else if (resource.desc.type == RPS_RESOURCE_TYPE_IMAGE_1D) + { + if (resource.desc.image.arrayLayers <= 1) + { + pDSVDesc->ViewDimension = D3D12_DSV_DIMENSION_TEXTURE1D; + pDSVDesc->Texture1D.MipSlice = 
accessInfo.range.baseMipLevel; + } + else + { + pDSVDesc->ViewDimension = D3D12_DSV_DIMENSION_TEXTURE1DARRAY; + pDSVDesc->Texture1DArray.MipSlice = accessInfo.range.baseMipLevel; + pDSVDesc->Texture1DArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pDSVDesc->Texture1DArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + } + } + else + { + return RPS_ERROR_INVALID_OPERATION; + } + + return RPS_OK; + } + + RpsResult InitD3D12SRVDesc(D3D12RuntimeDevice& device, + D3D12_SHADER_RESOURCE_VIEW_DESC* pSRVDesc, + const CmdAccessInfo& accessInfo, + const ResourceInstance& resource) + { + RPS_RETURN_ERROR_IF(!IsResourceTypeValid(resource.desc.type), RPS_ERROR_INVALID_OPERATION); + + pSRVDesc->Format = rpsFormatToDXGI(GetD3D12SRVFormat(accessInfo)); + + if (resource.desc.IsBuffer()) + { + auto pBufView = reinterpret_cast(accessInfo.pViewInfo); + + pSRVDesc->Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; //TODO + + if (!(accessInfo.access.accessFlags & RPS_ACCESS_RAYTRACING_AS_READ_BIT)) + { + uint32_t elementSize = rpsGetFormatElementBytes(accessInfo.viewFormat); + + if (elementSize == 0) + { + elementSize = pBufView->stride; + } + if (elementSize == 0) + { + RPS_ASSERT(accessInfo.viewFormat == RPS_FORMAT_UNKNOWN); + elementSize = 4; //TODO: RAW + } + + const uint64_t bufViewBytes = GetBufferViewBytes(pBufView, resource.desc); + const uint64_t numElements = bufViewBytes / elementSize; + const uint64_t firstElement = pBufView->offset / elementSize; + + RPS_RETURN_ERROR_IF(numElements > UINT32_MAX, RPS_ERROR_INTEGER_OVERFLOW); + + pSRVDesc->ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + pSRVDesc->Buffer.FirstElement = firstElement; + pSRVDesc->Buffer.NumElements = uint32_t(numElements); + pSRVDesc->Buffer.StructureByteStride = pBufView->stride; + pSRVDesc->Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + + if (((accessInfo.viewFormat == RPS_FORMAT_UNKNOWN) || + (accessInfo.viewFormat == RPS_FORMAT_R32_TYPELESS)) && + (pBufView->stride == 0)) + { + 
pSRVDesc->Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; + pSRVDesc->Format = DXGI_FORMAT_R32_TYPELESS; + } + } + else + { + RPS_ASSERT("NoImpl"); + } + } + else + { + auto pImageView = reinterpret_cast(accessInfo.pViewInfo); + pSRVDesc->Shader4ComponentMapping = GetD3D12ComponentMapping(pImageView->componentMapping); + + if (resource.desc.type == RPS_RESOURCE_TYPE_IMAGE_2D) + { + if (resource.desc.image.sampleCount > 1) + { + if (resource.desc.image.arrayLayers <= 1) + { + pSRVDesc->ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS; + } + else + { + RPS_ASSERT(!(accessInfo.pViewInfo->flags & RPS_RESOURCE_VIEW_FLAG_CUBEMAP_BIT)); + { + pSRVDesc->ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; + pSRVDesc->Texture2DMSArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pSRVDesc->Texture2DMSArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + } + } + } + else if (resource.desc.image.arrayLayers <= 1) + { + pSRVDesc->ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + pSRVDesc->Texture2D.MostDetailedMip = accessInfo.range.baseMipLevel; + pSRVDesc->Texture2D.MipLevels = accessInfo.range.GetMipLevelCount(); + pSRVDesc->Texture2D.PlaneSlice = device.GetFormatPlaneIndex(accessInfo.viewFormat); + pSRVDesc->Texture2D.ResourceMinLODClamp = pImageView->minLodClamp; + } + else + { + if (!(accessInfo.pViewInfo->flags & RPS_RESOURCE_VIEW_FLAG_CUBEMAP_BIT)) + { + pSRVDesc->ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + pSRVDesc->Texture2DArray.MostDetailedMip = accessInfo.range.baseMipLevel; + pSRVDesc->Texture2DArray.MipLevels = accessInfo.range.GetMipLevelCount(); + pSRVDesc->Texture2DArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pSRVDesc->Texture2DArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + pSRVDesc->Texture2DArray.PlaneSlice = device.GetFormatPlaneIndex(accessInfo.viewFormat); + pSRVDesc->Texture2DArray.ResourceMinLODClamp = pImageView->minLodClamp; + } + else if ((accessInfo.range.GetArrayLayerCount() > 6) || 
(accessInfo.range.baseArrayLayer > 0)) + { + pSRVDesc->ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + pSRVDesc->TextureCubeArray.MostDetailedMip = accessInfo.range.baseMipLevel; + pSRVDesc->TextureCubeArray.MipLevels = accessInfo.range.GetMipLevelCount(); + pSRVDesc->TextureCubeArray.First2DArrayFace = accessInfo.range.baseArrayLayer; + pSRVDesc->TextureCubeArray.NumCubes = accessInfo.range.GetArrayLayerCount() / 6; + pSRVDesc->TextureCubeArray.ResourceMinLODClamp = pImageView->minLodClamp; + } + else + { + RPS_ASSERT(accessInfo.range.GetArrayLayerCount() == 6); + pSRVDesc->ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; + pSRVDesc->TextureCube.MostDetailedMip = accessInfo.range.baseMipLevel; + pSRVDesc->TextureCube.MipLevels = accessInfo.range.GetMipLevelCount(); + pSRVDesc->TextureCube.ResourceMinLODClamp = pImageView->minLodClamp; + } + } + } + else if (resource.desc.type == RPS_RESOURCE_TYPE_IMAGE_3D) + { + pSRVDesc->ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + pSRVDesc->Texture3D.MostDetailedMip = accessInfo.range.baseMipLevel; + pSRVDesc->Texture3D.MipLevels = accessInfo.range.GetMipLevelCount(); + pSRVDesc->Texture3D.ResourceMinLODClamp = pImageView->minLodClamp; + } + else if (resource.desc.type == RPS_RESOURCE_TYPE_IMAGE_1D) + { + if (resource.desc.image.arrayLayers <= 1) + { + pSRVDesc->ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; + pSRVDesc->Texture1D.MostDetailedMip = accessInfo.range.baseMipLevel; + pSRVDesc->Texture1D.MipLevels = accessInfo.range.GetMipLevelCount(); + pSRVDesc->Texture1D.ResourceMinLODClamp = pImageView->minLodClamp; + } + else + { + pSRVDesc->ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY; + pSRVDesc->Texture1DArray.MostDetailedMip = accessInfo.range.baseMipLevel; + pSRVDesc->Texture1DArray.MipLevels = accessInfo.range.GetMipLevelCount(); + pSRVDesc->Texture1DArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pSRVDesc->Texture1DArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + 
pSRVDesc->Texture1DArray.ResourceMinLODClamp = pImageView->minLodClamp; + } + } + else + { + return RPS_ERROR_INVALID_OPERATION; + } + } + + return RPS_OK; + } + + RpsResult InitD3D12CBVDesc(D3D12_CONSTANT_BUFFER_VIEW_DESC* pCBVDesc, + const CmdAccessInfo& accessInfo, + const ResourceInstance& resource) + { + RPS_RETURN_ERROR_IF(!resource.desc.IsBuffer(), RPS_ERROR_INVALID_OPERATION); + + auto pBufView = reinterpret_cast(accessInfo.pViewInfo); + auto* pD3DRes = D3D12RuntimeDevice::FromHandle(resource.hRuntimeResource); + + // NOTE: We allow DX12 debug layer to complain if CBV > D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT + // and/or if defy D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT by pBufView->offset. + + RPS_RETURN_ERROR_IF((pBufView->sizeInBytes == RPS_BUFFER_WHOLE_SIZE) && + ((resource.desc.GetBufferSize() % D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT) != 0), + RPS_ERROR_INVALID_ARGUMENTS); + + const uint64_t bufViewBytes = rpsAlignUp(GetBufferViewBytes(pBufView, resource.desc), + uint64_t(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)); + + RPS_RETURN_ERROR_IF(bufViewBytes > UINT32_MAX, RPS_ERROR_INTEGER_OVERFLOW); + RPS_RETURN_ERROR_IF((bufViewBytes + pBufView->offset) > resource.desc.GetBufferSize(), + RPS_ERROR_INVALID_ARGUMENTS); + + pCBVDesc->BufferLocation = pD3DRes->GetGPUVirtualAddress() + pBufView->offset; + pCBVDesc->SizeInBytes = uint32_t(bufViewBytes); + + return RPS_OK; + } + + RpsResult InitD3D12UAVDesc(D3D12RuntimeDevice& device, + D3D12_UNORDERED_ACCESS_VIEW_DESC* pUAVDesc, + const CmdAccessInfo& accessInfo, + const ResourceInstance& resource) + + { + const RpsFormat viewFormat = accessInfo.viewFormat; + + pUAVDesc->Format = rpsFormatToDXGI(viewFormat); + + if (resource.desc.IsBuffer()) + { + auto pBufView = reinterpret_cast(accessInfo.pViewInfo); + + uint32_t elementSize = rpsGetFormatElementBytes(viewFormat); + + if (elementSize == 0) + { + elementSize = pBufView->stride; + } + if (elementSize == 0) + { + elementSize = 4; //TODO: RAW + } 
+ + const uint64_t bufViewBytes = GetBufferViewBytes(pBufView, resource.desc); + const uint64_t numElements = bufViewBytes / elementSize; + + RPS_RETURN_ERROR_IF(numElements > UINT32_MAX, RPS_ERROR_INTEGER_OVERFLOW); + + pUAVDesc->ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + pUAVDesc->Buffer.FirstElement = pBufView->offset / elementSize; + pUAVDesc->Buffer.NumElements = uint32_t(numElements); + pUAVDesc->Buffer.CounterOffsetInBytes = 0; + pUAVDesc->Buffer.StructureByteStride = pBufView->stride; + pUAVDesc->Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + + if (((pUAVDesc->Format == DXGI_FORMAT_UNKNOWN) || (pUAVDesc->Format == DXGI_FORMAT_R32_TYPELESS)) && + (pBufView->stride == 0)) + { + pUAVDesc->Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; + pUAVDesc->Format = DXGI_FORMAT_R32_TYPELESS; + } + } + else + { + auto pImageView = reinterpret_cast(accessInfo.pViewInfo); + +#if !RPS_D3D12_MSAA_UAV_SUPPORT + RPS_ASSERT(resource.desc.image.sampleCount == 1); +#endif + + if (resource.desc.type == RPS_RESOURCE_TYPE_IMAGE_2D) + { +#if RPS_D3D12_MSAA_UAV_SUPPORT + if (resource.desc.image.sampleCount == 1) +#endif //RPS_D3D12_MSAA_UAV_SUPPORT + { + if (resource.desc.image.arrayLayers <= 1) + { + pUAVDesc->ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + pUAVDesc->Texture2D.MipSlice = accessInfo.range.baseMipLevel; + pUAVDesc->Texture2D.PlaneSlice = device.GetFormatPlaneIndex(viewFormat); + } + else + { + pUAVDesc->ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + pUAVDesc->Texture2DArray.MipSlice = accessInfo.range.baseMipLevel; + pUAVDesc->Texture2DArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pUAVDesc->Texture2DArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + pUAVDesc->Texture2DArray.PlaneSlice = device.GetFormatPlaneIndex(viewFormat); + } + } +#if RPS_D3D12_MSAA_UAV_SUPPORT + else + { + if (resource.desc.image.arrayLayers <= 1) + { + pUAVDesc->ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DMS; + } + else + { + pUAVDesc->ViewDimension = 
D3D12_UAV_DIMENSION_TEXTURE2DMSARRAY; + pUAVDesc->Texture2DMSArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pUAVDesc->Texture2DMSArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + } + } +#endif //RPS_D3D12_MSAA_UAV_SUPPORT + } + else if (resource.desc.type == RPS_RESOURCE_TYPE_IMAGE_3D) + { + pUAVDesc->ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D; + pUAVDesc->Texture3D.MipSlice = accessInfo.range.baseMipLevel; + pUAVDesc->Texture3D.FirstWSlice = 0; + pUAVDesc->Texture3D.WSize = resource.desc.image.depth; // TODO - W range + } + else if (resource.desc.type == RPS_RESOURCE_TYPE_IMAGE_1D) + { + if (resource.desc.image.arrayLayers <= 1) + { + pUAVDesc->ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D; + pUAVDesc->Texture1D.MipSlice = accessInfo.range.baseMipLevel; + } + else + { + pUAVDesc->ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1DARRAY; + pUAVDesc->Texture1DArray.MipSlice = accessInfo.range.baseMipLevel; + pUAVDesc->Texture1DArray.FirstArraySlice = accessInfo.range.baseArrayLayer; + pUAVDesc->Texture1DArray.ArraySize = accessInfo.range.GetArrayLayerCount(); + } + } + else + { + return RPS_ERROR_INVALID_OPERATION; + } + } + return RPS_OK; + } + + RpsResult D3D12RuntimeBackend::CreateResourceViews(const RenderGraphUpdateContext& context, + D3D12_DESCRIPTOR_HEAP_TYPE type, + ConstArrayRef accessIndices) + { + RPS_RETURN_OK_IF(accessIndices.empty()); + + auto& cmdAccesses = context.renderGraph.GetCmdAccessInfos(); + + auto resourceInstances = context.renderGraph.GetResourceInstances().range_all(); + auto pD3DDevice = m_device.GetD3DDevice(); + const auto descriptorSize = m_device.GetDescriptorSize(type); + + RPS_V_RETURN( + m_cpuDescriptorHeaps[type].Reserve(context, m_device.GetD3DDevice(), type, uint32_t(accessIndices.size()))); + + D3D12_CPU_DESCRIPTOR_HANDLE cpuDescHdl = m_cpuDescriptorHeaps[type].pHeap->GetCPUDescriptorHandleForHeapStart(); + + uint32_t heapOffset = 0; + + if (type == D3D12_DESCRIPTOR_HEAP_TYPE_RTV) + { + 
D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {}; + + for (auto accessIndex : accessIndices) + { + auto& access = cmdAccesses[accessIndex]; + + const auto& resource = resourceInstances[access.resourceId]; + auto* pD3DRes = D3D12RuntimeDevice::FromHandle(resource.hRuntimeResource); + + RPS_V_RETURN(InitD3D12RTVDesc(&rtvDesc, access, resource)); + + pD3DDevice->CreateRenderTargetView(pD3DRes, &rtvDesc, cpuDescHdl); + + m_accessToDescriptorMap[accessIndex] = heapOffset; + + heapOffset++; + cpuDescHdl.ptr += descriptorSize; + } + } + else if (type == D3D12_DESCRIPTOR_HEAP_TYPE_DSV) + { + D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc = {}; + + for (auto accessIndex : accessIndices) + { + auto& access = cmdAccesses[accessIndex]; + + const auto& resource = resourceInstances[access.resourceId]; + auto* pD3DRes = D3D12RuntimeDevice::FromHandle(resource.hRuntimeResource); + + RPS_V_RETURN(InitD3D12DSVDesc(&dsvDesc, access, resource)); + + pD3DDevice->CreateDepthStencilView(pD3DRes, &dsvDesc, cpuDescHdl); + + m_accessToDescriptorMap[accessIndex] = heapOffset; + + heapOffset++; + cpuDescHdl.ptr += descriptorSize; + } + } + else if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + { + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; + + for (auto accessIndex : accessIndices) + { + auto& access = cmdAccesses[accessIndex]; + + const auto& resource = resourceInstances[access.resourceId]; + auto* pD3DRes = D3D12RuntimeDevice::FromHandle(resource.hRuntimeResource); + + // TODO: Do we want to support single access with multiple views? 
+ if (access.access.accessFlags & RPS_ACCESS_SHADER_RESOURCE_BIT) + { + RPS_V_RETURN(InitD3D12SRVDesc(m_device, &srvDesc, access, resource)); + pD3DDevice->CreateShaderResourceView(pD3DRes, &srvDesc, cpuDescHdl); + } + else if (access.access.accessFlags & RPS_ACCESS_CONSTANT_BUFFER_BIT) + { + RPS_V_RETURN(InitD3D12CBVDesc(&cbvDesc, access, resource)); + pD3DDevice->CreateConstantBufferView(&cbvDesc, cpuDescHdl); + } + else if (access.access.accessFlags & RPS_ACCESS_UNORDERED_ACCESS_BIT) + { + RPS_V_RETURN(InitD3D12UAVDesc(m_device, &uavDesc, access, resource)); + // TODO: Counter from another access + pD3DDevice->CreateUnorderedAccessView(pD3DRes, nullptr, &uavDesc, cpuDescHdl); + } + + m_accessToDescriptorMap[accessIndex] = heapOffset; + + heapOffset++; + cpuDescHdl.ptr += descriptorSize; + } + } + else + { + RPS_TODO_RETURN_NOT_IMPLEMENTED(); + } + + return RPS_OK; + } +} // namespace rps diff --git a/src/runtime/d3d12/rps_d3d12_runtime_device.cpp b/src/runtime/d3d12/rps_d3d12_runtime_device.cpp new file mode 100644 index 0000000..ccad892 --- /dev/null +++ b/src/runtime/d3d12/rps_d3d12_runtime_device.cpp @@ -0,0 +1,360 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#include "rps/runtime/d3d_common/rps_d3d_common.h" + +#include "runtime/common/rps_runtime_util.hpp" +#include "runtime/common/phases/rps_pre_process.hpp" +#include "runtime/common/phases/rps_dag_build.h" +#include "runtime/common/phases/rps_access_dag_build.hpp" +#include "runtime/common/phases/rps_cmd_print.hpp" +#include "runtime/common/phases/rps_cmd_dag_print.hpp" +#include "runtime/common/phases/rps_dag_schedule.hpp" +#include "runtime/common/phases/rps_schedule_print.hpp" +#include "runtime/common/phases/rps_memory_schedule.hpp" + +#include "runtime/d3d12/rps_d3d12_runtime_device.hpp" +#include "runtime/d3d12/rps_d3d12_runtime_backend.hpp" +#include "runtime/d3d12/rps_d3d12_util.hpp" + +namespace rps +{ + D3D12RuntimeDevice::D3D12RuntimeDevice(Device* pDevice, const RpsD3D12RuntimeDeviceCreateInfo* pCreateInfo) + : RuntimeDevice(pDevice, pCreateInfo->pRuntimeCreateInfo) + , m_pD3DDevice(pCreateInfo->pD3D12Device) + , m_flags(pCreateInfo->flags) + { + m_pD3DDevice->AddRef(); + } + + RpsResult D3D12RuntimeDevice::Init() + { + RPS_V_RETURN(HRESULTToRps(m_pD3DDevice->QueryInterface(IID_PPV_ARGS(&m_pD3DDevice2)))); + + D3D12_FEATURE_DATA_D3D12_OPTIONS featureOptionsData = {}; + + HRESULT hr = m_pD3DDevice->CheckFeatureSupport( + D3D12_FEATURE_D3D12_OPTIONS, &featureOptionsData, sizeof(featureOptionsData)); + RPS_V_RETURN(HRESULTToRps(hr)); + + D3D12_FEATURE_DATA_D3D12_OPTIONS5 featureOptionsData5 = {}; + + hr = m_pD3DDevice->CheckFeatureSupport( + D3D12_FEATURE_D3D12_OPTIONS5, &featureOptionsData5, sizeof(featureOptionsData5)); + RPS_V_RETURN(HRESULTToRps(hr)); + +#if RPS_D3D12_FEATURE_D3D12_OPTIONS12_DEFINED + D3D12_FEATURE_DATA_D3D12_OPTIONS12 featureOptionsData12 = {}; + + m_bEnhancedBarriersEnabled = false; + + if (SUCCEEDED(m_pD3DDevice->CheckFeatureSupport( + D3D12_FEATURE_D3D12_OPTIONS12, &featureOptionsData12, sizeof(featureOptionsData12)))) + { + m_bEnhancedBarriersEnabled = featureOptionsData12.EnhancedBarriersSupported && + (m_flags & 
RPS_D3D12_RUNTIME_FLAG_PREFER_ENHANCED_BARRIERS); + } +#endif //RPS_D3D12_FEATURE_D3D12_OPTIONS12_DEFINED + + m_heapTier = featureOptionsData.ResourceHeapTier; + m_renderPassesTier = featureOptionsData5.RenderPassesTier; + + m_memoryTypeInfos[RPS_D3D12_HEAP_TYPE_INDEX_UPLOAD] = {0, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT}; + m_memoryTypeInfos[RPS_D3D12_HEAP_TYPE_INDEX_READBACK] = {0, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT}; + + if (m_heapTier == D3D12_RESOURCE_HEAP_TIER_2) + { + m_memoryTypeInfos[RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT] = {0, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT}; + m_memoryTypeInfos[RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT_MSAA] = {0, D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT}; + } + else + { + m_memoryTypeInfos[RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT_TIER_1_RT_DS_TEXTURE] = { + 0, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT}; + m_memoryTypeInfos[RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT_TIER_1_RT_DS_TEXTURE_MSAA] = { + 0, D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT}; + m_memoryTypeInfos[RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT_TIER_1_NON_RT_DS_TEXTURE] = { + 0, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT}; + } + + for (uint32_t iDH = 0; iDH < D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES; iDH++) + { + m_descriptorIncSizes[iDH] = m_pD3DDevice->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE(iDH)); + } + + D3D12_FEATURE_DATA_FORMAT_INFO formatInfo = {DXGI_FORMAT_UNKNOWN, 0}; + + for (uint32_t i = 0; i < RPS_FORMAT_COUNT; i++) + { + formatInfo.Format = rpsFormatToDXGI(RpsFormat(i)); + + if (FAILED(m_pD3DDevice->CheckFeatureSupport(D3D12_FEATURE_FORMAT_INFO, &formatInfo, sizeof(formatInfo)))) + { + // Failure means format is not supported, non-fatal. 
+ formatInfo.PlaneCount = 0; + } + m_formatPlaneCount[i] = formatInfo.PlaneCount; + } + + // TODO: + m_formatPlaneCount[RPS_FORMAT_R32_FLOAT_X8X24_TYPELESS] = 1; + m_formatPlaneCount[RPS_FORMAT_X32_TYPELESS_G8X24_UINT] = 1; + m_formatPlaneCount[RPS_FORMAT_R24_UNORM_X8_TYPELESS] = 1; + m_formatPlaneCount[RPS_FORMAT_X24_TYPELESS_G8_UINT] = 1; + + return RPS_OK; + } + + D3D12RuntimeDevice::~D3D12RuntimeDevice() + { + SafeRelease(m_pD3DDevice2); + SafeRelease(m_pD3DDevice); + } + + RpsResult D3D12RuntimeDevice::BuildDefaultRenderGraphPhases(RenderGraph& renderGraph) + { + RPS_V_RETURN(renderGraph.ReservePhases(8)); + RPS_V_RETURN(renderGraph.AddPhase()); + RPS_V_RETURN(renderGraph.AddPhase()); + RPS_V_RETURN(renderGraph.AddPhase()); + RPS_V_RETURN(renderGraph.AddPhase(renderGraph)); + RPS_V_RETURN(renderGraph.AddPhase(renderGraph)); + RPS_V_RETURN(renderGraph.AddPhase(renderGraph)); + RPS_V_RETURN(renderGraph.AddPhase(renderGraph)); + RPS_V_RETURN(renderGraph.AddPhase()); + RPS_V_RETURN(renderGraph.AddPhase(*this, renderGraph)); + + return RPS_OK; + } + + RpsResult D3D12RuntimeDevice::InitializeSubresourceInfos(ArrayRef resInstances) + { + for (auto& resInstance : resInstances) + { + GetFullSubresourceRange( + resInstance.fullSubresourceRange, resInstance.desc, GetResourcePlaneMask(resInstance.desc)); + + resInstance.numSubResources = GetSubresourceCount(resInstance.desc); + } + + return RPS_OK; + } + + RpsResult D3D12RuntimeDevice::InitializeResourceAllocInfos(ArrayRef resInstances) + { + for (auto& resInst : resInstances) + { + if (resInst.isPendingCreate) + { + if (resInst.desc.IsBuffer() && (resInst.allAccesses.accessFlags & RPS_ACCESS_CONSTANT_BUFFER_BIT)) + { + resInst.desc.SetBufferSize(rpsAlignUp(resInst.desc.GetBufferSize(), + uint64_t(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT))); + } + + auto allocInfo = GetResourceAllocInfo(resInst); + RPS_RETURN_ERROR_IF(allocInfo.SizeInBytes > SIZE_MAX, RPS_ERROR_INTEGER_OVERFLOW); + 
RPS_RETURN_ERROR_IF(allocInfo.Alignment > UINT32_MAX, RPS_ERROR_INTEGER_OVERFLOW); + + resInst.allocRequirement.size = uint64_t(allocInfo.SizeInBytes); + resInst.allocRequirement.alignment = uint32_t(allocInfo.Alignment); + resInst.allocRequirement.memoryTypeIndex = GetD3D12HeapTypeIndex(m_heapTier, resInst); + } + } + + return RPS_OK; + } + + RpsResult D3D12RuntimeDevice::GetSubresourceRangeFromImageView(SubresourceRangePacked& outRange, + const ResourceInstance& resourceInfo, + const RpsAccessAttr& accessAttr, + const RpsImageView& imageView) + { + uint32_t viewPlaneMask = (imageView.base.viewFormat == RPS_FORMAT_UNKNOWN) + ? UINT32_MAX + : GetFormatPlaneMask(imageView.base.viewFormat); + uint32_t planeMask = GetResourcePlaneMask(resourceInfo.desc) & viewPlaneMask; + outRange = SubresourceRangePacked(planeMask, imageView.subresourceRange, resourceInfo.desc); + return RPS_OK; + } + + ConstArrayRef D3D12RuntimeDevice::GetMemoryTypeInfos() const + { + return {m_memoryTypeInfos, + size_t((m_heapTier == D3D12_RESOURCE_HEAP_TIER_1) ? 
RPS_D3D12_HEAP_TYPE_COUNT_TIER_1 + : RPS_D3D12_HEAP_TYPE_COUNT_TIER_2)}; + } + + RpsResult D3D12RuntimeDevice::DescribeMemoryType(uint32_t memoryTypeIndex, PrinterRef printer) const + { + const auto memoryTypeInfos = GetMemoryTypeInfos(); + + RPS_RETURN_ERROR_IF(memoryTypeIndex >= memoryTypeInfos.size(), RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + const auto& heapTypeInfo = GetD3D12HeapTypeInfo(memoryTypeIndex); + + static const NameValuePair heapTypeNames[] = { + RPS_INIT_NAME_VALUE_PAIR(D3D12_HEAP_TYPE_DEFAULT), + RPS_INIT_NAME_VALUE_PAIR(D3D12_HEAP_TYPE_UPLOAD), + RPS_INIT_NAME_VALUE_PAIR(D3D12_HEAP_TYPE_READBACK), + RPS_INIT_NAME_VALUE_PAIR(D3D12_HEAP_TYPE_CUSTOM), + }; + + printer.PrintValueName(heapTypeInfo.type, heapTypeNames); + + static const NameValuePair heapFlagNames[] = { + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_HEAP_FLAG_, SHARED), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_HEAP_FLAG_, DENY_BUFFERS), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_HEAP_FLAG_, ALLOW_DISPLAY), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_HEAP_FLAG_, SHARED_CROSS_ADAPTER), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_HEAP_FLAG_, DENY_RT_DS_TEXTURES), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_HEAP_FLAG_, DENY_NON_RT_DS_TEXTURES), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_HEAP_FLAG_, HARDWARE_PROTECTED), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_HEAP_FLAG_, ALLOW_WRITE_WATCH), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_HEAP_FLAG_, ALLOW_SHADER_ATOMICS), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_HEAP_FLAG_, CREATE_NOT_RESIDENT), + RPS_INIT_NAME_VALUE_PAIR_PREFIXED(D3D12_HEAP_FLAG_, CREATE_NOT_ZEROED), + }; + + printer("-FLAG_").PrintFlags(heapTypeInfo.heapFlags, heapFlagNames, "_"); + + if (heapTypeInfo.AllowMsaa) + printer("-ALLOW_MSAA"); + + return RPS_OK; + } + + const D3D12HeapTypeInfo& D3D12RuntimeDevice::GetD3D12HeapTypeInfo(uint32_t memoryTypeIndex) const + { + constexpr bool AllowMsaa = true; + constexpr bool NoMsaa = false; + + // clang-format off + static constexpr D3D12HeapTypeInfo 
s_d3dHeapTier1MemoryTypes[] = { + {D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS, NoMsaa }, + {D3D12_HEAP_TYPE_READBACK, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS, NoMsaa }, + {D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES, NoMsaa }, + {D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES, AllowMsaa }, + {D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS, NoMsaa }, + {D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES, NoMsaa }, + }; + + //TODO: UMA + static constexpr D3D12HeapTypeInfo s_d3dHeapTier2MemoryTypes[] = { + {D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE, NoMsaa }, + {D3D12_HEAP_TYPE_READBACK, D3D12_HEAP_FLAG_NONE, NoMsaa }, + {D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_NONE, NoMsaa }, + {D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_NONE, AllowMsaa }, + }; + // clang-format on + + if (m_heapTier == D3D12_RESOURCE_HEAP_TIER_2) + { + return s_d3dHeapTier2MemoryTypes[memoryTypeIndex]; + } + else + { + return s_d3dHeapTier1MemoryTypes[memoryTypeIndex]; + } + } + + D3D12_RESOURCE_ALLOCATION_INFO D3D12RuntimeDevice::GetResourceAllocInfo(const ResourceInstance& resInstance) const + { + D3D12_RESOURCE_DESC d3d12Desc; + CalcD3D12ResourceDesc(&d3d12Desc, resInstance); // TODO: Should cache D3D12 res desc? + return m_pD3DDevice->GetResourceAllocationInfo(1, 1, &d3d12Desc); + } + + uint32_t D3D12RuntimeDevice::GetSubresourceCount(const ResourceDescPacked& resDesc) const + { + return resDesc.IsBuffer() ? 1 + : (((resDesc.type == RPS_RESOURCE_TYPE_IMAGE_3D) ? 1 : resDesc.image.arrayLayers) * + resDesc.image.mipLevels * GetFormatPlaneCount(resDesc.image.format)); + } + + uint32_t D3D12RuntimeDevice::GetResourcePlaneMask(const ResourceDescPacked& resDesc) const + { + return resDesc.IsBuffer() ? 
1u : GetFormatPlaneMask(resDesc.image.format); + } + + void D3D12BuiltInClearColorRegions(const RpsCmdCallbackContext* pContext); + void D3D12BuiltInClearColor(const RpsCmdCallbackContext* pContext); + void D3D12BuiltInClearDepthStencil(const RpsCmdCallbackContext* pContext); + void D3D12BuiltInClearDepthStencilRegions(const RpsCmdCallbackContext* pContext); + void D3D12BuiltInClearTextureUAV(const RpsCmdCallbackContext* pContext); + void D3D12BuiltInClearTextureUAVRegions(const RpsCmdCallbackContext* pContext); + void D3D12BuiltInClearBufferUAV(const RpsCmdCallbackContext* pContext); + void D3D12BuiltInCopyTexture(const RpsCmdCallbackContext* pContext); + void D3D12BuiltInCopyBuffer(const RpsCmdCallbackContext* pContext); + void D3D12BuiltInCopyTextureToBuffer(const RpsCmdCallbackContext* pContext); + void D3D12BuiltInCopyBufferToTexture(const RpsCmdCallbackContext* pContext); + void D3D12BuiltInResolve(const RpsCmdCallbackContext* pContext); + + ConstArrayRef D3D12RuntimeDevice::GetBuiltInNodes() const + { + static const BuiltInNodeInfo c_builtInNodes[] = { + {"clear_color", {&D3D12BuiltInClearColor, nullptr}}, + {"clear_color_regions", {&D3D12BuiltInClearColorRegions, nullptr}}, + {"clear_depth_stencil", {&D3D12BuiltInClearDepthStencil, nullptr}}, + {"clear_depth_stencil_regions", {&D3D12BuiltInClearDepthStencilRegions, nullptr}}, + {"clear_texture", {&D3D12BuiltInClearTextureUAV, nullptr}}, + {"clear_texture_regions", {&D3D12BuiltInClearTextureUAVRegions, nullptr}}, + {"clear_buffer", {&D3D12BuiltInClearBufferUAV,nullptr}}, + {"copy_texture", {&D3D12BuiltInCopyTexture,nullptr}}, + {"copy_buffer", {&D3D12BuiltInCopyBuffer,nullptr}}, + {"copy_texture_to_buffer", {&D3D12BuiltInCopyTextureToBuffer,nullptr}}, + {"copy_buffer_to_texture", {&D3D12BuiltInCopyBufferToTexture,nullptr}}, + {"resolve", {&D3D12BuiltInResolve,nullptr}}, + }; + + return c_builtInNodes; + } + + bool D3D12RuntimeDevice::CalculateAccessTransition(const RpsAccessAttr& beforeAccess, + const 
RpsAccessAttr& afterAccess, + AccessTransitionInfo& results) const + { + + const bool bHasClear = rpsAnyBitsSet(beforeAccess.accessFlags | afterAccess.accessFlags, RPS_ACCESS_CLEAR_BIT); + + const bool bBothAreRenderTarget = ((beforeAccess.accessFlags & RPS_ACCESS_RENDER_TARGET_BIT) && + (afterAccess.accessFlags & RPS_ACCESS_RENDER_TARGET_BIT)); + + const bool bBothAreDepthStencil = ((beforeAccess.accessFlags & RPS_ACCESS_DEPTH_STENCIL_WRITE) && + (afterAccess.accessFlags & RPS_ACCESS_DEPTH_STENCIL_WRITE)); + + const bool bDepthStencilRWTransition = + IsDepthStencilReadWriteTransition(beforeAccess.accessFlags, afterAccess.accessFlags); + + const bool bBothAreUAV = ((beforeAccess.accessFlags & RPS_ACCESS_UNORDERED_ACCESS_BIT) && + (afterAccess.accessFlags & RPS_ACCESS_UNORDERED_ACCESS_BIT)); + + if (bBothAreRenderTarget || (bBothAreDepthStencil && !bDepthStencilRWTransition) || + (bHasClear && !GetEnhancedBarrierEnabled() && bBothAreUAV)) + { + // D3D12 doesn't need a barrier between Clear RTV/DSV and RTV/DSV access, or ClearUAV to UAV without enhanced barrier. + results.bKeepOrdering = true; + results.bMergedAccessStates = true; + results.bTransition = false; + results.mergedAccess = beforeAccess | afterAccess; + + return true; + } + + return false; + } +} // namespace rps + +RpsResult rpsD3D12RuntimeDeviceCreate(const RpsD3D12RuntimeDeviceCreateInfo* pCreateInfo, RpsDevice* phDevice) +{ + RPS_CHECK_ARGS(pCreateInfo && pCreateInfo->pD3D12Device); + + RpsResult result = + rps::RuntimeDevice::Create(phDevice, pCreateInfo->pDeviceCreateInfo, pCreateInfo); + + return result; +} diff --git a/src/runtime/d3d12/rps_d3d12_runtime_device.hpp b/src/runtime/d3d12/rps_d3d12_runtime_device.hpp new file mode 100644 index 0000000..bdabdba --- /dev/null +++ b/src/runtime/d3d12/rps_d3d12_runtime_device.hpp @@ -0,0 +1,132 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_DEVICE_D3D12_H_ +#define _RPS_DEVICE_D3D12_H_ + +#include "rps/runtime/d3d12/rps_d3d12_runtime.h" + +#include "runtime/common/rps_runtime_device.hpp" + +namespace rps +{ + struct D3D12HeapTypeInfo + { + D3D12_HEAP_TYPE type; + D3D12_HEAP_FLAGS heapFlags; + bool AllowMsaa; + }; + + class D3D12RuntimeDevice final : public RuntimeDevice + { + public: + D3D12RuntimeDevice(Device* pDevice, const RpsD3D12RuntimeDeviceCreateInfo* pCreateInfo); + virtual ~D3D12RuntimeDevice(); + + virtual RpsResult Init() override final; + virtual RpsResult BuildDefaultRenderGraphPhases(RenderGraph& renderGraph) override final; + virtual RpsResult InitializeSubresourceInfos(ArrayRef resInstances) override final; + virtual RpsResult InitializeResourceAllocInfos(ArrayRef resInstances) override final; + virtual RpsResult GetSubresourceRangeFromImageView(SubresourceRangePacked& outRange, + const ResourceInstance& resourceInfo, + const RpsAccessAttr& accessAttr, + const RpsImageView& imageView) override final; + virtual ConstArrayRef GetMemoryTypeInfos() const override final; + virtual RpsResult DescribeMemoryType(uint32_t memoryTypeIndex, PrinterRef printer) const override; + + virtual ConstArrayRef GetBuiltInNodes() const override final; + + virtual bool CalculateAccessTransition(const RpsAccessAttr& beforeAccess, + const RpsAccessAttr& afterAccess, + AccessTransitionInfo& results) const override final; + + virtual RpsImageAspectUsageFlags GetImageAspectUsages(uint32_t aspectMask) const override final + { + return ((aspectMask & 1) ? (RPS_IMAGE_ASPECT_COLOR | RPS_IMAGE_ASPECT_DEPTH) : RPS_IMAGE_ASPECT_UNKNOWN) | + ((aspectMask & 2) ? 
RPS_IMAGE_ASPECT_STENCIL : RPS_IMAGE_ASPECT_UNKNOWN); + } + + static ID3D12Resource* FromHandle(RpsRuntimeResource hRuntimeResource) + { + return static_cast(hRuntimeResource.ptr); + } + + static RpsRuntimeResource ToHandle(ID3D12Resource* pD3DResource) + { + return RpsRuntimeResource{pD3DResource}; + } + + public: + ID3D12Device* GetD3DDevice() const + { + return m_pD3DDevice; + } + + uint32_t GetDescriptorSize(D3D12_DESCRIPTOR_HEAP_TYPE type) const + { + return m_descriptorIncSizes[type]; + } + + uint32_t GetFormatPlaneMask(RpsFormat format) const + { + switch (format) + { + case RPS_FORMAT_D24_UNORM_S8_UINT: + case RPS_FORMAT_D32_FLOAT_S8X24_UINT: + return 0x3; + case RPS_FORMAT_X24_TYPELESS_G8_UINT: + case RPS_FORMAT_X32_TYPELESS_G8X24_UINT: + return 0x2; + case RPS_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case RPS_FORMAT_R24_UNORM_X8_TYPELESS: + return 0x1; + default: + break; + } + return (1u << GetFormatPlaneCount(format)) - 1; + } + + uint32_t GetFormatPlaneIndex(RpsFormat format) const + { + uint32_t mask = GetFormatPlaneMask(format); + + RPS_ASSERT(rpsCountBits(mask) == 1); + + return rpsFirstBitLow(mask); + } + + const D3D12HeapTypeInfo& GetD3D12HeapTypeInfo(uint32_t memoryTypeIndex) const; + + bool GetEnhancedBarrierEnabled() const + { + return m_bEnhancedBarriersEnabled; + } + + private: + D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocInfo(const ResourceInstance& resInstance) const; + uint32_t GetSubresourceCount(const ResourceDescPacked& resDesc) const; + uint32_t GetResourcePlaneMask(const ResourceDescPacked& resDesc) const; + + uint32_t GetFormatPlaneCount(RpsFormat format) const + { + return (uint32_t(format) < RPS_FORMAT_COUNT) ? 
m_formatPlaneCount[format] : 0; + } + + private: + ID3D12Device* m_pD3DDevice; + ID3D12Device2* m_pD3DDevice2; + RpsD3D12RuntimeFlags m_flags = {}; + D3D12_RESOURCE_HEAP_TIER m_heapTier; + D3D12_RENDER_PASS_TIER m_renderPassesTier; + bool m_bEnhancedBarriersEnabled = false; + RpsMemoryTypeInfo m_memoryTypeInfos[RPS_D3D12_HEAP_TYPE_COUNT_MAX]; + uint32_t m_descriptorIncSizes[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES]; + char m_formatPlaneCount[RPS_FORMAT_COUNT]; + }; +} + +#endif //_RPS_DEVICE_D3D12_H_ diff --git a/src/runtime/d3d12/rps_d3d12_util.hpp b/src/runtime/d3d12/rps_d3d12_util.hpp new file mode 100644 index 0000000..80b9758 --- /dev/null +++ b/src/runtime/d3d12/rps_d3d12_util.hpp @@ -0,0 +1,250 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_D3D12_UTILS_H_ +#define _RPS_D3D12_UTILS_H_ + +#include "rps/runtime/d3d12/rps_d3d12_runtime.h" +#include "runtime/d3d_common/rps_d3d_common_util.hpp" + +namespace rps +{ + constexpr uint32_t D3D12CalcSubresource( + uint32_t MipSlice, uint32_t ArraySlice, uint32_t PlaneSlice, uint32_t MipLevels, uint32_t ArraySize) noexcept + { + return MipSlice + ArraySlice * MipLevels + PlaneSlice * MipLevels * ArraySize; + } + + static inline D3D12_RESOURCE_DIMENSION GetD3D12ResourceDimension(RpsResourceType type) + { + switch (type) + { + case RPS_RESOURCE_TYPE_BUFFER: + return D3D12_RESOURCE_DIMENSION_BUFFER; + case RPS_RESOURCE_TYPE_IMAGE_2D: + return D3D12_RESOURCE_DIMENSION_TEXTURE2D; + case RPS_RESOURCE_TYPE_IMAGE_3D: + return D3D12_RESOURCE_DIMENSION_TEXTURE3D; + case RPS_RESOURCE_TYPE_IMAGE_1D: + return D3D12_RESOURCE_DIMENSION_TEXTURE1D; + default: + break; + } + return D3D12_RESOURCE_DIMENSION_UNKNOWN; + } + + static inline RpsResourceType D3D12ResourceDimensionToRps(D3D12_RESOURCE_DIMENSION type) + { + 
switch (type) + { + case D3D12_RESOURCE_DIMENSION_BUFFER: + return RPS_RESOURCE_TYPE_BUFFER; + case D3D12_RESOURCE_DIMENSION_TEXTURE2D: + return RPS_RESOURCE_TYPE_IMAGE_2D; + case D3D12_RESOURCE_DIMENSION_TEXTURE3D: + return RPS_RESOURCE_TYPE_IMAGE_3D; + case D3D12_RESOURCE_DIMENSION_TEXTURE1D: + return RPS_RESOURCE_TYPE_IMAGE_1D; + default: + break; + } + return RPS_RESOURCE_TYPE_UNKNOWN; + } + + static inline D3D12_RESOURCE_FLAGS GetD3D12ResourceFlags(const ResourceInstance& resInfo) + { + D3D12_RESOURCE_FLAGS result = D3D12_RESOURCE_FLAG_NONE; + + if (resInfo.allAccesses.accessFlags & RPS_ACCESS_UNORDERED_ACCESS_BIT) + result |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + + if (resInfo.allAccesses.accessFlags & RPS_ACCESS_RENDER_TARGET_BIT) + result |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + + if (resInfo.allAccesses.accessFlags & RPS_ACCESS_DEPTH_STENCIL) + { + result |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + + if (!(resInfo.allAccesses.accessFlags & RPS_ACCESS_SHADER_RESOURCE_BIT)) + { + result |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + } + } + +#if RPS_D3D12_ENHANCED_BARRIER_SUPPORT + if (resInfo.allAccesses.accessFlags & (RPS_ACCESS_RAYTRACING_AS_BUILD_BIT | RPS_ACCESS_RAYTRACING_AS_READ_BIT)) + { + result |= D3D12_RESOURCE_FLAG_RAYTRACING_ACCELERATION_STRUCTURE; + } +#endif //RPS_D3D12_ENHANCED_BARRIER_SUPPORT + + return result; + } + + static inline RpsResourceFlags D3D12ResourceFlagsToRps(D3D12_RESOURCE_FLAGS flags) + { + RpsResourceFlags result = RPS_RESOURCE_FLAG_NONE; + + if (flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) + result |= RPS_ACCESS_UNORDERED_ACCESS_BIT; + + if (flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) + result |= RPS_ACCESS_RENDER_TARGET_BIT; + + if (flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) + { + result |= RPS_ACCESS_DEPTH_STENCIL; + + if (!(flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE)) + { + result |= RPS_ACCESS_SHADER_RESOURCE_BIT; + } + } + +#if RPS_D3D12_ENHANCED_BARRIER_SUPPORT + if (flags & 
D3D12_RESOURCE_FLAG_RAYTRACING_ACCELERATION_STRUCTURE) + { + result |= (RPS_ACCESS_RAYTRACING_AS_BUILD_BIT | RPS_ACCESS_RAYTRACING_AS_READ_BIT); + } +#endif //RPS_D3D12_ENHANCED_BARRIER_SUPPORT + + return result; + } + + template + static void CalcD3D12ResourceDesc(TD3D12ResourceDesc* pD3D12Desc, const ResourceInstance& resInfo) + { + pD3D12Desc->Dimension = GetD3D12ResourceDimension(resInfo.desc.type); + + pD3D12Desc->Alignment = 0; + pD3D12Desc->Format = rpsFormatToDXGI(resInfo.desc.GetFormat()); + pD3D12Desc->Flags = GetD3D12ResourceFlags(resInfo); + + if (resInfo.desc.IsImage()) + { + pD3D12Desc->Width = resInfo.desc.image.width; + pD3D12Desc->Height = resInfo.desc.image.height; + pD3D12Desc->DepthOrArraySize = (pD3D12Desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) + ? resInfo.desc.image.depth + : resInfo.desc.image.arrayLayers; + pD3D12Desc->MipLevels = resInfo.desc.image.mipLevels; + pD3D12Desc->SampleDesc.Count = resInfo.desc.image.sampleCount; + pD3D12Desc->SampleDesc.Quality = 0; + const bool bRowMajor = !!(resInfo.desc.flags & RPS_RESOURCE_FLAG_ROWMAJOR_IMAGE_BIT); + pD3D12Desc->Layout = bRowMajor ? 
D3D12_TEXTURE_LAYOUT_ROW_MAJOR : D3D12_TEXTURE_LAYOUT_UNKNOWN; + } + else if (resInfo.desc.IsBuffer()) + { + pD3D12Desc->Width = resInfo.desc.GetBufferSize(); + pD3D12Desc->Height = 1; + pD3D12Desc->DepthOrArraySize = 1; + pD3D12Desc->MipLevels = 1; + pD3D12Desc->SampleDesc.Count = 1; + pD3D12Desc->SampleDesc.Quality = 0; + pD3D12Desc->Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + } + } + + template + static void D3D12ResourceDescToRps(RpsResourceDesc* pRpsDesc, const TD3D12ResourceDesc* pD3D12Desc) + { + pRpsDesc->type = D3D12ResourceDimensionToRps(pD3D12Desc->Dimension); + pRpsDesc->flags = D3D12ResourceFlagsToRps(pD3D12Desc->Flags); + + if ((pD3D12Desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE1D) || + (pD3D12Desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D) || + (pD3D12Desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D)) + { + pRpsDesc->image.format = rpsFormatFromDXGI(pD3D12Desc->Format); + pRpsDesc->image.width = uint32_t(pD3D12Desc->Width); + pRpsDesc->image.height = pD3D12Desc->Height; + if (pD3D12Desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) + { + pRpsDesc->image.depth = pD3D12Desc->DepthOrArraySize; + } + else + { + pRpsDesc->image.arrayLayers = pD3D12Desc->DepthOrArraySize; + } + pRpsDesc->image.mipLevels = pD3D12Desc->MipLevels; + pRpsDesc->image.sampleCount = pD3D12Desc->SampleDesc.Count; + if (pD3D12Desc->Layout == D3D12_TEXTURE_LAYOUT_ROW_MAJOR) + { + pRpsDesc->flags |= RPS_RESOURCE_FLAG_ROWMAJOR_IMAGE_BIT; + } + } + else if (pD3D12Desc->Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + { + pRpsDesc->buffer.sizeInBytesHi = uint32_t(pD3D12Desc->Width >> 32u); + pRpsDesc->buffer.sizeInBytesLo = uint32_t(pD3D12Desc->Width & UINT32_MAX); + } + } + + static inline uint32_t GetD3D12HeapTypeIndex(D3D12_RESOURCE_HEAP_TIER heapTier, + const ResourceInstance& resourceInstance) + { + if (rpsAnyBitsSet(resourceInstance.allAccesses.accessFlags, RPS_ACCESS_CPU_READ_BIT)) + { + return RPS_D3D12_HEAP_TYPE_INDEX_READBACK; + } + else if 
(rpsAnyBitsSet(resourceInstance.allAccesses.accessFlags, RPS_ACCESS_CPU_WRITE_BIT)) + { + return RPS_D3D12_HEAP_TYPE_INDEX_UPLOAD; + } + + if (heapTier == D3D12_RESOURCE_HEAP_TIER_2) + { + return (resourceInstance.desc.IsImage() && (resourceInstance.desc.image.sampleCount > 1)) + ? RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT_MSAA + : RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT; + } + else if (resourceInstance.desc.IsBuffer()) + { + return RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT_TIER_1_BUFFER; + } + else if (resourceInstance.desc.IsImage()) + { + if (rpsAnyBitsSet(resourceInstance.allAccesses.accessFlags, + (RPS_ACCESS_RENDER_TARGET_BIT | RPS_ACCESS_DEPTH_STENCIL))) + { + return (resourceInstance.desc.image.sampleCount > 1) + ? RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT_TIER_1_RT_DS_TEXTURE_MSAA + : RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT_TIER_1_RT_DS_TEXTURE; + } + else + { + return RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT_TIER_1_NON_RT_DS_TEXTURE; + } + } + return RPS_D3D12_HEAP_TYPE_INDEX_DEFAULT; + } + + static inline D3D12_RESOLVE_MODE D3D12GetResolveMode(RpsResolveMode mode) + { + switch (mode) + { + case RPS_RESOLVE_MODE_AVERAGE: + return D3D12_RESOLVE_MODE_AVERAGE; + case RPS_RESOLVE_MODE_MIN: + return D3D12_RESOLVE_MODE_MIN; + case RPS_RESOLVE_MODE_MAX: + return D3D12_RESOLVE_MODE_MAX; + case RPS_RESOLVE_MODE_ENCODE_SAMPLER_FEEDBACK: + return D3D12_RESOLVE_MODE_ENCODE_SAMPLER_FEEDBACK; + case RPS_RESOLVE_MODE_DECODE_SAMPLER_FEEDBACK: + return D3D12_RESOLVE_MODE_DECODE_SAMPLER_FEEDBACK; + default: + break; + } + + RPS_ASSERT(RPS_FALSE); + return D3D12_RESOLVE_MODE_AVERAGE; + } + +} // namespace rps + +#endif //_RPS_D3D12_UTILS_H_ diff --git a/src/runtime/d3d_common/rps_d3d_common_util.hpp b/src/runtime/d3d_common/rps_d3d_common_util.hpp new file mode 100644 index 0000000..d6d45c3 --- /dev/null +++ b/src/runtime/d3d_common/rps_d3d_common_util.hpp @@ -0,0 +1,91 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef RPS_D3D_COMMON_UTIL_H +#define RPS_D3D_COMMON_UTIL_H + +#include +#include "rps/core/rps_result.h" +#include "rps/runtime/d3d_common/rps_d3d_common.h" + +#include "core/rps_util.hpp" + +namespace rps +{ + static inline RpsResult HRESULTToRps(HRESULT hr) + { + switch (hr) + { + case S_OK: + case S_FALSE: + return RPS_OK; + case E_INVALIDARG: + return RPS_ERROR_INVALID_ARGUMENTS; + case E_OUTOFMEMORY: + return RPS_ERROR_OUT_OF_MEMORY; + case E_NOTIMPL: + return RPS_ERROR_NOT_IMPLEMENTED; + } + return RPS_ERROR_UNSPECIFIED; + } + + template + inline void SafeRelease(T*& ptr) + { + if (ptr) + { + ptr->Release(); + ptr = nullptr; + } + } + + template + struct ScopedComPtr + { + T* Ptr; + + ScopedComPtr(T* p = nullptr) + : Ptr(p) + { + } + + ~ScopedComPtr() + { + if (Ptr) + { + Ptr->Release(); + } + } + + T* Get() const + { + return Ptr; + } + + T** ReleaseAndGetAddressOf() + { + SafeRelease(Ptr); + return &Ptr; + } + + T* operator->() + { + return Get(); + } + + T* operator=(T* p) + { + SafeRelease(Ptr); + Ptr = p; + } + + private: + RPS_CLASS_NO_COPY_MOVE(ScopedComPtr); + }; +} // namespace rps + +#endif //RPS_D3D_COMMON_UTIL_H diff --git a/src/runtime/rps_cmd_buf.hpp b/src/runtime/rps_cmd_buf.hpp new file mode 100644 index 0000000..fbd9c55 --- /dev/null +++ b/src/runtime/rps_cmd_buf.hpp @@ -0,0 +1,47 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+
+#ifndef _RPS_CMD_BUF_HPP_
+#define _RPS_CMD_BUF_HPP_
+
+#include "core/rps_core.hpp"
+#include "core/rps_util.hpp"
+#include "core/rps_device.hpp"
+#include "core/rps_graph.hpp"
+
+namespace rps
+{
+    // Identifiers of the built-in node declarations. All values are negative.
+    enum BuiltInNodeDeclIds
+    {
+        RPS_BUILTIN_NODE_INVALID = -1,
+        RPS_BUILTIN_NODE_SCHEDULER_BARRIER = -2, ///< Built-in node to mark a scheduler barrier.
+        RPS_BUILTIN_NODE_SUBGRAPH_BEGIN = -3, ///< Built-in node to mark beginning of a subgraph.
+        RPS_BUILTIN_NODE_SUBGRAPH_END = -4, ///< Built-in node to mark ending of a subgraph.
+        RPS_BUILTIN_NODE_BEGIN_SUBROUTINE = -5,
+        RPS_BUILTIN_NODE_END_SUBROUTINE = -6,
+
+        // Presumably pins the underlying type to a 32-bit signed integer - TODO confirm.
+        RPS_BUILTIN_NODE_FORCE_INT32 = INT32_MIN,
+    };
+
+    // One recorded command: the node declaration it refers to, the program
+    // instance it belongs to, a tag, its arguments and its callback.
+    struct Cmd
+    {
+        RpsNodeDeclId nodeDeclId = RPS_NODEDECL_ID_INVALID;
+        uint32_t programInstanceId = RPS_INDEX_NONE_U32;
+        uint32_t tag = 0;
+        // NOTE(review): the template argument of ArrayRef appears to have been
+        // lost in extraction - restore it from the upstream file.
+        ArrayRef args;
+        RpsCmdCallback callback;
+    };
+
+    // A pair of node ids; presumably `before` has to be ordered ahead of
+    // `after` - verify against the users of this struct.
+    struct NodeDependency
+    {
+        RpsNodeId before;
+        RpsNodeId after;
+    };
+
+} // namespace rps
+
+#endif // _RPS_CMD_BUF_HPP_
diff --git a/src/runtime/vk/rps_vk_built_in_nodes.cpp b/src/runtime/vk/rps_vk_built_in_nodes.cpp
new file mode 100644
index 0000000..49a73c3
--- /dev/null
+++ b/src/runtime/vk/rps_vk_built_in_nodes.cpp
@@ -0,0 +1,495 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+ +#include "rps/runtime/common/rps_render_states.h" + +#include "runtime/vk/rps_vk_runtime_device.hpp" +#include "runtime/vk/rps_vk_runtime_backend.hpp" +#include "runtime/vk/rps_vk_util.hpp" + +#include "runtime/common/rps_runtime_util.hpp" + +namespace rps +{ + + // template + // graphics node clear_color_regions( [writeonly(clear)] texture t, float4 data, uint numRects, int4 rects[MaxRects] ); + // template + // graphics node clear_depth_stencil_regions( [writeonly(clear)] texture t, RPS_CLEAR_FLAGS option, float d, uint s, uint numRects, int4 rects[MaxRects] ); + // template + // compute node clear_texture_regions( [writeonly(clear)] texture t, uint4 data, uint numRects, int4 rects[MaxRects] ); + + // graphics node clear_color ( [writeonly(clear)] texture t, float4 data ); + // graphics node clear_depth_stencil ( [writeonly(clear)] texture t, RPS_CLEAR_FLAGS option, float d, uint s ); + // compute node clear_texture ( [writeonly(clear)] texture t, uint4 data ); + // copy node clear_buffer ( [writeonly(clear)] buffer b, uint4 data ); + // copy node copy_texture ( [writeonly(copy)] texture dst, uint3 dstOffset, [readonly(copy)] texture src, uint3 srcOffset, uint3 extent ); + // copy node copy_buffer ( [writeonly(copy)] buffer dst, uint64_t dstOffset, [readonly(copy)] buffer src, uint64_t srcOffset, uint64_t size ); + // copy node copy_texture_to_buffer ( [writeonly(copy)] buffer dst, uint64_t dstByteOffset, uint rowPitch, uint3 bufferImageSize, uint3 dstOffset, [readonly(copy)] texture src, uint3 srcOffset, uint3 extent ); + // copy node copy_buffer_to_texture ( [writeonly(copy)] texture dst, uint3 dstOffset, [readonly(copy)] buffer src, uint64_t srcByteOffset, uint rowPitch, uint3 bufferImageSize, uint3 srcOffset, uint3 extent ); + // graphics node resolve ( [writeonly(resolve)] texture dst, uint2 dstOffset, [readonly(resolve)] texture src, uint2 srcOffset, uint2 extent, RPS_RESOLVE_MODE resolveMode ); + + template + void VKBuiltInClearColorImpl(const 
RpsCmdCallbackContext* pContext) + { + VkCommandBuffer hCmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + RPS_ASSERT(pContext->numArgs > 1); + + static_assert(sizeof(RpsClearValue) == sizeof(VkClearColorValue), + "Assumption 'sizeof(RpsClearValue) == sizeof(VkClearColorValue)' is no longer true."); + + auto pImageView = rpsCmdGetArg(pContext); + auto pClearValue = rpsCmdGetArg(pContext); + uint32_t numRects = 0; + const RpsRect* pRects = nullptr; + + if (HasRegions) + { + RPS_TODO(); + } + + VkImage hImg = {}; + rpsVKGetCmdArgImage(pContext, 0, &hImg); + + VkImageSubresourceRange vkRange = {}; + vkRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vkRange.baseMipLevel = pImageView->subresourceRange.baseMipLevel; + vkRange.levelCount = pImageView->subresourceRange.mipLevels; + vkRange.baseArrayLayer = pImageView->subresourceRange.baseArrayLayer; + vkRange.layerCount = pImageView->subresourceRange.arrayLayers; + + vkCmdClearColorImage(hCmdBuf, hImg, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, pClearValue, 1, &vkRange); + } + + void VKBuiltInClearColorRegions(const RpsCmdCallbackContext* pContext) + { + VKBuiltInClearColorImpl(pContext); + } + + void VKBuiltInClearColor(const RpsCmdCallbackContext* pContext) + { + VKBuiltInClearColorImpl(pContext); + } + + void VKBuiltInClearDepthStencil(const RpsCmdCallbackContext* pContext) + { + VkCommandBuffer hCmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + RPS_ASSERT(pContext->numArgs > 1); + + static_assert(sizeof(RpsClearValue) == sizeof(VkClearColorValue), + "Assumption 'sizeof(RpsClearValue) == sizeof(VkClearColorValue)' is no longer true."); + + VkClearDepthStencilValue clearValue = {}; + + auto pImageView = rpsCmdGetArg(pContext); + auto clearFlags = *rpsCmdGetArg(pContext); + clearValue.depth = *rpsCmdGetArg(pContext); + clearValue.stencil = *rpsCmdGetArg(pContext); + + VkImage hImg = {}; + rpsVKGetCmdArgImage(pContext, 0, &hImg); + + const VkImageAspectFlags aspectMask = ((clearFlags & 
RPS_CLEAR_FLAG_DEPTH) ? VK_IMAGE_ASPECT_DEPTH_BIT : 0) | + ((clearFlags & RPS_CLEAR_FLAG_STENCIL) ? VK_IMAGE_ASPECT_STENCIL_BIT : 0); + + VkImageSubresourceRange vkRange = {}; + vkRange.aspectMask = aspectMask; + vkRange.baseMipLevel = pImageView->subresourceRange.baseMipLevel; + vkRange.levelCount = pImageView->subresourceRange.mipLevels; + vkRange.baseArrayLayer = pImageView->subresourceRange.baseArrayLayer; + vkRange.layerCount = pImageView->subresourceRange.arrayLayers; + + vkCmdClearDepthStencilImage(hCmdBuf, hImg, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue, 1, &vkRange); + } + + void VKBuiltInClearDepthStencilRegions(const RpsCmdCallbackContext* pContext) + { + RPS_TODO(); + } + + void VKBuiltInClearTextureUAV(const RpsCmdCallbackContext* pContext) + { + RPS_TODO(); + } + void VKBuiltInClearTextureUAVRegions(const RpsCmdCallbackContext* pContext) + { + RPS_TODO(); + } + void VKBuiltInClearBufferUAV(const RpsCmdCallbackContext* pContext) + { + RPS_TODO(); + } + void VKBuiltInCopyTexture(const RpsCmdCallbackContext* pContext) + { + VkCommandBuffer hCmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + auto* pBackend = rps::VKRuntimeBackend::Get(pContext); + + auto* pRuntimeDevice = RuntimeDevice::Get(pBackend->GetRenderGraph().GetDevice()); + + RPS_ASSERT(pContext->numArgs == 5); + + const ResourceInstance *pDstResource, *pSrcResource; + RPS_V_REPORT_AND_RETURN(pContext, + rps::VKRuntimeBackend::GetCmdArgResourceInfos(pContext, 0, 0, &pDstResource, 1)); + RPS_V_REPORT_AND_RETURN(pContext, + rps::VKRuntimeBackend::GetCmdArgResourceInfos(pContext, 2, 0, &pSrcResource, 1)); + + auto pDstView = rpsCmdGetArg(pContext); + auto dstOffset = *rpsCmdGetArg(pContext); + auto pSrcView = rpsCmdGetArg(pContext); + auto srcOffset = *rpsCmdGetArg(pContext); + auto extent = *rpsCmdGetArg(pContext); + + uint32_t srcMipDim[3] = { + GetMipLevelDimension(pSrcResource->desc.image.width, pSrcView->subresourceRange.baseMipLevel), + 
GetMipLevelDimension(pSrcResource->desc.image.height, pSrcView->subresourceRange.baseMipLevel), + GetMipLevelDimension(pSrcResource->desc.GetImageDepth(), pSrcView->subresourceRange.baseMipLevel), + }; + + extent.width = (extent.width != UINT32_MAX) ? extent.width : (srcMipDim[0] - srcOffset.x); + extent.height = (extent.height != UINT32_MAX) ? extent.height : (srcMipDim[1] - srcOffset.y); + extent.depth = (extent.depth != UINT32_MAX) ? extent.depth : (srcMipDim[2] - srcOffset.z); + + // TODO: Handle BCn format reinterpret cast copy + RpsFormat srcFmt = (pSrcView->base.viewFormat != RPS_FORMAT_UNKNOWN) ? pSrcView->base.viewFormat + : pSrcResource->desc.image.format; + RpsFormat dstFmt = (pDstView->base.viewFormat != RPS_FORMAT_UNKNOWN) ? pDstView->base.viewFormat + : pDstResource->desc.image.format; + if (rpsFormatIsBlockCompressed(srcFmt) != rpsFormatIsBlockCompressed(dstFmt)) + { + RPS_V_REPORT_AND_RETURN(pContext, RPS_ERROR_NOT_IMPLEMENTED); + } + + RPS_ASSERT(pSrcResource->desc.GetSampleCount() == pDstResource->desc.GetSampleCount()); + +#define RPS_VK_MAX_IMAGE_COPY_INFO (32) + VkImageCopy copyInfos[RPS_VK_MAX_IMAGE_COPY_INFO]; +#undef RPS_VK_MAX_IMAGE_COPY_INFO + + VkImageSubresourceRange srcRange, dstRange; + GetVkSubresourceRange(srcRange, + SubresourceRangePacked(GetFormatAspectMask(srcFmt, pSrcResource->desc.GetFormat()), + pSrcView->subresourceRange)); + GetVkSubresourceRange(dstRange, + SubresourceRangePacked(GetFormatAspectMask(dstFmt, pDstResource->desc.GetFormat()), + pDstView->subresourceRange)); + + const uint32_t numMipLevels = + rpsMin(pSrcView->subresourceRange.mipLevels, pDstView->subresourceRange.mipLevels); + const uint32_t numArrayLayers = + rpsMin(pSrcView->subresourceRange.arrayLayers, pDstView->subresourceRange.arrayLayers); + + RPS_ASSERT(numMipLevels < RPS_COUNTOF(copyInfos)); + + for (uint32_t iMip = 0; iMip < numMipLevels; iMip++) + { + const uint32_t srcMip = srcRange.baseMipLevel + iMip; + const uint32_t dstMip = 
dstRange.baseMipLevel + iMip; + + VkImageCopy* pCopyInfo = ©Infos[iMip]; + + pCopyInfo->srcSubresource.aspectMask = srcRange.aspectMask; + pCopyInfo->srcSubresource.mipLevel = srcMip; + pCopyInfo->srcSubresource.baseArrayLayer = srcRange.baseArrayLayer; + pCopyInfo->srcSubresource.layerCount = numArrayLayers; + pCopyInfo->srcOffset = srcOffset; + + pCopyInfo->dstSubresource.aspectMask = dstRange.aspectMask; + pCopyInfo->dstSubresource.mipLevel = dstMip; + pCopyInfo->dstSubresource.baseArrayLayer = dstRange.baseArrayLayer; + pCopyInfo->dstSubresource.layerCount = numArrayLayers; + pCopyInfo->dstOffset = dstOffset; + + pCopyInfo->extent = extent; + + extent.width = rpsMax(1u, extent.width >> 1); + extent.height = rpsMax(1u, extent.height >> 1); + extent.depth = rpsMax(1u, extent.depth >> 1); + + srcOffset.x = srcOffset.x >> 1; + srcOffset.y = srcOffset.y >> 1; + srcOffset.z = srcOffset.z >> 1; + + dstOffset.x = dstOffset.x >> 1; + dstOffset.y = dstOffset.y >> 1; + dstOffset.z = dstOffset.z >> 1; + } + + const VkImage hDstResource = rpsVKImageFromHandle(pDstResource->hRuntimeResource); + const VkImage hSrcResource = rpsVKImageFromHandle(pSrcResource->hRuntimeResource); + + vkCmdCopyImage(hCmdBuf, + hSrcResource, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + hDstResource, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + numMipLevels, + copyInfos); + } + + void VKBuiltInCopyBuffer(const RpsCmdCallbackContext* pContext) + { + VkCommandBuffer hCmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + auto* pBackend = rps::VKRuntimeBackend::Get(pContext); + + const ResourceInstance *pDstResource, *pSrcResource; + RPS_V_REPORT_AND_RETURN(pContext, + rps::RuntimeBackend::GetCmdArgResourceInfos(pContext, 0, 0, &pDstResource, 1)); + RPS_V_REPORT_AND_RETURN(pContext, + rps::RuntimeBackend::GetCmdArgResourceInfos(pContext, 2, 0, &pSrcResource, 1)); + + const auto* pDstView = rpsCmdGetArg(pContext); + uint64_t dstOffset = *rpsCmdGetArg(pContext); + const auto* pSrcView = 
rpsCmdGetArg(pContext); + uint64_t srcOffset = *rpsCmdGetArg(pContext); + uint64_t copySize = *rpsCmdGetArg(pContext); + + const uint64_t dstTotalSize = pDstResource->desc.GetBufferSize(); + const uint64_t srcTotalSize = pSrcResource->desc.GetBufferSize(); + + const VkBuffer dstBuffer = rpsVKBufferFromHandle(pDstResource->hRuntimeResource); + const VkBuffer srcBuffer = rpsVKBufferFromHandle(pSrcResource->hRuntimeResource); + + VkBufferCopy copyInfo; + copyInfo.srcOffset = srcOffset; + copyInfo.dstOffset = dstOffset; + copyInfo.size = (copySize != UINT64_MAX) ? copySize : srcTotalSize; + + vkCmdCopyBuffer(hCmdBuf, srcBuffer, dstBuffer, 1, ©Info); + } + + static constexpr bool TextureToBuffer = true; + static constexpr bool BufferToTexture = false; + + template + void VKBuiltInCopyTextureBufferCommon(const RpsCmdCallbackContext* pContext) + { + VkCommandBuffer hCmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + auto* pBackend = rps::VKRuntimeBackend::Get(pContext); + auto* pRuntimeDevice = RuntimeDevice::Get(pBackend->GetRenderGraph().GetDevice()); + + const ResourceInstance *pTextureResource, *pBufferResource; + RPS_V_REPORT_AND_RETURN( + pContext, rps::VKRuntimeBackend::GetCmdArgResourceInfos(pContext, TextureArgIdx, 0, &pTextureResource, 1)); + RPS_V_REPORT_AND_RETURN( + pContext, rps::VKRuntimeBackend::GetCmdArgResourceInfos(pContext, BufferArgIdx, 0, &pBufferResource, 1)); + + auto* pBufferView = rpsCmdGetArg(pContext); + auto* pTextureView = rpsCmdGetArg(pContext); + auto bufferByteOffset = *rpsCmdGetArg(pContext); + auto bufferRowPitch = *rpsCmdGetArg(pContext); + auto bufferImageSize = *rpsCmdGetArg(pContext); + auto bufferImgOffset = *rpsCmdGetArg(pContext); + auto textureOffset = *rpsCmdGetArg(pContext); + auto extent = *rpsCmdGetArg(pContext); + + uint32_t texMipDim[3] = { + GetMipLevelDimension(pTextureResource->desc.image.width, pTextureView->subresourceRange.baseMipLevel), + GetMipLevelDimension(pTextureResource->desc.image.height, 
pTextureView->subresourceRange.baseMipLevel), + GetMipLevelDimension(pTextureResource->desc.GetImageDepth(), pTextureView->subresourceRange.baseMipLevel), + }; + + extent.width = (extent.width != UINT32_MAX) ? extent.width : texMipDim[0]; + extent.height = (extent.height != UINT32_MAX) ? extent.height : texMipDim[1]; + extent.depth = (extent.depth != UINT32_MAX) ? extent.depth : texMipDim[2]; + + const RpsFormat imgFormat = (pTextureView->base.viewFormat != RPS_FORMAT_UNKNOWN) + ? pTextureView->base.viewFormat + : pTextureResource->desc.image.format; + + const uint32_t texelElementSize = rpsGetFormatElementBytes(imgFormat); + + const uint32_t bufferOffsetToByteOffset = + (bufferImgOffset.z * bufferImageSize.height + bufferImgOffset.y) * bufferRowPitch + + bufferImgOffset.x * texelElementSize; + + VkBufferImageCopy copyInfos; + copyInfos.bufferOffset = bufferByteOffset + bufferOffsetToByteOffset; + copyInfos.bufferRowLength = bufferRowPitch / texelElementSize; + copyInfos.bufferImageHeight = bufferImageSize.height; + copyInfos.imageOffset = textureOffset; + + copyInfos.imageSubresource.aspectMask = GetFormatAspectMask(imgFormat, pTextureResource->desc.GetFormat()); + copyInfos.imageSubresource.mipLevel = pTextureView->subresourceRange.baseMipLevel; + copyInfos.imageSubresource.baseArrayLayer = pTextureView->subresourceRange.baseArrayLayer; + copyInfos.imageSubresource.layerCount = pTextureView->subresourceRange.arrayLayers; + copyInfos.imageExtent = extent; + + const VkBuffer bufferHdl = rpsVKBufferFromHandle(pBufferResource->hRuntimeResource); + const VkImage imageHdl = rpsVKImageFromHandle(pTextureResource->hRuntimeResource); + + if (SourceIsTexture) + { + vkCmdCopyImageToBuffer(hCmdBuf, imageHdl, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, bufferHdl, 1, ©Infos); + } + else + { + vkCmdCopyBufferToImage(hCmdBuf, bufferHdl, imageHdl, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©Infos); + } + } + + void VKBuiltInCopyTextureToBuffer(const RpsCmdCallbackContext* pContext) + { + 
enum + { + BUFFER_DST = 0, + BUFFER_BYTE_OFFSET, + ROW_PITCH, + BUFFER_IMAGE_SIZE, + BUFFER_IMAGE_OFFSET, + TEXTURE_SRC, + TEXTURE_OFFSET, + EXTENT, + }; + + VKBuiltInCopyTextureBufferCommon(pContext); + } + void VKBuiltInCopyBufferToTexture(const RpsCmdCallbackContext* pContext) + { + enum + { + TEXTURE_DST = 0, + TEXTURE_OFFSET, + BUFFER_SRC, + BUFFER_BYTE_OFFSET, + ROW_PITCH, + BUFFER_IMAGE_SIZE, + BUFFER_IMAGE_OFFSET, + EXTENT, + }; + + VKBuiltInCopyTextureBufferCommon(pContext); + } + + void VKBuiltInResolve(const RpsCmdCallbackContext* pContext) + { + VkCommandBuffer hCmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + auto* pBackend = rps::VKRuntimeBackend::Get(pContext); + + auto* pRuntimeDevice = RuntimeDevice::Get(pBackend->GetRenderGraph().GetDevice()); + + RPS_ASSERT(pContext->numArgs == 6); + + const ResourceInstance *pDstResource, *pSrcResource; + RPS_V_REPORT_AND_RETURN(pContext, + rps::RuntimeBackend::GetCmdArgResourceInfos(pContext, 0, 0, &pDstResource, 1)); + RPS_V_REPORT_AND_RETURN(pContext, + rps::RuntimeBackend::GetCmdArgResourceInfos(pContext, 2, 0, &pSrcResource, 1)); + + auto pDstView = rpsCmdGetArg(pContext); + auto dstOffset = *rpsCmdGetArg(pContext); + auto pSrcView = rpsCmdGetArg(pContext); + auto srcOffset = *rpsCmdGetArg(pContext); + auto extent = *rpsCmdGetArg(pContext); + auto resolveMode = *rpsCmdGetArg(pContext); + + RPS_ASSERT(!pDstResource->desc.IsBuffer()); + RPS_ASSERT(!pSrcResource->desc.IsBuffer()); + + // The builtin resolve command only supports avg for vulkan, as that is usually used for vkCmdResolveImage. 
+ if (resolveMode != RPS_RESOLVE_MODE_AVERAGE) + { + RPS_V_REPORT_AND_RETURN(pContext, RPS_ERROR_NOT_IMPLEMENTED); + } + + if (pSrcResource->desc.GetSampleCount() < pDstResource->desc.GetSampleCount()) + { + RPS_V_REPORT_AND_RETURN(pContext, RPS_ERROR_INVALID_OPERATION); + } + + uint32_t texMipDim[3] = { + GetMipLevelDimension(pSrcResource->desc.image.width, pSrcView->subresourceRange.baseMipLevel), + GetMipLevelDimension(pSrcResource->desc.image.height, pSrcView->subresourceRange.baseMipLevel), + GetMipLevelDimension(pSrcResource->desc.GetImageDepth(), pSrcView->subresourceRange.baseMipLevel), + }; + + extent.width = (extent.width != UINT32_MAX) ? extent.width : (texMipDim[0] - srcOffset.x); + extent.height = (extent.height != UINT32_MAX) ? extent.height : (texMipDim[1] - srcOffset.y); + + RpsFormat srcFmt = (pSrcView->base.viewFormat != RPS_FORMAT_UNKNOWN) ? pSrcView->base.viewFormat + : pSrcResource->desc.image.format; + RpsFormat dstFmt = (pDstView->base.viewFormat != RPS_FORMAT_UNKNOWN) ? 
pDstView->base.viewFormat + : pDstResource->desc.image.format; + + uint32_t srcAspectMask = GetFormatAspectMask(srcFmt, pSrcResource->desc.GetFormat()); + uint32_t dstAspectMask = GetFormatAspectMask(dstFmt, pDstResource->desc.GetFormat()); + + uint32_t mipLevelCount = rpsMin(pSrcView->subresourceRange.mipLevels, pDstView->subresourceRange.mipLevels); + uint32_t arrayLayerCount = + rpsMin(pSrcView->subresourceRange.arrayLayers, pDstView->subresourceRange.arrayLayers); + +#define RPS_VK_MAX_IMAGE_RESOLVE_INFO (32) + VkImageResolve resolveInfo[RPS_VK_MAX_IMAGE_RESOLVE_INFO]; + RPS_ASSERT(mipLevelCount < RPS_VK_MAX_IMAGE_RESOLVE_INFO); +#undef RPS_VK_MAX_IMAGE_RESOLVE_INFO + + for (uint32_t iMip = 0; iMip < mipLevelCount; iMip++) + { + const uint32_t srcMip = pSrcView->subresourceRange.baseMipLevel + iMip; + const uint32_t dstMip = pDstView->subresourceRange.baseMipLevel + iMip; + + resolveInfo[iMip].srcSubresource.aspectMask = srcAspectMask; + resolveInfo[iMip].srcSubresource.baseArrayLayer = pSrcView->subresourceRange.baseArrayLayer; + resolveInfo[iMip].srcSubresource.layerCount = pSrcView->subresourceRange.arrayLayers; + resolveInfo[iMip].srcSubresource.mipLevel = srcMip; + resolveInfo[iMip].srcOffset.x = srcOffset.x >> iMip; + resolveInfo[iMip].srcOffset.y = srcOffset.y >> iMip; + resolveInfo[iMip].srcOffset.z = 0; + resolveInfo[iMip].dstSubresource.aspectMask = dstAspectMask; + resolveInfo[iMip].dstSubresource.baseArrayLayer = pDstView->subresourceRange.baseArrayLayer; + resolveInfo[iMip].dstSubresource.layerCount = pDstView->subresourceRange.arrayLayers; + resolveInfo[iMip].dstSubresource.mipLevel = dstMip; + resolveInfo[iMip].dstOffset.x = dstOffset.x >> iMip; + resolveInfo[iMip].dstOffset.y = dstOffset.y >> iMip; + resolveInfo[iMip].dstOffset.z = 0; + resolveInfo[iMip].extent.width = rpsMax(1u, extent.width >> iMip); + resolveInfo[iMip].extent.height = rpsMax(1u, extent.height >> iMip); + resolveInfo[iMip].extent.depth = 1; + } + + const VkImage hDstResource = 
rpsVKImageFromHandle(pDstResource->hRuntimeResource); + const VkImage hSrcResource = rpsVKImageFromHandle(pSrcResource->hRuntimeResource); + + vkCmdResolveImage(hCmdBuf, + hSrcResource, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + hDstResource, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + mipLevelCount, + resolveInfo); + } +} // namespace rps diff --git a/src/runtime/vk/rps_vk_formats.cpp b/src/runtime/vk/rps_vk_formats.cpp new file mode 100644 index 0000000..ee96339 --- /dev/null +++ b/src/runtime/vk/rps_vk_formats.cpp @@ -0,0 +1,293 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#include "rps/runtime/common/rps_format.h" +#include "rps/runtime/vk/rps_vk_runtime.h" + +#include "core/rps_util.hpp" + + +VkFormat rpsFormatToVK(RpsFormat rpsFmt) +{ + static const VkFormat formatMap[] = { + VK_FORMAT_UNDEFINED, // RPS_FORMAT_UNKNOWN, ///< The format is unknown. + VK_FORMAT_R32G32B32A32_UINT, // RPS_FORMAT_R32G32B32A32_TYPELESS, ///< A 4-channel, RGBA format where each channel is a typeless 32bit value. + VK_FORMAT_R32G32B32A32_SFLOAT, // RPS_FORMAT_R32G32B32A32_FLOAT, ///< A 4-channel, RGBA format where each channel is a 32bit IEEE 754 floating point value. + VK_FORMAT_R32G32B32A32_UINT, // RPS_FORMAT_R32G32B32A32_UINT, ///< A 4-channel, RGBA format where each channel is a 32bit unsigned integer. + VK_FORMAT_R32G32B32A32_SINT, // RPS_FORMAT_R32G32B32A32_SINT, ///< A 4-channel, RGBA format where each channel is a 32bit signed integer. + VK_FORMAT_R32G32B32_UINT, // RPS_FORMAT_R32G32B32_TYPELESS, ///< A 3-channel, RGB format where each channel is a typeless 32bit value. + VK_FORMAT_R32G32B32_SFLOAT, // RPS_FORMAT_R32G32B32_FLOAT, ///< A 3-channel, RGB format where each channel is a 32bit IEEE 754 floating point value. 
+ VK_FORMAT_R32G32B32_UINT, // RPS_FORMAT_R32G32B32_UINT, ///< A 3-channel, RGB format where each channel is a 32bit unsigned integer. + VK_FORMAT_R32G32B32_SINT, // RPS_FORMAT_R32G32B32_SINT, ///< A 3-channel, RGB format where each channel is a 32bit signed integer. + VK_FORMAT_R16G16B16A16_UINT, // RPS_FORMAT_R16G16B16A16_TYPELESS, ///< A 4-channel, RGBA format where each channel is a typeless 16bit value. + VK_FORMAT_R16G16B16A16_SFLOAT, // RPS_FORMAT_R16G16B16A16_FLOAT, ///< A 4-channel, RGBA format where each channel is a 16bit floating point value. + VK_FORMAT_R16G16B16A16_UNORM, // RPS_FORMAT_R16G16B16A16_UNORM, ///< A 4-channel, RGBA format where each channel is a 16bit normalized, unsigned integer. + VK_FORMAT_R16G16B16A16_UINT, // RPS_FORMAT_R16G16B16A16_UINT, ///< A 4-channel, RGBA format where each channel is a 16bit unsigned integer. + VK_FORMAT_R16G16B16A16_SNORM, // RPS_FORMAT_R16G16B16A16_SNORM, ///< A 4-channel, RGBA format where each channel is a 16bit normalized, signed integer. + VK_FORMAT_R16G16B16A16_SINT, // RPS_FORMAT_R16G16B16A16_SINT, ///< A 4-channel, RGBA format where each channel is a 16bit signed integer. + VK_FORMAT_R32G32_UINT, // RPS_FORMAT_R32G32_TYPELESS, ///< A 2-channel, RG format where each channel is a typeless 32bit value. + VK_FORMAT_R32G32_SFLOAT, // RPS_FORMAT_R32G32_FLOAT, ///< A 2-channel, RG format where each channel is a 32bit IEEE 754 floating point value. + VK_FORMAT_R32G32_UINT, // RPS_FORMAT_R32G32_UINT, ///< A 2-channel, RG format where each channel is a 32bit unsigned integer. + VK_FORMAT_R32G32_SINT, // RPS_FORMAT_R32G32_SINT, ///< A 2-channel, RG format where each channel is a 32bit signed integer. + VK_FORMAT_D32_SFLOAT_S8_UINT, // RPS_FORMAT_R32G8X24_TYPELESS, ///< A 2-channel, RG format where the first channel is a typeless 32bit value, and the second channel is a 8bit channel. 
+ VK_FORMAT_D32_SFLOAT_S8_UINT, // RPS_FORMAT_D32_FLOAT_S8X24_UINT, ///< A 1-channel, depth format where the channel contains a signed 8bit value and 24 unused. + VK_FORMAT_D32_SFLOAT_S8_UINT, // RPS_FORMAT_R32_FLOAT_X8X24_TYPELESS, ///< + VK_FORMAT_D32_SFLOAT_S8_UINT, // RPS_FORMAT_X32_TYPELESS_G8X24_UINT, ///< + VK_FORMAT_A2R10G10B10_UINT_PACK32, // RPS_FORMAT_R10G10B10A2_TYPELESS, ///< A 4-channel, RGBA format where the RGB channels are typeless 10bit values, and the A channel is a typeless 2bit channel. + VK_FORMAT_A2R10G10B10_UNORM_PACK32, // RPS_FORMAT_R10G10B10A2_UNORM, ///< A 4-channel, RGBA format where the RGB channels are 10bit normalized, unsigned integer values, and the A channel is a 2bit channel. + VK_FORMAT_A2R10G10B10_UINT_PACK32, // RPS_FORMAT_R10G10B10A2_UINT, ///< A 4-channel, RGBA format where the RGB channels are 10bit unsigned integer values, and the A channel is a 2bit channel. + VK_FORMAT_B10G11R11_UFLOAT_PACK32, // RPS_FORMAT_R11G11B10_FLOAT, ///< A 3-channel, RGB format where the RG channels are 11bit floating point values, and the B channel is a 10bit floating point value. + VK_FORMAT_R8G8B8A8_UINT, // RPS_FORMAT_R8G8B8A8_TYPELESS, ///< A 4-channel, RGBA format where each channel is typeless 8bit value. + VK_FORMAT_R8G8B8A8_UNORM, // RPS_FORMAT_R8G8B8A8_UNORM, ///< A 4-channel, RGBA format where each channel is 8bit normalized, unsigned integer value. + VK_FORMAT_A8B8G8R8_SRGB_PACK32, // RPS_FORMAT_R8G8B8A8_UNORM_SRGB, ///< A 4-channel, RGBA format where each channel is 8bit normalized, unsigned integer SRGB value. + VK_FORMAT_R8G8B8A8_UINT, // RPS_FORMAT_R8G8B8A8_UINT, ///< A 4-channel, RGBA format where each channel is 8bit unsigned integer value. + VK_FORMAT_R8G8B8A8_SNORM, // RPS_FORMAT_R8G8B8A8_SNORM, ///< A 4-channel, RGBA format where each channel is 8bit normalized, signed integer value. + VK_FORMAT_R8G8B8A8_SINT, // RPS_FORMAT_R8G8B8A8_SINT, ///< A 4-channel, RGBA format where each channel is 8bit signed integer value. 
+ VK_FORMAT_R16G16_UINT, // RPS_FORMAT_R16G16_TYPELESS, ///< A 2-channel, RG format where each channel is typeless 16bit value. + VK_FORMAT_R16G16_SFLOAT, // RPS_FORMAT_R16G16_FLOAT, ///< A 2-channel, RG format where each channel is 16bit IEEE 754 floating point value. + VK_FORMAT_R16G16_UNORM, // RPS_FORMAT_R16G16_UNORM, ///< A 2-channel, RG format where each channel is 16bit normalized, unsigned integer value. + VK_FORMAT_R16G16_UINT, // RPS_FORMAT_R16G16_UINT, ///< A 2-channel, RG format where each channel is 16bit unsigned integer value. + VK_FORMAT_R16G16_SNORM, // RPS_FORMAT_R16G16_SNORM, ///< A 2-channel, RG format where each channel is 16bit normalized, signed integer value. + VK_FORMAT_R16G16_SINT, // RPS_FORMAT_R16G16_SINT, ///< A 2-channel, RG format where each channel is 16bit signed integer value. + VK_FORMAT_R32_UINT, // RPS_FORMAT_R32_TYPELESS, ///< A single channel, R format where the channel is a typeless 32bit value. + VK_FORMAT_D32_SFLOAT, // RPS_FORMAT_D32_FLOAT, ///< A single channel, R format where the channel is a 32bit depth value. + VK_FORMAT_R32_SFLOAT, // RPS_FORMAT_R32_FLOAT, ///< A single channel, R format where the channel is a 32bit IEEE 754 floating point value. + VK_FORMAT_R32_UINT, // RPS_FORMAT_R32_UINT, ///< A single channel, R format where the channel is a 32bit unsigned integer value. + VK_FORMAT_R32_SINT, // RPS_FORMAT_R32_SINT, ///< A single channel, R format where the channel is a 32bit signed integer value. + VK_FORMAT_D24_UNORM_S8_UINT, // RPS_FORMAT_R24G8_TYPELESS, ///< + VK_FORMAT_D24_UNORM_S8_UINT, // RPS_FORMAT_D24_UNORM_S8_UINT, ///< + VK_FORMAT_D24_UNORM_S8_UINT, // RPS_FORMAT_R24_UNORM_X8_TYPELESS, ///< + VK_FORMAT_D24_UNORM_S8_UINT, // RPS_FORMAT_X24_TYPELESS_G8_UINT, ///< + VK_FORMAT_R8G8_UINT, // RPS_FORMAT_R8G8_TYPELESS, ///< A 2-channel, RG format where each channel is typeless 8bit value. 
+ VK_FORMAT_R8G8_UNORM, // RPS_FORMAT_R8G8_UNORM, ///< A 2-channel, RG format where each channel is 8bit normalized, unsigned integer value. + VK_FORMAT_R8G8_UINT, // RPS_FORMAT_R8G8_UINT, ///< A 2-channel, RG format where each channel is 8bit unsigned integer value. + VK_FORMAT_R8G8_SNORM, // RPS_FORMAT_R8G8_SNORM, ///< A 2-channel, RG format where each channel is 8bit normalized, signed integer value. + VK_FORMAT_R8G8_SINT, // RPS_FORMAT_R8G8_SINT, ///< A 2-channel, RG format where each channel is 8bit signed integer value. + VK_FORMAT_R16_UINT, // RPS_FORMAT_R16_TYPELESS, ///< A single channel, R format where the channel is a typeless 16bit value. + VK_FORMAT_R16_SFLOAT, // RPS_FORMAT_R16_FLOAT, ///< A single channel, R format where the channel is a 16bit depth value. + VK_FORMAT_D16_UNORM, // RPS_FORMAT_D16_UNORM, ///< A single channel, R format where the channel is a 16bit IEEE 754 floating point value. + VK_FORMAT_R16_UNORM, // RPS_FORMAT_R16_UNORM, ///< A single channel, R format where the channel is a 16bit unsigned integer value. + VK_FORMAT_R16_UINT, // RPS_FORMAT_R16_UINT, ///< A single channel, R format where the channel is a 16bit signed integer value. + VK_FORMAT_R16_SNORM, // RPS_FORMAT_R16_SNORM, ///< A single channel, R format where the channel is a 16bit normalized, signed integer value. + VK_FORMAT_R16_SINT, // RPS_FORMAT_R16_SINT, ///< A single channel, R format where the channel is a 16bit signed integer value. + VK_FORMAT_R8_UINT, // RPS_FORMAT_R8_TYPELESS, ///< A single channel, R format where the channel is a typeless 8bit value. + VK_FORMAT_R8_UNORM, // RPS_FORMAT_R8_UNORM, ///< A single channel, R format where the channel is a 8bit unsigned integer value. + VK_FORMAT_R8_UINT, // RPS_FORMAT_R8_UINT, ///< A single channel, R format where the channel is a 8bit signed integer value. + VK_FORMAT_R8_SNORM, // RPS_FORMAT_R8_SNORM, ///< A single channel, R format where the channel is a 8bit normalized, signed integer value. 
+ VK_FORMAT_R8_SINT, // RPS_FORMAT_R8_SINT, ///< A single channel, R format where the channel is a 8bit signed integer value. + VK_FORMAT_R8_UNORM, // RPS_FORMAT_A8_UNORM, ///< A single channel, A format where the channel is a 8bit unsigned integer value. + VK_FORMAT_UNDEFINED, // RPS_FORMAT_R1_UNORM, ///< A single channel, R format where the channel is a 1bit unsigned integer value. + VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, // RPS_FORMAT_R9G9B9E5_SHAREDEXP, ///< + VK_FORMAT_B8G8R8G8_422_UNORM_KHR, // RPS_FORMAT_R8G8_B8G8_UNORM, ///< + VK_FORMAT_G8B8G8R8_422_UNORM_KHR, // RPS_FORMAT_G8R8_G8B8_UNORM, ///< + VK_FORMAT_BC1_RGBA_UNORM_BLOCK, // RPS_FORMAT_BC1_TYPELESS, ///< + VK_FORMAT_BC1_RGBA_UNORM_BLOCK, // RPS_FORMAT_BC1_UNORM, ///< + VK_FORMAT_BC1_RGBA_SRGB_BLOCK, // RPS_FORMAT_BC1_UNORM_SRGB, ///< + VK_FORMAT_BC2_UNORM_BLOCK, // RPS_FORMAT_BC2_TYPELESS, ///< + VK_FORMAT_BC2_UNORM_BLOCK, // RPS_FORMAT_BC2_UNORM, ///< + VK_FORMAT_BC2_SRGB_BLOCK, // RPS_FORMAT_BC2_UNORM_SRGB, ///< + VK_FORMAT_BC3_UNORM_BLOCK, // RPS_FORMAT_BC3_TYPELESS, ///< + VK_FORMAT_BC3_UNORM_BLOCK, // RPS_FORMAT_BC3_UNORM, ///< + VK_FORMAT_BC3_SRGB_BLOCK, // RPS_FORMAT_BC3_UNORM_SRGB, ///< + VK_FORMAT_BC4_UNORM_BLOCK, // RPS_FORMAT_BC4_TYPELESS, ///< + VK_FORMAT_BC4_UNORM_BLOCK, // RPS_FORMAT_BC4_UNORM, ///< + VK_FORMAT_BC4_SNORM_BLOCK, // RPS_FORMAT_BC4_SNORM, ///< + VK_FORMAT_BC5_UNORM_BLOCK, // RPS_FORMAT_BC5_TYPELESS, ///< + VK_FORMAT_BC5_UNORM_BLOCK, // RPS_FORMAT_BC5_UNORM, ///< + VK_FORMAT_BC5_SNORM_BLOCK, // RPS_FORMAT_BC5_SNORM, ///< + VK_FORMAT_R5G6B5_UNORM_PACK16, // RPS_FORMAT_B5G6R5_UNORM, ///< + VK_FORMAT_A1R5G5B5_UNORM_PACK16, // RPS_FORMAT_B5G5R5A1_UNORM, ///< + VK_FORMAT_B8G8R8A8_UNORM, // RPS_FORMAT_B8G8R8A8_UNORM, ///< + VK_FORMAT_B8G8R8A8_UNORM, // RPS_FORMAT_B8G8R8X8_UNORM, ///< + VK_FORMAT_UNDEFINED, // RPS_FORMAT_R10G10B10_XR_BIAS_A2_UNORM, ///< + VK_FORMAT_B8G8R8A8_UINT, // RPS_FORMAT_B8G8R8A8_TYPELESS, ///< + VK_FORMAT_B8G8R8A8_SRGB, // RPS_FORMAT_B8G8R8A8_UNORM_SRGB, ///< + 
VK_FORMAT_B8G8R8A8_UNORM, // RPS_FORMAT_B8G8R8X8_TYPELESS, ///< + VK_FORMAT_B8G8R8A8_SRGB, // RPS_FORMAT_B8G8R8X8_UNORM_SRGB, ///< + VK_FORMAT_BC6H_UFLOAT_BLOCK, // RPS_FORMAT_BC6H_TYPELESS, ///< + VK_FORMAT_BC6H_UFLOAT_BLOCK, // RPS_FORMAT_BC6H_UF16, ///< + VK_FORMAT_BC6H_SFLOAT_BLOCK, // RPS_FORMAT_BC6H_SF16, ///< + VK_FORMAT_BC7_UNORM_BLOCK, // RPS_FORMAT_BC7_TYPELESS, ///< + VK_FORMAT_BC7_UNORM_BLOCK, // RPS_FORMAT_BC7_UNORM, ///< + VK_FORMAT_BC7_SRGB_BLOCK, // RPS_FORMAT_BC7_UNORM_SRGB, ///< + VK_FORMAT_UNDEFINED, // RPS_FORMAT_AYUV, ///< + VK_FORMAT_UNDEFINED, // RPS_FORMAT_Y410, ///< + VK_FORMAT_UNDEFINED, // RPS_FORMAT_Y416, ///< + VK_FORMAT_UNDEFINED, // RPS_FORMAT_NV12, ///< + VK_FORMAT_UNDEFINED, // RPS_FORMAT_P010, ///< + VK_FORMAT_UNDEFINED, // RPS_FORMAT_P016, ///< + VK_FORMAT_UNDEFINED, // RPS_FORMAT_420_OPAQUE, ///< + VK_FORMAT_UNDEFINED, // RPS_FORMAT_YUY2, ///< + VK_FORMAT_UNDEFINED, // RPS_FORMAT_Y210, ///< + VK_FORMAT_UNDEFINED, // RPS_FORMAT_Y216, ///< + VK_FORMAT_UNDEFINED, // RPS_FORMAT_NV11, ///< + VK_FORMAT_UNDEFINED, // RPS_FORMAT_AI44, ///< + VK_FORMAT_UNDEFINED, // RPS_FORMAT_IA44, ///< + VK_FORMAT_UNDEFINED, // RPS_FORMAT_P8, ///< + VK_FORMAT_UNDEFINED, // RPS_FORMAT_A8P8, ///< + VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT, // RPS_FORMAT_B4G4R4A4_UNORM, ///< + }; + + static_assert(RPS_COUNTOF(formatMap) == RPS_FORMAT_COUNT, "RpsFormat to VkFormat map needs update"); + + return (uint32_t)rpsFmt < RPS_COUNTOF(formatMap) ? 
formatMap[(uint32_t)rpsFmt] : VK_FORMAT_UNDEFINED; +} + +RpsFormat rpsFormatFromVK(VkFormat vkFormat) +{ + static const RpsFormat formatMap[] = { + RPS_FORMAT_UNKNOWN, // VK_FORMAT_UNDEFINED + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R4G4_UNORM_PACK8 + RPS_FORMAT_B4G4R4A4_UNORM, // VK_FORMAT_R4G4B4A4_UNORM_PACK16 + RPS_FORMAT_B4G4R4A4_UNORM, // VK_FORMAT_B4G4R4A4_UNORM_PACK16 + RPS_FORMAT_B5G6R5_UNORM, // VK_FORMAT_R5G6B5_UNORM_PACK16 + RPS_FORMAT_B5G6R5_UNORM, // VK_FORMAT_B5G6R5_UNORM_PACK16 + RPS_FORMAT_B5G5R5A1_UNORM, // VK_FORMAT_R5G5B5A1_UNORM_PACK16 + RPS_FORMAT_B5G5R5A1_UNORM, // VK_FORMAT_B5G5R5A1_UNORM_PACK16 + RPS_FORMAT_B5G5R5A1_UNORM, // VK_FORMAT_A1R5G5B5_UNORM_PACK16 + RPS_FORMAT_R8_UNORM, // VK_FORMAT_R8_UNORM + RPS_FORMAT_R8_SNORM, // VK_FORMAT_R8_SNORM + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R8_USCALED + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R8_SSCALED + RPS_FORMAT_R8_UINT, // VK_FORMAT_R8_UINT + RPS_FORMAT_R8_SINT, // VK_FORMAT_R8_SINT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R8_SRGB + RPS_FORMAT_R8G8_UNORM, // VK_FORMAT_R8G8_UNORM + RPS_FORMAT_R8G8_SNORM, // VK_FORMAT_R8G8_SNORM + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R8G8_USCALED + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R8G8_SSCALED + RPS_FORMAT_R8G8_UINT, // VK_FORMAT_R8G8_UINT + RPS_FORMAT_R8G8_SINT, // VK_FORMAT_R8G8_SINT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R8G8_SRGB + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R8G8B8_UNORM + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R8G8B8_SNORM + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R8G8B8_USCALED + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R8G8B8_SSCALED + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R8G8B8_UINT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R8G8B8_SINT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R8G8B8_SRGB + RPS_FORMAT_UNKNOWN, // VK_FORMAT_B8G8R8_UNORM + RPS_FORMAT_UNKNOWN, // VK_FORMAT_B8G8R8_SNORM + RPS_FORMAT_UNKNOWN, // VK_FORMAT_B8G8R8_USCALED + RPS_FORMAT_UNKNOWN, // VK_FORMAT_B8G8R8_SSCALED + RPS_FORMAT_UNKNOWN, // VK_FORMAT_B8G8R8_UINT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_B8G8R8_SINT + RPS_FORMAT_UNKNOWN, // 
VK_FORMAT_B8G8R8_SRGB + RPS_FORMAT_R8G8B8A8_UNORM, // VK_FORMAT_R8G8B8A8_UNORM + RPS_FORMAT_R8G8B8A8_SNORM, // VK_FORMAT_R8G8B8A8_SNORM + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R8G8B8A8_USCALED + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R8G8B8A8_SSCALED + RPS_FORMAT_R8G8B8A8_UINT, // VK_FORMAT_R8G8B8A8_UINT + RPS_FORMAT_R8G8B8A8_SINT, // VK_FORMAT_R8G8B8A8_SINT + RPS_FORMAT_R8G8B8A8_UNORM_SRGB, // VK_FORMAT_R8G8B8A8_SRGB + RPS_FORMAT_B8G8R8A8_UNORM, // VK_FORMAT_B8G8R8A8_UNORM + RPS_FORMAT_UNKNOWN, // VK_FORMAT_B8G8R8A8_SNORM + RPS_FORMAT_UNKNOWN, // VK_FORMAT_B8G8R8A8_USCALED + RPS_FORMAT_UNKNOWN, // VK_FORMAT_B8G8R8A8_SSCALED + RPS_FORMAT_UNKNOWN, // VK_FORMAT_B8G8R8A8_UINT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_B8G8R8A8_SINT + RPS_FORMAT_B8G8R8A8_UNORM_SRGB, // VK_FORMAT_B8G8R8A8_SRGB + RPS_FORMAT_R8G8B8A8_UNORM, // VK_FORMAT_A8B8G8R8_UNORM_PACK32 + RPS_FORMAT_R8G8B8A8_SNORM, // VK_FORMAT_A8B8G8R8_SNORM_PACK32 + RPS_FORMAT_UNKNOWN, // VK_FORMAT_A8B8G8R8_USCALED_PACK32 + RPS_FORMAT_UNKNOWN, // VK_FORMAT_A8B8G8R8_SSCALED_PACK32 + RPS_FORMAT_R8G8B8A8_UINT, // VK_FORMAT_A8B8G8R8_UINT_PACK32 + RPS_FORMAT_R8G8B8A8_SINT, // VK_FORMAT_A8B8G8R8_SINT_PACK32 + RPS_FORMAT_R8G8B8A8_UNORM_SRGB, // VK_FORMAT_A8B8G8R8_SRGB_PACK32 + RPS_FORMAT_R10G10B10A2_UNORM, // VK_FORMAT_A2R10G10B10_UNORM_PACK32 + RPS_FORMAT_UNKNOWN, // VK_FORMAT_A2R10G10B10_SNORM_PACK32 + RPS_FORMAT_UNKNOWN, // VK_FORMAT_A2R10G10B10_USCALED_PACK32 + RPS_FORMAT_UNKNOWN, // VK_FORMAT_A2R10G10B10_SSCALED_PACK32 + RPS_FORMAT_R10G10B10A2_UINT, // VK_FORMAT_A2R10G10B10_UINT_PACK32 + RPS_FORMAT_UNKNOWN, // VK_FORMAT_A2R10G10B10_SINT_PACK32 + RPS_FORMAT_R10G10B10A2_UNORM, // VK_FORMAT_A2B10G10R10_UNORM_PACK32 + RPS_FORMAT_UNKNOWN, // VK_FORMAT_A2B10G10R10_SNORM_PACK32 + RPS_FORMAT_UNKNOWN, // VK_FORMAT_A2B10G10R10_USCALED_PACK32 + RPS_FORMAT_UNKNOWN, // VK_FORMAT_A2B10G10R10_SSCALED_PACK32 + RPS_FORMAT_R10G10B10A2_UINT, // VK_FORMAT_A2B10G10R10_UINT_PACK32 + RPS_FORMAT_UNKNOWN, // VK_FORMAT_A2B10G10R10_SINT_PACK32 + 
RPS_FORMAT_R16_UNORM, // VK_FORMAT_R16_UNORM + RPS_FORMAT_R16_SNORM, // VK_FORMAT_R16_SNORM + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R16_USCALED + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R16_SSCALED + RPS_FORMAT_R16_UINT, // VK_FORMAT_R16_UINT + RPS_FORMAT_R16_SINT, // VK_FORMAT_R16_SINT + RPS_FORMAT_R16_FLOAT, // VK_FORMAT_R16_SFLOAT + RPS_FORMAT_R16G16_UNORM, // VK_FORMAT_R16G16_UNORM + RPS_FORMAT_R16G16_SNORM, // VK_FORMAT_R16G16_SNORM + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R16G16_USCALED + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R16G16_SSCALED + RPS_FORMAT_R16G16_UINT, // VK_FORMAT_R16G16_UINT + RPS_FORMAT_R16G16_SINT, // VK_FORMAT_R16G16_SINT + RPS_FORMAT_R16G16_FLOAT, // VK_FORMAT_R16G16_SFLOAT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R16G16B16_UNORM + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R16G16B16_SNORM + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R16G16B16_USCALED + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R16G16B16_SSCALED + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R16G16B16_UINT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R16G16B16_SINT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R16G16B16_SFLOAT + RPS_FORMAT_R16G16B16A16_UNORM, // VK_FORMAT_R16G16B16A16_UNORM + RPS_FORMAT_R16G16B16A16_SNORM, // VK_FORMAT_R16G16B16A16_SNORM + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R16G16B16A16_USCALED + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R16G16B16A16_SSCALED + RPS_FORMAT_R16G16B16A16_UINT, // VK_FORMAT_R16G16B16A16_UINT + RPS_FORMAT_R16G16B16A16_SINT, // VK_FORMAT_R16G16B16A16_SINT + RPS_FORMAT_R16G16B16A16_FLOAT, // VK_FORMAT_R16G16B16A16_SFLOAT + RPS_FORMAT_R32_UINT, // VK_FORMAT_R32_UINT + RPS_FORMAT_R32_SINT, // VK_FORMAT_R32_SINT + RPS_FORMAT_R32_FLOAT, // VK_FORMAT_R32_SFLOAT + RPS_FORMAT_R32G32_UINT, // VK_FORMAT_R32G32_UINT + RPS_FORMAT_R32G32_SINT, // VK_FORMAT_R32G32_SINT + RPS_FORMAT_R32G32_FLOAT, // VK_FORMAT_R32G32_SFLOAT + RPS_FORMAT_R32G32B32_UINT, // VK_FORMAT_R32G32B32_UINT + RPS_FORMAT_R32G32B32_SINT, // VK_FORMAT_R32G32B32_SINT + RPS_FORMAT_R32G32B32_FLOAT, // VK_FORMAT_R32G32B32_SFLOAT + RPS_FORMAT_R32G32B32A32_UINT, // 
VK_FORMAT_R32G32B32A32_UINT + RPS_FORMAT_R32G32B32A32_SINT, // VK_FORMAT_R32G32B32A32_SINT + RPS_FORMAT_R32G32B32A32_FLOAT, // VK_FORMAT_R32G32B32A32_SFLOAT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R64_UINT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R64_SINT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R64_SFLOAT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R64G64_UINT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R64G64_SINT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R64G64_SFLOAT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R64G64B64_UINT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R64G64B64_SINT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R64G64B64_SFLOAT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R64G64B64A64_UINT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R64G64B64A64_SINT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_R64G64B64A64_SFLOAT + RPS_FORMAT_R11G11B10_FLOAT, // VK_FORMAT_B10G11R11_UFLOAT_PACK32 + RPS_FORMAT_R9G9B9E5_SHAREDEXP, // VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 + RPS_FORMAT_D16_UNORM, // VK_FORMAT_D16_UNORM + RPS_FORMAT_D24_UNORM_S8_UINT, // VK_FORMAT_X8_D24_UNORM_PACK32 + RPS_FORMAT_D32_FLOAT, // VK_FORMAT_D32_SFLOAT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_S8_UINT + RPS_FORMAT_UNKNOWN, // VK_FORMAT_D16_UNORM_S8_UINT + RPS_FORMAT_D24_UNORM_S8_UINT, // VK_FORMAT_D24_UNORM_S8_UINT + RPS_FORMAT_D32_FLOAT_S8X24_UINT, // VK_FORMAT_D32_SFLOAT_S8_UINT + RPS_FORMAT_BC1_UNORM, // VK_FORMAT_BC1_RGB_UNORM_BLOCK + RPS_FORMAT_BC1_UNORM_SRGB, // VK_FORMAT_BC1_RGB_SRGB_BLOCK + RPS_FORMAT_BC1_UNORM, // VK_FORMAT_BC1_RGBA_UNORM_BLOCK + RPS_FORMAT_BC1_UNORM_SRGB, // VK_FORMAT_BC1_RGBA_SRGB_BLOCK + RPS_FORMAT_BC2_UNORM, // VK_FORMAT_BC2_UNORM_BLOCK + RPS_FORMAT_BC2_UNORM_SRGB, // VK_FORMAT_BC2_SRGB_BLOCK + RPS_FORMAT_BC3_UNORM, // VK_FORMAT_BC3_UNORM_BLOCK + RPS_FORMAT_BC3_UNORM_SRGB, // VK_FORMAT_BC3_SRGB_BLOCK + RPS_FORMAT_BC4_UNORM, // VK_FORMAT_BC4_UNORM_BLOCK + RPS_FORMAT_BC4_SNORM, // VK_FORMAT_BC4_SNORM_BLOCK + RPS_FORMAT_BC5_UNORM, // VK_FORMAT_BC5_UNORM_BLOCK + RPS_FORMAT_BC5_SNORM, // VK_FORMAT_BC5_SNORM_BLOCK + RPS_FORMAT_BC6H_UF16, // VK_FORMAT_BC6H_UFLOAT_BLOCK + 
RPS_FORMAT_BC6H_SF16, // VK_FORMAT_BC6H_SFLOAT_BLOCK + RPS_FORMAT_BC7_UNORM, // VK_FORMAT_BC7_UNORM_BLOCK + RPS_FORMAT_BC7_UNORM_SRGB, // VK_FORMAT_BC7_SRGB_BLOCK + }; + + return (uint32_t)vkFormat < RPS_COUNTOF(formatMap) ? formatMap[(uint32_t)vkFormat] : RPS_FORMAT_UNKNOWN; +} \ No newline at end of file diff --git a/src/runtime/vk/rps_vk_runtime_backend.cpp b/src/runtime/vk/rps_vk_runtime_backend.cpp new file mode 100644 index 0000000..a624577 --- /dev/null +++ b/src/runtime/vk/rps_vk_runtime_backend.cpp @@ -0,0 +1,1832 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#include "rps/runtime/common/rps_render_states.h" +#include "rps/runtime/vk/rps_vk_runtime.h" + +#include "runtime/common/rps_render_graph.hpp" +#include "runtime/common/rps_runtime_util.hpp" + +#include "runtime/vk/rps_vk_runtime_backend.hpp" +#include "runtime/vk/rps_vk_util.hpp" +#include "runtime/vk/rps_vk_runtime_device.hpp" + +namespace rps +{ + template + VkImageLayout GetVkImageLayout(const RpsAccessAttr& access) + { + RPS_ASSERT(access.accessFlags != RPS_ACCESS_UNKNOWN); + + if (access.accessFlags == RPS_ACCESS_PRESENT_BIT) + return bSrcLayout ? 
VK_IMAGE_LAYOUT_UNDEFINED : VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_RENDER_TARGET_BIT)) + return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + if (rpsAllBitsSet(access.accessFlags, RPS_ACCESS_DEPTH_WRITE_BIT | RPS_ACCESS_STENCIL_READ_BIT)) + return VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL; + else if (rpsAllBitsSet(access.accessFlags, RPS_ACCESS_DEPTH_READ_BIT | RPS_ACCESS_STENCIL_WRITE_BIT)) + return VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL; + else if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_DEPTH_WRITE_BIT | RPS_ACCESS_STENCIL_WRITE_BIT)) + return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_UNORDERED_ACCESS_BIT)) + return VK_IMAGE_LAYOUT_GENERAL; + + if (rpsAnyBitsSet(access.accessFlags, + RPS_ACCESS_CLEAR_BIT | RPS_ACCESS_RESOLVE_DEST_BIT | RPS_ACCESS_COPY_DEST_BIT)) + return VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_DEPTH_STENCIL_READ)) + return VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; + else if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_SHADER_RESOURCE_BIT)) + return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + else if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_RESOLVE_SRC_BIT | RPS_ACCESS_COPY_SRC_BIT)) + return VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + + return VK_IMAGE_LAYOUT_UNDEFINED; + } + + struct VKAccessInfo + { + VkPipelineStageFlags stages; + VkAccessFlags access; + VkImageLayout imgLayout; + uint32_t queueFamilyIndex; + + VKAccessInfo() = default; + + VKAccessInfo(VkPipelineStageFlags inStages, + VkAccessFlags inAccess, + VkImageLayout inImgLayout, + uint32_t inQueueFamilyIndex) + : stages(inStages) + , access(inAccess) + , imgLayout(inImgLayout) + , queueFamilyIndex(inQueueFamilyIndex) + { + } + }; + + struct VKAccessInfo2 + { + VkPipelineStageFlags2 stages; + VkAccessFlags2 access; + VkImageLayout imgLayout; + uint32_t queueFamilyIndex; + }; + + 
static inline VkPipelineStageFlags GetVkPipelineStagesForShaderStages(RpsShaderStageFlags stages) + { + static constexpr struct + { + VkPipelineStageFlags vkFlags; + RpsShaderStageBits rpsFlags; + } stageMap[] = { + {VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, RPS_SHADER_STAGE_VS}, + {VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, RPS_SHADER_STAGE_PS}, + {VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT, RPS_SHADER_STAGE_GS}, + {VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, RPS_SHADER_STAGE_CS}, + {VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT, RPS_SHADER_STAGE_HS}, + {VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT, RPS_SHADER_STAGE_DS}, + {VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, RPS_SHADER_STAGE_RAYTRACING}, + {VK_PIPELINE_STAGE_TASK_SHADER_BIT_NV, RPS_SHADER_STAGE_AS}, + {VK_PIPELINE_STAGE_MESH_SHADER_BIT_NV, RPS_SHADER_STAGE_MS}, + }; + + VkPipelineStageFlags vkFlags = VK_PIPELINE_STAGE_NONE; + + for (auto iter = std::begin(stageMap); iter != std::end(stageMap); ++iter) + { + if (stages & iter->rpsFlags) + { + vkFlags |= iter->vkFlags; + } + } + + return vkFlags; + } + + // bRenderPass: Indicate if the access is used in part of a RenderPass. + // Currently mainly to distinguish between RenderPass clears (Attachment access) + // from Cmd Clears (Transfer access). + // bIsSrc: Indicates if the access is associated with the source access / stage of a barrier. + template + VKAccessInfo GetVKAccessInfo(const RpsAccessAttr& access) + { + const uint32_t queueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // TODO + + // TODO: + if (access.accessFlags == RPS_ACCESS_UNKNOWN) + { + return VKAccessInfo{bIsSrc ? 
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + VK_IMAGE_LAYOUT_UNDEFINED, + queueFamilyIndex}; + } + + const bool isWriteOnly = rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_DISCARD_OLD_DATA_BIT); + const bool isRenderPass = bRenderPass || (access.accessFlags & RPS_ACCESS_RENDER_PASS); + + if (!isRenderPass && rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_CLEAR_BIT)) + return VKAccessInfo{VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + queueFamilyIndex}; + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_RENDER_TARGET_BIT)) + return VKAccessInfo( + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | (isWriteOnly ? 0 : VK_ACCESS_COLOR_ATTACHMENT_READ_BIT), + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + queueFamilyIndex); + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_DEPTH_STENCIL_WRITE)) + { + VkImageLayout layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + if (rpsAllBitsSet(access.accessFlags, RPS_ACCESS_DEPTH_WRITE_BIT | RPS_ACCESS_STENCIL_READ_BIT)) + layout = VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL; + else if (rpsAllBitsSet(access.accessFlags, RPS_ACCESS_DEPTH_READ_BIT | RPS_ACCESS_STENCIL_WRITE_BIT)) + layout = VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL; + + return VKAccessInfo( + bIsSrc ? (VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT) + : VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + (isWriteOnly ? 
0 : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT), + layout, + queueFamilyIndex); + } + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_UNORDERED_ACCESS_BIT)) + { + const auto shaderStages = GetVkPipelineStagesForShaderStages(access.accessStages); + return VKAccessInfo{shaderStages, + VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL, + queueFamilyIndex}; + } + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_RESOLVE_DEST_BIT)) + return VKAccessInfo{ + VkPipelineStageFlags(isRenderPass ? VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT + : VK_PIPELINE_STAGE_TRANSFER_BIT), + VkAccessFlags(isRenderPass ? VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT : VK_ACCESS_TRANSFER_WRITE_BIT), + isRenderPass ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + queueFamilyIndex}; + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_COPY_DEST_BIT)) + return VKAccessInfo{VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + queueFamilyIndex}; + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_STREAM_OUT_BIT)) + return VKAccessInfo{VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT, + VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT, + VK_IMAGE_LAYOUT_UNDEFINED, + queueFamilyIndex}; + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_RAYTRACING_AS_BUILD_BIT)) + return VKAccessInfo{VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, + VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, + VK_IMAGE_LAYOUT_UNDEFINED, + queueFamilyIndex}; + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_CPU_WRITE_BIT)) + return VKAccessInfo{ + VK_PIPELINE_STAGE_HOST_BIT, VK_ACCESS_HOST_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, queueFamilyIndex}; + + // TODO: RPS_ACCESS_PREDICATION_BIT => VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT + + // clang-format off + static constexpr struct + { + RpsAccessFlags rpsFlags; + VkPipelineStageFlags stages; + VkAccessFlags access; + VkImageLayout imgLayout; + } 
readAccessMap[] = { + {RPS_ACCESS_INDIRECT_ARGS_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_IMAGE_LAYOUT_UNDEFINED}, + {RPS_ACCESS_INDEX_BUFFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_INDEX_READ_BIT, VK_IMAGE_LAYOUT_UNDEFINED}, + {RPS_ACCESS_VERTEX_BUFFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, VK_IMAGE_LAYOUT_UNDEFINED}, + {RPS_ACCESS_CONSTANT_BUFFER_BIT, 0, VK_ACCESS_UNIFORM_READ_BIT, VK_IMAGE_LAYOUT_UNDEFINED}, + {RPS_ACCESS_DEPTH_STENCIL_READ, bIsSrc ? VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT : VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT, VK_IMAGE_LAYOUT_UNDEFINED}, + // | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; // TODO: Adding DS Write bit since previous access might be RenderPass StoreOpStore. Which "uses the access type VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT" + {RPS_ACCESS_SHADER_RESOURCE_BIT, 0, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}, + {RPS_ACCESS_COPY_SRC_BIT | RPS_ACCESS_RESOLVE_SRC_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL}, + {RPS_ACCESS_SHADING_RATE_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR, VK_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR, VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR}, + {RPS_ACCESS_RAYTRACING_AS_BUILD_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, VK_IMAGE_LAYOUT_UNDEFINED}, + {RPS_ACCESS_PRESENT_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_ACCESS_NONE, bIsSrc ? 
VK_IMAGE_LAYOUT_UNDEFINED : VK_IMAGE_LAYOUT_PRESENT_SRC_KHR}, + {RPS_ACCESS_CPU_READ_BIT, VK_PIPELINE_STAGE_HOST_BIT, VK_ACCESS_HOST_READ_BIT, VK_IMAGE_LAYOUT_UNDEFINED}, + }; + // clang-format on + + VKAccessInfo result = {}; + result.queueFamilyIndex = queueFamilyIndex; + + for (auto mapEntry = std::begin(readAccessMap); mapEntry != std::end(readAccessMap); ++mapEntry) + { + if (rpsAnyBitsSet(access.accessFlags, mapEntry->rpsFlags)) + { + result.stages |= mapEntry->stages; + result.access |= mapEntry->access; + + RPS_ASSERT((result.imgLayout == VK_IMAGE_LAYOUT_UNDEFINED) && "Unexpected image layout."); + result.imgLayout = mapEntry->imgLayout; + } + } + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_CONSTANT_BUFFER_BIT | RPS_ACCESS_SHADER_RESOURCE_BIT)) + { + result.stages |= GetVkPipelineStagesForShaderStages(access.accessStages); + } + + if (rpsAnyBitsSet(access.accessFlags, RPS_ACCESS_DEPTH_STENCIL_READ)) + { + result.imgLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; // TODO + } + + if (result.stages == 0) + { + result.stages = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + } + + return result; + } + + VKRuntimeBackend::~VKRuntimeBackend() + { + } + + void VKRuntimeBackend::OnDestroy() + { + for (auto& frameResource : m_frameResources) + { + frameResource.DestroyDeviceResources(m_device.GetVkDevice()); + } + + m_frameResources.clear(); + + RuntimeBackend::OnDestroy(); + } + + RpsResult VKRuntimeBackend::UpdateFrame(const RenderGraphUpdateContext& context) + { + m_currentResourceFrame = + m_frameResources.empty() ? 
0 : (m_currentResourceFrame + 1) % uint32_t(m_frameResources.size()); + + if (m_frameResources.size() <= GetNumQueuedFrames(context)) + { + RPS_RETURN_ERROR_IF(m_frameResources.size() > RPS_MAX_QUEUED_FRAMES, RPS_ERROR_INVALID_OPERATION); + + RPS_CHECK_ALLOC(m_frameResources.insert(m_currentResourceFrame, FrameResources{})); + m_frameResources[m_currentResourceFrame].Reset(m_persistentPool); + } + else + { + // TODO - Recycle + m_frameResources[m_currentResourceFrame].DestroyDeviceResources(m_device.GetVkDevice()); + + std::swap(m_pendingReleaseImages, m_frameResources[m_currentResourceFrame].pendingImages); + std::swap(m_pendingReleaseBuffers, m_frameResources[m_currentResourceFrame].pendingBuffers); + } + + m_imageBarriers.reset(&context.frameArena); + m_bufferBarriers.reset(&context.frameArena); + m_memoryBarriers.reset(&context.frameArena); + m_runtimeCmds.reset(&context.frameArena); + m_barrierBatches.reset(&context.frameArena); + m_accessToDescriptorMap.reset(&context.frameArena); + m_imageViewLayouts.reset(&context.frameArena); + + return RPS_OK; + } + + RpsResult VKRuntimeBackend::CreateHeaps(const RenderGraphUpdateContext& context, ArrayRef heaps) + { + auto hVkDevice = m_device.GetVkDevice(); + + for (auto& heapInfo : heaps) + { + // TODO: + heapInfo.size = (heapInfo.size == UINT64_MAX) ? 
heapInfo.maxUsedSize : heapInfo.size; + + if (heapInfo.hRuntimeHeap || !heapInfo.size) + continue; + + VkMemoryAllocateInfo memAllocInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; + memAllocInfo.memoryTypeIndex = heapInfo.memTypeIndex; + memAllocInfo.allocationSize = heapInfo.size; + + VkDeviceMemory hMemory; + RPS_V_RETURN(VkResultToRps(vkAllocateMemory(hVkDevice, &memAllocInfo, nullptr, &hMemory))); + + heapInfo.hRuntimeHeap = {hMemory}; + } + + return RPS_OK; + } + + void VKRuntimeBackend::DestroyHeaps(ArrayRef heaps) + { + auto hVkDevice = m_device.GetVkDevice(); + + for (auto& heapInfo : heaps) + { + if (heapInfo.hRuntimeHeap) + { + VkDeviceMemory hMemory = rpsVKMemoryFromHandle(heapInfo.hRuntimeHeap); + heapInfo.hRuntimeHeap = {}; + + vkFreeMemory(hVkDevice, hMemory, nullptr); + } + } + } + + RpsResult VKRuntimeBackend::CreateResources(const RenderGraphUpdateContext& context, + ArrayRef resInstances) + { + // Bind Resource Memory + auto& heaps = GetRenderGraph().GetHeapInfos(); + auto hVkDevice = m_device.GetVkDevice(); + + const auto& deviceCreateInfo = m_device.GetCreateInfo(); + auto resourceDecls = GetRenderGraph().GetBuilder().GetResourceDecls(); + + const auto pfnSetDebugNameCb = deviceCreateInfo.callbacks.pfnSetDebugName; + const bool bEnableDebugNames = + !!(context.pUpdateInfo->diagnosticFlags & RPS_DIAGNOSTIC_ENABLE_RUNTIME_DEBUG_NAMES) && + (pfnSetDebugNameCb != nullptr); + + char nameBuf[RPS_NAME_MAX_LEN]; + + for (auto& resInfo : resInstances) + { + if (resInfo.isPendingCreate) + { + if (resInfo.hRuntimeResource) + { + if (bEnableDebugNames) + { + resourceDecls[resInfo.resourceDeclId].name.ToCStr(nameBuf, RPS_COUNTOF(nameBuf)); + + RpsRuntimeOpSetDebugNameArgs setNameArgs = {}; + setNameArgs.hResource = resInfo.hRuntimeResource; + setNameArgs.resourceType = resInfo.desc.type; + setNameArgs.name = nameBuf; + + pfnSetDebugNameCb(deviceCreateInfo.pUserContext, &setNameArgs); + } + + if (resInfo.allocPlacement.heapId != RPS_INDEX_NONE_U32) + { + 
auto pMemory = rpsVKMemoryFromHandle(heaps[resInfo.allocPlacement.heapId].hRuntimeHeap); + if (resInfo.desc.IsImage()) + { + RPS_V_RETURN(VkResultToRps(vkBindImageMemory(hVkDevice, + rpsVKImageFromHandle(resInfo.hRuntimeResource), + pMemory, + resInfo.allocPlacement.offset))); + } + else + { + RPS_V_RETURN( + VkResultToRps(vkBindBufferMemory(hVkDevice, + rpsVKBufferFromHandle(resInfo.hRuntimeResource), + pMemory, + resInfo.allocPlacement.offset))); + } + resInfo.isPendingInit = true; + } + } + + resInfo.isPendingCreate = false; + } + else if (!resInfo.isExternal) + { + resInfo.isPendingInit = resInfo.isAliased; + } + } + + return RPS_OK; + } + + void VKRuntimeBackend::DestroyResources(ArrayRef resInstances) + { + auto hVkDevice = m_device.GetVkDevice(); + + for (auto& resInfo : resInstances) + { + if (resInfo.hRuntimeResource && !resInfo.isExternal) + { + if (resInfo.desc.IsImage()) + { + VkImage hImage = rpsVKImageFromHandle(resInfo.hRuntimeResource); + vkDestroyImage(hVkDevice, hImage, nullptr); + } + else + { + VkBuffer hBuffer = rpsVKBufferFromHandle(resInfo.hRuntimeResource); + vkDestroyBuffer(hVkDevice, hBuffer, nullptr); + } + } + } + } + + RpsResult VKRuntimeBackend::CreateCommandResources(const RenderGraphUpdateContext& context) + { + auto& renderGraph = context.renderGraph; + + const auto& graph = renderGraph.GetGraph(); + const auto& cmdAccesses = renderGraph.GetCmdAccessInfos(); + auto& runtimeCmds = renderGraph.GetRuntimeCmdInfos(); + auto& aliasingInfos = renderGraph.GetResourceAliasingInfos(); + auto& resInstances = renderGraph.GetResourceInstances(); + auto cmdBatches = renderGraph.GetCmdBatches().range_all(); + + ArenaCheckPoint arenaCheckpoint{context.scratchArena}; + + ArenaVector renderPassCmdIndices(&context.scratchArena); + ArenaVector bufViews(&context.scratchArena); + ArenaVector imgViews(&context.scratchArena); + renderPassCmdIndices.reserve(context.renderGraph.GetCmdInfos().size()); + 
bufViews.reserve(context.renderGraph.GetCmdAccessInfos().size()); + imgViews.reserve(context.renderGraph.GetCmdAccessInfos().size()); + + m_resourceLayoutOffsets.reset_keep_capacity(&context.scratchArena); + m_subResLayouts.reset_keep_capacity(&context.scratchArena); + + Span transitionRange = {}; + + for (uint32_t iBatch = 0; iBatch < cmdBatches.size(); iBatch++) + { + RpsCommandBatch& batchInfo = cmdBatches[iBatch]; + + const uint32_t backendCmdBegin = uint32_t(m_runtimeCmds.size()); + + for (uint32_t iCmd = batchInfo.cmdBegin, numCmds = batchInfo.cmdBegin + batchInfo.numCmds; + iCmd < numCmds; + iCmd++) + { + const auto& runtimeCmd = runtimeCmds[iCmd]; + + if (runtimeCmd.isTransition) + { + if (transitionRange.GetEnd() != iCmd) + { + transitionRange.SetRange(iCmd, 0); + } + transitionRange.SetCount(transitionRange.size() + 1); + } + else + { + ProcessBarrierBatch(context, transitionRange); + + auto pNewRuntimeCmd = m_runtimeCmds.grow(1); + + pNewRuntimeCmd->cmdId = runtimeCmd.cmdId; + + const auto* pCmdInfo = context.renderGraph.GetCmdInfo(runtimeCmd.cmdId); + const auto& nodeDeclInfo = *pCmdInfo->pNodeDecl; + + if (nodeDeclInfo.pRenderPassInfo && !nodeDeclInfo.pRenderPassInfo->clearOnly) + { + pNewRuntimeCmd->renderPassId = uint32_t(renderPassCmdIndices.size()); // TODO + pNewRuntimeCmd->frameBufferId = uint32_t(renderPassCmdIndices.size()); + renderPassCmdIndices.push_back(uint32_t(m_runtimeCmds.size() - 1)); + } + + static constexpr RpsAccessFlags AccessMaskMayNeedCreateView = + RPS_ACCESS_CONSTANT_BUFFER_BIT | RPS_ACCESS_UNORDERED_ACCESS_BIT | + RPS_ACCESS_SHADER_RESOURCE_BIT | RPS_ACCESS_RENDER_TARGET_BIT | RPS_ACCESS_DEPTH_STENCIL | + RPS_ACCESS_RESOLVE_DEST_BIT; + + const uint32_t accessOffset = pCmdInfo->accesses.GetBegin(); + + for (uint32_t accessIdx = 0, accessCount = pCmdInfo->accesses.size(); accessIdx < accessCount; + accessIdx++) + { + const uint32_t globalAccessIdx = accessOffset + accessIdx; + + auto& access = cmdAccesses[globalAccessIdx]; + 
const auto& resInfo = resInstances[access.resourceId]; + + if (!rpsAnyBitsSet(access.access.accessFlags, RPS_ACCESS_NO_VIEW_BIT) && + rpsAnyBitsSet(access.access.accessFlags, AccessMaskMayNeedCreateView)) + { + if (resInfo.desc.IsBuffer() && access.pViewInfo && + (access.pViewInfo->viewFormat != RPS_FORMAT_UNKNOWN)) + { + bufViews.push_back(globalAccessIdx); + } + else if (resInfo.desc.IsImage()) + { + imgViews.push_back(globalAccessIdx); + } + } + } + } + } + + ProcessBarrierBatch(context, transitionRange); + + batchInfo.cmdBegin = backendCmdBegin; + batchInfo.numCmds = uint32_t(m_runtimeCmds.size()) - backendCmdBegin; + } + + // Create Views / Per-Cmd objects + + m_accessToDescriptorMap.resize(cmdAccesses.size(), RPS_INDEX_NONE_U32); + + RPS_V_RETURN(CreateBufferViews(context, bufViews.range_all())); + RPS_V_RETURN(CreateImageViews(context, imgViews.range_all())); + RPS_V_RETURN(CreateRenderPasses(context, renderPassCmdIndices.range_all())); + + return RPS_OK; + } + + RpsResult VKRuntimeBackend::RecordCommands(const RenderGraph& renderGraph, + const RpsRenderGraphRecordCommandInfo& recordInfo) const + { + RuntimeCmdCallbackContext cmdCbCtx{this, recordInfo}; + + for (auto cmdIter = m_runtimeCmds.cbegin() + recordInfo.cmdBeginIndex, cmdEnd = cmdIter + recordInfo.numCmds; + cmdIter != cmdEnd; + ++cmdIter) + { + auto& runtimeCmd = *cmdIter; + + if (runtimeCmd.barrierBatchId != RPS_INDEX_NONE_U32) + { + RecordBarrierBatch(GetContextVkCmdBuf(cmdCbCtx), runtimeCmd.barrierBatchId); + } + + RPS_V_RETURN(RecordCommand(cmdCbCtx, runtimeCmd)); + } + + return RPS_OK; + } + + void VKRuntimeBackend::DestroyRuntimeResourceDeferred(ResourceInstance& resource) + { + if (resource.hRuntimeResource) + { + if (resource.desc.IsImage()) + { + m_pendingReleaseImages.push_back(rpsVKImageFromHandle(resource.hRuntimeResource)); + } + else + { + m_pendingReleaseBuffers.push_back(rpsVKBufferFromHandle(resource.hRuntimeResource)); + } + resource.hRuntimeResource = {}; + } + } + + static 
inline void FlipViewport(VkViewport& vp) + { + vp.y = vp.y + vp.height; + vp.height = -vp.height; + } + + RpsResult VKRuntimeBackend::RecordCmdRenderPassBegin(const RuntimeCmdCallbackContext& context) const + { + auto& renderGraph = *context.pRenderGraph; + auto& cmd = *context.pCmd; + auto* pCmdInfo = context.pCmdInfo; + auto& nodeDeclInfo = *pCmdInfo->pNodeDecl; + auto hVkCmdBuf = GetContextVkCmdBuf(context); + auto& runtimeCmd = *context.GetRuntimeCmd(); + + RPS_RETURN_ERROR_IF(!nodeDeclInfo.MaybeGraphicsNode(), RPS_ERROR_INVALID_OPERATION); + + RPS_RETURN_OK_IF(!nodeDeclInfo.pRenderPassInfo || nodeDeclInfo.pRenderPassInfo->clearOnly); + + const auto cmdCbFlags = context.bIsCmdBeginEnd ? cmd.callback.flags : RPS_CMD_CALLBACK_FLAG_NONE; + + const bool bIsCmdMultiThreading = rpsAnyBitsSet(cmdCbFlags, RPS_CMD_CALLBACK_MULTI_THREADED_BIT); + + const bool bToExecSecondaryCmdBuf = + bIsCmdMultiThreading || + rpsAnyBitsSet(context.renderPassFlags, RPS_RUNTIME_RENDER_PASS_EXECUTE_SECONDARY_COMMAND_BUFFERS); + + const bool bIsSecondaryCmdBuffer = + rpsAnyBitsSet(context.renderPassFlags, RPS_RUNTIME_RENDER_PASS_SECONDARY_COMMAND_BUFFER); + + RPS_CHECK_ARGS(!(bToExecSecondaryCmdBuf && bIsSecondaryCmdBuffer)); + + // TODO: Simplify conditions & share with EndRP. + // + // Skip vkCmdBeginRenderPass if: + // - Is called on secondary cmd buffer, in which case we may only setup Viewports / Scissor states. + // - User indicated the cmd callback will do custom RP. + // - RP info missing. 
+ const bool bBeginVKRenderPass = !bIsSecondaryCmdBuffer && + !rpsAnyBitsSet(cmdCbFlags, RPS_CMD_CALLBACK_CUSTOM_RENDER_TARGETS_BIT) && + (runtimeCmd.renderPassId != RPS_INDEX_NONE_U32); + + auto& cmdRpInfo = *pCmdInfo->pRenderPassInfo; + + // Begin RenderPass + if (bBeginVKRenderPass) + { + auto& defaultRenderArea = cmdRpInfo.viewportInfo.defaultRenderArea; + + VkRenderPassBeginInfo rpBegin = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO}; + + auto& currResources = m_frameResources[m_currentResourceFrame]; + + rpBegin.renderPass = currResources.renderPasses[runtimeCmd.renderPassId]; + rpBegin.framebuffer = currResources.frameBuffers[runtimeCmd.frameBufferId]; + rpBegin.renderArea = VkRect2D{{defaultRenderArea.x, defaultRenderArea.y}, + {uint32_t(defaultRenderArea.width), uint32_t(defaultRenderArea.height)}}; + rpBegin.clearValueCount = uint32_t(runtimeCmd.clearValues.size()); + rpBegin.pClearValues = runtimeCmd.clearValues.data(); + + const VkSubpassContents subpassContent = + bToExecSecondaryCmdBuf ? 
VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS : VK_SUBPASS_CONTENTS_INLINE; + + vkCmdBeginRenderPass(hVkCmdBuf, &rpBegin, subpassContent); + } + + // Setup Viewport / Scissor states + + const bool bSetViewportScissors = + !bToExecSecondaryCmdBuf && !rpsAnyBitsSet(cmdCbFlags, RPS_CMD_CALLBACK_CUSTOM_VIEWPORT_BIT); + + if (bSetViewportScissors) + { + static_assert(sizeof(VkViewport) == sizeof(RpsViewport), "Invalid assumption about VkViewport layout"); + static_assert(sizeof(VkRect2D) == sizeof(RpsRect), "Invalid assumption about VkRect2D layout"); + + const VkViewport* pViewports = reinterpret_cast(cmdRpInfo.viewportInfo.pViewports); + const VkRect2D* pScissorRects = reinterpret_cast(cmdRpInfo.viewportInfo.pScissorRects); + + static constexpr uint32_t MAX_VIEWPORT_SCISSOR_COUNT = 32; // TODO + VkViewport viewports[MAX_VIEWPORT_SCISSOR_COUNT] = {}; + + const bool bFlipViewport = + !rpsAnyBitsSet(m_device.GetRuntimeFlags(), RPS_VK_RUNTIME_FLAG_DONT_FLIP_VIEWPORT); + + if (bFlipViewport) + { + RPS_RETURN_ERROR_IF(cmdRpInfo.viewportInfo.numViewports > MAX_VIEWPORT_SCISSOR_COUNT, + RPS_ERROR_NOT_SUPPORTED); + + for (uint32_t i = 0; i < cmdRpInfo.viewportInfo.numViewports; i++) + { + viewports[i] = pViewports[i]; + FlipViewport(viewports[0]); + } + + pViewports = viewports; + } + + vkCmdSetViewport(hVkCmdBuf, 0, cmdRpInfo.viewportInfo.numViewports, pViewports); + vkCmdSetScissor(hVkCmdBuf, 0, cmdRpInfo.viewportInfo.numScissorRects, pScissorRects); + } + + return RPS_OK; + } + + RpsResult VKRuntimeBackend::RecordCmdRenderPassEnd(const RuntimeCmdCallbackContext& context) const + { + auto& renderGraph = *context.pRenderGraph; + auto& runtimeCmd = *context.GetRuntimeCmd(); + auto& cmd = *context.pCmd; + auto& nodeDeclInfo = *context.pCmdInfo->pNodeDecl; + + RPS_RETURN_ERROR_IF(!nodeDeclInfo.MaybeGraphicsNode(), RPS_ERROR_INVALID_OPERATION); + + RPS_RETURN_OK_IF(!nodeDeclInfo.pRenderPassInfo || nodeDeclInfo.pRenderPassInfo->clearOnly); + + const auto cmdCbFlags = 
context.bIsCmdBeginEnd ? cmd.callback.flags : RPS_CMD_CALLBACK_FLAG_NONE; + + const bool bIsSecondaryCmdBuffer = + rpsAnyBitsSet(context.renderPassFlags, RPS_RUNTIME_RENDER_PASS_SECONDARY_COMMAND_BUFFER); + + const bool bEndVKRenderPass = !bIsSecondaryCmdBuffer && + !rpsAnyBitsSet(cmdCbFlags, RPS_CMD_CALLBACK_CUSTOM_RENDER_TARGETS_BIT) && + (runtimeCmd.renderPassId != RPS_INDEX_NONE_U32); + + if (bEndVKRenderPass) + { + vkCmdEndRenderPass(GetContextVkCmdBuf(context)); + } + + return RPS_OK; + } + + RpsResult VKRuntimeBackend::RecordCmdFixedFunctionBindingsAndDynamicStates( + const RuntimeCmdCallbackContext& context) const + { + auto& renderGraph = *context.pRenderGraph; + auto& nodeDeclInfo = *context.pCmdInfo->pNodeDecl; + + auto fixedFuncBindings = nodeDeclInfo.fixedFunctionBindings.Get(nodeDeclInfo.semanticKinds); + auto dynamicStates = nodeDeclInfo.dynamicStates.Get(nodeDeclInfo.semanticKinds); + + for (auto& binding : fixedFuncBindings) + { + auto paramIndices = binding.params.Get(nodeDeclInfo.semanticParamTable); + + switch (binding.semantic) + { + case RPS_SEMANTIC_VERTEX_BUFFER: + break; + case RPS_SEMANTIC_INDEX_BUFFER: + break; + case RPS_SEMANTIC_INDIRECT_ARGS: + break; + case RPS_SEMANTIC_INDIRECT_COUNT: + break; + case RPS_SEMANTIC_STREAM_OUT_BUFFER: + break; + case RPS_SEMANTIC_SHADING_RATE_IMAGE: + break; + case RPS_SEMANTIC_RENDER_TARGET: + case RPS_SEMANTIC_DEPTH_STENCIL_TARGET: + case RPS_SEMANTIC_RESOLVE_TARGET: + default: + break; + } + } + + for (auto& dynamicState : dynamicStates) + { + switch (dynamicState.semantic) + { + case RPS_SEMANTIC_VIEWPORT: + break; + case RPS_SEMANTIC_SCISSOR: + break; + case RPS_SEMANTIC_PRIMITIVE_TOPOLOGY: + break; + case RPS_SEMANTIC_PATCH_CONTROL_POINTS: + break; + case RPS_SEMANTIC_PRIMITIVE_STRIP_CUT_INDEX: + break; + case RPS_SEMANTIC_BLEND_FACTOR: + break; + case RPS_SEMANTIC_STENCIL_REF: + break; + case RPS_SEMANTIC_DEPTH_BOUNDS: + break; + case RPS_SEMANTIC_SAMPLE_LOCATION: + break; + case 
RPS_SEMANTIC_SHADING_RATE: + break; + case RPS_SEMANTIC_COLOR_CLEAR_VALUE: + + case RPS_SEMANTIC_DEPTH_CLEAR_VALUE: + + case RPS_SEMANTIC_STENCIL_CLEAR_VALUE: + + default: + break; + } + } + + return RPS_OK; + } + + static inline VkImageViewType GetImageViewType(const ResourceInstance& resInfo, + const CmdAccessInfo& accessInfo, + const RpsImageView& view) + { + const bool isArray = view.subresourceRange.arrayLayers > 1; + const bool isCubemap = rpsAnyBitsSet(view.base.flags, RPS_RESOURCE_VIEW_FLAG_CUBEMAP_BIT); + + if (resInfo.desc.type == RPS_RESOURCE_TYPE_IMAGE_2D) + { + if (isCubemap) + { + RPS_ASSERT(isArray); + + return (view.subresourceRange.arrayLayers > 6) ? VK_IMAGE_VIEW_TYPE_CUBE_ARRAY + : VK_IMAGE_VIEW_TYPE_CUBE; + } + return isArray ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; + } + else if (resInfo.desc.type == RPS_RESOURCE_TYPE_IMAGE_3D) + { + if (rpsAnyBitsSet(accessInfo.access.accessFlags, RPS_ACCESS_RENDER_TARGET_BIT)) + { + return VK_IMAGE_VIEW_TYPE_2D_ARRAY; + } + + return VK_IMAGE_VIEW_TYPE_3D; + } + else + { + return isArray ? 
VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D; + } + } + + RpsResult CreateImageView(VkDevice hDevice, + VkImage hImage, + const ResourceInstance& resInfo, + const CmdAccessInfo& accessInfo, + VkImageView& dstImgView) + { + const RpsFormat viewFormat = rpsVkGetImageViewFormat(accessInfo.viewFormat, resInfo); + const RpsImageView* pImgViewInfo = reinterpret_cast(accessInfo.pViewInfo); + + VkImageViewCreateInfo vkCreateInfo; + vkCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + vkCreateInfo.pNext = NULL; + vkCreateInfo.flags = 0; + vkCreateInfo.image = hImage; + vkCreateInfo.viewType = GetImageViewType(resInfo, accessInfo, *pImgViewInfo); + vkCreateInfo.format = rpsFormatToVK(viewFormat); + + GetVkComponentMapping(vkCreateInfo.components, pImgViewInfo->componentMapping); + GetVkSubresourceRange(vkCreateInfo.subresourceRange, accessInfo.range); + + return VkResultToRps(vkCreateImageView(hDevice, &vkCreateInfo, nullptr, &dstImgView)); + } + + RpsResult CreateBufferView(VkDevice hDevice, + VkBuffer hBuffer, + const ResourceInstance& resInfo, + const CmdAccessInfo& accessInfo, + VkBufferView& dstBufView) + { + RPS_ASSERT(accessInfo.viewFormat != RPS_FORMAT_UNKNOWN); + + const RpsBufferView* pBufViewInfo = reinterpret_cast(accessInfo.pViewInfo); + + VkBufferViewCreateInfo vkCreateInfo; + vkCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; + vkCreateInfo.pNext = NULL; + vkCreateInfo.flags = 0; + vkCreateInfo.buffer = hBuffer; + vkCreateInfo.format = rpsFormatToVK(accessInfo.viewFormat); + vkCreateInfo.offset = pBufViewInfo->offset; + vkCreateInfo.range = + (pBufViewInfo->sizeInBytes == RPS_BUFFER_WHOLE_SIZE) ? 
VK_WHOLE_SIZE : pBufViewInfo->sizeInBytes; + + return VkResultToRps(vkCreateBufferView(hDevice, &vkCreateInfo, nullptr, &dstBufView)); + } + + RpsResult VKRuntimeBackend::CreateImageViews(const RenderGraphUpdateContext& context, + ConstArrayRef accessIndices) + { + RPS_RETURN_OK_IF(accessIndices.empty()); + + auto hVkDevice = m_device.GetVkDevice(); + auto& cmdAccesses = context.renderGraph.GetCmdAccessInfos(); + auto resourceInstances = context.renderGraph.GetResourceInstances().range_all(); + auto& currResources = m_frameResources[m_currentResourceFrame]; + + m_imageViewLayouts.resize(accessIndices.size()); + + RPS_CHECK_ALLOC(currResources.imageViews.resize(accessIndices.size())); + + uint32_t imgViewIndex = 0; + VkImage hImage = VK_NULL_HANDLE; + + for (auto accessIndex : accessIndices) + { + auto& access = cmdAccesses[accessIndex]; + + const auto& resource = resourceInstances[access.resourceId]; + FromHandle(hImage, resource.hRuntimeResource); + + m_imageViewLayouts[imgViewIndex] = GetTrackedImageLayoutInfo(resource, access); + + VkImageView& hImgView = currResources.imageViews[imgViewIndex]; + RPS_V_RETURN(CreateImageView(hVkDevice, hImage, resource, access, hImgView)); + + m_accessToDescriptorMap[accessIndex] = imgViewIndex; + + imgViewIndex++; + } + + return RPS_OK; + } + + RpsResult VKRuntimeBackend::CreateBufferViews(const RenderGraphUpdateContext& context, + ConstArrayRef accessIndices) + { + RPS_RETURN_OK_IF(accessIndices.empty()); + + auto hVkDevice = m_device.GetVkDevice(); + auto& cmdAccesses = context.renderGraph.GetCmdAccessInfos(); + auto resourceInstances = context.renderGraph.GetResourceInstances().range_all(); + auto& currResources = m_frameResources[m_currentResourceFrame]; + + RPS_CHECK_ALLOC(currResources.bufferViews.resize(accessIndices.size())); + + uint32_t bufViewIndex = 0; + VkBuffer hBuffer = VK_NULL_HANDLE; + + for (auto accessIndex : accessIndices) + { + auto& access = cmdAccesses[accessIndex]; + + const auto& resource = 
resourceInstances[access.resourceId]; + FromHandle(hBuffer, resource.hRuntimeResource); + + VkBufferView& hBufView = currResources.bufferViews[bufViewIndex]; + RPS_V_RETURN(CreateBufferView(hVkDevice, hBuffer, resource, access, hBufView)); + + m_accessToDescriptorMap[accessIndex] = bufViewIndex; + + bufViewIndex++; + } + + return RPS_OK; + } + + VkAttachmentLoadOp GetVkLoadOp(const CmdAccessInfo& access, const NodeDeclRenderPassInfo& rpInfo) + { + // For depth stencil, need additional clear flags from rpInfo in case we want to clear only depth or stencil aspect. + const bool bIsDepthStencil = !!(access.access.accessFlags & RPS_ACCESS_DEPTH_STENCIL); + const bool bShouldClearDepthStencil = (rpInfo.clearDepth && (access.access.accessFlags & RPS_ACCESS_DEPTH)) || + (rpInfo.clearStencil && (access.access.accessFlags & RPS_ACCESS_STENCIL)); + + if (bShouldClearDepthStencil || (!bIsDepthStencil && (access.access.accessFlags & RPS_ACCESS_CLEAR_BIT))) + return VK_ATTACHMENT_LOAD_OP_CLEAR; + else if (access.access.accessFlags & RPS_ACCESS_DISCARD_OLD_DATA_BIT) + return VK_ATTACHMENT_LOAD_OP_DONT_CARE; + else //if ((pAccess->accessFlagsPrev & (RPS_RESOURCE_ACCESS_NO_FLAGS | RPS_RESOURCE_ACCESS_PRESENT_BIT)) == 0) + return VK_ATTACHMENT_LOAD_OP_LOAD; + // else + // return VK_ATTACHMENT_LOAD_OP_DONT_CARE; + } + + VkAttachmentStoreOp GetVkStoreOp(const CmdAccessInfo& access) + { + return VK_ATTACHMENT_STORE_OP_STORE; + // TODO: Mark final accesses to for DONT_CARE op. 
+ } + + VkAttachmentLoadOp GetVkStencilLoadOp(const CmdAccessInfo& access, const NodeDeclRenderPassInfo& rpInfo) + { + // TODO: Separate depth/stencil clear control + if (access.access.accessFlags & (RPS_ACCESS_STENCIL_WRITE_BIT | RPS_ACCESS_STENCIL_READ_BIT)) + return GetVkLoadOp(access, rpInfo); + else + return VK_ATTACHMENT_LOAD_OP_DONT_CARE; + } + + VkAttachmentStoreOp GetVkStencilStoreOp(const CmdAccessInfo& access) + { + if (access.access.accessFlags & (RPS_ACCESS_STENCIL_WRITE_BIT | RPS_ACCESS_STENCIL_READ_BIT)) + return GetVkStoreOp(access); + else + return VK_ATTACHMENT_STORE_OP_DONT_CARE; + } + + template + static inline void GetVkAttachmentDescription(VkAttachmentDescription* pOut, + const CmdAccessInfo& access, + const ResourceInstance& resourceInfo, + const NodeDeclRenderPassInfo& rpInfo) + { + RPS_ASSERT(resourceInfo.desc.IsImage()); + + const RpsFormat viewFormat = rpsVkGetImageViewFormat(access.viewFormat, resourceInfo); + + const auto& initialAccess = access.access; + // bUseRenderPassBarriers ? pAccess->accessFlagsPrev : pAccess->accessFlagsCurrent; + const auto& finalAccess = access.access; + // bUseRenderPassBarriers ? 
pAccess->accessFlagsNext : pAccess->accessFlagsCurrent; + + pOut->flags = 0; + pOut->format = rpsFormatToVK(viewFormat); + pOut->samples = rpsVkGetSampleCount(resourceInfo.desc.image.sampleCount); + pOut->loadOp = GetVkLoadOp(access, rpInfo); + pOut->storeOp = GetVkStoreOp(access); + pOut->stencilLoadOp = GetVkStencilLoadOp(access, rpInfo); + pOut->stencilStoreOp = GetVkStencilStoreOp(access); + pOut->initialLayout = GetVkImageLayout(initialAccess); + pOut->finalLayout = GetVkImageLayout(finalAccess); + } + + RpsResult VKRuntimeBackend::CreateRenderPasses(const RenderGraphUpdateContext& context, + ConstArrayRef cmdIndices) + { + RPS_RETURN_OK_IF(cmdIndices.empty()); + + auto hVkDevice = m_device.GetVkDevice(); + const auto& resources = context.renderGraph.GetResourceInstances(); + const auto& runtimeCmds = context.renderGraph.GetRuntimeCmdInfos(); + const auto& cmdAccesses = context.renderGraph.GetCmdAccessInfos(); + + auto& currResources = m_frameResources[m_currentResourceFrame]; + + RPS_CHECK_ALLOC(currResources.renderPasses.resize(cmdIndices.size())); + RPS_CHECK_ALLOC(currResources.frameBuffers.resize(cmdIndices.size())); + + VkAttachmentDescription attchmtDescs[RPS_MAX_SIMULTANEOUS_RENDER_TARGET_COUNT * 2 + 1]; + VkAttachmentReference colorRefs[RPS_MAX_SIMULTANEOUS_RENDER_TARGET_COUNT]; + VkAttachmentReference resolveRefs[RPS_MAX_SIMULTANEOUS_RENDER_TARGET_COUNT]; + VkAttachmentReference depthRef; + + VkSubpassDescription subpassDesc = {}; + + VkRenderPassCreateInfo rpInfo = {VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO}; + rpInfo.subpassCount = 1; + rpInfo.pSubpasses = &subpassDesc; + + uint32_t rpIndex = 0; + for (auto cmdIndex : cmdIndices) + { + auto& runtimeCmd = m_runtimeCmds[cmdIndex]; + auto* pCmdInfo = context.renderGraph.GetCmdInfo(runtimeCmd.cmdId); + const auto& cmd = *pCmdInfo->pCmdDecl; + const auto& nodeDeclInfo = *pCmdInfo->pNodeDecl; + + RPS_ASSERT(nodeDeclInfo.pRenderPassInfo); + auto& nodeDeclRenderPassInfo = *nodeDeclInfo.pRenderPassInfo; + + 
RPS_ASSERT(runtimeCmd.renderPassId == rpIndex); // TODO + + bool bHasDsv = false; + uint32_t numRtvs = 0; + uint32_t numResolveRtvs = 0; + uint32_t attchmtCount = 0; + + VkImageView attchmtViews[RPS_MAX_SIMULTANEOUS_RENDER_TARGET_COUNT * 2 + 1]; + + auto fixedFuncBindings = nodeDeclInfo.fixedFunctionBindings.Get(nodeDeclInfo.semanticKinds); + + auto cmdDescriptorIndices = + m_accessToDescriptorMap.range(pCmdInfo->accesses.GetBegin(), pCmdInfo->accesses.size()); + auto cmdAccessInfos = pCmdInfo->accesses.Get(cmdAccesses); + + static constexpr VkAttachmentReference unusedAttchmt = {VK_ATTACHMENT_UNUSED, VK_IMAGE_LAYOUT_UNDEFINED}; + + uint32_t lastParamId = UINT32_MAX; + + for (auto& rtParamRef : nodeDeclRenderPassInfo.GetRenderTargetRefs()) + { + auto& paramAccessInfo = nodeDeclInfo.params[rtParamRef.paramId]; + auto descriptorIndices = + cmdDescriptorIndices.range(paramAccessInfo.accessOffset, paramAccessInfo.numElements); + + if (lastParamId != rtParamRef.paramId) + { + RPS_ASSERT(numRtvs <= paramAccessInfo.baseSemanticIndex); + std::fill(colorRefs + numRtvs, colorRefs + paramAccessInfo.baseSemanticIndex, unusedAttchmt); + std::fill(resolveRefs + numRtvs, resolveRefs + paramAccessInfo.baseSemanticIndex, unusedAttchmt); + lastParamId = rtParamRef.paramId; + numRtvs = paramAccessInfo.baseSemanticIndex + paramAccessInfo.numElements; + } + + const uint32_t imgViewIndex = descriptorIndices[rtParamRef.arrayOffset]; + attchmtViews[attchmtCount] = currResources.imageViews[imgViewIndex]; + + auto& accessInfo = cmdAccessInfos[paramAccessInfo.accessOffset + rtParamRef.arrayOffset]; + + GetVkAttachmentDescription( + &attchmtDescs[attchmtCount], accessInfo, resources[accessInfo.resourceId], nodeDeclRenderPassInfo); + + const uint32_t rtSlot = paramAccessInfo.baseSemanticIndex + rtParamRef.arrayOffset; + + auto& colorRef = colorRefs[rtSlot]; + colorRef.attachment = attchmtCount; + colorRef.layout = GetVkImageLayout(paramAccessInfo.access); + + auto& resolveRef = 
resolveRefs[rtSlot]; + resolveRef.attachment = VK_ATTACHMENT_UNUSED; + resolveRef.layout = VK_IMAGE_LAYOUT_UNDEFINED; + + attchmtCount++; + } + + if (nodeDeclRenderPassInfo.depthStencilTargetMask) + { + auto& dsvParamRef = *nodeDeclRenderPassInfo.GetDepthStencilRef(); + auto& paramAccessInfo = nodeDeclInfo.params[dsvParamRef.paramId]; + RPS_ASSERT(paramAccessInfo.numElements == 1); + + const uint32_t imgViewIndex = cmdDescriptorIndices[paramAccessInfo.accessOffset]; + + attchmtViews[attchmtCount] = currResources.imageViews[imgViewIndex]; + + auto& accessInfo = cmdAccessInfos[paramAccessInfo.accessOffset]; + + GetVkAttachmentDescription( + &attchmtDescs[attchmtCount], accessInfo, resources[accessInfo.resourceId], nodeDeclRenderPassInfo); + + depthRef.attachment = numRtvs; + depthRef.layout = GetVkImageLayout(accessInfo.access); + attchmtCount++; + + bHasDsv = true; + } + + for (auto resolveParamRef : nodeDeclRenderPassInfo.GetResolveTargetRefs()) + { + auto& paramAccessInfo = nodeDeclInfo.params[resolveParamRef.paramId]; + auto descriptorIndices = + cmdDescriptorIndices.range(paramAccessInfo.accessOffset, paramAccessInfo.numElements); + + const uint32_t imgViewIndex = descriptorIndices[resolveParamRef.arrayOffset]; + attchmtViews[attchmtCount] = currResources.imageViews[imgViewIndex]; + + auto& accessInfo = cmdAccessInfos[paramAccessInfo.accessOffset + resolveParamRef.arrayOffset]; + + GetVkAttachmentDescription( + &attchmtDescs[attchmtCount], accessInfo, resources[accessInfo.resourceId], nodeDeclRenderPassInfo); + + const uint32_t rtSlot = paramAccessInfo.baseSemanticIndex + resolveParamRef.arrayOffset; + + RPS_ASSERT(nodeDeclRenderPassInfo.renderTargetsMask & (1u << rtSlot)); + + auto& resolveRef = resolveRefs[rtSlot]; + resolveRef.attachment = attchmtCount; + resolveRef.layout = GetVkImageLayout(accessInfo.access); + + attchmtCount++; + } + + auto dynamicStates = nodeDeclInfo.dynamicStates.Get(nodeDeclInfo.semanticKinds); + + VkClearValue 
clearValues[RPS_MAX_SIMULTANEOUS_RENDER_TARGET_COUNT + 1]; + uint32_t clearValueCount = 0; + + static_assert(sizeof(RpsClearColorValue) == sizeof(VkClearColorValue), + "Bad assumption about VkClearColorValue size"); + + uint32_t clearColorMask = nodeDeclRenderPassInfo.renderTargetClearMask; + for (auto clearColorRef : nodeDeclRenderPassInfo.GetRenderTargetClearValueRefs()) + { + uint32_t rtSlot = rpsFirstBitLow(clearColorMask); + clearColorMask &= ~(1u << rtSlot); + + const auto& colorAttchmtRef = colorRefs[rtSlot]; + + clearValues[colorAttchmtRef.attachment].color = + static_cast(cmd.args[clearColorRef.paramId])[clearColorRef.arrayOffset]; + + clearValueCount = rpsMax(clearValueCount, colorAttchmtRef.attachment + 1); + } + + if (nodeDeclRenderPassInfo.clearDepth) + { + auto& depthClearValueRef = *nodeDeclRenderPassInfo.GetDepthClearValueRef(); + + clearValues[depthRef.attachment].depthStencil.depth = + *static_cast(cmd.args[depthClearValueRef.paramId]); + + clearValueCount = rpsMax(clearValueCount, depthRef.attachment + 1); + } + + if (nodeDeclRenderPassInfo.clearStencil) + { + auto& stencilClearValueRef = *nodeDeclRenderPassInfo.GetStencilClearValueRef(); + + clearValues[depthRef.attachment].depthStencil.stencil = + *static_cast(cmd.args[stencilClearValueRef.paramId]); + + clearValueCount = rpsMax(clearValueCount, depthRef.attachment + 1); + } + + subpassDesc.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpassDesc.colorAttachmentCount = numRtvs; + subpassDesc.pColorAttachments = (numRtvs > 0) ? colorRefs : nullptr; + subpassDesc.pResolveAttachments = (numRtvs > 0) ? resolveRefs : nullptr; + subpassDesc.pDepthStencilAttachment = bHasDsv ? 
&depthRef : nullptr; + + rpInfo.attachmentCount = attchmtCount; + rpInfo.pAttachments = attchmtDescs; + rpInfo.dependencyCount = 0; // TODO: Only using cmd barriers atm + rpInfo.pDependencies = nullptr; + + auto pVkRP = &currResources.renderPasses[rpIndex]; + RPS_V_RETURN(VkResultToRps(vkCreateRenderPass(hVkDevice, &rpInfo, nullptr, pVkRP))); + + RPS_ASSERT(pCmdInfo->pRenderPassInfo); + auto& cmdRPInfo = *pCmdInfo->pRenderPassInfo; + + VkFramebufferCreateInfo fbInfo = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO}; + fbInfo.renderPass = currResources.renderPasses[rpIndex]; + fbInfo.attachmentCount = attchmtCount; + fbInfo.pAttachments = attchmtViews; + fbInfo.width = cmdRPInfo.viewportInfo.defaultRenderArea.width; + fbInfo.height = cmdRPInfo.viewportInfo.defaultRenderArea.height; + fbInfo.layers = 1; // TODO + + auto pVkFB = &currResources.frameBuffers[rpIndex]; + RPS_V_RETURN(VkResultToRps(vkCreateFramebuffer(hVkDevice, &fbInfo, nullptr, pVkFB))); // TODO + + if (clearValueCount > 0) + { + runtimeCmd.clearValues = context.frameArena.NewArray(clearValueCount); + RPS_CHECK_ALLOC(runtimeCmd.clearValues.data()); + + memcpy(runtimeCmd.clearValues.data(), clearValues, sizeof(VkClearValue) * clearValueCount); + } + + rpIndex++; + } + + return RPS_OK; + } + + RpsResult VKRuntimeBackend::TrackImageLayoutInfo(RpsResourceId resourceId, + const ResourceInstance& resInfo, + const CmdAccessInfo& accessInfo) + { + RPS_ASSERT(resourceId != RPS_RESOURCE_ID_INVALID); + + if (!(resInfo.allAccesses.accessFlags & RPS_ACCESS_DEPTH_STENCIL)) + { + return RPS_OK; + } + + if (m_resourceLayoutOffsets.size() <= resourceId) + { + m_resourceLayoutOffsets.resize(resourceId + 1, UINT32_MAX); + } + + if (m_resourceLayoutOffsets[resourceId] == UINT32_MAX) + { + m_resourceLayoutOffsets[resourceId] = uint32_t(m_subResLayouts.size()); + RPS_CHECK_ALLOC(m_subResLayouts.grow(resInfo.numSubResources, VK_IMAGE_LAYOUT_UNDEFINED)); + } + + ArrayRef subResLayouts = 
{&m_subResLayouts[m_resourceLayoutOffsets[resourceId]], + resInfo.numSubResources}; + + const VkImageLayout layout = GetVkImageLayout(accessInfo.access); + const uint32_t numSubResPerAspect = resInfo.desc.GetImageArrayLayers() * resInfo.desc.image.mipLevels; + + for (uint32_t iAspect = 0; iAspect < 2; iAspect++) + { + const RpsImageAspectUsageFlags aspect = (iAspect == 0) ? RPS_IMAGE_ASPECT_DEPTH : RPS_IMAGE_ASPECT_STENCIL; + + if (accessInfo.range.aspectMask & aspect) + { + for (uint32_t iArray = accessInfo.range.baseArrayLayer, arrayEnd = accessInfo.range.arrayLayerEnd; + iArray < arrayEnd; + iArray++) + { + const uint32_t subResOffset = iAspect * numSubResPerAspect + iArray * resInfo.desc.image.mipLevels; + const auto subResBegin = subResLayouts.begin() + subResOffset; + + RPS_ASSERT(subResOffset + accessInfo.range.GetMipLevelCount() <= subResLayouts.size()); + + std::fill(subResBegin, subResBegin + accessInfo.range.GetMipLevelCount(), layout); + } + } + } + + return RPS_OK; + } + + VkImageLayout VKRuntimeBackend::GetTrackedImageLayoutInfo(const ResourceInstance& resInfo, + const CmdAccessInfo& accessInfo) const + { + if ((accessInfo.resourceId < m_resourceLayoutOffsets.size()) && + (m_resourceLayoutOffsets[accessInfo.resourceId] != UINT32_MAX)) + { + RPS_ASSERT(resInfo.allAccesses.accessFlags & RPS_ACCESS_DEPTH_STENCIL); + + const uint32_t numSubResPerAspect = resInfo.desc.GetImageArrayLayers() * resInfo.desc.image.mipLevels; + + const uint32_t layoutInfoOffset = + m_resourceLayoutOffsets[accessInfo.resourceId] + + ((accessInfo.range.aspectMask & RPS_IMAGE_ASPECT_DEPTH) ? 
0 : numSubResPerAspect) + + accessInfo.range.baseArrayLayer * resInfo.desc.image.mipLevels + accessInfo.range.baseMipLevel; + + return m_subResLayouts[layoutInfoOffset]; + } + + return GetVkImageLayout(accessInfo.access); + } + + void VKRuntimeBackend::ProcessBarrierBatch(const RenderGraphUpdateContext& context, + Span& transitionRange) + { + auto& aliasingInfos = context.renderGraph.GetResourceAliasingInfos(); + auto& resourceInstances = context.renderGraph.GetResourceInstances(); + auto& transitions = context.renderGraph.GetTransitions(); + auto transitionRangeCmds = transitionRange.Get(context.renderGraph.GetRuntimeCmdInfos()); + + VKBarrierBatch currBatch = {}; + + currBatch.imageBarriers.SetRange(uint32_t(m_imageBarriers.size()), 0); + currBatch.bufferBarriers.SetRange(uint32_t(m_bufferBarriers.size()), 0); + currBatch.memoryBarriers.SetRange(uint32_t(m_memoryBarriers.size()), 0); + + constexpr RpsAccessAttr noAccess = {RPS_ACCESS_UNKNOWN, RPS_SHADER_STAGE_NONE}; + + for (uint32_t idx = 0; idx < transitionRangeCmds.size(); idx++) + { + auto& cmd = transitionRangeCmds[idx]; + RPS_ASSERT(cmd.isTransition); + + if (cmd.cmdId < CMD_ID_PREAMBLE) + { + const auto& currTrans = transitions[cmd.cmdId]; + const auto& resInstance = resourceInstances[currTrans.access.resourceId]; + + const auto& prevAccess = (currTrans.prevTransition != RenderGraph::INVALID_TRANSITION) + ? transitions[currTrans.prevTransition].access.access + : (resInstance.isPendingInit ? noAccess : resInstance.initialAccess); + + if (resInstance.desc.IsImage()) + { + auto hImage = FromHandle(resInstance.hRuntimeResource); + + AppendImageBarrier( + hImage, currBatch, prevAccess, currTrans.access.access, resInstance, currTrans.access.range); + + // VK 1.1 specific workaround for depth/stencil readonly + srv image layout... 
+ if ((prevAccess.accessFlags | currTrans.access.access.accessFlags) & RPS_ACCESS_DEPTH_STENCIL) + { + TrackImageLayoutInfo(currTrans.access.resourceId, resInstance, currTrans.access); + } + } + else if (resInstance.desc.IsBuffer()) + { + auto hBuffer = FromHandle(resInstance.hRuntimeResource); + + AppendBufferBarrier(hBuffer, currBatch, prevAccess, currTrans.access.access, resInstance); + } + + // For aliased resources, wait on deactivating final access pipeline stages. + for (auto& aliasing : cmd.aliasingInfos.Get(aliasingInfos)) + { + if (aliasing.srcDeactivating) + { + if (aliasing.srcResourceIndex != RPS_RESOURCE_ID_INVALID) + { + auto& srcResInfo = resourceInstances[aliasing.srcResourceIndex]; + + for (auto& finalAccess : + srcResInfo.finalAccesses.Get(context.renderGraph.GetResourceFinalAccesses())) + { + if (finalAccess.prevTransition != RenderGraph::INVALID_TRANSITION) + { + auto& finalAccessAttr = transitions[finalAccess.prevTransition].access.access; + auto srcAccessInfo = GetVKAccessInfo(finalAccessAttr); + + currBatch.srcStage |= srcAccessInfo.stages; + } + } + } + else + { + currBatch.srcStage |= VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + } + } + } + } + else if (cmd.cmdId == CMD_ID_POSTAMBLE) + { + // At frame end, transit non-deactivated resource states to initial states + // TODO: Extract non-aliased resource list ahead of time. 
+ for (uint32_t iRes = 0, numRes = uint32_t(resourceInstances.size()); iRes < numRes; iRes++) + { + auto& resInstance = resourceInstances[iRes]; + + if (!resInstance.isAliased) + { + for (auto& finalAccess : + resInstance.finalAccesses.Get(context.renderGraph.GetResourceFinalAccesses())) + { + if (finalAccess.prevTransition != RenderGraph::INVALID_TRANSITION) + { + if (resInstance.desc.IsImage()) + { + auto hImage = FromHandle(resInstance.hRuntimeResource); + + AppendImageBarrier(hImage, + currBatch, + transitions[finalAccess.prevTransition].access.access, + resInstance.initialAccess, + resInstance, + finalAccess.range); + } + else if (resInstance.desc.IsBuffer()) + { + auto hBuffer = FromHandle(resInstance.hRuntimeResource); + + AppendBufferBarrier(hBuffer, + currBatch, + transitions[finalAccess.prevTransition].access.access, + resInstance.initialAccess, + resInstance); + } + } + } + } + } + } + } + + currBatch.imageBarriers.SetEnd(uint32_t(m_imageBarriers.size())); + currBatch.bufferBarriers.SetEnd(uint32_t(m_bufferBarriers.size())); + currBatch.memoryBarriers.SetEnd(uint32_t(m_memoryBarriers.size())); + + if (!(currBatch.imageBarriers.empty() && currBatch.bufferBarriers.empty() && currBatch.memoryBarriers.empty())) + { + auto pNewRuntimeCmd = m_runtimeCmds.grow(1); + + pNewRuntimeCmd->cmdId = RPS_CMD_ID_INVALID; + pNewRuntimeCmd->barrierBatchId = uint32_t(m_barrierBatches.size()); + + m_barrierBatches.push_back(currBatch); + } + + transitionRange = {}; + } + + void VKRuntimeBackend::AppendImageBarrier(VkImage hImage, + VKBarrierBatch& barrierBatch, + const RpsAccessAttr& beforeAccess, + const RpsAccessAttr& afterAccess, + const ResourceInstance& resInfo, + const SubresourceRangePacked range) + { + auto* pImgBarrier = m_imageBarriers.grow(1, {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}); + + auto srcAccessInfo = GetVKAccessInfo(beforeAccess); + auto dstAccessInfo = GetVKAccessInfo(afterAccess); + + barrierBatch.srcStage |= srcAccessInfo.stages; + 
barrierBatch.dstStage |= dstAccessInfo.stages; + + pImgBarrier->srcAccessMask = srcAccessInfo.access; + pImgBarrier->dstAccessMask = dstAccessInfo.access; + pImgBarrier->oldLayout = srcAccessInfo.imgLayout; + pImgBarrier->newLayout = dstAccessInfo.imgLayout; + pImgBarrier->srcQueueFamilyIndex = srcAccessInfo.queueFamilyIndex; + pImgBarrier->dstQueueFamilyIndex = dstAccessInfo.queueFamilyIndex; + pImgBarrier->image = hImage; + + pImgBarrier->subresourceRange.aspectMask = range.aspectMask; + pImgBarrier->subresourceRange.baseMipLevel = range.baseMipLevel; + pImgBarrier->subresourceRange.levelCount = range.GetMipLevelCount(); + pImgBarrier->subresourceRange.baseArrayLayer = range.baseArrayLayer; + pImgBarrier->subresourceRange.layerCount = range.GetArrayLayerCount(); + } + + void VKRuntimeBackend::AppendBufferBarrier(VkBuffer hBuffer, + VKBarrierBatch& barrierBatch, + const RpsAccessAttr& beforeAccess, + const RpsAccessAttr& afterAccess, + const ResourceInstance& resInfo) + { + auto* pBufBarrier = m_bufferBarriers.grow(1, {VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER}); + + auto srcAccessInfo = GetVKAccessInfo(beforeAccess); + auto dstAccessInfo = GetVKAccessInfo(afterAccess); + + barrierBatch.srcStage |= srcAccessInfo.stages; + barrierBatch.dstStage |= dstAccessInfo.stages; + + pBufBarrier->srcAccessMask = srcAccessInfo.access; + pBufBarrier->dstAccessMask = dstAccessInfo.access; + pBufBarrier->srcQueueFamilyIndex = srcAccessInfo.queueFamilyIndex; + pBufBarrier->dstQueueFamilyIndex = dstAccessInfo.queueFamilyIndex; + pBufBarrier->buffer = hBuffer; + pBufBarrier->offset = 0; + pBufBarrier->size = VK_WHOLE_SIZE; + } + + void VKRuntimeBackend::RecordBarrierBatch(VkCommandBuffer hCmdBuf, uint32_t barrierBatch) const + { + const auto& batch = m_barrierBatches[barrierBatch]; + + vkCmdPipelineBarrier(hCmdBuf, + batch.srcStage, + batch.dstStage, + VK_DEPENDENCY_BY_REGION_BIT, + batch.memoryBarriers.size(), + batch.memoryBarriers.Get(m_memoryBarriers).data(), + 
batch.bufferBarriers.size(),
+                             batch.bufferBarriers.Get(m_bufferBarriers).data(),
+                             batch.imageBarriers.size(),
+                             batch.imageBarriers.Get(m_imageBarriers).data());
+    }
+
+    // Copies `count` view handles of node argument `argIndex`, starting at array element
+    // `srcArrayOffset`, from `views` into `pViews`.
+    //
+    // Returns RPS_ERROR_INDEX_OUT_OF_BOUNDS when `argIndex` or the requested element range is
+    // out of range, RPS_ERROR_INVALID_OPERATION when the parameter is flagged
+    // RPS_ACCESS_NO_VIEW_BIT, and RPS_ERROR_TYPE_MISMATCH when the parameter is not a resource
+    // or its type id does not match the requested handle type. A zero `count` returns RPS_OK
+    // once `argIndex` has been validated.
+    //
+    // NOTE(review): the template parameter lists in this function were missing from the text
+    // and have been reconstructed from how ViewHandleType is used - confirm against the header.
+    template <typename ViewHandleType>
+    RpsResult VKRuntimeBackend::GetCmdArgViews(const RuntimeCmdCallbackContext& context,
+                                               ConstArrayRef<ViewHandleType>    views,
+                                               uint32_t                         argIndex,
+                                               uint32_t                         srcArrayOffset,
+                                               ViewHandleType*                  pViews,
+                                               uint32_t                         count) const
+    {
+        RPS_RETURN_ERROR_IF(argIndex >= context.pNodeDeclInfo->params.size(), RPS_ERROR_INDEX_OUT_OF_BOUNDS);
+        RPS_RETURN_OK_IF(count == 0);
+
+        auto& paramAccessInfo = context.pNodeDeclInfo->params[argIndex];
+        // Widen before adding: in 32-bit arithmetic "srcArrayOffset + count" can wrap around
+        // and slip past the bounds check.
+        RPS_RETURN_ERROR_IF((uint64_t(srcArrayOffset) + count) > paramAccessInfo.numElements,
+                            RPS_ERROR_INDEX_OUT_OF_BOUNDS);
+        RPS_RETURN_ERROR_IF(paramAccessInfo.access.accessFlags & RPS_ACCESS_NO_VIEW_BIT, RPS_ERROR_INVALID_OPERATION);
+        RPS_RETURN_ERROR_IF(!paramAccessInfo.IsResource(), RPS_ERROR_TYPE_MISMATCH);
+        RPS_RETURN_ERROR_IF(VkObjectTypeMapper<ViewHandleType>::typeId != paramAccessInfo.typeInfo.id,
+                            RPS_ERROR_TYPE_MISMATCH);
+
+        // Descriptor indices of this command's accesses, addressed relative to the command.
+        auto descriptorIndices =
+            m_accessToDescriptorMap.range(context.pCmdInfo->accesses.GetBegin(), context.pCmdInfo->accesses.size());
+
+        RPS_ASSERT((paramAccessInfo.accessOffset + paramAccessInfo.numElements) <= descriptorIndices.size());
+
+        // Assuming all elements in the same parameter have the same access
+        for (uint32_t i = 0; i < count; i++)
+        {
+            const uint32_t imgViewIndex = descriptorIndices[paramAccessInfo.accessOffset + srcArrayOffset + i];
+
+            pViews[i] = views[imgViewIndex];
+        }
+
+        return RPS_OK;
+    }
+
+    // Retrieves `count` runtime resource handles of node argument `argIndex`, starting at array
+    // element `srcArrayOffset`, into `pResources`.
+    // NOTE(review): template parameter list reconstructed from the use of ResHandleType - confirm.
+    template <typename ResHandleType>
+    RpsResult VKRuntimeBackend::GetCmdArgResources(const RpsCmdCallbackContext* pContext,
+                                                   uint32_t                     argIndex,
+                                                   uint32_t                     srcArrayOffset,
+                                                   ResHandleType*               pResources,
+                                                   uint32_t                     count)
+    {
+        RPS_CHECK_ARGS(pContext && pResources);
+        RPS_RETURN_OK_IF(count == 0);
+
+        const auto& context = *RuntimeCmdCallbackContext::Get(pContext);
+
+        RPS_RETURN_ERROR_IF(argIndex >= context.pNodeDeclInfo->params.size(), RPS_ERROR_INDEX_OUT_OF_BOUNDS);
+
+        auto&
paramAccessInfo = context.pNodeDeclInfo->params[argIndex]; + RPS_RETURN_ERROR_IF(srcArrayOffset + count > paramAccessInfo.numElements, RPS_ERROR_INDEX_OUT_OF_BOUNDS); + RPS_RETURN_ERROR_IF(!paramAccessInfo.IsResource(), RPS_ERROR_TYPE_MISMATCH); + RPS_RETURN_ERROR_IF(VkObjectTypeMapper::typeId != paramAccessInfo.typeInfo.id, + RPS_ERROR_TYPE_MISMATCH); + + auto& renderGraph = *context.pRenderGraph; + const auto& resInstances = renderGraph.GetResourceInstances(); + const auto& cmdAccesses = renderGraph.GetCmdAccessInfos(); + auto accessRange = context.pCmdInfo->accesses.Get(cmdAccesses); + + RPS_ASSERT((paramAccessInfo.accessOffset + paramAccessInfo.numElements) <= accessRange.size()); + + static constexpr RpsRuntimeResource RuntimeResourceNull = RpsRuntimeResource{nullptr}; + + // Assuming all elements in the same parameter have the same access + for (uint32_t i = 0; i < count; i++) + { + const uint32_t resId = accessRange[paramAccessInfo.accessOffset + srcArrayOffset + i].resourceId; + + FromHandle(pResources[i], + (resId != RPS_RESOURCE_ID_INVALID) ? 
resInstances[resId].hRuntimeResource : RuntimeResourceNull);
+        }
+
+        return RPS_OK;
+    }
+
+    // Fills `pMemoryRanges` with the device memory handle, offset and size backing `count`
+    // elements of node argument `argIndex`, starting at array element `srcArrayOffset`.
+    //
+    // Elements whose access has no valid resource id are value-initialized. Elements whose
+    // resource has no heap assigned get VK_NULL_HANDLE as the memory handle.
+    // Returns RPS_ERROR_INDEX_OUT_OF_BOUNDS when `argIndex` or the element range is out of
+    // range and RPS_ERROR_TYPE_MISMATCH when the parameter is not a resource.
+    RpsResult VKRuntimeBackend::GetCmdArgGpuMemoryRanges(const RpsCmdCallbackContext* pContext,
+                                                         uint32_t                     argIndex,
+                                                         uint32_t                     srcArrayOffset,
+                                                         RpsVkDeviceMemoryRange*      pMemoryRanges,
+                                                         uint32_t                     count)
+    {
+        RPS_CHECK_ARGS(pContext && pMemoryRanges);
+
+        const auto& context = *RuntimeCmdCallbackContext::Get(pContext);
+
+        RPS_RETURN_ERROR_IF(argIndex >= context.pNodeDeclInfo->params.size(), RPS_ERROR_INDEX_OUT_OF_BOUNDS);
+
+        auto& paramAccessInfo = context.pNodeDeclInfo->params[argIndex];
+        // Widen before adding: in 32-bit arithmetic "srcArrayOffset + count" can wrap around
+        // and slip past the bounds check.
+        RPS_RETURN_ERROR_IF((uint64_t(srcArrayOffset) + count) > paramAccessInfo.numElements,
+                            RPS_ERROR_INDEX_OUT_OF_BOUNDS);
+        RPS_RETURN_ERROR_IF(!paramAccessInfo.IsResource(), RPS_ERROR_TYPE_MISMATCH);
+
+        auto&       renderGraph  = *context.pRenderGraph;
+        const auto& resInstances = renderGraph.GetResourceInstances();
+        const auto& cmdAccesses  = renderGraph.GetCmdAccessInfos();
+        auto        accessRange  = context.pCmdInfo->accesses.Get(cmdAccesses);
+        auto&       heaps        = renderGraph.GetHeapInfos();
+
+        // Assuming all elements in the same parameter have the same access
+        for (uint32_t i = 0; i < count; i++)
+        {
+            auto& dstMemRange = pMemoryRanges[i];
+
+            const uint32_t resId = accessRange[paramAccessInfo.accessOffset + srcArrayOffset + i].resourceId;
+
+            if (resId != RPS_RESOURCE_ID_INVALID)
+            {
+                const auto& resInfo = resInstances[resId];
+
+                dstMemRange.hMemory = (resInfo.allocPlacement.heapId != RPS_INDEX_NONE_U32)
+                                          ?
rpsVKMemoryFromHandle(heaps[resInfo.allocPlacement.heapId].hRuntimeHeap) + : VK_NULL_HANDLE; + dstMemRange.offset = resInfo.allocPlacement.offset; + dstMemRange.size = resInfo.allocRequirement.size; + } + else + { + dstMemRange = {}; + } + } + + return RPS_OK; + } + + RpsResult VKRuntimeBackend::GetCmdArgViews(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + VkImageView* pImageViews, + uint32_t count) + { + RPS_CHECK_ARGS(pContext); + + const auto& context = *RuntimeCmdCallbackContext::Get(pContext); + auto pBackend = context.GetBackend(); + + return pBackend->GetCmdArgViews( + context, + pBackend->m_frameResources[pBackend->m_currentResourceFrame].imageViews.range_all(), + argIndex, + srcArrayOffset, + pImageViews, + count); + } + + RpsResult VKRuntimeBackend::GetCmdArgViews(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + VkBufferView* pBufferViews, + uint32_t count) + { + RPS_CHECK_ARGS(pContext); + + const auto& context = *RuntimeCmdCallbackContext::Get(pContext); + auto pBackend = context.GetBackend(); + + return pBackend->GetCmdArgViews( + context, + pBackend->m_frameResources[pBackend->m_currentResourceFrame].bufferViews.range_all(), + argIndex, + srcArrayOffset, + pBufferViews, + count); + } + + RpsResult VKRuntimeBackend::GetCmdArgImageViewInfos(const RuntimeCmdCallbackContext& context, + uint32_t argIndex, + uint32_t srcArrayOffset, + RpsVkImageViewInfo* pImageViewInfos, + uint32_t count) const + { + RPS_RETURN_ERROR_IF(argIndex >= context.pNodeDeclInfo->params.size(), RPS_ERROR_INDEX_OUT_OF_BOUNDS); + + const auto& paramAccessInfo = context.pNodeDeclInfo->params[argIndex]; + RPS_RETURN_ERROR_IF(srcArrayOffset + count > paramAccessInfo.numElements, RPS_ERROR_INDEX_OUT_OF_BOUNDS); + RPS_RETURN_ERROR_IF(paramAccessInfo.access.accessFlags & RPS_ACCESS_NO_VIEW_BIT, RPS_ERROR_INVALID_OPERATION); + + const auto descriptorIndices = + 
m_accessToDescriptorMap.range(context.pCmdInfo->accesses.GetBegin(), context.pCmdInfo->accesses.size()); + + const auto imageViews = m_frameResources[m_currentResourceFrame].imageViews.crange_all(); + + // Assuming all elements in the same parameter has the same access + RpsVkImageViewInfo* pDstImageViewInfo = pImageViewInfos; + for (uint32_t i = 0; i < count; i++, pDstImageViewInfo++) + { + const uint32_t imgViewIndex = descriptorIndices[paramAccessInfo.accessOffset + srcArrayOffset + i]; + + pDstImageViewInfo->hImageView = imageViews[imgViewIndex]; + pDstImageViewInfo->layout = m_imageViewLayouts[imgViewIndex]; + } + + return RPS_OK; + } + + RpsResult VKRuntimeBackend::GetCmdRenderPass(const RpsCmdCallbackContext* pContext, VkRenderPass* pRenderPass) + { + RPS_CHECK_ARGS(pContext && pRenderPass); + + const auto& context = *RuntimeCmdCallbackContext::Get(pContext); + auto pBackend = context.GetBackend(); + + RPS_RETURN_ERROR_IF(context.pRuntimeCmd == nullptr, RPS_ERROR_INVALID_OPERATION); + + auto& frameResources = pBackend->m_frameResources[pBackend->m_currentResourceFrame]; + auto pRuntimeCmd = context.GetRuntimeCmd(); + + RPS_RETURN_ERROR_IF(pRuntimeCmd->renderPassId >= frameResources.renderPasses.size(), + RPS_ERROR_INVALID_OPERATION); + + *pRenderPass = frameResources.renderPasses[pRuntimeCmd->renderPassId]; + + return RPS_OK; + } + + const VKRuntimeBackend* VKRuntimeBackend::Get(const RpsCmdCallbackContext* pContext) + { + const auto& context = *RuntimeCmdCallbackContext::Get(pContext); + return context.GetBackend(); + } + +} // namespace rps + +RpsResult rpsVKGetCmdArgImageViewArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + VkImageView* pImageViews, + uint32_t count) +{ + return rps::VKRuntimeBackend::GetCmdArgViews(pContext, argIndex, srcArrayOffset, pImageViews, count); +} + +RpsResult rpsVKGetCmdArgImageView(const RpsCmdCallbackContext* pContext, uint32_t argIndex, VkImageView* pImageView) +{ + return 
rpsVKGetCmdArgImageViewArray(pContext, argIndex, 0, pImageView, 1); +} + +RpsResult rpsVKGetCmdArgImageViewInfoArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + RpsVkImageViewInfo* pImageViewInfos, + uint32_t count) +{ + RPS_CHECK_ARGS(pContext); + + const auto& context = *rps::RuntimeCmdCallbackContext::Get(pContext); + auto pBackend = context.GetBackend(); + + return pBackend->GetCmdArgImageViewInfos(context, argIndex, srcArrayOffset, pImageViewInfos, count); +} + +RpsResult rpsVKGetCmdArgImageViewInfo(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + RpsVkImageViewInfo* pImageViewInfo) +{ + return rpsVKGetCmdArgImageViewInfoArray(pContext, argIndex, 0, pImageViewInfo, 1); +} + +RpsResult rpsVKGetCmdArgBufferViewArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + VkBufferView* pBufferViews, + uint32_t count) +{ + return rps::VKRuntimeBackend::GetCmdArgViews(pContext, argIndex, srcArrayOffset, pBufferViews, count); +} + +RpsResult rpsVKGetCmdArgBufferView(const RpsCmdCallbackContext* pContext, uint32_t argIndex, VkBufferView* pBufferViews) +{ + return rpsVKGetCmdArgBufferViewArray(pContext, argIndex, 0, pBufferViews, 1); +} + +RpsResult rpsVKGetCmdArgImageArray( + const RpsCmdCallbackContext* pContext, uint32_t argIndex, uint32_t srcArrayOffset, VkImage* pImages, uint32_t count) +{ + return rps::VKRuntimeBackend::GetCmdArgResources(pContext, argIndex, srcArrayOffset, pImages, count); +} + +RpsResult rpsVKGetCmdArgImage(const RpsCmdCallbackContext* pContext, uint32_t argIndex, VkImage* pImage) +{ + return rpsVKGetCmdArgImageArray(pContext, argIndex, 0, pImage, 1); +} + +RpsResult rpsVKGetCmdArgBufferArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + VkBuffer* pBuffers, + uint32_t count) +{ + return rps::VKRuntimeBackend::GetCmdArgResources(pContext, argIndex, srcArrayOffset, pBuffers, count); +} + +RpsResult 
rpsVKGetCmdArgBuffer(const RpsCmdCallbackContext* pContext, uint32_t argIndex, VkBuffer* pBuffer) +{ + return rpsVKGetCmdArgBufferArray(pContext, argIndex, 0, pBuffer, 1); +} + +RpsResult rpsVKGetCmdArgGpuMemoryArray(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + uint32_t srcArrayOffset, + RpsVkDeviceMemoryRange* pMemoryRanges, + uint32_t count) +{ + return rps::VKRuntimeBackend::GetCmdArgGpuMemoryRanges(pContext, argIndex, srcArrayOffset, pMemoryRanges, count); +} + +RpsResult rpsVKGetCmdArgGpuMemory(const RpsCmdCallbackContext* pContext, + uint32_t argIndex, + RpsVkDeviceMemoryRange* pMemoryRange) +{ + return rpsVKGetCmdArgGpuMemoryArray(pContext, argIndex, 0, pMemoryRange, 1); +} + +RpsResult rpsVKGetCmdRenderPass(const RpsCmdCallbackContext* pContext, VkRenderPass* pRenderPass) +{ + return rps::VKRuntimeBackend::GetCmdRenderPass(pContext, pRenderPass); +} diff --git a/src/runtime/vk/rps_vk_runtime_backend.hpp b/src/runtime/vk/rps_vk_runtime_backend.hpp new file mode 100644 index 0000000..a8ea1fd --- /dev/null +++ b/src/runtime/vk/rps_vk_runtime_backend.hpp @@ -0,0 +1,253 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+
+#ifndef RPS_VK_RUNTIME_BACKEND_H
+#define RPS_VK_RUNTIME_BACKEND_H
+
+#include "runtime/common/rps_render_graph.hpp"
+#include "runtime/vk/rps_vk_runtime_device.hpp"
+
+namespace rps
+{
+    // Vulkan implementation of RuntimeBackend. Declares the per-frame Vulkan objects
+    // (views, render passes, framebuffers), the barrier storage and the static
+    // accessors used by the rpsVKGetCmdArg* C entry points.
+    // NOTE(review): template argument lists (Span<...>, ArrayRef<...>, ArenaVector<...>,
+    // template <...>) appear to have been stripped from this copy of the file.
+    class VKRuntimeBackend : public RuntimeBackend
+    {
+    public:
+        // One batch of barriers: source/destination pipeline stage masks plus three spans.
+        // Presumably the spans index m_imageBarriers / m_bufferBarriers / m_memoryBarriers - TODO confirm.
+        struct VKBarrierBatch
+        {
+            VkPipelineStageFlags srcStage;
+            VkPipelineStageFlags dstStage;
+            Span imageBarriers;
+            Span bufferBarriers;
+            Span memoryBarriers;
+        };
+
+        // Backend-side record for one runtime command. Every id defaults to
+        // RPS_INDEX_NONE_U32, which marks "not assigned".
+        struct VKRuntimeCmd : public RuntimeCmd
+        {
+            uint32_t barrierBatchId;
+            uint32_t resourceBindingInfo;
+            uint32_t renderPassId;
+            uint32_t frameBufferId;
+
+            ArrayRef clearValues;
+
+            // Default: all four ids are RPS_INDEX_NONE_U32.
+            VKRuntimeCmd()
+                : barrierBatchId(RPS_INDEX_NONE_U32)
+                , resourceBindingInfo(RPS_INDEX_NONE_U32)
+                , renderPassId(RPS_INDEX_NONE_U32)
+                , frameBufferId(RPS_INDEX_NONE_U32)
+            {
+            }
+
+            // Sets command id, barrier batch and binding info; render pass and
+            // framebuffer ids stay RPS_INDEX_NONE_U32.
+            VKRuntimeCmd(uint32_t inCmdId, uint32_t inBarrierBatchId, uint32_t inResourceBindingInfo)
+                : RuntimeCmd(inCmdId)
+                , barrierBatchId(inBarrierBatchId)
+                , resourceBindingInfo(inResourceBindingInfo)
+                , renderPassId(RPS_INDEX_NONE_U32)
+                , frameBufferId(RPS_INDEX_NONE_U32)
+            {
+            }
+        };
+
+    public:
+        // The persistent arena is built from the device allocator; the pending-release
+        // lists and the frame-resource list are bound to that arena here.
+        VKRuntimeBackend(VKRuntimeDevice& device, RenderGraph& renderGraph)
+            : RuntimeBackend(renderGraph)
+            , m_device(device)
+            , m_persistentPool(device.GetDevice().Allocator())
+            , m_pendingReleaseImages(&m_persistentPool)
+            , m_pendingReleaseBuffers(&m_persistentPool)
+            , m_frameResources(&m_persistentPool)
+        {
+        }
+
+        virtual ~VKRuntimeBackend();
+
+        virtual RpsResult RecordCommands(const RenderGraph& renderGraph,
+                                         const RpsRenderGraphRecordCommandInfo& recordInfo) const override final;
+
+        virtual RpsResult RecordCmdRenderPassBegin(const RuntimeCmdCallbackContext& context) const override final;
+
+        virtual RpsResult RecordCmdRenderPassEnd(const RuntimeCmdCallbackContext& context) const override final;
+
+        virtual RpsResult RecordCmdFixedFunctionBindingsAndDynamicStates(
+            const RuntimeCmdCallbackContext& context) const override final;
+
+        virtual void DestroyRuntimeResourceDeferred(ResourceInstance& resource) override final;
+
+        // Writes `count` image-view handles and their layouts for node argument `argIndex`,
+        // starting at element `srcArrayOffset`. The implementation returns
+        // RPS_ERROR_INDEX_OUT_OF_BOUNDS for a bad argument index or element range and
+        // RPS_ERROR_INVALID_OPERATION when the parameter access has RPS_ACCESS_NO_VIEW_BIT.
+        RpsResult GetCmdArgImageViewInfos(const RuntimeCmdCallbackContext& context,
+                                          uint32_t argIndex,
+                                          uint32_t srcArrayOffset,
+                                          RpsVkImageViewInfo* pImageViewInfos,
+                                          uint32_t count) const;
+
+        // Static overloads used by the C API; each forwards to the private template
+        // overload with the current frame's image views or buffer views.
+        static RpsResult GetCmdArgViews(const RpsCmdCallbackContext* pContext,
+                                        uint32_t argIndex,
+                                        uint32_t srcArrayOffset,
+                                        VkImageView* pImageViews,
+                                        uint32_t count);
+        static RpsResult GetCmdArgViews(const RpsCmdCallbackContext* pContext,
+                                        uint32_t argIndex,
+                                        uint32_t srcArrayOffset,
+                                        VkBufferView* pBufferViews,
+                                        uint32_t count);
+
+        // Fetches the VkImage / VkBuffer handles bound to a resource argument.
+        template
+        static RpsResult GetCmdArgResources(const RpsCmdCallbackContext* pContext,
+                                            uint32_t argIndex,
+                                            uint32_t srcArrayOffset,
+                                            ResHandleType* pResources,
+                                            uint32_t count);
+
+        // Fills device-memory handle, offset and size per element; an element whose
+        // resource id is RPS_RESOURCE_ID_INVALID is value-initialised instead.
+        static RpsResult GetCmdArgGpuMemoryRanges(const RpsCmdCallbackContext* pContext,
+                                                  uint32_t argIndex,
+                                                  uint32_t srcArrayOffset,
+                                                  RpsVkDeviceMemoryRange* pMemoryRanges,
+                                                  uint32_t count);
+
+        // Returns the render pass of the current command; the implementation fails with
+        // RPS_ERROR_INVALID_OPERATION when there is no runtime command or its
+        // renderPassId is outside the current frame's render pass list.
+        static RpsResult GetCmdRenderPass(const RpsCmdCallbackContext* pContext, VkRenderPass* pRenderPass);
+
+        // Returns the backend stored in the callback context (pContext is dereferenced unchecked).
+        static const VKRuntimeBackend* Get(const RpsCmdCallbackContext* pContext);
+
+        // Converts the context's opaque command-buffer handle to a VkCommandBuffer.
+        static VkCommandBuffer GetContextVkCmdBuf(const RuntimeCmdCallbackContext& context)
+        {
+            return rpsVKCommandBufferFromHandle(context.hCommandBuffer);
+        }
+
+    protected:
+        virtual RpsResult UpdateFrame(const RenderGraphUpdateContext& context) override final;
+        virtual RpsResult CreateHeaps(const RenderGraphUpdateContext& context, ArrayRef heaps) override final;
+        virtual void DestroyHeaps(ArrayRef heaps) override final;
+        virtual RpsResult CreateResources(const RenderGraphUpdateContext& context,
+                                          ArrayRef resources) override final;
+        virtual void DestroyResources(ArrayRef resources) override final;
+        virtual RpsResult CreateCommandResources(const RenderGraphUpdateContext& context) override final;
+        virtual void OnDestroy() override final;
+
+    private:
+        RPS_NO_DISCARD
+        RpsResult CreateBufferViews(const RenderGraphUpdateContext& context, ConstArrayRef accessIndices);
+        RPS_NO_DISCARD
+        RpsResult CreateImageViews(const RenderGraphUpdateContext& context, ConstArrayRef accessIndices);
+
+        RpsResult CreateRenderPasses(const RenderGraphUpdateContext& context, ConstArrayRef cmdIndices);
+
+        void ProcessBarrierBatch(const RenderGraphUpdateContext& context, Span& transitionRange);
+
+        void AppendImageBarrier(VkImage hImage,
+                                VKBarrierBatch& barrierBatch,
+                                const RpsAccessAttr& prevAccess,
+                                const RpsAccessAttr& currAccess,
+                                const ResourceInstance& resInfo,
+                                const SubresourceRangePacked range);
+
+        void AppendBufferBarrier(VkBuffer hBuffer,
+                                 VKBarrierBatch& barrierBatch,
+                                 const RpsAccessAttr& prevAccess,
+                                 const RpsAccessAttr& currAccess,
+                                 const ResourceInstance& resInfo);
+
+        RpsResult TrackImageLayoutInfo(RpsResourceId resourceId,
+                                       const ResourceInstance& resInfo,
+                                       const CmdAccessInfo& accessInfo);
+
+        VkImageLayout GetTrackedImageLayoutInfo(const ResourceInstance& resInfo, const CmdAccessInfo& accessInfo) const;
+
+    private:
+        void RecordBarrierBatch(VkCommandBuffer hCmdBuf, uint32_t barrierBatch) const;
+
+        // Shared implementation behind the two static GetCmdArgViews overloads.
+        template
+        RpsResult GetCmdArgViews(const RuntimeCmdCallbackContext& context,
+                                 ConstArrayRef views,
+                                 uint32_t argIndex,
+                                 uint32_t srcArrayOffset,
+                                 ViewHandleType* pViews,
+                                 uint32_t count) const;
+
+    private:
+        VKRuntimeDevice& m_device;
+        Arena m_persistentPool;
+
+        ArenaVector m_runtimeCmds;
+        ArenaVector m_barrierBatches;
+        ArenaVector m_imageBarriers;
+        ArenaVector m_bufferBarriers;
+        ArenaVector m_memoryBarriers;
+
+        ArenaVector m_resourceLayoutOffsets;
+        ArenaVector m_subResLayouts;
+        // Indexed with the same view index as FrameResources::imageViews
+        // (see GetCmdArgImageViewInfos).
+        ArenaVector m_imageViewLayouts;
+
+        // Vulkan objects owned for one resource frame.
+        struct FrameResources
+        {
+            ArenaVector imageViews;
+            ArenaVector bufferViews;
+            ArenaVector renderPasses;
+            ArenaVector frameBuffers;
+            ArenaVector pendingImages;
+            ArenaVector pendingBuffers;
+
+            // Calls reset(&arena) on every list; no Vulkan object is destroyed here.
+            void Reset(Arena& arena)
+            {
+                imageViews.reset(&arena);
+                bufferViews.reset(&arena);
+                renderPasses.reset(&arena);
+                frameBuffers.reset(&arena);
+                pendingImages.reset(&arena);
+                pendingBuffers.reset(&arena);
+            }
+
+            // Destroys framebuffers, render passes, buffer views, image views, then the
+            // pending buffers and images (in that order), and finally clears every list.
+            void DestroyDeviceResources(VkDevice hDevice)
+            {
+                for (VkFramebuffer fb : frameBuffers)
+                {
+                    vkDestroyFramebuffer(hDevice, fb, nullptr);
+                }
+
+                for (VkRenderPass rp : renderPasses)
+                {
+                    vkDestroyRenderPass(hDevice, rp, nullptr);
+                }
+
+                for (VkBufferView bufView : bufferViews)
+                {
+                    vkDestroyBufferView(hDevice, bufView, nullptr);
+                }
+
+                for (VkImageView imgView : imageViews)
+                {
+                    vkDestroyImageView(hDevice, imgView, nullptr);
+                }
+
+                for (VkBuffer buf : pendingBuffers)
+                {
+                    vkDestroyBuffer(hDevice, buf, nullptr);
+                }
+
+                for (VkImage img : pendingImages)
+                {
+                    vkDestroyImage(hDevice, img, nullptr);
+                }
+
+                pendingImages.clear();
+                pendingBuffers.clear();
+                imageViews.clear();
+                bufferViews.clear();
+                renderPasses.clear();
+                frameBuffers.clear();
+            }
+        };
+
+        ArenaVector m_pendingReleaseImages;
+        ArenaVector m_pendingReleaseBuffers;
+
+        // m_currentResourceFrame selects the active entry of m_frameResources.
+        ArenaVector m_frameResources;
+        uint32_t m_currentResourceFrame = 0;
+
+        // Maps a command access index to a view index (used to look up imageViews and
+        // m_imageViewLayouts in GetCmdArgImageViewInfos).
+        ArenaVector m_accessToDescriptorMap;
+    };
+} // namespace rps
+
+#endif //RPS_VK_RUNTIME_BACKEND_H
diff --git a/src/runtime/vk/rps_vk_runtime_device.cpp b/src/runtime/vk/rps_vk_runtime_device.cpp
new file mode 100644
index 0000000..64bdbd8
--- /dev/null
+++ b/src/runtime/vk/rps_vk_runtime_device.cpp
@@ -0,0 +1,475 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+
+#include "runtime/vk/rps_vk_runtime_device.hpp"
+#include "runtime/vk/rps_vk_runtime_backend.hpp"
+#include "runtime/vk/rps_vk_util.hpp"
+
+#include "runtime/common/rps_runtime_util.hpp"
+#include "runtime/common/phases/rps_pre_process.hpp"
+#include "runtime/common/phases/rps_dag_build.hpp"
+#include "runtime/common/phases/rps_access_dag_build.hpp"
+#include "runtime/common/phases/rps_cmd_print.hpp"
+#include "runtime/common/phases/rps_cmd_dag_print.hpp"
+#include "runtime/common/phases/rps_dag_schedule.hpp"
+#include "runtime/common/phases/rps_schedule_print.hpp"
+#include "runtime/common/phases/rps_memory_schedule.hpp"
+
+namespace rps
+{
+    // Stores the Vulkan device handles and runtime flags from the create info.
+    // pCreateInfo is dereferenced unchecked; rpsVKRuntimeDeviceCreate validates it.
+    VKRuntimeDevice::VKRuntimeDevice(Device* pDevice, const RpsVKRuntimeDeviceCreateInfo* pCreateInfo)
+        : RuntimeDevice(pDevice, pCreateInfo->pRuntimeCreateInfo)
+        , m_device(pCreateInfo->hVkDevice)
+        , m_physicalDevice(pCreateInfo->hVkPhysicalDevice)
+        , m_flags(pCreateInfo->flags)
+    {
+    }
+
+    // Queries physical-device and memory properties and builds the bit mask of
+    // host-visible memory types (bit i set <=> memory type i is host visible).
+    RpsResult VKRuntimeDevice::Init()
+    {
+        vkGetPhysicalDeviceProperties(m_physicalDevice, &m_deviceProperties);
+        vkGetPhysicalDeviceMemoryProperties(m_physicalDevice, &m_deviceMemoryProperties);
+
+        static_assert(VK_MAX_MEMORY_TYPES <= 32, "Bitwidth of m_hostVisibleMemoryTypeMask needs extending.");
+
+        for (uint32_t iMemTy = 0; iMemTy < m_deviceMemoryProperties.memoryTypeCount; iMemTy++)
+        {
+            if (m_deviceMemoryProperties.memoryTypes[iMemTy].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+                m_hostVisibleMemoryTypeMask |= (1u << iMemTy);
+        }
+
+        return RPS_OK;
+    }
+
+    VKRuntimeDevice::~VKRuntimeDevice()
+    {
+    }
+
+    // Registers the default render graph phases, in execution order.
+    // NOTE(review): the AddPhase template arguments are missing in this copy of the file.
+    RpsResult VKRuntimeDevice::BuildDefaultRenderGraphPhases(RenderGraph& renderGraph)
+    {
+        // Nine phases are added below; the reservation previously said 8.
+        RPS_V_RETURN(renderGraph.ReservePhases(9));
+        RPS_V_RETURN(renderGraph.AddPhase());
+        RPS_V_RETURN(renderGraph.AddPhase());
+        RPS_V_RETURN(renderGraph.AddPhase());
+        RPS_V_RETURN(renderGraph.AddPhase(renderGraph));
+        RPS_V_RETURN(renderGraph.AddPhase(renderGraph));
+        RPS_V_RETURN(renderGraph.AddPhase(renderGraph));
+        RPS_V_RETURN(renderGraph.AddPhase(renderGraph));
+        RPS_V_RETURN(renderGraph.AddPhase());
+        RPS_V_RETURN(renderGraph.AddPhase(*this, renderGraph));
+
+        return RPS_OK;
+    }
+
+    // Fills fullSubresourceRange and numSubResources for every resource instance.
+    RpsResult VKRuntimeDevice::InitializeSubresourceInfos(ArrayRef resInstances)
+    {
+        for (auto& resInstance : resInstances)
+        {
+            GetFullSubresourceRange(
+                resInstance.fullSubresourceRange, resInstance.desc, GetResourceAspectMask(resInstance));
+
+            resInstance.numSubResources = GetSubresourceCount(resInstance);
+        }
+
+        return RPS_OK;
+    }
+
+    // Picks a memory type index for a resource.
+    //  - Returns UINT32_MAX when the requirement size is 0 (nothing to allocate).
+    //  - With CPU read/write access only host-visible types are considered.
+    //  - Each candidate is scored (device-local 0x8, host-cached 0x4, host-coherent 0x2,
+    //    usable 0x1); the lowest index with the highest score wins.
+    uint32_t FinalizeMemoryType(const VkPhysicalDeviceMemoryProperties& memProps,
+                                const uint32_t hostVisibleMemoryTypeMask,
+                                const VkMemoryRequirements& requirements,
+                                const ResourceInstance& resInfo)
+    {
+        if (requirements.size == 0)
+            return UINT32_MAX;
+
+        const bool bHostRead = resInfo.allAccesses.accessFlags & RPS_ACCESS_CPU_READ_BIT;
+        const bool bHostWrite = resInfo.allAccesses.accessFlags & RPS_ACCESS_CPU_WRITE_BIT;
+
+        // No CPU access or explicitly prefers device local:
+        const bool bPreferLocal =
+            !(bHostRead || bHostWrite) || (resInfo.desc.flags & RPS_RESOURCE_FLAG_PREFER_GPU_LOCAL_CPU_VISIBLE_BIT);
+
+        uint32_t typeBits = requirements.memoryTypeBits;
+
+        if (bHostRead || bHostWrite)
+        {
+            typeBits &= hostVisibleMemoryTypeMask;
+        }
+
+        uint32_t highScore = 0;
+        uint32_t firstMemIdx = UINT32_MAX;
+
+        for (uint32_t i = 0; i < memProps.memoryTypeCount; i++)
+        {
+            uint32_t currScore = 0;
+
+            // Bit 0 of typeBits always corresponds to memory type i (shifted each iteration).
+            if ((typeBits & 1) == 1)
+            {
+                currScore = 0x1;
+
+                if (bPreferLocal && (memProps.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
+                {
+                    currScore |= 0x8;
+                }
+
+                if (bHostRead && (memProps.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT))
+                {
+                    currScore |= 0x4;
+                }
+
+                if ((bHostRead || bHostWrite) &&
+                    (memProps.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
+                {
+                    currScore |= 0x2;
+                }
+            }
+
+            typeBits >>= 1;
+
+            // Strict '<' keeps the first index among equal scores.
+            if (highScore < currScore)
+            {
+                firstMemIdx = i;
+                highScore = currScore;
+            }
+        }
+
+        RPS_ASSERT(firstMemIdx != UINT32_MAX);
+
+        return firstMemIdx;
+    }
+
+    // For every resource pending creation: creates the Vulkan object if needed, then
+    // records its size, alignment and chosen memory type.
+    // Returns RPS_ERROR_INTEGER_OVERFLOW when the alignment does not fit in 32 bits.
+    RpsResult VKRuntimeDevice::InitializeResourceAllocInfos(ArrayRef resInstances)
+    {
+        for (auto& resInst : resInstances)
+        {
+            if (!resInst.isPendingCreate)
+            {
+                //TODO: This may impact memory size based scheduling for external resources
+                continue;
+            }
+
+            VKResourceAllocInfo allocInfo;
+            RPS_V_RETURN(GetResourceAllocInfo(resInst, allocInfo));
+
+            // Adopt the freshly created handle before any early return below, so it is
+            // not dropped on the error path.
+            // NOTE(review): assumes the resource instance's normal teardown destroys
+            // hRuntimeResource - confirm.
+            if (!resInst.hRuntimeResource)
+            {
+                resInst.hRuntimeResource = allocInfo.hRuntimeResource;
+            }
+            else
+            {
+                RPS_ASSERT(allocInfo.hRuntimeResource == RPS_NULL_HANDLE);
+            }
+
+            RPS_RETURN_ERROR_IF(allocInfo.memoryRequirements.alignment > UINT32_MAX, RPS_ERROR_INTEGER_OVERFLOW);
+
+            resInst.allocRequirement.size = uint64_t(allocInfo.memoryRequirements.size);
+            resInst.allocRequirement.alignment = uint32_t(allocInfo.memoryRequirements.alignment);
+            resInst.allocRequirement.memoryTypeIndex = FinalizeMemoryType(
+                m_deviceMemoryProperties, m_hostVisibleMemoryTypeMask, allocInfo.memoryRequirements, resInst);
+        }
+
+        return RPS_OK;
+    }
+
+    // Builds the packed subresource range of an image view; the aspect mask comes from
+    // the view format and the image creation format. accessAttr is unused here.
+    RpsResult VKRuntimeDevice::GetSubresourceRangeFromImageView(SubresourceRangePacked& outRange,
+                                                                const ResourceInstance& resourceInfo,
+                                                                const RpsAccessAttr& accessAttr,
+                                                                const RpsImageView& imageView)
+    {
+        uint32_t aspectMask = GetFormatAspectMask(imageView.base.viewFormat, rpsVkGetImageCreationFormat(resourceInfo));
+        outRange = SubresourceRangePacked(aspectMask, imageView.subresourceRange, resourceInfo.desc);
+        return RPS_OK;
+    }
+
+    // Derives VkImageUsageFlags from the union of all accesses recorded on the resource.
+    VkImageUsageFlags GetVkImageUsageFlags(const ResourceInstance& resInfo)
+    {
+        VkImageUsageFlags usage = 0;
+        if (rpsAnyBitsSet(resInfo.allAccesses.accessFlags, RPS_ACCESS_SHADER_RESOURCE_BIT))
+            usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
+
+        if (rpsAnyBitsSet(resInfo.allAccesses.accessFlags, RPS_ACCESS_RENDER_TARGET_BIT))
+            usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+
+        if (rpsAnyBitsSet(resInfo.allAccesses.accessFlags, RPS_ACCESS_UNORDERED_ACCESS_BIT))
+            usage |= VK_IMAGE_USAGE_STORAGE_BIT;
+
+        if (rpsAnyBitsSet(resInfo.allAccesses.accessFlags, RPS_ACCESS_DEPTH_STENCIL))
+            usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
+
+        // TODO: distinguish command clear and render pass clear
+        if (rpsAnyBitsSet(resInfo.allAccesses.accessFlags,
+                          (RPS_ACCESS_COPY_DEST_BIT | RPS_ACCESS_RESOLVE_DEST_BIT | RPS_ACCESS_CLEAR_BIT)))
+            usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+
+        if (rpsAnyBitsSet(resInfo.allAccesses.accessFlags, (RPS_ACCESS_COPY_SRC_BIT | RPS_ACCESS_RESOLVE_SRC_BIT)))
+            usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+
+        return usage;
+    }
+
+    // Derives VkImageCreateFlags from the resource description and its accesses.
+    static inline VkImageCreateFlags GetImageCreateFlags(const ResourceInstance& resInfo)
+    {
+        VkImageCreateFlags flags = 0;
+
+        const bool isImage3D = (resInfo.desc.type == RPS_RESOURCE_TYPE_IMAGE_3D);
+        const bool isCubemap = !!(resInfo.desc.flags & RPS_RESOURCE_FLAG_CUBEMAP_COMPATIBLE_BIT);
+
+        // The cube-compatible flag used to be set twice through two equivalent tests.
+        if (isCubemap)
+            flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
+
+        // TODO: Add VkImageFormatListCreateInfo
+        if (resInfo.isMutableFormat)
+            flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+
+        // 3D images used as render targets are made viewable as 2D arrays.
+        if (isImage3D && rpsAnyBitsSet(resInfo.allAccesses.accessFlags, RPS_ACCESS_RENDER_TARGET_BIT))
+            flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
+
+        return flags;
+    }
+
+    // Fills every field of imgCI from the resource instance. 3D images get depth from
+    // the description and a single array layer; other images get depth 1.
+    void GetVkImageCreateInfo(VkImageCreateInfo& imgCI, const ResourceInstance& resInfo)
+    {
+        RPS_ASSERT(resInfo.desc.IsImage());
+
+        const bool isImage3D = (resInfo.desc.type == RPS_RESOURCE_TYPE_IMAGE_3D);
+        const bool isRowMajor = rpsAnyBitsSet(resInfo.desc.flags, RPS_RESOURCE_FLAG_ROWMAJOR_IMAGE_BIT);
+
+        imgCI.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
+        imgCI.pNext = NULL;
+        imgCI.flags = GetImageCreateFlags(resInfo);
+        imgCI.imageType = rpsVkGetImageType(resInfo.desc.type);
+        imgCI.format = rpsFormatToVK(rpsVkGetImageCreationFormat(resInfo));
+        imgCI.extent.width = resInfo.desc.image.width;
+        imgCI.extent.height = resInfo.desc.image.height;
+        imgCI.extent.depth = isImage3D ? resInfo.desc.image.depth : 1;
+        imgCI.mipLevels = resInfo.desc.image.mipLevels;
+        imgCI.arrayLayers = isImage3D ? 1 : resInfo.desc.image.arrayLayers;
+        imgCI.samples = rpsVkGetSampleCount(resInfo.desc.image.sampleCount);
+        imgCI.tiling = isRowMajor ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
+        imgCI.usage = GetVkImageUsageFlags(resInfo);
+        imgCI.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+        imgCI.queueFamilyIndexCount = 0;
+        imgCI.pQueueFamilyIndices = nullptr;
+        imgCI.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+    }
+
+    // Fills every field of bufCI; usage flags are derived from the recorded accesses
+    // and the formatted read/write markers of the resource.
+    void GetVkBufferCreateInfo(VkBufferCreateInfo& bufCI, const ResourceInstance& resInfo)
+    {
+        VkFlags vkUsageFlags = 0;
+        if (rpsAnyBitsSet(resInfo.allAccesses.accessFlags, RPS_ACCESS_COPY_SRC_BIT))
+            vkUsageFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+        if (rpsAnyBitsSet(resInfo.allAccesses.accessFlags, RPS_ACCESS_COPY_DEST_BIT))
+            vkUsageFlags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+        if (rpsAnyBitsSet(resInfo.allAccesses.accessFlags, RPS_ACCESS_CONSTANT_BUFFER_BIT))
+            vkUsageFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+        if (rpsAnyBitsSet(resInfo.allAccesses.accessFlags,
+                          RPS_ACCESS_UNORDERED_ACCESS_BIT | RPS_ACCESS_SHADER_RESOURCE_BIT))
+            vkUsageFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+        if (rpsAnyBitsSet(resInfo.allAccesses.accessFlags, RPS_ACCESS_INDEX_BUFFER_BIT))
+            vkUsageFlags |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
+        if (rpsAnyBitsSet(resInfo.allAccesses.accessFlags, RPS_ACCESS_VERTEX_BUFFER_BIT))
+            vkUsageFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
+        if (rpsAnyBitsSet(resInfo.allAccesses.accessFlags, RPS_ACCESS_INDIRECT_ARGS_BIT))
+            vkUsageFlags |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
+        if (resInfo.bBufferFormattedWrite)
+            vkUsageFlags |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
+        if (resInfo.bBufferFormattedRead)
+            vkUsageFlags |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
+
+        bufCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+        bufCI.pNext = nullptr;
+        bufCI.flags = 0;
+        bufCI.size = resInfo.desc.GetBufferSize();
+        bufCI.usage = vkUsageFlags;
+        bufCI.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+        bufCI.queueFamilyIndexCount = 0;
+        bufCI.pQueueFamilyIndices = nullptr;
+    }
+
+    // Returns the VkMemoryType for a memory type index reported by the device.
+    VkMemoryType VKRuntimeDevice::GetVkHeapTypeInfo(uint32_t memTypeIndex) const
+    {
+        // FinalizeMemoryType can yield UINT32_MAX; catch such an index before it is used
+        // to read outside the valid part of memoryTypes[].
+        RPS_ASSERT(memTypeIndex < m_deviceMemoryProperties.memoryTypeCount);
+
+        return m_deviceMemoryProperties.memoryTypes[memTypeIndex];
+    }
+
+    // Creates the VkImage / VkBuffer for a resource that has no runtime handle yet and
+    // whose accesses are known, and queries its memory requirements. In every other case
+    // allocInfo stays value-initialised (null handle, zero requirements).
+    RpsResult VKRuntimeDevice::GetResourceAllocInfo(const ResourceInstance& resInstance,
+                                                    VKResourceAllocInfo& allocInfo) const
+    {
+        allocInfo = {};
+
+        // TODO: Check pending state
+        if (!resInstance.hRuntimeResource && (resInstance.allAccesses.accessFlags != RPS_ACCESS_UNKNOWN))
+        {
+            if (resInstance.desc.IsImage())
+            {
+                VkImageCreateInfo imgCI;
+                GetVkImageCreateInfo(imgCI, resInstance);
+
+                VkImage hImage;
+                RPS_V_RETURN(VkResultToRps(vkCreateImage(m_device, &imgCI, nullptr, &hImage)));
+
+                allocInfo.hRuntimeResource = ToHandle(hImage);
+                vkGetImageMemoryRequirements(m_device, hImage, &allocInfo.memoryRequirements);
+            }
+            else if (resInstance.desc.IsBuffer())
+            {
+                VkBufferCreateInfo bufCI;
+                GetVkBufferCreateInfo(bufCI, resInstance);
+
+                VkBuffer hBuffer;
+                RPS_V_RETURN(VkResultToRps(vkCreateBuffer(m_device, &bufCI, nullptr, &hBuffer)));
+
+                allocInfo.hRuntimeResource = ToHandle(hBuffer);
+                vkGetBufferMemoryRequirements(m_device, hBuffer, &allocInfo.memoryRequirements);
+            }
+        }
+
+        return RPS_OK;
+    }
+
+    // Aspect mask of the image creation format; buffers report 1.
+    uint32_t VKRuntimeDevice::GetResourceAspectMask(const ResourceInstance& resInfo) const
+    {
+        const auto& resourceDesc = resInfo.desc;
+        const RpsFormat imgCreationFmt = rpsVkGetImageCreationFormat(resInfo);
+
+        return resourceDesc.IsImage() ? GetFormatAspectMask(imgCreationFmt, RPS_FORMAT_UNKNOWN) : 1;
+    }
+
+    // Buffers have one subresource; images have layers * mips * format planes, where a
+    // 3D image counts as a single layer.
+    uint32_t VKRuntimeDevice::GetSubresourceCount(const ResourceInstance& resInfo) const
+    {
+        const auto& resDesc = resInfo.desc;
+
+        return resDesc.IsBuffer()
+                   ? 1
+                   : (((resDesc.type == RPS_RESOURCE_TYPE_IMAGE_3D) ? 1 : resDesc.image.arrayLayers) *
+                      resDesc.image.mipLevels * GetFormatPlaneCount(rpsVkGetImageCreationFormat(resInfo)));
+    }
+
+    // Built-in node callbacks, defined elsewhere.
+    void VKBuiltInClearColorRegions(const RpsCmdCallbackContext* pContext);
+    void VKBuiltInClearColor(const RpsCmdCallbackContext* pContext);
+    void VKBuiltInClearDepthStencil(const RpsCmdCallbackContext* pContext);
+    void VKBuiltInClearDepthStencilRegions(const RpsCmdCallbackContext* pContext);
+    void VKBuiltInClearTextureUAV(const RpsCmdCallbackContext* pContext);
+    void VKBuiltInClearTextureUAVRegions(const RpsCmdCallbackContext* pContext);
+    void VKBuiltInClearBufferUAV(const RpsCmdCallbackContext* pContext);
+    void VKBuiltInCopyTexture(const RpsCmdCallbackContext* pContext);
+    void VKBuiltInCopyBuffer(const RpsCmdCallbackContext* pContext);
+    void VKBuiltInCopyTextureToBuffer(const RpsCmdCallbackContext* pContext);
+    void VKBuiltInCopyBufferToTexture(const RpsCmdCallbackContext* pContext);
+    void VKBuiltInResolve(const RpsCmdCallbackContext* pContext);
+
+    // Table mapping built-in node names to their Vulkan callbacks.
+    ConstArrayRef VKRuntimeDevice::GetBuiltInNodes() const
+    {
+        static const BuiltInNodeInfo c_builtInNodes[] = {
+            {"clear_color", {&VKBuiltInClearColor, nullptr}},
+            {"clear_color_regions", {&VKBuiltInClearColorRegions, nullptr}},
+            {"clear_depth_stencil", {&VKBuiltInClearDepthStencil, nullptr}},
+            {"clear_depth_stencil_regions", {&VKBuiltInClearDepthStencilRegions, nullptr}},
+            {"clear_texture", {&VKBuiltInClearTextureUAV, nullptr}},
+            {"clear_texture_regions", {&VKBuiltInClearTextureUAVRegions, nullptr}},
+            {"clear_buffer", {&VKBuiltInClearBufferUAV, nullptr}},
+            {"copy_texture", {&VKBuiltInCopyTexture, nullptr}},
+            {"copy_buffer", {&VKBuiltInCopyBuffer, nullptr}},
+            {"copy_texture_to_buffer", {&VKBuiltInCopyTextureToBuffer, nullptr}},
+            {"copy_buffer_to_texture", {&VKBuiltInCopyBufferToTexture, nullptr}},
+            {"resolve", {&VKBuiltInResolve, nullptr}},
+        };
+
+        return c_builtInNodes;
+    }
+
+    // Returns memoryTypeCount value-initialised RpsMemoryTypeInfo entries.
+    ConstArrayRef VKRuntimeDevice::GetMemoryTypeInfos() const
+    {
+        static constexpr RpsMemoryTypeInfo s_MemTypes[VK_MAX_MEMORY_TYPES] = {};
+        return {s_MemTypes, m_deviceMemoryProperties.memoryTypeCount};
+    }
+
+    // Prints the property flags and heap index of a memory type.
+    // Returns RPS_ERROR_INDEX_OUT_OF_BOUNDS for an index the device did not report.
+    RpsResult VKRuntimeDevice::DescribeMemoryType(uint32_t memoryTypeIndex, PrinterRef printer) const
+    {
+        RPS_RETURN_ERROR_IF(memoryTypeIndex >= m_deviceMemoryProperties.memoryTypeCount,
+                            RPS_ERROR_INDEX_OUT_OF_BOUNDS);
+
+        const VkMemoryType memoryTypeInfo = GetVkHeapTypeInfo(memoryTypeIndex);
+
+        static const NameValuePair memoryPropertyFlagNames[] = {
+            RPS_INIT_NAME_VALUE_PAIR_PREFIXED(VK_MEMORY_PROPERTY_, DEVICE_LOCAL_BIT),
+            RPS_INIT_NAME_VALUE_PAIR_PREFIXED(VK_MEMORY_PROPERTY_, HOST_VISIBLE_BIT),
+            RPS_INIT_NAME_VALUE_PAIR_PREFIXED(VK_MEMORY_PROPERTY_, HOST_COHERENT_BIT),
+            RPS_INIT_NAME_VALUE_PAIR_PREFIXED(VK_MEMORY_PROPERTY_, HOST_CACHED_BIT),
+            RPS_INIT_NAME_VALUE_PAIR_PREFIXED(VK_MEMORY_PROPERTY_, LAZILY_ALLOCATED_BIT),
+            RPS_INIT_NAME_VALUE_PAIR_PREFIXED(VK_MEMORY_PROPERTY_, PROTECTED_BIT),
+            RPS_INIT_NAME_VALUE_PAIR_PREFIXED(VK_MEMORY_PROPERTY_, DEVICE_COHERENT_BIT_AMD),
+            RPS_INIT_NAME_VALUE_PAIR_PREFIXED(VK_MEMORY_PROPERTY_, DEVICE_UNCACHED_BIT_AMD),
+            RPS_INIT_NAME_VALUE_PAIR_PREFIXED(VK_MEMORY_PROPERTY_, RDMA_CAPABLE_BIT_NV),
+        };
+
+        printer("MEMORY_PROPERTY_").PrintFlags(memoryTypeInfo.propertyFlags, memoryPropertyFlagNames, "_");
+        printer("-VK_HEAP_INDEX_%d", memoryTypeInfo.heapIndex);
+
+        return RPS_OK;
+    }
+
+    // Vulkan-specific transition rules. Returns true and fills `results` when one of
+    // the two cases below applies; returns false (leaving `results` untouched) otherwise.
+    bool VKRuntimeDevice::CalculateAccessTransition(const RpsAccessAttr& beforeAccess,
+                                                    const RpsAccessAttr& afterAccess,
+                                                    AccessTransitionInfo& results) const
+    {
+        const auto transferSrcAccess = RPS_ACCESS_COPY_SRC_BIT | RPS_ACCESS_RESOLVE_SRC_BIT;
+
+        // Handle transfer src layout, when it can't be merged with other readonly accesses
+        if ((beforeAccess.accessFlags & transferSrcAccess) != (afterAccess.accessFlags & transferSrcAccess))
+        {
+            results.bKeepOrdering = false;
+            results.bTransition = true;
+            results.bMergedAccessStates = false;
+            return true;
+        }
+
+        // Handle transfer dst to transfer dst access, may need a barrier in between
+        const auto transferDstAccess = RPS_ACCESS_CLEAR_BIT | RPS_ACCESS_COPY_DEST_BIT | RPS_ACCESS_RESOLVE_DEST_BIT;
+
+        if ((beforeAccess == afterAccess) && (beforeAccess.accessFlags & transferDstAccess))
+        {
+            results.bKeepOrdering = true;
+            results.bTransition = true;
+            results.bMergedAccessStates = false;
+            return true;
+        }
+
+        return false;
+    }
+
+    // Conversions between Vulkan handles and the opaque RpsRuntimeResource.
+    RpsRuntimeResource ToHandle(VkImage hImage)
+    {
+        return RpsRuntimeResource{hImage}; // TODO: Handle !VK_USE_64_BIT_PTR_DEFINES
+    }
+
+    RpsRuntimeResource ToHandle(VkBuffer hBuffer)
+    {
+        return RpsRuntimeResource{hBuffer};
+    }
+
+    void FromHandle(VkImage& hImage, RpsRuntimeResource hResource)
+    {
+        hImage = VkImage(hResource.ptr);
+    }
+
+    void FromHandle(VkBuffer& hBuffer, RpsRuntimeResource hResource)
+    {
+        hBuffer = VkBuffer(hResource.ptr);
+    }
+
+} // namespace rps
+
+// C entry point: validates the arguments and creates a device backed by the Vulkan runtime.
+// pCreateInfo, its hVkDevice / hVkPhysicalDevice handles and phDevice must all be non-null.
+RpsResult rpsVKRuntimeDeviceCreate(const RpsVKRuntimeDeviceCreateInfo* pCreateInfo, RpsDevice* phDevice)
+{
+    RPS_CHECK_ARGS(pCreateInfo);
+    RPS_CHECK_ARGS(phDevice);
+    RPS_CHECK_ARGS(pCreateInfo->hVkDevice != VK_NULL_HANDLE);
+    RPS_CHECK_ARGS(pCreateInfo->hVkPhysicalDevice != VK_NULL_HANDLE);
+
+    RpsResult result =
+        rps::RuntimeDevice::Create(phDevice, pCreateInfo->pDeviceCreateInfo, pCreateInfo);
+
+    return result;
+}
diff --git a/src/runtime/vk/rps_vk_runtime_device.hpp b/src/runtime/vk/rps_vk_runtime_device.hpp
new file mode 100644
index 0000000..6a6e7a2
--- /dev/null
+++ b/src/runtime/vk/rps_vk_runtime_device.hpp
@@ -0,0 +1,107 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+ +#ifndef RPS_VK_RUNTIME_DEVICE_H +#define RPS_VK_RUNTIME_DEVICE_H + +#include "runtime/common/rps_runtime_device.hpp" + +#include "rps/runtime/vk/rps_vk_runtime.h" + +namespace rps +{ + class VKRuntimeDevice final : public RuntimeDevice + { + public: + VKRuntimeDevice(Device* pDevice, const RpsVKRuntimeDeviceCreateInfo* pCreateInfo); + virtual ~VKRuntimeDevice(); + + virtual RpsResult Init() override final; + virtual RpsResult BuildDefaultRenderGraphPhases(RenderGraph& renderGraph) override final; + virtual RpsResult InitializeSubresourceInfos(ArrayRef resInstances) override final; + virtual RpsResult InitializeResourceAllocInfos(ArrayRef resInstances) override final; + virtual RpsResult GetSubresourceRangeFromImageView(SubresourceRangePacked& outRange, + const ResourceInstance& resourceInfo, + const RpsAccessAttr& accessAttr, + const RpsImageView& imageView) override final; + virtual ConstArrayRef GetMemoryTypeInfos() const override final; + virtual RpsResult DescribeMemoryType(uint32_t memoryTypeIndex, PrinterRef printer) const override final; + + virtual bool CalculateAccessTransition(const RpsAccessAttr& beforeAccess, + const RpsAccessAttr& afterAccess, + AccessTransitionInfo& results) const override final; + + virtual ConstArrayRef GetBuiltInNodes() const override final; + + virtual RpsImageAspectUsageFlags GetImageAspectUsages(uint32_t aspectMask) const override final + { + return ((aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) ? RPS_IMAGE_ASPECT_COLOR : RPS_IMAGE_ASPECT_UNKNOWN) | + ((aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ? RPS_IMAGE_ASPECT_DEPTH : RPS_IMAGE_ASPECT_UNKNOWN) | + ((aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) ? 
RPS_IMAGE_ASPECT_STENCIL : RPS_IMAGE_ASPECT_UNKNOWN); + } + + public: + VkDevice GetVkDevice() const + { + return m_device; + } + + VkPhysicalDevice GetVkPhysicalDevice() const + { + return m_physicalDevice; + } + + const VkPhysicalDeviceProperties& GetPhysicalDeviceProperties() const + { + return m_deviceProperties; + } + + RpsVKRuntimeFlags GetRuntimeFlags() const + { + return m_flags; + } + + VkMemoryType GetVkHeapTypeInfo(uint32_t memoryTypeIndex) const; + + private: + struct VKResourceAllocInfo + { + VkMemoryRequirements memoryRequirements; + RpsRuntimeResource hRuntimeResource; + }; + RpsResult GetResourceAllocInfo(const ResourceInstance& resInstance, VKResourceAllocInfo& allocInfo) const; + uint32_t GetResourceAspectMask(const ResourceInstance& resInfo) const; + uint32_t GetImageViewAspectMask(const ResourceDescPacked& resDesc, const RpsImageView& imageView) const; + uint32_t GetSubresourceCount(const ResourceInstance& resInfo) const; + + private: + VkDevice m_device = VK_NULL_HANDLE; + VkPhysicalDevice m_physicalDevice = VK_NULL_HANDLE; + + RpsVKRuntimeFlags m_flags = RPS_VK_RUNTIME_FLAG_NONE; + + // TODO: Store what we care + VkPhysicalDeviceProperties m_deviceProperties = {}; + VkPhysicalDeviceMemoryProperties m_deviceMemoryProperties = {}; + uint32_t m_hostVisibleMemoryTypeMask = 0; + }; + + RpsRuntimeResource ToHandle(VkImage hImage); + RpsRuntimeResource ToHandle(VkBuffer hBuffer); + void FromHandle(VkImage& hImage, RpsRuntimeResource hResource); + void FromHandle(VkBuffer& hBuffer, RpsRuntimeResource hResource); + + template ::value || std::is_same::value, + RpsRuntimeResource>::type> + T FromHandle(RpsRuntimeResource hResource) + { + return T(hResource.ptr); + } +} + +#endif //RPS_VK_RUNTIME_DEVICE_H diff --git a/src/runtime/vk/rps_vk_util.hpp b/src/runtime/vk/rps_vk_util.hpp new file mode 100644 index 0000000..f1fcfb3 --- /dev/null +++ b/src/runtime/vk/rps_vk_util.hpp @@ -0,0 +1,226 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. 
All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef RPS_VK_UTILS_H +#define RPS_VK_UTILS_H + +#include "rps/runtime/vk/rps_vk_runtime.h" + +#include "core/rps_util.hpp" + +namespace rps +{ + template + struct VkObjectTypeMapper + { + }; + +#define RPS_DECLARE_VK_OBJECT_TYPE_MAP(TypeName, VkObjectTypeEnum, RpsTypeIdValue) \ + template <> \ + struct VkObjectTypeMapper \ + { \ + static constexpr VkObjectType value = VkObjectTypeEnum; \ + static constexpr RpsTypeId typeId = RpsTypeIdValue; \ + }; + + // clang-format off + RPS_DECLARE_VK_OBJECT_TYPE_MAP(VkBuffer, VK_OBJECT_TYPE_BUFFER, RPS_TYPE_BUFFER_VIEW) + RPS_DECLARE_VK_OBJECT_TYPE_MAP(VkImage, VK_OBJECT_TYPE_IMAGE, RPS_TYPE_IMAGE_VIEW) + RPS_DECLARE_VK_OBJECT_TYPE_MAP(VkBufferView, VK_OBJECT_TYPE_BUFFER_VIEW, RPS_TYPE_BUFFER_VIEW) + RPS_DECLARE_VK_OBJECT_TYPE_MAP(VkImageView, VK_OBJECT_TYPE_IMAGE_VIEW, RPS_TYPE_IMAGE_VIEW) + // clang-format on + +#undef RPS_DECLARE_VK_OBJECT_TYPE_MAP + + static inline RpsResult VkResultToRps(VkResult vkResult) + { + switch (vkResult) + { + case VK_SUCCESS: + case VK_SUBOPTIMAL_KHR: + return RPS_OK; + case VK_ERROR_OUT_OF_HOST_MEMORY: + case VK_ERROR_OUT_OF_DEVICE_MEMORY: + case VK_ERROR_OUT_OF_POOL_MEMORY: + return RPS_ERROR_OUT_OF_MEMORY; + default: + break; + } + return RPS_ERROR_RUNTIME_API_ERROR; + } + + // Deduces the VkImageAspectFlags from a viewFormat and corresponding resource format for a view. + // Certain RpsFormats can be used to implicitly specify the subresource range. + // (such as RPS_FORMAT_R24_UNORM_X8_TYPELESS indicates viewing the depth plane only). + // This is different from rpsFormatHasDepth/Stencil which means if a resource with + // the given format has certain image aspect. 
+ static inline VkImageAspectFlags GetFormatAspectMask(RpsFormat viewFormat, RpsFormat resourceFormat) + { + switch (viewFormat) + { + case RPS_FORMAT_D16_UNORM: + case RPS_FORMAT_D32_FLOAT: + case RPS_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case RPS_FORMAT_R24_UNORM_X8_TYPELESS: + return VK_IMAGE_ASPECT_DEPTH_BIT; + case RPS_FORMAT_X24_TYPELESS_G8_UINT: + case RPS_FORMAT_X32_TYPELESS_G8X24_UINT: + return VK_IMAGE_ASPECT_STENCIL_BIT; + case RPS_FORMAT_D24_UNORM_S8_UINT: + case RPS_FORMAT_D32_FLOAT_S8X24_UINT: + case RPS_FORMAT_R24G8_TYPELESS: + case RPS_FORMAT_R32G8X24_TYPELESS: + return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + case RPS_FORMAT_R16_UNORM: + return (resourceFormat == RPS_FORMAT_D16_UNORM) ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; + case RPS_FORMAT_R32_FLOAT: + return (resourceFormat == RPS_FORMAT_D32_FLOAT) ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; + case RPS_FORMAT_UNKNOWN: + RPS_ASSERT(resourceFormat != RPS_FORMAT_UNKNOWN); + return (resourceFormat != RPS_FORMAT_UNKNOWN) ? 
GetFormatAspectMask(resourceFormat, resourceFormat) + : VK_IMAGE_ASPECT_COLOR_BIT; + default: + return VK_IMAGE_ASPECT_COLOR_BIT; + } + } + + static inline constexpr uint32_t GetFormatPlaneCount(RpsFormat format) + { + switch (format) + { + case RPS_FORMAT_D24_UNORM_S8_UINT: + case RPS_FORMAT_D32_FLOAT_S8X24_UINT: + return 2; + default: + return 1; + } + } + + static inline VkSampleCountFlagBits rpsVkGetSampleCount(uint32_t sampleCount) + { + return VkSampleCountFlagBits(sampleCount); + } + + static inline VkImageType rpsVkGetImageType(RpsResourceType rpsType) + { + switch (rpsType) + { + case RPS_RESOURCE_TYPE_IMAGE_2D: + return VK_IMAGE_TYPE_2D; + case RPS_RESOURCE_TYPE_IMAGE_3D: + return VK_IMAGE_TYPE_3D; + case RPS_RESOURCE_TYPE_IMAGE_1D: + return VK_IMAGE_TYPE_1D; + default: + break; + } + return VK_IMAGE_TYPE_MAX_ENUM; + } + + static inline RpsFormat rpsVkGetImageCreationFormat(const ResourceInstance& resInfo) + { + if (resInfo.allAccesses.accessFlags & RPS_ACCESS_DEPTH_STENCIL) + { + switch (resInfo.desc.image.format) + { + case RPS_FORMAT_R16_TYPELESS: + return RPS_FORMAT_D16_UNORM; + case RPS_FORMAT_R24G8_TYPELESS: + return RPS_FORMAT_D24_UNORM_S8_UINT; + case RPS_FORMAT_R32_TYPELESS: + return RPS_FORMAT_D32_FLOAT; + case RPS_FORMAT_R32G8X24_TYPELESS: + return RPS_FORMAT_D32_FLOAT_S8X24_UINT; + default: + break; + } + } + + return resInfo.desc.image.format; + } + + static inline RpsFormat rpsVkGetImageViewFormat(RpsFormat requestedViewFormat, const ResourceInstance& resInfo) + { + if (resInfo.allAccesses.accessFlags & RPS_ACCESS_DEPTH_STENCIL) + { + switch (requestedViewFormat) + { + case RPS_FORMAT_R16_TYPELESS: + case RPS_FORMAT_R16_UNORM: + return RPS_FORMAT_D16_UNORM; + case RPS_FORMAT_R24G8_TYPELESS: + case RPS_FORMAT_R24_UNORM_X8_TYPELESS: + return RPS_FORMAT_D24_UNORM_S8_UINT; + case RPS_FORMAT_R32_TYPELESS: + case RPS_FORMAT_R32_FLOAT: + return RPS_FORMAT_D32_FLOAT; + case RPS_FORMAT_R32G8X24_TYPELESS: + case RPS_FORMAT_R32_FLOAT_X8X24_TYPELESS: + 
return RPS_FORMAT_D32_FLOAT_S8X24_UINT; + default: + break; + } + } + + return (requestedViewFormat != RPS_FORMAT_UNKNOWN) ? requestedViewFormat : resInfo.desc.image.format; + } + + static inline VkRect2D GetVkRect2D(const RpsRect& rect) + { + return VkRect2D{ + {rect.x, rect.y}, + {uint32_t(rect.width), uint32_t(rect.height)}, + }; + } + + static inline void GetVkSubresourceRange(VkImageSubresourceRange& vkRange, const SubresourceRangePacked& range) + { + vkRange.aspectMask = range.aspectMask; + vkRange.baseMipLevel = range.baseMipLevel; + vkRange.levelCount = range.GetMipLevelCount(); + vkRange.baseArrayLayer = range.baseArrayLayer; + vkRange.layerCount = range.GetArrayLayerCount(); + } + + template + VkComponentSwizzle GetVkComponentSwizzle(RpsResourceViewComponentMapping swizzleMapping) + { + switch (swizzleMapping) + { + case RPS_RESOURCE_VIEW_COMPONENT_MAPPING_R: + return (Target == VK_COMPONENT_SWIZZLE_R) ? VK_COMPONENT_SWIZZLE_IDENTITY : VK_COMPONENT_SWIZZLE_R; + case RPS_RESOURCE_VIEW_COMPONENT_MAPPING_G: + return (Target == VK_COMPONENT_SWIZZLE_G) ? VK_COMPONENT_SWIZZLE_IDENTITY : VK_COMPONENT_SWIZZLE_G; + case RPS_RESOURCE_VIEW_COMPONENT_MAPPING_B: + return (Target == VK_COMPONENT_SWIZZLE_B) ? VK_COMPONENT_SWIZZLE_IDENTITY : VK_COMPONENT_SWIZZLE_B; + case RPS_RESOURCE_VIEW_COMPONENT_MAPPING_A: + return (Target == VK_COMPONENT_SWIZZLE_A) ? 
VK_COMPONENT_SWIZZLE_IDENTITY : VK_COMPONENT_SWIZZLE_A; + case RPS_RESOURCE_VIEW_COMPONENT_MAPPING_ZERO: + return VK_COMPONENT_SWIZZLE_ZERO; + case RPS_RESOURCE_VIEW_COMPONENT_MAPPING_ONE: + return VK_COMPONENT_SWIZZLE_ONE; + default: + break; + } + return VK_COMPONENT_SWIZZLE_IDENTITY; + } + + static inline void GetVkComponentMapping(VkComponentMapping& vkCompMapping, uint32_t rpsMapping) + { + vkCompMapping.r = + GetVkComponentSwizzle(RPS_IMAGE_VIEW_GET_COMPONENT_MAPPING_CHANNEL_R(rpsMapping)); + vkCompMapping.g = + GetVkComponentSwizzle(RPS_IMAGE_VIEW_GET_COMPONENT_MAPPING_CHANNEL_G(rpsMapping)); + vkCompMapping.b = + GetVkComponentSwizzle(RPS_IMAGE_VIEW_GET_COMPONENT_MAPPING_CHANNEL_B(rpsMapping)); + vkCompMapping.a = + GetVkComponentSwizzle(RPS_IMAGE_VIEW_GET_COMPONENT_MAPPING_CHANNEL_A(rpsMapping)); + } + +} // namespace rps + +#endif //RPS_VK_UTILS_H diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..b19dab1 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,150 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +# +# This file is part of the AMD Render Pipeline Shaders SDK which is +# released under the AMD INTERNAL EVALUATION LICENSE. +# +# See file LICENSE.RTF for full license details. 
+
+# Build every RPSL-based test a second time with its render graph compiled into a
+# separate shared library (only honoured on WIN32, see SetupTestApp).
+option( RPSL_DLL "Build RPSL DLLs" ON )
+# When OFF, *_vk / *_vk_dyn test executables are still built but not registered with CTest.
+option( RpsEnableVulkanTests "Enable Vulkan Test" ON )
+
+# AddTestApp: create one test executable and (optionally) register it with CTest.
+#
+#   AppName            Base target name. "_dyn" is appended when UseRpslDll is true.
+#   TestSrcFile        Main source file. Its directory is also searched for
+#                      <AppName>.rpsl, falling back to <SharedName>.rpsl, where SharedName
+#                      is AppName without a trailing _vk / _d3d12 / _d3d11, and for the
+#                      optional <SharedName>_shared.h and <AppName>_c.c companions.
+#   CompileFlags       Stored in the COMPILE_FLAGS target property. If it contains
+#                      "CONSOLE", LinkFlags is replaced by /SUBSYSTEM:CONSOLE.
+#   LinkFlags          Stored in the LINK_FLAGS target property.
+#   IncludeDirectories Extra PRIVATE include directories (";"-separated list).
+#   DependenciesString ";"-separated list of libraries to link.
+#   UseRpslDll         True: put the RPSL output into <SharedName>_rpsl_dll (created once),
+#                      define USE_RPSL_DLL=1 and link that DLL. Returns without creating
+#                      anything when no .rpsl file exists. False: compile the generated
+#                      sources straight into the executable.
+#   AppFolder          IDE folder (FOLDER target property).
+#
+# Example:
+#   AddTestApp( test_foo_vk ${Src} "${Flags}" "" "${Incs}" "${Deps}" False "tests/console" )
+function( AddTestApp AppName TestSrcFile CompileFlags LinkFlags IncludeDirectories DependenciesString UseRpslDll AppFolder )
+
+    set( GeneratedSources "" )
+    get_filename_component( TestSrcFileDir ${TestSrcFile} DIRECTORY )
+
+    # Prefer an API-specific .rpsl; otherwise fall back to the one shared by all API flavours.
+    set( RpslFileName ${TestSrcFileDir}/${AppName}.rpsl )
+    string(REGEX REPLACE "_vk$" "" SharedName ${AppName})
+    string(REGEX REPLACE "_d3d12$" "" SharedName ${SharedName})
+    string(REGEX REPLACE "_d3d11$" "" SharedName ${SharedName})
+    if ( NOT EXISTS ${RpslFileName} )
+        set( RpslFileName ${TestSrcFileDir}/${SharedName}.rpsl )
+    endif()
+    if ( EXISTS ${RpslFileName} )
+        if ( UseRpslDll )
+            set( RpslcTargetName ${AppName}_dyn )
+        else()
+            set( RpslcTargetName ${AppName} )
+        endif()
+
+        # NOTE(review): this passes the current (empty) value of GeneratedSources, not the
+        # variable name; presumably CompileRpslDxc fills GeneratedSources in this scope.
+        # Confirm against its definition.
+        CompileRpslDxc( ${RpslcTargetName} ${RpslFileName} "${GeneratedSources}" "Generated")
+        source_group( TREE "${CMAKE_CURRENT_BINARY_DIR}/${RpslcTargetName}/Generated/" PREFIX "rps_generated" FILES ${GeneratedSources} )
+
+        if ( UseRpslDll )
+            # The DLL is keyed on SharedName, so API flavours of one test share a single DLL target.
+            set( RpslDllTarget ${SharedName}_rpsl_dll )
+            if ( NOT TARGET ${RpslDllTarget} )
+                add_library( ${RpslDllTarget} SHARED ${RpslFileName} ${GeneratedSources} ${PROJECT_SOURCE_DIR}/src/runtime/common/rps_rpsl_host_dll.c )
+                target_compile_definitions( ${RpslDllTarget} PRIVATE RPS_SHADER_GUEST=1 )
+                set_target_properties( ${RpslDllTarget} PROPERTIES
+                    OUTPUT_NAME ${SharedName}_rpsl
+                    FOLDER "${AppFolder}/rpsl_dlls" )
+            endif()
+        else()
+            set( TestSrcFile ${TestSrcFile} ${RpslFileName} )
+        endif()
+    elseif( UseRpslDll )
+        # Nothing to put into a DLL: the statically linked variant already covers this test.
+        return()
+    endif()
+    set( SharedHeaderName ${TestSrcFileDir}/${SharedName}_shared.h)
+    if ( EXISTS ${SharedHeaderName} )
+        set( TestSrcFile ${TestSrcFile} ${SharedHeaderName} )
+    endif()
+    if ( EXISTS "${TestSrcFileDir}/${AppName}_c.c" )
+        set( TestSrcFile ${TestSrcFile} ${TestSrcFileDir}/${AppName}_c.c )
+    endif()
+
+    if ( UseRpslDll )
+        set( AppName ${AppName}_dyn )
+        add_executable( ${AppName} WIN32 ${TestSrcFile} )
+        target_compile_definitions(${AppName} PRIVATE USE_RPSL_DLL=1)
+        set( DependenciesString "${DependenciesString};${RpslDllTarget}" )
+    else()
+        add_executable( ${AppName} WIN32 ${TestSrcFile} ${GeneratedSources} )
+    endif()
+
+    CopyDXC( ${AppName} )
+
+    if ( "${AppName}" MATCHES "_d3d12$|_d3d12_dyn$" )
+        CopyDX12AgilitySDKBinaries( ${AppName} )
+    endif()
+
+    target_include_directories( ${AppName} PRIVATE
+        ${PROJECT_SOURCE_DIR}/include/
+        ${PROJECT_SOURCE_DIR}/src/
+        ${PROJECT_SOURCE_DIR}/tools/
+        ${CMAKE_CURRENT_SOURCE_DIR}/../../external/catch2/single_include/
+        ${CMAKE_CURRENT_SOURCE_DIR}/../../tests/
+        ${SrcFolder}/
+        ${SrcInclude}
+        ${IncludeDirectories}
+    )
+    # Unquoted expansion on purpose: it drops the empty elements a "a;;b;" style list carries.
+    set( DependencyList ${DependenciesString} )
+    target_link_libraries( ${AppName} ${DependencyList} )
+
+    # Quoted: CompileFlags is empty on non-MSVC toolchains, and an unquoted empty expansion
+    # turns this into `if( MATCHES "CONSOLE" )`, which is a configure-time error.
+    if ( "${CompileFlags}" MATCHES "CONSOLE" )
+        set( LinkFlags /SUBSYSTEM:CONSOLE )
+    endif()
+
+    set_target_properties( ${AppName} PROPERTIES
+        COMPILE_FLAGS "${CompileFlags}"
+        FOLDER "${AppFolder}"
+        LINK_FLAGS "${LinkFlags}"
+        VS_DPI_AWARE "PerMonitor")
+
+    # Convert test executable absolute paths to relative, so the registered command is
+    # expressed relative to the current binary directory rather than as an absolute path.
+    get_target_property(AppOutDir ${AppName} RUNTIME_OUTPUT_DIRECTORY)
+    set(AppAbsoluteExecPath ${AppOutDir}/$<TARGET_FILE_NAME:${AppName}>)
+    file(RELATIVE_PATH TestExecPath ${CMAKE_CURRENT_BINARY_DIR} ${AppAbsoluteExecPath})
+    set(TestExecPath "./${TestExecPath}")
+
+    # Vulkan tests are always built, but only registered when RpsEnableVulkanTests is ON.
+    if ( NOT ( "${AppName}" MATCHES "_vk$|_vk_dyn$" AND NOT RpsEnableVulkanTests ) )
+        add_test( NAME ${AppName} COMMAND ${TestExecPath} --reporter junit --out test_report_${AppName}.xml )
+    endif()
+
+endfunction()
+
+# SetupTestApp: derive a test target from one source file and hand it to AddTestApp.
+#
+#   TestSrc                       Test source; the target is named after the file (NAME_WE).
+#   AppFolder                     IDE folder forwarded to AddTestApp.
+#   AdditionalIncludeDirectiories Extra include directories (";"-separated list).
+#   AdditionalDependecies         Extra link libraries (";"-separated list).
+#   UseRpslDll                    Also create the "_dyn" variant when RPSL_DLL is ON and WIN32.
+#
+# The file is skipped when its name matches any regex in the ExcludeTests list, or when it
+# ends in _vk and Vulkan was not found. The name suffix (_d3d12 / _d3d11 / _vk) selects the
+# API runtime library that is linked in addition to rps_core, rps_frontend and rps_runtime.
+#
+# Example:
+#   SetupTestApp( ${TestSrc} "${AppFolder}/console" "" "" True )
+function ( SetupTestApp TestSrc AppFolder AdditionalIncludeDirectiories AdditionalDependecies UseRpslDll)
+    get_filename_component( TargetName ${TestSrc} NAME_WE )
+    set(RpsApiRuntime "")
+    # An empty TargetName is the "skip this file" marker used by the rest of the function.
+    foreach(ExcludeEntry IN LISTS ExcludeTests)
+        if (TargetName MATCHES "${ExcludeEntry}")
+            set(TargetName "")
+        endif()
+    endforeach()
+
+    if (TargetName MATCHES "_d3d12$")
+        set(RpsApiRuntime "rps_runtime_d3d12;")
+    elseif (TargetName MATCHES "_d3d11$")
+        set(RpsApiRuntime "rps_runtime_d3d11;")
+    elseif (TargetName MATCHES "_vk$")
+        if (Vulkan_FOUND)
+            set(RpsApiRuntime "rps_runtime_vk;${Vulkan_LIBRARY}")
+            set(VulkanIncDir ${Vulkan_INCLUDE_DIRS})
+        else()
+            set(TargetName "")
+        endif()
+    endif()
+    set( CompileFlags "" )
+    if (MSVC)
+        set( CompileFlags "${CompileFlags} /DCONSOLE" )
+    endif()
+    # Quoted: the unquoted form degenerated to `if( NOT STREQUAL "" )` for skipped files and
+    # only produced the intended result by accident of how if() parses that.
+    if (NOT "${TargetName}" STREQUAL "")
+
+        set( Dependencies "rps_core;rps_frontend;rps_runtime;${RpsApiRuntime};${AdditionalDependecies}")
+        set( IncludeDirectories "${AdditionalIncludeDirectiories}")
+        if (Vulkan_FOUND)
+            set( IncludeDirectories "${VulkanIncDir};${IncludeDirectories}")
+        endif()
+        # Statically linked variant first, then the optional RPSL-DLL ("_dyn") variant.
+        AddTestApp( ${TargetName} ${TestSrc} "${CompileFlags}" "" "${IncludeDirectories}" "${Dependencies}" False ${AppFolder} )
+        if ( RPSL_DLL AND UseRpslDll AND WIN32 )
+            AddTestApp( ${TargetName} ${TestSrc} "${CompileFlags}" "" "${IncludeDirectories}" "${Dependencies}" True ${AppFolder} )
+        endif()
+    endif()
+endfunction()
+
+BuildFolderProperty( "tests" AppFolder )
+
+add_subdirectory(utils)
+add_subdirectory(console)
+add_subdirectory(gui)
diff --git a/tests/console/CMakeLists.txt b/tests/console/CMakeLists.txt
new file mode 100644
index 0000000..8aa2276
--- /dev/null
+++ b/tests/console/CMakeLists.txt
@@ -0,0 +1,26 @@
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# This file is part of the AMD Render Pipeline Shaders SDK which is
+# released under the AMD INTERNAL EVALUATION LICENSE.
+#
+# See file LICENSE.RTF for full license details.
+ +set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE} ) + +file( GLOB_RECURSE ConsoleTestSrcs + "${CMAKE_CURRENT_SOURCE_DIR}/test_*.cpp" ) + +message( STATUS "Found Tests: ${ConsoleTestSrcs}" ) + +if ( NOT RpsJITSupported ) + list(APPEND ExcludeTests "test_rpsl_jit") +endif() + +foreach( TestSrc ${ConsoleTestSrcs} ) + SetupTestApp( ${TestSrc} "${AppFolder}/console" "" "" True) +endforeach( ) + +# Per app settings +if ( RpsJITSupported ) + CopyJITCompiler(test_rpsl_jit) +endif() diff --git a/tests/console/test_builder.cpp b/tests/console/test_builder.cpp new file mode 100644 index 0000000..0bf669a --- /dev/null +++ b/tests/console/test_builder.cpp @@ -0,0 +1,213 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#define CATCH_CONFIG_MAIN + +#include "rps/rps.h" + +#include "utils/rps_test_common.h" + +extern "C" { + +typedef struct PrivateUpdateInfo +{ + uint32_t width; + uint32_t height; + RpsBool bUseOffscreenRT; + RpsBool bUseMSAA; +} PrivateUpdateInfo; + +RpsResult buildRenderToTexture(RpsRenderGraphBuilder hBuilder, const RpsConstant* ppArgs, uint32_t numArgs); + +} // extern "C" + +RpsResult buildRenderToTextureCpp(RpsRenderGraphBuilder hBuilder, const RpsConstant* ppArgs, uint32_t numArgs) +{ + using namespace rps; + + REQUIRE(numArgs == 2); + + const RpsResourceDesc* pBackBufferDesc = static_cast(ppArgs[0]); + const PrivateUpdateInfo* pPrivateUpdateInfo = static_cast(ppArgs[1]); + + RpsNodeDeclId drawTriangle = rpsRenderGraphDeclareDynamicNode( + hBuilder, + "Triangle", + RPS_NODE_DECL_FLAG_NONE, + {ParameterDesc::Make(AccessAttr(RPS_ACCESS_RENDER_TARGET_BIT), "renderTarget"), + ParameterDesc::Make(SemanticAttr(RPS_SEMANTIC_COLOR_CLEAR_VALUE), "clearValue"), + ParameterDesc::Make("bMSAA"), + ParameterDesc::Make( + 
AccessAttr(RPS_ACCESS_RESOLVE_DEST_BIT), "resolveTarget", RPS_PARAMETER_FLAG_OPTIONAL_BIT)}); + + RpsNodeDeclId bltQuad = rpsRenderGraphDeclareDynamicNode( + hBuilder, + "Quad", + RPS_NODE_DECL_FLAG_NONE, + {ParameterDesc::Make(SemanticAttr(RPS_SEMANTIC_RENDER_TARGET), "backBuffer"), + ParameterDesc::Make(AccessAttr(RPS_ACCESS_SHADER_RESOURCE_BIT, RPS_SHADER_STAGE_PS), "offScreen")}); + + struct RTTVariables + { + ResourceDesc offscreenRTDesc; + ResourceDesc msaaRTDesc; + ImageView backBufferView; + ImageView offscreenRTView; + ImageView msaaRTView; + RpsClearValue clearValue; + bool bUseMSAA; + }; + + enum NodeIdentifiers + { + NODE_ID_TRIANGLE, + NODE_ID_MSAA_TRIANGLE, + NODE_BLT, + }; + + enum ResourceIdentifiers + { + RESOURCE_ID_OFFSCREEN_RT, + RESOURCE_ID_OFFSCREEN_MSAA_RT, + }; + + RTTVariables* rttVars = rpsRenderGraphAllocateData(hBuilder); + REQUIRE(rttVars); + + const auto backBufferResId = rpsRenderGraphGetParamResourceId(hBuilder, 0); + REQUIRE(backBufferResId != RPS_RESOURCE_ID_INVALID); + + rttVars->offscreenRTDesc = ResourceDesc( + RPS_RESOURCE_TYPE_IMAGE_2D, RPS_FORMAT_R8G8B8A8_UNORM, pPrivateUpdateInfo->width, pPrivateUpdateInfo->height); + rttVars->backBufferView = ImageView{backBufferResId}; + rttVars->clearValue = RpsClearValue{{{0.2f, 0.2f, 0.8f, 1.0f}}}; + rttVars->bUseMSAA = pPrivateUpdateInfo->bUseMSAA; + + if (pPrivateUpdateInfo->bUseOffscreenRT) + { + RpsResourceId offscreenRTResId = + rpsRenderGraphDeclareResource(hBuilder, "OffscreenRT", RESOURCE_ID_OFFSCREEN_RT, &rttVars->offscreenRTDesc); + rttVars->offscreenRTView = ImageView{offscreenRTResId}; + + if (pPrivateUpdateInfo->bUseMSAA) + { + rttVars->msaaRTDesc = rttVars->offscreenRTDesc; + rttVars->msaaRTDesc.image.sampleCount = 4; + + RpsResourceId offscreenRTMsaaResId = rpsRenderGraphDeclareResource( + hBuilder, "OffscreenRTMsaa", RESOURCE_ID_OFFSCREEN_MSAA_RT, &rttVars->msaaRTDesc); + + rttVars->msaaRTView = ImageView{offscreenRTMsaaResId}; + + rpsRenderGraphAddNode( + hBuilder, + 
drawTriangle, + NODE_ID_MSAA_TRIANGLE, + nullptr, + nullptr, + {&rttVars->msaaRTView, &rttVars->clearValue, &rttVars->bUseMSAA, &rttVars->offscreenRTView}); + } + else + { + rpsRenderGraphAddNode(hBuilder, + drawTriangle, + NODE_ID_TRIANGLE, + nullptr, + nullptr, + {&rttVars->offscreenRTView, &rttVars->clearValue, &rttVars->bUseMSAA, nullptr}); + } + + rpsRenderGraphAddNode( + hBuilder, bltQuad, NODE_BLT, nullptr, nullptr, {&rttVars->offscreenRTView, &rttVars->backBufferView}); + } + else + { + rpsRenderGraphAddNode(hBuilder, + drawTriangle, + pPrivateUpdateInfo->bUseMSAA ? NODE_ID_MSAA_TRIANGLE : NODE_ID_TRIANGLE, + nullptr, + nullptr, + {&rttVars->backBufferView, &rttVars->clearValue, &rttVars->bUseMSAA, nullptr}); + } + + return RPS_OK; +} + +TEST_CASE("BuildGraphUsingCApiCommon") +{ + RpsDevice device = rpsTestUtilCreateDevice([](auto pCreateInfo, auto phDevice) { + RpsNullRuntimeDeviceCreateInfo nullDeviceCreateInfo = {}; + nullDeviceCreateInfo.pDeviceCreateInfo = pCreateInfo; + return rpsNullRuntimeDeviceCreate(&nullDeviceCreateInfo, phDevice); + }); + + // Graph params: + RpsParameterDesc graphParams[2] = {}; + graphParams[0].typeInfo = rpsTypeInfoInitFromType(RpsResourceDesc); + graphParams[0].flags = RPS_PARAMETER_FLAG_RESOURCE_BIT; + graphParams[0].name = "backBuffer"; + graphParams[1].typeInfo = rpsTypeInfoInitFromType(void*); + graphParams[1].name = "pUserContext"; + + RpsRenderGraphSignatureDesc entryInfo = {0}; + entryInfo.name = "RenderToTexture_C"; + entryInfo.numParams = RPS_TEST_COUNTOF(graphParams); + entryInfo.pParamDescs = graphParams; + + RpsRenderGraphCreateInfo renderGraphCreateInfo = {}; + renderGraphCreateInfo.mainEntryCreateInfo.pSignatureDesc = &entryInfo; + + RpsRenderGraph hRenderGraph = RPS_NULL_HANDLE; + REQUIRE_RPS_OK(rpsRenderGraphCreate(device, &renderGraphCreateInfo, &hRenderGraph)); + + struct Resolution + { + uint32_t w; + uint32_t h; + }; + + Resolution resolutions[] = {{1280, 720}, {3840, 2160}}; + + RpsRenderGraphUpdateInfo 
renderGraphUpdateInfo = {}; + renderGraphUpdateInfo.gpuCompletedFrameIndex = RPS_GPU_COMPLETED_FRAME_INDEX_NONE; + renderGraphUpdateInfo.diagnosticFlags |= RPS_DIAGNOSTIC_ENABLE_ALL; + + RpsResourceDesc backBufferResDesc = {}; + backBufferResDesc.type = RPS_RESOURCE_TYPE_IMAGE_2D; + backBufferResDesc.temporalLayers = 1; + backBufferResDesc.image.arrayLayers = 1; + backBufferResDesc.image.format = RPS_FORMAT_R8G8B8A8_UNORM; + backBufferResDesc.image.mipLevels = 1; + backBufferResDesc.image.sampleCount = 1; + + for (auto res = std::cbegin(resolutions); res != std::cend(resolutions); ++res) + { + backBufferResDesc.image.width = res->w; + backBufferResDesc.image.height = res->h; + + for (int32_t iUseOffscreenRT = 0; iUseOffscreenRT < 2; iUseOffscreenRT++) + { + for (int32_t iUseMSAA = 0; iUseMSAA < 2; iUseMSAA++) + { + PrivateUpdateInfo privateUpdateInfo = {res->w, res->h, iUseOffscreenRT, iUseMSAA}; + RpsConstant args[] = {&backBufferResDesc, &privateUpdateInfo}; + renderGraphUpdateInfo.numArgs = RPS_TEST_COUNTOF(args); + renderGraphUpdateInfo.ppArgs = args; + + renderGraphUpdateInfo.pfnBuildCallback = &buildRenderToTexture; + REQUIRE_RPS_OK(rpsRenderGraphUpdate(hRenderGraph, &renderGraphUpdateInfo)); + + renderGraphUpdateInfo.pfnBuildCallback = &buildRenderToTextureCpp; + REQUIRE_RPS_OK(rpsRenderGraphUpdate(hRenderGraph, &renderGraphUpdateInfo)); + } + } + } + + rpsRenderGraphDestroy(hRenderGraph); + + rpsTestUtilDestroyDevice(device); +} diff --git a/tests/console/test_builder_c.c b/tests/console/test_builder_c.c new file mode 100644 index 0000000..e4500a9 --- /dev/null +++ b/tests/console/test_builder_c.c @@ -0,0 +1,157 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#include "utils/rps_test_common.h" + +RpsResult buildRenderToTexture(RpsRenderGraphBuilder cmdBuf, const RpsConstant* ppArgs, uint32_t numArgs) +{ + typedef struct PrivateUpdateInfo + { + uint32_t width; + uint32_t height; + RpsBool bUseOffscreenRT; + RpsBool bUseMSAA; + } PrivateUpdateInfo; + + REQUIRE(numArgs == 2); + + const PrivateUpdateInfo* pUpdateInfo = (const PrivateUpdateInfo*)ppArgs[1]; + + RpsParameterDesc pTriangleNodeParamDescs[4]; + memset(&pTriangleNodeParamDescs, 0, sizeof(pTriangleNodeParamDescs)); + + RpsParamAttr rtvAccessAttr, clearColorAttr, resolveDstAttr, psSrvAttr; + rpsInitParamAttrAccess(&rtvAccessAttr, RPS_ACCESS_RENDER_TARGET_BIT, RPS_SHADER_STAGE_NONE); + + pTriangleNodeParamDescs[0].flags = RPS_PARAMETER_FLAG_RESOURCE_BIT; + pTriangleNodeParamDescs[0].name = "renderTarget"; + pTriangleNodeParamDescs[0].attr = &rtvAccessAttr; + pTriangleNodeParamDescs[0].typeInfo = rpsTypeInfoInitFromTypeAndID(RpsImageView, RPS_TYPE_IMAGE_VIEW); + + pTriangleNodeParamDescs[1].name = "clearValue"; + pTriangleNodeParamDescs[1].attr = rpsInitParamAttrSemantic(&clearColorAttr, RPS_SEMANTIC_COLOR_CLEAR_VALUE, 0); + pTriangleNodeParamDescs[1].typeInfo = rpsTypeInfoInitFromType(RpsClearValue); + + pTriangleNodeParamDescs[2].name = "bEnableMSAA"; + pTriangleNodeParamDescs[2].typeInfo = rpsTypeInfoInitFromType(RpsBool); + + if (pUpdateInfo->bUseMSAA) + { + pTriangleNodeParamDescs[3].flags = RPS_PARAMETER_FLAG_RESOURCE_BIT; + pTriangleNodeParamDescs[3].name = "resolveTarget"; + pTriangleNodeParamDescs[3].attr = + rpsInitParamAttrAccess(&resolveDstAttr, RPS_ACCESS_RESOLVE_DEST_BIT, RPS_SHADER_STAGE_NONE); + pTriangleNodeParamDescs[3].typeInfo = rpsTypeInfoInitFromTypeAndID(RpsImageView, RPS_TYPE_IMAGE_VIEW); + } + + RpsParameterDesc pQuadNodeParamDescs[2]; + memset(&pQuadNodeParamDescs, 0, sizeof(pQuadNodeParamDescs)); + + pQuadNodeParamDescs[0].flags = RPS_PARAMETER_FLAG_RESOURCE_BIT; + pQuadNodeParamDescs[0].name = "target"; + pQuadNodeParamDescs[0].attr = 
&rtvAccessAttr; + pQuadNodeParamDescs[1].flags = RPS_PARAMETER_FLAG_RESOURCE_BIT; + pQuadNodeParamDescs[1].name = "source"; + pQuadNodeParamDescs[1].attr = + rpsInitParamAttrAccess(&psSrvAttr, RPS_ACCESS_SHADER_RESOURCE_BIT, RPS_SHADER_STAGE_PS); + + RpsNodeDesc* pTriangleNodeDesc = rpsRenderGraphAllocateDataOfTypeZeroed(cmdBuf, RpsNodeDesc); + pTriangleNodeDesc->name = "Triangle"; + pTriangleNodeDesc->numParams = pUpdateInfo->bUseMSAA ? 4 : 3; + pTriangleNodeDesc->pParamDescs = pTriangleNodeParamDescs; + + const RpsNodeDeclId triangleNodeId = rpsRenderGraphDeclareDynamicNode(cmdBuf, pTriangleNodeDesc); + REQUIRE(RPS_NODEDECL_ID_INVALID != triangleNodeId); + + RpsNodeDesc* pQuadNodeDesc = rpsRenderGraphAllocateDataOfTypeZeroed(cmdBuf, RpsNodeDesc); + pQuadNodeDesc->name = "Quad"; + pQuadNodeDesc->numParams = 2; + pQuadNodeDesc->pParamDescs = pQuadNodeParamDescs; + + const RpsNodeDeclId quadNodeId = rpsRenderGraphDeclareDynamicNode(cmdBuf, pQuadNodeDesc); + REQUIRE(RPS_NODEDECL_ID_INVALID != quadNodeId); + + RpsImageView* pBackBufferView = rpsRenderGraphAllocateDataOfTypeZeroed(cmdBuf, RpsImageView); + pBackBufferView->base.resourceId = rpsRenderGraphGetParamResourceId(cmdBuf, 0); + REQUIRE(pBackBufferView->base.resourceId != RPS_RESOURCE_ID_INVALID); + pBackBufferView->subresourceRange.arrayLayers = 1; + pBackBufferView->subresourceRange.mipLevels = 1; + + RpsClearValue* pClearValue = rpsRenderGraphAllocateDataOfType(cmdBuf, RpsClearValue); + *pClearValue = (RpsClearValue){{{0.2f, 0.2f, 0.8f, 1.0f}}}; + + enum + { + NODE_ID_TRIANGLE, + NODE_ID_MSAA_TRIANGLE, + NODE_BLT, + }; + + enum + { + RESOURCE_ID_OFFSCREEN_RT, + RESOURCE_ID_OFFSCREEN_MSAA_RT, + }; + + if (pUpdateInfo->bUseOffscreenRT) + { + RpsResourceDesc* pTriangleRTDesc = rpsRenderGraphAllocateDataOfTypeZeroed(cmdBuf, RpsResourceDesc); + pTriangleRTDesc->type = RPS_RESOURCE_TYPE_IMAGE_2D; + pTriangleRTDesc->image.width = pUpdateInfo->width; + pTriangleRTDesc->image.height = pUpdateInfo->height; + 
pTriangleRTDesc->image.arrayLayers = 1; + pTriangleRTDesc->image.mipLevels = 1; + pTriangleRTDesc->image.format = RPS_FORMAT_R8G8B8A8_UNORM; + pTriangleRTDesc->image.sampleCount = 1; + + RpsResourceId offscreenRTResId = + rpsRenderGraphDeclareResource(cmdBuf, "OffscreenRT", RESOURCE_ID_OFFSCREEN_RT, pTriangleRTDesc); + RpsImageView* pTriangleRTView = + rpsRenderGraphAllocateDataOfTypeAndCopyFrom(cmdBuf, RpsImageView, pBackBufferView); + pTriangleRTView->base.resourceId = offscreenRTResId; + + RpsBool* pUseMsaa = rpsRenderGraphAllocateDataOfTypeAndCopyFrom(cmdBuf, RpsBool, &pUpdateInfo->bUseMSAA); + + if (pUpdateInfo->bUseMSAA) + { + RpsResourceDesc* pTriangleRTMsaaDesc = + rpsRenderGraphAllocateDataOfTypeAndCopyFrom(cmdBuf, RpsResourceDesc, pTriangleRTDesc); + pTriangleRTMsaaDesc->image.sampleCount = 4; + + RpsResourceId offscreenRTMsaaResId = rpsRenderGraphDeclareResource( + cmdBuf, "OffscreenRTMsaa", RESOURCE_ID_OFFSCREEN_MSAA_RT, pTriangleRTMsaaDesc); + + RpsImageView* pTriangleMsaaRTView = + rpsRenderGraphAllocateDataOfTypeAndCopyFrom(cmdBuf, RpsImageView, pTriangleRTView); + pTriangleMsaaRTView->base.resourceId = offscreenRTMsaaResId; + + RpsVariable ppTriArgs[] = {pTriangleMsaaRTView, pClearValue, pUseMsaa, pTriangleRTView}; + rpsRenderGraphAddNode( + cmdBuf, triangleNodeId, NODE_ID_MSAA_TRIANGLE, NULL, NULL, ppTriArgs, RPS_TEST_COUNTOF(ppTriArgs)); + } + else + { + RpsVariable ppTriArgs[] = {pTriangleRTView, pClearValue, pUseMsaa}; + rpsRenderGraphAddNode( + cmdBuf, triangleNodeId, NODE_ID_TRIANGLE, NULL, NULL, ppTriArgs, RPS_TEST_COUNTOF(ppTriArgs)); + } + + RpsVariable ppQuadArgs[] = {pTriangleRTView, pBackBufferView}; + rpsRenderGraphAddNode(cmdBuf, quadNodeId, NODE_BLT, NULL, NULL, ppQuadArgs, RPS_TEST_COUNTOF(ppQuadArgs)); + } + else + { + RpsBool bUseMSAA = RPS_FALSE; + RpsBool* pUseMsaa = rpsRenderGraphAllocateDataOfTypeAndCopyFrom(cmdBuf, RpsBool, &bUseMSAA); + + RpsVariable ppTriArgs[] = {pBackBufferView, pClearValue, pUseMsaa, NULL}; + 
rpsRenderGraphAddNode( + cmdBuf, triangleNodeId, NODE_ID_TRIANGLE, NULL, NULL, ppTriArgs, pTriangleNodeDesc->numParams); + } + + return RPS_OK; +} diff --git a/tests/console/test_cmd_compiler.cpp b/tests/console/test_cmd_compiler.cpp new file mode 100644 index 0000000..645342c --- /dev/null +++ b/tests/console/test_cmd_compiler.cpp @@ -0,0 +1,402 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#define CATCH_CONFIG_MAIN + +#include "rps/rps.h" + +#include "utils/rps_test_common.h" + +using namespace rps; + +class MiniRenderer +{ +public: + struct RenderOptions + { + bool EnableShadowMap; + bool ReverseZ; + bool EnableZPrePass; + bool EnableDeferred; + bool EnableTransparency; + bool EnablePostProcess; + RpsFormat DepthFormat; + RpsFormat ShadowMapFormat; + uint32_t ShadowMapSize; + }; + + MiniRenderer() + { + std::fill(std::begin(m_resourceIds), std::end(m_resourceIds), RPS_RESOURCE_ID_INVALID); + } + + static RpsResult BuildRenderGraphCb(RpsRenderGraphBuilder cmdBuf, const RpsConstant* ppArgs, uint32_t numArgs) + { + MiniRenderer* pThis = *static_cast(ppArgs[0]); + const ResourceDesc& outputBufferDesc = *static_cast(ppArgs[1]); + const RenderOptions& options = *static_cast(ppArgs[2]); + + pThis->Render(cmdBuf, outputBufferDesc, options); + + return RPS_OK; + } + + void Render(RpsRenderGraphBuilder cmdBuf, const ResourceDesc& outputBufferDesc, const RenderOptions& options) + { + RenderGraphBuilderRef builder(cmdBuf); + + const RpsResourceId outputResId = builder.GetParamResourceId(1); + REQUIRE(outputResId != RPS_RESOURCE_ID_INVALID); + + m_resourceDescs[RES_ID_OUTPUT_BUFFER] = outputBufferDesc; + m_resourceViews[RES_ID_OUTPUT_BUFFER] = ImageView{outputResId}; + + builder.SetParamVariable(1, m_resourceDescs[RES_ID_OUTPUT_BUFFER]); + + RpsFormat 
resourceFormats[] = { + RPS_FORMAT_UNKNOWN, + options.ShadowMapFormat, + options.DepthFormat, + RPS_FORMAT_R8G8B8A8_UNORM, + RPS_FORMAT_R11G11B10_FLOAT, + RPS_FORMAT_R16G16B16A16_FLOAT, + }; + + m_resourceDescs[RES_ID_SHADOW_MAP] = ResourceDesc{RPS_RESOURCE_TYPE_IMAGE_2D, + resourceFormats[RES_ID_SHADOW_MAP], + options.ShadowMapSize, + options.ShadowMapSize}; + + for (uint32_t i = RES_ID_DEPTH_BUFFER; i < NUM_RESOURCES; i++) + { + m_resourceDescs[i] = ResourceDesc{RPS_RESOURCE_TYPE_IMAGE_2D, + resourceFormats[i], + outputBufferDesc.image.width, + outputBufferDesc.image.height}; + } + + REQUIRE_RPS_OK(builder.DeclareResource(RES_ID_DEPTH_BUFFER, + &m_resourceDescs[RES_ID_DEPTH_BUFFER], + "DepthBuffer", + &m_resourceIds[RES_ID_DEPTH_BUFFER])); + + m_resourceViews[RES_ID_DEPTH_BUFFER] = ImageView{m_resourceIds[RES_ID_DEPTH_BUFFER]}; + + if (options.EnableZPrePass) + { + builder.AddNode( + this, + &MiniRenderer::RenderZPrePass, + NODE_ID_ZPREPASS, + "ZPrePass", + builder.MakeNodeArg(m_resourceViews[RES_ID_DEPTH_BUFFER], AccessAttr(RPS_ACCESS_DEPTH_STENCIL_WRITE))); + } + + if (options.EnableShadowMap) + { + REQUIRE_RPS_OK(builder.DeclareResource(RES_ID_SHADOW_MAP, + &m_resourceDescs[RES_ID_SHADOW_MAP], + "ShadowMap", + &m_resourceIds[RES_ID_SHADOW_MAP])); + + m_resourceViews[RES_ID_SHADOW_MAP] = ImageView(m_resourceIds[RES_ID_SHADOW_MAP]); + + builder.AddNode( + this, + &MiniRenderer::RenderShadowMap, + NODE_ID_SHADOW_MAP, + "ShadowMap", + builder.MakeNodeArg(m_resourceViews[RES_ID_SHADOW_MAP], AccessAttr(RPS_ACCESS_DEPTH_STENCIL_WRITE))); + } + else + { + m_resourceViews[RES_ID_SHADOW_MAP] = ImageView(RPS_INDEX_NONE_U32); + } + + m_resourceDescs[RES_ID_LIGHT_BUFFER].image.format = + options.EnablePostProcess + ? (options.EnableTransparency ? 
RPS_FORMAT_R16G16B16A16_FLOAT : RPS_FORMAT_R11G11B10_FLOAT) + : outputBufferDesc.image.format; + + REQUIRE_RPS_OK(builder.DeclareResource(RES_ID_LIGHT_BUFFER, + &m_resourceDescs[RES_ID_LIGHT_BUFFER], + "LightBuffer", + &m_resourceIds[RES_ID_LIGHT_BUFFER])); + m_resourceViews[RES_ID_LIGHT_BUFFER] = ImageView{m_resourceIds[RES_ID_LIGHT_BUFFER]}; + + if (options.EnableDeferred) + { + REQUIRE_RPS_OK(builder.DeclareResource(RES_ID_GBUFFER_MATERIAL, + &m_resourceDescs[RES_ID_GBUFFER_MATERIAL], + "MaterialBuffer", + &m_resourceIds[RES_ID_GBUFFER_MATERIAL])); + + REQUIRE_RPS_OK(builder.DeclareResource(RES_ID_GBUFFER_NORMAL, + &m_resourceDescs[RES_ID_GBUFFER_NORMAL], + "NormalBuffer", + &m_resourceIds[RES_ID_GBUFFER_NORMAL])); + + m_resourceViews[RES_ID_GBUFFER_MATERIAL] = ImageView{m_resourceIds[RES_ID_GBUFFER_MATERIAL]}; + m_resourceViews[RES_ID_GBUFFER_NORMAL] = ImageView{m_resourceIds[RES_ID_GBUFFER_NORMAL]}; + + builder.AddNode(this, + &MiniRenderer::RenderGBuffer, + NODE_ID_GBUFFER, + "GBuffer", + builder.MakeNodeArg(m_resourceViews[RES_ID_GBUFFER_MATERIAL], + SemanticAttr(RPS_SEMANTIC_RENDER_TARGET, 0)), + builder.MakeNodeArg(m_resourceViews[RES_ID_GBUFFER_NORMAL], + SemanticAttr(RPS_SEMANTIC_RENDER_TARGET, 1)), + builder.MakeNodeArg(m_resourceViews[RES_ID_DEPTH_BUFFER], + AccessAttr(options.EnableZPrePass ? 
RPS_ACCESS_DEPTH_STENCIL_READ + : RPS_ACCESS_DEPTH_STENCIL_WRITE))); + + constexpr AccessAttr CS_SRV_ATTR = AccessAttr(RPS_ACCESS_SHADER_RESOURCE_BIT, RPS_SHADER_STAGE_CS); + + builder.AddNode(this, + &MiniRenderer::DeferredLighting, + NODE_ID_LIGHTING, + "DeferredLighting", + builder.MakeNodeArg(m_resourceViews[RES_ID_LIGHT_BUFFER], + AccessAttr(RPS_ACCESS_UNORDERED_ACCESS_BIT, RPS_SHADER_STAGE_CS)), + builder.MakeNodeArg(m_resourceViews[RES_ID_GBUFFER_MATERIAL], CS_SRV_ATTR), + builder.MakeNodeArg(m_resourceViews[RES_ID_GBUFFER_NORMAL], CS_SRV_ATTR), + builder.MakeNodeArg(m_resourceViews[RES_ID_DEPTH_BUFFER], CS_SRV_ATTR), + builder.MakeNodeArg(m_resourceViews[RES_ID_SHADOW_MAP], CS_SRV_ATTR)); + } + else + { + builder.AddNode( + this, + &MiniRenderer::RenderForward, + NODE_ID_FORWARD, + "Forward", + builder.MakeNodeArg(m_resourceViews[RES_ID_LIGHT_BUFFER], SemanticAttr(RPS_SEMANTIC_RENDER_TARGET, 0)), + builder.MakeNodeArg(m_resourceViews[RES_ID_DEPTH_BUFFER], + AccessAttr(options.EnableZPrePass ? 
RPS_ACCESS_DEPTH_STENCIL_READ + : RPS_ACCESS_DEPTH_STENCIL_WRITE)), + builder.MakeNodeArg(m_resourceViews[RES_ID_SHADOW_MAP], + AccessAttr(RPS_ACCESS_SHADER_RESOURCE_BIT, RPS_SHADER_STAGE_PS))); + } + + if (options.EnableTransparency) + { + builder.AddNode( + this, + &MiniRenderer::RenderTransparency, + NODE_ID_TRANSPARENCY, + "Transparency", + builder.MakeNodeArg(m_resourceViews[RES_ID_LIGHT_BUFFER], SemanticAttr(RPS_SEMANTIC_RENDER_TARGET, 0)), + builder.MakeNodeArg(m_resourceViews[RES_ID_DEPTH_BUFFER], + AccessAttr(RPS_ACCESS_SHADER_RESOURCE_BIT, RPS_SHADER_STAGE_PS)), + builder.MakeNodeArg(m_resourceViews[RES_ID_DEPTH_BUFFER], AccessAttr(RPS_ACCESS_DEPTH_STENCIL_READ)), + builder.MakeNodeArg(m_resourceViews[RES_ID_SHADOW_MAP], + AccessAttr(RPS_ACCESS_SHADER_RESOURCE_BIT, RPS_SHADER_STAGE_PS))); + } + + if (options.EnablePostProcess) + { + builder.AddNode( + this, + &MiniRenderer::PostProcess, + NODE_ID_POST_PROCESS, + "PostProcess", + builder.MakeNodeArg(m_resourceViews[RES_ID_OUTPUT_BUFFER], SemanticAttr(RPS_SEMANTIC_RENDER_TARGET, 0)), + builder.MakeNodeArg(m_resourceViews[RES_ID_LIGHT_BUFFER], + AccessAttr(RPS_ACCESS_SHADER_RESOURCE_BIT, RPS_SHADER_STAGE_PS)), + builder.MakeNodeArg(m_resourceViews[RES_ID_DEPTH_BUFFER], AccessAttr(RPS_ACCESS_DEPTH_STENCIL_READ))); + } + else + { + builder.AddNode( + this, + &MiniRenderer::Copy, + NODE_ID_COPY_TO_OUTPUT, + "CopyToOutput", + builder.MakeNodeArg(m_resourceViews[RES_ID_OUTPUT_BUFFER], AccessAttr(RPS_ACCESS_COPY_DEST_BIT)), + builder.MakeNodeArg(m_resourceViews[RES_ID_LIGHT_BUFFER], AccessAttr(RPS_ACCESS_COPY_SRC_BIT))); + } + } + +private: + void RenderShadowMap(const RpsCmdCallbackContext& context) + //(const RpsImageView& shadowMap) + { + } + + void RenderZPrePass(const RpsCmdCallbackContext& context) + //(const RpsImageView& depthBuffer, float depthClear) + { + } + + void RenderGBuffer(const RpsCmdCallbackContext& context) + //(const RpsImageView& materialBuffer, const RpsImageView& normalBuffer, const 
RpsImageView& depthBuffer) + { + } + + void RenderForward(const RpsCmdCallbackContext& context) + //(const RpsImageView& lightBuffer, const RpsImageView& depthBuffer, const RpsImageView& shadowMap) + { + } + + void DeferredLighting(const RpsCmdCallbackContext& context) + //(const RpsImageView& materialBuffer, const RpsImageView& normalBuffer, const RpsImageView& depthBuffer, const RpsImageView& shadowMap) + { + } + + void RenderTransparency(const RpsCmdCallbackContext& context) + //(const RpsImageView& lightBuffer, const RpsImageView& depthBuffer, const RpsImageView& shadowMap) + { + } + + void PostProcess(const RpsCmdCallbackContext& context) + //(const RpsImageView& outputBuffer, const RpsImageView& lightBuffer, const RpsImageView& depthBuffer) + { + } + + void Copy(const RpsCmdCallbackContext& context) + //(const RpsImageView& dstBuffer, const RpsImageView& srcBuffer) + { + } + +private: + enum ResourceIds + { + RES_ID_OUTPUT_BUFFER, + RES_ID_SHADOW_MAP, + RES_ID_DEPTH_BUFFER, + RES_ID_GBUFFER_MATERIAL, + RES_ID_GBUFFER_NORMAL, + RES_ID_LIGHT_BUFFER, + NUM_RESOURCES, + }; + + RpsResourceId m_resourceIds[NUM_RESOURCES]; + ResourceDesc m_resourceDescs[NUM_RESOURCES]; + ImageView m_resourceViews[NUM_RESOURCES]; + + RpsClearValue clearValue = {}; + + enum NodeIdentifiers + { + NODE_ID_ZPREPASS, + NODE_ID_SHADOW_MAP, + NODE_ID_FORWARD, + NODE_ID_GBUFFER, + NODE_ID_LIGHTING, + NODE_ID_TRANSPARENCY, + NODE_ID_POST_PROCESS, + NODE_ID_COPY_TO_OUTPUT, + NUM_NODE_IDS, + }; +}; + +TEST_CASE("BuildCmdBufAndRenderGraph") +{ + RpsDevice device = rpsTestUtilCreateDevice([](const RpsDeviceCreateInfo* pCreateInfo, RpsDevice* pDevice) { + RpsNullRuntimeDeviceCreateInfo runtimeDeviceCreateInfo = {}; + runtimeDeviceCreateInfo.pDeviceCreateInfo = pCreateInfo; + return rpsNullRuntimeDeviceCreate(&runtimeDeviceCreateInfo, pDevice); + }); + + RpsRenderGraph renderGraph = RPS_NULL_HANDLE; + + { + RpsParameterDesc paramDescs[] = { + ParameterDesc::Make(), + 
ParameterDesc::Make("backBuffer", RPS_PARAMETER_FLAG_RESOURCE_BIT), + ParameterDesc::Make(), + }; + + RpsRenderGraphSignatureDesc entryInfo = {}; + entryInfo.name = "BasicPipeline"; + entryInfo.numParams = RPS_TEST_COUNTOF(paramDescs); + entryInfo.pParamDescs = paramDescs; + + RpsRenderGraphCreateInfo renderGraphCreateInfo = {}; + renderGraphCreateInfo.mainEntryCreateInfo.pSignatureDesc = &entryInfo; + + REQUIRE_RPS_OK(rpsRenderGraphCreate(device, &renderGraphCreateInfo, &renderGraph)); + } + + using ResolutionType = std::pair; + + ResourceDesc outputBufferDesc{RPS_RESOURCE_TYPE_IMAGE_2D, RPS_FORMAT_R8G8B8A8_UNORM, 1, 1}; + + MiniRenderer renderer; + + MiniRenderer::RenderOptions options = {}; + options.DepthFormat = RPS_FORMAT_D32_FLOAT_S8X24_UINT; + options.ShadowMapFormat = RPS_FORMAT_D16_UNORM; + options.ShadowMapSize = 1024; + + struct + { + bool* pEnable; + const char* name; + } optionArray[] = { + {&options.EnableShadowMap, "ShadowMap"}, + {&options.ReverseZ, "ReverseZ"}, + {&options.EnableZPrePass, "ZPrePass"}, + {&options.EnableDeferred, "Deferred"}, + {&options.EnableTransparency, "Transparency"}, + {&options.EnablePostProcess, "PostProcess"}, + }; + +#define DEBUG_OPTION 1 +#if DEBUG_OPTION + options.EnableShadowMap = true; + options.ReverseZ = false; + options.EnableZPrePass = true; + options.EnableDeferred = true; + options.EnableTransparency = false; + options.EnablePostProcess = false; +#endif + + constexpr uint32_t NumPermutations = 1u << RPS_TEST_COUNTOF(optionArray); + + ResolutionType resolutions[] = {{1280, 720}, {3840, 2160}}; + + RpsRenderGraphUpdateInfo updateInfo = {}; + updateInfo.gpuCompletedFrameIndex = RPS_GPU_COMPLETED_FRAME_INDEX_NONE; + updateInfo.diagnosticFlags |= RPS_DIAGNOSTIC_ENABLE_ALL; + + for (auto res = std::cbegin(resolutions); res != std::cend(resolutions); ++res) + { + outputBufferDesc.image.width = res->first; + outputBufferDesc.image.height = res->second; + +#if DEBUG_OPTION + MiniRenderer* pRenderer = &renderer; + const 
RpsConstant args[] = {&pRenderer, &outputBufferDesc, &options}; + updateInfo.ppArgs = args; + updateInfo.numArgs = RPS_TEST_COUNTOF(args); + updateInfo.pfnBuildCallback = &MiniRenderer::BuildRenderGraphCb; + + REQUIRE_RPS_OK(rpsRenderGraphUpdate(renderGraph, &updateInfo)); +#endif + + for (uint32_t iPerm = 0; iPerm < NumPermutations; iPerm++) + { + for (uint32_t iOpt = 0; iOpt < RPS_TEST_COUNTOF(optionArray); iOpt++) + { + *optionArray[iOpt].pEnable = !!((1u << iOpt) & iPerm); + + printf("%s%s : %s", + iOpt == 0 ? "\n" : ", ", + optionArray[iOpt].name, + *optionArray[iOpt].pEnable ? "1" : "0"); + } + + REQUIRE_RPS_OK(rpsRenderGraphUpdate(renderGraph, &updateInfo)); + } + } + + rpsRenderGraphDestroy(renderGraph); + + rpsTestUtilDestroyDevice(device); +} diff --git a/tests/console/test_device.cpp b/tests/console/test_device.cpp new file mode 100644 index 0000000..ee8d17b --- /dev/null +++ b/tests/console/test_device.cpp @@ -0,0 +1,62 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#define CATCH_CONFIG_MAIN +#include +#include + +#include "rps/rps.h" + +#include "utils/rps_test_common.h" +/* Allocator callback that fails every request by returning NULL; installed as allocator.pfnAlloc below to drive the out-of-memory path. */ +void* FailingMalloc(void* pContext, size_t size, size_t alignment) +{ + return NULL; +} + +// Exercises rpsDeviceCreate / rpsDeviceDestroy: rejected arguments, default allocator, a failing allocator, and counted allocator callbacks. +TEST_CASE("DeviceCreation") +{ + RpsDevice device = RPS_NULL_HANDLE; + RpsDeviceCreateInfo createInfo; + + // Invalid input: a NULL output-handle pointer must be rejected with RPS_ERROR_INVALID_ARGUMENTS. + memset(&createInfo, 0, sizeof(createInfo)); + RpsResult result = rpsDeviceCreate(&createInfo, NULL); + REQUIRE(result == RPS_ERROR_INVALID_ARGUMENTS); + REQUIRE(device == RPS_NULL_HANDLE); + + // Default allocator: createInfo is still zero-filled here, so no allocator or printer callbacks are supplied. + result = rpsDeviceCreate(&createInfo, &device); + REQUIRE(result == RPS_OK); + REQUIRE(device != RPS_NULL_HANDLE); + rpsDeviceDestroy(device); + device = RPS_NULL_HANDLE; + + // OOM: every allocation fails, so creation must return RPS_ERROR_OUT_OF_MEMORY and leave the handle null. + createInfo.allocator.pfnAlloc = FailingMalloc; + createInfo.allocator.pfnFree = CountedFree; + createInfo.printer.pfnPrintf = NULL; + result = rpsDeviceCreate(&createInfo, &device); + REQUIRE(result == RPS_ERROR_OUT_OF_MEMORY); + REQUIRE(device == RPS_NULL_HANDLE); + REQUIRE(g_NumMallocs == 0); + + // Success: counted allocator callbacks; g_NumMallocs must be non-zero while the device exists. + createInfo.allocator.pfnAlloc = CountedMalloc; + createInfo.allocator.pfnFree = CountedFree; + createInfo.printer.pfnPrintf = NULL; + + result = rpsDeviceCreate(&createInfo, &device); + REQUIRE(result == RPS_OK); + REQUIRE(device != RPS_NULL_HANDLE); + REQUIRE(g_NumMallocs > 0); + + // Clean up: destroying the device must bring g_NumMallocs back to zero. + rpsDeviceDestroy(device); + REQUIRE(g_NumMallocs == 0); +} diff --git a/tests/console/test_hlsl_intrinsics.cpp b/tests/console/test_hlsl_intrinsics.cpp new file mode 100644 index 0000000..7259f08 --- /dev/null +++ b/tests/console/test_hlsl_intrinsics.cpp @@ -0,0 +1,214 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details.
+ +#define CATCH_CONFIG_MAIN +#include +#include + +#include "rps/rps.h" +#include "utils/rps_test_common.h" + +#include "core/rps_util.hpp" + +// TODO: Reuse code: + +struct float4 +{ + float x, y, z, w; +}; + +struct int2 +{ + int32_t x, y; +}; + +struct uint3 +{ + uint32_t x, y, z; +}; +// CPU counterpart of RPSL firstbithigh() on a signed value: negative inputs are bit-inverted before the search. +uint32_t LzcntS32(int32_t a) +{ + if (a >= 0) + return rpsFirstBitHigh(uint32_t(a)); + else + return rpsFirstBitHigh(~(uint32_t)a); +} +// CPU reference for rps_main in test_hlsl_intrinsics.rpsl: fills ia[0..9] and fa[0..15] from the same inputs (i2 is accepted but not read). +void EvalOnCpu(uint32_t ia[10], float fa[16], int32_t i1, uint32_t u1, const int2& i2, const uint3& u3, float f1, const float4& f4) +{ + ia[0] = (uint32_t)abs(i1); + + ia[1] = (i1 >> 3) & 0xff; + + ia[2] = *(const uint32_t*)(&f4.w); + + ia[3] = u3.x * u3.y + u3.z; + + ia[4] = rpsCountBits(u3.y); + + ia[5] = u3.x / 17; + ia[6] = u3.x % 17; + + uint64_t xx = (uint64_t(u1) << 32) | u3.x; + xx = xx / ((uint64_t)u3.y + 1); + + ia[7] = uint32_t(xx & 0xffffffff); + + ia[8] = rpsFirstBitHigh(u3.y); + + ia[9] = LzcntS32(-(int)u3.y - 4096); + + fa[0] = atan2(f4.x + f1, f4.y); + + fa[1] = *(const float*)(&u1); + + fa[2] = (((f4.x > 0) && (f4.y > 0) && (f4.z > 0) && (f4.w > 0)) && ((u3.x < 1000) || (u3.y < 1000) || (u3.z < 1000))) ? float(u1) : float(i1); + + fa[3] = i1 ? f4.x : f4.y; + + fa[4] = fmaxf(f4.x, f4.y); + + fa[5] = (f4.y - f4.x) * f4.z + f4.x; + + fa[6] = floor(f4.x); + fa[7] = ceil(f4.y); + fa[8] = round(f4.z * 0.5f) * 2.0f; + fa[9] = trunc(f4.w); + + fa[10] = (isinf(f4.x) || isnan(f4.y) || !isfinite(f4.z)) ? 1.0f : 2.0f; + + fa[11] = f4.y * f4.z + f4.x; + + fa[12] = cosf(f4.x); + + fa[13] = 1.0f / sqrtf(fabs(f4.y) + 0.0001f); + + fa[14] = (f4.z < 0 ? 0 : (f4.z > 1 ?
1 : f4.z)); + + fa[15] = exp2f(fmin(f4.z, 1.0f) * float(1.442695e+00)); +} + +RPS_DECLARE_RPSL_ENTRY(test_hlsl_intrinsics, rps_main); + +struct RpslCalculationResults +{ + const uint32_t* iaRps; + const float* faRps; +} rpslResults; +// Node callback bound to "Foo": stores the node's two array arguments in the RpslCalculationResults passed as the record user context. +void FooCb(const RpsCmdCallbackContext* pContext) +{ + RpslCalculationResults* pResults = static_cast(pContext->pUserRecordContext); + + pResults->iaRps = static_cast(pContext->ppArgs[0]); + pResults->faRps = static_cast(pContext->ppArgs[1]); +} +// Runs rps_main on 100 random input sets and compares the arrays received by Foo against EvalOnCpu. +TEST_CASE("TestHLSLIntrinsics") +{ + RpsDevice device = rpsTestUtilCreateDevice([](auto pCreateInfo, auto phDevice) { + RpsNullRuntimeDeviceCreateInfo nullDeviceCreateInfo = {}; + nullDeviceCreateInfo.pDeviceCreateInfo = pCreateInfo; + return rpsNullRuntimeDeviceCreate(&nullDeviceCreateInfo, phDevice); + }); + + RPS_TEST_MALLOC_CHECKPOINT(PostCreateDevice); + + RpsRenderGraphCreateInfo renderGraphCreateInfo = {}; + renderGraphCreateInfo.scheduleInfo.scheduleFlags = RPS_SCHEDULE_DISABLE_DEAD_CODE_ELIMINATION_BIT; + renderGraphCreateInfo.mainEntryCreateInfo.hRpslEntryPoint = rpsTestLoadRpslEntry(test_hlsl_intrinsics, rps_main); + + RpsRenderGraph renderGraph = {}; + RpsResult result = rpsRenderGraphCreate(device, &renderGraphCreateInfo, &renderGraph); + REQUIRE(result == RPS_OK); + + auto hEntryInstance = rpsRenderGraphGetMainEntry(renderGraph); + rpsProgramBindNode(hEntryInstance, "Foo", &FooCb, nullptr); + + // void rps_main(int i1, uint u1, int2 i2, uint3 u3, float f1, float4 f4) + + srand(uint32_t(time(NULL))); + + for (uint32_t r = 0; r < 100; r++) + { + int32_t i1 = rand(); + uint32_t u1 = uint32_t(rand()); + int2 i2 = {rand(), rand()}; + uint3 u3 = {uint32_t(rand()), uint32_t(rand()), uint32_t(rand())}; + float f1 = rand() / float(RAND_MAX) + rand(); + float4 f4 = {rand() / float(RAND_MAX) + rand(), + rand() / float(RAND_MAX) + rand(), + rand() / float(RAND_MAX) + rand(), + rand() / float(RAND_MAX) + rand()}; + + RpsConstant args[] = {&i1, &u1, &i2, &u3, &f1, &f4}; + +
RpsRenderGraphUpdateInfo updateInfo = {}; + updateInfo.frameIndex = 0; + updateInfo.gpuCompletedFrameIndex = RPS_GPU_COMPLETED_FRAME_INDEX_NONE; + updateInfo.numArgs = uint32_t(RPS_TEST_COUNTOF(args)); + updateInfo.ppArgs = args; + updateInfo.ppArgResources = nullptr; + updateInfo.diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_ALL; + + REQUIRE_RPS_OK(rpsRenderGraphUpdate(renderGraph, &updateInfo)); + REQUIRE(result == RPS_OK); + + RpsRenderGraphBatchLayout batchLayout = {}; + REQUIRE_RPS_OK(rpsRenderGraphGetBatchLayout(renderGraph, &batchLayout)); + REQUIRE(batchLayout.numCmdBatches == 1); + + RpsRenderGraphRecordCommandInfo recordInfo = {}; + recordInfo.frameIndex = 0; + recordInfo.pUserContext = &rpslResults; + recordInfo.cmdBeginIndex = batchLayout.pCmdBatches[0].cmdBegin; + recordInfo.numCmds = batchLayout.pCmdBatches[0].numCmds; + REQUIRE_RPS_OK(rpsRenderGraphRecordCommands(renderGraph, &recordInfo)); + + uint32_t ia[10]; + float fa[16]; + + EvalOnCpu(ia, fa, i1, u1, i2, u3, f1, f4); + + printf("\n"); + + REQUIRE(rpslResults.iaRps != nullptr); + REQUIRE(rpslResults.faRps != nullptr); + + for (uint32_t i = 0; i < RPS_TEST_COUNTOF(ia); i++) + { + // Attach the element index to any failure report; the former DebugBreak() was Windows-only and trapped before REQUIRE could report. + INFO("ia[" << i << "]"); + REQUIRE(ia[i] == rpslResults.iaRps[i]); + } + + for (uint32_t i = 0; i < RPS_TEST_COUNTOF(fa); i++) + { + printf("%25.10f : %25.10f\n", fa[i], rpslResults.faRps[i]); + + if (i < 2) + { + // Looser tolerance for fa[0] and fa[1]: FMod / Atan2 impl are slightly different between DXIL and emulation.
+ REQUIRE(fabs(fa[i] - rpslResults.faRps[i]) < 1E-5f); + } + else + { + REQUIRE(((isinf(fa[i]) && isinf(rpslResults.faRps[i])) || + (isnan(fa[i]) && isnan(rpslResults.faRps[i])) || + (fabs(fa[i] - rpslResults.faRps[i]) < FLT_EPSILON))); + } + } + } + + rpsRenderGraphDestroy(renderGraph); + + RPS_TEST_MALLOC_COUNTER_EQUAL_CURRENT(PostCreateDevice); + + // Clean up + rpsTestUtilDestroyDevice(device); +} diff --git a/tests/console/test_hlsl_intrinsics.rpsl b/tests/console/test_hlsl_intrinsics.rpsl new file mode 100644 index 0000000..2d3fd59 --- /dev/null +++ b/tests/console/test_hlsl_intrinsics.rpsl @@ -0,0 +1,67 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. +// Sink node: receives the two result arrays computed by rps_main. +node Foo(uint ia[10], float fa[16]); +// Evaluates a set of HLSL intrinsics on the entry arguments and forwards the results to Foo. +export void rps_main(int i1, uint u1, int2 i2, uint3 u3, float f1, float4 f4) +{ + uint ia[10]; + float fa[16]; + + ia[0] = (uint)abs(i1); + + ia[1] = (i1 >> 3) & 0xff; + + ia[2] = asuint(f4.w); + + ia[3] = mad(u3.x, u3.y, u3.z); + + ia[4] = countbits(u3.y); + + ia[5] = u3.x / 17; + ia[6] = u3.x % 17; + + uint64_t xx = (uint64_t(u1) << 32) | u3.x; + xx = xx / ((uint64_t)u3.y + 1); + + ia[7] = uint(xx & 0xffffffff); + + ia[8] = firstbithigh(u3.y); + + ia[9] = firstbithigh(-(int)u3.y - 4096); + + fa[0] = atan2(f4.x + f1, f4.y); + + fa[1] = asfloat(u1); + + fa[2] = (all(f4 > 0) && any(u3 < 1000)) ? float(u1) : float(i1); + + fa[3] = i1 ? f4.x : f4.y; + + fa[4] = max(f4.x, f4.y); + + fa[5] = lerp(f4.x, f4.y, f4.z); + + fa[6] = floor(f4.x); + fa[7] = ceil(f4.y); + fa[8] = round(f4.z); + fa[9] = trunc(f4.w); + + fa[10] = (isinf(f4.x) || isnan(f4.y) || !isfinite(f4.z)) ?
1.0f : 2.0f; + + fa[11] = mad(f4.y, f4.z, f4.x); + + fa[12] = cos(f4.x); + + fa[13] = rsqrt(abs(f4.y) + 0.0001f); + + fa[14] = saturate(f4.z); + + fa[15] = exp(min(f4.z, 1)); + + Foo(ia, fa); +} diff --git a/tests/console/test_rpsl_jit.cpp b/tests/console/test_rpsl_jit.cpp new file mode 100644 index 0000000..4b1e389 --- /dev/null +++ b/tests/console/test_rpsl_jit.cpp @@ -0,0 +1,140 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#define CATCH_CONFIG_MAIN +#include +#include +#include + +#define USE_RPSL_JIT 1 + +#include "rps/rps.h" +#include "utils/rps_test_common.h" + +// TODO: make JIT test crossplatform +#include "utils/rps_test_win32.h" + +static const char c_RpslCode[] = R"( +node Foo(uint2 ua, float fa[3]); +export void main(uint a, float3 b) +{ + uint2 ua = uint2( a + 1, 42 ); + float fa[3] = { b.z, b.y, b.x }; + Foo(ua, fa); +} +)"; +// Node callback for "Foo": asserts the values main() derives from a = 3 and b = {3.142f, 2.718f, 0.0f}. +void Foo(const RpsCmdCallbackContext* pContext, const uint32_t ua[2], const float fa[3]) +{ + REQUIRE(ua); + REQUIRE(fa); + REQUIRE(ua[0] == 3 + 1); + REQUIRE(fa[0] == 0.0f); + REQUIRE(fa[1] == 2.718f); + REQUIRE(fa[2] == 3.142f); +} +// Compiles c_RpslCode to bitcode with rps-hlslc.exe, loads it through RpsAfxJITHelper, and runs one update / record cycle. +TEST_CASE("TestRPSJit") +{ + RpsResult result = RPS_OK; + RpsDevice device = rpsTestUtilCreateDevice([](const RpsDeviceCreateInfo* pCreateInfo, RpsDevice* phDevice) { + RpsNullRuntimeDeviceCreateInfo nullDeviceCreateInfo = {}; + nullDeviceCreateInfo.pDeviceCreateInfo = pCreateInfo; + return rpsNullRuntimeDeviceCreate(&nullDeviceCreateInfo, phDevice); + }); + + RPS_TEST_MALLOC_CHECKPOINT(PostCreateDevice); + + char workingDir[MAX_PATH]; + REQUIRE(_getcwd(workingDir, _countof(workingDir)) == workingDir); + + // TODO: Make dxcompiler.dll to compile RPSL directly + // Write to temp file + std::string tmpRpslPath = std::string(workingDir) + "\\test_rpsl_jit.rpsl"; +
REQUIRE(WriteToFile(tmpRpslPath, c_RpslCode, sizeof(c_RpslCode) - 1)); + + // Call rps-hlslc, compile string to bitcode + std::stringstream rpsHlslcCmdLine; + rpsHlslcCmdLine << "rps-hlslc.exe \"" << tmpRpslPath << "\" -od \"" << workingDir + << "\" -m test_rpsl_jit -O3 -rps-target-dll -rps-bc -cbe=0"; + REQUIRE(LaunchProcess(rpsHlslcCmdLine.str().c_str())); + + // JIT the bitcode + do + { + const char* argv[] = {""}; + RpsAfxJITHelper jit(_countof(argv), argv); + + int32_t jitStartupResult = jit.pfnRpsJITStartup(1, argv); + REQUIRE(jitStartupResult == 0); + + RpsJITModule hJITModule = jit.LoadBitcode((std::string(workingDir) + "\\test_rpsl_jit.llvm.bc").c_str()); + REQUIRE(hJITModule); + + auto moduleName = jit.GetModuleName(hJITModule); + REQUIRE(moduleName); + REQUIRE(moduleName == std::string("test_rpsl_jit")); + + auto entryNameTable = jit.GetEntryNameTable(hJITModule); + REQUIRE(entryNameTable); + REQUIRE(entryNameTable[0] == std::string("main")); + REQUIRE(entryNameTable[1] == nullptr); + + char nameBuf[256]; + RpsRpslEntry hRpslEntry = jit.GetEntryPoint( + hJITModule, rpsMakeRpslEntryName(nameBuf, RPS_TEST_COUNTOF(nameBuf), moduleName, entryNameTable[0])); + + REQUIRE(hRpslEntry != nullptr); + + RpsRenderGraphCreateInfo renderGraphCreateInfo = {}; + renderGraphCreateInfo.scheduleInfo.scheduleFlags = RPS_SCHEDULE_DISABLE_DEAD_CODE_ELIMINATION_BIT; + renderGraphCreateInfo.mainEntryCreateInfo.hRpslEntryPoint = hRpslEntry; + + RpsRenderGraph renderGraph = {}; + result = rpsRenderGraphCreate(device, &renderGraphCreateInfo, &renderGraph); + REQUIRE(result == RPS_OK); + + auto hEntryInstance = rpsRenderGraphGetMainEntry(renderGraph); + rpsProgramBindNode(hEntryInstance, "Foo", &Foo); + + uint32_t a = 3; + float b[3] = {3.142f, 2.718f, 0.0f}; + + RpsConstant args[] = {&a, b}; + + RpsRenderGraphUpdateInfo updateInfo = {}; + updateInfo.frameIndex = 0; + updateInfo.gpuCompletedFrameIndex = RPS_GPU_COMPLETED_FRAME_INDEX_NONE; + updateInfo.numArgs =
uint32_t(RPS_TEST_COUNTOF(args)); + updateInfo.ppArgs = args; + updateInfo.ppArgResources = nullptr; + updateInfo.diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_ALL; + + REQUIRE_RPS_OK(rpsRenderGraphUpdate(renderGraph, &updateInfo)); + REQUIRE(result == RPS_OK); + + RpsRenderGraphBatchLayout batchLayout = {}; + REQUIRE_RPS_OK(rpsRenderGraphGetBatchLayout(renderGraph, &batchLayout)); + REQUIRE(batchLayout.numCmdBatches == 1); + + RpsRenderGraphRecordCommandInfo recordInfo = {}; + recordInfo.frameIndex = 0; + recordInfo.cmdBeginIndex = batchLayout.pCmdBatches[0].cmdBegin; + recordInfo.numCmds = batchLayout.pCmdBatches[0].numCmds; + REQUIRE_RPS_OK(rpsRenderGraphRecordCommands(renderGraph, &recordInfo)); + + rpsRenderGraphDestroy(renderGraph); + + jit.pfnRpsJITUnload(hJITModule); + + } while (false); + + RPS_TEST_MALLOC_COUNTER_EQUAL_CURRENT(PostCreateDevice); + + // Clean up + rpsTestUtilDestroyDevice(device); +} diff --git a/tests/console/test_scheduler.cpp b/tests/console/test_scheduler.cpp new file mode 100644 index 0000000..89a69ae --- /dev/null +++ b/tests/console/test_scheduler.cpp @@ -0,0 +1,351 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details.
+ +#define CATCH_CONFIG_MAIN +#include +#include + +#include "rps/rps.h" +#include "utils/rps_test_common.h" + +#include + +RPS_DECLARE_RPSL_ENTRY(test_scheduler, program_order); +RPS_DECLARE_RPSL_ENTRY(test_scheduler, memory_saving); +RPS_DECLARE_RPSL_ENTRY(test_scheduler, random_order); +RPS_DECLARE_RPSL_ENTRY(test_scheduler, dead_code_elimination); +RPS_DECLARE_RPSL_ENTRY(test_scheduler, gfx_comp_batching); + +struct NodeOrderChecker +{ +public: + NodeOrderChecker(RpsDevice hDevice) + : m_device(hDevice) + , m_mt19937(std::random_device()()) + { + } + + void CreateRenderGraph(RpsRpslEntry hEntry) + { + DestroyRenderGraph(); + + RpsRenderGraphCreateInfo renderGraphCreateInfo = {}; + renderGraphCreateInfo.mainEntryCreateInfo.hRpslEntryPoint = hEntry; + + REQUIRE_RPS_OK(rpsRenderGraphCreate(m_device, &renderGraphCreateInfo, &m_renderGraph)); + + auto hMainEntry = rpsRenderGraphGetMainEntry(m_renderGraph); + REQUIRE_RPS_OK(rpsProgramBindNode(hMainEntry, nullptr, &NodeOrderChecker::CmdCallback, this)); + } + + void DestroyRenderGraph() + { + rpsRenderGraphDestroy(m_renderGraph); + m_renderGraph = RPS_NULL_HANDLE; + } + + void Execute(const RpsConstant* ppArgs, + uint32_t numArgs, + RpsScheduleFlags scheduleFlags, + RpsDiagnosticFlags diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_ALL, + std::function&)> customAssertion = {}) + { + RpsRandomNumberGenerator randGen = {}; + randGen.pContext = this; + randGen.pfnRandomUniformInt = &RandGen; + + RpsRenderGraphUpdateInfo updateInfo = {}; + updateInfo.frameIndex = 0; + updateInfo.gpuCompletedFrameIndex = RPS_GPU_COMPLETED_FRAME_INDEX_NONE; + updateInfo.numArgs = numArgs; + updateInfo.ppArgs = ppArgs; + updateInfo.ppArgResources = nullptr; + updateInfo.diagnosticFlags = diagnosticFlags; + updateInfo.scheduleFlags = scheduleFlags; + updateInfo.pRandomNumberGenerator = &randGen; + + RpsRenderGraphRecordCommandInfo recordInfo = {}; + RpsRenderGraphBatchLayout batchLayout = {}; + + REQUIRE_RPS_OK(rpsRenderGraphUpdate(m_renderGraph, 
&updateInfo)); + + REQUIRE_RPS_OK(rpsRenderGraphGetBatchLayout(m_renderGraph, &batchLayout)); + + for (uint32_t iBatch = 0; iBatch < batchLayout.numCmdBatches; iBatch++) + { + REQUIRE(batchLayout.numCmdBatches == 1); + + recordInfo.cmdBeginIndex = batchLayout.pCmdBatches[iBatch].cmdBegin; + recordInfo.numCmds = batchLayout.pCmdBatches[iBatch].numCmds; + + REQUIRE_RPS_OK(rpsRenderGraphRecordCommands(m_renderGraph, &recordInfo)); + } + + if (customAssertion) + { + customAssertion(m_actualSequence); + ResetSequences(); + } + else + { + AssertAndResetSequences(); + } + } + + void CmdCallback(const RpsCmdCallbackContext* pContext, uint32_t id) + { + m_actualSequence.push_back(id); + } + + void PushExpected(uint32_t value) + { + m_expectedSequence.push_back(value); + } + + void PushExpected(std::initializer_list values) + { + m_expectedSequence.insert(m_expectedSequence.end(), values); + } + + void PushExpectedRange(uint32_t begin, uint32_t end, int32_t step) + { + REQUIRE(begin != end); + REQUIRE(step != 0); + + if (begin < end) + { + for (uint32_t i = begin; i < end; i += step) + { + m_expectedSequence.push_back(i); + } + } + else + { + for (uint32_t i = end; i < begin; i += step) + { + m_expectedSequence.push_back(i); + } + } + } + +private: + void AssertSequences() const + { + REQUIRE(m_actualSequence.size() == m_expectedSequence.size()); + + for (uint32_t i = 0; i < m_actualSequence.size(); i++) + { + REQUIRE(m_actualSequence[i] == m_expectedSequence[i]); + } + } + + void AssertAndResetSequences() + { + AssertSequences(); + ResetSequences(); + } + + void ResetSequences() + { + m_actualSequence.clear(); + m_expectedSequence.clear(); + } + + static int32_t RandGen(void* pContext, int32_t minVal, int32_t maxVal) + { + NodeOrderChecker* pThis = static_cast(pContext); + return std::uniform_int_distribution<>(minVal, maxVal)(pThis->m_mt19937); + } + +private: + RpsDevice m_device = {}; + std::mt19937 m_mt19937; + RpsRenderGraph m_renderGraph = {}; + + std::vector 
m_actualSequence; + std::vector m_expectedSequence; +}; + +TEST_CASE("TestScheduler") +{ + RpsDevice device = rpsTestUtilCreateDevice([](auto pCreateInfo, auto phDevice) { + RpsNullRuntimeDeviceCreateInfo nullDeviceCreateInfo = {}; + nullDeviceCreateInfo.pDeviceCreateInfo = pCreateInfo; + return rpsNullRuntimeDeviceCreate(&nullDeviceCreateInfo, phDevice); + }); + + RPS_TEST_MALLOC_CHECKPOINT(PostCreateDevice); + + NodeOrderChecker orderChecker(device); + + rps::ResourceDesc resourceDesc(RPS_RESOURCE_TYPE_IMAGE_2D, RPS_FORMAT_R8G8B8A8_UNORM, 1920, 1080, 1); + + RpsConstant args[] = {&resourceDesc, nullptr, nullptr}; + + // Default scheduling: + + orderChecker.CreateRenderGraph(rpsTestLoadRpslEntry(test_scheduler, program_order)); + + // Expect interleaved Draw / Blt to be rescheduled and grouped together ( Draw x 6 + Blt x 6 + Draw x 6 + Blt x 6 ): + orderChecker.PushExpectedRange(0, 12, 1); + orderChecker.PushExpectedRange(12, 24, 2); + orderChecker.PushExpectedRange(13, 25, 2); + orderChecker.Execute(args, 1, RPS_SCHEDULE_DEFAULT); + + // Force Program-Order scheduling: + + // Expect program order ( Draw x 6 + Blt x 6 + ( Draw + Blt ) x 6 ): + orderChecker.PushExpectedRange(0, 24, 1); + orderChecker.Execute(args, 1, RPS_SCHEDULE_KEEP_PROGRAM_ORDER_BIT); + + // Prefer memory-saving scheduling: + orderChecker.CreateRenderGraph(rpsTestLoadRpslEntry(test_scheduler, memory_saving)); + + // Expect default order ( Draw x 6 + Blt x 6 ) + orderChecker.PushExpectedRange(0, 12, 1); + orderChecker.Execute(args, 1, RPS_SCHEDULE_DEFAULT); + + // Expect memory-saving order ( (Draw + Blt) x 6 ) + for (uint32_t i = 0; i < 6; i++) + { + orderChecker.PushExpectedRange(i, i + 6 + 1, 6); + } + orderChecker.Execute(args, 1, RPS_SCHEDULE_PREFER_MEMORY_SAVING_BIT); + + // Random ordering + + orderChecker.CreateRenderGraph(rpsTestLoadRpslEntry(test_scheduler, random_order)); + + constexpr uint32_t NumIndependentNodes = 12; + constexpr uint32_t NumIterations = 100; + constexpr int32_t 
ExpectedSum = (NumIndependentNodes * (NumIndependentNodes - 1) / 2) * NumIterations; + constexpr int32_t ExpectedAvgSumPerCmd = ExpectedSum / NumIndependentNodes; + + orderChecker.PushExpectedRange(0, NumIndependentNodes + 1, 1); + orderChecker.Execute(args, 1, RPS_SCHEDULE_KEEP_PROGRAM_ORDER_BIT); + + uint32_t sumsPerCmd[NumIndependentNodes] = {}; + + for (uint32_t iFrame = 0; iFrame < NumIterations; iFrame++) + { + orderChecker.Execute(args, + 1, + RPS_SCHEDULE_RANDOM_ORDER_BIT, + (iFrame < 5) ? RPS_DIAGNOSTIC_ENABLE_POST_SCHEDULE_DUMP : RPS_DIAGNOSTIC_NONE, + [&](auto& sequence) { + REQUIRE(sequence.size() == (NumIndependentNodes + 1)); + REQUIRE(sequence.back() == NumIndependentNodes); + + for (uint32_t i = 0; i < NumIndependentNodes; i++) + { + sumsPerCmd[i] += sequence[i]; + } + }); + } + + uint32_t totalSum = 0; + for (uint32_t i = 0; i < NumIndependentNodes; i++) + { + totalSum += sumsPerCmd[i]; + + //TODO find a better solution that does not occasionally fail like this + REQUIRE(abs(int32_t(sumsPerCmd[i]) - ExpectedAvgSumPerCmd) < (ExpectedAvgSumPerCmd / 2)); + } + + REQUIRE(totalSum == ExpectedSum); + + // Dead code elimination: + + orderChecker.CreateRenderGraph(rpsTestLoadRpslEntry(test_scheduler, dead_code_elimination)); + + const RpsBool bBltEnable = RPS_TRUE, bBltDisable = RPS_FALSE; + + args[1] = &bBltEnable; + args[2] = &bBltEnable; + orderChecker.PushExpected({0, 1, 2, 3}); + orderChecker.Execute(args, 3, RPS_SCHEDULE_DEFAULT, RPS_DIAGNOSTIC_ENABLE_ALL); + orderChecker.PushExpected({0, 1, 2, 3}); + orderChecker.Execute(args, 3, RPS_SCHEDULE_DISABLE_DEAD_CODE_ELIMINATION_BIT, RPS_DIAGNOSTIC_ENABLE_ALL); + + auto unorderedEqual = [](const std::vector& vec1, const std::vector& vec2) { + if (vec1.size() != vec2.size()) + return false; + + std::vector counter; + counter.reserve(vec1.size()); + + uint32_t slotCount = 0; + for (uint32_t elem : vec1) + { + if (elem >= counter.size()) + counter.resize(elem + 1, 0); + + slotCount += (0 == (counter[elem]++)) 
? 1 : 0; + } + + uint32_t reachingZeroCount = 0; + + for (uint32_t elem : vec2) + { + if (elem >= counter.size()) + return false; + reachingZeroCount += (0 == --(counter[elem])) ? 1 : 0; + } + + return slotCount == reachingZeroCount; + }; + + args[1] = &bBltDisable; + args[2] = &bBltEnable; + orderChecker.PushExpected({1, 3}); + orderChecker.Execute(args, 3, RPS_SCHEDULE_DEFAULT, RPS_DIAGNOSTIC_ENABLE_ALL); + orderChecker.Execute( + args, 3, RPS_SCHEDULE_DISABLE_DEAD_CODE_ELIMINATION_BIT, RPS_DIAGNOSTIC_ENABLE_ALL, [=](auto& actualSequence) { + REQUIRE(unorderedEqual({0, 1, 3}, actualSequence)); + }); + + args[1] = &bBltEnable; + args[2] = &bBltDisable; + orderChecker.PushExpected({0, 2}); + orderChecker.Execute(args, 3, RPS_SCHEDULE_DEFAULT, RPS_DIAGNOSTIC_ENABLE_ALL); + orderChecker.Execute( + args, 3, RPS_SCHEDULE_DISABLE_DEAD_CODE_ELIMINATION_BIT, RPS_DIAGNOSTIC_ENABLE_ALL, [=](auto& actualSequence) { + REQUIRE(unorderedEqual({0, 1, 2}, actualSequence)); + }); + + args[1] = &bBltDisable; + args[2] = &bBltDisable; + orderChecker.PushExpected({}); + orderChecker.Execute(args, 3, RPS_SCHEDULE_DEFAULT, RPS_DIAGNOSTIC_ENABLE_ALL); + orderChecker.Execute( + args, 3, RPS_SCHEDULE_DISABLE_DEAD_CODE_ELIMINATION_BIT, RPS_DIAGNOSTIC_ENABLE_ALL, [=](auto& actualSequence) { + REQUIRE(unorderedEqual({0, 1}, actualSequence)); + }); + + // Graphics / Compute interleave + + orderChecker.CreateRenderGraph(rpsTestLoadRpslEntry(test_scheduler, gfx_comp_batching)); + + // Default behavior, expect Gfx & Compute are interleaved: + orderChecker.PushExpected({0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11}); + orderChecker.Execute(args, 1, RPS_SCHEDULE_DEFAULT, RPS_DIAGNOSTIC_ENABLE_ALL); + + // Aggressive pipelining, expect compute to be moved before gfx while interleaved: + orderChecker.PushExpected({1, 0, 2, 3, 5, 4, 6, 7, 8, 9, 10, 11}); + orderChecker.Execute(args, 1, RPS_SCHEDULE_WORKLOAD_TYPE_PIPELINING_AGGRESSIVE_BIT, RPS_DIAGNOSTIC_ENABLE_ALL); + + // Prefer minimize compute & gfx 
switching: + orderChecker.PushExpected({0, 3, 4, 1, 2, 5, 6, 7, 8, 9, 10, 11}); + orderChecker.Execute(args, 1, RPS_SCHEDULE_MINIMIZE_COMPUTE_GFX_SWITCH_BIT, RPS_DIAGNOSTIC_ENABLE_ALL); + + orderChecker.DestroyRenderGraph(); + + RPS_TEST_MALLOC_COUNTER_EQUAL_CURRENT(PostCreateDevice); + + // Clean up + rpsTestUtilDestroyDevice(device); +} diff --git a/tests/console/test_scheduler.rpsl b/tests/console/test_scheduler.rpsl new file mode 100644 index 0000000..86c7497 --- /dev/null +++ b/tests/console/test_scheduler.rpsl @@ -0,0 +1,137 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +node draw(uint id, rtv rt : SV_Target0); +compute node comp_draw(uint id, uav rt); +node blt(uint id, rtv dst : SV_Target0, ps_srv src); +node blt_all(uint id, rtv dst : SV_Target0, ps_srv srcs[12]); + +export void program_order(texture output) +{ + ResourceDesc desc = output.desc(); + + texture t0 = create_texture(desc); + texture t1 = create_texture(desc); + texture t2 = create_texture(desc); + texture t3 = create_texture(desc); + texture t4 = create_texture(desc); + texture t5 = create_texture(desc); + + draw(0, t0); + draw(1, t1); + draw(2, t2); + draw(3, t3); + draw(4, t4); + draw(5, t5); + blt(6, output, t0); + blt(7, output, t1); + blt(8, output, t2); + blt(9, output, t3); + blt(10, output, t4); + blt(11, output, t5); + + draw(12, t0); + blt(13, output, t0); + draw(14, t1); + blt(15, output, t1); + draw(16, t2); + blt(17, output, t2); + draw(18, t3); + blt(19, output, t3); + draw(20, t4); + blt(21, output, t4); + draw(22, t5); + blt(23, output, t5); +} + +export void memory_saving(texture output) +{ + ResourceDesc desc = output.desc(); + + texture t0 = create_texture(desc); + texture t1 = create_texture(desc); + texture t2 = create_texture(desc); + texture t3 = 
create_texture(desc); + texture t4 = create_texture(desc); + texture t5 = create_texture(desc); + + draw(0, t0); + draw(1, t1); + draw(2, t2); + draw(3, t3); + draw(4, t4); + draw(5, t5); + blt(6, output, t0); + blt(7, output, t1); + blt(8, output, t2); + blt(9, output, t3); + blt(10, output, t4); + blt(11, output, t5); +} + +export void random_order(texture output) +{ + ResourceDesc desc = output.desc(); + + texture tmp[12]; + + for (uint i = 0; i < 12; i++) + { + tmp[i] = create_texture(desc); + draw(i, tmp[i]); + } + + blt_all(12, output, tmp); +} + +export void dead_code_elimination(texture output, bool bBlt0, bool bBlt1) +{ + ResourceDesc desc = output.desc(); + + texture t0 = create_texture(desc); + texture t1 = create_texture(desc); + + draw(0, t0); + + draw(1, t1); + + if (bBlt0) + { + blt(2, output, t0); + } + + if (bBlt1) + { + blt(3, output, t1); + } +} + +export void gfx_comp_batching(texture output) +{ + ResourceDesc desc = output.desc(); + + texture t0 = create_texture(desc); + texture t1 = create_texture(desc); + texture t2 = create_texture(desc); + texture t3 = create_texture(desc); + texture t4 = create_texture(desc); + texture t5 = create_texture(desc); + + draw(0, t0); + comp_draw(1, t1); + comp_draw(2, t2); + draw(3, t3); + draw(4, t4); + comp_draw(5, t5); + + blt(6, output, t0); + blt(7, output, t1); + blt(8, output, t2); + blt(9, output, t3); + blt(10, output, t4); + blt(11, output, t5); +} diff --git a/tests/console/test_scheduler_intrinsics.cpp b/tests/console/test_scheduler_intrinsics.cpp new file mode 100644 index 0000000..b71d746 --- /dev/null +++ b/tests/console/test_scheduler_intrinsics.cpp @@ -0,0 +1,438 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#define CATCH_CONFIG_MAIN +#include +#include + +#include "rps/rps.h" +#include "utils/rps_test_common.h" + +#include + +RPS_DECLARE_RPSL_ENTRY(test_scheduler_intrinsics, test_schedule_control); +RPS_DECLARE_RPSL_ENTRY(test_scheduler_intrinsics, test_schedule_control_nested_atomic_subgraph); +RPS_DECLARE_RPSL_ENTRY(test_scheduler_intrinsics, test_abort); + +struct NodeOrderChecker +{ +public: + NodeOrderChecker(RpsDevice hDevice) + : m_device(hDevice) + , m_mt19937(std::random_device()()) + { + } + + void CreateRenderGraph(RpsRpslEntry hEntry) + { + DestroyRenderGraph(); + + RpsRenderGraphCreateInfo renderGraphCreateInfo = {}; + renderGraphCreateInfo.mainEntryCreateInfo.hRpslEntryPoint = hEntry; + + REQUIRE_RPS_OK(rpsRenderGraphCreate(m_device, &renderGraphCreateInfo, &m_renderGraph)); + + auto hMainEntry = rpsRenderGraphGetMainEntry(m_renderGraph); + REQUIRE_RPS_OK(rpsProgramBindNode(hMainEntry, nullptr, &NodeOrderChecker::CmdCallback, this)); + } + + void DestroyRenderGraph() + { + rpsRenderGraphDestroy(m_renderGraph); + m_renderGraph = RPS_NULL_HANDLE; + } + + void Execute(const RpsConstant* ppArgs, + uint32_t numArgs, + RpsScheduleFlags scheduleFlags, + RpsDiagnosticFlags diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_ALL, + std::function&)> customAssertion = {}) + { + RpsRandomNumberGenerator randGen = {}; + randGen.pContext = this; + randGen.pfnRandomUniformInt = &RandGen; + + RpsRenderGraphUpdateInfo updateInfo = {}; + updateInfo.frameIndex = 0; + updateInfo.gpuCompletedFrameIndex = RPS_GPU_COMPLETED_FRAME_INDEX_NONE; + updateInfo.numArgs = numArgs; + updateInfo.ppArgs = ppArgs; + updateInfo.ppArgResources = nullptr; + updateInfo.diagnosticFlags = diagnosticFlags; + updateInfo.scheduleFlags = scheduleFlags; + updateInfo.pRandomNumberGenerator = &randGen; + + RpsRenderGraphRecordCommandInfo recordInfo = {}; + RpsRenderGraphBatchLayout batchLayout = {}; + + REQUIRE_RPS_OK(rpsRenderGraphUpdate(m_renderGraph, &updateInfo)); + + 
REQUIRE_RPS_OK(rpsRenderGraphGetBatchLayout(m_renderGraph, &batchLayout)); + + for (uint32_t iBatch = 0; iBatch < batchLayout.numCmdBatches; iBatch++) + { + REQUIRE(batchLayout.numCmdBatches == 1); + + recordInfo.cmdBeginIndex = batchLayout.pCmdBatches[iBatch].cmdBegin; + recordInfo.numCmds = batchLayout.pCmdBatches[iBatch].numCmds; + + REQUIRE_RPS_OK(rpsRenderGraphRecordCommands(m_renderGraph, &recordInfo)); + } + + if (customAssertion) + { + customAssertion(m_actualSequence); + } + else if (m_pendingValidation) + { + AssertSequences(); + } + ResetSequences(); + } + + void CmdCallback(const RpsCmdCallbackContext* pContext, uint32_t id) + { + m_actualSequence.push_back(id); + } + + void PushExpected(uint32_t value) + { + m_expectedSequence.push_back(value); + } + + void PushExpected(std::initializer_list values) + { + m_expectedSequence.insert(m_expectedSequence.end(), values); + } + + void PushExpectedRange(uint32_t begin, uint32_t end, int32_t step) + { + REQUIRE(begin != end); + REQUIRE(step != 0); + + if (begin < end) + { + for (uint32_t i = begin; i < end; i += step) + { + m_expectedSequence.push_back(i); + } + } + else + { + for (uint32_t i = end; i < begin; i += step) + { + m_expectedSequence.push_back(i); + } + } + + m_pendingValidation = true; + } + + void DisableValidation() + { + m_pendingValidation = false; + } + +private: + void AssertSequences() const + { + REQUIRE(m_actualSequence.size() == m_expectedSequence.size()); + + for (uint32_t i = 0; i < m_actualSequence.size(); i++) + { + REQUIRE(m_actualSequence[i] == m_expectedSequence[i]); + } + } + + void ResetSequences() + { + m_actualSequence.clear(); + m_expectedSequence.clear(); + } + + static int32_t RandGen(void* pContext, int32_t minVal, int32_t maxVal) + { + NodeOrderChecker* pThis = static_cast(pContext); + return std::uniform_int_distribution<>(minVal, maxVal)(pThis->m_mt19937); + } + +private: + RpsDevice m_device = {}; + std::mt19937 m_mt19937; + RpsRenderGraph m_renderGraph = {}; + + bool 
m_pendingValidation = true; + std::vector m_actualSequence; + std::vector m_expectedSequence; +}; + +enum TestCases : uint32_t +{ + TEST_CASE_DEFAULT = 0, + TEST_CASE_ATOMIC = 1, + TEST_CASE_SEQUENTIAL = 2, + TEST_CASE_ATOMIC_SEQUENTIAL = 3, +}; + +TEST_CASE("TestSchedulerIntrinsics_Subgraph") +{ + RpsDevice device = rpsTestUtilCreateDevice([](auto pCreateInfo, auto phDevice) { + RpsNullRuntimeDeviceCreateInfo nullDeviceCreateInfo = {}; + nullDeviceCreateInfo.pDeviceCreateInfo = pCreateInfo; + return rpsNullRuntimeDeviceCreate(&nullDeviceCreateInfo, phDevice); + }); + + RPS_TEST_MALLOC_CHECKPOINT(PostCreateDevice); + + NodeOrderChecker orderChecker(device); + + rps::ResourceDesc resourceDesc(RPS_RESOURCE_TYPE_IMAGE_2D, RPS_FORMAT_R8G8B8A8_UNORM, 1920, 1080, 1); + uint32_t testCase = TEST_CASE_DEFAULT; + RpsBool useSchBarrier = RPS_FALSE; + RpsConstant args[] = {&resourceDesc, &testCase, &useSchBarrier}; + + orderChecker.CreateRenderGraph(rpsTestLoadRpslEntry(test_scheduler_intrinsics, test_schedule_control)); + + auto makeId = [](uint32_t callId, uint32_t localId) { return (callId << 16) | localId; }; + + // Asserts the explicit dependency (3 -> 7) is obeyed: + auto assertExplicitDependency = [](const std::vector& actualSeq) { + auto iter = std::find(actualSeq.begin(), actualSeq.end(), 3); + REQUIRE(iter != actualSeq.end()); + + iter = std::find(iter, actualSeq.end(), 7); + REQUIRE(iter != actualSeq.end()); + }; + + // Asserts the given range is atomic subgraph (can be reordered within the subgroup but not with nodes outside the subgroup): + auto assertAtomicRange = [](const std::vector& actualSeq, uint32_t lowerBound, uint32_t upperBound) { + auto rangeStartIter = std::find_if( + actualSeq.begin(), actualSeq.end(), [=](uint32_t i) { return (lowerBound <= i) && (i < upperBound); }); + REQUIRE(rangeStartIter != actualSeq.end()); + + std::vector seenFlags; + seenFlags.resize(upperBound - lowerBound, false); + + for (uint32_t i = 0; i < (upperBound - lowerBound); i++, 
++rangeStartIter) + { + REQUIRE(rangeStartIter != actualSeq.end()); + REQUIRE(*rangeStartIter >= lowerBound); + REQUIRE(*rangeStartIter < upperBound); + + const uint32_t idx = *rangeStartIter - lowerBound; + REQUIRE(!seenFlags[idx]); + seenFlags[idx] = true; + } + }; + + // Asserts the given range is sequential (can not be reordered within the group, but external nodes can be inserted inbetween). + auto assertSequentialRange = [](const std::vector& actualSeq, uint32_t lowerBound, uint32_t upperBound) { + auto rangeStartIter = std::find(actualSeq.begin(), actualSeq.end(), lowerBound); + REQUIRE(rangeStartIter != actualSeq.end()); + + uint32_t expected = lowerBound; + + for (; rangeStartIter != actualSeq.end(); ++rangeStartIter) + { + if ((lowerBound <= *rangeStartIter) && (*rangeStartIter < upperBound)) + { + REQUIRE(*rangeStartIter == expected); + expected++; + + if (expected == upperBound) + { + break; + } + } + } + + REQUIRE(expected == upperBound); + }; + + // Asserts given range is sequential and not reordered with any external nodes. 
+ auto assertEqualRange = [](const std::vector& actualSeq, uint32_t lowerBound, uint32_t upperBound) { + auto rangeStartIter = std::find(actualSeq.begin(), actualSeq.end(), lowerBound); + REQUIRE(rangeStartIter != actualSeq.end()); + + for (uint32_t expected = lowerBound; expected < upperBound; ++expected) + { + REQUIRE(rangeStartIter != actualSeq.end()); + REQUIRE(*rangeStartIter == expected); + ++rangeStartIter; + } + }; + + // Default case, only check the explicit dependency: + orderChecker.Execute( + args, 3, RPS_SCHEDULE_DISABLE_DEAD_CODE_ELIMINATION_BIT, RPS_DIAGNOSTIC_ENABLE_ALL, assertExplicitDependency); + + // Atomic subgraph + testCase = TEST_CASE_ATOMIC; + orderChecker.Execute(args, + 3, + RPS_SCHEDULE_DISABLE_DEAD_CODE_ELIMINATION_BIT, + RPS_DIAGNOSTIC_ENABLE_ALL, + [&](const std::vector& actualSeq) { + assertAtomicRange(actualSeq, makeId(2, 0), makeId(2, 12)); + assertExplicitDependency(actualSeq); + }); + + // Sequential subgraph + testCase = TEST_CASE_SEQUENTIAL; + orderChecker.Execute(args, + 3, + RPS_SCHEDULE_DISABLE_DEAD_CODE_ELIMINATION_BIT, + RPS_DIAGNOSTIC_ENABLE_ALL, + [&](const std::vector& actualSeq) { + assertSequentialRange(actualSeq, makeId(2, 0), makeId(2, 12)); + assertExplicitDependency(actualSeq); + }); + + // Atomic Sequential subgraph + testCase = TEST_CASE_ATOMIC_SEQUENTIAL; + orderChecker.Execute(args, + 3, + RPS_SCHEDULE_DISABLE_DEAD_CODE_ELIMINATION_BIT, + RPS_DIAGNOSTIC_ENABLE_ALL, + [&](const std::vector& actualSeq) { + assertEqualRange(actualSeq, makeId(2, 0), makeId(2, 12)); + assertExplicitDependency(actualSeq); + }); + + // Repeat with sch_barrier intrinsics enabled. 
+ useSchBarrier = RPS_TRUE; + testCase = TEST_CASE_DEFAULT; + orderChecker.Execute(args, + 3, + RPS_SCHEDULE_DISABLE_DEAD_CODE_ELIMINATION_BIT, + RPS_DIAGNOSTIC_ENABLE_ALL, + [&](const std::vector& actualSeq) { + assertAtomicRange(actualSeq, makeId(2, 0), makeId(2, 4)); + assertAtomicRange(actualSeq, makeId(2, 4), makeId(2, 8)); + assertAtomicRange(actualSeq, makeId(2, 8), makeId(2, 12)); + assertExplicitDependency(actualSeq); + }); + + testCase = TEST_CASE_ATOMIC; + orderChecker.Execute(args, + 3, + RPS_SCHEDULE_DISABLE_DEAD_CODE_ELIMINATION_BIT, + RPS_DIAGNOSTIC_ENABLE_ALL, + [&](const std::vector& actualSeq) { + assertAtomicRange(actualSeq, makeId(2, 0), makeId(2, 12)); + assertAtomicRange(actualSeq, makeId(2, 0), makeId(2, 4)); + assertAtomicRange(actualSeq, makeId(2, 4), makeId(2, 8)); + assertAtomicRange(actualSeq, makeId(2, 8), makeId(2, 12)); + assertExplicitDependency(actualSeq); + }); + + testCase = TEST_CASE_SEQUENTIAL; + orderChecker.Execute(args, + 3, + RPS_SCHEDULE_DISABLE_DEAD_CODE_ELIMINATION_BIT, + RPS_DIAGNOSTIC_ENABLE_ALL, + [&](const std::vector& actualSeq) { + assertSequentialRange(actualSeq, makeId(2, 0), makeId(2, 12)); + assertAtomicRange(actualSeq, makeId(2, 0), makeId(2, 4)); + assertAtomicRange(actualSeq, makeId(2, 4), makeId(2, 8)); + assertAtomicRange(actualSeq, makeId(2, 8), makeId(2, 12)); + assertExplicitDependency(actualSeq); + }); + + testCase = TEST_CASE_ATOMIC_SEQUENTIAL; + orderChecker.Execute(args, + 3, + RPS_SCHEDULE_DISABLE_DEAD_CODE_ELIMINATION_BIT, + RPS_DIAGNOSTIC_ENABLE_ALL, + [&](const std::vector& actualSeq) { + assertEqualRange(actualSeq, makeId(2, 0), makeId(2, 12)); + // No need to validate individual atomic range anymore as EqualRange is more restrictive. 
+ assertExplicitDependency(actualSeq); + }); + + // Check nested graphs: + + orderChecker.CreateRenderGraph( + rpsTestLoadRpslEntry(test_scheduler_intrinsics, test_schedule_control_nested_atomic_subgraph)); + + orderChecker.Execute(args, + 1, + RPS_SCHEDULE_DISABLE_DEAD_CODE_ELIMINATION_BIT, + RPS_DIAGNOSTIC_ENABLE_ALL, + [&](const std::vector& actualSeq) { + assertAtomicRange(actualSeq, 5, 29); + assertAtomicRange(actualSeq, 9, 13); + assertAtomicRange(actualSeq, 13, 18); + assertAtomicRange(actualSeq, 18, 22); + assertEqualRange(actualSeq, 25, 29); + }); + + orderChecker.DestroyRenderGraph(); + + RPS_TEST_MALLOC_COUNTER_EQUAL_CURRENT(PostCreateDevice); + + // Clean up + rpsTestUtilDestroyDevice(device); +} + +void DummyCmdCallback(const RpsCmdCallbackContext* pContext) +{ +} + +TEST_CASE("TestAbort") +{ + RpsDevice device = rpsTestUtilCreateDevice([](auto pCreateInfo, auto phDevice) { + RpsNullRuntimeDeviceCreateInfo nullDeviceCreateInfo = {}; + nullDeviceCreateInfo.pDeviceCreateInfo = pCreateInfo; + return rpsNullRuntimeDeviceCreate(&nullDeviceCreateInfo, phDevice); + }); + + RPS_TEST_MALLOC_CHECKPOINT(PostCreateDevice); + + rps::ResourceDesc resourceDesc(RPS_RESOURCE_TYPE_IMAGE_2D, RPS_FORMAT_R8G8B8A8_UNORM, 1920, 1080, 1); + int32_t errorCode = 0; + RpsConstant args[] = {&resourceDesc, &errorCode}; + + RpsRenderGraphCreateInfo renderGraphCreateInfo = {}; + renderGraphCreateInfo.mainEntryCreateInfo.hRpslEntryPoint = + rpsTestLoadRpslEntry(test_scheduler_intrinsics, test_abort); + + RpsRenderGraph renderGraph; + REQUIRE_RPS_OK(rpsRenderGraphCreate(device, &renderGraphCreateInfo, &renderGraph)); + + auto hMainEntry = rpsRenderGraphGetMainEntry(renderGraph); + + for (int32_t iter = 0; iter < 100; iter++) + { + errorCode = (iter >> 1) * ((iter & 1) ? 
1 : -1); + + RpsRenderGraphUpdateInfo updateInfo = {}; + updateInfo.frameIndex = 0; + updateInfo.gpuCompletedFrameIndex = RPS_GPU_COMPLETED_FRAME_INDEX_NONE; + updateInfo.numArgs = RPS_TEST_COUNTOF(args); + updateInfo.ppArgs = args; + updateInfo.diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_ALL; + + const RpsResult updateResult = rpsRenderGraphUpdate(renderGraph, &updateInfo); + + if (errorCode < 0) + { + REQUIRE(RPS_FAILED(updateResult)); + } + else + { + REQUIRE_RPS_OK(updateResult); + } + } + + rpsRenderGraphDestroy(renderGraph); + + RPS_TEST_MALLOC_COUNTER_EQUAL_CURRENT(PostCreateDevice); + + // Clean up + rpsTestUtilDestroyDevice(device); +} diff --git a/tests/console/test_scheduler_intrinsics.rpsl b/tests/console/test_scheduler_intrinsics.rpsl new file mode 100644 index 0000000..7ab61d9 --- /dev/null +++ b/tests/console/test_scheduler_intrinsics.rpsl @@ -0,0 +1,179 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +graphics node Ending(uint id, rtv dst, [readonly(ps)] buffer src0, [readonly(ps)] buffer src1, [readonly(ps)] buffer src2); +compute node N(uint id, [readwrite(cs)] buffer b); +node N_CpuOnly(uint id); +node M_WaitsN(uint id, rtv dst, node n); + +enum TestCases +{ + TEST_CASE_DEFAULT = 0, + TEST_CASE_ATOMIC = 1, + TEST_CASE_SEQUENTIAL = 2, + TEST_CASE_ATOMIC_SEQUENTIAL = 3, +}; + +uint MakeId(uint callId, uint localId) +{ + return (callId << 16) | localId; +} + +void foo_default(buffer x, buffer y, uint callId, bool useBarrier) +{ + if (useBarrier) + sch_barrier(); + + N(MakeId(callId, 0), x); + N(MakeId(callId, 1), x); + N(MakeId(callId, 2), x); + N(MakeId(callId, 3), x); + + if (useBarrier) + sch_barrier(); + + N(MakeId(callId, 4), y); + N(MakeId(callId, 5), y); + N(MakeId(callId, 6), y); + N(MakeId(callId, 7), y); + + if (useBarrier) + sch_barrier(); + + N(MakeId(callId, 8), x); + N(MakeId(callId, 9), y); + N(MakeId(callId, 10), x); + N(MakeId(callId, 11), y); + + if (useBarrier) + sch_barrier(); +} + +[subgraph(atomic)] void foo_atomic(buffer x, buffer y, uint callId, bool useBarrier) { + foo_default(x, y, callId, useBarrier); +} + +[subgraph(sequential)] void foo_sequential(buffer x, buffer y, uint callId, bool useBarrier) +{ + foo_default(x, y, callId, useBarrier); +} + +[subgraph(atomic, sequential)] void foo_atomic_sequential(buffer x, buffer y, uint callId, bool useBarrier) { + foo_default(x, y, callId, useBarrier); +} + +export void test_schedule_control(texture backBuffer, uint testCase, bool useBarrier) +{ + buffer x = create_buffer(32); + buffer y = create_buffer(32); + buffer z = create_buffer(32); + + // Early node + N(0, z); + N(1, z); + + if (testCase == TEST_CASE_DEFAULT) + { + foo_default(x, y, 2, useBarrier); + } + else if (testCase == TEST_CASE_ATOMIC) + { + foo_atomic(x, y, 2, useBarrier); + } + else if (testCase == TEST_CASE_SEQUENTIAL) + { + foo_sequential(x, y, 2, useBarrier); + } + else if (testCase == TEST_CASE_ATOMIC_SEQUENTIAL) + { + 
foo_atomic_sequential(x, y, 2, useBarrier); + } + + node n = N_CpuOnly(3); + N_CpuOnly(4); + + N(5, z); + + N(6, x); + N(6, x); + + M_WaitsN(7, backBuffer, n); + + Ending(8, backBuffer, x, y, z); +} + +export void test_schedule_control_nested_atomic_subgraph(texture backBuffer) +{ + buffer x = create_buffer(32); + buffer y = create_buffer(32); + buffer z = create_buffer(32); + buffer w = create_buffer(32); + + // Early node + N(0, w); + N(1, z); + N(2, z); + N(3, x); + N(4, y); + + [subgraph(atomic)] { + N(5, x); + N(6, y); + N(7, x); + N(8, y); + + [subgraph(atomic)] { + N(9, x); + N(10, y); + N(11, x); + N(12, y); + + [subgraph(atomic)] { + N(13, x); + N(14, y); + N(15, w); + N(16, x); + N(17, y); + } + } + + [subgraph(atomic)] { + N(18, x); + N(19, y); + N(20, x); + N(21, y); + } + + N(22, x); + N(23, y); + N(24, z); + + [subgraph(atomic, sequential)] { + N(25, x); + N(26, y); + N(27, x); + N(28, y); + } + } + + N(30, x); + N(31, y); + N(32, z); + + Ending(40, backBuffer, x, y, z); +} + + +export void test_abort(texture backBuffer, int errorCode) +{ + buffer x = create_buffer(32); + + if (errorCode < 0) + abort(errorCode); + + Ending(0, backBuffer, x, x, x); +} diff --git a/tests/console/test_utils.cpp b/tests/console/test_utils.cpp new file mode 100644 index 0000000..14064e2 --- /dev/null +++ b/tests/console/test_utils.cpp @@ -0,0 +1,854 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#define CATCH_CONFIG_MAIN +#include +#include + +#include "core/rps_util.hpp" + +#include "utils/rps_test_common.h" + +#include +#include +#include + +template +void CheckMinMax(T (&arr)[N]) +{ + for (auto i = std::cbegin(arr); i != std::cend(arr); ++i) + { + for (auto j = std::cbegin(arr); j != std::cend(arr); ++j) + { + REQUIRE(std::min(*i, *j) == rpsMin(*i, *j)); + REQUIRE(std::max(*i, *j) == rpsMax(*i, *j)); + } + } +} + +TEST_CASE("MinMaxUtils") +{ + int32_t valuesS32[] = { + INT32_MIN, + INT32_MIN + 1, + -42, + -2, + -1, + 0, + 1, + 3, + 101, + INT32_MAX - 1, + INT32_MAX, + }; + uint32_t valuesU32[] = { + 0, + 1, + 2, + 3, + 42, + UINT32_MAX - 1, + UINT32_MAX, + }; + int64_t valuesS64[] = { + INT64_MIN, + INT64_MIN + 1, + INT32_MIN, + INT32_MIN + 1, + -42, + -2, + -1, + 0, + 1, + 3, + 101, + INT32_MAX - 1, + INT32_MAX, + INT64_MAX - 1, + INT64_MAX, + }; + uint64_t valuesU64[] = { + 0, + 1, + 2, + 3, + 42, + UINT32_MAX - 1, + UINT32_MAX, + UINT64_MAX - 1, + UINT64_MAX, + }; + float valuesF32[] = { + -std::numeric_limits::infinity(), + -FLT_MAX, + -1E9f, + -42.0f, + -3.1415927f, + -2.0f, + -1.0f, + -0.5f, + -FLT_EPSILON, + -FLT_MIN, + -std::numeric_limits::denorm_min(), + 0.5f, + 1.0f, + 2.0f, + 3.1415927f, + 42.0f, + 1E9f, + std::numeric_limits::denorm_min(), + FLT_MIN, + FLT_EPSILON, + FLT_MAX, + std::numeric_limits::infinity(), + }; + size_t valuesSize[] = { + 0, + 1, + 3, + 42, + SIZE_MAX - 1, + SIZE_MAX, + }; + + CheckMinMax(valuesS32); + CheckMinMax(valuesU32); + CheckMinMax(valuesS64); + CheckMinMax(valuesU64); + CheckMinMax(valuesF32); + CheckMinMax(valuesSize); +} + + +TEST_CASE("BitUtils") +{ + REQUIRE(rpsFirstBitHigh(0u) == 32); + REQUIRE(rpsFirstBitHigh(1u) == 31); + REQUIRE(rpsFirstBitHigh(UINT32_MAX) == 0); + REQUIRE(rpsFirstBitHigh(0x80000000u) == 0); + REQUIRE(rpsFirstBitHigh(0x7FFFFFFFu) == 1); + REQUIRE(rpsFirstBitHigh(0xFFFFu) == 16); + REQUIRE(rpsFirstBitHigh(0x3Fu) == 26); + + REQUIRE(rpsFirstBitLow(0u) == 32); + 
REQUIRE(rpsFirstBitLow(1u) == 0); + REQUIRE(rpsFirstBitLow(UINT32_MAX) == 0); + REQUIRE(rpsFirstBitLow(0x80000000u) == 31); + REQUIRE(rpsFirstBitLow(0x0FFFFFFEu) == 1); + REQUIRE(rpsFirstBitLow(0xFF0000u) == 16); + REQUIRE(rpsFirstBitLow(0xFC00u) == 10); + + uint32_t values[] = { + 0, + UINT32_MAX, + 1, + 2, + 4, + 31, + 42, + 0x80000000u, + UINT32_MAX - 1, + UINT16_MAX, + UINT16_MAX + 1, + UINT16_MAX + 42, + 0x12345678, + 0xABCD1234, + 0xDEADBEEF, + }; + + for (auto i = std::cbegin(values); i != std::cend(values); ++i) + { + const auto rpsReverseBits32Result = rpsReverseBits32(*i); + for (uint32_t b = 0; b < 32; b++) + { + REQUIRE(rpsAnyBitsSet(*i, 1u << b) == rpsAnyBitsSet(rpsReverseBits32Result, 1u << (31 - b))); + } + + REQUIRE(*i == rpsReverseBits32(rpsReverseBits32Result)); + + if (*i > 0x80000000u) + continue; + + const auto rpsRoundUpToPowerOfTwoResult = rpsRoundUpToPowerOfTwo(*i); + + REQUIRE(rpsIsPowerOfTwo(rpsRoundUpToPowerOfTwoResult)); + + REQUIRE(rpsRoundUpToPowerOfTwoResult >= *i); + REQUIRE(((*i == 0) || ((rpsRoundUpToPowerOfTwoResult >> 1u) < *i))); + } +} + +TEST_CASE("AlignmentUtils") +{ + REQUIRE(rpsDivRoundUp(0, 8) == 0); + REQUIRE(rpsDivRoundUp(1, 8) == 1); + REQUIRE(rpsDivRoundUp(7, 8) == 1); + REQUIRE(rpsDivRoundUp(8, 8) == 1); + REQUIRE(rpsDivRoundUp(9, 8) == 2); + + REQUIRE(rpsAlignUp(0, 4) == 0); + REQUIRE(rpsAlignUp(1, 4) == 4); + REQUIRE(rpsAlignUp(3, 4) == 4); + REQUIRE(rpsAlignUp(4, 4) == 4); + REQUIRE(rpsAlignUp(5, 4) == 8); + + REQUIRE(rpsAlignUp(0, 4) == 0); + REQUIRE(rpsAlignUp(1, 4) == 4); + REQUIRE(rpsAlignUp(3, 4) == 4); + REQUIRE(rpsAlignUp(4, 4) == 4); + REQUIRE(rpsAlignUp(5, 4) == 8); + + uint8_t foo[1024]; + uint8_t* pFoo = foo; + + for (size_t i = 0; i < 64; i++) + { + for (size_t alignmt = 1; alignmt <= 512; alignmt = alignmt << 1u) + { + size_t sz = RPS_COUNTOF(foo); + void* pRef = pFoo + i; + const size_t rpsPaddingSizeResult = rpsPaddingSize(pRef, alignmt); + void* rpsAlignUpConstPtrResult = rpsAlignUpPtr(pRef, 
alignmt); + + REQUIRE(rpsBytePtrInc(pRef, rpsPaddingSizeResult) == rpsAlignUpConstPtrResult); + REQUIRE(rpsAlignUpConstPtrResult == std::align(alignmt, 1, pRef, sz)); + } + } +} + + +TEST_CASE("VectorUtils") +{ + RpsAllocator allocatorCb = { + &CountedMalloc, + &CountedFree, + &CountedRealloc, + nullptr, + }; + + RPS_TEST_MALLOC_CHECKPOINT(0); + + do + { + rps::Vector> u32Vec; + + REQUIRE(u32Vec.size() == 0); + REQUIRE(u32Vec.empty()); + REQUIRE(u32Vec.capacity() == 0); + + rps::GeneralAllocator allocator(&allocatorCb); + u32Vec.reset(allocator); + + REQUIRE(u32Vec.size() == 0); + REQUIRE(u32Vec.empty()); + REQUIRE(u32Vec.capacity() == 0); + + u32Vec.reserve(5); + REQUIRE(u32Vec.capacity() >= 5); + + RPS_TEST_MALLOC_CHECKPOINT(1); + RPS_TEST_MALLOC_COUNTER_COMPARE(0, <, 1); + + u32Vec.resize(3); + + RPS_TEST_MALLOC_COUNTER_EQUAL_CURRENT(1); + + u32Vec.resize(6); + REQUIRE(u32Vec.capacity() >= 6); + + for (uint32_t i = 0; i < 6; i++) + { + u32Vec[i] = 6 - i; + u32Vec.push_back(i); + } + + REQUIRE(u32Vec.size() == 12); + + for (uint32_t i = 0; i < 6; i++) + { + REQUIRE(u32Vec[i] == 6 - i); + REQUIRE(u32Vec[6 + i] == i); + } + + const size_t beforeClearCapcaity = u32Vec.capacity(); + + u32Vec.clear(); + + REQUIRE(u32Vec.size() == 0); + + RPS_TEST_MALLOC_CHECKPOINT(2); + + u32Vec.resize(beforeClearCapcaity); + RPS_TEST_MALLOC_COUNTER_EQUAL_CURRENT(2); + + u32Vec.clear(); + u32Vec.shrink_to_fit(); + + RPS_TEST_MALLOC_COUNTER_EQUAL_CURRENT(0); + + u32Vec.push_back(3); + u32Vec.push_back(4); + u32Vec.push_back(5); + REQUIRE(u32Vec[0] == 3); + REQUIRE(u32Vec[1] == 4); + REQUIRE(u32Vec[2] == 5); + REQUIRE(u32Vec.front() == 3); + REQUIRE(u32Vec.back() == 5); + u32Vec.pop_back(); + REQUIRE(u32Vec.back() == 4); + u32Vec.insert(1, 6); + REQUIRE(u32Vec[0] == 3); + REQUIRE(u32Vec[1] == 6); + REQUIRE(u32Vec[2] == 4); + u32Vec.pop_front(); + REQUIRE(u32Vec[0] == 6); + REQUIRE(u32Vec.back() == 4); + REQUIRE(u32Vec.size() == 2); + + u32Vec.clear(); + u32Vec.shrink_to_fit(); + + 
RPS_TEST_MALLOC_COUNTER_EQUAL_CURRENT(0); + + + } while (false); + + static int fooCount; + + do + { + struct Foo + { + int value; + + Foo() + : Foo(-1) + { + } + + Foo(int v) + : value(v) + { + fooCount++; + } + + ~Foo() + { + fooCount--; + } + + Foo(const Foo& other) + : Foo(other.value) + { + } + + Foo(Foo&& other) + : Foo(other.value) + { + other.value = -43; + } + + Foo& operator=(const Foo& other) + { + value = other.value; + return *this; + } + + Foo& operator=(Foo&& other) + { + value = other.value; + other.value = -42; + + return *this; + } + }; + + rps::Vector foos; + rps::GeneralAllocator allocator(&allocatorCb); + + foos.reset(allocator); + + foos.resize(3); + REQUIRE(fooCount == 3); + + REQUIRE(foos[0].value == -1); + REQUIRE(foos[1].value == -1); + REQUIRE(foos[2].value == -1); + + foos.resize(2); + REQUIRE(fooCount == 2); + + foos.resize(3, Foo(3)); + REQUIRE(fooCount == 3); + REQUIRE(foos[0].value == -1); + REQUIRE(foos[1].value == -1); + REQUIRE(foos[2].value == 3); + + foos.insert(1, Foo(1)); + REQUIRE(fooCount == 4); + REQUIRE(foos[0].value == -1); + REQUIRE(foos[1].value == 1); + REQUIRE(foos[2].value == -1); + REQUIRE(foos[3].value == 3); + + foos.remove(2); + REQUIRE(foos.size() == fooCount); + REQUIRE(fooCount == 3); + REQUIRE(foos[0].value == -1); + REQUIRE(foos[1].value == 1); + REQUIRE(foos[2].value == 3); + + foos.push_back(Foo(4)); + REQUIRE(foos.size() == fooCount); + REQUIRE(foos[2].value == 3); + REQUIRE(foos[3].value == 4); + + { + Foo tmps[4] = {{5}, {6}, {7}, {8}}; + foos.insert(3, tmps, RPS_COUNTOF(tmps)); + } + + REQUIRE(foos.size() == fooCount); + int values[] = {-1, 1, 3, 5, 6, 7, 8, 4}; + + uint32_t i = 0; + for (auto iter = foos.begin(); iter != foos.end(); ++iter) + { + REQUIRE(iter->value == values[i]); + i++; + } + + { + Foo tmps[2] = {{9}, {10}}; + foos.insert(4, tmps, RPS_COUNTOF(tmps)); + } + REQUIRE(foos.size() == fooCount); + int values2[] = {-1, 1, 3, 5, 9, 10, 6, 7, 8, 4}; + + i = 0; + for (auto iter = foos.begin(); 
iter != foos.end(); ++iter) + { + REQUIRE(iter->value == values2[i]); + i++; + } + + } while (false); + + REQUIRE(fooCount == 0); + + RPS_TEST_MALLOC_COUNTER_EQUAL_CURRENT(0); +} + +TEST_CASE("BitVector") +{ + RpsAllocator allocatorCb = { + &CountedMalloc, + &CountedFree, + &CountedRealloc, + nullptr, + }; + + RPS_TEST_MALLOC_CHECKPOINT(0); + + do + { + rps::BitVector<> bitVec{&allocatorCb}; + REQUIRE(bitVec.size() == 0); + REQUIRE(rps::BitVector<>::ELEMENT_NUM_BITS == 64); + + const size_t size1 = 17; + bitVec.Resize(size1); + REQUIRE(bitVec.size() == size1); + + REQUIRE(bitVec.GetVector().size() == rpsDivRoundUp(bitVec.size(), size_t(rps::BitVector<>::ELEMENT_NUM_BITS))); + + for (uint32_t i = 0; i < bitVec.size(); i++) + { + bitVec.SetBit(i, (i % 3) == 0); + } + + for (uint32_t i = 0; i < bitVec.size(); i++) + { + REQUIRE(bitVec.GetBit(i) == ((i % 3) == 0)); + } + + bitVec.Resize(bitVec.size() + 55, true); + const size_t size2 = bitVec.size(); + REQUIRE(size2 == size1 + 55); + + bitVec.Resize(bitVec.size() + 77, false); + const size_t size3 = bitVec.size(); + REQUIRE(size3 == size2 + 77); + + for (size_t i = 0; i < size1; i++) + { + REQUIRE(bitVec.GetBit(i) == ((i % 3) == 0)); + } + + for (size_t i = size1; i < size2; i++) + { + REQUIRE(bitVec.GetBit(i) == true); + } + + for (size_t i = size2; i < size3; i++) + { + REQUIRE(bitVec.GetBit(i) == false); + } + + bitVec.Fill(10, size3 - 11, false); + + for (size_t i = 0; i < 10; i++) + { + REQUIRE(bitVec.GetBit(i) == ((i % 3) == 0)); + } + + for (size_t i = 10; i < size3 - 11; i++) + { + REQUIRE(bitVec.GetBit(i) == false); + } + + bitVec.Fill(13, size3 - 14, true); + + for (size_t i = 13; i < size3 - 14; i++) + { + REQUIRE(bitVec.GetBit(i) == true); + } + + for (size_t i = size3 - 14; i < bitVec.size(); i++) + { + REQUIRE(bitVec.GetBit(i) == false); + } + + } while (false); + + RPS_TEST_MALLOC_COUNTER_EQUAL_CURRENT(0); +} + +TEST_CASE("ArenaUtils") +{ + RpsAllocator allocator = { + &CountedMalloc, + &CountedFree, + 
&CountedRealloc, + nullptr, + }; + + RPS_TEST_MALLOC_CHECKPOINT(0); + + do + { + rps::Arena arena(allocator, 4096 - 32); + + void* pAllocated = arena.Alloc(1); + REQUIRE(pAllocated != nullptr); + + for (uint32_t i = 1; i < 512; i++) + { + pAllocated = arena.Alloc(i); + REQUIRE(pAllocated != nullptr); + } + + for (uint32_t i = 1; i < 32; i++) + { + const size_t alignmt = size_t(1ull << (rand() % 8)); + pAllocated = arena.AlignedAlloc(rand() % (1024 * 64 * 2), alignmt); + REQUIRE(pAllocated != nullptr); + REQUIRE(rpsIsPointerAlignedTo(pAllocated, alignmt)); + } + + RPS_TEST_MALLOC_CHECKPOINT(1); + + // Test realloc: + // First alloc + pAllocated = arena.Alloc(42); + + // Shrinking + void* pReallocatedWithSmallerSize = arena.Realloc(pAllocated, 42, 36); + REQUIRE(pAllocated == pReallocatedWithSmallerSize); + + // Extending within range of previous allocs + void* pReallocatedWithLargerSize = arena.Realloc(pAllocated, 36, 40); + REQUIRE(pAllocated == pReallocatedWithLargerSize); + + // Insert a new alloc + REQUIRE(0 != arena.Alloc(3)); + + // Shrinking + void* pReallocatedNotLast = arena.Realloc(pAllocated, 40, 31); + REQUIRE(pAllocated == pReallocatedNotLast); + + // Extending + pReallocatedNotLast = arena.Realloc(pAllocated, 31, 48); + REQUIRE(pAllocated != pReallocatedNotLast); + + arena.Reset(); + + RPS_TEST_MALLOC_COUNTER_EQUAL_CURRENT(1); + + while (arena.HasFreeBlocks()) + { + pAllocated = arena.Alloc(42); + REQUIRE(pAllocated != nullptr); + RPS_TEST_MALLOC_COUNTER_EQUAL_CURRENT(1); + } + } while (false); + + RPS_TEST_MALLOC_COUNTER_EQUAL_CURRENT(0); +} + +TEST_CASE("CompoundAlloc") +{ + RpsAllocator allocator = { + &CountedMalloc, + &CountedFree, + &CountedRealloc, + nullptr, + }; + + RPS_TEST_MALLOC_CHECKPOINT(0); + + uint32_t* pUInt; + + void* pMemory = rps::AllocateCompound(allocator, &pUInt); + + REQUIRE(pMemory); + REQUIRE(pMemory == pUInt); + + allocator.pfnFree(allocator.pContext, pMemory); + + RPS_TEST_MALLOC_COUNTER_EQUAL_CURRENT(0); + + rps::ArrayRef 
arrUInts; + rps::AllocInfo field2Info; + rps::AllocInfo field3Info; + uint16_t* pField2; + uint64_t* pField3; + + field2Info.Append(); + field3Info.Append(23); + + pMemory = rps::AllocateCompound(allocator, + &pUInt, + rps::CompoundEntry(&arrUInts, 42), + rps::CompoundEntry(&pField2, field2Info), + rps::CompoundEntry(&pField3, field3Info)); + + rps::AllocInfo checker; + + size_t field0Offset = checker.Append(); + size_t field1Offset = checker.Append(42); + size_t field2Offset = checker.Append(); + size_t field3Offset = checker.Append(23); + + REQUIRE(pMemory); + REQUIRE(pMemory == pUInt); + + REQUIRE(field0Offset == 0); + + REQUIRE(rpsBytePtrInc(pMemory, field0Offset) == pUInt); + REQUIRE(rpsBytePtrInc(pMemory, field1Offset) == arrUInts.data()); + REQUIRE(rpsBytePtrInc(pMemory, field2Offset) == pField2); + REQUIRE(rpsBytePtrInc(pMemory, field3Offset) == pField3); +} + +TEST_CASE("Span") +{ + RpsAllocator allocator = { + &CountedMalloc, + &CountedFree, + &CountedRealloc, + nullptr, + }; + + RPS_TEST_MALLOC_CHECKPOINT(0); + + //rps::Vector u16Vec(0, &allocator); + //rps::SpanPool MustNotCompile(u16Vec); + + rps::Vector u32Vec(0, &allocator); + rps::SpanPool spanPool(u32Vec); + + rps::Span span; + REQUIRE(span.size() == 0); + + const size_t initVecSize = u32Vec.size(); + + for (uint32_t i = 0; i < 130; i++) + { + uint32_t oldOffset = span.GetBegin(); + spanPool.push_to_span(span, 42 + i); + + REQUIRE(span.size() == i + 1); + REQUIRE(span.Get(u32Vec).back() == 42 + i); + + if (rpsIsPowerOfTwo(i)) + { + REQUIRE(span.GetBegin() == oldOffset + rpsRoundUpToPowerOfTwo(i)); + } + else + { + REQUIRE(span.GetBegin() == oldOffset); + } + } + + rps::Span span1; + + const size_t sizeBeforeReuse = u32Vec.size(); + + for (uint32_t i = 0; i < 128; i++) + { + uint32_t oldOffset = span1.GetBegin(); + + spanPool.push_to_span(span1, 242 + i); + + REQUIRE(span1.size() == i + 1); + REQUIRE(span1.Get(u32Vec).back() == 242 + i); + REQUIRE(span1.GetBegin() < span.GetBegin()); + + if 
(rpsIsPowerOfTwo(i)) + { + REQUIRE(span1.GetBegin() == oldOffset + rpsRoundUpToPowerOfTwo(i)); + } + else + { + REQUIRE(span1.GetBegin() == oldOffset); + } + + REQUIRE(sizeBeforeReuse == u32Vec.size()); + } + + spanPool.push_to_span(span1, 999); + REQUIRE(span1.GetBegin() > span.GetBegin()); + REQUIRE(sizeBeforeReuse < u32Vec.size()); + + u32Vec.reset(); + + RPS_TEST_MALLOC_COUNTER_EQUAL_CURRENT(0); +} + +template +static void StrBuilderCheck(const rps::StrBuilder& builder, const char *str) +{ + REQUIRE(builder.c_str() != nullptr); + REQUIRE(builder.Length() == strlen(builder.c_str())); + REQUIRE(strlen(str) == builder.Length()); + REQUIRE(strcmp(str, builder.c_str()) == 0); +} + +TEST_CASE("StrBuilder") { + + auto builder = rps::StrBuilder<10>(); + + StrBuilderCheck(builder, ""); + builder.Append("hello"); + StrBuilderCheck(builder, "hello"); + builder.Append("hi"); + StrBuilderCheck(builder, "hellohi"); + builder.Append("_test!"); + StrBuilderCheck(builder, "hellohi_t"); + builder.Append("_test!"); + StrBuilderCheck(builder, "hellohi_t"); + builder.PopBack(4); + StrBuilderCheck(builder, "hello"); + builder.Append("_t"); + StrBuilderCheck(builder, "hello_t"); + builder.PopBack(2); + StrBuilderCheck(builder, "hello"); + builder.Append("hi_test!"); + StrBuilderCheck(builder, "hellohi_t"); + + builder.Reset(); + StrBuilderCheck(builder, ""); + + builder = rps::StrBuilder<10>("ab"); + StrBuilderCheck(builder, "ab"); + builder.Append("3434343434343434"); + StrBuilderCheck(builder, "ab3434343"); + builder.Append("3434343434343434"); + StrBuilderCheck(builder, "ab3434343"); + + auto builder2 = rps::StrBuilder<5>("abcdefgh"); + StrBuilderCheck(builder2, "abcd"); + builder2.Append("3434343434343434"); + StrBuilderCheck(builder2, "abcd"); + + auto builder3 = rps::StrBuilder<5>("abc"); + StrBuilderCheck(builder3, "abc"); + builder3.Append("a"); + StrBuilderCheck(builder3, "abca"); + builder3.Append("a"); + StrBuilderCheck(builder3, "abca"); + + auto builder4 = 
rps::StrBuilder<11>("abc%d"); + StrBuilderCheck(builder4, "abc%d"); + builder4.Append("abc%d"); + StrBuilderCheck(builder4, "abc%dabc%d"); + + char testName[256]; + std::fill(testName, testName + 255, 'a'); + testName[255] = '\0'; + auto builder5 = rps::StrBuilder<>(); + builder5.Append(testName); + StrBuilderCheck(builder5, testName); + + auto builder6 = rps::StrBuilder<>(); + builder6.AppendFormat("%d", 666); + StrBuilderCheck(builder6, "666"); + + auto builder7 = rps::StrBuilder<>("test"); + StrBuilderCheck(builder7, "test"); + builder7 += "test"; + StrBuilderCheck(builder7, "testtest"); + + auto builder8 = rps::StrBuilder<>("a"); + builder8.Append("b").Append("c").Append("d"); + StrBuilderCheck(builder8, "abcd"); + + rps::StrBuilder<> builder9("qwer"); + builder9 = builder8; + StrBuilderCheck(builder8, "abcd"); + StrBuilderCheck(builder9, "abcd"); + + const rps::StrBuilder<> builder10("qwer"); + builder9 = builder10; + StrBuilderCheck(builder9, "qwer"); +} + +TEST_CASE("StrRef") +{ + using rps::StrRef; + + StrRef s; + REQUIRE(!s); + REQUIRE(s.empty()); + + s = "asdf"; + + REQUIRE(s.len == 4); + + char buf[6]; + + memset(buf, 0xfe, sizeof(buf)); + REQUIRE(!s.ToCStr(buf, 3)); + REQUIRE(0 == strcmp(buf, "as")); + + memset(buf, 0xfe, sizeof(buf)); + REQUIRE(!s.ToCStr(buf, 4)); + REQUIRE(0 == strcmp(buf, "asd")); + + memset(buf, 0xfe, sizeof(buf)); + REQUIRE(s.ToCStr(buf, 5)); + REQUIRE(0 == strcmp(buf, "asdf")); + + memset(buf, 0xfe, sizeof(buf)); + REQUIRE(s.ToCStr(buf, 6)); + REQUIRE(0 == strcmp(buf, "asdf")); + + REQUIRE(StrRef(buf, 3) == StrRef("asd")); + REQUIRE(StrRef(buf, 3) == StrRef("asdX", 3)); + REQUIRE(StrRef(buf, 3) != StrRef(buf, 2)); +} diff --git a/tests/gui/CMakeLists.txt b/tests/gui/CMakeLists.txt new file mode 100644 index 0000000..42425c4 --- /dev/null +++ b/tests/gui/CMakeLists.txt @@ -0,0 +1,33 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+#
+# This file is part of the AMD Render Pipeline Shaders SDK which is
+# released under the AMD INTERNAL EVALUATION LICENSE.
+#
+# See file LICENSE.RTF for full license details.
+
+# Registers one GUI test application per test_*.cpp found under this directory.
+# SetupTestApp() and ${AppFolder} are not defined in this file; they are expected
+# to be provided by the including CMake scope.
+
+set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE} )
+
+# NOTE: the glob is evaluated at configure time only; re-run CMake after adding a test source.
+file( GLOB_RECURSE GuiTestSrcs
+    "${CMAKE_CURRENT_SOURCE_DIR}/test_*.cpp" )
+
+message( STATUS "Found Tests: ${GuiTestSrcs}" )
+
+# TODO: GUI tests are only registered on Windows so far.
+if ( WIN32 )
+    # Visualizer tests are recognised by a literal path prefix. The prefix is searched
+    # with string(FIND) rather than if(MATCHES) because the source directory is an
+    # arbitrary filesystem path and must not be interpreted as a regular expression
+    # (a checkout under e.g. ".../c++/..." would otherwise mis-match or fail to configure).
+    set( VisualizerTestPrefix "${CMAKE_CURRENT_SOURCE_DIR}/test_visualizer" )
+
+    foreach( TestSrc IN LISTS GuiTestSrcs )
+        string( FIND "${TestSrc}" "${VisualizerTestPrefix}" VisualizerPrefixPos )
+
+        if( NOT VisualizerPrefixPos EQUAL -1 )
+            # Visualizer tests depend on ImGui and are skipped entirely when it is disabled.
+            # The variable is tested by name so an undefined option simply evaluates to false.
+            if ( RpsEnableImGui )
+                set( Dependencies rps_visualizer ImGuiWin32Backend )
+                set( IncludeDirectories "${CMAKE_SOURCE_DIR}/tools/rps_visualizer/include" )
+
+                # Pick the ImGui rendering backend from the source file suffix.
+                if( TestSrc MATCHES "_vk\\.cpp$" )
+                    list( APPEND Dependencies ImGuiVkBackend )
+                else()
+                    list( APPEND Dependencies ImGuiDX12Backend )
+                endif()
+
+                SetupTestApp( "${TestSrc}" "${AppFolder}/gui" "${IncludeDirectories}" "${Dependencies}" True )
+            endif()
+        else()
+            # Regular GUI tests need no extra include directories or link dependencies.
+            SetupTestApp( "${TestSrc}" "${AppFolder}/gui" "" "" True )
+        endif()
+    endforeach()
+endif()
\ No newline at end of file
diff --git a/tests/gui/test_built_in_nodes.rpsl b/tests/gui/test_built_in_nodes.rpsl
new file mode 100644
index 0000000..3285839
--- /dev/null
+++ b/tests/gui/test_built_in_nodes.rpsl
@@ -0,0 +1,87 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+
+// Nodes implemented by the host application (bound by name in the D3D12 / Vulkan test renderers).
+// fill_uv: compute pass writing a UV gradient into dst; `blue` selects the pattern of the blue channel.
+compute node fill_uv(uav dst, float blue);
+// msaa_quad: draws into a multisampled render target within dstViewport.
+node msaa_quad(rtv dst : SV_Target0, RpsViewport dstViewport : SV_Viewport);
+// blt_to_swapchain: draws src into the dstViewport region of dst.
+node blt_to_swapchain(rtv dst : SV_Target0, srv src, RpsViewport dstViewport : SV_Viewport);
+
+// Viewport of the grid cell (col, row), where every cell is dstSize pixels large.
+// Parameters are parenthesized so that compound expressions can be passed safely.
+#define subViewport(dstSize, col, row) (viewport((col) * (dstSize).x, (row) * (dstSize).y, (dstSize).x, (dstSize).y))
+
+// Exercises the built-in clear / copy / resolve nodes and blits every intermediate
+// result into one cell of a 4x4 grid on the back buffer.
+//   backbuffer  - swapchain image the results are presented in.
+//   bTestMinMax - when true, additionally exercises the MIN / MAX resolve modes.
+export void rps_main([readonly(present)] texture backbuffer, bool bTestMinMax)
+{
+    ResourceDesc backbufferDesc = backbuffer.desc();
+    // Size of one grid cell: a quarter of the back buffer in each dimension.
+    uint2 dstVpSize = uint2((uint32_t)backbufferDesc.Width / 4, backbufferDesc.Height / 4);
+
+    uint numMips = 2;
+    uint numArraySlices = 3;
+    uint texWidth = 133;
+    uint texHeight = 72;
+    texture src2D = create_tex2d( RPS_FORMAT_R8G8B8A8_UNORM, texWidth, texHeight, numMips, numArraySlices );
+
+    // Initialize: fill both mips of array slice 0 with the UV pattern, clear both mips of slice 2.
+    fill_uv( src2D.mips(0).array(0), 0 );
+    fill_uv( src2D.mips(1).array(0), 1 );
+    clear( src2D.mips(0).array(2), float4(0, 0, 0, 0) );
+    clear( src2D.mips(1).array(2), float4(0, 0, 0, 0) );
+
+    blt_to_swapchain( backbuffer, src2D.mips(0).array(0), subViewport( dstVpSize, 0, 0 ) );
+    blt_to_swapchain( backbuffer, src2D.mips(1).array(0), subViewport( dstVpSize, 1, 0 ) );
+
+    // Copy full subresources: array slice 0 -> array slice 1 (all mips).
+    copy_texture( src2D.array(1), src2D.array(0) );
+    blt_to_swapchain( backbuffer, src2D.mips(0).array(1), subViewport( dstVpSize, 2, 0 ) );
+    blt_to_swapchain( backbuffer, src2D.mips(1).array(1), subViewport( dstVpSize, 3, 0 ) );
+
+    // Copy a full subresource (extent given as 0xFFFFFFFF) into a region of another subresource.
+    copy_texture( src2D.mips(0).array(2), uint3(0, texHeight / 2, 0), src2D.mips(1).array(1), uint3(0,0,0), uint3(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF) );
+
+    // Copy region to region.
+    copy_texture( src2D.mips(0).array(2), uint3(texWidth / 2 + 15, 10, 0), src2D.mips(1).array(1), uint3(15, 10, 0), uint3(texWidth / 2 - 30, texHeight / 2 - 20, 1) );
+    blt_to_swapchain( backbuffer, src2D.mips(0).array(2), subViewport( dstVpSize, 0, 1 ) );
+
+    copy_texture( src2D.mips(0).array(2), uint3(10, 5, 0), src2D.mips(0).array(1), uint3(5, 3, 0), uint3(texWidth / 2, texHeight / 2, 1) );
+    copy_texture( src2D.mips(0).array(2), uint3(texWidth / 2 - 10, texHeight / 2 - 5, 0), src2D.mips(0).array(1), uint3(texWidth / 2 - 5, texHeight / 2 - 3, 0), uint3(texWidth / 2, texHeight / 2, 1) );
+
+    blt_to_swapchain( backbuffer, src2D.mips(0).array(2), subViewport( dstVpSize, 1, 1 ) );
+
+    // Copy texture to buffer and back.
+
+    // TODO: Expose buffer row pitch alignment requirements
+    // Integer image extent of the buffer-side layout (was built with a float3 constructor).
+    uint3 bufImageSize = uint3(175, 97, 1);
+    uint bufOffset = 512 * 3;
+    // 4 bytes per texel, rounded up to the next multiple of 256 bytes.
+    uint bufRowPitch = (bufImageSize.x * 4 + 255) & ~(255u);
+    uint bufSize = bufOffset + bufRowPitch * bufImageSize.y * bufImageSize.z;
+
+    buffer tmpBuf = create_buffer( bufSize );
+
+    copy_texture_to_buffer( tmpBuf, bufOffset, bufRowPitch, bufImageSize, uint3(0, 0, 0), src2D.mips(0).array(2), uint3(0, 0, 0), uint3(texWidth / 2 + 20, texHeight / 2 + 10, 1) );
+    copy_texture_to_buffer( tmpBuf, bufOffset, bufRowPitch, bufImageSize, uint3(texWidth / 2 + 20, texHeight / 2 + 10, 0), src2D.mips(0).array(1), uint3(5, 3, 0), uint3(texWidth / 2, texHeight / 2, 1) );
+    copy_buffer_to_texture( src2D.mips(0).array(1), uint3(5, 5, 0), tmpBuf, bufOffset, bufRowPitch, bufImageSize, uint3(5, 5, 0), uint3(texWidth / 2, texHeight / 2, 1) );
+    copy_buffer_to_texture( src2D.mips(1).array(1), uint3(5, 5, 0), tmpBuf, bufOffset, bufRowPitch, bufImageSize, uint3(texWidth / 2 + 20, texHeight / 2 + 10, 0), uint3(texWidth / 2 - 10, texHeight / 2 - 10, 1) );
+
+    blt_to_swapchain( backbuffer, src2D.mips(0).array(1), subViewport( dstVpSize, 2, 1 ) );
+    blt_to_swapchain( backbuffer, src2D.mips(1).array(1), subViewport( dstVpSize, 3, 1 ) );
+
+    // Render into a 2x multisampled target, then resolve it into the single-sample texture.
+    texture msaaRT = create_tex2d( RPS_FORMAT_R8G8B8A8_UNORM, texWidth, texHeight, 1, 1, 1, 2 ); // MSAA2x
+    clear( msaaRT, float4(0, 0, 1, 1) );
+    msaa_quad( msaaRT, viewport( texWidth * 0.2f, texHeight * 0.1f, texWidth * 0.6f, texHeight * 0.8f ) );
+
+    resolve( src2D.mips(0).array(1), uint2(0, 0), msaaRT, uint2(0, 0), uint2(0xFFFFFFFFu, 0xFFFFFFFFu), RPS_RESOLVE_MODE_AVERAGE );
+    blt_to_swapchain( backbuffer, src2D.mips(0).array(1), subViewport( dstVpSize, 0, 2 ) );
+
+    if(bTestMinMax)
+    {
+        resolve( src2D.mips(0).array(1), uint2(0, 0), msaaRT, uint2(0, 0), uint2(0xFFFFFFFFu, 0xFFFFFFFFu), RPS_RESOLVE_MODE_MIN );
+        blt_to_swapchain( backbuffer, src2D.mips(0).array(1), subViewport( dstVpSize, 1, 2 ) );
+
+        resolve( src2D.mips(0).array(1), uint2(0, 0), msaaRT, uint2(0, 0), uint2(0xFFFFFFFFu, 0xFFFFFFFFu), RPS_RESOLVE_MODE_MAX );
+        blt_to_swapchain( backbuffer, src2D.mips(0).array(1), subViewport( dstVpSize, 2, 2 ) );
+
+        // Partial resolve: source region matches the viewport msaa_quad rendered into.
+        resolve( src2D.mips(0).array(1), uint2(8, 5), msaaRT, uint2(texWidth * 0.2f, texHeight * 0.1f), uint2(texWidth * 0.6f, texHeight * 0.8f), RPS_RESOLVE_MODE_MIN );
+        blt_to_swapchain( backbuffer, src2D.mips(0).array(1), subViewport( dstVpSize, 3, 2 ) );
+    }
+}
diff --git a/tests/gui/test_built_in_nodes_d3d12.cpp b/tests/gui/test_built_in_nodes_d3d12.cpp
new file mode 100644
index 0000000..ef4293a
--- /dev/null
+++ b/tests/gui/test_built_in_nodes_d3d12.cpp
@@ -0,0 +1,296 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+
+#define RPS_D3D12_RUNTIME 1
+
+#include "test_built_in_nodes_shared.h"
+
+#include "utils/rps_test_win32.h"
+#include "utils/rps_test_d3d12_renderer.h"
+
+using namespace DirectX;
+
+// D3D12 flavor of the built-in-nodes GUI test. Combines the D3D12 test renderer with the
+// shared RPSL host (TestRpsBuiltInNodes) and supplies the callbacks for the three
+// application-defined nodes: fill_uv, msaa_quad and blt_to_swapchain.
+// NOTE(review): several template argument lists in this file look like they were lost
+// during extraction (e.g. std::vector>, static_cast(...), ComPtr members) - restore them
+// from the original source before compiling.
+class TestD3D12BuiltInNodes : public RpsTestD3D12Renderer, public TestRpsBuiltInNodes
+{
+protected:
+    // Creates the root signature, then creates the RPS device on top of this renderer and
+    // initializes the shared test host with it.
+    virtual void OnInit(ID3D12GraphicsCommandList* pInitCmdList,
+                        std::vector>& tempResources) override
+    {
+        LoadAssets(pInitCmdList, tempResources);
+
+        TestRpsBuiltInNodes::Init(rpsTestUtilCreateDevice(
+            [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); }));
+    }
+
+    // Nothing in this test depends on the window size.
+    virtual void OnPostResize() override
+    {
+    }
+
+    // Destroys the RPS host first, then releases the D3D12 objects owned by this class.
+    virtual void OnCleanUp() override
+    {
+        TestRpsBuiltInNodes::OnDestroy();
+
+        m_rootSignature = nullptr;
+        m_pipelineStateFillUV = nullptr;
+        m_pipelineStateMSAAQuad = nullptr;
+        m_pipelineStateBlt = nullptr;
+    }
+
+    // Per-frame update: passes the back buffer description and resources plus the
+    // bTestMinMax flag (enabled on D3D12) as the RPSL entry arguments.
+    virtual void OnUpdate(uint32_t frameIndex) override
+    {
+        RpsRuntimeResource backBuffers[DXGI_MAX_SWAP_CHAIN_BUFFERS];
+        RpsResourceDesc backBufferDesc;
+        GetBackBuffers(backBufferDesc, backBuffers);
+
+        RpsBool bTestMinMax = RPS_TRUE;
+
+        // Argument order matches rps_main(backbuffer, bTestMinMax).
+        RpsConstant args[] = {&backBufferDesc, &bTestMinMax};
+        const RpsRuntimeResource* argResources[] = {backBuffers};
+
+        uint64_t completedFrameIndex = CalcGuaranteedCompletedFrameIndexForRps();
+
+        TestRpsBuiltInNodes::OnUpdate(frameIndex, completedFrameIndex, uint32_t(RPS_TEST_COUNTOF(args)), args, argResources);
+
+        RpsTestD3D12Renderer::OnUpdate(frameIndex);
+    }
+
+    // Executes the render graph for this frame; any RPS failure fails the test.
+    virtual void OnRender(uint32_t frameIndex) override
+    {
+        REQUIRE(RPS_SUCCEEDED(ExecuteRenderGraph(frameIndex, GetRpsRenderGraph())));
+    }
+
+protected:
+    // Binds the member-function callbacks to the node names declared in the RPSL file.
+    virtual void BindNodes(RpsSubprogram hRpslEntry) override final
+    {
+        TestRpsBuiltInNodes::BindNodes(hRpslEntry);
+
+        RpsResult result =
+            rpsProgramBindNode(hRpslEntry, "blt_to_swapchain", &TestD3D12BuiltInNodes::DrawBlt, this);
+        REQUIRE(result == RPS_OK);
+
+        result = rpsProgramBindNode(hRpslEntry, "fill_uv", &TestD3D12BuiltInNodes::DrawFillUV, this);
+        REQUIRE(result == RPS_OK);
+
+        result = rpsProgramBindNode(hRpslEntry, "msaa_quad", &TestD3D12BuiltInNodes::DrawMSAAQuad, this);
+        REQUIRE(result == RPS_OK);
+    }
+
+    // Lazily creates the compute PSO for the CSFillUV entry point on first use.
+    void CreateFillUV(const RpsCmdCallbackContext* pContext)
+    {
+        if (!m_pipelineStateFillUV)
+        {
+            CreateComputePSO(L"CSFillUV", &m_pipelineStateFillUV);
+        }
+    }
+
+    // Callback for the "fill_uv" node: dispatches CSFillUV over the mip level viewed by dst.
+    //   dst    - CPU descriptor of the UAV to write.
+    //   cbData - value forwarded to the shader as a 32-bit root constant.
+    void DrawFillUV(const RpsCmdCallbackContext* pContext, D3D12_CPU_DESCRIPTOR_HANDLE dst, float cbData)
+    {
+        CreateFillUV(pContext);
+
+        ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer);
+
+        D3D12_GPU_DESCRIPTOR_HANDLE uavTable =
+            AllocDynamicDescriptorsAndWrite(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, &dst, 1);
+        BindDescriptorHeaps(pCmdList);
+
+        ID3D12Resource* pD3DResource = {};
+        auto pViewInfo = static_cast(pContext->ppArgs[0]);
+        RpsResult result = rpsD3D12GetCmdArgResource(pContext, 0, &pD3DResource);
+        REQUIRE(result == RPS_OK);
+
+        // Dimensions of the viewed mip level, clamped to at least one texel.
+        auto desc = pD3DResource->GetDesc();
+        uint32_t w = std::max(1u, uint32_t(desc.Width) >> pViewInfo->subresourceRange.baseMipLevel);
+        uint32_t h = std::max(1u, desc.Height >> pViewInfo->subresourceRange.baseMipLevel);
+
+        // Root parameter 0 holds the constant and 2 the UAV table (see LoadAssets).
+        pCmdList->SetComputeRootSignature(m_rootSignature.Get());
+        pCmdList->SetPipelineState(m_pipelineStateFillUV.Get());
+        pCmdList->SetComputeRoot32BitConstant(0, *(const UINT*)(&cbData), 0);
+        pCmdList->SetComputeRootDescriptorTable(2, uavTable);
+        // One thread group covers 8x8 texels ([numthreads(8, 8, 1)] in CSFillUV); round up.
+        pCmdList->Dispatch((w + 7) / 8, (h + 7) / 8, 1);
+    }
+
+    // Lazily creates the graphics PSO for the MSAA quad, using the render target formats
+    // and sample count of the node that is currently being recorded.
+    void CreateMSAAQuad(const RpsCmdCallbackContext* pContext)
+    {
+        if (!m_pipelineStateMSAAQuad)
+        {
+            RpsCmdRenderTargetInfo rtInfo;
+            RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo);
+            REQUIRE(result == RPS_OK);
+
+            CreatePSO(L"VSBlt", L"PSColorSample", false, &rtInfo, &m_pipelineStateMSAAQuad);
+        }
+    }
+
+    // Callback for the "msaa_quad" node: draws three vertices with the VSBlt / PSColorSample PSO.
+    void DrawMSAAQuad(const RpsCmdCallbackContext* pContext)
+    {
+        CreateMSAAQuad(pContext);
+
+        ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer);
+        pCmdList->SetGraphicsRootSignature(m_rootSignature.Get());
+        pCmdList->SetPipelineState(m_pipelineStateMSAAQuad.Get());
+        pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+        pCmdList->DrawInstanced(3, 1, 0, 0);
+    }
+
+    // Lazily creates the graphics PSO used to blit a texture into the current render target.
+    void CreateBlt(const RpsCmdCallbackContext* pContext)
+    {
+        if (!m_pipelineStateBlt)
+        {
+            RpsCmdRenderTargetInfo rtInfo;
+            RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo);
+            REQUIRE(result == RPS_OK);
+
+            CreatePSO(L"VSBlt", L"PSBlt", false, &rtInfo, &m_pipelineStateBlt);
+        }
+    }
+
+    // Callback for the "blt_to_swapchain" node.
+    //   dst         - unused here (declared as rps::UnusedArg).
+    //   src         - CPU descriptor of the SRV to sample.
+    //   dstViewport - viewport node argument; checked against the viewport reported by RPS.
+    void DrawBlt(const RpsCmdCallbackContext* pContext,
+                 rps::UnusedArg dst,
+                 D3D12_CPU_DESCRIPTOR_HANDLE src,
+                 const ViewportData& dstViewport)
+    {
+        CreateBlt(pContext);
+
+        ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer);
+
+        // The viewport passed as node argument must equal the one RPS derived for this node.
+        RpsCmdViewportInfo viewportScissorInfo = {};
+        RpsResult result = rpsCmdGetViewportInfo(pContext, &viewportScissorInfo);
+        REQUIRE(result == RPS_OK);
+        REQUIRE(viewportScissorInfo.numViewports == 1);
+        REQUIRE(dstViewport.data.x == viewportScissorInfo.pViewports[0].x);
+        REQUIRE(dstViewport.data.y == viewportScissorInfo.pViewports[0].y);
+        REQUIRE(dstViewport.data.z == viewportScissorInfo.pViewports[0].width);
+        REQUIRE(dstViewport.data.w == viewportScissorInfo.pViewports[0].height);
+
+        D3D12_GPU_DESCRIPTOR_HANDLE srvTable =
+            AllocDynamicDescriptorsAndWrite(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, &src, 1);
+
+        pCmdList->SetGraphicsRootSignature(m_rootSignature.Get());
+        BindDescriptorHeaps(pCmdList);
+
+        // Root parameter 1 is the SRV table (see LoadAssets).
+        pCmdList->SetPipelineState(m_pipelineStateBlt.Get());
+        pCmdList->SetGraphicsRootDescriptorTable(1, srvTable);
+        pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+        pCmdList->DrawInstanced(3, 1, 0, 0);
+    }
+
+private:
+    // Builds the root signature shared by all three pipelines:
+    //   param 0 - one 32-bit root constant at register b0,
+    //   param 1 - SRV table (t0), pixel-shader visible,
+    //   param 2 - UAV table (u0), visible to all stages,
+    // plus one static point/clamp sampler at s0. The parameters are not used by this function.
+    void LoadAssets(ID3D12GraphicsCommandList* pInitCmdList, std::vector>& tempResources)
+    {
+        CD3DX12_DESCRIPTOR_RANGE ranges[2] = {};
+        CD3DX12_ROOT_PARAMETER rootParameters[3] = {};
+
+        ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0);
+        ranges[1].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0);
+        rootParameters[0].InitAsConstants(1, 0);
+        rootParameters[1].InitAsDescriptorTable(1, &ranges[0], D3D12_SHADER_VISIBILITY_PIXEL);
+        rootParameters[2].InitAsDescriptorTable(1, &ranges[1], D3D12_SHADER_VISIBILITY_ALL);
+
+        D3D12_STATIC_SAMPLER_DESC sampler = {};
+        sampler.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT;
+        sampler.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
+        sampler.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
+        sampler.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
+        sampler.MipLODBias = 0;
+        sampler.MaxAnisotropy = 0;
+        sampler.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;
+        sampler.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK;
+        sampler.MinLOD = 0.0f;
+        sampler.MaxLOD = D3D12_FLOAT32_MAX;
+        sampler.ShaderRegister = 0;
+        sampler.RegisterSpace = 0;
+        sampler.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
+
+        D3D12_ROOT_SIGNATURE_FLAGS rootSignatureFlags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
+
+        CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC rootSignatureDesc;
+        rootSignatureDesc.Init_1_0(_countof(rootParameters), rootParameters, 1, &sampler, rootSignatureFlags);
+
+        ComPtr signature;
+        ComPtr error;
+        ThrowIfFailedEx(D3DX12SerializeVersionedRootSignature(
+                            &rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1_0, &signature, &error),
+                        error);
+        ThrowIfFailed(m_device->CreateRootSignature(
+            0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature)));
+    }
+
+    // Compiles the given vertex / pixel entry points from c_Shader and creates a graphics PSO
+    // whose render target formats, depth format and sample count come from pRenderTargetInfo.
+    // No input layout is used.
+    void CreatePSO(LPCWSTR vsEntry,
+                   LPCWSTR psEntry,
+                   bool bDepthEnable,
+                   const RpsCmdRenderTargetInfo* pRenderTargetInfo,
+                   ID3D12PipelineState** ppPSO)
+    {
+        // Create the pipeline state, which includes compiling and loading shaders.
+        {
+            std::vector vsCode, psCode, gsCode;
+
+            // Describe and create the graphics pipeline state object (PSO).
+            D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {};
+            psoDesc.InputLayout = {nullptr, 0};
+            psoDesc.pRootSignature = m_rootSignature.Get();
+            psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT);
+            psoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT);
+            psoDesc.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT);
+            psoDesc.DepthStencilState.DepthEnable = !!bDepthEnable;
+            psoDesc.SampleMask = UINT_MAX;
+            psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
+
+            psoDesc.DSVFormat = rpsFormatToDXGI(pRenderTargetInfo->depthStencilFormat);
+            psoDesc.SampleDesc.Count = pRenderTargetInfo->numSamples;
+            psoDesc.NumRenderTargets = pRenderTargetInfo->numRenderTargets;
+            psoDesc.RasterizerState.MultisampleEnable = pRenderTargetInfo->numSamples > 1;
+
+            for (uint32_t iRT = 0; iRT < pRenderTargetInfo->numRenderTargets; iRT++)
+            {
+                psoDesc.RTVFormats[iRT] = rpsFormatToDXGI(pRenderTargetInfo->renderTargetFormats[iRT]);
+            }
+
+            DxcCompile(c_Shader, vsEntry, L"vs_6_0", L"", nullptr, 0, vsCode);
+            DxcCompile(c_Shader, psEntry, L"ps_6_0", L"", nullptr, 0, psCode);
+            psoDesc.VS = CD3DX12_SHADER_BYTECODE(vsCode.data(), vsCode.size());
+            psoDesc.PS = CD3DX12_SHADER_BYTECODE(psCode.data(), psCode.size());
+
+            ThrowIfFailed(m_device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(ppPSO)));
+        }
+    }
+
+    // Compiles the given compute entry point from c_Shader and creates a compute PSO.
+    void CreateComputePSO(LPCWSTR csEntry, ID3D12PipelineState** ppPSO)
+    {
+        std::vector csCode;
+        D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {};
+        psoDesc.pRootSignature = m_rootSignature.Get();
+
+        DxcCompile(c_Shader, csEntry, L"cs_6_0", L"", nullptr, 0, csCode);
+        psoDesc.CS = CD3DX12_SHADER_BYTECODE(csCode.data(), csCode.size());
+
+        ThrowIfFailed(m_device->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(ppPSO)));
+    }
+
+private:
+    // Root signature shared by all pipelines; the PSOs are created lazily on first node execution.
+    ComPtr m_rootSignature;
+    ComPtr m_pipelineStateFillUV;
+    ComPtr m_pipelineStateBlt;
+    ComPtr m_pipelineStateMSAAQuad;
+};
+
+// Runs the test in a 1280x720 window for g_exitAfterFrame frames.
+TEST_CASE(TEST_APP_NAME)
+{
+#if _DEBUG
+    // Debug builds: enable CRT allocation tracking and the leak check at exit.
+    _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
+#endif
+
+    TestD3D12BuiltInNodes renderer;
+
+    RpsTestRunWindowInfo runInfo = {};
+    runInfo.title = TEXT(TEST_APP_NAME);
+    runInfo.numFramesToRender = g_exitAfterFrame;
+    runInfo.width = 1280;
+    runInfo.height = 720;
+    runInfo.pRenderer = &renderer;
+    RpsTestRunWindowApp(&runInfo);
+}
diff --git a/tests/gui/test_built_in_nodes_shared.h b/tests/gui/test_built_in_nodes_shared.h
new file mode 100644
index 0000000..75acfca
--- /dev/null
+++ b/tests/gui/test_built_in_nodes_shared.h
@@ -0,0 +1,116 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+
+#pragma once
+
+#include "utils/rps_test_host.h"
+
+RPS_DECLARE_RPSL_ENTRY(test_built_in_nodes, rps_main);
+
+// HLSL source shared by the D3D12 and Vulkan variants of this test. It is compiled at run
+// time; the entry points are CSFillUV, VSBlt, PSBlt and PSColorSample. The VULKAN macro
+// switches the constant between a register-bound constant buffer and a push constant.
+static const char c_Shader[] = R"(
+
+struct V2P
+{
+    float4 Pos : SV_Position;
+    float2 UV : TEXCOORD0;
+};
+
+struct CBData
+{
+    float data;
+};
+
+#ifndef VULKAN
+ConstantBuffer cb : register(b0);
+#else
+[[vk::push_constant]] CBData cb;
+#endif
+
+[[vk::binding(2, 0)]]
+RWTexture2D g_rwTex : register(u0);
+
+[numthreads(8, 8, 1)]
+void CSFillUV(uint3 dtId : SV_DispatchThreadID)
+{
+    uint w, h;
+    g_rwTex.GetDimensions(w, h);
+
+    if(all(dtId.xy < uint2(w, h)))
+    {
+        float4 color = float4(dtId.xy / float2(w, h), (cb.data > 0.5f) ? (dtId.x & 1) : (dtId.y & 1), 1);
+        g_rwTex[dtId.xy] = color;
+    }
+}
+
+V2P VSBlt(uint vertexId : SV_VertexID)
+{
+    V2P result;
+    result.Pos = float4(
+        (vertexId & 1) * 4.0f - 1.0f,
+        (vertexId & 2) * -2.0f + 1.0f,
+        0, 1);
+    result.UV = float2((vertexId & 1) * 2.0f, (vertexId & 2) * 1.0f);
+
+    return result;
+}
+
+[[vk::binding(1, 0)]]
+Texture2D g_tex : register(t0);
+
+[[vk::binding(0, 0)]]
+SamplerState g_sampler : register(s0);
+
+float4 PSBlt(V2P psIn) : SV_Target0
+{
+    return g_tex.SampleLevel(g_sampler, psIn.UV, 0);
+}
+
+float4 PSColorSample(V2P psIn, uint sampId : SV_SampleIndex) : SV_Target0
+{
+    return float4(
+        (sampId == 0) ? psIn.UV.xy : (1.0f.xx - psIn.UV.xy), 0, 1);
+}
+)";
+
+#define TEST_APP_NAME_RAW "TestBuiltInNode"
+
+using namespace DirectX;
+
+// API-independent part of the built-in-nodes test: loads the RPSL entry point and defines
+// the argument type shared by the D3D12 and Vulkan node callbacks.
+class TestRpsBuiltInNodes : public RpsTestHost
+{
+public:
+    // Viewport node argument as four floats (x, y, width, height - see the checks in DrawBlt).
+    // Copy construction and copy assignment call FAIL(), so any copy of the argument fails
+    // the test - presumably to verify that arguments reach the callbacks by reference or
+    // pointer only; confirm against the RPS argument-unwrapping code.
+    struct ViewportData
+    {
+        XMFLOAT4 data;
+
+        ViewportData()
+        {
+        }
+        ViewportData(const ViewportData& r)
+        {
+            FAIL();
+            data = r.data;
+        }
+        ViewportData& operator=(const ViewportData& r)
+        {
+            FAIL();
+            data = r.data;
+            return *this;
+        }
+    };
+
+    TestRpsBuiltInNodes()
+    {
+    }
+
+protected:
+    // Initializes the test host with the given device and the rps_main entry of test_built_in_nodes.
+    void Init(RpsDevice hRpsDevice)
+    {
+        RpsTestHost::OnInit(hRpsDevice, rpsTestLoadRpslEntry(test_built_in_nodes, rps_main));
+    }
+
+};
diff --git a/tests/gui/test_built_in_nodes_vk.cpp b/tests/gui/test_built_in_nodes_vk.cpp
new file mode 100644
index 0000000..57c4ae5
--- /dev/null
+++ b/tests/gui/test_built_in_nodes_vk.cpp
@@ -0,0 +1,522 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+ +#ifdef _WIN32 +#define VK_USE_PLATFORM_WIN32_KHR +#else +#error "TODO" +#endif + +#define RPS_VK_RUNTIME 1 + +#include "test_built_in_nodes_shared.h" + +#include "utils/rps_test_win32.h" +#include "utils/rps_test_vk_renderer.h" + +class TestVkBuiltInNodes : public RpsTestVulkanRenderer, public TestRpsBuiltInNodes +{ +protected: + virtual void OnInit(VkCommandBuffer initCmdBuf, InitTempResources& tempResources) override + { + LoadAssets(); + + TestRpsBuiltInNodes::Init(rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); })); + } + + virtual void OnPostResize() override + { + } + + virtual void OnCleanUp() override + { + TestRpsBuiltInNodes::OnDestroy(); + + vkDestroyDescriptorSetLayout(m_device, m_descriptorSetLayout, nullptr); + vkDestroyPipelineLayout(m_device, m_pipelineLayout, nullptr); + vkDestroyPipeline(m_device, m_psoFillUV, nullptr); + vkDestroyPipeline(m_device, m_psoMSAAQuad, nullptr); + vkDestroyPipeline(m_device, m_psoBlt, nullptr); + vkDestroySampler(m_device, m_sampler, nullptr); + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + RpsResourceDesc backBufferDesc; + auto& swapChainBufferHdls = GetBackBuffers(backBufferDesc); + + RpsBool bTestMinMax = RPS_FALSE; + + RpsConstant args[] = {&backBufferDesc, &bTestMinMax}; + const RpsRuntimeResource* argResources[] = {swapChainBufferHdls.data()}; + + uint64_t completedFrameIndex = CalcGuaranteedCompletedFrameIndexForRps(); + + TestRpsBuiltInNodes::OnUpdate( + frameIndex, completedFrameIndex, uint32_t(RPS_TEST_COUNTOF(args)), args, argResources); + + RpsTestVulkanRenderer::OnUpdate(frameIndex); + } + + virtual void OnRender(uint32_t frameIndex) override + { + REQUIRE(RPS_SUCCEEDED(ExecuteRenderGraph(frameIndex, GetRpsRenderGraph()))); + } + +protected: + virtual void BindNodes(RpsSubprogram hRpslEntry) override final + { + TestRpsBuiltInNodes::BindNodes(hRpslEntry); + + RpsResult result = + rpsProgramBindNode(hRpslEntry, 
"blt_to_swapchain", &TestVkBuiltInNodes::DrawBlt, this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode(hRpslEntry, "fill_uv", &TestVkBuiltInNodes::DrawFillUV, this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode(hRpslEntry, "msaa_quad", &TestVkBuiltInNodes::DrawMSAAQuad, this); + REQUIRE(result == RPS_OK); + } + + void CreateFillUV(const RpsCmdCallbackContext* pContext) + { + if (m_psoFillUV == VK_NULL_HANDLE) + { + CreateComputePSO(L"CSFillUV", &m_psoFillUV); + } + } + + void DrawFillUV(const RpsCmdCallbackContext* pContext, VkImageView dst, float cbData) + { + CreateFillUV(pContext); + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + VkWriteDescriptorSet writeDescriptorSet[1] = {}; + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_descriptorSetLayout, 1, &ds)); + + VkDescriptorImageInfo imageInfo = {}; + imageInfo.sampler = VK_NULL_HANDLE; + imageInfo.imageView = dst; + imageInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + + auto pViewInfo = static_cast(rpsCmdGetArg(pContext, 0)); + + RpsResourceDesc resourceDesc = {}; + RpsResult result = rpsCmdGetArgResourceDesc(pContext, 0, &resourceDesc); + REQUIRE(result == RPS_OK); + + uint32_t w = std::max(1u, resourceDesc.image.width >> pViewInfo->subresourceRange.baseMipLevel); + uint32_t h = std::max(1u, resourceDesc.image.height >> pViewInfo->subresourceRange.baseMipLevel); + + AppendWriteDescriptorSetImages(&writeDescriptorSet[0], ds, 2, 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageInfo); + vkUpdateDescriptorSets(m_device, _countof(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + vkCmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipelineLayout, 0, 1, &ds, 0, nullptr); + vkCmdPushConstants(cmdBuf, m_pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(cbData), &cbData); + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_psoFillUV); + vkCmdDispatch(cmdBuf, (w + 7) / 8, (h + 7) / 8, 1); + } + + void 
CreateMSAAQuad(const RpsCmdCallbackContext* pContext) + { + if (!m_psoMSAAQuad) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + VkRenderPass rp; + result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + CreatePSO(L"VSBlt", nullptr, L"PSColorSample", rtInfo.numRenderTargets, false, rtInfo.numSamples, rp, &m_psoMSAAQuad); + } + } + + void DrawMSAAQuad(const RpsCmdCallbackContext* pContext) + { + CreateMSAAQuad(pContext); + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_psoMSAAQuad); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + + void CreateBlt(const RpsCmdCallbackContext* pContext) + { + if (m_psoBlt == VK_NULL_HANDLE) + { + VkRenderPass rp; + RpsResult result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + CreatePSO(L"VSBlt", nullptr, L"PSBlt", 1, false, 1, rp, &m_psoBlt); + } + } + + void DrawBlt(const RpsCmdCallbackContext* pContext, + rps::UnusedArg dst, + VkImageView src, + const ViewportData* dstViewport) + { + CreateBlt(pContext); + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_descriptorSetLayout, 1, &ds)); + + RpsCmdViewportInfo viewportScissorInfo = {}; + RpsResult result = rpsCmdGetViewportInfo(pContext, &viewportScissorInfo); + REQUIRE(result == RPS_OK); + REQUIRE(viewportScissorInfo.numViewports == 1); + REQUIRE(dstViewport->data.x == viewportScissorInfo.pViewports[0].x); + REQUIRE(dstViewport->data.y == viewportScissorInfo.pViewports[0].y); + REQUIRE(dstViewport->data.z == viewportScissorInfo.pViewports[0].width); + REQUIRE(dstViewport->data.w == viewportScissorInfo.pViewports[0].height); + + VkWriteDescriptorSet writeDescriptorSet[1] = {}; + + VkDescriptorImageInfo imageInfo = {VK_NULL_HANDLE, src, 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + AppendWriteDescriptorSetImages(&writeDescriptorSet[0], ds, 1, 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &imageInfo); + + vkUpdateDescriptorSets(m_device, _countof(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + + vkCmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayout, 0, 1, &ds, 0, nullptr); + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_psoBlt); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + +private: + void LoadAssets() + { + OnPostResize(); + + VkSamplerCreateInfo samplerCI = {VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO}; + samplerCI.magFilter = VK_FILTER_NEAREST; + samplerCI.minFilter = VK_FILTER_NEAREST; + samplerCI.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + samplerCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCI.mipLodBias = 0.0f; + samplerCI.compareOp = VK_COMPARE_OP_NEVER; + samplerCI.minLod = 0.0f; + samplerCI.maxLod = FLT_MAX; + samplerCI.maxAnisotropy = 1.0; + samplerCI.anisotropyEnable = VK_FALSE; + samplerCI.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + + ThrowIfFailedVK(vkCreateSampler(m_device, &samplerCI, nullptr, &m_sampler)); + + VkDescriptorSetLayoutBinding sharedLayoutBindings[3] = {}; + sharedLayoutBindings[0].binding = 1; + sharedLayoutBindings[0].descriptorCount = 1; + sharedLayoutBindings[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + sharedLayoutBindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + sharedLayoutBindings[1].binding = 2; + sharedLayoutBindings[1].descriptorCount = 1; + sharedLayoutBindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + sharedLayoutBindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + sharedLayoutBindings[2].binding = 0; + sharedLayoutBindings[2].descriptorCount = 1; + sharedLayoutBindings[2].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + 
sharedLayoutBindings[2].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + sharedLayoutBindings[2].pImmutableSamplers = &m_sampler; + + VkDescriptorSetLayoutCreateInfo setLayoutCI = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + setLayoutCI.pBindings = sharedLayoutBindings; + setLayoutCI.bindingCount = _countof(sharedLayoutBindings); + + ThrowIfFailedVK(vkCreateDescriptorSetLayout(m_device, &setLayoutCI, nullptr, &m_descriptorSetLayout)); + + VkPushConstantRange pushConstRanges[1] = {}; + pushConstRanges[0].offset = 0; + pushConstRanges[0].size = 4; + pushConstRanges[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + + VkPipelineLayoutCreateInfo plCI = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + plCI.setLayoutCount = 1; + plCI.pSetLayouts = &m_descriptorSetLayout; + plCI.pPushConstantRanges = pushConstRanges; + plCI.pushConstantRangeCount = _countof(pushConstRanges); + + ThrowIfFailedVK(vkCreatePipelineLayout(m_device, &plCI, nullptr, &m_pipelineLayout)); + } + + void CreatePSO( + const WCHAR* vsEntry, const WCHAR* gsEntry, const WCHAR* psEntry, uint32_t numColorAttachments, bool bDepth, uint32_t sampleCount, VkRenderPass renderPass, VkPipeline* pPso) + { + VkPipelineVertexInputStateCreateInfo vi = {}; + vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vi.pNext = NULL; + vi.flags = 0; + vi.vertexBindingDescriptionCount = 0; + vi.pVertexBindingDescriptions = nullptr; + vi.vertexAttributeDescriptionCount = 0; + vi.pVertexAttributeDescriptions = nullptr; + + // input assembly state + // + VkPipelineInputAssemblyStateCreateInfo ia; + ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + ia.pNext = NULL; + ia.flags = 0; + ia.primitiveRestartEnable = VK_FALSE; + ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + + // rasterizer state + VkPipelineRasterizationStateCreateInfo rs; + rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rs.pNext = NULL; + rs.flags = 0; + rs.polygonMode = 
VK_POLYGON_MODE_FILL; + rs.cullMode = VK_CULL_MODE_NONE; + rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rs.depthClampEnable = VK_FALSE; + rs.rasterizerDiscardEnable = VK_FALSE; + rs.depthBiasEnable = VK_FALSE; + rs.depthBiasConstantFactor = 0; + rs.depthBiasClamp = 0; + rs.depthBiasSlopeFactor = 0; + rs.lineWidth = 1.0f; + + VkPipelineColorBlendAttachmentState bs[8] = {}; + bs[0].blendEnable = VK_FALSE; + bs[0].srcColorBlendFactor = VK_BLEND_FACTOR_ONE; + bs[0].dstColorBlendFactor = VK_BLEND_FACTOR_ZERO; + bs[0].colorBlendOp = VK_BLEND_OP_ADD; + bs[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE; + bs[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; + bs[0].alphaBlendOp = VK_BLEND_OP_ADD; + bs[0].colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + for (uint32_t i = 1; i < numColorAttachments; i++) + { + bs[i] = bs[0]; + } + + // Color blend state + VkPipelineColorBlendStateCreateInfo cb; + cb.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + cb.flags = 0; + cb.pNext = NULL; + cb.attachmentCount = numColorAttachments; + cb.pAttachments = bs; + cb.logicOpEnable = VK_FALSE; + cb.logicOp = VK_LOGIC_OP_NO_OP; + cb.blendConstants[0] = 1.0f; + cb.blendConstants[1] = 1.0f; + cb.blendConstants[2] = 1.0f; + cb.blendConstants[3] = 1.0f; + + std::vector dynamicStateEnables = {VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR}; + VkPipelineDynamicStateCreateInfo dynamicState = {}; + dynamicState.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamicState.pNext = NULL; + dynamicState.pDynamicStates = dynamicStateEnables.data(); + dynamicState.dynamicStateCount = (uint32_t)dynamicStateEnables.size(); + + // view port state + + VkPipelineViewportStateCreateInfo vp = {}; + vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + vp.pNext = NULL; + vp.flags = 0; + vp.viewportCount = 1; + vp.scissorCount = 1; + vp.pScissors = NULL; + vp.pViewports = NULL; + + // 
depth stencil state + + VkPipelineDepthStencilStateCreateInfo ds; + ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + ds.pNext = NULL; + ds.flags = 0; + ds.depthTestEnable = bDepth ? VK_TRUE : VK_FALSE; + ds.depthWriteEnable = bDepth ? VK_TRUE : VK_FALSE; + ds.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL; + ds.depthBoundsTestEnable = VK_FALSE; + ds.stencilTestEnable = VK_FALSE; + ds.back.failOp = VK_STENCIL_OP_KEEP; + ds.back.passOp = VK_STENCIL_OP_KEEP; + ds.back.compareOp = VK_COMPARE_OP_ALWAYS; + ds.back.compareMask = 0; + ds.back.reference = 0; + ds.back.depthFailOp = VK_STENCIL_OP_KEEP; + ds.back.writeMask = 0; + ds.minDepthBounds = 0; + ds.maxDepthBounds = 0; + ds.stencilTestEnable = VK_FALSE; + ds.front = ds.back; + + // multi sample state + + VkPipelineMultisampleStateCreateInfo ms; + ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + ms.pNext = NULL; + ms.flags = 0; + ms.pSampleMask = NULL; + ms.rasterizationSamples = static_cast(sampleCount); + ms.sampleShadingEnable = (sampleCount > 1) ? VK_TRUE : VK_FALSE; + ms.alphaToCoverageEnable = VK_FALSE; + ms.alphaToOneEnable = VK_FALSE; + ms.minSampleShading = (sampleCount > 1) ? 
1.0f : 0.0f; + + VkShaderModule vsModule = VK_NULL_HANDLE, gsModule = VK_NULL_HANDLE, psModule = VK_NULL_HANDLE; + std::vector vsCode, gsCode, psCode; + + const DxcDefine defs[] = {{L"VULKAN", L"1"}}; + + DxcCompileToSpirv(c_Shader, vsEntry, L"vs_6_0", L"", defs, _countof(defs), vsCode); + DxcCompileToSpirv(c_Shader, psEntry, L"ps_6_0", L"", defs, _countof(defs), psCode); + + if (gsEntry) + { + DxcCompileToSpirv(c_Shader, gsEntry, L"gs_6_0", L"", defs, _countof(defs), gsCode); + } + + VkShaderModuleCreateInfo smCI = {}; + smCI.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + + smCI.pCode = reinterpret_cast(vsCode.data()); + smCI.codeSize = vsCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &vsModule)); + + smCI.pCode = reinterpret_cast(psCode.data()); + smCI.codeSize = psCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &psModule)); + + char vsName[128]; + char psName[128]; + sprintf_s(vsName, "%S", vsEntry); + sprintf_s(psName, "%S", psEntry); + + VkPipelineShaderStageCreateInfo shaderStages[3] = {}; + shaderStages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[0].module = vsModule; + shaderStages[0].pName = vsName; + shaderStages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + shaderStages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[1].module = psModule; + shaderStages[1].pName = psName; + shaderStages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + + uint32_t numShaderStages = 2; + + if (gsEntry) + { + smCI.pCode = reinterpret_cast(gsCode.data()); + smCI.codeSize = gsCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &gsModule)); + + char gsName[128]; + sprintf_s(gsName, "%S", gsEntry); + + shaderStages[2].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[2].module = gsModule; + shaderStages[2].pName = gsName; + shaderStages[2].stage = VK_SHADER_STAGE_GEOMETRY_BIT; + + numShaderStages = 
3; + } + + VkGraphicsPipelineCreateInfo psoCI = {}; + psoCI.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + psoCI.pNext = NULL; + psoCI.layout = m_pipelineLayout; + psoCI.basePipelineHandle = VK_NULL_HANDLE; + psoCI.basePipelineIndex = 0; + psoCI.flags = 0; + psoCI.pVertexInputState = &vi; + psoCI.pInputAssemblyState = &ia; + psoCI.pRasterizationState = &rs; + psoCI.pColorBlendState = &cb; + psoCI.pTessellationState = NULL; + psoCI.pMultisampleState = &ms; + psoCI.pDynamicState = &dynamicState; + psoCI.pViewportState = &vp; + psoCI.pDepthStencilState = &ds; + psoCI.pStages = shaderStages; + psoCI.stageCount = numShaderStages; + psoCI.renderPass = renderPass; + psoCI.subpass = 0; + + ThrowIfFailedVK(vkCreateGraphicsPipelines(m_device, VK_NULL_HANDLE, 1, &psoCI, nullptr, pPso)); + + vkDestroyShaderModule(m_device, vsModule, nullptr); + vkDestroyShaderModule(m_device, psModule, nullptr); + if (gsModule != RPS_NULL_HANDLE) + { + vkDestroyShaderModule(m_device, gsModule, nullptr); + } + } + + void CreateComputePSO(const WCHAR* csEntry, VkPipeline* pPso) + { + const DxcDefine defs[] = {{L"VULKAN", L"1"}}; + + std::vector csCode; + DxcCompileToSpirv(c_Shader, csEntry, L"cs_6_0", L"", defs, _countof(defs), csCode); + + VkShaderModuleCreateInfo smCI = {}; + smCI.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smCI.pCode = reinterpret_cast(csCode.data()); + smCI.codeSize = csCode.size(); + + VkShaderModule csModule; + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &csModule)); + + char csName[128]; + sprintf_s(csName, "%S", csEntry); + + VkComputePipelineCreateInfo compPsoCI = {}; + compPsoCI.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + compPsoCI.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + compPsoCI.stage.module = csModule; + compPsoCI.stage.pName = csName; + compPsoCI.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT; + compPsoCI.layout = m_pipelineLayout; + + ThrowIfFailedVK(vkCreateComputePipelines(m_device, 
VK_NULL_HANDLE, 1, &compPsoCI, nullptr, pPso)); + vkDestroyShaderModule(m_device, csModule, nullptr); + } + +private: + VkDescriptorSetLayout m_descriptorSetLayout = VK_NULL_HANDLE; + VkPipelineLayout m_pipelineLayout = VK_NULL_HANDLE; + VkPipeline m_psoFillUV = VK_NULL_HANDLE; + VkPipeline m_psoMSAAQuad = VK_NULL_HANDLE; + VkPipeline m_psoBlt = VK_NULL_HANDLE; + VkSampler m_sampler = VK_NULL_HANDLE; +}; + +TEST_CASE(TEST_APP_NAME) +{ + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#if defined(BREAK_AT_ALLOC_ID) + _CrtSetBreakAlloc(BREAK_AT_ALLOC_ID); +#endif + + TestVkBuiltInNodes renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + runInfo.numFramesToRender = g_exitAfterFrame; + runInfo.width = 1280; + runInfo.height = 720; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} diff --git a/tests/gui/test_downsample.rpsl b/tests/gui/test_downsample.rpsl new file mode 100644 index 0000000..bce8f5e --- /dev/null +++ b/tests/gui/test_downsample.rpsl @@ -0,0 +1,106 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +struct StructArg +{ + float f; + uint u; +}; + +node Geo( rtv dst : SV_Target0, uint geoId, StructArg s, uint4 v, uint a[4] ); +node Downsample( rtv dst : SV_Target0, srv src, float2 invSize ); +compute node DownsampleCompute( [relaxed] uav dst, [readonly(cs)] texture src, float2 invSize, uint2 dispatchGroups ); +graphics node Quads( [relaxed] rtv Color : SV_Target0, [readonly(vs, ps)] texture src, uint geoId ); + +// Test non-export function. Should not be included in __rps_entries_. 
+uint Add(uint a, uint b) +{ + return a + b; +} + +export void downsample([readonly(present)] texture backbuffer, bool useAsyncCompute, bool use_array, uint geos) +{ + ResourceDesc backbufferDesc = backbuffer.desc(); + + uint32_t width = (uint32_t)backbufferDesc.Width; + uint32_t height = backbufferDesc.Height; + RPS_FORMAT format = backbufferDesc.Format; + + uint downsamples = 3; + + if (geos == 0) + { + clear(backbuffer, float4(0.0, 0.2, 0.4, 1.0)); + } + + texture geoRT; + + if (use_array && (geos > 0)) + { + geoRT = create_tex2d(format, width / 2, height / 2, Add(downsamples, 1), geos); + } + + texture colorRtv = null; + texture texView; + + texture backBufferRtv = backbuffer.mips(0); + + for( uint g = 0; g < geos; g++ ) + { + if (!use_array) + { + geoRT = create_tex2d(format, width / 2, height / 2, downsamples + 1); + colorRtv = geoRT.mips(0).array(0); + } + else + { + colorRtv = geoRT.mips(0).array(g); + } + + if (colorRtv != null) + { + clear(colorRtv, float4(0.0, 0.2, 0.4, 1.0)); + } + + uint4 indexCounts = { 12, 9, 6, 3 }; + uint indexCounts2[4] = { 3, 3, 12, 12 }; + StructArg s; + s.f = 1.5f; + s.u = geos - 1 - g; + + Geo(colorRtv, g, s, indexCounts, indexCounts2); + + uint2 srcSize = uint2(colorRtv.desc().Width, colorRtv.desc().Height); + + for ( uint d = 0; d < downsamples; d++ ) + { + texView = colorRtv; + + uint2 currSrcSize = srcSize >> d; + float2 invSize = float2(1.0f / currSrcSize); + + colorRtv = geoRT.mips(d + 1).array(use_array ? 
g : 0); + + if (useAsyncCompute) + { + uint2 dispatchGroups = (currSrcSize + 7) / 8; + async DownsampleCompute(colorRtv, texView, invSize, dispatchGroups); + } + else + { + Downsample(colorRtv, texView, invSize ); + } + } + + texture downsampledSrv = colorRtv; + + if (downsampledSrv != null) + { + Quads(backbuffer, downsampledSrv, g); + } + } +} diff --git a/tests/gui/test_downsample_d3d12.cpp b/tests/gui/test_downsample_d3d12.cpp new file mode 100644 index 0000000..e416772 --- /dev/null +++ b/tests/gui/test_downsample_d3d12.cpp @@ -0,0 +1,487 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#define RPS_D3D12_RUNTIME 1 + +#include "test_downsample_shared.h" + +#include "utils/rps_test_win32.h" +#include "utils/rps_test_d3d12_renderer.h" + +using namespace DirectX; + +class TestD3D12Downsample : public RpsTestD3D12Renderer, public TestRpsDownsample +{ + static const UINT TextureWidth = 256; + static const UINT TextureHeight = 256; + static const UINT MaxConstantSizePerFrame = 65536; + + struct alignas(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT) GeoConstantBufferD3D12 + : public TestRpsDownsample::GeoConstantBuffer + { + }; + +protected: + virtual void OnInit(ID3D12GraphicsCommandList* pInitCmdList, + std::vector>& tempResources) override + { + LoadAssets(pInitCmdList, tempResources); + + TestRpsDownsample::OnInit(rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); })); + } + + virtual void BindNodes(RpsSubprogram hRpslEntry) override + { + RpsResult result = + rpsProgramBindNode(rpsRenderGraphGetMainEntry(GetRpsRenderGraph()), "Quads", &DrawQuadsCb, this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode(rpsRenderGraphGetMainEntry(GetRpsRenderGraph()), "Geo", 
&DrawGeoCb, this); + REQUIRE(result == RPS_OK); + + result = + rpsProgramBindNode(rpsRenderGraphGetMainEntry(GetRpsRenderGraph()), "Downsample", &DrawDownsampleCb, this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode( + rpsRenderGraphGetMainEntry(GetRpsRenderGraph()), "DownsampleCompute", &ComputeDownsampleCb, this); + REQUIRE(result == RPS_OK); + } + + virtual void OnPostResize() override + { + } + + virtual void OnCleanUp() override + { + TestRpsDownsample::OnDestroy(); + + m_rootSignature = nullptr; + m_rootSignatureCompute = nullptr; + m_defaultPipelineState = nullptr; + m_downsamplePipelineState = nullptr; + m_downsampleComputePipelineState = nullptr; + m_vertexBuffer = nullptr; + m_constantBuffer = nullptr; + m_texture = nullptr; + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + TestRpsDownsample::OnUpdate(frameIndex, m_width, m_height); + + UpdatePipeline(frameIndex, CalcGuaranteedCompletedFrameIndexForRps()); + } + + virtual void OnRender(uint32_t frameIndex) override + { + m_frameConstantUsage = 0; + + ExecuteRenderGraph(frameIndex, GetRpsRenderGraph()); + } + +private: + void DrawGeo(const RpsCmdCallbackContext* pContext) + { + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + RpsVariable cmdArg = rpsCmdGetArg(pContext, 1); + uint32_t triangleIndex = *static_cast(cmdArg); + + uint32_t triangleDataIndex = triangleIndex % _countof(m_TriangleCbData); + + BindDescriptorHeaps(pCmdList); + pCmdList->SetGraphicsRootSignature(m_rootSignature.Get()); + pCmdList->SetPipelineState(m_defaultPipelineState.Get()); + + D3D12_GPU_VIRTUAL_ADDRESS cbGpuVa = AllocAndWriteFrameConstants(&m_TriangleCbData[triangleDataIndex], sizeof(GeoConstantBuffer)); + pCmdList->SetGraphicsRootConstantBufferView(0, cbGpuVa); + pCmdList->SetGraphicsRootDescriptorTable(1, m_checkerboardTextureDescriptor.GetGPU(0)); + + pCmdList->IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + 
pCmdList->IASetVertexBuffers(0, 1, &m_triangleBufferView); + pCmdList->DrawInstanced(3 * (triangleDataIndex + 1), 1, 0, 0); + } + + void DrawQuads(const RpsCmdCallbackContext* pContext) + { + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + D3D12_CPU_DESCRIPTOR_HANDLE srcSrv; + RpsResult result = rpsD3D12GetCmdArgDescriptor(pContext, 1, &srcSrv); + REQUIRE(result == RPS_OK); + D3D12_GPU_DESCRIPTOR_HANDLE srvTable = + AllocDynamicDescriptorsAndWrite(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, &srcSrv, 1); + + RpsVariable cmdArg = rpsCmdGetArg(pContext, 2); + uint32_t quadIndex = *static_cast(cmdArg); + + GeoConstantBuffer data; + XMStoreFloat4x4(&data.offset, + XMMatrixAffineTransformation2D( + XMVectorSet(m_quadScale[0], m_quadScale[1], 1.f, 1.f), + XMVectorZero(), + 0.f, + XMVectorSet(m_quadOffsets[quadIndex][0], m_quadOffsets[quadIndex][1], 0.f, 0.f))); + data.color = XMFLOAT4(1.f, 1.f, 1.f, 1.f); + data.aspectRatio = 1.0f; + D3D12_GPU_VIRTUAL_ADDRESS cbGpuVa = AllocAndWriteFrameConstants(&data, sizeof(data)); + + pCmdList->SetGraphicsRootSignature(m_rootSignature.Get()); + pCmdList->SetPipelineState(m_defaultPipelineState.Get()); + + BindDescriptorHeaps(pCmdList); + + pCmdList->SetGraphicsRootConstantBufferView(0, cbGpuVa); + pCmdList->SetGraphicsRootDescriptorTable(1, srvTable); + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdList->IASetVertexBuffers(0, 1, &m_quadsBufferView); + pCmdList->DrawInstanced(6, 1, 0, 0); + } + + void DrawDownsample(const RpsCmdCallbackContext* pContext) + { + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + D3D12_CPU_DESCRIPTOR_HANDLE srcSrv; + RpsResult result = rpsD3D12GetCmdArgDescriptor(pContext, 1, &srcSrv); + REQUIRE(result == RPS_OK); + + D3D12_GPU_DESCRIPTOR_HANDLE srvTable = + AllocDynamicDescriptorsAndWrite(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, &srcSrv, 1); + + RpsVariable cmdArg = 
rpsCmdGetArg(pContext, 2); + XMFLOAT2 invSize = *static_cast(cmdArg); + + D3D12_GPU_VIRTUAL_ADDRESS cbGpuVa = AllocAndWriteFrameConstants(&invSize, sizeof(invSize)); + + // Set necessary state. + pCmdList->SetGraphicsRootSignature(m_rootSignature.Get()); + pCmdList->SetPipelineState(m_downsamplePipelineState.Get()); + + BindDescriptorHeaps(pCmdList); + pCmdList->SetGraphicsRootConstantBufferView(0, cbGpuVa); + pCmdList->SetGraphicsRootDescriptorTable(1, srvTable); + + // Record commands. + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdList->IASetVertexBuffers(0, 1, &m_quadsBufferView); + pCmdList->DrawInstanced(6, 1, 0, 0); + } + + void ComputeDownsample(const RpsCmdCallbackContext* pContext) + { + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + D3D12_CPU_DESCRIPTOR_HANDLE cpuDescriptors[2]; + RpsResult result = rpsD3D12GetCmdArgDescriptor(pContext, 1, &cpuDescriptors[0]); + REQUIRE(result == RPS_OK); + + result = rpsD3D12GetCmdArgDescriptor(pContext, 0, &cpuDescriptors[1]); + REQUIRE(result == RPS_OK); + + D3D12_GPU_DESCRIPTOR_HANDLE srvUavTable = AllocDynamicDescriptorsAndWrite( + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, cpuDescriptors, _countof(cpuDescriptors)); + + RpsVariable cmdArg = rpsCmdGetArg(pContext, 2); + XMFLOAT2 invSize = *static_cast(cmdArg); + + cmdArg = rpsCmdGetArg(pContext, 3); + XMUINT2 dispatchGroups = *static_cast(cmdArg); + + // Set necessary state. 
+ pCmdList->SetComputeRootSignature(m_rootSignatureCompute.Get()); + pCmdList->SetPipelineState(m_downsampleComputePipelineState.Get()); + + BindDescriptorHeaps(pCmdList); + pCmdList->SetComputeRoot32BitConstants(0, 2, &invSize, 0); + pCmdList->SetComputeRootDescriptorTable(1, srvUavTable); + pCmdList->Dispatch(dispatchGroups.x, dispatchGroups.y, 1); + } + + static void DrawGeoCb(const RpsCmdCallbackContext* pContext) + { + static_cast(pContext->pCmdCallbackContext)->DrawGeo(pContext); + } + + static void DrawQuadsCb(const RpsCmdCallbackContext* pContext) + { + static_cast(pContext->pCmdCallbackContext)->DrawQuads(pContext); + } + + static void DrawDownsampleCb(const RpsCmdCallbackContext* pContext) + { + static_cast(pContext->pCmdCallbackContext)->DrawDownsample(pContext); + } + + static void ComputeDownsampleCb(const RpsCmdCallbackContext* pContext) + { + static_cast(pContext->pCmdCallbackContext)->ComputeDownsample(pContext); + } + +private: + D3D12_GPU_VIRTUAL_ADDRESS AllocAndWriteFrameConstants(const void* pSrcData, uint32_t size) + { + uint32_t allocSize = (size + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1) & + ~(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1); + uint32_t newOffset = m_frameConstantUsage + allocSize; + if (newOffset > MaxConstantSizePerFrame) + { + return NULL; + } + + const uint32_t totalOffset = MaxConstantSizePerFrame * m_backBufferIndex + m_frameConstantUsage; + memcpy(m_constantBufferCpuVA + totalOffset, pSrcData, size); + + m_frameConstantUsage = newOffset; + return m_constantBuffer->GetGPUVirtualAddress() + totalOffset; + } + + void LoadAssets(ID3D12GraphicsCommandList* pInitCmdList, std::vector>& tempResources) + { + // Create a root signature consisting of a descriptor table with a single CBV. 
+ + D3D12_STATIC_SAMPLER_DESC sampler = {}; + sampler.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + sampler.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + sampler.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + sampler.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + sampler.MipLODBias = 0; + sampler.MaxAnisotropy = 0; + sampler.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; + sampler.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + sampler.MinLOD = 0.0f; + sampler.MaxLOD = D3D12_FLOAT32_MAX; + sampler.ShaderRegister = 0; + sampler.RegisterSpace = 0; + sampler.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + { + CD3DX12_DESCRIPTOR_RANGE ranges[1] = {}; + CD3DX12_ROOT_PARAMETER rootParameters[2] = {}; + + ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0); + rootParameters[0].InitAsConstantBufferView(0); + rootParameters[1].InitAsDescriptorTable(1, &ranges[0], D3D12_SHADER_VISIBILITY_PIXEL); + + D3D12_ROOT_SIGNATURE_FLAGS rootSignatureFlags = + D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC rootSignatureDesc; + rootSignatureDesc.Init_1_0(_countof(rootParameters), rootParameters, 1, &sampler, rootSignatureFlags); + + ComPtr signature; + ComPtr error; + ThrowIfFailed(D3DX12SerializeVersionedRootSignature( + &rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1_0, &signature, &error)); + ThrowIfFailed(m_device->CreateRootSignature( + 0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature))); + } + + { + CD3DX12_DESCRIPTOR_RANGE ranges[2] = {}; + CD3DX12_ROOT_PARAMETER rootParameters[2] = {}; + ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0); + ranges[1].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0); + + rootParameters[0].InitAsConstants(2, 0); + rootParameters[1].InitAsDescriptorTable(_countof(ranges), &ranges[0], D3D12_SHADER_VISIBILITY_ALL); + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC rootSignatureDesc; + rootSignatureDesc.Init_1_0( + _countof(rootParameters), 
rootParameters, 1, &sampler, D3D12_ROOT_SIGNATURE_FLAG_NONE); + + ComPtr signature; + ComPtr error; + ThrowIfFailed(D3DX12SerializeVersionedRootSignature( + &rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1_0, &signature, &error)); + ThrowIfFailed(m_device->CreateRootSignature( + 0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&m_rootSignatureCompute))); + } + + // Create the pipeline state, which includes compiling and loading shaders. + CreateGraphicsPipeline(c_DefaultShader, sizeof(c_DefaultShader), L"VSMain", L"PSMain", &m_defaultPipelineState); + CreateGraphicsPipeline( + c_DownsampleShader, sizeof(c_DownsampleShader), L"VSMain", L"PSMain", &m_downsamplePipelineState); + CreateComputePipeline( + c_DownsampleShader, sizeof(c_DownsampleShader), L"CSMain", &m_downsampleComputePipelineState); + + // Create constant buffers + { + CD3DX12_HEAP_PROPERTIES heapProperties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); + CD3DX12_RESOURCE_DESC resourceDesc = CD3DX12_RESOURCE_DESC::Buffer( + MaxConstantSizePerFrame * m_backBufferCount, D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE); + ThrowIfFailed(m_device->CreateCommittedResource( + &heapProperties, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_constantBuffer))); + + CD3DX12_RANGE range = CD3DX12_RANGE(0, 0); + ThrowIfFailed(m_constantBuffer->Map(0, &range, reinterpret_cast(&m_constantBufferCpuVA))); + } + + // Create vertex buffers + { + // Define the geometry for a triangle. 
+ Vertex triangleVertices[] = { + // triangle + {{0.0f, 0.25f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.5f, 0.0f}}, + {{0.25f, -0.25f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{-0.25f, -0.25f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + + {{0.0f, 0.25f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.5f, 0.0f}}, + {{0.5f, 0.25f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + {{0.25f, -0.25f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + + {{0.5f, 0.25f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.5f, 0.0f}}, + {{0.75f, -0.25f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{0.25f, -0.25f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 1.0f}}, + + {{0.5f, 0.25f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.5f, 0.0f}}, + {{1.0f, 0.25f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 1.0f}}, + {{0.75f, -0.25f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + + // quad 0 + {{-1.0f, 1.0, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {0.0f, 0.0f}}, + {{1.f, -1.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f}}, + {{-1.f, -1.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + {{-1.0f, 1.0, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {0.0f, 0.0f}}, + {{1.f, 1.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 0.0f}}, + {{1.f, -1.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f}}, + }; + + const UINT vertexBufferSize = sizeof(triangleVertices); + + // Note: using upload heaps to transfer static data like vert buffers is not + // recommended. Every time the GPU needs it, the upload heap will be marshalled + // over. Please read up on Default Heap usage. An upload heap is used here for + // code simplicity and because there are very few verts to actually transfer. 
+ CD3DX12_HEAP_PROPERTIES heapProperties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); + CD3DX12_RESOURCE_DESC resourceDesc = CD3DX12_RESOURCE_DESC::Buffer(vertexBufferSize); + ThrowIfFailed(m_device->CreateCommittedResource(&heapProperties, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_vertexBuffer))); + + // Copy the triangle data to the vertex buffer. + UINT8* pVertexDataBegin; + CD3DX12_RANGE readRange(0, 0); // We do not intend to read from this resource on the CPU. + ThrowIfFailed(m_vertexBuffer->Map(0, &readRange, reinterpret_cast(&pVertexDataBegin))); + memcpy(pVertexDataBegin, triangleVertices, sizeof(triangleVertices)); + m_vertexBuffer->Unmap(0, nullptr); + + // Initialize the vertex buffer view. + m_triangleBufferView.BufferLocation = m_vertexBuffer->GetGPUVirtualAddress(); + m_triangleBufferView.StrideInBytes = sizeof(Vertex); + m_triangleBufferView.SizeInBytes = sizeof(Vertex) * 12; + + m_quadsBufferView.BufferLocation = m_triangleBufferView.BufferLocation + m_triangleBufferView.SizeInBytes; + m_quadsBufferView.StrideInBytes = sizeof(Vertex); + m_quadsBufferView.SizeInBytes = sizeof(Vertex) * 6; + } + + // Create checkerboard texture + float tintColor[] = {1.0f, 1.0f, 1.0f, 1.0f}; + RpsTestD3D12Renderer::CreateStaticCheckerboardTexture(m_texture, tempResources, pInitCmdList, 256, 256, tintColor); + m_checkerboardTextureDescriptor = AllocStaticCBV_SRV_UAVs(1); + m_device->CreateShaderResourceView(m_texture.Get(), NULL, m_checkerboardTextureDescriptor.GetCPU(0)); + } + + void CreateGraphicsPipeline( + const char* shader, size_t shaderLength, LPCWSTR vsEntry, LPCWSTR psEntry, ID3D12PipelineState** ppPipeline) + { + // Define the vertex input layout. 
+ D3D12_INPUT_ELEMENT_DESC inputElementDescs[] = { + {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, + {"COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, + {"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 28, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}}; + + std::vector vertexShader, pixelShader, err; + + DxcCompile(shader, vsEntry, L"vs_6_0", L"", nullptr, 0, vertexShader); + DxcCompile(shader, psEntry, L"ps_6_0", L"", nullptr, 0, pixelShader); + + // Describe and create the graphics pipeline state object (PSO). + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.InputLayout = {inputElementDescs, _countof(inputElementDescs)}; + psoDesc.pRootSignature = m_rootSignature.Get(); + psoDesc.VS = CD3DX12_SHADER_BYTECODE(vertexShader.data(), vertexShader.size()); + psoDesc.PS = CD3DX12_SHADER_BYTECODE(pixelShader.data(), pixelShader.size()); + psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); + psoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); + psoDesc.DepthStencilState.DepthEnable = FALSE; + psoDesc.DepthStencilState.StencilEnable = FALSE; + psoDesc.SampleMask = UINT_MAX; + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + psoDesc.NumRenderTargets = 1; + psoDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; + psoDesc.SampleDesc.Count = 1; + + ThrowIfFailed(m_device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(ppPipeline))); + } + + void CreateComputePipeline(const char* shader, size_t length, LPCWSTR entry, ID3D12PipelineState** ppPipeline) + { + std::vector computeShader; + DxcCompile(shader, entry, L"cs_6_0", L"", nullptr, 0, computeShader); + + // Describe and create the graphics pipeline state object (PSO). 
+ D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.CS = CD3DX12_SHADER_BYTECODE(computeShader.data(), computeShader.size()); + psoDesc.NodeMask = 1; + psoDesc.pRootSignature = m_rootSignatureCompute.Get(); + + ThrowIfFailed(m_device->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(ppPipeline))); + } + + void UpdatePipeline(uint64_t frameIndex, uint64_t completedFrameIndex) + { + RpsRuntimeResource backBuffers[DXGI_MAX_SWAP_CHAIN_BUFFERS]; + RpsResourceDesc backBufferDesc = {}; + + GetBackBuffers(backBufferDesc, backBuffers); + + TestRpsDownsample::UpdateRpsPipeline(frameIndex, completedFrameIndex, backBufferDesc, backBuffers); + } + +private: + ComPtr m_rootSignature; + ComPtr m_rootSignatureCompute; + ComPtr m_defaultPipelineState; + ComPtr m_downsamplePipelineState; + ComPtr m_downsampleComputePipelineState; + + ComPtr m_vertexBuffer; + D3D12_VERTEX_BUFFER_VIEW m_triangleBufferView; + D3D12_VERTEX_BUFFER_VIEW m_quadsBufferView; + ComPtr m_constantBuffer; + uint8_t* m_constantBufferCpuVA; + uint32_t m_frameConstantUsage = 0; + ComPtr m_texture; + DescriptorTable m_checkerboardTextureDescriptor; + + std::vector m_fenceSignalInfos; +}; + +TEST_CASE(TEST_APP_NAME) +{ + TestD3D12Downsample renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + runInfo.numFramesToRender = g_exitAfterFrame; + runInfo.width = 1280; + runInfo.height = 720; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} diff --git a/tests/gui/test_downsample_shared.h b/tests/gui/test_downsample_shared.h new file mode 100644 index 0000000..0160e28 --- /dev/null +++ b/tests/gui/test_downsample_shared.h @@ -0,0 +1,236 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#pragma once + +#include "utils/rps_test_host.h" + +RPS_DECLARE_RPSL_ENTRY(test_downsample, downsample); + +static const char c_DefaultShader[] = R"( +[[vk::binding(0, 0)]]cbuffer SceneConstantBuffer : register(b0) +{ + float4x4 offset; + float4 color; + float aspectRatio; +}; + +struct PSInput +{ + float4 position : SV_POSITION; + float4 color : COLOR; + float4 uv : TEXCOORD; +}; + +[[vk::binding(1, 0)]] Texture2D g_texture : register(t0); +[[vk::binding(2, 0)]] SamplerState g_sampler : register(s0); + +PSInput VSMain( + [[vk::location(0)]] float4 position : POSITION, + [[vk::location(1)]] float4 vertexColor : COLOR, + [[vk::location(2)]] float4 uv : TEXCOORD) +{ + PSInput result; + + position.y *= aspectRatio; + result.position = mul(offset, position); + result.color = vertexColor * color; + result.uv = uv; + + return result; +} + +float4 PSMain(PSInput input) : SV_TARGET +{ + return g_texture.Sample(g_sampler, input.uv.xy) * input.color; +} +)"; + +static const char c_DownsampleShader[] = R"( +[[vk::binding(0, 0)]] cbuffer DownsampleConstantBuffer : register(b0) +{ + float2 invSize; +}; + +struct PSInput +{ + float4 position : SV_POSITION; + float4 color : COLOR; + float4 uv : TEXCOORD; +}; + +[[vk::binding(1, 0)]] Texture2D g_texture : register(t0); +[[vk::binding(2, 0)]] SamplerState g_sampler : register(s0); + +PSInput VSMain( + [[vk::location(0)]] float4 position : POSITION, + [[vk::location(1)]] float4 color : COLOR, + [[vk::location(2)]] float4 uv : TEXCOORD) +{ + PSInput result; + + result.position = position; + position.w = 1.f; + result.color = color; + result.uv = uv; + + return result; +} + +static float2 offsets[9] = { + float2( 1, 1), float2( 0, 1), float2(-1, 1), + float2( 1, 0), float2( 0, 0), float2(-1, 0), + float2( 1,-1), float2( 0,-1), float2(-1,-1) + }; + +float4 PSMain(PSInput input) : SV_Target +{ + float4 color = float4(0,0,0,0); + + for(int i=0;i<9;i++) + color += g_texture.SampleLevel(g_sampler, input.uv.xy + (2 * invSize * 
offsets[i]), 0 ); + return color / 9.0f; +} + +[[vk::binding(3, 0)]] RWTexture2D g_textureOut : register(u0); + +[numthreads(8, 8, 1)] +void CSMain(uint2 dtId : SV_DispatchThreadID) +{ + uint2 inputSize; + g_textureOut.GetDimensions(inputSize.x, inputSize.y); + + PSInput psInput = (PSInput)0; + psInput.uv.xy = 1.0f - dtId.xy / float2(inputSize); + + g_textureOut[dtId] = PSMain(psInput); +} +)"; + +#define TEST_APP_NAME_RAW "TestDownsample" + +class TestRpsDownsample : public RpsTestHost +{ +public: + static const UINT GeoLimit = 25; + + struct GeoConstantBuffer + { + XMFLOAT4X4 offset; + XMFLOAT4 color; + float aspectRatio; + }; + + struct Vertex + { + XMFLOAT3 position; + XMFLOAT4 color; + XMFLOAT2 uv; + }; + +public: + + TestRpsDownsample() + : m_translation(0.f) + , m_rotation(0.f) + { + } + +protected: + + void OnInit(RpsDevice hRpsDevice) + { + m_quadColor[0] = XMFLOAT4(1.f, 1.f, 1.f, 1.f); + m_quadColor[1] = XMFLOAT4(1.f, 1.f, 0.f, 1.f); + m_quadColor[2] = XMFLOAT4(0.f, 1.f, 1.f, 1.f); + m_quadColor[3] = XMFLOAT4(1.f, 0.f, 1.f, 1.f); + + UpdateGeometryCount(2, 2); + + RpsTestHost::OnInit(hRpsDevice, rpsTestLoadRpslEntry(test_downsample, downsample)); + } + + void UpdateRpsPipeline(uint64_t frameIndex, + uint64_t completedFrameIndex, + const RpsResourceDesc& backBufferDesc, + RpsRuntimeResource* pBackBuffers) + { + const RpsBool useCompute = m_bUseAsyncCompute ? RPS_TRUE : RPS_FALSE; + const RpsBool useArray = m_bUseArrayMips ? 
RPS_TRUE : RPS_FALSE; + + const RpsRuntimeResource* argResources[] = {pBackBuffers}; + RpsConstant argData[] = {&backBufferDesc, &useCompute, &useArray, &m_NumGeos}; + + RpsTestHost::OnUpdate(frameIndex, completedFrameIndex, _countof(argData), argData, argResources); + } + + void OnUpdate(uint32_t frameIndex, uint32_t width, uint32_t height) + { + const float translationSpeed = 0.01f; + const float rotationSpeed = 0.02f; + const float offsetBounds = 1.4f; + + //m_rotation += rotationSpeed; + m_translation += translationSpeed; + if (m_translation > offsetBounds) + { + m_translation = -offsetBounds; + } + + XMMATRIX transform = XMMatrixAffineTransformation2D( + XMVectorSplatOne(), XMVectorZero(), m_rotation, XMVectorSet(m_translation, 0.f, 0.f, 0.f)); + + for (UINT g = 0; g < 4; g++) + { + XMStoreFloat4x4(&m_TriangleCbData[g].offset, transform); + m_TriangleCbData[g].color = m_quadColor[g]; + m_TriangleCbData[g].aspectRatio = static_cast(width) / height; + } + + UpdateGeometryCount((1 + (frameIndex >> 5)) % 5, (1 + (frameIndex >> 5)) % 5); + + m_bUseArrayMips = (frameIndex / 50) & 1; + m_bUseAsyncCompute = (frameIndex >> 5) >= 5; + } + +private: + void UpdateGeometryCount(uint32_t rows, uint32_t cols) + { + assert(rows * cols <= GeoLimit); + + float cellWidth = 2.f / cols; + float cellHeight = 2.f / rows; + + m_NumGeos = rows * cols; + m_quadScale[0] = cellWidth / 2.f; + m_quadScale[1] = cellHeight / 2.f; + + for (UINT r = 0; r < rows; r++) + { + for (UINT c = 0; c < cols; c++) + { + m_quadOffsets[r * cols + c][0] = -1.f + c * cellWidth + m_quadScale[0]; + m_quadOffsets[r * cols + c][1] = 1.f - r * cellHeight - m_quadScale[1]; + } + } + } + +protected: + float m_translation; + float m_rotation; + GeoConstantBuffer m_TriangleCbData[4]; + + float m_quadScale[2]; + float m_quadOffsets[GeoLimit][2]; + XMFLOAT4 m_quadColor[4]; + + bool m_bUseAsyncCompute = false; + bool m_bUseArrayMips = false; + bool m_bUseScheduler = true; + bool m_bUpdateRPSPipelineEveryFrame = false; 
+ UINT m_NumGeos = 0; +}; diff --git a/tests/gui/test_downsample_vk.cpp b/tests/gui/test_downsample_vk.cpp new file mode 100644 index 0000000..26a0e69 --- /dev/null +++ b/tests/gui/test_downsample_vk.cpp @@ -0,0 +1,580 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifdef _WIN32 +#define VK_USE_PLATFORM_WIN32_KHR +#else +#error "TODO" +#endif + +#define RPS_VK_RUNTIME 1 + +#include "test_downsample_shared.h" +#include "utils/rps_test_win32.h" +#include "utils/rps_test_vk_renderer.h" + +class TestVkDownsample : public RpsTestVulkanRenderer, public TestRpsDownsample +{ +protected: + virtual void OnInit(VkCommandBuffer initCmdList, InitTempResources& tempResources) override + { + LoadAssets(initCmdList, tempResources); + + TestRpsDownsample::OnInit(rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); })); + } + + virtual void BindNodes(RpsSubprogram hRpslEntry) override + { + RpsResult result = + rpsProgramBindNode(rpsRenderGraphGetMainEntry(GetRpsRenderGraph()), "Quads", &DrawQuadsCb, this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode(rpsRenderGraphGetMainEntry(GetRpsRenderGraph()), "Geo", &DrawGeoCb, this); + REQUIRE(result == RPS_OK); + + result = + rpsProgramBindNode(rpsRenderGraphGetMainEntry(GetRpsRenderGraph()), "Downsample", &DrawDownsampleCb, this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode( + rpsRenderGraphGetMainEntry(GetRpsRenderGraph()), "DownsampleCompute", &ComputeDownsampleCb, this); + REQUIRE(result == RPS_OK); + } + + virtual void OnCleanUp() override + { + TestRpsDownsample::OnDestroy(); + + vkDestroyImageView(m_device, m_checkerTextureView, nullptr); + vkDestroyImage(m_device, m_checkerTexture, nullptr); + 
vkDestroyBuffer(m_device, m_vertexBuffer, nullptr); + vkDestroyPipeline(m_device, m_psoCompute, nullptr); + vkDestroyPipelineLayout(m_device, m_pipelineLayout, nullptr); + vkDestroyDescriptorSetLayout(m_device, m_descriptorSetLayout, nullptr); + vkDestroySampler(m_device, m_sampler, nullptr); + + if (m_psoDefault != VK_NULL_HANDLE) + { + vkDestroyPipeline(m_device, m_psoDefault, nullptr); + } + + if (m_psoGfxDownsample != VK_NULL_HANDLE) + { + vkDestroyPipeline(m_device, m_psoGfxDownsample, nullptr); + } + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + TestRpsDownsample::OnUpdate(frameIndex, m_width, m_height); + UpdatePipeline(frameIndex, CalcGuaranteedCompletedFrameIndexForRps()); + } + + virtual void OnRender(uint32_t frameIndex) override + { + ExecuteRenderGraph(frameIndex, GetRpsRenderGraph()); + } + +private: + void CreateDefaultPso(const RpsCmdCallbackContext* pContext) + { + if (!m_psoDefault) + { + VkRenderPass rp; + auto result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + CreateGraphicsPipeline(c_DefaultShader, L"VSMain", L"PSMain", rp, &m_psoDefault); + } + } + + void CreateGfxDownsamplePso(const RpsCmdCallbackContext* pContext) + { + if (!m_psoGfxDownsample) + { + VkRenderPass rp; + auto result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + CreateGraphicsPipeline(c_DownsampleShader, L"VSMain", L"PSMain", rp, &m_psoGfxDownsample); + } + } + + void DrawGeo(const RpsCmdCallbackContext* pContext) + { + CreateDefaultPso(pContext); + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + RpsVariable cmdArg = rpsCmdGetArg(pContext, 1); + uint32_t triangleIndex = *static_cast(cmdArg); + + uint32_t triangleDataIndex = triangleIndex % _countof(m_TriangleCbData); + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_descriptorSetLayout, 1, &ds)); + + VkWriteDescriptorSet writeDescriptorSet[2] = {}; + + VkDescriptorBufferInfo bufInfo = + 
AllocAndWriteFrameConstants(&m_TriangleCbData[triangleDataIndex], sizeof(GeoConstantBuffer)); + AppendWriteDescriptorSetBuffers(&writeDescriptorSet[0], ds, 0, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &bufInfo); + + VkDescriptorImageInfo imageInfo = { + VK_NULL_HANDLE, m_checkerTextureView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + AppendWriteDescriptorSetImages(&writeDescriptorSet[1], ds, 1, 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &imageInfo); + + vkUpdateDescriptorSets(m_device, _countof(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + vkCmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayout, 0, 1, &ds, 0, nullptr); + + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_psoDefault); + vkCmdBindVertexBuffers( + cmdBuf, 0 /* firstBinding */, 1 /* bindingCount */, &m_vertexBuffer, &m_trisBufferOffset); + + vkCmdDraw(cmdBuf, 3 * (triangleDataIndex + 1), 1, 0, 0); + } + + void DrawQuads(const RpsCmdCallbackContext* pContext) + { + CreateDefaultPso(pContext); + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + VkImageView texView; + RpsResult result = rpsVKGetCmdArgImageView(pContext, 1, &texView); + REQUIRE(result == RPS_OK); + + RpsVariable cmdArg = rpsCmdGetArg(pContext, 2); + uint32_t quadIndex = *static_cast(cmdArg); + + // quad transformation matrix. 
+ GeoConstantBuffer data; + XMStoreFloat4x4(&data.offset, + XMMatrixAffineTransformation2D( + XMVectorSet(m_quadScale[0], m_quadScale[1], 1.0f, 1.0f), + XMVectorZero(), + 0.0f, + XMVectorSet(m_quadOffsets[quadIndex][0], m_quadOffsets[quadIndex][1], 0.0f, 0.0f))); + data.color = XMFLOAT4(1.0f, 1.0f, 1.0f, 1.0f); + data.aspectRatio = 1.0f; + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_descriptorSetLayout, 1, &ds)); + + VkWriteDescriptorSet writeDescriptorSet[2] = {}; + + VkDescriptorBufferInfo bufInfo = AllocAndWriteFrameConstants(&data, sizeof(data)); + AppendWriteDescriptorSetBuffers(&writeDescriptorSet[0], ds, 0, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &bufInfo); + + VkDescriptorImageInfo imageInfo = {VK_NULL_HANDLE, texView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + AppendWriteDescriptorSetImages(&writeDescriptorSet[1], ds, 1, 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &imageInfo); + + vkUpdateDescriptorSets(m_device, _countof(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + vkCmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayout, 0, 1, &ds, 0, nullptr); + + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_psoDefault); + vkCmdBindVertexBuffers( + cmdBuf, 0 /* firstBinding */, 1 /* bindingCount */, &m_vertexBuffer, &m_quadsBufferOffset); + + vkCmdDraw(cmdBuf, 6, 1, 0, 0); + } + + void DrawDownsample(const RpsCmdCallbackContext* pContext) + { + CreateGfxDownsamplePso(pContext); + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + VkImageView texView; + RpsResult result = rpsVKGetCmdArgImageView(pContext, 1, &texView); + REQUIRE(result == RPS_OK); + + RpsVariable cmdArg = rpsCmdGetArg(pContext, 2); + XMFLOAT2 invSize = *static_cast(cmdArg); + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_descriptorSetLayout, 1, &ds)); + + VkWriteDescriptorSet writeDescriptorSet[2] = {}; + + VkDescriptorBufferInfo bufInfo = AllocAndWriteFrameConstants(&invSize, 
sizeof(invSize)); + AppendWriteDescriptorSetBuffers(&writeDescriptorSet[0], ds, 0, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &bufInfo); + + VkDescriptorImageInfo imageInfo = {VK_NULL_HANDLE, texView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + AppendWriteDescriptorSetImages(&writeDescriptorSet[1], ds, 1, 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &imageInfo); + + vkUpdateDescriptorSets(m_device, _countof(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + vkCmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayout, 0, 1, &ds, 0, nullptr); + + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_psoGfxDownsample); + vkCmdBindVertexBuffers( + cmdBuf, 0 /* firstBinding */, 1 /* bindingCount */, &m_vertexBuffer, &m_quadsBufferOffset); + + vkCmdDraw(cmdBuf, 6, 1, 0, 0); + } + + void ComputeDownsample(const RpsCmdCallbackContext* pContext) + { + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + VkImageView dstImage; + VkImageView srcImage; + + RpsResult result = rpsVKGetCmdArgImageView(pContext, 0, &dstImage); + REQUIRE(result == RPS_OK); + + result = rpsVKGetCmdArgImageView(pContext, 1, &srcImage); + REQUIRE(result == RPS_OK); + + RpsVariable cmdArg = rpsCmdGetArg(pContext, 2); + XMFLOAT2 invSize = *static_cast(cmdArg); + + cmdArg = rpsCmdGetArg(pContext, 3); + XMUINT2 dispatchGroups = *static_cast(cmdArg); + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_descriptorSetLayout, 1, &ds)); + + VkWriteDescriptorSet writeDescriptorSet[3] = {}; + + VkDescriptorImageInfo imageInfo = {VK_NULL_HANDLE, srcImage, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + AppendWriteDescriptorSetImages(&writeDescriptorSet[0], ds, 1, 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &imageInfo); + + VkDescriptorImageInfo imageInfo2 = {VK_NULL_HANDLE, dstImage, VK_IMAGE_LAYOUT_GENERAL}; + AppendWriteDescriptorSetImages(&writeDescriptorSet[1], ds, 3, 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageInfo2); + + VkDescriptorBufferInfo 
bufInfo = AllocAndWriteFrameConstants(&invSize, sizeof(invSize)); + AppendWriteDescriptorSetBuffers(&writeDescriptorSet[2], ds, 0, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &bufInfo); + + vkUpdateDescriptorSets(m_device, _countof(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + vkCmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipelineLayout, 0, 1, &ds, 0, nullptr); + + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_psoCompute); + + vkCmdDispatch(cmdBuf, dispatchGroups.x, dispatchGroups.y, 1); + } + + static void DrawGeoCb(const RpsCmdCallbackContext* pContext) + { + static_cast(pContext->pCmdCallbackContext)->DrawGeo(pContext); + } + + static void DrawQuadsCb(const RpsCmdCallbackContext* pContext) + { + static_cast(pContext->pCmdCallbackContext)->DrawQuads(pContext); + } + + static void DrawDownsampleCb(const RpsCmdCallbackContext* pContext) + { + static_cast(pContext->pCmdCallbackContext)->DrawDownsample(pContext); + } + + static void ComputeDownsampleCb(const RpsCmdCallbackContext* pContext) + { + static_cast(pContext->pCmdCallbackContext)->ComputeDownsample(pContext); + } + + void CreateComputePipeline(const char* shader, const WCHAR* csEntry, VkPipeline* pPso) + { + const DxcDefine defs[] = {{L"VULKAN", L"1"}}; + std::vector csCode; + DxcCompileToSpirv(shader, csEntry, L"cs_6_0", L"", defs, _countof(defs), csCode); + + VkShaderModuleCreateInfo smCI = {}; + smCI.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smCI.pCode = reinterpret_cast(csCode.data()); + smCI.codeSize = csCode.size(); + + VkShaderModule csModule; + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &csModule)); + + char csName[128]; + sprintf_s(csName, "%S", csEntry); + + VkComputePipelineCreateInfo compPsoCI = {}; + compPsoCI.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + compPsoCI.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + compPsoCI.stage.module = csModule; + compPsoCI.stage.pName = csName; + 
compPsoCI.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT; + compPsoCI.layout = m_pipelineLayout; + + ThrowIfFailedVK(vkCreateComputePipelines(m_device, VK_NULL_HANDLE, 1, &compPsoCI, nullptr, pPso)); + vkDestroyShaderModule(m_device, csModule, nullptr); + } + + void CreateGraphicsPipeline( + const char* shader, const WCHAR* vsEntry, const WCHAR* psEntry, VkRenderPass renderPass, VkPipeline* pPso) + { + VkVertexInputBindingDescription vibd[1]; + vibd[0].binding = 0; + vibd[0].stride = sizeof(Vertex); + vibd[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX; + + VkVertexInputAttributeDescription viad[3]; + viad[0].location = 0; // POSITION + viad[0].binding = 0; + viad[0].format = VK_FORMAT_R32G32B32_SFLOAT; + viad[0].offset = offsetof(Vertex, position); + viad[1].location = 1; // COLOR + viad[1].binding = 0; + viad[1].format = VK_FORMAT_R32G32B32A32_SFLOAT; + viad[1].offset = offsetof(Vertex, color); + viad[2].location = 2; // TEXCOORD + viad[2].binding = 0; + viad[2].format = VK_FORMAT_R32G32_SFLOAT; + viad[2].offset = offsetof(Vertex, uv); + + // vertex input state + VkPipelineVertexInputStateCreateInfo vi = {}; + vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vi.vertexBindingDescriptionCount = _countof(vibd); + vi.pVertexBindingDescriptions = vibd; + vi.vertexAttributeDescriptionCount = _countof(viad); + vi.pVertexAttributeDescriptions = viad; + + // input assembly state + VkPipelineInputAssemblyStateCreateInfo ia = {}; + ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + + // rasterization state + VkPipelineRasterizationStateCreateInfo rs = {}; + rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rs.polygonMode = VK_POLYGON_MODE_FILL; + rs.cullMode = VK_CULL_MODE_BACK_BIT; + rs.frontFace = VK_FRONT_FACE_CLOCKWISE; + rs.lineWidth = 1.0f; + + // Color blend state + VkPipelineColorBlendAttachmentState bs[1] = {}; + bs[0].blendEnable = VK_FALSE; + 
bs[0].colorWriteMask = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + VkPipelineColorBlendStateCreateInfo cb = {}; + cb.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + cb.attachmentCount = 1; + cb.pAttachments = bs; + + // view port state + VkPipelineViewportStateCreateInfo vp = {}; + vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + vp.viewportCount = 1; + vp.scissorCount = 1; + + // Disable all depth testing + VkPipelineDepthStencilStateCreateInfo ds = {}; + ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + + // multi sample state + VkPipelineMultisampleStateCreateInfo ms = {}; + ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + + // viewport and scissor will be dynamic state. + std::vector dynamicStateEnables = {VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR}; + VkPipelineDynamicStateCreateInfo dynamicState = {}; + dynamicState.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamicState.pNext = NULL; + dynamicState.pDynamicStates = dynamicStateEnables.data(); + dynamicState.dynamicStateCount = (uint32_t)dynamicStateEnables.size(); + + // Load shaders + VkShaderModule vsModule = VK_NULL_HANDLE, psModule = VK_NULL_HANDLE; + std::vector vsCode, psCode; + const DxcDefine defs[] = {{L"VULKAN", L"1"}}; + DxcCompileToSpirv(shader, vsEntry, L"vs_6_0", L"", defs, _countof(defs), vsCode); + DxcCompileToSpirv(shader, psEntry, L"ps_6_0", L"", defs, _countof(defs), psCode); + + VkShaderModuleCreateInfo smCI = {}; + smCI.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smCI.pCode = reinterpret_cast(vsCode.data()); + smCI.codeSize = vsCode.size(); + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &vsModule)); + + smCI.pCode = reinterpret_cast(psCode.data()); + smCI.codeSize = psCode.size(); + ThrowIfFailedVK(vkCreateShaderModule(m_device, 
&smCI, nullptr, &psModule)); + + char vsName[128]; + char psName[128]; + sprintf_s(vsName, "%S", vsEntry); + sprintf_s(psName, "%S", psEntry); + + VkPipelineShaderStageCreateInfo shaderStages[3] = {}; + shaderStages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[0].module = vsModule; + shaderStages[0].pName = vsName; + shaderStages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + shaderStages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[1].module = psModule; + shaderStages[1].pName = psName; + shaderStages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + + uint32_t numShaderStages = 2; + // End load shaders + + VkGraphicsPipelineCreateInfo psoCI = {}; + psoCI.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + psoCI.stageCount = numShaderStages; + psoCI.pStages = shaderStages; + psoCI.pVertexInputState = &vi; + psoCI.pInputAssemblyState = &ia; + psoCI.pRasterizationState = &rs; + psoCI.pColorBlendState = &cb; + psoCI.pMultisampleState = &ms; + psoCI.pViewportState = &vp; + psoCI.pDepthStencilState = &ds; + psoCI.pDynamicState = &dynamicState; + + psoCI.renderPass = renderPass; + psoCI.layout = m_pipelineLayout; + + ThrowIfFailedVK(vkCreateGraphicsPipelines(m_device, VK_NULL_HANDLE, 1, &psoCI, nullptr, pPso)); + + vkDestroyShaderModule(m_device, vsModule, nullptr); + vkDestroyShaderModule(m_device, psModule, nullptr); + } + + void LoadAssets(VkCommandBuffer initCmdList, InitTempResources& tempResources) + { + VkSamplerCreateInfo samplerCI = {VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO}; + samplerCI.magFilter = VK_FILTER_LINEAR; // VK_FILTER_NEAREST is same as D3D12 point filtering. 
+ samplerCI.minFilter = VK_FILTER_LINEAR; + samplerCI.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + samplerCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCI.mipLodBias = 0.0f; + samplerCI.anisotropyEnable = VK_FALSE; + samplerCI.compareOp = VK_COMPARE_OP_NEVER; + samplerCI.minLod = 0.0f; + samplerCI.maxLod = FLT_MAX; + samplerCI.unnormalizedCoordinates = VK_FALSE; // UV in range [0,1] + + ThrowIfFailedVK(vkCreateSampler(m_device, &samplerCI, nullptr, &m_sampler)); + + // Setup descriptor set layout. + VkDescriptorSetLayoutBinding layoutBinding[4] = {}; + layoutBinding[0].binding = 2; + layoutBinding[0].descriptorCount = 1; + layoutBinding[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + layoutBinding[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT; + layoutBinding[0].pImmutableSamplers = &m_sampler; + layoutBinding[1].binding = 1; + layoutBinding[1].descriptorCount = 1; + layoutBinding[1].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + layoutBinding[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT; + layoutBinding[2].binding = 3; + layoutBinding[2].descriptorCount = 1; + layoutBinding[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + layoutBinding[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + layoutBinding[3].binding = 0; + layoutBinding[3].descriptorCount = 1; + layoutBinding[3].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + layoutBinding[3].stageFlags = + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT; + + VkDescriptorSetLayoutCreateInfo setLayoutCI = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + setLayoutCI.pBindings = layoutBinding; + setLayoutCI.bindingCount = _countof(layoutBinding); + + ThrowIfFailedVK(vkCreateDescriptorSetLayout(m_device, &setLayoutCI, nullptr, &m_descriptorSetLayout)); 
+ + VkPipelineLayoutCreateInfo pipelineCI = {}; + pipelineCI.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pipelineCI.setLayoutCount = 1; + pipelineCI.pSetLayouts = &m_descriptorSetLayout; + + ThrowIfFailedVK(vkCreatePipelineLayout(m_device, &pipelineCI, nullptr, &m_pipelineLayout)); + + // create compute pipeline + CreateComputePipeline(c_DownsampleShader, L"CSMain", &m_psoCompute); + + // Define the geometry for a triangle. + Vertex triangleVertices[] = { + // triangle + {{0.0f, 0.25f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.5f, 0.0f}}, + {{0.25f, -0.25f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{-0.25f, -0.25f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + + {{0.0f, 0.25f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.5f, 0.0f}}, + {{0.5f, 0.25f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + {{0.25f, -0.25f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + + {{0.5f, 0.25f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.5f, 0.0f}}, + {{0.75f, -0.25f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{0.25f, -0.25f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 1.0f}}, + + {{0.5f, 0.25f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.5f, 0.0f}}, + {{1.0f, 0.25f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 1.0f}}, + {{0.75f, -0.25f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + + // quad 0 + {{-1.0f, 1.0, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {0.0f, 0.0f}}, + {{1.f, -1.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f}}, + {{-1.f, -1.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + {{-1.0f, 1.0, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {0.0f, 0.0f}}, + {{1.f, 1.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 0.0f}}, + {{1.f, -1.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f}}, + }; + + // setup offsets for when vkCmdBindVertexBuffers + m_trisBufferOffset = 0; + m_quadsBufferOffset = sizeof(Vertex) * 12; + + const UINT vertexBufferSize = sizeof(triangleVertices); + + m_vertexBuffer = CreateAndBindStaticBuffer( + vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | 
VK_BUFFER_USAGE_TRANSFER_DST_BIT); + + auto vbAlloc = AllocAndWriteFrameConstants(triangleVertices, vertexBufferSize); + VkBufferCopy vbCopy; + vbCopy.srcOffset = vbAlloc.offset; + vbCopy.dstOffset = 0; + vbCopy.size = vertexBufferSize; + vkCmdCopyBuffer(initCmdList, vbAlloc.buffer, m_vertexBuffer, 1, &vbCopy); + + // Create checkerboard texture + float tintColor[] = {1.0f, 1.0f, 1.0f, 1.0f}; + RpsTestVulkanRenderer::CreateStaticCheckerboardTexture( + m_checkerTextureView, m_checkerTexture, initCmdList, tempResources, 256, 256, tintColor); + } + + void UpdatePipeline(uint64_t frameIndex, uint64_t completedFrameIndex) + { + RpsResourceDesc backBufferDesc = {}; + std::vector backBuffers = GetBackBuffers(backBufferDesc); + TestRpsDownsample::UpdateRpsPipeline(frameIndex, completedFrameIndex, backBufferDesc, backBuffers.data()); + } + +private: + VkSampler m_sampler = VK_NULL_HANDLE; + VkImage m_checkerTexture = VK_NULL_HANDLE; + VkImageView m_checkerTextureView = VK_NULL_HANDLE; + VkDescriptorSetLayout m_descriptorSetLayout = VK_NULL_HANDLE; + VkPipelineLayout m_pipelineLayout = VK_NULL_HANDLE; + VkPipeline m_psoDefault = VK_NULL_HANDLE; + VkPipeline m_psoGfxDownsample = VK_NULL_HANDLE; + VkPipeline m_psoCompute = VK_NULL_HANDLE; + VkBuffer m_vertexBuffer = VK_NULL_HANDLE; + VkDeviceSize m_trisBufferOffset = 0; + VkDeviceSize m_quadsBufferOffset = 0; +}; + +TEST_CASE(TEST_APP_NAME) +{ + TestVkDownsample renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + runInfo.numFramesToRender = g_exitAfterFrame; + runInfo.width = 1280; + runInfo.height = 720; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} \ No newline at end of file diff --git a/tests/gui/test_mrt_viewport_clear.rpsl b/tests/gui/test_mrt_viewport_clear.rpsl new file mode 100644 index 0000000..c5ce325 --- /dev/null +++ b/tests/gui/test_mrt_viewport_clear.rpsl @@ -0,0 +1,227 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +// No clear on rt2/rt4 to test discontinuous clear color indices +// and potential artifact presence. +node test_unordered_5_mrt_no_ds( + rtv rt1 : SV_Target1, + RpsViewport vp : SV_Viewport, + float4 clearColor1 : SV_ClearColor1, + rtv rt3 : SV_Target3, + rtv rt2 : SV_Target2, + float4 clearColor0 : SV_ClearColor0, + float4 clearColor3 : SV_ClearColor3, + rtv rt4 : SV_Target4, + rtv rt0 : SV_Target0, + uint4 scissor : SV_ScissorRect); + +node test_unordered_3_mrt_ds( + rtv rt2 : SV_Target2, + dsv ds : SV_DepthStencil, + float clearDepth : SV_ClearDepth, + uint clearStencil : SV_ClearStencil, + rtv rt0 : SV_Target0, + rtv rt1 : SV_Target1); + +node test_bind_dsv_write_depth_stencil(rtv rt : SV_Target0, [readwrite(depth, stencil)] texture ds : SV_DepthStencil); +node test_bind_dsv_read_depth_write_stencil([readonly(ps)] texture depthSrv, rtv rt : SV_Target0, [readonly(depth)][readwrite(stencil)] texture ds : SV_DepthStencil); +node test_bind_dsv_read_depth_stencil([readonly(ps)] texture depthSrv, [readonly(ps)] texture stencilSrv, rtv rt : SV_Target0, [readonly(depth, stencil)] texture ds : SV_DepthStencil); + +node test_mrt_with_array( + rtv rtArr0[3] : SV_Target0, + rtv rts1 : SV_Target5, + srv src[12], + rtv rtArr1[2] : SV_Target3); + +node test_large_array( + rtv rtArr[22], + srv src[48]); + +node test_rt_array(rtv rt0 : SV_Target0, float4 clearCol : SV_ClearColor0); + +node blt_to_swapchain(rtv dst : SV_Target0, srv src, RpsViewport dstViewport : SV_Viewport); +node draw_cube_to_swapchain(rtv dst : SV_Target, [readonly(ps, cubemap)] texture src, RpsViewport dstViewport : SV_Viewport); + +void test_unordered_mrt_and_clear(texture backbuffer, uint4 inViewport) +{ + const uint w = inViewport.z; + const uint h = inViewport.w; + + texture rt0 = 
create_tex2d(RPS_FORMAT_R8G8B8A8_UNORM, w, h); + texture rt1 = create_tex2d(RPS_FORMAT_R16G16B16A16_FLOAT, w, h); + texture rt23 = create_tex2d(RPS_FORMAT_B8G8R8A8_UNORM, w, h, 1, 2); + texture rt4 = create_tex2d(RPS_FORMAT_R10G10B10A2_UNORM, w, h); + texture ds = create_tex2d(RPS_FORMAT_R32G8X24_TYPELESS, w, h); + + const float4 clear0 = float4(1, 0, 0, 1); + const float4 clear1 = float4(0, 1, 0, 1); + const float4 clear3 = float4(0, 0, 1, 1); + + RpsViewport subViewport = viewport( + inViewport.x + w * 0.1f, + inViewport.y + h * 0.2f, + w * 0.7f, + h * 0.5f); + + test_unordered_5_mrt_no_ds( + rt1, subViewport, clear1, rt23.array(1), rt23.array(0), clear0, clear3, rt4, rt0, uint4(0, 0, w, h)); + + blt_to_swapchain(backbuffer, rt0, viewport(0, 0, w, h)); + blt_to_swapchain(backbuffer, rt1, viewport(w, 0, w, h)); + blt_to_swapchain(backbuffer, rt23.array(0), viewport(w * 2, 0, w, h)); + blt_to_swapchain(backbuffer, rt23.array(1), viewport(w * 3, 0, w, h)); + + blt_to_swapchain(backbuffer, rt4, viewport(0, h, w, h)); + + test_unordered_3_mrt_ds(rt23.array(0), ds.format(RPS_FORMAT_D32_FLOAT_S8X24_UINT), 0.5f, 0x7f, rt0, rt1); + + blt_to_swapchain(backbuffer, rt0, viewport(w, h, w, h)); + blt_to_swapchain(backbuffer, rt1, viewport(w * 2, h, w, h)); + blt_to_swapchain(backbuffer, rt23.array(0), viewport(w * 3, h, w, h)); + + test_rt_array(rt23, float4(0, 1, 1, 1)); + + blt_to_swapchain(backbuffer, rt23.array(0), viewport(0, h * 2, w, h)); + blt_to_swapchain(backbuffer, rt23.array(1), viewport(w, h * 2, w, h)); + + test_unordered_5_mrt_no_ds( + rt1, subViewport, clear1, rt23.array(1), rt23.array(0), clear0, clear3, rt4, rt0, + uint4(w / 3, h / 3, 2 * w / 3, 2 * h / 3)); + + blt_to_swapchain(backbuffer, rt0, viewport(w * 2, h * 2, w, h)); + blt_to_swapchain(backbuffer, rt1, viewport(w * 3, h * 2, w, h)); +} + +texture test_array_node_params(texture backbuffer, uint4 inViewport) +{ + const uint w = inViewport.z; + const uint h = inViewport.w; + + texture cubeMaps = 
create_tex2d(RPS_FORMAT_R8G8B8A8_UNORM, 64, 64, 1, 128); + + // TODO: Add a helper to convert subresource range to array of views and vice versa. + texture rtArr012[3] = { cubeMaps.array(6, 6), cubeMaps.array(2 * 6, 6), cubeMaps.array(3 * 6, 6) }; + texture rt3 = cubeMaps.array(4 * 6, 6); + texture rtArr45[2] = { cubeMaps.array(5 * 6, 6), cubeMaps.array(6 * 6, 6) }; + + texture srvs[12]; + for(uint i = 0; i < 12; i++) + { + uint clearSlice = ((i < 6) ? 0 : 7) * 6 + (i % 6); + clear( cubeMaps.array(clearSlice), float4(i & 1, (i & 2) >> 1, (i & 4) >> 2, 1) ); + + srvs[i] = cubeMaps.array(clearSlice); + } + + test_mrt_with_array( rtArr012, rt3, srvs, rtArr45 ); + + for (uint i = 0; i < 8; i++) + { + draw_cube_to_swapchain(backbuffer, cubeMaps.array(6 * i, 6).cubemap(), viewport(w * (i % 4), h * (3 + i / 4), w, h)); + } + + texture largeRtvArray[22]; + texture largeSrvArray[48]; + + for (uint i = 0; i < 22; i++) + { + largeRtvArray[i] = cubeMaps.array(48 + i); + } + + for (uint i = 0; i < 48; i++) + { + largeSrvArray[i] = cubeMaps.array(i); + } + + test_large_array(largeRtvArray, largeSrvArray); + + return cubeMaps; +} + +void test_depth_stencil_rw(texture backbuffer, uint4 inViewport) +{ + const uint w = inViewport.z; + const uint h = inViewport.w; + + texture offScreenImg = create_tex2d(RPS_FORMAT_R8G8B8A8_UNORM, w, h); + texture depthStencil = create_tex2d(RPS_FORMAT_D32_FLOAT_S8X24_UINT, w, h); + + clear(offScreenImg, float4(1, 0, 0, 1)); + clear(depthStencil, 1.0f, 0); + + test_bind_dsv_write_depth_stencil(offScreenImg, depthStencil); + + blt_to_swapchain(backbuffer, offScreenImg, viewport(0, h * 5, w, h)); + blt_to_swapchain( + backbuffer, depthStencil.format(RPS_FORMAT_R32_FLOAT_X8X24_TYPELESS), viewport(w, h * 5, w, h)); + + test_bind_dsv_read_depth_write_stencil(depthStencil.format(RPS_FORMAT_R32_FLOAT_X8X24_TYPELESS), + offScreenImg, + depthStencil); + + test_bind_dsv_read_depth_stencil(depthStencil.format(RPS_FORMAT_R32_FLOAT_X8X24_TYPELESS), + 
depthStencil.format(RPS_FORMAT_X32_TYPELESS_G8X24_UINT), + offScreenImg, + depthStencil); + + blt_to_swapchain(backbuffer, offScreenImg, viewport(w * 2, h * 5, w, h)); + blt_to_swapchain( + backbuffer, depthStencil.format(RPS_FORMAT_R32_FLOAT_X8X24_TYPELESS), viewport(w * 3, h * 5, w, h)); +} + +node test_buffer_rtv_clear( + [readwrite(rendertarget)] buffer rt0 : SV_Target0, + float4 clearColor1 : SV_ClearColor0); + +node test_buffer_rtv( + [readwrite(rendertarget)] buffer rt0 : SV_Target0, + float4 clearColor1 : SV_ClearColor0); + +void test_buffer_rtv_and_clear(texture backbuffer, uint4 inViewport) +{ + // TODO: We want to add rtv_buf as well as RPS_BUFFER_WHOLE_SIZE defs in RPSL. + const uint w = inViewport.z; + const uint h = inViewport.w; + + const uint rowPitch = ((w * 4) + 255) & ~255; + const uint wAligned = rowPitch / 4; + + const uint bufSize = wAligned * h * 4; + const uint bufOffset = bufSize / 2; + const float4 clear0 = float4(1, 0, 0, 1); + + buffer buf = create_buffer(bufSize, 1, RPS_RESOURCE_FLAG_ROWMAJOR_IMAGE); + buffer bufViewFirst = create_buffer_view(buf, 0, bufOffset, 0, RPS_FORMAT_B8G8R8A8_UNORM); + buffer bufViewSecond = create_buffer_view(buf, bufOffset, bufSize - bufOffset, 0, RPS_FORMAT_B8G8R8A8_UNORM); + + test_buffer_rtv_clear(bufViewFirst, clear0); + test_buffer_rtv(bufViewSecond, clear0); + + texture rt0 = create_tex2d(RPS_FORMAT_R8G8B8A8_UNORM, wAligned, h); + copy_buffer_to_texture( + rt0, uint3(0, 0, 0), buf, 0, rowPitch, uint3(wAligned, h, 1), uint3(0, 0, 0), uint3(wAligned, h, 1)); + + blt_to_swapchain(backbuffer, rt0, viewport(inViewport.x, inViewport.y, w, h)); +} + +export void rps_main([readonly(present)] texture backbuffer, bool bBufferRTVSupported) +{ + ResourceDesc backbufferDesc = backbuffer.desc(); + + uint4 dstViewport = uint4(0, 0, (uint32_t)backbufferDesc.Width / 4, backbufferDesc.Height / 6); + + test_unordered_mrt_and_clear(backbuffer, dstViewport); + + if (!bBufferRTVSupported) + { + 
test_buffer_rtv_and_clear(backbuffer, uint4(0, 0, 256, 120)); + } + + test_array_node_params(backbuffer, dstViewport); + + test_depth_stencil_rw(backbuffer, dstViewport); +} diff --git a/tests/gui/test_mrt_viewport_clear_d3d12.cpp b/tests/gui/test_mrt_viewport_clear_d3d12.cpp new file mode 100644 index 0000000..e5dc3f9 --- /dev/null +++ b/tests/gui/test_mrt_viewport_clear_d3d12.cpp @@ -0,0 +1,593 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#define RPS_D3D12_RUNTIME 1 + +#include "test_mrt_viewport_clear_shared.h" + +#include "utils/rps_test_win32.h" +#include "utils/rps_test_d3d12_renderer.h" + +using namespace DirectX; + +class TestD3D12MrtViewportClear : public RpsTestD3D12Renderer, public TestRpsMrtViewportClear +{ +protected: + virtual void OnInit(ID3D12GraphicsCommandList* pInitCmdList, + std::vector>& tempResources) override + { + LoadAssets(pInitCmdList, tempResources); + + TestRpsMrtViewportClear::Init(rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); })); + } + + virtual void OnPostResize() override + { + } + + virtual void OnCleanUp() override + { + TestRpsMrtViewportClear::OnDestroy(); + + m_rootSignature = nullptr; + m_pipelineStateRtBuffer = nullptr; + m_pipelineStateMrt5NoDS = nullptr; + m_pipelineStateMrt3DS = nullptr; + m_pipelineStateRtArray = nullptr; + m_pipelineStateRtArrayCube = nullptr; + m_pipelineStateBlt = nullptr; + m_pipelineStateBltCube = nullptr; + m_pipelineStateWriteDepthStencil = nullptr; + m_pipelineStateReadDepthWriteStencil = nullptr; + m_pipelineStateReadDepthStencil = nullptr; + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + RpsRuntimeResource backBuffers[DXGI_MAX_SWAP_CHAIN_BUFFERS]; + RpsResourceDesc backBufferDesc; + 
GetBackBuffers(backBufferDesc, backBuffers); + + const bool bBufferRTVSupported = false; + + RpsConstant args[] = {&backBufferDesc, &bBufferRTVSupported}; + const RpsRuntimeResource* argResources[] = {backBuffers, nullptr}; + + uint64_t completedFrameIndex = CalcGuaranteedCompletedFrameIndexForRps(); + + TestRpsMrtViewportClear::OnUpdate( + frameIndex, completedFrameIndex, uint32_t(RPS_TEST_COUNTOF(args)), args, argResources); + + RpsTestD3D12Renderer::OnUpdate(frameIndex); + } + + virtual void OnRender(uint32_t frameIndex) override + { + REQUIRE(RPS_SUCCEEDED(ExecuteRenderGraph(frameIndex, GetRpsRenderGraph()))); + } + +protected: + virtual void BindNodes(RpsSubprogram hRpslEntry) override final + { + TestRpsMrtViewportClear::BindNodes(hRpslEntry); + + RpsResult result = + rpsProgramBindNode(hRpslEntry, "test_buffer_rtv", &TestD3D12MrtViewportClear::DrawRtbuffer, this); + REQUIRE(result == RPS_OK); + + result = + rpsProgramBindNode(hRpslEntry, "test_mrt_with_array", &TestD3D12MrtViewportClear::DrawMrtWithArray, this); + REQUIRE(result == RPS_OK); + + result = + rpsProgramBindNode(hRpslEntry, "blt_to_swapchain", &TestD3D12MrtViewportClear::DrawBlt, this); + REQUIRE(result == RPS_OK); + + result = + rpsProgramBindNode(hRpslEntry, "draw_cube_to_swapchain", &TestD3D12MrtViewportClear::DrawBltCube, this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode(hRpslEntry, + "test_bind_dsv_write_depth_stencil", + &TestD3D12MrtViewportClear::BindDsvWriteDepthStencil, + this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode(hRpslEntry, + "test_bind_dsv_read_depth_write_stencil", + &TestD3D12MrtViewportClear::BindDsvReadDepthWriteStencil, + this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode( + hRpslEntry, "test_bind_dsv_read_depth_stencil", &TestD3D12MrtViewportClear::BindDsvReadDepthStencil, this); + REQUIRE(result == RPS_OK); + } + + void DrawTriangle(ID3D12GraphicsCommandList* pCmdList, ID3D12PipelineState *pipelineState) const + { + 
pCmdList->SetGraphicsRootSignature(m_rootSignature.Get()); + pCmdList->SetPipelineState(pipelineState); + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdList->DrawInstanced(3, 1, 0, 0); + } + + void DrawRtbuffer(const RpsCmdCallbackContext* pContext) + { + CreateRtBuffer(pContext); + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + DrawTriangle(pCmdList, m_pipelineStateRtBuffer.Get()); + } + + virtual void Draw5MrtNoDS(const RpsCmdCallbackContext* pContext) override final + { + Create5MrtNoDS(pContext); + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + DrawTriangle(pCmdList, m_pipelineStateMrt5NoDS.Get()); + } + + virtual void Draw3MrtDS(const RpsCmdCallbackContext* pContext) override final + { + Create3MrtDS(pContext); + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + DrawTriangle(pCmdList, m_pipelineStateMrt3DS.Get()); + } + + virtual void DrawRtArray(const RpsCmdCallbackContext* pContext) override final + { + CreateRtArray(pContext); + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + DrawTriangle(pCmdList, m_pipelineStateRtArray.Get()); + } + + virtual void DrawMrtWithArray(const RpsCmdCallbackContext* pContext) override final + { + CreateMrtWithArray(pContext); + + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + RpsParameterDesc paramDesc; + RpsResult result = rpsCmdGetParamDesc(pContext, 2, &paramDesc); // param index 2 is the `srv src[12]` argument of test_mrt_with_array + REQUIRE(result == RPS_OK); + REQUIRE(paramDesc.arraySize == 12); + + DescriptorTable dt = AllocDynamicDescriptors(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, paramDesc.arraySize); + D3D12_CPU_DESCRIPTOR_HANDLE dstHdl = dt.GetCPU(0); + result = rpsD3D12CopyCmdArgDescriptors(pContext, 2, 0, paramDesc.arraySize, RPS_TRUE, &dstHdl); + REQUIRE(result == RPS_OK); + +
pCmdList->SetGraphicsRootSignature(m_rootSignature.Get()); + BindDescriptorHeaps(pCmdList); + pCmdList->SetGraphicsRootDescriptorTable(1, dt.GetGPU(0)); + pCmdList->SetPipelineState(m_pipelineStateRtArrayCube.Get()); + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdList->DrawInstanced(3, 1, 0, 0); + } + + virtual void DrawLargeArray(const RpsCmdCallbackContext* pContext) override final + { + } + + void DrawBlt(const RpsCmdCallbackContext* pContext, + RpsRuntimeResource dstRuntimeResource, + D3D12_CPU_DESCRIPTOR_HANDLE src, + const ViewportData& dstViewport) + { + CreateBlt(pContext); + + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + RpsCmdViewportInfo viewportScissorInfo = {}; + RpsResult result = rpsCmdGetViewportInfo(pContext, &viewportScissorInfo); + REQUIRE(result == RPS_OK); + REQUIRE(viewportScissorInfo.numViewports == 1); + REQUIRE(dstViewport.data.x == viewportScissorInfo.pViewports[0].x); + REQUIRE(dstViewport.data.y == viewportScissorInfo.pViewports[0].y); + REQUIRE(dstViewport.data.z == viewportScissorInfo.pViewports[0].width); + REQUIRE(dstViewport.data.w == viewportScissorInfo.pViewports[0].height); + + if (m_frameCounter < (m_backBufferCount * 2)) + { + RpsResourceDesc resourceDesc = {}; + RpsRuntimeResource runtimeResource; + REQUIRE(RPS_SUCCEEDED(rpsCmdGetArgResourceDesc(pContext, 0, &resourceDesc))); + REQUIRE(RPS_SUCCEEDED(rpsCmdGetArgResourceDesc(pContext, 1, &resourceDesc))); + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == rpsCmdGetArgResourceDescArray(pContext, 0, 1, &resourceDesc, 1)); + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == rpsCmdGetArgResourceDescArray(pContext, 1, 1, &resourceDesc, 1)); + REQUIRE(RPS_ERROR_TYPE_MISMATCH == + rpsCmdGetArgResourceDesc(pContext, 2, &resourceDesc)); // Not a resource + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == rpsCmdGetArgResourceDescArray(pContext, 4, 1, &resourceDesc, 1)); + + 
REQUIRE(RPS_SUCCEEDED(rpsCmdGetArgRuntimeResource(pContext, 0, &runtimeResource))); + REQUIRE(runtimeResource == dstRuntimeResource); + + ID3D12Resource* pResource = nullptr; + D3D12_CPU_DESCRIPTOR_HANDLE descriptorHdl = {}; + REQUIRE((RPS_SUCCEEDED(rpsD3D12GetCmdArgResource(pContext, 0, &pResource)) && (pResource != nullptr))); + pResource = nullptr; + REQUIRE((RPS_SUCCEEDED(rpsD3D12GetCmdArgResourceArray(pContext, 0, 0, &pResource, 1)) && + (pResource != nullptr))); + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == rpsD3D12GetCmdArgResourceArray(pContext, 0, 1, &pResource, 1)); + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == rpsD3D12GetCmdArgResourceArray(pContext, 0, 0, &pResource, 2)); + + REQUIRE(RPS_SUCCEEDED(rpsD3D12GetCmdArgDescriptor(pContext, 0, &descriptorHdl))); + REQUIRE(RPS_SUCCEEDED(rpsD3D12GetCmdArgDescriptorArray(pContext, 0, 0, &descriptorHdl, 1))); + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == + rpsD3D12GetCmdArgDescriptorArray(pContext, 0, 1, &descriptorHdl, 1)); + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == + rpsD3D12GetCmdArgDescriptorArray(pContext, 0, 0, &descriptorHdl, 2)); + + pResource = nullptr; + descriptorHdl = {}; + REQUIRE((RPS_SUCCEEDED(rpsD3D12GetCmdArgResource(pContext, 1, &pResource)) && (pResource != nullptr))); + REQUIRE(RPS_ERROR_TYPE_MISMATCH == rpsD3D12GetCmdArgResource(pContext, 2, &pResource)); + } + + D3D12_GPU_DESCRIPTOR_HANDLE srvTable = AllocDynamicDescriptorsAndWrite(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, &src, 1); + + pCmdList->SetGraphicsRootSignature(m_rootSignature.Get()); + BindDescriptorHeaps(pCmdList); + + pCmdList->SetPipelineState(m_pipelineStateBlt.Get()); + pCmdList->SetGraphicsRootDescriptorTable(1, srvTable); + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdList->DrawInstanced(3, 1, 0, 0); + } + + void DrawBltCube(const RpsCmdCallbackContext* pContext, + rps::UnusedArg dst, + D3D12_CPU_DESCRIPTOR_HANDLE src, + const ViewportData& dstViewport) + { + CreateBltCube(pContext); + + 
ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + D3D12_GPU_DESCRIPTOR_HANDLE srvTable = + AllocDynamicDescriptorsAndWrite(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, &src, 1); + + pCmdList->SetGraphicsRootSignature(m_rootSignature.Get()); + BindDescriptorHeaps(pCmdList); + + pCmdList->SetPipelineState(m_pipelineStateBltCube.Get()); + pCmdList->SetGraphicsRootDescriptorTable(1, srvTable); + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdList->DrawInstanced(3, 1, 0, 0); + } + + void BindDsvWriteDepthStencil(const RpsCmdCallbackContext* pContext) + { + if (!m_pipelineStateWriteDepthStencil) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + auto depthStencilState = CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT); + depthStencilState.StencilEnable = TRUE; + depthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_REPLACE; + depthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; + + CreatePSO(L"VSSimple", + nullptr, + L"PSWriteDepthStencil", + true, + &rtInfo, + &m_pipelineStateWriteDepthStencil, + &depthStencilState); + } + + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + pCmdList->SetGraphicsRootSignature(m_rootSignature.Get()); + pCmdList->SetPipelineState(m_pipelineStateWriteDepthStencil.Get()); + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdList->OMSetStencilRef(0x1); + pCmdList->SetGraphicsRoot32BitConstant(0, 0, 0); + pCmdList->DrawInstanced(3, 1, 0, 0); + pCmdList->OMSetStencilRef(0x2); + pCmdList->SetGraphicsRoot32BitConstant(0, 1, 0); + pCmdList->DrawInstanced(3, 1, 0, 0); + } + + void BindDsvReadDepthWriteStencil(const RpsCmdCallbackContext* pContext, D3D12_CPU_DESCRIPTOR_HANDLE depthSrv) + { + if (!m_pipelineStateReadDepthWriteStencil) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = 
rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + auto depthStencilState = CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT); + depthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; + depthStencilState.StencilEnable = TRUE; + depthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_INCR; + depthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_KEEP; + depthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP; + depthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_EQUAL; + + CreatePSO(L"VSSimpleFlatDepth", + nullptr, + L"PSReadDepthWriteStencil", + true, + &rtInfo, + &m_pipelineStateReadDepthWriteStencil, + &depthStencilState); + } + + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + D3D12_GPU_DESCRIPTOR_HANDLE depthSrvGpu = AllocDynamicDescriptorsAndWrite(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, &depthSrv, 1); + + BindDescriptorHeaps(pCmdList); + + pCmdList->SetGraphicsRootSignature(m_rootSignature.Get()); + pCmdList->SetPipelineState(m_pipelineStateReadDepthWriteStencil.Get()); + pCmdList->SetGraphicsRootDescriptorTable(1, depthSrvGpu); + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdList->OMSetStencilRef(0x2); + pCmdList->SetGraphicsRoot32BitConstant(0, AsUInt(0.25f), 1); + pCmdList->DrawInstanced(3, 1, 0, 0); + } + + void BindDsvReadDepthStencil(const RpsCmdCallbackContext* pContext, + D3D12_CPU_DESCRIPTOR_HANDLE depthSrv, + D3D12_CPU_DESCRIPTOR_HANDLE stencilSrv) + { + if (!m_pipelineStateReadDepthStencil) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + auto depthStencilState = CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT); + depthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; + depthStencilState.StencilEnable = TRUE; + depthStencilState.StencilReadMask = 0x3; + depthStencilState.FrontFace.StencilPassOp = 
D3D12_STENCIL_OP_KEEP; + depthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_KEEP; + depthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP; + depthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_EQUAL; + + CreatePSO(L"VSSimpleFlatDepth", + nullptr, + L"PSReadDepthStencil", + true, + &rtInfo, + &m_pipelineStateReadDepthStencil, + &depthStencilState); + } + + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + D3D12_CPU_DESCRIPTOR_HANDLE srvCpuHdls[] = {depthSrv, stencilSrv}; + + D3D12_GPU_DESCRIPTOR_HANDLE dsSrvsGpu = + AllocDynamicDescriptorsAndWrite(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, srvCpuHdls, RPS_TEST_COUNTOF(srvCpuHdls)); + + BindDescriptorHeaps(pCmdList); + + pCmdList->SetGraphicsRootSignature(m_rootSignature.Get()); + pCmdList->SetPipelineState(m_pipelineStateReadDepthStencil.Get()); + pCmdList->SetGraphicsRootDescriptorTable(1, dsSrvsGpu); + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdList->OMSetStencilRef(0x3); + pCmdList->SetGraphicsRoot32BitConstant(0, AsUInt(0.5f), 1); + pCmdList->DrawInstanced(3, 1, 0, 0); + } + + void CreateRtBuffer(const RpsCmdCallbackContext* pContext) + { + if (!m_pipelineStateRtBuffer) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + CreatePSO(L"VSSimple", nullptr, L"PSMrt5", false, &rtInfo, &m_pipelineStateRtBuffer); + } + } + + void Create5MrtNoDS(const RpsCmdCallbackContext* pContext) + { + if (!m_pipelineStateMrt5NoDS) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + CreatePSO(L"VSSimple", nullptr, L"PSMrt5", false, &rtInfo, &m_pipelineStateMrt5NoDS); + } + } + + void Create3MrtDS(const RpsCmdCallbackContext* pContext) + { + if (!m_pipelineStateMrt3DS) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = 
rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + CreatePSO(L"VSSimple", nullptr, L"PSMrt3", true, &rtInfo, &m_pipelineStateMrt3DS); + } + } + + void CreateRtArray(const RpsCmdCallbackContext* pContext) + { + if (!m_pipelineStateRtArray) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + CreatePSO(L"VSRtArray", L"GSRtArray", L"PSRtArray", false, &rtInfo, &m_pipelineStateRtArray); + } + } + + void CreateMrtWithArray(const RpsCmdCallbackContext* pContext) + { + if (!m_pipelineStateRtArrayCube) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + CreatePSO( + L"VSRtArray", L"GSRtArrayToCube", L"PSRtArrayToCubeMRT", false, &rtInfo, &m_pipelineStateRtArrayCube); + } + } + + void CreateLargeArray(const RpsCmdCallbackContext* pContext) + { + } + + void CreateBlt(const RpsCmdCallbackContext* pContext) + { + if (!m_pipelineStateBlt) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + CreatePSO(L"VSBlt", nullptr, L"PSBlt", false, &rtInfo, &m_pipelineStateBlt); + } + } + + void CreateBltCube(const RpsCmdCallbackContext* pContext) + { + if (!m_pipelineStateBltCube) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + CreatePSO(L"VSBlt", nullptr, L"PSBltCube", false, &rtInfo, &m_pipelineStateBltCube); + } + } + +private: + void LoadAssets(ID3D12GraphicsCommandList* pInitCmdList, std::vector>& tempResources) + { + CD3DX12_DESCRIPTOR_RANGE ranges[1] = {}; + CD3DX12_ROOT_PARAMETER rootParameters[2] = {}; + + ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 12, 0); + rootParameters[0].InitAsConstants(2, 0); + rootParameters[1].InitAsDescriptorTable(1, &ranges[0], D3D12_SHADER_VISIBILITY_PIXEL); + + 
D3D12_STATIC_SAMPLER_DESC sampler = {}; + sampler.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + sampler.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + sampler.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + sampler.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + sampler.MipLODBias = 0; + sampler.MaxAnisotropy = 0; + sampler.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; + sampler.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + sampler.MinLOD = 0.0f; + sampler.MaxLOD = D3D12_FLOAT32_MAX; + sampler.ShaderRegister = 0; + sampler.RegisterSpace = 0; + sampler.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + + D3D12_ROOT_SIGNATURE_FLAGS rootSignatureFlags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC rootSignatureDesc; + rootSignatureDesc.Init_1_0(_countof(rootParameters), rootParameters, 1, &sampler, rootSignatureFlags); + + ComPtr signature; + ComPtr error; + ThrowIfFailed(D3DX12SerializeVersionedRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1_0, &signature, &error)); + ThrowIfFailed(m_device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature))); + } + + void CreatePSO(LPCWSTR vsEntry, + LPCWSTR gsEntry, + LPCWSTR psEntry, + bool bDepthEnable, + const RpsCmdRenderTargetInfo* pRenderTargetInfo, + ID3D12PipelineState** ppPSO, + const CD3DX12_DEPTH_STENCIL_DESC* pCustomDepthStencilDesc = nullptr) + { + // Create the pipeline state, which includes compiling and loading shaders. + { + std::vector vsCode, psCode, gsCode; + + // Describe and create the graphics pipeline state object (PSO). 
+ D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.InputLayout = {nullptr, 0}; + psoDesc.pRootSignature = m_rootSignature.Get(); + psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); + psoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); + + if (pCustomDepthStencilDesc) + { + psoDesc.DepthStencilState = *pCustomDepthStencilDesc; + } + else + { + psoDesc.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT); + psoDesc.DepthStencilState.DepthEnable = !!bDepthEnable; + } + + psoDesc.SampleMask = UINT_MAX; + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + + psoDesc.DSVFormat = rpsFormatToDXGI(pRenderTargetInfo->depthStencilFormat); + psoDesc.SampleDesc.Count = pRenderTargetInfo->numSamples; + psoDesc.NumRenderTargets = pRenderTargetInfo->numRenderTargets; + + for (uint32_t iRT = 0; iRT < pRenderTargetInfo->numRenderTargets; iRT++) + { + psoDesc.RTVFormats[iRT] = rpsFormatToDXGI(pRenderTargetInfo->renderTargetFormats[iRT]); + } + + DxcCompile(c_Shader, vsEntry, L"vs_6_0", L"", nullptr, 0, vsCode); + DxcCompile(c_Shader, psEntry, L"ps_6_0", L"", nullptr, 0, psCode); + psoDesc.VS = CD3DX12_SHADER_BYTECODE(vsCode.data(), vsCode.size()); + psoDesc.PS = CD3DX12_SHADER_BYTECODE(psCode.data(), psCode.size()); + + if (gsEntry) + { + DxcCompile(c_Shader, gsEntry, L"gs_6_0", L"", nullptr, 0, gsCode); + psoDesc.GS = CD3DX12_SHADER_BYTECODE(gsCode.data(), gsCode.size()); + } + + ThrowIfFailed(m_device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(ppPSO))); + } + } + +private: + ComPtr<ID3D12RootSignature> m_rootSignature; + ComPtr<ID3D12PipelineState> m_pipelineStateRtBuffer; + ComPtr<ID3D12PipelineState> m_pipelineStateMrt5NoDS; + ComPtr<ID3D12PipelineState> m_pipelineStateMrt3DS; + ComPtr<ID3D12PipelineState> m_pipelineStateRtArray; + ComPtr<ID3D12PipelineState> m_pipelineStateRtArrayCube; + ComPtr<ID3D12PipelineState> m_pipelineStateBlt; + ComPtr<ID3D12PipelineState> m_pipelineStateBltCube; + ComPtr<ID3D12PipelineState> m_pipelineStateWriteDepthStencil; + ComPtr<ID3D12PipelineState> m_pipelineStateReadDepthWriteStencil; + ComPtr<ID3D12PipelineState> m_pipelineStateReadDepthStencil; +}; + +TEST_CASE(TEST_APP_NAME) +{ +#if _DEBUG +
_CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#endif + + TestD3D12MrtViewportClear renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + runInfo.numFramesToRender = g_exitAfterFrame; + runInfo.width = 1280; + runInfo.height = 720; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} diff --git a/tests/gui/test_mrt_viewport_clear_shared.h b/tests/gui/test_mrt_viewport_clear_shared.h new file mode 100644 index 0000000..fa4346b --- /dev/null +++ b/tests/gui/test_mrt_viewport_clear_shared.h @@ -0,0 +1,339 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#pragma once + +#include "utils/rps_test_host.h" + +RPS_DECLARE_RPSL_ENTRY(test_mrt_viewport_clear, rps_main); + +static const char c_Shader[] = R"( +struct V2P +{ + float4 Pos : SV_Position; + float2 UV : TEXCOORD0; +}; + +V2P VSSimple(uint vertexId : SV_VertexID) +{ + V2P result; + + // Cover top left of the viewport, leaning Z 0 to 1 from left to right. 
+ result.Pos = float4( + (vertexId & 1) * 2.0f - 1.0f, + (vertexId & 2) * -1.0f + 1.0f, + (vertexId & 1) * 1.0f, + 1); + result.UV = float2((vertexId & 1) * 1.0f, (vertexId & 2) * 0.5f); + + return result; +} + +struct PSOutMrt5 +{ + float4 RT0 : SV_Target0; + float4 RT1 : SV_Target1; + float4 RT2 : SV_Target2; + float4 RT3 : SV_Target3; + float4 RT4 : SV_Target4; +}; + +PSOutMrt5 PSMrt5(V2P psIn) +{ + PSOutMrt5 result; + result.RT0 = float4(psIn.UV, 0, 1); + result.RT1 = float4(psIn.UV, 1.0f / 4, 1); + result.RT2 = float4(psIn.UV, 2.0f / 4, 1); + result.RT3 = float4(psIn.UV, 3.0f / 4, 1); + result.RT4 = float4(psIn.UV, 4.0f / 4, 1); + return result; +} + +struct PSOutMrt3 +{ + float4 RT0 : SV_Target0; + float4 RT1 : SV_Target1; + float4 RT2 : SV_Target2; +}; + +PSOutMrt3 PSMrt3(V2P psIn) +{ + PSOutMrt3 result; + result.RT0 = float4(psIn.UV.x, 0, psIn.UV.y, 1); + result.RT1 = float4(psIn.UV.x, 1.0f / 2, psIn.UV.y, 1); + result.RT2 = float4(psIn.UV.x, 2.0f / 2, psIn.UV.y, 1); + return result; +} + +struct GSInput +{ + uint vertexId : DUMMYVERTEXID; +}; + +GSInput VSRtArray(uint vertexId : SV_VertexID) +{ + GSInput vsOut; + vsOut.vertexId = vertexId; + return vsOut; +} + +struct G2P +{ + float4 Pos : SV_Position; + float2 UV : TEXCOORD0; + uint RtIndex : SV_RenderTargetArrayIndex; +}; + +[maxvertexcount(6)] +void GSRtArray(triangle GSInput Input[3], inout TriangleStream<G2P> gsOutStream) +{ + G2P gsOutVert; + + for(uint32_t iRT = 0; iRT < 2; iRT++) + { + gsOutVert.RtIndex = iRT; + + gsOutVert.Pos = float4(-1, 1, 0, 1); + gsOutVert.UV = float2(0, 0); + gsOutStream.Append(gsOutVert); + + gsOutVert.Pos = float4( 1, 1, 0, 1); + gsOutVert.UV = float2(1, 0); + gsOutStream.Append(gsOutVert); + + gsOutVert.Pos = float4(-1,-1, 0, 1); + gsOutVert.UV = float2(0, 1); + gsOutStream.Append(gsOutVert); + + gsOutStream.RestartStrip(); + } +} + +float4 PSRtArray(G2P psIn) : SV_Target0 +{ + return float4(psIn.UV, psIn.RtIndex * 1.0f, 1.0f); +} + +V2P VSBlt(uint vertexId : SV_VertexID) +{ +
V2P result; + result.Pos = float4( + (vertexId & 1) * 4.0f - 1.0f, + (vertexId & 2) * -2.0f + 1.0f, + 0, 1); + result.UV = float2((vertexId & 1) * 2.0f, (vertexId & 2) * 1.0f); + + return result; +} + +[[vk::binding(1, 0)]] +Texture2D g_tex : register(t0); + +[[vk::binding(0, 0)]] +SamplerState g_sampler : register(s0); + +float4 PSBlt(V2P psIn) : SV_Target0 +{ + return g_tex.SampleLevel(g_sampler, psIn.UV, 0); +} + +[maxvertexcount(18)] +void GSRtArrayToCube(triangle GSInput Input[3], inout TriangleStream<G2P> gsOutStream) +{ + G2P gsOutVert; + + for(uint32_t iRT = 0; iRT < 6; iRT++) + { + gsOutVert.RtIndex = iRT; + + gsOutVert.Pos = float4(-1, 1, 0, 1); + gsOutVert.UV = float2(0, 0); + gsOutStream.Append(gsOutVert); + + gsOutVert.Pos = float4( 3, 1, 0, 1); + gsOutVert.UV = float2(2, 0); + gsOutStream.Append(gsOutVert); + + gsOutVert.Pos = float4(-1,-3, 0, 1); + gsOutVert.UV = float2(0, 2); + gsOutStream.Append(gsOutVert); + + gsOutStream.RestartStrip(); + } +} + +[[vk::binding(2, 0)]] +Texture2D g_texArr[12] : register(t0); + +void PSRtArrayToCubeMRT(G2P psIn, + out float4 rt0 : SV_Target0, + out float4 rt1 : SV_Target1, + out float4 rt2 : SV_Target2, + out float4 rt3 : SV_Target3, + out float4 rt4 : SV_Target4, + out float4 rt5 : SV_Target5) +{ + float4 colorSrc0 = g_texArr[psIn.RtIndex].SampleLevel(g_sampler, psIn.UV, 0); + float4 colorSrc1 = g_texArr[psIn.RtIndex + 6].SampleLevel(g_sampler, psIn.UV, 0); + + rt0 = lerp(colorSrc0, colorSrc1, 1 / 7.0f); // cube 1 + rt1 = lerp(colorSrc0, colorSrc1, 2 / 7.0f); // cube 2 + rt2 = lerp(colorSrc0, colorSrc1, 3 / 7.0f); // cube 3 + + rt3 = lerp(colorSrc0, colorSrc1, 5 / 7.0f); // cube 5 + rt4 = lerp(colorSrc0, colorSrc1, 6 / 7.0f); // cube 6 + + rt5 = lerp(colorSrc0, colorSrc1, 4 / 7.0f); // cube 4 +} + +static const float PI = 3.14159265f; + +[[vk::binding(1, 0)]] +TextureCube g_cubeTex : register(t0); + +float4 PSBltCube(V2P psIn) : SV_Target0 +{ + float phi = psIn.UV.x * PI * 2; + float theta = psIn.UV.y * PI; + + float
sinTheta = sin(theta); + + float3 coord = float3( + sinTheta * sin(phi), + sinTheta * cos(phi), + cos(theta)); + + return g_cubeTex.SampleLevel(g_sampler, coord, 0); +} + +[[vk::binding(1, 0)]] +Texture2D g_DepthSrv : register(t0); + +[[vk::binding(3, 0)]] +Texture2D g_StencilSrv : register(t1); + +struct CBData +{ + uint drawId; + float flatDepth; +}; + +#if VULKAN +[[vk::push_constant]] CBData cb; +#else +ConstantBuffer<CBData> cb : register(b0); +#endif + +V2P VSSimpleFlatDepth(uint vertexId : SV_VertexID) +{ + V2P result; + + // Cover top left of the viewport, leaning Z 0 to 1 from left to right. + result.Pos = float4( + (vertexId & 1) * 2.0f - 1.0f, + (vertexId & 2) * -1.0f + 1.0f, + cb.flatDepth, + 1); + result.UV = float2((vertexId & 1) * 1.0f, (vertexId & 2) * 0.5f); + + return result; +} + +float4 PSWriteDepthStencil(V2P psIn) : SV_Target0 +{ + uint2 tile = (uint2)(psIn.Pos.xy) / 12; + + if ((cb.drawId == 0) == ((tile.x & 1u) != (tile.y & 1u))) + discard; + + return float4(0, 1, 0, 0); +} + +float4 PSReadDepthWriteStencil(V2P psIn, uint sampleIdx : SV_SampleIndex) : SV_Target0 +{ + float fDepthSrvValue = g_DepthSrv.Load(int3(psIn.Pos.xy, 0)); + + return float4(0, 0, fDepthSrvValue, 0); +} + +float4 PSReadDepthStencil(V2P psIn, uint sampleIdx : SV_SampleIndex) : SV_Target0 +{ + float fDepthSrvValue = g_DepthSrv.Load(int3(psIn.Pos.xy, 0)); + +#if VULKAN +#define STENCIL_COMPONENT r +#else +#define STENCIL_COMPONENT g +#endif + + uint uStencilValue = g_StencilSrv.Load(int3(psIn.Pos.xy, 0)).STENCIL_COMPONENT; + + return float4(uStencilValue / 2.0f, fDepthSrvValue, 0, 1); +} + +)"; + +#define TEST_APP_NAME_RAW "TestMultipleRenderTargetClear" + +using namespace DirectX; + +class TestRpsMrtViewportClear : public RpsTestHost +{ +public: + struct ViewportData + { + XMFLOAT4 data; + + ViewportData() + { + } + ViewportData(const ViewportData& r) + { + FAIL(); + data = r.data; + } + ViewportData& operator=(const ViewportData& r) + { + FAIL(); + data = r.data; + return *this; 
+ } + }; + + TestRpsMrtViewportClear() + { + } + +protected: + void Init(RpsDevice hRpsDevice) + { + RpsTestHost::OnInit(hRpsDevice, rpsTestLoadRpslEntry(test_mrt_viewport_clear, rps_main)); + } + + virtual void BindNodes(RpsSubprogram hRpslEntry) override + { + RpsResult result = + rpsProgramBindNode(hRpslEntry, "test_unordered_5_mrt_no_ds", &TestRpsMrtViewportClear::Draw5MrtNoDS, this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode(hRpslEntry, "test_unordered_3_mrt_ds", &TestRpsMrtViewportClear::Draw3MrtDS, this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode(hRpslEntry, "test_rt_array", &TestRpsMrtViewportClear::DrawRtArray, this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode(hRpslEntry, "test_large_array", &TestRpsMrtViewportClear::DrawLargeArray, this); + REQUIRE(result == RPS_OK); + } + +protected: + virtual void Draw5MrtNoDS(const RpsCmdCallbackContext* pContext) = 0; + virtual void Draw3MrtDS(const RpsCmdCallbackContext* pContext) = 0; + virtual void DrawRtArray(const RpsCmdCallbackContext* pContext) = 0; + virtual void DrawMrtWithArray(const RpsCmdCallbackContext* pContext) = 0; + virtual void DrawLargeArray(const RpsCmdCallbackContext* pContext) = 0; +}; diff --git a/tests/gui/test_mrt_viewport_clear_vk.cpp b/tests/gui/test_mrt_viewport_clear_vk.cpp new file mode 100644 index 0000000..c9bba06 --- /dev/null +++ b/tests/gui/test_mrt_viewport_clear_vk.cpp @@ -0,0 +1,909 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#ifdef _WIN32 +#define VK_USE_PLATFORM_WIN32_KHR +#else +#error "TODO" +#endif + +#define RPS_VK_RUNTIME 1 + +#include "test_mrt_viewport_clear_shared.h" + +#include "utils/rps_test_win32.h" +#include "utils/rps_test_vk_renderer.h" + +class TestVkMrtViewportClear : public RpsTestVulkanRenderer, public TestRpsMrtViewportClear +{ + static constexpr uint32_t PushConstOffsetDrawId = 0; + static constexpr uint32_t PushConstOffsetFlagDepth = 4; + +protected : + virtual void OnInit(VkCommandBuffer initCmdBuf, InitTempResources& tempResources) override + { + LoadAssets(); + + TestRpsMrtViewportClear::Init(rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); })); + } + + virtual void OnPostResize() override + { + } + + virtual void OnCleanUp() override + { + TestRpsMrtViewportClear::OnDestroy(); + + vkDestroyDescriptorSetLayout(m_device, m_descriptorSetLayout, nullptr); + vkDestroyPipelineLayout(m_device, m_pipelineLayout, nullptr); + vkDestroyPipeline(m_device, m_psoMrt5NoDS, nullptr); + vkDestroyPipeline(m_device, m_psoMrt3DS, nullptr); + vkDestroyPipeline(m_device, m_psoRtArray, nullptr); + vkDestroyPipeline(m_device, m_psoRtArrayCube, nullptr); + vkDestroyPipeline(m_device, m_psoBlt, nullptr); + vkDestroyPipeline(m_device, m_psoBltCube, nullptr); + vkDestroyPipeline(m_device, m_psoWriteDepthStencil, nullptr); + vkDestroyPipeline(m_device, m_psoReadDepthWriteStencil, nullptr); + vkDestroyPipeline(m_device, m_psoReadDepthStencil, nullptr); + vkDestroySampler(m_device, m_sampler, nullptr); + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + RpsResourceDesc backBufferDesc; + auto& swapChainBufferHdls = GetBackBuffers(backBufferDesc); + + const bool bBufferRTVSupported = true; + + RpsConstant args[] = {&backBufferDesc, &bBufferRTVSupported}; + const RpsRuntimeResource* argResources[] = {swapChainBufferHdls.data(), nullptr}; + + uint64_t completedFrameIndex = 
CalcGuaranteedCompletedFrameIndexForRps(); + + TestRpsMrtViewportClear::OnUpdate( + frameIndex, completedFrameIndex, uint32_t(RPS_TEST_COUNTOF(args)), args, argResources); + + RpsTestVulkanRenderer::OnUpdate(frameIndex); + } + + virtual void OnRender(uint32_t frameIndex) override + { + REQUIRE(RPS_SUCCEEDED(ExecuteRenderGraph(frameIndex, GetRpsRenderGraph()))); + } + +protected: + virtual void BindNodes(RpsSubprogram hRpslEntry) override final + { + TestRpsMrtViewportClear::BindNodes(hRpslEntry); + + RpsResult result = + rpsProgramBindNode(hRpslEntry, "test_mrt_with_array", &TestVkMrtViewportClear::DrawMrtWithArray, this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode(hRpslEntry, "blt_to_swapchain", &TestVkMrtViewportClear::DrawBlt, this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode(hRpslEntry, "draw_cube_to_swapchain", &TestVkMrtViewportClear::DrawBltCube, this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode( + hRpslEntry, "test_bind_dsv_write_depth_stencil", &TestVkMrtViewportClear::BindDsvWriteDepthStencil, this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode(hRpslEntry, + "test_bind_dsv_read_depth_write_stencil", + &TestVkMrtViewportClear::BindDsvReadDepthWriteStencil, + this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode( + hRpslEntry, "test_bind_dsv_read_depth_stencil", &TestVkMrtViewportClear::BindDsvReadDepthStencil, this); + REQUIRE(result == RPS_OK); + } + + void Create5MrtNoDS(const RpsCmdCallbackContext* pContext) + { + if (m_psoMrt5NoDS == VK_NULL_HANDLE) + { + VkRenderPass rp; + RpsResult result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + CreatePSO(L"VSSimple", nullptr, L"PSMrt5", 5, false, rp, &m_psoMrt5NoDS); + } + } + + virtual void Draw5MrtNoDS(const RpsCmdCallbackContext* pContext) override final + { + Create5MrtNoDS(pContext); + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + vkCmdBindPipeline(cmdBuf, 
VK_PIPELINE_BIND_POINT_GRAPHICS, m_psoMrt5NoDS); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + + void Create3MrtDS(const RpsCmdCallbackContext* pContext) + { + if (m_psoMrt3DS == VK_NULL_HANDLE) + { + VkRenderPass rp; + RpsResult result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + CreatePSO(L"VSSimple", nullptr, L"PSMrt3", 3, true, rp, &m_psoMrt3DS); + } + } + + virtual void Draw3MrtDS(const RpsCmdCallbackContext* pContext) override final + { + Create3MrtDS(pContext); + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_psoMrt3DS); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + + void CreateRtArray(const RpsCmdCallbackContext* pContext) + { + if (m_psoRtArray == VK_NULL_HANDLE) + { + VkRenderPass rp; + RpsResult result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + CreatePSO(L"VSRtArray", L"GSRtArray", L"PSRtArray", 1, false, rp, &m_psoRtArray); + } + } + + virtual void DrawRtArray(const RpsCmdCallbackContext* pContext) override final + { + CreateRtArray(pContext); + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_psoRtArray); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + + void CreateMrtWithArray(const RpsCmdCallbackContext* pContext) + { + if (m_psoRtArrayCube == VK_NULL_HANDLE) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + VkRenderPass rp; + result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + CreatePSO(L"VSRtArray", + L"GSRtArrayToCube", + L"PSRtArrayToCubeMRT", + rtInfo.numRenderTargets, + rtInfo.depthStencilFormat != RPS_FORMAT_UNKNOWN, + rp, + &m_psoRtArrayCube); + } + } + + void DrawMrtWithArray(const RpsCmdCallbackContext* pContext) override final + { + CreateMrtWithArray(pContext); + + VkCommandBuffer cmdBuf = 
rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + VkWriteDescriptorSet writeDescriptorSet[1] = {}; + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_descriptorSetLayout, 1, &ds)); + + // Node argument 2 is fetched as an array of image views; the test requires exactly 12 entries, + // matching the fixed-size imageViews[] / imageInfo[] arrays written to descriptor binding 2 below. + RpsParameterDesc paramDesc; + RpsResult result = rpsCmdGetParamDesc(pContext, 2, &paramDesc); + REQUIRE(result == RPS_OK); + REQUIRE(paramDesc.arraySize == 12); + + VkImageView imageViews[12]; + result = rpsVKGetCmdArgImageViewArray(pContext, 2, 0, imageViews, paramDesc.arraySize); + REQUIRE(result == RPS_OK); + + VkDescriptorImageInfo imageInfo[12] = {}; + for (uint32_t i = 0; i < _countof(imageInfo); i++) + { + imageInfo[i].sampler = VK_NULL_HANDLE; + imageInfo[i].imageView = imageViews[i]; + imageInfo[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + } + AppendWriteDescriptorSetImages(&writeDescriptorSet[0], ds, 2, _countof(imageInfo), VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, imageInfo); + + vkUpdateDescriptorSets(m_device, _countof(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + + vkCmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayout, 0, 1, &ds, 0, nullptr); + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_psoRtArrayCube); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + + void CreateLargeArray(const RpsCmdCallbackContext* pContext) + { + } + + virtual void DrawLargeArray(const RpsCmdCallbackContext* pContext) override final + { + } + + void CreateBlt(const RpsCmdCallbackContext* pContext) + { + if (m_psoBlt == VK_NULL_HANDLE) + { + VkRenderPass rp; + RpsResult result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + CreatePSO(L"VSBlt", nullptr, L"PSBlt", 1, false, rp, &m_psoBlt); + } + } + + void DrawBlt(const RpsCmdCallbackContext* pContext, + RpsResourceAccessInfo resourceAccessInfo, + RpsVkImageViewInfo src, + const ViewportData* dstViewport) + { + CreateBlt(pContext); + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + VkDescriptorSet ds; + 
ThrowIfFailedVK(AllocFrameDescriptorSet(&m_descriptorSetLayout, 1, &ds)); + + RpsCmdViewportInfo viewportScissorInfo = {}; + RpsResult result = rpsCmdGetViewportInfo(pContext, &viewportScissorInfo); + REQUIRE(result == RPS_OK); + REQUIRE(viewportScissorInfo.numViewports == 1); + REQUIRE(dstViewport->data.x == viewportScissorInfo.pViewports[0].x); + REQUIRE(dstViewport->data.y == viewportScissorInfo.pViewports[0].y); + REQUIRE(dstViewport->data.z == viewportScissorInfo.pViewports[0].width); + REQUIRE(dstViewport->data.w == viewportScissorInfo.pViewports[0].height); + + if (m_frameCounter < (m_swapChainImages.size() * 2)) + { + RpsResourceDesc resourceDesc = {}; + RpsRuntimeResource rtResource; + RpsResourceAccessInfo accessInfo; + REQUIRE(RPS_SUCCEEDED(rpsCmdGetArgResourceDesc(pContext, 0, &resourceDesc))); + REQUIRE(RPS_SUCCEEDED(rpsCmdGetArgResourceDescArray(pContext, 1, 0, &resourceDesc, 1))); + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == rpsCmdGetArgResourceDescArray(pContext, 0, 1, &resourceDesc, 1)); + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == rpsCmdGetArgResourceDescArray(pContext, 1, 1, &resourceDesc, 1)); + REQUIRE(RPS_ERROR_TYPE_MISMATCH == rpsCmdGetArgResourceDesc(pContext, 2, &resourceDesc)); // Not a resource + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == rpsCmdGetArgResourceDescArray(pContext, 4, 1, &resourceDesc, 1)); + + REQUIRE(RPS_SUCCEEDED(rpsCmdGetArgRuntimeResource(pContext, 0, &rtResource))); + REQUIRE(RPS_SUCCEEDED(rpsCmdGetArgRuntimeResource(pContext, 1, &rtResource))); + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == rpsCmdGetArgRuntimeResourceArray(pContext, 0, 1, &rtResource, 1)); + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == rpsCmdGetArgRuntimeResourceArray(pContext, 1, 1, &rtResource, 1)); + REQUIRE(RPS_ERROR_TYPE_MISMATCH == + rpsCmdGetArgRuntimeResource(pContext, 2, &rtResource)); // Not a resource + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == rpsCmdGetArgRuntimeResourceArray(pContext, 4, 1, &rtResource, 1)); + + 
REQUIRE(RPS_SUCCEEDED(rpsCmdGetArgResourceAccessInfo(pContext, 0, &accessInfo))); + REQUIRE(accessInfo.access.accessFlags == (RPS_ACCESS_RENDER_TARGET_BIT | RPS_ACCESS_RENDER_PASS)); + REQUIRE(accessInfo.access.accessStages == RPS_SHADER_STAGE_NONE); + REQUIRE(accessInfo.range.baseArrayLayer == 0); + REQUIRE(accessInfo.range.arrayLayers == 1); + REQUIRE(accessInfo.range.baseMipLevel == 0); + REQUIRE(accessInfo.range.mipLevels == 1); + REQUIRE(accessInfo.viewFormat == RPS_FORMAT_B8G8R8A8_UNORM); + REQUIRE(resourceAccessInfo.access.accessFlags == accessInfo.access.accessFlags); + REQUIRE(resourceAccessInfo.access.accessStages == accessInfo.access.accessStages); + REQUIRE(resourceAccessInfo.range.baseArrayLayer == accessInfo.range.baseArrayLayer); + REQUIRE(resourceAccessInfo.range.arrayLayers == accessInfo.range.arrayLayers); + REQUIRE(resourceAccessInfo.range.baseMipLevel == accessInfo.range.baseMipLevel); + REQUIRE(resourceAccessInfo.range.mipLevels == accessInfo.range.mipLevels); + REQUIRE(resourceAccessInfo.viewFormat == accessInfo.viewFormat); + + REQUIRE(RPS_SUCCEEDED(rpsCmdGetArgResourceAccessInfo(pContext, 1, &accessInfo))); + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == + rpsCmdGetArgResourceAccessInfoArray(pContext, 0, 1, &accessInfo, 1)); + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == + rpsCmdGetArgResourceAccessInfoArray(pContext, 1, 1, &accessInfo, 1)); + REQUIRE(RPS_ERROR_TYPE_MISMATCH == + rpsCmdGetArgResourceAccessInfo(pContext, 2, &accessInfo)); // Not a resource + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == + rpsCmdGetArgResourceAccessInfoArray(pContext, 4, 1, &accessInfo, 1)); + + + VkImage hImage = VK_NULL_HANDLE; + VkImageView hImageView = VK_NULL_HANDLE; + VkBuffer hBuffer = VK_NULL_HANDLE; + VkBufferView hBufferView = VK_NULL_HANDLE; + + REQUIRE((RPS_SUCCEEDED(rpsVKGetCmdArgImage(pContext, 0, &hImage)) && (hImage != VK_NULL_HANDLE))); + hImage = VK_NULL_HANDLE; + REQUIRE( + (RPS_SUCCEEDED(rpsVKGetCmdArgImageArray(pContext, 0, 0, &hImage, 1)) && (hImage != 
VK_NULL_HANDLE))); + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == rpsVKGetCmdArgImageArray(pContext, 0, 1, &hImage, 1)); + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == rpsVKGetCmdArgImageArray(pContext, 0, 0, &hImage, 2)); + + REQUIRE( + (RPS_SUCCEEDED(rpsVKGetCmdArgImageView(pContext, 0, &hImageView)) && (hImageView != VK_NULL_HANDLE))); + hImageView = VK_NULL_HANDLE; + REQUIRE((RPS_SUCCEEDED(rpsVKGetCmdArgImageViewArray(pContext, 0, 0, &hImageView, 1)) && + (hImageView != VK_NULL_HANDLE))); + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == rpsVKGetCmdArgImageViewArray(pContext, 0, 1, &hImageView, 1)); + REQUIRE(RPS_ERROR_INDEX_OUT_OF_BOUNDS == rpsVKGetCmdArgImageViewArray(pContext, 0, 0, &hImageView, 2)); + + REQUIRE(RPS_ERROR_TYPE_MISMATCH == rpsVKGetCmdArgBuffer(pContext, 0, &hBuffer)); + REQUIRE(RPS_ERROR_TYPE_MISMATCH == rpsVKGetCmdArgBufferArray(pContext, 0, 0, &hBuffer, 1)); + REQUIRE(RPS_ERROR_TYPE_MISMATCH == rpsVKGetCmdArgBufferView(pContext, 0, &hBufferView)); + REQUIRE(RPS_ERROR_TYPE_MISMATCH == rpsVKGetCmdArgBufferViewArray(pContext, 0, 0, &hBufferView, 1)); + + REQUIRE(RPS_ERROR_TYPE_MISMATCH == rpsVKGetCmdArgBuffer(pContext, 1, &hBuffer)); + REQUIRE(RPS_ERROR_TYPE_MISMATCH == rpsVKGetCmdArgBufferView(pContext, 1, &hBufferView)); + + hImage = VK_NULL_HANDLE; + hImageView = VK_NULL_HANDLE; + REQUIRE((RPS_SUCCEEDED(rpsVKGetCmdArgImage(pContext, 1, &hImage)) && (hImage != VK_NULL_HANDLE))); + REQUIRE(RPS_ERROR_TYPE_MISMATCH == rpsVKGetCmdArgImage(pContext, 2, &hImage)); + } + + VkWriteDescriptorSet writeDescriptorSet[1] = {}; + + VkDescriptorImageInfo imageInfo = {VK_NULL_HANDLE, src.hImageView, src.layout}; + AppendWriteDescriptorSetImages(&writeDescriptorSet[0], ds, 1, 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &imageInfo); + + vkUpdateDescriptorSets(m_device, _countof(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + + vkCmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayout, 0, 1, &ds, 0, nullptr); + vkCmdBindPipeline(cmdBuf, 
VK_PIPELINE_BIND_POINT_GRAPHICS, m_psoBlt); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + + void CreateBltCube(const RpsCmdCallbackContext* pContext) + { + if (!m_psoBltCube) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + VkRenderPass rp; + result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + CreatePSO(L"VSBlt", nullptr, L"PSBltCube", rtInfo.numRenderTargets, rtInfo.depthStencilFormat != RPS_FORMAT_UNKNOWN, rp, &m_psoBltCube); + } + } + + void DrawBltCube(const RpsCmdCallbackContext* pContext, + rps::UnusedArg dst, + VkImageView src, + const ViewportData& dstViewport) + { + CreateBltCube(pContext); + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_descriptorSetLayout, 1, &ds)); + + VkWriteDescriptorSet writeDescriptorSet[1] = {}; + + VkDescriptorImageInfo imageInfo = {VK_NULL_HANDLE, src, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + AppendWriteDescriptorSetImages(&writeDescriptorSet[0], ds, 1, 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &imageInfo); + + vkUpdateDescriptorSets(m_device, _countof(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + + vkCmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayout, 0, 1, &ds, 0, nullptr); + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_psoBltCube); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + + void BindDsvWriteDepthStencil(const RpsCmdCallbackContext* pContext) + { + if (!m_psoWriteDepthStencil) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + VkRenderPass rp; + result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + VkPipelineDepthStencilStateCreateInfo dsStateInfo; + dsStateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + dsStateInfo.pNext = 
NULL; + dsStateInfo.flags = 0; + dsStateInfo.depthTestEnable = VK_TRUE; + dsStateInfo.depthWriteEnable = VK_TRUE; + dsStateInfo.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL; + dsStateInfo.depthBoundsTestEnable = VK_FALSE; + dsStateInfo.stencilTestEnable = VK_TRUE; + dsStateInfo.back.failOp = VK_STENCIL_OP_KEEP; + dsStateInfo.back.passOp = VK_STENCIL_OP_REPLACE; + dsStateInfo.back.compareOp = VK_COMPARE_OP_ALWAYS; + dsStateInfo.back.compareMask = 0; + dsStateInfo.back.reference = 0; + dsStateInfo.back.depthFailOp = VK_STENCIL_OP_KEEP; + dsStateInfo.back.writeMask = 0xff; + dsStateInfo.minDepthBounds = 0; + dsStateInfo.maxDepthBounds = 0; + dsStateInfo.front = dsStateInfo.back; + + CreatePSO(L"VSSimple", + nullptr, + L"PSWriteDepthStencil", + rtInfo.numRenderTargets, + rtInfo.depthStencilFormat != RPS_FORMAT_UNKNOWN, + rp, + &m_psoWriteDepthStencil, + &dsStateInfo); + } + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_psoWriteDepthStencil); + + uint32_t drawId = 0; + vkCmdPushConstants( + cmdBuf, m_pipelineLayout, VK_SHADER_STAGE_FRAGMENT_BIT, PushConstOffsetDrawId, sizeof(drawId), &drawId); + vkCmdSetStencilReference(cmdBuf, VK_STENCIL_FACE_FRONT_AND_BACK, 0x1); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + + drawId = 1; + vkCmdPushConstants( + cmdBuf, m_pipelineLayout, VK_SHADER_STAGE_FRAGMENT_BIT, PushConstOffsetDrawId, sizeof(drawId), &drawId); + vkCmdSetStencilReference(cmdBuf, VK_STENCIL_FACE_FRONT_AND_BACK, 0x2); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + + void BindDsvReadDepthWriteStencil(const RpsCmdCallbackContext* pContext, RpsVkImageViewInfo depthSrv) + { + if (!m_psoReadDepthWriteStencil) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + VkRenderPass rp; + result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + VkPipelineDepthStencilStateCreateInfo 
dsStateInfo; + dsStateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + dsStateInfo.pNext = NULL; + dsStateInfo.flags = 0; + dsStateInfo.depthTestEnable = VK_TRUE; + dsStateInfo.depthWriteEnable = VK_FALSE; + dsStateInfo.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL; + dsStateInfo.depthBoundsTestEnable = VK_FALSE; + dsStateInfo.stencilTestEnable = VK_TRUE; + dsStateInfo.back.failOp = VK_STENCIL_OP_KEEP; + dsStateInfo.back.passOp = VK_STENCIL_OP_INCREMENT_AND_WRAP; + dsStateInfo.back.compareOp = VK_COMPARE_OP_EQUAL; + dsStateInfo.back.compareMask = 0xff; + dsStateInfo.back.reference = 0x2; + dsStateInfo.back.depthFailOp = VK_STENCIL_OP_KEEP; + dsStateInfo.back.writeMask = 0xff; + dsStateInfo.minDepthBounds = 0; + dsStateInfo.maxDepthBounds = 0; + dsStateInfo.front = dsStateInfo.back; + + CreatePSO(L"VSSimpleFlatDepth", + nullptr, + L"PSReadDepthWriteStencil", + rtInfo.numRenderTargets, + rtInfo.depthStencilFormat != RPS_FORMAT_UNKNOWN, + rp, + &m_psoReadDepthWriteStencil, + &dsStateInfo); + } + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_descriptorSetLayout, 1, &ds)); + + VkWriteDescriptorSet writeDescriptorSet[1] = {}; + + VkDescriptorImageInfo imageInfo = {VK_NULL_HANDLE, depthSrv.hImageView, depthSrv.layout}; + AppendWriteDescriptorSetImages(&writeDescriptorSet[0], ds, 1, 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &imageInfo); + + vkUpdateDescriptorSets(m_device, _countof(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + vkCmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayout, 0, 1, &ds, 0, nullptr); + + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_psoReadDepthWriteStencil); + float depth = 0.25f; + vkCmdPushConstants( + cmdBuf, m_pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, PushConstOffsetFlagDepth, sizeof(depth), &depth); + vkCmdSetStencilReference(cmdBuf, VK_STENCIL_FACE_FRONT_AND_BACK, 
0x2); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + + void BindDsvReadDepthStencil(const RpsCmdCallbackContext* pContext, + RpsVkImageViewInfo depthSrv, + RpsVkImageViewInfo stencilSrv) + { + if (!m_psoReadDepthStencil) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + VkRenderPass rp; + result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + VkPipelineDepthStencilStateCreateInfo dsStateInfo; + dsStateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + dsStateInfo.pNext = NULL; + dsStateInfo.flags = 0; + dsStateInfo.depthTestEnable = VK_TRUE; + dsStateInfo.depthWriteEnable = VK_FALSE; + dsStateInfo.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL; + dsStateInfo.depthBoundsTestEnable = VK_FALSE; + dsStateInfo.stencilTestEnable = VK_TRUE; + dsStateInfo.back.failOp = VK_STENCIL_OP_KEEP; + dsStateInfo.back.passOp = VK_STENCIL_OP_KEEP; + dsStateInfo.back.compareOp = VK_COMPARE_OP_EQUAL; + dsStateInfo.back.compareMask = 0xff; + dsStateInfo.back.reference = 0x3; + dsStateInfo.back.depthFailOp = VK_STENCIL_OP_KEEP; + dsStateInfo.back.writeMask = 0x0; + dsStateInfo.minDepthBounds = 0; + dsStateInfo.maxDepthBounds = 0; + dsStateInfo.front = dsStateInfo.back; + + CreatePSO(L"VSSimpleFlatDepth", + nullptr, + L"PSReadDepthStencil", + rtInfo.numRenderTargets, + rtInfo.depthStencilFormat != RPS_FORMAT_UNKNOWN, + rp, + &m_psoReadDepthStencil, + &dsStateInfo); + } + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_descriptorSetLayout, 1, &ds)); + + VkWriteDescriptorSet writeDescriptorSet[2] = {}; + + VkDescriptorImageInfo depthImageInfo = {VK_NULL_HANDLE, depthSrv.hImageView, depthSrv.layout}; + VkDescriptorImageInfo stencilImageInfo = {VK_NULL_HANDLE, stencilSrv.hImageView, stencilSrv.layout}; + AppendWriteDescriptorSetImages( + &writeDescriptorSet[0], ds, 
1, 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &depthImageInfo); + AppendWriteDescriptorSetImages( + &writeDescriptorSet[1], ds, 3, 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &stencilImageInfo); + + vkUpdateDescriptorSets(m_device, _countof(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + + vkCmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayout, 0, 1, &ds, 0, nullptr); + + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_psoReadDepthStencil); + const float depth = 0.5f; + vkCmdPushConstants( + cmdBuf, m_pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, PushConstOffsetFlagDepth, sizeof(depth), &depth); + vkCmdSetStencilReference(cmdBuf, VK_STENCIL_FACE_FRONT_AND_BACK, 0x3); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + +private: + void LoadAssets() + { + OnPostResize(); + + VkSamplerCreateInfo samplerCI = {VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO}; + samplerCI.magFilter = VK_FILTER_LINEAR; + samplerCI.minFilter = VK_FILTER_LINEAR; + samplerCI.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + samplerCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCI.mipLodBias = 0.0f; + samplerCI.compareOp = VK_COMPARE_OP_NEVER; + samplerCI.minLod = 0.0f; + samplerCI.maxLod = FLT_MAX; + samplerCI.maxAnisotropy = 1.0; + samplerCI.anisotropyEnable = VK_FALSE; + samplerCI.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + + ThrowIfFailedVK(vkCreateSampler(m_device, &samplerCI, nullptr, &m_sampler)); + + VkDescriptorSetLayoutBinding sharedLayoutBindings[4] = {}; + sharedLayoutBindings[0].binding = 1; + sharedLayoutBindings[0].descriptorCount = 1; + sharedLayoutBindings[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + sharedLayoutBindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + sharedLayoutBindings[1].binding = 2; + sharedLayoutBindings[1].descriptorCount = 12; + sharedLayoutBindings[1].descriptorType = 
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + sharedLayoutBindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + sharedLayoutBindings[2].binding = 0; + sharedLayoutBindings[2].descriptorCount = 1; + sharedLayoutBindings[2].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + sharedLayoutBindings[2].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + sharedLayoutBindings[2].pImmutableSamplers = &m_sampler; + sharedLayoutBindings[3].binding = 3; + sharedLayoutBindings[3].descriptorCount = 1; + sharedLayoutBindings[3].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + sharedLayoutBindings[3].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + + VkDescriptorSetLayoutCreateInfo setLayoutCI = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + setLayoutCI.pBindings = sharedLayoutBindings; + setLayoutCI.bindingCount = _countof(sharedLayoutBindings); + + ThrowIfFailedVK(vkCreateDescriptorSetLayout(m_device, &setLayoutCI, nullptr, &m_descriptorSetLayout)); + + VkPushConstantRange pushConstRanges[] = {{VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4}, + {VK_SHADER_STAGE_VERTEX_BIT, 4, 8}}; + + VkPipelineLayoutCreateInfo plCI = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + plCI.setLayoutCount = 1; + plCI.pSetLayouts = &m_descriptorSetLayout; + plCI.pushConstantRangeCount = RPS_TEST_COUNTOF(pushConstRanges); + plCI.pPushConstantRanges = pushConstRanges; + + ThrowIfFailedVK(vkCreatePipelineLayout(m_device, &plCI, nullptr, &m_pipelineLayout)); + } + + void CreatePSO(const WCHAR* vsEntry, + const WCHAR* gsEntry, + const WCHAR* psEntry, + uint32_t numColorAttachments, + bool bDepth, + VkRenderPass renderPass, + VkPipeline* pPso, + const VkPipelineDepthStencilStateCreateInfo* pCustomDSInfo = nullptr) + { + VkPipelineVertexInputStateCreateInfo vi = {}; + vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vi.pNext = NULL; + vi.flags = 0; + vi.vertexBindingDescriptionCount = 0; + vi.pVertexBindingDescriptions = nullptr; + vi.vertexAttributeDescriptionCount = 0; + 
vi.pVertexAttributeDescriptions = nullptr; + + // input assembly state + // + VkPipelineInputAssemblyStateCreateInfo ia; + ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + ia.pNext = NULL; + ia.flags = 0; + ia.primitiveRestartEnable = VK_FALSE; + ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + + // rasterizer state + VkPipelineRasterizationStateCreateInfo rs; + rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rs.pNext = NULL; + rs.flags = 0; + rs.polygonMode = VK_POLYGON_MODE_FILL; + rs.cullMode = VK_CULL_MODE_NONE; + rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rs.depthClampEnable = VK_FALSE; + rs.rasterizerDiscardEnable = VK_FALSE; + rs.depthBiasEnable = VK_FALSE; + rs.depthBiasConstantFactor = 0; + rs.depthBiasClamp = 0; + rs.depthBiasSlopeFactor = 0; + rs.lineWidth = 1.0f; + + VkPipelineColorBlendAttachmentState bs[8] = {}; + bs[0].blendEnable = VK_FALSE; + bs[0].srcColorBlendFactor = VK_BLEND_FACTOR_ONE; + bs[0].dstColorBlendFactor = VK_BLEND_FACTOR_ZERO; + bs[0].colorBlendOp = VK_BLEND_OP_ADD; + bs[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE; + bs[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; + bs[0].alphaBlendOp = VK_BLEND_OP_ADD; + bs[0].colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + for (uint32_t i = 1; i < numColorAttachments; i++) + { + bs[i] = bs[0]; + } + + // Color blend state + VkPipelineColorBlendStateCreateInfo cb; + cb.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + cb.flags = 0; + cb.pNext = NULL; + cb.attachmentCount = numColorAttachments; + cb.pAttachments = bs; + cb.logicOpEnable = VK_FALSE; + cb.logicOp = VK_LOGIC_OP_NO_OP; + cb.blendConstants[0] = 1.0f; + cb.blendConstants[1] = 1.0f; + cb.blendConstants[2] = 1.0f; + cb.blendConstants[3] = 1.0f; + + VkDynamicState dynamicStateEnables[] = { + VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_STENCIL_REFERENCE}; + 
VkPipelineDynamicStateCreateInfo dynamicState = {}; + dynamicState.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamicState.pNext = NULL; + dynamicState.pDynamicStates = dynamicStateEnables; + dynamicState.dynamicStateCount = RPS_TEST_COUNTOF(dynamicStateEnables); + + // view port state + + VkPipelineViewportStateCreateInfo vp = {}; + vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + vp.pNext = NULL; + vp.flags = 0; + vp.viewportCount = 1; + vp.scissorCount = 1; + vp.pScissors = NULL; + vp.pViewports = NULL; + + // depth stencil state + + VkPipelineDepthStencilStateCreateInfo ds; + ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + ds.pNext = NULL; + ds.flags = 0; + ds.depthTestEnable = bDepth ? VK_TRUE : VK_FALSE; + ds.depthWriteEnable = bDepth ? VK_TRUE : VK_FALSE; + ds.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL; + ds.depthBoundsTestEnable = VK_FALSE; + ds.stencilTestEnable = VK_FALSE; + ds.back.failOp = VK_STENCIL_OP_KEEP; + ds.back.passOp = VK_STENCIL_OP_KEEP; + ds.back.compareOp = VK_COMPARE_OP_ALWAYS; + ds.back.compareMask = 0; + ds.back.reference = 0; + ds.back.depthFailOp = VK_STENCIL_OP_KEEP; + ds.back.writeMask = 0; + ds.minDepthBounds = 0; + ds.maxDepthBounds = 0; + ds.stencilTestEnable = VK_FALSE; + ds.front = ds.back; + + // multi sample state + + VkPipelineMultisampleStateCreateInfo ms; + ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + ms.pNext = NULL; + ms.flags = 0; + ms.pSampleMask = NULL; + ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + ms.sampleShadingEnable = VK_FALSE; + ms.alphaToCoverageEnable = VK_FALSE; + ms.alphaToOneEnable = VK_FALSE; + ms.minSampleShading = 0.0; + + VkShaderModule vsModule = VK_NULL_HANDLE, gsModule = VK_NULL_HANDLE, psModule = VK_NULL_HANDLE; + std::vector vsCode, gsCode, psCode; + + const DxcDefine defs[] = {{L"VULKAN", L"1"}}; + + DxcCompileToSpirv(c_Shader, vsEntry, L"vs_6_0", L"", defs, 1, vsCode); + DxcCompileToSpirv(c_Shader, 
psEntry, L"ps_6_0", L"", defs, 1, psCode); + + if (gsEntry) + { + DxcCompileToSpirv(c_Shader, gsEntry, L"gs_6_0", L"", defs, 1, gsCode); + } + + VkShaderModuleCreateInfo smCI = {}; + smCI.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + + smCI.pCode = reinterpret_cast(vsCode.data()); + smCI.codeSize = vsCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &vsModule)); + + smCI.pCode = reinterpret_cast(psCode.data()); + smCI.codeSize = psCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &psModule)); + + char vsName[128]; + char psName[128]; + sprintf_s(vsName, "%S", vsEntry); + sprintf_s(psName, "%S", psEntry); + + VkPipelineShaderStageCreateInfo shaderStages[3] = {}; + shaderStages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[0].module = vsModule; + shaderStages[0].pName = vsName; + shaderStages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + shaderStages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[1].module = psModule; + shaderStages[1].pName = psName; + shaderStages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + + uint32_t numShaderStages = 2; + + if (gsEntry) + { + smCI.pCode = reinterpret_cast(gsCode.data()); + smCI.codeSize = gsCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &gsModule)); + + char gsName[128]; + sprintf_s(gsName, "%S", gsEntry); + + shaderStages[2].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[2].module = gsModule; + shaderStages[2].pName = gsName; + shaderStages[2].stage = VK_SHADER_STAGE_GEOMETRY_BIT; + + numShaderStages = 3; + } + + VkGraphicsPipelineCreateInfo psoCI = {}; + psoCI.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + psoCI.pNext = NULL; + psoCI.layout = m_pipelineLayout; + psoCI.basePipelineHandle = VK_NULL_HANDLE; + psoCI.basePipelineIndex = 0; + psoCI.flags = 0; + psoCI.pVertexInputState = &vi; + psoCI.pInputAssemblyState = &ia; + 
psoCI.pRasterizationState = &rs; + psoCI.pColorBlendState = &cb; + psoCI.pTessellationState = NULL; + psoCI.pMultisampleState = &ms; + psoCI.pDynamicState = &dynamicState; + psoCI.pViewportState = &vp; + psoCI.pDepthStencilState = pCustomDSInfo ? pCustomDSInfo : &ds; + psoCI.pStages = shaderStages; + psoCI.stageCount = numShaderStages; + psoCI.renderPass = renderPass; + psoCI.subpass = 0; + + ThrowIfFailedVK(vkCreateGraphicsPipelines(m_device, VK_NULL_HANDLE, 1, &psoCI, nullptr, pPso)); + + vkDestroyShaderModule(m_device, vsModule, nullptr); + vkDestroyShaderModule(m_device, psModule, nullptr); + if (gsModule != RPS_NULL_HANDLE) + { + vkDestroyShaderModule(m_device, gsModule, nullptr); + } + } + +private: + VkDescriptorSetLayout m_descriptorSetLayout = VK_NULL_HANDLE; + VkPipelineLayout m_pipelineLayout = VK_NULL_HANDLE; + VkPipeline m_psoMrt5NoDS = VK_NULL_HANDLE; + VkPipeline m_psoMrt3DS = VK_NULL_HANDLE; + VkPipeline m_psoRtArray = VK_NULL_HANDLE; + VkPipeline m_psoRtArrayCube = VK_NULL_HANDLE; + VkPipeline m_psoBlt = VK_NULL_HANDLE; + VkPipeline m_psoBltCube = VK_NULL_HANDLE; + VkPipeline m_psoWriteDepthStencil = VK_NULL_HANDLE; + VkPipeline m_psoReadDepthWriteStencil = VK_NULL_HANDLE; + VkPipeline m_psoReadDepthStencil = VK_NULL_HANDLE; + VkSampler m_sampler = VK_NULL_HANDLE; +}; + +TEST_CASE(TEST_APP_NAME) +{ + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#if defined(BREAK_AT_ALLOC_ID) + _CrtSetBreakAlloc(BREAK_AT_ALLOC_ID); +#endif + + TestVkMrtViewportClear renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + runInfo.numFramesToRender = g_exitAfterFrame; + runInfo.width = 1280; + runInfo.height = 720; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} diff --git a/tests/gui/test_multi_queue.rpsl b/tests/gui/test_multi_queue.rpsl new file mode 100644 index 0000000..13071b9 --- /dev/null +++ b/tests/gui/test_multi_queue.rpsl @@ -0,0 +1,89 @@ +// Copyright (c) 2022 Advanced Micro Devices, 
Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +node UpdateInstanceData([writeonly(cpu)] buffer uploadBuffer, [writeonly(cpu)] buffer constBuffer); +compute node Procedural([readwrite(cs)] texture proceduralTexture, [readonly(cb, cs)] buffer constBuffer, uint2 outputDim); +compute node GenMip([writeonly(cs)] texture outMip, [readwrite(cs)] texture inMip, uint2 outputDim); +node ShadowMap([readonly(vs)] buffer instanceBuffer, [readonly(cb, vs)] buffer constBuffer, dsv shadowMap : SV_DepthStencil); +node ShadingPass(rtv colorBuffer : SV_Target0, + dsv depthBuffer : SV_DepthStencil, + [readonly(vs)] buffer instanceBuffer, + [readonly(ps)] texture shadowMap, + [readonly(ps)] texture proceduralTexture, + [readonly(cb, vs, ps)] buffer constBuffer); + +node UploadInstanceData([writeonly(copy)] buffer instanceBuffer, [readonly(copy)] buffer uploadBuffer) +{ + return copy_buffer(instanceBuffer, uploadBuffer); +} + +struct InstanceData +{ + float4x3 transform; + float4 color; +}; + +struct CBufferData +{ + float4x4 viewProjMat; + float4x4 lightViewProjMat; + float3 lightDir; + float timeInSeconds; +}; + +export void main([readonly(present)] texture backBuffer, uint numBoxes, uint shadowMapDim, uint proceduralTextureDim) +{ + clear(backBuffer, float4(0.0, 0.2, 0.4, 1.0)); + + ResourceDesc backBufferDesc = backBuffer.desc(); + + buffer uploadBuffer = create_buffer( + sizeof(InstanceData) * numBoxes, backBufferDesc.TemporalLayers, RPS_RESOURCE_FLAG_PREFER_GPU_LOCAL_CPU_VISIBLE); + + buffer constantBuffer = create_buffer( + sizeof(CBufferData), backBufferDesc.TemporalLayers, RPS_RESOURCE_FLAG_PREFER_GPU_LOCAL_CPU_VISIBLE); + + buffer instanceBuffer = create_buffer(sizeof(InstanceData) * numBoxes); + + // setting buffer view size to UINT64_MAX for RPS_BUFFER_WHOLE_SIZE test. 
+ buffer constantBufferView = constantBuffer.bytes(0, 0xffffffffffffffff); + buffer instanceBufferView = instanceBuffer.bytes(0, 0xffffffffffffffff); + + texture proceduralTexture = create_tex2d(RPS_FORMAT_R8G8B8A8_UNORM, proceduralTextureDim, proceduralTextureDim, 0); + + texture depthBuffer = create_tex2d(RPS_FORMAT_R32G8X24_TYPELESS, backBufferDesc.Width, backBufferDesc.Height); + + texture shadowMap = create_tex2d(RPS_FORMAT_R32_TYPELESS, shadowMapDim, shadowMapDim); + + UpdateInstanceData(uploadBuffer, constantBufferView); + + async UploadInstanceData(instanceBufferView, uploadBuffer); + + ResourceDesc proceduralTextureDesc = proceduralTexture.desc(); + + async Procedural(proceduralTexture, constantBufferView, uint2(proceduralTextureDim, proceduralTextureDim)); + + for (uint i = 0; (i + 1) < proceduralTextureDesc.MipLevels; i++) + { + async GenMip(proceduralTexture.mips(i + 1), + proceduralTexture.mips(i), + uint2(proceduralTextureDim, proceduralTextureDim) >> (i + 1)); + } + + clear_depth(shadowMap.format(RPS_FORMAT_D32_FLOAT), 1.0f); + + ShadowMap(instanceBufferView.stride(sizeof(InstanceData)), constantBufferView, shadowMap.format(RPS_FORMAT_D32_FLOAT)); + + clear(depthBuffer.format(RPS_FORMAT_D32_FLOAT_S8X24_UINT), 1.0f, 0); + + ShadingPass(backBuffer, + depthBuffer.format(RPS_FORMAT_D32_FLOAT_S8X24_UINT), + instanceBufferView.stride(sizeof(InstanceData)), + shadowMap.format(RPS_FORMAT_R32_FLOAT), + proceduralTexture, + constantBufferView); +} diff --git a/tests/gui/test_multi_queue_d3d12.cpp b/tests/gui/test_multi_queue_d3d12.cpp new file mode 100644 index 0000000..4856bba --- /dev/null +++ b/tests/gui/test_multi_queue_d3d12.cpp @@ -0,0 +1,296 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#include "test_multi_queue_shared.h" + +#include "rps/runtime/d3d12/rps_d3d12_runtime.h" +#include "utils/rps_test_win32.h" +#include "utils/rps_test_d3d12_renderer.h" + +using namespace DirectX; + +class TestD3D12MultiQueue : public RpsTestD3D12Renderer, public TestRpsMultiQueue +{ +public: + TestD3D12MultiQueue() + { + } + +protected: + virtual void OnInit(ID3D12GraphicsCommandList* pInitCmdList, + std::vector>& tempResources) override + { + LoadAssets(pInitCmdList, tempResources); + + TestRpsMultiQueue::Init(rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); })); + + auto hMainEntry = rpsRenderGraphGetMainEntry(GetRpsRenderGraph()); + REQUIRE_RPS_OK( + rpsProgramBindNode(hMainEntry, "UpdateInstanceData", &TestD3D12MultiQueue::UpdateInstanceData, this)); + REQUIRE_RPS_OK(rpsProgramBindNode(hMainEntry, "Procedural", &TestD3D12MultiQueue::Procedural, this)); + REQUIRE_RPS_OK(rpsProgramBindNode(hMainEntry, "GenMip", &TestD3D12MultiQueue::GenMip, this)); + REQUIRE_RPS_OK(rpsProgramBindNode(hMainEntry, "ShadowMap", &TestD3D12MultiQueue::ShadowMap, this)); + REQUIRE_RPS_OK(rpsProgramBindNode(hMainEntry, "ShadingPass", &TestD3D12MultiQueue::ShadingPass, this)); + } + + virtual void OnCleanUp() override + { + TestRpsMultiQueue::OnDestroy(); + + m_rootSigCompute = nullptr; + m_rootSigGfx = nullptr; + m_pipelineStateProcedural = nullptr; + m_pipelineStateMipGen = nullptr; + m_pipelineStateShadowMap = nullptr; + m_pipelineStateShading = nullptr; + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + Animate(XMUINT2(m_width, m_height)); + + UpdatePipeline(frameIndex, CalcGuaranteedCompletedFrameIndexForRps()); + } + + virtual void OnRender(uint32_t frameIndex) override + { + ExecuteRenderGraph(frameIndex, GetRpsRenderGraph()); + } + +private: + void UpdateInstanceData(const RpsCmdCallbackContext* pContext, + ID3D12Resource* pUploadBuffer, + ID3D12Resource* pConstantBuffer) + { + constexpr 
D3D12_RANGE emptyReadRange = {0, 0}; + void* pData = nullptr; + + if (SUCCEEDED(pUploadBuffer->Map(0, &emptyReadRange, &pData))) + { + size_t sizeToCopy = + std::min(size_t(pUploadBuffer->GetDesc().Width), m_instanceDataGpu.size() * sizeof(InstanceDataGPU)); + + memcpy(pData, m_instanceDataGpu.data(), sizeToCopy); + + pUploadBuffer->Unmap(0, nullptr); + } + + if (SUCCEEDED(pConstantBuffer->Map(0, &emptyReadRange, &pData))) + { + memcpy(pData, &m_cbufferData, sizeof(m_cbufferData)); + + pConstantBuffer->Unmap(0, nullptr); + } + } + + void Procedural(const RpsCmdCallbackContext* pContext, + D3D12_CPU_DESCRIPTOR_HANDLE proceduralTextureUav, + ID3D12Resource* pConstantBuffer, + const XMUINT2& outputDim) + { + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + D3D12_CPU_DESCRIPTOR_HANDLE uavHdls[2] = {proceduralTextureUav, proceduralTextureUav}; + D3D12_GPU_DESCRIPTOR_HANDLE uavTable = + AllocDynamicDescriptorsAndWrite(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, uavHdls, 2); + + BindDescriptorHeaps(pCmdList); + pCmdList->SetComputeRootSignature(m_rootSigCompute.Get()); + + pCmdList->SetPipelineState(m_pipelineStateProcedural.Get()); + pCmdList->SetComputeRootConstantBufferView(0, pConstantBuffer->GetGPUVirtualAddress()); + pCmdList->SetComputeRootDescriptorTable(1, uavTable); + pCmdList->Dispatch(DivRoundUp(outputDim.x, 8), DivRoundUp(outputDim.y, 8), 1); + } + + void GenMip(const RpsCmdCallbackContext* pContext, + D3D12_CPU_DESCRIPTOR_HANDLE outMip, + D3D12_CPU_DESCRIPTOR_HANDLE inMip, + const XMUINT2& outputDim) + { + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + D3D12_CPU_DESCRIPTOR_HANDLE uavHdls[2] = {outMip, inMip}; + D3D12_GPU_DESCRIPTOR_HANDLE uavTable = + AllocDynamicDescriptorsAndWrite(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, uavHdls, 2); + + BindDescriptorHeaps(pCmdList); + pCmdList->SetComputeRootSignature(m_rootSigCompute.Get()); + + 
pCmdList->SetPipelineState(m_pipelineStateMipGen.Get()); + pCmdList->SetComputeRootDescriptorTable(1, uavTable); + pCmdList->Dispatch(DivRoundUp(outputDim.x, 8), DivRoundUp(outputDim.y, 8), 1); + } + + void ShadowMap(const RpsCmdCallbackContext* pContext, + D3D12_CPU_DESCRIPTOR_HANDLE instanceBuffer, + ID3D12Resource* pConstBuffer) + { + if (!m_pipelineStateShadowMap) + { + RpsCmdRenderTargetInfo renderTargetInfo; + REQUIRE_RPS_OK(rpsCmdGetRenderTargetsInfo(pContext, &renderTargetInfo)); + CreateGfxPSO(L"VSShadow", nullptr, &renderTargetInfo, &m_pipelineStateShadowMap); + } + + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + D3D12_GPU_DESCRIPTOR_HANDLE srvTable = + AllocDynamicDescriptorsAndWrite(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, &instanceBuffer, 1); + + BindDescriptorHeaps(pCmdList); + pCmdList->SetGraphicsRootSignature(m_rootSigGfx.Get()); + + pCmdList->SetPipelineState(m_pipelineStateShadowMap.Get()); + pCmdList->SetGraphicsRootConstantBufferView(0, pConstBuffer->GetGPUVirtualAddress()); + pCmdList->SetGraphicsRootDescriptorTable(1, srvTable); + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdList->DrawInstanced(36, uint32_t(m_instanceData.size()), 0, 0); + } + + void ShadingPass(const RpsCmdCallbackContext* pContext, + rps::UnusedArg colorBuffer, + rps::UnusedArg depthBuffer, + D3D12_CPU_DESCRIPTOR_HANDLE instanceBuffer, + D3D12_CPU_DESCRIPTOR_HANDLE shadowMap, + D3D12_CPU_DESCRIPTOR_HANDLE proceduralTexture, + ID3D12Resource* pConstBuffer) + { + if (!m_pipelineStateShading) + { + RpsCmdRenderTargetInfo renderTargetInfo; + REQUIRE_RPS_OK(rpsCmdGetRenderTargetsInfo(pContext, &renderTargetInfo)); + CreateGfxPSO(L"VSShading", L"PSShading", &renderTargetInfo, &m_pipelineStateShading); + } + + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + D3D12_GPU_DESCRIPTOR_HANDLE srvTable = + 
AllocDynamicDescriptorsAndWrite(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, &instanceBuffer, 1); + + D3D12_CPU_DESCRIPTOR_HANDLE srcSRVs[] = {shadowMap, proceduralTexture}; + D3D12_GPU_DESCRIPTOR_HANDLE srvTablePS = + AllocDynamicDescriptorsAndWrite(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, srcSRVs, 2); + + BindDescriptorHeaps(pCmdList); + pCmdList->SetGraphicsRootSignature(m_rootSigGfx.Get()); + + pCmdList->SetPipelineState(m_pipelineStateShading.Get()); + pCmdList->SetGraphicsRootConstantBufferView(0, pConstBuffer->GetGPUVirtualAddress()); + pCmdList->SetGraphicsRootDescriptorTable(1, srvTable); + pCmdList->SetGraphicsRootDescriptorTable(2, srvTablePS); + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdList->DrawInstanced(36, uint32_t(m_instanceData.size()), 0, 0); + } + +private: + void LoadAssets(ID3D12GraphicsCommandList* pInitCmdList, std::vector>& tempResources) + { + OnPostResize(); + + CreateComputePSOs(); + } + + void CreateComputePSOs() + { + std::vector csCode; + + DxcCompile(c_Shader, L"CSProcedural", L"cs_6_0", L"", nullptr, 0, csCode); + ThrowIfFailed(m_device->CreateRootSignature(0, csCode.data(), csCode.size(), IID_PPV_ARGS(&m_rootSigCompute))); + + D3D12_COMPUTE_PIPELINE_STATE_DESC compPsoDesc = {}; + + compPsoDesc.pRootSignature = m_rootSigCompute.Get(); + compPsoDesc.CS.pShaderBytecode = csCode.data(); + compPsoDesc.CS.BytecodeLength = csCode.size(); + + ThrowIfFailed(m_device->CreateComputePipelineState(&compPsoDesc, IID_PPV_ARGS(&m_pipelineStateProcedural))); + + DxcCompile(c_Shader, L"CSMipGen", L"cs_6_0", L"", nullptr, 0, csCode); + + compPsoDesc.CS.pShaderBytecode = csCode.data(); + compPsoDesc.CS.BytecodeLength = csCode.size(); + + ThrowIfFailed(m_device->CreateComputePipelineState(&compPsoDesc, IID_PPV_ARGS(&m_pipelineStateMipGen))); + } + + void CreateGfxPSO(LPCWSTR vsEntry, + LPCWSTR psEntry, + const RpsCmdRenderTargetInfo* pRtInfo, + ID3D12PipelineState** ppState) + { + std::vector vsCode, psCode; + + 
DxcCompile(c_Shader, vsEntry, L"vs_6_0", L"", nullptr, 0, vsCode); + + if (!m_rootSigGfx) + { + ThrowIfFailed(m_device->CreateRootSignature(0, vsCode.data(), vsCode.size(), IID_PPV_ARGS(&m_rootSigGfx))); + } + + // Describe and create the graphics pipeline state object (PSO). + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.pRootSignature = m_rootSigGfx.Get(); + psoDesc.VS = CD3DX12_SHADER_BYTECODE(vsCode.data(), vsCode.size()); + if (psEntry) + { + DxcCompile(c_Shader, psEntry, L"ps_6_0", L"", nullptr, 0, psCode); + psoDesc.PS = CD3DX12_SHADER_BYTECODE(psCode.data(), psCode.size()); + } + psoDesc.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT); + psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); + psoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); + psoDesc.SampleMask = UINT_MAX; + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + + psoDesc.DSVFormat = rpsFormatToDXGI(pRtInfo->depthStencilFormat); + psoDesc.NumRenderTargets = pRtInfo->numRenderTargets; + for (uint32_t i = 0; i < pRtInfo->numRenderTargets; i++) + { + psoDesc.RTVFormats[i] = rpsFormatToDXGI(pRtInfo->renderTargetFormats[i]); + } + psoDesc.SampleDesc.Count = pRtInfo->numSamples; + + ThrowIfFailed(m_device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(ppState))); + } + + void UpdatePipeline(uint64_t frameIndex, uint64_t completedFrameIndex) + { + RpsRuntimeResource backBuffers[DXGI_MAX_SWAP_CHAIN_BUFFERS]; + RpsResourceDesc backBufferDesc = {}; + + GetBackBuffers(backBufferDesc, backBuffers); + + TestRpsMultiQueue::UpdateRpsPipeline(frameIndex, completedFrameIndex, backBufferDesc, backBuffers); + } + +private: + ComPtr m_rootSigCompute; + ComPtr m_rootSigGfx; + ComPtr m_pipelineStateProcedural; + ComPtr m_pipelineStateMipGen; + ComPtr m_pipelineStateShadowMap; + ComPtr m_pipelineStateShading; +}; + +TEST_CASE(TEST_APP_NAME) +{ +#if _DEBUG + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#endif + + 
TestD3D12MultiQueue renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + runInfo.numFramesToRender = g_exitAfterFrame; + runInfo.width = 1280; + runInfo.height = 720; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} diff --git a/tests/gui/test_multi_queue_shared.h b/tests/gui/test_multi_queue_shared.h new file mode 100644 index 0000000..932b72d --- /dev/null +++ b/tests/gui/test_multi_queue_shared.h @@ -0,0 +1,314 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#include "utils/rps_test_host.h" +#include "app_framework/afx_threadpool.h" + +RPS_DECLARE_RPSL_ENTRY(test_multi_queue, main); + +#include + +static const char c_Shader[] = R"--( + +#define COMP_RS "CBV(b0), DescriptorTable( UAV(u0, numDescriptors = 2 ) )" +#define GFX_RS "RootFlags( ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT ), CBV(b0)," \ + "DescriptorTable( SRV(t0), visibility = SHADER_VISIBILITY_VERTEX )," \ + "DescriptorTable( SRV(t1, numDescriptors = 2), visibility = SHADER_VISIBILITY_PIXEL )," \ + "StaticSampler( s0, filter = FILTER_COMPARISON_MIN_MAG_MIP_LINEAR, addressU = TEXTURE_ADDRESS_CLAMP, addressV = TEXTURE_ADDRESS_CLAMP, addressW = TEXTURE_ADDRESS_CLAMP, comparisonFunc = COMPARISON_LESS_EQUAL )," \ + "StaticSampler( s1, filter = FILTER_ANISOTROPIC )" + +struct V2P +{ + float4 Pos : SV_Position; + float4 Color : COLOR0; + float2 UV : TEXCOORD0; + float3 Normal : TEXCOORD1; + float3 WorldPos : TEXCOORD2; +}; + +struct Vertex +{ + float4 Pos; + float3 Nrm; + float2 UV; +}; + +struct InstanceData +{ + float4x3 worldMat; + float4 color; +}; + +struct CBufferData +{ + float4x4 viewProjMat; + float4x4 lightViewProjMat; + float3 lightDir; + float timeInSeconds; +}; + +#ifdef VULKAN +[[vk::binding(0, 0)]] +#endif +ConstantBuffer cbuf; + 
+#ifdef VULKAN +[[vk::binding(1, 0)]] +#endif +RWTexture2D outImg : register(u0); + +#ifdef VULKAN +[[vk::binding(2, 0)]] +#endif +RWTexture2D inImg : register(u1); + +[RootSignature(COMP_RS)] +[numthreads(8, 8, 1)] +void CSProcedural(uint2 dtID : SV_DispatchThreadID) +{ + uint2 dim; + outImg.GetDimensions(dim.x, dim.y); + + float sum = 0; + for(uint i = 0; i < 16; i++) + { + float t = length((int2(dtID) - int2(dim / 2)) * 0.01f) * (i + 1) + cbuf.timeInSeconds; + sum += sin(t) * pow(0.5f, i); + } + + outImg[dtID] = sum * 0.3f + 0.7f; +} + +[RootSignature(COMP_RS)] +[numthreads(8, 8, 1)] +void CSMipGen(uint2 dtID : SV_DispatchThreadID) +{ + uint2 inCoord = dtID << 1; + outImg[dtID] = (inImg[inCoord] + inImg[inCoord + uint2(1, 0)] + inImg[inCoord + uint2(0, 1)] + inImg[inCoord + uint2(1, 1)]) * 0.25f; +} + +Vertex GetCubeVertex(uint vId) +{ + uint posIdxMap[] = { + 7, 6, 3, 3, 6, 2, // X+ + 4, 5, 6, 4, 6, 7, // Y+ + 0, 7, 3, 0, 4, 7, // Z+ + 0, 5, 4, 0, 1, 5, // X- + 3, 2, 0, 0, 2, 1, // Y- + 1, 6, 5, 1, 2, 6, // Z- + }; + + uint idx = posIdxMap[vId]; + + Vertex vert; + vert.Pos = float4( + (idx & 2) ? 1 : -1, + (idx & 4) ? 1 : -1, + (((idx & 3) == 0) || ((idx & 3) == 3)) ? 1 : -1, + 1.0f); + + uint faceId = vId / 6; + vert.Nrm = float3( + (faceId == 0) ? 1.0f : ((faceId == 3) ? -1.0f : 0), + (faceId == 1) ? 1.0f : ((faceId == 4) ? -1.0f : 0), + (faceId == 2) ? 1.0f : ((faceId == 5) ? -1.0f : 0)); + + vert.UV = + (((faceId == 0) || (faceId == 3)) ? vert.Pos.yz : + (((faceId == 1) || (faceId == 4)) ? 
vert.Pos.xz : vert.Pos.xy)) * 0.5f + 0.5f; + + return vert; +} + +#ifdef VULKAN +[[vk::binding(1, 0)]] +#endif +StructuredBuffer instanceDataBuf : register(t0); +#ifdef VULKAN +[[vk::binding(2, 0)]] +#endif +Texture2D shadowMap : register(t1); +#ifdef VULKAN +[[vk::binding(3, 0)]] +#endif +Texture2D proceduralImg : register(t2); +#ifdef VULKAN +[[vk::binding(4, 0)]] +#endif +SamplerComparisonState shadowMapSampler : register(s0); +#ifdef VULKAN +[[vk::binding(5, 0)]] +#endif +SamplerState imgSampler : register(s1); + +[RootSignature( GFX_RS )] +float4 VSShadow(uint vId : SV_VertexID, uint instId : SV_InstanceID) : SV_Position +{ + Vertex vert = GetCubeVertex( vId ); + InstanceData instanceData = instanceDataBuf[instId]; + + return mul( cbuf.lightViewProjMat, float4(mul(vert.Pos, instanceData.worldMat), 1) ); +} + +[RootSignature( GFX_RS )] +V2P VSShading(uint vId : SV_VertexID, uint instId : SV_InstanceID) +{ + Vertex vert = GetCubeVertex( vId ); + InstanceData instanceData = instanceDataBuf[instId]; + + V2P vsOut; + float3 worldPos = mul(vert.Pos, instanceData.worldMat); + vsOut.Pos = mul( cbuf.viewProjMat, float4(worldPos, 1) ); + vsOut.Color = instanceData.color; + vsOut.UV = vert.UV; + vsOut.Normal = normalize(mul(vert.Nrm, (float3x3)(instanceData.worldMat))); + vsOut.WorldPos = worldPos; + + return vsOut; +} + +float4 PSShading(V2P psIn) : SV_Target0 +{ + float3 lightProjCoord = mul( cbuf.lightViewProjMat, float4(psIn.WorldPos, 1) ).xyz; + float2 lightUV = lightProjCoord.xy * float2(0.5f, -0.5f) + 0.5f; + + float shadowValue = shadowMap.SampleCmpLevelZero( shadowMapSampler, lightUV, lightProjCoord.z - 0.001f ); + + float4 texureValue = proceduralImg.Sample( imgSampler, psIn.UV ); + + return psIn.Color * max(0.2f, saturate(dot(psIn.Normal, -cbuf.lightDir)) * shadowValue) * texureValue; +} +)--"; + +using namespace DirectX; + +#define TEST_APP_NAME_RAW "TestMultiQueue" + +class TestRpsMultiQueue : public RpsTestHost +{ +public: + struct InstanceData + { + 
XMFLOAT3 offset; + float scale; + XMFLOAT3 color; + float spinSpeed; + float rotationSpeed; + }; + + struct InstanceDataGPU + { + XMFLOAT3X4 transform; + XMFLOAT4 color; + }; + + struct CBufferData + { + XMFLOAT4X4 viewProjMat; + XMFLOAT4X4 lightViewProjMat; + XMFLOAT3 lightDir; + float timeInSeconds; + }; + +public: + TestRpsMultiQueue() + { + g_MultiQueueMode = MULTI_QUEUE_GFX_COMPUTE_COPY; + } + +protected: + void Init(RpsDevice hRpsDevice) + { + uint32_t numInstances = 4096; + m_instanceData.resize(numInstances); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dist(0.0f, 1.0f); + + m_instanceData[0] = {}; + m_instanceData[0].offset = XMFLOAT3(0, -32.0f, 0); + m_instanceData[0].scale = 32.0f; + m_instanceData[0].color = XMFLOAT3(1, 1, 1); + + for (uint32_t i = 1; i < m_instanceData.size(); i++) + { + m_instanceData[i].offset = + XMFLOAT3((dist(gen) - 0.5f) * 64.0f, dist(gen) * 3.0f + 4.0f, (dist(gen) - 0.5f) * 64.0f); + m_instanceData[i].scale = dist(gen) * 0.4f + 0.4f; + XMStoreFloat3(&m_instanceData[i].color, DirectX::XMColorHSVToRGB(XMVectorSet(dist(gen), 1, 1, 0))); + m_instanceData[i].spinSpeed = dist(gen); + m_instanceData[i].rotationSpeed = dist(gen); + } + m_instanceDataGpu.resize(m_instanceData.size()); + + m_cbufferData = {}; + + XMVECTOR lightDir = XMVector3Normalize(XMVectorSet(1, -0.5f, -0.75f, 1)); + + XMStoreFloat3(&m_cbufferData.lightDir, lightDir); + + XMMATRIX lightView = XMMatrixLookToRH(-lightDir * 100, lightDir, XMVectorSet(0, 1, 0, 1)); + XMMATRIX lightProj = XMMatrixOrthographicOffCenterRH(-75, 75, -75, 75, 0.5f, 1000.0f); + + XMStoreFloat4x4(&m_cbufferData.lightViewProjMat, lightView * lightProj); + + RpsTestHost::OnInit(hRpsDevice, rpsTestLoadRpslEntry(test_multi_queue, main)); + } + + void Animate(const XMUINT2& viewportSize) + { + float time = float(RpsAfxCpuTimer::SecondsSinceEpoch().count()); + + for (uint32_t i = 0; i < m_instanceData.size(); i++) + { + auto& instanceDataGpu = 
m_instanceDataGpu[i]; + const auto& instanceData = m_instanceData[i]; + + XMMATRIX transform = + XMMatrixScaling(instanceData.scale, instanceData.scale, instanceData.scale) * + XMMatrixRotationAxis(XMVectorSet(0, 1, 0, 1), instanceData.spinSpeed * time * XM_2PI) * + XMMatrixTranslation(instanceData.offset.x, instanceData.offset.y, instanceData.offset.z) * + XMMatrixRotationAxis(XMVectorSet(0, 1, 0, 1), instanceData.rotationSpeed * time * XM_2PI); + + XMStoreFloat3x4(&instanceDataGpu.transform, transform); + XMStoreFloat4(&instanceDataGpu.color, XMLoadFloat3(&instanceData.color)); + } + + XMMATRIX camView = XMMatrixLookAtRH(XMVectorSet(0, 40, 80, 1), XMVectorZero(), XMVectorSet(0, 1, 0, 1)); + XMMATRIX camProj = + XMMatrixPerspectiveFovRH(XMConvertToRadians(53), viewportSize.x / float(viewportSize.y), 0.5f, 1000.0f); + + XMStoreFloat4x4(&m_cbufferData.viewProjMat, camView * camProj); + m_cbufferData.timeInSeconds = time; + } + + virtual void UpdateRpsPipeline(uint64_t frameIndex, + uint64_t completedFrameIndex, + const RpsResourceDesc& backBufferDesc, + const RpsRuntimeResource* pBackBuffers) + { + const uint32_t numInstances = uint32_t(m_instanceData.size()); + + const RpsRuntimeResource* argResources[] = {pBackBuffers}; + RpsConstant argData[] = {&backBufferDesc, &numInstances, &m_shadowMapDim, &m_proceduralTextureDim}; + + RpsTestHost::OnUpdate(frameIndex, completedFrameIndex, _countof(argData), argData, argResources); + } + +private: + void DefaultCallback(const RpsCmdCallbackContext* pContext) + { + } + +protected: + std::vector m_instanceData; + std::vector m_instanceDataGpu; + CBufferData m_cbufferData; + uint32_t m_shadowMapDim = 8192; + uint32_t m_proceduralTextureDim = 4096; +}; diff --git a/tests/gui/test_multi_queue_vk.cpp b/tests/gui/test_multi_queue_vk.cpp new file mode 100644 index 0000000..fea2c8f --- /dev/null +++ b/tests/gui/test_multi_queue_vk.cpp @@ -0,0 +1,642 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifdef _WIN32 +#define VK_USE_PLATFORM_WIN32_KHR +#else +#error "TODO" +#endif + +#include "test_multi_queue_shared.h" + +#include "rps/runtime/vk/rps_vk_runtime.h" +#include "utils/rps_test_win32.h" +#include "utils/rps_test_vk_renderer.h" + +using namespace DirectX; + +class TestVKMultiQueue : public RpsTestVulkanRenderer, public TestRpsMultiQueue +{ + static constexpr uint32_t CONST_BINDING = 0u; + static constexpr uint32_t COMP_IMG_BINDING = 1u; + static constexpr uint32_t GFX_INSTANCE_BINDING = 1u; + static constexpr uint32_t GFX_IMG_BINDING = 2u; + static constexpr uint32_t GFX_SAMPLER_BINDING = 4u; + +public: + TestVKMultiQueue() + { + } + +protected: + virtual void OnInit(VkCommandBuffer initCmdList, InitTempResources& tempResources) override + { + LoadAssets(); + + TestRpsMultiQueue::Init(rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); })); + + auto hMainEntry = rpsRenderGraphGetMainEntry(GetRpsRenderGraph()); + REQUIRE_RPS_OK( + rpsProgramBindNode(hMainEntry, "UpdateInstanceData", &TestVKMultiQueue::UpdateInstanceData, this)); + REQUIRE_RPS_OK(rpsProgramBindNode(hMainEntry, "Procedural", &TestVKMultiQueue::Procedural, this)); + REQUIRE_RPS_OK(rpsProgramBindNode(hMainEntry, "GenMip", &TestVKMultiQueue::GenMip, this)); + REQUIRE_RPS_OK(rpsProgramBindNode(hMainEntry, "ShadowMap", &TestVKMultiQueue::ShadowMap, this)); + REQUIRE_RPS_OK(rpsProgramBindNode(hMainEntry, "ShadingPass", &TestVKMultiQueue::ShadingPass, this)); + } + + virtual void OnCleanUp() override + { + TestRpsMultiQueue::OnDestroy(); + + vkDestroyDescriptorSetLayout(m_device, m_descriptorSetLayoutComp, nullptr); + vkDestroyDescriptorSetLayout(m_device, m_descriptorSetLayoutGfx, nullptr); + vkDestroyPipelineLayout(m_device, 
m_pipelineLayoutComp, nullptr); + vkDestroyPipelineLayout(m_device, m_pipelineLayoutGfx, nullptr); + vkDestroyPipeline(m_device, m_pipelineStateProcedural, nullptr); + vkDestroyPipeline(m_device, m_pipelineStateMipGen, nullptr); + vkDestroyPipeline(m_device, m_pipelineStateShadowMap, nullptr); + vkDestroyPipeline(m_device, m_pipelineStateShading, nullptr); + vkDestroySampler(m_device, m_shadowMapSampler, nullptr); + vkDestroySampler(m_device, m_proceduralImgSampler, nullptr); + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + Animate(XMUINT2(m_width, m_height)); + + UpdatePipeline(frameIndex, CalcGuaranteedCompletedFrameIndexForRps()); + } + + virtual void OnRender(uint32_t frameIndex) override + { + ExecuteRenderGraph(frameIndex, GetRpsRenderGraph()); + } + +private: + void UpdateInstanceData(const RpsCmdCallbackContext* pContext, + RpsVkDeviceMemoryRange uploadBuffer, + RpsVkDeviceMemoryRange constantBuffer) + { + void* pData = nullptr; + REQUIRE(VK_SUCCESS == + vkMapMemory(m_device, uploadBuffer.hMemory, uploadBuffer.offset, uploadBuffer.size, 0, &pData)); + + size_t sizeToCopy = std::min(uploadBuffer.size, m_instanceDataGpu.size() * sizeof(InstanceDataGPU)); + + memcpy(pData, m_instanceDataGpu.data(), sizeToCopy); + + vkUnmapMemory(m_device, uploadBuffer.hMemory); + + REQUIRE(VK_SUCCESS == + vkMapMemory(m_device, constantBuffer.hMemory, constantBuffer.offset, constantBuffer.size, 0, &pData)); + + memcpy(pData, &m_cbufferData, sizeof(m_cbufferData)); + + vkUnmapMemory(m_device, constantBuffer.hMemory); + } + + void Procedural(const RpsCmdCallbackContext* pContext, + VkImageView proceduralTextureUav, + VkBuffer constantBuffer, + const XMUINT2& outputDim) + { + VkCommandBuffer hCmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_descriptorSetLayoutComp, 1, &ds)); + + VkWriteDescriptorSet writeDescriptorSet[2] = {}; + + VkDescriptorBufferInfo constBufferInfo = 
{constantBuffer, 0, VK_WHOLE_SIZE}; + VkDescriptorImageInfo imageInfo = {VK_NULL_HANDLE, proceduralTextureUav, VK_IMAGE_LAYOUT_GENERAL}; + AppendWriteDescriptorSetBuffers( + &writeDescriptorSet[0], ds, CONST_BINDING, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &constBufferInfo); + AppendWriteDescriptorSetImages( + &writeDescriptorSet[1], ds, COMP_IMG_BINDING, 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageInfo); + + vkUpdateDescriptorSets(m_device, RPS_TEST_COUNTOF(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + + vkCmdBindDescriptorSets(hCmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipelineLayoutComp, 0, 1, &ds, 0, nullptr); + vkCmdBindPipeline(hCmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipelineStateProcedural); + vkCmdDispatch(hCmdBuf, DivRoundUp(outputDim.x, 8), DivRoundUp(outputDim.y, 8), 1); + } + + void GenMip(const RpsCmdCallbackContext* pContext, VkImageView outMip, VkImageView inMip, const XMUINT2& outputDim) + { + VkCommandBuffer hCmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_descriptorSetLayoutComp, 1, &ds)); + + VkDescriptorImageInfo imageInfos[2] = { + {VK_NULL_HANDLE, outMip, VK_IMAGE_LAYOUT_GENERAL}, + {VK_NULL_HANDLE, inMip, VK_IMAGE_LAYOUT_GENERAL}, + }; + + VkWriteDescriptorSet writeDescriptorSet[1] = {}; + AppendWriteDescriptorSetImages( + &writeDescriptorSet[0], ds, COMP_IMG_BINDING, 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, imageInfos); + + vkUpdateDescriptorSets(m_device, RPS_TEST_COUNTOF(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + + vkCmdBindDescriptorSets(hCmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipelineLayoutComp, 0, 1, &ds, 0, nullptr); + vkCmdBindPipeline(hCmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipelineStateMipGen); + vkCmdDispatch(hCmdBuf, DivRoundUp(outputDim.x, 8), DivRoundUp(outputDim.y, 8), 1); + } + + void ShadowMap(const RpsCmdCallbackContext* pContext, VkBuffer instanceBuffer, VkBuffer constantBuffer) + { + if (!m_pipelineStateShadowMap) 
+ { + RpsCmdRenderTargetInfo renderTargetInfo; + REQUIRE_RPS_OK(rpsCmdGetRenderTargetsInfo(pContext, &renderTargetInfo)); + VkRenderPass hRenderPass = VK_NULL_HANDLE; + REQUIRE_RPS_OK(rpsVKGetCmdRenderPass(pContext, &hRenderPass)); + + CreateGfxPSO(L"VSShadow", nullptr, nullptr, &renderTargetInfo, m_pipelineLayoutGfx, hRenderPass, &m_pipelineStateShadowMap); + } + + VkCommandBuffer hCmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_descriptorSetLayoutGfx, 1, &ds)); + + VkDescriptorBufferInfo constBufferInfo = {constantBuffer, 0, VK_WHOLE_SIZE}; + VkDescriptorBufferInfo instanceBufferInfo = {instanceBuffer, 0, VK_WHOLE_SIZE}; + + + VkWriteDescriptorSet writeDescriptorSet[2] = {}; + AppendWriteDescriptorSetBuffers( + &writeDescriptorSet[0], ds, CONST_BINDING, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &constBufferInfo); + AppendWriteDescriptorSetBuffers( + &writeDescriptorSet[1], ds, GFX_INSTANCE_BINDING, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &instanceBufferInfo); + + vkUpdateDescriptorSets(m_device, RPS_TEST_COUNTOF(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + + vkCmdBindDescriptorSets(hCmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayoutGfx, 0, 1, &ds, 0, nullptr); + vkCmdBindPipeline(hCmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineStateShadowMap); + vkCmdDraw(hCmdBuf, 36, uint32_t(m_instanceData.size()), 0, 0); + } + + void ShadingPass(const RpsCmdCallbackContext* pContext, + rps::UnusedArg colorBuffer, + rps::UnusedArg depthBuffer, + VkBuffer instanceBuffer, + VkImageView shadowMap, + VkImageView proceduralTexture, + VkBuffer constantBuffer) + { + if (!m_pipelineStateShading) + { + RpsCmdRenderTargetInfo renderTargetInfo; + REQUIRE_RPS_OK(rpsCmdGetRenderTargetsInfo(pContext, &renderTargetInfo)); + VkRenderPass hRenderPass = VK_NULL_HANDLE; + REQUIRE_RPS_OK(rpsVKGetCmdRenderPass(pContext, &hRenderPass)); + CreateGfxPSO(L"VSShading", + L"PSShading", + nullptr, + 
&renderTargetInfo, + m_pipelineLayoutGfx, + hRenderPass, + &m_pipelineStateShading); + } + + VkCommandBuffer hCmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_descriptorSetLayoutGfx, 1, &ds)); + + VkDescriptorBufferInfo constBufferInfo = {constantBuffer, 0, VK_WHOLE_SIZE}; + VkDescriptorBufferInfo instanceBufferInfo = {instanceBuffer, 0, VK_WHOLE_SIZE}; + + VkWriteDescriptorSet writeDescriptorSet[4] = {}; + AppendWriteDescriptorSetBuffers( + &writeDescriptorSet[0], ds, CONST_BINDING, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &constBufferInfo); + AppendWriteDescriptorSetBuffers(&writeDescriptorSet[1], + ds, + GFX_INSTANCE_BINDING, + 1, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + &instanceBufferInfo); + + VkDescriptorImageInfo shadowMapImageInfo = { + VK_NULL_HANDLE, shadowMap, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo proceduralImageInfo = { + VK_NULL_HANDLE, proceduralTexture, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + + AppendWriteDescriptorSetImages( + &writeDescriptorSet[2], ds, GFX_IMG_BINDING, 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &shadowMapImageInfo); + AppendWriteDescriptorSetImages( + &writeDescriptorSet[3], ds, GFX_IMG_BINDING + 1, 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &proceduralImageInfo); + + vkUpdateDescriptorSets(m_device, RPS_TEST_COUNTOF(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + + vkCmdBindDescriptorSets(hCmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayoutGfx, 0, 1, &ds, 0, nullptr); + vkCmdBindPipeline(hCmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineStateShading); + vkCmdDraw(hCmdBuf, 36, uint32_t(m_instanceData.size()), 0, 0); + } + +private: + void LoadAssets() + { + OnPostResize(); + + CreatePsoLayouts(); + + CreateComputePSO(L"CSProcedural", m_pipelineLayoutComp, &m_pipelineStateProcedural); + CreateComputePSO(L"CSMipGen", m_pipelineLayoutComp, &m_pipelineStateMipGen); + } + + void CreatePsoLayouts() + { + { + 
VkSamplerCreateInfo samplerCI = {VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO}; + samplerCI.magFilter = VK_FILTER_LINEAR; + samplerCI.minFilter = VK_FILTER_LINEAR; + samplerCI.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + samplerCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCI.mipLodBias = 0.0f; + samplerCI.compareEnable = VK_TRUE; + samplerCI.compareOp = VK_COMPARE_OP_LESS; + samplerCI.minLod = 0.0f; + samplerCI.maxLod = FLT_MAX; + samplerCI.maxAnisotropy = 1.0; + samplerCI.anisotropyEnable = VK_FALSE; + samplerCI.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + + ThrowIfFailedVK(vkCreateSampler(m_device, &samplerCI, nullptr, &m_shadowMapSampler)); + + samplerCI.magFilter = VK_FILTER_LINEAR; + samplerCI.minFilter = VK_FILTER_LINEAR; + samplerCI.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + samplerCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; + samplerCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; + samplerCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + samplerCI.mipLodBias = 0.0f; + samplerCI.compareEnable = VK_FALSE; + samplerCI.compareOp = VK_COMPARE_OP_NEVER; + samplerCI.minLod = 0.0f; + samplerCI.maxLod = FLT_MAX; + samplerCI.maxAnisotropy = 1.0; + samplerCI.anisotropyEnable = VK_FALSE; + samplerCI.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + + ThrowIfFailedVK(vkCreateSampler(m_device, &samplerCI, nullptr, &m_proceduralImgSampler)); + + VkSampler samplers[] = {m_shadowMapSampler, m_proceduralImgSampler}; + + VkDescriptorSetLayoutBinding sharedLayoutBindings[6] = {}; + sharedLayoutBindings[0].binding = CONST_BINDING; + sharedLayoutBindings[0].descriptorCount = 1; + sharedLayoutBindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + sharedLayoutBindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; + sharedLayoutBindings[1].binding = GFX_INSTANCE_BINDING; + 
sharedLayoutBindings[1].descriptorCount = 1; + sharedLayoutBindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + sharedLayoutBindings[1].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + sharedLayoutBindings[2].binding = GFX_IMG_BINDING; + sharedLayoutBindings[2].descriptorCount = 1; + sharedLayoutBindings[2].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + sharedLayoutBindings[2].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + sharedLayoutBindings[3].binding = GFX_IMG_BINDING + 1; + sharedLayoutBindings[3].descriptorCount = 1; + sharedLayoutBindings[3].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + sharedLayoutBindings[3].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + sharedLayoutBindings[4].binding = GFX_SAMPLER_BINDING; + sharedLayoutBindings[4].descriptorCount = 1; + sharedLayoutBindings[4].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + sharedLayoutBindings[4].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + sharedLayoutBindings[4].pImmutableSamplers = &m_shadowMapSampler; + sharedLayoutBindings[5].binding = GFX_SAMPLER_BINDING + 1; + sharedLayoutBindings[5].descriptorCount = 1; + sharedLayoutBindings[5].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + sharedLayoutBindings[5].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + sharedLayoutBindings[5].pImmutableSamplers = &m_proceduralImgSampler; + + VkDescriptorSetLayoutCreateInfo setLayoutCI = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + setLayoutCI.pBindings = sharedLayoutBindings; + setLayoutCI.bindingCount = _countof(sharedLayoutBindings); + + ThrowIfFailedVK(vkCreateDescriptorSetLayout(m_device, &setLayoutCI, nullptr, &m_descriptorSetLayoutGfx)); + + VkPipelineLayoutCreateInfo plCI = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + plCI.setLayoutCount = 1; + plCI.pSetLayouts = &m_descriptorSetLayoutGfx; + + ThrowIfFailedVK(vkCreatePipelineLayout(m_device, &plCI, nullptr, &m_pipelineLayoutGfx)); + } + + { + VkDescriptorSetLayoutBinding sharedLayoutBindings[3] = {}; + 
sharedLayoutBindings[0].binding = CONST_BINDING; + sharedLayoutBindings[0].descriptorCount = 1; + sharedLayoutBindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + sharedLayoutBindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + sharedLayoutBindings[1].binding = COMP_IMG_BINDING; + sharedLayoutBindings[1].descriptorCount = 1; + sharedLayoutBindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + sharedLayoutBindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + sharedLayoutBindings[2].binding = COMP_IMG_BINDING + 1; + sharedLayoutBindings[2].descriptorCount = 1; + sharedLayoutBindings[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + sharedLayoutBindings[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + + VkDescriptorSetLayoutCreateInfo setLayoutCI = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + setLayoutCI.pBindings = sharedLayoutBindings; + setLayoutCI.bindingCount = _countof(sharedLayoutBindings); + + ThrowIfFailedVK(vkCreateDescriptorSetLayout(m_device, &setLayoutCI, nullptr, &m_descriptorSetLayoutComp)); + + VkPipelineLayoutCreateInfo plCI = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + plCI.setLayoutCount = 1; + plCI.pSetLayouts = &m_descriptorSetLayoutComp; + + ThrowIfFailedVK(vkCreatePipelineLayout(m_device, &plCI, nullptr, &m_pipelineLayoutComp)); + } + } + + void CreateComputePSO(LPCWSTR csEntry, VkPipelineLayout hPipelineLayout, VkPipeline* phPipeline) + { + const DxcDefine defs[] = {{L"VULKAN", L"1"}}; + + std::vector csCode; + DxcCompileToSpirv(c_Shader, csEntry, L"cs_6_0", L"", defs, RPS_TEST_COUNTOF(defs), csCode); + + VkShaderModuleCreateInfo smCI = {}; + smCI.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smCI.pCode = reinterpret_cast(csCode.data()); + smCI.codeSize = csCode.size(); + + VkShaderModule csModule; + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &csModule)); + + char csName[128]; + sprintf_s(csName, "%S", csEntry); + + VkComputePipelineCreateInfo compPsoCI = {}; + 
compPsoCI.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + compPsoCI.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + compPsoCI.stage.module = csModule; + compPsoCI.stage.pName = csName; + compPsoCI.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT; + compPsoCI.layout = hPipelineLayout; + + ThrowIfFailedVK(vkCreateComputePipelines(m_device, VK_NULL_HANDLE, 1, &compPsoCI, nullptr, phPipeline)); + vkDestroyShaderModule(m_device, csModule, nullptr); + } + + void CreateGfxPSO(LPCWSTR vsEntry, + LPCWSTR psEntry, + LPCWSTR gsEntry, + const RpsCmdRenderTargetInfo* pRtInfo, + VkPipelineLayout hPipelineLayout, + VkRenderPass hRenderPass, + VkPipeline* pPso) + { + VkPipelineVertexInputStateCreateInfo vi = {}; + vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + + VkPipelineInputAssemblyStateCreateInfo ia = {}; + ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + + // rasterizer state + VkPipelineRasterizationStateCreateInfo rs = {}; + rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rs.polygonMode = VK_POLYGON_MODE_FILL; + rs.cullMode = VK_CULL_MODE_NONE; + rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rs.lineWidth = 1.0f; + + VkPipelineColorBlendAttachmentState bs[8] = {}; + bs[0].blendEnable = VK_FALSE; + bs[0].srcColorBlendFactor = VK_BLEND_FACTOR_ONE; + bs[0].dstColorBlendFactor = VK_BLEND_FACTOR_ZERO; + bs[0].colorBlendOp = VK_BLEND_OP_ADD; + bs[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE; + bs[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; + bs[0].alphaBlendOp = VK_BLEND_OP_ADD; + bs[0].colorWriteMask = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + for (uint32_t i = 1; i < pRtInfo->numRenderTargets; i++) + { + bs[i] = bs[0]; + } + + // Color blend state + VkPipelineColorBlendStateCreateInfo cb; + cb.sType = 
VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + cb.flags = 0; + cb.pNext = NULL; + cb.attachmentCount = pRtInfo->numRenderTargets; + cb.pAttachments = bs; + cb.logicOpEnable = VK_FALSE; + cb.logicOp = VK_LOGIC_OP_NO_OP; + cb.blendConstants[0] = 1.0f; + cb.blendConstants[1] = 1.0f; + cb.blendConstants[2] = 1.0f; + cb.blendConstants[3] = 1.0f; + + std::vector dynamicStateEnables = {VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR}; + VkPipelineDynamicStateCreateInfo dynamicState = {}; + dynamicState.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamicState.pNext = NULL; + dynamicState.pDynamicStates = dynamicStateEnables.data(); + dynamicState.dynamicStateCount = (uint32_t)dynamicStateEnables.size(); + + // view port state + + VkPipelineViewportStateCreateInfo vp = {}; + vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + vp.pNext = NULL; + vp.flags = 0; + vp.viewportCount = 1; + vp.scissorCount = 1; + vp.pScissors = NULL; + vp.pViewports = NULL; + + // depth stencil state + + const bool bDepth = (pRtInfo->depthStencilFormat != RPS_FORMAT_UNKNOWN); + + VkPipelineDepthStencilStateCreateInfo ds = {}; + ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + ds.depthTestEnable = bDepth ? VK_TRUE : VK_FALSE; + ds.depthWriteEnable = bDepth ? 
VK_TRUE : VK_FALSE; + ds.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL; + ds.depthBoundsTestEnable = VK_FALSE; + ds.stencilTestEnable = VK_FALSE; + ds.back.failOp = VK_STENCIL_OP_KEEP; + ds.back.passOp = VK_STENCIL_OP_KEEP; + ds.back.compareOp = VK_COMPARE_OP_ALWAYS; + ds.back.compareMask = 0; + ds.back.reference = 0; + ds.back.depthFailOp = VK_STENCIL_OP_KEEP; + ds.back.writeMask = 0; + ds.minDepthBounds = 0; + ds.maxDepthBounds = 0; + ds.stencilTestEnable = VK_FALSE; + ds.front = ds.back; + + // multi sample state + + VkPipelineMultisampleStateCreateInfo ms; + ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + ms.pNext = NULL; + ms.flags = 0; + ms.pSampleMask = NULL; + ms.rasterizationSamples = static_cast(pRtInfo->numSamples); + ms.sampleShadingEnable = (pRtInfo->numSamples > 1) ? VK_TRUE : VK_FALSE; + ms.alphaToCoverageEnable = VK_FALSE; + ms.alphaToOneEnable = VK_FALSE; + ms.minSampleShading = (pRtInfo->numSamples > 1) ? 1.0f : 0.0f; + + VkShaderModule vsModule = VK_NULL_HANDLE, gsModule = VK_NULL_HANDLE, psModule = VK_NULL_HANDLE; + std::vector vsCode, gsCode, psCode; + + const DxcDefine defs[] = {{L"VULKAN", L"1"}}; + + DxcCompileToSpirv(c_Shader, vsEntry, L"vs_6_0", L"", defs, _countof(defs), vsCode); + + if (psEntry) + { + DxcCompileToSpirv(c_Shader, psEntry, L"ps_6_0", L"", defs, _countof(defs), psCode); + } + + if (gsEntry) + { + DxcCompileToSpirv(c_Shader, gsEntry, L"gs_6_0", L"", defs, _countof(defs), gsCode); + } + + VkShaderModuleCreateInfo smCI = {}; + smCI.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + + smCI.pCode = reinterpret_cast(vsCode.data()); + smCI.codeSize = vsCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &vsModule)); + + char vsName[128]; + sprintf_s(vsName, "%S", vsEntry); + + VkPipelineShaderStageCreateInfo shaderStages[3] = {}; + shaderStages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[0].module = vsModule; + shaderStages[0].pName = vsName; 
+ shaderStages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + + uint32_t numShaderStages = 1; + + if (psEntry) + { + smCI.pCode = reinterpret_cast(psCode.data()); + smCI.codeSize = psCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &psModule)); + + char psName[128]; + sprintf_s(psName, "%S", psEntry); + + shaderStages[numShaderStages].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[numShaderStages].module = psModule; + shaderStages[numShaderStages].pName = psName; + shaderStages[numShaderStages].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + + numShaderStages++; + } + + if (gsEntry) + { + smCI.pCode = reinterpret_cast(gsCode.data()); + smCI.codeSize = gsCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &gsModule)); + + char gsName[128]; + sprintf_s(gsName, "%S", gsEntry); + + shaderStages[numShaderStages].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[numShaderStages].module = gsModule; + shaderStages[numShaderStages].pName = gsName; + shaderStages[numShaderStages].stage = VK_SHADER_STAGE_GEOMETRY_BIT; + + numShaderStages++; + } + + VkGraphicsPipelineCreateInfo psoCI = {}; + psoCI.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + psoCI.pNext = NULL; + psoCI.layout = hPipelineLayout; + psoCI.basePipelineHandle = VK_NULL_HANDLE; + psoCI.basePipelineIndex = 0; + psoCI.flags = 0; + psoCI.pVertexInputState = &vi; + psoCI.pInputAssemblyState = &ia; + psoCI.pRasterizationState = &rs; + psoCI.pColorBlendState = &cb; + psoCI.pTessellationState = NULL; + psoCI.pMultisampleState = &ms; + psoCI.pDynamicState = &dynamicState; + psoCI.pViewportState = &vp; + psoCI.pDepthStencilState = &ds; + psoCI.pStages = shaderStages; + psoCI.stageCount = numShaderStages; + psoCI.renderPass = hRenderPass; + psoCI.subpass = 0; + + ThrowIfFailedVK(vkCreateGraphicsPipelines(m_device, VK_NULL_HANDLE, 1, &psoCI, nullptr, pPso)); + + vkDestroyShaderModule(m_device, vsModule, nullptr); + 
vkDestroyShaderModule(m_device, psModule, nullptr); + if (gsModule != RPS_NULL_HANDLE) + { + vkDestroyShaderModule(m_device, gsModule, nullptr); + } + } + + void UpdatePipeline(uint64_t frameIndex, uint64_t completedFrameIndex) + { + RpsResourceDesc backBufferDesc; + RpsRuntimeResource backBuffers[16]; + GetBackBuffers(backBufferDesc, backBuffers, RPS_TEST_COUNTOF(backBuffers)); + + TestRpsMultiQueue::UpdateRpsPipeline(frameIndex, completedFrameIndex, backBufferDesc, backBuffers); + } + +private: + VkDescriptorSetLayout m_descriptorSetLayoutComp = VK_NULL_HANDLE; + VkDescriptorSetLayout m_descriptorSetLayoutGfx = VK_NULL_HANDLE; + VkPipelineLayout m_pipelineLayoutComp = VK_NULL_HANDLE; + VkPipelineLayout m_pipelineLayoutGfx = VK_NULL_HANDLE; + VkPipeline m_pipelineStateProcedural = VK_NULL_HANDLE; + VkPipeline m_pipelineStateMipGen = VK_NULL_HANDLE; + VkPipeline m_pipelineStateShadowMap = VK_NULL_HANDLE; + VkPipeline m_pipelineStateShading = VK_NULL_HANDLE; + VkSampler m_shadowMapSampler = VK_NULL_HANDLE; + VkSampler m_proceduralImgSampler = VK_NULL_HANDLE; +}; + +TEST_CASE(TEST_APP_NAME) +{ +#if _DEBUG + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#endif + + TestVKMultiQueue renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + runInfo.numFramesToRender = g_exitAfterFrame; + runInfo.width = 1280; + runInfo.height = 720; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} diff --git a/tests/gui/test_multithreading.rpsl b/tests/gui/test_multithreading.rpsl new file mode 100644 index 0000000..533e4e2 --- /dev/null +++ b/tests/gui/test_multithreading.rpsl @@ -0,0 +1,25 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 

// Graphics node recorded once per pass; the host binds the actual draw callback to "GeometryPass".
graphics node GeometryPass(uint passId, rtv renderTarget : SV_Target0, uint4 scissorRect : SV_ScissorRect);

// Entry point: clears the back buffer, then issues numPasses GeometryPass nodes, each restricted by its
// scissor rectangle to its own vertical strip of the back buffer.
export void mt_main([readonly(present)] texture backbuffer, uint32_t numPasses)
{
    clear(backbuffer, float4(0.0, 0.2, 0.4, 1.0));

    ResourceDesc bbDesc = backbuffer.desc();

    // Gap left between neighbouring strips.
    uint gap = 5;

    // Strip width: an even share of (back buffer width + one gap), never less than the gap itself,
    // minus the gap that follows the strip.
    uint stripWidth = max(gap, uint(bbDesc.Width + gap) / numPasses) - gap;

    for (uint32_t passIdx = 0; passIdx < numPasses; passIdx++)
    {
        uint stripLeft = passIdx * (stripWidth + gap);

        GeometryPass(passIdx, backbuffer, uint4(stripLeft, 0, stripWidth, bbDesc.Height));
    }
}
diff --git a/tests/gui/test_multithreading_d3d12.cpp b/tests/gui/test_multithreading_d3d12.cpp
new file mode 100644
index 0000000..fa13872
--- /dev/null
+++ b/tests/gui/test_multithreading_d3d12.cpp
@@ -0,0 +1,316 @@
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
//
// This file is part of the AMD Render Pipeline Shaders SDK which is
// released under the AMD INTERNAL EVALUATION LICENSE.
//
// See file LICENSE.RTF for full license details.
+ +#include "test_multithreading_shared.h" + +#include "rps/runtime/d3d12/rps_d3d12_runtime.h" +#include "utils/rps_test_win32.h" +#include "utils/rps_test_d3d12_renderer.h" + +using namespace DirectX; + +class TestD3D12Multithreading : public RpsTestD3D12Renderer, public TestRpsMultithreading +{ +protected: + virtual void OnInit(ID3D12GraphicsCommandList* pInitCmdList, std::vector>& tempResources) override + { + LoadAssets(pInitCmdList, tempResources); + + TestRpsMultithreading::Init(rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); })); + } + + virtual void OnCleanUp() override + { + TestRpsMultithreading::OnDestroy(); + + m_rootSignature = nullptr; + m_pipelineState = nullptr; + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + UpdatePipeline(frameIndex, CalcGuaranteedCompletedFrameIndexForRps()); + + if (frameIndex < 50 * MAX_THREADS) + { + SetRenderJobCount(frameIndex / 50 + 1); + } + } + + virtual RpsRuntimeCommandBuffer AcquireNewCommandBuffer(uint32_t* pInsertAfter) override final + { + std::lock_guard lock(m_cmdListsMutex); + + uint32_t newIdx = uint32_t(m_activeCmdLists.size()); + + m_activeCmdLists.push_back({ + AcquireCmdList(RPS_AFX_QUEUE_INDEX_GFX), + UINT32_MAX, + }); + + if (pInsertAfter) + { + if (*pInsertAfter != UINT32_MAX) + { + uint32_t next = m_activeCmdLists[*pInsertAfter].Next; + m_activeCmdLists[*pInsertAfter].Next = newIdx; + m_activeCmdLists.back().Next = next; + } + + *pInsertAfter = newIdx; + } + + return rpsD3D12CommandListToHandle(m_activeCmdLists.back().CmdList.cmdList.Get()); + } + + virtual void OnRender(uint32_t frameIndex) override + { + m_activeCmdLists.clear(); + + TestRpsMultithreading::OnRender(frameIndex, m_numPasses); + + for (auto& cl : m_activeCmdLists) + { + CloseCmdList(cl.CmdList); + } + + m_cmdListsToSubmit.clear(); + m_cmdListsToSubmit.reserve(m_activeCmdLists.size()); + + for (uint32_t i = 0; i < m_activeCmdLists.size(); i = 
m_activeCmdLists[i].Next) + { + m_cmdListsToSubmit.push_back(m_activeCmdLists[i].CmdList.cmdList.Get()); + } + + REQUIRE(m_cmdListsToSubmit.size() == m_activeCmdLists.size()); + + if (m_cmdListsToSubmit.size() > 0) + { + m_presentQueue->ExecuteCommandLists(uint32_t(m_cmdListsToSubmit.size()), m_cmdListsToSubmit.data()); + } + + for (uint32_t i = 0; i < m_activeCmdLists.size(); i++) + { + RecycleCmdList(m_activeCmdLists[i].CmdList); + } + } + + virtual void OnKeyUp(char key) override + { + if (key >= '1' && key <= '8') + SetRenderJobCount(key - '1'); + } + + void SetRenderJobCount(uint32_t count) + { + m_renderJobs = std::max(1u, count); + + char buf[256]; + sprintf_s(buf, "TestD3D12Multithreading - %d workers on %d threads", m_renderJobs, m_threadPool.GetNumThreads()); + SetWindowText(m_hWnd, buf); + } + +protected: + + virtual bool IsSoftwareAdapter() const override final + { + return m_useWarpDevice; + } + + virtual void DrawGeometryPass(const RpsCmdCallbackContext* pContext) override final + { + const uint32_t numThreads = std::max(1u, std::min(MAX_THREADS, m_renderJobs)); + + CmdRangeContext* pRangeContext = static_cast(pContext->pUserRecordContext); + + if (m_pipelineState == nullptr) + { + std::lock_guard lock(m_cmdListsMutex); + + if (m_pipelineState == nullptr) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + THREAD_SAFE_REQUIRE(result == RPS_OK); + + CreatePSO(c_Shader, &rtInfo, &m_pipelineState); + } + } + + for (uint32_t i = 0; i < numThreads; i++) + { + auto hNewCmdBuf = AcquireNewCommandBuffer(&pRangeContext->LastCmdListIndex); + + const RpsCmdCallbackContext* pLocalContext = {}; + { + std::lock_guard lock(m_cmdListsMutex); + THREAD_SAFE_REQUIRE(rpsCmdCloneContext(pContext, hNewCmdBuf, &pLocalContext) == RPS_OK); + } + + RpsAfxThreadPool::WaitHandle waitHdl = m_threadPool.EnqueueJob( + [this, pLocalContext, i, numThreads, hNewCmdBuf, batchId = pRangeContext->BatchIndex]() { + uint32_t 
numTrianglesPerThread = uint32_t(m_triangleData.size() + numThreads - 1) / numThreads; + + uint32_t beginIndex = numTrianglesPerThread * i; + uint32_t endIndex = std::min(uint32_t(m_triangleData.size()), beginIndex + numTrianglesPerThread); + + auto pCmdList = rpsD3D12CommandListFromHandle(pLocalContext->hCommandBuffer); + + if (hNewCmdBuf != pLocalContext->hCommandBuffer) + { + m_failCount++; + } + + RpsRuntimeRenderPassFlags rpFlags = {}; + + rpFlags = RPS_RUNTIME_RENDER_PASS_FLAG_NONE; + if (i != 0) + rpFlags |= RPS_RUNTIME_RENDER_PASS_RESUMING; + if (i != (numThreads - 1)) + rpFlags |= RPS_RUNTIME_RENDER_PASS_SUSPENDING; + + RpsResult threadResult = rpsCmdBeginRenderPass(pLocalContext, rpFlags); + if (threadResult != RPS_OK) + m_failCount++; + + const float aspectRatio = m_height / static_cast(m_width); + + pCmdList->SetGraphicsRootSignature(m_rootSignature.Get()); + pCmdList->SetPipelineState(m_pipelineState.Get()); + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + auto tid = batchId * numThreads + i; + + const XMVECTOR threadColorTint = XMVectorSet(float((tid / 7) & 1), float((tid / 13) & 1), float((tid / 25) & 1), 1.0f); + + for (uint32_t triangleIdx = beginIndex; triangleIdx < endIndex; triangleIdx++) + { + TriangleDataGPU cbData = {}; + + TriangleDataCPU* pTriangle = &m_triangleData[triangleIdx]; + pTriangle->Offset.x = fmod(pTriangle->Offset.x + pTriangle->Speed + m_runwayLength * 0.5f, m_runwayLength) - m_runwayLength * 0.5f; + + cbData.Pos = pTriangle->Offset; + cbData.AspectRatio = aspectRatio; + cbData.Scale = pTriangle->Scale; + XMStoreFloat3(&cbData.Color, XMVectorLerp(XMLoadFloat3(&pTriangle->Color), threadColorTint, 0.7f)); + + pCmdList->SetGraphicsRoot32BitConstants(0, sizeof(TriangleDataGPU) / sizeof(uint32_t), &cbData, 0); + pCmdList->DrawInstanced(3, 1, 0, 0); + } + + threadResult = rpsCmdEndRenderPass(pLocalContext); + if (threadResult != RPS_OK) + m_failCount++; + + m_executeCount++; + }); + + { + std::lock_guard 
lock(m_cmdListsMutex); + m_waitHandles.emplace_back(std::move(waitHdl)); + } + } + + RpsRuntimeCommandBuffer newBuffer = AcquireNewCommandBuffer(&pRangeContext->LastCmdListIndex); + RpsResult result = rpsCmdSetCommandBuffer(pContext, newBuffer); + THREAD_SAFE_REQUIRE(result == RPS_OK); + } + +private: + void LoadAssets(ID3D12GraphicsCommandList* pInitCmdList, std::vector>& tempResources) + { + OnPostResize(); + + { + CD3DX12_ROOT_PARAMETER rootParameters[1] = {}; + + rootParameters[0].InitAsConstants(sizeof(TriangleDataGPU) / sizeof(uint32_t), 0, 0, D3D12_SHADER_VISIBILITY_VERTEX); + + D3D12_ROOT_SIGNATURE_FLAGS rootSignatureFlags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC rootSignatureDesc; + rootSignatureDesc.Init_1_0(_countof(rootParameters), rootParameters, 0, nullptr, rootSignatureFlags); + + ComPtr signature; + ComPtr error; + ThrowIfFailed(D3DX12SerializeVersionedRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1_0, &signature, &error)); + ThrowIfFailed(m_device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature))); + } + } + + void CreatePSO(const char* shader, const RpsCmdRenderTargetInfo* pRtInfo, ID3D12PipelineState** ppState) + { + std::vector vsCode, psCode; + DxcCompile(c_Shader, L"VSMain", L"vs_6_0", L"", nullptr, 0, vsCode); + DxcCompile(c_Shader, L"PSMain", L"ps_6_0", L"", nullptr, 0, psCode); + + // Describe and create the graphics pipeline state object (PSO). 
+ D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.pRootSignature = m_rootSignature.Get(); + psoDesc.VS = CD3DX12_SHADER_BYTECODE(vsCode.data(), vsCode.size()); + psoDesc.PS = CD3DX12_SHADER_BYTECODE(psCode.data(), psCode.size()); + psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); + psoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); + psoDesc.DepthStencilState.DepthEnable = FALSE; + psoDesc.DepthStencilState.StencilEnable = FALSE; + psoDesc.SampleMask = UINT_MAX; + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + + psoDesc.DSVFormat = rpsFormatToDXGI(pRtInfo->depthStencilFormat); + psoDesc.NumRenderTargets = pRtInfo->numRenderTargets; + for (uint32_t i = 0; i < pRtInfo->numRenderTargets; i++) + { + psoDesc.RTVFormats[i] = rpsFormatToDXGI(pRtInfo->renderTargetFormats[i]); + } + psoDesc.SampleDesc.Count = pRtInfo->numSamples; + + ThrowIfFailed(m_device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&m_pipelineState))); + } + + void UpdatePipeline(uint64_t frameIndex, uint64_t completedFrameIndex) + { + RpsRuntimeResource backBuffers[DXGI_MAX_SWAP_CHAIN_BUFFERS]; + RpsResourceDesc backBufferDesc = {}; + + GetBackBuffers(backBufferDesc, backBuffers); + + TestRpsMultithreading::UpdateRpsPipeline(frameIndex, completedFrameIndex, backBufferDesc, backBuffers); + } + +private: + ComPtr m_rootSignature; + ComPtr m_pipelineState; + + struct ActiveCommandListEx + { + ActiveCommandList CmdList; + uint32_t Next; + }; + + std::vector m_activeCmdLists; + + std::vector m_cmdListsToSubmit; +}; + +TEST_CASE(TEST_APP_NAME) +{ +#if _DEBUG + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#endif + + TestD3D12Multithreading renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + runInfo.numFramesToRender = g_exitAfterFrame; + runInfo.width = 1280; + runInfo.height = 720; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} diff --git 
a/tests/gui/test_multithreading_shared.h b/tests/gui/test_multithreading_shared.h new file mode 100644 index 0000000..d2ba4a0 --- /dev/null +++ b/tests/gui/test_multithreading_shared.h @@ -0,0 +1,248 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#include "utils/rps_test_host.h" +#include "app_framework/afx_threadpool.h" + +RPS_DECLARE_RPSL_ENTRY(test_multithreading, mt_main); + +#include +#include + +static const char c_Shader[] = R"( +struct V2P +{ + float4 Pos : SV_Position; + float4 Color : COLOR0; +}; + +struct CBData +{ + float2 Pos; + float Scale; + float AspectRatio; + float3 Color; +}; + +#ifndef VULKAN +ConstantBuffer cb : register(b0); +#else +[[vk::push_constant]] CBData cb; +#endif + +#define PI (3.14159f) + +V2P VSMain(uint vId : SV_VertexID) +{ + float2 pos[3] = + { + { -0.5f * cb.AspectRatio, -0.5f * tan(PI / 6), }, + { 0.0f * cb.AspectRatio, 0.5f / cos(PI / 6), }, + { 0.5f * cb.AspectRatio, -0.5f * tan(PI / 6), }, + }; + + V2P vsOut; + vsOut.Pos = float4(pos[min(vId, 2)] * cb.Scale + cb.Pos, 0, 1); + vsOut.Color = float4(cb.Color, 1.0f); + return vsOut; +} + +float4 PSMain(V2P psIn) : SV_Target0 +{ + return psIn.Color; +} +)"; + +#define TEST_APP_NAME_RAW "TestMultithreading" + +using namespace DirectX; + +#define THREAD_SAFE_REQUIRE(EXPR) \ + do \ + { \ + std::lock_guard lock(m_catch2Mutex); \ + REQUIRE(EXPR); \ + } while (false); + +class TestRpsMultithreading : public RpsTestHost +{ +public: + static constexpr uint32_t MAX_THREADS = 8; + static constexpr uint32_t MIN_THREADS = 4; + + struct TriangleDataCPU + { + XMFLOAT2 Offset; + float Scale; + float Speed; + XMFLOAT3 Color; + }; + + struct TriangleDataGPU + { + XMFLOAT2 Pos; + float Scale; + float AspectRatio; + XMFLOAT3 Color; + }; + + struct CmdRangeContext + { + uint32_t 
BatchIndex; + uint32_t LastCmdListIndex; + }; + +public: + TestRpsMultithreading() + { + const uint32_t hwConcurrency = std::thread::hardware_concurrency(); + const uint32_t numThreads = std::min(MAX_THREADS, std::max(MIN_THREADS, (hwConcurrency > 0) ? (hwConcurrency - 1) : hwConcurrency)); + m_threadPool.Init(numThreads); + } + +protected: + void Init(RpsDevice hRpsDevice) + { + uint32_t baseTriangles = 4096; +#ifndef _DEBUG + m_speedMultiplier = IsSoftwareAdapter() ? 1 : 64; +#endif + m_triangleData.resize(baseTriangles * m_speedMultiplier); + m_runwayLength *= m_speedMultiplier; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dist(0.0f, 1.0f); + + for (uint32_t i = 0; i < m_triangleData.size(); i++) + { + m_triangleData[i].Offset = XMFLOAT2((dist(gen) - 0.5f) * m_runwayLength, dist(gen) * 2.0f - 1.0f); + m_triangleData[i].Color = XMFLOAT3(dist(gen), dist(gen), dist(gen)); + m_triangleData[i].Scale = dist(gen) * 0.1f + 0.1f; + m_triangleData[i].Speed = (dist(gen) + 0.5f) * 0.01f * m_speedMultiplier; + } + + RpsTestHost::OnInit(hRpsDevice, rpsTestLoadRpslEntry(test_multithreading, mt_main)); + } + + virtual void UpdateRpsPipeline(uint64_t frameIndex, + uint64_t completedFrameIndex, + const RpsResourceDesc& backBufferDesc, + RpsRuntimeResource* pBackBuffers) + { + const RpsRuntimeResource* argResources[] = {pBackBuffers}; + RpsConstant argData[] = {&backBufferDesc, &m_numPasses}; + + RpsTestHost::OnUpdate(frameIndex, completedFrameIndex, _countof(argData), argData, argResources); + } + + void OnRender(uint64_t frameIndex, uint32_t numThreads) + { + m_waitHandles.clear(); + m_executeCount = 0; + m_failCount = 0; + + RpsRenderGraphBatchLayout batchLayout = {}; + REQUIRE_RPS_OK(rpsRenderGraphGetBatchLayout(GetRpsRenderGraph(), &batchLayout)); + + assert(batchLayout.numCmdBatches == 1); + + const uint32_t numCmds = batchLayout.pCmdBatches[0].numCmds; + const uint32_t batchCmdEnd = batchLayout.pCmdBatches[0].cmdBegin + 
batchLayout.pCmdBatches[0].numCmds; + + uint32_t cmdBegin = batchLayout.pCmdBatches[0].cmdBegin; // start at the batch's first command so the split ranges line up with batchCmdEnd + uint32_t cmdsPerThread = std::max(1u, (numCmds + numThreads - 1) / numThreads); // at least 1: avoids dividing by zero below when the batch has no commands + uint32_t numThreadsActual = (numCmds + cmdsPerThread - 1) / cmdsPerThread; + + static constexpr uint32_t MAX_BATCHES = 32; + std::pair buffers[MAX_BATCHES]; + + uint32_t lastCmdListId = UINT32_MAX; + for (uint32_t iBatch = 0; iBatch < numThreadsActual; iBatch++) + { + buffers[iBatch].first = AcquireNewCommandBuffer(&lastCmdListId); + buffers[iBatch].second = lastCmdListId; + } + + for (uint32_t iBatch = 0; iBatch < numThreadsActual; iBatch++) + { + const uint32_t cmdEnd = std::min(batchCmdEnd, cmdBegin + cmdsPerThread); + + if (cmdEnd == cmdBegin) + { + break; + } + + RpsAfxThreadPool::WaitHandle waitHdl = + m_threadPool.EnqueueJob([this, iBatch, buffer = buffers[iBatch], frameIndex, cmdBegin, cmdEnd]() { + + CmdRangeContext rangeContext = {iBatch, buffer.second}; + + RpsRenderGraphRecordCommandInfo recordInfo = {}; + recordInfo.hCmdBuffer = buffer.first; + recordInfo.pUserContext = &rangeContext; + recordInfo.frameIndex = frameIndex; + recordInfo.cmdBeginIndex = cmdBegin; + recordInfo.numCmds = cmdEnd - cmdBegin; + + RpsResult threadResult = rpsRenderGraphRecordCommands(GetRpsRenderGraph(), &recordInfo); + THREAD_SAFE_REQUIRE(threadResult == RPS_OK); + }); + + { + std::lock_guard lock(m_cmdListsMutex); + m_waitHandles.emplace_back(std::move(waitHdl)); + } + + cmdBegin = cmdEnd; // the next job records the following contiguous command range + } + + m_threadPool.WaitIdle(); + + REQUIRE(m_failCount == 0); + REQUIRE((m_executeCount + numThreadsActual) == m_waitHandles.size()); + } + +protected: + virtual bool IsSoftwareAdapter() const + { + return false; + } + + virtual RpsRuntimeCommandBuffer AcquireNewCommandBuffer(uint32_t* pInsertAfter) = 0; + + virtual void DrawGeometryPass(const RpsCmdCallbackContext* pContext) = 0; + + virtual void BindNodes(RpsSubprogram hRpslEntry) override + { + RpsResult result = RPS_OK; + + result = rpsProgramBindNode(hRpslEntry, + "GeometryPass", + 
&TestRpsMultithreading::DrawGeometryPass, + this, + RPS_CMD_CALLBACK_MULTI_THREADED_BIT); + REQUIRE(result == RPS_OK); + } + +protected: + uint32_t m_numPasses = 4; + std::vector m_triangleData; + float m_runwayLength = 15.0f; + int32_t m_speedMultiplier = 1; + RpsAfxThreadPool m_threadPool; + + uint32_t m_renderJobs = 8; + + std::vector m_waitHandles; + + std::atomic_int32_t m_failCount = {}; + std::atomic_int32_t m_executeCount = {}; + + std::mutex m_cmdListsMutex; + std::mutex m_catch2Mutex; +}; diff --git a/tests/gui/test_multithreading_vk.cpp b/tests/gui/test_multithreading_vk.cpp new file mode 100644 index 0000000..91fbddb --- /dev/null +++ b/tests/gui/test_multithreading_vk.cpp @@ -0,0 +1,452 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifdef _WIN32 +#define VK_USE_PLATFORM_WIN32_KHR +#else +#error "TODO" +#endif + +#include "test_multithreading_shared.h" + +#include "rps/runtime/vk/rps_vk_runtime.h" +#include "utils/rps_test_win32.h" +#include "utils/rps_test_vk_renderer.h" + +class TestVkMultithreading : public RpsTestVulkanRenderer, public TestRpsMultithreading +{ +protected: + virtual void OnInit(VkCommandBuffer initCmdBuf, InitTempResources& tempResources) override + { + LoadAssets(initCmdBuf, tempResources); + + TestRpsMultithreading::Init(rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); })); + } + + virtual void OnCleanUp() override + { + TestRpsMultithreading::OnDestroy(); + + vkDestroyPipeline(m_device, m_geoPipeline, nullptr); + vkDestroyPipelineLayout(m_device, m_pipelineLayout, nullptr); + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + UpdatePipeline(frameIndex, CalcGuaranteedCompletedFrameIndexForRps()); + + if (frameIndex < 50 * 
MAX_THREADS) + { + SetRenderJobCount(frameIndex / 50 + 1); + } + } + + virtual RpsRuntimeCommandBuffer AcquireNewCommandBuffer(uint32_t* pInsertAfter) override final + { + m_activePrimaryCmdBufs.emplace_back(BeginCmdList(RPS_AFX_QUEUE_INDEX_GFX)); + return rpsVKCommandBufferToHandle(m_activePrimaryCmdBufs.back().cmdBuf); + } + + virtual void OnRender(uint32_t frameIndex) override + { + assert(m_activePrimaryCmdBufs.empty()); + + TestRpsMultithreading::OnRender(frameIndex, m_numPasses); + + for (auto& cl : m_activePrimaryCmdBufs) + { + EndCmdList(cl); + } + + SubmitCmdLists(m_activePrimaryCmdBufs.data(), uint32_t(m_activePrimaryCmdBufs.size()), VK_TRUE); + + for (auto& secondaryCmdBuf : m_activeSecondaryCmdBufs) + { + RecycleCmdList(secondaryCmdBuf); + } + m_activeSecondaryCmdBufs.clear(); + + for (uint32_t i = 0; i < m_activePrimaryCmdBufs.size(); i++) + { + RecycleCmdList(m_activePrimaryCmdBufs[i]); + } + m_activePrimaryCmdBufs.clear(); + } + + virtual void OnKeyUp(char key) override + { + if (key >= '1' && key <= '8') + SetRenderJobCount(key - '0'); // keys '1'..'8' select 1..8 render jobs, the same range OnUpdate steps through + } + + void SetRenderJobCount(uint32_t count) + { + m_renderJobs = std::max(1u, count); + + char buf[256]; + sprintf_s(buf, "TestVkMultithreading - %d workers on %d threads", m_renderJobs, m_threadPool.GetNumThreads()); + SetWindowText(m_hWnd, buf); + } + +protected: + virtual void DrawGeometryPass(const RpsCmdCallbackContext* pContext) override final + { + if (m_geoPipeline == VK_NULL_HANDLE) + { + std::lock_guard lock(m_cmdListMutex); + if (m_geoPipeline == VK_NULL_HANDLE) + { + VkRenderPass rp; + RpsResult result = rpsVKGetCmdRenderPass(pContext, &rp); + THREAD_SAFE_REQUIRE(result == RPS_OK); + CreatePipeline(c_Shader, rp, &m_geoPipeline); + } + } + + const uint32_t numThreads = std::max(1u, std::min(MAX_THREADS, m_renderJobs)); + + RpsAfxThreadPool::WaitHandle waitHandles[MAX_THREADS]; + VkCommandBuffer vkCmdBufs[MAX_THREADS]; + + VkCommandBufferInheritanceInfo cmdBufInheritanceInfo = 
{VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO}; + RpsResult result = rpsVKGetCmdRenderPass(pContext, &cmdBufInheritanceInfo.renderPass); + THREAD_SAFE_REQUIRE(result == RPS_OK); + + CmdRangeContext* pRangeContext = static_cast(pContext->pUserRecordContext); + + std::atomic_int32_t failCount = {}; + std::atomic_int32_t executeCount = {}; + + for (uint32_t i = 0; i < numThreads; i++) + { + ActiveCommandList hNewCmdBuf = BeginCmdList(RPS_AFX_QUEUE_INDEX_GFX, &cmdBufInheritanceInfo); + + const RpsCmdCallbackContext* pLocalContext = {}; + + { + std::lock_guard lock(m_cmdListMutex); + result = rpsCmdCloneContext(pContext, rpsVKCommandBufferToHandle(hNewCmdBuf), &pLocalContext); + THREAD_SAFE_REQUIRE(result == RPS_OK); + } + + vkCmdBufs[i] = hNewCmdBuf; + + waitHandles[i] = m_threadPool.EnqueueJob([this, + pLocalContext, + hNewCmdBuf, + i, + numThreads, + &failCount, + &executeCount, + batchId = pRangeContext->BatchIndex]() { + ActiveCommandList cmdBuf = hNewCmdBuf; + + uint32_t numTrianglesPerThread = uint32_t(m_triangleData.size() + numThreads - 1) / numThreads; + + uint32_t beginIndex = numTrianglesPerThread * i; + uint32_t endIndex = std::min(uint32_t(m_triangleData.size()), beginIndex + numTrianglesPerThread); + + assert(cmdBuf == rpsVKCommandBufferFromHandle(pLocalContext->hCommandBuffer)); + + RpsRuntimeRenderPassFlags rpFlags = RPS_RUNTIME_RENDER_PASS_SECONDARY_COMMAND_BUFFER; + + RpsResult threadResult = rpsCmdBeginRenderPass(pLocalContext, rpFlags); + if (threadResult != RPS_OK) + failCount++; + + const float aspectRatio = m_height / static_cast(m_width); + + auto tid = batchId * numThreads + i; + const XMVECTOR threadColorTint = + XMVectorSet(float((tid / 7) & 1), float((tid / 13) & 1), float((tid / 25) & 1), 1.0f); + + for (uint32_t triangleIdx = beginIndex; triangleIdx < endIndex; triangleIdx++) + { + TriangleDataGPU cbData = {}; + + TriangleDataCPU* pTriangle = &m_triangleData[triangleIdx]; + pTriangle->Offset.x = + fmod(pTriangle->Offset.x + 
pTriangle->Speed + m_runwayLength * 0.5f, m_runwayLength) - + m_runwayLength * 0.5f; + + cbData.Pos = pTriangle->Offset; + cbData.AspectRatio = aspectRatio; + cbData.Scale = pTriangle->Scale; + XMStoreFloat3(&cbData.Color, XMVectorLerp(XMLoadFloat3(&pTriangle->Color), threadColorTint, 0.7f)); + + vkCmdPushConstants( + cmdBuf, m_pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(TriangleDataGPU), &cbData); + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_geoPipeline); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + + threadResult = rpsCmdEndRenderPass(pLocalContext); + if (threadResult != RPS_OK) + failCount++; + + EndCmdList(cmdBuf); + + { + std::lock_guard lock(m_cmdListMutex); + m_activeSecondaryCmdBufs.emplace_back(cmdBuf); + } + executeCount++; + }); + } + + // For vulkan secondary cmd buffers we need to wait before execute them on the primary cmd buffer. + m_threadPool.WaitForJobs(waitHandles, numThreads); + + THREAD_SAFE_REQUIRE(failCount == 0); + THREAD_SAFE_REQUIRE(executeCount == numThreads); + + VkCommandBuffer cmdBufPrimary = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + vkCmdExecuteCommands(cmdBufPrimary, numThreads, vkCmdBufs); + } + +private: + void LoadAssets(VkCommandBuffer initCmdBuf, InitTempResources& tempResources) + { + OnPostResize(); + + VkPushConstantRange pushConstRanges[1] = {}; + pushConstRanges[0].offset = 0; + pushConstRanges[0].size = 7 * 4; + pushConstRanges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + + VkPipelineLayoutCreateInfo plCI = {}; + plCI.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + plCI.pPushConstantRanges = pushConstRanges; + plCI.pushConstantRangeCount = _countof(pushConstRanges); + + ThrowIfFailedVK(vkCreatePipelineLayout(m_device, &plCI, nullptr, &m_pipelineLayout)); + } + + void CreatePipeline(const char* pShaderCode, VkRenderPass renderPass, VkPipeline* pPipeline) + { + VkPipelineVertexInputStateCreateInfo vi = {}; + vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; 
+ vi.pNext = NULL; + vi.flags = 0; + vi.vertexBindingDescriptionCount = 0; + vi.pVertexBindingDescriptions = nullptr; + vi.vertexAttributeDescriptionCount = 0; + vi.pVertexAttributeDescriptions = nullptr; + + // input assembly state + // + VkPipelineInputAssemblyStateCreateInfo ia; + ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + ia.pNext = NULL; + ia.flags = 0; + ia.primitiveRestartEnable = VK_FALSE; + ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + + // rasterizer state + VkPipelineRasterizationStateCreateInfo rs; + rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rs.pNext = NULL; + rs.flags = 0; + rs.polygonMode = VK_POLYGON_MODE_FILL; + rs.cullMode = VK_CULL_MODE_NONE; + rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rs.depthClampEnable = VK_FALSE; + rs.rasterizerDiscardEnable = VK_FALSE; + rs.depthBiasEnable = VK_FALSE; + rs.depthBiasConstantFactor = 0; + rs.depthBiasClamp = 0; + rs.depthBiasSlopeFactor = 0; + rs.lineWidth = 1.0f; + + VkPipelineColorBlendAttachmentState bs[1] = {}; + bs[0].blendEnable = VK_FALSE; + bs[0].srcColorBlendFactor = VK_BLEND_FACTOR_ONE; + bs[0].dstColorBlendFactor = VK_BLEND_FACTOR_ZERO; + bs[0].colorBlendOp = VK_BLEND_OP_ADD; + bs[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE; + bs[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; + bs[0].alphaBlendOp = VK_BLEND_OP_ADD; + bs[0].colorWriteMask = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + // Color blend state + VkPipelineColorBlendStateCreateInfo cb; + cb.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + cb.flags = 0; + cb.pNext = NULL; + cb.attachmentCount = 1; + cb.pAttachments = bs; + cb.logicOpEnable = VK_FALSE; + cb.logicOp = VK_LOGIC_OP_NO_OP; + cb.blendConstants[0] = 1.0f; + cb.blendConstants[1] = 1.0f; + cb.blendConstants[2] = 1.0f; + cb.blendConstants[3] = 1.0f; + + std::vector dynamicStateEnables = {VK_DYNAMIC_STATE_VIEWPORT, 
VK_DYNAMIC_STATE_SCISSOR}; + VkPipelineDynamicStateCreateInfo dynamicState = {}; + dynamicState.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamicState.pNext = NULL; + dynamicState.pDynamicStates = dynamicStateEnables.data(); + dynamicState.dynamicStateCount = (uint32_t)dynamicStateEnables.size(); + + // view port state + + VkPipelineViewportStateCreateInfo vp = {}; + vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + vp.pNext = NULL; + vp.flags = 0; + vp.viewportCount = 1; + vp.scissorCount = 1; + vp.pScissors = NULL; + vp.pViewports = NULL; + + // depth stencil state + + VkPipelineDepthStencilStateCreateInfo ds; + ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + ds.pNext = NULL; + ds.flags = 0; + ds.depthTestEnable = VK_FALSE; + ds.depthWriteEnable = VK_FALSE; + ds.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL; + ds.depthBoundsTestEnable = VK_FALSE; + ds.stencilTestEnable = VK_FALSE; + ds.back.failOp = VK_STENCIL_OP_KEEP; + ds.back.passOp = VK_STENCIL_OP_KEEP; + ds.back.compareOp = VK_COMPARE_OP_ALWAYS; + ds.back.compareMask = 0; + ds.back.reference = 0; + ds.back.depthFailOp = VK_STENCIL_OP_KEEP; + ds.back.writeMask = 0; + ds.minDepthBounds = 0; + ds.maxDepthBounds = 0; + ds.stencilTestEnable = VK_FALSE; + ds.front = ds.back; + + // multi sample state + + VkPipelineMultisampleStateCreateInfo ms; + ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + ms.pNext = NULL; + ms.flags = 0; + ms.pSampleMask = NULL; + ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + ms.sampleShadingEnable = VK_FALSE; + ms.alphaToCoverageEnable = VK_FALSE; + ms.alphaToOneEnable = VK_FALSE; + ms.minSampleShading = 0.0; + + VkShaderModule vsModule, psModule; + std::vector vsCode, psCode; + + const DxcDefine defs[] = {{L"VULKAN", L"1"}}; + + DxcCompileToSpirv(pShaderCode, L"VSMain", L"vs_6_0", L"", defs, _countof(defs), vsCode); + DxcCompileToSpirv(pShaderCode, L"PSMain", L"ps_6_0", L"", defs, _countof(defs), 
psCode); + + VkShaderModuleCreateInfo smCI = {}; + smCI.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + + smCI.pCode = reinterpret_cast(vsCode.data()); + smCI.codeSize = vsCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &vsModule)); + + smCI.pCode = reinterpret_cast(psCode.data()); + smCI.codeSize = psCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &psModule)); + + VkPipelineShaderStageCreateInfo shaderStages[2] = {}; + shaderStages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[0].module = vsModule; + shaderStages[0].pName = "VSMain"; + shaderStages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + shaderStages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[1].module = psModule; + shaderStages[1].pName = "PSMain"; + shaderStages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + + VkGraphicsPipelineCreateInfo psoCI = {}; + psoCI.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + psoCI.pNext = NULL; + psoCI.layout = m_pipelineLayout; + psoCI.basePipelineHandle = VK_NULL_HANDLE; + psoCI.basePipelineIndex = 0; + psoCI.flags = 0; + psoCI.pVertexInputState = &vi; + psoCI.pInputAssemblyState = &ia; + psoCI.pRasterizationState = &rs; + psoCI.pColorBlendState = &cb; + psoCI.pTessellationState = NULL; + psoCI.pMultisampleState = &ms; + psoCI.pDynamicState = &dynamicState; + psoCI.pViewportState = &vp; + psoCI.pDepthStencilState = &ds; + psoCI.pStages = shaderStages; + psoCI.stageCount = _countof(shaderStages); + psoCI.renderPass = renderPass; + psoCI.subpass = 0; + + ThrowIfFailedVK(vkCreateGraphicsPipelines(m_device, VK_NULL_HANDLE, 1, &psoCI, nullptr, pPipeline)); + + vkDestroyShaderModule(m_device, vsModule, nullptr); + vkDestroyShaderModule(m_device, psModule, nullptr); + } + + void UpdatePipeline(uint64_t frameIndex, uint64_t completedFrameIndex) + { + RpsRuntimeResource backBuffers[16]; + + if (m_swapChainImages.size() > 
RPS_TEST_COUNTOF(backBuffers)) + throw; + + for (uint32_t i = 0; i < m_swapChainImages.size(); i++) + { + backBuffers[i] = rpsVKImageToHandle(m_swapChainImages[i].image); + } + + RpsResourceDesc backBufferDesc = {}; + backBufferDesc.type = RPS_RESOURCE_TYPE_IMAGE_2D; + backBufferDesc.temporalLayers = uint32_t(m_swapChainImages.size()); + backBufferDesc.image.arrayLayers = 1; + backBufferDesc.image.mipLevels = 1; + backBufferDesc.image.format = rpsFormatFromVK(m_swapChainFormat.format); + backBufferDesc.image.width = m_width; + backBufferDesc.image.height = m_height; + backBufferDesc.image.sampleCount = 1; + + TestRpsMultithreading::UpdateRpsPipeline(frameIndex, completedFrameIndex, backBufferDesc, backBuffers); + } + +private: + VkPipeline m_geoPipeline = VK_NULL_HANDLE; + VkPipelineLayout m_pipelineLayout = VK_NULL_HANDLE; + + std::vector m_activeSecondaryCmdBufs; + std::vector m_activePrimaryCmdBufs; +}; + +TEST_CASE(TEST_APP_NAME) +{ + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#if defined(BREAK_AT_ALLOC_ID) + _CrtSetBreakAlloc(BREAK_AT_ALLOC_ID); +#endif + + TestVkMultithreading renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + runInfo.numFramesToRender = g_exitAfterFrame; + runInfo.width = 1280; + runInfo.height = 720; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} diff --git a/tests/gui/test_output_resource.rpsl b/tests/gui/test_output_resource.rpsl new file mode 100644 index 0000000..e5c530e --- /dev/null +++ b/tests/gui/test_output_resource.rpsl @@ -0,0 +1,39 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +node DrawTriangle(rtv dst : SV_Target0); // bound by the host tests via rpsProgramBindNode("DrawTriangle") +node Blt(float3 tint, ps_srv src, rtv dst : SV_Target0, RpsViewport viewport : SV_Viewport); // bound by the host tests via rpsProgramBindNode("Blt") + +export void render_colored([readonly(present)] texture backBuffer, [readonly(ps)] out texture offscreenRT) +{ + ResourceDesc backbufferDesc = backBuffer.desc(); + uint32_t width = (uint32_t)backbufferDesc.Width / 2; // offscreen target is half the back buffer width + uint32_t height = (uint32_t)backbufferDesc.Height / 2; // ... and half its height + RPS_FORMAT backbufferFormat = backbufferDesc.Format; + + clear(backBuffer, float4(0.1, 0.1, 0.1, 1.0)); + + // Create offscreen buffer and assign to the out parameter + offscreenRT = create_tex2d(backbufferFormat, width, height); + clear(offscreenRT, float4(0.1, 0.2, 0.4, 1.0)); + + DrawTriangle(offscreenRT); + + copy_texture(backBuffer, uint3(0, 0, 0), offscreenRT, uint3(0, 0, 0), uint3(width, height, 1)); // presumably (dst, dstOffset, src, srcOffset, extent) - TODO confirm against the RPSL built-in +} + +export void render_tinted([readonly(present)] texture backBuffer, [readonly(ps)] texture offscreenBuffer) +{ + ResourceDesc backbufferDesc = backBuffer.desc(); + uint32_t width = (uint32_t)backbufferDesc.Width / 2; // each Blt viewport is half the back buffer width + uint32_t height = (uint32_t)backbufferDesc.Height / 2; // ... and half its height + + Blt(float3(1, 1, 1), offscreenBuffer, backBuffer, viewport(0, 0, width, height)); // tint (1,1,1), viewport origin (0, 0) + Blt(float3(1, 0, 0), offscreenBuffer, backBuffer, viewport(width, 0, width, height)); // red tint, viewport origin (width, 0) + Blt(float3(0, 1, 0), offscreenBuffer, backBuffer, viewport(0, height, width, height)); // green tint, viewport origin (0, height) + Blt(float3(0, 0, 1), offscreenBuffer, backBuffer, viewport(width, height, width, height)); // blue tint, viewport origin (width, height) +} diff --git a/tests/gui/test_output_resource_d3d12.cpp b/tests/gui/test_output_resource_d3d12.cpp new file mode 100644 index 0000000..eb146ff --- /dev/null +++ b/tests/gui/test_output_resource_d3d12.cpp @@ -0,0 +1,227 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#define RPS_D3D12_RUNTIME 1 + +#include "test_output_resource_shared.h" + +#include "utils/rps_test_win32.h" +#include "utils/rps_test_d3d12_renderer.h" + +using namespace DirectX; + +class TestD3D12OutputResource : public RpsTestD3D12Renderer, public TestRpsOutputResource +{ +protected: + virtual void OnInit(ID3D12GraphicsCommandList* pInitCmdList, + std::vector>& tempResources) override + { + LoadAssets(pInitCmdList, tempResources); + + TestRpsOutputResource::OnInit(); + + auto coloredTriangelEntry = rpsRenderGraphGetMainEntry(GetRpsRenderGraphColoredTriangle()); + + RpsResult result = + rpsProgramBindNode(coloredTriangelEntry, "DrawTriangle", &TestD3D12OutputResource::DrawTriangle, this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode(coloredTriangelEntry, "Blt", &TestD3D12OutputResource::DrawBlt, this); + REQUIRE(result == RPS_OK); + + auto tintedQuadEntry = rpsRenderGraphGetMainEntry(GetRpsRenderGraphTintedQuads()); + + result = rpsProgramBindNode(tintedQuadEntry, "Blt", &TestD3D12OutputResource::DrawBlt, this); + REQUIRE(result == RPS_OK); + } + + virtual void OnCleanUp() override + { + TestRpsOutputResource::OnCleanUp(); + + m_rootSignature = nullptr; + m_pipelineStateBlt = nullptr; + m_pipelineStateDrawTriangle = nullptr; + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + TestRpsOutputResource::OnUpdate(frameIndex, m_width, m_height); + UpdatePipeline(frameIndex, CalcGuaranteedCompletedFrameIndexForRps()); + } + + virtual void OnRender(uint32_t frameIndex) override + { + if (m_triangleActive) + { + REQUIRE(RPS_SUCCEEDED(ExecuteRenderGraph(frameIndex, GetRpsRenderGraphColoredTriangle()))); + } + + REQUIRE(RPS_SUCCEEDED(ExecuteRenderGraph(frameIndex, GetRpsRenderGraphTintedQuads()))); + } + +protected: + virtual void CreateRpsDevice(RpsDevice& rpsDeviceOut) override final + { + rpsDeviceOut = rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); }); + } + + 
void DrawTriangle(const RpsCmdCallbackContext* pContext) + { + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + BindDescriptorHeaps(pCmdList); + pCmdList->SetGraphicsRootSignature(m_rootSignature.Get()); + pCmdList->SetPipelineState(m_pipelineStateDrawTriangle.Get()); + + ConstantData cbData = { + {}, + float(m_width) / m_height, + float(std::chrono::duration_cast>(m_triangleAnimationTime).count())}; + + pCmdList->SetGraphicsRoot32BitConstants(0, sizeof(cbData) / sizeof(UINT), &cbData, 0); + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); // same enumerator DrawBlt uses, instead of the legacy D3D10_ alias + pCmdList->DrawInstanced(3, 1, 0, 0); + } + +private: + void DrawBlt(const RpsCmdCallbackContext* pContext, const XMFLOAT3& tint, D3D12_CPU_DESCRIPTOR_HANDLE srcImage) + { + auto pThis = static_cast(pContext->pCmdCallbackContext); + + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + D3D12_GPU_DESCRIPTOR_HANDLE srvTable = + AllocDynamicDescriptorsAndWrite(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, &srcImage, 1); + + pCmdList->SetGraphicsRootSignature(m_rootSignature.Get()); + pCmdList->SetPipelineState(m_pipelineStateBlt.Get()); + + BindDescriptorHeaps(pCmdList); + ConstantData cbData = { + tint, + float(m_width) / m_height, + (tint.x + tint.y + tint.z) > 2.5f ? 0 : float(RpsAfxCpuTimer::SecondsSinceEpoch().count())}; // time stays 0 when the tint components sum above 2.5 (e.g. tint 1,1,1) + pCmdList->SetGraphicsRoot32BitConstants(0, sizeof(cbData) / sizeof(UINT), &cbData, 0); + pCmdList->SetGraphicsRootDescriptorTable(1, srvTable); + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdList->DrawInstanced(3, 1, 0, 0); + } + +private: + void LoadAssets(ID3D12GraphicsCommandList* pInitCmdList, std::vector>& tempResources) + { + // Create a root signature with 5 root constants (b0, vertex stage), a one-SRV descriptor table (t0, pixel stage) and a static sampler (s0).
+ { + CD3DX12_DESCRIPTOR_RANGE ranges[1] = {}; + CD3DX12_ROOT_PARAMETER rootParameters[2] = {}; + + ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0); + rootParameters[0].InitAsConstants(5, 0, 0, D3D12_SHADER_VISIBILITY_VERTEX); + rootParameters[1].InitAsDescriptorTable(1, ranges, D3D12_SHADER_VISIBILITY_PIXEL); + + D3D12_STATIC_SAMPLER_DESC sampler = {}; + sampler.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + sampler.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + sampler.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + sampler.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + sampler.MipLODBias = 0; + sampler.MaxAnisotropy = 0; + sampler.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; + sampler.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + sampler.MinLOD = 0.0f; + sampler.MaxLOD = D3D12_FLOAT32_MAX; + sampler.ShaderRegister = 0; + sampler.RegisterSpace = 0; + sampler.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC rootSignatureDesc; + rootSignatureDesc.Init_1_0( + _countof(rootParameters), rootParameters, 1, &sampler, D3D12_ROOT_SIGNATURE_FLAG_NONE); + + ComPtr signature; + ComPtr error; + ThrowIfFailed(D3DX12SerializeVersionedRootSignature( + &rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1_0, &signature, &error)); + ThrowIfFailed(m_device->CreateRootSignature( + 0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature))); + } + + { + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.pRootSignature = m_rootSignature.Get(); + psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); + psoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); + psoDesc.DepthStencilState.DepthEnable = FALSE; + psoDesc.DepthStencilState.StencilEnable = FALSE; + psoDesc.SampleMask = UINT_MAX; + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + psoDesc.NumRenderTargets = 1; + psoDesc.RTVFormats[0] = m_swapChain.GetFormat(); + psoDesc.SampleDesc.Count = 1; + + 
std::vector vsCode, psCode; + DxcCompile(c_Shader, L"VSTriangle", L"vs_6_0", L"", nullptr, 0, vsCode); + DxcCompile(c_Shader, L"PSTriangle", L"ps_6_0", L"", nullptr, 0, psCode); + psoDesc.VS = CD3DX12_SHADER_BYTECODE(vsCode.data(), vsCode.size()); + psoDesc.PS = CD3DX12_SHADER_BYTECODE(psCode.data(), psCode.size()); + + ThrowIfFailed(m_device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&m_pipelineStateDrawTriangle))); + + DxcCompile(c_Shader, L"VSBlt", L"vs_6_0", L"", nullptr, 0, vsCode); + DxcCompile(c_Shader, L"PSBlt", L"ps_6_0", L"", nullptr, 0, psCode); + psoDesc.VS = CD3DX12_SHADER_BYTECODE(vsCode.data(), vsCode.size()); + psoDesc.PS = CD3DX12_SHADER_BYTECODE(psCode.data(), psCode.size()); + + ThrowIfFailed(m_device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&m_pipelineStateBlt))); + } + } + + void UpdatePipeline(uint64_t frameIndex, uint64_t completedFrameIndex) + { + RpsRuntimeResource backBuffers[DXGI_MAX_SWAP_CHAIN_BUFFERS]; + for (uint32_t i = 0; i < m_backBufferCount; i++) + { + backBuffers[i] = rpsD3D12ResourceToHandle(m_backBuffers[i].Get()); + } + + RpsResourceDesc backBufferDesc = {}; + backBufferDesc.type = RPS_RESOURCE_TYPE_IMAGE_2D; + backBufferDesc.temporalLayers = uint32_t(m_backBuffers.size()); + backBufferDesc.image.arrayLayers = 1; + backBufferDesc.image.mipLevels = 1; + backBufferDesc.image.format = rpsFormatFromDXGI(m_swapChain.GetFormat()); + backBufferDesc.image.width = m_width; + backBufferDesc.image.height = m_height; + backBufferDesc.image.sampleCount = 1; + + TestRpsOutputResource::UpdateRpsPipeline(frameIndex, completedFrameIndex, backBufferDesc, backBuffers); + } + +private: + ComPtr m_rootSignature; + ComPtr m_pipelineStateDrawTriangle; + ComPtr m_pipelineStateBlt; +}; + +TEST_CASE(TEST_APP_NAME) +{ +#if _DEBUG + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#endif + + TestD3D12OutputResource renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + 
runInfo.numFramesToRender = g_exitAfterFrame; + runInfo.width = 1280; + runInfo.height = 720; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} diff --git a/tests/gui/test_output_resource_shared.h b/tests/gui/test_output_resource_shared.h new file mode 100644 index 0000000..34a0b5a --- /dev/null +++ b/tests/gui/test_output_resource_shared.h @@ -0,0 +1,237 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#define CATCH_CONFIG_MAIN +#include +#include +#include + +#include "rps/rps.h" + +#include "utils/rps_test_common.h" + +RPS_DECLARE_RPSL_ENTRY(test_output_resource, render_colored); +RPS_DECLARE_RPSL_ENTRY(test_output_resource, render_tinted); + +static const char c_Shader[] = R"( +struct CBData +{ + float3 tint; + float aspectRatio; + float time; +}; + +#if VULKAN +[[vk::push_constant]] CBData cb; +#else +ConstantBuffer cb : register(b0); +#endif + +struct PSInput +{ + float4 position : SV_POSITION; + float4 color : COLOR; + float2 uv : TEXCOORD; +}; + +[[vk::binding(0, 0)]] +Texture2D g_texture : register(t0); + +[[vk::binding(1, 0)]] +SamplerState g_sampler : register(s0); + +PSInput VSTriangle(uint vId : SV_VertexID) +{ + PSInput result; + + const float2 verts[] = { { 0.0f, 2.0f }, { 1.732f, -1.0f }, { -1.732f, -1.0f } }; + const float3 colors[] = { { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 } }; + + float2x2 rotMat = { {cos(cb.time), -sin(cb.time)}, {sin(cb.time), cos(cb.time)} }; + float2 vertPos = mul(verts[vId], rotMat); + vertPos = vertPos * float2(0.2f, 0.2f * cb.aspectRatio); + + result.position = float4(vertPos, 0, 1); + result.color = float4(colors[vId], 1.0f); + result.uv = float2(0, 0); + + return result; +} + +float4 PSTriangle(PSInput input) : SV_Target0 +{ + return input.color; +} + +PSInput VSBlt(uint vId : SV_VertexID) +{ 
+ PSInput result; + + float2 uv = float2(float((vId & 1) << 1), float(vId & 2)); + + result.position = float4(uv.x * 2.0f - 1.0f, uv.y * -2.0f + 1.0f, 0, 1); + result.color = float4(cb.tint, 1); + result.uv = uv + float2(sin(cb.time) * 0.2f, 0); + + return result; +} + +float4 PSBlt(PSInput input) : SV_Target0 +{ + return g_texture.Sample(g_sampler, input.uv) * input.color; +} +)"; + +#define TEST_APP_NAME_RAW "TestOutputResource" + +using namespace DirectX; + +class TestRpsOutputResource +{ +public: + struct ConstantData + { + XMFLOAT3 tint; + float aspectRatio; + float time; + }; + + TestRpsOutputResource() + { + } + +protected: + RpsRenderGraph GetRpsRenderGraphColoredTriangle() const + { + return m_rpsRenderGraphColoredTriangle; + } + + RpsRenderGraph GetRpsRenderGraphTintedQuads() const + { + return m_rpsRenderGraphTintedQuads; + } + + void OnInit() + { + CreateRpsDevice(m_rpsDevice); + LoadRpsPipeline(); + } + + virtual void UpdateRpsPipeline(uint64_t frameIndex, + uint64_t completedFrameIndex, + const RpsResourceDesc& backBufferDesc, + RpsRuntimeResource* pBackBuffers) + { + RpsRuntimeResourceInfo offscreenTextureInfo = {}; + + if (m_rpsRenderGraphColoredTriangle != RPS_NULL_HANDLE) + { + const RpsRuntimeResource* argResources[] = {pBackBuffers}; + RpsConstant argData[] = {&backBufferDesc, nullptr}; + + RpsRenderGraphUpdateInfo updateInfo = {}; + updateInfo.frameIndex = frameIndex; + updateInfo.gpuCompletedFrameIndex = completedFrameIndex; + updateInfo.numArgs = uint32_t(RPS_TEST_COUNTOF(argData)); + updateInfo.ppArgs = argData; + updateInfo.ppArgResources = argResources; + + updateInfo.diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_RUNTIME_DEBUG_NAMES; + if (completedFrameIndex == RPS_GPU_COMPLETED_FRAME_INDEX_NONE) + { + updateInfo.diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_ALL; + } + + if (m_triangleActive) + { + REQUIRE_RPS_OK(rpsRenderGraphUpdate(m_rpsRenderGraphColoredTriangle, &updateInfo)); + } + + constexpr uint32_t OffscreenTextureParamId = 1; + + 
REQUIRE_RPS_OK(rpsRenderGraphGetOutputParameterResourceInfos( + m_rpsRenderGraphColoredTriangle, OffscreenTextureParamId, 0, 1, &offscreenTextureInfo)); + } + + if (m_rpsRenderGraphTintedQuads != RPS_NULL_HANDLE) + { + const RpsRuntimeResource* argResources[] = {pBackBuffers, &offscreenTextureInfo.hResource}; + RpsConstant argData[] = {&backBufferDesc, &offscreenTextureInfo.resourceDesc}; + + RpsRenderGraphUpdateInfo updateInfo = {}; + updateInfo.frameIndex = frameIndex; + updateInfo.gpuCompletedFrameIndex = completedFrameIndex; + updateInfo.numArgs = uint32_t(RPS_TEST_COUNTOF(argData)); + updateInfo.ppArgs = argData; + updateInfo.ppArgResources = argResources; + + updateInfo.diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_RUNTIME_DEBUG_NAMES; + if (completedFrameIndex == RPS_GPU_COMPLETED_FRAME_INDEX_NONE) + { + updateInfo.diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_ALL; + } + + REQUIRE_RPS_OK(rpsRenderGraphUpdate(m_rpsRenderGraphTintedQuads, &updateInfo)); + } + } + + void OnCleanUp() + { + rpsRenderGraphDestroy(m_rpsRenderGraphColoredTriangle); + rpsRenderGraphDestroy(m_rpsRenderGraphTintedQuads); + rpsTestUtilDestroyDevice(m_rpsDevice); + } + + void OnUpdate(uint32_t frameIndex, uint32_t width, uint32_t height) + { + using namespace std::chrono; + + const auto currTime = RpsAfxCpuTimer::Now(); + const auto currTimeInMilli = duration_cast(currTime.time_since_epoch()); + + // Pause triangle animation every half seconds. 
+ m_triangleActive = (frameIndex < 16) || (((currTimeInMilli.count() / 500) & 1) == 0); + + if (m_triangleActive) + { + m_triangleAnimationTime += (currTime.time_since_epoch() - m_lastUpdateTime.time_since_epoch()); + } + + m_lastUpdateTime = currTime; + } + +protected: + virtual void CreateRpsDevice(RpsDevice& rpsDeviceOut) = 0; + +private: + void LoadRpsPipeline() + { + RpsRenderGraphCreateInfo renderGraphCreateInfo = {}; + renderGraphCreateInfo.mainEntryCreateInfo.hRpslEntryPoint = + rpsTestLoadRpslEntry(test_output_resource, render_colored); + + RpsResult result = rpsRenderGraphCreate(m_rpsDevice, &renderGraphCreateInfo, &m_rpsRenderGraphColoredTriangle); + REQUIRE(result == RPS_OK); + + renderGraphCreateInfo.mainEntryCreateInfo.hRpslEntryPoint = + rpsTestLoadRpslEntry(test_output_resource, render_tinted); + + result = rpsRenderGraphCreate(m_rpsDevice, &renderGraphCreateInfo, &m_rpsRenderGraphTintedQuads); + REQUIRE(result == RPS_OK); + } + +protected: + bool m_triangleActive = true; + + std::chrono::steady_clock::duration m_triangleAnimationTime = {}; + RpsAfxCpuTimer::time_point m_lastUpdateTime; + +private: + RpsDevice m_rpsDevice = RPS_NULL_HANDLE; + RpsRenderGraph m_rpsRenderGraphColoredTriangle = {}; + RpsRenderGraph m_rpsRenderGraphTintedQuads = {}; +}; diff --git a/tests/gui/test_output_resource_vk.cpp b/tests/gui/test_output_resource_vk.cpp new file mode 100644 index 0000000..317c90c --- /dev/null +++ b/tests/gui/test_output_resource_vk.cpp @@ -0,0 +1,426 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#ifdef _WIN32 +#define VK_USE_PLATFORM_WIN32_KHR +#else +#error "TODO" +#endif + +#define RPS_VK_RUNTIME 1 + +#include "test_output_resource_shared.h" + +#include "utils/rps_test_common.h" +#include "utils/rps_test_win32.h" +#include "utils/rps_test_vk_renderer.h" + +#include + +class TestVkOutputResource : public RpsTestVulkanRenderer, public TestRpsOutputResource +{ +protected: + virtual void OnInit(VkCommandBuffer initCmdBuf, InitTempResources& tempResources) override + { + LoadAssets(initCmdBuf, tempResources); + + TestRpsOutputResource::OnInit(); + + auto coloredTriangelEntry = rpsRenderGraphGetMainEntry(GetRpsRenderGraphColoredTriangle()); + + RpsResult result = + rpsProgramBindNode(coloredTriangelEntry, "DrawTriangle", &TestVkOutputResource::DrawTriangle, this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode(coloredTriangelEntry, "Blt", &TestVkOutputResource::DrawBlt, this); + REQUIRE(result == RPS_OK); + + auto tintedQuadEntry = rpsRenderGraphGetMainEntry(GetRpsRenderGraphTintedQuads()); + + result = rpsProgramBindNode(tintedQuadEntry, "Blt", &TestVkOutputResource::DrawBlt, this); + REQUIRE(result == RPS_OK); + } + + virtual void OnCleanUp() override + { + TestRpsOutputResource::OnCleanUp(); + + vkDestroyPipeline(m_device, m_psoBlt, nullptr); + vkDestroyPipeline(m_device, m_psoDrawTriangle, nullptr); + vkDestroyPipelineLayout(m_device, m_pipelineLayout, nullptr); + vkDestroySampler(m_device, m_defaultSampler, nullptr); + vkDestroyDescriptorSetLayout(m_device, m_sharedDescriptorSetLayout, nullptr); + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + TestRpsOutputResource::OnUpdate(frameIndex, m_width, m_height); + UpdatePipeline(frameIndex, CalcGuaranteedCompletedFrameIndexForRps()); + } + + virtual void OnRender(uint32_t frameIndex) override + { + if (m_triangleActive) + { + REQUIRE(RPS_SUCCEEDED(ExecuteRenderGraph(frameIndex, GetRpsRenderGraphColoredTriangle(), true, false))); + } + + 
REQUIRE(RPS_SUCCEEDED(ExecuteRenderGraph(frameIndex, GetRpsRenderGraphTintedQuads(), !m_triangleActive, true))); + } + +protected: + virtual void CreateRpsDevice(RpsDevice& rpsDeviceOut) override final + { + rpsDeviceOut = rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); }); + } + + void DrawTriangle(const RpsCmdCallbackContext* pContext) + { + if (m_psoDrawTriangle == RPS_NULL_HANDLE) + { + VkRenderPass rp; + RpsResult result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + CreatePipeline(c_Shader, rp, &m_psoDrawTriangle, L"VSTriangle", L"PSTriangle"); + } + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + ConstantData cbData = { + {}, + float(m_width) / m_height, + float(std::chrono::duration_cast>(m_triangleAnimationTime).count())}; + + vkCmdPushConstants(cmdBuf, m_pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(cbData), &cbData); + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_psoDrawTriangle); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + + void DrawBlt(const RpsCmdCallbackContext* pContext, const XMFLOAT3& tint, VkImageView srcImage) + { + if (m_psoBlt == RPS_NULL_HANDLE) + { + VkRenderPass rp; + RpsResult result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + CreatePipeline(c_Shader, rp, &m_psoBlt, L"VSBlt", L"PSBlt"); + } + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + ConstantData cbData = { + tint, + float(m_width) / m_height, + (tint.x + tint.y + tint.z) > 2.5f ? 
0 : float(RpsAfxCpuTimer::SecondsSinceEpoch().count())}; + + vkCmdPushConstants(cmdBuf, m_pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(cbData), &cbData); + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_sharedDescriptorSetLayout, 1, &ds)); + + VkWriteDescriptorSet writeDescriptorSet[1] = {}; + VkDescriptorImageInfo imageInfo = {VK_NULL_HANDLE, srcImage, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + AppendWriteDescriptorSetImages(&writeDescriptorSet[0], ds, 0, 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &imageInfo); + + vkUpdateDescriptorSets(m_device, _countof(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + + vkCmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayout, 0, 1, &ds, 0, nullptr); + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_psoBlt); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + +private: + void LoadAssets(VkCommandBuffer initCmdBuf, InitTempResources& tempResources) + { + OnPostResize(); + + VkSamplerCreateInfo sampCI = {}; + sampCI.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampCI.magFilter = VK_FILTER_LINEAR; + sampCI.minFilter = VK_FILTER_LINEAR; + sampCI.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + sampCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampCI.mipLodBias = 0.0f; + sampCI.compareOp = VK_COMPARE_OP_NEVER; + sampCI.minLod = 0.0f; + sampCI.maxLod = FLT_MAX; + sampCI.maxAnisotropy = 1.0; + sampCI.anisotropyEnable = VK_FALSE; + sampCI.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + ThrowIfFailedVK(vkCreateSampler(m_device, &sampCI, nullptr, &m_defaultSampler)); + + VkDescriptorSetLayoutBinding sharedLayoutBindings[2] = {}; + sharedLayoutBindings[0].binding = 0; + sharedLayoutBindings[0].descriptorCount = 1; + sharedLayoutBindings[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + sharedLayoutBindings[0].stageFlags = 
VK_SHADER_STAGE_FRAGMENT_BIT; + sharedLayoutBindings[1].binding = 1; + sharedLayoutBindings[1].descriptorCount = 1; + sharedLayoutBindings[1].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + sharedLayoutBindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + sharedLayoutBindings[1].pImmutableSamplers = &m_defaultSampler; + + VkDescriptorSetLayoutCreateInfo setLayoutCI = {}; + setLayoutCI.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + setLayoutCI.pBindings = sharedLayoutBindings; + setLayoutCI.bindingCount = _countof(sharedLayoutBindings); + + ThrowIfFailedVK(vkCreateDescriptorSetLayout(m_device, &setLayoutCI, nullptr, &m_sharedDescriptorSetLayout)); + + VkPushConstantRange pushConstRanges[1] = {}; + pushConstRanges[0].offset = 0; + pushConstRanges[0].size = sizeof(ConstantData); + pushConstRanges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + + VkPipelineLayoutCreateInfo plCI = {}; + plCI.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + plCI.setLayoutCount = 1; + plCI.pSetLayouts = &m_sharedDescriptorSetLayout; + plCI.pushConstantRangeCount = 1; + plCI.pPushConstantRanges = pushConstRanges; + + ThrowIfFailedVK(vkCreatePipelineLayout(m_device, &plCI, nullptr, &m_pipelineLayout)); + } + + void CreatePipeline( + const char* pShaderCode, VkRenderPass renderPass, VkPipeline* pPipeline, LPCWSTR vsEntry, LPCWSTR psEntry) + { + VkPipelineVertexInputStateCreateInfo vi = {}; + vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + + // input assembly state + // + VkPipelineInputAssemblyStateCreateInfo ia; + ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + ia.pNext = NULL; + ia.flags = 0; + ia.primitiveRestartEnable = VK_FALSE; + ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + + // rasterizer state + VkPipelineRasterizationStateCreateInfo rs; + rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rs.pNext = NULL; + rs.flags = 0; + rs.polygonMode = VK_POLYGON_MODE_FILL; + rs.cullMode = 
VK_CULL_MODE_NONE; + rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rs.depthClampEnable = VK_FALSE; + rs.rasterizerDiscardEnable = VK_FALSE; + rs.depthBiasEnable = VK_FALSE; + rs.depthBiasConstantFactor = 0; + rs.depthBiasClamp = 0; + rs.depthBiasSlopeFactor = 0; + rs.lineWidth = 1.0f; + + VkPipelineColorBlendAttachmentState bs[1] = {}; + bs[0].blendEnable = VK_FALSE; + bs[0].srcColorBlendFactor = VK_BLEND_FACTOR_ONE; + bs[0].dstColorBlendFactor = VK_BLEND_FACTOR_ZERO; + bs[0].colorBlendOp = VK_BLEND_OP_ADD; + bs[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE; + bs[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; + bs[0].alphaBlendOp = VK_BLEND_OP_ADD; + bs[0].colorWriteMask = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + // Color blend state + VkPipelineColorBlendStateCreateInfo cb; + cb.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + cb.flags = 0; + cb.pNext = NULL; + cb.attachmentCount = 1; + cb.pAttachments = bs; + cb.logicOpEnable = VK_FALSE; + cb.logicOp = VK_LOGIC_OP_NO_OP; + cb.blendConstants[0] = 1.0f; + cb.blendConstants[1] = 1.0f; + cb.blendConstants[2] = 1.0f; + cb.blendConstants[3] = 1.0f; + + std::vector dynamicStateEnables = {VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR}; + VkPipelineDynamicStateCreateInfo dynamicState = {}; + dynamicState.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamicState.pNext = NULL; + dynamicState.pDynamicStates = dynamicStateEnables.data(); + dynamicState.dynamicStateCount = (uint32_t)dynamicStateEnables.size(); + + // view port state + + VkPipelineViewportStateCreateInfo vp = {}; + vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + vp.pNext = NULL; + vp.flags = 0; + vp.viewportCount = 1; + vp.scissorCount = 1; + vp.pScissors = NULL; + vp.pViewports = NULL; + + // depth stencil state + + VkPipelineDepthStencilStateCreateInfo ds; + ds.sType = 
VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + ds.pNext = NULL; + ds.flags = 0; + ds.depthTestEnable = VK_FALSE; + ds.depthWriteEnable = VK_FALSE; + ds.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL; + ds.depthBoundsTestEnable = VK_FALSE; + ds.stencilTestEnable = VK_FALSE; + ds.back.failOp = VK_STENCIL_OP_KEEP; + ds.back.passOp = VK_STENCIL_OP_KEEP; + ds.back.compareOp = VK_COMPARE_OP_ALWAYS; + ds.back.compareMask = 0; + ds.back.reference = 0; + ds.back.depthFailOp = VK_STENCIL_OP_KEEP; + ds.back.writeMask = 0; + ds.minDepthBounds = 0; + ds.maxDepthBounds = 0; + ds.stencilTestEnable = VK_FALSE; + ds.front = ds.back; + + // multi sample state + + VkPipelineMultisampleStateCreateInfo ms; + ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + ms.pNext = NULL; + ms.flags = 0; + ms.pSampleMask = NULL; + ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + ms.sampleShadingEnable = VK_FALSE; + ms.alphaToCoverageEnable = VK_FALSE; + ms.alphaToOneEnable = VK_FALSE; + ms.minSampleShading = 0.0; + + VkShaderModule vsModule, psModule; + std::vector vsCode, psCode; + + DxcDefine defs[] = {{L"VULKAN", L"1"}}; + + DxcCompileToSpirv(pShaderCode, vsEntry, L"vs_6_0", L"", defs, RPS_TEST_COUNTOF(defs), vsCode); + DxcCompileToSpirv(pShaderCode, psEntry, L"ps_6_0", L"", defs, RPS_TEST_COUNTOF(defs), psCode); + + VkShaderModuleCreateInfo smCI = {}; + smCI.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + + smCI.pCode = reinterpret_cast(vsCode.data()); + smCI.codeSize = vsCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &vsModule)); + + smCI.pCode = reinterpret_cast(psCode.data()); + smCI.codeSize = psCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &psModule)); + + char vsName[128]; + char psName[128]; + sprintf_s(vsName, "%S", vsEntry); + sprintf_s(psName, "%S", psEntry); + + VkPipelineShaderStageCreateInfo shaderStages[2] = {}; + shaderStages[0].sType = 
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[0].module = vsModule; + shaderStages[0].pName = vsName; + shaderStages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + shaderStages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[1].module = psModule; + shaderStages[1].pName = psName; + shaderStages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + + VkGraphicsPipelineCreateInfo psoCI = {}; + psoCI.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + psoCI.pNext = NULL; + psoCI.layout = m_pipelineLayout; + psoCI.basePipelineHandle = VK_NULL_HANDLE; + psoCI.basePipelineIndex = 0; + psoCI.flags = 0; + psoCI.pVertexInputState = &vi; + psoCI.pInputAssemblyState = &ia; + psoCI.pRasterizationState = &rs; + psoCI.pColorBlendState = &cb; + psoCI.pTessellationState = NULL; + psoCI.pMultisampleState = &ms; + psoCI.pDynamicState = &dynamicState; + psoCI.pViewportState = &vp; + psoCI.pDepthStencilState = &ds; + psoCI.pStages = shaderStages; + psoCI.stageCount = _countof(shaderStages); + psoCI.renderPass = renderPass; + psoCI.subpass = 0; + + ThrowIfFailedVK(vkCreateGraphicsPipelines(m_device, VK_NULL_HANDLE, 1, &psoCI, nullptr, pPipeline)); + + vkDestroyShaderModule(m_device, vsModule, nullptr); + vkDestroyShaderModule(m_device, psModule, nullptr); + } + + void UpdatePipeline(uint64_t frameIndex, uint64_t completedFrameIndex) + { + RpsRuntimeResource backBuffers[16]; + + if (m_swapChainImages.size() > RPS_TEST_COUNTOF(backBuffers)) + throw; + + for (uint32_t i = 0; i < m_swapChainImages.size(); i++) + { + backBuffers[i] = rpsVKImageToHandle(m_swapChainImages[i].image); + } + + RpsResourceDesc backBufferDesc = {}; + backBufferDesc.type = RPS_RESOURCE_TYPE_IMAGE_2D; + backBufferDesc.temporalLayers = uint32_t(m_swapChainImages.size()); + backBufferDesc.image.arrayLayers = 1; + backBufferDesc.image.mipLevels = 1; + backBufferDesc.image.format = rpsFormatFromVK(m_swapChainFormat.format); + backBufferDesc.image.width = m_width; + 
backBufferDesc.image.height = m_height; + backBufferDesc.image.sampleCount = 1; + + TestRpsOutputResource::UpdateRpsPipeline(frameIndex, completedFrameIndex, backBufferDesc, backBuffers); + } + +private: + VkPipeline m_psoDrawTriangle = VK_NULL_HANDLE; + VkPipeline m_psoBlt = VK_NULL_HANDLE; + + VkPipelineLayout m_pipelineLayout = VK_NULL_HANDLE; + + VkSampler m_defaultSampler = VK_NULL_HANDLE; + + VkDescriptorSetLayout m_sharedDescriptorSetLayout = VK_NULL_HANDLE; + + VkImage m_checkerboardTexture = RPS_NULL_HANDLE; + VkImageView m_checkerboardTextureView = RPS_NULL_HANDLE; + VkBuffer m_vertexBuffer = RPS_NULL_HANDLE; + + VkDeviceSize m_triangleVbOffset = 0; + VkDeviceSize m_quadVbOffset = 0; +}; + +TEST_CASE(TEST_APP_NAME) +{ + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#if defined(BREAK_AT_ALLOC_ID) + _CrtSetBreakAlloc(BREAK_AT_ALLOC_ID); +#endif + + TestVkOutputResource renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + runInfo.numFramesToRender = g_exitAfterFrame; + runInfo.width = 1280; + runInfo.height = 720; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} diff --git a/tests/gui/test_render_to_texture.rpsl b/tests/gui/test_render_to_texture.rpsl new file mode 100644 index 0000000..5bc192d --- /dev/null +++ b/tests/gui/test_render_to_texture.rpsl @@ -0,0 +1,39 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+
+// Node declarations; the callbacks are bound by the host application.
+node Geometry(rtv offscreenRenderTarget : SV_Target0, uint useMSAA);
+graphics node GeometryMSAA(rtv offscreenRTMSAA : SV_Target0, uint useMSAA, [writeonly(resolve)] texture offscreenRT : SV_ResolveTarget0);
+graphics node Quads(rtv backBuffer : SV_Target0, srv offscreenRenderTarget);
+
+// Renders geometry into an offscreen texture matching the back buffer (optionally through a
+// 4x MSAA target that is resolved into it), then draws quads sampling that texture to the
+// back buffer.
+export void render_to_texture([readonly(present)] texture backbuffer, bool useMultisampling)
+{
+    ResourceDesc bbDesc   = backbuffer.desc();
+    RPS_FORMAT   rtFormat = bbDesc.Format;
+    uint32_t     rtWidth  = (uint32_t)bbDesc.Width;
+    uint32_t     rtHeight = (uint32_t)bbDesc.Height;
+
+    // Offscreen target that the final pass samples from.
+    texture offscreenRT = create_tex2d(rtFormat, rtWidth, rtHeight);
+
+    if (!useMultisampling)
+    {
+        // Single-sampled path: clear and draw directly into the offscreen target.
+        clear(offscreenRT, float4(0.0, 0.2, 0.4, 1.0));
+        Geometry(offscreenRT, false);
+    }
+    else
+    {
+        // MSAA path: clear and draw into a 4-sample target, resolving into offscreenRT.
+        texture offscreenRTMSAA = create_tex2d(rtFormat, rtWidth, rtHeight, 1, 1, 1, 4);
+
+        clear(offscreenRTMSAA, float4(0.0, 0.4, 0.2, 1.0));
+        GeometryMSAA(offscreenRTMSAA, useMultisampling, offscreenRT);
+    }
+
+    // Composite the offscreen result onto the back buffer.
+    Quads(backbuffer, offscreenRT);
+}
diff --git a/tests/gui/test_render_to_texture_d3d11.cpp b/tests/gui/test_render_to_texture_d3d11.cpp
new file mode 100644
index 0000000..5825c73
--- /dev/null
+++ b/tests/gui/test_render_to_texture_d3d11.cpp
@@ -0,0 +1,287 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+ +#define RPS_D3D11_RUNTIME 1 + +#include "test_render_to_texture_shared.h" + +#include "utils/rps_test_win32.h" +#include "utils/rps_test_d3d11_renderer.h" + +using namespace DirectX; + +class TestD3D11RpsRenderToTexture : public RpsTestD3D11Renderer, public TestRpsRenderToTexture +{ + struct alignas(16) SceneConstantBufferD3D11 : public TestRpsRenderToTexture::SceneConstantBuffer + { + }; + +protected: + virtual void OnInit() override + { + LoadAssets(); + + TestRpsRenderToTexture::OnInit(); + + RpsResult result = rpsProgramBindNode( + rpsRenderGraphGetMainEntry(GetRpsRenderGraph()), "Quads", &TestD3D11RpsRenderToTexture::DrawQuads, this); + REQUIRE(result == RPS_OK); + } + + virtual void OnPostResize() override + { + } + + virtual void OnCleanUp() override + { + m_pSampler = nullptr; + m_inputLayout = nullptr; + m_VS = nullptr; + m_PS = nullptr; + m_vertexBuffer = nullptr; + m_triangleCB = nullptr; + m_quadCB = nullptr; + m_checkerboardTextureView = nullptr; + + TestRpsRenderToTexture::OnCleanUp(); + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + TestRpsRenderToTexture::OnUpdate(frameIndex, m_width, m_height); + UpdatePipeline(frameIndex, CalcGuaranteedCompletedFrameIndexForRps()); + } + + virtual void OnRender(uint32_t frameIndex) override + { + D3D11_MAPPED_SUBRESOURCE mappedCb; + ThrowIfFailed(m_immDC->Map(m_triangleCB.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedCb)); + + memcpy(mappedCb.pData, &m_triangleAnimationData, sizeof(m_triangleAnimationData)); + + m_immDC->Unmap(m_triangleCB.Get(), 0); + + REQUIRE(RPS_SUCCEEDED(ExecuteRenderGraph(frameIndex, GetRpsRenderGraph()))); + } + +protected: + virtual void CreateRpsDevice(RpsDevice& rpsDeviceOut) override final + { + rpsDeviceOut = rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); }); + } + + virtual void DrawTriangle(const RpsCmdCallbackContext* pContext, bool isMSAA) override final + { + ID3D11DeviceContext* 
pCmdList = rpsD3D11DeviceContextFromHandle(pContext->hCommandBuffer); + + pCmdList->VSSetShader(m_VS.Get(), nullptr, 0); + pCmdList->PSSetShader(m_PS.Get(), nullptr, 0); + + pCmdList->VSSetConstantBuffers(0, 1, m_triangleCB.GetAddressOf()); + pCmdList->PSSetShaderResources(0, 1, m_checkerboardTextureView.GetAddressOf()); + pCmdList->PSSetSamplers(0, 1, m_pSampler.GetAddressOf()); + + pCmdList->IASetInputLayout(m_inputLayout.Get()); + pCmdList->IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + UINT vbStride = sizeof(Vertex); + UINT vbOffset = 0; + pCmdList->IASetVertexBuffers(0, 1, m_vertexBuffer.GetAddressOf(), &vbStride, &vbOffset); + pCmdList->DrawInstanced(3, 1, 0, 0); + } + +private: + void DrawQuads(const RpsCmdCallbackContext* pContext, + rps::UnusedArg backBuffer, + ID3D11ShaderResourceView* offscreenRTSrv) + { + ID3D11DeviceContext* pCmdList = rpsD3D11DeviceContextFromHandle(pContext->hCommandBuffer); + + pCmdList->VSSetShader(m_VS.Get(), nullptr, 0); + + pCmdList->VSSetConstantBuffers(0, 1, m_quadCB.GetAddressOf()); + pCmdList->PSSetShaderResources(0, 1, &offscreenRTSrv); + pCmdList->PSSetSamplers(0, 1, m_pSampler.GetAddressOf()); + + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + UINT vbStride = sizeof(Vertex); + UINT vbOffset = sizeof(Vertex) * 3; + pCmdList->IASetVertexBuffers(0, 1, m_vertexBuffer.GetAddressOf(), &vbStride, &vbOffset); + pCmdList->DrawInstanced(24, 1, 0, 0); + } + +private: + void LoadAssets() + { + { + D3D11_SAMPLER_DESC sampler = {}; + + sampler.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + sampler.AddressU = D3D11_TEXTURE_ADDRESS_WRAP; + sampler.AddressV = D3D11_TEXTURE_ADDRESS_WRAP; + sampler.AddressW = D3D11_TEXTURE_ADDRESS_WRAP; + sampler.MipLODBias = 0; + sampler.MaxAnisotropy = 0; + sampler.ComparisonFunc = D3D11_COMPARISON_NEVER; + sampler.MinLOD = 0.0f; + sampler.MaxLOD = D3D11_FLOAT32_MAX; + + m_device->CreateSamplerState(&sampler, &m_pSampler); + } + + // Create the pipeline state, 
which includes compiling and loading shaders. + { + D3D11_INPUT_ELEMENT_DESC inputElementDescs[] = { + {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0}, + {"COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0}, + {"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0}}; + + ComPtr dxbc, err; + + ThrowIfFailedEx( + D3DCompile( + c_Shader, sizeof(c_Shader), nullptr, nullptr, nullptr, "VSMain", "vs_5_0", 0, 0, &dxbc, &err), + err); + ThrowIfFailed( + m_device->CreateVertexShader(dxbc->GetBufferPointer(), dxbc->GetBufferSize(), nullptr, &m_VS)); + + ThrowIfFailed(m_device->CreateInputLayout(inputElementDescs, + _countof(inputElementDescs), + dxbc->GetBufferPointer(), + dxbc->GetBufferSize(), + &m_inputLayout)); + + ThrowIfFailedEx( + D3DCompile( + c_Shader, sizeof(c_Shader), nullptr, nullptr, nullptr, "PSMain", "ps_5_0", 0, 0, &dxbc, &err), + err); + ThrowIfFailed(m_device->CreatePixelShader(dxbc->GetBufferPointer(), dxbc->GetBufferSize(), nullptr, &m_PS)); + } + + // Create constant buffers + { + CD3D11_BUFFER_DESC cbufDesc(sizeof(SceneConstantBufferD3D11), + D3D11_BIND_CONSTANT_BUFFER, + D3D11_USAGE_DYNAMIC, + D3D11_CPU_ACCESS_WRITE); + + ThrowIfFailed(m_device->CreateBuffer(&cbufDesc, nullptr, &m_triangleCB)); + + cbufDesc.Usage = D3D11_USAGE_IMMUTABLE; + cbufDesc.CPUAccessFlags = 0; + + D3D11_SUBRESOURCE_DATA quadCBInitData = {}; + quadCBInitData.pSysMem = &m_quadConstantData; + + ThrowIfFailed(m_device->CreateBuffer(&cbufDesc, &quadCBInitData, &m_quadCB)); + } + + // Create vertex buffers + { + // Define the geometry for a triangle. 
+ Vertex triangleVertices[] = { + // triangle + {{0.0f, 0.25f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.5f, 0.0f}}, + {{0.25f, -0.25f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{-0.25f, -0.25f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + + // quad 0 + {{-1.0f, 1.0, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {0.0f, 0.0f}}, + {{0.f, 0.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f}}, + {{-1.f, 0.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + {{-1.0f, 1.0, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {0.0f, 0.0f}}, + {{0.f, 1.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 0.0f}}, + {{0.f, 0.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f}}, + + // quad 1 + {{-1.0f, 0.0, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + {{0.f, -1.f, 0.0f}, {0.f, 0.0f, 1.0f, 1.0f}, {1.0f, 0.0f}}, + {{-1.f, -1.f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 0.0f}}, + {{-1.0f, 0.0, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + {{0.f, 0.f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {1.0f, 1.0f}}, + {{0.f, -1.f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {1.0f, 0.0f}}, + + // quad 2 + {{0.0f, 1.0, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {1.0f, 0.0f}}, + {{1.f, 0.f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.0f, 1.0f}}, + {{0.f, 0.f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{0.0f, 1.0, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {1.0f, 0.0f}}, + {{1.f, 1.f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.0f, 0.0f}}, + {{1.f, 0.f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.0f, 1.0f}}, + + // quad 3 + {{0.0f, 0.0, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{1.f, -1.f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 0.0f}}, + {{0.f, -1.f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 0.0f}}, + {{0.0f, 0.0, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{1.f, 0.f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 1.0f}}, + {{1.f, -1.f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 0.0f}}, + }; + + const UINT vertexBufferSize = sizeof(triangleVertices); + + auto vbDesc = CD3D11_BUFFER_DESC(vertexBufferSize, D3D11_BIND_VERTEX_BUFFER, D3D11_USAGE_IMMUTABLE); + + 
D3D11_SUBRESOURCE_DATA vbInitData = {}; + vbInitData.pSysMem = triangleVertices; + + ThrowIfFailed(m_device->CreateBuffer(&vbDesc, &vbInitData, &m_vertexBuffer)); + } + + // Create checkerboard texture + ComPtr checkerboardTexture; + float tintColor[] = {1.0f, 1.0f, 1.0f, 1.0f}; + CreateStaticCheckerboardTexture(checkerboardTexture, 256, 256, tintColor); + + ThrowIfFailed(m_device->CreateShaderResourceView(checkerboardTexture.Get(), NULL, &m_checkerboardTextureView)); + } + + void UpdatePipeline(uint64_t frameIndex, uint64_t completedFrameIndex) + { + RpsRuntimeResource backBuffers[1]; + backBuffers[0] = rpsD3D11ResourceToHandle(m_backBuffer.Get()); + + RpsResourceDesc backBufferDesc = {}; + backBufferDesc.type = RPS_RESOURCE_TYPE_IMAGE_2D; + backBufferDesc.temporalLayers = 1; + backBufferDesc.image.arrayLayers = 1; + backBufferDesc.image.mipLevels = 1; + backBufferDesc.image.format = rpsFormatFromDXGI(m_swapChain.GetFormat()); + backBufferDesc.image.width = m_width; + backBufferDesc.image.height = m_height; + backBufferDesc.image.sampleCount = 1; + + TestRpsRenderToTexture::UpdateRpsPipeline(frameIndex, completedFrameIndex, backBufferDesc, backBuffers); + } + +private: + ComPtr m_pSampler; + ComPtr m_inputLayout; + ComPtr m_VS; + ComPtr m_PS; + ComPtr m_vertexBuffer; + ComPtr m_triangleCB; + ComPtr m_quadCB; + ComPtr m_checkerboardTextureView; +}; + +TEST_CASE(TEST_APP_NAME) +{ +#if _DEBUG + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#endif + + TestD3D11RpsRenderToTexture renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + runInfo.numFramesToRender = g_exitAfterFrame; + runInfo.width = 1280; + runInfo.height = 720; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} diff --git a/tests/gui/test_render_to_texture_d3d12.cpp b/tests/gui/test_render_to_texture_d3d12.cpp new file mode 100644 index 0000000..2c49c75 --- /dev/null +++ b/tests/gui/test_render_to_texture_d3d12.cpp @@ -0,0 +1,449 @@ 
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#define RPS_D3D12_RUNTIME 1 + +#include "test_render_to_texture_shared.h" + +#include "utils/rps_test_win32.h" +#include "utils/rps_test_d3d12_renderer.h" + +using namespace DirectX; + +class TestD3D12RpsRenderToTexture : public RpsTestD3D12Renderer, public TestRpsRenderToTexture +{ + struct alignas(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT) SceneConstantBufferD3D12 + : public TestRpsRenderToTexture::SceneConstantBuffer + { + }; + +protected: + virtual void OnInit(ID3D12GraphicsCommandList* pInitCmdList, + std::vector>& tempResources) override + { + LoadAssets(pInitCmdList, tempResources); + + TestRpsRenderToTexture::OnInit(); + + RpsResult result = + rpsProgramBindNode(rpsRenderGraphGetMainEntry(GetRpsRenderGraph()), "Quads", &DrawQuadsCb, this); + REQUIRE(result == RPS_OK); + } + + virtual void OnPostResize() override + { + } + + virtual void OnCleanUp() override + { + TestRpsRenderToTexture::OnCleanUp(); + + m_rootSignature = nullptr; + m_pipelineState = nullptr; + m_pipelineStateMSAA = nullptr; + m_vertexBuffer = nullptr; + m_constantBuffer = nullptr; + m_texture = nullptr; + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + TestRpsRenderToTexture::OnUpdate(frameIndex, m_width, m_height); + UpdatePipeline(frameIndex, CalcGuaranteedCompletedFrameIndexForRps()); + } + + virtual void OnRender(uint32_t frameIndex) override + { + memcpy(&m_constantBufferCpuVA[m_backBufferIndex], &m_triangleAnimationData, sizeof(m_triangleAnimationData)); + + REQUIRE(RPS_SUCCEEDED(ExecuteRenderGraph(frameIndex, GetRpsRenderGraph()))); + } + +protected: + virtual void CreateRpsDevice(RpsDevice& rpsDeviceOut) override final + { + rpsDeviceOut = rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) 
{ return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); }); + } + + virtual void DrawTriangle(const RpsCmdCallbackContext* pContext, bool isMSAA) override final + { + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + BindDescriptorHeaps(pCmdList); + pCmdList->SetGraphicsRootSignature(m_rootSignature.Get()); + pCmdList->SetPipelineState(isMSAA ? m_pipelineStateMSAA.Get() : m_pipelineState.Get()); + + pCmdList->SetGraphicsRootDescriptorTable(0, m_triangleConstantBufferViews.GetGPU(m_backBufferIndex)); + pCmdList->SetGraphicsRootDescriptorTable(1, m_checkerboardTextureDescriptor.GetGPU(0)); + + pCmdList->IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdList->IASetVertexBuffers(0, 1, &m_triangleBufferView); + pCmdList->DrawInstanced(3, 1, 0, 0); + } + +private: + void DrawQuads(const RpsCmdCallbackContext* pContext, + rps::UnusedArg backBuffer, + D3D12_CPU_DESCRIPTOR_HANDLE offscreenRTSrv) + { + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + + D3D12_GPU_DESCRIPTOR_HANDLE srvTable = + AllocDynamicDescriptorsAndWrite(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, &offscreenRTSrv, 1); + + pCmdList->SetGraphicsRootSignature(m_rootSignature.Get()); + pCmdList->SetPipelineState(m_pipelineState.Get()); + + BindDescriptorHeaps(pCmdList); + pCmdList->SetGraphicsRootDescriptorTable(0, m_quadsConstantBufferView.GetGPU(0)); + pCmdList->SetGraphicsRootDescriptorTable(1, srvTable); + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdList->IASetVertexBuffers(0, 1, &m_quadsBufferView); + pCmdList->DrawInstanced(24, 1, 0, 0); + } + + static void DrawQuadsCb(const RpsCmdCallbackContext* pContext) + { + auto pThis = static_cast(pContext->pCmdCallbackContext); + + D3D12_CPU_DESCRIPTOR_HANDLE srvHdl; + REQUIRE(RPS_OK == rpsD3D12GetCmdArgDescriptor(pContext, 1, &srvHdl)); + + pThis->DrawQuads(pContext, {}, srvHdl); + } + +private: + void 
LoadAssets(ID3D12GraphicsCommandList* pInitCmdList, std::vector>& tempResources) + { + // Create a root signature consisting of a descriptor table with a single CBV. + { + CD3DX12_DESCRIPTOR_RANGE ranges[2] = {}; + CD3DX12_ROOT_PARAMETER rootParameters[2] = {}; + + ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0); + ranges[1].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0); + rootParameters[0].InitAsDescriptorTable(1, &ranges[0], D3D12_SHADER_VISIBILITY_VERTEX); + rootParameters[1].InitAsDescriptorTable(1, &ranges[1], D3D12_SHADER_VISIBILITY_PIXEL); + + D3D12_STATIC_SAMPLER_DESC sampler = {}; + sampler.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + sampler.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + sampler.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + sampler.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + sampler.MipLODBias = 0; + sampler.MaxAnisotropy = 0; + sampler.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; + sampler.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + sampler.MinLOD = 0.0f; + sampler.MaxLOD = D3D12_FLOAT32_MAX; + sampler.ShaderRegister = 0; + sampler.RegisterSpace = 0; + sampler.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + + D3D12_ROOT_SIGNATURE_FLAGS rootSignatureFlags = + D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC rootSignatureDesc; + rootSignatureDesc.Init_1_0(_countof(rootParameters), rootParameters, 1, &sampler, rootSignatureFlags); + + ComPtr signature; + ComPtr error; + ThrowIfFailed(D3DX12SerializeVersionedRootSignature( + &rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1_0, &signature, &error)); + ThrowIfFailed(m_device->CreateRootSignature( + 0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature))); + } + + // Create the pipeline state, which includes compiling and loading shaders. 
+ { + D3D12_INPUT_ELEMENT_DESC inputElementDescs[] = { + {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, + {"COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, + {"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 28, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}}; + + std::vector vsCode, psCode; + DxcCompile(c_Shader, L"VSMain", L"vs_6_0", L"", nullptr, 0, vsCode); + DxcCompile(c_Shader, L"PSMain", L"ps_6_0", L"", nullptr, 0, psCode); + + // Describe and create the graphics pipeline state object (PSO). + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.InputLayout = {inputElementDescs, _countof(inputElementDescs)}; + psoDesc.pRootSignature = m_rootSignature.Get(); + psoDesc.VS = CD3DX12_SHADER_BYTECODE(vsCode.data(), vsCode.size()); + psoDesc.PS = CD3DX12_SHADER_BYTECODE(psCode.data(), psCode.size()); + psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); + psoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); + psoDesc.DepthStencilState.DepthEnable = FALSE; + psoDesc.DepthStencilState.StencilEnable = FALSE; + psoDesc.SampleMask = UINT_MAX; + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + psoDesc.NumRenderTargets = 1; + psoDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; + psoDesc.SampleDesc.Count = 1; + + ThrowIfFailed(m_device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&m_pipelineState))); + + psoDesc.SampleDesc.Count = 4; + + ThrowIfFailed(m_device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&m_pipelineStateMSAA))); + } + + auto uploadHeapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); + + // Create constant buffers + { + auto uploadBufDesc = CD3DX12_RESOURCE_DESC::Buffer( + sizeof(SceneConstantBufferD3D12) * (m_backBufferCount + 1), D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE); + + ThrowIfFailed(m_device->CreateCommittedResource(&uploadHeapProps, + D3D12_HEAP_FLAG_NONE, + &uploadBufDesc, + 
D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_constantBuffer))); + + auto emptyRange = CD3DX12_RANGE(0, 0); + ThrowIfFailed(m_constantBuffer->Map(0, &emptyRange, reinterpret_cast(&m_constantBufferCpuVA))); + + m_triangleConstantBufferViews = + AllocStaticDescriptors(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, m_backBufferCount); + for (uint32_t i = 0; i < m_backBufferCount; i++) + { + D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; + cbvDesc.BufferLocation = + m_constantBuffer->GetGPUVirtualAddress() + sizeof(SceneConstantBufferD3D12) * i; + cbvDesc.SizeInBytes = sizeof(SceneConstantBufferD3D12); + m_device->CreateConstantBufferView(&cbvDesc, m_triangleConstantBufferViews.GetCPU(i)); + } + + m_quadsConstantBufferView = AllocStaticDescriptors(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 1); + D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; + cbvDesc.BufferLocation = + m_constantBuffer->GetGPUVirtualAddress() + sizeof(SceneConstantBufferD3D12) * m_backBufferCount; + cbvDesc.SizeInBytes = sizeof(SceneConstantBufferD3D12); + m_device->CreateConstantBufferView(&cbvDesc, m_quadsConstantBufferView.GetCPU(0)); + + memcpy(&m_constantBufferCpuVA[m_backBufferCount], &m_quadConstantData, sizeof(m_quadConstantData)); + } + + // Create vertex buffers + { + // Define the geometry for a triangle. 
+ Vertex triangleVertices[] = { + // triangle + {{0.0f, 0.25f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.5f, 0.0f}}, + {{0.25f, -0.25f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{-0.25f, -0.25f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + + // quad 0 + {{-1.0f, 1.0, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {0.0f, 0.0f}}, + {{0.f, 0.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f}}, + {{-1.f, 0.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + {{-1.0f, 1.0, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {0.0f, 0.0f}}, + {{0.f, 1.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 0.0f}}, + {{0.f, 0.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f}}, + + // quad 1 + {{-1.0f, 0.0, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + {{0.f, -1.f, 0.0f}, {0.f, 0.0f, 1.0f, 1.0f}, {1.0f, 0.0f}}, + {{-1.f, -1.f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 0.0f}}, + {{-1.0f, 0.0, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + {{0.f, 0.f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {1.0f, 1.0f}}, + {{0.f, -1.f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {1.0f, 0.0f}}, + + // quad 2 + {{0.0f, 1.0, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {1.0f, 0.0f}}, + {{1.f, 0.f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.0f, 1.0f}}, + {{0.f, 0.f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{0.0f, 1.0, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {1.0f, 0.0f}}, + {{1.f, 1.f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.0f, 0.0f}}, + {{1.f, 0.f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.0f, 1.0f}}, + + // quad 3 + {{0.0f, 0.0, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{1.f, -1.f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 0.0f}}, + {{0.f, -1.f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 0.0f}}, + {{0.0f, 0.0, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{1.f, 0.f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 1.0f}}, + {{1.f, -1.f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 0.0f}}, + }; + + const UINT vertexBufferSize = sizeof(triangleVertices); + + auto vbDesc = CD3DX12_RESOURCE_DESC::Buffer(vertexBufferSize); + + 
ThrowIfFailed(m_device->CreateCommittedResource(&uploadHeapProps, + D3D12_HEAP_FLAG_NONE, + &vbDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_vertexBuffer))); + + // Copy the triangle data to the vertex buffer. + UINT8* pVertexDataBegin; + CD3DX12_RANGE readRange(0, 0); // We do not intend to read from this resource on the CPU. + ThrowIfFailed(m_vertexBuffer->Map(0, &readRange, reinterpret_cast(&pVertexDataBegin))); + memcpy(pVertexDataBegin, triangleVertices, sizeof(triangleVertices)); + m_vertexBuffer->Unmap(0, nullptr); + + // Initialize the vertex buffer view. + m_triangleBufferView.BufferLocation = m_vertexBuffer->GetGPUVirtualAddress(); + m_triangleBufferView.StrideInBytes = sizeof(Vertex); + m_triangleBufferView.SizeInBytes = sizeof(Vertex) * 3; + + m_quadsBufferView.BufferLocation = m_triangleBufferView.BufferLocation + m_triangleBufferView.SizeInBytes; + m_quadsBufferView.StrideInBytes = sizeof(Vertex); + m_quadsBufferView.SizeInBytes = sizeof(Vertex) * 24; + } + + // Create checkerboard texture + ComPtr textureUploadHeap; + + // Create the texture. + { + static const UINT TextureWidth = 256; + static const UINT TextureHeight = 256; + static const UINT TexturePixelSize = 4; + + // Describe and create a Texture2D. 
+ D3D12_RESOURCE_DESC textureDesc = {}; + textureDesc.MipLevels = 1; + textureDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + textureDesc.Width = TextureWidth; + textureDesc.Height = TextureHeight; + textureDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + textureDesc.DepthOrArraySize = 1; + textureDesc.SampleDesc.Count = 1; + textureDesc.SampleDesc.Quality = 0; + textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + + auto defaultHeapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + ThrowIfFailed(m_device->CreateCommittedResource(&defaultHeapProps, + D3D12_HEAP_FLAG_NONE, + &textureDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&m_texture))); + + const UINT64 uploadBufferSize = GetRequiredIntermediateSize(m_texture.Get(), 0, 1); + + // Create the GPU upload buffer. + auto uploadBufDesc = CD3DX12_RESOURCE_DESC::Buffer(uploadBufferSize); + ThrowIfFailed(m_device->CreateCommittedResource(&uploadHeapProps, + D3D12_HEAP_FLAG_NONE, + &uploadBufDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&textureUploadHeap))); + textureUploadHeap->SetName(L"textureUploadHeap"); + + // Copy data to the intermediate upload heap and then schedule a copy + // from the upload heap to the Texture2D. + const UINT rowPitch = TextureWidth * TexturePixelSize; + const UINT cellPitch = rowPitch >> 3; // The width of a cell in the checkboard texture. + const UINT cellHeight = TextureWidth >> 3; // The height of a cell in the checkerboard texture. 
+ const UINT textureSize = rowPitch * TextureHeight; + + std::vector data(textureSize); + UINT8* pData = &data[0]; + + for (UINT n = 0; n < textureSize; n += TexturePixelSize) + { + UINT x = n % rowPitch; + UINT y = n / rowPitch; + UINT i = x / cellPitch; + UINT j = y / cellHeight; + + if (i % 2 == j % 2) + { + pData[n] = 0xa0; // R + pData[n + 1] = 0xa0; // G + pData[n + 2] = 0xa0; // B + pData[n + 3] = 0xff; // A + } + else + { + pData[n] = 0xff; // R + pData[n + 1] = 0xff; // G + pData[n + 2] = 0xff; // B + pData[n + 3] = 0xff; // A + } + } + + D3D12_SUBRESOURCE_DATA textureData = {}; + textureData.pData = &data[0]; + textureData.RowPitch = TextureWidth * TexturePixelSize; + textureData.SlicePitch = textureData.RowPitch * TextureHeight; + + auto uploadBarrier = CD3DX12_RESOURCE_BARRIER::Transition( + m_texture.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + + UpdateSubresources(pInitCmdList, m_texture.Get(), textureUploadHeap.Get(), 0, 0, 1, &textureData); + pInitCmdList->ResourceBarrier(1, &uploadBarrier); + + // Keep it around until upload cmdlist finishes executing. 
+ tempResources.push_back(textureUploadHeap); + + m_checkerboardTextureDescriptor = AllocStaticCBV_SRV_UAVs(1); + m_device->CreateShaderResourceView(m_texture.Get(), NULL, m_checkerboardTextureDescriptor.GetCPU(0)); + } + } + + void UpdatePipeline(uint64_t frameIndex, uint64_t completedFrameIndex) + { + RpsRuntimeResource backBuffers[DXGI_MAX_SWAP_CHAIN_BUFFERS]; + for (uint32_t i = 0; i < m_backBufferCount; i++) + { + backBuffers[i] = rpsD3D12ResourceToHandle(m_backBuffers[i].Get()); + } + + RpsResourceDesc backBufferDesc = {}; + backBufferDesc.type = RPS_RESOURCE_TYPE_IMAGE_2D; + backBufferDesc.temporalLayers = uint32_t(m_backBuffers.size()); + backBufferDesc.image.arrayLayers = 1; + backBufferDesc.image.mipLevels = 1; + backBufferDesc.image.format = rpsFormatFromDXGI(m_swapChain.GetFormat()); + backBufferDesc.image.width = m_width; + backBufferDesc.image.height = m_height; + backBufferDesc.image.sampleCount = 1; + + TestRpsRenderToTexture::UpdateRpsPipeline(frameIndex, completedFrameIndex, backBufferDesc, backBuffers); + } + +private: + ComPtr m_rootSignature; + ComPtr m_pipelineState; + ComPtr m_pipelineStateMSAA; + + ComPtr m_vertexBuffer; + D3D12_VERTEX_BUFFER_VIEW m_triangleBufferView; + D3D12_VERTEX_BUFFER_VIEW m_quadsBufferView; + ComPtr m_constantBuffer; + SceneConstantBufferD3D12* m_constantBufferCpuVA; + DescriptorTable m_triangleConstantBufferViews; + DescriptorTable m_quadsConstantBufferView; + ComPtr m_texture; + DescriptorTable m_checkerboardTextureDescriptor; +}; + +TEST_CASE(TEST_APP_NAME) +{ +#if _DEBUG + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#endif + + TestD3D12RpsRenderToTexture renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + runInfo.numFramesToRender = g_exitAfterFrame; + runInfo.width = 1280; + runInfo.height = 720; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} diff --git a/tests/gui/test_render_to_texture_shared.h 
b/tests/gui/test_render_to_texture_shared.h new file mode 100644 index 0000000..eb9443a --- /dev/null +++ b/tests/gui/test_render_to_texture_shared.h @@ -0,0 +1,216 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#define CATCH_CONFIG_MAIN +#include +#include +#include + +#include "rps/rps.h" + +#include "utils/rps_test_common.h" + +RPS_DECLARE_RPSL_ENTRY(test_render_to_texture, render_to_texture); + +static const char c_Shader[] = R"( +#ifdef __hlsl_dx_compiler +[[vk::binding(0, 0)]] +#endif +cbuffer SceneConstantBuffer : register(b0) +{ + float4x4 offset; + float uvDistort; + float aspectRatio; +}; + +struct PSInput +{ + float4 position : SV_POSITION; + float4 color : COLOR; + float4 uv : TEXCOORD; +}; + +#ifdef __hlsl_dx_compiler +[[vk::binding(1, 0)]] +#endif +Texture2D g_texture : register(t0); +#ifdef __hlsl_dx_compiler +[[vk::binding(2, 0)]] +#endif +SamplerState g_sampler : register(s0); + +PSInput VSMain(float4 position : POSITION, float4 color : COLOR, float4 uv : TEXCOORD) +{ + PSInput result; + + position.y *= aspectRatio; + result.position = mul(offset, position); + result.color = color; + result.uv = uv; + result.uv.z = uvDistort; + + return result; +} + +float4 PSMain(PSInput input) : SV_TARGET +{ + input.uv.y += sin(input.uv.x * 10.f) * input.uv.z; + return g_texture.Sample(g_sampler, input.uv.xy) * input.color; +} +)"; + +#define TEST_APP_NAME_RAW "TestRenderToTexture" + +using namespace DirectX; + +class TestRpsRenderToTexture +{ +public: + struct SceneConstantBuffer + { + XMFLOAT4X4 offset; + float uvDistort; + float aspectRatio; + }; + + struct Vertex + { + XMFLOAT3 position; + XMFLOAT4 color; + XMFLOAT2 uv; + }; + +public: + + TestRpsRenderToTexture() + : m_translation(0.f) + , m_rotation(0.f) + { + 
XMStoreFloat4x4(&m_quadConstantData.offset, XMMatrixIdentity()); + m_quadConstantData.uvDistort = 0.1f; + m_quadConstantData.aspectRatio = 1.0f; + } + +protected: + RpsRenderGraph GetRpsRenderGraph() const + { + return m_rpsRenderGraph; + } + + void OnInit() + { + CreateRpsDevice(m_rpsDevice); + LoadRpsPipeline(); + } + + virtual void UpdateRpsPipeline(uint64_t frameIndex, + uint64_t completedFrameIndex, + const RpsResourceDesc& backBufferDesc, + RpsRuntimeResource* pBackBuffers) + { + if (m_rpsRenderGraph != RPS_NULL_HANDLE) + { + const RpsRuntimeResource* argResources[] = {pBackBuffers}; + RpsConstant argData[] = {&backBufferDesc, &m_useMSAA}; + + RpsRenderGraphUpdateInfo updateInfo = {}; + updateInfo.frameIndex = frameIndex; + updateInfo.gpuCompletedFrameIndex = completedFrameIndex; + updateInfo.numArgs = uint32_t(RPS_TEST_COUNTOF(argData)); + updateInfo.ppArgs = argData; + updateInfo.ppArgResources = argResources; + + updateInfo.diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_RUNTIME_DEBUG_NAMES; + if (completedFrameIndex == RPS_GPU_COMPLETED_FRAME_INDEX_NONE) + { + updateInfo.diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_ALL; + } + + REQUIRE_RPS_OK(rpsRenderGraphUpdate(m_rpsRenderGraph, &updateInfo)); + } + } + + void OnCleanUp() + { + rpsRenderGraphDestroy(m_rpsRenderGraph); + rpsTestUtilDestroyDevice(m_rpsDevice); + } + + void OnUpdate(uint32_t frameIndex, uint32_t width, uint32_t height) + { + const float translationSpeed = 0.01f; + const float offsetBounds = 1.4f; + + m_translation += translationSpeed; + if (m_translation > offsetBounds) + { + m_translation = -offsetBounds; + } + + XMMATRIX transform = XMMatrixAffineTransformation2D(XMVectorSplatOne(), XMVectorZero(), m_rotation, XMVectorSet(m_translation, 0.f, 0.f, 0.f)); + + XMStoreFloat4x4(&m_triangleAnimationData.offset, transform); + m_triangleAnimationData.uvDistort = 0.1f; + m_triangleAnimationData.aspectRatio = static_cast(width) / height; + } + + void OnRender(uint64_t frameIndex, RpsRuntimeCommandBuffer 
cmdBuf, uint32_t cmdBegin, uint32_t cmdCount) + { + RpsRenderGraphRecordCommandInfo recordInfo = {}; + + recordInfo.hCmdBuffer = cmdBuf; + recordInfo.pUserContext = this; + recordInfo.frameIndex = frameIndex; + recordInfo.cmdBeginIndex = cmdBegin; + recordInfo.numCmds = cmdCount; + + RpsResult result = rpsRenderGraphRecordCommands(m_rpsRenderGraph, &recordInfo); + REQUIRE(result == RPS_OK); + } + +protected: + virtual void CreateRpsDevice(RpsDevice& rpsDeviceOut) = 0; + virtual void DrawTriangle(const RpsCmdCallbackContext* pContext, bool bMSAA) = 0; + +private: + static void DrawTriangleCb(const RpsCmdCallbackContext* pContext) + { + auto pThis = static_cast(pContext->pCmdCallbackContext); + const bool bMSAA = *rpsCmdGetArg(pContext); + pThis->DrawTriangle(pContext, bMSAA); + } + + void LoadRpsPipeline() + { + RpsRenderGraphCreateInfo renderGraphCreateInfo = {}; + renderGraphCreateInfo.mainEntryCreateInfo.hRpslEntryPoint = + rpsTestLoadRpslEntry(test_render_to_texture, render_to_texture); + + RpsResult result = rpsRenderGraphCreate(m_rpsDevice, &renderGraphCreateInfo, &m_rpsRenderGraph); + REQUIRE(result == RPS_OK); + + auto hRpslEntry = rpsRenderGraphGetMainEntry(m_rpsRenderGraph); + + result = rpsProgramBindNode(hRpslEntry, "Geometry", &DrawTriangleCb, this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode(hRpslEntry, "GeometryMSAA", &DrawTriangleCb, this); + REQUIRE(result == RPS_OK); + } + +private: + RpsDevice m_rpsDevice = RPS_NULL_HANDLE; + RpsRenderGraph m_rpsRenderGraph = {}; + + float m_translation = 0.0f; + float m_rotation = 0.0f; + bool m_useMSAA = true; + +protected: + SceneConstantBuffer m_triangleAnimationData; + SceneConstantBuffer m_quadConstantData; +}; diff --git a/tests/gui/test_render_to_texture_vk.cpp b/tests/gui/test_render_to_texture_vk.cpp new file mode 100644 index 0000000..9bb8db3 --- /dev/null +++ b/tests/gui/test_render_to_texture_vk.cpp @@ -0,0 +1,658 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. 
All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifdef _WIN32 +#define VK_USE_PLATFORM_WIN32_KHR +#else +#error "TODO" +#endif + +#define RPS_VK_RUNTIME 1 + +#include "test_render_to_texture_shared.h" + +#include "utils/rps_test_common.h" +#include "utils/rps_test_win32.h" +#include "utils/rps_test_vk_renderer.h" + +#include + +class TestVkRenderToTexture : public RpsTestVulkanRenderer, public TestRpsRenderToTexture +{ +protected: + virtual void OnInit(VkCommandBuffer initCmdBuf, InitTempResources& tempResources) override + { + LoadAssets(initCmdBuf, tempResources); + + TestRpsRenderToTexture::OnInit(); + + RpsResult result = + rpsProgramBindNode(rpsRenderGraphGetMainEntry(GetRpsRenderGraph()), "Quads", &DrawQuadsCb, this); + REQUIRE(result == RPS_OK); + } + + virtual void OnPostResize() override + { + } + + virtual void OnCleanUp() override + { + TestRpsRenderToTexture::OnCleanUp(); + + vkDestroyPipeline(m_device, m_geoPipeline, nullptr); + vkDestroyPipeline(m_device, m_geoPipelineMSAA, nullptr); + vkDestroyPipeline(m_device, m_quadPipeline, nullptr); + vkDestroyPipelineLayout(m_device, m_pipelineLayout, nullptr); + vkDestroySampler(m_device, m_defaultSampler, nullptr); + vkDestroyDescriptorSetLayout(m_device, m_sharedDescriptorSetLayout, nullptr); + vkDestroyImage(m_device, m_checkerboardTexture, nullptr); + vkDestroyImageView(m_device, m_checkerboardTextureView, nullptr); + vkDestroyBuffer(m_device, m_vertexBuffer, nullptr); + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + TestRpsRenderToTexture::OnUpdate(frameIndex, m_width, m_height); + UpdatePipeline(frameIndex, CalcGuaranteedCompletedFrameIndexForRps()); + } + + virtual void OnRender(uint32_t frameIndex) override + { + REQUIRE(RPS_SUCCEEDED(ExecuteRenderGraph(frameIndex, GetRpsRenderGraph()))); + } + +protected: + virtual void 
CreateRpsDevice(RpsDevice& rpsDeviceOut) override final + { + rpsDeviceOut = rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); }); + } + + virtual void DrawTriangle(const RpsCmdCallbackContext* pContext, bool isMSAA) override final + { + if (isMSAA && (m_geoPipelineMSAA == RPS_NULL_HANDLE)) + { + VkRenderPass rp; + RpsResult result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + CreatePipeline(c_Shader, rp, &m_geoPipelineMSAA, true); + } + else if (!isMSAA && (m_geoPipeline == RPS_NULL_HANDLE)) + { + VkRenderPass rp; + RpsResult result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + CreatePipeline(c_Shader, rp, &m_geoPipeline, false); + } + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_sharedDescriptorSetLayout, 1, &ds)); + + VkWriteDescriptorSet writeDescriptorSet[2] = {}; + + VkDescriptorBufferInfo bufInfo = + AllocAndWriteFrameConstants(&m_triangleAnimationData, sizeof(m_triangleAnimationData)); + AppendWriteDescriptorSetBuffers(&writeDescriptorSet[0], ds, 0, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &bufInfo); + + VkDescriptorImageInfo imageInfo = { + VK_NULL_HANDLE, m_checkerboardTextureView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + AppendWriteDescriptorSetImages(&writeDescriptorSet[1], ds, 1, 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &imageInfo); + + vkUpdateDescriptorSets(m_device, _countof(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + + vkCmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayout, 0, 1, &ds, 0, nullptr); + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, isMSAA ? 
m_geoPipelineMSAA : m_geoPipeline); + vkCmdBindVertexBuffers(cmdBuf, 0, 1, &m_vertexBuffer, &m_triangleVbOffset); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + + void DrawQuads(const RpsCmdCallbackContext* pContext, rps::UnusedArg backBuffer, VkImageView offscreenRTSrv) + { + if (m_quadPipeline == RPS_NULL_HANDLE) + { + VkRenderPass rp; + RpsResult result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + CreatePipeline(c_Shader, rp, &m_quadPipeline, false); + } + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_sharedDescriptorSetLayout, 1, &ds)); + + VkWriteDescriptorSet writeDescriptorSet[2] = {}; + + VkDescriptorBufferInfo bufInfo = AllocAndWriteFrameConstants(&m_quadConstantData, sizeof(m_quadConstantData)); + AppendWriteDescriptorSetBuffers(&writeDescriptorSet[0], ds, 0, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &bufInfo); + + VkDescriptorImageInfo imageInfo = {VK_NULL_HANDLE, offscreenRTSrv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + AppendWriteDescriptorSetImages(&writeDescriptorSet[1], ds, 1, 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &imageInfo); + + vkUpdateDescriptorSets(m_device, _countof(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + + vkCmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayout, 0, 1, &ds, 0, nullptr); + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_quadPipeline); + vkCmdBindVertexBuffers(cmdBuf, 0, 1, &m_vertexBuffer, &m_quadVbOffset); + vkCmdDraw(cmdBuf, 24, 1, 0, 0); + } + + static void DrawQuadsCb(const RpsCmdCallbackContext* pContext) + { + auto pThis = static_cast(pContext->pCmdCallbackContext); + + VkImageView hSrv; + REQUIRE(RPS_OK == rpsVKGetCmdArgImageView(pContext, 1, &hSrv)); + + pThis->DrawQuads(pContext, {}, hSrv); + } + +private: + void LoadAssets(VkCommandBuffer initCmdBuf, InitTempResources& tempResources) + { + OnPostResize(); + + VkSamplerCreateInfo sampCI = {}; 
+ sampCI.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampCI.magFilter = VK_FILTER_LINEAR; + sampCI.minFilter = VK_FILTER_LINEAR; + sampCI.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + sampCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampCI.mipLodBias = 0.0f; + sampCI.compareOp = VK_COMPARE_OP_NEVER; + sampCI.minLod = 0.0f; + sampCI.maxLod = FLT_MAX; + sampCI.maxAnisotropy = 1.0; + sampCI.anisotropyEnable = VK_FALSE; + sampCI.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + ThrowIfFailedVK(vkCreateSampler(m_device, &sampCI, nullptr, &m_defaultSampler)); + + VkDescriptorSetLayoutBinding sharedLayoutBindings[4] = {}; + sharedLayoutBindings[0].binding = 0; + sharedLayoutBindings[0].descriptorCount = 1; + sharedLayoutBindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + sharedLayoutBindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; + sharedLayoutBindings[1].binding = 1; + sharedLayoutBindings[1].descriptorCount = 1; + sharedLayoutBindings[1].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + sharedLayoutBindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + sharedLayoutBindings[2].binding = 2; + sharedLayoutBindings[2].descriptorCount = 1; + sharedLayoutBindings[2].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + sharedLayoutBindings[2].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + sharedLayoutBindings[2].pImmutableSamplers = &m_defaultSampler; + sharedLayoutBindings[3].binding = 3; + sharedLayoutBindings[3].descriptorCount = 1; + sharedLayoutBindings[3].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + sharedLayoutBindings[3].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + + VkDescriptorSetLayoutCreateInfo setLayoutCI = {}; + setLayoutCI.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + setLayoutCI.pBindings = sharedLayoutBindings; + setLayoutCI.bindingCount = 
_countof(sharedLayoutBindings); + + ThrowIfFailedVK(vkCreateDescriptorSetLayout(m_device, &setLayoutCI, nullptr, &m_sharedDescriptorSetLayout)); + + VkPipelineLayoutCreateInfo plCI = {}; + plCI.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + plCI.setLayoutCount = 1; + plCI.pSetLayouts = &m_sharedDescriptorSetLayout; + + ThrowIfFailedVK(vkCreatePipelineLayout(m_device, &plCI, nullptr, &m_pipelineLayout)); + + // Create vertex buffers + { + Vertex triangleVertices[] = { + // triangle + {{0.0f, 0.25f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.5f, 0.0f}}, + {{0.25f, -0.25f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{-0.25f, -0.25f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + + // quad 0 + {{-1.0f, 1.0, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {0.0f, 0.0f}}, + {{0.f, 0.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f}}, + {{-1.f, 0.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + {{-1.0f, 1.0, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {0.0f, 0.0f}}, + {{0.f, 1.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 0.0f}}, + {{0.f, 0.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f}}, + + // quad 1 + {{-1.0f, 0.0, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + {{0.f, -1.f, 0.0f}, {0.f, 0.0f, 1.0f, 1.0f}, {1.0f, 0.0f}}, + {{-1.f, -1.f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 0.0f}}, + {{-1.0f, 0.0, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + {{0.f, 0.f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {1.0f, 1.0f}}, + {{0.f, -1.f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {1.0f, 0.0f}}, + + // quad 2 + {{0.0f, 1.0, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {1.0f, 0.0f}}, + {{1.f, 0.f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.0f, 1.0f}}, + {{0.f, 0.f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{0.0f, 1.0, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {1.0f, 0.0f}}, + {{1.f, 1.f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.0f, 0.0f}}, + {{1.f, 0.f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.0f, 1.0f}}, + + // quad 3 + {{0.0f, 0.0, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{1.f, -1.f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 
0.0f}}, + {{0.f, -1.f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 0.0f}}, + {{0.0f, 0.0, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{1.f, 0.f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 1.0f}}, + {{1.f, -1.f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 0.0f}}, + }; + + const UINT vertexBufferSize = sizeof(triangleVertices); + + m_vertexBuffer = CreateAndBindStaticBuffer( + vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); + m_triangleVbOffset = 0; + m_quadVbOffset = sizeof(Vertex) * 3; + + auto vbAlloc = AllocAndWriteFrameConstants(triangleVertices, vertexBufferSize); + VkBufferCopy vbCopy; + vbCopy.srcOffset = vbAlloc.offset; + vbCopy.dstOffset = 0; + vbCopy.size = vertexBufferSize; + vkCmdCopyBuffer(initCmdBuf, vbAlloc.buffer, m_vertexBuffer, 1, &vbCopy); + } + + CreateCheckerboardTexture(initCmdBuf, tempResources); + } + + void CreateCheckerboardTexture(VkCommandBuffer initCmdBuf, InitTempResources& tempResources) + { + // Texture data contains 4 channels (RGBA) with unnormalized 8-bit values, this is the most commonly supported format + VkFormat format = VK_FORMAT_R8G8B8A8_UNORM; + + uint32_t width = 256; + uint32_t height = 256; + uint32_t texturePixelSize = 4; + + const uint32_t rowPitch = width * texturePixelSize; + const uint32_t cellPitch = rowPitch >> 3; // The width of a cell in the checkboard texture. + const uint32_t cellHeight = width >> 3; // The height of a cell in the checkerboard texture. 
+ const uint32_t textureSize = rowPitch * height; + + std::vector data(textureSize); + uint8_t* textureData = &data[0]; + + for (uint32_t n = 0; n < textureSize; n += texturePixelSize) + { + uint32_t x = n % rowPitch; + uint32_t y = n / rowPitch; + uint32_t i = x / cellPitch; + uint32_t j = y / cellHeight; + + if (i % 2 == j % 2) + { + textureData[n] = 0xa0; // R + textureData[n + 1] = 0xa0; // G + textureData[n + 2] = 0xa0; // B + textureData[n + 3] = 0xff; // A + } + else + { + textureData[n] = 0xff; // R + textureData[n + 1] = 0xff; // G + textureData[n + 2] = 0xff; // B + textureData[n + 3] = 0xff; // A + } + } + + { + auto textureDataUploadBuf = AllocAndWriteFrameConstants(textureData, textureSize); + + m_checkerboardTexture = + CreateAndBindStaticImage(VK_IMAGE_TYPE_2D, + VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, + format, + width, + height, + 1, + 1, + 1); + + VkBufferImageCopy bufferCopyRegion = {}; + bufferCopyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + bufferCopyRegion.imageSubresource.mipLevel = 0; + bufferCopyRegion.imageSubresource.baseArrayLayer = 0; + bufferCopyRegion.imageSubresource.layerCount = 1; + bufferCopyRegion.imageExtent.width = width; + bufferCopyRegion.imageExtent.height = height; + bufferCopyRegion.imageExtent.depth = 1; + bufferCopyRegion.bufferOffset = textureDataUploadBuf.offset; + + // Transition the texture image layout to transfer target, so we can safely copy our buffer data to it. 
+ VkImageMemoryBarrier imageMemoryBarrier = {}; + imageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imageMemoryBarrier.image = m_checkerboardTexture; + imageMemoryBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imageMemoryBarrier.subresourceRange.baseMipLevel = 0; + imageMemoryBarrier.subresourceRange.levelCount = 1; + imageMemoryBarrier.subresourceRange.layerCount = 1; + imageMemoryBarrier.srcAccessMask = 0; + imageMemoryBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + // Insert a memory dependency at the proper pipeline stages that will execute the image layout transition + // Source pipeline stage is host write/read exection (VK_PIPELINE_STAGE_HOST_BIT) + // Destination pipeline stage is copy command exection (VK_PIPELINE_STAGE_TRANSFER_BIT) + vkCmdPipelineBarrier(initCmdBuf, + VK_PIPELINE_STAGE_HOST_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, + 0, + nullptr, + 0, + nullptr, + 1, + &imageMemoryBarrier); + + // Copy mip levels from staging buffer + vkCmdCopyBufferToImage(initCmdBuf, + textureDataUploadBuf.buffer, + m_checkerboardTexture, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, + &bufferCopyRegion); + + // Once the data has been uploaded we transfer to the texture image to the shader read layout, so it can be sampled from + imageMemoryBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + vkCmdPipelineBarrier(initCmdBuf, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + 0, + 0, + nullptr, + 0, + nullptr, + 1, + &imageMemoryBarrier); + } + + // Create image view + // Textures are not directly accessed by the shaders and + // are abstracted by image 
views containing additional + // information and sub resource ranges + VkImageViewCreateInfo view = {}; + view.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view.viewType = VK_IMAGE_VIEW_TYPE_2D; + view.format = format; + view.components = { + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A}; + view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + view.subresourceRange.baseMipLevel = 0; + view.subresourceRange.baseArrayLayer = 0; + view.subresourceRange.layerCount = 1; + view.subresourceRange.levelCount = 1; + view.image = m_checkerboardTexture; + ThrowIfFailedVK(vkCreateImageView(m_device, &view, nullptr, &m_checkerboardTextureView)); + } + + void CreatePipeline(const char* pShaderCode, VkRenderPass renderPass, VkPipeline* pPipeline, bool bMSAA) + { + VkVertexInputBindingDescription vertBinding = {}; + vertBinding.binding = 0; + vertBinding.stride = sizeof(Vertex); + vertBinding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; + + VkVertexInputAttributeDescription vertAttrBinding[3] = {}; + vertAttrBinding[0].binding = 0; + vertAttrBinding[0].location = 0; + vertAttrBinding[0].format = VK_FORMAT_R32G32B32_SFLOAT; + vertAttrBinding[0].offset = offsetof(Vertex, position); + vertAttrBinding[1].binding = 0; + vertAttrBinding[1].location = 1; + vertAttrBinding[1].format = VK_FORMAT_R32G32B32_SFLOAT; + vertAttrBinding[1].offset = offsetof(Vertex, color); + vertAttrBinding[2].binding = 0; + vertAttrBinding[2].location = 2; + vertAttrBinding[2].format = VK_FORMAT_R32G32_SFLOAT; + vertAttrBinding[2].offset = offsetof(Vertex, uv); + + VkPipelineVertexInputStateCreateInfo vi = {}; + vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vi.pNext = NULL; + vi.flags = 0; + vi.vertexBindingDescriptionCount = 1; + vi.pVertexBindingDescriptions = &vertBinding; + vi.vertexAttributeDescriptionCount = _countof(vertAttrBinding); + vi.pVertexAttributeDescriptions = vertAttrBinding; + + // input assembly state + // 
+ VkPipelineInputAssemblyStateCreateInfo ia; + ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + ia.pNext = NULL; + ia.flags = 0; + ia.primitiveRestartEnable = VK_FALSE; + ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + + // rasterizer state + VkPipelineRasterizationStateCreateInfo rs; + rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rs.pNext = NULL; + rs.flags = 0; + rs.polygonMode = VK_POLYGON_MODE_FILL; + rs.cullMode = VK_CULL_MODE_NONE; + rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rs.depthClampEnable = VK_FALSE; + rs.rasterizerDiscardEnable = VK_FALSE; + rs.depthBiasEnable = VK_FALSE; + rs.depthBiasConstantFactor = 0; + rs.depthBiasClamp = 0; + rs.depthBiasSlopeFactor = 0; + rs.lineWidth = 1.0f; + + VkPipelineColorBlendAttachmentState bs[1] = {}; + bs[0].blendEnable = VK_FALSE; + bs[0].srcColorBlendFactor = VK_BLEND_FACTOR_ONE; + bs[0].dstColorBlendFactor = VK_BLEND_FACTOR_ZERO; + bs[0].colorBlendOp = VK_BLEND_OP_ADD; + bs[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE; + bs[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; + bs[0].alphaBlendOp = VK_BLEND_OP_ADD; + bs[0].colorWriteMask = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + // Color blend state + VkPipelineColorBlendStateCreateInfo cb; + cb.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + cb.flags = 0; + cb.pNext = NULL; + cb.attachmentCount = 1; + cb.pAttachments = bs; + cb.logicOpEnable = VK_FALSE; + cb.logicOp = VK_LOGIC_OP_NO_OP; + cb.blendConstants[0] = 1.0f; + cb.blendConstants[1] = 1.0f; + cb.blendConstants[2] = 1.0f; + cb.blendConstants[3] = 1.0f; + + std::vector dynamicStateEnables = {VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR}; + VkPipelineDynamicStateCreateInfo dynamicState = {}; + dynamicState.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamicState.pNext = NULL; + dynamicState.pDynamicStates = dynamicStateEnables.data(); 
+ dynamicState.dynamicStateCount = (uint32_t)dynamicStateEnables.size(); + + // view port state + + VkPipelineViewportStateCreateInfo vp = {}; + vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + vp.pNext = NULL; + vp.flags = 0; + vp.viewportCount = 1; + vp.scissorCount = 1; + vp.pScissors = NULL; + vp.pViewports = NULL; + + // depth stencil state + + VkPipelineDepthStencilStateCreateInfo ds; + ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + ds.pNext = NULL; + ds.flags = 0; + ds.depthTestEnable = VK_FALSE; + ds.depthWriteEnable = VK_FALSE; + ds.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL; + ds.depthBoundsTestEnable = VK_FALSE; + ds.stencilTestEnable = VK_FALSE; + ds.back.failOp = VK_STENCIL_OP_KEEP; + ds.back.passOp = VK_STENCIL_OP_KEEP; + ds.back.compareOp = VK_COMPARE_OP_ALWAYS; + ds.back.compareMask = 0; + ds.back.reference = 0; + ds.back.depthFailOp = VK_STENCIL_OP_KEEP; + ds.back.writeMask = 0; + ds.minDepthBounds = 0; + ds.maxDepthBounds = 0; + ds.stencilTestEnable = VK_FALSE; + ds.front = ds.back; + + // multi sample state + + VkPipelineMultisampleStateCreateInfo ms; + ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + ms.pNext = NULL; + ms.flags = 0; + ms.pSampleMask = NULL; + ms.rasterizationSamples = bMSAA ? 
VK_SAMPLE_COUNT_4_BIT : VK_SAMPLE_COUNT_1_BIT; + ms.sampleShadingEnable = VK_FALSE; + ms.alphaToCoverageEnable = VK_FALSE; + ms.alphaToOneEnable = VK_FALSE; + ms.minSampleShading = 0.0; + + VkShaderModule vsModule, psModule; + std::vector vsCode, psCode; + + DxcCompileToSpirv(pShaderCode, L"VSMain", L"vs_6_0", L"", nullptr, 0, vsCode); + DxcCompileToSpirv(pShaderCode, L"PSMain", L"ps_6_0", L"", nullptr, 0, psCode); + + VkShaderModuleCreateInfo smCI = {}; + smCI.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + + smCI.pCode = reinterpret_cast(vsCode.data()); + smCI.codeSize = vsCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &vsModule)); + + smCI.pCode = reinterpret_cast(psCode.data()); + smCI.codeSize = psCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &psModule)); + + VkPipelineShaderStageCreateInfo shaderStages[2] = {}; + shaderStages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[0].module = vsModule; + shaderStages[0].pName = "VSMain"; + shaderStages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + shaderStages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[1].module = psModule; + shaderStages[1].pName = "PSMain"; + shaderStages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + + VkGraphicsPipelineCreateInfo psoCI = {}; + psoCI.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + psoCI.pNext = NULL; + psoCI.layout = m_pipelineLayout; + psoCI.basePipelineHandle = VK_NULL_HANDLE; + psoCI.basePipelineIndex = 0; + psoCI.flags = 0; + psoCI.pVertexInputState = &vi; + psoCI.pInputAssemblyState = &ia; + psoCI.pRasterizationState = &rs; + psoCI.pColorBlendState = &cb; + psoCI.pTessellationState = NULL; + psoCI.pMultisampleState = &ms; + psoCI.pDynamicState = &dynamicState; + psoCI.pViewportState = &vp; + psoCI.pDepthStencilState = &ds; + psoCI.pStages = shaderStages; + psoCI.stageCount = _countof(shaderStages); + psoCI.renderPass = renderPass; + 
psoCI.subpass = 0; + + ThrowIfFailedVK(vkCreateGraphicsPipelines(m_device, VK_NULL_HANDLE, 1, &psoCI, nullptr, pPipeline)); + + vkDestroyShaderModule(m_device, vsModule, nullptr); + vkDestroyShaderModule(m_device, psModule, nullptr); + } + + void UpdatePipeline(uint64_t frameIndex, uint64_t completedFrameIndex) + { + RpsRuntimeResource backBuffers[16]; + + if (m_swapChainImages.size() > RPS_TEST_COUNTOF(backBuffers)) + throw; + + for (uint32_t i = 0; i < m_swapChainImages.size(); i++) + { + backBuffers[i] = rpsVKImageToHandle(m_swapChainImages[i].image); + } + + RpsResourceDesc backBufferDesc = {}; + backBufferDesc.type = RPS_RESOURCE_TYPE_IMAGE_2D; + backBufferDesc.temporalLayers = uint32_t(m_swapChainImages.size()); + backBufferDesc.image.arrayLayers = 1; + backBufferDesc.image.mipLevels = 1; + backBufferDesc.image.format = rpsFormatFromVK(m_swapChainFormat.format); + backBufferDesc.image.width = m_width; + backBufferDesc.image.height = m_height; + backBufferDesc.image.sampleCount = 1; + + TestRpsRenderToTexture::UpdateRpsPipeline(frameIndex, completedFrameIndex, backBufferDesc, backBuffers); + } + +private: + VkPipeline m_geoPipeline = VK_NULL_HANDLE; + VkPipeline m_geoPipelineMSAA = VK_NULL_HANDLE; + VkPipeline m_quadPipeline = VK_NULL_HANDLE; + + VkPipelineLayout m_pipelineLayout = VK_NULL_HANDLE; + + VkSampler m_defaultSampler = VK_NULL_HANDLE; + + VkDescriptorSetLayout m_sharedDescriptorSetLayout = VK_NULL_HANDLE; + + VkImage m_checkerboardTexture = RPS_NULL_HANDLE; + VkImageView m_checkerboardTextureView = RPS_NULL_HANDLE; + VkBuffer m_vertexBuffer = RPS_NULL_HANDLE; + + VkDeviceSize m_triangleVbOffset = 0; + VkDeviceSize m_quadVbOffset = 0; +}; + +TEST_CASE(TEST_APP_NAME) +{ + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#if defined(BREAK_AT_ALLOC_ID) + _CrtSetBreakAlloc(BREAK_AT_ALLOC_ID); +#endif + + TestVkRenderToTexture renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + runInfo.numFramesToRender 
= g_exitAfterFrame; + runInfo.width = 1280; + runInfo.height = 720; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} diff --git a/tests/gui/test_subprogram.rpsl b/tests/gui/test_subprogram.rpsl new file mode 100644 index 0000000..636a5c7 --- /dev/null +++ b/tests/gui/test_subprogram.rpsl @@ -0,0 +1,32 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +node DrawScene([readwrite(rendertarget)] texture rt : SV_Target0, float4 color, RpsViewport viewport : SV_Viewport0); +node DrawGUI([readwrite(rendertarget)] texture rt : SV_Target0, RpsViewport viewport : SV_Viewport0, float4 color); + +export void Main([readonly(present)] texture backBuffer, RpsViewport viewport) +{ + clear(backBuffer, 0.0f.xxxx); + DrawScene(backBuffer, float4(1.0f, 1.0f, 0.0f, 1.0f), viewport); + + viewport.x += viewport.width * 0.3f; + viewport.y += viewport.height * 0.3f; + viewport.width *= 0.4f; + viewport.height *= 0.4f; + + DrawGUI(backBuffer, viewport, float4(1.0f, 0.0f, 0.0f, 0.5f)); +} + +export void Scene([readwrite(rendertarget)] texture rt, float4 color, RpsViewport viewport) +{ + DrawScene(rt, float4(0.0f, 1.0f, 0.0f, 1.0f), viewport); +} + +export void GUI([readwrite(rendertarget)] texture rt, RpsViewport viewport, float4 color) +{ + DrawGUI(rt, viewport, float4(0.0f, 0.0f, 1.0f, 0.5f)); +} diff --git a/tests/gui/test_subprogram_d3d12.cpp b/tests/gui/test_subprogram_d3d12.cpp new file mode 100644 index 0000000..402ef0a --- /dev/null +++ b/tests/gui/test_subprogram_d3d12.cpp @@ -0,0 +1,261 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#define RPS_D3D12_RUNTIME 1 + +#include "test_subprogram_shared.h" + +#include "utils/rps_test_win32.h" +#include "utils/rps_test_d3d12_renderer.h" + +using namespace DirectX; + +class TestD3D12Subprogram : public RpsTestD3D12Renderer, public TestRpsSubprogram +{ +protected: + virtual void OnInit(ID3D12GraphicsCommandList* pInitCmdList, + std::vector>& tempResources) override + { + LoadAssets(pInitCmdList, tempResources); + + RpsDevice hRpsDevice = rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); }); + + TestRpsSubprogram::Init(hRpsDevice); + } + + virtual void OnPostResize() override + { + } + + virtual void OnCleanUp() override + { + TestRpsSubprogram::OnDestroy(); + + m_rootSignature = nullptr; + m_pipelineStateDrawScene = nullptr; + m_pipelineStateDrawGUI = nullptr; + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + BindNodes(m_mainRpslProgram, !!((frameIndex >> 6) & 1), !!((frameIndex >> 5) & 1)); + + RpsRuntimeResource backBuffers[DXGI_MAX_SWAP_CHAIN_BUFFERS]; + RpsResourceDesc backBufferDesc; + GetBackBuffers(backBufferDesc, backBuffers); + + RpsViewport viewport = { + 0.0f, 0.0f, float(backBufferDesc.image.width), float(backBufferDesc.image.height), 0.0f, 1.0f}; + + RpsConstant args[] = {&backBufferDesc, &viewport}; + const RpsRuntimeResource* argResources[] = {backBuffers}; + + uint64_t completedFrameIndex = CalcGuaranteedCompletedFrameIndexForRps(); + + TestRpsSubprogram::OnUpdate(frameIndex, completedFrameIndex, uint32_t(RPS_TEST_COUNTOF(args)), args, argResources); + + RpsTestD3D12Renderer::OnUpdate(frameIndex); + } + + virtual void OnRender(uint32_t frameIndex) override + { + REQUIRE(RPS_SUCCEEDED(ExecuteRenderGraph(frameIndex, GetRpsRenderGraph()))); + } + +protected: + virtual void BindNodes(RpsSubprogram hRpslEntry) override final + { + BindNodes(hRpslEntry, false, false); + } + + void BindNodes(RpsSubprogram hRpslEntry, bool bUseSceneSubprogram, bool 
bUseGUISubprogram) + { + RpsResult result = + bUseSceneSubprogram + ? rpsProgramBindNodeSubprogram(hRpslEntry, "DrawScene", m_drawSceneSubprogram) + : rpsProgramBindNode(hRpslEntry, "DrawScene", &TestD3D12Subprogram::DrawScene, this); + REQUIRE(result == RPS_OK); + + result = bUseGUISubprogram + ? rpsProgramBindNodeSubprogram(hRpslEntry, "DrawGUI", m_drawGUISubprogram) + : rpsProgramBindNode(hRpslEntry, "DrawGUI", &TestD3D12Subprogram::DrawGUI, this); + REQUIRE(result == RPS_OK); + } + + void CreateScene(const RpsCmdCallbackContext* pContext) + { + if (!m_pipelineStateDrawScene) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + CreatePSO(L"VS", L"PSScene", false, false, &rtInfo, &m_pipelineStateDrawScene); + } + } + + void DrawScene(const RpsCmdCallbackContext* pContext, + rps::UnusedArg rt, + const float color[4], + const RpsViewport& viewport) + { + CreateScene(pContext); + + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + pCmdList->SetGraphicsRootSignature(m_rootSignature.Get()); + pCmdList->SetPipelineState(m_pipelineStateDrawScene.Get()); + pCmdList->SetGraphicsRoot32BitConstants(0, 4, color, 0); + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdList->DrawInstanced(3, 1, 0, 0); + } + + void CreateGUI(const RpsCmdCallbackContext* pContext) + { + if (!m_pipelineStateDrawGUI) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + CreatePSO(L"VS", L"PSGUI", false, true, &rtInfo, &m_pipelineStateDrawGUI); + } + } + + void DrawGUI(const RpsCmdCallbackContext* pContext, + rps::UnusedArg rt, + const RpsViewport& viewport, + const float color[4]) + { + CreateGUI(pContext); + + ID3D12GraphicsCommandList* pCmdList = rpsD3D12CommandListFromHandle(pContext->hCommandBuffer); + 
pCmdList->SetGraphicsRootSignature(m_rootSignature.Get()); + pCmdList->SetPipelineState(m_pipelineStateDrawGUI.Get()); + pCmdList->SetGraphicsRoot32BitConstants(0, 4, color, 0); + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdList->DrawInstanced(3, 1, 0, 0); + } + +private: + void LoadAssets(ID3D12GraphicsCommandList* pInitCmdList, std::vector>& tempResources) + { + CD3DX12_ROOT_PARAMETER rootParameters[1] = {}; + rootParameters[0].InitAsConstants(4, 0); + + D3D12_STATIC_SAMPLER_DESC sampler = {}; + sampler.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + sampler.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + sampler.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + sampler.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + sampler.MipLODBias = 0; + sampler.MaxAnisotropy = 0; + sampler.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; + sampler.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + sampler.MinLOD = 0.0f; + sampler.MaxLOD = D3D12_FLOAT32_MAX; + sampler.ShaderRegister = 0; + sampler.RegisterSpace = 0; + sampler.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + + D3D12_ROOT_SIGNATURE_FLAGS rootSignatureFlags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC rootSignatureDesc; + rootSignatureDesc.Init_1_0(_countof(rootParameters), rootParameters, 1, &sampler, rootSignatureFlags); + + ComPtr signature; + ComPtr error; + ThrowIfFailedEx(D3DX12SerializeVersionedRootSignature( + &rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1_0, &signature, &error), + error); + ThrowIfFailed(m_device->CreateRootSignature( + 0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature))); + } + + void CreatePSO(LPCWSTR vsEntry, + LPCWSTR psEntry, + bool bDepthEnable, + bool bBlendEnable, + const RpsCmdRenderTargetInfo* pRenderTargetInfo, + ID3D12PipelineState** ppPSO) + { + // Create the pipeline state, which includes compiling and loading shaders. 
+ { + std::vector vsCode, psCode, gsCode; + + // Describe and create the graphics pipeline state object (PSO). + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.InputLayout = {nullptr, 0}; + psoDesc.pRootSignature = m_rootSignature.Get(); + psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); + psoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); + psoDesc.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT); + psoDesc.DepthStencilState.DepthEnable = !!bDepthEnable; + psoDesc.SampleMask = UINT_MAX; + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + + psoDesc.DSVFormat = rpsFormatToDXGI(pRenderTargetInfo->depthStencilFormat); + psoDesc.SampleDesc.Count = pRenderTargetInfo->numSamples; + psoDesc.NumRenderTargets = pRenderTargetInfo->numRenderTargets; + psoDesc.RasterizerState.MultisampleEnable = pRenderTargetInfo->numSamples > 1; + + if (bBlendEnable) + { + psoDesc.BlendState.RenderTarget[0].BlendEnable = TRUE; + psoDesc.BlendState.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA; + psoDesc.BlendState.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA; + psoDesc.BlendState.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; + } + + for (uint32_t iRT = 0; iRT < pRenderTargetInfo->numRenderTargets; iRT++) + { + psoDesc.RTVFormats[iRT] = rpsFormatToDXGI(pRenderTargetInfo->renderTargetFormats[iRT]); + } + + DxcCompile(c_Shader, vsEntry, L"vs_6_0", L"", nullptr, 0, vsCode); + DxcCompile(c_Shader, psEntry, L"ps_6_0", L"", nullptr, 0, psCode); + psoDesc.VS = CD3DX12_SHADER_BYTECODE(vsCode.data(), vsCode.size()); + psoDesc.PS = CD3DX12_SHADER_BYTECODE(psCode.data(), psCode.size()); + + ThrowIfFailed(m_device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(ppPSO))); + } + } + + void CreateComputePSO(LPCWSTR csEntry, ID3D12PipelineState** ppPSO) + { + std::vector csCode; + D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.pRootSignature = m_rootSignature.Get(); + + DxcCompile(c_Shader, csEntry, L"cs_6_0", 
L"", nullptr, 0, csCode); + psoDesc.CS = CD3DX12_SHADER_BYTECODE(csCode.data(), csCode.size()); + + ThrowIfFailed(m_device->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(ppPSO))); + } + +private: + ComPtr m_rootSignature; + ComPtr m_pipelineStateDrawScene; + ComPtr m_pipelineStateDrawGUI; +}; + +TEST_CASE(TEST_APP_NAME) +{ +#if _DEBUG + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#endif + + TestD3D12Subprogram renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + runInfo.numFramesToRender = g_exitAfterFrame; + runInfo.width = 1280; + runInfo.height = 720; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} diff --git a/tests/gui/test_subprogram_shared.h b/tests/gui/test_subprogram_shared.h new file mode 100644 index 0000000..2fcad13 --- /dev/null +++ b/tests/gui/test_subprogram_shared.h @@ -0,0 +1,99 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#pragma once + +#include "utils/rps_test_host.h" + +RPS_DECLARE_RPSL_ENTRY(test_subprogram, Main); +RPS_DECLARE_RPSL_ENTRY(test_subprogram, Scene); +RPS_DECLARE_RPSL_ENTRY(test_subprogram, GUI); + +static const char c_Shader[] = R"( + +struct V2P +{ + float4 Pos : SV_Position; +}; + +struct CBData +{ + float4 color; +}; + +#ifndef VULKAN +ConstantBuffer cb : register(b0); +#else +[[vk::push_constant]] CBData cb; +#endif + +V2P VS(uint vertexId : SV_VertexID) +{ + V2P result; + result.Pos = float4( + (vertexId & 1) * 4.0f - 1.0f, + (vertexId & 2) * -2.0f + 1.0f, + 0, 1); + return result; +} + +float4 PSGUI(V2P psIn) : SV_Target0 +{ + return cb.color; +} + +float4 PSScene(V2P psIn) : SV_Target0 +{ + int2 tile = int2(psIn.Pos.xy) >> 5; + return (0.2f + 0.6f * float((tile.x + tile.y) & 1)) * cb.color; +} +)"; + +#define TEST_APP_NAME_RAW "TestSubprogram" + +using namespace DirectX; + +class TestRpsSubprogram : public RpsTestHost +{ +protected: + void Init(RpsDevice hRpsDevice) + { + RpsTestHost::OnInit(hRpsDevice, rpsTestLoadRpslEntry(test_subprogram, Main)); + + m_mainRpslProgram = rpsRenderGraphGetMainEntry(GetRpsRenderGraph()); + + RpsProgramCreateInfo programCreateInfo = {}; + programCreateInfo.hRpslEntryPoint = rpsTestLoadRpslEntry(test_subprogram, Scene); + + RpsResult result = rpsProgramCreate(hRpsDevice, &programCreateInfo, &m_drawSceneSubprogram); + + REQUIRE(result == RPS_OK); + REQUIRE(m_drawSceneSubprogram != RPS_NULL_HANDLE); + + programCreateInfo.hRpslEntryPoint = rpsTestLoadRpslEntry(test_subprogram, GUI); + + result = rpsProgramCreate(hRpsDevice, &programCreateInfo, &m_drawGUISubprogram); + REQUIRE(result == RPS_OK); + REQUIRE(m_drawGUISubprogram != RPS_NULL_HANDLE); + + BindNodes(m_drawSceneSubprogram); + BindNodes(m_drawGUISubprogram); + } + + virtual void OnDestroy() override + { + rpsProgramDestroy(m_drawSceneSubprogram); + rpsProgramDestroy(m_drawGUISubprogram); + + RpsTestHost::OnDestroy(); + } + +protected: + RpsSubprogram m_mainRpslProgram = 
RPS_NULL_HANDLE; + RpsSubprogram m_drawSceneSubprogram = RPS_NULL_HANDLE; + RpsSubprogram m_drawGUISubprogram = RPS_NULL_HANDLE; +}; diff --git a/tests/gui/test_subprogram_vk.cpp b/tests/gui/test_subprogram_vk.cpp new file mode 100644 index 0000000..8f138cc --- /dev/null +++ b/tests/gui/test_subprogram_vk.cpp @@ -0,0 +1,451 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifdef _WIN32 +#define VK_USE_PLATFORM_WIN32_KHR +#else +#error "TODO" +#endif + +#define RPS_VK_RUNTIME 1 + +#include "test_subprogram_shared.h" + +#include "utils/rps_test_win32.h" +#include "utils/rps_test_vk_renderer.h" + +class TestVkSubprogram : public RpsTestVulkanRenderer, public TestRpsSubprogram +{ +protected: + virtual void OnInit(VkCommandBuffer initCmdBuf, InitTempResources& tempResources) override + { + LoadAssets(); + + RpsDevice hRpsDevice = rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); }); + + TestRpsSubprogram::Init(hRpsDevice); + } + + virtual void OnPostResize() override + { + } + + virtual void OnCleanUp() override + { + TestRpsSubprogram::OnDestroy(); + + vkDestroyPipelineLayout(m_device, m_pipelineLayout, nullptr); + vkDestroyPipeline(m_device, m_pipelineStateDrawScene, nullptr); + vkDestroyPipeline(m_device, m_pipelineStateDrawGUI, nullptr); + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + BindNodes(m_mainRpslProgram, !!((frameIndex >> 6) & 1), !!((frameIndex >> 5) & 1)); + + RpsResourceDesc backBufferDesc; + auto& swapChainBufferHdls = GetBackBuffers(backBufferDesc); + + RpsViewport viewport = { + 0.0f, 0.0f, float(backBufferDesc.image.width), float(backBufferDesc.image.height), 0.0f, 1.0f}; + + RpsConstant args[] = {&backBufferDesc, &viewport}; + const 
RpsRuntimeResource* argResources[] = {swapChainBufferHdls.data()}; + + uint64_t completedFrameIndex = CalcGuaranteedCompletedFrameIndexForRps(); + + TestRpsSubprogram::OnUpdate( + frameIndex, completedFrameIndex, uint32_t(RPS_TEST_COUNTOF(args)), args, argResources); + + RpsTestVulkanRenderer::OnUpdate(frameIndex); + } + + virtual void OnRender(uint32_t frameIndex) override + { + REQUIRE(RPS_SUCCEEDED(ExecuteRenderGraph(frameIndex, GetRpsRenderGraph()))); + } + +protected: + virtual void BindNodes(RpsSubprogram hRpslEntry) override final + { + BindNodes(hRpslEntry, false, false); + } + + void BindNodes(RpsSubprogram hRpslEntry, bool bUseSceneSubprogram, bool bUseGUISubprogram) + { + TestRpsSubprogram::BindNodes(hRpslEntry); + + RpsResult result = bUseSceneSubprogram + ? rpsProgramBindNodeSubprogram(hRpslEntry, "DrawScene", m_drawSceneSubprogram) + : rpsProgramBindNode(hRpslEntry, "DrawScene", &TestVkSubprogram::DrawScene, this); + REQUIRE(result == RPS_OK); + + result = bUseGUISubprogram ? 
rpsProgramBindNodeSubprogram(hRpslEntry, "DrawGUI", m_drawGUISubprogram) + : rpsProgramBindNode(hRpslEntry, "DrawGUI", &TestVkSubprogram::DrawGUI, this); + REQUIRE(result == RPS_OK); + } + + void CreateScene(const RpsCmdCallbackContext* pContext) + { + if (!m_pipelineStateDrawScene) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + VkRenderPass rp; + result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + CreatePSO(L"VS", + nullptr, + L"PSScene", + rtInfo.numRenderTargets, + false, + false, + rtInfo.numSamples, + rp, + &m_pipelineStateDrawScene); + } + } + + void DrawScene(const RpsCmdCallbackContext* pContext, + rps::UnusedArg rt, + const float color[4], + const RpsViewport& viewport) + { + CreateScene(pContext); + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineStateDrawScene); + vkCmdPushConstants(cmdBuf, m_pipelineLayout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(float) * 4, color); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + + // Creates the DrawGUI pipeline on first use (while m_pipelineStateDrawGUI is still null), with alpha blending enabled and the render pass / sample count queried from the current command context. + void CreateGUI(const RpsCmdCallbackContext* pContext) + { + if (!m_pipelineStateDrawGUI) + { + RpsCmdRenderTargetInfo rtInfo; + RpsResult result = rpsCmdGetRenderTargetsInfo(pContext, &rtInfo); + REQUIRE(result == RPS_OK); + + VkRenderPass rp; + result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + // Use the queried render target count (as CreateScene does) instead of assuming a single color attachment. + CreatePSO(L"VS", nullptr, L"PSGUI", rtInfo.numRenderTargets, false, true, rtInfo.numSamples, rp, &m_pipelineStateDrawGUI); + } + } + + void DrawGUI(const RpsCmdCallbackContext* pContext, + rps::UnusedArg rt, + const RpsViewport& viewport, + const float color[4]) + { + CreateGUI(pContext); + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineStateDrawGUI); + vkCmdPushConstants(cmdBuf, m_pipelineLayout, VK_SHADER_STAGE_FRAGMENT_BIT,
0, sizeof(float) * 4, color); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + +private: + void LoadAssets() + { + OnPostResize(); + + VkPushConstantRange pushConstRanges[1] = {}; + pushConstRanges[0].offset = 0; + pushConstRanges[0].size = 4 * 4; + pushConstRanges[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + + VkPipelineLayoutCreateInfo plCI = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + plCI.setLayoutCount = 0; + plCI.pSetLayouts = nullptr; + plCI.pPushConstantRanges = pushConstRanges; + plCI.pushConstantRangeCount = _countof(pushConstRanges); + + ThrowIfFailedVK(vkCreatePipelineLayout(m_device, &plCI, nullptr, &m_pipelineLayout)); + } + + void CreatePSO(const WCHAR* vsEntry, + const WCHAR* gsEntry, + const WCHAR* psEntry, + uint32_t numColorAttachments, + bool bDepth, + bool bAlphaBlend, + uint32_t sampleCount, + VkRenderPass renderPass, + VkPipeline* pPso) + { + VkPipelineVertexInputStateCreateInfo vi = {}; + vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vi.pNext = NULL; + vi.flags = 0; + vi.vertexBindingDescriptionCount = 0; + vi.pVertexBindingDescriptions = nullptr; + vi.vertexAttributeDescriptionCount = 0; + vi.pVertexAttributeDescriptions = nullptr; + + // input assembly state + // + VkPipelineInputAssemblyStateCreateInfo ia; + ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + ia.pNext = NULL; + ia.flags = 0; + ia.primitiveRestartEnable = VK_FALSE; + ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + + // rasterizer state + VkPipelineRasterizationStateCreateInfo rs; + rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rs.pNext = NULL; + rs.flags = 0; + rs.polygonMode = VK_POLYGON_MODE_FILL; + rs.cullMode = VK_CULL_MODE_NONE; + rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rs.depthClampEnable = VK_FALSE; + rs.rasterizerDiscardEnable = VK_FALSE; + rs.depthBiasEnable = VK_FALSE; + rs.depthBiasConstantFactor = 0; + rs.depthBiasClamp = 0; + rs.depthBiasSlopeFactor = 0; + rs.lineWidth = 
1.0f; + + VkPipelineColorBlendAttachmentState bs[8] = {}; + + bs[0].blendEnable = bAlphaBlend ? VK_TRUE : VK_FALSE; + bs[0].srcColorBlendFactor = bAlphaBlend ? VK_BLEND_FACTOR_SRC_ALPHA : VK_BLEND_FACTOR_ONE; + bs[0].dstColorBlendFactor = bAlphaBlend ? VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA : VK_BLEND_FACTOR_ZERO; + bs[0].colorBlendOp = VK_BLEND_OP_ADD; + bs[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE; + bs[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; + bs[0].alphaBlendOp = VK_BLEND_OP_ADD; + bs[0].colorWriteMask = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + for (uint32_t i = 1; i < numColorAttachments; i++) + { + bs[i] = bs[0]; + } + + // Color blend state + VkPipelineColorBlendStateCreateInfo cb; + cb.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + cb.flags = 0; + cb.pNext = NULL; + cb.attachmentCount = numColorAttachments; + cb.pAttachments = bs; + cb.logicOpEnable = VK_FALSE; + cb.logicOp = VK_LOGIC_OP_NO_OP; + cb.blendConstants[0] = 1.0f; + cb.blendConstants[1] = 1.0f; + cb.blendConstants[2] = 1.0f; + cb.blendConstants[3] = 1.0f; + + std::vector dynamicStateEnables = {VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR}; + VkPipelineDynamicStateCreateInfo dynamicState = {}; + dynamicState.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamicState.pNext = NULL; + dynamicState.pDynamicStates = dynamicStateEnables.data(); + dynamicState.dynamicStateCount = (uint32_t)dynamicStateEnables.size(); + + // view port state + + VkPipelineViewportStateCreateInfo vp = {}; + vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + vp.pNext = NULL; + vp.flags = 0; + vp.viewportCount = 1; + vp.scissorCount = 1; + vp.pScissors = NULL; + vp.pViewports = NULL; + + // depth stencil state + + VkPipelineDepthStencilStateCreateInfo ds; + ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + ds.pNext = NULL; + ds.flags = 0; + ds.depthTestEnable = 
bDepth ? VK_TRUE : VK_FALSE; + ds.depthWriteEnable = bDepth ? VK_TRUE : VK_FALSE; + ds.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL; + ds.depthBoundsTestEnable = VK_FALSE; + ds.stencilTestEnable = VK_FALSE; + ds.back.failOp = VK_STENCIL_OP_KEEP; + ds.back.passOp = VK_STENCIL_OP_KEEP; + ds.back.compareOp = VK_COMPARE_OP_ALWAYS; + ds.back.compareMask = 0; + ds.back.reference = 0; + ds.back.depthFailOp = VK_STENCIL_OP_KEEP; + ds.back.writeMask = 0; + ds.minDepthBounds = 0; + ds.maxDepthBounds = 0; + ds.stencilTestEnable = VK_FALSE; + ds.front = ds.back; + + // multi sample state + + VkPipelineMultisampleStateCreateInfo ms; + ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + ms.pNext = NULL; + ms.flags = 0; + ms.pSampleMask = NULL; + ms.rasterizationSamples = static_cast(sampleCount); + ms.sampleShadingEnable = (sampleCount > 1) ? VK_TRUE : VK_FALSE; + ms.alphaToCoverageEnable = VK_FALSE; + ms.alphaToOneEnable = VK_FALSE; + ms.minSampleShading = (sampleCount > 1) ? 1.0f : 0.0f; + + VkShaderModule vsModule = VK_NULL_HANDLE, gsModule = VK_NULL_HANDLE, psModule = VK_NULL_HANDLE; + std::vector vsCode, gsCode, psCode; + + const DxcDefine defs[] = {{L"VULKAN", L"1"}}; + + DxcCompileToSpirv(c_Shader, vsEntry, L"vs_6_0", L"", defs, _countof(defs), vsCode); + DxcCompileToSpirv(c_Shader, psEntry, L"ps_6_0", L"", defs, _countof(defs), psCode); + + if (gsEntry) + { + DxcCompileToSpirv(c_Shader, gsEntry, L"gs_6_0", L"", defs, _countof(defs), gsCode); + } + + VkShaderModuleCreateInfo smCI = {}; + smCI.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + + smCI.pCode = reinterpret_cast(vsCode.data()); + smCI.codeSize = vsCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &vsModule)); + + smCI.pCode = reinterpret_cast(psCode.data()); + smCI.codeSize = psCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &psModule)); + + char vsName[128]; + char psName[128]; + sprintf_s(vsName, "%S", vsEntry); + 
sprintf_s(psName, "%S", psEntry); + + // gsName lives at function scope: shaderStages[2].pName points at it and is read by vkCreateGraphicsPipelines below, after the if (gsEntry) block has closed. + char gsName[128] = {}; + + VkPipelineShaderStageCreateInfo shaderStages[3] = {}; + shaderStages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[0].module = vsModule; + shaderStages[0].pName = vsName; + shaderStages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + shaderStages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[1].module = psModule; + shaderStages[1].pName = psName; + shaderStages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + + uint32_t numShaderStages = 2; + + if (gsEntry) + { + smCI.pCode = reinterpret_cast(gsCode.data()); + smCI.codeSize = gsCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &gsModule)); + + sprintf_s(gsName, "%S", gsEntry); + + shaderStages[2].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[2].module = gsModule; + shaderStages[2].pName = gsName; + shaderStages[2].stage = VK_SHADER_STAGE_GEOMETRY_BIT; + + numShaderStages = 3; + } + + VkGraphicsPipelineCreateInfo psoCI = {}; + psoCI.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + psoCI.pNext = NULL; + psoCI.layout = m_pipelineLayout; + psoCI.basePipelineHandle = VK_NULL_HANDLE; + psoCI.basePipelineIndex = 0; + psoCI.flags = 0; + psoCI.pVertexInputState = &vi; + psoCI.pInputAssemblyState = &ia; + psoCI.pRasterizationState = &rs; + psoCI.pColorBlendState = &cb; + psoCI.pTessellationState = NULL; + psoCI.pMultisampleState = &ms; + psoCI.pDynamicState = &dynamicState; + psoCI.pViewportState = &vp; + psoCI.pDepthStencilState = &ds; + psoCI.pStages = shaderStages; + psoCI.stageCount = numShaderStages; + psoCI.renderPass = renderPass; + psoCI.subpass = 0; + + ThrowIfFailedVK(vkCreateGraphicsPipelines(m_device, VK_NULL_HANDLE, 1, &psoCI, nullptr, pPso)); + + vkDestroyShaderModule(m_device, vsModule, nullptr); + vkDestroyShaderModule(m_device, psModule, nullptr); + if (gsModule != VK_NULL_HANDLE) + { + vkDestroyShaderModule(m_device, gsModule, 
nullptr); + } + } + + void CreateComputePSO(const WCHAR* csEntry, VkPipeline* pPso) + { + const DxcDefine defs[] = {{L"VULKAN", L"1"}}; + + std::vector csCode; + DxcCompileToSpirv(c_Shader, csEntry, L"cs_6_0", L"", defs, _countof(defs), csCode); + + VkShaderModuleCreateInfo smCI = {}; + smCI.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smCI.pCode = reinterpret_cast(csCode.data()); + smCI.codeSize = csCode.size(); + + VkShaderModule csModule; + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &csModule)); + + char csName[128]; + sprintf_s(csName, "%S", csEntry); + + VkComputePipelineCreateInfo compPsoCI = {}; + compPsoCI.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + compPsoCI.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + compPsoCI.stage.module = csModule; + compPsoCI.stage.pName = csName; + compPsoCI.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT; + compPsoCI.layout = m_pipelineLayout; + + ThrowIfFailedVK(vkCreateComputePipelines(m_device, VK_NULL_HANDLE, 1, &compPsoCI, nullptr, pPso)); + vkDestroyShaderModule(m_device, csModule, nullptr); + } + +private: + VkPipelineLayout m_pipelineLayout = VK_NULL_HANDLE; + VkPipeline m_pipelineStateDrawScene = VK_NULL_HANDLE; + VkPipeline m_pipelineStateDrawGUI = VK_NULL_HANDLE; +}; + +TEST_CASE(TEST_APP_NAME) +{ + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#if defined(BREAK_AT_ALLOC_ID) + _CrtSetBreakAlloc(BREAK_AT_ALLOC_ID); +#endif + + TestVkSubprogram renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + runInfo.numFramesToRender = g_exitAfterFrame; + runInfo.width = 1280; + runInfo.height = 720; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} diff --git a/tests/gui/test_triangle.rpsl b/tests/gui/test_triangle.rpsl new file mode 100644 index 0000000..33d8fe0 --- /dev/null +++ b/tests/gui/test_triangle.rpsl @@ -0,0 +1,15 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +graphics node Triangle(rtv renderTarget : SV_Target0, uint triangleId ); + +export void main([readonly(present)] texture backbuffer) +{ + // clear and then render geometry to back buffer + clear(backbuffer, float4(0.0, 0.2, 0.4, 1.0)); + Triangle(backbuffer, 0); +} diff --git a/tests/gui/test_triangle_d3d12.cpp b/tests/gui/test_triangle_d3d12.cpp new file mode 100644 index 0000000..d23b647 --- /dev/null +++ b/tests/gui/test_triangle_d3d12.cpp @@ -0,0 +1,465 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#define CATCH_CONFIG_MAIN +#include +#include + +#include "utils/rps_test_d3d12_renderer.h" +#include "utils/rps_test_common.h" +#include "utils/rps_test_win32.h" + +RPS_DECLARE_RPSL_ENTRY(test_triangle, main); + +static const char c_Shader[] = R"( +struct V2P +{ + float4 Pos : SV_Position; + float4 Color : COLOR0; +}; + +cbuffer cb : register(b0) +{ + float AspectRatio; +}; + +#define PI (3.14159f) + +V2P VSMain(uint vId : SV_VertexID) +{ + float2 pos[3] = + { + { -0.5f, -0.5f * tan(PI / 6), }, + { 0.0f, 0.5f / cos(PI / 6), }, + { 0.5f, -0.5f * tan(PI / 6), }, + }; + + V2P vsOut; + vsOut.Pos = float4(pos[min(vId, 2)], 0, 1); + vsOut.Pos.x *= AspectRatio; + vsOut.Color = float4(vId == 0 ? 1 : 0, vId == 1 ? 1 : 0, vId == 2 ? 
1 : 0, 1); + return vsOut; +} + +float4 PSMain(V2P psIn) : SV_Target0 +{ + return psIn.Color; +} +)"; + +#define TEST_APP_NAME_RAW "TestTriangle" + +class TestD3D12Triangle : public RpsTestD3D12Renderer +{ +protected: + virtual void OnInit(ID3D12GraphicsCommandList* pInitCmdList, + std::vector>& tempResources) override + { + LoadAssets(); + + m_rpsDevice = rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); }); + + LoadRpsPipeline(); + } + + virtual void OnPostResize() override + { + } + + virtual void OnCleanUp() override + { + rpsRenderGraphDestroy(m_rpsRenderGraph); + + rpsTestUtilDestroyDevice(m_rpsDevice); + + m_pipelineState = nullptr; + m_rootSignature = nullptr; + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + UpdateRpsPipeline(frameIndex); + } + + virtual void OnRender(uint32_t frameIndex) override + { + const bool useRps = (m_rpsRenderGraph != RPS_NULL_HANDLE) && (m_sampleMode != SampleMode::NO_RPS); + + if (useRps) + { + RpsRenderGraphBatchLayout batchLayout = {}; + RpsResult result = rpsRenderGraphGetBatchLayout(m_rpsRenderGraph, &batchLayout); + REQUIRE(result == RPS_OK); + + m_fenceSignalInfos.resize(batchLayout.numFenceSignals); + + for (uint32_t iBatch = 0; iBatch < batchLayout.numCmdBatches; iBatch++) + { + auto& batch = batchLayout.pCmdBatches[iBatch]; + + ActiveCommandList cmdList = AcquireCmdList(RPS_AFX_QUEUE_INDEX_GFX); + + RpsRenderGraphRecordCommandInfo recordInfo = {}; + + recordInfo.pUserContext = this; + recordInfo.cmdBeginIndex = batch.cmdBegin; + recordInfo.numCmds = batch.numCmds; + recordInfo.hCmdBuffer = rpsD3D12CommandListToHandle(cmdList.cmdList.Get()); + + for (uint32_t iWaitIdx = batch.waitFencesBegin; + iWaitIdx < batch.waitFencesBegin + batch.numWaitFences; + ++iWaitIdx) + { + const auto& signalInfo = m_fenceSignalInfos[batchLayout.pWaitFenceIndices[iWaitIdx]]; + + HRESULT hr = + 
m_queues[batch.queueIndex]->Wait(m_fences[signalInfo.queueIndex].Get(), signalInfo.value); + REQUIRE(SUCCEEDED(hr)); + } + + result = rpsRenderGraphRecordCommands(m_rpsRenderGraph, &recordInfo); + REQUIRE(result == RPS_OK); + + CloseCmdList(cmdList); + ID3D12CommandList* pCmdLists[] = {cmdList.cmdList.Get()}; + m_presentQueue->ExecuteCommandLists(1, pCmdLists); + RecycleCmdList(cmdList); + + if (batch.signalFenceIndex != RPS_INDEX_NONE_U32) + { + m_fenceValue++; + + auto& signalInfo = m_fenceSignalInfos[batch.signalFenceIndex]; + + signalInfo.queueIndex = batch.queueIndex; + signalInfo.value = m_fenceValue; + + HRESULT hr = + m_queues[batch.queueIndex]->Signal(m_fences[signalInfo.queueIndex].Get(), signalInfo.value); + REQUIRE(SUCCEEDED(hr)); + } + } + } + else + { + ActiveCommandList cmdList = AcquireCmdList(RPS_AFX_QUEUE_INDEX_GFX); + + RenderWithoutRPS(cmdList.cmdList.Get()); + + CloseCmdList(cmdList); + ID3D12CommandList* pCmdLists[] = {cmdList.cmdList.Get()}; + m_presentQueue->ExecuteCommandLists(1, pCmdLists); + RecycleCmdList(cmdList); + } + } + +private: + void RenderWithoutRPS(ID3D12GraphicsCommandList* pCmdList) + { + auto barrier = CD3DX12_RESOURCE_BARRIER::Transition( + GetBackBuffer(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET); + pCmdList->ResourceBarrier(1, &barrier); + + auto rtv = GetBackBufferRTV(); + + FLOAT clearColor[4] = {0.0f, 0.2f, 0.4f, 1.0f}; + auto viewport = CD3DX12_VIEWPORT(0.0f, 0.0f, static_cast(m_width), static_cast(m_height)); + auto scissorRect = CD3DX12_RECT(0, 0, m_width, m_height); + pCmdList->ClearRenderTargetView(rtv, clearColor, 0, nullptr); + pCmdList->OMSetRenderTargets(1, &rtv, FALSE, nullptr); + pCmdList->RSSetViewports(1, &viewport); + pCmdList->RSSetScissorRects(1, &scissorRect); + + DrawTriangle(pCmdList); + + barrier = CD3DX12_RESOURCE_BARRIER::Transition( + GetBackBuffer(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT); + pCmdList->ResourceBarrier(1, &barrier); + } + + void 
DrawTriangle(ID3D12GraphicsCommandList* pCmdList) + { + FLOAT aspectRatio = m_height / static_cast(m_width); + pCmdList->SetGraphicsRootSignature(m_rootSignature.Get()); + pCmdList->SetGraphicsRoot32BitConstant(0, *(const UINT*)&aspectRatio, 0); + pCmdList->SetPipelineState(m_pipelineState.Get()); + pCmdList->IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdList->DrawInstanced(3, 1, 0, 0); + } + + static void DrawTriangleCb(const RpsCmdCallbackContext* pContext) + { + auto pThis = static_cast(pContext->pCmdCallbackContext); + pThis->DrawTriangle(rpsD3D12CommandListFromHandle(pContext->hCommandBuffer)); + } + + static RpsResult AcquireRuntimeCommandBufferCb(void* pUserContext, + uint32_t queueIndex, + uint32_t numCmdBuffers, + RpsRuntimeCommandBuffer* pCmdBuffers, + uint32_t* pCmdBufferIdentifiers) + { + return RPS_ERROR_NOT_IMPLEMENTED; + } + + static RpsResult SubmitRuntimeCommandBufferCb(void* pUserContext, + uint32_t queueIndex, + const RpsRuntimeCommandBuffer* pRuntimeCmdBufs, + uint32_t numRuntimeCmdBufs, + uint32_t waitId, + uint32_t signalId) + { + return RPS_ERROR_NOT_IMPLEMENTED; + } + +private: + void LoadAssets() + { + // Create a root signature consisting of a descriptor table with a single CBV. + { + CD3DX12_ROOT_PARAMETER rootParameters[1]; + rootParameters[0].InitAsConstants(1, 0); + + D3D12_ROOT_SIGNATURE_FLAGS rootSignatureFlags = D3D12_ROOT_SIGNATURE_FLAG_NONE; + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC rootSignatureDesc; + rootSignatureDesc.Init_1_0(_countof(rootParameters), rootParameters, 0, nullptr, rootSignatureFlags); + + ComPtr signature; + ComPtr error; + ThrowIfFailed(D3DX12SerializeVersionedRootSignature( + &rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1_0, &signature, &error)); + ThrowIfFailed(m_device->CreateRootSignature( + 0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature))); + } + + // Create the pipeline state, which includes compiling and loading shaders. 
+ { + ComPtr vertexShader, pixelShader, err; + +#if defined(_DEBUG) + // Enable better shader debugging with the graphics debugging tools. + UINT compileFlags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION; +#else + UINT compileFlags = 0; +#endif + + ThrowIfFailedEx(D3DCompile(c_Shader, + sizeof(c_Shader), + nullptr, + nullptr, + nullptr, + "VSMain", + "vs_5_0", + compileFlags, + 0, + &vertexShader, + &err), + err); + ThrowIfFailedEx(D3DCompile(c_Shader, + sizeof(c_Shader), + nullptr, + nullptr, + nullptr, + "PSMain", + "ps_5_0", + compileFlags, + 0, + &pixelShader, + &err), + err); + + // Describe and create the graphics pipeline state object (PSO). + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.InputLayout = {nullptr, 0}; + psoDesc.pRootSignature = m_rootSignature.Get(); + psoDesc.VS = CD3DX12_SHADER_BYTECODE(vertexShader.Get()); + psoDesc.PS = CD3DX12_SHADER_BYTECODE(pixelShader.Get()); + psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); + psoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); + psoDesc.DepthStencilState.DepthEnable = FALSE; + psoDesc.DepthStencilState.StencilEnable = FALSE; + psoDesc.SampleMask = UINT_MAX; + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + psoDesc.NumRenderTargets = 1; + psoDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; + psoDesc.SampleDesc.Count = 1; + + ThrowIfFailed(m_device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&m_pipelineState))); + } + } + + void LoadRpsPipeline() + { + if (m_sampleMode == SampleMode::RPSL) + { + RpsRenderGraphCreateInfo renderGraphInfo = {}; + renderGraphInfo.mainEntryCreateInfo.hRpslEntryPoint = rpsTestLoadRpslEntry(test_triangle, main); + + REQUIRE_RPS_OK(rpsRenderGraphCreate(m_rpsDevice, &renderGraphInfo, &m_rpsRenderGraph)); + + rpsProgramBindNode(rpsRenderGraphGetMainEntry(m_rpsRenderGraph), "Triangle", &DrawTriangleCb, this); + } + else if (m_sampleMode == SampleMode::RPS_API) + { + struct ParamAttrList + { + RpsAccessAttr 
access; + RpsSemanticAttr semantic; + }; + + ParamAttrList rtvAttr = {{}, rps::SemanticAttr(RPS_SEMANTIC_RENDER_TARGET, 0)}; + ParamAttrList clearColorAttr = {{}, {RPS_SEMANTIC_COLOR_CLEAR_VALUE, 0}}; + + RpsParameterDesc nodeParamDescs[2] = {}; + nodeParamDescs[0].name = "RTV0"; + nodeParamDescs[0].typeInfo = rpsTypeInfoInitFromTypeAndID(RpsImageView, RPS_TYPE_IMAGE_VIEW); + nodeParamDescs[0].attr = reinterpret_cast(&rtvAttr); + nodeParamDescs[1].name = "ClearColor"; + nodeParamDescs[1].typeInfo = rpsTypeInfoInitFromType(FLOAT[4]); + nodeParamDescs[1].attr = reinterpret_cast(&clearColorAttr); + + RpsNodeDesc nodeDescs[1] = {}; + nodeDescs[0].name = "DrawTriangle"; + nodeDescs[0].numParams = _countof(nodeParamDescs); + nodeDescs[0].pParamDescs = nodeParamDescs; + + ParamAttrList presentAccess = {rps::AccessAttr(RPS_ACCESS_PRESENT_BIT, RPS_SHADER_STAGE_NONE)}; + + RpsParameterDesc paramDescs[2] = {}; + paramDescs[0].typeInfo = rpsTypeInfoInitFromType(RpsResourceDesc); + paramDescs[0].arraySize = 0; + paramDescs[0].flags = RPS_PARAMETER_FLAG_RESOURCE_BIT; + paramDescs[0].attr = reinterpret_cast(&presentAccess); + paramDescs[0].name = "backBuffer"; + paramDescs[1].typeInfo = rpsTypeInfoInitFromType(void*); + paramDescs[1].name = "pThis"; + + RpsRenderGraphSignatureDesc signatureDesc = {}; + signatureDesc.name = "HelloTriangle"; + signatureDesc.numParams = _countof(paramDescs); + signatureDesc.pParamDescs = paramDescs; + signatureDesc.numNodeDescs = _countof(nodeDescs); + signatureDesc.pNodeDescs = nodeDescs; + + RpsRenderGraphCreateInfo renderGraphInfo = {}; + renderGraphInfo.mainEntryCreateInfo.pSignatureDesc = &signatureDesc; + + REQUIRE_RPS_OK(rpsRenderGraphCreate(m_rpsDevice, &renderGraphInfo, &m_rpsRenderGraph)); + } + } + + static RpsResult BuildRpsRenderGraphCb(RpsRenderGraphBuilder builder, const RpsConstant* ppArgs, uint32_t numArgs) + { + RpsImageView* backBufferRTView = (RpsImageView*)rpsRenderGraphAllocateData(builder, sizeof(RpsImageView)); + 
REQUIRE(backBufferRTView != nullptr); + + *backBufferRTView = {}; + backBufferRTView->base.resourceId = rpsRenderGraphGetParamResourceId(builder, 0); + backBufferRTView->subresourceRange.arrayLayers = 1; + backBufferRTView->subresourceRange.mipLevels = 1; + backBufferRTView->componentMapping = RPS_RESOURCE_VIEW_COMPONENT_MAPPING_DEFAULT; + + const FLOAT clearValue[4] = {0.0f, 0.2f, 0.4f, 1.0f}; + FLOAT* clearColor = (FLOAT*)rpsRenderGraphAllocateData(builder, sizeof(FLOAT) * 4); + REQUIRE(clearColor != nullptr); + memcpy(clearColor, clearValue, sizeof(clearValue)); + + RpsVariable nodeArgs[] = {backBufferRTView, clearColor}; + + TestD3D12Triangle* pThis = *static_cast(ppArgs[1]); + + RpsNodeId triangleNodeId = + rpsRenderGraphAddNode(builder, 0, 0, &DrawTriangleCb, pThis, nodeArgs, _countof(nodeArgs)); + REQUIRE(triangleNodeId != RPS_CMD_ID_INVALID); + + return RPS_OK; + } + + void UpdateRpsPipeline(uint64_t frameIndex) + { + if (m_rpsRenderGraph != RPS_NULL_HANDLE) + { + RpsRuntimeResource backBufferResources[DXGI_MAX_SWAP_CHAIN_BUFFERS] = {}; + for (uint32_t i = 0; i < m_backBuffers.size(); i++) + { + backBufferResources[i] = {m_backBuffers[i].Get()}; + } + const RpsRuntimeResource* argResources[] = {backBufferResources}; + + RpsResourceDesc backBufferDesc = {}; + backBufferDesc.type = RPS_RESOURCE_TYPE_IMAGE_2D; + backBufferDesc.temporalLayers = uint32_t(m_backBuffers.size()); + backBufferDesc.image.arrayLayers = 1; + backBufferDesc.image.mipLevels = 1; + backBufferDesc.image.format = RPS_FORMAT_R8G8B8A8_UNORM; + backBufferDesc.image.width = m_width; + backBufferDesc.image.height = m_height; + backBufferDesc.image.sampleCount = 1; + + auto pThis = this; + RpsConstant argData[] = {&backBufferDesc, &pThis}; + + const uint64_t completedFrameIndex = CalcGuaranteedCompletedFrameIndexForRps(); + + RpsRenderGraphUpdateInfo updateInfo = {}; + updateInfo.frameIndex = frameIndex; + updateInfo.gpuCompletedFrameIndex = completedFrameIndex; + updateInfo.numArgs = 
(m_sampleMode == SampleMode::RPS_API) ? 2 : 1; + updateInfo.ppArgs = argData; + updateInfo.ppArgResources = argResources; + + updateInfo.diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_RUNTIME_DEBUG_NAMES; + if (frameIndex < m_backBufferCount) + { + updateInfo.diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_ALL; + } + + if (m_sampleMode == SampleMode::RPS_API) + { + updateInfo.pfnBuildCallback = &BuildRpsRenderGraphCb; + } + + REQUIRE_RPS_OK(rpsRenderGraphUpdate(m_rpsRenderGraph, &updateInfo)); + } + } + +private: + ComPtr m_rootSignature; + ComPtr m_pipelineState; + + RpsDevice m_rpsDevice = {}; + RpsRenderGraph m_rpsRenderGraph = {}; + + enum class SampleMode + { + NO_RPS, + RPS_API, + RPSL, + }; + + SampleMode m_sampleMode = SampleMode::RPS_API; +}; + +TEST_CASE(TEST_APP_NAME) +{ + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#if defined(BREAK_AT_ALLOC_ID) + _CrtSetBreakAlloc(BREAK_AT_ALLOC_ID); +#endif + + TestD3D12Triangle renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + runInfo.numFramesToRender = g_exitAfterFrame; + runInfo.width = 1280; + runInfo.height = 720; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} diff --git a/tests/gui/test_triangle_vk.cpp b/tests/gui/test_triangle_vk.cpp new file mode 100644 index 0000000..1d5a253 --- /dev/null +++ b/tests/gui/test_triangle_vk.cpp @@ -0,0 +1,569 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#define CATCH_CONFIG_MAIN +#include +#include + +#ifdef _WIN32 +#define VK_USE_PLATFORM_WIN32_KHR +#else +#error "TODO" +#endif + +#include "rps/rps.h" +#include "rps/runtime/vk/rps_vk_runtime.h" + +#include "utils/rps_test_common.h" +#include "utils/rps_test_win32.h" +#include "utils/rps_test_vk_renderer.h" + +RPS_DECLARE_RPSL_ENTRY(test_triangle, main); + +static const char c_Shader[] = R"( +struct V2P +{ + float4 Pos : SV_Position; + float4 Color : COLOR0; +}; + +[[vk::push_constant]] +struct +{ + float AspectRatio; +} cb; + +#define PI (3.14159f) + +V2P VSMain(uint vId : SV_VertexID) +{ + float2 pos[3] = + { + { -0.5f, -0.5f * tan(PI / 6), }, + { 0.0f, 0.5f / cos(PI / 6), }, + { 0.5f, -0.5f * tan(PI / 6), }, + }; + + V2P vsOut; + vsOut.Pos = float4(pos[min(vId, 2)], 0, 1); + vsOut.Pos.x *= cb.AspectRatio; + vsOut.Color = float4(vId == 0 ? 1 : 0, vId == 1 ? 1 : 0, vId == 2 ? 1 : 0, 1); + return vsOut; +} + +float4 PSMain(V2P psIn) : SV_Target0 +{ + return psIn.Color; +} +)"; + +#define TEST_APP_NAME_RAW "TestTriangle" + +class TestVkTriangle : public RpsTestVulkanRenderer +{ +protected: + virtual void OnInit(VkCommandBuffer initCmdBuf, InitTempResources& tempResources) override + { + LoadAssets(); + + m_rpsDevice = rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); }); + + LoadRpsPipeline(); + } + + virtual void OnPreResize() override + { + for (auto& fb : m_frameBuffers) + { + vkDestroyFramebuffer(m_device, fb, nullptr); + } + m_frameBuffers.clear(); + } + + virtual void OnPostResize() override + { + // PostResize can be called before OnInit, when m_renderPassWithoutRps is null. 
+ if (m_renderPassWithoutRps != VK_NULL_HANDLE) + { + m_frameBuffers.resize(m_swapChainImages.size()); + + VkFramebufferCreateInfo fbCI = {}; + fbCI.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + fbCI.renderPass = m_renderPassWithoutRps; + fbCI.width = m_width; + fbCI.height = m_height; + fbCI.layers = 1; + fbCI.attachmentCount = 1; + + for (uint32_t i = 0; i < m_swapChainImages.size(); i++) + { + fbCI.pAttachments = &m_swapChainImages[i].imageView; + ThrowIfFailedVK(vkCreateFramebuffer(m_device, &fbCI, nullptr, &m_frameBuffers[i])); + } + } + } + + virtual void OnCleanUp() override + { + rpsRenderGraphDestroy(m_rpsRenderGraph); + + rpsTestUtilDestroyDevice(m_rpsDevice); + + OnPreResize(); + + vkDestroyRenderPass(m_device, m_renderPassWithoutRps, nullptr); + vkDestroyPipelineLayout(m_device, m_pipelineLayout, nullptr); + vkDestroyPipeline(m_device, m_psoWithoutRps, nullptr); + + if (m_psoWithRps != VK_NULL_HANDLE) + { + vkDestroyPipeline(m_device, m_psoWithRps, nullptr); + } + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + UpdateRpsPipeline(frameIndex); + } + + virtual void OnRender(uint32_t frameIndex) override + { + bool useRps = (m_rpsRenderGraph != RPS_NULL_HANDLE) && m_bUseRps; + + if (useRps) + { + RpsRenderGraphBatchLayout batchLayout = {}; + RpsResult result = rpsRenderGraphGetBatchLayout(m_rpsRenderGraph, &batchLayout); + REQUIRE(result == RPS_OK); + + ReserveSemaphores(batchLayout.numFenceSignals); + + for (uint32_t iBatch = 0; iBatch < batchLayout.numCmdBatches; iBatch++) + { + auto& batch = batchLayout.pCmdBatches[iBatch]; + + ActiveCommandList cmdList = BeginCmdList(RPS_AFX_QUEUE_INDEX_GFX); + + RpsRenderGraphRecordCommandInfo recordInfo = {}; + + recordInfo.pUserContext = this; + recordInfo.cmdBeginIndex = batch.cmdBegin; + recordInfo.numCmds = batch.numCmds; + recordInfo.hCmdBuffer = rpsVKCommandBufferToHandle(cmdList.cmdBuf); + + result = rpsRenderGraphRecordCommands(m_rpsRenderGraph, &recordInfo); + REQUIRE(result == RPS_OK); 
+ + EndCmdList(cmdList); + + SubmitCmdLists(&cmdList, + 1, + ((iBatch + 1) == batchLayout.numCmdBatches), + batch.numWaitFences, + batchLayout.pWaitFenceIndices + batch.waitFencesBegin, + batch.signalFenceIndex); + + RecycleCmdList(cmdList); + } + } + else + { + ActiveCommandList cmdList = BeginCmdList(RPS_AFX_QUEUE_INDEX_GFX); + + RenderWithoutRPS(cmdList.cmdBuf); + + EndCmdList(cmdList); + + SubmitCmdLists(&cmdList, 1, VK_TRUE); + + RecycleCmdList(cmdList); + } + } + +private: + void RenderWithoutRPS(VkCommandBuffer cmdBuf) + { + VkClearValue clearColor = {{{0.0f, 0.2f, 0.4f, 1.0f}}}; + + VkRenderPassBeginInfo rpInfo = {}; + rpInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + rpInfo.clearValueCount = 1; + rpInfo.pClearValues = &clearColor; + rpInfo.framebuffer = m_frameBuffers[m_backBufferIndex]; + rpInfo.renderArea.extent.width = m_width; + rpInfo.renderArea.extent.height = m_height; + rpInfo.renderArea.offset.x = 0; + rpInfo.renderArea.offset.y = 0; + rpInfo.renderPass = m_renderPassWithoutRps; + + vkCmdBeginRenderPass(cmdBuf, &rpInfo, VK_SUBPASS_CONTENTS_INLINE); + + VkViewport viewport = {0.0f, m_height * 1.0f, m_width * 1.0f, m_height * -1.0f, 0.0f, 1.0f}; + vkCmdSetViewport(cmdBuf, 0, 1, &viewport); + vkCmdSetScissor(cmdBuf, 0, 1, &rpInfo.renderArea); + + DrawTriangle(cmdBuf, m_psoWithoutRps); + + vkCmdEndRenderPass(cmdBuf); + } + + void DrawTriangle(VkCommandBuffer cmdBuf, VkPipeline pso) + { + FLOAT aspectRatio = m_height / static_cast(m_width); + vkCmdPushConstants(cmdBuf, m_pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, 4, &aspectRatio); + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pso); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + + void DrawTriangleWithRPSCb(const RpsCmdCallbackContext* pContext) + { + if (m_psoWithRps == VK_NULL_HANDLE) + { + VkRenderPass renderPassFromRps = {}; + REQUIRE_RPS_OK(rpsVKGetCmdRenderPass(pContext, &renderPassFromRps)); + + m_psoWithRps = CreateVkPipeline(renderPassFromRps); + } + + 
DrawTriangle(rpsVKCommandBufferFromHandle(pContext->hCommandBuffer), m_psoWithRps); + } + +private: + void LoadAssets() + { + VkAttachmentDescription atchmtDesc = {}; + atchmtDesc.format = m_swapChainFormat.format; + atchmtDesc.samples = VK_SAMPLE_COUNT_1_BIT; + atchmtDesc.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + atchmtDesc.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + atchmtDesc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + atchmtDesc.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + atchmtDesc.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + atchmtDesc.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + atchmtDesc.flags = 0; + + VkAttachmentReference colorRef = {}; + colorRef.attachment = 0; + colorRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + VkSubpassDescription subpass = {}; + subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass.flags = 0; + subpass.inputAttachmentCount = 0; + subpass.pInputAttachments = NULL; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &colorRef; + subpass.pResolveAttachments = NULL; + subpass.pDepthStencilAttachment = NULL; + subpass.preserveAttachmentCount = 0; + subpass.pPreserveAttachments = NULL; + + VkSubpassDependency deps[2] = {}; + deps[0].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; + deps[0].srcSubpass = VK_SUBPASS_EXTERNAL; + deps[0].srcAccessMask = 0; + deps[0].srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + deps[0].dstSubpass = 0; + deps[0].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + deps[0].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + deps[1].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; + deps[1].srcSubpass = 0; + deps[1].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + deps[1].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + deps[1].dstSubpass = VK_SUBPASS_EXTERNAL; + deps[1].dstAccessMask = 0; + deps[1].dstStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + + VkRenderPassCreateInfo rpCI = {}; + rpCI.sType = 
VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + rpCI.pNext = NULL; + rpCI.attachmentCount = 1; + rpCI.pAttachments = &atchmtDesc; + rpCI.subpassCount = 1; + rpCI.pSubpasses = &subpass; + rpCI.dependencyCount = _countof(deps); + rpCI.pDependencies = deps; + + ThrowIfFailedVK(vkCreateRenderPass(m_device, &rpCI, nullptr, &m_renderPassWithoutRps)); + + OnPostResize(); + + VkPushConstantRange pushConstRange = {}; + pushConstRange.offset = 0; + pushConstRange.size = 4; + pushConstRange.stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + + VkPipelineLayoutCreateInfo plCI = {}; + plCI.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + plCI.setLayoutCount = 0; + plCI.pushConstantRangeCount = 1; + plCI.pPushConstantRanges = &pushConstRange; + + ThrowIfFailedVK(vkCreatePipelineLayout(m_device, &plCI, nullptr, &m_pipelineLayout)); + + m_psoWithoutRps = CreateVkPipeline(m_renderPassWithoutRps); + } + + VkPipeline CreateVkPipeline(VkRenderPass renderPass) + { + VkPipelineVertexInputStateCreateInfo vi = {}; + vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vi.pNext = NULL; + vi.flags = 0; + vi.vertexBindingDescriptionCount = 0; + vi.pVertexBindingDescriptions = nullptr; + vi.vertexAttributeDescriptionCount = 0; + vi.pVertexAttributeDescriptions = nullptr; + + // input assembly state + // + VkPipelineInputAssemblyStateCreateInfo ia; + ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + ia.pNext = NULL; + ia.flags = 0; + ia.primitiveRestartEnable = VK_FALSE; + ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + + // rasterizer state + VkPipelineRasterizationStateCreateInfo rs; + rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rs.pNext = NULL; + rs.flags = 0; + rs.polygonMode = VK_POLYGON_MODE_FILL; + rs.cullMode = VK_CULL_MODE_NONE; + rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rs.depthClampEnable = VK_FALSE; + rs.rasterizerDiscardEnable = VK_FALSE; + rs.depthBiasEnable = VK_FALSE; + rs.depthBiasConstantFactor 
= 0; + rs.depthBiasClamp = 0; + rs.depthBiasSlopeFactor = 0; + rs.lineWidth = 1.0f; + + VkPipelineColorBlendAttachmentState bs[1] = {}; + bs[0].blendEnable = VK_FALSE; + bs[0].srcColorBlendFactor = VK_BLEND_FACTOR_ONE; + bs[0].dstColorBlendFactor = VK_BLEND_FACTOR_ZERO; + bs[0].colorBlendOp = VK_BLEND_OP_ADD; + bs[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE; + bs[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; + bs[0].alphaBlendOp = VK_BLEND_OP_ADD; + bs[0].colorWriteMask = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + // Color blend state + VkPipelineColorBlendStateCreateInfo cb; + cb.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + cb.flags = 0; + cb.pNext = NULL; + cb.attachmentCount = 1; + cb.pAttachments = bs; + cb.logicOpEnable = VK_FALSE; + cb.logicOp = VK_LOGIC_OP_NO_OP; + cb.blendConstants[0] = 1.0f; + cb.blendConstants[1] = 1.0f; + cb.blendConstants[2] = 1.0f; + cb.blendConstants[3] = 1.0f; + + std::vector dynamicStateEnables = {VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR}; + VkPipelineDynamicStateCreateInfo dynamicState = {}; + dynamicState.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamicState.pNext = NULL; + dynamicState.pDynamicStates = dynamicStateEnables.data(); + dynamicState.dynamicStateCount = (uint32_t)dynamicStateEnables.size(); + + // view port state + + VkPipelineViewportStateCreateInfo vp = {}; + vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + vp.pNext = NULL; + vp.flags = 0; + vp.viewportCount = 1; + vp.scissorCount = 1; + vp.pScissors = NULL; + vp.pViewports = NULL; + + // depth stencil state + + VkPipelineDepthStencilStateCreateInfo ds; + ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + ds.pNext = NULL; + ds.flags = 0; + ds.depthTestEnable = VK_FALSE; + ds.depthWriteEnable = VK_FALSE; + ds.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL; + ds.depthBoundsTestEnable = VK_FALSE; + 
ds.stencilTestEnable = VK_FALSE; + ds.back.failOp = VK_STENCIL_OP_KEEP; + ds.back.passOp = VK_STENCIL_OP_KEEP; + ds.back.compareOp = VK_COMPARE_OP_ALWAYS; + ds.back.compareMask = 0; + ds.back.reference = 0; + ds.back.depthFailOp = VK_STENCIL_OP_KEEP; + ds.back.writeMask = 0; + ds.minDepthBounds = 0; + ds.maxDepthBounds = 0; + ds.stencilTestEnable = VK_FALSE; + ds.front = ds.back; + + // multi sample state + + VkPipelineMultisampleStateCreateInfo ms; + ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + ms.pNext = NULL; + ms.flags = 0; + ms.pSampleMask = NULL; + ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + ms.sampleShadingEnable = VK_FALSE; + ms.alphaToCoverageEnable = VK_FALSE; + ms.alphaToOneEnable = VK_FALSE; + ms.minSampleShading = 0.0; + + VkShaderModule vsModule, psModule; + std::vector vsCode, psCode; + + DxcCompileToSpirv(c_Shader, L"VSMain", L"vs_6_0", L"", nullptr, 0, vsCode); + DxcCompileToSpirv(c_Shader, L"PSMain", L"ps_6_0", L"", nullptr, 0, psCode); + + VkShaderModuleCreateInfo smCI = {}; + smCI.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + + smCI.pCode = reinterpret_cast(vsCode.data()); + smCI.codeSize = vsCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &vsModule)); + + smCI.pCode = reinterpret_cast(psCode.data()); + smCI.codeSize = psCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &psModule)); + + VkPipelineShaderStageCreateInfo shaderStages[2] = {}; + shaderStages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[0].module = vsModule; + shaderStages[0].pName = "VSMain"; + shaderStages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + shaderStages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[1].module = psModule; + shaderStages[1].pName = "PSMain"; + shaderStages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + + VkGraphicsPipelineCreateInfo psoCI = {}; + psoCI.sType = 
VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + psoCI.pNext = NULL; + psoCI.layout = m_pipelineLayout; + psoCI.basePipelineHandle = VK_NULL_HANDLE; + psoCI.basePipelineIndex = 0; + psoCI.flags = 0; + psoCI.pVertexInputState = &vi; + psoCI.pInputAssemblyState = &ia; + psoCI.pRasterizationState = &rs; + psoCI.pColorBlendState = &cb; + psoCI.pTessellationState = NULL; + psoCI.pMultisampleState = &ms; + psoCI.pDynamicState = &dynamicState; + psoCI.pViewportState = &vp; + psoCI.pDepthStencilState = &ds; + psoCI.pStages = shaderStages; + psoCI.stageCount = _countof(shaderStages); + psoCI.renderPass = renderPass; + psoCI.subpass = 0; + + VkPipeline pso = {}; + ThrowIfFailedVK(vkCreateGraphicsPipelines(m_device, VK_NULL_HANDLE, 1, &psoCI, nullptr, &pso)); + + vkDestroyShaderModule(m_device, vsModule, nullptr); + vkDestroyShaderModule(m_device, psModule, nullptr); + + return pso; + } + + void LoadRpsPipeline() + { + RpsRenderGraphCreateInfo renderGraphInfo = {}; + renderGraphInfo.mainEntryCreateInfo.hRpslEntryPoint = rpsTestLoadRpslEntry(test_triangle, main); + + REQUIRE_RPS_OK(rpsRenderGraphCreate(m_rpsDevice, &renderGraphInfo, &m_rpsRenderGraph)); + + REQUIRE_RPS_OK(rpsProgramBindNode( + rpsRenderGraphGetMainEntry(m_rpsRenderGraph), "Triangle", &TestVkTriangle::DrawTriangleWithRPSCb, this)); + } + + void UpdateRpsPipeline(uint64_t frameIndex) // Updates the render graph for frameIndex, passing the swap-chain images and their description as its single argument. + { + if (m_rpsRenderGraph != RPS_NULL_HANDLE) + { + std::vector<RpsRuntimeResource> backBufferResources(m_swapChainImages.size()); // one slot per swap-chain image; a fixed-size array here would be indexed without a bounds check + + for (uint32_t i = 0; i < m_swapChainImages.size(); i++) + { + backBufferResources[i] = {m_swapChainImages[i].image}; + } + const RpsRuntimeResource* argResources[] = {backBufferResources.data()}; + + RpsResourceDesc backBufferDesc = {}; + backBufferDesc.type = RPS_RESOURCE_TYPE_IMAGE_2D; + backBufferDesc.temporalLayers = uint32_t(m_swapChainImages.size()); + backBufferDesc.image.arrayLayers = 1; + backBufferDesc.image.mipLevels = 1; + backBufferDesc.image.format = rpsFormatFromVK(m_swapChainFormat.format); +
backBufferDesc.image.width = m_width; + backBufferDesc.image.height = m_height; + backBufferDesc.image.sampleCount = 1; + + RpsConstant argData[] = {&backBufferDesc}; + + const uint64_t gpuCompletedFrameIndex = CalcGuaranteedCompletedFrameIndexForRps(); + + RpsRenderGraphUpdateInfo updateInfo = {}; + updateInfo.frameIndex = frameIndex; + updateInfo.gpuCompletedFrameIndex = gpuCompletedFrameIndex; + updateInfo.numArgs = 1; + updateInfo.ppArgs = argData; + updateInfo.ppArgResources = argResources; + + updateInfo.diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_RUNTIME_DEBUG_NAMES; + if (frameIndex < m_swapChainImages.size()) + { + updateInfo.diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_ALL; + } + + REQUIRE_RPS_OK(rpsRenderGraphUpdate(m_rpsRenderGraph, &updateInfo)); + } + } + +private: + VkRenderPass m_renderPassWithoutRps = VK_NULL_HANDLE; + VkPipelineLayout m_pipelineLayout = VK_NULL_HANDLE; + VkPipeline m_psoWithoutRps = VK_NULL_HANDLE; + VkPipeline m_psoWithRps = VK_NULL_HANDLE; + std::vector m_frameBuffers; + + RpsDevice m_rpsDevice = {}; + RpsRenderGraph m_rpsRenderGraph = {}; + + bool m_bUseRps = true; +}; + +TEST_CASE(TEST_APP_NAME) +{ + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#if defined(BREAK_AT_ALLOC_ID) + _CrtSetBreakAlloc(BREAK_AT_ALLOC_ID); +#endif + + TestVkTriangle renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + runInfo.numFramesToRender = g_exitAfterFrame; + runInfo.width = 1280; + runInfo.height = 720; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} diff --git a/tests/gui/test_visualizer.rpsl b/tests/gui/test_visualizer.rpsl new file mode 100644 index 0000000..4615df9 --- /dev/null +++ b/tests/gui/test_visualizer.rpsl @@ -0,0 +1,40 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. 
+// +// See file LICENSE.RTF for full license details. + +node Geometry(rtv offscreenRenderTarget : SV_Target0, uint useMSAA); +graphics node GeometryMSAA(rtv offscreenRTMSAA : SV_Target0, uint useMSAA, [writeonly(resolve)] texture offscreenRT : SV_ResolveTarget0); +graphics node Quads(rtv backBuffer : SV_Target0, srv offscreenRenderTarget); + +// render_to_texture entry point: draws the geometry into an offscreen target (directly, or through offscreenRTMSAA which GeometryMSAA resolves into it), then passes that target to Quads to draw the back buffer. +export void main([readonly(present)] texture backbuffer, bool useMultisampling) +{ + ResourceDesc backbufferDesc = backbuffer.desc(); + uint32_t width = (uint32_t)backbufferDesc.Width; + uint32_t height = (uint32_t)backbufferDesc.Height; + RPS_FORMAT backbufferFormat = backbufferDesc.Format; + + // Create the offscreen render target with the same format and dimensions as the back buffer. + + texture offscreenRT = create_tex2d(backbufferFormat, width, height); + + // Clear, then render the geometry: either into offscreenRTMSAA (with offscreenRT as the resolve target) or straight into offscreenRT. + if (useMultisampling) + { + texture offscreenRTMSAA = create_tex2d(backbufferFormat, width, height, 1, 1, 1, 4); + + clear(offscreenRTMSAA, float4(0.0, 0.4, 0.2, 1.0)); + GeometryMSAA(offscreenRTMSAA, useMultisampling, offscreenRT); + } + else + { + clear(offscreenRT, float4(0.0, 0.2, 0.4, 1.0)); + Geometry(offscreenRT, false); + } + + // Render to the back buffer, with offscreenRT bound as a shader resource (srv). + Quads(backbuffer, offscreenRT); +} diff --git a/tests/gui/test_visualizer_shared.h b/tests/gui/test_visualizer_shared.h new file mode 100644 index 0000000..9f7fd2b --- /dev/null +++ b/tests/gui/test_visualizer_shared.h @@ -0,0 +1,265 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details.
+ +#define CATCH_CONFIG_MAIN +#include +#include +#include + +#include "rps/rps.h" + +#include "rps_visualizer.h" + +#include "utils/rps_test_common.h" +#include "core/rps_util.hpp" + +RPS_DECLARE_RPSL_ENTRY(test_visualizer, main); + +static const char c_Shader[] = R"( +#ifdef __hlsl_dx_compiler +[[vk::binding(0, 0)]] +#endif +cbuffer SceneConstantBuffer : register(b0) +{ + float4x4 offset; + float uvDistort; + float aspectRatio; +}; + +struct PSInput +{ + float4 position : SV_POSITION; + float4 color : COLOR; + float4 uv : TEXCOORD; +}; + +#ifdef __hlsl_dx_compiler +[[vk::binding(1, 0)]] +#endif +Texture2D g_texture : register(t0); +#ifdef __hlsl_dx_compiler +[[vk::binding(2, 0)]] +#endif +SamplerState g_sampler : register(s0); + +PSInput VSMain(float4 position : POSITION, float4 color : COLOR, float4 uv : TEXCOORD) +{ + PSInput result; + + position.y *= aspectRatio; + result.position = mul(offset, position); + result.color = color; + result.uv = uv; + result.uv.z = uvDistort; + + return result; +} + +float4 PSMain(PSInput input) : SV_TARGET +{ + input.uv.y += sin(input.uv.x * 10.f) * input.uv.z; + return g_texture.Sample(g_sampler, input.uv.xy) * input.color; +} +)"; + +#define TEST_APP_NAME_RAW "TestVisualizer" + +using namespace DirectX; + +class TestRpsRenderVisualizer +{ +public: + struct SceneConstantBuffer + { + XMFLOAT4X4 offset; + float uvDistort; + float aspectRatio; + }; + + struct Vertex + { + XMFLOAT3 position; + XMFLOAT4 color; + XMFLOAT2 uv; + }; + +public: + TestRpsRenderVisualizer() + : m_translation(0.f) + , m_rotation(0.f) + { + XMStoreFloat4x4(&m_quadConstantData.offset, XMMatrixIdentity()); + m_quadConstantData.uvDistort = 0.1f; + m_quadConstantData.aspectRatio = 1.0f; + } + +protected: + RpsRenderGraph GetRpsRenderGraph() const + { + return m_rpsRenderGraph; + } + + void OnInit() + { + if (s_UseNullRuntime) + { + CreateRpsNullRuntimeDevice(m_rpsDevice); + } + else + { + CreateRpsDevice(m_rpsDevice); + } + + LoadRpsPipeline(); + } + + virtual 
void UpdateRpsPipeline(uint64_t frameIndex, + uint64_t completedFrameIndex, + const RpsResourceDesc& backBufferDesc, + RpsRuntimeResource* pBackBuffers) + { + if (m_rpsRenderGraph != RPS_NULL_HANDLE) + { + const RpsRuntimeResource* argResources[] = {pBackBuffers}; + RpsConstant argDataRenderToTexture[] = {&backBufferDesc, &m_useMSAA}; + RpsConstant argDataGeneral[] = {&backBufferDesc}; + + RpsRenderGraphUpdateInfo updateInfo = {}; + updateInfo.frameIndex = frameIndex; + updateInfo.gpuCompletedFrameIndex = completedFrameIndex; + updateInfo.numArgs = uint32_t(s_UseRenderToTextureImplementation ? RPS_COUNTOF(argDataRenderToTexture) + : RPS_COUNTOF(argDataGeneral)); + updateInfo.ppArgs = s_UseRenderToTextureImplementation ? argDataRenderToTexture : argDataGeneral; + updateInfo.ppArgResources = argResources; + + updateInfo.diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_RUNTIME_DEBUG_NAMES; + if (completedFrameIndex == RPS_GPU_COMPLETED_FRAME_INDEX_NONE) + { + updateInfo.diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_ALL; + } + //Disable for null runtime + REQUIRE_RPS_OK(rpsRenderGraphUpdate(m_rpsRenderGraph, &updateInfo)); + + if (m_HasResized) + { + RpsVisualizerUpdateInfo updateInfo = {m_rpsRenderGraph}; + REQUIRE_RPS_OK(rpsVisualizerUpdate(m_rpsVisualizer, &updateInfo)); + } + } + } + + void OnCleanUp() + { + rpsVisualizerDestroy(m_rpsVisualizer); + rpsRenderGraphDestroy(m_rpsRenderGraph); + rpsTestUtilDestroyDevice(m_rpsDevice); + } + + void OnUpdate(uint32_t frameIndex, uint32_t width, uint32_t height) + { + const float translationSpeed = 0.01f; + const float offsetBounds = 1.4f; + + m_translation += translationSpeed; + if (m_translation > offsetBounds) + { + m_translation = -offsetBounds; + } + + XMMATRIX transform = XMMatrixAffineTransformation2D( + XMVectorSplatOne(), XMVectorZero(), m_rotation, XMVectorSet(m_translation, 0.f, 0.f, 0.f)); + + XMStoreFloat4x4(&m_triangleAnimationData.offset, transform); + m_triangleAnimationData.uvDistort = 0.1f; + 
m_triangleAnimationData.aspectRatio = static_cast(width) / height; + } + + void OnRender(uint64_t frameIndex, RpsRuntimeCommandBuffer cmdBuf, uint32_t cmdBegin, uint32_t cmdCount) + { + RpsRenderGraphRecordCommandInfo recordInfo = {}; + + recordInfo.hCmdBuffer = cmdBuf; + recordInfo.pUserContext = this; + recordInfo.frameIndex = frameIndex; + recordInfo.cmdBeginIndex = cmdBegin; + recordInfo.numCmds = cmdCount; + + RpsResult result = rpsRenderGraphRecordCommands(m_rpsRenderGraph, &recordInfo); + REQUIRE(result == RPS_OK); + + REQUIRE_RPS_OK(rpsVisualizerDrawImGui(m_rpsVisualizer)); + + m_HasResized = false; + } + +protected: + virtual void CreateRpsDevice(RpsDevice& rpsDeviceOut) = 0; + virtual void CreateRpsNullRuntimeDevice(RpsDevice& rpsDeviceOut) = 0; + virtual void DrawTriangle(const RpsCmdCallbackContext* pContext, bool bMSAA) = 0; + +private: + static void DummyCb(const RpsCmdCallbackContext*) + { + } + + static void DrawTriangleCb(const RpsCmdCallbackContext* pContext) + { + auto pThis = static_cast(pContext->pCmdCallbackContext); + const bool bMSAA = *rpsCmdGetArg(pContext); + pThis->DrawTriangle(pContext, bMSAA); + } + + void LoadRpsPipeline() + { + RpsRenderGraphCreateInfo renderGraphCreateInfo = {}; + renderGraphCreateInfo.mainEntryCreateInfo.hRpslEntryPoint = rpsTestLoadRpslEntry(test_visualizer, main); + + RpsResult result = rpsRenderGraphCreate(m_rpsDevice, &renderGraphCreateInfo, &m_rpsRenderGraph); + REQUIRE(result == RPS_OK); + + RpsSubprogram hRpslEntry = rpsRenderGraphGetMainEntry(m_rpsRenderGraph); + + if (s_UseRenderToTextureImplementation && !s_UseNullRuntime) + { + //Regular implementation + result = rpsProgramBindNode(hRpslEntry, "Geometry", &DrawTriangleCb, this); + REQUIRE(result == RPS_OK); + + result = rpsProgramBindNode(hRpslEntry, "GeometryMSAA", &DrawTriangleCb, this); + REQUIRE(result == RPS_OK); + } + else + { + //Dummy Command for null runtime + result = rpsProgramBindNode(hRpslEntry, nullptr, &DummyCb, this); + REQUIRE(result == 
RPS_OK); + } + result = rpsVisualizerCreate(m_rpsDevice, nullptr, &m_rpsVisualizer); + REQUIRE(result == RPS_OK); + } + +private: + RpsDevice m_rpsDevice = RPS_NULL_HANDLE; + RpsRenderGraph m_rpsRenderGraph = RPS_NULL_HANDLE; + + float m_translation = 0.0f; + float m_rotation = 0.0f; + bool m_useMSAA = true; + +protected: + static constexpr bool s_UseNullRuntime = true; + static constexpr bool s_UseRenderToTextureImplementation = false; + static_assert(s_UseRenderToTextureImplementation || s_UseNullRuntime, + "If the RenderToTextureImplementation is not used, the NullRuntime is required."); + + RpsVisualizer m_rpsVisualizer = RPS_NULL_HANDLE; + + SceneConstantBuffer m_triangleAnimationData; + SceneConstantBuffer m_quadConstantData; + + bool m_HasResized = true; +}; diff --git a/tests/gui/test_visualizer_vk.cpp b/tests/gui/test_visualizer_vk.cpp new file mode 100644 index 0000000..62becba --- /dev/null +++ b/tests/gui/test_visualizer_vk.cpp @@ -0,0 +1,793 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#ifdef _WIN32 +#define VK_USE_PLATFORM_WIN32_KHR +#else +#error "TODO" +#endif + +#define RPS_VK_RUNTIME 1 +#define RPS_AFX_REQUIRE_IMGUI 1 + +#include "test_visualizer_shared.h" + +#include "utils/rps_test_common.h" +#include "utils/rps_test_win32.h" +#include "utils/rps_test_vk_renderer.h" + +#include "rps_visualizer.h" + +#include "imgui.h" +#include "backends/imgui_impl_win32.h" +#include "backends/imgui_impl_vulkan.h" + +#include + +extern IMGUI_IMPL_API LRESULT ImGui_ImplWin32_WndProcHandler(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam); + +static void check_vk_result(VkResult err) +{ + if (err == 0) + return; + fprintf(stderr, "[vulkan] Error: VkResult = %d\n", err); + if (err < 0) + abort(); +} + +class TestVkRenderVisualizer : public RpsTestVulkanRenderer, public TestRpsRenderVisualizer +{ +protected: + virtual void OnInit(VkCommandBuffer initCmdBuf, InitTempResources& tempResources) override + { + LoadAssets(initCmdBuf, tempResources); + + IMGUI_CHECKVERSION(); + ImGui::CreateContext(); + + ImGui_ImplWin32_Init(m_hWnd); + + ImGui_ImplVulkan_InitInfo initInfo = {}; + initInfo.Instance = m_vkInstance; + initInfo.PhysicalDevice = m_physicalDevice; + initInfo.Device = m_device; + initInfo.QueueFamily = m_rpsQueueIndexToVkQueueFamilyMap[RPS_AFX_QUEUE_INDEX_GFX]; + initInfo.Queue = m_queues[RPS_AFX_QUEUE_INDEX_GFX]; + initInfo.PipelineCache = VK_NULL_HANDLE; + initInfo.DescriptorPool = m_descriptorPool; + initInfo.Subpass = 0; + initInfo.MinImageCount = 2; + initInfo.ImageCount = static_cast(m_swapChainImages.size()); + initInfo.MSAASamples = VK_SAMPLE_COUNT_1_BIT; + initInfo.Allocator = nullptr; + initInfo.CheckVkResultFn = check_vk_result; + + VkAttachmentDescription attachment = {}; + attachment.format = m_swapChainFormat.format; + attachment.samples = VK_SAMPLE_COUNT_1_BIT; + attachment.loadOp = TestRpsRenderVisualizer::s_UseRenderToTextureImplementation ? 
VK_ATTACHMENT_LOAD_OP_LOAD + : VK_ATTACHMENT_LOAD_OP_CLEAR; + attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachment.initialLayout = TestRpsRenderVisualizer::s_UseRenderToTextureImplementation + ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + : VK_IMAGE_LAYOUT_UNDEFINED; + attachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + + VkAttachmentReference color_attachment = {}; + color_attachment.attachment = 0; + color_attachment.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + VkSubpassDescription subpass = {}; + subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &color_attachment; + + VkRenderPassCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + info.attachmentCount = 1; + info.pAttachments = &attachment; + info.subpassCount = 1; + info.pSubpasses = &subpass; + ThrowIfFailedVK(vkCreateRenderPass(m_device, &info, nullptr, &m_imguiRenderPass)); // fail here rather than pass an invalid render pass to ImGui_ImplVulkan_Init below + + ImGui_ImplVulkan_Init(&initInfo, m_imguiRenderPass); + ImGui_ImplVulkan_CreateFontsTexture(initCmdBuf); + + OnPostResize(); + + TestRpsRenderVisualizer::OnInit(); + + if (s_UseRenderToTextureImplementation && !s_UseNullRuntime) + { + RpsResult rpsResult = + rpsProgramBindNode(rpsRenderGraphGetMainEntry(GetRpsRenderGraph()), "Quads", &DrawQuadsCb, this); + REQUIRE(rpsResult == RPS_OK); + } + } + + virtual void OnPreResize() override + { + DestroySwapChainFrameBuffers(); + } + + virtual void OnPostResize() override + { + if (m_imguiRenderPass != VK_NULL_HANDLE) + { + VkFramebufferCreateInfo fbCreateInfo = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO}; + fbCreateInfo.renderPass = m_imguiRenderPass; + fbCreateInfo.attachmentCount = 1; + fbCreateInfo.width = m_width; + fbCreateInfo.height = m_height; + fbCreateInfo.layers = 1; + + assert(m_imguiFrameBuffers.empty()); +
m_imguiFrameBuffers.resize(m_swapChainImages.size()); + + for (uint32_t i = 0; i < m_swapChainImages.size(); i++) + { + fbCreateInfo.pAttachments = &m_swapChainImages[i].imageView; + ThrowIfFailedVK(vkCreateFramebuffer(m_device, &fbCreateInfo, nullptr, &m_imguiFrameBuffers[i])); + } + } + + m_HasResized = true; + + m_frameCounter = 0; + } + + virtual void OnCleanUp() override + { + TestRpsRenderVisualizer::OnCleanUp(); + + ImGui_ImplVulkan_Shutdown(); + ImGui_ImplWin32_Shutdown(); + ImGui::DestroyContext(); + + DestroySwapChainFrameBuffers(); + + vkDestroyRenderPass(m_device, m_imguiRenderPass, nullptr); + + vkDestroyPipeline(m_device, m_geoPipeline, nullptr); + vkDestroyPipeline(m_device, m_geoPipelineMSAA, nullptr); + vkDestroyPipeline(m_device, m_quadPipeline, nullptr); + vkDestroyPipelineLayout(m_device, m_pipelineLayout, nullptr); + vkDestroySampler(m_device, m_defaultSampler, nullptr); + vkDestroyDescriptorSetLayout(m_device, m_sharedDescriptorSetLayout, nullptr); + vkDestroyImage(m_device, m_checkerboardTexture, nullptr); + vkDestroyImageView(m_device, m_checkerboardTextureView, nullptr); + vkDestroyBuffer(m_device, m_vertexBuffer, nullptr); + } + + virtual void OnUpdate(uint32_t frameIndex) override + { + TestRpsRenderVisualizer::OnUpdate(frameIndex, m_width, m_height); + UpdatePipeline(frameIndex, CalcGuaranteedCompletedFrameIndexForRps()); + } + + virtual void OnRender(uint32_t frameIndex) override + { + REQUIRE(RPS_SUCCEEDED(ExecuteRenderGraph(frameIndex, GetRpsRenderGraph(), true, false))); + + StartImGuiDraw(); + + REQUIRE(RPS_SUCCEEDED(rpsVisualizerDrawImGui(m_rpsVisualizer))); + + REQUIRE(RPS_SUCCEEDED(FinishImGuiDraw(GetRpsRenderGraph()))); + + m_HasResized = false; + } + +protected: + virtual void CreateRpsDevice(RpsDevice& rpsDeviceOut) override final + { + rpsDeviceOut = rpsTestUtilCreateDevice( + [this](auto pCreateInfo, auto phDevice) { return CreateRpsRuntimeDevice(*pCreateInfo, *phDevice); }); + } + + virtual void 
CreateRpsNullRuntimeDevice(RpsDevice& rpsDeviceOut) override final + { + rpsDeviceOut = rpsTestUtilCreateNullRuntimeDevice(); + } + + virtual void DrawTriangle(const RpsCmdCallbackContext* pContext, bool isMSAA) override final + { + if (isMSAA && (m_geoPipelineMSAA == RPS_NULL_HANDLE)) + { + VkRenderPass rp; + RpsResult result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + CreatePipeline(c_Shader, rp, &m_geoPipelineMSAA, true); + } + else if (!isMSAA && (m_geoPipeline == RPS_NULL_HANDLE)) + { + VkRenderPass rp; + RpsResult result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + CreatePipeline(c_Shader, rp, &m_geoPipeline, false); + } + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_sharedDescriptorSetLayout, 1, &ds)); + + VkWriteDescriptorSet writeDescriptorSet[2] = {}; + + VkDescriptorBufferInfo bufInfo = + AllocAndWriteFrameConstants(&m_triangleAnimationData, sizeof(m_triangleAnimationData)); + AppendWriteDescriptorSetBuffers(&writeDescriptorSet[0], ds, 0, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &bufInfo); + + VkDescriptorImageInfo imageInfo = { + VK_NULL_HANDLE, m_checkerboardTextureView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + AppendWriteDescriptorSetImages(&writeDescriptorSet[1], ds, 1, 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &imageInfo); + + vkUpdateDescriptorSets(m_device, _countof(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + + vkCmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayout, 0, 1, &ds, 0, nullptr); + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, isMSAA ? 
m_geoPipelineMSAA : m_geoPipeline); + vkCmdBindVertexBuffers(cmdBuf, 0, 1, &m_vertexBuffer, &m_triangleVbOffset); + vkCmdDraw(cmdBuf, 3, 1, 0, 0); + } + + void DrawQuads(const RpsCmdCallbackContext* pContext, rps::UnusedArg backBuffer, VkImageView offscreenRTSrv) + { + if (m_quadPipeline == RPS_NULL_HANDLE) + { + VkRenderPass rp; + RpsResult result = rpsVKGetCmdRenderPass(pContext, &rp); + REQUIRE(result == RPS_OK); + + CreatePipeline(c_Shader, rp, &m_quadPipeline, false); + } + + VkCommandBuffer cmdBuf = rpsVKCommandBufferFromHandle(pContext->hCommandBuffer); + + VkDescriptorSet ds; + ThrowIfFailedVK(AllocFrameDescriptorSet(&m_sharedDescriptorSetLayout, 1, &ds)); + + VkWriteDescriptorSet writeDescriptorSet[2] = {}; + + VkDescriptorBufferInfo bufInfo = AllocAndWriteFrameConstants(&m_quadConstantData, sizeof(m_quadConstantData)); + AppendWriteDescriptorSetBuffers(&writeDescriptorSet[0], ds, 0, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &bufInfo); + + VkDescriptorImageInfo imageInfo = {VK_NULL_HANDLE, offscreenRTSrv, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + AppendWriteDescriptorSetImages(&writeDescriptorSet[1], ds, 1, 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &imageInfo); + + vkUpdateDescriptorSets(m_device, _countof(writeDescriptorSet), writeDescriptorSet, 0, nullptr); + + vkCmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLayout, 0, 1, &ds, 0, nullptr); + vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_quadPipeline); + vkCmdBindVertexBuffers(cmdBuf, 0, 1, &m_vertexBuffer, &m_quadVbOffset); + vkCmdDraw(cmdBuf, 24, 1, 0, 0); + } + + static void DrawQuadsCb(const RpsCmdCallbackContext* pContext) + { + auto pThis = static_cast(pContext->pCmdCallbackContext); + + VkImageView hSrv; + REQUIRE(RPS_OK == rpsVKGetCmdArgImageView(pContext, 1, &hSrv)); + + pThis->DrawQuads(pContext, {}, hSrv); + } + + virtual LRESULT WindowProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam, bool& bHandled) override + { + const LRESULT result = 
ImGui_ImplWin32_WndProcHandler(hWnd, msg, wParam, lParam); + if (result) + { + bHandled = true; + } + + return result; + } + +private: + void LoadAssets(VkCommandBuffer initCmdBuf, InitTempResources& tempResources) + { + OnPostResize(); + + VkSamplerCreateInfo sampCI = {}; + sampCI.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampCI.magFilter = VK_FILTER_LINEAR; + sampCI.minFilter = VK_FILTER_LINEAR; + sampCI.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + sampCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampCI.mipLodBias = 0.0f; + sampCI.compareOp = VK_COMPARE_OP_NEVER; + sampCI.minLod = 0.0f; + sampCI.maxLod = FLT_MAX; + sampCI.maxAnisotropy = 1.0; + sampCI.anisotropyEnable = VK_FALSE; + sampCI.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + ThrowIfFailedVK(vkCreateSampler(m_device, &sampCI, nullptr, &m_defaultSampler)); + + VkDescriptorSetLayoutBinding sharedLayoutBindings[4] = {}; + sharedLayoutBindings[0].binding = 0; + sharedLayoutBindings[0].descriptorCount = 1; + sharedLayoutBindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + sharedLayoutBindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; + sharedLayoutBindings[1].binding = 1; + sharedLayoutBindings[1].descriptorCount = 1; + sharedLayoutBindings[1].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + sharedLayoutBindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + sharedLayoutBindings[2].binding = 2; + sharedLayoutBindings[2].descriptorCount = 1; + sharedLayoutBindings[2].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + sharedLayoutBindings[2].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + sharedLayoutBindings[2].pImmutableSamplers = &m_defaultSampler; + sharedLayoutBindings[3].binding = 3; + sharedLayoutBindings[3].descriptorCount = 1; + sharedLayoutBindings[3].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + 
sharedLayoutBindings[3].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + + VkDescriptorSetLayoutCreateInfo setLayoutCI = {}; + setLayoutCI.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + setLayoutCI.pBindings = sharedLayoutBindings; + setLayoutCI.bindingCount = _countof(sharedLayoutBindings); + + ThrowIfFailedVK(vkCreateDescriptorSetLayout(m_device, &setLayoutCI, nullptr, &m_sharedDescriptorSetLayout)); + + VkPipelineLayoutCreateInfo plCI = {}; + plCI.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + plCI.setLayoutCount = 1; + plCI.pSetLayouts = &m_sharedDescriptorSetLayout; + + ThrowIfFailedVK(vkCreatePipelineLayout(m_device, &plCI, nullptr, &m_pipelineLayout)); + + // Create vertex buffers + { + Vertex triangleVertices[] = { + // triangle + {{0.0f, 0.25f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.5f, 0.0f}}, + {{0.25f, -0.25f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{-0.25f, -0.25f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + + // quad 0 + {{-1.0f, 1.0, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {0.0f, 0.0f}}, + {{0.f, 0.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f}}, + {{-1.f, 0.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + {{-1.0f, 1.0, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {0.0f, 0.0f}}, + {{0.f, 1.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 0.0f}}, + {{0.f, 0.f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f}}, + + // quad 1 + {{-1.0f, 0.0, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + {{0.f, -1.f, 0.0f}, {0.f, 0.0f, 1.0f, 1.0f}, {1.0f, 0.0f}}, + {{-1.f, -1.f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 0.0f}}, + {{-1.0f, 0.0, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}, + {{0.f, 0.f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {1.0f, 1.0f}}, + {{0.f, -1.f, 0.0f}, {0.0f, 0.0f, 1.0f, 1.0f}, {1.0f, 0.0f}}, + + // quad 2 + {{0.0f, 1.0, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {1.0f, 0.0f}}, + {{1.f, 0.f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.0f, 1.0f}}, + {{0.f, 0.f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{0.0f, 1.0, 0.0f}, {1.0f, 0.0f, 0.0f, 
1.0f}, {1.0f, 0.0f}}, + {{1.f, 1.f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.0f, 0.0f}}, + {{1.f, 0.f, 0.0f}, {1.0f, 0.0f, 0.0f, 1.0f}, {0.0f, 1.0f}}, + + // quad 3 + {{0.0f, 0.0, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{1.f, -1.f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 0.0f}}, + {{0.f, -1.f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 0.0f}}, + {{0.0f, 0.0, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, + {{1.f, 0.f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 1.0f}}, + {{1.f, -1.f, 0.0f}, {0.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 0.0f}}, + }; + + const UINT vertexBufferSize = sizeof(triangleVertices); + + m_vertexBuffer = CreateAndBindStaticBuffer( + vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); + m_triangleVbOffset = 0; + m_quadVbOffset = sizeof(Vertex) * 3; + + auto vbAlloc = AllocAndWriteFrameConstants(triangleVertices, vertexBufferSize); + VkBufferCopy vbCopy; + vbCopy.srcOffset = vbAlloc.offset; + vbCopy.dstOffset = 0; + vbCopy.size = vertexBufferSize; + vkCmdCopyBuffer(initCmdBuf, vbAlloc.buffer, m_vertexBuffer, 1, &vbCopy); + } + + CreateCheckerboardTexture(initCmdBuf, tempResources); + } + + void CreateCheckerboardTexture(VkCommandBuffer initCmdBuf, InitTempResources& tempResources) + { + // Texture data contains 4 channels (RGBA) with unnormalized 8-bit values, this is the most commonly supported format + VkFormat format = VK_FORMAT_R8G8B8A8_UNORM; + + uint32_t width = 256; + uint32_t height = 256; + uint32_t texturePixelSize = 4; + + const uint32_t rowPitch = width * texturePixelSize; + const uint32_t cellPitch = rowPitch >> 3; // The width of a cell in the checkboard texture. + const uint32_t cellHeight = width >> 3; // The height of a cell in the checkerboard texture. 
+ const uint32_t textureSize = rowPitch * height; + + std::vector data(textureSize); + uint8_t* textureData = &data[0]; + + for (uint32_t n = 0; n < textureSize; n += texturePixelSize) + { + uint32_t x = n % rowPitch; + uint32_t y = n / rowPitch; + uint32_t i = x / cellPitch; + uint32_t j = y / cellHeight; + + if (i % 2 == j % 2) + { + textureData[n] = 0xa0; // R + textureData[n + 1] = 0xa0; // G + textureData[n + 2] = 0xa0; // B + textureData[n + 3] = 0xff; // A + } + else + { + textureData[n] = 0xff; // R + textureData[n + 1] = 0xff; // G + textureData[n + 2] = 0xff; // B + textureData[n + 3] = 0xff; // A + } + } + + { + auto textureDataUploadBuf = AllocAndWriteFrameConstants(textureData, textureSize); + + m_checkerboardTexture = + CreateAndBindStaticImage(VK_IMAGE_TYPE_2D, + VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, + format, + width, + height, + 1, + 1, + 1); + + VkBufferImageCopy bufferCopyRegion = {}; + bufferCopyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + bufferCopyRegion.imageSubresource.mipLevel = 0; + bufferCopyRegion.imageSubresource.baseArrayLayer = 0; + bufferCopyRegion.imageSubresource.layerCount = 1; + bufferCopyRegion.imageExtent.width = width; + bufferCopyRegion.imageExtent.height = height; + bufferCopyRegion.imageExtent.depth = 1; + bufferCopyRegion.bufferOffset = textureDataUploadBuf.offset; + + // Transition the texture image layout to transfer target, so we can safely copy our buffer data to it. 
+ VkImageMemoryBarrier imageMemoryBarrier = {}; + imageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imageMemoryBarrier.image = m_checkerboardTexture; + imageMemoryBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imageMemoryBarrier.subresourceRange.baseMipLevel = 0; + imageMemoryBarrier.subresourceRange.levelCount = 1; + imageMemoryBarrier.subresourceRange.layerCount = 1; + imageMemoryBarrier.srcAccessMask = 0; + imageMemoryBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + // Insert a memory dependency at the proper pipeline stages that will execute the image layout transition + // Source pipeline stage is host write/read execution (VK_PIPELINE_STAGE_HOST_BIT) + // Destination pipeline stage is copy command execution (VK_PIPELINE_STAGE_TRANSFER_BIT) + vkCmdPipelineBarrier(initCmdBuf, + VK_PIPELINE_STAGE_HOST_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, + 0, + nullptr, + 0, + nullptr, + 1, + &imageMemoryBarrier); + + // Copy the single region described by bufferCopyRegion from the upload buffer into the image + vkCmdCopyBufferToImage(initCmdBuf, + textureDataUploadBuf.buffer, + m_checkerboardTexture, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, + &bufferCopyRegion); + + // Once the data has been uploaded, transition the texture image to the shader read layout so it can be sampled + imageMemoryBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + vkCmdPipelineBarrier(initCmdBuf, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + 0, + 0, + nullptr, + 0, + nullptr, + 1, + &imageMemoryBarrier); + } + + // Create image view + // Textures are not directly accessed by the shaders and + // are abstracted by image
views containing additional + // information and sub resource ranges + VkImageViewCreateInfo view = {}; + view.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view.viewType = VK_IMAGE_VIEW_TYPE_2D; + view.format = format; + view.components = { + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A}; + view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + view.subresourceRange.baseMipLevel = 0; + view.subresourceRange.baseArrayLayer = 0; + view.subresourceRange.layerCount = 1; + view.subresourceRange.levelCount = 1; + view.image = m_checkerboardTexture; + ThrowIfFailedVK(vkCreateImageView(m_device, &view, nullptr, &m_checkerboardTextureView)); + } + + void CreatePipeline(const char* pShaderCode, VkRenderPass renderPass, VkPipeline* pPipeline, bool bMSAA) + { + VkVertexInputBindingDescription vertBinding = {}; + vertBinding.binding = 0; + vertBinding.stride = sizeof(Vertex); + vertBinding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; + + VkVertexInputAttributeDescription vertAttrBinding[3] = {}; + vertAttrBinding[0].binding = 0; + vertAttrBinding[0].location = 0; + vertAttrBinding[0].format = VK_FORMAT_R32G32B32_SFLOAT; + vertAttrBinding[0].offset = offsetof(Vertex, position); + vertAttrBinding[1].binding = 0; + vertAttrBinding[1].location = 1; + vertAttrBinding[1].format = VK_FORMAT_R32G32B32_SFLOAT; + vertAttrBinding[1].offset = offsetof(Vertex, color); + vertAttrBinding[2].binding = 0; + vertAttrBinding[2].location = 2; + vertAttrBinding[2].format = VK_FORMAT_R32G32_SFLOAT; + vertAttrBinding[2].offset = offsetof(Vertex, uv); + + VkPipelineVertexInputStateCreateInfo vi = {}; + vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vi.pNext = NULL; + vi.flags = 0; + vi.vertexBindingDescriptionCount = 1; + vi.pVertexBindingDescriptions = &vertBinding; + vi.vertexAttributeDescriptionCount = _countof(vertAttrBinding); + vi.pVertexAttributeDescriptions = vertAttrBinding; + + // input assembly state + // 
+ VkPipelineInputAssemblyStateCreateInfo ia; + ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + ia.pNext = NULL; + ia.flags = 0; + ia.primitiveRestartEnable = VK_FALSE; + ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + + // rasterizer state + VkPipelineRasterizationStateCreateInfo rs; + rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rs.pNext = NULL; + rs.flags = 0; + rs.polygonMode = VK_POLYGON_MODE_FILL; + rs.cullMode = VK_CULL_MODE_NONE; + rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + rs.depthClampEnable = VK_FALSE; + rs.rasterizerDiscardEnable = VK_FALSE; + rs.depthBiasEnable = VK_FALSE; + rs.depthBiasConstantFactor = 0; + rs.depthBiasClamp = 0; + rs.depthBiasSlopeFactor = 0; + rs.lineWidth = 1.0f; + + VkPipelineColorBlendAttachmentState bs[1] = {}; + bs[0].blendEnable = VK_FALSE; + bs[0].srcColorBlendFactor = VK_BLEND_FACTOR_ONE; + bs[0].dstColorBlendFactor = VK_BLEND_FACTOR_ZERO; + bs[0].colorBlendOp = VK_BLEND_OP_ADD; + bs[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE; + bs[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; + bs[0].alphaBlendOp = VK_BLEND_OP_ADD; + bs[0].colorWriteMask = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + // Color blend state + VkPipelineColorBlendStateCreateInfo cb; + cb.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + cb.flags = 0; + cb.pNext = NULL; + cb.attachmentCount = 1; + cb.pAttachments = bs; + cb.logicOpEnable = VK_FALSE; + cb.logicOp = VK_LOGIC_OP_NO_OP; + cb.blendConstants[0] = 1.0f; + cb.blendConstants[1] = 1.0f; + cb.blendConstants[2] = 1.0f; + cb.blendConstants[3] = 1.0f; + + std::vector dynamicStateEnables = {VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR}; + VkPipelineDynamicStateCreateInfo dynamicState = {}; + dynamicState.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamicState.pNext = NULL; + dynamicState.pDynamicStates = dynamicStateEnables.data(); 
+ dynamicState.dynamicStateCount = (uint32_t)dynamicStateEnables.size(); + + // view port state + + VkPipelineViewportStateCreateInfo vp = {}; + vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + vp.pNext = NULL; + vp.flags = 0; + vp.viewportCount = 1; + vp.scissorCount = 1; + vp.pScissors = NULL; + vp.pViewports = NULL; + + // depth stencil state + + VkPipelineDepthStencilStateCreateInfo ds; + ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + ds.pNext = NULL; + ds.flags = 0; + ds.depthTestEnable = VK_FALSE; + ds.depthWriteEnable = VK_FALSE; + ds.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL; + ds.depthBoundsTestEnable = VK_FALSE; + ds.stencilTestEnable = VK_FALSE; + ds.back.failOp = VK_STENCIL_OP_KEEP; + ds.back.passOp = VK_STENCIL_OP_KEEP; + ds.back.compareOp = VK_COMPARE_OP_ALWAYS; + ds.back.compareMask = 0; + ds.back.reference = 0; + ds.back.depthFailOp = VK_STENCIL_OP_KEEP; + ds.back.writeMask = 0; + ds.minDepthBounds = 0; + ds.maxDepthBounds = 0; + ds.stencilTestEnable = VK_FALSE; + ds.front = ds.back; + + // multi sample state + + VkPipelineMultisampleStateCreateInfo ms; + ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + ms.pNext = NULL; + ms.flags = 0; + ms.pSampleMask = NULL; + ms.rasterizationSamples = bMSAA ? 
VK_SAMPLE_COUNT_4_BIT : VK_SAMPLE_COUNT_1_BIT; + ms.sampleShadingEnable = VK_FALSE; + ms.alphaToCoverageEnable = VK_FALSE; + ms.alphaToOneEnable = VK_FALSE; + ms.minSampleShading = 0.0; + + VkShaderModule vsModule, psModule; + std::vector vsCode, psCode; + + DxcCompileToSpirv(pShaderCode, L"VSMain", L"vs_6_0", L"", nullptr, 0, vsCode); + DxcCompileToSpirv(pShaderCode, L"PSMain", L"ps_6_0", L"", nullptr, 0, psCode); + + VkShaderModuleCreateInfo smCI = {}; + smCI.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + + smCI.pCode = reinterpret_cast(vsCode.data()); + smCI.codeSize = vsCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &vsModule)); + + smCI.pCode = reinterpret_cast(psCode.data()); + smCI.codeSize = psCode.size(); + + ThrowIfFailedVK(vkCreateShaderModule(m_device, &smCI, nullptr, &psModule)); + + VkPipelineShaderStageCreateInfo shaderStages[2] = {}; + shaderStages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[0].module = vsModule; + shaderStages[0].pName = "VSMain"; + shaderStages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; + shaderStages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderStages[1].module = psModule; + shaderStages[1].pName = "PSMain"; + shaderStages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + + VkGraphicsPipelineCreateInfo psoCI = {}; + psoCI.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + psoCI.pNext = NULL; + psoCI.layout = m_pipelineLayout; + psoCI.basePipelineHandle = VK_NULL_HANDLE; + psoCI.basePipelineIndex = 0; + psoCI.flags = 0; + psoCI.pVertexInputState = &vi; + psoCI.pInputAssemblyState = &ia; + psoCI.pRasterizationState = &rs; + psoCI.pColorBlendState = &cb; + psoCI.pTessellationState = NULL; + psoCI.pMultisampleState = &ms; + psoCI.pDynamicState = &dynamicState; + psoCI.pViewportState = &vp; + psoCI.pDepthStencilState = &ds; + psoCI.pStages = shaderStages; + psoCI.stageCount = _countof(shaderStages); + psoCI.renderPass = renderPass; + 
psoCI.subpass = 0; + + ThrowIfFailedVK(vkCreateGraphicsPipelines(m_device, VK_NULL_HANDLE, 1, &psoCI, nullptr, pPipeline)); + + vkDestroyShaderModule(m_device, vsModule, nullptr); + vkDestroyShaderModule(m_device, psModule, nullptr); + } + + void UpdatePipeline(uint64_t frameIndex, uint64_t completedFrameIndex) + { + RpsRuntimeResource backBuffers[16]; + + if (m_swapChainImages.size() > RPS_COUNTOF(backBuffers)) + throw; + + for (uint32_t i = 0; i < m_swapChainImages.size(); i++) + { + backBuffers[i] = rpsVKImageToHandle(m_swapChainImages[i].image); + } + + RpsResourceDesc backBufferDesc = {}; + backBufferDesc.type = RPS_RESOURCE_TYPE_IMAGE_2D; + backBufferDesc.temporalLayers = uint32_t(m_swapChainImages.size()); + backBufferDesc.image.arrayLayers = 1; + backBufferDesc.image.mipLevels = 1; + backBufferDesc.image.format = rpsFormatFromVK(m_swapChainFormat.format); + backBufferDesc.image.width = m_width; + backBufferDesc.image.height = m_height; + backBufferDesc.image.sampleCount = 1; + + TestRpsRenderVisualizer::UpdateRpsPipeline(frameIndex, completedFrameIndex, backBufferDesc, backBuffers); + } + +private: + VkPipeline m_geoPipeline = VK_NULL_HANDLE; + VkPipeline m_geoPipelineMSAA = VK_NULL_HANDLE; + VkPipeline m_quadPipeline = VK_NULL_HANDLE; + + VkPipelineLayout m_pipelineLayout = VK_NULL_HANDLE; + + VkSampler m_defaultSampler = VK_NULL_HANDLE; + + VkDescriptorSetLayout m_sharedDescriptorSetLayout = VK_NULL_HANDLE; + + VkImage m_checkerboardTexture = RPS_NULL_HANDLE; + VkImageView m_checkerboardTextureView = RPS_NULL_HANDLE; + VkBuffer m_vertexBuffer = RPS_NULL_HANDLE; + + VkDeviceSize m_triangleVbOffset = 0; + VkDeviceSize m_quadVbOffset = 0; +}; + +TEST_CASE(TEST_APP_NAME) +{ + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#if defined(BREAK_AT_ALLOC_ID) + _CrtSetBreakAlloc(BREAK_AT_ALLOC_ID); +#endif + + TestVkRenderVisualizer renderer; + + RpsTestRunWindowInfo runInfo = {}; + runInfo.title = TEXT(TEST_APP_NAME); + runInfo.numFramesToRender = 
g_exitAfterFrame; + runInfo.width = 2560; + runInfo.height = 1440; + runInfo.pRenderer = &renderer; + RpsTestRunWindowApp(&runInfo); +} diff --git a/tests/utils/CMakeLists.txt b/tests/utils/CMakeLists.txt new file mode 100644 index 0000000..4e0af23 --- /dev/null +++ b/tests/utils/CMakeLists.txt @@ -0,0 +1,13 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +# +# This file is part of the AMD Render Pipeline Shaders SDK which is +# released under the AMD INTERNAL EVALUATION LICENSE. +# +# See file LICENSE.RTF for full license details. + +file( GLOB_RECURSE UtilHeaders + "${CMAKE_CURRENT_SOURCE_DIR}/*.h" + "${CMAKE_CURRENT_SOURCE_DIR}/*.hpp" ) + + +AddModule( rps_test_utils ${CMAKE_CURRENT_SOURCE_DIR} "${AppFolder}" "" "" INTERFACE "" ) \ No newline at end of file diff --git a/tests/utils/rps_test_common.h b/tests/utils/rps_test_common.h new file mode 100644 index 0000000..dc663ed --- /dev/null +++ b/tests/utils/rps_test_common.h @@ -0,0 +1,205 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#ifndef _RPS_TEST_COMMON_H_ +#define _RPS_TEST_COMMON_H_ + +#include "rps/rps.h" + +#ifdef __cplusplus + +#include +#include +#include +#include + +#include "app_framework/afx_common_helpers.h" + +#define RPS_TEST_COUNTOF(A) (std::extent::value) + +#if RPS_HAS_MAYBE_UNUSED +#define RPS_TEST_MAYBE_UNUSED [[maybe_unused]] +#else +#define RPS_TEST_MAYBE_UNUSED +#endif + +#define RPS_TEST_CONCATENATE_DIRECT(A, B) A##B +#define RPS_TEST_CONCATENATE_INDIRECT(A, B) RPS_TEST_CONCATENATE_DIRECT(A, B) + +#define REQUIRE_RPS_OK(Expr) \ + do \ + { \ + RpsResult RPS_TEST_CONCATENATE_INDIRECT(result_, __LINE__) = Expr; \ + REQUIRE(RPS_TEST_CONCATENATE_INDIRECT(result_, __LINE__) == RPS_OK); \ + } while (false); + +#ifdef USE_RPSL_DLL +RpsRpslEntry rpsTestLoadRpslEntryFromDLL(const std::string& moduleName, const std::string& entryName); +#define rpsTestLoadRpslEntry(ModuleName, EntryFunction) \ + rpsTestLoadRpslEntryFromDLL(#ModuleName "_rpsl.dll", RPS_ENTRY_NAME(ModuleName, EntryFunction)) +#else +#define rpsTestLoadRpslEntry(ModuleName, EntryFunction) RPS_ENTRY_REF(ModuleName, EntryFunction) +#endif + +static int g_NumMallocs = 0; + +static void* CountedMalloc(void* pContext, size_t size, size_t alignment) +{ + g_NumMallocs++; +#if _MSC_VER + return _aligned_malloc(size, alignment); +#else + const size_t alignedSize = alignment ? 
(size + (alignment - 1)) & ~(alignment - 1) : size; + return aligned_alloc(alignment, alignedSize); +#endif +} + +static void CountedFree(void* pContext, void* ptr) +{ + g_NumMallocs--; +#if _MSC_VER + _aligned_free(ptr); +#else + free(ptr); +#endif +} + +RPS_TEST_MAYBE_UNUSED static void* CountedRealloc( + void* pContext, void* oldBuffer, size_t oldSize, size_t newSize, size_t alignment) +{ + void* pNewBuffer = oldBuffer; + + if (newSize > oldSize) + { + pNewBuffer = CountedMalloc(pContext, newSize, alignment); + if (oldBuffer) + { + memcpy(pNewBuffer, oldBuffer, std::min(oldSize, newSize)); + CountedFree(pContext, oldBuffer); + } + } + + return pNewBuffer; +} + +void rpsTestPrintDebugString(const char *); + +static void PrintToStdErr(void* pCtx, const char* formatString, ...) +{ + va_list args; + va_start(args, formatString); +#if _MSC_VER + vfprintf_s(stderr, formatString, args); +#else + vfprintf(stderr, formatString, args); +#endif + va_end(args); + + fflush(stderr); + + // TODO: Currently rpsTestPrintDebugString only implemented for WIN32. +#if defined(_MSC_VER) && defined(_WIN32) + char buf[4096]; + + va_start(args, formatString); + vsprintf_s(buf, formatString, args); + va_end(args); + rpsTestPrintDebugString(buf); +#endif +} + +RPS_TEST_MAYBE_UNUSED static uint32_t rpsTestUtilGetMallocCounter() +{ + return g_NumMallocs; +} + +RPS_TEST_MAYBE_UNUSED static RpsDevice rpsTestUtilCreateDevice( + std::function::type> createDevice = {}) +{ + RpsDevice device = RPS_NULL_HANDLE; + RpsDeviceCreateInfo createInfo = {}; + + createInfo.allocator.pfnAlloc = CountedMalloc; + createInfo.allocator.pfnFree = CountedFree; + createInfo.printer.pfnPrintf = PrintToStdErr; + + RpsResult result = createDevice ? 
createDevice(&createInfo, &device) : rpsDeviceCreate(&createInfo, &device); + REQUIRE(result == RPS_OK); + REQUIRE(device != RPS_NULL_HANDLE); + REQUIRE(g_NumMallocs > 0); + + return device; +} + +RPS_TEST_MAYBE_UNUSED static RpsDevice rpsTestUtilCreateNullRuntimeDevice() +{ + RpsDevice device = RPS_NULL_HANDLE; + RpsDeviceCreateInfo createInfo = {}; + + createInfo.allocator.pfnAlloc = CountedMalloc; + createInfo.allocator.pfnFree = CountedFree; + createInfo.printer.pfnPrintf = PrintToStdErr; + + RpsRuntimeCallbacks callbacks = {}; + callbacks.pfnBuildRenderGraphPhases = nullptr; + + RpsRuntimeDeviceCreateInfo rtCreate = {}; + + RpsNullRuntimeDeviceCreateInfo nullCreateInfo = {}; + nullCreateInfo.pDeviceCreateInfo = &createInfo; + + RpsResult result = rpsNullRuntimeDeviceCreate(&nullCreateInfo, &device); + REQUIRE(result == RPS_OK); + REQUIRE(device != RPS_NULL_HANDLE); + REQUIRE(g_NumMallocs > 0); + + return device; +} + +RPS_TEST_MAYBE_UNUSED static void rpsTestUtilDestroyDevice(RpsDevice device) +{ + rpsDeviceDestroy(device); + REQUIRE(rpsTestUtilGetMallocCounter() == 0); +} + +#define RPS_TEST_MALLOC_CHECKPOINT(Id) const uint32_t _RPS_MALLOC_COUNTER_##Id = rpsTestUtilGetMallocCounter() +#define RPS_TEST_MALLOC_COUNTER_EQUAL(Id1, Id2) REQUIRE(_RPS_MALLOC_COUNTER_##Id1 == _RPS_MALLOC_COUNTER_##Id2) +#define RPS_TEST_MALLOC_COUNTER_COMPARE(Id1, Op, Id2) REQUIRE(_RPS_MALLOC_COUNTER_##Id1 Op _RPS_MALLOC_COUNTER_##Id2) +#define RPS_TEST_MALLOC_COUNTER_EQUAL_CURRENT(Id) REQUIRE(_RPS_MALLOC_COUNTER_##Id == rpsTestUtilGetMallocCounter()) + +extern "C" { + +void REQUIRE_PROXY(RpsBool condition, const char* expr, const char* file, int line) +{ + REQUIRE(condition); +} + +void REQUIRE_OK_PROXY(RpsResult result, const char* expr, const char* file, int line) +{ + REQUIRE(result == RPS_OK); +} + +} // extern "C" + +// TODO: impl other platforms. 
+#ifdef _WIN32 +#include "rps_test_win32.h" +#endif + +#else //__cplusplus + +extern void REQUIRE_PROXY(RpsBool condition, const char* expr, const char* file, int line); +extern void REQUIRE_OK_PROXY(RpsResult result, const char* expr, const char* file, int line); + +#define REQUIRE(Cond) REQUIRE_PROXY(Cond, #Cond, __FILE__, __LINE__) +#define REQUIRE_RPS_OK(Cond) REQUIRE_OK_PROXY(Cond, #Cond " == RPS_OK", __FILE__, __LINE__) + +#define RPS_TEST_COUNTOF(A) (sizeof(A) / sizeof(A[0])) + +#endif //__cplusplus + +#endif //_RPS_TEST_COMMON_H_ diff --git a/tests/utils/rps_test_d3d11_renderer.h b/tests/utils/rps_test_d3d11_renderer.h new file mode 100644 index 0000000..afc991a --- /dev/null +++ b/tests/utils/rps_test_d3d11_renderer.h @@ -0,0 +1,21 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_TEST_D3D11_RENDERER_H_ +#define _RPS_TEST_D3D11_RENDERER_H_ + +#include "app_framework/afx_d3d11_renderer.h" + +#ifndef TEST_APP_NAME +#define TEST_APP_NAME (TEST_APP_NAME_RAW " - D3D11") +#endif + +class RpsTestD3D11Renderer : public RpsAfxD3D11Renderer +{ +}; + +#endif //_RPS_TEST_D3D11_RENDERER_H_ \ No newline at end of file diff --git a/tests/utils/rps_test_d3d12_renderer.h b/tests/utils/rps_test_d3d12_renderer.h new file mode 100644 index 0000000..74a1b48 --- /dev/null +++ b/tests/utils/rps_test_d3d12_renderer.h @@ -0,0 +1,21 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#ifndef _RPS_TEST_D3D12_RENDERER_H_ +#define _RPS_TEST_D3D12_RENDERER_H_ + +#include "app_framework/afx_d3d12_renderer.h" + +#ifndef TEST_APP_NAME +#define TEST_APP_NAME (TEST_APP_NAME_RAW " - D3D12") +#endif + +class RpsTestD3D12Renderer : public RpsAfxD3D12Renderer +{ +}; + +#endif //_RPS_TEST_D3D12_RENDERER_H_ \ No newline at end of file diff --git a/tests/utils/rps_test_host.h b/tests/utils/rps_test_host.h new file mode 100644 index 0000000..1d479bb --- /dev/null +++ b/tests/utils/rps_test_host.h @@ -0,0 +1,127 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#pragma once + +#define CATCH_CONFIG_RUNNER +#include +#include +#include + +#include "rps/rps.h" + +#include "rps_test_common.h" +#include "app_framework/afx_cmd_parser.h" +#include "app_framework/afx_renderer.h" + +using namespace DirectX; + +enum MultiQueueMode +{ + MULTI_QUEUE_DISABLE = 0, + MULTI_QUEUE_GFX_COMPUTE, + MULTI_QUEUE_GFX_COMPUTE_COPY, +}; + +static rps::CmdArg g_MultiQueueMode{"multi-queue", MULTI_QUEUE_DISABLE, {"mq"}}; + +class RpsTestHost +{ +public: + RpsTestHost() + { + } + virtual ~RpsTestHost() + { + } + +protected: + RpsDevice GetRpsDevice() const + { + return m_rpsDevice; + } + + RpsRenderGraph GetRpsRenderGraph() const + { + return m_rpsRenderGraph; + } + + void OnInit(RpsDevice hDevice, RpsRpslEntry hRpslEntryPoint) + { + m_rpsDevice = hDevice ? 
hDevice : rpsTestUtilCreateDevice(); + + RpsRenderGraphCreateInfo renderGraphCreateInfo = {}; + renderGraphCreateInfo.mainEntryCreateInfo.hRpslEntryPoint = hRpslEntryPoint; + + static_assert(RPS_AFX_QUEUE_INDEX_COUNT <= RPS_MAX_QUEUES, + "RpsAfxQueueIndices index count must not exceed RPS_MAX_QUEUES."); + // order of graphics, compute, and copy is written here to be same as RpsAfxQueueIndices + RpsQueueFlags queueFlags[] = {RPS_QUEUE_FLAG_GRAPHICS, RPS_QUEUE_FLAG_COMPUTE, RPS_QUEUE_FLAG_COPY}; + if (g_MultiQueueMode != MULTI_QUEUE_DISABLE) + { + renderGraphCreateInfo.scheduleInfo.numQueues = (g_MultiQueueMode == MULTI_QUEUE_GFX_COMPUTE) ? 2 : 3; + renderGraphCreateInfo.scheduleInfo.pQueueInfos = queueFlags; + } + + RpsResult result = rpsRenderGraphCreate(m_rpsDevice, &renderGraphCreateInfo, &m_rpsRenderGraph); + REQUIRE(result == RPS_OK); + + BindNodes(rpsRenderGraphGetMainEntry(m_rpsRenderGraph)); + } + + virtual void BindNodes(RpsSubprogram hRpslEntry) + { + } + + virtual void OnDestroy() + { + rpsRenderGraphDestroy(m_rpsRenderGraph); + rpsTestUtilDestroyDevice(m_rpsDevice); + } + + void OnUpdate(uint64_t frameIndex, + uint64_t completedFrameIndex, + uint32_t numArgs, + const RpsConstant* argData, + const RpsRuntimeResource* const* argResources) + { + if (m_rpsRenderGraph != RPS_NULL_HANDLE) + { + RpsRenderGraphUpdateInfo updateInfo = {}; + updateInfo.frameIndex = frameIndex; + updateInfo.gpuCompletedFrameIndex = completedFrameIndex; + updateInfo.numArgs = numArgs; + updateInfo.ppArgs = argData; + updateInfo.ppArgResources = argResources; + + updateInfo.diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_RUNTIME_DEBUG_NAMES; + if (completedFrameIndex == RPS_GPU_COMPLETED_FRAME_INDEX_NONE) + { + updateInfo.diagnosticFlags = RPS_DIAGNOSTIC_ENABLE_ALL; + } + + REQUIRE_RPS_OK(rpsRenderGraphUpdate(m_rpsRenderGraph, &updateInfo)); + } + } + +private: + RpsDevice m_rpsDevice = RPS_NULL_HANDLE; + RpsRenderGraph m_rpsRenderGraph = RPS_NULL_HANDLE; +}; + +int main(int argc, char* 
argv[]) +{ + Catch::Session session; + + rps::CLI::Parse(&argc, &argv); + + int returnCode = session.applyCommandLine(argc, argv); + if (returnCode != 0) // Indicates a command line error + return returnCode; + + return session.run(); +} diff --git a/tests/utils/rps_test_vk_renderer.h b/tests/utils/rps_test_vk_renderer.h new file mode 100644 index 0000000..370d979 --- /dev/null +++ b/tests/utils/rps_test_vk_renderer.h @@ -0,0 +1,21 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#ifndef _RPS_TEST_VK_RENDERER_H_ +#define _RPS_TEST_VK_RENDERER_H_ + +#include "app_framework/afx_vk_renderer.h" + +#ifndef TEST_APP_NAME +#define TEST_APP_NAME (TEST_APP_NAME_RAW " - Vulkan") +#endif + +class RpsTestVulkanRenderer : public RpsAfxVulkanRenderer +{ +}; + +#endif //_RPS_TEST_VK_RENDERER_H_ \ No newline at end of file diff --git a/tests/utils/rps_test_win32.h b/tests/utils/rps_test_win32.h new file mode 100644 index 0000000..ee6c6c4 --- /dev/null +++ b/tests/utils/rps_test_win32.h @@ -0,0 +1,48 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#ifndef _RPS_TEST_WINDOWS_H_ +#define _RPS_TEST_WINDOWS_H_ + +#include "app_framework/afx_win32.h" + +typedef RpsAfxRunWindowInfo RpsTestRunWindowInfo; + +static inline int RpsTestRunWindowApp(const RpsAfxRunWindowInfo* pRunInfo) +{ + return RpsAfxRunWindowApp(pRunInfo); +} + +RpsRpslEntry rpsTestLoadRpslEntryFromDLL(const std::string& moduleName, const std::string& entryName) +{ + HMODULE hDLL = ::LoadLibraryA(moduleName.c_str()); + + if (hDLL) + { + auto pfn_dynLibInit = reinterpret_cast(GetProcAddress(hDLL, "___rps_dyn_lib_init")); + RpsResult result = rpsRpslDynamicLibraryInit(pfn_dynLibInit); + if (RPS_FAILED(result)) + { + return nullptr; + } + + auto pEntry = reinterpret_cast(GetProcAddress(hDLL, entryName.c_str())); + if (pEntry) + { + return *pEntry; + } + } + + return nullptr; +} + +void rpsTestPrintDebugString(const char* str) +{ + ::OutputDebugStringA(str); +} + +#endif //_RPS_TEST_WINDOWS_H_ diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt new file mode 100644 index 0000000..d49d9a6 --- /dev/null +++ b/tools/CMakeLists.txt @@ -0,0 +1,13 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +# +# This file is part of the AMD Render Pipeline Shaders SDK which is +# released under the AMD INTERNAL EVALUATION LICENSE. +# +# See file LICENSE.RTF for full license details. 
+ +BuildFolderProperty( "tools" AppFolder ) + +# Only add the rpsl_explorer and rps_visualizer tool directories when RpsEnableImGui is truthy. +# The variable name is passed unexpanded so that if() evaluates it exactly once. +if ( RpsEnableImGui ) + add_subdirectory(rpsl_explorer) + add_subdirectory(rps_visualizer) +endif() diff --git a/tools/app_framework/Microsoft-DirectX-Graphics-Samples-LICENSE.txt b/tools/app_framework/Microsoft-DirectX-Graphics-Samples-LICENSE.txt new file mode 100644 index 0000000..b8b569d --- /dev/null +++ b/tools/app_framework/Microsoft-DirectX-Graphics-Samples-LICENSE.txt @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015 Microsoft + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/tools/app_framework/afx_cmd_parser.h b/tools/app_framework/afx_cmd_parser.h new file mode 100644 index 0000000..37ab4bd --- /dev/null +++ b/tools/app_framework/afx_cmd_parser.h @@ -0,0 +1,407 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. 
+// +// See file LICENSE.RTF for full license details. + +#pragma once + +#include +#include +#include +#include +#include + +namespace rps +{ + class ICmdArg + { + std::string Name; + bool IsRequired : 1; + bool IsPersistent : 1; + + public: + template + T* AsPtr() + { + return static_cast(GetValuePointer(sizeof(T))); + } + + protected: + ICmdArg(const std::string& name, bool bPersistent = true, bool bRequired = false) + : Name(name) + , IsRequired(bRequired) + , IsPersistent(bPersistent) + { + } + + virtual ~ICmdArg() + { + } + + virtual int32_t Parse(int32_t numRemainingArgs, char** ppRemainingArgs) = 0; + + virtual void SerializeValue(std::ostream& s) + { + } + + virtual void* GetValuePointer(size_t expectedSize) + { + return nullptr; + } + + friend class CLI; + }; + + class CLI + { + public: + static void Parse(int* pArgc, char*** pArgv) + { + Instance()->ParseImpl(*pArgc, *pArgv); + + *pArgc = int((Instance()->m_UnparsedArgs).size()); + *pArgv = &*Instance()->m_UnparsedArgs.begin(); + } + + static void Parse(int argc, char** argv) + { + Instance()->ParseImpl(argc, argv); + } + + static void LoadConfig(const std::string& fileName) + { + Instance()->Load(fileName); + } + + static void SaveConfig(const std::string& fileName) + { + Instance()->Save(fileName); + } + + static ICmdArg* FindCmdArg(const std::string& name) + { + return Instance()->FindRegisteredCmdArg(name); + } + + private: + static CLI* Instance() + { + static CLI s_Instance; + return &s_Instance; + } + + bool ParseImpl(int32_t argc, char** argv) + { + std::unordered_set requiredArgs; + + for (auto& registered : m_RegisteredArgs) + { + if (registered.second->IsRequired) + { + requiredArgs.insert(registered.second); + } + } + + m_UnparsedArgs.push_back(argv[0]); + + for (int32_t iArg = 1; iArg < argc; iArg++) + { + auto iter = m_RegisteredArgs.find(argv[iArg]); + if (iter != m_RegisteredArgs.end()) + { + int32_t numArgsConsumed = iter->second->Parse(argc - iArg - 1, &argv[iArg + 1]); + if 
(numArgsConsumed < 0) + { + fprintf_s(stderr, "\nError parsing command arg '%s'.", iter->second->Name.c_str()); + return false; + } + + iArg += numArgsConsumed; + + if (iter->second->IsRequired) + { + requiredArgs.erase(iter->second); + } + } + else + { + m_UnparsedArgs.push_back(argv[iArg]); + } + } + + if (!requiredArgs.empty()) + { + for (auto& required : requiredArgs) + { + fprintf_s(stderr, "\nRequired command arg '%s' not specified.", required->Name.c_str()); + } + return false; + } + + return true; + } + + // Registers pArg under the key prefix + name; a duplicate key is reported on stderr and the earlier + // registration is kept. + void Register(const char* prefix, const char* name, ICmdArg* pArg) + { + auto fullName = std::string(prefix) + name; + auto result = m_RegisteredArgs.insert(std::make_pair(fullName, pArg)); + if (!result.second) + { + fprintf_s(stderr, "\nDuplicated command argument name '%s'.", fullName.c_str()); + } + } + + // Reads "name=value" entries, one per line, from fileName and feeds each value to the persistent argument + // registered as "--name". Lines without '=' (including the blank first line written by Save) are skipped. + // A file that cannot be opened is silently ignored. + void Load(const std::string& fileName) + { + std::ifstream fs(fileName, std::ios::in); + + if (!fs.good()) + return; + + std::string line; + while (std::getline(fs, line)) + { + // Split the line that was just read; reading the name/value from the stream again would consume + // the next line and drop every second entry. + const size_t eqPos = line.find('='); + if (eqPos == std::string::npos) + continue; + + const std::string name = line.substr(0, eqPos); + std::string value = line.substr(eqPos + 1); // mutable: Parse takes char** + + auto iter = m_RegisteredArgs.find("--" + name); + if (!value.empty() && (iter != m_RegisteredArgs.end())) + { + if (!iter->second->IsPersistent) + continue; + + // TODO: only support single value case atm: + char* pText = &value[0]; + int32_t numArgsConsumed = iter->second->Parse(1, &pText); + + if (numArgsConsumed < 1) + { + fprintf_s(stderr, "\nFailed to load argument '%s'", name.c_str()); + } + } + } + } + + // Writes every persistent argument registered under a "--" key to fileName as a "name=value" line, + // each preceded by a newline. + void Save(const std::string& fileName) + { + std::ofstream fs(fileName, std::ios::out); + + if (!fs.good()) + return; + + for (auto& registered : m_RegisteredArgs) + { + if (registered.second->IsPersistent && (registered.first.find("--") == 0)) + { + fs << std::endl << registered.second->Name << "="; + registered.second->SerializeValue(fs); + } + } + } + + ICmdArg* FindRegisteredCmdArg(const std::string& name) const + { + auto iter = 
m_RegisteredArgs.find(name); + return (iter != m_RegisteredArgs.end()) ? iter->second : nullptr; + } + + std::unordered_map m_RegisteredArgs; + std::vector m_UnparsedArgs; + + template + friend class CmdArg; + }; + + template + struct CmdArgValueParser + { + int32_t operator()(T* pValue, int32_t numRemainingArgs, const char* const* pStr) + { + return -1; + } + }; + + template<> + struct CmdArgValueParser + { + int32_t operator()(bool* pValue, int32_t numRemainingArgs, const char* const* pStr) + { + int consumed = 0; + *pValue = true; + + if (numRemainingArgs > 0) + { + std::string trueStrs[] = { "1", "on", "true", "yes", "y" }; + std::string falseStrs[] = {"0", "off", "false", "no", "n"}; + + auto trueIter = std::find_if( + std::begin(trueStrs), std::end(trueStrs), [=](auto i) { return i == pStr[0]; }); + + if (std::end(trueStrs) != trueIter) + { + consumed = 1; + } + auto falseIter = + std::find_if(std::begin(falseStrs), std::end(falseStrs), [=](auto i) { return i == pStr[0]; }); + + if (std::end(falseStrs) != falseIter) + { + *pValue = false; + consumed = 1; + } + } + + return consumed; + } + }; + + template<> + struct CmdArgValueParser + { + int32_t operator()(int32_t* pValue, int32_t numRemainingArgs, const char* const* pStr) + { + if (numRemainingArgs > 0) + { + char* pEnd = nullptr; + int32_t parsedVal = std::strtol(pStr[0], &pEnd, 0); + + if (pEnd != pStr[0]) + { + *pValue = parsedVal; + return 1; + } + } + + return -1; + } + }; + + template<> + struct CmdArgValueParser + { + int32_t operator()(uint32_t* pValue, int32_t numRemainingArgs, const char* const* pStr) + { + if (numRemainingArgs > 0) + { + char* pEnd = nullptr; + uint32_t parsedVal = std::strtoul(pStr[0], &pEnd, 0); + + if (pEnd != pStr[0]) + { + *pValue = parsedVal; + return 1; + } + } + + return -1; + } + }; + + template<> + struct CmdArgValueParser + { + int32_t operator()(uint64_t* pValue, int32_t numRemainingArgs, const char* const* pStr) + { + if (numRemainingArgs > 0) + { + char* pEnd = 
nullptr; + uint64_t parsedVal = std::strtoull(pStr[0], &pEnd, 0); + + if (pEnd != pStr[0]) + { + *pValue = parsedVal; + return 1; + } + } + + return -1; + } + }; + + template<> + struct CmdArgValueParser + { + int32_t operator()(std::string* pValue, int32_t numRemainingArgs, const char* const* pStr) + { + if (numRemainingArgs > 0) + { + *pValue = pStr[0]; + return 1; + } + + return -1; + } + }; + + template + class CmdArg : private ICmdArg + { + T Value; + + public: + explicit CmdArg(const char* name, + const T& defaultVal = {}, + std::initializer_list aliases = {}, + bool isPersistent = true, + bool isRequired = false) + : ICmdArg(name, isPersistent, isRequired) + , Value(defaultVal) + { + CLI::Instance()->Register("--", name, this); + for (auto& a : aliases) + { + CLI::Instance()->Register("-", a, this); + } + } + + virtual int32_t Parse(int32_t numRemainingArgs, char** ppRemainingArgs) override + { + return CmdArgValueParser()(&Value, numRemainingArgs, ppRemainingArgs); + } + + virtual void SerializeValue(std::ostream& s) override + { + s << Value; + } + + virtual void* GetValuePointer(size_t expectedSize) override + { + return (expectedSize == sizeof(T)) ? &Value : nullptr; + } + + operator const T& () const + { + return Value; + } + + const T& get() const + { + return Value; + } + + const T* operator->() const + { + return &Value; + } + + const T& operator=(const T& val) + { + Value = val; + return *this; + } + + T* operator&() + { + return &Value; + } + + const T* operator&() const + { + return &Value; + } + }; +} diff --git a/tools/app_framework/afx_common_helpers.h b/tools/app_framework/afx_common_helpers.h new file mode 100644 index 0000000..05b66f5 --- /dev/null +++ b/tools/app_framework/afx_common_helpers.h @@ -0,0 +1,279 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. 
+// +// See file LICENSE.RTF for full license details. + +#pragma once + +#include +#include +#include +#include + +#include "rps/core/rps_api.h" + +static constexpr uint32_t DivRoundUp(uint32_t dividend, uint32_t divisor) +{ + return (dividend + divisor - 1) / divisor; +} + +template ::value>::type> +static constexpr T AlignUp(T offset, T alignment) +{ + return alignment ? (offset + (alignment - T(1))) & ~(alignment - T(1)) : offset; +} + +static inline uint32_t AsUInt(float f) +{ + uint32_t result; + static_assert(sizeof(float) == sizeof(uint32_t), "Unsupported float size"); + memcpy(&result, &f, sizeof(float)); + return result; +} + +struct RpsAfxCpuTimer +{ + using time_point = std::chrono::time_point; + + static_assert(std::chrono::steady_clock::period::num * std::micro::den <= + std::micro::num * std::chrono::steady_clock::period::den, + "Timer resolution not enough."); + + static time_point Now() + { + return std::chrono::steady_clock::now(); + } + static std::chrono::duration SecondsSinceEpoch() + { + return Now().time_since_epoch(); + } +}; + +struct RpsAfxScopedCpuTimer +{ + std::chrono::time_point m_StartTime; + + const char* m_Id = nullptr; + int64_t* m_pDuration = nullptr; + + RpsAfxScopedCpuTimer(const char* id) + : RpsAfxScopedCpuTimer(id, nullptr) + { + } + + RpsAfxScopedCpuTimer(int64_t* pDuration) + : RpsAfxScopedCpuTimer(nullptr, pDuration) + { + } + + RpsAfxScopedCpuTimer(const char* id, int64_t* pDuration) + : m_Id(id) + , m_pDuration(pDuration) + { + m_StartTime = RpsAfxCpuTimer::Now(); + } + + ~RpsAfxScopedCpuTimer() + { + std::chrono::duration duration = RpsAfxCpuTimer::Now() - m_StartTime; + + const int64_t microSecs = int64_t(duration.count()); + + if (m_Id) + { +#if _MSC_VER + fprintf_s +#else + fprintf +#endif + (stderr, "\nTimer '%s' : %" PRId64 " us\n", m_Id, microSecs); + } + + if (m_pDuration) + { + *m_pDuration = microSecs; + } + } +}; + +template +struct RpsAfxAveragedSampler +{ + std::chrono::milliseconds m_resetInterval = 
std::chrono::milliseconds(1000); + RpsAfxCpuTimer::time_point m_lastResetTime = RpsAfxCpuTimer::Now(); + TValue m_lastSample = 0; + TValue m_sum = 0; + TValue m_avg = 0; + uint32_t m_count = 0; + + void Update(TValue sample) + { + auto nowTime = RpsAfxCpuTimer::Now(); + if ((m_count > 0) && ((nowTime - m_lastResetTime) > m_resetInterval)) + { + m_avg = m_sum / m_count; + + m_lastResetTime = nowTime; + m_count = 0; + m_sum = 0; + } + + m_lastSample = sample; + m_sum = m_sum + sample; + m_count++; + } +}; + +// TODO: pull out Win32 stuff below to make non-platform dependent. +struct RpsAfxJITHelper +{ + RpsAfxJITHelper(int argc = 0, const char** argv = nullptr) + { +#if USE_RPSL_JIT + hRpsJITDll = ::LoadLibraryA("rps-jit.dll"); + + if (hRpsJITDll) + { + pfnRpsJITStartup = PFN_RpsJITStartup(GetProcAddress(hRpsJITDll, RPS_JIT_PROC_NAME_STARTUP)); + + pfnRpsJITShutdown = PFN_RpsJITShutdown(GetProcAddress(hRpsJITDll, RPS_JIT_PROC_NAME_SHUTDOWN)); + + pfnRpsJITLoad = PFN_RpsJITLoad(GetProcAddress(hRpsJITDll, RPS_JIT_PROC_NAME_LOAD)); + + pfnRpsJITUnload = PFN_RpsJITUnload(GetProcAddress(hRpsJITDll, RPS_JIT_PROC_NAME_UNLOAD)); + + pfnRpsJITGetEntryPoint = + PFN_RpsJITGetEntryPoint(GetProcAddress(hRpsJITDll, RPS_JIT_PROC_NAME_GETENTRYPOINT)); + + if (pfnRpsJITStartup) + { + const char* emptyArgs[] = {""}; + int32_t startupResult = pfnRpsJITStartup(argc ? argc : 1, argv ? 
argv : emptyArgs); + + if (startupResult < 0) + { + pfnRpsJITStartup = nullptr; + } + } + } +#endif //USE_RPSL_JIT + } + + ~RpsAfxJITHelper() + { +#if USE_RPSL_JIT + if (pfnRpsJITShutdown) + { + pfnRpsJITShutdown(); + } + + if (hRpsJITDll) + { + ::FreeLibrary(hRpsJITDll); + } +#endif //USE_RPSL_JIT + } + + bool IsValid() const + { +#if USE_RPSL_JIT + return hRpsJITDll && pfnRpsJITStartup && pfnRpsJITShutdown && pfnRpsJITLoad && pfnRpsJITUnload && + pfnRpsJITGetEntryPoint; +#else + return false; +#endif //USE_RPSL_JIT + } + + RpsJITModule LoadBitcode(const char* bitcodeFile, int64_t* pJitTiming = nullptr) + { + RpsJITModule hJITModule = RPS_NULL_HANDLE; + RpsResult result = RpsResult(pfnRpsJITLoad(bitcodeFile, &hJITModule)); + + if (RPS_FAILED(result)) + return nullptr; + + uint64_t dynLibInitFnAddr = 0; + + do + { + RpsAfxScopedCpuTimer timer(pJitTiming); + result = RpsResult(pfnRpsJITGetEntryPoint(hJITModule, "___rps_dyn_lib_init", &dynLibInitFnAddr)); + } while (false); + + if (dynLibInitFnAddr != 0) + { + auto pfnDynLibInit = PFN_rpslDynLibInit(uintptr_t(dynLibInitFnAddr)); + result = rpsRpslDynamicLibraryInit(pfnDynLibInit); + + if (RPS_SUCCEEDED(result)) + { + return hJITModule; + } + } + + pfnRpsJITUnload(hJITModule); + + return nullptr; + } + + const char* GetModuleName(RpsJITModule hJIT) + { + const char* const* ppName = GetEntryPoint(hJIT, RPS_MODULE_ID_NAME); + return ppName ? *ppName : nullptr; + } + + const char* const* GetEntryNameTable(RpsJITModule hJITModule) + { + const char* const* ppNameTable = GetEntryPoint(hJITModule, RPS_ENTRY_TABLE_NAME); + return ppNameTable; + } + + RpsRpslEntry GetEntryPoint(RpsJITModule hJITModule, const char* symbolName) const + { + const RpsRpslEntry* pEntry = GetEntryPoint(hJITModule, symbolName); + return (pEntry != 0) ? 
*pEntry : nullptr; + } + + template + const T* GetEntryPoint(RpsJITModule hJITModule, const char* symbolName) const + { + uint64_t rpslEntryAddr = 0; + pfnRpsJITGetEntryPoint(hJITModule, symbolName, &rpslEntryAddr); + return reinterpret_cast(static_cast(rpslEntryAddr)); + } + +#if USE_RPSL_JIT + HMODULE hRpsJITDll = {}; +#endif //USE_RPSL_JIT + PFN_RpsJITStartup pfnRpsJITStartup = {}; + PFN_RpsJITShutdown pfnRpsJITShutdown = {}; + PFN_RpsJITLoad pfnRpsJITLoad = {}; + PFN_RpsJITUnload pfnRpsJITUnload = {}; + PFN_RpsJITGetEntryPoint pfnRpsJITGetEntryPoint = {}; + +private: + RPS_CLASS_NO_COPY_MOVE(RpsAfxJITHelper); +}; + +bool WriteToFile(const std::string& fileName, const void* pBuf, size_t bufSize) +{ + FILE* fp = {}; +#ifdef _MSC_VER + fopen_s(&fp, fileName.c_str(), "wb"); +#else + fp = fopen(fileName.c_str(), "wb"); +#endif + + if (fp) + { + size_t written = fwrite(pBuf, 1, bufSize, fp); + + fclose(fp); + + return written == bufSize; + } + + return false; +} diff --git a/tools/app_framework/afx_d3d11_renderer.h b/tools/app_framework/afx_d3d11_renderer.h new file mode 100644 index 0000000..aa40e7c --- /dev/null +++ b/tools/app_framework/afx_d3d11_renderer.h @@ -0,0 +1,616 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +#include + +#ifndef RPS_D3D11_RUNTIME +#define RPS_D3D11_RUNTIME 1 +#endif //RPS_D3D12_RUNTIME + +#include "rps/rps.h" + +#include "afx_renderer.h" +#include "afx_d3d_helper.h" +#include "afx_shader_compiler.h" + +#pragma comment(lib, "d3d11.lib") +#pragma comment(lib, "dxgi.lib") +#pragma comment(lib, "dxguid.lib") +#pragma comment(lib, "d3dcompiler.lib") + +using Microsoft::WRL::ComPtr; + +class RpsAfxD3D11Renderer : public RpsAfxRendererBase +{ +public: + virtual bool Init(void* hWindow) override final + { + m_hWnd = (HWND)hWindow; + + RECT clientRect = {}; + ::GetClientRect(m_hWnd, &clientRect); + m_width = clientRect.right - clientRect.left; + m_height = clientRect.bottom - clientRect.top; + + // Create Device + UINT dxgiFactoryFlags = 0; + UINT deviceCreateFlags = 0; + +#if defined(_DEBUG) + ComPtr debugController; + + // Enable additional debug layers. + dxgiFactoryFlags |= DXGI_CREATE_FACTORY_DEBUG; + deviceCreateFlags |= D3D11_CREATE_DEVICE_DEBUG; +#endif + + ComPtr factory; + ThrowIfFailed(CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(&factory))); + + D3D_FEATURE_LEVEL featureLevels[] = {D3D_FEATURE_LEVEL_11_1}; + + auto checkDevice = [featureLevels](IDXGIAdapter1* pAdapter) { + return SUCCEEDED(D3D11CreateDevice(pAdapter, + D3D_DRIVER_TYPE_UNKNOWN, + nullptr, + 0, + featureLevels, + _countof(featureLevels), + D3D11_SDK_VERSION, + nullptr, + nullptr, + nullptr)); + }; + + ComPtr adapter = nullptr; + FindAdapter(factory.Get(), checkDevice, adapter.GetAddressOf(), m_useWarpDevice); + + if (adapter == nullptr) + { + throw std::exception(); + } + + ThrowIfFailed(D3D11CreateDevice(adapter.Get(), + D3D_DRIVER_TYPE_UNKNOWN, + nullptr, + deviceCreateFlags, + featureLevels, + _countof(featureLevels), + D3D11_SDK_VERSION, + &m_device, + nullptr, + &m_immDC)); + +#define D3D_SDK_LAYER_BREAK_ON_WARNING 0 +#if D3D_SDK_LAYER_BREAK_ON_WARNING + ComPtr infoQueue = nullptr; + 
if (SUCCEEDED(m_device->QueryInterface(__uuidof(ID3D11InfoQueue), (void**)&infoQueue))) + { + infoQueue->SetBreakOnSeverity(D3D11_MESSAGE_SEVERITY_ERROR, TRUE); + infoQueue->SetBreakOnSeverity(D3D11_MESSAGE_SEVERITY_WARNING, TRUE); + } +#endif + + ThrowIfFailed(m_swapChain.Create( + factory.Get(), m_backBufferCount, m_width, m_height, DXGI_FORMAT_R8G8B8A8_UNORM, m_device.Get(), m_hWnd)); + UpdateSwapChainBuffers(); + + OnInit(); + + return true; + } + + virtual void Tick() override final + { + OnUpdate(m_frameCounter); + + OnRender(m_frameCounter); + + m_swapChain.Present(m_bVSync ? 1 : 0, 0); + + m_backBufferIndex = m_swapChain.GetCurrentBackBufferIndex(); + m_frameCounter++; + } + + virtual void CleanUp() override final + { + WaitForGpuIdle(); + + OnCleanUp(); + + m_deferredContexts.clear(); + m_swapChainRtv = nullptr; + m_backBuffer = nullptr; + m_swapChain.Destroy(); + m_immDC = nullptr; + +#if _DEBUG + ComPtr pDebugDevice; + m_device.As(&pDebugDevice); + if (pDebugDevice) + { + pDebugDevice->ReportLiveDeviceObjects(D3D11_RLDO_DETAIL | D3D11_RLDO_IGNORE_INTERNAL); + pDebugDevice = nullptr; + } +#endif + m_device = nullptr; + } + + virtual void OnResize(uint32_t width, uint32_t height) override final + { + if ((width > 0 && height > 0) && ((m_width != width) || (m_height != height))) + { + WaitForGpuIdle(); + + OnPreResize(); + + m_swapChainRtv = nullptr; + m_backBuffer = nullptr; + + DXGI_SWAP_CHAIN_DESC desc = {}; + BOOL bFullScreen = FALSE; + ThrowIfFailed(m_swapChain.GetFullscreenState(&bFullScreen, nullptr)); + ThrowIfFailed(m_swapChain.GetDesc(&desc)); + ThrowIfFailed( + m_swapChain.ResizeBuffers(m_backBufferCount, width, height, desc.BufferDesc.Format, desc.Flags)); + UpdateSwapChainBuffers(); + + m_width = width; + m_height = height; + + OnPostResize(); + } + } + +protected: + + virtual bool WaitForGpuIdle() override final + { + D3D11_QUERY_DESC queryDesc = {}; + queryDesc.Query = D3D11_QUERY_EVENT; + + ComPtr pEventQuery; + 
ThrowIfFailed(m_device->CreateQuery(&queryDesc, &pEventQuery)); + + m_immDC->End(pEventQuery.Get()); + + BOOL data; + while (S_FALSE == m_immDC->GetData(pEventQuery.Get(), &data, sizeof(data), 0)) + { + ::Sleep(100); + } + + return true; + } + + void UpdateSwapChainBuffers() + { + m_backBufferIndex = m_swapChain.GetCurrentBackBufferIndex(); + + if ((m_frameCounter % m_backBufferCount) != m_backBufferIndex) + m_frameCounter = m_backBufferIndex; + + ThrowIfFailed(m_swapChain.GetBuffer(0, IID_PPV_ARGS(&m_backBuffer))); + m_device->CreateRenderTargetView(m_backBuffer.Get(), nullptr, &m_swapChainRtv); + } + + ID3D11Resource* GetBackBuffer() const + { + return m_backBuffer.Get(); + } + + ID3D11RenderTargetView* GetBackBufferRTV() const + { + return m_swapChainRtv.Get(); + } + + void GetBackBuffers(RpsResourceDesc& backBufferDesc, + RpsRuntimeResource backBuffers[DXGI_MAX_SWAP_CHAIN_BUFFERS]) const + { + backBuffers[0] = rpsD3D11ResourceToHandle(m_backBuffer.Get()); + backBufferDesc.type = RPS_RESOURCE_TYPE_IMAGE_2D; + backBufferDesc.temporalLayers = 1; + backBufferDesc.flags = 0; + backBufferDesc.image.arrayLayers = 1; + backBufferDesc.image.mipLevels = 1; + backBufferDesc.image.format = rpsFormatFromDXGI(m_swapChain.GetFormat()); + backBufferDesc.image.width = m_width; + backBufferDesc.image.height = m_height; + backBufferDesc.image.sampleCount = 1; + } + +public: + struct ActiveCommandList + { + uint32_t backBufferIndex; + ComPtr cmdList; + + ID3D11DeviceContext* operator->() const + { + return cmdList.Get(); + } + }; + +protected: + virtual void OnInit() + { + } + + virtual void OnCleanUp() + { + } + + virtual void OnPreResize() + { + } + + virtual void OnPostResize() + { + } + + virtual void OnUpdate(uint32_t frameIndex) + { + } + + virtual void OnRender(uint32_t frameIndex) + { + } + + virtual RpsResult CreateRpsRuntimeDevice(const RpsDeviceCreateInfo& createInfo, RpsDevice& device) override + { + RpsD3D11RuntimeDeviceCreateInfo runtimeDeviceCreateInfo = {}; + 
runtimeDeviceCreateInfo.pDeviceCreateInfo = &createInfo; + runtimeDeviceCreateInfo.pD3D11Device = m_device.Get(); + + return rpsD3D11RuntimeDeviceCreate(&runtimeDeviceCreateInfo, &device); + } + + RpsResult ExecuteRenderGraph(uint32_t frameIndex, RpsRenderGraph hRenderGraph) + { + RpsRenderGraphBatchLayout batchLayout = {}; + + RpsResult result = rpsRenderGraphGetBatchLayout(hRenderGraph, &batchLayout); + if (RPS_FAILED(result)) + { + return result; + } + + for (uint32_t iBatch = 0; iBatch < batchLayout.numCmdBatches; iBatch++) + { + auto& batch = batchLayout.pCmdBatches[iBatch]; + + ActiveCommandList cmdList{m_backBufferIndex, m_immDC}; + + RpsRenderGraphRecordCommandInfo recordInfo = {}; + + recordInfo.pUserContext = this; + recordInfo.cmdBeginIndex = batch.cmdBegin; + recordInfo.numCmds = batch.numCmds; + recordInfo.hCmdBuffer = rpsD3D11DeviceContextToHandle(cmdList.cmdList.Get()); + + result = rpsRenderGraphRecordCommands(hRenderGraph, &recordInfo); + if (RPS_FAILED(result)) + return result; + } + + return result; + } + + ActiveCommandList AcquireDeferredContext() + { + ActiveCommandList result = {}; + result.backBufferIndex = m_backBufferIndex; + + std::lock_guard lock(m_cmdListMutex); + + if (m_deferredContexts.empty()) + { + ThrowIfFailed(m_device->CreateDeferredContext(0, &result.cmdList)); + } + else + { + result.cmdList = m_deferredContexts.back(); + m_deferredContexts.pop_back(); + } + + return result; + } + + void RecycleCmdList(ActiveCommandList& cmdList) + { + if (cmdList.cmdList == m_immDC) + return; + + std::lock_guard lock(m_cmdListMutex); + + m_deferredContexts.push_back(cmdList.cmdList); + cmdList.cmdList = nullptr; + } + + struct SwapChain + { + HRESULT Create(IDXGIFactory2* pFactory, + uint32_t backBufferCount, + uint32_t width, + uint32_t height, + DXGI_FORMAT backBufferFormat, + ID3D11Device* pDevice, + HWND hWnd) + { + if (m_swapChain) + return S_FALSE; + + m_pDevice = pDevice; + m_hWnd = hWnd; + + // Describe and create the swap chain. 
+ DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {}; + swapChainDesc.BufferCount = backBufferCount; + swapChainDesc.Width = width; + swapChainDesc.Height = height; + swapChainDesc.Format = backBufferFormat; + swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + swapChainDesc.SampleDesc.Count = 1; + + ComPtr swapChain; + + HRESULT hr = pFactory->CreateSwapChainForHwnd(pDevice, hWnd, &swapChainDesc, nullptr, nullptr, &swapChain); + if (hr != DXGI_ERROR_NOT_CURRENTLY_AVAILABLE) + { + ThrowIfFailed(hr); + + // This sample does not support fullscreen transitions. + ThrowIfFailed(pFactory->MakeWindowAssociation(hWnd, DXGI_MWA_NO_ALT_ENTER)); + + ThrowIfFailed(swapChain.As(&m_swapChain)); + } + else + { + DWORD sessionId = 0; + ProcessIdToSessionId(GetCurrentProcessId(), &sessionId); + if (sessionId == 0) + { + fprintf_s(stderr, "\nCreating fallback dummy swapchain for session 0 process."); + hr = ResizeBuffers(backBufferCount, width, height, backBufferFormat, 0); + } + } + + return hr; + } + + void Destroy() + { + m_swapChain = nullptr; + m_backBufferIndex = 0; + m_buffers.clear(); + } + + HRESULT GetFullscreenState(BOOL* pbFullscreen, IDXGIOutput** ppOutput) + { + if (m_swapChain) + return m_swapChain->GetFullscreenState(pbFullscreen, ppOutput); + + *pbFullscreen = FALSE; + *ppOutput = nullptr; + return S_OK; + } + + HRESULT GetDesc(DXGI_SWAP_CHAIN_DESC* pDesc) const + { + if (m_swapChain) + return m_swapChain->GetDesc(pDesc); + + if (m_buffers.empty()) + return E_FAIL; + + D3D11_TEXTURE2D_DESC backBufferDesc; + m_buffers[0]->GetDesc(&backBufferDesc); + + pDesc->BufferDesc.Width = (uint32_t)backBufferDesc.Width; + pDesc->BufferDesc.Height = backBufferDesc.Height; + pDesc->BufferDesc.RefreshRate.Denominator = 1; + pDesc->BufferDesc.RefreshRate.Numerator = 60; + pDesc->BufferDesc.Format = backBufferDesc.Format; + pDesc->BufferDesc.ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_PROGRESSIVE; + 
pDesc->BufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED; + pDesc->SampleDesc.Count = 1; + pDesc->SampleDesc.Quality = 0; + pDesc->BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + pDesc->BufferCount = (UINT)m_buffers.size(); + pDesc->OutputWindow = m_hWnd; + pDesc->Windowed = TRUE; + pDesc->SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + pDesc->Flags = 0; + + return S_OK; + } + + DXGI_FORMAT GetFormat() const + { + DXGI_SWAP_CHAIN_DESC desc; + if (SUCCEEDED(GetDesc(&desc))) + return desc.BufferDesc.Format; + else + return DXGI_FORMAT_UNKNOWN; + } + + HRESULT ResizeBuffers(UINT backBufferCount, UINT width, UINT height, DXGI_FORMAT backBufferFormat, UINT flags) + { + m_buffers.clear(); + + if (m_swapChain) + return m_swapChain->ResizeBuffers(backBufferCount, width, height, backBufferFormat, flags); + + m_buffers.resize(backBufferCount); + for (uint32_t i = 0; i < backBufferCount; i++) + { + auto resDesc = CD3D11_TEXTURE2D_DESC( + backBufferFormat, + width, + height, + 1, + 1, + D3D11_BIND_RENDER_TARGET | D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE, + D3D11_USAGE_DEFAULT, + 0, + 1, + 0, + 0); + + ThrowIfFailed(m_pDevice->CreateTexture2D(&resDesc, nullptr, &m_buffers[i])); + } + m_backBufferIndex = 0; + + return S_OK; + } + + HRESULT GetBuffer(UINT index, REFIID riid, void** ppSurface) + { + if (m_swapChain) + { + return m_swapChain->GetBuffer(index, riid, ppSurface); + } + else + { + return m_buffers[index]->QueryInterface(riid, ppSurface); + } + } + + HRESULT Present(UINT syncInternal, UINT flags) + { + HRESULT hr = S_OK; + if (m_swapChain) + { + hr = m_swapChain->Present(syncInternal, flags); + m_backBufferIndex = m_swapChain->GetCurrentBackBufferIndex(); + } + else + { + m_backBufferIndex = (m_backBufferIndex + 1) % m_buffers.size(); + } + return hr; + } + + UINT GetCurrentBackBufferIndex() + { + return m_swapChain ? 
m_swapChain->GetCurrentBackBufferIndex() : m_backBufferIndex; + } + + HWND m_hWnd; + ComPtr m_pDevice; + ComPtr m_swapChain; + std::vector> m_buffers; + uint32_t m_backBufferIndex; + }; + + uint64_t CalcGuaranteedCompletedFrameIndexForRps() const + { + // we always wait for swapchain buffer before rendering a new frame, so can guarntee at least + // this index for gpu complete status + return (m_frameCounter > m_backBufferCount) ? m_frameCounter - m_backBufferCount + : RPS_GPU_COMPLETED_FRAME_INDEX_NONE; + } + + void CreateStaticCheckerboardTexture(ComPtr& texture, + uint32_t texWidth, + uint32_t texHeight, + const float tintColor[4]) const + { + static const UINT TexturePixelSize = 4; + + // Describe and create a Texture2D. + D3D11_TEXTURE2D_DESC textureDesc = {}; + + textureDesc.Width = texWidth; + textureDesc.Height = texHeight; + textureDesc.MipLevels = 1; + textureDesc.ArraySize = 1; + textureDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + textureDesc.SampleDesc.Count = 1; + textureDesc.Usage = D3D11_USAGE_IMMUTABLE; + textureDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + textureDesc.CPUAccessFlags = 0; + + // Copy data to the intermediate upload heap and then schedule a copy + // from the upload heap to the Texture2D. + const UINT rowPitch = texWidth * TexturePixelSize; + const UINT cellPitch = rowPitch >> 3; // The width of a cell in the checkboard texture. + const UINT cellHeight = texWidth >> 3; // The height of a cell in the checkerboard texture. 
+ const UINT textureSize = rowPitch * texHeight; + + std::vector data(textureSize); + UINT8* pData = &data[0]; + +#define RPS_AFX_SCALE_BYTE(B, S) (std::max(0, std::min(0xff, (int32_t((B) * (S)))))) + + for (UINT n = 0; n < textureSize; n += TexturePixelSize) + { + UINT x = n % rowPitch; + UINT y = n / rowPitch; + UINT i = x / cellPitch; + UINT j = y / cellHeight; + + if (i % 2 == j % 2) + { + pData[n] = RPS_AFX_SCALE_BYTE(0xa0, tintColor[0]); // R + pData[n + 1] = RPS_AFX_SCALE_BYTE(0xa0, tintColor[1]); // G + pData[n + 2] = RPS_AFX_SCALE_BYTE(0xa0, tintColor[2]); // B + pData[n + 3] = RPS_AFX_SCALE_BYTE(0xff, tintColor[3]); // A + } + else + { + pData[n] = RPS_AFX_SCALE_BYTE(0xff, tintColor[0]); // R + pData[n + 1] = RPS_AFX_SCALE_BYTE(0xff, tintColor[1]); // G + pData[n + 2] = RPS_AFX_SCALE_BYTE(0xff, tintColor[2]); // B + pData[n + 3] = RPS_AFX_SCALE_BYTE(0xff, tintColor[3]); // A + } + } + +#undef RPS_AFX_SCALE_BYTE + + D3D11_SUBRESOURCE_DATA textureData = {}; + textureData.pSysMem = &data[0]; + textureData.SysMemPitch = texWidth * TexturePixelSize; + textureData.SysMemSlicePitch = textureData.SysMemPitch * texHeight; + + ComPtr tex2D; + ThrowIfFailed(m_device->CreateTexture2D(&textureDesc, &textureData, &tex2D)); + + texture = tex2D; + } + +protected: + bool m_useWarpDevice = false; + bool m_bVSync = g_VSync; + HWND m_hWnd = NULL; + UINT m_width = 0; + UINT m_height = 0; + uint32_t m_backBufferCount = 3; + ComPtr m_device; + SwapChain m_swapChain; + uint32_t m_backBufferIndex = 0; + ComPtr m_immDC; + ComPtr m_backBuffer; + ComPtr m_swapChainRtv; + + std::vector> m_deferredContexts; + std::mutex m_cmdListMutex; + + uint32_t m_frameCounter = 0; +}; diff --git a/tools/app_framework/afx_d3d12_renderer.h b/tools/app_framework/afx_d3d12_renderer.h new file mode 100644 index 0000000..ad12b84 --- /dev/null +++ b/tools/app_framework/afx_d3d12_renderer.h @@ -0,0 +1,1167 @@ +// Modifications Copyright (C) 2021 Advanced Micro Devices, Inc. + +// Copyright (c) Microsoft. 
All rights reserved. +// +// This code is licensed under the MIT License (MIT). +// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY +// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR +// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "d3dx12.h" + +#ifndef RPS_D3D12_RUNTIME +#define RPS_D3D12_RUNTIME 1 +#endif //RPS_D3D12_RUNTIME + +#include "rps/rps.h" + +#include "afx_renderer.h" +#include "afx_d3d_helper.h" +#include "afx_shader_compiler.h" + +#pragma comment(lib, "d3d12.lib") +#pragma comment(lib, "dxgi.lib") +#pragma comment(lib, "dxguid.lib") +#pragma comment(lib, "d3dcompiler.lib") + +using Microsoft::WRL::ComPtr; + +static rps::CmdArg g_WarpDevice{"warp-device", false, {"warp"}, false}; +static rps::CmdArg g_Dx12PreferEnhancedBarriers{"dx12-enhanced-barriers", false, {"dx12-eb"}}; + +class RpsAfxD3D12Renderer : public RpsAfxRendererBase +{ +public: + static const uint32_t NUM_SHADER_VISIBLE_DESCRIPTOR_HEAPS = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER + 1; + + struct DescriptorHeapSizeRequirement + { + uint32_t staticCount; + uint32_t dynamicCountPerFrame; + }; + + RpsAfxD3D12Renderer() + { + m_descriptorHeapSizes[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = {1024, 512}; + m_descriptorHeapSizes[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = {128, 64}; + m_descriptorHeapSizes[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = {64, 0}; + m_descriptorHeapSizes[D3D12_DESCRIPTOR_HEAP_TYPE_DSV] = {64, 0}; + } + + virtual bool Init(void* hWindow) override final + { + m_hWnd = (HWND)hWindow; + + RECT clientRect = {}; + ::GetClientRect(m_hWnd, &clientRect); + m_width = clientRect.right - clientRect.left; + m_height = clientRect.bottom - clientRect.top; + + // Create Device + UINT dxgiFactoryFlags = 0; + + if (g_DebugDevice) + { + ComPtr debugController; + + // Enable the debug layer (requires the Graphics Tools "optional 
feature"). + // NOTE: Enabling the debug layer after device creation will invalidate the active device. + { + if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&debugController)))) + { + debugController->EnableDebugLayer(); + + // Enable additional debug layers. + dxgiFactoryFlags |= DXGI_CREATE_FACTORY_DEBUG; + } + } + } + + ComPtr factory; + ThrowIfFailed(CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(&factory))); + + auto checkDevice = [](IDXGIAdapter1* pAdapter) { + return SUCCEEDED(D3D12CreateDevice(pAdapter, D3D_FEATURE_LEVEL_11_0, _uuidof(ID3D12Device), nullptr)); + }; + + ComPtr adapter = nullptr; + FindAdapter(factory.Get(), checkDevice, adapter.GetAddressOf(), m_useWarpDevice); + + if (adapter == nullptr) + { + throw std::exception(); + } + + ThrowIfFailed(D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device))); + + if (g_DebugDevice && (g_DebugDeviceBreakLevel != RPS_AFX_DEBUG_MSG_SEVERITY_NONE)) + { + ComPtr infoQueue = nullptr; + if (SUCCEEDED(m_device->QueryInterface(__uuidof(ID3D12InfoQueue), (void**)&infoQueue))) + { + static constexpr struct + { + uint32_t bit; + D3D12_MESSAGE_SEVERITY d3dSeverity; + } severityMap[] = { + {RPS_AFX_DEBUG_MSG_SEVERITY_CORRUPTION, D3D12_MESSAGE_SEVERITY_CORRUPTION}, + {RPS_AFX_DEBUG_MSG_SEVERITY_ERROR, D3D12_MESSAGE_SEVERITY_ERROR}, + {RPS_AFX_DEBUG_MSG_SEVERITY_WARNING, D3D12_MESSAGE_SEVERITY_WARNING}, + {RPS_AFX_DEBUG_MSG_SEVERITY_INFO, D3D12_MESSAGE_SEVERITY_INFO}, + }; + + for (auto i = std::begin(severityMap), e = std::end(severityMap); i != e; ++i) + { + if (i->bit & g_DebugDeviceBreakLevel) + { + infoQueue->SetBreakOnSeverity(i->d3dSeverity, TRUE); + } + } + } + } + + for (UINT i = 0; i < D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES; i++) + { + m_descriptorSizes[i] = + m_device->GetDescriptorHandleIncrementSize(static_cast(i)); + + m_descriptorHeaps[i].Init(m_device.Get(), + static_cast(i), + m_descriptorHeapSizes[i].staticCount, + m_descriptorHeapSizes[i].dynamicCountPerFrame, + 
DXGI_MAX_SWAP_CHAIN_BUFFERS); + } + + m_swapChainRtvs = AllocStaticDescriptors(D3D12_DESCRIPTOR_HEAP_TYPE_RTV, DXGI_MAX_SWAP_CHAIN_BUFFERS); + + D3D12_COMMAND_QUEUE_DESC queueDesc = {}; + queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; + queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + queueDesc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL; + queueDesc.NodeMask = 1; + + ThrowIfFailed(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&m_presentQueue))); + + m_queueIndexToCmdListTypeMap[RPS_AFX_QUEUE_INDEX_GFX] = D3D12_COMMAND_LIST_TYPE_DIRECT; + m_queueIndexToCmdListTypeMap[RPS_AFX_QUEUE_INDEX_COMPUTE] = D3D12_COMMAND_LIST_TYPE_COMPUTE; + m_queueIndexToCmdListTypeMap[RPS_AFX_QUEUE_INDEX_COPY] = D3D12_COMMAND_LIST_TYPE_COPY; + + for (uint32_t i = 0; i < _countof(m_fences); i++) + { + ThrowIfFailed(m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_fences[i]))); + } + + m_fenceEvent = ::CreateEvent(NULL, FALSE, FALSE, NULL); + + std::fill(m_presentFenceValues.begin(), m_presentFenceValues.end(), 0ULL); + + ThrowIfFailed(m_swapChain.Create(factory.Get(), + m_backBufferCount, + m_width, + m_height, + DXGI_FORMAT_R8G8B8A8_UNORM, + m_device.Get(), + m_presentQueue.Get(), + m_hWnd)); + UpdateSwapChainBuffers(); + + ActiveCommandList cmdList = AcquireCmdList(RPS_AFX_QUEUE_INDEX_GFX); + + // To keep alive until init cmdlist is executed. 
+ std::vector> tempResources; + + OnInit(cmdList.cmdList.Get(), tempResources); + + CloseCmdList(cmdList); + ID3D12CommandList* pCmdLists[] = {cmdList.cmdList.Get()}; + m_presentQueue->ExecuteCommandLists(1, pCmdLists); + RecycleCmdList(cmdList); + + WaitForGpuIdle(); + + return true; + } + + virtual void Tick() override final + { + OnUpdate(m_frameCounter); + + WaitForSwapChainBuffer(); + + m_pendingReleaseResources[m_backBufferIndex].clear(); + for (uint32_t i = 0; i < _countof(m_descriptorHeaps); i++) + { + m_descriptorHeaps[i].ResetDynamic(m_backBufferIndex); + } + + for (uint32_t i = 0; i < RPS_AFX_QUEUE_INDEX_COUNT; i++) + { + if (m_backBufferIndex < m_cmdAllocators[i].size()) + { + for (auto& cmdAllocator : m_cmdAllocators[i][m_backBufferIndex]) + { + ThrowIfFailed(cmdAllocator->Reset()); + } + } + } + + OnRender(m_frameCounter); + + BOOL bFullscreen; + m_swapChain.GetFullscreenState(&bFullscreen, nullptr); + m_swapChain.Present(m_bVSync ? 1 : 0, (m_bVSync || bFullscreen) ? 0 : DXGI_PRESENT_ALLOW_TEARING); + + m_fenceValue++; + ThrowIfFailed(m_presentQueue->Signal(m_fences[0].Get(), m_fenceValue)); + m_presentFenceValues[m_backBufferIndex] = m_fenceValue; + + m_backBufferIndex = m_swapChain.GetCurrentBackBufferIndex(); + m_frameCounter++; + } + + virtual void CleanUp() override final + { + WaitForGpuIdle(); + + OnCleanUp(); + + for (uint32_t i = 0; i < _countof(m_descriptorHeaps); i++) + { + m_descriptorHeaps[i].CleanUp(); + } + + for (uint32_t i = 0; i < _countof(m_fences); i++) + { + m_fences[i] = nullptr; + } + + for (uint32_t i = 0; i < RPS_AFX_QUEUE_INDEX_COUNT; i++) + { + m_cmdAllocators[i].clear(); + m_cmdLists[i].clear(); + m_queues[i] = nullptr; + } + + for (uint32_t i = 0; i < _countof(m_descriptorHeaps); i++) + { + m_descriptorHeaps[i].CleanUp(); + } + + m_pendingReleaseResources.clear(); + + m_backBuffers.clear(); + + m_swapChain.Destroy(); + m_presentQueue = nullptr; + +#if _DEBUG + ComPtr pDebugDevice; + m_device.As(&pDebugDevice); + if 
(pDebugDevice) + { + pDebugDevice->ReportLiveDeviceObjects(D3D12_RLDO_DETAIL | D3D12_RLDO_IGNORE_INTERNAL); + pDebugDevice = nullptr; + } +#else + m_device = nullptr; +#endif + + ::CloseHandle(m_fenceEvent); + } + + virtual void OnResize(uint32_t width, uint32_t height) override final + { + if ((width > 0 && height > 0) && + ((m_width != width) || (m_height != height) || (m_backBuffers.size() != m_backBufferCount))) + { + WaitForGpuIdle(); + + OnPreResize(); + + m_backBuffers.clear(); + + DXGI_SWAP_CHAIN_DESC desc = {}; + BOOL bFullScreen = FALSE; + ThrowIfFailed(m_swapChain.GetFullscreenState(&bFullScreen, nullptr)); + ThrowIfFailed(m_swapChain.GetDesc(&desc)); + ThrowIfFailed( + m_swapChain.ResizeBuffers(m_backBufferCount, width, height, desc.BufferDesc.Format, desc.Flags)); + UpdateSwapChainBuffers(); + + m_width = width; + m_height = height; + + OnPostResize(); + } + } + + virtual RpsResult CreateRpsRuntimeDevice(const RpsDeviceCreateInfo& createInfo, RpsDevice& device) override + { + RpsRuntimeDeviceCreateInfo runtimeCreateInfo = {}; + runtimeCreateInfo.pUserContext = this; + runtimeCreateInfo.callbacks.pfnRecordDebugMarker = &RecordDebugMarker; + + RpsD3D12RuntimeDeviceCreateInfo runtimeDeviceCreateInfo = {}; + runtimeDeviceCreateInfo.pDeviceCreateInfo = &createInfo; + runtimeDeviceCreateInfo.pD3D12Device = m_device.Get(); + runtimeDeviceCreateInfo.pRuntimeCreateInfo = &runtimeCreateInfo; + + if (g_Dx12PreferEnhancedBarriers) + { + runtimeDeviceCreateInfo.flags |= RPS_D3D12_RUNTIME_FLAG_PREFER_ENHANCED_BARRIERS; + } + + return rpsD3D12RuntimeDeviceCreate(&runtimeDeviceCreateInfo, &device); + } + +protected: + virtual bool WaitForGpuIdle() override final + { + m_fenceValue++; + ThrowIfFailed(m_presentQueue->Signal(m_fences[0].Get(), m_fenceValue)); + + // Wait until the fence has been processed. 
+ ThrowIfFailed(m_fences[0]->SetEventOnCompletion(m_fenceValue, m_fenceEvent)); + WaitForSingleObjectEx(m_fenceEvent, INFINITE, FALSE); + + m_pendingReleaseResources[m_backBufferIndex].clear(); + + return true; + } + + void WaitForSwapChainBuffer() + { + // If the next frame is not ready to be rendered yet, wait until it is ready. + while (m_fences[0]->GetCompletedValue() < m_presentFenceValues[m_backBufferIndex]) + { + ThrowIfFailed(m_fences[0]->SetEventOnCompletion(m_presentFenceValues[m_backBufferIndex], m_fenceEvent)); + WaitForSingleObjectEx(m_fenceEvent, INFINITE, FALSE); + } + } + + void UpdateSwapChainBuffers() + { + m_backBufferIndex = m_swapChain.GetCurrentBackBufferIndex(); + + if ((m_frameCounter % m_backBufferCount) != m_backBufferIndex) + m_frameCounter = m_backBufferIndex; + + m_backBuffers.resize(m_backBufferCount); + m_pendingReleaseResources.resize(m_backBufferCount); + m_presentFenceValues.clear(); + m_presentFenceValues.resize(m_backBufferCount, 0ULL); + + for (uint32_t iBuf = 0; iBuf < m_backBufferCount; iBuf++) + { + ThrowIfFailed(m_swapChain.GetBuffer(iBuf, IID_PPV_ARGS(&m_backBuffers[iBuf]))); + m_device->CreateRenderTargetView(m_backBuffers[iBuf].Get(), nullptr, m_swapChainRtvs.GetCPU(iBuf)); + } + } + + ID3D12Resource* GetBackBuffer() const + { + return m_backBuffers[m_backBufferIndex].Get(); + } + + D3D12_CPU_DESCRIPTOR_HANDLE GetBackBufferRTV() const + { + return m_swapChainRtvs.GetCPU(m_backBufferIndex); + } + + void GetBackBuffers(RpsResourceDesc& backBufferDesc, + RpsRuntimeResource backBuffers[DXGI_MAX_SWAP_CHAIN_BUFFERS]) const + { + for (uint32_t i = 0; i < m_backBufferCount; i++) + { + backBuffers[i] = rpsD3D12ResourceToHandle(m_backBuffers[i].Get()); + } + + backBufferDesc.type = RPS_RESOURCE_TYPE_IMAGE_2D; + backBufferDesc.temporalLayers = uint32_t(m_backBuffers.size()); + backBufferDesc.flags = 0; + backBufferDesc.image.arrayLayers = 1; + backBufferDesc.image.mipLevels = 1; + backBufferDesc.image.format = 
rpsFormatFromDXGI(m_swapChain.GetFormat()); + backBufferDesc.image.width = m_width; + backBufferDesc.image.height = m_height; + backBufferDesc.image.sampleCount = 1; + } + + static void RecordDebugMarker(void* pUserContext, const RpsRuntimeOpRecordDebugMarkerArgs* pArgs) + { + auto* pCmdList = rpsD3D12CommandListFromHandle(pArgs->hCommandBuffer); + switch (pArgs->mode) + { + case RPS_RUNTIME_DEBUG_MARKER_BEGIN: + PIXBeginEvent(pCmdList, 0, pArgs->text); + break; + case RPS_RUNTIME_DEBUG_MARKER_END: + PIXEndEvent(pCmdList); + break; + case RPS_RUNTIME_DEBUG_MARKER_LABEL: + PIXSetMarker(pCmdList, 0, pArgs->text); + break; + } + } + +public: + struct ActiveCommandList + { + uint32_t backBufferIndex; + RpsAfxQueueIndices queueIndex; + ComPtr cmdList; + ComPtr cmdAllocator; + + ID3D12GraphicsCommandList* operator->() const + { + return cmdList.Get(); + } + }; + + struct FenceSignalInfo + { + uint32_t queueIndex; + UINT64 value; + + FenceSignalInfo() + : queueIndex(UINT32_MAX) + , value(UINT64_MAX) + { + } + }; + +protected: + virtual void OnInit(ID3D12GraphicsCommandList* pInitCmdList, std::vector>& tempResources) + { + } + + virtual void OnCleanUp() + { + } + + virtual void OnPreResize() + { + } + + virtual void OnPostResize() + { + } + + virtual void OnUpdate(uint32_t frameIndex) + { + } + + virtual void OnRender(uint32_t frameIndex) + { + ActiveCommandList cmdList = AcquireCmdList(RPS_AFX_QUEUE_INDEX_GFX); + + CloseCmdList(cmdList); + ID3D12CommandList* pCmdLists[] = {cmdList.cmdList.Get()}; + m_presentQueue->ExecuteCommandLists(1, pCmdLists); + RecycleCmdList(cmdList); + } + + RpsResult ExecuteRenderGraph(uint32_t frameIndex, RpsRenderGraph hRenderGraph) + { + RpsRenderGraphBatchLayout batchLayout = {}; + + RpsResult result = rpsRenderGraphGetBatchLayout(hRenderGraph, &batchLayout); + if (RPS_FAILED(result)) + { + return result; + } + + m_fenceSignalInfos.resize(batchLayout.numFenceSignals); + + for (uint32_t iBatch = 0; iBatch < batchLayout.numCmdBatches; iBatch++) 
+ { + auto& batch = batchLayout.pCmdBatches[iBatch]; + + ID3D12CommandQueue* pCurrQueue = GetCmdQueue(RpsAfxQueueIndices(batch.queueIndex)); + + ActiveCommandList cmdList = AcquireCmdList(RpsAfxQueueIndices(batch.queueIndex)); + + RpsRenderGraphRecordCommandInfo recordInfo = {}; + + recordInfo.pUserContext = this; + recordInfo.cmdBeginIndex = batch.cmdBegin; + recordInfo.numCmds = batch.numCmds; + recordInfo.hCmdBuffer = rpsD3D12CommandListToHandle(cmdList.cmdList.Get()); + + if (g_DebugMarkers) + { + recordInfo.flags = RPS_RECORD_COMMAND_FLAG_ENABLE_COMMAND_DEBUG_MARKERS; + } + + for (uint32_t iWaitIdx = batch.waitFencesBegin; iWaitIdx < (batch.waitFencesBegin + batch.numWaitFences); + iWaitIdx++) + { + const auto& signalInfo = m_fenceSignalInfos[batchLayout.pWaitFenceIndices[iWaitIdx]]; + HRESULT hr = pCurrQueue->Wait(m_fences[signalInfo.queueIndex].Get(), signalInfo.value); + if (FAILED(hr)) + { + return RPS_ERROR_UNSPECIFIED; + } + } + + result = rpsRenderGraphRecordCommands(hRenderGraph, &recordInfo); + if (RPS_FAILED(result)) + return result; + + CloseCmdList(cmdList); + ID3D12CommandList* pCmdLists[] = {cmdList.cmdList.Get()}; + pCurrQueue->ExecuteCommandLists(1, pCmdLists); + RecycleCmdList(cmdList); + + if (batch.signalFenceIndex != RPS_INDEX_NONE_U32) + { + m_fenceValue++; + + auto& signalInfo = m_fenceSignalInfos[batch.signalFenceIndex]; + + signalInfo.queueIndex = batch.queueIndex; + signalInfo.value = m_fenceValue; + + HRESULT hr = pCurrQueue->Signal(m_fences[signalInfo.queueIndex].Get(), signalInfo.value); + if (FAILED(hr)) + { + return RPS_ERROR_UNSPECIFIED; + } + } + } + + return result; + } + + ActiveCommandList AcquireCmdList(RpsAfxQueueIndices queueIndex) + { + ActiveCommandList result = {}; + result.backBufferIndex = m_backBufferIndex; + result.queueIndex = queueIndex; + + const D3D12_COMMAND_LIST_TYPE cmdListType = m_queueIndexToCmdListTypeMap[queueIndex]; + + std::lock_guard lock(m_cmdListMutex); + + if (m_cmdAllocators[queueIndex].size() <= 
m_backBufferIndex) + { + m_cmdAllocators[queueIndex].resize(m_backBufferCount); + } + + if (m_cmdAllocators[queueIndex][m_backBufferIndex].empty()) + { + ThrowIfFailed(m_device->CreateCommandAllocator(cmdListType, IID_PPV_ARGS(&result.cmdAllocator))); + } + else + { + result.cmdAllocator = m_cmdAllocators[queueIndex][m_backBufferIndex].back(); + m_cmdAllocators[queueIndex][m_backBufferIndex].pop_back(); + } + + if (m_cmdLists[queueIndex].empty()) + { + ThrowIfFailed(m_device->CreateCommandList( + 1, cmdListType, result.cmdAllocator.Get(), nullptr, IID_PPV_ARGS(&result.cmdList))); + } + else + { + result.cmdList = m_cmdLists[queueIndex].back(); + m_cmdLists[queueIndex].pop_back(); + ThrowIfFailed(result.cmdList->Reset(result.cmdAllocator.Get(), nullptr)); + } + + return result; + } + + void CloseCmdList(ActiveCommandList& cmdList) + { + assert(cmdList.cmdAllocator != nullptr); + assert(cmdList.cmdList != nullptr); + assert(cmdList.backBufferIndex == m_backBufferIndex); + + const D3D12_COMMAND_LIST_TYPE cmdListType = cmdList.cmdList->GetType(); + const uint32_t queueIndex = cmdList.queueIndex; + + std::lock_guard lock(m_cmdListMutex); + + m_cmdAllocators[queueIndex][m_backBufferIndex].push_back(cmdList.cmdAllocator); + cmdList.cmdAllocator = nullptr; + + ThrowIfFailed(cmdList.cmdList->Close()); + } + + void RecycleCmdList(ActiveCommandList& cmdList) + { + assert(cmdList.cmdAllocator == nullptr); + + const D3D12_COMMAND_LIST_TYPE cmdListType = cmdList.cmdList->GetType(); + const uint32_t queueIndex = cmdList.queueIndex; + + std::lock_guard lock(m_cmdListMutex); + + m_cmdLists[queueIndex].push_back(cmdList.cmdList); + cmdList.cmdList = nullptr; + } + + ID3D12CommandQueue* GetCmdQueue(RpsAfxQueueIndices queueIndex) + { + ID3D12CommandQueue* result = nullptr; + + if (queueIndex < RPS_AFX_QUEUE_INDEX_COUNT) + { + std::lock_guard lock(m_cmdListMutex); + + result = m_queues[queueIndex].Get(); + + if (!result) + { + if (m_queueIndexToCmdListTypeMap[queueIndex] == 
D3D12_COMMAND_LIST_TYPE_DIRECT) + { + m_queues[queueIndex] = m_presentQueue; + } + else + { + D3D12_COMMAND_QUEUE_DESC queueDesc = {}; + queueDesc.Type = m_queueIndexToCmdListTypeMap[queueIndex]; + + ThrowIfFailed(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&m_queues[queueIndex]))); + } + + result = m_queues[queueIndex].Get(); + } + } + + return result; + } + + struct DescriptorHeap; + + struct DescriptorTable + { + DescriptorHeap* pHeap; + D3D12_CPU_DESCRIPTOR_HANDLE cpuHdl; + D3D12_GPU_DESCRIPTOR_HANDLE gpuHdl; + + D3D12_CPU_DESCRIPTOR_HANDLE GetCPU(uint32_t index) const + { + return {cpuHdl.ptr + index * pHeap->descriptorSize}; + } + D3D12_GPU_DESCRIPTOR_HANDLE GetGPU(uint32_t index) const + { + return {gpuHdl.ptr + index * pHeap->descriptorSize}; + } + }; + + struct DescriptorHeap + { + ComPtr pHeap; + + uint32_t descriptorSize = 0; + uint32_t capacity = 0; + uint32_t staticDescriptors = 0; + uint32_t dynamicDescriptorsPerFrame = 0; + uint32_t currentDynamicStart = 0; + uint32_t numStaticUsed = 0; + uint32_t numDynamicUsed = 0; + + HRESULT AllocStatic(uint32_t count, DescriptorTable* pTable) + { + return AllocRange(0, staticDescriptors, &numStaticUsed, count, pTable); + } + + HRESULT AllocDynamic(uint32_t count, DescriptorTable* pTable) + { + return AllocRange( + currentDynamicStart, currentDynamicStart + dynamicDescriptorsPerFrame, &numDynamicUsed, count, pTable); + } + + void Init(ID3D12Device* pDevice, + D3D12_DESCRIPTOR_HEAP_TYPE type, + uint32_t staticCount, + uint32_t dynamicCountPerFrame, + uint32_t maxQueuedFrames) + { + uint32_t totalCount = staticCount + dynamicCountPerFrame * maxQueuedFrames; + + descriptorSize = pDevice->GetDescriptorHandleIncrementSize(type); + capacity = totalCount; + staticDescriptors = staticCount; + dynamicDescriptorsPerFrame = dynamicCountPerFrame; + currentDynamicStart = 0; + numStaticUsed = 0; + numDynamicUsed = 0; + + D3D12_DESCRIPTOR_HEAP_DESC desc = {}; + desc.Type = type; + desc.NodeMask = 1; + 
desc.NumDescriptors = capacity; + desc.Flags = (type < (int32_t)NUM_SHADER_VISIBLE_DESCRIPTOR_HEAPS) + ? D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE + : D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + + ThrowIfFailed(pDevice->CreateDescriptorHeap(&desc, IID_PPV_ARGS(&pHeap))); + + descriptorSize = pDevice->GetDescriptorHandleIncrementSize(type); + } + + void CleanUp() + { + pHeap = nullptr; + } + + void ResetDynamic(uint32_t backBufferIndex) + { + numDynamicUsed = 0; + currentDynamicStart = staticDescriptors + dynamicDescriptorsPerFrame * backBufferIndex; + } + + void ResetStatic(uint32_t numStaticDescriptorsToKeep) + { + assert(numStaticDescriptorsToKeep <= numStaticUsed); + numStaticUsed = numStaticDescriptorsToKeep; + } + + private: + HRESULT AllocRange(uint32_t begin, uint32_t end, uint32_t* pUsed, uint32_t count, DescriptorTable* pTable) + { + if (begin + (*pUsed) + count > end) + { + return E_OUTOFMEMORY; + } + + pTable->cpuHdl = pHeap->GetCPUDescriptorHandleForHeapStart(); + pTable->gpuHdl = (pHeap->GetDesc().Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) + ? 
pHeap->GetGPUDescriptorHandleForHeapStart() + : D3D12_GPU_DESCRIPTOR_HANDLE{}; + size_t ptrDelta = (begin + (*pUsed)) * descriptorSize; + pTable->cpuHdl.ptr += ptrDelta; + pTable->gpuHdl.ptr += ptrDelta; + pTable->pHeap = this; + + *pUsed += count; + + return S_OK; + } + }; + + DescriptorTable AllocStaticDescriptors(D3D12_DESCRIPTOR_HEAP_TYPE type, uint32_t count) + { + DescriptorTable result = {}; + ThrowIfFailed(m_descriptorHeaps[type].AllocStatic(count, &result)); + return result; + } + + DescriptorTable AllocStaticCBV_SRV_UAVs(uint32_t count) + { + return AllocStaticDescriptors(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, count); + } + + DescriptorTable AllocDynamicDescriptors(D3D12_DESCRIPTOR_HEAP_TYPE type, uint32_t count) + { + DescriptorTable result = {}; + ThrowIfFailed(m_descriptorHeaps[type].AllocDynamic(count, &result)); + return result; + } + + D3D12_GPU_DESCRIPTOR_HANDLE AllocDynamicDescriptorsAndWrite(D3D12_DESCRIPTOR_HEAP_TYPE type, + const D3D12_CPU_DESCRIPTOR_HANDLE* pCpuOnlyHandles, + uint32_t numHandles, + bool bSingleTable = false) + { + DescriptorTable table = AllocDynamicDescriptors(type, numHandles); + + if (bSingleTable) + { + m_device->CopyDescriptorsSimple(numHandles, table.GetCPU(0), pCpuOnlyHandles[0], type); + } + else + { + for (uint32_t i = 0; i < numHandles; i++) + { + m_device->CopyDescriptorsSimple(1, table.GetCPU(i), pCpuOnlyHandles[i], type); + } + } + return table.GetGPU(0); + } + + D3D12_GPU_DESCRIPTOR_HANDLE AllocDynamicDescriptorAndWriteCBV(D3D12_GPU_VIRTUAL_ADDRESS gpuVa, uint32_t size) + { + auto table = AllocDynamicDescriptors(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 1); + + D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; + cbvDesc.BufferLocation = gpuVa; + cbvDesc.SizeInBytes = size; + m_device->CreateConstantBufferView(&cbvDesc, table.GetCPU(0)); + + return table.GetGPU(0); + } + + D3D12_GPU_DESCRIPTOR_HANDLE AllocStaticDescriptorsAndWriteCBV(D3D12_GPU_VIRTUAL_ADDRESS gpuVa, uint32_t size) + { + auto table = 
AllocStaticDescriptors(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 1); + + D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; + cbvDesc.BufferLocation = gpuVa; + cbvDesc.SizeInBytes = size; + m_device->CreateConstantBufferView(&cbvDesc, table.GetCPU(0)); + + return table.GetGPU(0); + } + + void BindDescriptorHeaps(ID3D12GraphicsCommandList* pCmdList) + { + ID3D12DescriptorHeap* pHeaps[2] = {}; + uint32_t numHeaps = 0; + for (uint32_t i = 0; i < NUM_SHADER_VISIBLE_DESCRIPTOR_HEAPS; i++) + { + if (m_descriptorHeaps[i].pHeap) + { + pHeaps[numHeaps] = m_descriptorHeaps[i].pHeap.Get(); + numHeaps++; + } + } + pCmdList->SetDescriptorHeaps(numHeaps, pHeaps); + } + + struct SwapChain + { + HRESULT Create(IDXGIFactory2* pFactory, + uint32_t backBufferCount, + uint32_t width, + uint32_t height, + DXGI_FORMAT backBufferFormat, + ID3D12Device* pDevice, + ID3D12CommandQueue* pPresentQueue, + HWND hWnd) + { + if (m_swapChain) + return S_FALSE; + + m_pDevice = pDevice; + m_hWnd = hWnd; + + // Describe and create the swap chain. + DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {}; + swapChainDesc.BufferCount = backBufferCount; + swapChainDesc.Width = width; + swapChainDesc.Height = height; + swapChainDesc.Format = backBufferFormat; + swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + swapChainDesc.SampleDesc.Count = 1; + swapChainDesc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; + + ComPtr swapChain; + + HRESULT hr = + pFactory->CreateSwapChainForHwnd(pPresentQueue, hWnd, &swapChainDesc, nullptr, nullptr, &swapChain); + if (hr != DXGI_ERROR_NOT_CURRENTLY_AVAILABLE) + { + ThrowIfFailed(hr); + + // This sample does not support fullscreen transitions. 
+ ThrowIfFailed(pFactory->MakeWindowAssociation(hWnd, DXGI_MWA_NO_ALT_ENTER)); + + ThrowIfFailed(swapChain.As(&m_swapChain)); + } + else + { + DWORD sessionId = 0; + ProcessIdToSessionId(GetCurrentProcessId(), &sessionId); + if (sessionId == 0) + { + fprintf_s(stderr, "\nCreating fallback dummy swapchain for session 0 process."); + hr = ResizeBuffers(backBufferCount, width, height, backBufferFormat, 0); + } + } + + return hr; + } + + void Destroy() + { + m_swapChain = nullptr; + m_backBufferIndex = 0; + m_buffers.clear(); + } + + HRESULT GetFullscreenState(BOOL* pbFullscreen, IDXGIOutput** ppOutput) + { + if (m_swapChain) + return m_swapChain->GetFullscreenState(pbFullscreen, ppOutput); + + if (pbFullscreen) + *pbFullscreen = FALSE; + + if (ppOutput) + *ppOutput = nullptr; + + return S_OK; + } + + HRESULT GetDesc(DXGI_SWAP_CHAIN_DESC* pDesc) const + { + if (m_swapChain) + return m_swapChain->GetDesc(pDesc); + + if (m_buffers.empty()) + return E_FAIL; + + const D3D12_RESOURCE_DESC bufDesc = m_buffers[0]->GetDesc(); + + pDesc->BufferDesc.Width = (uint32_t)bufDesc.Width; + pDesc->BufferDesc.Height = bufDesc.Height; + pDesc->BufferDesc.RefreshRate.Denominator = 1; + pDesc->BufferDesc.RefreshRate.Numerator = 60; + pDesc->BufferDesc.Format = bufDesc.Format; + pDesc->BufferDesc.ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_PROGRESSIVE; + pDesc->BufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED; + pDesc->SampleDesc.Count = 1; + pDesc->SampleDesc.Quality = 0; + pDesc->BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + pDesc->BufferCount = (UINT)m_buffers.size(); + pDesc->OutputWindow = m_hWnd; + pDesc->Windowed = TRUE; + pDesc->SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + pDesc->Flags = 0; + + return S_OK; + } + + DXGI_FORMAT GetFormat() const + { + DXGI_SWAP_CHAIN_DESC desc; + if (SUCCEEDED(GetDesc(&desc))) + return desc.BufferDesc.Format; + else + return DXGI_FORMAT_UNKNOWN; + } + + HRESULT ResizeBuffers(UINT backBufferCount, UINT width, UINT height, DXGI_FORMAT 
backBufferFormat, UINT flags) + { + if (m_swapChain) + return m_swapChain->ResizeBuffers(backBufferCount, width, height, backBufferFormat, flags); + + m_buffers.clear(); + m_buffers.resize(backBufferCount); + for (uint32_t i = 0; i < backBufferCount; i++) + { + auto heapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + auto resDesc = CD3DX12_RESOURCE_DESC(D3D12_RESOURCE_DIMENSION_TEXTURE2D, + 0, + width, + height, + 1, + 1, + backBufferFormat, + 1, + 0, + D3D12_TEXTURE_LAYOUT_UNKNOWN, + D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); + ThrowIfFailed(m_pDevice->CreateCommittedResource(&heapProps, + D3D12_HEAP_FLAG_NONE, + &resDesc, + D3D12_RESOURCE_STATE_COMMON, + nullptr, + IID_PPV_ARGS(&m_buffers[i]))); + m_backBufferIndex = 0; + } + + return S_OK; + } + + HRESULT GetBuffer(UINT index, REFIID riid, void** ppSurface) + { + if (m_swapChain) + { + return m_swapChain->GetBuffer(index, riid, ppSurface); + } + else + { + return m_buffers[index]->QueryInterface(riid, ppSurface); + } + } + + HRESULT Present(UINT syncInternal, UINT flags) + { + HRESULT hr = S_OK; + if (m_swapChain) + { + hr = m_swapChain->Present(syncInternal, flags); + m_backBufferIndex = m_swapChain->GetCurrentBackBufferIndex(); + } + else + { + m_backBufferIndex = (m_backBufferIndex + 1) % m_buffers.size(); + } + return hr; + } + + UINT GetCurrentBackBufferIndex() + { + return m_swapChain ? m_swapChain->GetCurrentBackBufferIndex() : m_backBufferIndex; + } + + HWND m_hWnd; + ComPtr m_pDevice; + ComPtr m_swapChain; + std::vector> m_buffers; + uint32_t m_backBufferIndex; + }; + + uint64_t CalcGuaranteedCompletedFrameIndexForRps() const + { + // we always wait for swapchain buffer before rendering a new frame, so can guarntee at least + // this index for gpu complete status + return (m_frameCounter > m_backBufferCount) ? 
m_frameCounter - m_backBufferCount + : RPS_GPU_COMPLETED_FRAME_INDEX_NONE; + } + + void CreateStaticCheckerboardTexture(ComPtr& texture, + std::vector>& tempResources, + ID3D12GraphicsCommandList* pCmdList, + uint32_t texWidth, + uint32_t texHeight, + const float tintColor[4]) const + { + ComPtr textureUploadHeap; + + static const UINT TexturePixelSize = 4; + + // Describe and create a Texture2D. + D3D12_RESOURCE_DESC textureDesc = {}; + textureDesc.MipLevels = 1; + textureDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + textureDesc.Width = texWidth; + textureDesc.Height = texHeight; + textureDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + textureDesc.DepthOrArraySize = 1; + textureDesc.SampleDesc.Count = 1; + textureDesc.SampleDesc.Quality = 0; + textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + + auto heapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + ThrowIfFailed(m_device->CreateCommittedResource(&heapProps, + D3D12_HEAP_FLAG_NONE, + &textureDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&texture))); + + const UINT64 uploadBufferSize = GetRequiredIntermediateSize(texture.Get(), 0, 1); + + // Create the GPU upload buffer. + auto uploadHeapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); + auto bufferDesc = CD3DX12_RESOURCE_DESC::Buffer(uploadBufferSize); + + ThrowIfFailed(m_device->CreateCommittedResource(&uploadHeapProps, + D3D12_HEAP_FLAG_NONE, + &bufferDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&textureUploadHeap))); + textureUploadHeap->SetName(L"textureUploadHeap"); + + // Copy data to the intermediate upload heap and then schedule a copy + // from the upload heap to the Texture2D. + const UINT rowPitch = texWidth * TexturePixelSize; + const UINT cellPitch = rowPitch >> 3; // The width of a cell in the checkboard texture. + const UINT cellHeight = texWidth >> 3; // The height of a cell in the checkerboard texture. 
+ const UINT textureSize = rowPitch * texHeight; + + std::vector data(textureSize); + UINT8* pData = &data[0]; + +#define RPS_AFX_SCALE_BYTE(B, S) (std::max(0, std::min(0xff, (int32_t((B) * (S)))))) + + for (UINT n = 0; n < textureSize; n += TexturePixelSize) + { + UINT x = n % rowPitch; + UINT y = n / rowPitch; + UINT i = x / cellPitch; + UINT j = y / cellHeight; + + if (i % 2 == j % 2) + { + pData[n] = RPS_AFX_SCALE_BYTE(0xa0, tintColor[0]); // R + pData[n + 1] = RPS_AFX_SCALE_BYTE(0xa0, tintColor[1]); // G + pData[n + 2] = RPS_AFX_SCALE_BYTE(0xa0, tintColor[2]); // B + pData[n + 3] = RPS_AFX_SCALE_BYTE(0xff, tintColor[3]); // A + } + else + { + pData[n] = RPS_AFX_SCALE_BYTE(0xff, tintColor[0]); // R + pData[n + 1] = RPS_AFX_SCALE_BYTE(0xff, tintColor[1]); // G + pData[n + 2] = RPS_AFX_SCALE_BYTE(0xff, tintColor[2]); // B + pData[n + 3] = RPS_AFX_SCALE_BYTE(0xff, tintColor[3]); // A + } + } + +#undef RPS_AFX_SCALE_BYTE + + D3D12_SUBRESOURCE_DATA textureData = {}; + textureData.pData = &data[0]; + textureData.RowPitch = texWidth * TexturePixelSize; + textureData.SlicePitch = textureData.RowPitch * texHeight; + + UpdateSubresources(pCmdList, texture.Get(), textureUploadHeap.Get(), 0, 0, 1, &textureData); + auto barrier = CD3DX12_RESOURCE_BARRIER::Transition( + texture.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + pCmdList->ResourceBarrier(1, &barrier); + + // Keep it around until upload cmdlist finishes executing. 
+ tempResources.push_back(textureUploadHeap); + } + +protected: + bool m_useWarpDevice = g_WarpDevice; + bool m_bVSync = g_VSync; + HWND m_hWnd = NULL; + UINT m_width = 0; + UINT m_height = 0; + uint32_t m_backBufferCount = 3; + ComPtr m_device; + SwapChain m_swapChain; + uint32_t m_backBufferIndex = 0; + ComPtr m_presentQueue; + ComPtr m_queues[RPS_AFX_QUEUE_INDEX_COUNT]; + ComPtr m_fences[RPS_AFX_QUEUE_INDEX_COUNT]; + std::vector m_fenceSignalInfos; + std::vector m_presentFenceValues; + std::vector> m_backBuffers; + UINT64 m_fenceValue = 0; + HANDLE m_fenceEvent = INVALID_HANDLE_VALUE; + uint32_t m_descriptorSizes[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES]; + DescriptorHeapSizeRequirement m_descriptorHeapSizes[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES]; + DescriptorTable m_swapChainRtvs; + + D3D12_COMMAND_LIST_TYPE m_queueIndexToCmdListTypeMap[RPS_AFX_QUEUE_INDEX_COUNT]; + std::vector>> m_cmdAllocators[RPS_AFX_QUEUE_INDEX_COUNT]; + std::vector> m_cmdLists[RPS_AFX_QUEUE_INDEX_COUNT]; + std::mutex m_cmdListMutex; + DescriptorHeap m_descriptorHeaps[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES]; + + std::vector>> m_pendingReleaseResources; + + uint32_t m_frameCounter = 0; +}; + +#ifdef RPS_DX12_AGILITY_SDK_VER +extern "C" __declspec(dllexport) const UINT D3D12SDKVersion = RPS_DX12_AGILITY_SDK_VER; +extern "C" __declspec(dllexport) const char* D3D12SDKPath = u8".\\D3D12\\"; +#endif //ifdef RPS_DX12_AGILITY_SDK_VER diff --git a/tools/app_framework/afx_d3d_helper.h b/tools/app_framework/afx_d3d_helper.h new file mode 100644 index 0000000..063158a --- /dev/null +++ b/tools/app_framework/afx_d3d_helper.h @@ -0,0 +1,212 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+
+#pragma once
+
+// NOTE(review): the header names of the six #include directives below are
+// missing from this copy of the patch; the directives are kept exactly as found.
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+// Note that while ComPtr is used to manage the lifetime of resources on the CPU,
+// it has no understanding of the lifetime of resources on the GPU. Apps must account
+// for the GPU lifetime of resources to avoid destroying objects that may still be
+// referenced by the GPU.
+using Microsoft::WRL::ComPtr;
+
+// Formats an HRESULT as the text "HRESULT of 0x" followed by eight zero-padded,
+// upper-case hexadecimal digits.
+// NOTE(review): the target type of the static_cast is missing from this copy of
+// the patch; the %08X conversion expects an unsigned int argument.
+inline std::string HrToString(HRESULT hr)
+{
+    char s_str[64] = {};
+    sprintf_s(s_str, "HRESULT of 0x%08X", static_cast(hr));
+    return std::string(s_str);
+}
+
+// Exception carrying a failed HRESULT. what() is the HrToString() text of the
+// code and Error() returns the code itself.
+class HrException : public std::runtime_error
+{
+public:
+    HrException(HRESULT hr) : std::runtime_error(HrToString(hr)), m_hr(hr) {}
+    HRESULT Error() const { return m_hr; }
+private:
+    const HRESULT m_hr;
+};
+
+// Calls Release() on p when p is non-null. The pointer itself is not cleared.
+// The body is wrapped in do { } while (0) so the macro expands to exactly one
+// statement: with the previous bare `if` form, an `else` written after
+// `SAFE_RELEASE(x);` would silently attach to the macro's hidden `if`.
+#define SAFE_RELEASE(p) do { if (p) { (p)->Release(); } } while (0)
+
+// Throws HrException(hr) when hr is a failure code; otherwise does nothing.
+inline void ThrowIfFailed(HRESULT hr)
+{
+    if (FAILED(hr))
+    {
+        throw HrException(hr);
+    }
+}
+
+// Same as ThrowIfFailed, but first sends the contents of errorBlob (when it is
+// non-null) to the debugger output. The blob is printed whether or not hr failed.
+// NOTE(review): the template parameter list is missing from this copy of the
+// patch; presumably it declares TBlob.
+template
+inline void ThrowIfFailedEx(HRESULT hr, TBlob& errorBlob)
+{
+    if (errorBlob)
+    {
+        ::OutputDebugStringA((const char*)errorBlob->GetBufferPointer());
+    }
+
+    if (FAILED(hr))
+    {
+        throw HrException(hr);
+    }
+}
+
+
+// Writes the directory of the current executable into path, keeping the trailing
+// backslash. pathSize is the capacity of path in WCHARs.
+// Throws std::exception when path is null, when GetModuleFileNameW fails, or when
+// the returned length equals pathSize (the path did not fit). When the module path
+// contains no backslash it is left unchanged.
+inline void GetAssetsPath(_Out_writes_(pathSize) WCHAR* path, UINT pathSize)
+{
+    if (path == nullptr)
+    {
+        throw std::exception();
+    }
+
+    DWORD size = GetModuleFileNameW(nullptr, path, pathSize);
+    if (size == 0 || size == pathSize)
+    {
+        // Method failed or path was truncated.
+        throw std::exception();
+    }
+
+    // Cut the string right after the last backslash to drop the file name.
+    WCHAR* lastSlash = wcsrchr(path, L'\\');
+    if (lastSlash)
+    {
+        *(lastSlash + 1) = L'\0';
+    }
+}
+
+// Assign a name to the object to aid with debugging.
+#if defined(_DEBUG) || defined(DBG)
+// Builds with _DEBUG or DBG defined: forward the name to the object's SetName().
+// NOTE(review): the parameter lists of the `template` headers in this section are
+// missing from this copy of the patch; presumably each declares TObject.
+template
+inline void SetName(TObject* pObject, LPCWSTR name)
+{
+    pObject->SetName(name);
+}
+// Names the object "<name>[<index>]". The text is built in a 50-WCHAR buffer and
+// the object is only named when swprintf_s returns a value greater than zero.
+template
+inline void SetNameIndexed(TObject* pObject, LPCWSTR name, UINT index)
+{
+    WCHAR fullName[50];
+    if (swprintf_s(fullName, L"%s[%u]", name, index) > 0)
+    {
+        pObject->SetName(fullName);
+    }
+}
+#else
+// All other builds: both helpers are empty functions.
+template
+inline void SetName(TObject*, LPCWSTR)
+{
+}
+template
+inline void SetNameIndexed(TObject*, LPCWSTR, UINT)
+{
+}
+#endif
+
+// Naming helper for ComPtr.
+// Assigns the name of the variable as the name of the object.
+// The indexed variant will include the index in the name of the object.
+#define NAME_D3D12_OBJECT(x) SetName((x).Get(), L#x)
+#define NAME_D3D12_OBJECT_INDEXED(x, n) SetNameIndexed((x)[n].Get(), L#x, n)
+
+// Only compiled when D3D_COMPILE_STANDARD_FILE_INCLUDE is defined.
+#ifdef D3D_COMPILE_STANDARD_FILE_INCLUDE
+// Compiles the shader file `filename` with D3DCompileFromFile and returns the
+// resulting bytecode.
+//   filename   - path of the source file.
+//   defines    - macro definitions handed to the compiler as-is.
+//   entrypoint - name of the entry function.
+//   target     - shader target string.
+// Any text in the error blob is sent to the debugger output, whether or not the
+// compile succeeded. A failing HRESULT is thrown as HrException via ThrowIfFailed.
+// NOTE(review): the ComPtr template arguments (return type, byteCode, errors) are
+// missing from this copy of the patch.
+inline ComPtr CompileShader(
+    const std::wstring& filename,
+    const D3D_SHADER_MACRO* defines,
+    const std::string& entrypoint,
+    const std::string& target)
+{
+    UINT compileFlags = 0;
+#if defined(_DEBUG) || defined(DBG)
+    // Debug builds keep debug information and skip optimization.
+    compileFlags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION;
+#endif
+
+    HRESULT hr;
+
+    ComPtr byteCode = nullptr;
+    ComPtr errors;
+    hr = D3DCompileFromFile(filename.c_str(), defines, D3D_COMPILE_STANDARD_FILE_INCLUDE,
+        entrypoint.c_str(), target.c_str(), compileFlags, 0, &byteCode, &errors);
+
+    // Report compiler messages before deciding whether to throw.
+    if (errors != nullptr)
+    {
+        OutputDebugStringA((char*)errors->GetBufferPointer());
+    }
+    ThrowIfFailed(hr);
+
+    return byteCode;
+}
+#endif
+
+// Resets all elements in a ComPtr array.
+// comPtrArray points at a container that can be iterated with range-for; Reset()
+// is called on every element.
+template
+void ResetComPtrArray(T* comPtrArray)
+{
+    for (auto &i : *comPtrArray)
+    {
+        i.Reset();
+    }
+}
+
+
+// Resets all elements in a unique_ptr array.
+// uniquePtrArray points at a container that can be iterated with range-for;
+// reset() is called on every element.
+// NOTE(review): the parameter lists of the two `template` headers below are missing
+// from this copy of the patch; presumably they declare T and TFunc respectively.
+template
+void ResetUniquePtrArray(T* uniquePtrArray)
+{
+    for (auto &i : *uniquePtrArray)
+    {
+        i.reset();
+    }
+}
+
+// Selects the adapter to create the device on and stores it in *ppAdapter.
+//   pFactory             - factory used to enumerate adapters.
+//   testDeviceCapability - callable invoked with each candidate adapter pointer; a
+//                          result that converts to true accepts the adapter.
+//   ppAdapter            - receives the selected adapter.
+//   useWarpDevice        - in: true skips enumeration and requests the WARP adapter.
+//                          out: set to true whenever the WARP adapter was returned.
+// Adapters are tried in EnumAdapters1 index order. Adapters whose description is
+// exactly "Microsoft Basic Render Driver" or "Microsoft Basic Display Adapter" are
+// skipped. When no adapter is accepted, the WARP adapter is returned instead.
+// Failing HRESULTs from GetDesc1 / EnumWarpAdapter are thrown via ThrowIfFailed.
+// NOTE(review): the ComPtr template argument of selectedAdapter is missing from
+// this copy of the patch.
+template
+void FindAdapter(IDXGIFactory4* pFactory, TFunc testDeviceCapability, IDXGIAdapter1** ppAdapter, bool& useWarpDevice)
+{
+    ComPtr selectedAdapter = nullptr;
+
+    if (!useWarpDevice)
+    {
+        // The loop ends either through `break` (adapter accepted) or when
+        // EnumAdapters1 stops returning S_OK.
+        for (UINT adapterIndex = 0; S_OK == pFactory->EnumAdapters1(adapterIndex, &selectedAdapter); ++adapterIndex)
+        {
+            DXGI_ADAPTER_DESC1 desc;
+            ThrowIfFailed(selectedAdapter->GetDesc1(&desc));
+
+            static const wchar_t* const warpAdapterNames[] = {L"Microsoft Basic Render Driver",
+                                                              L"Microsoft Basic Display Adapter"};
+
+            // True when desc.Description matches one of the names above.
+            if (std::cend(warpAdapterNames) !=
+                std::find_if(
+                    std::cbegin(warpAdapterNames), std::cend(warpAdapterNames), [&](const wchar_t* warpAdapterName) {
+                        return wcscmp(desc.Description, warpAdapterName) == 0;
+                    }))
+            {
+                // Skip warp
+                continue;
+            }
+
+            // Check to see if the adapter supports desired feature level, but don't create
+            // the actual device yet.
+            if (testDeviceCapability(selectedAdapter.Get()))
+            {
+                break;
+            }
+        }
+    }
+
+    if ((selectedAdapter == nullptr) || useWarpDevice)
+    {
+        // Nothing was accepted, or WARP was requested by the caller.
+        ThrowIfFailed(pFactory->EnumWarpAdapter(IID_PPV_ARGS(ppAdapter)));
+        useWarpDevice = true;
+    }
+    else
+    {
+        // Hand the reference held by selectedAdapter over to the caller.
+        *ppAdapter = selectedAdapter.Detach();
+    }
+}
diff --git a/tools/app_framework/afx_renderer.h b/tools/app_framework/afx_renderer.h
new file mode 100644
index 0000000..6f27788
--- /dev/null
+++ b/tools/app_framework/afx_renderer.h
@@ -0,0 +1,89 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+
+#pragma once
+
+// NOTE(review): the header name of the #include directive below is missing from
+// this copy of the patch; the directive is kept exactly as found.
+#include
+
+#include "afx_cmd_parser.h"
+
+#include "rps/rps.h"
+
+// Debug message severities. Each non-zero value is a distinct bit, so values can
+// be OR-ed together.
+enum
+{
+    RPS_AFX_DEBUG_MSG_SEVERITY_NONE       = 0,
+    RPS_AFX_DEBUG_MSG_SEVERITY_CORRUPTION = 1 << 0,
+    RPS_AFX_DEBUG_MSG_SEVERITY_ERROR      = 1 << 1,
+    RPS_AFX_DEBUG_MSG_SEVERITY_WARNING    = 1 << 2,
+    RPS_AFX_DEBUG_MSG_SEVERITY_INFO       = 1 << 3,
+};
+
+// Initial value of g_DebugDevice: true when _DEBUG is defined, false otherwise.
+constexpr bool DEBUG_DEVICE_DEFAULT =
+#ifdef _DEBUG
+    true;
+#else
+    false;
+#endif
+
+// Command-line options shared by the renderers.
+// NOTE(review): the template arguments of rps::CmdArg are missing from this copy of
+// the patch. The first constructor argument looks like the option name and the
+// second its default value; the meaning of the remaining arguments is defined in
+// afx_cmd_parser.h - confirm there.
+static rps::CmdArg g_DebugDevice{"debug-device", DEBUG_DEVICE_DEFAULT, {"sdk-layer"}, false};
+static rps::CmdArg g_DebugDeviceBreakLevel{"debug-device-break", RPS_AFX_DEBUG_MSG_SEVERITY_NONE};
+static rps::CmdArg g_VSync{"vsync", false, {"vsync"}};
+static rps::CmdArg g_DebugMarkers{"debug-markers", true, {"markers"}};
+static rps::CmdArg g_exitAfterFrame{"exit-after-frame", 300};
+
+// Indices of the queues a renderer works with. RPS_AFX_QUEUE_INDEX_COUNT is the
+// number of real entries, not a queue.
+enum RpsAfxQueueIndices
+{
+    RPS_AFX_QUEUE_INDEX_GFX,
+    RPS_AFX_QUEUE_INDEX_COMPUTE,
+    RPS_AFX_QUEUE_INDEX_COPY,
+    RPS_AFX_QUEUE_INDEX_COUNT,
+};
+
+// Base class of the framework renderers. Every virtual has a default body here, so
+// a derived class overrides only what it needs. Copy and move construction are
+// deleted.
+class RpsAfxRendererBase
+{
+private:
+    RpsAfxRendererBase(const RpsAfxRendererBase&) = delete;
+    RpsAfxRendererBase(RpsAfxRendererBase&&)      = delete;
+
+public:
+    RpsAfxRendererBase() = default;
+    virtual ~RpsAfxRendererBase()
+    {
+    }
+    // Default: does nothing and returns true.
+    virtual bool Init(void* window)
+    {
+        return true;
+    }
+    // Default: does nothing.
+    virtual void Tick()
+    {
+    }
+    // Default: does nothing.
+    virtual void CleanUp()
+    {
+    }
+    // Default: does nothing.
+    virtual void OnResize(uint32_t width, uint32_t height)
+    {
+    }
+    // Default: does nothing.
+    virtual void OnKeyUp(char key)
+    {
+    }
+    // Default: does nothing.
+    virtual void OnKeyDown(char key)
+    {
+    }
+    // Default: sets bHandled to false and returns 0.
+    virtual LRESULT WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam, bool& bHandled)
+    {
+        bHandled = false;
+        return 0;
+    }
+    // Default: leaves `device` untouched and returns RPS_ERROR_NOT_IMPLEMENTED.
+    virtual RpsResult CreateRpsRuntimeDevice(const RpsDeviceCreateInfo& createInfo, RpsDevice& device)
+    {
+        return RPS_ERROR_NOT_IMPLEMENTED;
+    }
+    // Default: does nothing and returns false.
+    virtual bool WaitForGpuIdle()
+    {
+        return false;
+    }
+};
\ No newline at end of file
diff --git a/tools/app_framework/afx_shader_compiler.h b/tools/app_framework/afx_shader_compiler.h
new file mode 100644
index 0000000..a533c03
--- /dev/null
+++
b/tools/app_framework/afx_shader_compiler.h
@@ -0,0 +1,184 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+
+#pragma once
+
+// NOTE(review): the header names of the four #include directives below are
+// missing from this copy of the patch; the directives are kept exactly as found.
+#include
+#include
+#include
+#include
+
+#include "afx_d3d_helper.h"
+
+// DxcCreateInstance entry point of dxcompiler.dll; resolved on first use by InitDXC().
+static DxcCreateInstanceProc s_DxcCreateInstance = nullptr;
+
+// Loads dxcompiler.dll and stores its DxcCreateInstance export in
+// s_DxcCreateInstance. Returns immediately when the export is already resolved.
+// Throws a string literal (const char*) when the DLL cannot be loaded or does not
+// export DxcCreateInstance.
+static void InitDXC()
+{
+    if (s_DxcCreateInstance != nullptr)
+    {
+        return;
+    }
+
+    HMODULE hDxcDll = ::LoadLibrary(TEXT("dxcompiler.dll"));
+    if (!hDxcDll)
+    {
+        throw "Failed to load dxcompiler.dll";
+    }
+
+    DxcCreateInstanceProc pfnCreateInstance =
+        (DxcCreateInstanceProc)::GetProcAddress(hDxcDll, "DxcCreateInstance");
+    if (!pfnCreateInstance)
+    {
+        // A missing export used to leave s_DxcCreateInstance null, and the first
+        // call through it in DxcCompile() then jumped to address zero.
+        ::FreeLibrary(hDxcDll);
+        throw "Failed to find DxcCreateInstance in dxcompiler.dll";
+    }
+
+    s_DxcCreateInstance = pfnCreateInstance;
+}
+
+// Include handler passed to DXC. Each include is resolved by loading the named
+// file through IDxcLibrary::CreateBlobFromFile. The object is not reference
+// counted (AddRef/Release return 0); DxcCompile() creates it on the stack, so it
+// must outlive the compile call that uses it.
+interface IncluderDxc : public IDxcIncludeHandler
+{
+    IDxcLibrary* m_pLibrary;
+
+public:
+    IncluderDxc(IDxcLibrary * pLibrary)
+        : m_pLibrary(pLibrary)
+    {
+    }
+    // Answers for IUnknown and IDxcIncludeHandler only. The previous body returned
+    // S_OK without writing the out pointer, which hands the caller garbage.
+    HRESULT STDMETHODCALLTYPE QueryInterface(const IID& riid, void** ppvObject) override
+    {
+        if (ppvObject == nullptr)
+        {
+            return E_POINTER;
+        }
+
+        if (IsEqualIID(riid, __uuidof(IDxcIncludeHandler)) || IsEqualIID(riid, __uuidof(IUnknown)))
+        {
+            *ppvObject = static_cast<IDxcIncludeHandler*>(this);
+            return S_OK;
+        }
+
+        *ppvObject = nullptr;
+        return E_NOINTERFACE;
+    }
+    ULONG STDMETHODCALLTYPE AddRef() override
+    {
+        return 0;
+    }
+    ULONG STDMETHODCALLTYPE Release() override
+    {
+        return 0;
+    }
+    // Loads pFilename into a blob and returns it through ppIncludeSource. A load
+    // failure is reported to the compiler as a failing HRESULT rather than thrown
+    // as a C++ exception through the compiler's call frames.
+    HRESULT STDMETHODCALLTYPE LoadSource(LPCWSTR pFilename, IDxcBlob** ppIncludeSource) override
+    {
+        if (ppIncludeSource == nullptr)
+        {
+            return E_POINTER;
+        }
+        *ppIncludeSource = nullptr;
+
+        IDxcBlobEncoding* pSource = nullptr;
+        const HRESULT     hr      = m_pLibrary->CreateBlobFromFile(pFilename, NULL, &pSource);
+        if (FAILED(hr))
+        {
+            return hr;
+        }
+
+        *ppIncludeSource = pSource;
+
+        return S_OK;
+    }
+};
+
+// Compiles HLSL source text with DXC.
+//   pSrcCode     - NUL-terminated source text, handed to DXC as UTF-8. It is pinned,
+//                  not copied, so it must stay valid for the duration of the call.
+//   pEntryPoint  - entry function name.
+//   pProfile     - target profile. When pParams contains "-spirv" it is passed as
+//                  "-T <profile>" arguments, otherwise as the target-profile parameter.
+//   pParams      - extra compiler arguments separated by single spaces; null is
+//                  treated as an empty string.
+//   defines, defineCount - preprocessor definitions forwarded to DXC.
+//   byteCode     - receives the compiled blob on success.
+// "-Zi" and "-Qembed_debug" are always appended to the arguments. Compiler
+// messages are printed to stderr.
+// Returns true when DXC produced a non-empty result blob, false otherwise.
+// Throws what InitDXC() throws, and HrException (via ThrowIfFailed) when a DXC
+// object or the source blob cannot be created.
+// NOTE(review): the template arguments of the ComPtr and std::vector declarations
+// in this function (including the byteCode parameter) are missing from this copy
+// of the patch; they are kept exactly as found.
+#if RPS_HAS_MAYBE_UNUSED
+[[maybe_unused]]
+#endif
+static bool DxcCompile(const char* pSrcCode,
+                       const WCHAR* pEntryPoint,
+                       const WCHAR* pProfile,
+                       const WCHAR* pParams,
+                       const DxcDefine* defines,
+                       uint32_t defineCount,
+                       std::vector& byteCode)
+{
+    InitDXC();
+
+    ComPtr pLibrary;
+    ThrowIfFailed(s_DxcCreateInstance(CLSID_DxcLibrary, IID_PPV_ARGS(&pLibrary)));
+
+    ComPtr pSource;
+    ThrowIfFailed(pLibrary->CreateBlobWithEncodingFromPinned((LPBYTE)pSrcCode, (UINT32)strlen(pSrcCode), CP_UTF8, &pSource));
+
+    ComPtr pCompiler;
+    ThrowIfFailed(s_DxcCreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(&pCompiler)));
+
+    IncluderDxc Includer(pLibrary.Get());
+
+    LPCWSTR pTargetProfile = L"";
+
+    bool isSpirvTarget = false;
+
+    std::vector args;
+    // splits params string into an array of strings
+    {
+        // wcstok_s writes terminators into the buffer it scans, so work on a copy.
+        // A std::wstring replaces the former fixed WCHAR[1024] buffer, which made
+        // wcscpy_s abort on longer input and crashed on a null pParams.
+        std::wstring params = (pParams != nullptr) ? pParams : L"";
+
+        WCHAR* next_token = nullptr;
+        WCHAR* token      = wcstok_s(&params[0], L" ", &next_token);
+        while (token != NULL)
+        {
+            args.push_back(token);
+            if (L"-spirv" == args.back())
+            {
+                isSpirvTarget = true;
+            }
+            token = wcstok_s(NULL, L" ", &next_token);
+        }
+    }
+
+    if (isSpirvTarget)
+    {
+        args.push_back(L"-T");
+        args.push_back(pProfile);
+    }
+    else
+    {
+        pTargetProfile = pProfile;
+    }
+
+    // Pointer view of args for the DXC call; args must not change after this point.
+    std::vector ppArgs(args.size());
+    std::transform(args.begin(), args.end(), ppArgs.begin(), [](auto& s) { return s.c_str(); });
+
+    ComPtr pOpRes;
+    HRESULT res;
+
+    if (true)
+    {
+        ppArgs.push_back(L"-Zi");
+        ppArgs.push_back(L"-Qembed_debug");
+
+        ComPtr pPDB;
+        // Single out pointer for the suggested debug-blob name. This used to be an
+        // array of 1024 uninitialised pointers, and the returned string leaked.
+        LPWSTR pDebugBlobName = nullptr;
+
+        res = pCompiler->CompileWithDebug(pSource.Get(),
+                                          NULL,
+                                          pEntryPoint,
+                                          pTargetProfile,
+                                          ppArgs.data(),
+                                          (UINT32)ppArgs.size(),
+                                          defines,
+                                          defineCount,
+                                          &Includer,
+                                          &pOpRes,
+                                          &pDebugBlobName,
+                                          pPDB.GetAddressOf());
+
+        // The name is not used here; dxcapi.h requires it to be CoTaskMemFree()'d.
+        if (pDebugBlobName != nullptr)
+        {
+            ::CoTaskMemFree(pDebugBlobName);
+        }
+    }
+    else
+    {
+        res = pCompiler->Compile(
+            pSource.Get(), NULL, pEntryPoint, pTargetProfile, ppArgs.data(), (UINT32)ppArgs.size(), defines, defineCount, &Includer, &pOpRes);
+    }
+
+    ComPtr pResult;
+    ComPtr pError;
+    if (pOpRes != NULL)
+    {
+        pOpRes->GetResult(&pResult);
+        pOpRes->GetErrorBuffer(&pError);
+    }
+
+    if (pError)
+    {
+        ComPtr pErrorUtf8;
+        pLibrary->GetBlobAsUtf8(pError.Get(), &pErrorUtf8);
+        if (pErrorUtf8 && pErrorUtf8->GetBufferSize() > 0)
+        {
+            fprintf(stderr, "%s", (const char*)pErrorUtf8->GetBufferPointer());
+        }
+    }
+
+    // Success is judged by the presence of output, not by `res`.
+    if (pResult != NULL && pResult->GetBufferSize() > 0)
+    {
+        byteCode.resize(pResult->GetBufferSize());
+        memcpy(byteCode.data(), pResult->GetBufferPointer(), pResult->GetBufferSize());
+
+        return true;
+    }
+
+    return false;
+}
diff
// --git a/tools/app_framework/afx_threadpool.h b/tools/app_framework/afx_threadpool.h
// new file mode 100644
// index 0000000..63e89c7
// --- /dev/null
// +++ b/tools/app_framework/afx_threadpool.h
// @@ -0,0 +1,320 @@
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
//
// This file is part of the AMD Render Pipeline Shaders SDK which is
// released under the AMD INTERNAL EVALUATION LICENSE.
//
// See file LICENSE.RTF for full license details.

#pragma once

#include <cassert>
#include <condition_variable>
#include <cstdint>
#include <deque>
#include <functional>
#include <mutex>
#include <thread>
#include <utility>
#include <vector>

// Pool of worker threads that execute queued `std::function<void()>` jobs.
//
// Jobs are started in the order they were enqueued (pushed at the back of m_jobQueue, popped from the front).
// The job table, the queue, the free-slot list, m_exiting and m_activeThreads are only touched while m_mutex
// is held. Init() and Destroy() are not written to be called concurrently with each other.
class RpsAfxThreadPool
{
public:
    // Life cycle of a job slot. WaitForJobs treats every value >= Finished as "done".
    enum class JobStatus
    {
        Pending,    // Queued, not yet picked up by a worker.
        Executing,  // A worker is currently running the job.
        Finished,   // The job ran; the slot is kept until its waiter is removed.
        Free,       // The slot is available for reuse.
        Unknown,
    };

    // Move-only token for one enqueued job.
    //
    // While a handle is attached (GetPool() != nullptr) the pool keeps the job's slot marked `hasWaiter`, so the
    // slot stays in the Finished state after the job ran instead of being recycled. Destroying the handle or
    // move-assigning over it removes that waiter through RpsAfxThreadPool::RemoveWaiter.
    class WaitHandle final
    {
        RpsAfxThreadPool* m_pPool;
        uint32_t          m_jobId;

    public:
        // Creates an empty handle that is not attached to any pool.
        WaitHandle()
            : m_pPool(nullptr)
            , m_jobId(UINT32_MAX)
        {
        }

        // Attaches to job slot `jobId` of `pPool`.
        WaitHandle(RpsAfxThreadPool* pPool, uint32_t jobId)
            : m_pPool(pPool)
            , m_jobId(jobId)
        {
        }

        ~WaitHandle()
        {
            CleanUp();
        }

        WaitHandle(const WaitHandle&)            = delete;
        WaitHandle& operator=(const WaitHandle&) = delete;

        // Takes over the job of `r`; `r` is left detached.
        WaitHandle(WaitHandle&& r)
            : m_pPool(nullptr)
            , m_jobId(UINT32_MAX)
        {
            Assign(r.m_pPool, r.m_jobId);
            r.m_pPool = nullptr;
        }

        // Releases the job currently held (if any), then takes over the job of `r`; `r` is left detached.
        WaitHandle& operator=(WaitHandle&& r)
        {
            if (&r != this)
            {
                CleanUp();
                Assign(r.m_pPool, r.m_jobId);
                r.m_pPool = nullptr;
            }
            return *this;
        }

        // Slot index of the job inside the owning pool (UINT32_MAX for a default-constructed handle).
        uint32_t GetJobId() const
        {
            return m_jobId;
        }

        // Owning pool, or nullptr when the handle is detached.
        const RpsAfxThreadPool* GetPool() const
        {
            return m_pPool;
        }

        // Drops the association with the pool and returns the job id.
        // The pool is not notified here: the waiter flag on the slot is left as it is.
        uint32_t Detatch()
        {
            m_pPool = nullptr;
            return m_jobId;
        }

        // True while the handle is attached to a pool.
        operator bool() const
        {
            return m_pPool != nullptr;
        }

    private:
        void Assign(RpsAfxThreadPool* pPool, uint32_t jobId)
        {
            assert(m_pPool == nullptr);

            m_pPool = pPool;
            m_jobId = jobId;
        }

        // Removes this handle's waiter from the pool and leaves the handle detached.
        void CleanUp()
        {
            if (m_pPool)
            {
                m_pPool->RemoveWaiter(m_jobId);
                // Reset so that a following Assign() sees a detached handle; without this, move-assigning
                // onto an attached handle would hit the assert in Assign().
                m_pPool = nullptr;
            }
        }
    };

private:
    // One entry of the job table (m_jobs); entries are addressed by index and reused through m_freeJobSlots.
    struct Job
    {
        JobStatus             status    = JobStatus::Free;
        bool                  hasWaiter = false;
        std::function<void()> func;
    };

public:
    ~RpsAfxThreadPool()
    {
        Destroy();
    }

    // Stops any existing workers, then starts `numThreads` new worker threads. Always returns true.
    bool Init(uint32_t numThreads)
    {
        Destroy();

        m_workerThreads.resize(numThreads);

        for (uint32_t i = 0; i < numThreads; i++)
        {
            m_workerThreads[i] = std::thread([this]() { WorkerThreadProc(); });
        }

        return true;
    }

    // Asks all workers to exit and joins them.
    // Workers return as soon as they observe the exit flag, so jobs still waiting in the queue are not run
    // by this call; they stay queued.
    void Destroy()
    {
        {
            // The flag must change under the mutex: a worker that has just evaluated its wait predicate
            // would otherwise miss both the new value and the notification below, and join() would hang.
            std::lock_guard<std::mutex> lock(m_mutex);
            m_exiting = true;
        }
        m_jobAddedCV.notify_all();

        for (auto& t : m_workerThreads)
        {
            if (t.joinable())
                t.join();
        }

        {
            std::lock_guard<std::mutex> lock(m_mutex);
            m_exiting = false;
            m_workerThreads.clear();
        }
    }

    // Queues `func` for execution on a worker thread and returns a handle that can be passed to WaitForJobs.
    // When the pool has no worker threads (or is shutting down), `func` is run synchronously on the calling
    // thread instead and an empty handle is returned.
    WaitHandle EnqueueJob(std::function<void()> func)
    {
        {
            std::unique_lock<std::mutex> lock(m_mutex);

            if (!m_exiting && !m_workerThreads.empty())
            {
                uint32_t jobSlot = UINT32_MAX;
                if (m_freeJobSlots.empty())
                {
                    jobSlot = (uint32_t)m_jobs.size();
                    m_jobs.emplace_back();
                }
                else
                {
                    jobSlot = m_freeJobSlots.back();
                    m_freeJobSlots.pop_back();
                }

                Job& job = m_jobs[jobSlot];

                job.status    = JobStatus::Pending;
                job.hasWaiter = true;
                job.func      = std::move(func);

                m_jobQueue.push_back(jobSlot);

                // Only wake a worker if at least one of them is not already busy.
                if (m_activeThreads < m_workerThreads.size())
                {
                    m_jobAddedCV.notify_one();
                }

                return WaitHandle(this, jobSlot);
            }
        }

        // Fallback path: run inline, outside the lock so the job itself may use the pool.
        func();

        return WaitHandle();
    }

    // Blocks until no worker is executing a job and the queue is empty.
    void WaitIdle()
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_jobCompletedCV.wait(lock, [this]() { return (m_activeThreads == 0) && m_jobQueue.empty(); });
    }

    // Blocks until every handle in `waitHandles[0 .. numWaitHandles)` that is attached to this pool refers to a
    // job whose status is >= JobStatus::Finished. Such handles are detached and their waiter is removed as they
    // complete. Handles that are empty or attached to a different pool are skipped without being modified.
    // The array is reordered while waiting.
    void WaitForJobs(WaitHandle* waitHandles, uint32_t numWaitHandles)
    {
        std::unique_lock<std::mutex> lock(m_mutex);

        // Number of handles not resolved yet; they always occupy waitHandles[0 .. remaining).
        uint32_t remaining = numWaitHandles;

        m_jobCompletedCV.wait(lock, [&]() {
            while (remaining > 0)
            {
                WaitHandle& front = waitHandles[0];

                if (front.GetPool() == this)
                {
                    if (m_jobs[front.GetJobId()].status < JobStatus::Finished)
                    {
                        // Still pending or executing: go back to sleep until the next completion.
                        return false;
                    }

                    RemoveWaiterNoLock(front.Detatch());
                }

                // The front handle is resolved; move the last unresolved handle into its place.
                --remaining;
                if (remaining > 0)
                {
                    std::swap(front, waitHandles[remaining]);
                }
            }
            return true;
        });
    }

    // Number of worker threads currently owned by the pool.
    uint32_t GetNumThreads() const
    {
        return (uint32_t)m_workerThreads.size();
    }

private:
    // Locking wrapper around RemoveWaiterNoLock; called by WaitHandle when it lets go of a job.
    void RemoveWaiter(uint32_t jobId)
    {
        std::unique_lock<std::mutex> lock(m_mutex);

        RemoveWaiterNoLock(jobId);
    }

    // Clears the waiter flag of slot `jobId`. If the job has already finished, the slot is recycled right away;
    // otherwise the worker recycles it once the job completes. The caller must hold m_mutex.
    void RemoveWaiterNoLock(uint32_t jobId)
    {
        assert(m_jobs[jobId].hasWaiter);
        m_jobs[jobId].hasWaiter = false;

        if (m_jobs[jobId].status == JobStatus::Finished)
        {
            m_jobs[jobId].status = JobStatus::Free;
            m_freeJobSlots.push_back(jobId);
        }
    }

    // Worker thread main loop: wait for a job (or the exit flag), run it, publish its completion.
    void WorkerThreadProc()
    {
        for (;;)
        {
            uint32_t jobIdx = UINT32_MAX;

            // Declared outside the locked scopes so the callable (and whatever it captured) is destroyed
            // at the end of the iteration without m_mutex being held.
            std::function<void()> func;

            {
                std::unique_lock<std::mutex> lock(m_mutex);

                m_jobAddedCV.wait(lock, [this] { return m_exiting || !m_jobQueue.empty(); });

                if (m_exiting)
                    return;

                m_activeThreads++;

                jobIdx = m_jobQueue.front();
                m_jobQueue.pop_front();

                assert(m_jobs[jobIdx].status == JobStatus::Pending);

                m_jobs[jobIdx].status = JobStatus::Executing;

                // Take the callable out of the slot so a finished slot does not keep captured state alive.
                func                = std::move(m_jobs[jobIdx].func);
                m_jobs[jobIdx].func = nullptr;
            }

            func();

            {
                std::unique_lock<std::mutex> lock(m_mutex);
                m_activeThreads--;

                // Index again instead of caching a reference: m_jobs may have grown while the job ran.
                Job& job = m_jobs[jobIdx];

                if (job.hasWaiter)
                {
                    // Keep the slot until the waiter is removed, so WaitForJobs can observe completion.
                    job.status = JobStatus::Finished;
                }
                else
                {
                    job.status = JobStatus::Free;
                    m_freeJobSlots.push_back(jobIdx);
                }
            }

            m_jobCompletedCV.notify_all();
        }
    }

private:
    bool                     m_exiting       = false;  // Set while Destroy() is shutting the workers down.
    uint32_t                 m_activeThreads = 0;      // Number of workers currently running a job.
    std::vector<std::thread> m_workerThreads;
    std::condition_variable  m_jobAddedCV;             // Signalled when a job is queued or on shutdown.
    std::condition_variable  m_jobCompletedCV;         // Signalled after every completed job.
    std::mutex               m_mutex;
    std::deque<uint32_t>     m_jobQueue;               // Slot indices of pending jobs, in start order.
    std::vector<Job>         m_jobs;                   // Job table addressed by slot index.
    std::vector<uint32_t>    m_freeJobSlots;           // Slot indices available for reuse.
};
// diff --git a/tools/app_framework/afx_vk_renderer.h b/tools/app_framework/afx_vk_renderer.h
// new file mode 100644
// index 0000000..bb44399
// --- /dev/null
// +++ b/tools/app_framework/afx_vk_renderer.h
// @@ -0,0 +1,2007 @@
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
//
// This file is part of the AMD Render Pipeline Shaders SDK which is
// released under the AMD INTERNAL EVALUATION LICENSE.
//
// See file LICENSE.RTF for full license details.
+ +#pragma once + +#ifdef _WIN32 +#define VK_USE_PLATFORM_WIN32_KHR +#else +#error "TODO" +#endif + +#include +#include +#include + +#include "afx_renderer.h" +#include "afx_shader_compiler.h" + +#ifdef RPS_AFX_REQUIRE_IMGUI + +// TODO: ImGui stuff should be platform nonspecifc +#include "backends/imgui_impl_win32.h" +#include "backends/imgui_impl_vulkan.h" + +#endif + +#pragma comment(lib, "vulkan-1.lib") + +inline void _ThrowIfNotSuccessVK(VkResult result, const char* text, const char* file, int line) +{ + if (result != VK_SUCCESS) + { + fprintf_s(stderr, "\nVK app failed ( VkResult = 0x%x, `%s` @ `%s` line %d", result, text, file, line); + + assert(false); + throw result; + } +} + +inline void _ThrowIfFailedVK(VkResult result, const char* text, const char* file, int line) +{ + if (result < 0) + { + fprintf_s(stderr, "\nVK app failed ( VkResult = 0x%x, `%s` @ `%s` line %d", result, text, file, line); + + assert(false); + throw result; + } +} + +inline void _SkipIfNotSuccessVk(VkResult result, const char* text, const char* file, int line) +{ + if (result != VK_SUCCESS) + { + fprintf_s(stderr, "\nVK app skipped ( VkResult = 0x%x, `%s` @ `%s` line %d", result, text, file, line); + exit(0); + } +} + +#define ThrowIfNotSuccessVK(EXPR) _ThrowIfNotSuccessVK(EXPR, #EXPR, __FILE__, __LINE__) +#define ThrowIfFailedVK(EXPR) _ThrowIfFailedVK(EXPR, #EXPR, __FILE__, __LINE__) +#define SkipIfNotSuccessVk(EXPR) _SkipIfNotSuccessVk(EXPR, #EXPR, __FILE__, __LINE__) + +class RpsAfxVulkanRenderer : public RpsAfxRendererBase +{ +public: + struct DescriptorHeapSizeRequirement + { + uint32_t staticCount; + uint32_t dynamicCountPerFrame; + }; + + struct InitTempResources + { + std::vector buffers; + std::vector images; + std::vector memory; + }; + + virtual bool Init(void* hWindow) override final + { + m_hWnd = (HWND)hWindow; + + RECT clientRect = {}; + ::GetClientRect(m_hWnd, &clientRect); + m_width = clientRect.right - clientRect.left; + m_height = clientRect.bottom - clientRect.top; 
+ + InitVkInstance(); + InitVkDevice(); + CreateSwapChain(); + OnPostResize(); + + ActiveCommandList cmdList = BeginCmdList(RPS_AFX_QUEUE_INDEX_GFX); + + // To keep alive until init cmdlist is executed. + InitTempResources tempResources; + OnInit(cmdList.cmdBuf, tempResources); + + EndCmdList(cmdList); + + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &cmdList.cmdBuf; + + FlushUploadBuffer(); + + vkQueueSubmit(m_queues[RPS_AFX_QUEUE_INDEX_GFX], 1, &submitInfo, VK_NULL_HANDLE); + + RecycleCmdList(cmdList); + + WaitForGpuIdle(); + + for (auto& buf : tempResources.buffers) + { + vkDestroyBuffer(m_device, buf, nullptr); + } + for (auto& img : tempResources.images) + { + vkDestroyImage(m_device, img, nullptr); + } + for (auto& mem : tempResources.memory) + { + vkFreeMemory(m_device, mem, nullptr); + } + + OnPostInit(); + + return true; + } + + virtual void Tick() override final + { + OnUpdate(m_frameCounter); + + WaitForSwapChainBuffer(); + + ResetFrameDynamicDescriptorPools(); + + ResetCommandPools(); + + m_frameConstantUsage = 0; + + OnRender(m_frameCounter); + + VkPresentInfoKHR presentInfo = {}; + presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; + presentInfo.swapchainCount = 1; + presentInfo.pSwapchains = &m_swapChain; + presentInfo.pImageIndices = &m_backBufferIndex; + + if (m_pendingPresentSemaphore != VK_NULL_HANDLE) + { + presentInfo.pWaitSemaphores = &m_pendingPresentSemaphore; + m_pendingPresentSemaphore = VK_NULL_HANDLE; + } + + ThrowIfFailedVK(vkQueuePresentKHR(m_presentQueue, &presentInfo)); + m_frameCounter++; + } + + virtual void CleanUp() override final + { + WaitForGpuIdle(); + + OnCleanUp(); + + for (auto& img : m_swapChainImages) + { + vkDestroyImageView(m_device, img.imageView, nullptr); + } + m_swapChainImages.clear(); + vkDestroySwapchainKHR(m_device, m_swapChain, nullptr); + + for (auto& ff : m_frameFences) + { + 
vkDestroySemaphore(m_device, ff.imageAcquiredSemaphore, nullptr); + vkDestroySemaphore(m_device, ff.renderCompleteSemaphore, nullptr); + vkDestroyFence(m_device, ff.renderCompleteFence, nullptr); + } + m_frameFences.clear(); + + if (m_surface != VK_NULL_HANDLE) + { + vkDestroySurfaceKHR(m_vkInstance, m_surface, nullptr); + m_surface = VK_NULL_HANDLE; + } + + for (auto& memPools : m_staticAssetMemoryPools) + { + for (auto& mem : memPools.pools) + { + vkFreeMemory(m_device, mem, nullptr); + } + } + + for (uint32_t i = 0, numSemaphores = uint32_t(m_queueSemaphores.size()); i < numSemaphores; i++) + { + if (m_queueSemaphores[i] != VK_NULL_HANDLE) + { + vkDestroySemaphore(m_device, m_queueSemaphores[i], nullptr); + } + } + m_queueSemaphores.clear(); + + for (uint32_t i = 0; i < _countof(m_cmdPools); i++) + { + for (auto& pools : m_cmdPools[i]) + { + for (auto& pool : pools) + { + if (!pool.cmdBuffers.empty()) + { + vkFreeCommandBuffers( + m_device, pool.cmdPool, uint32_t(pool.cmdBuffers.size()), pool.cmdBuffers.data()); + } + vkDestroyCommandPool(m_device, pool.cmdPool, nullptr); + } + } + m_cmdPools[i].clear(); + } + + if (m_descriptorPool != VK_NULL_HANDLE) + { + vkDestroyDescriptorPool(m_device, m_descriptorPool, nullptr); + m_descriptorPool = VK_NULL_HANDLE; + } + + if (m_constantBuffer != VK_NULL_HANDLE) + { + vkDestroyBuffer(m_device, m_constantBuffer, nullptr); + m_constantBuffer = VK_NULL_HANDLE; + } + + if (m_constantBufferMemory != VK_NULL_HANDLE) + { + vkFreeMemory(m_device, m_constantBufferMemory, nullptr); + m_constantBufferMemory = VK_NULL_HANDLE; + } + + for (auto& dp : m_frameDynamicDescriptorPools) + { + for (auto& p : dp.pools) + { + vkDestroyDescriptorPool(m_device, p, nullptr); + } + } + m_frameDynamicDescriptorPools.clear(); + + vkDestroyDevice(m_device, nullptr); + + if (m_vkDebugMsger != VK_NULL_HANDLE) + { + auto pfn_destroyDebugUtils = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr( + m_vkInstance, "vkDestroyDebugUtilsMessengerEXT"); 
+ if (pfn_destroyDebugUtils != nullptr) + { + pfn_destroyDebugUtils(m_vkInstance, m_vkDebugMsger, nullptr); + } + } + + vkDestroyInstance(m_vkInstance, nullptr); + } + + virtual void OnResize(uint32_t width, uint32_t height) override final + { + if (m_swapChain != VK_NULL_HANDLE) + { + WaitForGpuIdle(); + + if ((width > 0 && height > 0) && ((m_width != width) || (m_height != height))) + { + OnPreResize(); + + m_width = width; + m_height = height; + + CreateSwapChain(); + + OnPostResize(); + } + } + } + +protected: + virtual bool WaitForGpuIdle() override final + { + return VK_SUCCESS == vkDeviceWaitIdle(m_device); + } + + virtual void OnPostInit() + { + } + + void WaitForSwapChainBuffer() + { + m_swapChainImageSemaphoreIndex = m_backBufferIndex; + + ThrowIfNotSuccessVK(vkAcquireNextImageKHR(m_device, + m_swapChain, + UINT64_MAX, + m_frameFences[m_backBufferIndex].imageAcquiredSemaphore, + VK_NULL_HANDLE, + &m_backBufferIndex)); + + if ((m_frameCounter % m_swapChainImages.size()) != m_backBufferIndex) + m_frameCounter = m_backBufferIndex; + + ThrowIfNotSuccessVK( + vkWaitForFences(m_device, 1, &m_frameFences[m_backBufferIndex].renderCompleteFence, VK_TRUE, UINT64_MAX)); + + ThrowIfNotSuccessVK(vkResetFences(m_device, 1, &m_frameFences[m_backBufferIndex].renderCompleteFence)); + } + + VkImage GetBackBuffer() const + { + return m_swapChainImages[m_backBufferIndex].image; + } + + VkImageView GetBackBufferView() const + { + return m_swapChainImages[m_backBufferIndex].imageView; + } + + const std::vector& GetBackBuffers(RpsResourceDesc& backBufferDesc) const + { + backBufferDesc.type = RPS_RESOURCE_TYPE_IMAGE_2D; + backBufferDesc.temporalLayers = uint32_t(m_swapChainImages.size()); + backBufferDesc.flags = 0; + backBufferDesc.image.arrayLayers = 1; + backBufferDesc.image.mipLevels = 1; + backBufferDesc.image.format = rpsFormatFromVK(m_swapChainFormat.format); + backBufferDesc.image.width = m_width; + backBufferDesc.image.height = m_height; + 
backBufferDesc.image.sampleCount = 1; + + return m_swapChainImageRpsResources; + } + + uint32_t GetBackBuffers(RpsResourceDesc& backBufferDesc, + RpsRuntimeResource* phResources, + uint32_t maxResources) const + { + auto runtimeResources = GetBackBuffers(backBufferDesc); + + const uint32_t numResToCopy = std::min(uint32_t(runtimeResources.size()), maxResources); + + std::copy(runtimeResources.begin(), runtimeResources.begin() + numResToCopy, phResources); + + return numResToCopy; + } + +public: + struct ActiveCommandList + { + uint32_t backBufferIndex; + uint32_t queueIndex; + uint32_t poolIndex; + VkCommandBuffer cmdBuf; + VkCommandPool cmdPool; + + operator VkCommandBuffer() const + { + return cmdBuf; + } + }; + +protected: + virtual void OnInit(VkCommandBuffer initCmdList, InitTempResources& tempResources) + { + } + + virtual void OnCleanUp() + { + } + + virtual void OnPreResize() + { + } + + virtual void OnPostResize() + { + m_frameCounter = 0; + } + + virtual void OnUpdate(uint32_t frameIndex) + { + } + + virtual void OnRender(uint32_t frameIndex) + { + ActiveCommandList cmdList = BeginCmdList(RPS_AFX_QUEUE_INDEX_GFX); + + VkImageMemoryBarrier imageBarrier = {}; + imageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imageBarrier.srcAccessMask = 0; + imageBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + imageBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imageBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imageBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + imageBarrier.image = GetBackBuffer(); + imageBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imageBarrier.subresourceRange.layerCount = 1; + imageBarrier.subresourceRange.levelCount = 1; + + vkCmdPipelineBarrier(cmdList, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, + 0, + nullptr, + 0, + nullptr, + 1, + &imageBarrier); + + VkClearColorValue clearColor = 
{{0.0f, 0.2f, 0.4f, 1.0f}}; + vkCmdClearColorImage(cmdList, + GetBackBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + &clearColor, + 1, + &imageBarrier.subresourceRange); + + imageBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + imageBarrier.dstAccessMask = 0; + imageBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + imageBarrier.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + + vkCmdPipelineBarrier(cmdList, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + 0, + 0, + nullptr, + 0, + nullptr, + 1, + &imageBarrier); + + EndCmdList(cmdList); + + SubmitCmdLists(&cmdList, 1, true); + + RecycleCmdList(cmdList); + } + + virtual RpsResult CreateRpsRuntimeDevice(const RpsDeviceCreateInfo& createInfo, RpsDevice& device) override + { + RpsVKRuntimeDeviceCreateInfo vkRuntimeDeviceCreateInfo = {}; + vkRuntimeDeviceCreateInfo.pDeviceCreateInfo = &createInfo; + vkRuntimeDeviceCreateInfo.hVkDevice = m_device; + vkRuntimeDeviceCreateInfo.hVkPhysicalDevice = m_physicalDevice; + + RpsRuntimeDeviceCreateInfo runtimeDeviceCreateInfo = {}; + runtimeDeviceCreateInfo.pUserContext = this; + runtimeDeviceCreateInfo.callbacks.pfnRecordDebugMarker = &RecordDebugMarker; + runtimeDeviceCreateInfo.callbacks.pfnSetDebugName = &SetDebugName; + + vkRuntimeDeviceCreateInfo.pRuntimeCreateInfo = &runtimeDeviceCreateInfo; + + return rpsVKRuntimeDeviceCreate(&vkRuntimeDeviceCreateInfo, &device); + } + + RpsResult ExecuteRenderGraph(uint32_t frameIndex, + RpsRenderGraph hRenderGraph, + bool bWaitSwapChain = true, + bool frameEnd = true) + { + RpsRenderGraphBatchLayout batchLayout = {}; + RpsResult result = rpsRenderGraphGetBatchLayout(hRenderGraph, &batchLayout); + if (RPS_FAILED(result)) + return result; + + ReserveSemaphores(batchLayout.numFenceSignals); + + for (uint32_t iBatch = 0; iBatch < batchLayout.numCmdBatches; iBatch++) + { + auto& batch = batchLayout.pCmdBatches[iBatch]; + + ActiveCommandList cmdList = 
BeginCmdList(RpsAfxQueueIndices(batch.queueIndex)); + + RpsRenderGraphRecordCommandInfo recordInfo = {}; + + recordInfo.hCmdBuffer = rpsVKCommandBufferToHandle(cmdList.cmdBuf); + recordInfo.pUserContext = this; + recordInfo.frameIndex = frameIndex; + recordInfo.cmdBeginIndex = batch.cmdBegin; + recordInfo.numCmds = batch.numCmds; + + if (g_DebugMarkers) + { + recordInfo.flags = RPS_RECORD_COMMAND_FLAG_ENABLE_COMMAND_DEBUG_MARKERS; + } + + result = rpsRenderGraphRecordCommands(hRenderGraph, &recordInfo); + if (RPS_FAILED(result)) + return result; + + EndCmdList(cmdList); + + SubmitCmdLists(&cmdList, + 1, + frameEnd && ((iBatch + 1) == batchLayout.numCmdBatches), + batch.numWaitFences, + batchLayout.pWaitFenceIndices + batch.waitFencesBegin, + batch.signalFenceIndex, + bWaitSwapChain && (iBatch == 0)); // TODO - RPS to mark first access to swapchain image + + RecycleCmdList(cmdList); + } + + return RPS_OK; + } + +#ifdef RPS_AFX_REQUIRE_IMGUI + + void StartImGuiDraw() + { + ImGui_ImplWin32_NewFrame(); + ImGui_ImplVulkan_NewFrame(); + ImGui::NewFrame(); + } + + RpsResult FinishImGuiDraw(RpsRenderGraph hRenderGraph) + { + ImGui::Render(); + + RpsRenderGraphBatchLayout batchLayout = {}; + RpsResult result = rpsRenderGraphGetBatchLayout(hRenderGraph, &batchLayout); + REQUIRE_RPS_OK(result); + + ActiveCommandList cmdList = BeginCmdList(RPS_AFX_QUEUE_INDEX_GFX); + + VkImageMemoryBarrier graphToGuiBarrier = {}; + graphToGuiBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + //TODO Better solution for srcAccessMask + graphToGuiBarrier.srcAccessMask = 0; + graphToGuiBarrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + graphToGuiBarrier.oldLayout = + m_frameCounter < m_swapChainImages.size() ? 
VK_IMAGE_LAYOUT_UNDEFINED : VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + graphToGuiBarrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + graphToGuiBarrier.image = GetBackBuffer(); + graphToGuiBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + graphToGuiBarrier.subresourceRange.layerCount = 1; + graphToGuiBarrier.subresourceRange.levelCount = 1; + + vkCmdPipelineBarrier(cmdList, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + {}, + 0, + nullptr, + 0, + nullptr, + 1, + &graphToGuiBarrier); + + VkClearValue clearColor = {}; + + VkRenderPassBeginInfo rpBegin = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO}; + rpBegin.framebuffer = m_imguiFrameBuffers[m_backBufferIndex]; + rpBegin.clearValueCount = 1; + rpBegin.pClearValues = &clearColor; + rpBegin.renderPass = m_imguiRenderPass; + rpBegin.renderArea = VkRect2D{{0, 0}, { m_width, m_height }}; + + vkCmdBeginRenderPass(cmdList, &rpBegin, VK_SUBPASS_CONTENTS_INLINE); + + ImGui_ImplVulkan_RenderDrawData(ImGui::GetDrawData(), cmdList); + + vkCmdEndRenderPass(cmdList); + + EndCmdList(cmdList); + + SubmitCmdLists(&cmdList, 1, true); + + RecycleCmdList(cmdList); + + return RPS_OK; + } +#endif + + void SubmitCmdLists(ActiveCommandList* pCmdLists, + uint32_t numCmdLists, + bool frameEnd, + uint32_t waitSemaphoreCount = 0, + const uint32_t* pWaitSemaphoreIndices = nullptr, + uint32_t signalSemaphoreIndex = UINT32_MAX, + bool bWaitSwapChain = false) + + { + assert(numCmdLists > 0); + + FlushUploadBuffer(); + + VkCommandBuffer* pCmdBufs = &pCmdLists[0].cmdBuf; + if (numCmdLists > 1) + { + m_cmdBufsToSubmit.resize(numCmdLists); + for (uint32_t i = 0; i < m_cmdBufsToSubmit.size(); i++) + { + m_cmdBufsToSubmit[i] = pCmdLists[i].cmdBuf; + } + pCmdBufs = m_cmdBufsToSubmit.data(); + } + + VkPipelineStageFlags submitWaitStage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + + uint32_t numWaitSemaphores = 0; + VkSemaphore waitSemaphores[RPS_MAX_QUEUES + 1] = {}; + + if (bWaitSwapChain) + { + 
waitSemaphores[numWaitSemaphores] = m_frameFences[m_swapChainImageSemaphoreIndex].imageAcquiredSemaphore; + ++numWaitSemaphores; + } + + assert(waitSemaphoreCount <= RPS_MAX_QUEUES); + + for (uint32_t i = 0; (i < waitSemaphoreCount) && (i < RPS_MAX_QUEUES); i++) + { + assert(pWaitSemaphoreIndices[i] < m_queueSemaphores.size()); + waitSemaphores[numWaitSemaphores] = m_queueSemaphores[pWaitSemaphoreIndices[i]]; + ++numWaitSemaphores; + } + + VkFence submitFence = VK_NULL_HANDLE; + + uint32_t numSignalSemaphores = 0; + VkSemaphore signalSemaphores[2] = {}; + + if (frameEnd) + { + if (m_presentQueue != m_queues[pCmdLists->queueIndex]) + { + m_pendingPresentSemaphore = m_frameFences[m_backBufferIndex].renderCompleteSemaphore; + signalSemaphores[numSignalSemaphores] = m_pendingPresentSemaphore; + ++numSignalSemaphores; + } + + submitFence = m_frameFences[m_backBufferIndex].renderCompleteFence; + } + + if (signalSemaphoreIndex != UINT32_MAX) + { + signalSemaphores[numSignalSemaphores] = m_queueSemaphores[signalSemaphoreIndex]; + ++numSignalSemaphores; + } + + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = numCmdLists; + submitInfo.pCommandBuffers = pCmdBufs; + submitInfo.pWaitSemaphores = waitSemaphores; + submitInfo.waitSemaphoreCount = numWaitSemaphores; + submitInfo.pSignalSemaphores = signalSemaphores; + submitInfo.signalSemaphoreCount = numSignalSemaphores; + submitInfo.pWaitDstStageMask = &submitWaitStage; + + vkQueueSubmit(m_queues[pCmdLists->queueIndex], 1, &submitInfo, submitFence); + } + + ActiveCommandList BeginCmdList(RpsAfxQueueIndices queueIndex, + const VkCommandBufferInheritanceInfo* pInheritanceInfo = nullptr) + { + ActiveCommandList result = {}; + result.backBufferIndex = m_backBufferIndex; + result.queueIndex = queueIndex; + result.cmdPool = VK_NULL_HANDLE; + + std::lock_guard lock(m_cmdListMutex); + + if (m_cmdPools[queueIndex].size() <= m_swapChainImages.size()) + { + 
m_cmdPools[queueIndex].resize(m_swapChainImages.size()); + } + + uint32_t freeIdx = 0; + for (; freeIdx < m_cmdPools[queueIndex][m_backBufferIndex].size(); freeIdx++) + { + if (!m_cmdPools[queueIndex][m_backBufferIndex][freeIdx].inUse) + break; + } + + if (freeIdx == m_cmdPools[queueIndex][m_backBufferIndex].size()) + { + VkCommandPoolCreateInfo cmdPoolInfo = {}; + cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + cmdPoolInfo.queueFamilyIndex = m_rpsQueueIndexToVkQueueFamilyMap[queueIndex]; + + CommandPool newPool = {}; + ThrowIfNotSuccessVK(vkCreateCommandPool(m_device, &cmdPoolInfo, nullptr, &newPool.cmdPool)); + + m_cmdPools[queueIndex][m_backBufferIndex].emplace_back(newPool); + } + + CommandPool* pPool = &m_cmdPools[queueIndex][m_backBufferIndex][freeIdx]; + pPool->inUse = true; + result.poolIndex = freeIdx; + result.cmdPool = pPool->cmdPool; + + VkCommandBufferAllocateInfo allocInfo = {}; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.commandBufferCount = 1; + allocInfo.commandPool = result.cmdPool; + allocInfo.level = + (pInheritanceInfo == nullptr) ? 
VK_COMMAND_BUFFER_LEVEL_PRIMARY : VK_COMMAND_BUFFER_LEVEL_SECONDARY; + + ThrowIfNotSuccessVK(vkAllocateCommandBuffers(m_device, &allocInfo, &result.cmdBuf)); + + pPool->cmdBuffers.push_back(result.cmdBuf); + + VkCommandBufferBeginInfo cmdBeginInfo = {}; + cmdBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cmdBeginInfo.pInheritanceInfo = pInheritanceInfo; + cmdBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + if (pInheritanceInfo) + cmdBeginInfo.flags |= VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT; + + ThrowIfNotSuccessVK(vkBeginCommandBuffer(result.cmdBuf, &cmdBeginInfo)); + + return result; + } + + void EndCmdList(ActiveCommandList& cmdList) + { + assert(cmdList.cmdBuf != VK_NULL_HANDLE); + assert(cmdList.backBufferIndex == m_backBufferIndex); + + std::lock_guard lock(m_cmdListMutex); + + ThrowIfNotSuccessVK(vkEndCommandBuffer(cmdList.cmdBuf)); + + m_cmdPools[cmdList.queueIndex][m_backBufferIndex][cmdList.poolIndex].inUse = false; + cmdList.cmdPool = VK_NULL_HANDLE; + } + + void RecycleCmdList(ActiveCommandList& cmdList) + { + cmdList.cmdBuf = VK_NULL_HANDLE; + } + + VkResult AllocFrameDescriptorSet(VkDescriptorSetLayout* pLayouts, uint32_t numSets, VkDescriptorSet* pSets) + { + VkResult result = VK_SUCCESS; + + auto& poolInfo = m_frameDynamicDescriptorPools[m_backBufferIndex]; + + VkDescriptorSetAllocateInfo allocInfo = {}; + allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + allocInfo.descriptorSetCount = numSets; + allocInfo.pSetLayouts = pLayouts; + + for (uint32_t iTry = 0; iTry < 2; iTry++) + { + if (!poolInfo.pools.empty()) + { + allocInfo.descriptorPool = poolInfo.pools[poolInfo.current]; + result = vkAllocateDescriptorSets(m_device, &allocInfo, pSets); + if (result != VK_ERROR_OUT_OF_POOL_MEMORY) + return result; + } + + if (iTry == 0) + { + if ((poolInfo.current + 1) < poolInfo.pools.size()) + { + poolInfo.current++; + } + else + { + VkDescriptorPoolCreateInfo dpInfo = {}; + dpInfo.sType = 
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + dpInfo.maxSets = m_defaultFrameDynamicDescriptorPoolMaxSets; + dpInfo.pPoolSizes = m_defaultFrameDynamicDescriptorPoolSizes.data(); + dpInfo.poolSizeCount = (uint32_t)m_defaultFrameDynamicDescriptorPoolSizes.size(); + + VkDescriptorPool newPool; + ThrowIfFailedVK(vkCreateDescriptorPool(m_device, &dpInfo, nullptr, &newPool)); + poolInfo.pools.push_back(newPool); + } + } + } + + return result; + } + + void ResetFrameDynamicDescriptorPools() + { + m_frameDynamicDescriptorPools[m_backBufferIndex].current = 0; + for (auto& pool : m_frameDynamicDescriptorPools[m_backBufferIndex].pools) + { + ThrowIfFailedVK(vkResetDescriptorPool(m_device, pool, 0)); + } + } + + void ResetCommandPools() + { + for (uint32_t iQ = 0; iQ < RPS_AFX_QUEUE_INDEX_COUNT; iQ++) + { + if (m_backBufferIndex < m_cmdPools[iQ].size()) + { + for (auto& pool : m_cmdPools[iQ][m_backBufferIndex]) + { + if (!pool.cmdBuffers.empty()) + { + vkFreeCommandBuffers( + m_device, pool.cmdPool, uint32_t(pool.cmdBuffers.size()), pool.cmdBuffers.data()); + pool.cmdBuffers.clear(); + } + + ThrowIfFailedVK(vkResetCommandPool(m_device, pool.cmdPool, 0)); + } + } + } + } + + void AppendWriteDescriptorSet(VkWriteDescriptorSet* pOut, + VkDescriptorSet dstSet, + uint32_t binding, + uint32_t count, + VkDescriptorType type, + uint32_t dstArrayElement, + const VkDescriptorImageInfo* pImageInfos, + const VkDescriptorBufferInfo* pBufferInfos, + const VkBufferView* pTexelBufferViews) + { + pOut->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + pOut->dstSet = dstSet; + pOut->dstBinding = binding; + pOut->descriptorCount = count; + pOut->descriptorType = type; + pOut->dstArrayElement = dstArrayElement; + pOut->pImageInfo = pImageInfos; + pOut->pBufferInfo = pBufferInfos; + pOut->pTexelBufferView = pTexelBufferViews; + } + + void AppendWriteDescriptorSetBuffers(VkWriteDescriptorSet* pOut, + VkDescriptorSet dstSet, + uint32_t binding, + uint32_t count, + VkDescriptorType type, + const 
VkDescriptorBufferInfo* pBufferInfos) + { + AppendWriteDescriptorSet(pOut, dstSet, binding, count, type, 0, nullptr, pBufferInfos, nullptr); + } + + void AppendWriteDescriptorSetImages(VkWriteDescriptorSet* pOut, + VkDescriptorSet dstSet, + uint32_t binding, + uint32_t count, + VkDescriptorType type, + const VkDescriptorImageInfo* pImageInfos) + { + AppendWriteDescriptorSet(pOut, dstSet, binding, count, type, 0, pImageInfos, nullptr, nullptr); + } + + VkDescriptorBufferInfo AllocAndWriteFrameConstants(const void* pSrcData, uint32_t size) + { + VkDescriptorBufferInfo result = {}; + + uint32_t allocSize = (size + (uint32_t)m_physicalDeviceProperties.limits.minUniformBufferOffsetAlignment - 1) & + ~((uint32_t)m_physicalDeviceProperties.limits.minUniformBufferOffsetAlignment - 1); + + uint32_t newOffset = m_frameConstantUsage + allocSize; + if (newOffset > m_maxConstantSizePerFrame) + { + throw; + } + + const uint32_t totalOffset = m_maxConstantSizePerFrame * m_backBufferIndex + m_frameConstantUsage; + memcpy(m_constantBufferCpuVA + totalOffset, pSrcData, size); + + m_frameConstantUsage = newOffset; + result.buffer = m_constantBuffer; + result.offset = totalOffset; + result.range = size; + + return result; + } + + void FlushUploadBuffer() + { + if (m_constantBufferNeedsFlushAfterUpdate) + { + VkMappedMemoryRange currRange = {}; + currRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + currRange.memory = m_constantBufferMemory; + currRange.offset = m_maxConstantSizePerFrame * m_backBufferIndex; + currRange.size = (m_frameConstantUsage + m_physicalDeviceProperties.limits.nonCoherentAtomSize - 1) & + ~(m_physicalDeviceProperties.limits.nonCoherentAtomSize - 1); + + ThrowIfFailedVK(vkFlushMappedMemoryRanges(m_device, 1, &currRange)); + } + } + + VkBuffer CreateAndBindStaticBuffer(VkDeviceSize size, VkBufferUsageFlags usage) + { + VkBufferCreateInfo bufCI = {}; + bufCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + bufCI.usage = usage; + bufCI.sharingMode = 
VK_SHARING_MODE_EXCLUSIVE; + bufCI.queueFamilyIndexCount = 0; + bufCI.pQueueFamilyIndices = nullptr; + bufCI.size = size; + + VkBuffer buf; + ThrowIfFailedVK(vkCreateBuffer(m_device, &bufCI, nullptr, &buf)); + + AllocAndBindStaticMemory(buf); + + return buf; + } + + VkImage CreateAndBindStaticImage(VkImageType type, + VkImageUsageFlags usage, + VkFormat format, + uint32_t width, + uint32_t height, + uint32_t depth, + uint32_t mipLevels, + uint32_t arrayLayers) + { + VkImageCreateInfo imgCI = {}; + imgCI.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + imgCI.imageType = VK_IMAGE_TYPE_2D; + imgCI.format = format; + imgCI.mipLevels = mipLevels; + imgCI.arrayLayers = arrayLayers; + imgCI.samples = VK_SAMPLE_COUNT_1_BIT; + imgCI.tiling = VK_IMAGE_TILING_OPTIMAL; + imgCI.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + imgCI.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imgCI.extent = {width, height, depth}; + imgCI.usage = usage; + + VkImage img; + ThrowIfFailedVK(vkCreateImage(m_device, &imgCI, nullptr, &img)); + + AllocAndBindStaticMemory(img); + + return img; + } + + void AllocAndBindStaticMemory(VkImage image) + { + VkMemoryRequirements req = {}; + vkGetImageMemoryRequirements(m_device, image, &req); + + VkDeviceSize offset; + VkDeviceMemory mem = AllocStaticMemory(req, &offset); + + ThrowIfFailedVK(vkBindImageMemory(m_device, image, mem, offset)); + } + + void AllocAndBindStaticMemory(VkBuffer buffer) + { + VkMemoryRequirements req = {}; + vkGetBufferMemoryRequirements(m_device, buffer, &req); + + VkDeviceSize offset; + VkDeviceMemory mem = AllocStaticMemory(req, &offset); + + ThrowIfFailedVK(vkBindBufferMemory(m_device, buffer, mem, offset)); + } + + uint32_t FindMemoryTypeIndex(uint32_t bitMask, bool preferLocal, bool needCpuWrite, bool needCpuRead) + { + uint32_t typeIdx = UINT32_MAX; + for (uint32_t iType = 0; iType < m_deviceMemoryProperties.memoryTypeCount; iType++) + { + if (bitMask & (1 << iType)) + { + const auto memTypeFlags = 
m_deviceMemoryProperties.memoryTypes[iType].propertyFlags; + + // Require visible + if ((needCpuWrite || needCpuRead) && !(memTypeFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) + { + continue; + } + + typeIdx = iType; + + // Prefer local + if (preferLocal && (memTypeFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) + { + break; + } + + // Prefer cached + if (needCpuRead && (memTypeFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT)) + { + break; + } + } + } + return typeIdx; + } + + VkDeviceMemory AllocStaticMemory(const VkMemoryRequirements& req, VkDeviceSize* pOffset) + { + uint32_t typeIdx = FindMemoryTypeIndex(req.memoryTypeBits, true, false, false); + + auto& pool = m_staticAssetMemoryPools[typeIdx]; + + VkDeviceSize alignedOffset = ((pool.lastUsage + req.alignment - 1) & ~(req.alignment - 1)); + if ((alignedOffset + req.size) > pool.lastCapacity) + { + static const VkDeviceSize DEFAULT_POOL_SIZE = 64 * 1024 * 1024; + + VkMemoryAllocateInfo ai = {}; + ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + ai.memoryTypeIndex = typeIdx; + ai.allocationSize = std::max(req.size, DEFAULT_POOL_SIZE); + + VkDeviceMemory newMem; + ThrowIfFailedVK(vkAllocateMemory(m_device, &ai, nullptr, &newMem)); + + pool.pools.push_back(newMem); + pool.lastCapacity = ai.allocationSize; + pool.lastUsage = 0; + alignedOffset = 0; + } + + pool.lastUsage = (alignedOffset + req.size); + *pOffset = alignedOffset; + return pool.pools.back(); + } + + uint64_t CalcGuaranteedCompletedFrameIndexForRps() const + { + // For VK we wait for swapchain before submitting, so max queued frame count is swapChainImages + 1. + const uint32_t maxQueuedFrames = uint32_t(m_swapChainImages.size() + 1); + + return (m_frameCounter > maxQueuedFrames) ? 
m_frameCounter - maxQueuedFrames + : RPS_GPU_COMPLETED_FRAME_INDEX_NONE; + } + + void CreateStaticCheckerboardTexture(VkImageView& textureView, + VkImage& texture, + VkCommandBuffer initCmdBuf, + InitTempResources& tempResources, + uint32_t width, + uint32_t height, + const float tintColor[4]) + { + // Texture data contains 4 channels (RGBA) with unnormalized 8-bit values, this is the most commonly supported format + VkFormat format = VK_FORMAT_R8G8B8A8_UNORM; + uint32_t texturePixelSize = 4; + + const uint32_t rowPitch = width * texturePixelSize; + const uint32_t cellPitch = rowPitch >> 3; // The width of a cell in the checkboard texture. + const uint32_t cellHeight = width >> 3; // The height of a cell in the checkerboard texture. + const uint32_t textureSize = rowPitch * height; + + std::vector data(textureSize); + uint8_t* textureData = &data[0]; + +#define RPS_AFX_SCALE_BYTE(B, S) (std::max(0, std::min(0xff, (int32_t((B) * (S)))))) + + for (uint32_t n = 0; n < textureSize; n += texturePixelSize) + { + uint32_t x = n % rowPitch; + uint32_t y = n / rowPitch; + uint32_t i = x / cellPitch; + uint32_t j = y / cellHeight; + + if (i % 2 == j % 2) + { + textureData[n] = RPS_AFX_SCALE_BYTE(0xa0, tintColor[0]); // R + textureData[n + 1] = RPS_AFX_SCALE_BYTE(0xa0, tintColor[1]); // G + textureData[n + 2] = RPS_AFX_SCALE_BYTE(0xa0, tintColor[2]); // B + textureData[n + 3] = RPS_AFX_SCALE_BYTE(0xff, tintColor[3]); // A + } + else + { + textureData[n] = RPS_AFX_SCALE_BYTE(0xff, tintColor[0]); // R + textureData[n + 1] = RPS_AFX_SCALE_BYTE(0xff, tintColor[1]); // G + textureData[n + 2] = RPS_AFX_SCALE_BYTE(0xff, tintColor[2]); // B + textureData[n + 3] = RPS_AFX_SCALE_BYTE(0xff, tintColor[3]); // A + } + } + +#undef RPS_AFX_SCALE_BYTE + + { + auto textureDataUploadBuf = AllocAndWriteFrameConstants(textureData, textureSize); + + texture = CreateAndBindStaticImage(VK_IMAGE_TYPE_2D, + VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, + format, + width, + height, + 
1, + 1, + 1); + + VkBufferImageCopy bufferCopyRegion = {}; + bufferCopyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + bufferCopyRegion.imageSubresource.mipLevel = 0; + bufferCopyRegion.imageSubresource.baseArrayLayer = 0; + bufferCopyRegion.imageSubresource.layerCount = 1; + bufferCopyRegion.imageExtent.width = width; + bufferCopyRegion.imageExtent.height = height; + bufferCopyRegion.imageExtent.depth = 1; + bufferCopyRegion.bufferOffset = textureDataUploadBuf.offset; + + VkImageSubresourceRange subresourceRange = {}; + + // Transition the texture image layout to transfer target, so we can safely copy our buffer data to it. + VkImageMemoryBarrier imageMemoryBarrier = {}; + imageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imageMemoryBarrier.image = texture; + imageMemoryBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imageMemoryBarrier.subresourceRange.baseMipLevel = 0; + imageMemoryBarrier.subresourceRange.levelCount = 1; + imageMemoryBarrier.subresourceRange.layerCount = 1; + imageMemoryBarrier.srcAccessMask = 0; + imageMemoryBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + // Insert a memory dependency at the proper pipeline stages that will execute the image layout transition + // Source pipeline stage is host write/read exection (VK_PIPELINE_STAGE_HOST_BIT) + // Destination pipeline stage is copy command exection (VK_PIPELINE_STAGE_TRANSFER_BIT) + vkCmdPipelineBarrier(initCmdBuf, + VK_PIPELINE_STAGE_HOST_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, + 0, + nullptr, + 0, + nullptr, + 1, + &imageMemoryBarrier); + + // Copy mip levels from staging buffer + vkCmdCopyBufferToImage(initCmdBuf, + textureDataUploadBuf.buffer, + texture, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, + &bufferCopyRegion); + + // Once the data has been uploaded we transfer to the texture image to 
the shader read layout, so it can be sampled from + imageMemoryBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + vkCmdPipelineBarrier(initCmdBuf, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + 0, + 0, + nullptr, + 0, + nullptr, + 1, + &imageMemoryBarrier); + } + + // Create image view + // Textures are not directly accessed by the shaders and + // are abstracted by image views containing additional + // information and sub resource ranges + VkImageViewCreateInfo view = {}; + view.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view.viewType = VK_IMAGE_VIEW_TYPE_2D; + view.format = format; + view.components = { + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A}; + view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + view.subresourceRange.layerCount = 1; + view.subresourceRange.levelCount = 1; + view.image = texture; + ThrowIfFailedVK(vkCreateImageView(m_device, &view, nullptr, &textureView)); + } + + void ReserveSemaphores(uint32_t numSyncs) + { + const uint32_t oldSize = uint32_t(m_queueSemaphores.size()); + if (numSyncs > oldSize) + { + m_queueSemaphores.resize(numSyncs, VK_NULL_HANDLE); + } + + VkSemaphoreCreateInfo semaphoreCI = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO}; + + for (size_t i = oldSize; i < numSyncs; i++) + { + ThrowIfFailedVK(vkCreateSemaphore(m_device, &semaphoreCI, nullptr, &m_queueSemaphores[i])); + } + } + + VkSemaphore GetSemaphore(uint32_t index) const + { + return m_queueSemaphores[index]; + } + +private: + static VKAPI_ATTR VkBool32 VKAPI_CALL + ValidationDebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, + VkDebugUtilsMessageTypeFlagsEXT messageType, + const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, + 
void* pUserData) + { + static const struct + { + VkDebugUtilsMessageSeverityFlagBitsEXT severity; + const char* name; + } s_severityMap[] = { + {VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, "VERBOSE"}, + {VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT, "INFO"}, + {VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT, "WARNING"}, + {VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, "ERROR"}, + }; + + const char* severityName = ""; + for (uint32_t i = 0; i < _countof(s_severityMap); i++) + { + if (s_severityMap[i].severity & messageSeverity) + { + severityName = s_severityMap[i].name; + break; + } + } + + char buf[2048]; + sprintf_s(buf, "\n\n[VK %s]: %s", severityName, pCallbackData->pMessage); + fprintf_s(stderr, "%s", buf); + ::OutputDebugStringA(buf); + + // FAIL(buf); + + return VK_FALSE; + } + + static void RecordDebugMarker(void* pUserContext, const RpsRuntimeOpRecordDebugMarkerArgs* pArgs) + { + auto hCmdBuf = rpsVKCommandBufferFromHandle(pArgs->hCommandBuffer); + auto pThis = static_cast(pUserContext); + + VkDebugUtilsLabelEXT labelInfo = {VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT}; + + switch (pArgs->mode) + { + case RPS_RUNTIME_DEBUG_MARKER_BEGIN: + if (pThis->vkCmdBeginDebugUtilsLabel) + { + labelInfo.pLabelName = pArgs->text; + pThis->vkCmdBeginDebugUtilsLabel(hCmdBuf, &labelInfo); + } + break; + case RPS_RUNTIME_DEBUG_MARKER_END: + if (pThis->vkCmdEndDebugUtilsLabel) + { + pThis->vkCmdEndDebugUtilsLabel(hCmdBuf); + } + break; + case RPS_RUNTIME_DEBUG_MARKER_LABEL: + if (pThis->vkCmdInsertDebugUtilsLabel) + { + labelInfo.pLabelName = pArgs->text; + pThis->vkCmdInsertDebugUtilsLabel(hCmdBuf, &labelInfo); + } + break; + } + } + + static void SetDebugName(void* pUserContext, const RpsRuntimeOpSetDebugNameArgs* pArgs) + { + auto pThis = static_cast(pUserContext); + + if (pThis->vkSetDebugUtilsObjectName) + { + VkDebugUtilsObjectNameInfoEXT objNameInfo = {VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT}; + + objNameInfo.objectHandle = 
reinterpret_cast(static_cast(pArgs->hResource.ptr)); + objNameInfo.objectType = + (pArgs->resourceType == RPS_RESOURCE_TYPE_BUFFER) ? VK_OBJECT_TYPE_BUFFER : VK_OBJECT_TYPE_IMAGE; + objNameInfo.pObjectName = pArgs->name; + + pThis->vkSetDebugUtilsObjectName(pThis->m_device, &objNameInfo); + } + } + + void InitVkInstance() + { + // Query instance layers. + uint32_t instanceLayerPropertyCount = 0; + SkipIfNotSuccessVk(vkEnumerateInstanceLayerProperties(&instanceLayerPropertyCount, nullptr)); + std::vector instanceLayerProperties(instanceLayerPropertyCount); + if (instanceLayerPropertyCount > 0) + { + ThrowIfNotSuccessVK( + vkEnumerateInstanceLayerProperties(&instanceLayerPropertyCount, instanceLayerProperties.data())); + } + + // Query instance extensions. + // + uint32_t instanceExtensionPropertyCount = 0; + + SkipIfNotSuccessVk(vkEnumerateInstanceExtensionProperties(nullptr, &instanceExtensionPropertyCount, nullptr)); + + std::vector instanceExtensionProperties(instanceExtensionPropertyCount); + if (instanceExtensionPropertyCount > 0) + { + ThrowIfNotSuccessVK(vkEnumerateInstanceExtensionProperties( + nullptr, &instanceExtensionPropertyCount, instanceExtensionProperties.data())); + } + + auto findLayer = [&](const char* name) { + return std::find_if(instanceLayerProperties.begin(), + instanceLayerProperties.end(), + [name](const VkLayerProperties& layerProps) { + return strcmp(layerProps.layerName, name) == 0; + }) != instanceLayerProperties.end(); + }; + auto findExt = [&](const char* name) { + return std::find_if(instanceExtensionProperties.begin(), + instanceExtensionProperties.end(), + [name](const VkExtensionProperties& extensionProps) { + return strcmp(extensionProps.extensionName, name) == 0; + }) != instanceExtensionProperties.end(); + }; + + std::vector layerNames; + layerNames.reserve(8); + std::vector instanceExtNames; + instanceExtNames.reserve(16); + + auto findAndAddExt = [&](const char* name) { + if (findExt(name)) + { + 
instanceExtNames.push_back(name); + } + }; + + findAndAddExt(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); + findAndAddExt(VK_KHR_SURFACE_EXTENSION_NAME); + + VkInstanceCreateInfo instanceInfo = {}; + instanceInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + +#if defined(_DEBUG) + + const char* validationLayerName = "VK_LAYER_KHRONOS_validation"; + + VkValidationFeaturesEXT validationFeatures = {}; + const VkValidationFeatureEnableEXT featuresRequested[] = { + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT}; + + if (findLayer(validationLayerName) && findExt(VK_EXT_DEBUG_REPORT_EXTENSION_NAME)) + { + layerNames.push_back(validationLayerName); + instanceExtNames.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME); + + if (m_bGpuValidation) + { + validationFeatures.sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT; + validationFeatures.enabledValidationFeatureCount = _countof(featuresRequested); + validationFeatures.pEnabledValidationFeatures = featuresRequested; + + validationFeatures.pNext = instanceInfo.pNext; + instanceInfo.pNext = &validationFeatures; + } + } +#endif + + findAndAddExt(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + + instanceInfo.enabledLayerCount = (uint32_t)layerNames.size(); + instanceInfo.ppEnabledLayerNames = layerNames.data(); + instanceInfo.enabledExtensionCount = (uint32_t)instanceExtNames.size(); + instanceInfo.ppEnabledExtensionNames = instanceExtNames.data(); + + VkApplicationInfo appInfo = {}; + appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + appInfo.apiVersion = VK_API_VERSION_1_2; + appInfo.pApplicationName = "RPS app"; + appInfo.pEngineName = "RPS_AFX"; + instanceInfo.pApplicationInfo = &appInfo; + + SkipIfNotSuccessVk(vkCreateInstance(&instanceInfo, nullptr, &m_vkInstance)); + + VkDebugUtilsMessengerCreateInfoEXT debugMsgerCI{}; + debugMsgerCI.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; + debugMsgerCI.messageSeverity = 
VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; + if (!m_bValidationErrorOnly) + { + debugMsgerCI.messageSeverity |= + VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT; + } + + debugMsgerCI.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; + debugMsgerCI.pfnUserCallback = ValidationDebugCallback; + + auto pfn_createDebugUtils = + (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(m_vkInstance, "vkCreateDebugUtilsMessengerEXT"); + if (pfn_createDebugUtils != nullptr) + { + ThrowIfNotSuccessVK(pfn_createDebugUtils(m_vkInstance, &debugMsgerCI, nullptr, &m_vkDebugMsger)); + } + + uint32_t gpuCount = 0; + ThrowIfNotSuccessVK(vkEnumeratePhysicalDevices(m_vkInstance, &gpuCount, nullptr)); + + if (gpuCount > 0) + { + std::vector physicalDevices(gpuCount); + ThrowIfNotSuccessVK(vkEnumeratePhysicalDevices(m_vkInstance, &gpuCount, physicalDevices.data())); + m_physicalDevice = physicalDevices[0]; + } + } + + void InitVkDevice() + { + vkGetPhysicalDeviceMemoryProperties(m_physicalDevice, &m_deviceMemoryProperties); + m_staticAssetMemoryPools.resize(m_deviceMemoryProperties.memoryTypeCount, {}); + + // Queue family properties, used for setting up requested queues upon device creation + uint32_t queueFamilyCount; + vkGetPhysicalDeviceQueueFamilyProperties(m_physicalDevice, &queueFamilyCount, nullptr); + assert(queueFamilyCount > 0); + m_queueFamilyProperties.resize(queueFamilyCount); + vkGetPhysicalDeviceQueueFamilyProperties(m_physicalDevice, &queueFamilyCount, m_queueFamilyProperties.data()); + + vkGetPhysicalDeviceProperties(m_physicalDevice, &m_physicalDeviceProperties); + + uint32_t deviceExtCount; + ThrowIfFailedVK(vkEnumerateDeviceExtensionProperties(m_physicalDevice, nullptr, &deviceExtCount, nullptr)); + std::vector deviceExtProps(deviceExtCount); + ThrowIfFailedVK( + 
vkEnumerateDeviceExtensionProperties(m_physicalDevice, nullptr, &deviceExtCount, deviceExtProps.data())); + + std::vector deviceExts = {}; + + auto findExt = [&](const char* name) { + return std::find_if(deviceExtProps.begin(), + deviceExtProps.end(), + [name](const VkExtensionProperties& extensionProps) { + return strcmp(extensionProps.extensionName, name) == 0; + }) != deviceExtProps.end(); + }; + + auto findAndAddExt = [&](const char* name) { + if (findExt(name)) + { + deviceExts.push_back(name); + } + }; + + findAndAddExt(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + findAndAddExt(VK_EXT_DEBUG_MARKER_EXTENSION_NAME); +//#ifndef VK_API_VERSION_1_3 + findAndAddExt(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME); +//#endif + +#if defined(_WIN32) + // Crate a Win32 Surface + VkWin32SurfaceCreateInfoKHR createInfo = {}; + createInfo.sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR; + createInfo.pNext = NULL; + createInfo.hinstance = NULL; + createInfo.hwnd = m_hWnd; + ThrowIfFailedVK(vkCreateWin32SurfaceKHR(m_vkInstance, &createInfo, NULL, &m_surface)); +#else +#error platform not supported +#endif + + struct QueueFamilyIndexSelection + { + uint32_t first = UINT32_MAX; + uint32_t prefered = UINT32_MAX; + + uint32_t Get() const + { + return (prefered != UINT32_MAX) ? 
prefered : first; + } + }; + + QueueFamilyIndexSelection presentQueueSel; + QueueFamilyIndexSelection graphicsQueueSel; + QueueFamilyIndexSelection computeQueueSel; + QueueFamilyIndexSelection copyQueueSel; + + for (uint32_t i = 0; i < queueFamilyCount; ++i) + { + VkBool32 supportsPresent; + ThrowIfFailedVK(vkGetPhysicalDeviceSurfaceSupportKHR(m_physicalDevice, i, m_surface, &supportsPresent)); + + bool hasGfx = (m_queueFamilyProperties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0; + bool hasCompute = (m_queueFamilyProperties[i].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0; + bool hasCopy = (m_queueFamilyProperties[i].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0; + + if (supportsPresent) + { + if (presentQueueSel.first == UINT32_MAX) + presentQueueSel.first = i; + + if (hasGfx) + presentQueueSel.prefered = i; + } + + if (hasGfx) + { + if (graphicsQueueSel.first == UINT32_MAX) + graphicsQueueSel.first = i; + + if (supportsPresent) + graphicsQueueSel.prefered = i; + } + + if (hasCompute) + { + if (computeQueueSel.first == UINT32_MAX) + computeQueueSel.first = i; + + if (!hasGfx) + computeQueueSel.prefered = i; + } + + if (hasCopy) + { + if (copyQueueSel.first == UINT32_MAX) + copyQueueSel.first = i; + + if (!hasCompute) + copyQueueSel.prefered = i; + } + } + + m_rpsQueueIndexToVkQueueFamilyMap[RPS_AFX_QUEUE_INDEX_GFX] = graphicsQueueSel.Get(); + m_rpsQueueIndexToVkQueueFamilyMap[RPS_AFX_QUEUE_INDEX_COMPUTE] = computeQueueSel.Get(); + m_rpsQueueIndexToVkQueueFamilyMap[RPS_AFX_QUEUE_INDEX_COPY] = copyQueueSel.Get(); + + float queuePriorities[1] = {0.0}; + + VkDeviceQueueCreateInfo queueCI[RPS_AFX_QUEUE_INDEX_COUNT] = {}; + for (uint32_t queueIdx = RPS_AFX_QUEUE_INDEX_GFX; queueIdx < RPS_AFX_QUEUE_INDEX_COUNT; queueIdx++) + { + queueCI[queueIdx].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + queueCI[queueIdx].pNext = NULL; + queueCI[queueIdx].queueCount = 1; + queueCI[queueIdx].pQueuePriorities = queuePriorities; + queueCI[queueIdx].queueFamilyIndex = 
m_rpsQueueIndexToVkQueueFamilyMap[queueIdx]; + } + + VkPhysicalDeviceFeatures physicalDeviceFeatures = {}; + physicalDeviceFeatures.fillModeNonSolid = VK_TRUE; + physicalDeviceFeatures.pipelineStatisticsQuery = VK_TRUE; + physicalDeviceFeatures.fragmentStoresAndAtomics = VK_TRUE; + physicalDeviceFeatures.vertexPipelineStoresAndAtomics = VK_TRUE; + physicalDeviceFeatures.shaderImageGatherExtended = VK_TRUE; + physicalDeviceFeatures.samplerAnisotropy = VK_TRUE; + physicalDeviceFeatures.geometryShader = VK_TRUE; + physicalDeviceFeatures.multiDrawIndirect = VK_TRUE; + physicalDeviceFeatures.imageCubeArray = VK_TRUE; + physicalDeviceFeatures.multiViewport = VK_TRUE; + physicalDeviceFeatures.sampleRateShading = VK_TRUE; + +#ifdef VK_API_VERSION_1_3 + VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_feature = {}; + dynamic_rendering_feature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES; +#else + VkPhysicalDeviceDynamicRenderingFeaturesKHR dynamic_rendering_feature = {}; + dynamic_rendering_feature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES_KHR; +#endif + dynamic_rendering_feature.dynamicRendering = VK_TRUE; + + VkPhysicalDeviceVulkan12Features vk12Features = {}; + vk12Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; + vk12Features.separateDepthStencilLayouts = VK_TRUE; + vk12Features.pNext = &dynamic_rendering_feature; + + VkDeviceCreateInfo device_info = {}; + device_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + device_info.queueCreateInfoCount = sizeof(queueCI) / sizeof(queueCI[0]); + device_info.pQueueCreateInfos = queueCI; + device_info.enabledExtensionCount = (uint32_t)deviceExts.size(); + device_info.ppEnabledExtensionNames = deviceExts.empty() ? 
NULL : deviceExts.data(); + device_info.pEnabledFeatures = &physicalDeviceFeatures; + device_info.pNext = &vk12Features; + + ThrowIfFailedVK(vkCreateDevice(m_physicalDevice, &device_info, NULL, &m_device)); + + // get queues + + vkGetDeviceQueue(m_device, presentQueueSel.Get(), 0, &m_presentQueue); + + for (uint32_t queueIdx = RPS_AFX_QUEUE_INDEX_GFX; queueIdx < RPS_AFX_QUEUE_INDEX_COUNT; queueIdx++) + { + vkGetDeviceQueue(m_device, m_rpsQueueIndexToVkQueueFamilyMap[queueIdx], 0, &m_queues[queueIdx]); + } + + // Init the extensions (if they have been enabled successfuly) + vkCmdBeginDebugUtilsLabel = + (PFN_vkCmdBeginDebugUtilsLabelEXT)vkGetDeviceProcAddr(m_device, "vkCmdBeginDebugUtilsLabelEXT"); + vkCmdEndDebugUtilsLabel = + (PFN_vkCmdEndDebugUtilsLabelEXT)vkGetDeviceProcAddr(m_device, "vkCmdEndDebugUtilsLabelEXT"); + vkCmdInsertDebugUtilsLabel = + (PFN_vkCmdInsertDebugUtilsLabelEXT)vkGetDeviceProcAddr(m_device, "vkCmdInsertDebugUtilsLabelEXT"); + vkSetDebugUtilsObjectName = + (PFN_vkSetDebugUtilsObjectNameEXT)vkGetDeviceProcAddr(m_device, "vkSetDebugUtilsObjectNameEXT"); + + // Create the default descriptor pool (mostly for ImGUI) + VkDescriptorPoolCreateInfo dpInfo = {}; + dpInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + dpInfo.maxSets = m_defaultFrameDynamicDescriptorPoolMaxSets; + dpInfo.pPoolSizes = m_defaultFrameDynamicDescriptorPoolSizes.data(); + dpInfo.poolSizeCount = (uint32_t)m_defaultFrameDynamicDescriptorPoolSizes.size(); + + ThrowIfFailedVK(vkCreateDescriptorPool(m_device, &dpInfo, nullptr, &m_descriptorPool)); + } + + void CreateSwapChain() + { + uint32_t oldImageCount = (uint32_t)m_swapChainImages.size(); + + VkSwapchainKHR oldSwapchain = m_swapChain; + + // Get physical device surface properties and formats + VkSurfaceCapabilitiesKHR surfCaps; + ThrowIfFailedVK(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(m_physicalDevice, m_surface, &surfCaps)); + + // Get available present modes + uint32_t presentModeCount; + ThrowIfFailedVK( + 
vkGetPhysicalDeviceSurfacePresentModesKHR(m_physicalDevice, m_surface, &presentModeCount, NULL)); + assert(presentModeCount > 0); + + std::vector presentModes(presentModeCount); + ThrowIfFailedVK(vkGetPhysicalDeviceSurfacePresentModesKHR( + m_physicalDevice, m_surface, &presentModeCount, presentModes.data())); + + VkExtent2D swapchainExtent = {}; + // If width (and height) equals the special value 0xFFFFFFFF, the size of the surface will be set by the swapchain + if (surfCaps.currentExtent.width == (uint32_t)-1) + { + // If the surface size is undefined, the size is set to + // the size of the images requested. + swapchainExtent.width = m_width; + swapchainExtent.height = m_height; + } + else + { + // If the surface size is defined, the swap chain size must match + swapchainExtent = surfCaps.currentExtent; + m_width = surfCaps.currentExtent.width; + m_height = surfCaps.currentExtent.height; + } + + // Select a present mode for the swapchain + + // The VK_PRESENT_MODE_FIFO_KHR mode must always be present as per spec + // This mode waits for the vertical blank ("v-sync") + VkPresentModeKHR swapchainPresentMode = VK_PRESENT_MODE_FIFO_KHR; + + // If v-sync is not requested, try to find a mailbox mode + // It's the lowest latency non-tearing present mode available + if (!m_bVSync) + { + for (size_t i = 0; i < presentModeCount; i++) + { + if (presentModes[i] == VK_PRESENT_MODE_MAILBOX_KHR) + { + swapchainPresentMode = VK_PRESENT_MODE_MAILBOX_KHR; + break; + } + if ((swapchainPresentMode != VK_PRESENT_MODE_MAILBOX_KHR) && + (presentModes[i] == VK_PRESENT_MODE_IMMEDIATE_KHR)) + { + swapchainPresentMode = VK_PRESENT_MODE_IMMEDIATE_KHR; + } + } + } + + // Determine the number of images + uint32_t desiredNumberOfSwapchainImages = surfCaps.minImageCount + 1; + if ((surfCaps.maxImageCount > 0) && (desiredNumberOfSwapchainImages > surfCaps.maxImageCount)) + { + desiredNumberOfSwapchainImages = surfCaps.maxImageCount; + } + + // Find the transformation of the surface + 
VkSurfaceTransformFlagsKHR preTransform; + if (surfCaps.supportedTransforms & VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR) + { + // We prefer a non-rotated transform + preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + } + else + { + preTransform = surfCaps.currentTransform; + } + + uint32_t surfaceFormatCount = 0; + ThrowIfFailedVK( + vkGetPhysicalDeviceSurfaceFormatsKHR(m_physicalDevice, m_surface, &surfaceFormatCount, nullptr)); + std::vector surfaceFormats(surfaceFormatCount); + ThrowIfFailedVK(vkGetPhysicalDeviceSurfaceFormatsKHR( + m_physicalDevice, m_surface, &surfaceFormatCount, surfaceFormats.data())); + m_swapChainFormat = surfaceFormats[0]; + + // Find a supported composite alpha format (not all devices support alpha opaque) + VkCompositeAlphaFlagBitsKHR compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + // Simply select the first composite alpha format available + std::vector compositeAlphaFlags = { + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, + VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR, + VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR, + VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR, + }; + for (auto& compositeAlphaFlag : compositeAlphaFlags) + { + if (surfCaps.supportedCompositeAlpha & compositeAlphaFlag) + { + compositeAlpha = compositeAlphaFlag; + break; + }; + } + + VkSwapchainCreateInfoKHR swapchainCI = {}; + swapchainCI.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; + swapchainCI.pNext = NULL; + swapchainCI.surface = m_surface; + swapchainCI.minImageCount = desiredNumberOfSwapchainImages; + swapchainCI.imageFormat = m_swapChainFormat.format; + swapchainCI.imageColorSpace = m_swapChainFormat.colorSpace; + swapchainCI.imageExtent = {swapchainExtent.width, swapchainExtent.height}; + swapchainCI.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + swapchainCI.preTransform = (VkSurfaceTransformFlagBitsKHR)preTransform; + swapchainCI.imageArrayLayers = 1; + swapchainCI.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; + 
swapchainCI.queueFamilyIndexCount = 0; + swapchainCI.pQueueFamilyIndices = NULL; + swapchainCI.presentMode = swapchainPresentMode; + swapchainCI.oldSwapchain = oldSwapchain; + // Setting clipped to VK_TRUE allows the implementation to discard rendering outside of the surface area + swapchainCI.clipped = VK_TRUE; + swapchainCI.compositeAlpha = compositeAlpha; + + // Enable transfer source on swap chain images if supported + if (surfCaps.supportedUsageFlags & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) + { + swapchainCI.imageUsage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + } + + // Enable transfer destination on swap chain images if supported + if (surfCaps.supportedUsageFlags & VK_IMAGE_USAGE_TRANSFER_DST_BIT) + { + swapchainCI.imageUsage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; + } + + ThrowIfFailedVK(vkCreateSwapchainKHR(m_device, &swapchainCI, nullptr, &m_swapChain)); + + // If an existing swap chain is re-created, destroy the old swap chain + // This also cleans up all the presentable images + if (oldSwapchain != VK_NULL_HANDLE) + { + for (auto& swapChainImgs : m_swapChainImages) + { + vkDestroyImageView(m_device, swapChainImgs.imageView, nullptr); + } + vkDestroySwapchainKHR(m_device, oldSwapchain, nullptr); + } + + uint32_t numImages = 0; + ThrowIfNotSuccessVK(vkGetSwapchainImagesKHR(m_device, m_swapChain, &numImages, nullptr)); + std::vector backBuffers(numImages); + ThrowIfNotSuccessVK(vkGetSwapchainImagesKHR(m_device, m_swapChain, &numImages, backBuffers.data())); + + m_swapChainImages.resize(numImages); + + if (m_frameFences.size() < numImages) + { + uint32_t oldSize = (uint32_t)m_frameFences.size(); + m_frameFences.resize(numImages); + + VkSemaphoreCreateInfo semaphoreCI = {}; + semaphoreCI.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + VkFenceCreateInfo fenceCI = {}; + fenceCI.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fenceCI.flags = VK_FENCE_CREATE_SIGNALED_BIT; + + for (uint32_t i = oldSize; i < m_frameFences.size(); i++) + { + vkCreateSemaphore(m_device, 
&semaphoreCI, nullptr, &m_frameFences[i].imageAcquiredSemaphore); + vkCreateFence(m_device, &fenceCI, nullptr, &m_frameFences[i].renderCompleteFence); + } + } + + if (m_frameDynamicDescriptorPools.size() < numImages) + { + uint32_t oldSize = (uint32_t)m_frameDynamicDescriptorPools.size(); + m_frameDynamicDescriptorPools.resize(numImages); + } + + if (oldImageCount < numImages) + { + if (m_constantBuffer != VK_NULL_HANDLE) + { + vkDestroyBuffer(m_device, m_constantBuffer, nullptr); + vkFreeMemory(m_device, m_constantBufferMemory, nullptr); + } + + // Create shared dynamic constant buffer + VkBufferCreateInfo bufCI = {}; + bufCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + bufCI.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + bufCI.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + bufCI.queueFamilyIndexCount = 0; + bufCI.pQueueFamilyIndices = nullptr; + bufCI.size = m_maxConstantSizePerFrame * numImages; + + ThrowIfFailedVK(vkCreateBuffer(m_device, &bufCI, nullptr, &m_constantBuffer)); + + VkMemoryRequirements req; + vkGetBufferMemoryRequirements(m_device, m_constantBuffer, &req); + + VkMemoryAllocateInfo ai = {}; + ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + ai.memoryTypeIndex = FindMemoryTypeIndex(req.memoryTypeBits, true, true, false); + ai.allocationSize = req.size; + + m_constantBufferNeedsFlushAfterUpdate = + !(m_deviceMemoryProperties.memoryTypes[ai.memoryTypeIndex].propertyFlags & + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + + ThrowIfFailedVK(vkAllocateMemory(m_device, &ai, nullptr, &m_constantBufferMemory)); + ThrowIfFailedVK(vkBindBufferMemory(m_device, m_constantBuffer, m_constantBufferMemory, 0)); + ThrowIfFailedVK( + vkMapMemory(m_device, m_constantBufferMemory, 0, req.size, 0, (void**)&m_constantBufferCpuVA)); + } + + VkImageViewCreateInfo imageViewInfo = {}; + imageViewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + imageViewInfo.pNext = NULL; + imageViewInfo.format = m_swapChainFormat.format; + 
imageViewInfo.components = { + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A}; + imageViewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imageViewInfo.subresourceRange.baseMipLevel = 0; + imageViewInfo.subresourceRange.levelCount = 1; + imageViewInfo.subresourceRange.baseArrayLayer = 0; + imageViewInfo.subresourceRange.layerCount = 1; + imageViewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D; + imageViewInfo.flags = 0; + + m_swapChainImageRpsResources.resize(m_swapChainImages.size()); + + for (uint32_t iBuf = 0; iBuf < numImages; iBuf++) + { + m_swapChainImages[iBuf].image = backBuffers[iBuf]; + imageViewInfo.image = backBuffers[iBuf]; + vkCreateImageView(m_device, &imageViewInfo, nullptr, &m_swapChainImages[iBuf].imageView); + + m_swapChainImageRpsResources[iBuf] = rpsVKImageToHandle(backBuffers[iBuf]); + } + + m_backBufferIndex = 0; + } + +protected: + bool DxcCompileToSpirv(const char* shaderCode, + const WCHAR* pShaderEntryPoint, + const WCHAR* pProfile, + std::wstring compilerParams, + const DxcDefine* pDefines, + uint32_t definesCount, + std::vector& outSpvData) + { + std::wstring scp = L"-spirv -fspv-target-env=vulkan1.1 " + compilerParams; + return DxcCompile(shaderCode, pShaderEntryPoint, pProfile, scp.c_str(), pDefines, definesCount, outSpvData); + } + +protected: + bool m_bGpuValidation = true; + bool m_bValidationErrorOnly = true; + bool m_bVSync = g_VSync; + HWND m_hWnd = NULL; + UINT m_width = 0; + UINT m_height = 0; + VkInstance m_vkInstance = VK_NULL_HANDLE; + VkPhysicalDevice m_physicalDevice = VK_NULL_HANDLE; + VkDevice m_device = VK_NULL_HANDLE; + + VkDebugUtilsMessengerEXT m_vkDebugMsger = VK_NULL_HANDLE; + + std::vector m_queueFamilyProperties = {}; + VkPhysicalDeviceMemoryProperties m_deviceMemoryProperties = {}; + VkPhysicalDeviceProperties m_physicalDeviceProperties = {}; + + struct SwapChainImages + { + VkImage image; + VkImageView imageView; + }; + + struct FrameFences + { + VkFence 
renderCompleteFence; + VkSemaphore renderCompleteSemaphore; + VkSemaphore imageAcquiredSemaphore; + }; + + std::vector m_swapChainImages; + std::vector m_swapChainImageRpsResources; + std::vector m_frameFences; + VkSurfaceKHR m_surface = VK_NULL_HANDLE; + VkSurfaceFormatKHR m_swapChainFormat = {}; + VkSwapchainKHR m_swapChain = VK_NULL_HANDLE; + uint32_t m_backBufferIndex = 0; + uint32_t m_swapChainImageSemaphoreIndex = 0; + + uint32_t m_frameCounter = 0; + + struct StaticMemoryPool + { + std::vector pools; + VkDeviceSize lastUsage; + VkDeviceSize lastCapacity; + }; + + std::vector m_staticAssetMemoryPools; + + struct CommandPool + { + bool inUse; + VkCommandPool cmdPool; + std::vector cmdBuffers; + }; + + uint32_t m_presentQueueFamilyIndex = {}; + VkQueue m_presentQueue = VK_NULL_HANDLE; + VkQueue m_queues[RPS_AFX_QUEUE_INDEX_COUNT] = {}; + std::vector m_queueSemaphores; + VkSemaphore m_pendingPresentSemaphore = VK_NULL_HANDLE; + uint32_t m_rpsQueueIndexToVkQueueFamilyMap[RPS_AFX_QUEUE_INDEX_COUNT]; + std::vector> m_cmdPools[RPS_AFX_QUEUE_INDEX_COUNT]; + std::mutex m_cmdListMutex; + VkDescriptorPool m_descriptorPool = VK_NULL_HANDLE; + std::vector m_cmdBufsToSubmit; + + VkBuffer m_constantBuffer = VK_NULL_HANDLE; + VkDeviceMemory m_constantBufferMemory = VK_NULL_HANDLE; + uint8_t* m_constantBufferCpuVA = nullptr; + bool m_constantBufferNeedsFlushAfterUpdate = false; + uint32_t m_frameConstantUsage = 0; + uint32_t m_maxConstantSizePerFrame = 16 * 1024 * 1024; + + struct FrameDynamicDescriptorPools + { + std::vector pools; + uint32_t current; + }; + + std::vector m_frameDynamicDescriptorPools; + //TODO Let this be automatically determined + uint32_t m_defaultFrameDynamicDescriptorPoolMaxSets = 1024; + std::vector m_defaultFrameDynamicDescriptorPoolSizes = { + {VK_DESCRIPTOR_TYPE_SAMPLER, 64}, + {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 512}, + {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 512}, + {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 256}, + 
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 256}, + {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 256}, + {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1024}, + {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 512}, + {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 128}, + {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, 128}, + {VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, 64}, +#if 0 // Not using these: + { VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT, 0 }, + { VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, 0 }, +#endif + }; + + VkRenderPass m_imguiRenderPass = VK_NULL_HANDLE; + std::vector m_imguiFrameBuffers; + + void DestroySwapChainFrameBuffers() + { + for (VkFramebuffer fb : m_imguiFrameBuffers) + { + vkDestroyFramebuffer(m_device, fb, nullptr); + } + m_imguiFrameBuffers = {}; + } + +protected: + PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabel; + PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabel; + PFN_vkCmdInsertDebugUtilsLabelEXT vkCmdInsertDebugUtilsLabel; + PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectName; +}; \ No newline at end of file diff --git a/tools/app_framework/afx_win32.h b/tools/app_framework/afx_win32.h new file mode 100644 index 0000000..70dc1c2 --- /dev/null +++ b/tools/app_framework/afx_win32.h @@ -0,0 +1,223 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. 
+ +#pragma once + +#define NOMINMAX +#define WIN32_LEAN_AND_MEAN +#include + +#include "afx_renderer.h" + +typedef struct RpsAfxRunWindowInfo +{ + LPCTSTR title; + LONG width; + LONG height; + UINT numFramesToRender; + RpsAfxRendererBase* pRenderer; +} RpsAfxRunWindowInfo; + +LRESULT CALLBACK RpsAfxWindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam); + +static inline int RpsAfxRunWindowApp(const RpsAfxRunWindowInfo* pRunInfo) +{ + HWND hWnd = NULL; + HMODULE hModule = NULL; + ::GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + (LPCTSTR)(&RpsAfxRunWindowApp), + &hModule); + + // Initialize the window class. + WNDCLASSEX windowClass = {0}; + windowClass.cbSize = sizeof(WNDCLASSEX); + windowClass.style = CS_HREDRAW | CS_VREDRAW; + windowClass.lpfnWndProc = RpsAfxWindowProc; + windowClass.hInstance = (HINSTANCE)hModule; + windowClass.hCursor = LoadCursor(NULL, IDC_ARROW); + windowClass.lpszClassName = TEXT("RPSAfxApp"); + RegisterClassEx(&windowClass); + + RECT windowRect = {0, 0, pRunInfo->width, pRunInfo->height}; + AdjustWindowRect(&windowRect, WS_OVERLAPPEDWINDOW, FALSE); + + // Create the window and store a handle to it. + hWnd = CreateWindow(windowClass.lpszClassName, + pRunInfo->title, + WS_OVERLAPPEDWINDOW, + CW_USEDEFAULT, + CW_USEDEFAULT, + windowRect.right - windowRect.left, + windowRect.bottom - windowRect.top, + nullptr, // We have no parent window. + nullptr, // We aren't using menus. + (HINSTANCE)hModule, + pRunInfo->pRenderer); + + if (!pRunInfo->pRenderer->Init(hWnd)) + { + return -1; + } + + ::ShowWindow(hWnd, SW_SHOW); + + UINT frameCounter = 0; + + // Main sample loop. + MSG msg = {}; + while (msg.message != WM_QUIT) + { + // Process any messages in the queue. 
+ if (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) + { + TranslateMessage(&msg); + DispatchMessage(&msg); + } + else + { + pRunInfo->pRenderer->Tick(); + + frameCounter++; + if ((pRunInfo->numFramesToRender != 0) && (frameCounter >= pRunInfo->numFramesToRender)) + { + ::PostMessage(hWnd, WM_CLOSE, 0, 0); + } + } + } + + pRunInfo->pRenderer->CleanUp(); + + // Return this part of the WM_QUIT message to Windows. + return (INT)msg.wParam; +} + +// Main message handler for the sample. +LRESULT CALLBACK RpsAfxWindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) +{ + RpsAfxRendererBase* pRenderer = reinterpret_cast(GetWindowLongPtr(hWnd, GWLP_USERDATA)); + + if (pRenderer) + { + bool bHandled = false; + LRESULT result = pRenderer->WindowProc(hWnd, message, wParam, lParam, bHandled); + + if (bHandled) + { + return result; + } + } + + switch (message) + { + case WM_CREATE: + { + // Save the DXSample* passed in to CreateWindow. + LPCREATESTRUCT pCreateStruct = reinterpret_cast(lParam); + SetWindowLongPtr(hWnd, GWLP_USERDATA, reinterpret_cast(pCreateStruct->lpCreateParams)); + } + return 0; + + case WM_KEYDOWN: + if (pRenderer) + { + pRenderer->OnKeyDown((UINT8)(wParam)); + } + return 0; + + case WM_KEYUP: + if (pRenderer) + { + pRenderer->OnKeyUp((UINT8)(wParam)); + } + return 0; + + case WM_WINDOWPOSCHANGED: + if (pRenderer) + { + RECT rect = {}; + if (::GetClientRect(hWnd, &rect)) + { + pRenderer->OnResize(rect.right - rect.left, rect.bottom - rect.top); + } + } + return 0; + + case WM_DESTROY: + PostQuitMessage(0); + return 0; + } + + // Handle any messages the switch statement didn't. 
+ return DefWindowProc(hWnd, message, wParam, lParam); +} + +bool LaunchProcess(char* commandLine) +{ + // create a pipe to get possible errors from the process + // + HANDLE hChildStdOutRead = NULL; + HANDLE hChildStdOutWrite = NULL; + + SECURITY_ATTRIBUTES saAttr = {}; + saAttr.nLength = sizeof(SECURITY_ATTRIBUTES); + saAttr.bInheritHandle = TRUE; + if (!CreatePipe(&hChildStdOutRead, &hChildStdOutWrite, &saAttr, 0)) + return false; + + // launch process + // + PROCESS_INFORMATION pi = {}; + STARTUPINFOA si = {}; + si.cb = sizeof(si); + si.dwFlags = STARTF_USESTDHANDLES; + si.hStdError = hChildStdOutWrite; + si.hStdOutput = hChildStdOutWrite; + si.wShowWindow = SW_HIDE; + + if (CreateProcessA(NULL, commandLine, NULL, NULL, TRUE, CREATE_NO_WINDOW, NULL, NULL, &si, &pi)) + { + WaitForSingleObject(pi.hProcess, INFINITE); + CloseHandle(hChildStdOutWrite); + + ULONG exitCode; + if (GetExitCodeProcess(pi.hProcess, &exitCode)) + { + HANDLE hParentStdOut = GetStdHandle(STD_OUTPUT_HANDLE); + DWORD dwRead, dwWritten; + char chBuf[4096]; + + for (;;) + { + BOOL bSuccess = ReadFile(hChildStdOutRead, chBuf, _countof(chBuf), &dwRead, NULL); + + if (!bSuccess || dwRead == 0) + break; + + bSuccess = WriteFile(hParentStdOut, chBuf, dwRead, &dwWritten, NULL); + if (!bSuccess) + break; + } + + if (exitCode == 0) + { + return true; + } + } + + CloseHandle(hChildStdOutRead); + CloseHandle(pi.hProcess); + CloseHandle(pi.hThread); + } + + return false; +} + +bool LaunchProcess(const char* commandLine) +{ + std::string mutableCmdLine = commandLine; + return LaunchProcess(&mutableCmdLine[0]); +} \ No newline at end of file diff --git a/tools/app_framework/d3dx12.h b/tools/app_framework/d3dx12.h new file mode 100644 index 0000000..3959e94 --- /dev/null +++ b/tools/app_framework/d3dx12.h @@ -0,0 +1,3905 @@ +//********************************************************* +// +// Copyright (c) Microsoft. All rights reserved. +// This code is licensed under the MIT License (MIT). 
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY +// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR +// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. +// +//********************************************************* + +#ifndef __D3DX12_H__ +#define __D3DX12_H__ + +#include "d3d12.h" + +#if defined( __cplusplus ) + +struct CD3DX12_DEFAULT {}; +extern const DECLSPEC_SELECTANY CD3DX12_DEFAULT D3D12_DEFAULT; + +//------------------------------------------------------------------------------------------------ +inline bool operator==( const D3D12_VIEWPORT& l, const D3D12_VIEWPORT& r ) noexcept +{ + return l.TopLeftX == r.TopLeftX && l.TopLeftY == r.TopLeftY && l.Width == r.Width && + l.Height == r.Height && l.MinDepth == r.MinDepth && l.MaxDepth == r.MaxDepth; +} + +//------------------------------------------------------------------------------------------------ +inline bool operator!=( const D3D12_VIEWPORT& l, const D3D12_VIEWPORT& r ) noexcept +{ return !( l == r ); } + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RECT : public D3D12_RECT +{ + CD3DX12_RECT() = default; + explicit CD3DX12_RECT( const D3D12_RECT& o ) noexcept : + D3D12_RECT( o ) + {} + explicit CD3DX12_RECT( + LONG Left, + LONG Top, + LONG Right, + LONG Bottom ) noexcept + { + left = Left; + top = Top; + right = Right; + bottom = Bottom; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_VIEWPORT : public D3D12_VIEWPORT +{ + CD3DX12_VIEWPORT() = default; + explicit CD3DX12_VIEWPORT( const D3D12_VIEWPORT& o ) noexcept : + D3D12_VIEWPORT( o ) + {} + explicit CD3DX12_VIEWPORT( + FLOAT topLeftX, + FLOAT topLeftY, + FLOAT width, + FLOAT height, + FLOAT minDepth = D3D12_MIN_DEPTH, + FLOAT maxDepth = D3D12_MAX_DEPTH ) noexcept + { + TopLeftX = topLeftX; + TopLeftY = topLeftY; + Width = width; + Height = 
height; + MinDepth = minDepth; + MaxDepth = maxDepth; + } + explicit CD3DX12_VIEWPORT( + _In_ ID3D12Resource* pResource, + UINT mipSlice = 0, + FLOAT topLeftX = 0.0f, + FLOAT topLeftY = 0.0f, + FLOAT minDepth = D3D12_MIN_DEPTH, + FLOAT maxDepth = D3D12_MAX_DEPTH ) noexcept + { + auto Desc = pResource->GetDesc(); + const UINT64 SubresourceWidth = Desc.Width >> mipSlice; + const UINT64 SubresourceHeight = Desc.Height >> mipSlice; + switch (Desc.Dimension) + { + case D3D12_RESOURCE_DIMENSION_BUFFER: + TopLeftX = topLeftX; + TopLeftY = 0.0f; + Width = float(Desc.Width) - topLeftX; + Height = 1.0f; + break; + case D3D12_RESOURCE_DIMENSION_TEXTURE1D: + TopLeftX = topLeftX; + TopLeftY = 0.0f; + Width = (SubresourceWidth ? float(SubresourceWidth) : 1.0f) - topLeftX; + Height = 1.0f; + break; + case D3D12_RESOURCE_DIMENSION_TEXTURE2D: + case D3D12_RESOURCE_DIMENSION_TEXTURE3D: + TopLeftX = topLeftX; + TopLeftY = topLeftY; + Width = (SubresourceWidth ? float(SubresourceWidth) : 1.0f) - topLeftX; + Height = (SubresourceHeight ? 
float(SubresourceHeight) : 1.0f) - topLeftY; + break; + default: break; + } + + MinDepth = minDepth; + MaxDepth = maxDepth; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_BOX : public D3D12_BOX +{ + CD3DX12_BOX() = default; + explicit CD3DX12_BOX( const D3D12_BOX& o ) noexcept : + D3D12_BOX( o ) + {} + explicit CD3DX12_BOX( + LONG Left, + LONG Right ) noexcept + { + left = static_cast(Left); + top = 0; + front = 0; + right = static_cast(Right); + bottom = 1; + back = 1; + } + explicit CD3DX12_BOX( + LONG Left, + LONG Top, + LONG Right, + LONG Bottom ) noexcept + { + left = static_cast(Left); + top = static_cast(Top); + front = 0; + right = static_cast(Right); + bottom = static_cast(Bottom); + back = 1; + } + explicit CD3DX12_BOX( + LONG Left, + LONG Top, + LONG Front, + LONG Right, + LONG Bottom, + LONG Back ) noexcept + { + left = static_cast(Left); + top = static_cast(Top); + front = static_cast(Front); + right = static_cast(Right); + bottom = static_cast(Bottom); + back = static_cast(Back); + } +}; +inline bool operator==( const D3D12_BOX& l, const D3D12_BOX& r ) noexcept +{ + return l.left == r.left && l.top == r.top && l.front == r.front && + l.right == r.right && l.bottom == r.bottom && l.back == r.back; +} +inline bool operator!=( const D3D12_BOX& l, const D3D12_BOX& r ) noexcept +{ return !( l == r ); } + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_DEPTH_STENCIL_DESC : public D3D12_DEPTH_STENCIL_DESC +{ + CD3DX12_DEPTH_STENCIL_DESC() = default; + explicit CD3DX12_DEPTH_STENCIL_DESC( const D3D12_DEPTH_STENCIL_DESC& o ) noexcept : + D3D12_DEPTH_STENCIL_DESC( o ) + {} + explicit CD3DX12_DEPTH_STENCIL_DESC( CD3DX12_DEFAULT ) noexcept + { + DepthEnable = TRUE; + DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + DepthFunc = D3D12_COMPARISON_FUNC_LESS; + StencilEnable = FALSE; + StencilReadMask = 
D3D12_DEFAULT_STENCIL_READ_MASK; + StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK; + const D3D12_DEPTH_STENCILOP_DESC defaultStencilOp = + { D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_COMPARISON_FUNC_ALWAYS }; + FrontFace = defaultStencilOp; + BackFace = defaultStencilOp; + } + explicit CD3DX12_DEPTH_STENCIL_DESC( + BOOL depthEnable, + D3D12_DEPTH_WRITE_MASK depthWriteMask, + D3D12_COMPARISON_FUNC depthFunc, + BOOL stencilEnable, + UINT8 stencilReadMask, + UINT8 stencilWriteMask, + D3D12_STENCIL_OP frontStencilFailOp, + D3D12_STENCIL_OP frontStencilDepthFailOp, + D3D12_STENCIL_OP frontStencilPassOp, + D3D12_COMPARISON_FUNC frontStencilFunc, + D3D12_STENCIL_OP backStencilFailOp, + D3D12_STENCIL_OP backStencilDepthFailOp, + D3D12_STENCIL_OP backStencilPassOp, + D3D12_COMPARISON_FUNC backStencilFunc ) noexcept + { + DepthEnable = depthEnable; + DepthWriteMask = depthWriteMask; + DepthFunc = depthFunc; + StencilEnable = stencilEnable; + StencilReadMask = stencilReadMask; + StencilWriteMask = stencilWriteMask; + FrontFace.StencilFailOp = frontStencilFailOp; + FrontFace.StencilDepthFailOp = frontStencilDepthFailOp; + FrontFace.StencilPassOp = frontStencilPassOp; + FrontFace.StencilFunc = frontStencilFunc; + BackFace.StencilFailOp = backStencilFailOp; + BackFace.StencilDepthFailOp = backStencilDepthFailOp; + BackFace.StencilPassOp = backStencilPassOp; + BackFace.StencilFunc = backStencilFunc; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_DEPTH_STENCIL_DESC1 : public D3D12_DEPTH_STENCIL_DESC1 +{ + CD3DX12_DEPTH_STENCIL_DESC1() = default; + explicit CD3DX12_DEPTH_STENCIL_DESC1( const D3D12_DEPTH_STENCIL_DESC1& o ) noexcept : + D3D12_DEPTH_STENCIL_DESC1( o ) + {} + explicit CD3DX12_DEPTH_STENCIL_DESC1( const D3D12_DEPTH_STENCIL_DESC& o ) noexcept + { + DepthEnable = o.DepthEnable; + DepthWriteMask = o.DepthWriteMask; + DepthFunc = o.DepthFunc; + StencilEnable 
= o.StencilEnable; + StencilReadMask = o.StencilReadMask; + StencilWriteMask = o.StencilWriteMask; + FrontFace.StencilFailOp = o.FrontFace.StencilFailOp; + FrontFace.StencilDepthFailOp = o.FrontFace.StencilDepthFailOp; + FrontFace.StencilPassOp = o.FrontFace.StencilPassOp; + FrontFace.StencilFunc = o.FrontFace.StencilFunc; + BackFace.StencilFailOp = o.BackFace.StencilFailOp; + BackFace.StencilDepthFailOp = o.BackFace.StencilDepthFailOp; + BackFace.StencilPassOp = o.BackFace.StencilPassOp; + BackFace.StencilFunc = o.BackFace.StencilFunc; + DepthBoundsTestEnable = FALSE; + } + explicit CD3DX12_DEPTH_STENCIL_DESC1( CD3DX12_DEFAULT ) noexcept + { + DepthEnable = TRUE; + DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + DepthFunc = D3D12_COMPARISON_FUNC_LESS; + StencilEnable = FALSE; + StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK; + StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK; + const D3D12_DEPTH_STENCILOP_DESC defaultStencilOp = + { D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_COMPARISON_FUNC_ALWAYS }; + FrontFace = defaultStencilOp; + BackFace = defaultStencilOp; + DepthBoundsTestEnable = FALSE; + } + explicit CD3DX12_DEPTH_STENCIL_DESC1( + BOOL depthEnable, + D3D12_DEPTH_WRITE_MASK depthWriteMask, + D3D12_COMPARISON_FUNC depthFunc, + BOOL stencilEnable, + UINT8 stencilReadMask, + UINT8 stencilWriteMask, + D3D12_STENCIL_OP frontStencilFailOp, + D3D12_STENCIL_OP frontStencilDepthFailOp, + D3D12_STENCIL_OP frontStencilPassOp, + D3D12_COMPARISON_FUNC frontStencilFunc, + D3D12_STENCIL_OP backStencilFailOp, + D3D12_STENCIL_OP backStencilDepthFailOp, + D3D12_STENCIL_OP backStencilPassOp, + D3D12_COMPARISON_FUNC backStencilFunc, + BOOL depthBoundsTestEnable ) noexcept + { + DepthEnable = depthEnable; + DepthWriteMask = depthWriteMask; + DepthFunc = depthFunc; + StencilEnable = stencilEnable; + StencilReadMask = stencilReadMask; + StencilWriteMask = stencilWriteMask; + FrontFace.StencilFailOp = frontStencilFailOp; + 
FrontFace.StencilDepthFailOp = frontStencilDepthFailOp; + FrontFace.StencilPassOp = frontStencilPassOp; + FrontFace.StencilFunc = frontStencilFunc; + BackFace.StencilFailOp = backStencilFailOp; + BackFace.StencilDepthFailOp = backStencilDepthFailOp; + BackFace.StencilPassOp = backStencilPassOp; + BackFace.StencilFunc = backStencilFunc; + DepthBoundsTestEnable = depthBoundsTestEnable; + } + operator D3D12_DEPTH_STENCIL_DESC() const noexcept + { + D3D12_DEPTH_STENCIL_DESC D; + D.DepthEnable = DepthEnable; + D.DepthWriteMask = DepthWriteMask; + D.DepthFunc = DepthFunc; + D.StencilEnable = StencilEnable; + D.StencilReadMask = StencilReadMask; + D.StencilWriteMask = StencilWriteMask; + D.FrontFace.StencilFailOp = FrontFace.StencilFailOp; + D.FrontFace.StencilDepthFailOp = FrontFace.StencilDepthFailOp; + D.FrontFace.StencilPassOp = FrontFace.StencilPassOp; + D.FrontFace.StencilFunc = FrontFace.StencilFunc; + D.BackFace.StencilFailOp = BackFace.StencilFailOp; + D.BackFace.StencilDepthFailOp = BackFace.StencilDepthFailOp; + D.BackFace.StencilPassOp = BackFace.StencilPassOp; + D.BackFace.StencilFunc = BackFace.StencilFunc; + return D; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_BLEND_DESC : public D3D12_BLEND_DESC +{ + CD3DX12_BLEND_DESC() = default; + explicit CD3DX12_BLEND_DESC( const D3D12_BLEND_DESC& o ) noexcept : + D3D12_BLEND_DESC( o ) + {} + explicit CD3DX12_BLEND_DESC( CD3DX12_DEFAULT ) noexcept + { + AlphaToCoverageEnable = FALSE; + IndependentBlendEnable = FALSE; + const D3D12_RENDER_TARGET_BLEND_DESC defaultRenderTargetBlendDesc = + { + FALSE,FALSE, + D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, + D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, + D3D12_LOGIC_OP_NOOP, + D3D12_COLOR_WRITE_ENABLE_ALL, + }; + for (UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) + RenderTarget[ i ] = defaultRenderTargetBlendDesc; + } +}; + 
+//------------------------------------------------------------------------------------------------ +struct CD3DX12_RASTERIZER_DESC : public D3D12_RASTERIZER_DESC +{ + CD3DX12_RASTERIZER_DESC() = default; + explicit CD3DX12_RASTERIZER_DESC( const D3D12_RASTERIZER_DESC& o ) noexcept : + D3D12_RASTERIZER_DESC( o ) + {} + explicit CD3DX12_RASTERIZER_DESC( CD3DX12_DEFAULT ) noexcept + { + FillMode = D3D12_FILL_MODE_SOLID; + CullMode = D3D12_CULL_MODE_BACK; + FrontCounterClockwise = FALSE; + DepthBias = D3D12_DEFAULT_DEPTH_BIAS; + DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; + SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; + DepthClipEnable = TRUE; + MultisampleEnable = FALSE; + AntialiasedLineEnable = FALSE; + ForcedSampleCount = 0; + ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; + } + explicit CD3DX12_RASTERIZER_DESC( + D3D12_FILL_MODE fillMode, + D3D12_CULL_MODE cullMode, + BOOL frontCounterClockwise, + INT depthBias, + FLOAT depthBiasClamp, + FLOAT slopeScaledDepthBias, + BOOL depthClipEnable, + BOOL multisampleEnable, + BOOL antialiasedLineEnable, + UINT forcedSampleCount, + D3D12_CONSERVATIVE_RASTERIZATION_MODE conservativeRaster) noexcept + { + FillMode = fillMode; + CullMode = cullMode; + FrontCounterClockwise = frontCounterClockwise; + DepthBias = depthBias; + DepthBiasClamp = depthBiasClamp; + SlopeScaledDepthBias = slopeScaledDepthBias; + DepthClipEnable = depthClipEnable; + MultisampleEnable = multisampleEnable; + AntialiasedLineEnable = antialiasedLineEnable; + ForcedSampleCount = forcedSampleCount; + ConservativeRaster = conservativeRaster; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RESOURCE_ALLOCATION_INFO : public D3D12_RESOURCE_ALLOCATION_INFO +{ + CD3DX12_RESOURCE_ALLOCATION_INFO() = default; + explicit CD3DX12_RESOURCE_ALLOCATION_INFO( const D3D12_RESOURCE_ALLOCATION_INFO& o ) noexcept : + D3D12_RESOURCE_ALLOCATION_INFO( o ) + {} + 
CD3DX12_RESOURCE_ALLOCATION_INFO( + UINT64 size, + UINT64 alignment ) noexcept + { + SizeInBytes = size; + Alignment = alignment; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_HEAP_PROPERTIES : public D3D12_HEAP_PROPERTIES +{ + CD3DX12_HEAP_PROPERTIES() = default; + explicit CD3DX12_HEAP_PROPERTIES(const D3D12_HEAP_PROPERTIES &o) noexcept : + D3D12_HEAP_PROPERTIES(o) + {} + CD3DX12_HEAP_PROPERTIES( + D3D12_CPU_PAGE_PROPERTY cpuPageProperty, + D3D12_MEMORY_POOL memoryPoolPreference, + UINT creationNodeMask = 1, + UINT nodeMask = 1 ) noexcept + { + Type = D3D12_HEAP_TYPE_CUSTOM; + CPUPageProperty = cpuPageProperty; + MemoryPoolPreference = memoryPoolPreference; + CreationNodeMask = creationNodeMask; + VisibleNodeMask = nodeMask; + } + explicit CD3DX12_HEAP_PROPERTIES( + D3D12_HEAP_TYPE type, + UINT creationNodeMask = 1, + UINT nodeMask = 1 ) noexcept + { + Type = type; + CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + CreationNodeMask = creationNodeMask; + VisibleNodeMask = nodeMask; + } + bool IsCPUAccessible() const noexcept + { + return Type == D3D12_HEAP_TYPE_UPLOAD || Type == D3D12_HEAP_TYPE_READBACK || (Type == D3D12_HEAP_TYPE_CUSTOM && + (CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE || CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_BACK)); + } +}; +inline bool operator==( const D3D12_HEAP_PROPERTIES& l, const D3D12_HEAP_PROPERTIES& r ) noexcept +{ + return l.Type == r.Type && l.CPUPageProperty == r.CPUPageProperty && + l.MemoryPoolPreference == r.MemoryPoolPreference && + l.CreationNodeMask == r.CreationNodeMask && + l.VisibleNodeMask == r.VisibleNodeMask; +} +inline bool operator!=( const D3D12_HEAP_PROPERTIES& l, const D3D12_HEAP_PROPERTIES& r ) noexcept +{ return !( l == r ); } + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_HEAP_DESC : 
public D3D12_HEAP_DESC +{ + CD3DX12_HEAP_DESC() = default; + explicit CD3DX12_HEAP_DESC(const D3D12_HEAP_DESC &o) noexcept : + D3D12_HEAP_DESC(o) + {} + CD3DX12_HEAP_DESC( + UINT64 size, + D3D12_HEAP_PROPERTIES properties, + UINT64 alignment = 0, + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) noexcept + { + SizeInBytes = size; + Properties = properties; + Alignment = alignment; + Flags = flags; + } + CD3DX12_HEAP_DESC( + UINT64 size, + D3D12_HEAP_TYPE type, + UINT64 alignment = 0, + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) noexcept + { + SizeInBytes = size; + Properties = CD3DX12_HEAP_PROPERTIES( type ); + Alignment = alignment; + Flags = flags; + } + CD3DX12_HEAP_DESC( + UINT64 size, + D3D12_CPU_PAGE_PROPERTY cpuPageProperty, + D3D12_MEMORY_POOL memoryPoolPreference, + UINT64 alignment = 0, + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) noexcept + { + SizeInBytes = size; + Properties = CD3DX12_HEAP_PROPERTIES( cpuPageProperty, memoryPoolPreference ); + Alignment = alignment; + Flags = flags; + } + CD3DX12_HEAP_DESC( + const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, + D3D12_HEAP_PROPERTIES properties, + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) noexcept + { + SizeInBytes = resAllocInfo.SizeInBytes; + Properties = properties; + Alignment = resAllocInfo.Alignment; + Flags = flags; + } + CD3DX12_HEAP_DESC( + const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, + D3D12_HEAP_TYPE type, + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) noexcept + { + SizeInBytes = resAllocInfo.SizeInBytes; + Properties = CD3DX12_HEAP_PROPERTIES( type ); + Alignment = resAllocInfo.Alignment; + Flags = flags; + } + CD3DX12_HEAP_DESC( + const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, + D3D12_CPU_PAGE_PROPERTY cpuPageProperty, + D3D12_MEMORY_POOL memoryPoolPreference, + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) noexcept + { + SizeInBytes = resAllocInfo.SizeInBytes; + Properties = CD3DX12_HEAP_PROPERTIES( cpuPageProperty, memoryPoolPreference ); + Alignment = 
resAllocInfo.Alignment; + Flags = flags; + } + bool IsCPUAccessible() const noexcept + { return static_cast< const CD3DX12_HEAP_PROPERTIES* >( &Properties )->IsCPUAccessible(); } +}; +inline bool operator==( const D3D12_HEAP_DESC& l, const D3D12_HEAP_DESC& r ) noexcept +{ + return l.SizeInBytes == r.SizeInBytes && + l.Properties == r.Properties && + l.Alignment == r.Alignment && + l.Flags == r.Flags; +} +inline bool operator!=( const D3D12_HEAP_DESC& l, const D3D12_HEAP_DESC& r ) noexcept +{ return !( l == r ); } + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_CLEAR_VALUE : public D3D12_CLEAR_VALUE +{ + CD3DX12_CLEAR_VALUE() = default; + explicit CD3DX12_CLEAR_VALUE(const D3D12_CLEAR_VALUE &o) noexcept : + D3D12_CLEAR_VALUE(o) + {} + CD3DX12_CLEAR_VALUE( + DXGI_FORMAT format, + const FLOAT color[4] ) noexcept + { + Format = format; + memcpy( Color, color, sizeof( Color ) ); + } + CD3DX12_CLEAR_VALUE( + DXGI_FORMAT format, + FLOAT depth, + UINT8 stencil ) noexcept + { + Format = format; + memset( &Color, 0, sizeof( Color ) ); + /* Use memcpy to preserve NAN values */ + memcpy( &DepthStencil.Depth, &depth, sizeof( depth ) ); + DepthStencil.Stencil = stencil; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RANGE : public D3D12_RANGE +{ + CD3DX12_RANGE() = default; + explicit CD3DX12_RANGE(const D3D12_RANGE &o) noexcept : + D3D12_RANGE(o) + {} + CD3DX12_RANGE( + SIZE_T begin, + SIZE_T end ) noexcept + { + Begin = begin; + End = end; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RANGE_UINT64 : public D3D12_RANGE_UINT64 +{ + CD3DX12_RANGE_UINT64() = default; + explicit CD3DX12_RANGE_UINT64(const D3D12_RANGE_UINT64 &o) noexcept : + D3D12_RANGE_UINT64(o) + {} + CD3DX12_RANGE_UINT64( + UINT64 begin, + UINT64 end ) noexcept + { + Begin = begin; + End 
= end; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_SUBRESOURCE_RANGE_UINT64 : public D3D12_SUBRESOURCE_RANGE_UINT64 +{ + CD3DX12_SUBRESOURCE_RANGE_UINT64() = default; + explicit CD3DX12_SUBRESOURCE_RANGE_UINT64(const D3D12_SUBRESOURCE_RANGE_UINT64 &o) noexcept : + D3D12_SUBRESOURCE_RANGE_UINT64(o) + {} + CD3DX12_SUBRESOURCE_RANGE_UINT64( + UINT subresource, + const D3D12_RANGE_UINT64& range ) noexcept + { + Subresource = subresource; + Range = range; + } + CD3DX12_SUBRESOURCE_RANGE_UINT64( + UINT subresource, + UINT64 begin, + UINT64 end ) noexcept + { + Subresource = subresource; + Range.Begin = begin; + Range.End = end; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_SHADER_BYTECODE : public D3D12_SHADER_BYTECODE +{ + CD3DX12_SHADER_BYTECODE() = default; + explicit CD3DX12_SHADER_BYTECODE(const D3D12_SHADER_BYTECODE &o) noexcept : + D3D12_SHADER_BYTECODE(o) + {} + CD3DX12_SHADER_BYTECODE( + _In_ ID3DBlob* pShaderBlob ) noexcept + { + pShaderBytecode = pShaderBlob->GetBufferPointer(); + BytecodeLength = pShaderBlob->GetBufferSize(); + } + CD3DX12_SHADER_BYTECODE( + const void* _pShaderBytecode, + SIZE_T bytecodeLength ) noexcept + { + pShaderBytecode = _pShaderBytecode; + BytecodeLength = bytecodeLength; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_TILED_RESOURCE_COORDINATE : public D3D12_TILED_RESOURCE_COORDINATE +{ + CD3DX12_TILED_RESOURCE_COORDINATE() = default; + explicit CD3DX12_TILED_RESOURCE_COORDINATE(const D3D12_TILED_RESOURCE_COORDINATE &o) noexcept : + D3D12_TILED_RESOURCE_COORDINATE(o) + {} + CD3DX12_TILED_RESOURCE_COORDINATE( + UINT x, + UINT y, + UINT z, + UINT subresource ) noexcept + { + X = x; + Y = y; + Z = z; + Subresource = subresource; + } +}; + 
+//------------------------------------------------------------------------------------------------ +struct CD3DX12_TILE_REGION_SIZE : public D3D12_TILE_REGION_SIZE +{ + CD3DX12_TILE_REGION_SIZE() = default; + explicit CD3DX12_TILE_REGION_SIZE(const D3D12_TILE_REGION_SIZE &o) noexcept : + D3D12_TILE_REGION_SIZE(o) + {} + CD3DX12_TILE_REGION_SIZE( + UINT numTiles, + BOOL useBox, + UINT width, + UINT16 height, + UINT16 depth ) noexcept + { + NumTiles = numTiles; + UseBox = useBox; + Width = width; + Height = height; + Depth = depth; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_SUBRESOURCE_TILING : public D3D12_SUBRESOURCE_TILING +{ + CD3DX12_SUBRESOURCE_TILING() = default; + explicit CD3DX12_SUBRESOURCE_TILING(const D3D12_SUBRESOURCE_TILING &o) noexcept : + D3D12_SUBRESOURCE_TILING(o) + {} + CD3DX12_SUBRESOURCE_TILING( + UINT widthInTiles, + UINT16 heightInTiles, + UINT16 depthInTiles, + UINT startTileIndexInOverallResource ) noexcept + { + WidthInTiles = widthInTiles; + HeightInTiles = heightInTiles; + DepthInTiles = depthInTiles; + StartTileIndexInOverallResource = startTileIndexInOverallResource; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_TILE_SHAPE : public D3D12_TILE_SHAPE +{ + CD3DX12_TILE_SHAPE() = default; + explicit CD3DX12_TILE_SHAPE(const D3D12_TILE_SHAPE &o) noexcept : + D3D12_TILE_SHAPE(o) + {} + CD3DX12_TILE_SHAPE( + UINT widthInTexels, + UINT heightInTexels, + UINT depthInTexels ) noexcept + { + WidthInTexels = widthInTexels; + HeightInTexels = heightInTexels; + DepthInTexels = depthInTexels; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RESOURCE_BARRIER : public D3D12_RESOURCE_BARRIER +{ + CD3DX12_RESOURCE_BARRIER() = default; + explicit CD3DX12_RESOURCE_BARRIER(const D3D12_RESOURCE_BARRIER &o) noexcept : + 
D3D12_RESOURCE_BARRIER(o) + {} + static inline CD3DX12_RESOURCE_BARRIER Transition( + _In_ ID3D12Resource* pResource, + D3D12_RESOURCE_STATES stateBefore, + D3D12_RESOURCE_STATES stateAfter, + UINT subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + D3D12_RESOURCE_BARRIER_FLAGS flags = D3D12_RESOURCE_BARRIER_FLAG_NONE) noexcept + { + CD3DX12_RESOURCE_BARRIER result = {}; + D3D12_RESOURCE_BARRIER &barrier = result; + result.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + result.Flags = flags; + barrier.Transition.pResource = pResource; + barrier.Transition.StateBefore = stateBefore; + barrier.Transition.StateAfter = stateAfter; + barrier.Transition.Subresource = subresource; + return result; + } + static inline CD3DX12_RESOURCE_BARRIER Aliasing( + _In_ ID3D12Resource* pResourceBefore, + _In_ ID3D12Resource* pResourceAfter) noexcept + { + CD3DX12_RESOURCE_BARRIER result = {}; + D3D12_RESOURCE_BARRIER &barrier = result; + result.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING; + barrier.Aliasing.pResourceBefore = pResourceBefore; + barrier.Aliasing.pResourceAfter = pResourceAfter; + return result; + } + static inline CD3DX12_RESOURCE_BARRIER UAV( + _In_ ID3D12Resource* pResource) noexcept + { + CD3DX12_RESOURCE_BARRIER result = {}; + D3D12_RESOURCE_BARRIER &barrier = result; + result.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barrier.UAV.pResource = pResource; + return result; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_PACKED_MIP_INFO : public D3D12_PACKED_MIP_INFO +{ + CD3DX12_PACKED_MIP_INFO() = default; + explicit CD3DX12_PACKED_MIP_INFO(const D3D12_PACKED_MIP_INFO &o) noexcept : + D3D12_PACKED_MIP_INFO(o) + {} + CD3DX12_PACKED_MIP_INFO( + UINT8 numStandardMips, + UINT8 numPackedMips, + UINT numTilesForPackedMips, + UINT startTileIndexInOverallResource ) noexcept + { + NumStandardMips = numStandardMips; + NumPackedMips = numPackedMips; + NumTilesForPackedMips = 
numTilesForPackedMips; + StartTileIndexInOverallResource = startTileIndexInOverallResource; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_SUBRESOURCE_FOOTPRINT : public D3D12_SUBRESOURCE_FOOTPRINT +{ + CD3DX12_SUBRESOURCE_FOOTPRINT() = default; + explicit CD3DX12_SUBRESOURCE_FOOTPRINT(const D3D12_SUBRESOURCE_FOOTPRINT &o) noexcept : + D3D12_SUBRESOURCE_FOOTPRINT(o) + {} + CD3DX12_SUBRESOURCE_FOOTPRINT( + DXGI_FORMAT format, + UINT width, + UINT height, + UINT depth, + UINT rowPitch ) noexcept + { + Format = format; + Width = width; + Height = height; + Depth = depth; + RowPitch = rowPitch; + } + explicit CD3DX12_SUBRESOURCE_FOOTPRINT( + const D3D12_RESOURCE_DESC& resDesc, + UINT rowPitch ) noexcept + { + Format = resDesc.Format; + Width = UINT( resDesc.Width ); + Height = resDesc.Height; + Depth = (resDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? resDesc.DepthOrArraySize : 1); + RowPitch = rowPitch; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_TEXTURE_COPY_LOCATION : public D3D12_TEXTURE_COPY_LOCATION +{ + CD3DX12_TEXTURE_COPY_LOCATION() = default; + explicit CD3DX12_TEXTURE_COPY_LOCATION(const D3D12_TEXTURE_COPY_LOCATION &o) noexcept : + D3D12_TEXTURE_COPY_LOCATION(o) + {} + CD3DX12_TEXTURE_COPY_LOCATION(_In_ ID3D12Resource* pRes) noexcept + { + pResource = pRes; + Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + PlacedFootprint = {}; + } + CD3DX12_TEXTURE_COPY_LOCATION(_In_ ID3D12Resource* pRes, D3D12_PLACED_SUBRESOURCE_FOOTPRINT const& Footprint) noexcept + { + pResource = pRes; + Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + PlacedFootprint = Footprint; + } + CD3DX12_TEXTURE_COPY_LOCATION(_In_ ID3D12Resource* pRes, UINT Sub) noexcept + { + pResource = pRes; + Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + PlacedFootprint = {}; + SubresourceIndex = Sub; + } +}; + 
+//------------------------------------------------------------------------------------------------ +struct CD3DX12_DESCRIPTOR_RANGE : public D3D12_DESCRIPTOR_RANGE +{ + CD3DX12_DESCRIPTOR_RANGE() = default; + explicit CD3DX12_DESCRIPTOR_RANGE(const D3D12_DESCRIPTOR_RANGE &o) noexcept : + D3D12_DESCRIPTOR_RANGE(o) + {} + CD3DX12_DESCRIPTOR_RANGE( + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT numDescriptors, + UINT baseShaderRegister, + UINT registerSpace = 0, + UINT offsetInDescriptorsFromTableStart = + D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) noexcept + { + Init(rangeType, numDescriptors, baseShaderRegister, registerSpace, offsetInDescriptorsFromTableStart); + } + + inline void Init( + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT numDescriptors, + UINT baseShaderRegister, + UINT registerSpace = 0, + UINT offsetInDescriptorsFromTableStart = + D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) noexcept + { + Init(*this, rangeType, numDescriptors, baseShaderRegister, registerSpace, offsetInDescriptorsFromTableStart); + } + + static inline void Init( + _Out_ D3D12_DESCRIPTOR_RANGE &range, + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT numDescriptors, + UINT baseShaderRegister, + UINT registerSpace = 0, + UINT offsetInDescriptorsFromTableStart = + D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) noexcept + { + range.RangeType = rangeType; + range.NumDescriptors = numDescriptors; + range.BaseShaderRegister = baseShaderRegister; + range.RegisterSpace = registerSpace; + range.OffsetInDescriptorsFromTableStart = offsetInDescriptorsFromTableStart; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_ROOT_DESCRIPTOR_TABLE : public D3D12_ROOT_DESCRIPTOR_TABLE +{ + CD3DX12_ROOT_DESCRIPTOR_TABLE() = default; + explicit CD3DX12_ROOT_DESCRIPTOR_TABLE(const D3D12_ROOT_DESCRIPTOR_TABLE &o) noexcept : + D3D12_ROOT_DESCRIPTOR_TABLE(o) + {} + CD3DX12_ROOT_DESCRIPTOR_TABLE( + UINT numDescriptorRanges, + 
_In_reads_opt_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* _pDescriptorRanges) noexcept + { + Init(numDescriptorRanges, _pDescriptorRanges); + } + + inline void Init( + UINT numDescriptorRanges, + _In_reads_opt_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* _pDescriptorRanges) noexcept + { + Init(*this, numDescriptorRanges, _pDescriptorRanges); + } + + static inline void Init( + _Out_ D3D12_ROOT_DESCRIPTOR_TABLE &rootDescriptorTable, + UINT numDescriptorRanges, + _In_reads_opt_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* _pDescriptorRanges) noexcept + { + rootDescriptorTable.NumDescriptorRanges = numDescriptorRanges; + rootDescriptorTable.pDescriptorRanges = _pDescriptorRanges; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_ROOT_CONSTANTS : public D3D12_ROOT_CONSTANTS +{ + CD3DX12_ROOT_CONSTANTS() = default; + explicit CD3DX12_ROOT_CONSTANTS(const D3D12_ROOT_CONSTANTS &o) noexcept : + D3D12_ROOT_CONSTANTS(o) + {} + CD3DX12_ROOT_CONSTANTS( + UINT num32BitValues, + UINT shaderRegister, + UINT registerSpace = 0) noexcept + { + Init(num32BitValues, shaderRegister, registerSpace); + } + + inline void Init( + UINT num32BitValues, + UINT shaderRegister, + UINT registerSpace = 0) noexcept + { + Init(*this, num32BitValues, shaderRegister, registerSpace); + } + + static inline void Init( + _Out_ D3D12_ROOT_CONSTANTS &rootConstants, + UINT num32BitValues, + UINT shaderRegister, + UINT registerSpace = 0) noexcept + { + rootConstants.Num32BitValues = num32BitValues; + rootConstants.ShaderRegister = shaderRegister; + rootConstants.RegisterSpace = registerSpace; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_ROOT_DESCRIPTOR : public D3D12_ROOT_DESCRIPTOR +{ + CD3DX12_ROOT_DESCRIPTOR() = default; + explicit CD3DX12_ROOT_DESCRIPTOR(const D3D12_ROOT_DESCRIPTOR &o) noexcept : + D3D12_ROOT_DESCRIPTOR(o) + {} + 
CD3DX12_ROOT_DESCRIPTOR( + UINT shaderRegister, + UINT registerSpace = 0) noexcept + { + Init(shaderRegister, registerSpace); + } + + inline void Init( + UINT shaderRegister, + UINT registerSpace = 0) noexcept + { + Init(*this, shaderRegister, registerSpace); + } + + static inline void Init(_Out_ D3D12_ROOT_DESCRIPTOR &table, UINT shaderRegister, UINT registerSpace = 0) noexcept + { + table.ShaderRegister = shaderRegister; + table.RegisterSpace = registerSpace; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_ROOT_PARAMETER : public D3D12_ROOT_PARAMETER +{ + CD3DX12_ROOT_PARAMETER() = default; + explicit CD3DX12_ROOT_PARAMETER(const D3D12_ROOT_PARAMETER &o) noexcept : + D3D12_ROOT_PARAMETER(o) + {} + + static inline void InitAsDescriptorTable( + _Out_ D3D12_ROOT_PARAMETER &rootParam, + UINT numDescriptorRanges, + _In_reads_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* pDescriptorRanges, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_DESCRIPTOR_TABLE::Init(rootParam.DescriptorTable, numDescriptorRanges, pDescriptorRanges); + } + + static inline void InitAsConstants( + _Out_ D3D12_ROOT_PARAMETER &rootParam, + UINT num32BitValues, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_CONSTANTS::Init(rootParam.Constants, num32BitValues, shaderRegister, registerSpace); + } + + static inline void InitAsConstantBufferView( + _Out_ D3D12_ROOT_PARAMETER &rootParam, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + rootParam.ParameterType = 
D3D12_ROOT_PARAMETER_TYPE_CBV; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_DESCRIPTOR::Init(rootParam.Descriptor, shaderRegister, registerSpace); + } + + static inline void InitAsShaderResourceView( + _Out_ D3D12_ROOT_PARAMETER &rootParam, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_DESCRIPTOR::Init(rootParam.Descriptor, shaderRegister, registerSpace); + } + + static inline void InitAsUnorderedAccessView( + _Out_ D3D12_ROOT_PARAMETER &rootParam, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_DESCRIPTOR::Init(rootParam.Descriptor, shaderRegister, registerSpace); + } + + inline void InitAsDescriptorTable( + UINT numDescriptorRanges, + _In_reads_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* pDescriptorRanges, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + InitAsDescriptorTable(*this, numDescriptorRanges, pDescriptorRanges, visibility); + } + + inline void InitAsConstants( + UINT num32BitValues, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + InitAsConstants(*this, num32BitValues, shaderRegister, registerSpace, visibility); + } + + inline void InitAsConstantBufferView( + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + InitAsConstantBufferView(*this, shaderRegister, registerSpace, visibility); + } + + inline void InitAsShaderResourceView( + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) 
noexcept + { + InitAsShaderResourceView(*this, shaderRegister, registerSpace, visibility); + } + + inline void InitAsUnorderedAccessView( + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + InitAsUnorderedAccessView(*this, shaderRegister, registerSpace, visibility); + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_STATIC_SAMPLER_DESC : public D3D12_STATIC_SAMPLER_DESC +{ + CD3DX12_STATIC_SAMPLER_DESC() = default; + explicit CD3DX12_STATIC_SAMPLER_DESC(const D3D12_STATIC_SAMPLER_DESC &o) noexcept : + D3D12_STATIC_SAMPLER_DESC(o) + {} + CD3DX12_STATIC_SAMPLER_DESC( + UINT shaderRegister, + D3D12_FILTER filter = D3D12_FILTER_ANISOTROPIC, + D3D12_TEXTURE_ADDRESS_MODE addressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE addressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE addressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + FLOAT mipLODBias = 0, + UINT maxAnisotropy = 16, + D3D12_COMPARISON_FUNC comparisonFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL, + D3D12_STATIC_BORDER_COLOR borderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE, + FLOAT minLOD = 0.f, + FLOAT maxLOD = D3D12_FLOAT32_MAX, + D3D12_SHADER_VISIBILITY shaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + UINT registerSpace = 0) noexcept + { + Init( + shaderRegister, + filter, + addressU, + addressV, + addressW, + mipLODBias, + maxAnisotropy, + comparisonFunc, + borderColor, + minLOD, + maxLOD, + shaderVisibility, + registerSpace); + } + + static inline void Init( + _Out_ D3D12_STATIC_SAMPLER_DESC &samplerDesc, + UINT shaderRegister, + D3D12_FILTER filter = D3D12_FILTER_ANISOTROPIC, + D3D12_TEXTURE_ADDRESS_MODE addressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE addressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE addressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + FLOAT mipLODBias = 0, + UINT 
maxAnisotropy = 16, + D3D12_COMPARISON_FUNC comparisonFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL, + D3D12_STATIC_BORDER_COLOR borderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE, + FLOAT minLOD = 0.f, + FLOAT maxLOD = D3D12_FLOAT32_MAX, + D3D12_SHADER_VISIBILITY shaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + UINT registerSpace = 0) noexcept + { + samplerDesc.ShaderRegister = shaderRegister; + samplerDesc.Filter = filter; + samplerDesc.AddressU = addressU; + samplerDesc.AddressV = addressV; + samplerDesc.AddressW = addressW; + samplerDesc.MipLODBias = mipLODBias; + samplerDesc.MaxAnisotropy = maxAnisotropy; + samplerDesc.ComparisonFunc = comparisonFunc; + samplerDesc.BorderColor = borderColor; + samplerDesc.MinLOD = minLOD; + samplerDesc.MaxLOD = maxLOD; + samplerDesc.ShaderVisibility = shaderVisibility; + samplerDesc.RegisterSpace = registerSpace; + } + inline void Init( + UINT shaderRegister, + D3D12_FILTER filter = D3D12_FILTER_ANISOTROPIC, + D3D12_TEXTURE_ADDRESS_MODE addressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE addressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE addressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + FLOAT mipLODBias = 0, + UINT maxAnisotropy = 16, + D3D12_COMPARISON_FUNC comparisonFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL, + D3D12_STATIC_BORDER_COLOR borderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE, + FLOAT minLOD = 0.f, + FLOAT maxLOD = D3D12_FLOAT32_MAX, + D3D12_SHADER_VISIBILITY shaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + UINT registerSpace = 0) noexcept + { + Init( + *this, + shaderRegister, + filter, + addressU, + addressV, + addressW, + mipLODBias, + maxAnisotropy, + comparisonFunc, + borderColor, + minLOD, + maxLOD, + shaderVisibility, + registerSpace); + } + +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_ROOT_SIGNATURE_DESC : public D3D12_ROOT_SIGNATURE_DESC +{ + CD3DX12_ROOT_SIGNATURE_DESC() = default; + explicit 
CD3DX12_ROOT_SIGNATURE_DESC(const D3D12_ROOT_SIGNATURE_DESC &o) noexcept : + D3D12_ROOT_SIGNATURE_DESC(o) + {} + CD3DX12_ROOT_SIGNATURE_DESC( + UINT numParameters, + _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER* _pParameters, + UINT numStaticSamplers = 0, + _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr, + D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE) noexcept + { + Init(numParameters, _pParameters, numStaticSamplers, _pStaticSamplers, flags); + } + CD3DX12_ROOT_SIGNATURE_DESC(CD3DX12_DEFAULT) noexcept + { + Init(0, nullptr, 0, nullptr, D3D12_ROOT_SIGNATURE_FLAG_NONE); + } + + inline void Init( + UINT numParameters, + _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER* _pParameters, + UINT numStaticSamplers = 0, + _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr, + D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE) noexcept + { + Init(*this, numParameters, _pParameters, numStaticSamplers, _pStaticSamplers, flags); + } + + static inline void Init( + _Out_ D3D12_ROOT_SIGNATURE_DESC &desc, + UINT numParameters, + _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER* _pParameters, + UINT numStaticSamplers = 0, + _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr, + D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE) noexcept + { + desc.NumParameters = numParameters; + desc.pParameters = _pParameters; + desc.NumStaticSamplers = numStaticSamplers; + desc.pStaticSamplers = _pStaticSamplers; + desc.Flags = flags; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_DESCRIPTOR_RANGE1 : public D3D12_DESCRIPTOR_RANGE1 +{ + CD3DX12_DESCRIPTOR_RANGE1() = default; + explicit CD3DX12_DESCRIPTOR_RANGE1(const D3D12_DESCRIPTOR_RANGE1 &o) noexcept : + D3D12_DESCRIPTOR_RANGE1(o) + {} + 
CD3DX12_DESCRIPTOR_RANGE1( + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT numDescriptors, + UINT baseShaderRegister, + UINT registerSpace = 0, + D3D12_DESCRIPTOR_RANGE_FLAGS flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE, + UINT offsetInDescriptorsFromTableStart = + D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) noexcept + { + Init(rangeType, numDescriptors, baseShaderRegister, registerSpace, flags, offsetInDescriptorsFromTableStart); + } + + inline void Init( + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT numDescriptors, + UINT baseShaderRegister, + UINT registerSpace = 0, + D3D12_DESCRIPTOR_RANGE_FLAGS flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE, + UINT offsetInDescriptorsFromTableStart = + D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) noexcept + { + Init(*this, rangeType, numDescriptors, baseShaderRegister, registerSpace, flags, offsetInDescriptorsFromTableStart); + } + + static inline void Init( + _Out_ D3D12_DESCRIPTOR_RANGE1 &range, + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT numDescriptors, + UINT baseShaderRegister, + UINT registerSpace = 0, + D3D12_DESCRIPTOR_RANGE_FLAGS flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE, + UINT offsetInDescriptorsFromTableStart = + D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) noexcept + { + range.RangeType = rangeType; + range.NumDescriptors = numDescriptors; + range.BaseShaderRegister = baseShaderRegister; + range.RegisterSpace = registerSpace; + range.Flags = flags; + range.OffsetInDescriptorsFromTableStart = offsetInDescriptorsFromTableStart; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_ROOT_DESCRIPTOR_TABLE1 : public D3D12_ROOT_DESCRIPTOR_TABLE1 +{ + CD3DX12_ROOT_DESCRIPTOR_TABLE1() = default; + explicit CD3DX12_ROOT_DESCRIPTOR_TABLE1(const D3D12_ROOT_DESCRIPTOR_TABLE1 &o) noexcept : + D3D12_ROOT_DESCRIPTOR_TABLE1(o) + {} + CD3DX12_ROOT_DESCRIPTOR_TABLE1( + UINT numDescriptorRanges, + _In_reads_opt_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE1* _pDescriptorRanges) 
noexcept + { + Init(numDescriptorRanges, _pDescriptorRanges); + } + + inline void Init( + UINT numDescriptorRanges, + _In_reads_opt_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE1* _pDescriptorRanges) noexcept + { + Init(*this, numDescriptorRanges, _pDescriptorRanges); + } + + static inline void Init( + _Out_ D3D12_ROOT_DESCRIPTOR_TABLE1 &rootDescriptorTable, + UINT numDescriptorRanges, + _In_reads_opt_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE1* _pDescriptorRanges) noexcept + { + rootDescriptorTable.NumDescriptorRanges = numDescriptorRanges; + rootDescriptorTable.pDescriptorRanges = _pDescriptorRanges; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_ROOT_DESCRIPTOR1 : public D3D12_ROOT_DESCRIPTOR1 +{ + CD3DX12_ROOT_DESCRIPTOR1() = default; + explicit CD3DX12_ROOT_DESCRIPTOR1(const D3D12_ROOT_DESCRIPTOR1 &o) noexcept : + D3D12_ROOT_DESCRIPTOR1(o) + {} + CD3DX12_ROOT_DESCRIPTOR1( + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_ROOT_DESCRIPTOR_FLAGS flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE) noexcept + { + Init(shaderRegister, registerSpace, flags); + } + + inline void Init( + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_ROOT_DESCRIPTOR_FLAGS flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE) noexcept + { + Init(*this, shaderRegister, registerSpace, flags); + } + + static inline void Init( + _Out_ D3D12_ROOT_DESCRIPTOR1 &table, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_ROOT_DESCRIPTOR_FLAGS flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE) noexcept + { + table.ShaderRegister = shaderRegister; + table.RegisterSpace = registerSpace; + table.Flags = flags; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_ROOT_PARAMETER1 : public D3D12_ROOT_PARAMETER1 +{ + CD3DX12_ROOT_PARAMETER1() = default; + explicit CD3DX12_ROOT_PARAMETER1(const D3D12_ROOT_PARAMETER1 &o) noexcept : + 
D3D12_ROOT_PARAMETER1(o) + {} + + static inline void InitAsDescriptorTable( + _Out_ D3D12_ROOT_PARAMETER1 &rootParam, + UINT numDescriptorRanges, + _In_reads_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE1* pDescriptorRanges, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_DESCRIPTOR_TABLE1::Init(rootParam.DescriptorTable, numDescriptorRanges, pDescriptorRanges); + } + + static inline void InitAsConstants( + _Out_ D3D12_ROOT_PARAMETER1 &rootParam, + UINT num32BitValues, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_CONSTANTS::Init(rootParam.Constants, num32BitValues, shaderRegister, registerSpace); + } + + static inline void InitAsConstantBufferView( + _Out_ D3D12_ROOT_PARAMETER1 &rootParam, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_ROOT_DESCRIPTOR_FLAGS flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_DESCRIPTOR1::Init(rootParam.Descriptor, shaderRegister, registerSpace, flags); + } + + static inline void InitAsShaderResourceView( + _Out_ D3D12_ROOT_PARAMETER1 &rootParam, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_ROOT_DESCRIPTOR_FLAGS flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_DESCRIPTOR1::Init(rootParam.Descriptor, shaderRegister, registerSpace, flags); + } + + static inline void 
InitAsUnorderedAccessView( + _Out_ D3D12_ROOT_PARAMETER1 &rootParam, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_ROOT_DESCRIPTOR_FLAGS flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_DESCRIPTOR1::Init(rootParam.Descriptor, shaderRegister, registerSpace, flags); + } + + inline void InitAsDescriptorTable( + UINT numDescriptorRanges, + _In_reads_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE1* pDescriptorRanges, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + InitAsDescriptorTable(*this, numDescriptorRanges, pDescriptorRanges, visibility); + } + + inline void InitAsConstants( + UINT num32BitValues, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + InitAsConstants(*this, num32BitValues, shaderRegister, registerSpace, visibility); + } + + inline void InitAsConstantBufferView( + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_ROOT_DESCRIPTOR_FLAGS flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + InitAsConstantBufferView(*this, shaderRegister, registerSpace, flags, visibility); + } + + inline void InitAsShaderResourceView( + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_ROOT_DESCRIPTOR_FLAGS flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + InitAsShaderResourceView(*this, shaderRegister, registerSpace, flags, visibility); + } + + inline void InitAsUnorderedAccessView( + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_ROOT_DESCRIPTOR_FLAGS flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) noexcept + { + 
InitAsUnorderedAccessView(*this, shaderRegister, registerSpace, flags, visibility); + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC : public D3D12_VERSIONED_ROOT_SIGNATURE_DESC +{ + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC() = default; + explicit CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC(const D3D12_VERSIONED_ROOT_SIGNATURE_DESC &o) noexcept : + D3D12_VERSIONED_ROOT_SIGNATURE_DESC(o) + {} + explicit CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC(const D3D12_ROOT_SIGNATURE_DESC &o) noexcept + { + Version = D3D_ROOT_SIGNATURE_VERSION_1_0; + Desc_1_0 = o; + } + explicit CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC(const D3D12_ROOT_SIGNATURE_DESC1 &o) noexcept + { + Version = D3D_ROOT_SIGNATURE_VERSION_1_1; + Desc_1_1 = o; + } + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC( + UINT numParameters, + _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER* _pParameters, + UINT numStaticSamplers = 0, + _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr, + D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE) noexcept + { + Init_1_0(numParameters, _pParameters, numStaticSamplers, _pStaticSamplers, flags); + } + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC( + UINT numParameters, + _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER1* _pParameters, + UINT numStaticSamplers = 0, + _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr, + D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE) noexcept + { + Init_1_1(numParameters, _pParameters, numStaticSamplers, _pStaticSamplers, flags); + } + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC(CD3DX12_DEFAULT) noexcept + { + Init_1_1(0, nullptr, 0, nullptr, D3D12_ROOT_SIGNATURE_FLAG_NONE); + } + + inline void Init_1_0( + UINT numParameters, + _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER* _pParameters, + UINT numStaticSamplers = 0, + 
_In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr, + D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE) noexcept + { + Init_1_0(*this, numParameters, _pParameters, numStaticSamplers, _pStaticSamplers, flags); + } + + static inline void Init_1_0( + _Out_ D3D12_VERSIONED_ROOT_SIGNATURE_DESC &desc, + UINT numParameters, + _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER* _pParameters, + UINT numStaticSamplers = 0, + _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr, + D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE) noexcept + { + desc.Version = D3D_ROOT_SIGNATURE_VERSION_1_0; + desc.Desc_1_0.NumParameters = numParameters; + desc.Desc_1_0.pParameters = _pParameters; + desc.Desc_1_0.NumStaticSamplers = numStaticSamplers; + desc.Desc_1_0.pStaticSamplers = _pStaticSamplers; + desc.Desc_1_0.Flags = flags; + } + + inline void Init_1_1( + UINT numParameters, + _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER1* _pParameters, + UINT numStaticSamplers = 0, + _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr, + D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE) noexcept + { + Init_1_1(*this, numParameters, _pParameters, numStaticSamplers, _pStaticSamplers, flags); + } + + static inline void Init_1_1( + _Out_ D3D12_VERSIONED_ROOT_SIGNATURE_DESC &desc, + UINT numParameters, + _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER1* _pParameters, + UINT numStaticSamplers = 0, + _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr, + D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE) noexcept + { + desc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1; + desc.Desc_1_1.NumParameters = numParameters; + desc.Desc_1_1.pParameters = _pParameters; + desc.Desc_1_1.NumStaticSamplers = numStaticSamplers; + desc.Desc_1_1.pStaticSamplers = 
_pStaticSamplers; + desc.Desc_1_1.Flags = flags; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_CPU_DESCRIPTOR_HANDLE : public D3D12_CPU_DESCRIPTOR_HANDLE +{ + CD3DX12_CPU_DESCRIPTOR_HANDLE() = default; + explicit CD3DX12_CPU_DESCRIPTOR_HANDLE(const D3D12_CPU_DESCRIPTOR_HANDLE &o) noexcept : + D3D12_CPU_DESCRIPTOR_HANDLE(o) + {} + CD3DX12_CPU_DESCRIPTOR_HANDLE(CD3DX12_DEFAULT) noexcept { ptr = 0; } + CD3DX12_CPU_DESCRIPTOR_HANDLE(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE &other, INT offsetScaledByIncrementSize) noexcept + { + InitOffsetted(other, offsetScaledByIncrementSize); + } + CD3DX12_CPU_DESCRIPTOR_HANDLE(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE &other, INT offsetInDescriptors, UINT descriptorIncrementSize) noexcept + { + InitOffsetted(other, offsetInDescriptors, descriptorIncrementSize); + } + CD3DX12_CPU_DESCRIPTOR_HANDLE& Offset(INT offsetInDescriptors, UINT descriptorIncrementSize) noexcept + { + ptr = SIZE_T(INT64(ptr) + INT64(offsetInDescriptors) * INT64(descriptorIncrementSize)); + return *this; + } + CD3DX12_CPU_DESCRIPTOR_HANDLE& Offset(INT offsetScaledByIncrementSize) noexcept + { + ptr = SIZE_T(INT64(ptr) + INT64(offsetScaledByIncrementSize)); + return *this; + } + bool operator==(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE& other) const noexcept + { + return (ptr == other.ptr); + } + bool operator!=(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE& other) const noexcept + { + return (ptr != other.ptr); + } + CD3DX12_CPU_DESCRIPTOR_HANDLE &operator=(const D3D12_CPU_DESCRIPTOR_HANDLE &other) noexcept + { + ptr = other.ptr; + return *this; + } + + inline void InitOffsetted(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE &base, INT offsetScaledByIncrementSize) noexcept + { + InitOffsetted(*this, base, offsetScaledByIncrementSize); + } + + inline void InitOffsetted(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE &base, INT offsetInDescriptors, UINT descriptorIncrementSize) noexcept + { + InitOffsetted(*this, 
base, offsetInDescriptors, descriptorIncrementSize); + } + + static inline void InitOffsetted(_Out_ D3D12_CPU_DESCRIPTOR_HANDLE &handle, _In_ const D3D12_CPU_DESCRIPTOR_HANDLE &base, INT offsetScaledByIncrementSize) noexcept + { + handle.ptr = SIZE_T(INT64(base.ptr) + INT64(offsetScaledByIncrementSize)); + } + + static inline void InitOffsetted(_Out_ D3D12_CPU_DESCRIPTOR_HANDLE &handle, _In_ const D3D12_CPU_DESCRIPTOR_HANDLE &base, INT offsetInDescriptors, UINT descriptorIncrementSize) noexcept + { + handle.ptr = SIZE_T(INT64(base.ptr) + INT64(offsetInDescriptors) * INT64(descriptorIncrementSize)); + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_GPU_DESCRIPTOR_HANDLE : public D3D12_GPU_DESCRIPTOR_HANDLE +{ + CD3DX12_GPU_DESCRIPTOR_HANDLE() = default; + explicit CD3DX12_GPU_DESCRIPTOR_HANDLE(const D3D12_GPU_DESCRIPTOR_HANDLE &o) noexcept : + D3D12_GPU_DESCRIPTOR_HANDLE(o) + {} + CD3DX12_GPU_DESCRIPTOR_HANDLE(CD3DX12_DEFAULT) noexcept { ptr = 0; } + CD3DX12_GPU_DESCRIPTOR_HANDLE(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE &other, INT offsetScaledByIncrementSize) noexcept + { + InitOffsetted(other, offsetScaledByIncrementSize); + } + CD3DX12_GPU_DESCRIPTOR_HANDLE(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE &other, INT offsetInDescriptors, UINT descriptorIncrementSize) noexcept + { + InitOffsetted(other, offsetInDescriptors, descriptorIncrementSize); + } + CD3DX12_GPU_DESCRIPTOR_HANDLE& Offset(INT offsetInDescriptors, UINT descriptorIncrementSize) noexcept + { + ptr = UINT64(INT64(ptr) + INT64(offsetInDescriptors) * INT64(descriptorIncrementSize)); + return *this; + } + CD3DX12_GPU_DESCRIPTOR_HANDLE& Offset(INT offsetScaledByIncrementSize) noexcept + { + ptr = UINT64(INT64(ptr) + INT64(offsetScaledByIncrementSize)); + return *this; + } + inline bool operator==(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE& other) const noexcept + { + return (ptr == other.ptr); + } + inline bool operator!=(_In_ const 
D3D12_GPU_DESCRIPTOR_HANDLE& other) const noexcept + { + return (ptr != other.ptr); + } + CD3DX12_GPU_DESCRIPTOR_HANDLE &operator=(const D3D12_GPU_DESCRIPTOR_HANDLE &other) noexcept + { + ptr = other.ptr; + return *this; + } + + inline void InitOffsetted(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE &base, INT offsetScaledByIncrementSize) noexcept + { + InitOffsetted(*this, base, offsetScaledByIncrementSize); + } + + inline void InitOffsetted(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE &base, INT offsetInDescriptors, UINT descriptorIncrementSize) noexcept + { + InitOffsetted(*this, base, offsetInDescriptors, descriptorIncrementSize); + } + + static inline void InitOffsetted(_Out_ D3D12_GPU_DESCRIPTOR_HANDLE &handle, _In_ const D3D12_GPU_DESCRIPTOR_HANDLE &base, INT offsetScaledByIncrementSize) noexcept + { + handle.ptr = UINT64(INT64(base.ptr) + INT64(offsetScaledByIncrementSize)); + } + + static inline void InitOffsetted(_Out_ D3D12_GPU_DESCRIPTOR_HANDLE &handle, _In_ const D3D12_GPU_DESCRIPTOR_HANDLE &base, INT offsetInDescriptors, UINT descriptorIncrementSize) noexcept + { + handle.ptr = UINT64(INT64(base.ptr) + INT64(offsetInDescriptors) * INT64(descriptorIncrementSize)); + } +}; + +//------------------------------------------------------------------------------------------------ +inline constexpr UINT D3D12CalcSubresource( UINT MipSlice, UINT ArraySlice, UINT PlaneSlice, UINT MipLevels, UINT ArraySize ) noexcept +{ + return MipSlice + ArraySlice * MipLevels + PlaneSlice * MipLevels * ArraySize; +} + +//------------------------------------------------------------------------------------------------ +template +inline void D3D12DecomposeSubresource( UINT Subresource, UINT MipLevels, UINT ArraySize, _Out_ T& MipSlice, _Out_ U& ArraySlice, _Out_ V& PlaneSlice ) noexcept +{ + MipSlice = static_cast(Subresource % MipLevels); + ArraySlice = static_cast((Subresource / MipLevels) % ArraySize); + PlaneSlice = static_cast(Subresource / (MipLevels * ArraySize)); +} + 
+//------------------------------------------------------------------------------------------------ +inline UINT8 D3D12GetFormatPlaneCount( + _In_ ID3D12Device* pDevice, + DXGI_FORMAT Format + ) noexcept +{ + D3D12_FEATURE_DATA_FORMAT_INFO formatInfo = { Format, 0 }; + if (FAILED(pDevice->CheckFeatureSupport(D3D12_FEATURE_FORMAT_INFO, &formatInfo, sizeof(formatInfo)))) + { + return 0; + } + return formatInfo.PlaneCount; +} + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RESOURCE_DESC : public D3D12_RESOURCE_DESC +{ + CD3DX12_RESOURCE_DESC() = default; + explicit CD3DX12_RESOURCE_DESC( const D3D12_RESOURCE_DESC& o ) noexcept : + D3D12_RESOURCE_DESC( o ) + {} + CD3DX12_RESOURCE_DESC( + D3D12_RESOURCE_DIMENSION dimension, + UINT64 alignment, + UINT64 width, + UINT height, + UINT16 depthOrArraySize, + UINT16 mipLevels, + DXGI_FORMAT format, + UINT sampleCount, + UINT sampleQuality, + D3D12_TEXTURE_LAYOUT layout, + D3D12_RESOURCE_FLAGS flags ) noexcept + { + Dimension = dimension; + Alignment = alignment; + Width = width; + Height = height; + DepthOrArraySize = depthOrArraySize; + MipLevels = mipLevels; + Format = format; + SampleDesc.Count = sampleCount; + SampleDesc.Quality = sampleQuality; + Layout = layout; + Flags = flags; + } + static inline CD3DX12_RESOURCE_DESC Buffer( + const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE ) noexcept + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_BUFFER, resAllocInfo.Alignment, resAllocInfo.SizeInBytes, + 1, 1, 1, DXGI_FORMAT_UNKNOWN, 1, 0, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, flags ); + } + static inline CD3DX12_RESOURCE_DESC Buffer( + UINT64 width, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + UINT64 alignment = 0 ) noexcept + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_BUFFER, alignment, width, 1, 1, 1, + DXGI_FORMAT_UNKNOWN, 1, 0, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, 
flags ); + } + static inline CD3DX12_RESOURCE_DESC Tex1D( + DXGI_FORMAT format, + UINT64 width, + UINT16 arraySize = 1, + UINT16 mipLevels = 0, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + UINT64 alignment = 0 ) noexcept + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_TEXTURE1D, alignment, width, 1, arraySize, + mipLevels, format, 1, 0, layout, flags ); + } + static inline CD3DX12_RESOURCE_DESC Tex2D( + DXGI_FORMAT format, + UINT64 width, + UINT height, + UINT16 arraySize = 1, + UINT16 mipLevels = 0, + UINT sampleCount = 1, + UINT sampleQuality = 0, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + UINT64 alignment = 0 ) noexcept + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_TEXTURE2D, alignment, width, height, arraySize, + mipLevels, format, sampleCount, sampleQuality, layout, flags ); + } + static inline CD3DX12_RESOURCE_DESC Tex3D( + DXGI_FORMAT format, + UINT64 width, + UINT height, + UINT16 depth, + UINT16 mipLevels = 0, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + UINT64 alignment = 0 ) noexcept + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_TEXTURE3D, alignment, width, height, depth, + mipLevels, format, 1, 0, layout, flags ); + } + inline UINT16 Depth() const noexcept + { return (Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? DepthOrArraySize : 1); } + inline UINT16 ArraySize() const noexcept + { return (Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D ? 
DepthOrArraySize : 1); } + inline UINT8 PlaneCount(_In_ ID3D12Device* pDevice) const noexcept + { return D3D12GetFormatPlaneCount(pDevice, Format); } + inline UINT Subresources(_In_ ID3D12Device* pDevice) const noexcept + { return MipLevels * ArraySize() * PlaneCount(pDevice); } + inline UINT CalcSubresource(UINT MipSlice, UINT ArraySlice, UINT PlaneSlice) noexcept + { return D3D12CalcSubresource(MipSlice, ArraySlice, PlaneSlice, MipLevels, ArraySize()); } +}; +inline bool operator==( const D3D12_RESOURCE_DESC& l, const D3D12_RESOURCE_DESC& r ) noexcept +{ + return l.Dimension == r.Dimension && + l.Alignment == r.Alignment && + l.Width == r.Width && + l.Height == r.Height && + l.DepthOrArraySize == r.DepthOrArraySize && + l.MipLevels == r.MipLevels && + l.Format == r.Format && + l.SampleDesc.Count == r.SampleDesc.Count && + l.SampleDesc.Quality == r.SampleDesc.Quality && + l.Layout == r.Layout && + l.Flags == r.Flags; +} +inline bool operator!=( const D3D12_RESOURCE_DESC& l, const D3D12_RESOURCE_DESC& r ) noexcept +{ return !( l == r ); } + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RESOURCE_DESC1 : public D3D12_RESOURCE_DESC1 +{ + CD3DX12_RESOURCE_DESC1() = default; + explicit CD3DX12_RESOURCE_DESC1( const D3D12_RESOURCE_DESC1& o ) noexcept : + D3D12_RESOURCE_DESC1( o ) + {} + CD3DX12_RESOURCE_DESC1( + D3D12_RESOURCE_DIMENSION dimension, + UINT64 alignment, + UINT64 width, + UINT height, + UINT16 depthOrArraySize, + UINT16 mipLevels, + DXGI_FORMAT format, + UINT sampleCount, + UINT sampleQuality, + D3D12_TEXTURE_LAYOUT layout, + D3D12_RESOURCE_FLAGS flags, + UINT samplerFeedbackMipRegionWidth = 0, + UINT samplerFeedbackMipRegionHeight = 0, + UINT samplerFeedbackMipRegionDepth = 0) noexcept + { + Dimension = dimension; + Alignment = alignment; + Width = width; + Height = height; + DepthOrArraySize = depthOrArraySize; + MipLevels = mipLevels; + Format = format; + SampleDesc.Count = sampleCount; 
+ SampleDesc.Quality = sampleQuality; + Layout = layout; + Flags = flags; + SamplerFeedbackMipRegion.Width = samplerFeedbackMipRegionWidth; + SamplerFeedbackMipRegion.Height = samplerFeedbackMipRegionHeight; + SamplerFeedbackMipRegion.Depth = samplerFeedbackMipRegionDepth; + } + static inline CD3DX12_RESOURCE_DESC1 Buffer( + const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE ) noexcept + { + return CD3DX12_RESOURCE_DESC1( D3D12_RESOURCE_DIMENSION_BUFFER, resAllocInfo.Alignment, resAllocInfo.SizeInBytes, + 1, 1, 1, DXGI_FORMAT_UNKNOWN, 1, 0, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, flags, 0, 0, 0 ); + } + static inline CD3DX12_RESOURCE_DESC1 Buffer( + UINT64 width, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + UINT64 alignment = 0 ) noexcept + { + return CD3DX12_RESOURCE_DESC1( D3D12_RESOURCE_DIMENSION_BUFFER, alignment, width, 1, 1, 1, + DXGI_FORMAT_UNKNOWN, 1, 0, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, flags, 0, 0, 0 ); + } + static inline CD3DX12_RESOURCE_DESC1 Tex1D( + DXGI_FORMAT format, + UINT64 width, + UINT16 arraySize = 1, + UINT16 mipLevels = 0, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + UINT64 alignment = 0 ) noexcept + { + return CD3DX12_RESOURCE_DESC1( D3D12_RESOURCE_DIMENSION_TEXTURE1D, alignment, width, 1, arraySize, + mipLevels, format, 1, 0, layout, flags, 0, 0, 0 ); + } + static inline CD3DX12_RESOURCE_DESC1 Tex2D( + DXGI_FORMAT format, + UINT64 width, + UINT height, + UINT16 arraySize = 1, + UINT16 mipLevels = 0, + UINT sampleCount = 1, + UINT sampleQuality = 0, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + UINT64 alignment = 0, + UINT samplerFeedbackMipRegionWidth = 0, + UINT samplerFeedbackMipRegionHeight = 0, + UINT samplerFeedbackMipRegionDepth = 0) noexcept + { + return CD3DX12_RESOURCE_DESC1( D3D12_RESOURCE_DIMENSION_TEXTURE2D, alignment, 
width, height, arraySize, + mipLevels, format, sampleCount, sampleQuality, layout, flags, samplerFeedbackMipRegionWidth, + samplerFeedbackMipRegionHeight, samplerFeedbackMipRegionDepth ); + } + static inline CD3DX12_RESOURCE_DESC1 Tex3D( + DXGI_FORMAT format, + UINT64 width, + UINT height, + UINT16 depth, + UINT16 mipLevels = 0, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + UINT64 alignment = 0 ) noexcept + { + return CD3DX12_RESOURCE_DESC1( D3D12_RESOURCE_DIMENSION_TEXTURE3D, alignment, width, height, depth, + mipLevels, format, 1, 0, layout, flags, 0, 0, 0 ); + } + inline UINT16 Depth() const noexcept + { return (Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? DepthOrArraySize : 1); } + inline UINT16 ArraySize() const noexcept + { return (Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D ? DepthOrArraySize : 1); } + inline UINT8 PlaneCount(_In_ ID3D12Device* pDevice) const noexcept + { return D3D12GetFormatPlaneCount(pDevice, Format); } + inline UINT Subresources(_In_ ID3D12Device* pDevice) const noexcept + { return MipLevels * ArraySize() * PlaneCount(pDevice); } + inline UINT CalcSubresource(UINT MipSlice, UINT ArraySlice, UINT PlaneSlice) noexcept + { return D3D12CalcSubresource(MipSlice, ArraySlice, PlaneSlice, MipLevels, ArraySize()); } +}; +inline bool operator==( const D3D12_RESOURCE_DESC1& l, const D3D12_RESOURCE_DESC1& r ) noexcept +{ + return l.Dimension == r.Dimension && + l.Alignment == r.Alignment && + l.Width == r.Width && + l.Height == r.Height && + l.DepthOrArraySize == r.DepthOrArraySize && + l.MipLevels == r.MipLevels && + l.Format == r.Format && + l.SampleDesc.Count == r.SampleDesc.Count && + l.SampleDesc.Quality == r.SampleDesc.Quality && + l.Layout == r.Layout && + l.Flags == r.Flags && + l.SamplerFeedbackMipRegion.Width == r.SamplerFeedbackMipRegion.Width && + l.SamplerFeedbackMipRegion.Height == r.SamplerFeedbackMipRegion.Height && + l.SamplerFeedbackMipRegion.Depth 
== r.SamplerFeedbackMipRegion.Depth; +} +inline bool operator!=( const D3D12_RESOURCE_DESC1& l, const D3D12_RESOURCE_DESC1& r ) noexcept +{ return !( l == r ); } + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_VIEW_INSTANCING_DESC : public D3D12_VIEW_INSTANCING_DESC +{ + CD3DX12_VIEW_INSTANCING_DESC() = default; + explicit CD3DX12_VIEW_INSTANCING_DESC( const D3D12_VIEW_INSTANCING_DESC& o ) noexcept : + D3D12_VIEW_INSTANCING_DESC( o ) + {} + explicit CD3DX12_VIEW_INSTANCING_DESC( CD3DX12_DEFAULT ) noexcept + { + ViewInstanceCount = 0; + pViewInstanceLocations = nullptr; + Flags = D3D12_VIEW_INSTANCING_FLAG_NONE; + } + explicit CD3DX12_VIEW_INSTANCING_DESC( + UINT InViewInstanceCount, + const D3D12_VIEW_INSTANCE_LOCATION* InViewInstanceLocations, + D3D12_VIEW_INSTANCING_FLAGS InFlags) noexcept + { + ViewInstanceCount = InViewInstanceCount; + pViewInstanceLocations = InViewInstanceLocations; + Flags = InFlags; + } +}; + +//------------------------------------------------------------------------------------------------ +// Row-by-row memcpy +inline void MemcpySubresource( + _In_ const D3D12_MEMCPY_DEST* pDest, + _In_ const D3D12_SUBRESOURCE_DATA* pSrc, + SIZE_T RowSizeInBytes, + UINT NumRows, + UINT NumSlices) noexcept +{ + for (UINT z = 0; z < NumSlices; ++z) + { + auto pDestSlice = static_cast(pDest->pData) + pDest->SlicePitch * z; + auto pSrcSlice = static_cast(pSrc->pData) + pSrc->SlicePitch * LONG_PTR(z); + for (UINT y = 0; y < NumRows; ++y) + { + memcpy(pDestSlice + pDest->RowPitch * y, + pSrcSlice + pSrc->RowPitch * LONG_PTR(y), + RowSizeInBytes); + } + } +} + +//------------------------------------------------------------------------------------------------ +// Returns required size of a buffer to be used for data upload +inline UINT64 GetRequiredIntermediateSize( + _In_ ID3D12Resource* pDestinationResource, + _In_range_(0,D3D12_REQ_SUBRESOURCES) UINT FirstSubresource, + 
_In_range_(0,D3D12_REQ_SUBRESOURCES-FirstSubresource) UINT NumSubresources) noexcept +{ + auto Desc = pDestinationResource->GetDesc(); + UINT64 RequiredSize = 0; + + ID3D12Device* pDevice = nullptr; + pDestinationResource->GetDevice(IID_ID3D12Device, reinterpret_cast(&pDevice)); + pDevice->GetCopyableFootprints(&Desc, FirstSubresource, NumSubresources, 0, nullptr, nullptr, nullptr, &RequiredSize); + pDevice->Release(); + + return RequiredSize; +} + +//------------------------------------------------------------------------------------------------ +// All arrays must be populated (e.g. by calling GetCopyableFootprints) +inline UINT64 UpdateSubresources( + _In_ ID3D12GraphicsCommandList* pCmdList, + _In_ ID3D12Resource* pDestinationResource, + _In_ ID3D12Resource* pIntermediate, + _In_range_(0,D3D12_REQ_SUBRESOURCES) UINT FirstSubresource, + _In_range_(0,D3D12_REQ_SUBRESOURCES-FirstSubresource) UINT NumSubresources, + UINT64 RequiredSize, + _In_reads_(NumSubresources) const D3D12_PLACED_SUBRESOURCE_FOOTPRINT* pLayouts, + _In_reads_(NumSubresources) const UINT* pNumRows, + _In_reads_(NumSubresources) const UINT64* pRowSizesInBytes, + _In_reads_(NumSubresources) const D3D12_SUBRESOURCE_DATA* pSrcData) noexcept +{ + // Minor validation + auto IntermediateDesc = pIntermediate->GetDesc(); + auto DestinationDesc = pDestinationResource->GetDesc(); + if (IntermediateDesc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER || + IntermediateDesc.Width < RequiredSize + pLayouts[0].Offset || + RequiredSize > SIZE_T(-1) || + (DestinationDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && + (FirstSubresource != 0 || NumSubresources != 1))) + { + return 0; + } + + BYTE* pData; + HRESULT hr = pIntermediate->Map(0, nullptr, reinterpret_cast(&pData)); + if (FAILED(hr)) + { + return 0; + } + + for (UINT i = 0; i < NumSubresources; ++i) + { + if (pRowSizesInBytes[i] > SIZE_T(-1)) return 0; + D3D12_MEMCPY_DEST DestData = { pData + pLayouts[i].Offset, pLayouts[i].Footprint.RowPitch, 
SIZE_T(pLayouts[i].Footprint.RowPitch) * SIZE_T(pNumRows[i]) }; + MemcpySubresource(&DestData, &pSrcData[i], static_cast(pRowSizesInBytes[i]), pNumRows[i], pLayouts[i].Footprint.Depth); + } + pIntermediate->Unmap(0, nullptr); + + if (DestinationDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + { + pCmdList->CopyBufferRegion( + pDestinationResource, 0, pIntermediate, pLayouts[0].Offset, pLayouts[0].Footprint.Width); + } + else + { + for (UINT i = 0; i < NumSubresources; ++i) + { + CD3DX12_TEXTURE_COPY_LOCATION Dst(pDestinationResource, i + FirstSubresource); + CD3DX12_TEXTURE_COPY_LOCATION Src(pIntermediate, pLayouts[i]); + pCmdList->CopyTextureRegion(&Dst, 0, 0, 0, &Src, nullptr); + } + } + return RequiredSize; +} + +//------------------------------------------------------------------------------------------------ +// Heap-allocating UpdateSubresources implementation +inline UINT64 UpdateSubresources( + _In_ ID3D12GraphicsCommandList* pCmdList, + _In_ ID3D12Resource* pDestinationResource, + _In_ ID3D12Resource* pIntermediate, + UINT64 IntermediateOffset, + _In_range_(0,D3D12_REQ_SUBRESOURCES) UINT FirstSubresource, + _In_range_(0,D3D12_REQ_SUBRESOURCES-FirstSubresource) UINT NumSubresources, + _In_reads_(NumSubresources) const D3D12_SUBRESOURCE_DATA* pSrcData) noexcept +{ + UINT64 RequiredSize = 0; + UINT64 MemToAlloc = static_cast(sizeof(D3D12_PLACED_SUBRESOURCE_FOOTPRINT) + sizeof(UINT) + sizeof(UINT64)) * NumSubresources; + if (MemToAlloc > SIZE_MAX) + { + return 0; + } + void* pMem = HeapAlloc(GetProcessHeap(), 0, static_cast(MemToAlloc)); + if (pMem == nullptr) + { + return 0; + } + auto pLayouts = static_cast(pMem); + UINT64* pRowSizesInBytes = reinterpret_cast(pLayouts + NumSubresources); + UINT* pNumRows = reinterpret_cast(pRowSizesInBytes + NumSubresources); + + auto Desc = pDestinationResource->GetDesc(); + ID3D12Device* pDevice = nullptr; + pDestinationResource->GetDevice(IID_ID3D12Device, reinterpret_cast(&pDevice)); + 
pDevice->GetCopyableFootprints(&Desc, FirstSubresource, NumSubresources, IntermediateOffset, pLayouts, pNumRows, pRowSizesInBytes, &RequiredSize); + pDevice->Release(); + + UINT64 Result = UpdateSubresources(pCmdList, pDestinationResource, pIntermediate, FirstSubresource, NumSubresources, RequiredSize, pLayouts, pNumRows, pRowSizesInBytes, pSrcData); + HeapFree(GetProcessHeap(), 0, pMem); + return Result; +} + +//------------------------------------------------------------------------------------------------ +// Stack-allocating UpdateSubresources implementation +template +inline UINT64 UpdateSubresources( + _In_ ID3D12GraphicsCommandList* pCmdList, + _In_ ID3D12Resource* pDestinationResource, + _In_ ID3D12Resource* pIntermediate, + UINT64 IntermediateOffset, + _In_range_(0, MaxSubresources) UINT FirstSubresource, + _In_range_(1, MaxSubresources - FirstSubresource) UINT NumSubresources, + _In_reads_(NumSubresources) const D3D12_SUBRESOURCE_DATA* pSrcData) noexcept +{ + UINT64 RequiredSize = 0; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT Layouts[MaxSubresources]; + UINT NumRows[MaxSubresources]; + UINT64 RowSizesInBytes[MaxSubresources]; + + auto Desc = pDestinationResource->GetDesc(); + ID3D12Device* pDevice = nullptr; + pDestinationResource->GetDevice(IID_ID3D12Device, reinterpret_cast(&pDevice)); + pDevice->GetCopyableFootprints(&Desc, FirstSubresource, NumSubresources, IntermediateOffset, Layouts, NumRows, RowSizesInBytes, &RequiredSize); + pDevice->Release(); + + return UpdateSubresources(pCmdList, pDestinationResource, pIntermediate, FirstSubresource, NumSubresources, RequiredSize, Layouts, NumRows, RowSizesInBytes, pSrcData); +} + +//------------------------------------------------------------------------------------------------ +inline constexpr bool D3D12IsLayoutOpaque( D3D12_TEXTURE_LAYOUT Layout ) noexcept +{ return Layout == D3D12_TEXTURE_LAYOUT_UNKNOWN || Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; } + 
+//------------------------------------------------------------------------------------------------ +template +inline ID3D12CommandList * const * CommandListCast(t_CommandListType * const * pp) noexcept +{ + // This cast is useful for passing strongly typed command list pointers into + // ExecuteCommandLists. + // This cast is valid as long as the const-ness is respected. D3D12 APIs do + // respect the const-ness of their arguments. + return reinterpret_cast(pp); +} + +//------------------------------------------------------------------------------------------------ +// D3D12 exports a new method for serializing root signatures in the Windows 10 Anniversary Update. +// To help enable root signature 1.1 features when they are available and not require maintaining +// two code paths for building root signatures, this helper method reconstructs a 1.0 signature when +// 1.1 is not supported. +inline HRESULT D3DX12SerializeVersionedRootSignature( + _In_ const D3D12_VERSIONED_ROOT_SIGNATURE_DESC* pRootSignatureDesc, + D3D_ROOT_SIGNATURE_VERSION MaxVersion, + _Outptr_ ID3DBlob** ppBlob, + _Always_(_Outptr_opt_result_maybenull_) ID3DBlob** ppErrorBlob) noexcept +{ + if (ppErrorBlob != nullptr) + { + *ppErrorBlob = nullptr; + } + + switch (MaxVersion) + { + case D3D_ROOT_SIGNATURE_VERSION_1_0: + switch (pRootSignatureDesc->Version) + { + case D3D_ROOT_SIGNATURE_VERSION_1_0: + return D3D12SerializeRootSignature(&pRootSignatureDesc->Desc_1_0, D3D_ROOT_SIGNATURE_VERSION_1, ppBlob, ppErrorBlob); + + case D3D_ROOT_SIGNATURE_VERSION_1_1: + { + HRESULT hr = S_OK; + const D3D12_ROOT_SIGNATURE_DESC1& desc_1_1 = pRootSignatureDesc->Desc_1_1; + + const SIZE_T ParametersSize = sizeof(D3D12_ROOT_PARAMETER) * desc_1_1.NumParameters; + void* pParameters = (ParametersSize > 0) ? 
HeapAlloc(GetProcessHeap(), 0, ParametersSize) : nullptr; + if (ParametersSize > 0 && pParameters == nullptr) + { + hr = E_OUTOFMEMORY; + } + auto pParameters_1_0 = static_cast(pParameters); + + if (SUCCEEDED(hr)) + { + for (UINT n = 0; n < desc_1_1.NumParameters; n++) + { + __analysis_assume(ParametersSize == sizeof(D3D12_ROOT_PARAMETER) * desc_1_1.NumParameters); + pParameters_1_0[n].ParameterType = desc_1_1.pParameters[n].ParameterType; + pParameters_1_0[n].ShaderVisibility = desc_1_1.pParameters[n].ShaderVisibility; + + switch (desc_1_1.pParameters[n].ParameterType) + { + case D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: + pParameters_1_0[n].Constants.Num32BitValues = desc_1_1.pParameters[n].Constants.Num32BitValues; + pParameters_1_0[n].Constants.RegisterSpace = desc_1_1.pParameters[n].Constants.RegisterSpace; + pParameters_1_0[n].Constants.ShaderRegister = desc_1_1.pParameters[n].Constants.ShaderRegister; + break; + + case D3D12_ROOT_PARAMETER_TYPE_CBV: + case D3D12_ROOT_PARAMETER_TYPE_SRV: + case D3D12_ROOT_PARAMETER_TYPE_UAV: + pParameters_1_0[n].Descriptor.RegisterSpace = desc_1_1.pParameters[n].Descriptor.RegisterSpace; + pParameters_1_0[n].Descriptor.ShaderRegister = desc_1_1.pParameters[n].Descriptor.ShaderRegister; + break; + + case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: + const D3D12_ROOT_DESCRIPTOR_TABLE1& table_1_1 = desc_1_1.pParameters[n].DescriptorTable; + + const SIZE_T DescriptorRangesSize = sizeof(D3D12_DESCRIPTOR_RANGE) * table_1_1.NumDescriptorRanges; + void* pDescriptorRanges = (DescriptorRangesSize > 0 && SUCCEEDED(hr)) ? 
HeapAlloc(GetProcessHeap(), 0, DescriptorRangesSize) : nullptr; + if (DescriptorRangesSize > 0 && pDescriptorRanges == nullptr) + { + hr = E_OUTOFMEMORY; + } + auto pDescriptorRanges_1_0 = static_cast(pDescriptorRanges); + + if (SUCCEEDED(hr)) + { + for (UINT x = 0; x < table_1_1.NumDescriptorRanges; x++) + { + __analysis_assume(DescriptorRangesSize == sizeof(D3D12_DESCRIPTOR_RANGE) * table_1_1.NumDescriptorRanges); + pDescriptorRanges_1_0[x].BaseShaderRegister = table_1_1.pDescriptorRanges[x].BaseShaderRegister; + pDescriptorRanges_1_0[x].NumDescriptors = table_1_1.pDescriptorRanges[x].NumDescriptors; + pDescriptorRanges_1_0[x].OffsetInDescriptorsFromTableStart = table_1_1.pDescriptorRanges[x].OffsetInDescriptorsFromTableStart; + pDescriptorRanges_1_0[x].RangeType = table_1_1.pDescriptorRanges[x].RangeType; + pDescriptorRanges_1_0[x].RegisterSpace = table_1_1.pDescriptorRanges[x].RegisterSpace; + } + } + + D3D12_ROOT_DESCRIPTOR_TABLE& table_1_0 = pParameters_1_0[n].DescriptorTable; + table_1_0.NumDescriptorRanges = table_1_1.NumDescriptorRanges; + table_1_0.pDescriptorRanges = pDescriptorRanges_1_0; + } + } + } + + if (SUCCEEDED(hr)) + { + CD3DX12_ROOT_SIGNATURE_DESC desc_1_0(desc_1_1.NumParameters, pParameters_1_0, desc_1_1.NumStaticSamplers, desc_1_1.pStaticSamplers, desc_1_1.Flags); + hr = D3D12SerializeRootSignature(&desc_1_0, D3D_ROOT_SIGNATURE_VERSION_1, ppBlob, ppErrorBlob); + } + + if (pParameters) + { + for (UINT n = 0; n < desc_1_1.NumParameters; n++) + { + if (desc_1_1.pParameters[n].ParameterType == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) + { + HeapFree(GetProcessHeap(), 0, reinterpret_cast(const_cast(pParameters_1_0[n].DescriptorTable.pDescriptorRanges))); + } + } + HeapFree(GetProcessHeap(), 0, pParameters); + } + return hr; + } + } + break; + + case D3D_ROOT_SIGNATURE_VERSION_1_1: + return D3D12SerializeVersionedRootSignature(pRootSignatureDesc, ppBlob, ppErrorBlob); + } + + return E_INVALIDARG; +} + 
+//------------------------------------------------------------------------------------------------ +struct CD3DX12_RT_FORMAT_ARRAY : public D3D12_RT_FORMAT_ARRAY +{ + CD3DX12_RT_FORMAT_ARRAY() = default; + explicit CD3DX12_RT_FORMAT_ARRAY(const D3D12_RT_FORMAT_ARRAY& o) noexcept + : D3D12_RT_FORMAT_ARRAY(o) + {} + explicit CD3DX12_RT_FORMAT_ARRAY(_In_reads_(NumFormats) const DXGI_FORMAT* pFormats, UINT NumFormats) noexcept + { + NumRenderTargets = NumFormats; + memcpy(RTFormats, pFormats, sizeof(RTFormats)); + // assumes ARRAY_SIZE(pFormats) == ARRAY_SIZE(RTFormats) + } +}; + +//------------------------------------------------------------------------------------------------ +// Pipeline State Stream Helpers +//------------------------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------------------------ +// Stream Subobjects, i.e. elements of a stream + +struct DefaultSampleMask { operator UINT() noexcept { return UINT_MAX; } }; +struct DefaultSampleDesc { operator DXGI_SAMPLE_DESC() noexcept { return DXGI_SAMPLE_DESC{1, 0}; } }; + +#pragma warning(push) +#pragma warning(disable : 4324) +template +class alignas(void*) CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT +{ +private: + D3D12_PIPELINE_STATE_SUBOBJECT_TYPE _Type; + InnerStructType _Inner; +public: + CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT() noexcept : _Type(Type), _Inner(DefaultArg()) {} + CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT(InnerStructType const& i) noexcept : _Type(Type), _Inner(i) {} + CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT& operator=(InnerStructType const& i) noexcept { _Type = Type; _Inner = i; return *this; } + operator InnerStructType const&() const noexcept { return _Inner; } + operator InnerStructType&() noexcept { return _Inner; } + InnerStructType* operator&() noexcept { return &_Inner; } + InnerStructType const* operator&() const noexcept { return &_Inner; } +}; +#pragma warning(pop) +typedef 
CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_PIPELINE_STATE_FLAGS, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_FLAGS> CD3DX12_PIPELINE_STATE_STREAM_FLAGS; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< UINT, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_NODE_MASK> CD3DX12_PIPELINE_STATE_STREAM_NODE_MASK; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< ID3D12RootSignature*, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ROOT_SIGNATURE> CD3DX12_PIPELINE_STATE_STREAM_ROOT_SIGNATURE; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_INPUT_LAYOUT_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_INPUT_LAYOUT> CD3DX12_PIPELINE_STATE_STREAM_INPUT_LAYOUT; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_INDEX_BUFFER_STRIP_CUT_VALUE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_IB_STRIP_CUT_VALUE> CD3DX12_PIPELINE_STATE_STREAM_IB_STRIP_CUT_VALUE; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_PRIMITIVE_TOPOLOGY_TYPE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PRIMITIVE_TOPOLOGY> CD3DX12_PIPELINE_STATE_STREAM_PRIMITIVE_TOPOLOGY; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VS> CD3DX12_PIPELINE_STATE_STREAM_VS; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_GS> CD3DX12_PIPELINE_STATE_STREAM_GS; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_STREAM_OUTPUT_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_STREAM_OUTPUT> CD3DX12_PIPELINE_STATE_STREAM_STREAM_OUTPUT; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_HS> CD3DX12_PIPELINE_STATE_STREAM_HS; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DS> CD3DX12_PIPELINE_STATE_STREAM_DS; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PS> CD3DX12_PIPELINE_STATE_STREAM_PS; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_SHADER_BYTECODE, 
D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_AS> CD3DX12_PIPELINE_STATE_STREAM_AS; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_MS> CD3DX12_PIPELINE_STATE_STREAM_MS; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_CS> CD3DX12_PIPELINE_STATE_STREAM_CS; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< CD3DX12_BLEND_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_BLEND, CD3DX12_DEFAULT> CD3DX12_PIPELINE_STATE_STREAM_BLEND_DESC; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< CD3DX12_DEPTH_STENCIL_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL, CD3DX12_DEFAULT> CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< CD3DX12_DEPTH_STENCIL_DESC1, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL1, CD3DX12_DEFAULT> CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL1; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< DXGI_FORMAT, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL_FORMAT> CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL_FORMAT; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< CD3DX12_RASTERIZER_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER, CD3DX12_DEFAULT> CD3DX12_PIPELINE_STATE_STREAM_RASTERIZER; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_RT_FORMAT_ARRAY, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RENDER_TARGET_FORMATS> CD3DX12_PIPELINE_STATE_STREAM_RENDER_TARGET_FORMATS; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< DXGI_SAMPLE_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_DESC, DefaultSampleDesc> CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_DESC; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< UINT, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_MASK, DefaultSampleMask> CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_MASK; +typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_CACHED_PIPELINE_STATE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_CACHED_PSO> CD3DX12_PIPELINE_STATE_STREAM_CACHED_PSO; +typedef 
CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< CD3DX12_VIEW_INSTANCING_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VIEW_INSTANCING, CD3DX12_DEFAULT> CD3DX12_PIPELINE_STATE_STREAM_VIEW_INSTANCING; + +//------------------------------------------------------------------------------------------------ +// Stream Parser Helpers + +struct ID3DX12PipelineParserCallbacks +{ + // Subobject Callbacks + virtual void FlagsCb(D3D12_PIPELINE_STATE_FLAGS) {} + virtual void NodeMaskCb(UINT) {} + virtual void RootSignatureCb(ID3D12RootSignature*) {} + virtual void InputLayoutCb(const D3D12_INPUT_LAYOUT_DESC&) {} + virtual void IBStripCutValueCb(D3D12_INDEX_BUFFER_STRIP_CUT_VALUE) {} + virtual void PrimitiveTopologyTypeCb(D3D12_PRIMITIVE_TOPOLOGY_TYPE) {} + virtual void VSCb(const D3D12_SHADER_BYTECODE&) {} + virtual void GSCb(const D3D12_SHADER_BYTECODE&) {} + virtual void StreamOutputCb(const D3D12_STREAM_OUTPUT_DESC&) {} + virtual void HSCb(const D3D12_SHADER_BYTECODE&) {} + virtual void DSCb(const D3D12_SHADER_BYTECODE&) {} + virtual void PSCb(const D3D12_SHADER_BYTECODE&) {} + virtual void CSCb(const D3D12_SHADER_BYTECODE&) {} + virtual void ASCb(const D3D12_SHADER_BYTECODE&) {} + virtual void MSCb(const D3D12_SHADER_BYTECODE&) {} + virtual void BlendStateCb(const D3D12_BLEND_DESC&) {} + virtual void DepthStencilStateCb(const D3D12_DEPTH_STENCIL_DESC&) {} + virtual void DepthStencilState1Cb(const D3D12_DEPTH_STENCIL_DESC1&) {} + virtual void DSVFormatCb(DXGI_FORMAT) {} + virtual void RasterizerStateCb(const D3D12_RASTERIZER_DESC&) {} + virtual void RTVFormatsCb(const D3D12_RT_FORMAT_ARRAY&) {} + virtual void SampleDescCb(const DXGI_SAMPLE_DESC&) {} + virtual void SampleMaskCb(UINT) {} + virtual void ViewInstancingCb(const D3D12_VIEW_INSTANCING_DESC&) {} + virtual void CachedPSOCb(const D3D12_CACHED_PIPELINE_STATE&) {} + + // Error Callbacks + virtual void ErrorBadInputParameter(UINT /*ParameterIndex*/) {} + virtual void ErrorDuplicateSubobject(D3D12_PIPELINE_STATE_SUBOBJECT_TYPE 
/*DuplicateType*/) {} + virtual void ErrorUnknownSubobject(UINT /*UnknownTypeValue*/) {} + + virtual ~ID3DX12PipelineParserCallbacks() = default; +}; + +struct D3DX12_MESH_SHADER_PIPELINE_STATE_DESC +{ + ID3D12RootSignature* pRootSignature; + D3D12_SHADER_BYTECODE AS; + D3D12_SHADER_BYTECODE MS; + D3D12_SHADER_BYTECODE PS; + D3D12_BLEND_DESC BlendState; + UINT SampleMask; + D3D12_RASTERIZER_DESC RasterizerState; + D3D12_DEPTH_STENCIL_DESC DepthStencilState; + D3D12_PRIMITIVE_TOPOLOGY_TYPE PrimitiveTopologyType; + UINT NumRenderTargets; + DXGI_FORMAT RTVFormats[ D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT ]; + DXGI_FORMAT DSVFormat; + DXGI_SAMPLE_DESC SampleDesc; + UINT NodeMask; + D3D12_CACHED_PIPELINE_STATE CachedPSO; + D3D12_PIPELINE_STATE_FLAGS Flags; +}; + +// CD3DX12_PIPELINE_STATE_STREAM2 Works on OS Build 19041+ (where there is a new mesh shader pipeline). +// Use CD3DX12_PIPELINE_STATE_STREAM1 for OS Build 16299+ (where there is a new view instancing subobject). +// Use CD3DX12_PIPELINE_STATE_STREAM for OS Build 15063+ support. 
+struct CD3DX12_PIPELINE_STATE_STREAM2 +{ + CD3DX12_PIPELINE_STATE_STREAM2() = default; + // Mesh and amplification shaders must be set manually, since they do not have representation in D3D12_GRAPHICS_PIPELINE_STATE_DESC + CD3DX12_PIPELINE_STATE_STREAM2(const D3D12_GRAPHICS_PIPELINE_STATE_DESC& Desc) noexcept + : Flags(Desc.Flags) + , NodeMask(Desc.NodeMask) + , pRootSignature(Desc.pRootSignature) + , InputLayout(Desc.InputLayout) + , IBStripCutValue(Desc.IBStripCutValue) + , PrimitiveTopologyType(Desc.PrimitiveTopologyType) + , VS(Desc.VS) + , GS(Desc.GS) + , StreamOutput(Desc.StreamOutput) + , HS(Desc.HS) + , DS(Desc.DS) + , PS(Desc.PS) + , BlendState(CD3DX12_BLEND_DESC(Desc.BlendState)) + , DepthStencilState(CD3DX12_DEPTH_STENCIL_DESC1(Desc.DepthStencilState)) + , DSVFormat(Desc.DSVFormat) + , RasterizerState(CD3DX12_RASTERIZER_DESC(Desc.RasterizerState)) + , RTVFormats(CD3DX12_RT_FORMAT_ARRAY(Desc.RTVFormats, Desc.NumRenderTargets)) + , SampleDesc(Desc.SampleDesc) + , SampleMask(Desc.SampleMask) + , CachedPSO(Desc.CachedPSO) + , ViewInstancingDesc(CD3DX12_VIEW_INSTANCING_DESC(CD3DX12_DEFAULT())) + {} + CD3DX12_PIPELINE_STATE_STREAM2(const D3DX12_MESH_SHADER_PIPELINE_STATE_DESC& Desc) noexcept + : Flags(Desc.Flags) + , NodeMask(Desc.NodeMask) + , pRootSignature(Desc.pRootSignature) + , PrimitiveTopologyType(Desc.PrimitiveTopologyType) + , PS(Desc.PS) + , AS(Desc.AS) + , MS(Desc.MS) + , BlendState(CD3DX12_BLEND_DESC(Desc.BlendState)) + , DepthStencilState(CD3DX12_DEPTH_STENCIL_DESC1(Desc.DepthStencilState)) + , DSVFormat(Desc.DSVFormat) + , RasterizerState(CD3DX12_RASTERIZER_DESC(Desc.RasterizerState)) + , RTVFormats(CD3DX12_RT_FORMAT_ARRAY(Desc.RTVFormats, Desc.NumRenderTargets)) + , SampleDesc(Desc.SampleDesc) + , SampleMask(Desc.SampleMask) + , CachedPSO(Desc.CachedPSO) + , ViewInstancingDesc(CD3DX12_VIEW_INSTANCING_DESC(CD3DX12_DEFAULT())) + {} + CD3DX12_PIPELINE_STATE_STREAM2(const D3D12_COMPUTE_PIPELINE_STATE_DESC& Desc) noexcept + : Flags(Desc.Flags) + , 
NodeMask(Desc.NodeMask) + , pRootSignature(Desc.pRootSignature) + , CS(CD3DX12_SHADER_BYTECODE(Desc.CS)) + , CachedPSO(Desc.CachedPSO) + { + static_cast(DepthStencilState).DepthEnable = false; + } + CD3DX12_PIPELINE_STATE_STREAM_FLAGS Flags; + CD3DX12_PIPELINE_STATE_STREAM_NODE_MASK NodeMask; + CD3DX12_PIPELINE_STATE_STREAM_ROOT_SIGNATURE pRootSignature; + CD3DX12_PIPELINE_STATE_STREAM_INPUT_LAYOUT InputLayout; + CD3DX12_PIPELINE_STATE_STREAM_IB_STRIP_CUT_VALUE IBStripCutValue; + CD3DX12_PIPELINE_STATE_STREAM_PRIMITIVE_TOPOLOGY PrimitiveTopologyType; + CD3DX12_PIPELINE_STATE_STREAM_VS VS; + CD3DX12_PIPELINE_STATE_STREAM_GS GS; + CD3DX12_PIPELINE_STATE_STREAM_STREAM_OUTPUT StreamOutput; + CD3DX12_PIPELINE_STATE_STREAM_HS HS; + CD3DX12_PIPELINE_STATE_STREAM_DS DS; + CD3DX12_PIPELINE_STATE_STREAM_PS PS; + CD3DX12_PIPELINE_STATE_STREAM_AS AS; + CD3DX12_PIPELINE_STATE_STREAM_MS MS; + CD3DX12_PIPELINE_STATE_STREAM_CS CS; + CD3DX12_PIPELINE_STATE_STREAM_BLEND_DESC BlendState; + CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL1 DepthStencilState; + CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL_FORMAT DSVFormat; + CD3DX12_PIPELINE_STATE_STREAM_RASTERIZER RasterizerState; + CD3DX12_PIPELINE_STATE_STREAM_RENDER_TARGET_FORMATS RTVFormats; + CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_DESC SampleDesc; + CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_MASK SampleMask; + CD3DX12_PIPELINE_STATE_STREAM_CACHED_PSO CachedPSO; + CD3DX12_PIPELINE_STATE_STREAM_VIEW_INSTANCING ViewInstancingDesc; + D3D12_GRAPHICS_PIPELINE_STATE_DESC GraphicsDescV0() const noexcept + { + D3D12_GRAPHICS_PIPELINE_STATE_DESC D; + D.Flags = this->Flags; + D.NodeMask = this->NodeMask; + D.pRootSignature = this->pRootSignature; + D.InputLayout = this->InputLayout; + D.IBStripCutValue = this->IBStripCutValue; + D.PrimitiveTopologyType = this->PrimitiveTopologyType; + D.VS = this->VS; + D.GS = this->GS; + D.StreamOutput = this->StreamOutput; + D.HS = this->HS; + D.DS = this->DS; + D.PS = this->PS; + D.BlendState = this->BlendState; + 
D.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC1(D3D12_DEPTH_STENCIL_DESC1(this->DepthStencilState)); + D.DSVFormat = this->DSVFormat; + D.RasterizerState = this->RasterizerState; + D.NumRenderTargets = D3D12_RT_FORMAT_ARRAY(this->RTVFormats).NumRenderTargets; + memcpy(D.RTVFormats, D3D12_RT_FORMAT_ARRAY(this->RTVFormats).RTFormats, sizeof(D.RTVFormats)); + D.SampleDesc = this->SampleDesc; + D.SampleMask = this->SampleMask; + D.CachedPSO = this->CachedPSO; + return D; + } + D3D12_COMPUTE_PIPELINE_STATE_DESC ComputeDescV0() const noexcept + { + D3D12_COMPUTE_PIPELINE_STATE_DESC D; + D.Flags = this->Flags; + D.NodeMask = this->NodeMask; + D.pRootSignature = this->pRootSignature; + D.CS = this->CS; + D.CachedPSO = this->CachedPSO; + return D; + } +}; + +// CD3DX12_PIPELINE_STATE_STREAM1 Works on OS Build 16299+ (where there is a new view instancing subobject). +// Use CD3DX12_PIPELINE_STATE_STREAM for OS Build 15063+ support. +struct CD3DX12_PIPELINE_STATE_STREAM1 +{ + CD3DX12_PIPELINE_STATE_STREAM1() = default; + // Mesh and amplification shaders must be set manually, since they do not have representation in D3D12_GRAPHICS_PIPELINE_STATE_DESC + CD3DX12_PIPELINE_STATE_STREAM1(const D3D12_GRAPHICS_PIPELINE_STATE_DESC& Desc) noexcept + : Flags(Desc.Flags) + , NodeMask(Desc.NodeMask) + , pRootSignature(Desc.pRootSignature) + , InputLayout(Desc.InputLayout) + , IBStripCutValue(Desc.IBStripCutValue) + , PrimitiveTopologyType(Desc.PrimitiveTopologyType) + , VS(Desc.VS) + , GS(Desc.GS) + , StreamOutput(Desc.StreamOutput) + , HS(Desc.HS) + , DS(Desc.DS) + , PS(Desc.PS) + , BlendState(CD3DX12_BLEND_DESC(Desc.BlendState)) + , DepthStencilState(CD3DX12_DEPTH_STENCIL_DESC1(Desc.DepthStencilState)) + , DSVFormat(Desc.DSVFormat) + , RasterizerState(CD3DX12_RASTERIZER_DESC(Desc.RasterizerState)) + , RTVFormats(CD3DX12_RT_FORMAT_ARRAY(Desc.RTVFormats, Desc.NumRenderTargets)) + , SampleDesc(Desc.SampleDesc) + , SampleMask(Desc.SampleMask) + , CachedPSO(Desc.CachedPSO) + , 
ViewInstancingDesc(CD3DX12_VIEW_INSTANCING_DESC(CD3DX12_DEFAULT())) + {} + CD3DX12_PIPELINE_STATE_STREAM1(const D3DX12_MESH_SHADER_PIPELINE_STATE_DESC& Desc) noexcept + : Flags(Desc.Flags) + , NodeMask(Desc.NodeMask) + , pRootSignature(Desc.pRootSignature) + , PrimitiveTopologyType(Desc.PrimitiveTopologyType) + , PS(Desc.PS) + , BlendState(CD3DX12_BLEND_DESC(Desc.BlendState)) + , DepthStencilState(CD3DX12_DEPTH_STENCIL_DESC1(Desc.DepthStencilState)) + , DSVFormat(Desc.DSVFormat) + , RasterizerState(CD3DX12_RASTERIZER_DESC(Desc.RasterizerState)) + , RTVFormats(CD3DX12_RT_FORMAT_ARRAY(Desc.RTVFormats, Desc.NumRenderTargets)) + , SampleDesc(Desc.SampleDesc) + , SampleMask(Desc.SampleMask) + , CachedPSO(Desc.CachedPSO) + , ViewInstancingDesc(CD3DX12_VIEW_INSTANCING_DESC(CD3DX12_DEFAULT())) + {} + CD3DX12_PIPELINE_STATE_STREAM1(const D3D12_COMPUTE_PIPELINE_STATE_DESC& Desc) noexcept + : Flags(Desc.Flags) + , NodeMask(Desc.NodeMask) + , pRootSignature(Desc.pRootSignature) + , CS(CD3DX12_SHADER_BYTECODE(Desc.CS)) + , CachedPSO(Desc.CachedPSO) + { + static_cast(DepthStencilState).DepthEnable = false; + } + CD3DX12_PIPELINE_STATE_STREAM_FLAGS Flags; + CD3DX12_PIPELINE_STATE_STREAM_NODE_MASK NodeMask; + CD3DX12_PIPELINE_STATE_STREAM_ROOT_SIGNATURE pRootSignature; + CD3DX12_PIPELINE_STATE_STREAM_INPUT_LAYOUT InputLayout; + CD3DX12_PIPELINE_STATE_STREAM_IB_STRIP_CUT_VALUE IBStripCutValue; + CD3DX12_PIPELINE_STATE_STREAM_PRIMITIVE_TOPOLOGY PrimitiveTopologyType; + CD3DX12_PIPELINE_STATE_STREAM_VS VS; + CD3DX12_PIPELINE_STATE_STREAM_GS GS; + CD3DX12_PIPELINE_STATE_STREAM_STREAM_OUTPUT StreamOutput; + CD3DX12_PIPELINE_STATE_STREAM_HS HS; + CD3DX12_PIPELINE_STATE_STREAM_DS DS; + CD3DX12_PIPELINE_STATE_STREAM_PS PS; + CD3DX12_PIPELINE_STATE_STREAM_CS CS; + CD3DX12_PIPELINE_STATE_STREAM_BLEND_DESC BlendState; + CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL1 DepthStencilState; + CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL_FORMAT DSVFormat; + CD3DX12_PIPELINE_STATE_STREAM_RASTERIZER 
RasterizerState; + CD3DX12_PIPELINE_STATE_STREAM_RENDER_TARGET_FORMATS RTVFormats; + CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_DESC SampleDesc; + CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_MASK SampleMask; + CD3DX12_PIPELINE_STATE_STREAM_CACHED_PSO CachedPSO; + CD3DX12_PIPELINE_STATE_STREAM_VIEW_INSTANCING ViewInstancingDesc; + D3D12_GRAPHICS_PIPELINE_STATE_DESC GraphicsDescV0() const noexcept + { + D3D12_GRAPHICS_PIPELINE_STATE_DESC D; + D.Flags = this->Flags; + D.NodeMask = this->NodeMask; + D.pRootSignature = this->pRootSignature; + D.InputLayout = this->InputLayout; + D.IBStripCutValue = this->IBStripCutValue; + D.PrimitiveTopologyType = this->PrimitiveTopologyType; + D.VS = this->VS; + D.GS = this->GS; + D.StreamOutput = this->StreamOutput; + D.HS = this->HS; + D.DS = this->DS; + D.PS = this->PS; + D.BlendState = this->BlendState; + D.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC1(D3D12_DEPTH_STENCIL_DESC1(this->DepthStencilState)); + D.DSVFormat = this->DSVFormat; + D.RasterizerState = this->RasterizerState; + D.NumRenderTargets = D3D12_RT_FORMAT_ARRAY(this->RTVFormats).NumRenderTargets; + memcpy(D.RTVFormats, D3D12_RT_FORMAT_ARRAY(this->RTVFormats).RTFormats, sizeof(D.RTVFormats)); + D.SampleDesc = this->SampleDesc; + D.SampleMask = this->SampleMask; + D.CachedPSO = this->CachedPSO; + return D; + } + D3D12_COMPUTE_PIPELINE_STATE_DESC ComputeDescV0() const noexcept + { + D3D12_COMPUTE_PIPELINE_STATE_DESC D; + D.Flags = this->Flags; + D.NodeMask = this->NodeMask; + D.pRootSignature = this->pRootSignature; + D.CS = this->CS; + D.CachedPSO = this->CachedPSO; + return D; + } +}; + + +struct CD3DX12_PIPELINE_MESH_STATE_STREAM +{ + CD3DX12_PIPELINE_MESH_STATE_STREAM() = default; + CD3DX12_PIPELINE_MESH_STATE_STREAM(const D3DX12_MESH_SHADER_PIPELINE_STATE_DESC& Desc) noexcept + : Flags(Desc.Flags) + , NodeMask(Desc.NodeMask) + , pRootSignature(Desc.pRootSignature) + , PS(Desc.PS) + , AS(Desc.AS) + , MS(Desc.MS) + , BlendState(CD3DX12_BLEND_DESC(Desc.BlendState)) + , 
DepthStencilState(CD3DX12_DEPTH_STENCIL_DESC1(Desc.DepthStencilState)) + , DSVFormat(Desc.DSVFormat) + , RasterizerState(CD3DX12_RASTERIZER_DESC(Desc.RasterizerState)) + , RTVFormats(CD3DX12_RT_FORMAT_ARRAY(Desc.RTVFormats, Desc.NumRenderTargets)) + , SampleDesc(Desc.SampleDesc) + , SampleMask(Desc.SampleMask) + , CachedPSO(Desc.CachedPSO) + , ViewInstancingDesc(CD3DX12_VIEW_INSTANCING_DESC(CD3DX12_DEFAULT())) + {} + CD3DX12_PIPELINE_STATE_STREAM_FLAGS Flags; + CD3DX12_PIPELINE_STATE_STREAM_NODE_MASK NodeMask; + CD3DX12_PIPELINE_STATE_STREAM_ROOT_SIGNATURE pRootSignature; + CD3DX12_PIPELINE_STATE_STREAM_PS PS; + CD3DX12_PIPELINE_STATE_STREAM_AS AS; + CD3DX12_PIPELINE_STATE_STREAM_MS MS; + CD3DX12_PIPELINE_STATE_STREAM_BLEND_DESC BlendState; + CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL1 DepthStencilState; + CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL_FORMAT DSVFormat; + CD3DX12_PIPELINE_STATE_STREAM_RASTERIZER RasterizerState; + CD3DX12_PIPELINE_STATE_STREAM_RENDER_TARGET_FORMATS RTVFormats; + CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_DESC SampleDesc; + CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_MASK SampleMask; + CD3DX12_PIPELINE_STATE_STREAM_CACHED_PSO CachedPSO; + CD3DX12_PIPELINE_STATE_STREAM_VIEW_INSTANCING ViewInstancingDesc; + D3DX12_MESH_SHADER_PIPELINE_STATE_DESC MeshShaderDescV0() const noexcept + { + D3DX12_MESH_SHADER_PIPELINE_STATE_DESC D; + D.Flags = this->Flags; + D.NodeMask = this->NodeMask; + D.pRootSignature = this->pRootSignature; + D.PS = this->PS; + D.AS = this->AS; + D.MS = this->MS; + D.BlendState = this->BlendState; + D.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC1(D3D12_DEPTH_STENCIL_DESC1(this->DepthStencilState)); + D.DSVFormat = this->DSVFormat; + D.RasterizerState = this->RasterizerState; + D.NumRenderTargets = D3D12_RT_FORMAT_ARRAY(this->RTVFormats).NumRenderTargets; + memcpy(D.RTVFormats, D3D12_RT_FORMAT_ARRAY(this->RTVFormats).RTFormats, sizeof(D.RTVFormats)); + D.SampleDesc = this->SampleDesc; + D.SampleMask = this->SampleMask; + D.CachedPSO 
= this->CachedPSO; + return D; + } +}; + +// CD3DX12_PIPELINE_STATE_STREAM works on OS Build 15063+ but does not support new subobject(s) added in OS Build 16299+. +// See CD3DX12_PIPELINE_STATE_STREAM1 for instance. +struct CD3DX12_PIPELINE_STATE_STREAM +{ + CD3DX12_PIPELINE_STATE_STREAM() = default; + CD3DX12_PIPELINE_STATE_STREAM(const D3D12_GRAPHICS_PIPELINE_STATE_DESC& Desc) noexcept + : Flags(Desc.Flags) + , NodeMask(Desc.NodeMask) + , pRootSignature(Desc.pRootSignature) + , InputLayout(Desc.InputLayout) + , IBStripCutValue(Desc.IBStripCutValue) + , PrimitiveTopologyType(Desc.PrimitiveTopologyType) + , VS(Desc.VS) + , GS(Desc.GS) + , StreamOutput(Desc.StreamOutput) + , HS(Desc.HS) + , DS(Desc.DS) + , PS(Desc.PS) + , BlendState(CD3DX12_BLEND_DESC(Desc.BlendState)) + , DepthStencilState(CD3DX12_DEPTH_STENCIL_DESC1(Desc.DepthStencilState)) + , DSVFormat(Desc.DSVFormat) + , RasterizerState(CD3DX12_RASTERIZER_DESC(Desc.RasterizerState)) + , RTVFormats(CD3DX12_RT_FORMAT_ARRAY(Desc.RTVFormats, Desc.NumRenderTargets)) + , SampleDesc(Desc.SampleDesc) + , SampleMask(Desc.SampleMask) + , CachedPSO(Desc.CachedPSO) + {} + CD3DX12_PIPELINE_STATE_STREAM(const D3D12_COMPUTE_PIPELINE_STATE_DESC& Desc) noexcept + : Flags(Desc.Flags) + , NodeMask(Desc.NodeMask) + , pRootSignature(Desc.pRootSignature) + , CS(CD3DX12_SHADER_BYTECODE(Desc.CS)) + , CachedPSO(Desc.CachedPSO) + {} + CD3DX12_PIPELINE_STATE_STREAM_FLAGS Flags; + CD3DX12_PIPELINE_STATE_STREAM_NODE_MASK NodeMask; + CD3DX12_PIPELINE_STATE_STREAM_ROOT_SIGNATURE pRootSignature; + CD3DX12_PIPELINE_STATE_STREAM_INPUT_LAYOUT InputLayout; + CD3DX12_PIPELINE_STATE_STREAM_IB_STRIP_CUT_VALUE IBStripCutValue; + CD3DX12_PIPELINE_STATE_STREAM_PRIMITIVE_TOPOLOGY PrimitiveTopologyType; + CD3DX12_PIPELINE_STATE_STREAM_VS VS; + CD3DX12_PIPELINE_STATE_STREAM_GS GS; + CD3DX12_PIPELINE_STATE_STREAM_STREAM_OUTPUT StreamOutput; + CD3DX12_PIPELINE_STATE_STREAM_HS HS; + CD3DX12_PIPELINE_STATE_STREAM_DS DS; + CD3DX12_PIPELINE_STATE_STREAM_PS PS; 
+ CD3DX12_PIPELINE_STATE_STREAM_CS CS; + CD3DX12_PIPELINE_STATE_STREAM_BLEND_DESC BlendState; + CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL1 DepthStencilState; + CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL_FORMAT DSVFormat; + CD3DX12_PIPELINE_STATE_STREAM_RASTERIZER RasterizerState; + CD3DX12_PIPELINE_STATE_STREAM_RENDER_TARGET_FORMATS RTVFormats; + CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_DESC SampleDesc; + CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_MASK SampleMask; + CD3DX12_PIPELINE_STATE_STREAM_CACHED_PSO CachedPSO; + D3D12_GRAPHICS_PIPELINE_STATE_DESC GraphicsDescV0() const noexcept + { + D3D12_GRAPHICS_PIPELINE_STATE_DESC D; + D.Flags = this->Flags; + D.NodeMask = this->NodeMask; + D.pRootSignature = this->pRootSignature; + D.InputLayout = this->InputLayout; + D.IBStripCutValue = this->IBStripCutValue; + D.PrimitiveTopologyType = this->PrimitiveTopologyType; + D.VS = this->VS; + D.GS = this->GS; + D.StreamOutput = this->StreamOutput; + D.HS = this->HS; + D.DS = this->DS; + D.PS = this->PS; + D.BlendState = this->BlendState; + D.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC1(D3D12_DEPTH_STENCIL_DESC1(this->DepthStencilState)); + D.DSVFormat = this->DSVFormat; + D.RasterizerState = this->RasterizerState; + D.NumRenderTargets = D3D12_RT_FORMAT_ARRAY(this->RTVFormats).NumRenderTargets; + memcpy(D.RTVFormats, D3D12_RT_FORMAT_ARRAY(this->RTVFormats).RTFormats, sizeof(D.RTVFormats)); + D.SampleDesc = this->SampleDesc; + D.SampleMask = this->SampleMask; + D.CachedPSO = this->CachedPSO; + return D; + } + D3D12_COMPUTE_PIPELINE_STATE_DESC ComputeDescV0() const noexcept + { + D3D12_COMPUTE_PIPELINE_STATE_DESC D; + D.Flags = this->Flags; + D.NodeMask = this->NodeMask; + D.pRootSignature = this->pRootSignature; + D.CS = this->CS; + D.CachedPSO = this->CachedPSO; + return D; + } +}; + +struct CD3DX12_PIPELINE_STATE_STREAM2_PARSE_HELPER : public ID3DX12PipelineParserCallbacks +{ + CD3DX12_PIPELINE_STATE_STREAM2 PipelineStream; + CD3DX12_PIPELINE_STATE_STREAM2_PARSE_HELPER() noexcept 
+ : SeenDSS(false) + { + // Adjust defaults to account for absent members. + PipelineStream.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + + // Depth disabled if no DSV format specified. + static_cast(PipelineStream.DepthStencilState).DepthEnable = false; + } + + // ID3DX12PipelineParserCallbacks + void FlagsCb(D3D12_PIPELINE_STATE_FLAGS Flags) override {PipelineStream.Flags = Flags;} + void NodeMaskCb(UINT NodeMask) override {PipelineStream.NodeMask = NodeMask;} + void RootSignatureCb(ID3D12RootSignature* pRootSignature) override {PipelineStream.pRootSignature = pRootSignature;} + void InputLayoutCb(const D3D12_INPUT_LAYOUT_DESC& InputLayout) override {PipelineStream.InputLayout = InputLayout;} + void IBStripCutValueCb(D3D12_INDEX_BUFFER_STRIP_CUT_VALUE IBStripCutValue) override {PipelineStream.IBStripCutValue = IBStripCutValue;} + void PrimitiveTopologyTypeCb(D3D12_PRIMITIVE_TOPOLOGY_TYPE PrimitiveTopologyType) override {PipelineStream.PrimitiveTopologyType = PrimitiveTopologyType;} + void VSCb(const D3D12_SHADER_BYTECODE& VS) override {PipelineStream.VS = VS;} + void GSCb(const D3D12_SHADER_BYTECODE& GS) override {PipelineStream.GS = GS;} + void StreamOutputCb(const D3D12_STREAM_OUTPUT_DESC& StreamOutput) override {PipelineStream.StreamOutput = StreamOutput;} + void HSCb(const D3D12_SHADER_BYTECODE& HS) override {PipelineStream.HS = HS;} + void DSCb(const D3D12_SHADER_BYTECODE& DS) override {PipelineStream.DS = DS;} + void PSCb(const D3D12_SHADER_BYTECODE& PS) override {PipelineStream.PS = PS;} + void CSCb(const D3D12_SHADER_BYTECODE& CS) override {PipelineStream.CS = CS;} + void ASCb(const D3D12_SHADER_BYTECODE& AS) override {PipelineStream.AS = AS;} + void MSCb(const D3D12_SHADER_BYTECODE& MS) override {PipelineStream.MS = MS;} + void BlendStateCb(const D3D12_BLEND_DESC& BlendState) override {PipelineStream.BlendState = CD3DX12_BLEND_DESC(BlendState);} + void DepthStencilStateCb(const D3D12_DEPTH_STENCIL_DESC& DepthStencilState) override + { 
+ PipelineStream.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC1(DepthStencilState); + SeenDSS = true; + } + void DepthStencilState1Cb(const D3D12_DEPTH_STENCIL_DESC1& DepthStencilState) override + { + PipelineStream.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC1(DepthStencilState); + SeenDSS = true; + } + void DSVFormatCb(DXGI_FORMAT DSVFormat) override + { + PipelineStream.DSVFormat = DSVFormat; + if (!SeenDSS && DSVFormat != DXGI_FORMAT_UNKNOWN) + { + // Re-enable depth for the default state. + static_cast(PipelineStream.DepthStencilState).DepthEnable = true; + } + } + void RasterizerStateCb(const D3D12_RASTERIZER_DESC& RasterizerState) override {PipelineStream.RasterizerState = CD3DX12_RASTERIZER_DESC(RasterizerState);} + void RTVFormatsCb(const D3D12_RT_FORMAT_ARRAY& RTVFormats) override {PipelineStream.RTVFormats = RTVFormats;} + void SampleDescCb(const DXGI_SAMPLE_DESC& SampleDesc) override {PipelineStream.SampleDesc = SampleDesc;} + void SampleMaskCb(UINT SampleMask) override {PipelineStream.SampleMask = SampleMask;} + void ViewInstancingCb(const D3D12_VIEW_INSTANCING_DESC& ViewInstancingDesc) override {PipelineStream.ViewInstancingDesc = CD3DX12_VIEW_INSTANCING_DESC(ViewInstancingDesc);} + void CachedPSOCb(const D3D12_CACHED_PIPELINE_STATE& CachedPSO) override {PipelineStream.CachedPSO = CachedPSO;} + +private: + bool SeenDSS; +}; + + +struct CD3DX12_PIPELINE_STATE_STREAM_PARSE_HELPER : public ID3DX12PipelineParserCallbacks +{ + CD3DX12_PIPELINE_STATE_STREAM1 PipelineStream; + CD3DX12_PIPELINE_STATE_STREAM_PARSE_HELPER() noexcept + : SeenDSS(false) + { + // Adjust defaults to account for absent members. + PipelineStream.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + + // Depth disabled if no DSV format specified. 
+ static_cast(PipelineStream.DepthStencilState).DepthEnable = false; + } + + // ID3DX12PipelineParserCallbacks + void FlagsCb(D3D12_PIPELINE_STATE_FLAGS Flags) override {PipelineStream.Flags = Flags;} + void NodeMaskCb(UINT NodeMask) override {PipelineStream.NodeMask = NodeMask;} + void RootSignatureCb(ID3D12RootSignature* pRootSignature) override {PipelineStream.pRootSignature = pRootSignature;} + void InputLayoutCb(const D3D12_INPUT_LAYOUT_DESC& InputLayout) override {PipelineStream.InputLayout = InputLayout;} + void IBStripCutValueCb(D3D12_INDEX_BUFFER_STRIP_CUT_VALUE IBStripCutValue) override {PipelineStream.IBStripCutValue = IBStripCutValue;} + void PrimitiveTopologyTypeCb(D3D12_PRIMITIVE_TOPOLOGY_TYPE PrimitiveTopologyType) override {PipelineStream.PrimitiveTopologyType = PrimitiveTopologyType;} + void VSCb(const D3D12_SHADER_BYTECODE& VS) override {PipelineStream.VS = VS;} + void GSCb(const D3D12_SHADER_BYTECODE& GS) override {PipelineStream.GS = GS;} + void StreamOutputCb(const D3D12_STREAM_OUTPUT_DESC& StreamOutput) override {PipelineStream.StreamOutput = StreamOutput;} + void HSCb(const D3D12_SHADER_BYTECODE& HS) override {PipelineStream.HS = HS;} + void DSCb(const D3D12_SHADER_BYTECODE& DS) override {PipelineStream.DS = DS;} + void PSCb(const D3D12_SHADER_BYTECODE& PS) override {PipelineStream.PS = PS;} + void CSCb(const D3D12_SHADER_BYTECODE& CS) override {PipelineStream.CS = CS;} + void BlendStateCb(const D3D12_BLEND_DESC& BlendState) override {PipelineStream.BlendState = CD3DX12_BLEND_DESC(BlendState);} + void DepthStencilStateCb(const D3D12_DEPTH_STENCIL_DESC& DepthStencilState) override + { + PipelineStream.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC1(DepthStencilState); + SeenDSS = true; + } + void DepthStencilState1Cb(const D3D12_DEPTH_STENCIL_DESC1& DepthStencilState) override + { + PipelineStream.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC1(DepthStencilState); + SeenDSS = true; + } + void DSVFormatCb(DXGI_FORMAT DSVFormat) override + { + 
PipelineStream.DSVFormat = DSVFormat; + if (!SeenDSS && DSVFormat != DXGI_FORMAT_UNKNOWN) + { + // Re-enable depth for the default state. + static_cast(PipelineStream.DepthStencilState).DepthEnable = true; + } + } + void RasterizerStateCb(const D3D12_RASTERIZER_DESC& RasterizerState) override {PipelineStream.RasterizerState = CD3DX12_RASTERIZER_DESC(RasterizerState);} + void RTVFormatsCb(const D3D12_RT_FORMAT_ARRAY& RTVFormats) override {PipelineStream.RTVFormats = RTVFormats;} + void SampleDescCb(const DXGI_SAMPLE_DESC& SampleDesc) override {PipelineStream.SampleDesc = SampleDesc;} + void SampleMaskCb(UINT SampleMask) override {PipelineStream.SampleMask = SampleMask;} + void ViewInstancingCb(const D3D12_VIEW_INSTANCING_DESC& ViewInstancingDesc) override {PipelineStream.ViewInstancingDesc = CD3DX12_VIEW_INSTANCING_DESC(ViewInstancingDesc);} + void CachedPSOCb(const D3D12_CACHED_PIPELINE_STATE& CachedPSO) override {PipelineStream.CachedPSO = CachedPSO;} + +private: + bool SeenDSS; +}; + +inline D3D12_PIPELINE_STATE_SUBOBJECT_TYPE D3DX12GetBaseSubobjectType(D3D12_PIPELINE_STATE_SUBOBJECT_TYPE SubobjectType) noexcept +{ + switch (SubobjectType) + { + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL1: + return D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL; + default: + return SubobjectType; + } +} + +inline HRESULT D3DX12ParsePipelineStream(const D3D12_PIPELINE_STATE_STREAM_DESC& Desc, ID3DX12PipelineParserCallbacks* pCallbacks) +{ + if (pCallbacks == nullptr) + { + return E_INVALIDARG; + } + + if (Desc.SizeInBytes == 0 || Desc.pPipelineStateSubobjectStream == nullptr) + { + pCallbacks->ErrorBadInputParameter(1); // first parameter issue + return E_INVALIDARG; + } + + bool SubobjectSeen[D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_MAX_VALID] = {}; + for (SIZE_T CurOffset = 0, SizeOfSubobject = 0; CurOffset < Desc.SizeInBytes; CurOffset += SizeOfSubobject) + { + BYTE* pStream = static_cast(Desc.pPipelineStateSubobjectStream)+CurOffset; + auto SubobjectType = 
*reinterpret_cast(pStream); + if (SubobjectType < 0 || SubobjectType >= D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_MAX_VALID) + { + pCallbacks->ErrorUnknownSubobject(SubobjectType); + return E_INVALIDARG; + } + if (SubobjectSeen[D3DX12GetBaseSubobjectType(SubobjectType)]) + { + pCallbacks->ErrorDuplicateSubobject(SubobjectType); + return E_INVALIDARG; // disallow subobject duplicates in a stream + } + SubobjectSeen[SubobjectType] = true; + switch (SubobjectType) + { + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ROOT_SIGNATURE: + pCallbacks->RootSignatureCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::pRootSignature); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VS: + pCallbacks->VSCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::VS); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PS: + pCallbacks->PSCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::PS); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DS: + pCallbacks->DSCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::DS); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_HS: + pCallbacks->HSCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::HS); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_GS: + pCallbacks->GSCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::GS); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_CS: + pCallbacks->CSCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::CS); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_AS: + pCallbacks->ASCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM2::AS); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_MS: + pCallbacks->MSCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM2::MS); + 
break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_STREAM_OUTPUT: + pCallbacks->StreamOutputCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::StreamOutput); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_BLEND: + pCallbacks->BlendStateCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::BlendState); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_MASK: + pCallbacks->SampleMaskCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::SampleMask); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER: + pCallbacks->RasterizerStateCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::RasterizerState); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL: + pCallbacks->DepthStencilStateCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL1: + pCallbacks->DepthStencilState1Cb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::DepthStencilState); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_INPUT_LAYOUT: + pCallbacks->InputLayoutCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::InputLayout); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_IB_STRIP_CUT_VALUE: + pCallbacks->IBStripCutValueCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::IBStripCutValue); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PRIMITIVE_TOPOLOGY: + pCallbacks->PrimitiveTopologyTypeCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::PrimitiveTopologyType); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RENDER_TARGET_FORMATS: + pCallbacks->RTVFormatsCb(*reinterpret_cast(pStream)); + SizeOfSubobject = 
sizeof(CD3DX12_PIPELINE_STATE_STREAM::RTVFormats); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL_FORMAT: + pCallbacks->DSVFormatCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::DSVFormat); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_DESC: + pCallbacks->SampleDescCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::SampleDesc); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_NODE_MASK: + pCallbacks->NodeMaskCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::NodeMask); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_CACHED_PSO: + pCallbacks->CachedPSOCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::CachedPSO); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_FLAGS: + pCallbacks->FlagsCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::Flags); + break; + case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VIEW_INSTANCING: + pCallbacks->ViewInstancingCb(*reinterpret_cast(pStream)); + SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM1::ViewInstancingDesc); + break; + default: + pCallbacks->ErrorUnknownSubobject(SubobjectType); + return E_INVALIDARG; + } + } + + return S_OK; +} + +//------------------------------------------------------------------------------------------------ +inline bool operator==( const D3D12_CLEAR_VALUE &a, const D3D12_CLEAR_VALUE &b) noexcept +{ + if (a.Format != b.Format) return false; + if (a.Format == DXGI_FORMAT_D24_UNORM_S8_UINT + || a.Format == DXGI_FORMAT_D16_UNORM + || a.Format == DXGI_FORMAT_D32_FLOAT + || a.Format == DXGI_FORMAT_D32_FLOAT_S8X24_UINT) + { + return (a.DepthStencil.Depth == b.DepthStencil.Depth) && + (a.DepthStencil.Stencil == b.DepthStencil.Stencil); + } else { + return (a.Color[0] == b.Color[0]) && + (a.Color[1] == b.Color[1]) && + (a.Color[2] == b.Color[2]) && + (a.Color[3] == b.Color[3]); 
+ } +} +inline bool operator==( const D3D12_RENDER_PASS_BEGINNING_ACCESS_CLEAR_PARAMETERS &a, const D3D12_RENDER_PASS_BEGINNING_ACCESS_CLEAR_PARAMETERS &b) noexcept +{ + return a.ClearValue == b.ClearValue; +} +inline bool operator==( const D3D12_RENDER_PASS_ENDING_ACCESS_RESOLVE_PARAMETERS &a, const D3D12_RENDER_PASS_ENDING_ACCESS_RESOLVE_PARAMETERS &b) noexcept +{ + if (a.pSrcResource != b.pSrcResource) return false; + if (a.pDstResource != b.pDstResource) return false; + if (a.SubresourceCount != b.SubresourceCount) return false; + if (a.Format != b.Format) return false; + if (a.ResolveMode != b.ResolveMode) return false; + if (a.PreserveResolveSource != b.PreserveResolveSource) return false; + return true; +} +inline bool operator==( const D3D12_RENDER_PASS_BEGINNING_ACCESS &a, const D3D12_RENDER_PASS_BEGINNING_ACCESS &b) noexcept +{ + if (a.Type != b.Type) return false; + if (a.Type == D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR && !(a.Clear == b.Clear)) return false; + return true; +} +inline bool operator==( const D3D12_RENDER_PASS_ENDING_ACCESS &a, const D3D12_RENDER_PASS_ENDING_ACCESS &b) noexcept +{ + if (a.Type != b.Type) return false; + if (a.Type == D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_RESOLVE && !(a.Resolve == b.Resolve)) return false; + return true; +} +inline bool operator==( const D3D12_RENDER_PASS_RENDER_TARGET_DESC &a, const D3D12_RENDER_PASS_RENDER_TARGET_DESC &b) noexcept +{ + if (a.cpuDescriptor.ptr != b.cpuDescriptor.ptr) return false; + if (!(a.BeginningAccess == b.BeginningAccess)) return false; + if (!(a.EndingAccess == b.EndingAccess)) return false; + return true; +} +inline bool operator==( const D3D12_RENDER_PASS_DEPTH_STENCIL_DESC &a, const D3D12_RENDER_PASS_DEPTH_STENCIL_DESC &b) noexcept +{ + if (a.cpuDescriptor.ptr != b.cpuDescriptor.ptr) return false; + if (!(a.DepthBeginningAccess == b.DepthBeginningAccess)) return false; + if (!(a.StencilBeginningAccess == b.StencilBeginningAccess)) return false; + if (!(a.DepthEndingAccess == 
b.DepthEndingAccess)) return false; + if (!(a.StencilEndingAccess == b.StencilEndingAccess)) return false; + return true; +} + + +#ifndef D3DX12_NO_STATE_OBJECT_HELPERS + +//================================================================================================ +// D3DX12 State Object Creation Helpers +// +// Helper classes for creating new style state objects out of an arbitrary set of subobjects. +// Uses STL +// +// Start by instantiating CD3DX12_STATE_OBJECT_DESC (see it's public methods). +// One of its methods is CreateSubobject(), which has a comment showing a couple of options for +// defining subobjects using the helper classes for each subobject (CD3DX12_DXIL_LIBRARY_SUBOBJECT +// etc.). The subobject helpers each have methods specific to the subobject for configuring it's +// contents. +// +//================================================================================================ +#include +#include +#include +#include +#ifndef D3DX12_USE_ATL +#include +#define D3DX12_COM_PTR Microsoft::WRL::ComPtr +#define D3DX12_COM_PTR_GET(x) x.Get() +#define D3DX12_COM_PTR_ADDRESSOF(x) x.GetAddressOf() +#else +#include +#define D3DX12_COM_PTR ATL::CComPtr +#define D3DX12_COM_PTR_GET(x) x.p +#define D3DX12_COM_PTR_ADDRESSOF(x) &x.p +#endif + +//------------------------------------------------------------------------------------------------ +class CD3DX12_STATE_OBJECT_DESC +{ +public: + CD3DX12_STATE_OBJECT_DESC() noexcept + { + Init(D3D12_STATE_OBJECT_TYPE_COLLECTION); + } + CD3DX12_STATE_OBJECT_DESC(D3D12_STATE_OBJECT_TYPE Type) noexcept + { + Init(Type); + } + void SetStateObjectType(D3D12_STATE_OBJECT_TYPE Type) noexcept { m_Desc.Type = Type; } + operator const D3D12_STATE_OBJECT_DESC&() + { + // Do final preparation work + m_RepointedAssociations.clear(); + m_SubobjectArray.clear(); + m_SubobjectArray.reserve(m_Desc.NumSubobjects); + // Flatten subobjects into an array (each flattened subobject still has a + // member that's a pointer to it's desc 
that's not flattened) + for (auto Iter = m_SubobjectList.begin(); + Iter != m_SubobjectList.end(); Iter++) + { + m_SubobjectArray.push_back(*Iter); + // Store new location in array so we can redirect pointers contained in subobjects + Iter->pSubobjectArrayLocation = &m_SubobjectArray.back(); + } + // For subobjects with pointer fields, create a new copy of those subobject definitions + // with fixed pointers + for (UINT i = 0; i < m_Desc.NumSubobjects; i++) + { + if (m_SubobjectArray[i].Type == D3D12_STATE_SUBOBJECT_TYPE_SUBOBJECT_TO_EXPORTS_ASSOCIATION) + { + auto pOriginalSubobjectAssociation = + static_cast(m_SubobjectArray[i].pDesc); + D3D12_SUBOBJECT_TO_EXPORTS_ASSOCIATION Repointed = *pOriginalSubobjectAssociation; + auto pWrapper = + static_cast(pOriginalSubobjectAssociation->pSubobjectToAssociate); + Repointed.pSubobjectToAssociate = pWrapper->pSubobjectArrayLocation; + m_RepointedAssociations.push_back(Repointed); + m_SubobjectArray[i].pDesc = &m_RepointedAssociations.back(); + } + } + // Below: using ugly way to get pointer in case .data() is not defined + m_Desc.pSubobjects = m_Desc.NumSubobjects ? &m_SubobjectArray[0] : nullptr; + return m_Desc; + } + operator const D3D12_STATE_OBJECT_DESC*() + { + // Cast calls the above final preparation work + return &static_cast(*this); + } + + // CreateSubobject creates a sububject helper (e.g. CD3DX12_HIT_GROUP_SUBOBJECT) + // whose lifetime is owned by this class. + // e.g. + // + // CD3DX12_STATE_OBJECT_DESC Collection1(D3D12_STATE_OBJECT_TYPE_COLLECTION); + // auto Lib0 = Collection1.CreateSubobject(); + // Lib0->SetDXILLibrary(&pMyAppDxilLibs[0]); + // Lib0->DefineExport(L"rayGenShader0"); // in practice these export listings might be + // // data/engine driven + // etc. 
+ // + // Alternatively, users can instantiate sububject helpers explicitly, such as via local + // variables instead, passing the state object desc that should point to it into the helper + // constructor (or call mySubobjectHelper.AddToStateObject(Collection1)). + // In this alternative scenario, the user must keep the subobject alive as long as the state + // object it is associated with is alive, else it's pointer references will be stale. + // e.g. + // + // CD3DX12_STATE_OBJECT_DESC RaytracingState2(D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE); + // CD3DX12_DXIL_LIBRARY_SUBOBJECT LibA(RaytracingState2); + // LibA.SetDXILLibrary(&pMyAppDxilLibs[4]); // not manually specifying exports + // // - meaning all exports in the libraries + // // are exported + // etc. + + template + T* CreateSubobject() + { + T* pSubobject = new T(*this); + m_OwnedSubobjectHelpers.emplace_back(pSubobject); + return pSubobject; + } + +private: + D3D12_STATE_SUBOBJECT* TrackSubobject(D3D12_STATE_SUBOBJECT_TYPE Type, void* pDesc) + { + SUBOBJECT_WRAPPER Subobject; + Subobject.pSubobjectArrayLocation = nullptr; + Subobject.Type = Type; + Subobject.pDesc = pDesc; + m_SubobjectList.push_back(Subobject); + m_Desc.NumSubobjects++; + return &m_SubobjectList.back(); + } + void Init(D3D12_STATE_OBJECT_TYPE Type) noexcept + { + SetStateObjectType(Type); + m_Desc.pSubobjects = nullptr; + m_Desc.NumSubobjects = 0; + m_SubobjectList.clear(); + m_SubobjectArray.clear(); + m_RepointedAssociations.clear(); + } + typedef struct SUBOBJECT_WRAPPER : public D3D12_STATE_SUBOBJECT + { + D3D12_STATE_SUBOBJECT* pSubobjectArrayLocation; // new location when flattened into array + // for repointing pointers in subobjects + } SUBOBJECT_WRAPPER; + D3D12_STATE_OBJECT_DESC m_Desc; + std::list m_SubobjectList; // Pointers to list nodes handed out so + // these can be edited live + std::vector m_SubobjectArray; // Built at the end, copying list contents + + std::list + m_RepointedAssociations; // subobject type that 
contains pointers to other subobjects, + // repointed to flattened array + + class StringContainer + { + public: + LPCWSTR LocalCopy(LPCWSTR string, bool bSingleString = false) + { + if (string) + { + if (bSingleString) + { + m_Strings.clear(); + m_Strings.push_back(string); + } + else + { + m_Strings.push_back(string); + } + return m_Strings.back().c_str(); + } + else + { + return nullptr; + } + } + void clear() noexcept { m_Strings.clear(); } + private: + std::list m_Strings; + }; + + class SUBOBJECT_HELPER_BASE + { + public: + SUBOBJECT_HELPER_BASE() noexcept { Init(); } + virtual ~SUBOBJECT_HELPER_BASE() = default; + virtual D3D12_STATE_SUBOBJECT_TYPE Type() const noexcept = 0; + void AddToStateObject(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject) + { + m_pSubobject = ContainingStateObject.TrackSubobject(Type(), Data()); + } + protected: + virtual void* Data() noexcept = 0; + void Init() noexcept { m_pSubobject = nullptr; } + D3D12_STATE_SUBOBJECT* m_pSubobject; + }; + +#if(__cplusplus >= 201103L) + std::list> m_OwnedSubobjectHelpers; +#else + class OWNED_HELPER + { + public: + OWNED_HELPER(const SUBOBJECT_HELPER_BASE* pHelper) noexcept { m_pHelper = pHelper; } + ~OWNED_HELPER() { delete m_pHelper; } + const SUBOBJECT_HELPER_BASE* m_pHelper; + }; + + std::list m_OwnedSubobjectHelpers; +#endif + + friend class CD3DX12_DXIL_LIBRARY_SUBOBJECT; + friend class CD3DX12_EXISTING_COLLECTION_SUBOBJECT; + friend class CD3DX12_SUBOBJECT_TO_EXPORTS_ASSOCIATION_SUBOBJECT; + friend class CD3DX12_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION; + friend class CD3DX12_HIT_GROUP_SUBOBJECT; + friend class CD3DX12_RAYTRACING_SHADER_CONFIG_SUBOBJECT; + friend class CD3DX12_RAYTRACING_PIPELINE_CONFIG_SUBOBJECT; + friend class CD3DX12_RAYTRACING_PIPELINE_CONFIG1_SUBOBJECT; + friend class CD3DX12_GLOBAL_ROOT_SIGNATURE_SUBOBJECT; + friend class CD3DX12_LOCAL_ROOT_SIGNATURE_SUBOBJECT; + friend class CD3DX12_STATE_OBJECT_CONFIG_SUBOBJECT; + friend class CD3DX12_NODE_MASK_SUBOBJECT; +}; + 
+//------------------------------------------------------------------------------------------------
+// Helper that builds a D3D12_DXIL_LIBRARY_DESC subobject. Export names are copied into
+// the helper, so the caller's strings need not outlive the call.
+class CD3DX12_DXIL_LIBRARY_SUBOBJECT
+    : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE
+{
+public:
+    CD3DX12_DXIL_LIBRARY_SUBOBJECT() noexcept
+    {
+        Init();
+    }
+    CD3DX12_DXIL_LIBRARY_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject)
+    {
+        Init();
+        AddToStateObject(ContainingStateObject);
+    }
+    // Copies *pCode into the description; a null pCode stores zero-initialized bytecode.
+    void SetDXILLibrary(const D3D12_SHADER_BYTECODE* pCode) noexcept
+    {
+        static const D3D12_SHADER_BYTECODE Default = {};
+        m_Desc.DXILLibrary = pCode ? *pCode : Default;
+    }
+    // Appends one export entry and refreshes pExports/NumExports, since the vector may
+    // have reallocated.
+    void DefineExport(
+        LPCWSTR Name,
+        LPCWSTR ExportToRename = nullptr,
+        D3D12_EXPORT_FLAGS Flags = D3D12_EXPORT_FLAG_NONE)
+    {
+        D3D12_EXPORT_DESC Export;
+        Export.Name = m_Strings.LocalCopy(Name);
+        Export.ExportToRename = m_Strings.LocalCopy(ExportToRename);
+        Export.Flags = Flags;
+        m_Exports.push_back(Export);
+        m_Desc.pExports = &m_Exports[0];  // using ugly way to get pointer in case .data() is not defined
+        m_Desc.NumExports = static_cast<UINT>(m_Exports.size());
+    }
+    // Defines one export per element of a fixed-size array of names.
+    template <size_t N>
+    void DefineExports(LPCWSTR(&Exports)[N])
+    {
+        for (UINT i = 0; i < N; i++)
+        {
+            DefineExport(Exports[i]);
+        }
+    }
+    // Defines one export per element of Exports[0..N).
+    void DefineExports(const LPCWSTR* Exports, UINT N)
+    {
+        for (UINT i = 0; i < N; i++)
+        {
+            DefineExport(Exports[i]);
+        }
+    }
+    D3D12_STATE_SUBOBJECT_TYPE Type() const noexcept override
+    {
+        return D3D12_STATE_SUBOBJECT_TYPE_DXIL_LIBRARY;
+    }
+    // Dereferences m_pSubobject, which is null until AddToStateObject() has been called.
+    operator const D3D12_STATE_SUBOBJECT&() const noexcept { return *m_pSubobject; }
+    operator const D3D12_DXIL_LIBRARY_DESC&() const noexcept { return m_Desc; }
+private:
+    void Init() noexcept
+    {
+        SUBOBJECT_HELPER_BASE::Init();
+        m_Desc = {};
+        m_Strings.clear();
+        m_Exports.clear();
+    }
+    void* Data() noexcept override { return &m_Desc; }
+    D3D12_DXIL_LIBRARY_DESC m_Desc;
+    CD3DX12_STATE_OBJECT_DESC::StringContainer m_Strings;
+    std::vector<D3D12_EXPORT_DESC> m_Exports;
+};
+
+//------------------------------------------------------------------------------------------------
+// Helper that builds a D3D12_EXISTING_COLLECTION_DESC subobject. The collection pointer
+// is also stored in a COM smart pointer member, and export names are copied into the helper.
+class CD3DX12_EXISTING_COLLECTION_SUBOBJECT
+    : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE
+{
+public:
+    CD3DX12_EXISTING_COLLECTION_SUBOBJECT() noexcept
+    {
+        Init();
+    }
+    CD3DX12_EXISTING_COLLECTION_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject)
+    {
+        Init();
+        AddToStateObject(ContainingStateObject);
+    }
+    // Records the collection both in the description and in m_CollectionRef.
+    void SetExistingCollection(ID3D12StateObject*pExistingCollection) noexcept
+    {
+        m_Desc.pExistingCollection = pExistingCollection;
+        m_CollectionRef = pExistingCollection;
+    }
+    // Appends one export entry and refreshes pExports/NumExports, since the vector may
+    // have reallocated.
+    void DefineExport(
+        LPCWSTR Name,
+        LPCWSTR ExportToRename = nullptr,
+        D3D12_EXPORT_FLAGS Flags = D3D12_EXPORT_FLAG_NONE)
+    {
+        D3D12_EXPORT_DESC Export;
+        Export.Name = m_Strings.LocalCopy(Name);
+        Export.ExportToRename = m_Strings.LocalCopy(ExportToRename);
+        Export.Flags = Flags;
+        m_Exports.push_back(Export);
+        m_Desc.pExports = &m_Exports[0];  // using ugly way to get pointer in case .data() is not defined
+        m_Desc.NumExports = static_cast<UINT>(m_Exports.size());
+    }
+    // Defines one export per element of a fixed-size array of names.
+    template <size_t N>
+    void DefineExports(LPCWSTR(&Exports)[N])
+    {
+        for (UINT i = 0; i < N; i++)
+        {
+            DefineExport(Exports[i]);
+        }
+    }
+    // Defines one export per element of Exports[0..N).
+    void DefineExports(const LPCWSTR* Exports, UINT N)
+    {
+        for (UINT i = 0; i < N; i++)
+        {
+            DefineExport(Exports[i]);
+        }
+    }
+    D3D12_STATE_SUBOBJECT_TYPE Type() const noexcept override
+    {
+        return D3D12_STATE_SUBOBJECT_TYPE_EXISTING_COLLECTION;
+    }
+    // Dereferences m_pSubobject, which is null until AddToStateObject() has been called.
+    operator const D3D12_STATE_SUBOBJECT&() const noexcept { return *m_pSubobject; }
+    operator const D3D12_EXISTING_COLLECTION_DESC&() const noexcept { return m_Desc; }
+private:
+    void Init() noexcept
+    {
+        SUBOBJECT_HELPER_BASE::Init();
+        m_Desc = {};
+        m_CollectionRef = nullptr;
+        m_Strings.clear();
+        m_Exports.clear();
+    }
+    void* Data() noexcept override { return &m_Desc; }
+    D3D12_EXISTING_COLLECTION_DESC m_Desc;
+    D3DX12_COM_PTR<ID3D12StateObject> m_CollectionRef;
+    CD3DX12_STATE_OBJECT_DESC::StringContainer m_Strings;
+    std::vector<D3D12_EXPORT_DESC>
m_Exports;
+};
+
+//------------------------------------------------------------------------------------------------
+// Helper that builds a D3D12_SUBOBJECT_TO_EXPORTS_ASSOCIATION subobject: it ties one
+// subobject to a list of export names, which are copied into the helper.
+class CD3DX12_SUBOBJECT_TO_EXPORTS_ASSOCIATION_SUBOBJECT
+    : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE
+{
+public:
+    CD3DX12_SUBOBJECT_TO_EXPORTS_ASSOCIATION_SUBOBJECT() noexcept
+    {
+        Init();
+    }
+    CD3DX12_SUBOBJECT_TO_EXPORTS_ASSOCIATION_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject)
+    {
+        Init();
+        AddToStateObject(ContainingStateObject);
+    }
+    // Stores the address of SubobjectToAssociate; the referenced object must stay alive.
+    void SetSubobjectToAssociate(const D3D12_STATE_SUBOBJECT& SubobjectToAssociate) noexcept
+    {
+        m_Desc.pSubobjectToAssociate = &SubobjectToAssociate;
+    }
+    // Appends one export name to the association.
+    void AddExport(LPCWSTR Export)
+    {
+        // Grow the array first and derive the count from it afterwards, so a throwing
+        // LocalCopy()/push_back() cannot leave NumExports ahead of the array contents.
+        m_Exports.push_back(m_Strings.LocalCopy(Export));
+        m_Desc.pExports = &m_Exports[0];  // using ugly way to get pointer in case .data() is not defined
+        m_Desc.NumExports = static_cast<UINT>(m_Exports.size());
+    }
+    // Adds one export per element of a fixed-size array of names.
+    template <size_t N>
+    void AddExports(LPCWSTR (&Exports)[N])
+    {
+        for (UINT i = 0; i < N; i++)
+        {
+            AddExport(Exports[i]);
+        }
+    }
+    // Adds one export per element of Exports[0..N).
+    void AddExports(const LPCWSTR* Exports, UINT N)
+    {
+        for (UINT i = 0; i < N; i++)
+        {
+            AddExport(Exports[i]);
+        }
+    }
+    D3D12_STATE_SUBOBJECT_TYPE Type() const noexcept override
+    {
+        return D3D12_STATE_SUBOBJECT_TYPE_SUBOBJECT_TO_EXPORTS_ASSOCIATION;
+    }
+    // Dereferences m_pSubobject, which is null until AddToStateObject() has been called.
+    operator const D3D12_STATE_SUBOBJECT&() const noexcept { return *m_pSubobject; }
+    operator const D3D12_SUBOBJECT_TO_EXPORTS_ASSOCIATION&() const noexcept { return m_Desc; }
+private:
+    void Init() noexcept
+    {
+        SUBOBJECT_HELPER_BASE::Init();
+        m_Desc = {};
+        m_Strings.clear();
+        m_Exports.clear();
+    }
+    void* Data() noexcept override { return &m_Desc; }
+    D3D12_SUBOBJECT_TO_EXPORTS_ASSOCIATION m_Desc;
+    CD3DX12_STATE_OBJECT_DESC::StringContainer m_Strings;
+    std::vector<LPCWSTR> m_Exports;
+};
+
+//------------------------------------------------------------------------------------------------
+class CD3DX12_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION
+    : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE
+{
+public:
+
CD3DX12_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION() noexcept + { + Init(); + } + CD3DX12_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject) + { + Init(); + AddToStateObject(ContainingStateObject); + } + void SetSubobjectNameToAssociate(LPCWSTR SubobjectToAssociate) + { + m_Desc.SubobjectToAssociate = m_SubobjectName.LocalCopy(SubobjectToAssociate, true); + } + void AddExport(LPCWSTR Export) + { + m_Desc.NumExports++; + m_Exports.push_back(m_Strings.LocalCopy(Export)); + m_Desc.pExports = &m_Exports[0]; // using ugly way to get pointer in case .data() is not defined + } + template + void AddExports(LPCWSTR (&Exports)[N]) + { + for (UINT i = 0; i < N; i++) + { + AddExport(Exports[i]); + } + } + void AddExports(const LPCWSTR* Exports, UINT N) + { + for (UINT i = 0; i < N; i++) + { + AddExport(Exports[i]); + } + } + D3D12_STATE_SUBOBJECT_TYPE Type() const noexcept override + { + return D3D12_STATE_SUBOBJECT_TYPE_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION; + } + operator const D3D12_STATE_SUBOBJECT&() const noexcept { return *m_pSubobject; } + operator const D3D12_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION&() const noexcept { return m_Desc; } +private: + void Init() noexcept + { + SUBOBJECT_HELPER_BASE::Init(); + m_Desc = {}; + m_Strings.clear(); + m_SubobjectName.clear(); + m_Exports.clear(); + } + void* Data() noexcept override { return &m_Desc; } + D3D12_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION m_Desc; + CD3DX12_STATE_OBJECT_DESC::StringContainer m_Strings; + CD3DX12_STATE_OBJECT_DESC::StringContainer m_SubobjectName; + std::vector m_Exports; +}; + +//------------------------------------------------------------------------------------------------ +class CD3DX12_HIT_GROUP_SUBOBJECT + : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE +{ +public: + CD3DX12_HIT_GROUP_SUBOBJECT() noexcept + { + Init(); + } + CD3DX12_HIT_GROUP_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject) + { + Init(); + AddToStateObject(ContainingStateObject); + } 
+    // Each setter below keeps its name in a dedicated StringContainer slot and passes
+    // bSingleString = true, so a later call replaces the earlier copy for that slot.
+    void SetHitGroupExport(LPCWSTR exportName)
+    {
+        m_Desc.HitGroupExport = m_Strings[0].LocalCopy(exportName, true);
+    }
+    void SetHitGroupType(D3D12_HIT_GROUP_TYPE Type) noexcept { m_Desc.Type = Type; }
+    void SetAnyHitShaderImport(LPCWSTR importName)
+    {
+        m_Desc.AnyHitShaderImport = m_Strings[1].LocalCopy(importName, true);
+    }
+    void SetClosestHitShaderImport(LPCWSTR importName)
+    {
+        m_Desc.ClosestHitShaderImport = m_Strings[2].LocalCopy(importName, true);
+    }
+    void SetIntersectionShaderImport(LPCWSTR importName)
+    {
+        m_Desc.IntersectionShaderImport = m_Strings[3].LocalCopy(importName, true);
+    }
+    D3D12_STATE_SUBOBJECT_TYPE Type() const noexcept override
+    {
+        return D3D12_STATE_SUBOBJECT_TYPE_HIT_GROUP;
+    }
+    // Dereferences m_pSubobject, which is null until AddToStateObject() has been called.
+    operator const D3D12_STATE_SUBOBJECT&() const noexcept { return *m_pSubobject; }
+    operator const D3D12_HIT_GROUP_DESC&() const noexcept { return m_Desc; }
+private:
+    // Clears the tracked subobject pointer, zeroes the description and empties every
+    // string slot.
+    void Init() noexcept
+    {
+        SUBOBJECT_HELPER_BASE::Init();
+        m_Desc = {};
+        for (UINT i = 0; i < m_NumStrings; i++)
+        {
+            m_Strings[i].clear();
+        }
+    }
+    void* Data() noexcept override { return &m_Desc; }
+    D3D12_HIT_GROUP_DESC m_Desc;
+    static const UINT m_NumStrings = 4;
+    CD3DX12_STATE_OBJECT_DESC::StringContainer
+        m_Strings[m_NumStrings]; // one string for every entrypoint name
+};
+
+//------------------------------------------------------------------------------------------------
+// Helper that builds a D3D12_RAYTRACING_SHADER_CONFIG subobject.
+class CD3DX12_RAYTRACING_SHADER_CONFIG_SUBOBJECT
+    : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE
+{
+public:
+    CD3DX12_RAYTRACING_SHADER_CONFIG_SUBOBJECT() noexcept
+    {
+        Init();
+    }
+    CD3DX12_RAYTRACING_SHADER_CONFIG_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject)
+    {
+        Init();
+        AddToStateObject(ContainingStateObject);
+    }
+    // Stores both size limits in the description.
+    void Config(UINT MaxPayloadSizeInBytes, UINT MaxAttributeSizeInBytes) noexcept
+    {
+        m_Desc.MaxPayloadSizeInBytes = MaxPayloadSizeInBytes;
+        m_Desc.MaxAttributeSizeInBytes = MaxAttributeSizeInBytes;
+    }
+    D3D12_STATE_SUBOBJECT_TYPE Type() const noexcept
override
+    {
+        return D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_SHADER_CONFIG;
+    }
+    // Dereferences m_pSubobject, which is null until AddToStateObject() has been called.
+    operator const D3D12_STATE_SUBOBJECT&() const noexcept { return *m_pSubobject; }
+    operator const D3D12_RAYTRACING_SHADER_CONFIG&() const noexcept { return m_Desc; }
+private:
+    // Clears the tracked subobject pointer and zeroes the description.
+    void Init() noexcept
+    {
+        SUBOBJECT_HELPER_BASE::Init();
+        m_Desc = {};
+    }
+    void* Data() noexcept override { return &m_Desc; }
+    D3D12_RAYTRACING_SHADER_CONFIG m_Desc;
+};
+
+//------------------------------------------------------------------------------------------------
+// Helper that builds a D3D12_RAYTRACING_PIPELINE_CONFIG subobject.
+class CD3DX12_RAYTRACING_PIPELINE_CONFIG_SUBOBJECT
+    : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE
+{
+public:
+    CD3DX12_RAYTRACING_PIPELINE_CONFIG_SUBOBJECT() noexcept
+    {
+        Init();
+    }
+    CD3DX12_RAYTRACING_PIPELINE_CONFIG_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject)
+    {
+        Init();
+        AddToStateObject(ContainingStateObject);
+    }
+    // Stores the recursion depth limit in the description.
+    void Config(UINT MaxTraceRecursionDepth) noexcept
+    {
+        m_Desc.MaxTraceRecursionDepth = MaxTraceRecursionDepth;
+    }
+    D3D12_STATE_SUBOBJECT_TYPE Type() const noexcept override
+    {
+        return D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_PIPELINE_CONFIG;
+    }
+    // Dereferences m_pSubobject, which is null until AddToStateObject() has been called.
+    operator const D3D12_STATE_SUBOBJECT&() const noexcept { return *m_pSubobject; }
+    operator const D3D12_RAYTRACING_PIPELINE_CONFIG&() const noexcept { return m_Desc; }
+private:
+    // Clears the tracked subobject pointer and zeroes the description.
+    void Init() noexcept
+    {
+        SUBOBJECT_HELPER_BASE::Init();
+        m_Desc = {};
+    }
+    void* Data() noexcept override { return &m_Desc; }
+    D3D12_RAYTRACING_PIPELINE_CONFIG m_Desc;
+};
+
+//------------------------------------------------------------------------------------------------
+// Helper that builds a D3D12_RAYTRACING_PIPELINE_CONFIG1 subobject.
+class CD3DX12_RAYTRACING_PIPELINE_CONFIG1_SUBOBJECT
+    : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE
+{
+public:
+    CD3DX12_RAYTRACING_PIPELINE_CONFIG1_SUBOBJECT() noexcept
+    {
+        Init();
+    }
+    CD3DX12_RAYTRACING_PIPELINE_CONFIG1_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject)
+    {
+        Init();
+        AddToStateObject(ContainingStateObject);
+    }
+    void Config(UINT
MaxTraceRecursionDepth, D3D12_RAYTRACING_PIPELINE_FLAGS Flags) noexcept + { + m_Desc.MaxTraceRecursionDepth = MaxTraceRecursionDepth; + m_Desc.Flags = Flags; + } + D3D12_STATE_SUBOBJECT_TYPE Type() const noexcept override + { + return D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_PIPELINE_CONFIG1; + } + operator const D3D12_STATE_SUBOBJECT&() const noexcept { return *m_pSubobject; } + operator const D3D12_RAYTRACING_PIPELINE_CONFIG1&() const noexcept { return m_Desc; } +private: + void Init() noexcept + { + SUBOBJECT_HELPER_BASE::Init(); + m_Desc = {}; + } + void* Data() noexcept override { return &m_Desc; } + D3D12_RAYTRACING_PIPELINE_CONFIG1 m_Desc; +}; + +//------------------------------------------------------------------------------------------------ +class CD3DX12_GLOBAL_ROOT_SIGNATURE_SUBOBJECT + : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE +{ +public: + CD3DX12_GLOBAL_ROOT_SIGNATURE_SUBOBJECT() noexcept + { + Init(); + } + CD3DX12_GLOBAL_ROOT_SIGNATURE_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject) + { + Init(); + AddToStateObject(ContainingStateObject); + } + void SetRootSignature(ID3D12RootSignature* pRootSig) noexcept + { + m_pRootSig = pRootSig; + } + D3D12_STATE_SUBOBJECT_TYPE Type() const noexcept override + { + return D3D12_STATE_SUBOBJECT_TYPE_GLOBAL_ROOT_SIGNATURE; + } + operator const D3D12_STATE_SUBOBJECT&() const noexcept { return *m_pSubobject; } + operator ID3D12RootSignature*() const noexcept { return D3DX12_COM_PTR_GET(m_pRootSig); } +private: + void Init() noexcept + { + SUBOBJECT_HELPER_BASE::Init(); + m_pRootSig = nullptr; + } + void* Data() noexcept override { return D3DX12_COM_PTR_ADDRESSOF(m_pRootSig); } + D3DX12_COM_PTR m_pRootSig; +}; + +//------------------------------------------------------------------------------------------------ +class CD3DX12_LOCAL_ROOT_SIGNATURE_SUBOBJECT + : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE +{ +public: + CD3DX12_LOCAL_ROOT_SIGNATURE_SUBOBJECT() noexcept + { + Init(); 
+    }
+    CD3DX12_LOCAL_ROOT_SIGNATURE_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject)
+    {
+        Init();
+        AddToStateObject(ContainingStateObject);
+    }
+    void SetRootSignature(ID3D12RootSignature* pRootSig) noexcept
+    {
+        m_pRootSig = pRootSig;
+    }
+    D3D12_STATE_SUBOBJECT_TYPE Type() const noexcept override
+    {
+        return D3D12_STATE_SUBOBJECT_TYPE_LOCAL_ROOT_SIGNATURE;
+    }
+    // Dereferences m_pSubobject, which is null until AddToStateObject() has been called.
+    operator const D3D12_STATE_SUBOBJECT&() const noexcept { return *m_pSubobject; }
+    operator ID3D12RootSignature*() const noexcept { return D3DX12_COM_PTR_GET(m_pRootSig); }
+private:
+    // Clears the tracked subobject pointer and releases any held root signature.
+    void Init() noexcept
+    {
+        SUBOBJECT_HELPER_BASE::Init();
+        m_pRootSig = nullptr;
+    }
+    // Exposes the address of the stored interface pointer as the subobject payload.
+    void* Data() noexcept override { return D3DX12_COM_PTR_ADDRESSOF(m_pRootSig); }
+    D3DX12_COM_PTR<ID3D12RootSignature> m_pRootSig;
+};
+
+//------------------------------------------------------------------------------------------------
+// Helper that builds a D3D12_STATE_OBJECT_CONFIG subobject.
+class CD3DX12_STATE_OBJECT_CONFIG_SUBOBJECT
+    : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE
+{
+public:
+    CD3DX12_STATE_OBJECT_CONFIG_SUBOBJECT() noexcept
+    {
+        Init();
+    }
+    CD3DX12_STATE_OBJECT_CONFIG_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject)
+    {
+        Init();
+        AddToStateObject(ContainingStateObject);
+    }
+    void SetFlags(D3D12_STATE_OBJECT_FLAGS Flags) noexcept
+    {
+        m_Desc.Flags = Flags;
+    }
+    D3D12_STATE_SUBOBJECT_TYPE Type() const noexcept override
+    {
+        return D3D12_STATE_SUBOBJECT_TYPE_STATE_OBJECT_CONFIG;
+    }
+    // Dereferences m_pSubobject, which is null until AddToStateObject() has been called.
+    operator const D3D12_STATE_SUBOBJECT&() const noexcept { return *m_pSubobject; }
+    operator const D3D12_STATE_OBJECT_CONFIG&() const noexcept { return m_Desc; }
+private:
+    // Clears the tracked subobject pointer and zeroes the description.
+    void Init() noexcept
+    {
+        SUBOBJECT_HELPER_BASE::Init();
+        m_Desc = {};
+    }
+    void* Data() noexcept override { return &m_Desc; }
+    D3D12_STATE_OBJECT_CONFIG m_Desc;
+};
+
+//------------------------------------------------------------------------------------------------
+class CD3DX12_NODE_MASK_SUBOBJECT
+    : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE
+{
+public:
+
CD3DX12_NODE_MASK_SUBOBJECT() noexcept + { + Init(); + } + CD3DX12_NODE_MASK_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject) + { + Init(); + AddToStateObject(ContainingStateObject); + } + void SetNodeMask(UINT NodeMask) noexcept + { + m_Desc.NodeMask = NodeMask; + } + D3D12_STATE_SUBOBJECT_TYPE Type() const noexcept override + { + return D3D12_STATE_SUBOBJECT_TYPE_NODE_MASK; + } + operator const D3D12_STATE_SUBOBJECT&() const noexcept { return *m_pSubobject; } + operator const D3D12_NODE_MASK&() const noexcept { return m_Desc; } +private: + void Init() noexcept + { + SUBOBJECT_HELPER_BASE::Init(); + m_Desc = {}; + } + void* Data() noexcept override { return &m_Desc; } + D3D12_NODE_MASK m_Desc; +}; + +#undef D3DX12_COM_PTR +#undef D3DX12_COM_PTR_GET +#undef D3DX12_COM_PTR_ADDRESSOF +#endif // #ifndef D3DX12_NO_STATE_OBJECT_HELPERS + +#endif // defined( __cplusplus ) + +#endif //__D3DX12_H__ + + diff --git a/tools/cmake_tests/has_bitscan.c b/tools/cmake_tests/has_bitscan.c new file mode 100644 index 0000000..b154f69 --- /dev/null +++ b/tools/cmake_tests/has_bitscan.c @@ -0,0 +1,19 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#include + +int main() +{ + unsigned long a = 0x42; + unsigned long i = 0; + unsigned char c = _BitScanForward(&i, a); + unsigned char d = _BitScanReverse(&i, a); + unsigned char e = _BitScanForward64(&i, a); + unsigned char f = _BitScanReverse64(&i, a); + return (int)(c + d + e + f); +} \ No newline at end of file diff --git a/tools/cmake_tests/has_builtin_clz_ctz.c b/tools/cmake_tests/has_builtin_clz_ctz.c new file mode 100644 index 0000000..9ac9410 --- /dev/null +++ b/tools/cmake_tests/has_builtin_clz_ctz.c @@ -0,0 +1,17 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+
+int main()
+{
+    unsigned int a = 0x42;
+    unsigned int b = __builtin_clz(a);    // unsigned int variants
+    unsigned int c = __builtin_ctz(b);
+    unsigned int d = __builtin_clzll(a);  // unsigned long long variants
+    unsigned int e = __builtin_ctzll(b);
+
+    return (int)(a + b + c + d + e);      // every result feeds the exit code
+}
\ No newline at end of file
diff --git a/tools/cmake_tests/has_builtin_popcount.c b/tools/cmake_tests/has_builtin_popcount.c
new file mode 100644
index 0000000..1edd34e
--- /dev/null
+++ b/tools/cmake_tests/has_builtin_popcount.c
@@ -0,0 +1,13 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+
+int main()
+{
+    unsigned int a = 0x42u;
+    unsigned int b = __builtin_popcount(a);  // the builtin under test
+    return (a + b);                          // result feeds the exit code
+}
\ No newline at end of file
diff --git a/tools/cmake_tests/has_maybe_unused.cpp b/tools/cmake_tests/has_maybe_unused.cpp
new file mode 100644
index 0000000..9a65d92
--- /dev/null
+++ b/tools/cmake_tests/has_maybe_unused.cpp
@@ -0,0 +1,17 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// This file is part of the AMD Render Pipeline Shaders SDK which is
+// released under the AMD INTERNAL EVALUATION LICENSE.
+//
+// See file LICENSE.RTF for full license details.
+
+[[maybe_unused]] int foo(bool b)  // attribute applied to a function
+{
+    [[maybe_unused]] bool b1 = b;  // attribute applied to a local variable
+    return 1;
+}
+
+int main()
+{
+    return 0;
+}
diff --git a/tools/cmake_tests/has_nodiscard.cpp b/tools/cmake_tests/has_nodiscard.cpp
new file mode 100644
index 0000000..dc36565
--- /dev/null
+++ b/tools/cmake_tests/has_nodiscard.cpp
@@ -0,0 +1,17 @@
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +[[nodiscard]] +int foo() +{ + return 1; +} + +int main() +{ + return foo(); +} diff --git a/tools/cmake_tests/has_popcnt.c b/tools/cmake_tests/has_popcnt.c new file mode 100644 index 0000000..7ca2160 --- /dev/null +++ b/tools/cmake_tests/has_popcnt.c @@ -0,0 +1,15 @@ +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// This file is part of the AMD Render Pipeline Shaders SDK which is +// released under the AMD INTERNAL EVALUATION LICENSE. +// +// See file LICENSE.RTF for full license details. + +#include + +int main() +{ + unsigned int a = 0x42u; + unsigned int b = __popcnt(a); + return (a + b); +} \ No newline at end of file diff --git a/tools/rps_hlslc/README.md b/tools/rps_hlslc/README.md new file mode 100644 index 0000000..a204d3e --- /dev/null +++ b/tools/rps_hlslc/README.md @@ -0,0 +1,27 @@ +# RPS-HLSLC Compiler Command Line Reference + +## General usage: +```bash +rps-hlslc.exe [