diff --git a/CMakePresets.json b/CMakePresets.json index 3869d53a327b01..1349681c13758e 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -73,7 +73,8 @@ "CMAKE_INSTALL_LIBDIR": "lib/CMAKE_SYSTEM_PROCESSOR-linux-gnu", "OBS_CMAKE_VERSION": {"type": "STRING", "value": "3.0.0"}, "ENABLE_AJA": false, - "ENABLE_NATIVE_NVENC": false, + "ENABLE_NVENC": false, + "ENABLE_FFMPEG_NVENC": true, "ENABLE_VLC": true, "ENABLE_WAYLAND": true, "ENABLE_WEBRTC": false diff --git a/UI/window-basic-main-outputs.cpp b/UI/window-basic-main-outputs.cpp index 033c5f3ca913db..b1edff50b9c695 100644 --- a/UI/window-basic-main-outputs.cpp +++ b/UI/window-basic-main-outputs.cpp @@ -609,15 +609,17 @@ const char *get_simple_output_encoder(const char *encoder) } else if (strcmp(encoder, SIMPLE_ENCODER_AMD_AV1) == 0) { return "av1_texture_amf"; } else if (strcmp(encoder, SIMPLE_ENCODER_NVENC) == 0) { - return EncoderAvailable("jim_nvenc") ? "jim_nvenc" - : "ffmpeg_nvenc"; + return EncoderAvailable("obs_nvenc_h264_tex") + ? "obs_nvenc_h264_tex" + : "ffmpeg_nvenc"; #ifdef ENABLE_HEVC } else if (strcmp(encoder, SIMPLE_ENCODER_NVENC_HEVC) == 0) { - return EncoderAvailable("jim_hevc_nvenc") ? "jim_hevc_nvenc" - : "ffmpeg_hevc_nvenc"; + return EncoderAvailable("obs_nvenc_hevc_tex") + ? 
"obs_nvenc_hevc_tex" + : "ffmpeg_hevc_nvenc"; #endif } else if (strcmp(encoder, SIMPLE_ENCODER_NVENC_AV1) == 0) { - return "jim_av1_nvenc"; + return "obs_nvenc_av1_tex"; } else if (strcmp(encoder, SIMPLE_ENCODER_APPLE_H264) == 0) { return "com.apple.videotoolbox.videoencoder.ave.avc"; #ifdef ENABLE_HEVC @@ -1848,7 +1850,7 @@ void AdvancedOutput::UpdateStreamSettings() blog(LOG_WARNING, "User is ignoring service settings."); } - if (dynBitrate && astrcmpi(streamEncoder, "jim_nvenc") == 0) + if (dynBitrate && strstr(streamEncoder, "nvenc") != nullptr) obs_data_set_bool(settings, "lookahead", false); video_t *video = obs_get_video(); diff --git a/UI/window-basic-settings-stream.cpp b/UI/window-basic-settings-stream.cpp index 0c34fdac86ee2a..f005faf8f7dbe5 100644 --- a/UI/window-basic-settings-stream.cpp +++ b/UI/window-basic-settings-stream.cpp @@ -1606,8 +1606,9 @@ bool OBSBasicSettings::ServiceAndACodecCompatible() /* we really need a way to find fallbacks in a less hardcoded way. maybe. 
*/ static QString get_adv_fallback(const QString &enc) { - if (enc == "jim_hevc_nvenc" || enc == "jim_av1_nvenc") - return "jim_nvenc"; + if (enc == "obs_nvenc_hevc_tex" || enc == "obs_nvenc_av1_tex" || + enc == "jim_hevc_nvenc" || enc == "jim_av1_nvenc") + return "obs_nvenc_h264_tex"; if (enc == "h265_texture_amf" || enc == "av1_texture_amf") return "h264_texture_amf"; if (enc == "com.apple.videotoolbox.videoencoder.ave.hevc") @@ -1863,7 +1864,7 @@ void OBSBasicSettings::ResetEncoders(bool streamOnly) ui->simpleOutStrEncoder->addItem( ENCODER_STR("Hardware.NVENC.H264"), QString(SIMPLE_ENCODER_NVENC)); - if (service_supports_encoder(vcodecs, "jim_av1_nvenc")) + if (service_supports_encoder(vcodecs, "obs_nvenc_av1_tex")) ui->simpleOutStrEncoder->addItem( ENCODER_STR("Hardware.NVENC.AV1"), QString(SIMPLE_ENCODER_NVENC_AV1)); diff --git a/UI/window-basic-settings.cpp b/UI/window-basic-settings.cpp index 1f5812b42cf5db..787a5c047a4eba 100644 --- a/UI/window-basic-settings.cpp +++ b/UI/window-basic-settings.cpp @@ -5354,7 +5354,7 @@ void OBSBasicSettings::FillSimpleRecordingValues() ui->simpleOutRecEncoder->addItem( ENCODER_STR("Hardware.NVENC.H264"), QString(SIMPLE_ENCODER_NVENC)); - if (EncoderAvailable("jim_av1_nvenc")) + if (EncoderAvailable("obs_nvenc_av1_tex")) ui->simpleOutRecEncoder->addItem( ENCODER_STR("Hardware.NVENC.AV1"), QString(SIMPLE_ENCODER_NVENC_AV1)); diff --git a/plugins/CMakeLists.txt b/plugins/CMakeLists.txt index 68cb1024970cab..632a40be4604f2 100644 --- a/plugins/CMakeLists.txt +++ b/plugins/CMakeLists.txt @@ -62,6 +62,7 @@ if(OBS_CMAKE_VERSION VERSION_GREATER_EQUAL 3.0.0) add_obs_plugin(obs-ffmpeg) add_obs_plugin(obs-filters) add_obs_plugin(obs-libfdk) + add_obs_plugin(obs-nvenc PLATFORMS WINDOWS LINUX) add_obs_plugin(obs-outputs) add_obs_plugin( obs-qsv11 diff --git a/plugins/obs-ffmpeg/CMakeLists.txt b/plugins/obs-ffmpeg/CMakeLists.txt index 4a928c82cf5208..906f0bca5b70fc 100644 --- a/plugins/obs-ffmpeg/CMakeLists.txt +++ 
b/plugins/obs-ffmpeg/CMakeLists.txt @@ -5,8 +5,8 @@ legacy_check() option(ENABLE_FFMPEG_LOGGING "Enables obs-ffmpeg logging" OFF) option(ENABLE_NEW_MPEGTS_OUTPUT "Use native SRT/RIST mpegts output" ON) -if(OS_LINUX) - option(ENABLE_NATIVE_NVENC "Use native NVENC implementation" ON) +if(OS_LINUX OR OS_WINDOWS) + option(ENABLE_FFMPEG_NVENC "Enable legacy FFmpeg NVENC encoder" OFF) endif() include(cmake/dependencies.cmake) @@ -20,6 +20,7 @@ target_sources( obs-ffmpeg PRIVATE # cmake-format: sortable $<$:obs-ffmpeg-logging.c> + $<$:obs-ffmpeg-nvenc.c> $<$:obs-ffmpeg-mpegts.c> $<$:obs-ffmpeg-rist.h> $<$:obs-ffmpeg-srt.h> @@ -36,7 +37,6 @@ target_sources( obs-ffmpeg-hls-mux.c obs-ffmpeg-mux.c obs-ffmpeg-mux.h - obs-ffmpeg-nvenc.c obs-ffmpeg-output.c obs-ffmpeg-output.h obs-ffmpeg-source.c @@ -44,8 +44,11 @@ target_sources( obs-ffmpeg.c) target_compile_options(obs-ffmpeg PRIVATE $<$:-Wno-shorten-64-to-32>) -target_compile_definitions(obs-ffmpeg PRIVATE $<$:ENABLE_FFMPEG_LOGGING> - $<$:NEW_MPEGTS_OUTPUT>) +target_compile_definitions( + obs-ffmpeg + PRIVATE $<$:ENABLE_FFMPEG_LOGGING> + $<$:ENABLE_FFMPEG_NVENC> + $<$:NEW_MPEGTS_OUTPUT>) target_link_libraries( obs-ffmpeg @@ -59,7 +62,6 @@ target_link_libraries( FFmpeg::avutil FFmpeg::swscale FFmpeg::swresample - $ $<$:OBS::w32-pthreads> $<$:AMF::AMF> $<$:ws2_32> diff --git a/plugins/obs-ffmpeg/cmake/dependencies.cmake b/plugins/obs-ffmpeg/cmake/dependencies.cmake index 6e809bbc273960..da110e110b1a4f 100644 --- a/plugins/obs-ffmpeg/cmake/dependencies.cmake +++ b/plugins/obs-ffmpeg/cmake/dependencies.cmake @@ -36,32 +36,6 @@ elseif( find_package(Libdrm REQUIRED) endif() -if(OS_WINDOWS OR (OS_LINUX AND ENABLE_NATIVE_NVENC)) - add_library(obs-nvenc-version INTERFACE) - add_library(OBS::obs-nvenc-version ALIAS obs-nvenc-version) - target_sources(obs-nvenc-version INTERFACE obs-nvenc-ver.h) - target_include_directories(obs-nvenc-version INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}") - - find_package(FFnvcodec 12.0.0.0...<12.2.0.0 
REQUIRED) - - if(OS_LINUX AND NOT TARGET OBS::glad) - add_subdirectory("${CMAKE_SOURCE_DIR}/deps/glad" "${CMAKE_BINARY_DIR}/deps/glad") - endif() - - add_library(obs-nvenc-native INTERFACE) - add_library(OBS::obs-nvenc-native ALIAS obs-nvenc-native) - target_sources(obs-nvenc-native INTERFACE obs-nvenc-helpers.c obs-nvenc.c obs-nvenc.h) - target_compile_definitions(obs-nvenc-native INTERFACE $<$:NVCODEC_AVAILABLE>) - target_include_directories(obs-nvenc-native INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}") - - target_link_libraries(obs-nvenc-native INTERFACE FFnvcodec::FFnvcodec OBS::obs-nvenc-version - $<$:OBS::glad>) - - if(OS_WINDOWS) - add_subdirectory(obs-nvenc-test) - endif() -endif() - if(ENABLE_NEW_MPEGTS_OUTPUT) find_package(Librist QUIET) find_package(Libsrt QUIET) diff --git a/plugins/obs-ffmpeg/cmake/legacy.cmake b/plugins/obs-ffmpeg/cmake/legacy.cmake index 04866b90ab0b92..6bc1b6b2f090b7 100644 --- a/plugins/obs-ffmpeg/cmake/legacy.cmake +++ b/plugins/obs-ffmpeg/cmake/legacy.cmake @@ -2,7 +2,10 @@ project(obs-ffmpeg) option(ENABLE_FFMPEG_LOGGING "Enables obs-ffmpeg logging" OFF) option(ENABLE_NEW_MPEGTS_OUTPUT "Use native SRT/RIST mpegts output" ON) -option(ENABLE_NATIVE_NVENC "Use native NVENC implementation" ON) + +if(OS_LINUX OR OS_WINDOWS) + option(ENABLE_FFMPEG_NVENC "Enables legacy FFmpeg NVENC encoder" OFF) +endif() find_package( FFmpeg REQUIRED @@ -49,7 +52,6 @@ target_sources( obs-ffmpeg-video-encoders.c obs-ffmpeg-audio-encoders.c obs-ffmpeg-av1.c - obs-ffmpeg-nvenc.c obs-ffmpeg-output.c obs-ffmpeg-output.h obs-ffmpeg-mux.c @@ -85,6 +87,11 @@ if(ENABLE_NEW_MPEGTS_OUTPUT) target_compile_definitions(obs-ffmpeg PRIVATE NEW_MPEGTS_OUTPUT) endif() +if(ENABLE_FFMPEG_NVENC) + target_sources(obs-ffmpeg PRIVATE obs-ffmpeg-nvenc.c) + target_compile_definitions(obs-ffmpeg PRIVATE ENABLE_FFMPEG_NVENC) +endif() + if(ENABLE_FFMPEG_LOGGING) target_sources(obs-ffmpeg PRIVATE obs-ffmpeg-logging.c) endif() @@ -96,7 +103,6 @@ if(OS_WINDOWS) find_package(FFnvcodec 12 
REQUIRED) add_subdirectory(obs-amf-test) - add_subdirectory(obs-nvenc-test) if(MSVC) target_link_libraries(obs-ffmpeg PRIVATE OBS::w32-pthreads) @@ -106,15 +112,7 @@ if(OS_WINDOWS) set(MODULE_DESCRIPTION "OBS FFmpeg module") configure_file(${CMAKE_SOURCE_DIR}/cmake/bundle/windows/obs-module.rc.in obs-ffmpeg.rc) - target_sources( - obs-ffmpeg - PRIVATE texture-amf.cpp - texture-amf-opts.hpp - obs-nvenc.c - obs-nvenc.h - obs-nvenc-helpers.c - obs-nvenc-ver.h - obs-ffmpeg.rc) + target_sources(obs-ffmpeg PRIVATE texture-amf.cpp texture-amf-opts.hpp obs-ffmpeg.rc) elseif(OS_POSIX AND NOT OS_MACOS) find_package(Libva REQUIRED) @@ -122,13 +120,6 @@ elseif(OS_POSIX AND NOT OS_MACOS) find_package(Libdrm REQUIRED) target_sources(obs-ffmpeg PRIVATE obs-ffmpeg-vaapi.c vaapi-utils.c vaapi-utils.h) target_link_libraries(obs-ffmpeg PRIVATE Libva::va Libva::drm LIBPCI::LIBPCI Libdrm::Libdrm) - - if(ENABLE_NATIVE_NVENC) - find_package(FFnvcodec 12.0.0.0...<12.2.0.0 REQUIRED) - target_sources(obs-ffmpeg PRIVATE obs-nvenc.c obs-nvenc.h obs-nvenc-helpers.c obs-nvenc-ver.h) - target_link_libraries(obs-ffmpeg PRIVATE FFnvcodec::FFnvcodec OBS::obsglad) - target_compile_definitions(obs-ffmpeg PRIVATE NVCODEC_AVAILABLE) - endif() endif() setup_plugin_target(obs-ffmpeg) diff --git a/plugins/obs-ffmpeg/obs-ffmpeg-nvenc.c b/plugins/obs-ffmpeg/obs-ffmpeg-nvenc.c index 3740cbc1a2a2de..ba151b834b4df8 100644 --- a/plugins/obs-ffmpeg/obs-ffmpeg-nvenc.c +++ b/plugins/obs-ffmpeg/obs-ffmpeg-nvenc.c @@ -494,7 +494,7 @@ static bool rate_control_modified(obs_properties_t *ppts, obs_property_t *p, return true; } -obs_properties_t *nvenc_properties_internal(enum codec_type codec, bool ffmpeg) +obs_properties_t *nvenc_properties_internal(enum codec_type codec) { obs_properties_t *props = obs_properties_create(); obs_property_t *p; @@ -587,15 +587,6 @@ obs_properties_t *nvenc_properties_internal(enum codec_type codec, bool ffmpeg) } #undef add_profile - if (!ffmpeg) { - p = obs_properties_add_bool(props, 
"lookahead", - obs_module_text("NVENC.LookAhead")); - obs_property_set_long_description( - p, obs_module_text("NVENC.LookAhead.ToolTip")); - p = obs_properties_add_bool(props, "repeat_headers", - "repeat_headers"); - obs_property_set_visible(p, false); - } p = obs_properties_add_bool( props, "psycho_aq", obs_module_text("NVENC.PsychoVisualTuning")); @@ -610,37 +601,17 @@ obs_properties_t *nvenc_properties_internal(enum codec_type codec, bool ffmpeg) return props; } -obs_properties_t *h264_nvenc_properties(void *unused) -{ - UNUSED_PARAMETER(unused); - return nvenc_properties_internal(CODEC_H264, false); -} - -#ifdef ENABLE_HEVC -obs_properties_t *hevc_nvenc_properties(void *unused) -{ - UNUSED_PARAMETER(unused); - return nvenc_properties_internal(CODEC_HEVC, false); -} -#endif - -obs_properties_t *av1_nvenc_properties(void *unused) -{ - UNUSED_PARAMETER(unused); - return nvenc_properties_internal(CODEC_AV1, false); -} - obs_properties_t *h264_nvenc_properties_ffmpeg(void *unused) { UNUSED_PARAMETER(unused); - return nvenc_properties_internal(CODEC_H264, true); + return nvenc_properties_internal(CODEC_H264); } #ifdef ENABLE_HEVC obs_properties_t *hevc_nvenc_properties_ffmpeg(void *unused) { UNUSED_PARAMETER(unused); - return nvenc_properties_internal(CODEC_HEVC, true); + return nvenc_properties_internal(CODEC_HEVC); } #endif @@ -676,11 +647,7 @@ struct obs_encoder_info h264_nvenc_encoder_info = { .get_extra_data = nvenc_extra_data, .get_sei_data = nvenc_sei_data, .get_video_info = nvenc_video_info, -#if defined(_WIN32) || defined(NVCODEC_AVAILABLE) - .caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_INTERNAL, -#else .caps = OBS_ENCODER_CAP_DYN_BITRATE, -#endif }; #ifdef ENABLE_HEVC @@ -698,10 +665,6 @@ struct obs_encoder_info hevc_nvenc_encoder_info = { .get_extra_data = nvenc_extra_data, .get_sei_data = nvenc_sei_data, .get_video_info = nvenc_video_info, -#if defined(_WIN32) || defined(NVCODEC_AVAILABLE) - .caps = OBS_ENCODER_CAP_DYN_BITRATE | 
OBS_ENCODER_CAP_INTERNAL, -#else .caps = OBS_ENCODER_CAP_DYN_BITRATE, -#endif }; #endif diff --git a/plugins/obs-ffmpeg/obs-ffmpeg.c b/plugins/obs-ffmpeg/obs-ffmpeg.c index c20281f631986b..04c715e420fc42 100644 --- a/plugins/obs-ffmpeg/obs-ffmpeg.c +++ b/plugins/obs-ffmpeg/obs-ffmpeg.c @@ -5,14 +5,8 @@ #include #ifdef _WIN32 +#define INITGUID #include -#include -#endif - -#if defined(_WIN32) || defined(NVCODEC_AVAILABLE) -#include "obs-nvenc.h" - -#define OBS_NVENC_AVAILABLE #endif #if !defined(_WIN32) && !defined(__APPLE__) @@ -41,10 +35,12 @@ extern struct obs_encoder_info pcm24_encoder_info; extern struct obs_encoder_info pcm32_encoder_info; extern struct obs_encoder_info alac_encoder_info; extern struct obs_encoder_info flac_encoder_info; +#ifdef ENABLE_FFMPEG_NVENC extern struct obs_encoder_info h264_nvenc_encoder_info; #ifdef ENABLE_HEVC extern struct obs_encoder_info hevc_nvenc_encoder_info; #endif +#endif extern struct obs_encoder_info svt_av1_encoder_info; extern struct obs_encoder_info aom_av1_encoder_info; @@ -59,11 +55,10 @@ extern struct obs_encoder_info hevc_vaapi_encoder_tex_info; #endif #endif -#ifndef __APPLE__ +#ifdef ENABLE_FFMPEG_NVENC static const char *nvenc_check_name = "nvenc_check"; -#if defined(_WIN32) || defined(__linux__) static const int blacklisted_adapters[] = { 0x1298, // GK208M [GeForce GT 720M] 0x1140, // GF117M [GeForce 610M/710M/810M/820M / GT 620M/625M/630M/720M] @@ -127,9 +122,8 @@ static bool is_blacklisted(const int device_id) return false; } -#endif -#if defined(_WIN32) +#ifdef _WIN32 typedef HRESULT(WINAPI *create_dxgi_proc)(const IID *, IDXGIFactory1 **); static bool nvenc_device_available(void) @@ -243,10 +237,6 @@ static bool nvenc_device_available(void) } #endif -#ifdef OBS_NVENC_AVAILABLE -extern bool load_nvenc_lib(void); -#endif - static bool nvenc_codec_exists(const char *name, const char *fallback) { const AVCodec *nvenc = avcodec_find_encoder_by_name(name); @@ -256,7 +246,7 @@ static bool nvenc_codec_exists(const 
char *name, const char *fallback) return nvenc != NULL; } -static bool nvenc_supported(bool *out_h264, bool *out_hevc, bool *out_av1) +static bool nvenc_supported(bool *out_h264, bool *out_hevc) { profile_start(nvenc_check_name); @@ -267,13 +257,18 @@ static bool nvenc_supported(bool *out_h264, bool *out_hevc, bool *out_av1) const bool hevc = false; #endif - bool av1 = false; - bool success = h264 || hevc; if (success) { -#ifdef OBS_NVENC_AVAILABLE - success = nvenc_device_available() && load_nvenc_lib(); - av1 = success && (get_nvenc_ver() >= ((12 << 4) | 0)); +#ifdef _WIN32 + success = nvenc_device_available(); +#elif defined(__linux__) + success = nvenc_device_available(); + if (success) { + void *const lib = os_dlopen("libnvidia-encode.so.1"); + success = lib != NULL; + if (success) + os_dlclose(lib); + } #else void *const lib = os_dlopen("libnvidia-encode.so.1"); success = lib != NULL; @@ -284,7 +279,6 @@ static bool nvenc_supported(bool *out_h264, bool *out_hevc, bool *out_av1) if (success) { *out_h264 = h264; *out_hevc = hevc; - *out_av1 = av1; } } @@ -334,11 +328,6 @@ static bool hevc_vaapi_supported(void) #endif #endif -#ifdef OBS_NVENC_AVAILABLE -extern void obs_nvenc_load(bool h264, bool hevc, bool av1); -extern void obs_nvenc_unload(void); -#endif - #ifdef _WIN32 extern void amf_load(void); extern void amf_unload(void); @@ -375,16 +364,12 @@ bool obs_module_load(void) obs_register_encoder(&pcm32_encoder_info); obs_register_encoder(&alac_encoder_info); obs_register_encoder(&flac_encoder_info); -#ifndef __APPLE__ +#ifdef ENABLE_FFMPEG_NVENC bool h264 = false; bool hevc = false; - bool av1 = false; - if (nvenc_supported(&h264, &hevc, &av1)) { + if (nvenc_supported(&h264, &hevc)) { blog(LOG_INFO, "NVENC supported"); -#ifdef OBS_NVENC_AVAILABLE - obs_nvenc_load(h264, hevc, av1); -#endif if (h264) obs_register_encoder(&h264_nvenc_encoder_info); #ifdef ENABLE_HEVC @@ -447,7 +432,4 @@ void obs_module_unload(void) #ifdef _WIN32 amf_unload(); #endif -#ifdef 
OBS_NVENC_AVAILABLE - obs_nvenc_unload(); -#endif } diff --git a/plugins/obs-ffmpeg/obs-nvenc-test/cmake/legacy.cmake b/plugins/obs-ffmpeg/obs-nvenc-test/cmake/legacy.cmake deleted file mode 100644 index b48460bf812767..00000000000000 --- a/plugins/obs-ffmpeg/obs-nvenc-test/cmake/legacy.cmake +++ /dev/null @@ -1,13 +0,0 @@ -project(obs-nvenc-test) - -add_executable(obs-nvenc-test) - -find_package(FFnvcodec 12 REQUIRED) - -target_sources(obs-nvenc-test PRIVATE obs-nvenc-test.c ../obs-nvenc-ver.h) -target_compile_definitions(obs-nvenc-test PRIVATE OBS_LEGACY) -target_link_libraries(obs-nvenc-test d3d11 dxgi dxguid FFnvcodec::FFnvcodec) - -set_target_properties(obs-nvenc-test PROPERTIES FOLDER "plugins/obs-ffmpeg") - -setup_binary_target(obs-nvenc-test) diff --git a/plugins/obs-ffmpeg/obs-nvenc-test/obs-nvenc-test.c b/plugins/obs-ffmpeg/obs-nvenc-test/obs-nvenc-test.c deleted file mode 100644 index 7d8e75183198e5..00000000000000 --- a/plugins/obs-ffmpeg/obs-nvenc-test/obs-nvenc-test.c +++ /dev/null @@ -1,238 +0,0 @@ -#include -#include -#include - -#include -#ifdef OBS_LEGACY -#include "../obs-nvenc-ver.h" -#else -#include -#endif - -#include -#include -#include - -__declspec(dllexport) DWORD NvOptimusEnablement = 1; -NV_ENCODE_API_FUNCTION_LIST nv = {NV_ENCODE_API_FUNCTION_LIST_VER}; -static void *nvenc_lib = NULL; -static bool av1_supported = false; - -#define NVIDIA_VENDOR_ID 0x10DE - -struct nvenc_info { - bool is_nvidia; - bool supports_av1; -}; - -#define MAX_CAPS 10 -static uint32_t luid_count = 0; -static uint64_t luid_order[MAX_CAPS] = {0}; -static struct nvenc_info adapter_info[MAX_CAPS] = {0}; - -bool load_nvenc_lib(void) -{ - const char *const file = (sizeof(void *) == 8) ? 
"nvEncodeAPI64.dll" - : "nvEncodeAPI.dll"; - nvenc_lib = LoadLibraryA(file); - return nvenc_lib != NULL; -} - -static inline void *load_nv_func(const char *func) -{ - void *func_ptr = (void *)GetProcAddress(nvenc_lib, func); - return func_ptr; -} - -static inline uint32_t get_adapter_idx(uint32_t adapter_idx, LUID luid) -{ - for (uint32_t i = 0; i < luid_count; i++) { - if (luid_order[i] == *(uint64_t *)&luid) { - return i; - } - } - - return adapter_idx; -} - -static bool get_adapter_caps(IDXGIFactory *factory, uint32_t adapter_idx) -{ - struct nvenc_info *caps; - IDXGIAdapter *adapter = NULL; - ID3D11Device *device = NULL; - ID3D11DeviceContext *context = NULL; - GUID *guids = NULL; - void *session = NULL; - HRESULT hr; - - if (adapter_idx == MAX_CAPS) - return false; - - hr = factory->lpVtbl->EnumAdapters(factory, adapter_idx, &adapter); - if (FAILED(hr)) - return false; - - DXGI_ADAPTER_DESC desc; - adapter->lpVtbl->GetDesc(adapter, &desc); - - caps = &adapter_info[get_adapter_idx(adapter_idx, desc.AdapterLuid)]; - if (desc.VendorId != NVIDIA_VENDOR_ID) - return true; - - caps->is_nvidia = true; - - hr = D3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, NULL, 0, NULL, - 0, D3D11_SDK_VERSION, &device, NULL, &context); - if (FAILED(hr)) - goto finish; - - /* ---------------------------------------------------------------- */ - - NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = { - NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER}; - params.device = device; - params.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX; - params.apiVersion = NVENCAPI_VERSION; - - NVENCSTATUS stat = nv.nvEncOpenEncodeSessionEx(¶ms, &session); - if (stat != NV_ENC_SUCCESS) - goto finish; - - uint32_t guid_count = 0; - if (nv.nvEncGetEncodeGUIDCount(session, &guid_count) != NV_ENC_SUCCESS) - goto finish; - - guids = malloc(guid_count * sizeof(GUID)); - stat = nv.nvEncGetEncodeGUIDs(session, guids, guid_count, &guid_count); - if (stat != NV_ENC_SUCCESS) - goto finish; - - for (uint32_t i = 0; i < 
guid_count; i++) { - GUID *guid = &guids[i]; - - if (memcmp(guid, &NV_ENC_CODEC_AV1_GUID, sizeof(GUID)) == 0) { - caps->supports_av1 = true; - break; - } - } - -finish: - if (guids) - free(guids); - if (session) - nv.nvEncDestroyEncoder(session); - if (context) - context->lpVtbl->Release(context); - if (device) - device->lpVtbl->Release(device); - if (adapter) - adapter->lpVtbl->Release(adapter); - return true; -} - -typedef NVENCSTATUS(NVENCAPI *NV_MAX_VER_FUNC)(uint32_t *); -typedef NVENCSTATUS(NVENCAPI *NV_CREATE_INSTANCE_FUNC)( - NV_ENCODE_API_FUNCTION_LIST *); - -static inline uint32_t get_nvenc_ver(void) -{ - NV_MAX_VER_FUNC nv_max_ver = (NV_MAX_VER_FUNC)load_nv_func( - "NvEncodeAPIGetMaxSupportedVersion"); - if (!nv_max_ver) { - return 0; - } - - uint32_t ver = 0; - if (nv_max_ver(&ver) != NV_ENC_SUCCESS) { - return 0; - } - return ver; -} - -static inline bool init_nvenc_internal(void) -{ - if (!load_nvenc_lib()) - return false; - - uint32_t ver = get_nvenc_ver(); - if (ver == 0) - return false; - - uint32_t supported_ver = (NVENC_COMPAT_MAJOR_VER << 4) | - NVENC_COMPAT_MINOR_VER; - if (supported_ver > ver) - return false; - - NV_CREATE_INSTANCE_FUNC nv_create_instance = - (NV_CREATE_INSTANCE_FUNC)load_nv_func( - "NvEncodeAPICreateInstance"); - if (!nv_create_instance) - return false; - - return nv_create_instance(&nv) == NV_ENC_SUCCESS; -} - -DWORD WINAPI TimeoutThread(LPVOID param) -{ - HANDLE hMainThread = (HANDLE)param; - - DWORD ret = WaitForSingleObject(hMainThread, 2500); - if (ret == WAIT_TIMEOUT) - TerminateProcess(GetCurrentProcess(), STATUS_TIMEOUT); - - CloseHandle(hMainThread); - return 0; -} - -int main(int argc, char *argv[]) -{ - IDXGIFactory *factory = NULL; - HRESULT hr; - - HANDLE hMainThread; - DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), - GetCurrentProcess(), &hMainThread, 0, FALSE, - DUPLICATE_SAME_ACCESS); - DWORD threadId; - HANDLE hThread; - hThread = - CreateThread(NULL, 0, TimeoutThread, hMainThread, 0, &threadId); - 
CloseHandle(hThread); - - /* --------------------------------------------------------- */ - /* try initializing nvenc, I guess */ - - if (!init_nvenc_internal()) - return 0; - - /* --------------------------------------------------------- */ - /* parse expected LUID order */ - - luid_count = argc - 1; - for (int i = 1; i < argc; i++) { - luid_order[i - 1] = strtoull(argv[i], NULL, 16); - } - - /* --------------------------------------------------------- */ - /* obtain adapter compatibility information */ - - hr = CreateDXGIFactory1(&IID_IDXGIFactory1, (void **)&factory); - if (FAILED(hr)) - return 0; - - uint32_t idx = 0; - while (get_adapter_caps(factory, idx++)) - ; - - for (uint32_t i = 0; i < idx; i++) { - struct nvenc_info caps = adapter_info[i]; - - printf("[%u]\n", i); - printf("is_nvidia=%s\n", caps.is_nvidia ? "true" : "false"); - printf("supports_av1=%s\n", - caps.supports_av1 ? "true" : "false"); - } - - factory->lpVtbl->Release(factory); - return 0; -} diff --git a/plugins/obs-ffmpeg/obs-nvenc-ver.h b/plugins/obs-ffmpeg/obs-nvenc-ver.h deleted file mode 100644 index fcd57606556125..00000000000000 --- a/plugins/obs-ffmpeg/obs-nvenc-ver.h +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -#define NVENC_COMPAT_MAJOR_VER 11 -#define NVENC_COMPAT_MINOR_VER 1 - -#define NVENC_COMPAT_VER \ - (NVENC_COMPAT_MAJOR_VER | (NVENC_COMPAT_MINOR_VER << 24)) diff --git a/plugins/obs-ffmpeg/obs-nvenc.c b/plugins/obs-ffmpeg/obs-nvenc.c deleted file mode 100644 index 818b8ca894294a..00000000000000 --- a/plugins/obs-ffmpeg/obs-nvenc.c +++ /dev/null @@ -1,2485 +0,0 @@ -#include "obs-nvenc.h" - -#include -#include -#include -#include -#include - -#include - -#ifdef _WIN32 -#define INITGUID -#include -#include -#include -#else -#include -#endif - -/* ========================================================================= */ -/* a hack of the ages: nvenc backward compatibility */ - -#define CONFIGURED_NVENC_MAJOR 12 -#define CONFIGURED_NVENC_MINOR 1 - -/* we cannot guarantee 
structures haven't changed, so purposely break on - * version change to force the programmer to update or remove backward - * compatibility NVENC code. */ -#if CONFIGURED_NVENC_MAJOR != NVENCAPI_MAJOR_VERSION || \ - CONFIGURED_NVENC_MINOR < NVENCAPI_MINOR_VERSION -#error NVENC version changed, update or remove NVENC compatibility code -#endif - -#undef NVENCAPI_STRUCT_VERSION -#define NVENCAPI_STRUCT_VERSION(ver) \ - ((uint32_t)(enc->needs_compat_ver ? NVENC_COMPAT_VER \ - : NVENCAPI_VERSION) | \ - ((ver) << 16) | (0x7 << 28)) - -#define NV_ENC_CONFIG_COMPAT_VER (NVENCAPI_STRUCT_VERSION(7) | (1 << 31)) -#define NV_ENC_INITIALIZE_PARAMS_COMPAT_VER \ - (NVENCAPI_STRUCT_VERSION(5) | (1 << 31)) -#define NV_ENC_PIC_PARAMS_COMPAT_VER (NVENCAPI_STRUCT_VERSION(4) | (1 << 31)) -#define NV_ENC_LOCK_BITSTREAM_COMPAT_VER NVENCAPI_STRUCT_VERSION(1) -#define NV_ENC_REGISTER_RESOURCE_COMPAT_VER NVENCAPI_STRUCT_VERSION(3) - -#define COMPATIBILITY_VERSION \ - (NVENC_COMPAT_MAJOR_VER << 4 | NVENC_COMPAT_MINOR_VER) - -/* ========================================================================= */ - -#define EXTRA_BUFFERS 5 - -#define do_log(level, format, ...) \ - blog(level, "[obs-nvenc: '%s'] " format, \ - obs_encoder_get_name(enc->encoder), ##__VA_ARGS__) - -#define error(format, ...) do_log(LOG_ERROR, format, ##__VA_ARGS__) -#define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__) -#define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__) -#define debug(format, ...) do_log(LOG_DEBUG, format, ##__VA_ARGS__) - -#define error_hr(msg) error("%s: %s: 0x%08lX", __FUNCTION__, msg, (uint32_t)hr); - -#ifndef _WIN32 -#define min(a, b) (((a) < (b)) ? (a) : (b)) -#define max(a, b) (((a) > (b)) ? 
(a) : (b)) -#endif - -struct nv_bitstream; -struct nv_texture; - -struct handle_tex { -#ifdef _WIN32 - uint32_t handle; - ID3D11Texture2D *tex; - IDXGIKeyedMutex *km; -#else - GLuint tex_id; - CUgraphicsResource res_y; - CUgraphicsResource res_uv; -#endif -}; - -/* ------------------------------------------------------------------------- */ -/* Main Implementation Structure */ - -enum codec_type { - CODEC_H264, - CODEC_HEVC, - CODEC_AV1, -}; - -static const char *get_codec_name(enum codec_type type) -{ - switch (type) { - case CODEC_H264: - return "H264"; - case CODEC_HEVC: - return "HEVC"; - case CODEC_AV1: - return "AV1"; - } - - return "Unknown"; -} - -struct nvenc_data { - obs_encoder_t *encoder; - enum codec_type codec; - GUID codec_guid; - - void *session; - NV_ENC_INITIALIZE_PARAMS params; - NV_ENC_CONFIG config; - int rc_lookahead; - uint32_t buf_count; - int output_delay; - int buffers_queued; - size_t next_bitstream; - size_t cur_bitstream; - bool encode_started; - bool first_packet; - bool can_change_bitrate; - bool needs_compat_ver; - bool fallback; - int32_t bframes; - - DARRAY(struct handle_tex) input_textures; - DARRAY(struct nv_bitstream) bitstreams; - DARRAY(struct nv_cuda_surface) surfaces; - NV_ENC_BUFFER_FORMAT surface_format; - struct deque dts_list; - - DARRAY(uint8_t) packet_data; - int64_t packet_pts; - bool packet_keyframe; - -#ifdef _WIN32 - DARRAY(struct nv_texture) textures; - ID3D11Device *device; - ID3D11DeviceContext *context; -#endif - - uint32_t cx; - uint32_t cy; - enum video_format in_format; - - uint8_t *header; - size_t header_size; - - uint8_t *sei; - size_t sei_size; - - int8_t *roi_map; - size_t roi_map_size; - uint32_t roi_increment; - - CUcontext cu_ctx; -}; - -/* ------------------------------------------------------------------------- */ -/* Bitstream Buffer */ - -struct nv_bitstream { - void *ptr; -}; - -#define NV_FAIL(format, ...) 
nv_fail(enc->encoder, format, ##__VA_ARGS__) -#define NV_FAILED(x) nv_failed(enc->encoder, x, __FUNCTION__, #x) - -static bool nv_bitstream_init(struct nvenc_data *enc, struct nv_bitstream *bs) -{ - NV_ENC_CREATE_BITSTREAM_BUFFER buf = { - NV_ENC_CREATE_BITSTREAM_BUFFER_VER}; - - if (NV_FAILED(nv.nvEncCreateBitstreamBuffer(enc->session, &buf))) { - return false; - } - - bs->ptr = buf.bitstreamBuffer; - return true; -} - -static void nv_bitstream_free(struct nvenc_data *enc, struct nv_bitstream *bs) -{ - if (bs->ptr) { - nv.nvEncDestroyBitstreamBuffer(enc->session, bs->ptr); - } -} - -/* ------------------------------------------------------------------------- */ -/* Texture Resource */ - -#ifdef _WIN32 -struct nv_texture { - void *res; - ID3D11Texture2D *tex; - void *mapped_res; -}; - -static bool nv_texture_init(struct nvenc_data *enc, struct nv_texture *nvtex) -{ - const bool p010 = obs_p010_tex_active(); - - D3D11_TEXTURE2D_DESC desc = {0}; - desc.Width = enc->cx; - desc.Height = enc->cy; - desc.MipLevels = 1; - desc.ArraySize = 1; - desc.Format = p010 ? DXGI_FORMAT_P010 : DXGI_FORMAT_NV12; - desc.SampleDesc.Count = 1; - desc.BindFlags = D3D11_BIND_RENDER_TARGET; - - ID3D11Device *const device = enc->device; - ID3D11Texture2D *tex; - HRESULT hr = device->lpVtbl->CreateTexture2D(device, &desc, NULL, &tex); - if (FAILED(hr)) { - error_hr("Failed to create texture"); - return false; - } - - tex->lpVtbl->SetEvictionPriority(tex, DXGI_RESOURCE_PRIORITY_MAXIMUM); - - uint32_t struct_ver = enc->needs_compat_ver - ? NV_ENC_REGISTER_RESOURCE_COMPAT_VER - : NV_ENC_REGISTER_RESOURCE_VER; - - NV_ENC_REGISTER_RESOURCE res = {struct_ver}; - res.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX; - res.resourceToRegister = tex; - res.width = enc->cx; - res.height = enc->cy; - res.bufferFormat = p010 ? 
NV_ENC_BUFFER_FORMAT_YUV420_10BIT - : NV_ENC_BUFFER_FORMAT_NV12; - - if (NV_FAILED(nv.nvEncRegisterResource(enc->session, &res))) { - tex->lpVtbl->Release(tex); - return false; - } - - nvtex->res = res.registeredResource; - nvtex->tex = tex; - nvtex->mapped_res = NULL; - return true; -} - -static void nv_texture_free(struct nvenc_data *enc, struct nv_texture *nvtex) -{ - - if (nvtex->res) { - if (nvtex->mapped_res) { - nv.nvEncUnmapInputResource(enc->session, - nvtex->mapped_res); - } - nv.nvEncUnregisterResource(enc->session, nvtex->res); - nvtex->tex->lpVtbl->Release(nvtex->tex); - } -} -#endif - -/* ------------------------------------------------------------------------- */ -/* CUDA Stuff */ - -/* CUDA error handling */ - -static inline bool cuda_error_check(struct nvenc_data *enc, CUresult res, - const char *func, const char *call) -{ - if (res == CUDA_SUCCESS) - return true; - - struct dstr message = {0}; - - const char *name, *desc; - if (cuda_get_error_desc(res, &name, &desc)) { - dstr_printf(&message, - "%s: CUDA call \"%s\" failed with %s (%d): %s", - func, call, name, res, desc); - } else { - dstr_printf(&message, "%s: CUDA call \"%s\" failed with %d", - func, call, res); - } - - error("%s", message.array); - obs_encoder_set_last_error(enc->encoder, message.array); - - dstr_free(&message); - return false; -} - -#define CU_FAILED(call) \ - if (!cuda_error_check(enc, call, __FUNCTION__, #call)) \ - return false; - -#define CU_CHECK(call) \ - if (!cuda_error_check(enc, call, __FUNCTION__, #call)) { \ - success = false; \ - goto unmap; \ - } - -/* CUDA Surfaces */ - -struct nv_cuda_surface { - CUarray tex; - NV_ENC_REGISTERED_PTR res; - NV_ENC_INPUT_PTR *mapped_res; -}; - -/* Missing from ffmpeg nvcodec headers, required for CUDA arrays to be usable in NVENC */ -static const int CUDA_ARRAY3D_SURFACE_LDST = 0x02; - -static bool nv_cuda_surface_init(struct nvenc_data *enc, - struct nv_cuda_surface *nvsurf) -{ - const bool p010 = obs_p010_tex_active(); - 
CUDA_ARRAY3D_DESCRIPTOR desc; - desc.Width = enc->cx; - desc.Height = enc->cy; - desc.Depth = 0; - desc.Flags = CUDA_ARRAY3D_SURFACE_LDST; - desc.NumChannels = 1; - - if (!enc->fallback) { - desc.Format = p010 ? CU_AD_FORMAT_UNSIGNED_INT16 - : CU_AD_FORMAT_UNSIGNED_INT8; - desc.Height = enc->cy + enc->cy / 2; - } else { - switch (enc->surface_format) { - case NV_ENC_BUFFER_FORMAT_NV12: - desc.Format = CU_AD_FORMAT_UNSIGNED_INT8; - // Additional half-height plane for UV data - desc.Height += enc->cy / 2; - break; - case NV_ENC_BUFFER_FORMAT_YUV420_10BIT: - desc.Format = CU_AD_FORMAT_UNSIGNED_INT16; - desc.Height += enc->cy / 2; - desc.NumChannels = 2; // number of bytes per element - break; - case NV_ENC_BUFFER_FORMAT_YUV444: - desc.Format = CU_AD_FORMAT_UNSIGNED_INT8; - desc.Height *= 3; // 3 full-size planes - break; - default: - error("Unknown input format: %d", enc->surface_format); - return false; - } - } - - CU_FAILED(cu->cuArray3DCreate(&nvsurf->tex, &desc)) - - NV_ENC_REGISTER_RESOURCE res = {0}; - res.version = enc->needs_compat_ver - ? NV_ENC_REGISTER_RESOURCE_COMPAT_VER - : NV_ENC_REGISTER_RESOURCE_VER; - res.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDAARRAY; - res.resourceToRegister = (void *)nvsurf->tex; - res.width = enc->cx; - res.height = enc->cy; - res.pitch = (uint32_t)(desc.Width * desc.NumChannels); - if (!enc->fallback) { - res.bufferFormat = p010 ? 
NV_ENC_BUFFER_FORMAT_YUV420_10BIT - : NV_ENC_BUFFER_FORMAT_NV12; - } else { - res.bufferFormat = enc->surface_format; - } - - if (NV_FAILED(nv.nvEncRegisterResource(enc->session, &res))) { - return false; - } - - nvsurf->res = res.registeredResource; - nvsurf->mapped_res = NULL; - return true; -} - -static void nv_cuda_surface_free(struct nvenc_data *enc, - struct nv_cuda_surface *nvsurf) -{ - - if (nvsurf->res) { - if (nvsurf->mapped_res) { - nv.nvEncUnmapInputResource(enc->session, - nvsurf->mapped_res); - } - nv.nvEncUnregisterResource(enc->session, nvsurf->res); - cu->cuArrayDestroy(nvsurf->tex); - } -} - -/* ------------------------------------------------------------------------- */ -/* Implementation */ - -static const char *h264_nvenc_get_name(void *type_data) -{ - UNUSED_PARAMETER(type_data); - return "NVIDIA NVENC H.264"; -} - -static const char *h264_nvenc_soft_get_name(void *type_data) -{ - UNUSED_PARAMETER(type_data); - return "NVIDIA NVENC H.264 (Fallback)"; -} - -#ifdef ENABLE_HEVC -static const char *hevc_nvenc_get_name(void *type_data) -{ - UNUSED_PARAMETER(type_data); - return "NVIDIA NVENC HEVC"; -} - -static const char *hevc_nvenc_soft_get_name(void *type_data) -{ - UNUSED_PARAMETER(type_data); - return "NVIDIA NVENC HEVC (Fallback)"; -} -#endif - -static const char *av1_nvenc_get_name(void *type_data) -{ - UNUSED_PARAMETER(type_data); - return "NVIDIA NVENC AV1"; -} - -static const char *av1_nvenc_soft_get_name(void *type_data) -{ - UNUSED_PARAMETER(type_data); - return "NVIDIA NVENC AV1 (Fallback)"; -} - -static inline int nv_get_cap(struct nvenc_data *enc, NV_ENC_CAPS cap) -{ - if (!enc->session) - return 0; - - NV_ENC_CAPS_PARAM param = {NV_ENC_CAPS_PARAM_VER}; - int v; - - param.capsToQuery = cap; - nv.nvEncGetEncodeCaps(enc->session, enc->codec_guid, ¶m, &v); - return v; -} - -static bool nvenc_update(void *data, obs_data_t *settings) -{ - struct nvenc_data *enc = data; - - /* Only support reconfiguration of CBR bitrate */ - if 
(enc->can_change_bitrate) { - int bitrate = (int)obs_data_get_int(settings, "bitrate"); - int max_bitrate = - (int)obs_data_get_int(settings, "max_bitrate"); - bool vbr = (enc->config.rcParams.rateControlMode == - NV_ENC_PARAMS_RC_VBR); - - enc->config.rcParams.averageBitRate = bitrate * 1000; - enc->config.rcParams.maxBitRate = vbr ? max_bitrate * 1000 - : bitrate * 1000; - - NV_ENC_RECONFIGURE_PARAMS params = {0}; - params.version = NV_ENC_RECONFIGURE_PARAMS_VER; - params.reInitEncodeParams = enc->params; - params.resetEncoder = 1; - params.forceIDR = 1; - - if (NV_FAILED(nv.nvEncReconfigureEncoder(enc->session, - ¶ms))) { - return false; - } - } - - return true; -} - -#ifdef _WIN32 -static HANDLE get_lib(struct nvenc_data *enc, const char *lib) -{ - HMODULE mod = GetModuleHandleA(lib); - if (mod) - return mod; - - mod = LoadLibraryA(lib); - if (!mod) - error("Failed to load %s", lib); - return mod; -} - -typedef HRESULT(WINAPI *CREATEDXGIFACTORY1PROC)(REFIID, void **); - -static bool init_d3d11(struct nvenc_data *enc, obs_data_t *settings) -{ - HMODULE dxgi = get_lib(enc, "DXGI.dll"); - HMODULE d3d11 = get_lib(enc, "D3D11.dll"); - CREATEDXGIFACTORY1PROC create_dxgi; - PFN_D3D11_CREATE_DEVICE create_device; - IDXGIFactory1 *factory; - IDXGIAdapter *adapter; - ID3D11Device *device; - ID3D11DeviceContext *context; - HRESULT hr; - - if (!dxgi || !d3d11) { - return false; - } - - create_dxgi = (CREATEDXGIFACTORY1PROC)GetProcAddress( - dxgi, "CreateDXGIFactory1"); - create_device = (PFN_D3D11_CREATE_DEVICE)GetProcAddress( - d3d11, "D3D11CreateDevice"); - - if (!create_dxgi || !create_device) { - error("Failed to load D3D11/DXGI procedures"); - return false; - } - - hr = create_dxgi(&IID_IDXGIFactory1, &factory); - if (FAILED(hr)) { - error_hr("CreateDXGIFactory1 failed"); - return false; - } - - hr = factory->lpVtbl->EnumAdapters(factory, 0, &adapter); - factory->lpVtbl->Release(factory); - if (FAILED(hr)) { - error_hr("EnumAdapters failed"); - return false; - } - - 
hr = create_device(adapter, D3D_DRIVER_TYPE_UNKNOWN, NULL, 0, NULL, 0, - D3D11_SDK_VERSION, &device, NULL, &context); - adapter->lpVtbl->Release(adapter); - if (FAILED(hr)) { - error_hr("D3D11CreateDevice failed"); - return false; - } - - enc->device = device; - enc->context = context; - return true; -} -#endif - -static bool init_session(struct nvenc_data *enc) -{ - NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = { - NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER}; - params.apiVersion = enc->needs_compat_ver ? NVENC_COMPAT_VER - : NVENCAPI_VERSION; -#ifdef _WIN32 - if (enc->fallback) { - params.device = enc->cu_ctx; - params.deviceType = NV_ENC_DEVICE_TYPE_CUDA; - } else { - params.device = enc->device; - params.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX; - } -#else - params.device = enc->cu_ctx; - params.deviceType = NV_ENC_DEVICE_TYPE_CUDA; -#endif - - if (NV_FAILED(nv.nvEncOpenEncodeSessionEx(¶ms, &enc->session))) { - return false; - } - return true; -} - -static void initialize_params(struct nvenc_data *enc, const GUID *nv_preset, - NV_ENC_TUNING_INFO nv_tuning, uint32_t width, - uint32_t height, uint32_t fps_num, - uint32_t fps_den) -{ - int darWidth, darHeight; - av_reduce(&darWidth, &darHeight, width, height, 1024 * 1024); - - NV_ENC_INITIALIZE_PARAMS *params = &enc->params; - memset(params, 0, sizeof(*params)); - params->version = enc->needs_compat_ver - ? NV_ENC_INITIALIZE_PARAMS_COMPAT_VER - : NV_ENC_INITIALIZE_PARAMS_VER; - params->encodeGUID = enc->codec_guid; - params->presetGUID = *nv_preset; - params->encodeWidth = width; - params->encodeHeight = height; - params->darWidth = enc->codec == CODEC_AV1 ? width : (uint32_t)darWidth; - params->darHeight = enc->codec == CODEC_AV1 ? 
height - : (uint32_t)darHeight; - params->frameRateNum = fps_num; - params->frameRateDen = fps_den; - params->enableEncodeAsync = 0; - params->enablePTD = 1; - params->encodeConfig = &enc->config; - params->tuningInfo = nv_tuning; -} - -static inline GUID get_nv_preset2(const char *preset2) -{ - if (astrcmpi(preset2, "p1") == 0) { - return NV_ENC_PRESET_P1_GUID; - } else if (astrcmpi(preset2, "p2") == 0) { - return NV_ENC_PRESET_P2_GUID; - } else if (astrcmpi(preset2, "p3") == 0) { - return NV_ENC_PRESET_P3_GUID; - } else if (astrcmpi(preset2, "p4") == 0) { - return NV_ENC_PRESET_P4_GUID; - } else if (astrcmpi(preset2, "p6") == 0) { - return NV_ENC_PRESET_P6_GUID; - } else if (astrcmpi(preset2, "p7") == 0) { - return NV_ENC_PRESET_P7_GUID; - } else { - return NV_ENC_PRESET_P5_GUID; - } -} - -static inline NV_ENC_TUNING_INFO get_nv_tuning(const char *tuning) -{ - if (astrcmpi(tuning, "ll") == 0) { - return NV_ENC_TUNING_INFO_LOW_LATENCY; - } else if (astrcmpi(tuning, "ull") == 0) { - return NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY; - } else { - return NV_ENC_TUNING_INFO_HIGH_QUALITY; - } -} - -static inline NV_ENC_MULTI_PASS get_nv_multipass(const char *multipass) -{ - if (astrcmpi(multipass, "qres") == 0) { - return NV_ENC_TWO_PASS_QUARTER_RESOLUTION; - } else if (astrcmpi(multipass, "fullres") == 0) { - return NV_ENC_TWO_PASS_FULL_RESOLUTION; - } else { - return NV_ENC_MULTI_PASS_DISABLED; - } -} - -static bool is_10_bit(const struct nvenc_data *enc) -{ - return enc->fallback ? 
enc->in_format == VIDEO_FORMAT_P010 - : obs_p010_tex_active(); -} - -static bool init_encoder_base(struct nvenc_data *enc, obs_data_t *settings, - int bf, bool compatibility, bool *lossless) -{ - const char *rc = obs_data_get_string(settings, "rate_control"); - int bitrate = (int)obs_data_get_int(settings, "bitrate"); - int max_bitrate = (int)obs_data_get_int(settings, "max_bitrate"); - int cqp = (int)obs_data_get_int(settings, "cqp"); - int keyint_sec = (int)obs_data_get_int(settings, "keyint_sec"); - const char *preset = obs_data_get_string(settings, "preset"); - const char *preset2 = obs_data_get_string(settings, "preset2"); - const char *tuning = obs_data_get_string(settings, "tune"); - const char *multipass = obs_data_get_string(settings, "multipass"); - const char *profile = obs_data_get_string(settings, "profile"); - bool lookahead = obs_data_get_bool(settings, "lookahead"); - bool vbr = astrcmpi(rc, "VBR") == 0; - bool psycho_aq = !compatibility && - obs_data_get_bool(settings, "psycho_aq"); - bool disable_scenecut = obs_data_get_bool(settings, "disable_scenecut"); - NVENCSTATUS err; - - video_t *video = obs_encoder_video(enc->encoder); - const struct video_output_info *voi = video_output_get_info(video); - - enc->cx = obs_encoder_get_width(enc->encoder); - enc->cy = obs_encoder_get_height(enc->encoder); - - /* -------------------------- */ - /* get preset */ - - GUID nv_preset = get_nv_preset2(preset2); - NV_ENC_TUNING_INFO nv_tuning = get_nv_tuning(tuning); - NV_ENC_MULTI_PASS nv_multipass = compatibility - ? 
NV_ENC_MULTI_PASS_DISABLED - : get_nv_multipass(multipass); - - if (obs_data_has_user_value(settings, "preset") && - !obs_data_has_user_value(settings, "preset2") && - enc->codec == CODEC_H264) { - if (astrcmpi(preset, "mq") == 0) { - nv_preset = NV_ENC_PRESET_P5_GUID; - nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY; - nv_multipass = NV_ENC_TWO_PASS_QUARTER_RESOLUTION; - - } else if (astrcmpi(preset, "hq") == 0) { - nv_preset = NV_ENC_PRESET_P5_GUID; - nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY; - nv_multipass = NV_ENC_MULTI_PASS_DISABLED; - - } else if (astrcmpi(preset, "default") == 0) { - nv_preset = NV_ENC_PRESET_P3_GUID; - nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY; - nv_multipass = NV_ENC_MULTI_PASS_DISABLED; - - } else if (astrcmpi(preset, "hp") == 0) { - nv_preset = NV_ENC_PRESET_P1_GUID; - nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY; - nv_multipass = NV_ENC_MULTI_PASS_DISABLED; - - } else if (astrcmpi(preset, "ll") == 0) { - nv_preset = NV_ENC_PRESET_P3_GUID; - nv_tuning = NV_ENC_TUNING_INFO_LOW_LATENCY; - nv_multipass = NV_ENC_MULTI_PASS_DISABLED; - - } else if (astrcmpi(preset, "llhq") == 0) { - nv_preset = NV_ENC_PRESET_P4_GUID; - nv_tuning = NV_ENC_TUNING_INFO_LOW_LATENCY; - nv_multipass = NV_ENC_MULTI_PASS_DISABLED; - - } else if (astrcmpi(preset, "llhp") == 0) { - nv_preset = NV_ENC_PRESET_P2_GUID; - nv_tuning = NV_ENC_TUNING_INFO_LOW_LATENCY; - nv_multipass = NV_ENC_MULTI_PASS_DISABLED; - } - } else if (obs_data_has_user_value(settings, "preset") && - !obs_data_has_user_value(settings, "preset2") && - enc->codec == CODEC_HEVC) { - if (astrcmpi(preset, "mq") == 0) { - nv_preset = NV_ENC_PRESET_P6_GUID; - nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY; - nv_multipass = NV_ENC_TWO_PASS_QUARTER_RESOLUTION; - - } else if (astrcmpi(preset, "hq") == 0) { - nv_preset = NV_ENC_PRESET_P6_GUID; - nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY; - nv_multipass = NV_ENC_MULTI_PASS_DISABLED; - - } else if (astrcmpi(preset, "default") == 0) { - nv_preset = 
NV_ENC_PRESET_P5_GUID; - nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY; - nv_multipass = NV_ENC_MULTI_PASS_DISABLED; - - } else if (astrcmpi(preset, "hp") == 0) { - nv_preset = NV_ENC_PRESET_P1_GUID; - nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY; - nv_multipass = NV_ENC_MULTI_PASS_DISABLED; - - } else if (astrcmpi(preset, "ll") == 0) { - nv_preset = NV_ENC_PRESET_P3_GUID; - nv_tuning = NV_ENC_TUNING_INFO_LOW_LATENCY; - nv_multipass = NV_ENC_MULTI_PASS_DISABLED; - - } else if (astrcmpi(preset, "llhq") == 0) { - nv_preset = NV_ENC_PRESET_P4_GUID; - nv_tuning = NV_ENC_TUNING_INFO_LOW_LATENCY; - nv_multipass = NV_ENC_MULTI_PASS_DISABLED; - - } else if (astrcmpi(preset, "llhp") == 0) { - nv_preset = NV_ENC_PRESET_P2_GUID; - nv_tuning = NV_ENC_TUNING_INFO_LOW_LATENCY; - nv_multipass = NV_ENC_MULTI_PASS_DISABLED; - } - } - - const bool rc_lossless = astrcmpi(rc, "lossless") == 0; - *lossless = rc_lossless; - if (rc_lossless) { - *lossless = - nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE); - if (*lossless) { - nv_tuning = NV_ENC_TUNING_INFO_LOSSLESS; - nv_multipass = NV_ENC_MULTI_PASS_DISABLED; - } else { - warn("lossless encode is not supported, ignoring"); - nv_preset = NV_ENC_PRESET_P5_GUID; - nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY; - nv_multipass = NV_ENC_TWO_PASS_QUARTER_RESOLUTION; - } - } - - /* -------------------------- */ - /* get preset default config */ - - uint32_t config_ver = enc->needs_compat_ver ? NV_ENC_CONFIG_COMPAT_VER - : NV_ENC_CONFIG_VER; - - NV_ENC_PRESET_CONFIG preset_config = {NV_ENC_PRESET_CONFIG_VER, - {config_ver}}; - - err = nv.nvEncGetEncodePresetConfigEx(enc->session, enc->codec_guid, - nv_preset, nv_tuning, - &preset_config); - if (nv_failed(enc->encoder, err, __FUNCTION__, - "nvEncGetEncodePresetConfig")) { - return false; - } - - /* -------------------------- */ - /* main configuration */ - - enc->config = preset_config.presetCfg; - - uint32_t gop_size = - (keyint_sec) ? 
keyint_sec * voi->fps_num / voi->fps_den : 250; - - NV_ENC_CONFIG *config = &enc->config; - - initialize_params(enc, &nv_preset, nv_tuning, voi->width, voi->height, - voi->fps_num, voi->fps_den); - - config->gopLength = gop_size; - config->frameIntervalP = 1 + bf; - - enc->bframes = bf; - - /* lookahead */ - const bool use_profile_lookahead = config->rcParams.enableLookahead; - lookahead = nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_LOOKAHEAD) && - (lookahead || use_profile_lookahead); - if (lookahead) { - enc->rc_lookahead = use_profile_lookahead - ? config->rcParams.lookaheadDepth - : 8; - } - - int buf_count = max(4, config->frameIntervalP * 2 * 2); - if (lookahead) { - buf_count = max(buf_count, config->frameIntervalP + - enc->rc_lookahead + - EXTRA_BUFFERS); - } - - buf_count = min(64, buf_count); - enc->buf_count = buf_count; - - const int output_delay = buf_count - 1; - enc->output_delay = output_delay; - - if (lookahead) { - const int lkd_bound = output_delay - config->frameIntervalP - 4; - if (lkd_bound >= 0) { - config->rcParams.enableLookahead = 1; - config->rcParams.lookaheadDepth = - max(enc->rc_lookahead, lkd_bound); - config->rcParams.disableIadapt = 0; - config->rcParams.disableBadapt = 0; - } else { - lookahead = false; - } - } - - enc->config.rcParams.disableIadapt = disable_scenecut; - - /* psycho aq */ - if (!compatibility) { - if (nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ)) { - config->rcParams.enableAQ = psycho_aq; - config->rcParams.aqStrength = 8; - config->rcParams.enableTemporalAQ = psycho_aq; - } else { - warn("Ignoring Psycho Visual Tuning request since GPU is not capable"); - } - } - - /* -------------------------- */ - /* rate control */ - - enc->can_change_bitrate = - nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE); - - config->rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR; - - if (astrcmpi(rc, "cqp") == 0 || rc_lossless) { - if (*lossless) - cqp = 0; - - int cqp_val = enc->codec == CODEC_AV1 ? 
cqp * 4 : cqp; - - config->rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP; - config->rcParams.constQP.qpInterP = cqp_val; - config->rcParams.constQP.qpInterB = cqp_val; - config->rcParams.constQP.qpIntra = cqp_val; - enc->can_change_bitrate = false; - - bitrate = 0; - max_bitrate = 0; - - } else if (astrcmpi(rc, "vbr") != 0) { /* CBR by default */ - config->rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR; - } - - config->rcParams.averageBitRate = bitrate * 1000; - config->rcParams.maxBitRate = vbr ? max_bitrate * 1000 : bitrate * 1000; - config->rcParams.vbvBufferSize = bitrate * 1000; - config->rcParams.multiPass = nv_multipass; - config->rcParams.qpMapMode = NV_ENC_QP_MAP_DELTA; - - /* -------------------------- */ - /* initialize */ - - info("settings:\n" - "\tcodec: %s\n" - "\trate_control: %s\n" - "\tbitrate: %d\n" - "\tcqp: %d\n" - "\tkeyint: %d\n" - "\tpreset: %s\n" - "\ttuning: %s\n" - "\tmultipass: %s\n" - "\tprofile: %s\n" - "\twidth: %d\n" - "\theight: %d\n" - "\tb-frames: %d\n" - "\tlookahead: %s\n" - "\tpsycho_aq: %s\n", - get_codec_name(enc->codec), rc, bitrate, cqp, gop_size, preset2, - tuning, multipass, profile, enc->cx, enc->cy, bf, - lookahead ? "true" : "false", psycho_aq ? 
"true" : "false"); - - return true; -} - -static bool init_encoder_h264(struct nvenc_data *enc, obs_data_t *settings, - int bf, bool compatibility) -{ - const char *rc = obs_data_get_string(settings, "rate_control"); - int keyint_sec = (int)obs_data_get_int(settings, "keyint_sec"); - const char *profile = obs_data_get_string(settings, "profile"); - bool lossless; - - if (!init_encoder_base(enc, settings, bf, compatibility, &lossless)) { - return false; - } - - NV_ENC_CONFIG *config = &enc->config; - NV_ENC_CONFIG_H264 *h264_config = &config->encodeCodecConfig.h264Config; - NV_ENC_CONFIG_H264_VUI_PARAMETERS *vui_params = - &h264_config->h264VUIParameters; - - video_t *video = obs_encoder_video(enc->encoder); - const struct video_output_info *voi = video_output_get_info(video); - uint32_t gop_size = - (keyint_sec) ? keyint_sec * voi->fps_num / voi->fps_den : 250; - - h264_config->idrPeriod = gop_size; - - bool repeat_headers = obs_data_get_bool(settings, "repeat_headers"); - if (repeat_headers) { - h264_config->repeatSPSPPS = 1; - h264_config->disableSPSPPS = 0; - h264_config->outputAUD = 1; - } - - h264_config->sliceMode = 3; - h264_config->sliceModeData = 1; - - h264_config->useBFramesAsRef = NV_ENC_BFRAME_REF_MODE_DISABLED; - - /* Enable CBR padding */ - if (config->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR) - h264_config->enableFillerDataInsertion = 1; - - vui_params->videoSignalTypePresentFlag = 1; - vui_params->videoFullRangeFlag = (voi->range == VIDEO_RANGE_FULL); - vui_params->colourDescriptionPresentFlag = 1; - - switch (voi->colorspace) { - case VIDEO_CS_601: - vui_params->colourPrimaries = 6; - vui_params->transferCharacteristics = 6; - vui_params->colourMatrix = 6; - break; - case VIDEO_CS_DEFAULT: - case VIDEO_CS_709: - vui_params->colourPrimaries = 1; - vui_params->transferCharacteristics = 1; - vui_params->colourMatrix = 1; - break; - case VIDEO_CS_SRGB: - vui_params->colourPrimaries = 1; - vui_params->transferCharacteristics = 13; - 
vui_params->colourMatrix = 1; - break; - default: - break; - } - - if (astrcmpi(rc, "lossless") == 0) { - h264_config->qpPrimeYZeroTransformBypassFlag = 1; - } else if (astrcmpi(rc, "vbr") != 0) { /* CBR */ - h264_config->outputBufferingPeriodSEI = 1; - } - - h264_config->outputPictureTimingSEI = 1; - - /* -------------------------- */ - /* profile */ - - if (enc->in_format == VIDEO_FORMAT_I444) { - config->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID; - h264_config->chromaFormatIDC = 3; - } else if (astrcmpi(profile, "main") == 0) { - config->profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID; - } else if (astrcmpi(profile, "baseline") == 0) { - config->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID; - } else if (!lossless) { - config->profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID; - } - - if (NV_FAILED(nv.nvEncInitializeEncoder(enc->session, &enc->params))) { - return false; - } - - return true; -} - -static bool init_encoder_hevc(struct nvenc_data *enc, obs_data_t *settings, - int bf, bool compatibility) -{ - const char *rc = obs_data_get_string(settings, "rate_control"); - int keyint_sec = (int)obs_data_get_int(settings, "keyint_sec"); - const char *profile = obs_data_get_string(settings, "profile"); - bool lossless; - - if (!init_encoder_base(enc, settings, bf, compatibility, &lossless)) { - return false; - } - - NV_ENC_CONFIG *config = &enc->config; - NV_ENC_CONFIG_HEVC *hevc_config = &config->encodeCodecConfig.hevcConfig; - NV_ENC_CONFIG_HEVC_VUI_PARAMETERS *vui_params = - &hevc_config->hevcVUIParameters; - - video_t *video = obs_encoder_video(enc->encoder); - const struct video_output_info *voi = video_output_get_info(video); - uint32_t gop_size = - (keyint_sec) ? 
keyint_sec * voi->fps_num / voi->fps_den : 250; - - hevc_config->idrPeriod = gop_size; - - bool repeat_headers = obs_data_get_bool(settings, "repeat_headers"); - if (repeat_headers) { - hevc_config->repeatSPSPPS = 1; - hevc_config->disableSPSPPS = 0; - hevc_config->outputAUD = 1; - } - - hevc_config->sliceMode = 3; - hevc_config->sliceModeData = 1; - - hevc_config->useBFramesAsRef = NV_ENC_BFRAME_REF_MODE_DISABLED; - - /* Enable CBR padding */ - if (config->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR) - hevc_config->enableFillerDataInsertion = 1; - - vui_params->videoSignalTypePresentFlag = 1; - vui_params->videoFullRangeFlag = (voi->range == VIDEO_RANGE_FULL); - vui_params->colourDescriptionPresentFlag = 1; - - switch (voi->colorspace) { - case VIDEO_CS_601: - vui_params->colourPrimaries = 6; - vui_params->transferCharacteristics = 6; - vui_params->colourMatrix = 6; - break; - case VIDEO_CS_DEFAULT: - case VIDEO_CS_709: - vui_params->colourPrimaries = 1; - vui_params->transferCharacteristics = 1; - vui_params->colourMatrix = 1; - break; - case VIDEO_CS_SRGB: - vui_params->colourPrimaries = 1; - vui_params->transferCharacteristics = 13; - vui_params->colourMatrix = 1; - break; - case VIDEO_CS_2100_PQ: - vui_params->colourPrimaries = 9; - vui_params->transferCharacteristics = 16; - vui_params->colourMatrix = 9; - vui_params->chromaSampleLocationFlag = 1; - vui_params->chromaSampleLocationTop = 2; - vui_params->chromaSampleLocationBot = 2; - break; - case VIDEO_CS_2100_HLG: - vui_params->colourPrimaries = 9; - vui_params->transferCharacteristics = 18; - vui_params->colourMatrix = 9; - vui_params->chromaSampleLocationFlag = 1; - vui_params->chromaSampleLocationTop = 2; - vui_params->chromaSampleLocationBot = 2; - } - - hevc_config->pixelBitDepthMinus8 = is_10_bit(enc) ? 
2 : 0; - - if (astrcmpi(rc, "cbr") == 0) { - hevc_config->outputBufferingPeriodSEI = 1; - } - - hevc_config->outputPictureTimingSEI = 1; - - /* -------------------------- */ - /* profile */ - - if (enc->in_format == VIDEO_FORMAT_I444) { - config->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID; - hevc_config->chromaFormatIDC = 3; - } else if (astrcmpi(profile, "main10") == 0) { - config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID; - } else if (is_10_bit(enc)) { - blog(LOG_WARNING, "[obs-nvenc] Forcing main10 for P010"); - config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID; - } else { - config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID; - } - - if (NV_FAILED(nv.nvEncInitializeEncoder(enc->session, &enc->params))) { - return false; - } - - return true; -} - -static bool init_encoder_av1(struct nvenc_data *enc, obs_data_t *settings, - int bf, bool compatibility) -{ - int keyint_sec = (int)obs_data_get_int(settings, "keyint_sec"); - bool lossless; - - if (!init_encoder_base(enc, settings, bf, compatibility, &lossless)) { - return false; - } - - NV_ENC_CONFIG *config = &enc->config; - NV_ENC_CONFIG_AV1 *av1_config = &config->encodeCodecConfig.av1Config; - - video_t *video = obs_encoder_video(enc->encoder); - const struct video_output_info *voi = video_output_get_info(video); - uint32_t gop_size = - (keyint_sec) ? keyint_sec * voi->fps_num / voi->fps_den : 250; - - av1_config->idrPeriod = gop_size; - - av1_config->useBFramesAsRef = NV_ENC_BFRAME_REF_MODE_DISABLED; - - av1_config->colorRange = (voi->range == VIDEO_RANGE_FULL); - - /* Enable CBR padding */ - if (config->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR) - av1_config->enableBitstreamPadding = 1; - -#define PIXELCOUNT_4K (3840 * 2160) - - /* If size is 4K+, set tiles to 2 uniform columns. 
*/ - if ((voi->width * voi->height) >= PIXELCOUNT_4K) { - av1_config->enableCustomTileConfig = 0; - av1_config->numTileColumns = 2; - } - - switch (voi->colorspace) { - case VIDEO_CS_601: - av1_config->colorPrimaries = 6; - av1_config->transferCharacteristics = 6; - av1_config->matrixCoefficients = 6; - break; - case VIDEO_CS_DEFAULT: - case VIDEO_CS_709: - av1_config->colorPrimaries = 1; - av1_config->transferCharacteristics = 1; - av1_config->matrixCoefficients = 1; - break; - case VIDEO_CS_SRGB: - av1_config->colorPrimaries = 1; - av1_config->transferCharacteristics = 13; - av1_config->matrixCoefficients = 1; - break; - case VIDEO_CS_2100_PQ: - av1_config->colorPrimaries = 9; - av1_config->transferCharacteristics = 16; - av1_config->matrixCoefficients = 9; - break; - case VIDEO_CS_2100_HLG: - av1_config->colorPrimaries = 9; - av1_config->transferCharacteristics = 18; - av1_config->matrixCoefficients = 9; - } - - /* -------------------------- */ - /* profile */ - - config->profileGUID = NV_ENC_AV1_PROFILE_MAIN_GUID; - av1_config->tier = NV_ENC_TIER_AV1_0; - - av1_config->level = NV_ENC_LEVEL_AV1_AUTOSELECT; - av1_config->chromaFormatIDC = 1; - av1_config->pixelBitDepthMinus8 = is_10_bit(enc) ? 
2 : 0; - av1_config->inputPixelBitDepthMinus8 = av1_config->pixelBitDepthMinus8; - av1_config->numFwdRefs = 1; - av1_config->numBwdRefs = 1; - av1_config->repeatSeqHdr = 1; - - if (NV_FAILED(nv.nvEncInitializeEncoder(enc->session, &enc->params))) { - return false; - } - - return true; -} - -static bool init_bitstreams(struct nvenc_data *enc) -{ - da_reserve(enc->bitstreams, enc->buf_count); - for (uint32_t i = 0; i < enc->buf_count; i++) { - struct nv_bitstream bitstream; - if (!nv_bitstream_init(enc, &bitstream)) { - return false; - } - - da_push_back(enc->bitstreams, &bitstream); - } - - return true; -} - -#ifdef _WIN32 -static bool init_textures(struct nvenc_data *enc) -{ - da_reserve(enc->textures, enc->buf_count); - for (uint32_t i = 0; i < enc->buf_count; i++) { - struct nv_texture texture; - if (!nv_texture_init(enc, &texture)) { - return false; - } - - da_push_back(enc->textures, &texture); - } - - return true; -} -#endif - -static bool init_cuda_surfaces(struct nvenc_data *enc) -{ - switch (enc->in_format) { - case VIDEO_FORMAT_P010: - enc->surface_format = NV_ENC_BUFFER_FORMAT_YUV420_10BIT; - break; - case VIDEO_FORMAT_I444: - enc->surface_format = NV_ENC_BUFFER_FORMAT_YUV444; - break; - default: - enc->surface_format = NV_ENC_BUFFER_FORMAT_NV12; - } - - da_reserve(enc->surfaces, enc->buf_count); - - CU_FAILED(cu->cuCtxPushCurrent(enc->cu_ctx)) - for (uint32_t i = 0; i < enc->buf_count; i++) { - struct nv_cuda_surface buf; - if (!nv_cuda_surface_init(enc, &buf)) { - return false; - } - - da_push_back(enc->surfaces, &buf); - } - CU_FAILED(cu->cuCtxPopCurrent(NULL)) - - return true; -} - -static bool init_cuda_ctx(struct nvenc_data *enc, obs_data_t *settings, - const bool texture) -{ - int count, gpu; - CUdevice device; - bool cuda_override; - - /* Allow CUDA device override for texture encoders (experimental) */ - if (obs_data_has_user_value(settings, "cuda_device")) { - gpu = (int)obs_data_get_int(settings, "cuda_device"); - cuda_override = true; - } else 
{ - gpu = (int)obs_data_get_int(settings, "gpu"); - cuda_override = false; - } - - CU_FAILED(cu->cuInit(0)) - CU_FAILED(cu->cuDeviceGetCount(&count)) - if (!count) { - NV_FAIL("No CUDA devices found"); - return false; - } -#ifdef _WIN32 - CU_FAILED(cu->cuDeviceGet(&device, gpu)) -#else - if (!texture || cuda_override) { - CU_FAILED(cu->cuDeviceGet(&device, gpu)) - } else { - unsigned int ctx_count = 0; - CUdevice devices[2]; - - obs_enter_graphics(); - CUresult res = cu->cuGLGetDevices(&ctx_count, devices, 2, - CU_GL_DEVICE_LIST_ALL); - obs_leave_graphics(); - - if (res != CUDA_SUCCESS || !ctx_count) { - /* CUDA_ERROR_INVALID_GRAPHICS_CONTEXT should be treated - * as non-fatal fallback (probably running on iGPU). */ - if (res == 219) { - info("Not running on NVIDIA GPU, falling back to non-texture encoder"); - } else { - const char *name, *desc; - if (cuda_get_error_desc(res, &name, &desc)) { - error("Failed to get a CUDA device for the current OpenGL context: %s: %s", - name, desc); - } else { - error("Failed to get a CUDA device for the current OpenGL context: %d", - res); - } - } - return false; - } - - /* Documentation indicates this should only ever happen with SLI, i.e. never for OBS. 
*/ - if (ctx_count > 1) { - warn("Got more than one CUDA devices for OpenGL context, this is untested."); - } - - device = devices[0]; - debug("Loading up CUDA on device %u", device); - } -#endif - CU_FAILED(cu->cuCtxCreate(&enc->cu_ctx, 0, device)) - CU_FAILED(cu->cuCtxPopCurrent(NULL)) - - return true; -} - -static enum video_format get_preferred_format(enum video_format format) -{ - switch (format) { - case VIDEO_FORMAT_I010: - case VIDEO_FORMAT_P010: - return VIDEO_FORMAT_P010; - case VIDEO_FORMAT_RGBA: - case VIDEO_FORMAT_BGRA: - case VIDEO_FORMAT_BGRX: - case VIDEO_FORMAT_I444: - return VIDEO_FORMAT_I444; - default: - return VIDEO_FORMAT_NV12; - } -} - -static void nvenc_destroy(void *data); - -static bool init_specific_encoder(struct nvenc_data *enc, obs_data_t *settings, - int bf, bool compatibility) -{ - switch (enc->codec) { - case CODEC_HEVC: - return init_encoder_hevc(enc, settings, bf, compatibility); - case CODEC_H264: - return init_encoder_h264(enc, settings, bf, compatibility); - case CODEC_AV1: - return init_encoder_av1(enc, settings, bf, compatibility); - } - - return false; -} - -static bool init_encoder(struct nvenc_data *enc, enum codec_type codec, - obs_data_t *settings, obs_encoder_t *encoder) -{ - UNUSED_PARAMETER(codec); - UNUSED_PARAMETER(encoder); - - int bf = (int)obs_data_get_int(settings, "bf"); - const bool support_10bit = - nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_10BIT_ENCODE); - const bool support_444 = - nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_YUV444_ENCODE); - const int bf_max = nv_get_cap(enc, NV_ENC_CAPS_NUM_MAX_BFRAMES); - - video_t *video = obs_encoder_video(enc->encoder); - const struct video_output_info *voi = video_output_get_info(video); - enum video_format pref_format = - obs_encoder_get_preferred_video_format(enc->encoder); - if (pref_format == VIDEO_FORMAT_NONE) - pref_format = voi->format; - - enc->in_format = get_preferred_format(pref_format); - - if (enc->in_format == VIDEO_FORMAT_I444 && !support_444) { - 
NV_FAIL(obs_module_text("NVENC.444Unsupported")); - return false; - } - - if (is_10_bit(enc) && !support_10bit) { - NV_FAIL(obs_module_text("NVENC.10bitUnsupported")); - return false; - } - - switch (voi->format) { - case VIDEO_FORMAT_I010: - case VIDEO_FORMAT_P010: - break; - default: - switch (voi->colorspace) { - case VIDEO_CS_2100_PQ: - case VIDEO_CS_2100_HLG: - NV_FAIL(obs_module_text("NVENC.8bitUnsupportedHdr")); - return false; - default: - break; - } - } - - if (bf > bf_max) { - blog(LOG_WARNING, - "[obs-nvenc] Max B-frames setting (%d) is more than encoder supports (%d).\n" - "Setting B-frames to %d", - bf, bf_max, bf_max); - bf = bf_max; - } - - if (!init_specific_encoder(enc, settings, bf, false)) { - blog(LOG_WARNING, "[obs-nvenc] init_specific_encoder failed, " - "trying again with compatibility options"); - - nv.nvEncDestroyEncoder(enc->session); - enc->session = NULL; - - if (!init_session(enc)) { - return false; - } - /* try without multipass and psycho aq */ - if (!init_specific_encoder(enc, settings, bf, true)) { - return false; - } - } - - return true; -} - -static void *nvenc_create_internal(enum codec_type codec, obs_data_t *settings, - obs_encoder_t *encoder, bool texture) -{ - struct nvenc_data *enc = bzalloc(sizeof(*enc)); - enc->encoder = encoder; - enc->codec = codec; - enc->first_packet = true; - enc->fallback = !texture; - - if (get_nvenc_ver() == COMPATIBILITY_VERSION) { - enc->needs_compat_ver = true; - } - NV_ENCODE_API_FUNCTION_LIST init = {NV_ENCODE_API_FUNCTION_LIST_VER}; - - switch (enc->codec) { - case CODEC_H264: - enc->codec_guid = NV_ENC_CODEC_H264_GUID; - break; - case CODEC_HEVC: - enc->codec_guid = NV_ENC_CODEC_HEVC_GUID; - break; - case CODEC_AV1: - enc->codec_guid = NV_ENC_CODEC_AV1_GUID; - break; - } - - if (!init_nvenc(encoder)) { - goto fail; - } - if ( -#ifdef _WIN32 - !texture && -#endif - !init_cuda(encoder)) { - goto fail; - } - if (NV_FAILED(nv_create_instance(&init))) { - goto fail; - } -#ifdef _WIN32 - if 
(texture && !init_d3d11(enc, settings)) { - goto fail; - } -#endif - if ( -#ifdef _WIN32 - !texture && -#endif - !init_cuda_ctx(enc, settings, texture)) { - goto fail; - } - if (!init_session(enc)) { - goto fail; - } - if (!init_encoder(enc, codec, settings, encoder)) { - goto fail; - } - if (!init_bitstreams(enc)) { - goto fail; - } -#ifdef _WIN32 - if (texture && !init_textures(enc)) { - goto fail; - } -#endif - if ( -#ifdef _WIN32 - !texture && -#endif - !init_cuda_surfaces(enc)) { - goto fail; - } - enc->codec = codec; - - if (enc->cu_ctx) - cu->cuCtxPopCurrent(NULL); - - return enc; - -fail: - nvenc_destroy(enc); - return NULL; -} - -static void *nvenc_create_base(enum codec_type codec, obs_data_t *settings, - obs_encoder_t *encoder, bool texture) -{ - /* this encoder requires shared textures, this cannot be used on a - * gpu other than the one OBS is currently running on. */ - const int gpu = (int)obs_data_get_int(settings, "gpu"); - if (gpu != 0 && texture) { - blog(LOG_INFO, - "[obs-nvenc] different GPU selected by user, falling back " - "to non-texture encoder"); - goto reroute; - } - - if (obs_encoder_scaling_enabled(encoder)) { - if (obs_encoder_gpu_scaling_enabled(encoder)) { - blog(LOG_INFO, "[obs-nvenc] GPU scaling enabled"); - } else if (texture) { - blog(LOG_INFO, - "[obs-nvenc] CPU scaling enabled, falling back to" - " non-texture encoder"); - goto reroute; - } - } - - if (texture && !obs_p010_tex_active() && !obs_nv12_tex_active()) { - blog(LOG_INFO, - "[obs-nvenc] nv12/p010 not active, falling back to " - "non-texture encoder"); - goto reroute; - } - - struct nvenc_data *enc = - nvenc_create_internal(codec, settings, encoder, texture); - - if (enc) { - return enc; - } - -reroute: - if (!texture) { - blog(LOG_ERROR, - "Already in fallback encoder, can't fall back further!"); - return NULL; - } - - switch (codec) { - case CODEC_H264: - return obs_encoder_create_rerouted(encoder, - "obs_nvenc_h264_cuda"); - case CODEC_HEVC: - return 
obs_encoder_create_rerouted(encoder, - "obs_nvenc_hevc_cuda"); - case CODEC_AV1: - return obs_encoder_create_rerouted(encoder, - "obs_nvenc_av1_cuda"); - } - - return NULL; -} - -static void *h264_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder) -{ - return nvenc_create_base(CODEC_H264, settings, encoder, true); -} - -#ifdef ENABLE_HEVC -static void *hevc_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder) -{ - return nvenc_create_base(CODEC_HEVC, settings, encoder, true); -} -#endif - -static void *av1_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder) -{ - return nvenc_create_base(CODEC_AV1, settings, encoder, true); -} - -static void *h264_nvenc_soft_create(obs_data_t *settings, - obs_encoder_t *encoder) -{ - return nvenc_create_base(CODEC_H264, settings, encoder, false); -} - -#ifdef ENABLE_HEVC -static void *hevc_nvenc_soft_create(obs_data_t *settings, - obs_encoder_t *encoder) -{ - return nvenc_create_base(CODEC_HEVC, settings, encoder, false); -} -#endif - -static void *av1_nvenc_soft_create(obs_data_t *settings, obs_encoder_t *encoder) -{ - return nvenc_create_base(CODEC_AV1, settings, encoder, false); -} - -static bool get_encoded_packet(struct nvenc_data *enc, bool finalize); - -static void nvenc_destroy(void *data) -{ - struct nvenc_data *enc = data; - - if (enc->cu_ctx) - cu->cuCtxPushCurrent(enc->cu_ctx); - - if (enc->encode_started) { - uint32_t struct_ver = enc->needs_compat_ver - ? 
NV_ENC_PIC_PARAMS_COMPAT_VER - : NV_ENC_PIC_PARAMS_VER; - NV_ENC_PIC_PARAMS params = {struct_ver}; - params.encodePicFlags = NV_ENC_PIC_FLAG_EOS; - nv.nvEncEncodePicture(enc->session, &params); - get_encoded_packet(enc, true); - } -#ifdef _WIN32 - for (size_t i = 0; i < enc->textures.num; i++) { - nv_texture_free(enc, &enc->textures.array[i]); - } -#endif - for (size_t i = 0; i < enc->surfaces.num; i++) { - nv_cuda_surface_free(enc, &enc->surfaces.array[i]); - } - for (size_t i = 0; i < enc->bitstreams.num; i++) { - nv_bitstream_free(enc, &enc->bitstreams.array[i]); - } - if (enc->session) { - nv.nvEncDestroyEncoder(enc->session); - } -#ifdef _WIN32 - for (size_t i = 0; i < enc->input_textures.num; i++) { - ID3D11Texture2D *tex = enc->input_textures.array[i].tex; - IDXGIKeyedMutex *km = enc->input_textures.array[i].km; - tex->lpVtbl->Release(tex); - km->lpVtbl->Release(km); - } - if (enc->context) { - enc->context->lpVtbl->Release(enc->context); - } - if (enc->device) { - enc->device->lpVtbl->Release(enc->device); - } -#else - for (size_t i = 0; i < enc->input_textures.num; i++) { - CUgraphicsResource res_y = enc->input_textures.array[i].res_y; - CUgraphicsResource res_uv = enc->input_textures.array[i].res_uv; - cu->cuGraphicsUnregisterResource(res_y); - cu->cuGraphicsUnregisterResource(res_uv); - } -#endif - if (enc->cu_ctx) { - cu->cuCtxPopCurrent(NULL); - cu->cuCtxDestroy(enc->cu_ctx); - } - - bfree(enc->header); - bfree(enc->sei); - deque_free(&enc->dts_list); - da_free(enc->surfaces); - da_free(enc->input_textures); - da_free(enc->bitstreams); -#ifdef _WIN32 - da_free(enc->textures); -#endif - da_free(enc->packet_data); - bfree(enc->roi_map); - bfree(enc); -} - -#ifdef _WIN32 -static ID3D11Texture2D *get_tex_from_handle(struct nvenc_data *enc, - uint32_t handle, - IDXGIKeyedMutex **km_out) -{ - ID3D11Device *device = enc->device; - IDXGIKeyedMutex *km; - ID3D11Texture2D *input_tex; - HRESULT hr; - - for (size_t i = 0; i < enc->input_textures.num; i++) { - struct 
handle_tex *ht = &enc->input_textures.array[i]; - if (ht->handle == handle) { - *km_out = ht->km; - return ht->tex; - } - } - - hr = device->lpVtbl->OpenSharedResource(device, - (HANDLE)(uintptr_t)handle, - &IID_ID3D11Texture2D, - &input_tex); - if (FAILED(hr)) { - error_hr("OpenSharedResource failed"); - return NULL; - } - - hr = input_tex->lpVtbl->QueryInterface(input_tex, &IID_IDXGIKeyedMutex, - &km); - if (FAILED(hr)) { - error_hr("QueryInterface(IDXGIKeyedMutex) failed"); - input_tex->lpVtbl->Release(input_tex); - return NULL; - } - - input_tex->lpVtbl->SetEvictionPriority(input_tex, - DXGI_RESOURCE_PRIORITY_MAXIMUM); - - *km_out = km; - - struct handle_tex new_ht = {handle, input_tex, km}; - da_push_back(enc->input_textures, &new_ht); - return input_tex; -} -#endif - -static bool get_encoded_packet(struct nvenc_data *enc, bool finalize) -{ - void *s = enc->session; - - da_resize(enc->packet_data, 0); - - if (!enc->buffers_queued) - return true; - if (!finalize && enc->buffers_queued < enc->output_delay) - return true; - - size_t count = finalize ? enc->buffers_queued : 1; - - for (size_t i = 0; i < count; i++) { - size_t cur_bs_idx = enc->cur_bitstream; - struct nv_bitstream *bs = &enc->bitstreams.array[cur_bs_idx]; -#ifdef _WIN32 - struct nv_texture *nvtex = - enc->fallback ? NULL : &enc->textures.array[cur_bs_idx]; - struct nv_cuda_surface *surf = - enc->fallback ? &enc->surfaces.array[cur_bs_idx] : NULL; -#else - struct nv_cuda_surface *surf = &enc->surfaces.array[cur_bs_idx]; -#endif - - /* ---------------- */ - - uint32_t struct_ver = enc->needs_compat_ver - ? 
NV_ENC_LOCK_BITSTREAM_COMPAT_VER - : NV_ENC_LOCK_BITSTREAM_VER; - - NV_ENC_LOCK_BITSTREAM lock = {struct_ver}; - lock.outputBitstream = bs->ptr; - lock.doNotWait = false; - - if (NV_FAILED(nv.nvEncLockBitstream(s, &lock))) { - return false; - } - - if (enc->first_packet) { - NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = {0}; - uint8_t buf[256]; - uint32_t size = 0; - - payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER; - payload.spsppsBuffer = buf; - payload.inBufferSize = sizeof(buf); - payload.outSPSPPSPayloadSize = &size; - - nv.nvEncGetSequenceParams(s, &payload); - enc->header = bmemdup(buf, size); - enc->header_size = size; - enc->first_packet = false; - } - - da_copy_array(enc->packet_data, lock.bitstreamBufferPtr, - lock.bitstreamSizeInBytes); - - enc->packet_pts = (int64_t)lock.outputTimeStamp; - enc->packet_keyframe = lock.pictureType == NV_ENC_PIC_TYPE_IDR; - - if (NV_FAILED(nv.nvEncUnlockBitstream(s, bs->ptr))) { - return false; - } - - /* ---------------- */ -#ifdef _WIN32 - if (nvtex && nvtex->mapped_res) { - NVENCSTATUS err; - err = nv.nvEncUnmapInputResource(s, nvtex->mapped_res); - if (nv_failed(enc->encoder, err, __FUNCTION__, - "unmap")) { - return false; - } - nvtex->mapped_res = NULL; - } -#endif - /* ---------------- */ - - if (surf && surf->mapped_res) { - NVENCSTATUS err; - err = nv.nvEncUnmapInputResource(s, surf->mapped_res); - if (nv_failed(enc->encoder, err, __FUNCTION__, - "unmap")) { - return false; - } - surf->mapped_res = NULL; - } - - /* ---------------- */ - - if (++enc->cur_bitstream == enc->buf_count) - enc->cur_bitstream = 0; - - enc->buffers_queued--; - } - - return true; -} - -struct roi_params { - uint32_t mb_width; - uint32_t mb_height; - uint32_t mb_size; - bool av1; - int8_t *map; -}; - -static void roi_cb(void *param, struct obs_encoder_roi *roi) -{ - const struct roi_params *rp = param; - - int8_t qp_val; - /* AV1 has a larger QP range than HEVC/H.264 */ - if (rp->av1) { - qp_val = (int8_t)(-128.0f * roi->priority); - } else 
{ - qp_val = (int8_t)(-51.0f * roi->priority); - } - - const uint32_t roi_left = roi->left / rp->mb_size; - const uint32_t roi_top = roi->top / rp->mb_size; - const uint32_t roi_right = (roi->right - 1) / rp->mb_size; - const uint32_t roi_bottom = (roi->bottom - 1) / rp->mb_size; - - for (uint32_t mb_y = 0; mb_y < rp->mb_height; mb_y++) { - if (mb_y < roi_top || mb_y > roi_bottom) - continue; - - for (uint32_t mb_x = 0; mb_x < rp->mb_width; mb_x++) { - if (mb_x < roi_left || mb_x > roi_right) - continue; - - rp->map[mb_y * rp->mb_width + mb_x] = qp_val; - } - } -} - -static void add_roi(struct nvenc_data *enc, NV_ENC_PIC_PARAMS *params) -{ - const uint32_t increment = obs_encoder_get_roi_increment(enc->encoder); - - if (enc->roi_map && enc->roi_increment == increment) { - params->qpDeltaMap = enc->roi_map; - params->qpDeltaMapSize = (uint32_t)enc->roi_map_size; - return; - } - - uint32_t mb_size = 0; - switch (enc->codec) { - case CODEC_H264: - /* H.264 is always 16x16 */ - mb_size = 16; - break; - case CODEC_HEVC: - /* HEVC can be 16x16, 32x32, or 64x64, but NVENC is always 32x32 */ - mb_size = 32; - break; - case CODEC_AV1: - /* AV1 can be 64x64 or 128x128, but NVENC is always 64x64 */ - mb_size = 64; - break; - } - - const uint32_t mb_width = (enc->cx + mb_size - 1) / mb_size; - const uint32_t mb_height = (enc->cy + mb_size - 1) / mb_size; - const size_t map_size = mb_width * mb_height * sizeof(int8_t); - - if (map_size != enc->roi_map_size) { - enc->roi_map = brealloc(enc->roi_map, map_size); - enc->roi_map_size = map_size; - } - - memset(enc->roi_map, 0, enc->roi_map_size); - - struct roi_params par = { - .mb_width = mb_width, - .mb_height = mb_height, - .mb_size = mb_size, - .av1 = enc->codec == CODEC_AV1, - .map = enc->roi_map, - }; - - obs_encoder_enum_roi(enc->encoder, roi_cb, &par); - - enc->roi_increment = increment; - params->qpDeltaMap = enc->roi_map; - params->qpDeltaMapSize = (uint32_t)map_size; -} - -static bool nvenc_encode_shared(struct nvenc_data 
*enc, struct nv_bitstream *bs, - void *pic, int64_t pts, - struct encoder_packet *packet, - bool *received_packet) -{ - NV_ENC_PIC_PARAMS params = {0}; - params.version = enc->needs_compat_ver ? NV_ENC_PIC_PARAMS_COMPAT_VER - : NV_ENC_PIC_PARAMS_VER; - params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME; - params.inputBuffer = pic; - params.inputTimeStamp = (uint64_t)pts; - params.inputWidth = enc->cx; - params.inputHeight = enc->cy; - params.inputPitch = enc->cx; - params.outputBitstream = bs->ptr; - - if (enc->fallback) { - params.bufferFmt = enc->surface_format; - } else { - params.bufferFmt = obs_p010_tex_active() - ? NV_ENC_BUFFER_FORMAT_YUV420_10BIT - : NV_ENC_BUFFER_FORMAT_NV12; - } - - /* Add ROI map if enabled */ - if (obs_encoder_has_roi(enc->encoder)) - add_roi(enc, &params); - - NVENCSTATUS err = nv.nvEncEncodePicture(enc->session, &params); - if (err != NV_ENC_SUCCESS && err != NV_ENC_ERR_NEED_MORE_INPUT) { - nv_failed(enc->encoder, err, __FUNCTION__, - "nvEncEncodePicture"); - return false; - } - - enc->encode_started = true; - enc->buffers_queued++; - - if (++enc->next_bitstream == enc->buf_count) { - enc->next_bitstream = 0; - } - - /* ------------------------------------ */ - /* check for encoded packet and parse */ - - if (!get_encoded_packet(enc, false)) { - return false; - } - - /* ------------------------------------ */ - /* output encoded packet */ - - if (enc->packet_data.num) { - int64_t dts; - deque_pop_front(&enc->dts_list, &dts, sizeof(dts)); - - /* subtract bframe delay from dts for H.264 and HEVC */ - if (enc->codec != CODEC_AV1) - dts -= (int64_t)enc->bframes * packet->timebase_num; - - *received_packet = true; - packet->data = enc->packet_data.array; - packet->size = enc->packet_data.num; - packet->type = OBS_ENCODER_VIDEO; - packet->pts = enc->packet_pts; - packet->dts = dts; - packet->keyframe = enc->packet_keyframe; - } else { - *received_packet = false; - } - - return true; -} - -#ifdef _WIN32 -static bool nvenc_encode_tex(void *data, uint32_t 
handle, int64_t pts, - uint64_t lock_key, uint64_t *next_key, - struct encoder_packet *packet, - bool *received_packet) -{ - struct nvenc_data *enc = data; - ID3D11DeviceContext *context = enc->context; - ID3D11Texture2D *input_tex; - ID3D11Texture2D *output_tex; - IDXGIKeyedMutex *km; - struct nv_texture *nvtex; - struct nv_bitstream *bs; - - if (handle == GS_INVALID_HANDLE) { - error("Encode failed: bad texture handle"); - *next_key = lock_key; - return false; - } - - bs = &enc->bitstreams.array[enc->next_bitstream]; - nvtex = &enc->textures.array[enc->next_bitstream]; - - input_tex = get_tex_from_handle(enc, handle, &km); - output_tex = nvtex->tex; - - if (!input_tex) { - *next_key = lock_key; - return false; - } - - deque_push_back(&enc->dts_list, &pts, sizeof(pts)); - - /* ------------------------------------ */ - /* copy to output tex */ - - km->lpVtbl->AcquireSync(km, lock_key, INFINITE); - - context->lpVtbl->CopyResource(context, (ID3D11Resource *)output_tex, - (ID3D11Resource *)input_tex); - - km->lpVtbl->ReleaseSync(km, *next_key); - - /* ------------------------------------ */ - /* map output tex so nvenc can use it */ - - NV_ENC_MAP_INPUT_RESOURCE map = {NV_ENC_MAP_INPUT_RESOURCE_VER}; - map.registeredResource = nvtex->res; - if (NV_FAILED(nv.nvEncMapInputResource(enc->session, &map))) { - return false; - } - - nvtex->mapped_res = map.mappedResource; - - /* ------------------------------------ */ - /* do actual encode call */ - - return nvenc_encode_shared(enc, bs, nvtex->mapped_res, pts, packet, - received_packet); -} - -#else - -static inline bool get_res_for_tex_ids(struct nvenc_data *enc, GLuint tex_id_y, - GLuint tex_id_uv, - CUgraphicsResource *tex_y, - CUgraphicsResource *tex_uv) -{ - bool success = true; - - for (size_t idx = 0; idx < enc->input_textures.num; idx++) { - struct handle_tex *ht = &enc->input_textures.array[idx]; - if (ht->tex_id != tex_id_y) - continue; - - *tex_y = ht->res_y; - *tex_uv = ht->res_uv; - return success; - } - - 
CU_CHECK(cu->cuGraphicsGLRegisterImage( - tex_y, tex_id_y, GL_TEXTURE_2D, - CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY)) - CU_CHECK(cu->cuGraphicsGLRegisterImage( - tex_uv, tex_id_uv, GL_TEXTURE_2D, - CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY)) - - struct handle_tex ht = {tex_id_y, *tex_y, *tex_uv}; - da_push_back(enc->input_textures, &ht); - -unmap: - if (!success) { - cu->cuGraphicsUnregisterResource(*tex_y); - cu->cuGraphicsUnregisterResource(*tex_uv); - } - - return success; -} - -static inline bool copy_tex_cuda(struct nvenc_data *enc, const bool p010, - GLuint tex[2], struct nv_cuda_surface *surf) -{ - bool success = true; - CUgraphicsResource mapped_tex[2] = {0}; - CUarray mapped_cuda; - - if (!get_res_for_tex_ids(enc, tex[0], tex[1], &mapped_tex[0], - &mapped_tex[1])) - return false; - - CU_CHECK(cu->cuGraphicsMapResources(2, mapped_tex, 0)) - - CUDA_MEMCPY2D m = {0}; - m.dstMemoryType = CU_MEMORYTYPE_ARRAY; - m.srcMemoryType = CU_MEMORYTYPE_ARRAY; - m.dstArray = surf->tex; - m.WidthInBytes = p010 ? 
enc->cx * 2 : enc->cx; - m.Height = enc->cy; - - // Map and copy Y texture - CU_CHECK(cu->cuGraphicsSubResourceGetMappedArray(&mapped_cuda, - mapped_tex[0], 0, 0)); - m.srcArray = mapped_cuda; - CU_CHECK(cu->cuMemcpy2D(&m)) - - // Map and copy UV texture - CU_CHECK(cu->cuGraphicsSubResourceGetMappedArray(&mapped_cuda, - mapped_tex[1], 0, 0)) - m.srcArray = mapped_cuda; - m.dstY += enc->cy; - m.Height = enc->cy / 2; - - CU_CHECK(cu->cuMemcpy2D(&m)) - -unmap: - cu->cuGraphicsUnmapResources(2, mapped_tex, 0); - - return success; -} - -static bool nvenc_encode_tex2(void *data, struct encoder_texture *tex, - int64_t pts, uint64_t lock_key, - uint64_t *next_key, struct encoder_packet *packet, - bool *received_packet) -{ - struct nvenc_data *enc = data; - struct nv_cuda_surface *surf; - struct nv_bitstream *bs; - const bool p010 = obs_p010_tex_active(); - GLuint input_tex[2]; - - if (tex == NULL || tex->tex[0] == NULL) { - error("Encode failed: bad texture handle"); - *next_key = lock_key; - return false; - } - - bs = &enc->bitstreams.array[enc->next_bitstream]; - surf = &enc->surfaces.array[enc->next_bitstream]; - - deque_push_back(&enc->dts_list, &pts, sizeof(pts)); - - /* ------------------------------------ */ - /* copy to CUDA data */ - - CU_FAILED(cu->cuCtxPushCurrent(enc->cu_ctx)) - obs_enter_graphics(); - input_tex[0] = *(GLuint *)gs_texture_get_obj(tex->tex[0]); - input_tex[1] = *(GLuint *)gs_texture_get_obj(tex->tex[1]); - - bool success = copy_tex_cuda(enc, p010, input_tex, surf); - - obs_leave_graphics(); - CU_FAILED(cu->cuCtxPopCurrent(NULL)) - - if (!success) - return false; - - /* ------------------------------------ */ - /* map output tex so nvenc can use it */ - - NV_ENC_MAP_INPUT_RESOURCE map = {NV_ENC_MAP_INPUT_RESOURCE_VER}; - map.registeredResource = surf->res; - map.mappedBufferFmt = p010 ? 
NV_ENC_BUFFER_FORMAT_YUV420_10BIT - : NV_ENC_BUFFER_FORMAT_NV12; - - if (NV_FAILED(nv.nvEncMapInputResource(enc->session, &map))) - return false; - - surf->mapped_res = map.mappedResource; - - /* ------------------------------------ */ - /* do actual encode call */ - - return nvenc_encode_shared(enc, bs, surf->mapped_res, pts, packet, - received_packet); -} -#endif - -static inline bool nvenc_copy_frame(struct nvenc_data *enc, - struct encoder_frame *frame, - struct nv_cuda_surface *surf) -{ - bool success = true; - size_t height = enc->cy; - size_t width = enc->cx; - CUDA_MEMCPY2D m = {0}; - - m.srcMemoryType = CU_MEMORYTYPE_HOST; - m.dstMemoryType = CU_MEMORYTYPE_ARRAY; - m.dstArray = surf->tex; - m.WidthInBytes = width; - m.Height = height; - - CU_FAILED(cu->cuCtxPushCurrent(enc->cu_ctx)) - - if (enc->surface_format == NV_ENC_BUFFER_FORMAT_NV12) { - /* Page-locks the host memory so that it can be DMAd directly - * rather than CUDA doing an internal copy to page-locked - * memory before actually DMA-ing to the GPU. 
*/ - CU_CHECK(cu->cuMemHostRegister(frame->data[0], - frame->linesize[0] * height, 0)) - CU_CHECK(cu->cuMemHostRegister( - frame->data[1], frame->linesize[1] * height / 2, 0)) - - m.srcPitch = frame->linesize[0]; - m.srcHost = frame->data[0]; - CU_FAILED(cu->cuMemcpy2D(&m)) - - m.srcPitch = frame->linesize[1]; - m.srcHost = frame->data[1]; - m.dstY += height; - m.Height /= 2; - CU_FAILED(cu->cuMemcpy2D(&m)) - } else if (enc->surface_format == NV_ENC_BUFFER_FORMAT_YUV420_10BIT) { - CU_CHECK(cu->cuMemHostRegister(frame->data[0], - frame->linesize[0] * height, 0)) - CU_CHECK(cu->cuMemHostRegister( - frame->data[1], frame->linesize[1] * height / 2, 0)) - - // P010 lines are double the size (16 bit per pixel) - m.WidthInBytes *= 2; - - m.srcPitch = frame->linesize[0]; - m.srcHost = frame->data[0]; - CU_FAILED(cu->cuMemcpy2D(&m)) - - m.srcPitch = frame->linesize[1]; - m.srcHost = frame->data[1]; - m.dstY += height; - m.Height /= 2; - CU_FAILED(cu->cuMemcpy2D(&m)) - } else { // I444 - CU_CHECK(cu->cuMemHostRegister(frame->data[0], - frame->linesize[0] * height, 0)) - CU_CHECK(cu->cuMemHostRegister(frame->data[1], - frame->linesize[1] * height, 0)) - CU_CHECK(cu->cuMemHostRegister(frame->data[2], - frame->linesize[2] * height, 0)) - - m.srcPitch = frame->linesize[0]; - m.srcHost = frame->data[0]; - CU_FAILED(cu->cuMemcpy2D(&m)) - - m.srcPitch = frame->linesize[1]; - m.srcHost = frame->data[1]; - m.dstY += height; - CU_FAILED(cu->cuMemcpy2D(&m)) - - m.srcPitch = frame->linesize[2]; - m.srcHost = frame->data[2]; - m.dstY += height; - CU_FAILED(cu->cuMemcpy2D(&m)) - } - -unmap: - if (frame->data[0]) - cu->cuMemHostUnregister(frame->data[0]); - if (frame->data[1]) - cu->cuMemHostUnregister(frame->data[1]); - if (frame->data[2]) - cu->cuMemHostUnregister(frame->data[2]); - - CU_FAILED(cu->cuCtxPopCurrent(NULL)) - - return success; -} - -static bool nvenc_encode_soft(void *data, struct encoder_frame *frame, - struct encoder_packet *packet, - bool *received_packet) -{ - struct 
nvenc_data *enc = data; - struct nv_cuda_surface *surf; - struct nv_bitstream *bs; - - bs = &enc->bitstreams.array[enc->next_bitstream]; - surf = &enc->surfaces.array[enc->next_bitstream]; - - deque_push_back(&enc->dts_list, &frame->pts, sizeof(frame->pts)); - - /* ------------------------------------ */ - /* copy to CUDA surface */ - - if (!nvenc_copy_frame(enc, frame, surf)) - return false; - - /* ------------------------------------ */ - /* map output tex so nvenc can use it */ - - NV_ENC_MAP_INPUT_RESOURCE map = {NV_ENC_MAP_INPUT_RESOURCE_VER}; - map.registeredResource = surf->res; - map.mappedBufferFmt = enc->surface_format; - - if (NV_FAILED(nv.nvEncMapInputResource(enc->session, &map))) - return false; - - surf->mapped_res = map.mappedResource; - - /* ------------------------------------ */ - /* do actual encode call */ - - return nvenc_encode_shared(enc, bs, surf->mapped_res, frame->pts, - packet, received_packet); -} - -static void nvenc_soft_video_info(void *data, struct video_scale_info *info) -{ - struct nvenc_data *enc = data; - info->format = enc->in_format; -} - -extern void h264_nvenc_defaults(obs_data_t *settings); -extern obs_properties_t *h264_nvenc_properties(void *unused); -#ifdef ENABLE_HEVC -extern void hevc_nvenc_defaults(obs_data_t *settings); -extern obs_properties_t *hevc_nvenc_properties(void *unused); -#endif -extern obs_properties_t *av1_nvenc_properties(void *unused); -extern void av1_nvenc_defaults(obs_data_t *settings); - -static bool nvenc_extra_data(void *data, uint8_t **header, size_t *size) -{ - struct nvenc_data *enc = data; - - if (!enc->header) { - return false; - } - - *header = enc->header; - *size = enc->header_size; - return true; -} - -static bool nvenc_sei_data(void *data, uint8_t **sei, size_t *size) -{ - struct nvenc_data *enc = data; - - if (!enc->sei) { - return false; - } - - *sei = enc->sei; - *size = enc->sei_size; - return true; -} - -struct obs_encoder_info h264_nvenc_info = { - .id = "jim_nvenc", - .codec = 
"h264", - .type = OBS_ENCODER_VIDEO, - .caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE | - OBS_ENCODER_CAP_ROI, - .get_name = h264_nvenc_get_name, - .create = h264_nvenc_create, - .destroy = nvenc_destroy, - .update = nvenc_update, -#ifdef _WIN32 - .encode_texture = nvenc_encode_tex, -#else - .encode_texture2 = nvenc_encode_tex2, -#endif - .get_defaults = h264_nvenc_defaults, - .get_properties = h264_nvenc_properties, - .get_extra_data = nvenc_extra_data, - .get_sei_data = nvenc_sei_data, -}; - -#ifdef ENABLE_HEVC -struct obs_encoder_info hevc_nvenc_info = { - .id = "jim_hevc_nvenc", - .codec = "hevc", - .type = OBS_ENCODER_VIDEO, - .caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE | - OBS_ENCODER_CAP_ROI, - .get_name = hevc_nvenc_get_name, - .create = hevc_nvenc_create, - .destroy = nvenc_destroy, - .update = nvenc_update, -#ifdef _WIN32 - .encode_texture = nvenc_encode_tex, -#else - .encode_texture2 = nvenc_encode_tex2, -#endif - .get_defaults = hevc_nvenc_defaults, - .get_properties = hevc_nvenc_properties, - .get_extra_data = nvenc_extra_data, - .get_sei_data = nvenc_sei_data, -}; -#endif - -struct obs_encoder_info av1_nvenc_info = { - .id = "jim_av1_nvenc", - .codec = "av1", - .type = OBS_ENCODER_VIDEO, - .caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE | - OBS_ENCODER_CAP_ROI, - .get_name = av1_nvenc_get_name, - .create = av1_nvenc_create, - .destroy = nvenc_destroy, - .update = nvenc_update, -#ifdef _WIN32 - .encode_texture = nvenc_encode_tex, -#else - .encode_texture2 = nvenc_encode_tex2, -#endif - .get_defaults = av1_nvenc_defaults, - .get_properties = av1_nvenc_properties, - .get_extra_data = nvenc_extra_data, -}; - -struct obs_encoder_info h264_nvenc_soft_info = { - .id = "obs_nvenc_h264_cuda", - .codec = "h264", - .type = OBS_ENCODER_VIDEO, - .caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI | - OBS_ENCODER_CAP_INTERNAL, - .get_name = h264_nvenc_soft_get_name, - .create = 
h264_nvenc_soft_create, - .destroy = nvenc_destroy, - .update = nvenc_update, - .encode = nvenc_encode_soft, - .get_defaults = h264_nvenc_defaults, - .get_properties = h264_nvenc_properties, - .get_extra_data = nvenc_extra_data, - .get_sei_data = nvenc_sei_data, - .get_video_info = nvenc_soft_video_info, -}; - -#ifdef ENABLE_HEVC -struct obs_encoder_info hevc_nvenc_soft_info = { - .id = "obs_nvenc_hevc_cuda", - .codec = "hevc", - .type = OBS_ENCODER_VIDEO, - .caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI | - OBS_ENCODER_CAP_INTERNAL, - .get_name = hevc_nvenc_soft_get_name, - .create = hevc_nvenc_soft_create, - .destroy = nvenc_destroy, - .update = nvenc_update, - .encode = nvenc_encode_soft, - .get_defaults = hevc_nvenc_defaults, - .get_properties = hevc_nvenc_properties, - .get_extra_data = nvenc_extra_data, - .get_sei_data = nvenc_sei_data, - .get_video_info = nvenc_soft_video_info, -}; -#endif - -struct obs_encoder_info av1_nvenc_soft_info = { - .id = "obs_nvenc_av1_cuda", - .codec = "av1", - .type = OBS_ENCODER_VIDEO, - .caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI | - OBS_ENCODER_CAP_INTERNAL, - .get_name = av1_nvenc_soft_get_name, - .create = av1_nvenc_soft_create, - .destroy = nvenc_destroy, - .update = nvenc_update, - .encode = nvenc_encode_soft, - .get_defaults = av1_nvenc_defaults, - .get_properties = av1_nvenc_properties, - .get_extra_data = nvenc_extra_data, - .get_video_info = nvenc_soft_video_info, -}; diff --git a/plugins/obs-nvenc/CMakeLists.txt b/plugins/obs-nvenc/CMakeLists.txt new file mode 100644 index 00000000000000..301dd7edfc8476 --- /dev/null +++ b/plugins/obs-nvenc/CMakeLists.txt @@ -0,0 +1,58 @@ +cmake_minimum_required(VERSION 3.22...3.25) + +option(ENABLE_NVENC "Build NVIDIA Hardware Encoder Plugin" ON) +option(ENABLE_NVENC_FFMPEG_IDS "Register FFmpeg encoder IDs" ON) +mark_as_advanced(ENABLE_NVENC_FFMPEG_IDS) + +if(NOT ENABLE_NVENC) + target_disable_feature(obs-nvenc "NVIDIA Hardware Encoder") + 
target_disable(obs-nvenc) + return() +endif() + +if(NOT TARGET OBS::opts-parser) + add_subdirectory("${CMAKE_SOURCE_DIR}/shared/opts-parser" "${CMAKE_BINARY_DIR}/shared/opts-parser") +endif() + +if(OS_LINUX AND NOT TARGET OBS::glad) + add_subdirectory("${CMAKE_SOURCE_DIR}/deps/glad" "${CMAKE_BINARY_DIR}/deps/glad") +endif() + +find_package(FFnvcodec 12 REQUIRED) + +add_library(obs-nvenc MODULE) +add_library(OBS::nvenc ALIAS obs-nvenc) + +add_subdirectory(obs-nvenc-test) + +target_sources( + obs-nvenc + PRIVATE # cmake-format: sortable + $<$:nvenc-opengl.c> + $<$:nvenc-d3d11.c> + cuda-helpers.c + cuda-helpers.h + nvenc-compat.c + nvenc-cuda.c + nvenc-helpers.c + nvenc-helpers.h + nvenc-internal.h + nvenc-opts-parser.c + nvenc-properties.c + nvenc.c + obs-nvenc.c + obs-nvenc.h) + +target_link_libraries(obs-nvenc PRIVATE OBS::libobs OBS::opts-parser FFnvcodec::FFnvcodec + $<$:OBS::glad>) + +target_compile_definitions(obs-nvenc PRIVATE $<$:REGISTER_FFMPEG_IDS>) + +if(OS_WINDOWS) + configure_file(cmake/windows/obs-module.rc.in obs-nvenc.rc) + target_sources(obs-nvenc PRIVATE obs-nvenc.rc) +endif() + +# cmake-format: off +set_target_properties_obs(obs-nvenc PROPERTIES FOLDER plugins/obs-nvenc PREFIX "") +# cmake-format: on diff --git a/plugins/obs-nvenc/cmake/windows/obs-module.rc.in b/plugins/obs-nvenc/cmake/windows/obs-module.rc.in new file mode 100644 index 00000000000000..e0c73d52f79837 --- /dev/null +++ b/plugins/obs-nvenc/cmake/windows/obs-module.rc.in @@ -0,0 +1,24 @@ +1 VERSIONINFO +FILEVERSION ${OBS_VERSION_MAJOR},${OBS_VERSION_MINOR},${OBS_VERSION_PATCH},0 +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904B0" + BEGIN + VALUE "CompanyName", "${OBS_COMPANY_NAME}" + VALUE "FileDescription", "OBS NVENC module" + VALUE "FileVersion", "${OBS_VERSION_CANONICAL}" + VALUE "ProductName", "${OBS_PRODUCT_NAME}" + VALUE "ProductVersion", "${OBS_VERSION_CANONICAL}" + VALUE "Comments", "${OBS_COMMENTS}" + VALUE "LegalCopyright", "${OBS_LEGAL_COPYRIGHT}" + VALUE 
"InternalName", "obs-nvenc" + VALUE "OriginalFilename", "obs-nvenc" + END + END + + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x0409, 0x04B0 + END +END diff --git a/plugins/obs-nvenc/cuda-helpers.c b/plugins/obs-nvenc/cuda-helpers.c new file mode 100644 index 00000000000000..430a4d5d82c65d --- /dev/null +++ b/plugins/obs-nvenc/cuda-helpers.c @@ -0,0 +1,172 @@ +#include "obs-nvenc.h" + +#include "nvenc-internal.h" +#include "cuda-helpers.h" + +#include +#include +#include +#include +#include + +static void *cuda_lib = NULL; +static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER; +CudaFunctions *cu = NULL; + +bool load_cuda_lib(void) +{ +#ifdef _WIN32 + cuda_lib = os_dlopen("nvcuda.dll"); +#else + cuda_lib = os_dlopen("libcuda.so.1"); +#endif + return cuda_lib != NULL; +} + +static void *load_cuda_func(const char *func) +{ + void *func_ptr = os_dlsym(cuda_lib, func); + if (!func_ptr) { + blog(LOG_ERROR, "[obs-nvenc] Could not load function: %s", + func); + } + return func_ptr; +} + +typedef struct cuda_function { + ptrdiff_t offset; + const char *name; +} cuda_function; + +static const cuda_function cuda_functions[] = { + {offsetof(CudaFunctions, cuInit), "cuInit"}, + + {offsetof(CudaFunctions, cuDeviceGetCount), "cuDeviceGetCount"}, + {offsetof(CudaFunctions, cuDeviceGet), "cuDeviceGet"}, + {offsetof(CudaFunctions, cuDeviceGetAttribute), "cuDeviceGetAttribute"}, + + {offsetof(CudaFunctions, cuCtxCreate), "cuCtxCreate_v2"}, + {offsetof(CudaFunctions, cuCtxDestroy), "cuCtxDestroy_v2"}, + {offsetof(CudaFunctions, cuCtxPushCurrent), "cuCtxPushCurrent_v2"}, + {offsetof(CudaFunctions, cuCtxPopCurrent), "cuCtxPopCurrent_v2"}, + + {offsetof(CudaFunctions, cuArray3DCreate), "cuArray3DCreate_v2"}, + {offsetof(CudaFunctions, cuArrayDestroy), "cuArrayDestroy"}, + {offsetof(CudaFunctions, cuMemcpy2D), "cuMemcpy2D_v2"}, + + {offsetof(CudaFunctions, cuGetErrorName), "cuGetErrorName"}, + {offsetof(CudaFunctions, cuGetErrorString), "cuGetErrorString"}, + + 
{offsetof(CudaFunctions, cuMemHostRegister), "cuMemHostRegister_v2"}, + {offsetof(CudaFunctions, cuMemHostUnregister), "cuMemHostUnregister"}, + +#ifndef _WIN32 + {offsetof(CudaFunctions, cuGLGetDevices), "cuGLGetDevices_v2"}, + {offsetof(CudaFunctions, cuGraphicsGLRegisterImage), + "cuGraphicsGLRegisterImage"}, + {offsetof(CudaFunctions, cuGraphicsUnregisterResource), + "cuGraphicsUnregisterResource"}, + {offsetof(CudaFunctions, cuGraphicsMapResources), + "cuGraphicsMapResources"}, + {offsetof(CudaFunctions, cuGraphicsUnmapResources), + "cuGraphicsUnmapResources"}, + {offsetof(CudaFunctions, cuGraphicsSubResourceGetMappedArray), + "cuGraphicsSubResourceGetMappedArray"}, +#endif +}; + +static const size_t num_cuda_funcs = + sizeof(cuda_functions) / sizeof(cuda_function); + +static bool init_cuda_internal(obs_encoder_t *encoder) +{ + static bool initialized = false; + static bool success = false; + + if (initialized) + return success; + initialized = true; + + if (!load_cuda_lib()) { + obs_encoder_set_last_error(encoder, + "Loading CUDA library failed."); + return false; + } + + cu = bzalloc(sizeof(CudaFunctions)); + + for (size_t idx = 0; idx < num_cuda_funcs; idx++) { + const cuda_function func = cuda_functions[idx]; + void *fptr = load_cuda_func(func.name); + + if (!fptr) { + blog(LOG_ERROR, + "[obs-nvenc] Failed to find CUDA function: %s", + func.name); + obs_encoder_set_last_error( + encoder, "Loading CUDA functions failed."); + return false; + } + + *(uintptr_t *)((uintptr_t)cu + func.offset) = (uintptr_t)fptr; + } + + success = true; + return true; +} + +bool cuda_get_error_desc(CUresult res, const char **name, const char **desc) +{ + if (cu->cuGetErrorName(res, name) != CUDA_SUCCESS || + cu->cuGetErrorString(res, desc) != CUDA_SUCCESS) + return false; + + return true; +} + +bool cuda_error_check(struct nvenc_data *enc, CUresult res, const char *func, + const char *call) +{ + if (res == CUDA_SUCCESS) + return true; + + struct dstr message = {0}; + + const 
char *name, *desc; + if (cuda_get_error_desc(res, &name, &desc)) { + dstr_printf(&message, + "%s: CUDA call \"%s\" failed with %s (%d): %s", + func, call, name, res, desc); + } else { + dstr_printf(&message, "%s: CUDA call \"%s\" failed with %d", + func, call, res); + } + + error("%s", message.array); + obs_encoder_set_last_error(enc->encoder, message.array); + + dstr_free(&message); + return false; +} + +bool init_cuda(obs_encoder_t *encoder) +{ + bool success; + + pthread_mutex_lock(&init_mutex); + success = init_cuda_internal(encoder); + pthread_mutex_unlock(&init_mutex); + + return success; +} + +void obs_cuda_load(void) +{ + pthread_mutex_init(&init_mutex, NULL); +} + +void obs_cuda_unload(void) +{ + bfree(cu); + pthread_mutex_destroy(&init_mutex); +} diff --git a/plugins/obs-ffmpeg/obs-nvenc.h b/plugins/obs-nvenc/cuda-helpers.h similarity index 60% rename from plugins/obs-ffmpeg/obs-nvenc.h rename to plugins/obs-nvenc/cuda-helpers.h index 3c0f949f099c9d..be76261fef6335 100644 --- a/plugins/obs-ffmpeg/obs-nvenc.h +++ b/plugins/obs-nvenc/cuda-helpers.h @@ -1,21 +1,17 @@ #pragma once -#ifdef _WIN32 -#define WIN32_LEAN_AND_MEAN -#include -#endif - #include -#include -#include -#include "obs-nvenc-ver.h" +#include /* Missing from FFmpeg headers */ typedef CUresult CUDAAPI tcuMemHostRegister(void *p, size_t bytesize, unsigned int Flags); typedef CUresult CUDAAPI tcuMemHostUnregister(void *p); +#define CUDA_ERROR_INVALID_GRAPHICS_CONTEXT 219 +#define CUDA_ARRAY3D_SURFACE_LDST 0x02 + typedef struct CudaFunctions { tcuInit *cuInit; @@ -49,23 +45,22 @@ typedef struct CudaFunctions { #endif } CudaFunctions; -typedef NVENCSTATUS(NVENCAPI *NV_CREATE_INSTANCE_FUNC)( - NV_ENCODE_API_FUNCTION_LIST *); - -extern const char *nv_error_name(NVENCSTATUS err); -extern NV_ENCODE_API_FUNCTION_LIST nv; -extern NV_CREATE_INSTANCE_FUNC nv_create_instance; extern CudaFunctions *cu; -extern uint32_t get_nvenc_ver(void); -extern bool init_nvenc(obs_encoder_t *encoder); -extern bool 
init_cuda(obs_encoder_t *encoder); + +bool init_cuda(obs_encoder_t *encoder); bool cuda_get_error_desc(CUresult res, const char **name, const char **desc); -bool nv_fail2(obs_encoder_t *encoder, void *session, const char *format, ...); -bool nv_failed2(obs_encoder_t *encoder, void *session, NVENCSTATUS err, - const char *func, const char *call); -#define nv_fail(encoder, format, ...) \ - nv_fail2(encoder, enc->session, format, ##__VA_ARGS__) +struct nvenc_data; +bool cuda_error_check(struct nvenc_data *enc, CUresult res, const char *func, + const char *call); + +/* CUDA error handling */ +#define CU_FAILED(call) \ + if (!cuda_error_check(enc, call, __FUNCTION__, #call)) \ + return false; -#define nv_failed(encoder, err, func, call) \ - nv_failed2(encoder, enc->session, err, func, call) +#define CU_CHECK(call) \ + if (!cuda_error_check(enc, call, __FUNCTION__, #call)) { \ + success = false; \ + goto unmap; \ + } diff --git a/plugins/obs-nvenc/data/locale/en-US.ini b/plugins/obs-nvenc/data/locale/en-US.ini new file mode 100644 index 00000000000000..4d1418758fe5f8 --- /dev/null +++ b/plugins/obs-nvenc/data/locale/en-US.ini @@ -0,0 +1,71 @@ +RateControl="Rate Control" +CBR="Constant Bitrate" +VBR="Variable Bitrate" +CQVBR="Variable Bitrate with Target Quality" +CQP="Constant QP" +Lossless="Lossless" + +Bitrate="Bitrate" +MaxBitrate="Maximum Bitrate (0 = Level Limit)" +TargetQuality="Target Quality" + +KeyframeIntervalSec="Keyframe interval (seconds, 0 = auto)" + +LookAhead="Look-ahead" +LookAhead.ToolTip="Enables Lookahead.\n\nIf enabled, it will increase visual quality by determining a better bitrate distribution through analysis of future frames,\nat the cost of increased GPU utilization and latency." 
+ +AdaptiveQuantization="Adaptive Quantization" +AdaptiveQuantization.ToolTip="Enables Temporal/Spatial Adaptive Quantization which optimizes the use of bitrate for increased perceived visual quality,\nespecially in situations with high motion, at the cost of increased GPU utilization.\n\nFormerly known as \"Psycho-Visual Tuning\"." + +Preset="Preset" +Preset.p1="P1: Fastest (Lowest Quality)" +Preset.p2="P2: Faster (Lower Quality)" +Preset.p3="P3: Fast (Low Quality)" +Preset.p4="P4: Medium (Medium Quality)" +Preset.p5="P5: Slow (Good Quality)" +Preset.p6="P6: Slower (Better Quality)" +Preset.p7="P7: Slowest (Best Quality)" + +Tuning.uhq="Ultra High Quality (slow, not recommended)" +Tuning.hq="High Quality" +Tuning.ll="Low Latency" +Tuning.ull="Ultra Low Latency" + +Multipass="Multipass Mode" +Multipass.disabled="Single Pass" +Multipass.qres="Two Passes (Quarter Resolution)" +Multipass.fullres="Two Passes (Full Resolution)" + +BFrames="B-Frames" +BFrameRefMode="B-Frame as Reference" +BframeRefMode.Disabled="Disabled" +BframeRefMode.Each="Each" +BframeRefMode.Middle="Middle b-frame only" + +SplitEncode="Split Encode" +SplitEncode.Auto="Auto" +SplitEncode.Disabled="Disabled" +SplitEncode.Enabled="Two-way split" +SplitEncode.ThreeWay="Three-way split" + +Opts="Custom Encoder Options" +Opts.TT="Space-separated list of options to apply to the rate control and codec settings,\nbased on their names in the nvEncodeAPI header.\ne.g. \"lookaheadDepth=16 aqStrength=4\"" + +Error="Failed to open NVENC codec: %1" +GenericError="Try installing the latest NVIDIA driver and closing other recording software that might be using NVENC such as NVIDIA ShadowPlay or Windows Game DVR." +BadGPUIndex="You have selected GPU %1 in your output encoder settings. Set this back to 0 and try again." +OutdatedDriver="The installed NVIDIA driver does not support this NVENC version, try updating the driver." +UnsupportedDevice="NVENC Error: Unsupported device. 
Check that your video card supports NVENC and try updating the driver." +TooManySessions="NVENC Error: Too many concurrent sessions. Try closing other recording software that might be using NVENC such as NVIDIA ShadowPlay or Windows Game DVR." +CheckDrivers="Try installing the latest NVIDIA driver." + +8bitUnsupportedHdr="OBS does not support 8-bit output of Rec. 2100." +I010Unsupported="NVENC does not support I010. Use P010 instead." +10bitUnsupported="Cannot perform 10-bit encode on this encoder." +16bitUnsupported="Cannot perform 16-bit encode on this encoder." +444Unsupported="Cannot perform 4:4:4 encode on this encoder." + +# Legacy strings, to be removed once compat encoders are removed +CQLevel="CQ Level" +PsychoVisualTuning="Psycho Visual Tuning" +PsychoVisualTuning.ToolTip="Enables encoder settings that optimize the use of bitrate for increased perceived visual quality,\nespecially in situations with high motion, at the cost of increased GPU utilization." diff --git a/plugins/obs-nvenc/nvenc-compat.c b/plugins/obs-nvenc/nvenc-compat.c new file mode 100644 index 00000000000000..e8958d3b065900 --- /dev/null +++ b/plugins/obs-nvenc/nvenc-compat.c @@ -0,0 +1,457 @@ +#include "nvenc-helpers.h" + +#include + +/* + * Compatibility encoder objects for pre-31.0 encoder compatibility. + * + * All they do is update the settings object, and then reroute to one of the + * new encoder implementations. + * + * This should be removed once NVENC settings are migrated directly and + * backwards-compatibility is no longer required. + */ + +/* ------------------------------------------------------------------------- */ +/* Actual redirector implementation. 
*/ + +static void migrate_settings(obs_data_t *settings, enum codec_type codec) +{ + struct encoder_caps *caps = get_encoder_caps(codec); + + const char *preset = obs_data_get_string(settings, "preset2"); + obs_data_set_string(settings, "preset", preset); + + const char *rc = obs_data_get_string(settings, "rate_control"); + /* Old NVENC allowed lossless even if unsupported, + * and just emulated it via CQP 0, do the same here. */ + if (!caps->lossless && strcmp(rc, "lossless") == 0) { + obs_data_set_string(settings, "rate_control", "CQP"); + obs_data_set_int(settings, "cqp", 0); + } + + obs_data_set_bool(settings, "adaptive_quantization", + obs_data_get_bool(settings, "psycho_aq")); + + if (obs_data_has_user_value(settings, "gpu") && + num_encoder_devices() > 1) { + obs_data_set_int(settings, "device", + obs_data_get_int(settings, "gpu")); + } +} + +static void *nvenc_reroute(enum codec_type codec, obs_data_t *settings, + obs_encoder_t *encoder, bool texture) +{ + /* Update settings object to v2 encoder configuration */ + migrate_settings(settings, codec); + + switch (codec) { + case CODEC_H264: + return obs_encoder_create_rerouted( + encoder, + texture ? "obs_nvenc_h264_tex" : "obs_nvenc_h264_soft"); + case CODEC_HEVC: + return obs_encoder_create_rerouted( + encoder, + texture ? "obs_nvenc_hevc_tex" : "obs_nvenc_hevc_soft"); + case CODEC_AV1: + return obs_encoder_create_rerouted( + encoder, + texture ? 
"obs_nvenc_av1_tex" : "obs_nvenc_av1_soft"); + } + + return NULL; +} + +/* ------------------------------------------------------------------------- */ + +static const char *h264_nvenc_get_name(void *type_data) +{ + UNUSED_PARAMETER(type_data); + return "NVIDIA NVENC H.264 (deprecated)"; +} + +#ifdef ENABLE_HEVC +static const char *hevc_nvenc_get_name(void *type_data) +{ + UNUSED_PARAMETER(type_data); + return "NVIDIA NVENC HEVC (deprecated)"; +} +#endif + +static const char *av1_nvenc_get_name(void *type_data) +{ + UNUSED_PARAMETER(type_data); + return "NVIDIA NVENC AV1 (deprecated)"; +} + +static void *h264_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder) +{ + return nvenc_reroute(CODEC_H264, settings, encoder, true); +} + +#ifdef ENABLE_HEVC +static void *hevc_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder) +{ + return nvenc_reroute(CODEC_HEVC, settings, encoder, true); +} +#endif + +static void *av1_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder) +{ + return nvenc_reroute(CODEC_AV1, settings, encoder, true); +} + +static void *h264_nvenc_soft_create(obs_data_t *settings, + obs_encoder_t *encoder) +{ + return nvenc_reroute(CODEC_H264, settings, encoder, false); +} + +#ifdef ENABLE_HEVC +static void *hevc_nvenc_soft_create(obs_data_t *settings, + obs_encoder_t *encoder) +{ + return nvenc_reroute(CODEC_HEVC, settings, encoder, false); +} +#endif + +static void *av1_nvenc_soft_create(obs_data_t *settings, obs_encoder_t *encoder) +{ + return nvenc_reroute(CODEC_AV1, settings, encoder, false); +} + +static void nvenc_defaults_base(enum codec_type codec, obs_data_t *settings) +{ + /* Defaults from legacy FFmpeg encoder */ + obs_data_set_default_int(settings, "bitrate", 2500); + obs_data_set_default_int(settings, "max_bitrate", 5000); + obs_data_set_default_int(settings, "keyint_sec", 0); + obs_data_set_default_int(settings, "cqp", 20); + obs_data_set_default_string(settings, "rate_control", "CBR"); + 
obs_data_set_default_string(settings, "preset2", "p5"); + obs_data_set_default_string(settings, "multipass", "qres"); + obs_data_set_default_string(settings, "tune", "hq"); + obs_data_set_default_string(settings, "profile", + codec != CODEC_H264 ? "main" : "high"); + obs_data_set_default_bool(settings, "psycho_aq", true); + obs_data_set_default_int(settings, "gpu", 0); + obs_data_set_default_int(settings, "bf", 2); + obs_data_set_default_bool(settings, "repeat_headers", false); +} + +static void h264_nvenc_defaults(obs_data_t *settings) +{ + nvenc_defaults_base(CODEC_H264, settings); +} + +#ifdef ENABLE_HEVC +static void hevc_nvenc_defaults(obs_data_t *settings) +{ + nvenc_defaults_base(CODEC_HEVC, settings); +} +#endif + +static void av1_nvenc_defaults(obs_data_t *settings) +{ + nvenc_defaults_base(CODEC_AV1, settings); +} + +static bool rate_control_modified(obs_properties_t *ppts, obs_property_t *p, + obs_data_t *settings) +{ + const char *rc = obs_data_get_string(settings, "rate_control"); + bool cqp = astrcmpi(rc, "CQP") == 0; + bool vbr = astrcmpi(rc, "VBR") == 0; + bool lossless = astrcmpi(rc, "lossless") == 0; + + p = obs_properties_get(ppts, "bitrate"); + obs_property_set_visible(p, !cqp && !lossless); + p = obs_properties_get(ppts, "max_bitrate"); + obs_property_set_visible(p, vbr); + p = obs_properties_get(ppts, "cqp"); + obs_property_set_visible(p, cqp); + p = obs_properties_get(ppts, "preset2"); + obs_property_set_visible(p, !lossless); + p = obs_properties_get(ppts, "tune"); + obs_property_set_visible(p, !lossless); + + return true; +} + +static obs_properties_t *nvenc_properties_internal(enum codec_type codec) +{ + obs_properties_t *props = obs_properties_create(); + obs_property_t *p; + + p = obs_properties_add_list(props, "rate_control", + obs_module_text("RateControl"), + OBS_COMBO_TYPE_LIST, + OBS_COMBO_FORMAT_STRING); + obs_property_list_add_string(p, "CBR", "CBR"); + obs_property_list_add_string(p, "CQP", "CQP"); + 
obs_property_list_add_string(p, "VBR", "VBR"); + obs_property_list_add_string(p, obs_module_text("Lossless"), + "lossless"); + + obs_property_set_modified_callback(p, rate_control_modified); + + p = obs_properties_add_int(props, "bitrate", obs_module_text("Bitrate"), + 50, 300000, 50); + obs_property_int_set_suffix(p, " Kbps"); + p = obs_properties_add_int(props, "max_bitrate", + obs_module_text("MaxBitrate"), 50, 300000, + 50); + obs_property_int_set_suffix(p, " Kbps"); + + obs_properties_add_int(props, "cqp", obs_module_text("CQLevel"), 1, + codec == CODEC_AV1 ? 63 : 51, 1); + + p = obs_properties_add_int(props, "keyint_sec", + obs_module_text("KeyframeIntervalSec"), 0, + 10, 1); + obs_property_int_set_suffix(p, " s"); + + p = obs_properties_add_list(props, "preset2", obs_module_text("Preset"), + OBS_COMBO_TYPE_LIST, + OBS_COMBO_FORMAT_STRING); + +#define add_preset(val) \ + obs_property_list_add_string(p, obs_module_text("Preset." val), val) + + add_preset("p1"); + add_preset("p2"); + add_preset("p3"); + add_preset("p4"); + add_preset("p5"); + add_preset("p6"); + add_preset("p7"); +#undef add_preset + + p = obs_properties_add_list(props, "tune", obs_module_text("Tuning"), + OBS_COMBO_TYPE_LIST, + OBS_COMBO_FORMAT_STRING); + +#define add_tune(val) \ + obs_property_list_add_string(p, obs_module_text("Tuning." val), val) + add_tune("hq"); + add_tune("ll"); + add_tune("ull"); +#undef add_tune + + p = obs_properties_add_list(props, "multipass", + obs_module_text("Multipass"), + OBS_COMBO_TYPE_LIST, + OBS_COMBO_FORMAT_STRING); + +#define add_multipass(val) \ + obs_property_list_add_string(p, obs_module_text("Multipass." 
val), val) + add_multipass("disabled"); + add_multipass("qres"); + add_multipass("fullres"); +#undef add_multipass + + p = obs_properties_add_list(props, "profile", + obs_module_text("Profile"), + OBS_COMBO_TYPE_LIST, + OBS_COMBO_FORMAT_STRING); + +#define add_profile(val) obs_property_list_add_string(p, val, val) + if (codec == CODEC_HEVC) { + add_profile("main10"); + add_profile("main"); + } else if (codec == CODEC_AV1) { + add_profile("main"); + } else { + add_profile("high"); + add_profile("main"); + add_profile("baseline"); + } +#undef add_profile + + p = obs_properties_add_bool(props, "lookahead", + obs_module_text("LookAhead")); + obs_property_set_long_description(p, + obs_module_text("LookAhead.ToolTip")); + p = obs_properties_add_bool(props, "repeat_headers", "repeat_headers"); + obs_property_set_visible(p, false); + + p = obs_properties_add_bool(props, "psycho_aq", + obs_module_text("PsychoVisualTuning")); + obs_property_set_long_description( + p, obs_module_text("PsychoVisualTuning.ToolTip")); + + obs_properties_add_int(props, "gpu", obs_module_text("GPU"), 0, 8, 1); + + obs_properties_add_int(props, "bf", obs_module_text("BFrames"), 0, 4, + 1); + + return props; +} + +static obs_properties_t *h264_nvenc_properties(void *unused) +{ + UNUSED_PARAMETER(unused); + return nvenc_properties_internal(CODEC_H264); +} + +#ifdef ENABLE_HEVC +static obs_properties_t *hevc_nvenc_properties(void *unused) +{ + UNUSED_PARAMETER(unused); + return nvenc_properties_internal(CODEC_HEVC); +} +#endif + +static obs_properties_t *av1_nvenc_properties(void *unused) +{ + UNUSED_PARAMETER(unused); + return nvenc_properties_internal(CODEC_AV1); +} + +/* ------------------------------------------------------------------------- */ +/* Stubs for required - but unused - functions. 
*/ + +static void fake_nvenc_destroy(void *p) +{ + UNUSED_PARAMETER(p); +} + +static bool fake_encode(void *data, struct encoder_frame *frame, + struct encoder_packet *packet, bool *received_packet) +{ + UNUSED_PARAMETER(data); + UNUSED_PARAMETER(frame); + UNUSED_PARAMETER(packet); + UNUSED_PARAMETER(received_packet); + + return true; +} + +static bool fake_encode_tex2(void *data, struct encoder_texture *texture, + int64_t pts, uint64_t lock_key, uint64_t *next_key, + struct encoder_packet *packet, + bool *received_packet) +{ + UNUSED_PARAMETER(data); + UNUSED_PARAMETER(texture); + UNUSED_PARAMETER(pts); + UNUSED_PARAMETER(lock_key); + UNUSED_PARAMETER(next_key); + UNUSED_PARAMETER(packet); + UNUSED_PARAMETER(received_packet); + + return true; +} + +struct obs_encoder_info compat_h264_nvenc_info = { + .id = "jim_nvenc", + .codec = "h264", + .type = OBS_ENCODER_VIDEO, + .caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE | + OBS_ENCODER_CAP_ROI | OBS_ENCODER_CAP_DEPRECATED, + .get_name = h264_nvenc_get_name, + .create = h264_nvenc_create, + .destroy = fake_nvenc_destroy, + .encode_texture2 = fake_encode_tex2, + .get_defaults = h264_nvenc_defaults, + .get_properties = h264_nvenc_properties, +}; + +#ifdef ENABLE_HEVC +struct obs_encoder_info compat_hevc_nvenc_info = { + .id = "jim_hevc_nvenc", + .codec = "hevc", + .type = OBS_ENCODER_VIDEO, + .caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE | + OBS_ENCODER_CAP_ROI | OBS_ENCODER_CAP_DEPRECATED, + .get_name = hevc_nvenc_get_name, + .create = hevc_nvenc_create, + .destroy = fake_nvenc_destroy, + .encode_texture2 = fake_encode_tex2, + .get_defaults = hevc_nvenc_defaults, + .get_properties = hevc_nvenc_properties, +}; +#endif + +struct obs_encoder_info compat_av1_nvenc_info = { + .id = "jim_av1_nvenc", + .codec = "av1", + .type = OBS_ENCODER_VIDEO, + .caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE | + OBS_ENCODER_CAP_ROI | OBS_ENCODER_CAP_DEPRECATED, + .get_name = 
av1_nvenc_get_name, + .create = av1_nvenc_create, + .destroy = fake_nvenc_destroy, + .encode_texture2 = fake_encode_tex2, + .get_defaults = av1_nvenc_defaults, + .get_properties = av1_nvenc_properties, +}; + +struct obs_encoder_info compat_h264_nvenc_soft_info = { + .id = "obs_nvenc_h264_cuda", + .codec = "h264", + .type = OBS_ENCODER_VIDEO, + .caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI | + OBS_ENCODER_CAP_DEPRECATED, + .get_name = h264_nvenc_get_name, + .create = h264_nvenc_soft_create, + .destroy = fake_nvenc_destroy, + .encode = fake_encode, + .get_defaults = h264_nvenc_defaults, + .get_properties = h264_nvenc_properties, +}; + +#ifdef ENABLE_HEVC +struct obs_encoder_info compat_hevc_nvenc_soft_info = { + .id = "obs_nvenc_hevc_cuda", + .codec = "hevc", + .type = OBS_ENCODER_VIDEO, + .caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI | + OBS_ENCODER_CAP_DEPRECATED, + .get_name = hevc_nvenc_get_name, + .create = hevc_nvenc_soft_create, + .destroy = fake_nvenc_destroy, + .encode = fake_encode, + .get_defaults = hevc_nvenc_defaults, + .get_properties = hevc_nvenc_properties, +}; +#endif + +struct obs_encoder_info compat_av1_nvenc_soft_info = { + .id = "obs_nvenc_av1_cuda", + .codec = "av1", + .type = OBS_ENCODER_VIDEO, + .caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI | + OBS_ENCODER_CAP_DEPRECATED, + .get_name = av1_nvenc_get_name, + .create = av1_nvenc_soft_create, + .destroy = fake_nvenc_destroy, + .encode = fake_encode, + .get_defaults = av1_nvenc_defaults, + .get_properties = av1_nvenc_properties, +}; + +void register_compat_encoders(void) +{ + obs_register_encoder(&compat_h264_nvenc_info); + obs_register_encoder(&compat_h264_nvenc_soft_info); +#ifdef ENABLE_HEVC + obs_register_encoder(&compat_hevc_nvenc_info); + obs_register_encoder(&compat_hevc_nvenc_soft_info); +#endif + if (is_codec_supported(CODEC_AV1)) { + obs_register_encoder(&compat_av1_nvenc_info); + obs_register_encoder(&compat_av1_nvenc_soft_info); + } + +#ifdef 
REGISTER_FFMPEG_IDS + compat_h264_nvenc_soft_info.id = "ffmpeg_nvenc"; + obs_register_encoder(&compat_h264_nvenc_soft_info); +#ifdef ENABLE_HEVC + compat_hevc_nvenc_soft_info.id = "ffmpeg_hevc_nvenc"; + obs_register_encoder(&compat_hevc_nvenc_soft_info); +#endif +#endif +} diff --git a/plugins/obs-nvenc/nvenc-cuda.c b/plugins/obs-nvenc/nvenc-cuda.c new file mode 100644 index 00000000000000..5cdbca45e165c8 --- /dev/null +++ b/plugins/obs-nvenc/nvenc-cuda.c @@ -0,0 +1,345 @@ +#include "nvenc-internal.h" +#include "nvenc-helpers.h" + +/* + * NVENC implementation using CUDA context and arrays + */ + +/* ------------------------------------------------------------------------- */ +/* CUDA Context management */ + +bool cuda_ctx_init(struct nvenc_data *enc, obs_data_t *settings, + const bool texture) +{ +#ifdef _WIN32 + if (texture) + return true; +#endif + + int count; + CUdevice device; + + int gpu = (int)obs_data_get_int(settings, "device"); +#ifndef _WIN32 + /* CUDA can do fairly efficient cross-GPU OpenGL mappings, allow it as + * a hidden option for experimentation. */ + bool force_cuda_tex = obs_data_get_bool(settings, "force_cuda_tex"); +#endif + + if (gpu == -1) + gpu = 0; + + CU_FAILED(cu->cuInit(0)) + CU_FAILED(cu->cuDeviceGetCount(&count)) + if (!count) { + NV_FAIL("No CUDA devices found"); + return false; + } +#ifdef _WIN32 + CU_FAILED(cu->cuDeviceGet(&device, gpu)) +#else + if (!texture || force_cuda_tex) { + CU_FAILED(cu->cuDeviceGet(&device, gpu)) + } else { + unsigned int ctx_count = 0; + CUdevice devices[2]; + + obs_enter_graphics(); + CUresult res = cu->cuGLGetDevices(&ctx_count, devices, 2, + CU_GL_DEVICE_LIST_ALL); + obs_leave_graphics(); + + if (res != CUDA_SUCCESS || !ctx_count) { + /* Probably running on iGPU, should just fall back to + * non-texture encoder. 
*/ + if (res == CUDA_ERROR_INVALID_GRAPHICS_CONTEXT) { + info("Not running on NVIDIA GPU, falling back " + "to non-texture encoder"); + } else { + const char *name, *desc; + if (cuda_get_error_desc(res, &name, &desc)) { + error("Failed to get a CUDA device for " + "the current OpenGL context: " + "%s: %s", + name, desc); + } else { + error("Failed to get a CUDA device for " + "the current OpenGL context: %d", + res); + } + } + return false; + } + + /* Documentation indicates this should only ever happen with + * SLI, i.e. never for OBS. */ + if (ctx_count > 1) { + warn("Got more than one CUDA devices for OpenGL context," + " this is untested."); + } + + device = devices[0]; + debug("Loading up CUDA on device %u", device); + } +#endif + CU_FAILED(cu->cuCtxCreate(&enc->cu_ctx, 0, device)) + CU_FAILED(cu->cuCtxPopCurrent(NULL)) + + return true; +} + +void cuda_ctx_free(struct nvenc_data *enc) +{ + if (enc->cu_ctx) { + cu->cuCtxPopCurrent(NULL); + cu->cuCtxDestroy(enc->cu_ctx); + } +} + +/* ------------------------------------------------------------------------- */ +/* CUDA Surface management */ + +static bool cuda_surface_init(struct nvenc_data *enc, + struct nv_cuda_surface *nvsurf) +{ + const bool p010 = obs_p010_tex_active(); + CUDA_ARRAY3D_DESCRIPTOR desc; + desc.Width = enc->cx; + desc.Height = enc->cy; + desc.Depth = 0; + desc.Flags = CUDA_ARRAY3D_SURFACE_LDST; + desc.NumChannels = 1; + + if (!enc->non_texture) { + desc.Format = p010 ? 
CU_AD_FORMAT_UNSIGNED_INT16 + : CU_AD_FORMAT_UNSIGNED_INT8; + desc.Height = enc->cy + enc->cy / 2; + } else { + switch (enc->surface_format) { + case NV_ENC_BUFFER_FORMAT_NV12: + desc.Format = CU_AD_FORMAT_UNSIGNED_INT8; + // Additional half-height plane for UV data + desc.Height += enc->cy / 2; + break; + case NV_ENC_BUFFER_FORMAT_YUV420_10BIT: + desc.Format = CU_AD_FORMAT_UNSIGNED_INT16; + desc.Height += enc->cy / 2; + desc.NumChannels = 2; // number of bytes per element + break; + case NV_ENC_BUFFER_FORMAT_YUV444: + desc.Format = CU_AD_FORMAT_UNSIGNED_INT8; + desc.Height *= 3; // 3 full-size planes + break; + default: + error("Unknown input format: %d", enc->surface_format); + return false; + } + } + + CU_FAILED(cu->cuArray3DCreate(&nvsurf->tex, &desc)) + + NV_ENC_REGISTER_RESOURCE res = {0}; + res.version = NV_ENC_REGISTER_RESOURCE_VER; + res.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDAARRAY; + res.resourceToRegister = (void *)nvsurf->tex; + res.width = enc->cx; + res.height = enc->cy; + res.pitch = (uint32_t)(desc.Width * desc.NumChannels); + if (!enc->non_texture) { + res.bufferFormat = p010 ? 
NV_ENC_BUFFER_FORMAT_YUV420_10BIT + : NV_ENC_BUFFER_FORMAT_NV12; + } else { + res.bufferFormat = enc->surface_format; + } + + if (NV_FAILED(nv.nvEncRegisterResource(enc->session, &res))) { + return false; + } + + nvsurf->res = res.registeredResource; + nvsurf->mapped_res = NULL; + return true; +} + +/* Selects the NVENC surface format from enc->in_format and creates + * enc->buf_count CUDA input surfaces while enc->cu_ctx is current. + * Returns false if the context cannot be pushed/popped or if any surface + * fails to initialise. */ +bool cuda_init_surfaces(struct nvenc_data *enc) +{ + switch (enc->in_format) { + case VIDEO_FORMAT_P010: + enc->surface_format = NV_ENC_BUFFER_FORMAT_YUV420_10BIT; + break; + case VIDEO_FORMAT_I444: + enc->surface_format = NV_ENC_BUFFER_FORMAT_YUV444; + break; + default: + enc->surface_format = NV_ENC_BUFFER_FORMAT_NV12; + } + + da_reserve(enc->surfaces, enc->buf_count); + + CU_FAILED(cu->cuCtxPushCurrent(enc->cu_ctx)) + for (uint32_t i = 0; i < enc->buf_count; i++) { + struct nv_cuda_surface buf; + if (!cuda_surface_init(enc, &buf)) { + /* Pop before bailing out so the context pushed above + * is not left current on the calling thread. */ + cu->cuCtxPopCurrent(NULL); + return false; + } + + da_push_back(enc->surfaces, &buf); + } + CU_FAILED(cu->cuCtxPopCurrent(NULL)) + + return true; +} + +static void cuda_surface_free(struct nvenc_data *enc, + struct nv_cuda_surface *nvsurf) +{ + if (nvsurf->res) { + if (nvsurf->mapped_res) { + nv.nvEncUnmapInputResource(enc->session, + nvsurf->mapped_res); + } + nv.nvEncUnregisterResource(enc->session, nvsurf->res); + cu->cuArrayDestroy(nvsurf->tex); + } +} + +void cuda_free_surfaces(struct nvenc_data *enc) +{ + if (!enc->cu_ctx) + return; + + cu->cuCtxPushCurrent(enc->cu_ctx); + for (size_t i = 0; i < enc->surfaces.num; i++) { + cuda_surface_free(enc, &enc->surfaces.array[i]); + } + cu->cuCtxPopCurrent(NULL); +} + +/* ------------------------------------------------------------------------- */ +/* Actual encoding stuff */ + +/* Uploads one host frame into the CUDA array backing surf. + * + * The source planes are page-locked with cuMemHostRegister for the duration + * of the copy. Once the context has been pushed, every failing CUDA call + * goes through CU_CHECK so that the code at the "unmap" label still + * unregisters the host memory and pops the context. + * + * Returns true on success, false if any CUDA call failed. */ +static inline bool copy_frame(struct nvenc_data *enc, + struct encoder_frame *frame, + struct nv_cuda_surface *surf) +{ + bool success = true; + size_t height = enc->cy; + size_t width = enc->cx; + CUDA_MEMCPY2D m = {0}; + + m.srcMemoryType = CU_MEMORYTYPE_HOST; + m.dstMemoryType = CU_MEMORYTYPE_ARRAY; + m.dstArray = surf->tex; + m.WidthInBytes 
= width; + m.Height = height; + + /* Nothing to clean up yet, so a plain early return is fine here. */ + CU_FAILED(cu->cuCtxPushCurrent(enc->cu_ctx)) + + if (enc->surface_format == NV_ENC_BUFFER_FORMAT_NV12) { + /* Page-locks the host memory so that it can be DMAd directly + * rather than CUDA doing an internal copy to page-locked + * memory before actually DMA-ing to the GPU. */ + CU_CHECK(cu->cuMemHostRegister(frame->data[0], + frame->linesize[0] * height, 0)) + CU_CHECK(cu->cuMemHostRegister( + frame->data[1], frame->linesize[1] * height / 2, 0)) + + /* Copies use CU_CHECK (not CU_FAILED) so a failure still + * reaches the cleanup at "unmap". */ + m.srcPitch = frame->linesize[0]; + m.srcHost = frame->data[0]; + CU_CHECK(cu->cuMemcpy2D(&m)) + + m.srcPitch = frame->linesize[1]; + m.srcHost = frame->data[1]; + m.dstY += height; + m.Height /= 2; + CU_CHECK(cu->cuMemcpy2D(&m)) + } else if (enc->surface_format == NV_ENC_BUFFER_FORMAT_YUV420_10BIT) { + CU_CHECK(cu->cuMemHostRegister(frame->data[0], + frame->linesize[0] * height, 0)) + CU_CHECK(cu->cuMemHostRegister( + frame->data[1], frame->linesize[1] * height / 2, 0)) + + /* P010 lines are double the size (16 bit per pixel) */ + m.WidthInBytes *= 2; + + m.srcPitch = frame->linesize[0]; + m.srcHost = frame->data[0]; + CU_CHECK(cu->cuMemcpy2D(&m)) + + m.srcPitch = frame->linesize[1]; + m.srcHost = frame->data[1]; + m.dstY += height; + m.Height /= 2; + CU_CHECK(cu->cuMemcpy2D(&m)) + } else { /* I444 */ + CU_CHECK(cu->cuMemHostRegister(frame->data[0], + frame->linesize[0] * height, 0)) + CU_CHECK(cu->cuMemHostRegister(frame->data[1], + frame->linesize[1] * height, 0)) + CU_CHECK(cu->cuMemHostRegister(frame->data[2], + frame->linesize[2] * height, 0)) + + m.srcPitch = frame->linesize[0]; + m.srcHost = frame->data[0]; + CU_CHECK(cu->cuMemcpy2D(&m)) + + m.srcPitch = frame->linesize[1]; + m.srcHost = frame->data[1]; + m.dstY += height; + CU_CHECK(cu->cuMemcpy2D(&m)) + + m.srcPitch = frame->linesize[2]; + m.srcHost = frame->data[2]; + m.dstY += height; + CU_CHECK(cu->cuMemcpy2D(&m)) + } + +unmap: + if (frame->data[0]) + cu->cuMemHostUnregister(frame->data[0]); + if (frame->data[1]) + 
cu->cuMemHostUnregister(frame->data[1]); + if (frame->data[2]) + cu->cuMemHostUnregister(frame->data[2]); + + CU_FAILED(cu->cuCtxPopCurrent(NULL)) + + return success; +} + +bool cuda_encode(void *data, struct encoder_frame *frame, + struct encoder_packet *packet, bool *received_packet) +{ + struct nvenc_data *enc = data; + struct nv_cuda_surface *surf; + struct nv_bitstream *bs; + + bs = &enc->bitstreams.array[enc->next_bitstream]; + surf = &enc->surfaces.array[enc->next_bitstream]; + + deque_push_back(&enc->dts_list, &frame->pts, sizeof(frame->pts)); + + /* ------------------------------------ */ + /* copy to CUDA surface */ + + if (!copy_frame(enc, frame, surf)) + return false; + + /* ------------------------------------ */ + /* map output tex so nvenc can use it */ + + NV_ENC_MAP_INPUT_RESOURCE map = {NV_ENC_MAP_INPUT_RESOURCE_VER}; + map.registeredResource = surf->res; + map.mappedBufferFmt = enc->surface_format; + + if (NV_FAILED(nv.nvEncMapInputResource(enc->session, &map))) + return false; + + surf->mapped_res = map.mappedResource; + + /* ------------------------------------ */ + /* do actual encode call */ + + return nvenc_encode_base(enc, bs, surf->mapped_res, frame->pts, packet, + received_packet); +} diff --git a/plugins/obs-nvenc/nvenc-d3d11.c b/plugins/obs-nvenc/nvenc-d3d11.c new file mode 100644 index 00000000000000..3ebe230140bced --- /dev/null +++ b/plugins/obs-nvenc/nvenc-d3d11.c @@ -0,0 +1,278 @@ +#include "nvenc-internal.h" +#include "nvenc-helpers.h" + +/* + * NVENC implementation using Direct3D 11 context and textures + */ + +/* ------------------------------------------------------------------------- */ +/* D3D11 Context/Device management */ + +static HANDLE get_lib(struct nvenc_data *enc, const char *lib) +{ + HMODULE mod = GetModuleHandleA(lib); + if (mod) + return mod; + + mod = LoadLibraryA(lib); + if (!mod) + error("Failed to load %s", lib); + return mod; +} + +typedef HRESULT(WINAPI *CREATEDXGIFACTORY1PROC)(REFIID, void **); + +bool 
d3d11_init(struct nvenc_data *enc, obs_data_t *settings) +{ + HMODULE dxgi = get_lib(enc, "DXGI.dll"); + HMODULE d3d11 = get_lib(enc, "D3D11.dll"); + CREATEDXGIFACTORY1PROC create_dxgi; + PFN_D3D11_CREATE_DEVICE create_device; + IDXGIFactory1 *factory; + IDXGIAdapter *adapter; + ID3D11Device *device; + ID3D11DeviceContext *context; + HRESULT hr; + + if (!dxgi || !d3d11) { + return false; + } + + create_dxgi = (CREATEDXGIFACTORY1PROC)GetProcAddress( + dxgi, "CreateDXGIFactory1"); + create_device = (PFN_D3D11_CREATE_DEVICE)GetProcAddress( + d3d11, "D3D11CreateDevice"); + + if (!create_dxgi || !create_device) { + error("Failed to load D3D11/DXGI procedures"); + return false; + } + + hr = create_dxgi(&IID_IDXGIFactory1, &factory); + if (FAILED(hr)) { + error_hr("CreateDXGIFactory1 failed"); + return false; + } + + hr = factory->lpVtbl->EnumAdapters(factory, 0, &adapter); + factory->lpVtbl->Release(factory); + if (FAILED(hr)) { + error_hr("EnumAdapters failed"); + return false; + } + + hr = create_device(adapter, D3D_DRIVER_TYPE_UNKNOWN, NULL, 0, NULL, 0, + D3D11_SDK_VERSION, &device, NULL, &context); + adapter->lpVtbl->Release(adapter); + if (FAILED(hr)) { + error_hr("D3D11CreateDevice failed"); + return false; + } + + enc->device = device; + enc->context = context; + return true; +} + +void d3d11_free(struct nvenc_data *enc) +{ + for (size_t i = 0; i < enc->input_textures.num; i++) { + ID3D11Texture2D *tex = enc->input_textures.array[i].tex; + IDXGIKeyedMutex *km = enc->input_textures.array[i].km; + tex->lpVtbl->Release(tex); + km->lpVtbl->Release(km); + } + if (enc->context) { + enc->context->lpVtbl->Release(enc->context); + } + if (enc->device) { + enc->device->lpVtbl->Release(enc->device); + } +} + +/* ------------------------------------------------------------------------- */ +/* D3D11 Surface management */ + +static bool d3d11_texture_init(struct nvenc_data *enc, struct nv_texture *nvtex) +{ + const bool p010 = obs_p010_tex_active(); + + D3D11_TEXTURE2D_DESC 
desc = {0}; + desc.Width = enc->cx; + desc.Height = enc->cy; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = p010 ? DXGI_FORMAT_P010 : DXGI_FORMAT_NV12; + desc.SampleDesc.Count = 1; + desc.BindFlags = D3D11_BIND_RENDER_TARGET; + + ID3D11Device *const device = enc->device; + ID3D11Texture2D *tex; + HRESULT hr = device->lpVtbl->CreateTexture2D(device, &desc, NULL, &tex); + if (FAILED(hr)) { + error_hr("Failed to create texture"); + return false; + } + + tex->lpVtbl->SetEvictionPriority(tex, DXGI_RESOURCE_PRIORITY_MAXIMUM); + + NV_ENC_REGISTER_RESOURCE res = {NV_ENC_REGISTER_RESOURCE_VER}; + res.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX; + res.resourceToRegister = tex; + res.width = enc->cx; + res.height = enc->cy; + res.bufferFormat = p010 ? NV_ENC_BUFFER_FORMAT_YUV420_10BIT + : NV_ENC_BUFFER_FORMAT_NV12; + + if (NV_FAILED(nv.nvEncRegisterResource(enc->session, &res))) { + tex->lpVtbl->Release(tex); + return false; + } + + nvtex->res = res.registeredResource; + nvtex->tex = tex; + nvtex->mapped_res = NULL; + return true; +} + +bool d3d11_init_textures(struct nvenc_data *enc) +{ + //blog(LOG_DEBUG, "buf count: %d", enc->buf_count); + da_reserve(enc->textures, enc->buf_count); + for (uint32_t i = 0; i < enc->buf_count; i++) { + struct nv_texture texture; + if (!d3d11_texture_init(enc, &texture)) { + return false; + } + + da_push_back(enc->textures, &texture); + } + + return true; +} + +static void d3d11_texture_free(struct nvenc_data *enc, struct nv_texture *nvtex) +{ + + if (nvtex->res) { + if (nvtex->mapped_res) { + nv.nvEncUnmapInputResource(enc->session, + nvtex->mapped_res); + } + nv.nvEncUnregisterResource(enc->session, nvtex->res); + nvtex->tex->lpVtbl->Release(nvtex->tex); + } +} + +void d3d11_free_textures(struct nvenc_data *enc) +{ + for (size_t i = 0; i < enc->textures.num; i++) { + d3d11_texture_free(enc, &enc->textures.array[i]); + } +} + +/* ------------------------------------------------------------------------- */ +/* Actual 
encoding stuff */ + +static ID3D11Texture2D *get_tex_from_handle(struct nvenc_data *enc, + uint32_t handle, + IDXGIKeyedMutex **km_out) +{ + ID3D11Device *device = enc->device; + IDXGIKeyedMutex *km; + ID3D11Texture2D *input_tex; + HRESULT hr; + + for (size_t i = 0; i < enc->input_textures.num; i++) { + struct handle_tex *ht = &enc->input_textures.array[i]; + if (ht->handle == handle) { + *km_out = ht->km; + return ht->tex; + } + } + + hr = device->lpVtbl->OpenSharedResource(device, + (HANDLE)(uintptr_t)handle, + &IID_ID3D11Texture2D, + &input_tex); + if (FAILED(hr)) { + error_hr("OpenSharedResource failed"); + return NULL; + } + + hr = input_tex->lpVtbl->QueryInterface(input_tex, &IID_IDXGIKeyedMutex, + &km); + if (FAILED(hr)) { + error_hr("QueryInterface(IDXGIKeyedMutex) failed"); + input_tex->lpVtbl->Release(input_tex); + return NULL; + } + + input_tex->lpVtbl->SetEvictionPriority(input_tex, + DXGI_RESOURCE_PRIORITY_MAXIMUM); + + *km_out = km; + + struct handle_tex new_ht = {handle, input_tex, km}; + da_push_back(enc->input_textures, &new_ht); + return input_tex; +} + +bool d3d11_encode(void *data, struct encoder_texture *texture, int64_t pts, + uint64_t lock_key, uint64_t *next_key, + struct encoder_packet *packet, bool *received_packet) +{ + struct nvenc_data *enc = data; + ID3D11DeviceContext *context = enc->context; + ID3D11Texture2D *input_tex; + ID3D11Texture2D *output_tex; + IDXGIKeyedMutex *km; + struct nv_texture *nvtex; + struct nv_bitstream *bs; + + if (texture->handle == GS_INVALID_HANDLE) { + error("Encode failed: bad texture handle"); + *next_key = lock_key; + return false; + } + + bs = &enc->bitstreams.array[enc->next_bitstream]; + nvtex = &enc->textures.array[enc->next_bitstream]; + + input_tex = get_tex_from_handle(enc, texture->handle, &km); + output_tex = nvtex->tex; + + if (!input_tex) { + *next_key = lock_key; + return false; + } + + deque_push_back(&enc->dts_list, &pts, sizeof(pts)); + + /* ------------------------------------ */ + /* copy 
to output tex */ + + km->lpVtbl->AcquireSync(km, lock_key, INFINITE); + + context->lpVtbl->CopyResource(context, (ID3D11Resource *)output_tex, + (ID3D11Resource *)input_tex); + + km->lpVtbl->ReleaseSync(km, *next_key); + + /* ------------------------------------ */ + /* map output tex so nvenc can use it */ + + NV_ENC_MAP_INPUT_RESOURCE map = {NV_ENC_MAP_INPUT_RESOURCE_VER}; + map.registeredResource = nvtex->res; + if (NV_FAILED(nv.nvEncMapInputResource(enc->session, &map))) { + return false; + } + + nvtex->mapped_res = map.mappedResource; + + /* ------------------------------------ */ + /* do actual encode call */ + + return nvenc_encode_base(enc, bs, nvtex->mapped_res, pts, packet, + received_packet); +} diff --git a/plugins/obs-ffmpeg/obs-nvenc-helpers.c b/plugins/obs-nvenc/nvenc-helpers.c similarity index 54% rename from plugins/obs-ffmpeg/obs-nvenc-helpers.c rename to plugins/obs-nvenc/nvenc-helpers.c index e5b3f369a0e03b..a07d6ff274585e 100644 --- a/plugins/obs-ffmpeg/obs-nvenc-helpers.c +++ b/plugins/obs-nvenc/nvenc-helpers.c @@ -1,20 +1,23 @@ #include "obs-nvenc.h" +#include "nvenc-helpers.h" + #include #include #include #include #include -#ifdef _WIN32 -#include -#endif - static void *nvenc_lib = NULL; -static void *cuda_lib = NULL; static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER; NV_ENCODE_API_FUNCTION_LIST nv = {NV_ENCODE_API_FUNCTION_LIST_VER}; NV_CREATE_INSTANCE_FUNC nv_create_instance = NULL; -CudaFunctions *cu = NULL; + +/* Will be populated with results from obs-nvenc-test */ +static struct encoder_caps encoder_capabilities[3]; +static bool codec_supported[3]; +static int num_devices; +static int driver_version_major; +static int driver_version_minor; #define error(format, ...) 
blog(LOG_ERROR, "[obs-nvenc] " format, ##__VA_ARGS__) @@ -61,19 +64,19 @@ bool nv_failed2(obs_encoder_t *encoder, void *session, NVENCSTATUS err, switch (err) { case NV_ENC_ERR_OUT_OF_MEMORY: - obs_encoder_set_last_error( - encoder, obs_module_text("NVENC.TooManySessions")); + obs_encoder_set_last_error(encoder, + obs_module_text("TooManySessions")); break; case NV_ENC_ERR_NO_ENCODE_DEVICE: case NV_ENC_ERR_UNSUPPORTED_DEVICE: obs_encoder_set_last_error( - encoder, obs_module_text("NVENC.UnsupportedDevice")); + encoder, obs_module_text("UnsupportedDevice")); break; case NV_ENC_ERR_INVALID_VERSION: - obs_encoder_set_last_error( - encoder, obs_module_text("NVENC.OutdatedDriver")); + obs_encoder_set_last_error(encoder, + obs_module_text("OutdatedDriver")); break; default: @@ -122,28 +125,9 @@ static void *load_nv_func(const char *func) return func_ptr; } -bool load_cuda_lib(void) -{ -#ifdef _WIN32 - cuda_lib = os_dlopen("nvcuda.dll"); -#else - cuda_lib = os_dlopen("libcuda.so.1"); -#endif - return cuda_lib != NULL; -} - -static void *load_cuda_func(const char *func) -{ - void *func_ptr = os_dlsym(cuda_lib, func); - if (!func_ptr) { - error("Could not load function: %s", func); - } - return func_ptr; -} - typedef NVENCSTATUS(NVENCAPI *NV_MAX_VER_FUNC)(uint32_t *); -uint32_t get_nvenc_ver(void) +static uint32_t get_nvenc_ver(void) { static NV_MAX_VER_FUNC nv_max_ver = NULL; static bool failed = false; @@ -227,11 +211,9 @@ static inline bool init_nvenc_internal(obs_encoder_t *encoder) return false; } - uint32_t supported_ver = (NVENC_COMPAT_MAJOR_VER << 4) | - NVENC_COMPAT_MINOR_VER; - if (supported_ver > ver) { - obs_encoder_set_last_error( - encoder, obs_module_text("NVENC.OutdatedDriver")); + if (ver < NVCODEC_CONFIGURED_VERSION) { + obs_encoder_set_last_error(encoder, + obs_module_text("OutdatedDriver")); error("Current driver version does not support this NVENC " "version, please upgrade your driver"); @@ -255,95 +237,6 @@ static inline bool 
init_nvenc_internal(obs_encoder_t *encoder) return true; } -typedef struct cuda_function { - ptrdiff_t offset; - const char *name; -} cuda_function; - -static const cuda_function cuda_functions[] = { - {offsetof(CudaFunctions, cuInit), "cuInit"}, - - {offsetof(CudaFunctions, cuDeviceGetCount), "cuDeviceGetCount"}, - {offsetof(CudaFunctions, cuDeviceGet), "cuDeviceGet"}, - {offsetof(CudaFunctions, cuDeviceGetAttribute), "cuDeviceGetAttribute"}, - - {offsetof(CudaFunctions, cuCtxCreate), "cuCtxCreate_v2"}, - {offsetof(CudaFunctions, cuCtxDestroy), "cuCtxDestroy_v2"}, - {offsetof(CudaFunctions, cuCtxPushCurrent), "cuCtxPushCurrent_v2"}, - {offsetof(CudaFunctions, cuCtxPopCurrent), "cuCtxPopCurrent_v2"}, - - {offsetof(CudaFunctions, cuArray3DCreate), "cuArray3DCreate_v2"}, - {offsetof(CudaFunctions, cuArrayDestroy), "cuArrayDestroy"}, - {offsetof(CudaFunctions, cuMemcpy2D), "cuMemcpy2D_v2"}, - - {offsetof(CudaFunctions, cuGetErrorName), "cuGetErrorName"}, - {offsetof(CudaFunctions, cuGetErrorString), "cuGetErrorString"}, - - {offsetof(CudaFunctions, cuMemHostRegister), "cuMemHostRegister_v2"}, - {offsetof(CudaFunctions, cuMemHostUnregister), "cuMemHostUnregister"}, - -#ifndef _WIN32 - {offsetof(CudaFunctions, cuGLGetDevices), "cuGLGetDevices_v2"}, - {offsetof(CudaFunctions, cuGraphicsGLRegisterImage), - "cuGraphicsGLRegisterImage"}, - {offsetof(CudaFunctions, cuGraphicsUnregisterResource), - "cuGraphicsUnregisterResource"}, - {offsetof(CudaFunctions, cuGraphicsMapResources), - "cuGraphicsMapResources"}, - {offsetof(CudaFunctions, cuGraphicsUnmapResources), - "cuGraphicsUnmapResources"}, - {offsetof(CudaFunctions, cuGraphicsSubResourceGetMappedArray), - "cuGraphicsSubResourceGetMappedArray"}, -#endif -}; - -static const size_t num_cuda_funcs = - sizeof(cuda_functions) / sizeof(cuda_function); - -static bool init_cuda_internal(obs_encoder_t *encoder) -{ - static bool initialized = false; - static bool success = false; - - if (initialized) - return success; - initialized 
= true; - - if (!load_cuda_lib()) { - obs_encoder_set_last_error(encoder, - "Loading CUDA library failed."); - return false; - } - - cu = bzalloc(sizeof(CudaFunctions)); - - for (size_t idx = 0; idx < num_cuda_funcs; idx++) { - const cuda_function func = cuda_functions[idx]; - void *fptr = load_cuda_func(func.name); - - if (!fptr) { - error("Failed to find CUDA function: %s", func.name); - obs_encoder_set_last_error( - encoder, "Loading CUDA functions failed."); - return false; - } - - *(uintptr_t *)((uintptr_t)cu + func.offset) = (uintptr_t)fptr; - } - - success = true; - return true; -} - -bool cuda_get_error_desc(CUresult res, const char **name, const char **desc) -{ - if (cu->cuGetErrorName(res, name) != CUDA_SUCCESS || - cu->cuGetErrorString(res, desc) != CUDA_SUCCESS) - return false; - - return true; -} - bool init_nvenc(obs_encoder_t *encoder) { bool success; @@ -355,53 +248,71 @@ bool init_nvenc(obs_encoder_t *encoder) return success; } -bool init_cuda(obs_encoder_t *encoder) +struct encoder_caps *get_encoder_caps(enum codec_type codec) { - bool success; - - pthread_mutex_lock(&init_mutex); - success = init_cuda_internal(encoder); - pthread_mutex_unlock(&init_mutex); + struct encoder_caps *caps = &encoder_capabilities[codec]; + return caps; +} - return success; +int num_encoder_devices(void) +{ + return num_devices; } -extern struct obs_encoder_info h264_nvenc_info; -#ifdef ENABLE_HEVC -extern struct obs_encoder_info hevc_nvenc_info; -#endif -extern struct obs_encoder_info av1_nvenc_info; +bool is_codec_supported(enum codec_type codec) +{ + return codec_supported[codec]; +} -extern struct obs_encoder_info h264_nvenc_soft_info; -#ifdef ENABLE_HEVC -extern struct obs_encoder_info hevc_nvenc_soft_info; -#endif -extern struct obs_encoder_info av1_nvenc_soft_info; +bool has_broken_split_encoding(void) +{ + /* CBR padding and tearing artifacts with split encoding are fixed in + * driver versions 555+, previous ones should be considered broken. 
*/ + return driver_version_major < 555; +} -#ifdef _WIN32 -static bool enum_luids(void *param, uint32_t idx, uint64_t luid) +static void read_codec_caps(config_t *config, enum codec_type codec, + const char *section) { - struct dstr *cmd = param; - dstr_catf(cmd, " %llX", luid); - UNUSED_PARAMETER(idx); - return true; + struct encoder_caps *caps = &encoder_capabilities[codec]; + + codec_supported[codec] = + config_get_bool(config, section, "codec_supported"); + if (!codec_supported[codec]) + return; + + caps->bframes = (int)config_get_int(config, section, "bframes"); + caps->bref_modes = (int)config_get_int(config, section, "bref"); + caps->engines = (int)config_get_int(config, section, "engines"); + caps->max_width = (int)config_get_int(config, section, "max_width"); + caps->max_height = (int)config_get_int(config, section, "max_height"); + caps->temporal_filter = + (int)config_get_int(config, section, "temporal_filter"); + caps->lookahead_level = + (int)config_get_int(config, section, "lookahead_level"); + + caps->dyn_bitrate = config_get_bool(config, section, "dynamic_bitrate"); + caps->lookahead = config_get_bool(config, section, "lookahead"); + caps->lossless = config_get_bool(config, section, "lossless"); + caps->temporal_aq = config_get_bool(config, section, "temporal_aq"); + caps->ten_bit = config_get_bool(config, section, "10bit"); + caps->four_four_four = config_get_bool(config, section, "yuv_444"); } -static bool av1_supported(void) +static bool nvenc_check(void) { +#ifdef _WIN32 char *test_exe = os_get_executable_path_ptr("obs-nvenc-test.exe"); - struct dstr cmd = {0}; +#else + char *test_exe = os_get_executable_path_ptr("obs-nvenc-test"); +#endif + os_process_args_t *args; struct dstr caps_str = {0}; - bool av1_supported = false; config_t *config = NULL; - dstr_init_move_array(&cmd, test_exe); - dstr_insert_ch(&cmd, 0, '\"'); - dstr_cat(&cmd, "\""); + args = os_process_args_create(test_exe); - enum_graphics_device_luids(enum_luids, &cmd); - - 
os_process_pipe_t *pp = os_process_pipe_create(cmd.array, "r"); + os_process_pipe_t *pp = os_process_pipe_create2(args, "r"); if (!pp) { blog(LOG_WARNING, "[NVENC] Failed to launch the NVENC " "test process I guess"); @@ -423,8 +334,7 @@ static bool av1_supported(void) if (dstr_is_empty(&caps_str)) { blog(LOG_WARNING, "[NVENC] Seems the NVENC test subprocess crashed. " - "Better there than here I guess. Let's just " - "skip NVENC AV1 detection then I suppose."); + "Better there than here I guess. "); goto fail; } @@ -433,57 +343,67 @@ static bool av1_supported(void) goto fail; } - const char *error = config_get_string(config, "error", "string"); - if (error) { - blog(LOG_WARNING, "[NVENC] AV1 test process failed: %s", error); + bool success = config_get_bool(config, "general", "nvenc_supported"); + if (!success) { + const char *error = + config_get_string(config, "general", "reason"); + blog(LOG_WARNING, "[NVENC] Test process failed: %s", + error ? error : "unknown"); goto fail; } - uint32_t adapter_count = (uint32_t)config_num_sections(config); - bool avc_supported = false; - bool hevc_supported = false; - - /* for now, just check AV1 support on device 0 */ - av1_supported = config_get_bool(config, "0", "supports_av1"); + num_devices = (int)config_get_int(config, "general", "nvenc_devices"); + read_codec_caps(config, CODEC_H264, "h264"); + read_codec_caps(config, CODEC_HEVC, "hevc"); + read_codec_caps(config, CODEC_AV1, "av1"); + + const char *nvenc_ver = + config_get_string(config, "general", "nvenc_ver"); + const char *cuda_ver = config_get_string(config, "general", "cuda_ver"); + const char *driver_ver = + config_get_string(config, "general", "driver_ver"); + /* Parse out major/minor for some brokenness checks */ + sscanf(driver_ver, "%d.%d", &driver_version_major, + &driver_version_minor); + + blog(LOG_INFO, + "[obs-nvenc] NVENC version: %d.%d (compiled) / %s (driver), " + "CUDA driver version: %s, AV1 supported: %s", + NVCODEC_CONFIGURED_VERSION >> 4, 
NVCODEC_CONFIGURED_VERSION & 0xf, + nvenc_ver, cuda_ver, + codec_supported[CODEC_AV1] ? "true" : "false"); fail: if (config) config_close(config); + + bfree(test_exe); dstr_free(&caps_str); - dstr_free(&cmd); + os_process_args_destroy(args); - return av1_supported; + return true; } -#else -bool av1_supported() + +static const char *nvenc_check_name = "nvenc_check"; +bool nvenc_supported(void) { - return get_nvenc_ver() >= ((12 << 4) | 0); + bool success; + + profile_start(nvenc_check_name); + success = load_nvenc_lib() && nvenc_check(); + profile_end(nvenc_check_name); + + return success; } -#endif -void obs_nvenc_load(bool h264, bool hevc, bool av1) +void obs_nvenc_load(void) { pthread_mutex_init(&init_mutex, NULL); - if (h264) { - obs_register_encoder(&h264_nvenc_info); - obs_register_encoder(&h264_nvenc_soft_info); - } -#ifdef ENABLE_HEVC - if (hevc) { - obs_register_encoder(&hevc_nvenc_info); - obs_register_encoder(&hevc_nvenc_soft_info); - } -#endif - if (av1 && av1_supported()) { - obs_register_encoder(&av1_nvenc_info); - obs_register_encoder(&av1_nvenc_soft_info); - } else { - blog(LOG_WARNING, "[NVENC] AV1 is not supported"); - } + register_encoders(); + register_compat_encoders(); } void obs_nvenc_unload(void) { - bfree(cu); pthread_mutex_destroy(&init_mutex); } diff --git a/plugins/obs-nvenc/nvenc-helpers.h b/plugins/obs-nvenc/nvenc-helpers.h new file mode 100644 index 00000000000000..ab3cd3bca969c8 --- /dev/null +++ b/plugins/obs-nvenc/nvenc-helpers.h @@ -0,0 +1,89 @@ +#pragma once + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#endif + +#include +#include + +#define NVCODEC_CONFIGURED_VERSION \ + ((NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION) + +#if NVENCAPI_MAJOR_VERSION > 12 || NVENCAPI_MINOR_VERSION >= 1 +#define NVENC_12_1_OR_LATER +#endif + +#if NVENCAPI_MAJOR_VERSION > 12 || NVENCAPI_MINOR_VERSION >= 2 +#define NVENC_12_2_OR_LATER +#endif + +enum codec_type { + CODEC_H264, + CODEC_HEVC, + CODEC_AV1, +}; + +static const char 
*get_codec_name(enum codec_type type) +{ + switch (type) { + case CODEC_H264: + return "H264"; + case CODEC_HEVC: + return "HEVC"; + case CODEC_AV1: + return "AV1"; + } + + return "Unknown"; +} + +struct encoder_caps { + int bframes; + int bref_modes; + int engines; + + int max_width; + int max_height; + + /* These don't seem to work correctly, thanks NVIDIA. */ + int temporal_filter; + int lookahead_level; + + bool dyn_bitrate; + bool lookahead; + bool lossless; + bool temporal_aq; + + /* Yeah... */ + bool ten_bit; + bool four_four_four; +}; + +typedef NVENCSTATUS(NVENCAPI *NV_CREATE_INSTANCE_FUNC)( + NV_ENCODE_API_FUNCTION_LIST *); + +extern NV_ENCODE_API_FUNCTION_LIST nv; +extern NV_CREATE_INSTANCE_FUNC nv_create_instance; + +const char *nv_error_name(NVENCSTATUS err); + +bool init_nvenc(obs_encoder_t *encoder); +bool nv_fail2(obs_encoder_t *encoder, void *session, const char *format, ...); +bool nv_failed2(obs_encoder_t *encoder, void *session, NVENCSTATUS err, + const char *func, const char *call); + +struct encoder_caps *get_encoder_caps(enum codec_type codec); +int num_encoder_devices(void); +bool is_codec_supported(enum codec_type codec); +bool has_broken_split_encoding(void); + +void register_encoders(void); +void register_compat_encoders(void); + +#define nv_fail(encoder, format, ...) \ + nv_fail2(encoder, enc->session, format, ##__VA_ARGS__) + +#define nv_failed(encoder, err, func, call) \ + nv_failed2(encoder, enc->session, err, func, call) diff --git a/plugins/obs-nvenc/nvenc-internal.h b/plugins/obs-nvenc/nvenc-internal.h new file mode 100644 index 00000000000000..efaaef9d199d61 --- /dev/null +++ b/plugins/obs-nvenc/nvenc-internal.h @@ -0,0 +1,211 @@ +#pragma once + +#include "cuda-helpers.h" +#include "nvenc-helpers.h" + +#include +#include + +#ifdef _WIN32 +#define INITGUID +#include +#include +#include +#else +#include +#endif + +#define do_log(level, format, ...) 
\ + blog(level, "[obs-nvenc: '%s'] " format, \ + obs_encoder_get_name(enc->encoder), ##__VA_ARGS__) + +#define error(format, ...) do_log(LOG_ERROR, format, ##__VA_ARGS__) +#define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__) +#define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__) +#define debug(format, ...) do_log(LOG_DEBUG, format, ##__VA_ARGS__) + +#define error_hr(msg) error("%s: %s: 0x%08lX", __FUNCTION__, msg, (uint32_t)hr); + +#define NV_FAIL(format, ...) nv_fail(enc->encoder, format, ##__VA_ARGS__) +#define NV_FAILED(x) nv_failed(enc->encoder, x, __FUNCTION__, #x) + +/* ------------------------------------------------------------------------- */ +/* Main Implementation Structure */ + +struct nvenc_properties { + int64_t bitrate; + int64_t max_bitrate; + int64_t keyint_sec; + int64_t cqp; + int64_t device; + int64_t bf; + int64_t bframe_ref_mode; + int64_t split_encode; + int64_t target_quality; + + const char *rate_control; + const char *preset; + const char *profile; + const char *tune; + const char *multipass; + const char *opts_str; + + bool adaptive_quantization; + bool lookahead; + bool disable_scenecut; + bool repeat_headers; + bool force_cuda_tex; + + struct obs_options opts; + obs_data_t *data; +}; + +struct nvenc_data { + obs_encoder_t *encoder; + enum codec_type codec; + GUID codec_guid; + + void *session; + NV_ENC_INITIALIZE_PARAMS params; + NV_ENC_CONFIG config; + uint32_t buf_count; + int output_delay; + int buffers_queued; + size_t next_bitstream; + size_t cur_bitstream; + bool encode_started; + bool first_packet; + bool can_change_bitrate; + bool non_texture; + + DARRAY(struct handle_tex) input_textures; + DARRAY(struct nv_bitstream) bitstreams; + DARRAY(struct nv_cuda_surface) surfaces; + NV_ENC_BUFFER_FORMAT surface_format; + struct deque dts_list; + + DARRAY(uint8_t) packet_data; + int64_t packet_pts; + bool packet_keyframe; + +#ifdef _WIN32 + DARRAY(struct nv_texture) textures; + ID3D11Device *device; + 
ID3D11DeviceContext *context; +#endif + + uint32_t cx; + uint32_t cy; + enum video_format in_format; + + uint8_t *header; + size_t header_size; + + uint8_t *sei; + size_t sei_size; + + int8_t *roi_map; + size_t roi_map_size; + uint32_t roi_increment; + + struct nvenc_properties props; + + CUcontext cu_ctx; +}; + +/* ------------------------------------------------------------------------- */ +/* Resource data structures */ + +/* Input texture handles */ +struct handle_tex { +#ifdef _WIN32 + uint32_t handle; + ID3D11Texture2D *tex; + IDXGIKeyedMutex *km; +#else + GLuint tex_id; + /* CUDA mappings */ + CUgraphicsResource res_y; + CUgraphicsResource res_uv; +#endif +}; + +/* Bitstream buffer */ +struct nv_bitstream { + void *ptr; +}; + +/** Mapped resources **/ +/* CUDA Arrays */ +struct nv_cuda_surface { + CUarray tex; + NV_ENC_REGISTERED_PTR res; + NV_ENC_INPUT_PTR *mapped_res; +}; + +#ifdef _WIN32 +/* DX11 textures */ +struct nv_texture { + void *res; + ID3D11Texture2D *tex; + void *mapped_res; +}; +#endif + +/* ------------------------------------------------------------------------- */ +/* Shared functions */ + +bool nvenc_encode_base(struct nvenc_data *enc, struct nv_bitstream *bs, + void *pic, int64_t pts, struct encoder_packet *packet, + bool *received_packet); + +/* ------------------------------------------------------------------------- */ +/* Backend-specific functions */ + +#ifdef _WIN32 +/** D3D11 **/ +bool d3d11_init(struct nvenc_data *enc, obs_data_t *settings); +void d3d11_free(struct nvenc_data *enc); + +bool d3d11_init_textures(struct nvenc_data *enc); +void d3d11_free_textures(struct nvenc_data *enc); + +bool d3d11_encode(void *data, struct encoder_texture *texture, int64_t pts, + uint64_t lock_key, uint64_t *next_key, + struct encoder_packet *packet, bool *received_packet); +#endif + +/** CUDA **/ +bool cuda_ctx_init(struct nvenc_data *enc, obs_data_t *settings, bool texture); +void cuda_ctx_free(struct nvenc_data *enc); + +bool 
cuda_init_surfaces(struct nvenc_data *enc); +void cuda_free_surfaces(struct nvenc_data *enc); + +bool cuda_encode(void *data, struct encoder_frame *frame, + struct encoder_packet *packet, bool *received_packet); + +#ifndef _WIN32 +/** CUDA OpenGL **/ +void cuda_opengl_free(struct nvenc_data *enc); +bool cuda_opengl_encode(void *data, struct encoder_texture *tex, int64_t pts, + uint64_t lock_key, uint64_t *next_key, + struct encoder_packet *packet, bool *received_packet); +#endif + +/* ------------------------------------------------------------------------- */ +/* Properties crap */ + +void nvenc_properties_read(struct nvenc_properties *enc, obs_data_t *settings); + +void h264_nvenc_defaults(obs_data_t *settings); +void hevc_nvenc_defaults(obs_data_t *settings); +void av1_nvenc_defaults(obs_data_t *settings); + +obs_properties_t *h264_nvenc_properties(void *); +obs_properties_t *hevc_nvenc_properties(void *); +obs_properties_t *av1_nvenc_properties(void *); + +/* Custom argument parsing */ +void apply_user_args(struct nvenc_data *enc); +bool get_user_arg_int(struct nvenc_data *enc, const char *name, int *val); diff --git a/plugins/obs-nvenc/nvenc-opengl.c b/plugins/obs-nvenc/nvenc-opengl.c new file mode 100644 index 00000000000000..c8b10d3f3e75f9 --- /dev/null +++ b/plugins/obs-nvenc/nvenc-opengl.c @@ -0,0 +1,158 @@ +#include "nvenc-internal.h" +#include "nvenc-helpers.h" + +/* + * NVENC implementation using CUDA context and OpenGL textures + */ + +void cuda_opengl_free(struct nvenc_data *enc) +{ + if (!enc->cu_ctx) + return; + + cu->cuCtxPushCurrent(enc->cu_ctx); + for (size_t i = 0; i < enc->input_textures.num; i++) { + CUgraphicsResource res_y = enc->input_textures.array[i].res_y; + CUgraphicsResource res_uv = enc->input_textures.array[i].res_uv; + cu->cuGraphicsUnregisterResource(res_y); + cu->cuGraphicsUnregisterResource(res_uv); + } + cu->cuCtxPopCurrent(NULL); +} + +/* ------------------------------------------------------------------------- */ +/* Actual 
encoding stuff */ + +static inline bool get_res_for_tex_ids(struct nvenc_data *enc, GLuint tex_id_y, + GLuint tex_id_uv, + CUgraphicsResource *tex_y, + CUgraphicsResource *tex_uv) +{ + bool success = true; + + for (size_t idx = 0; idx < enc->input_textures.num; idx++) { + struct handle_tex *ht = &enc->input_textures.array[idx]; + if (ht->tex_id != tex_id_y) + continue; + + *tex_y = ht->res_y; + *tex_uv = ht->res_uv; + return success; + } + + CU_CHECK(cu->cuGraphicsGLRegisterImage( + tex_y, tex_id_y, GL_TEXTURE_2D, + CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY)) + CU_CHECK(cu->cuGraphicsGLRegisterImage( + tex_uv, tex_id_uv, GL_TEXTURE_2D, + CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY)) + + struct handle_tex ht = {tex_id_y, *tex_y, *tex_uv}; + da_push_back(enc->input_textures, &ht); + +unmap: + if (!success) { + cu->cuGraphicsUnregisterResource(*tex_y); + cu->cuGraphicsUnregisterResource(*tex_uv); + } + + return success; +} + +static inline bool copy_tex_data(struct nvenc_data *enc, const bool p010, + GLuint tex[2], struct nv_cuda_surface *surf) +{ + bool success = true; + CUgraphicsResource mapped_tex[2] = {0}; + CUarray mapped_cuda; + + if (!get_res_for_tex_ids(enc, tex[0], tex[1], &mapped_tex[0], + &mapped_tex[1])) + return false; + + CU_CHECK(cu->cuGraphicsMapResources(2, mapped_tex, 0)) + + CUDA_MEMCPY2D m = {0}; + m.dstMemoryType = CU_MEMORYTYPE_ARRAY; + m.srcMemoryType = CU_MEMORYTYPE_ARRAY; + m.dstArray = surf->tex; + m.WidthInBytes = p010 ? 
enc->cx * 2 : enc->cx; + m.Height = enc->cy; + + // Map and copy Y texture + CU_CHECK(cu->cuGraphicsSubResourceGetMappedArray(&mapped_cuda, + mapped_tex[0], 0, 0)); + m.srcArray = mapped_cuda; + CU_CHECK(cu->cuMemcpy2D(&m)) + + // Map and copy UV texture + CU_CHECK(cu->cuGraphicsSubResourceGetMappedArray(&mapped_cuda, + mapped_tex[1], 0, 0)) + m.srcArray = mapped_cuda; + m.dstY += enc->cy; + m.Height = enc->cy / 2; + + CU_CHECK(cu->cuMemcpy2D(&m)) + +unmap: + cu->cuGraphicsUnmapResources(2, mapped_tex, 0); + + return success; +} + +bool cuda_opengl_encode(void *data, struct encoder_texture *tex, int64_t pts, + uint64_t lock_key, uint64_t *next_key, + struct encoder_packet *packet, bool *received_packet) +{ + struct nvenc_data *enc = data; + struct nv_cuda_surface *surf; + struct nv_bitstream *bs; + const bool p010 = obs_p010_tex_active(); + GLuint input_tex[2]; + + if (tex == NULL || tex->tex[0] == NULL) { + error("Encode failed: bad texture handle"); + *next_key = lock_key; + return false; + } + + bs = &enc->bitstreams.array[enc->next_bitstream]; + surf = &enc->surfaces.array[enc->next_bitstream]; + + deque_push_back(&enc->dts_list, &pts, sizeof(pts)); + + /* ------------------------------------ */ + /* copy to CUDA data */ + + CU_FAILED(cu->cuCtxPushCurrent(enc->cu_ctx)) + obs_enter_graphics(); + input_tex[0] = *(GLuint *)gs_texture_get_obj(tex->tex[0]); + input_tex[1] = *(GLuint *)gs_texture_get_obj(tex->tex[1]); + + bool success = copy_tex_data(enc, p010, input_tex, surf); + + obs_leave_graphics(); + CU_FAILED(cu->cuCtxPopCurrent(NULL)) + + if (!success) + return false; + + /* ------------------------------------ */ + /* map output tex so nvenc can use it */ + + NV_ENC_MAP_INPUT_RESOURCE map = {NV_ENC_MAP_INPUT_RESOURCE_VER}; + map.registeredResource = surf->res; + map.mappedBufferFmt = p010 ? 
NV_ENC_BUFFER_FORMAT_YUV420_10BIT + : NV_ENC_BUFFER_FORMAT_NV12; + + if (NV_FAILED(nv.nvEncMapInputResource(enc->session, &map))) + return false; + + surf->mapped_res = map.mappedResource; + + /* ------------------------------------ */ + /* do actual encode call */ + + return nvenc_encode_base(enc, bs, surf->mapped_res, pts, packet, + received_packet); +} diff --git a/plugins/obs-nvenc/nvenc-opts-parser.c b/plugins/obs-nvenc/nvenc-opts-parser.c new file mode 100644 index 00000000000000..ae43bcd14f46f6 --- /dev/null +++ b/plugins/obs-nvenc/nvenc-opts-parser.c @@ -0,0 +1,220 @@ +#include "nvenc-internal.h" + +#include + +/* NVIDIA uses bitfields for a variety of options. As it is not possible to + * use offsetof() or similar with those we resort to macros here to avoid too + * much boilerplate. */ + +#define APPLY_BIT_OPT(opt_name, bits) \ + if (strcmp(opt->name, #opt_name) == 0) { \ + uint32_t old_val = nv_conf->opt_name; \ + nv_conf->opt_name = strtol(opt->value, NULL, 10); \ + blog(LOG_DEBUG, "[obs-nvenc] Changing: \"%s\": %u -> %u", \ + #opt_name, old_val, nv_conf->opt_name); \ + return true; \ + } + +#define APPLY_INT_OPT(opt_name, type) \ + if (strcmp(opt->name, #opt_name) == 0) { \ + blog(LOG_DEBUG, "[obs-nvenc] Changing \"%s\": %d -> %s (%s)", \ + #opt_name, nv_conf->opt_name, opt->value, #type); \ + nv_conf->opt_name = (type)strtol(opt->value, NULL, 10); \ + return true; \ + } + +static void parse_qp_opt(const char *name, const char *val, NV_ENC_QP *qp_opt) +{ + /* QP options can be passed in either as a single value to apply to all + * or as three values separated by ":". 
*/ + int32_t p, b, i; + + if (sscanf(val, "%d:%d:%d", &p, &b, &i) != 3) { + p = b = i = atoi(val); + } + + blog(LOG_DEBUG, + "[obs-nvenc] Applying custom %s = %d / %d / %d (P / B / I)", name, + p, b, i); + + /* Values should be treated as int32_t but are passed in as uint32_t + * for legacy reasons, see comment in nvEncodeAPI.h */ + qp_opt->qpInterP = (uint32_t)p; + qp_opt->qpInterB = (uint32_t)b; + qp_opt->qpIntra = (uint32_t)i; +} + +#define APPLY_QP_OPT(opt_name) \ + if (strcmp(opt->name, #opt_name) == 0) { \ + parse_qp_opt(#opt_name, opt->value, &nv_conf->opt_name); \ + return true; \ + } + +static bool apply_rc_opt(const struct obs_option *opt, + NV_ENC_RC_PARAMS *nv_conf) +{ + APPLY_QP_OPT(constQP) + APPLY_QP_OPT(minQP) + APPLY_QP_OPT(maxQP) + APPLY_QP_OPT(initialRCQP) + + APPLY_INT_OPT(averageBitRate, uint32_t) + APPLY_INT_OPT(maxBitRate, uint32_t) + APPLY_INT_OPT(vbvBufferSize, uint32_t) + APPLY_INT_OPT(vbvInitialDelay, uint32_t) + + APPLY_INT_OPT(targetQuality, uint8_t) + APPLY_INT_OPT(targetQualityLSB, uint8_t) + + APPLY_INT_OPT(cbQPIndexOffset, int8_t) + APPLY_INT_OPT(crQPIndexOffset, int8_t) + + APPLY_BIT_OPT(enableMinQP, 1) + APPLY_BIT_OPT(enableMaxQP, 1) + APPLY_BIT_OPT(enableInitialRCQP, 1) + APPLY_BIT_OPT(enableAQ, 1) + APPLY_BIT_OPT(enableLookahead, 1) + APPLY_BIT_OPT(disableIadapt, 1) + APPLY_BIT_OPT(disableBadapt, 1) + APPLY_BIT_OPT(enableTemporalAQ, 1) + APPLY_BIT_OPT(aqStrength, 4) + +#ifdef NVENC_12_2_OR_LATER + APPLY_INT_OPT(lookaheadLevel, NV_ENC_LOOKAHEAD_LEVEL) +#endif + + /* Macros above will return true if succesfully evaluated. + * Otherwise, return false if option unknown/unsupported. */ + return false; +} + +static bool apply_conf_opt(const struct obs_option *opt, NV_ENC_CONFIG *nv_conf) +{ + APPLY_INT_OPT(gopLength, uint32_t) + APPLY_INT_OPT(frameIntervalP, int32_t) + + return false; +} + +static void parse_level_opt(const char *val, uint32_t *level, bool hevc) +{ + /* Support for passing level both as raw value (e.g. 
"42") + * and human-readable format (e.g. "4.2"). */ + uint32_t int_val = 0; + + if (strstr(val, ".") != NULL) { + uint32_t high_val, low_val; + if (sscanf(val, "%u.%u", &high_val, &low_val) == 2) { + int_val = high_val * 10 + low_val; + } + } else { + int_val = strtol(val, NULL, 10); + } + + if (!int_val) + return; + + if (hevc) + int_val *= 3; + + blog(LOG_DEBUG, "[obs-nvenc] Applying custom level = %s (%u)", val, + int_val); + *level = int_val; +} + +static bool apply_h264_opt(struct obs_option *opt, NV_ENC_CONFIG_H264 *nv_conf) +{ + if (strcmp(opt->name, "level") == 0) { + parse_level_opt(opt->value, &nv_conf->level, false); + return true; + } + + APPLY_INT_OPT(idrPeriod, uint32_t) + APPLY_INT_OPT(useBFramesAsRef, NV_ENC_BFRAME_REF_MODE) + + APPLY_BIT_OPT(enableFillerDataInsertion, 1) + + return false; +} + +static bool apply_hevc_opt(struct obs_option *opt, NV_ENC_CONFIG_HEVC *nv_conf) +{ + if (strcmp(opt->name, "level") == 0) { + parse_level_opt(opt->value, &nv_conf->level, true); + return true; + } + + APPLY_INT_OPT(tier, uint32_t) + APPLY_INT_OPT(idrPeriod, uint32_t) + APPLY_INT_OPT(useBFramesAsRef, NV_ENC_BFRAME_REF_MODE) +#ifdef NVENC_12_2_OR_LATER + APPLY_INT_OPT(tfLevel, NV_ENC_TEMPORAL_FILTER_LEVEL) +#endif + + APPLY_BIT_OPT(enableFillerDataInsertion, 1) + + return false; +} + +static bool apply_av1_opt(struct obs_option *opt, NV_ENC_CONFIG_AV1 *nv_conf) +{ + APPLY_INT_OPT(level, uint32_t) + APPLY_INT_OPT(tier, uint32_t) + APPLY_INT_OPT(numTileColumns, uint32_t) + APPLY_INT_OPT(numTileRows, uint32_t) + APPLY_INT_OPT(idrPeriod, uint32_t) + APPLY_INT_OPT(useBFramesAsRef, NV_ENC_BFRAME_REF_MODE) + + APPLY_BIT_OPT(enableBitstreamPadding, 1) + + return false; +} + +static bool apply_codec_opt(enum codec_type codec, struct obs_option *opt, + NV_ENC_CODEC_CONFIG *enc_config) +{ + if (codec == CODEC_H264) + return apply_h264_opt(opt, &enc_config->h264Config); + if (codec == CODEC_HEVC) + return apply_hevc_opt(opt, &enc_config->hevcConfig); + if (codec == 
CODEC_AV1) + return apply_av1_opt(opt, &enc_config->av1Config); + + return false; +} + +void apply_user_args(struct nvenc_data *enc) +{ + for (size_t idx = 0; idx < enc->props.opts.count; idx++) { + struct obs_option *opt = &enc->props.opts.options[idx]; + + /* Special options handled elsewhere */ + if (strcmp(opt->name, "lookaheadDepth") == 0 || + strcmp(opt->name, "keyint") == 0) + continue; + + if (apply_rc_opt(opt, &enc->config.rcParams)) + continue; + if (apply_conf_opt(opt, &enc->config)) + continue; + if (apply_codec_opt(enc->codec, opt, + &enc->config.encodeCodecConfig)) + continue; + + warn("Unknown custom option: \"%s\"", opt->name); + } +} + +bool get_user_arg_int(struct nvenc_data *enc, const char *name, int *val) +{ + for (size_t idx = 0; idx < enc->props.opts.count; idx++) { + struct obs_option *opt = &enc->props.opts.options[idx]; + if (strcmp(opt->name, name) != 0) + continue; + + *val = strtol(opt->value, NULL, 10); + return true; + } + + return false; +} diff --git a/plugins/obs-nvenc/nvenc-properties.c b/plugins/obs-nvenc/nvenc-properties.c new file mode 100644 index 00000000000000..ff67cdba13cba9 --- /dev/null +++ b/plugins/obs-nvenc/nvenc-properties.c @@ -0,0 +1,324 @@ +#include "nvenc-internal.h" + +void nvenc_properties_read(struct nvenc_properties *props, obs_data_t *settings) +{ + props->bitrate = obs_data_get_int(settings, "bitrate"); + props->max_bitrate = obs_data_get_int(settings, "max_bitrate"); + props->keyint_sec = obs_data_get_int(settings, "keyint_sec"); + props->cqp = obs_data_get_int(settings, "cqp"); + props->device = obs_data_get_int(settings, "device"); + props->bf = obs_data_get_int(settings, "bf"); + props->bframe_ref_mode = obs_data_get_int(settings, "bframe_ref_mode"); + props->split_encode = obs_data_get_int(settings, "split_encode"); + props->target_quality = obs_data_get_int(settings, "target_quality"); + + props->rate_control = obs_data_get_string(settings, "rate_control"); + props->preset = 
obs_data_get_string(settings, "preset"); + props->profile = obs_data_get_string(settings, "profile"); + props->tune = obs_data_get_string(settings, "tune"); + props->multipass = obs_data_get_string(settings, "multipass"); + + props->adaptive_quantization = + obs_data_get_bool(settings, "adaptive_quantization"); + props->lookahead = obs_data_get_bool(settings, "lookahead"); + props->disable_scenecut = + obs_data_get_bool(settings, "disable_scenecut"); + props->repeat_headers = obs_data_get_bool(settings, "repeat_headers"); + props->force_cuda_tex = obs_data_get_bool(settings, "force_cuda_tex"); + + if (obs_data_has_user_value(settings, "opts")) { + props->opts_str = obs_data_get_string(settings, "opts"); + props->opts = obs_parse_options(props->opts_str); + } + + /* Retain settings object until destroyed since we use its strings. */ + obs_data_addref(settings); + props->data = settings; +} + +static void nvenc_defaults_base(enum codec_type codec, obs_data_t *settings) +{ + struct encoder_caps *caps = get_encoder_caps(codec); + + obs_data_set_default_int(settings, "bitrate", 10000); + obs_data_set_default_int(settings, "max_bitrate", 10000); + obs_data_set_default_int(settings, "target_quality", 20); + obs_data_set_default_int(settings, "keyint_sec", 0); + obs_data_set_default_int(settings, "cqp", 20); + obs_data_set_default_int(settings, "device", -1); + obs_data_set_default_int(settings, "bf", caps->bframes > 0 ? 2 : 0); + + obs_data_set_default_string(settings, "rate_control", "cbr"); + obs_data_set_default_string(settings, "preset", "p5"); + obs_data_set_default_string(settings, "multipass", "qres"); + obs_data_set_default_string(settings, "tune", "hq"); + obs_data_set_default_string(settings, "profile", + codec != CODEC_H264 ? 
"main" : "high"); + + obs_data_set_default_bool(settings, "adaptive_quantization", true); + obs_data_set_default_bool(settings, "lookahead", caps->lookahead); + + /* Hidden options */ + obs_data_set_default_bool(settings, "repeat_headers", false); + obs_data_set_default_bool(settings, "force_cuda_tex", false); + obs_data_set_default_bool(settings, "disable_scenecut", false); +} + +void h264_nvenc_defaults(obs_data_t *settings) +{ + nvenc_defaults_base(CODEC_H264, settings); +} + +#ifdef ENABLE_HEVC +void hevc_nvenc_defaults(obs_data_t *settings) +{ + nvenc_defaults_base(CODEC_HEVC, settings); +} +#endif + +void av1_nvenc_defaults(obs_data_t *settings) +{ + nvenc_defaults_base(CODEC_AV1, settings); +} + +static bool rate_control_modified(obs_properties_t *ppts, obs_property_t *p, + obs_data_t *settings) +{ + const char *rc = obs_data_get_string(settings, "rate_control"); + bool cqp = strcmp(rc, "CQP") == 0; + bool vbr = strcmp(rc, "VBR") == 0; + bool cqvbr = strcmp(rc, "CQVBR") == 0; + bool lossless = strcmp(rc, "lossless") == 0; + + p = obs_properties_get(ppts, "bitrate"); + obs_property_set_visible(p, !cqp && !lossless && !cqvbr); + p = obs_properties_get(ppts, "max_bitrate"); + obs_property_set_visible(p, vbr || cqvbr); + p = obs_properties_get(ppts, "target_quality"); + obs_property_set_visible(p, cqvbr); + p = obs_properties_get(ppts, "cqp"); + obs_property_set_visible(p, cqp); + p = obs_properties_get(ppts, "preset"); + obs_property_set_visible(p, !lossless); + p = obs_properties_get(ppts, "tune"); + obs_property_set_visible(p, !lossless); + p = obs_properties_get(ppts, "adaptive_quantization"); + obs_property_set_visible(p, !lossless); + + return true; +} + +obs_properties_t *nvenc_properties_internal(enum codec_type codec) +{ + obs_properties_t *props = obs_properties_create(); + obs_property_t *p; + + struct encoder_caps *caps = get_encoder_caps(codec); + + p = obs_properties_add_list(props, "rate_control", + obs_module_text("RateControl"), + 
OBS_COMBO_TYPE_LIST, + OBS_COMBO_FORMAT_STRING); + obs_property_list_add_string(p, obs_module_text("CBR"), "CBR"); + obs_property_list_add_string(p, obs_module_text("CQP"), "CQP"); + obs_property_list_add_string(p, obs_module_text("VBR"), "VBR"); + obs_property_list_add_string(p, obs_module_text("CQVBR"), "CQVBR"); + if (caps->lossless) { + obs_property_list_add_string(p, obs_module_text("Lossless"), + "lossless"); + } + + obs_property_set_modified_callback(p, rate_control_modified); + + p = obs_properties_add_int(props, "bitrate", obs_module_text("Bitrate"), + 50, UINT32_MAX / 1000, 50); + obs_property_int_set_suffix(p, " Kbps"); + + obs_properties_add_int(props, "target_quality", + obs_module_text("TargetQuality"), 1, 51, 1); + + p = obs_properties_add_int(props, "max_bitrate", + obs_module_text("MaxBitrate"), 0, + UINT32_MAX / 1000, 50); + obs_property_int_set_suffix(p, " Kbps"); + + /* AV1 uses 0-255 instead of 0-51 for QP, and most implementations just + * multiply the value by 4 to keep the range smaller. */ + obs_properties_add_int(props, "cqp", obs_module_text("CQP"), 1, + codec == CODEC_AV1 ? 63 : 51, 1); + + p = obs_properties_add_int(props, "keyint_sec", + obs_module_text("KeyframeIntervalSec"), 0, + 10, 1); + obs_property_int_set_suffix(p, " s"); + + p = obs_properties_add_list(props, "preset", obs_module_text("Preset"), + OBS_COMBO_TYPE_LIST, + OBS_COMBO_FORMAT_STRING); + +#define add_preset(val) \ + obs_property_list_add_string(p, obs_module_text("Preset." val), val) + + add_preset("p1"); + add_preset("p2"); + add_preset("p3"); + add_preset("p4"); + add_preset("p5"); + add_preset("p6"); + add_preset("p7"); +#undef add_preset + + p = obs_properties_add_list(props, "tune", obs_module_text("Tuning"), + OBS_COMBO_TYPE_LIST, + OBS_COMBO_FORMAT_STRING); + +#define add_tune(val) \ + obs_property_list_add_string(p, obs_module_text("Tuning." 
val), val) +#ifdef NVENC_12_2_OR_LATER + if (codec == CODEC_HEVC) + add_tune("uhq"); +#endif + add_tune("hq"); + add_tune("ll"); + add_tune("ull"); +#undef add_tune + + p = obs_properties_add_list(props, "multipass", + obs_module_text("Multipass"), + OBS_COMBO_TYPE_LIST, + OBS_COMBO_FORMAT_STRING); + +#define add_multipass(val) \ + obs_property_list_add_string(p, obs_module_text("Multipass." val), val) + add_multipass("disabled"); + add_multipass("qres"); + add_multipass("fullres"); +#undef add_multipass + + p = obs_properties_add_list(props, "profile", + obs_module_text("Profile"), + OBS_COMBO_TYPE_LIST, + OBS_COMBO_FORMAT_STRING); + +#define add_profile(val) obs_property_list_add_string(p, val, val) + if (codec == CODEC_HEVC) { + if (caps->ten_bit) + add_profile("main10"); + add_profile("main"); + } else if (codec == CODEC_AV1) { + add_profile("main"); + } else { + add_profile("high"); + add_profile("main"); + add_profile("baseline"); + } +#undef add_profile + + p = obs_properties_add_bool(props, "lookahead", + obs_module_text("LookAhead")); + obs_property_set_long_description(p, + obs_module_text("LookAhead.ToolTip")); + + p = obs_properties_add_bool(props, "adaptive_quantization", + obs_module_text("AdaptiveQuantization")); + obs_property_set_long_description( + p, obs_module_text("AdaptiveQuantization.ToolTip")); + + if (num_encoder_devices() > 1) { + obs_properties_add_int(props, "device", obs_module_text("GPU"), + -1, num_encoder_devices(), 1); + } + + if (caps->bframes > 0) { + obs_properties_add_int(props, "bf", obs_module_text("BFrames"), + 0, caps->bframes, 1); + } + + /* H.264 supports this, but seems to cause issues with some decoders, + * so restrict it to the custom options field for now. 
*/ + if (caps->bref_modes && codec != CODEC_H264) { + p = obs_properties_add_list(props, "bframe_ref_mode", + obs_module_text("BFrameRefMode"), + OBS_COMBO_TYPE_LIST, + OBS_COMBO_FORMAT_INT); + + obs_property_list_add_int( + p, obs_module_text("BframeRefMode.Disabled"), + NV_ENC_BFRAME_REF_MODE_DISABLED); + + if (caps->bref_modes & 1) { + obs_property_list_add_int( + p, obs_module_text("BframeRefMode.Each"), + NV_ENC_BFRAME_REF_MODE_EACH); + } + if (caps->bref_modes & 2) { + obs_property_list_add_int( + p, obs_module_text("BframeRefMode.Middle"), + NV_ENC_BFRAME_REF_MODE_MIDDLE); + } + } + +#ifdef NVENC_12_1_OR_LATER + /* Some older GPUs such as the 1080 Ti have 2 NVENC chips, but do not + * support split encoding. Therefore, we check for AV1 support here to + * make sure this option is only presented on 40-series and later. */ + if (is_codec_supported(CODEC_AV1) && caps->engines > 1 && + !has_broken_split_encoding() && + (codec == CODEC_HEVC || codec == CODEC_AV1)) { + p = obs_properties_add_list(props, "split_encode", + obs_module_text("SplitEncode"), + OBS_COMBO_TYPE_LIST, + OBS_COMBO_FORMAT_INT); + + obs_property_list_add_int(p, + obs_module_text("SplitEncode.Auto"), + NV_ENC_SPLIT_AUTO_MODE); + obs_property_list_add_int( + p, obs_module_text("SplitEncode.Disabled"), + NV_ENC_SPLIT_DISABLE_MODE); + obs_property_list_add_int( + p, obs_module_text("SplitEncode.Enabled"), + NV_ENC_SPLIT_TWO_FORCED_MODE); + if (caps->engines > 2) { + obs_property_list_add_int( + p, obs_module_text("SplitEncode.ThreeWay"), + NV_ENC_SPLIT_THREE_FORCED_MODE); + } + } +#endif + + p = obs_properties_add_text(props, "opts", obs_module_text("Opts"), + OBS_TEXT_DEFAULT); + obs_property_set_long_description(p, obs_module_text("Opts.TT")); + + /* Invisible properties */ + p = obs_properties_add_bool(props, "repeat_headers", "repeat_headers"); + obs_property_set_visible(p, false); + p = obs_properties_add_bool(props, "force_cuda_tex", "force_cuda_tex"); + obs_property_set_visible(p, false); + 
p = obs_properties_add_bool(props, "disable_scenecut", + "disable_scenecut"); + obs_property_set_visible(p, false); + + return props; +} + +obs_properties_t *h264_nvenc_properties(void *unused) +{ + UNUSED_PARAMETER(unused); + return nvenc_properties_internal(CODEC_H264); +} + +#ifdef ENABLE_HEVC +obs_properties_t *hevc_nvenc_properties(void *unused) +{ + UNUSED_PARAMETER(unused); + return nvenc_properties_internal(CODEC_HEVC); +} +#endif + +obs_properties_t *av1_nvenc_properties(void *unused) +{ + UNUSED_PARAMETER(unused); + return nvenc_properties_internal(CODEC_AV1); +} diff --git a/plugins/obs-nvenc/nvenc.c b/plugins/obs-nvenc/nvenc.c new file mode 100644 index 00000000000000..9bd27adced56bc --- /dev/null +++ b/plugins/obs-nvenc/nvenc.c @@ -0,0 +1,1444 @@ +#include "nvenc-internal.h" + +#include +#include + +/* ========================================================================= */ + +#define EXTRA_BUFFERS 5 + +#ifndef _WIN32 +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define max(a, b) (((a) > (b)) ? 
(a) : (b)) +#endif + +/* ------------------------------------------------------------------------- */ +/* Bitstream Buffer */ + +static bool nv_bitstream_init(struct nvenc_data *enc, struct nv_bitstream *bs) +{ + NV_ENC_CREATE_BITSTREAM_BUFFER buf = { + NV_ENC_CREATE_BITSTREAM_BUFFER_VER}; + + if (NV_FAILED(nv.nvEncCreateBitstreamBuffer(enc->session, &buf))) { + return false; + } + + bs->ptr = buf.bitstreamBuffer; + return true; +} + +static void nv_bitstream_free(struct nvenc_data *enc, struct nv_bitstream *bs) +{ + if (bs->ptr) { + nv.nvEncDestroyBitstreamBuffer(enc->session, bs->ptr); + } +} + +/* ------------------------------------------------------------------------- */ +/* Implementation */ + +static const char *h264_nvenc_get_name(void *type_data) +{ + UNUSED_PARAMETER(type_data); + return "NVIDIA NVENC H.264"; +} + +static const char *h264_nvenc_soft_get_name(void *type_data) +{ + UNUSED_PARAMETER(type_data); + return "NVIDIA NVENC H.264 (Fallback)"; +} + +#ifdef ENABLE_HEVC +static const char *hevc_nvenc_get_name(void *type_data) +{ + UNUSED_PARAMETER(type_data); + return "NVIDIA NVENC HEVC"; +} + +static const char *hevc_nvenc_soft_get_name(void *type_data) +{ + UNUSED_PARAMETER(type_data); + return "NVIDIA NVENC HEVC (Fallback)"; +} +#endif + +static const char *av1_nvenc_get_name(void *type_data) +{ + UNUSED_PARAMETER(type_data); + return "NVIDIA NVENC AV1"; +} + +static const char *av1_nvenc_soft_get_name(void *type_data) +{ + UNUSED_PARAMETER(type_data); + return "NVIDIA NVENC AV1 (Fallback)"; +} + +static inline int nv_get_cap(struct nvenc_data *enc, NV_ENC_CAPS cap) +{ + if (!enc->session) + return 0; + + NV_ENC_CAPS_PARAM param = {NV_ENC_CAPS_PARAM_VER}; + int v; + + param.capsToQuery = cap; + nv.nvEncGetEncodeCaps(enc->session, enc->codec_guid, &param, &v); + return v; +} + +static bool nvenc_update(void *data, obs_data_t *settings) +{ + struct nvenc_data *enc = data; + + /* Only support reconfiguration of CBR bitrate */ + if 
(enc->can_change_bitrate) { + enc->props.bitrate = obs_data_get_int(settings, "bitrate"); + enc->props.max_bitrate = + obs_data_get_int(settings, "max_bitrate"); + + bool vbr = (enc->config.rcParams.rateControlMode == + NV_ENC_PARAMS_RC_VBR); + enc->config.rcParams.averageBitRate = + (uint32_t)enc->props.bitrate * 1000; + enc->config.rcParams.maxBitRate = + vbr ? (uint32_t)enc->props.max_bitrate * 1000 + : (uint32_t)enc->props.bitrate * 1000; + + NV_ENC_RECONFIGURE_PARAMS params = {0}; + params.version = NV_ENC_RECONFIGURE_PARAMS_VER; + params.reInitEncodeParams = enc->params; + params.resetEncoder = 1; + params.forceIDR = 1; + + if (NV_FAILED(nv.nvEncReconfigureEncoder(enc->session, + &params))) { + return false; + } + } + + return true; +} + +static bool init_session(struct nvenc_data *enc) +{ + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = { + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER}; + params.apiVersion = NVENCAPI_VERSION; +#ifdef _WIN32 + if (enc->non_texture) { + params.device = enc->cu_ctx; + params.deviceType = NV_ENC_DEVICE_TYPE_CUDA; + } else { + params.device = enc->device; + params.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX; + } +#else + params.device = enc->cu_ctx; + params.deviceType = NV_ENC_DEVICE_TYPE_CUDA; +#endif + + if (NV_FAILED(nv.nvEncOpenEncodeSessionEx(&params, &enc->session))) { + return false; + } + return true; +} + +static void initialize_params(struct nvenc_data *enc, const GUID *nv_preset, + NV_ENC_TUNING_INFO nv_tuning, uint32_t width, + uint32_t height, uint32_t fps_num, + uint32_t fps_den) +{ + NV_ENC_INITIALIZE_PARAMS *params = &enc->params; + memset(params, 0, sizeof(*params)); + params->version = NV_ENC_INITIALIZE_PARAMS_VER; + params->encodeGUID = enc->codec_guid; + params->presetGUID = *nv_preset; + params->encodeWidth = width; + params->encodeHeight = height; + params->darWidth = width; + params->darHeight = height; + params->frameRateNum = fps_num; + params->frameRateDen = fps_den; + params->enableEncodeAsync = 0; + params->enablePTD 
= 1; + params->encodeConfig = &enc->config; + params->tuningInfo = nv_tuning; +#ifdef NVENC_12_1_OR_LATER + params->splitEncodeMode = + (NV_ENC_SPLIT_ENCODE_MODE)enc->props.split_encode; +#endif +} + +static inline GUID get_nv_preset(const char *preset2) +{ + if (astrcmpi(preset2, "p1") == 0) { + return NV_ENC_PRESET_P1_GUID; + } else if (astrcmpi(preset2, "p2") == 0) { + return NV_ENC_PRESET_P2_GUID; + } else if (astrcmpi(preset2, "p3") == 0) { + return NV_ENC_PRESET_P3_GUID; + } else if (astrcmpi(preset2, "p4") == 0) { + return NV_ENC_PRESET_P4_GUID; + } else if (astrcmpi(preset2, "p6") == 0) { + return NV_ENC_PRESET_P6_GUID; + } else if (astrcmpi(preset2, "p7") == 0) { + return NV_ENC_PRESET_P7_GUID; + } else { + return NV_ENC_PRESET_P5_GUID; + } +} + +static inline NV_ENC_TUNING_INFO get_nv_tuning(const char *tuning) +{ + if (astrcmpi(tuning, "ll") == 0) { + return NV_ENC_TUNING_INFO_LOW_LATENCY; + } else if (astrcmpi(tuning, "ull") == 0) { + return NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY; +#ifdef NVENC_12_2_OR_LATER + } else if (astrcmpi(tuning, "uhq") == 0) { + return NV_ENC_TUNING_INFO_ULTRA_HIGH_QUALITY; +#endif + } else { + return NV_ENC_TUNING_INFO_HIGH_QUALITY; + } +} + +static inline NV_ENC_MULTI_PASS get_nv_multipass(const char *multipass) +{ + if (astrcmpi(multipass, "qres") == 0) { + return NV_ENC_TWO_PASS_QUARTER_RESOLUTION; + } else if (astrcmpi(multipass, "fullres") == 0) { + return NV_ENC_TWO_PASS_FULL_RESOLUTION; + } else { + return NV_ENC_MULTI_PASS_DISABLED; + } +} + +static bool is_10_bit(const struct nvenc_data *enc) +{ + return enc->non_texture ? 
enc->in_format == VIDEO_FORMAT_P010 + : obs_p010_tex_active(); +} + +static bool init_encoder_base(struct nvenc_data *enc, obs_data_t *settings) +{ + UNUSED_PARAMETER(settings); + + int bitrate = (int)enc->props.bitrate; + int max_bitrate = (int)enc->props.max_bitrate; + int rc_lookahead = 0; + + bool cqvbr = astrcmpi(enc->props.rate_control, "CQVBR") == 0; + bool vbr = cqvbr || astrcmpi(enc->props.rate_control, "VBR") == 0; + bool lossless = strcmp(enc->props.rate_control, "lossless") == 0; + + NVENCSTATUS err; + + video_t *video = obs_encoder_video(enc->encoder); + const struct video_output_info *voi = video_output_get_info(video); + + enc->cx = obs_encoder_get_width(enc->encoder); + enc->cy = obs_encoder_get_height(enc->encoder); + + /* -------------------------- */ + /* get preset */ + + GUID nv_preset = get_nv_preset(enc->props.preset); + NV_ENC_TUNING_INFO nv_tuning = get_nv_tuning(enc->props.tune); + NV_ENC_MULTI_PASS nv_multipass = get_nv_multipass(enc->props.multipass); + + if (lossless) { + nv_tuning = NV_ENC_TUNING_INFO_LOSSLESS; + nv_multipass = NV_ENC_MULTI_PASS_DISABLED; + enc->props.adaptive_quantization = false; + enc->props.cqp = 0; + } + + /* -------------------------- */ + /* get preset default config */ + + NV_ENC_PRESET_CONFIG preset_config = {0}; + preset_config.version = NV_ENC_PRESET_CONFIG_VER; + preset_config.presetCfg.version = NV_ENC_CONFIG_VER; + + err = nv.nvEncGetEncodePresetConfigEx(enc->session, enc->codec_guid, + nv_preset, nv_tuning, + &preset_config); + if (nv_failed(enc->encoder, err, __FUNCTION__, + "nvEncGetEncodePresetConfig")) { + return false; + } + + /* -------------------------- */ + /* main configuration */ + + enc->config = preset_config.presetCfg; + + int keyint = (int)enc->props.keyint_sec * voi->fps_num / voi->fps_den; + get_user_arg_int(enc, "keyint", &keyint); + + uint32_t gop_size = keyint > 0 ? 
keyint : 250; + + NV_ENC_CONFIG *config = &enc->config; + + initialize_params(enc, &nv_preset, nv_tuning, voi->width, voi->height, + voi->fps_num, voi->fps_den); + + config->gopLength = gop_size; + config->frameIntervalP = gop_size == 1 ? 0 : (int32_t)enc->props.bf + 1; + + /* lookahead */ + + const bool use_profile_lookahead = config->rcParams.enableLookahead; + bool lookahead = nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_LOOKAHEAD) && + (enc->props.lookahead || use_profile_lookahead); + + if (lookahead) { + rc_lookahead = use_profile_lookahead + ? config->rcParams.lookaheadDepth + : 8; + + /* Due to the additional calculations required to handle lookahead, + * get the user override here (if any). */ + get_user_arg_int(enc, "lookaheadDepth", &rc_lookahead); + } + + int buf_count = max(4, config->frameIntervalP * 2 * 2); + if (lookahead) { + buf_count = + max(buf_count, config->frameIntervalP + rc_lookahead + + EXTRA_BUFFERS); + } + + buf_count = min(64, buf_count); + enc->buf_count = buf_count; + + const int output_delay = buf_count - 1; + enc->output_delay = output_delay; + + if (lookahead) { + const int lkd_bound = output_delay - config->frameIntervalP - 4; + if (lkd_bound >= 0) { + config->rcParams.enableLookahead = 1; + config->rcParams.lookaheadDepth = + min(rc_lookahead, lkd_bound); + config->rcParams.disableIadapt = 0; + config->rcParams.disableBadapt = 0; + } else { + lookahead = false; + } + } + + enc->config.rcParams.disableIadapt = enc->props.disable_scenecut; + + /* psycho aq */ + + if (enc->props.adaptive_quantization) { + config->rcParams.enableAQ = 1; + config->rcParams.aqStrength = 8; + config->rcParams.enableTemporalAQ = + nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ); + } + + /* -------------------------- */ + /* rate control */ + + enc->can_change_bitrate = + nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE); + + config->rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR; + config->rcParams.averageBitRate = bitrate * 1000; + 
config->rcParams.maxBitRate = vbr ? max_bitrate * 1000 : bitrate * 1000; + config->rcParams.vbvBufferSize = bitrate * 1000; + + if (strcmp(enc->props.rate_control, "CQP") == 0 || lossless) { + int cqp_val = enc->codec == CODEC_AV1 ? (int)enc->props.cqp * 4 + : (int)enc->props.cqp; + + config->rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP; + config->rcParams.constQP.qpInterP = cqp_val; + config->rcParams.constQP.qpInterB = cqp_val; + config->rcParams.constQP.qpIntra = cqp_val; + enc->can_change_bitrate = false; + + bitrate = 0; + max_bitrate = 0; + + } else if (!vbr) { /* CBR by default */ + config->rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR; + } else if (cqvbr) { + config->rcParams.targetQuality = + (uint8_t)enc->props.target_quality; + config->rcParams.averageBitRate = 0; + config->rcParams.vbvBufferSize = 0; + } + + config->rcParams.multiPass = nv_multipass; + config->rcParams.qpMapMode = NV_ENC_QP_MAP_DELTA; + + /* -------------------------- */ + /* initialize */ + + info("settings:\n" + "\tcodec: %s\n" + "\trate_control: %s\n" + "\tbitrate: %d\n" + "\tmax_bitrate: %d\n" + "\tcq/cqp: %ld\n" + "\tkeyint: %d\n" + "\tpreset: %s\n" + "\ttuning: %s\n" + "\tmultipass: %s\n" + "\tprofile: %s\n" + "\twidth: %d\n" + "\theight: %d\n" + "\tb-frames: %ld\n" + "\tb-ref-mode: %ld\n" + "\tlookahead: %s (%d)\n" + "\taq: %s\n" + "\tsplit encode: %ld\n" + "\tuser opts: %s\n", + get_codec_name(enc->codec), enc->props.rate_control, bitrate, + max_bitrate, vbr ? enc->props.target_quality : enc->props.cqp, + gop_size, enc->props.preset, enc->props.tune, enc->props.multipass, + enc->props.profile, enc->cx, enc->cy, enc->props.bf, + enc->props.bframe_ref_mode, lookahead ? "true" : "false", + rc_lookahead, enc->props.adaptive_quantization ? 
"true" : "false", + enc->props.split_encode, enc->props.opts_str); + + return true; +} + +static bool init_encoder_h264(struct nvenc_data *enc, obs_data_t *settings) +{ + bool lossless = strcmp(enc->props.rate_control, "lossless") == 0; + + if (!init_encoder_base(enc, settings)) { + return false; + } + + NV_ENC_CONFIG *config = &enc->config; + NV_ENC_CONFIG_H264 *h264_config = &config->encodeCodecConfig.h264Config; + NV_ENC_CONFIG_H264_VUI_PARAMETERS *vui_params = + &h264_config->h264VUIParameters; + + video_t *video = obs_encoder_video(enc->encoder); + const struct video_output_info *voi = video_output_get_info(video); + + if (enc->props.repeat_headers) { + h264_config->repeatSPSPPS = 1; + h264_config->disableSPSPPS = 0; + h264_config->outputAUD = 1; + } + + h264_config->idrPeriod = config->gopLength; + + h264_config->sliceMode = 3; + h264_config->sliceModeData = 1; + + h264_config->useBFramesAsRef = + (NV_ENC_BFRAME_REF_MODE)enc->props.bframe_ref_mode; + + /* Enable CBR padding */ + if (config->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR) + h264_config->enableFillerDataInsertion = 1; + + vui_params->videoSignalTypePresentFlag = 1; + vui_params->videoFullRangeFlag = (voi->range == VIDEO_RANGE_FULL); + vui_params->colourDescriptionPresentFlag = 1; + + switch (voi->colorspace) { + case VIDEO_CS_601: + vui_params->colourPrimaries = 6; + vui_params->transferCharacteristics = 6; + vui_params->colourMatrix = 6; + break; + case VIDEO_CS_DEFAULT: + case VIDEO_CS_709: + vui_params->colourPrimaries = 1; + vui_params->transferCharacteristics = 1; + vui_params->colourMatrix = 1; + break; + case VIDEO_CS_SRGB: + vui_params->colourPrimaries = 1; + vui_params->transferCharacteristics = 13; + vui_params->colourMatrix = 1; + break; + default: + break; + } + + if (lossless) { + h264_config->qpPrimeYZeroTransformBypassFlag = 1; + } else if (strcmp(enc->props.rate_control, "CBR") == 0) { /* CBR */ + h264_config->outputBufferingPeriodSEI = 1; + } + + 
h264_config->outputPictureTimingSEI = 1; + + /* -------------------------- */ + /* profile */ + + if (enc->in_format == VIDEO_FORMAT_I444) { + config->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID; + h264_config->chromaFormatIDC = 3; + } else if (astrcmpi(enc->props.profile, "main") == 0) { + config->profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID; + } else if (astrcmpi(enc->props.profile, "baseline") == 0) { + config->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID; + } else if (!lossless) { + config->profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID; + } + + apply_user_args(enc); + + if (NV_FAILED(nv.nvEncInitializeEncoder(enc->session, &enc->params))) { + return false; + } + + return true; +} + +static bool init_encoder_hevc(struct nvenc_data *enc, obs_data_t *settings) +{ + if (!init_encoder_base(enc, settings)) { + return false; + } + + NV_ENC_CONFIG *config = &enc->config; + NV_ENC_CONFIG_HEVC *hevc_config = &config->encodeCodecConfig.hevcConfig; + NV_ENC_CONFIG_HEVC_VUI_PARAMETERS *vui_params = + &hevc_config->hevcVUIParameters; + + video_t *video = obs_encoder_video(enc->encoder); + const struct video_output_info *voi = video_output_get_info(video); + + if (enc->props.repeat_headers) { + hevc_config->repeatSPSPPS = 1; + hevc_config->disableSPSPPS = 0; + hevc_config->outputAUD = 1; + } + + hevc_config->idrPeriod = config->gopLength; + + hevc_config->sliceMode = 3; + hevc_config->sliceModeData = 1; + + hevc_config->useBFramesAsRef = + (NV_ENC_BFRAME_REF_MODE)enc->props.bframe_ref_mode; + + /* Enable CBR padding */ + if (config->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR) + hevc_config->enableFillerDataInsertion = 1; + + vui_params->videoSignalTypePresentFlag = 1; + vui_params->videoFullRangeFlag = (voi->range == VIDEO_RANGE_FULL); + vui_params->colourDescriptionPresentFlag = 1; + + switch (voi->colorspace) { + case VIDEO_CS_601: + vui_params->colourPrimaries = 6; + vui_params->transferCharacteristics = 6; + vui_params->colourMatrix = 6; + break; + case 
VIDEO_CS_DEFAULT: + case VIDEO_CS_709: + vui_params->colourPrimaries = 1; + vui_params->transferCharacteristics = 1; + vui_params->colourMatrix = 1; + break; + case VIDEO_CS_SRGB: + vui_params->colourPrimaries = 1; + vui_params->transferCharacteristics = 13; + vui_params->colourMatrix = 1; + break; + case VIDEO_CS_2100_PQ: + vui_params->colourPrimaries = 9; + vui_params->transferCharacteristics = 16; + vui_params->colourMatrix = 9; + vui_params->chromaSampleLocationFlag = 1; + vui_params->chromaSampleLocationTop = 2; + vui_params->chromaSampleLocationBot = 2; + break; + case VIDEO_CS_2100_HLG: + vui_params->colourPrimaries = 9; + vui_params->transferCharacteristics = 18; + vui_params->colourMatrix = 9; + vui_params->chromaSampleLocationFlag = 1; + vui_params->chromaSampleLocationTop = 2; + vui_params->chromaSampleLocationBot = 2; + } + + if (astrcmpi(enc->props.rate_control, "cbr") == 0) { + hevc_config->outputBufferingPeriodSEI = 1; + } + + hevc_config->outputPictureTimingSEI = 1; + + /* -------------------------- */ + /* profile */ + + bool profile_is_10bpc = false; + + if (enc->in_format == VIDEO_FORMAT_I444) { + config->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID; + hevc_config->chromaFormatIDC = 3; + } else if (astrcmpi(enc->props.profile, "main10") == 0) { + config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID; + profile_is_10bpc = true; + } else if (is_10_bit(enc)) { + blog(LOG_WARNING, "[obs-nvenc] Forcing main10 for P010"); + config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID; + profile_is_10bpc = true; + } else { + config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID; + } + +#ifndef NVENC_12_2_OR_LATER + hevc_config->pixelBitDepthMinus8 = is_10_bit(enc) ? 2 : 0; +#else + hevc_config->inputBitDepth = is_10_bit(enc) ? NV_ENC_BIT_DEPTH_10 + : NV_ENC_BIT_DEPTH_8; + hevc_config->outputBitDepth = profile_is_10bpc ? 
NV_ENC_BIT_DEPTH_10 + : NV_ENC_BIT_DEPTH_8; +#endif + + apply_user_args(enc); + + if (NV_FAILED(nv.nvEncInitializeEncoder(enc->session, &enc->params))) { + return false; + } + + return true; +} + +static bool init_encoder_av1(struct nvenc_data *enc, obs_data_t *settings) +{ + if (!init_encoder_base(enc, settings)) { + return false; + } + + NV_ENC_CONFIG *config = &enc->config; + NV_ENC_CONFIG_AV1 *av1_config = &config->encodeCodecConfig.av1Config; + + video_t *video = obs_encoder_video(enc->encoder); + const struct video_output_info *voi = video_output_get_info(video); + + av1_config->idrPeriod = config->gopLength; + + av1_config->useBFramesAsRef = + (NV_ENC_BFRAME_REF_MODE)enc->props.bframe_ref_mode; + + av1_config->colorRange = (voi->range == VIDEO_RANGE_FULL); + + /* Enable CBR padding */ + if (config->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR) + av1_config->enableBitstreamPadding = 1; + +#define PIXELCOUNT_4K (3840 * 2160) + + /* If size is 4K+, set tiles to 2 uniform columns. 
*/ + if ((voi->width * voi->height) >= PIXELCOUNT_4K) + av1_config->numTileColumns = 2; + + switch (voi->colorspace) { + case VIDEO_CS_601: + av1_config->colorPrimaries = 6; + av1_config->transferCharacteristics = 6; + av1_config->matrixCoefficients = 6; + break; + case VIDEO_CS_DEFAULT: + case VIDEO_CS_709: + av1_config->colorPrimaries = 1; + av1_config->transferCharacteristics = 1; + av1_config->matrixCoefficients = 1; + break; + case VIDEO_CS_SRGB: + av1_config->colorPrimaries = 1; + av1_config->transferCharacteristics = 13; + av1_config->matrixCoefficients = 1; + break; + case VIDEO_CS_2100_PQ: + av1_config->colorPrimaries = 9; + av1_config->transferCharacteristics = 16; + av1_config->matrixCoefficients = 9; + break; + case VIDEO_CS_2100_HLG: + av1_config->colorPrimaries = 9; + av1_config->transferCharacteristics = 18; + av1_config->matrixCoefficients = 9; + } + + /* -------------------------- */ + /* profile */ + + config->profileGUID = NV_ENC_AV1_PROFILE_MAIN_GUID; + av1_config->tier = NV_ENC_TIER_AV1_0; + + av1_config->level = NV_ENC_LEVEL_AV1_AUTOSELECT; + av1_config->chromaFormatIDC = 1; +#ifndef NVENC_12_2_OR_LATER + av1_config->pixelBitDepthMinus8 = is_10_bit(enc) ? 2 : 0; + av1_config->inputPixelBitDepthMinus8 = av1_config->pixelBitDepthMinus8; +#else + av1_config->inputBitDepth = is_10_bit(enc) ? 
NV_ENC_BIT_DEPTH_10 + : NV_ENC_BIT_DEPTH_8; + av1_config->outputBitDepth = av1_config->inputBitDepth; +#endif + av1_config->numFwdRefs = 1; + av1_config->numBwdRefs = 1; + av1_config->repeatSeqHdr = 1; + + apply_user_args(enc); + + if (NV_FAILED(nv.nvEncInitializeEncoder(enc->session, &enc->params))) { + return false; + } + + return true; +} + +static bool init_bitstreams(struct nvenc_data *enc) +{ + da_reserve(enc->bitstreams, enc->buf_count); + for (uint32_t i = 0; i < enc->buf_count; i++) { + struct nv_bitstream bitstream; + if (!nv_bitstream_init(enc, &bitstream)) { + return false; + } + + da_push_back(enc->bitstreams, &bitstream); + } + + return true; +} + +static enum video_format get_preferred_format(enum video_format format) +{ + switch (format) { + case VIDEO_FORMAT_I010: + case VIDEO_FORMAT_P010: + return VIDEO_FORMAT_P010; + case VIDEO_FORMAT_RGBA: + case VIDEO_FORMAT_BGRA: + case VIDEO_FORMAT_BGRX: + case VIDEO_FORMAT_I444: + return VIDEO_FORMAT_I444; + default: + return VIDEO_FORMAT_NV12; + } +} + +static void nvenc_destroy(void *data); + +static bool init_encoder(struct nvenc_data *enc, enum codec_type codec, + obs_data_t *settings, obs_encoder_t *encoder) +{ + UNUSED_PARAMETER(codec); + UNUSED_PARAMETER(encoder); + + const bool support_10bit = + nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_10BIT_ENCODE); + const bool support_444 = + nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_YUV444_ENCODE); + + video_t *video = obs_encoder_video(enc->encoder); + const struct video_output_info *voi = video_output_get_info(video); + enum video_format pref_format = + obs_encoder_get_preferred_video_format(enc->encoder); + if (pref_format == VIDEO_FORMAT_NONE) + pref_format = voi->format; + + enc->in_format = get_preferred_format(pref_format); + + if (enc->in_format == VIDEO_FORMAT_I444 && !support_444) { + NV_FAIL(obs_module_text("NVENC.444Unsupported")); + return false; + } + + if (is_10_bit(enc) && !support_10bit) { + NV_FAIL(obs_module_text("10bitUnsupported")); + return false; + } 
+ + switch (voi->format) { + case VIDEO_FORMAT_I010: + case VIDEO_FORMAT_P010: + break; + default: + switch (voi->colorspace) { + case VIDEO_CS_2100_PQ: + case VIDEO_CS_2100_HLG: + NV_FAIL(obs_module_text("8bitUnsupportedHdr")); + return false; + default: + break; + } + } + + switch (enc->codec) { + case CODEC_HEVC: + return init_encoder_hevc(enc, settings); + case CODEC_H264: + return init_encoder_h264(enc, settings); + case CODEC_AV1: + return init_encoder_av1(enc, settings); + } + + return false; +} + +static void *nvenc_create_internal(enum codec_type codec, obs_data_t *settings, + obs_encoder_t *encoder, bool texture) +{ + struct nvenc_data *enc = bzalloc(sizeof(*enc)); + enc->encoder = encoder; + enc->codec = codec; + enc->first_packet = true; + enc->non_texture = !texture; + + nvenc_properties_read(&enc->props, settings); + + NV_ENCODE_API_FUNCTION_LIST init = {NV_ENCODE_API_FUNCTION_LIST_VER}; + + switch (enc->codec) { + case CODEC_H264: + enc->codec_guid = NV_ENC_CODEC_H264_GUID; + break; + case CODEC_HEVC: + enc->codec_guid = NV_ENC_CODEC_HEVC_GUID; + break; + case CODEC_AV1: + enc->codec_guid = NV_ENC_CODEC_AV1_GUID; + break; + } + + if (!init_nvenc(encoder)) + goto fail; + +#ifdef _WIN32 + if (texture ? !d3d11_init(enc, settings) : !init_cuda(encoder)) + goto fail; +#else + if (!init_cuda(encoder)) + goto fail; +#endif + + if (NV_FAILED(nv_create_instance(&init))) + goto fail; + + if (!cuda_ctx_init(enc, settings, texture)) + goto fail; + + if (!init_session(enc)) { + goto fail; + } + if (!init_encoder(enc, codec, settings, encoder)) { + goto fail; + } + if (!init_bitstreams(enc)) { + goto fail; + } + +#ifdef _WIN32 + if (texture ? 
!d3d11_init_textures(enc) : !cuda_init_surfaces(enc)) + goto fail; +#else + if (!cuda_init_surfaces(enc)) + goto fail; +#endif + + enc->codec = codec; + + return enc; + +fail: + nvenc_destroy(enc); + return NULL; +} + +static void *nvenc_create_base(enum codec_type codec, obs_data_t *settings, + obs_encoder_t *encoder, bool texture) +{ + /* This encoder requires shared textures, this cannot be used on a + * gpu other than the one OBS is currently running on. + * + * 2024 Amendment: On Linux when using CUDA<->OpenGL interop we can + * in fact use shared textures even when using a different GPU, this + * will still copy data through the CPU, but much more efficiently than + * our native non-texture encoder. For now allow this via a hidden + * option as it may cause issues for people. + */ + const int gpu = (int)obs_data_get_int(settings, "device"); +#ifndef _WIN32 + const bool force_tex = obs_data_get_bool(settings, "force_cuda_tex"); +#else + const bool force_tex = false; +#endif + + if (gpu != -1 && texture && !force_tex) { + blog(LOG_INFO, + "[obs-nvenc] different GPU selected by user, falling back " + "to non-texture encoder"); + goto reroute; + } + + if (obs_encoder_scaling_enabled(encoder)) { + if (obs_encoder_gpu_scaling_enabled(encoder)) { + blog(LOG_INFO, "[obs-nvenc] GPU scaling enabled"); + } else if (texture) { + blog(LOG_INFO, + "[obs-nvenc] CPU scaling enabled, falling back to" + " non-texture encoder"); + goto reroute; + } + } + + if (texture && !obs_p010_tex_active() && !obs_nv12_tex_active()) { + blog(LOG_INFO, + "[obs-nvenc] nv12/p010 not active, falling back to " + "non-texture encoder"); + goto reroute; + } + + struct nvenc_data *enc = + nvenc_create_internal(codec, settings, encoder, texture); + + if (enc) { + return enc; + } + +reroute: + if (!texture) { + blog(LOG_ERROR, + "Already in non_texture encoder, can't fall back further!"); + return NULL; + } + + switch (codec) { + case CODEC_H264: + return obs_encoder_create_rerouted(encoder, + 
"obs_nvenc_h264_soft"); + case CODEC_HEVC: + return obs_encoder_create_rerouted(encoder, + "obs_nvenc_hevc_soft"); + case CODEC_AV1: + return obs_encoder_create_rerouted(encoder, + "obs_nvenc_av1_soft"); + } + + return NULL; +} + +static void *h264_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder) +{ + return nvenc_create_base(CODEC_H264, settings, encoder, true); +} + +#ifdef ENABLE_HEVC +static void *hevc_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder) +{ + return nvenc_create_base(CODEC_HEVC, settings, encoder, true); +} +#endif + +static void *av1_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder) +{ + return nvenc_create_base(CODEC_AV1, settings, encoder, true); +} + +static void *h264_nvenc_soft_create(obs_data_t *settings, + obs_encoder_t *encoder) +{ + return nvenc_create_base(CODEC_H264, settings, encoder, false); +} + +#ifdef ENABLE_HEVC +static void *hevc_nvenc_soft_create(obs_data_t *settings, + obs_encoder_t *encoder) +{ + return nvenc_create_base(CODEC_HEVC, settings, encoder, false); +} +#endif + +static void *av1_nvenc_soft_create(obs_data_t *settings, obs_encoder_t *encoder) +{ + return nvenc_create_base(CODEC_AV1, settings, encoder, false); +} + +static bool get_encoded_packet(struct nvenc_data *enc, bool finalize); + +static void nvenc_destroy(void *data) +{ + struct nvenc_data *enc = data; + + if (enc->encode_started) { + NV_ENC_PIC_PARAMS params = {NV_ENC_PIC_PARAMS_VER}; + params.encodePicFlags = NV_ENC_PIC_FLAG_EOS; + nv.nvEncEncodePicture(enc->session, ¶ms); + get_encoded_packet(enc, true); + } + + for (size_t i = 0; i < enc->bitstreams.num; i++) { + nv_bitstream_free(enc, &enc->bitstreams.array[i]); + } + if (enc->session) + nv.nvEncDestroyEncoder(enc->session); + +#ifdef _WIN32 + d3d11_free_textures(enc); + d3d11_free(enc); +#else + cuda_opengl_free(enc); +#endif + cuda_free_surfaces(enc); + cuda_ctx_free(enc); + + bfree(enc->header); + bfree(enc->sei); + bfree(enc->roi_map); + + deque_free(&enc->dts_list); + + 
da_free(enc->surfaces); + da_free(enc->input_textures); + da_free(enc->bitstreams); +#ifdef _WIN32 + da_free(enc->textures); +#endif + da_free(enc->packet_data); + + obs_free_options(enc->props.opts); + obs_data_release(enc->props.data); + + bfree(enc); +} + +static bool get_encoded_packet(struct nvenc_data *enc, bool finalize) +{ + void *s = enc->session; + + da_resize(enc->packet_data, 0); + + if (!enc->buffers_queued) + return true; + if (!finalize && enc->buffers_queued < enc->output_delay) + return true; + + size_t count = finalize ? enc->buffers_queued : 1; + + for (size_t i = 0; i < count; i++) { + size_t cur_bs_idx = enc->cur_bitstream; + struct nv_bitstream *bs = &enc->bitstreams.array[cur_bs_idx]; +#ifdef _WIN32 + struct nv_texture *nvtex = + enc->non_texture ? NULL + : &enc->textures.array[cur_bs_idx]; + struct nv_cuda_surface *surf = + enc->non_texture ? &enc->surfaces.array[cur_bs_idx] + : NULL; +#else + struct nv_cuda_surface *surf = &enc->surfaces.array[cur_bs_idx]; +#endif + + /* ---------------- */ + + NV_ENC_LOCK_BITSTREAM lock = {NV_ENC_LOCK_BITSTREAM_VER}; + lock.outputBitstream = bs->ptr; + lock.doNotWait = false; + + if (NV_FAILED(nv.nvEncLockBitstream(s, &lock))) { + return false; + } + + if (enc->first_packet) { + NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = {0}; + uint8_t buf[256]; + uint32_t size = 0; + + payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER; + payload.spsppsBuffer = buf; + payload.inBufferSize = sizeof(buf); + payload.outSPSPPSPayloadSize = &size; + + nv.nvEncGetSequenceParams(s, &payload); + enc->header = bmemdup(buf, size); + enc->header_size = size; + enc->first_packet = false; + } + + da_copy_array(enc->packet_data, lock.bitstreamBufferPtr, + lock.bitstreamSizeInBytes); + + enc->packet_pts = (int64_t)lock.outputTimeStamp; + enc->packet_keyframe = lock.pictureType == NV_ENC_PIC_TYPE_IDR; + + if (NV_FAILED(nv.nvEncUnlockBitstream(s, bs->ptr))) { + return false; + } + + /* ---------------- */ +#ifdef _WIN32 + if (nvtex && 
nvtex->mapped_res) { + NVENCSTATUS err; + err = nv.nvEncUnmapInputResource(s, nvtex->mapped_res); + if (nv_failed(enc->encoder, err, __FUNCTION__, + "unmap")) { + return false; + } + nvtex->mapped_res = NULL; + } +#endif + /* ---------------- */ + + if (surf && surf->mapped_res) { + NVENCSTATUS err; + err = nv.nvEncUnmapInputResource(s, surf->mapped_res); + if (nv_failed(enc->encoder, err, __FUNCTION__, + "unmap")) { + return false; + } + surf->mapped_res = NULL; + } + + /* ---------------- */ + + if (++enc->cur_bitstream == enc->buf_count) + enc->cur_bitstream = 0; + + enc->buffers_queued--; + } + + return true; +} + +struct roi_params { + uint32_t mb_width; + uint32_t mb_height; + uint32_t mb_size; + bool av1; + int8_t *map; +}; + +static void roi_cb(void *param, struct obs_encoder_roi *roi) +{ + const struct roi_params *rp = param; + + int8_t qp_val; + /* AV1 has a larger QP range than HEVC/H.264 */ + if (rp->av1) { + qp_val = (int8_t)(-128.0f * roi->priority); + } else { + qp_val = (int8_t)(-51.0f * roi->priority); + } + + const uint32_t roi_left = roi->left / rp->mb_size; + const uint32_t roi_top = roi->top / rp->mb_size; + const uint32_t roi_right = (roi->right - 1) / rp->mb_size; + const uint32_t roi_bottom = (roi->bottom - 1) / rp->mb_size; + + for (uint32_t mb_y = 0; mb_y < rp->mb_height; mb_y++) { + if (mb_y < roi_top || mb_y > roi_bottom) + continue; + + for (uint32_t mb_x = 0; mb_x < rp->mb_width; mb_x++) { + if (mb_x < roi_left || mb_x > roi_right) + continue; + + rp->map[mb_y * rp->mb_width + mb_x] = qp_val; + } + } +} + +static void add_roi(struct nvenc_data *enc, NV_ENC_PIC_PARAMS *params) +{ + const uint32_t increment = obs_encoder_get_roi_increment(enc->encoder); + + if (enc->roi_map && enc->roi_increment == increment) { + params->qpDeltaMap = enc->roi_map; + params->qpDeltaMapSize = (uint32_t)enc->roi_map_size; + return; + } + + uint32_t mb_size = 0; + switch (enc->codec) { + case CODEC_H264: + /* H.264 is always 16x16 */ + mb_size = 16; + break; 
+ case CODEC_HEVC: + /* HEVC can be 16x16, 32x32, or 64x64, but NVENC is always 32x32 */ + mb_size = 32; + break; + case CODEC_AV1: + /* AV1 can be 64x64 or 128x128, but NVENC is always 64x64 */ + mb_size = 64; + break; + } + + const uint32_t mb_width = (enc->cx + mb_size - 1) / mb_size; + const uint32_t mb_height = (enc->cy + mb_size - 1) / mb_size; + const size_t map_size = mb_width * mb_height * sizeof(int8_t); + + if (map_size != enc->roi_map_size) { + enc->roi_map = brealloc(enc->roi_map, map_size); + enc->roi_map_size = map_size; + } + + memset(enc->roi_map, 0, enc->roi_map_size); + + struct roi_params par = { + .mb_width = mb_width, + .mb_height = mb_height, + .mb_size = mb_size, + .av1 = enc->codec == CODEC_AV1, + .map = enc->roi_map, + }; + + obs_encoder_enum_roi(enc->encoder, roi_cb, &par); + + enc->roi_increment = increment; + params->qpDeltaMap = enc->roi_map; + params->qpDeltaMapSize = (uint32_t)map_size; +} + +bool nvenc_encode_base(struct nvenc_data *enc, struct nv_bitstream *bs, + void *pic, int64_t pts, struct encoder_packet *packet, + bool *received_packet) +{ + NV_ENC_PIC_PARAMS params = {0}; + params.version = NV_ENC_PIC_PARAMS_VER; + params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME; + params.inputBuffer = pic; + params.inputTimeStamp = (uint64_t)pts; + params.inputWidth = enc->cx; + params.inputHeight = enc->cy; + params.inputPitch = enc->cx; + params.outputBitstream = bs->ptr; + params.frameIdx = (uint32_t)pts; + + if (enc->non_texture) { + params.bufferFmt = enc->surface_format; + } else { + params.bufferFmt = obs_p010_tex_active() + ? 
NV_ENC_BUFFER_FORMAT_YUV420_10BIT + : NV_ENC_BUFFER_FORMAT_NV12; + } + + /* Add ROI map if enabled */ + if (obs_encoder_has_roi(enc->encoder)) + add_roi(enc, ¶ms); + + NVENCSTATUS err = nv.nvEncEncodePicture(enc->session, ¶ms); + if (err != NV_ENC_SUCCESS && err != NV_ENC_ERR_NEED_MORE_INPUT) { + nv_failed(enc->encoder, err, __FUNCTION__, + "nvEncEncodePicture"); + return false; + } + + enc->encode_started = true; + enc->buffers_queued++; + + if (++enc->next_bitstream == enc->buf_count) { + enc->next_bitstream = 0; + } + + /* ------------------------------------ */ + /* check for encoded packet and parse */ + + if (!get_encoded_packet(enc, false)) { + return false; + } + + /* ------------------------------------ */ + /* output encoded packet */ + + if (enc->packet_data.num) { + int64_t dts; + deque_pop_front(&enc->dts_list, &dts, sizeof(dts)); + + /* subtract bframe delay from dts for H.264/HEVC */ + if (enc->codec != CODEC_AV1) + dts -= enc->props.bf * packet->timebase_num; + + *received_packet = true; + packet->data = enc->packet_data.array; + packet->size = enc->packet_data.num; + packet->type = OBS_ENCODER_VIDEO; + packet->pts = enc->packet_pts; + packet->dts = dts; + packet->keyframe = enc->packet_keyframe; + } else { + *received_packet = false; + } + + return true; +} + +static void nvenc_soft_video_info(void *data, struct video_scale_info *info) +{ + struct nvenc_data *enc = data; + info->format = enc->in_format; +} + +static bool nvenc_extra_data(void *data, uint8_t **header, size_t *size) +{ + struct nvenc_data *enc = data; + + if (!enc->header) { + return false; + } + + *header = enc->header; + *size = enc->header_size; + return true; +} + +static bool nvenc_sei_data(void *data, uint8_t **sei, size_t *size) +{ + struct nvenc_data *enc = data; + + if (!enc->sei) { + return false; + } + + *sei = enc->sei; + *size = enc->sei_size; + return true; +} + +struct obs_encoder_info h264_nvenc_info = { + .id = "obs_nvenc_h264_tex", + .codec = "h264", + .type = 
OBS_ENCODER_VIDEO, + .caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE | + OBS_ENCODER_CAP_ROI, + .get_name = h264_nvenc_get_name, + .create = h264_nvenc_create, + .destroy = nvenc_destroy, + .update = nvenc_update, +#ifdef _WIN32 + .encode_texture2 = d3d11_encode, +#else + .encode_texture2 = cuda_opengl_encode, +#endif + .get_defaults = h264_nvenc_defaults, + .get_properties = h264_nvenc_properties, + .get_extra_data = nvenc_extra_data, + .get_sei_data = nvenc_sei_data, +}; + +#ifdef ENABLE_HEVC +struct obs_encoder_info hevc_nvenc_info = { + .id = "obs_nvenc_hevc_tex", + .codec = "hevc", + .type = OBS_ENCODER_VIDEO, + .caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE | + OBS_ENCODER_CAP_ROI, + .get_name = hevc_nvenc_get_name, + .create = hevc_nvenc_create, + .destroy = nvenc_destroy, + .update = nvenc_update, +#ifdef _WIN32 + .encode_texture2 = d3d11_encode, +#else + .encode_texture2 = cuda_opengl_encode, +#endif + .get_defaults = hevc_nvenc_defaults, + .get_properties = hevc_nvenc_properties, + .get_extra_data = nvenc_extra_data, + .get_sei_data = nvenc_sei_data, +}; +#endif + +struct obs_encoder_info av1_nvenc_info = { + .id = "obs_nvenc_av1_tex", + .codec = "av1", + .type = OBS_ENCODER_VIDEO, + .caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE | + OBS_ENCODER_CAP_ROI, + .get_name = av1_nvenc_get_name, + .create = av1_nvenc_create, + .destroy = nvenc_destroy, + .update = nvenc_update, +#ifdef _WIN32 + .encode_texture2 = d3d11_encode, +#else + .encode_texture2 = cuda_opengl_encode, +#endif + .get_defaults = av1_nvenc_defaults, + .get_properties = av1_nvenc_properties, + .get_extra_data = nvenc_extra_data, +}; + +struct obs_encoder_info h264_nvenc_soft_info = { + .id = "obs_nvenc_h264_soft", + .codec = "h264", + .type = OBS_ENCODER_VIDEO, + .caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI | + OBS_ENCODER_CAP_INTERNAL, + .get_name = h264_nvenc_soft_get_name, + .create = h264_nvenc_soft_create, + .destroy = 
nvenc_destroy, + .update = nvenc_update, + .encode = cuda_encode, + .get_defaults = h264_nvenc_defaults, + .get_properties = h264_nvenc_properties, + .get_extra_data = nvenc_extra_data, + .get_sei_data = nvenc_sei_data, + .get_video_info = nvenc_soft_video_info, +}; + +#ifdef ENABLE_HEVC +struct obs_encoder_info hevc_nvenc_soft_info = { + .id = "obs_nvenc_hevc_soft", + .codec = "hevc", + .type = OBS_ENCODER_VIDEO, + .caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI | + OBS_ENCODER_CAP_INTERNAL, + .get_name = hevc_nvenc_soft_get_name, + .create = hevc_nvenc_soft_create, + .destroy = nvenc_destroy, + .update = nvenc_update, + .encode = cuda_encode, + .get_defaults = hevc_nvenc_defaults, + .get_properties = hevc_nvenc_properties, + .get_extra_data = nvenc_extra_data, + .get_sei_data = nvenc_sei_data, + .get_video_info = nvenc_soft_video_info, +}; +#endif + +struct obs_encoder_info av1_nvenc_soft_info = { + .id = "obs_nvenc_av1_soft", + .codec = "av1", + .type = OBS_ENCODER_VIDEO, + .caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI | + OBS_ENCODER_CAP_INTERNAL, + .get_name = av1_nvenc_soft_get_name, + .create = av1_nvenc_soft_create, + .destroy = nvenc_destroy, + .update = nvenc_update, + .encode = cuda_encode, + .get_defaults = av1_nvenc_defaults, + .get_properties = av1_nvenc_properties, + .get_extra_data = nvenc_extra_data, + .get_video_info = nvenc_soft_video_info, +}; + +void register_encoders(void) +{ + obs_register_encoder(&h264_nvenc_info); + obs_register_encoder(&h264_nvenc_soft_info); +#ifdef ENABLE_HEVC + obs_register_encoder(&hevc_nvenc_info); + obs_register_encoder(&hevc_nvenc_soft_info); +#endif + if (is_codec_supported(CODEC_AV1)) { + obs_register_encoder(&av1_nvenc_info); + obs_register_encoder(&av1_nvenc_soft_info); + } +} diff --git a/plugins/obs-ffmpeg/obs-nvenc-test/CMakeLists.txt b/plugins/obs-nvenc/obs-nvenc-test/CMakeLists.txt similarity index 53% rename from plugins/obs-ffmpeg/obs-nvenc-test/CMakeLists.txt rename to 
plugins/obs-nvenc/obs-nvenc-test/CMakeLists.txt index 351a2e055b913b..db51d25e1ed068 100644 --- a/plugins/obs-ffmpeg/obs-nvenc-test/CMakeLists.txt +++ b/plugins/obs-nvenc/obs-nvenc-test/CMakeLists.txt @@ -1,14 +1,12 @@ cmake_minimum_required(VERSION 3.24...3.25) -legacy_check() - find_package(FFnvcodec 12 REQUIRED) add_executable(obs-nvenc-test) -target_sources(obs-nvenc-test PRIVATE obs-nvenc-test.c) -target_link_libraries(obs-nvenc-test OBS::obs-nvenc-version FFnvcodec::FFnvcodec d3d11 dxgi dxguid) +target_sources(obs-nvenc-test PRIVATE obs-nvenc-test.cpp) +target_link_libraries(obs-nvenc-test FFnvcodec::FFnvcodec) # cmake-format: off -set_target_properties_obs(obs-nvenc-test PROPERTIES FOLDER plugins/obs-ffmpeg) +set_target_properties_obs(obs-nvenc-test PROPERTIES FOLDER plugins/obs-nvenc) # cmake-format: on diff --git a/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp b/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp new file mode 100644 index 00000000000000..91cfbb9cf3a61a --- /dev/null +++ b/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp @@ -0,0 +1,532 @@ +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * Utility to check for NVENC support and capabilities. + * Will check all GPUs and return INI-formatted results based on highest capability of all devices. 
*/
+
+using namespace std;
+using namespace std::chrono_literals;
+
+/* Dynamically loaded CUDA / NVENC entry points; populated by init_cuda()
+ * and init_nvenc() before any other code dereferences them. */
+static CudaFunctions *cu = nullptr;
+static NvencFunctions *nvenc = nullptr;
+
+/* NVENC API function table, filled in by NvEncodeAPICreateInstance(). */
+NV_ENCODE_API_FUNCTION_LIST nv = {NV_ENCODE_API_FUNCTION_LIST_VER};
+
+/* SDK version this tool was compiled against, packed as (major << 4) | minor
+ * so it can be compared directly with the value reported by
+ * NvEncodeAPIGetMaxSupportedVersion(). */
+static constexpr uint32_t NVENC_CONFIGURED_VERSION =
+	(NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION;
+
+/* NVML stuff
+ *
+ * NVML is loaded at runtime, so only the handful of constants, types and
+ * function signatures used by this tool are declared locally.
+ * NOTE(review): presumably these mirror nvml.h - confirm against the NVML
+ * headers when updating. */
+#define NVML_SUCCESS 0
+#define NVML_DEVICE_UUID_V2_BUFFER_SIZE 96
+#define NVML_DEVICE_NAME_V2_BUFFER_SIZE 96
+#define NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE 80
+
+typedef int nvmlReturn_t;
+typedef struct nvmlDevice *nvmlDevice_t;
+
+typedef enum nvmlEncoderType {
+	NVML_ENCODER_QUERY_H264,
+	NVML_ENCODER_QUERY_HEVC,
+	NVML_ENCODER_QUERY_AV1,
+	NVML_ENCODER_QUERY_UNKNOWN
+} nvmlEncoderType_t;
+
+typedef nvmlReturn_t (*NVML_GET_DRIVER_VER_FUNC)(char *, unsigned int);
+typedef nvmlReturn_t (*NVML_INIT_V2)();
+typedef nvmlReturn_t (*NVML_SHUTDOWN)();
+typedef nvmlReturn_t (*NVML_GET_HANDLE_BY_BUS_ID)(const char *, nvmlDevice_t *);
+typedef nvmlReturn_t (*NVML_GET_DEVICE_UUID)(nvmlDevice_t, char *, unsigned);
+typedef nvmlReturn_t (*NVML_GET_DEVICE_NAME)(nvmlDevice_t, char *, unsigned);
+typedef nvmlReturn_t (*NVML_GET_DEVICE_PCIE_GEN)(nvmlDevice_t, unsigned *);
+typedef nvmlReturn_t (*NVML_GET_DEVICE_PCIE_WIDTH)(nvmlDevice_t, unsigned *);
+typedef nvmlReturn_t (*NVML_GET_ENCODER_SESSIONS)(nvmlDevice_t, unsigned *,
+						  void *);
+typedef nvmlReturn_t (*NVML_GET_ENCODER_CAPACITY)(nvmlDevice_t, nvmlEncoderType,
+						  unsigned *);
+typedef nvmlReturn_t (*NVML_GET_ENCODER_UTILISATION)(nvmlDevice_t, unsigned *,
+						     unsigned *);
+
+/* List of capabilities to be queried per codec, paired with the key name
+ * under which each value is stored and printed. */
+static const vector<pair<NV_ENC_CAPS, string>> capabilities = {
+	{NV_ENC_CAPS_NUM_MAX_BFRAMES, "bframes"},
+	{NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE, "lossless"},
+	{NV_ENC_CAPS_SUPPORT_LOOKAHEAD, "lookahead"},
+	{NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ, "temporal_aq"},
+	{NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE, "dynamic_bitrate"},
+	{NV_ENC_CAPS_SUPPORT_10BIT_ENCODE, "10bit"},
+	{NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE, "bref"},
+	{NV_ENC_CAPS_NUM_ENCODER_ENGINES, "engines"},
+	{NV_ENC_CAPS_SUPPORT_YUV444_ENCODE, "yuv_444"},
+	{NV_ENC_CAPS_WIDTH_MAX, "max_width"},
+	{NV_ENC_CAPS_HEIGHT_MAX, "max_height"},
+/* The minor version is only meaningful together with its major version, so
+ * test both instead of letting e.g. an x.2 release of an older major pass. */
+#if NVENCAPI_MAJOR_VERSION > 12 || \
+	(NVENCAPI_MAJOR_VERSION == 12 && NVENCAPI_MINOR_VERSION >= 2)
+	/* SDK 12.2+ features */
+	{NV_ENC_CAPS_SUPPORT_TEMPORAL_FILTER, "temporal_filter"},
+	{NV_ENC_CAPS_SUPPORT_LOOKAHEAD_LEVEL, "lookahead_level"},
+#endif
+};
+
+/* Codec section name -> NVENC codec GUID, used to label the GUIDs an
+ * encode session reports. */
+static const vector<pair<string, GUID>> codecs = {
+	{"h264", NV_ENC_CODEC_H264_GUID},
+	{"hevc", NV_ENC_CODEC_HEVC_GUID},
+	{"av1", NV_ENC_CODEC_AV1_GUID}};
+
+/* codec name -> (capability name -> value) */
+typedef unordered_map<string, unordered_map<string, int>> codec_caps_map;
+
+/* Per-adapter details gathered from CUDA, NVML and NVENC. The numeric
+ * fields default to zero so they hold defined values when the NVML device
+ * lookup fails and they are never written. */
+struct device_info {
+	string pci_id;
+	string nvml_uuid;
+	string cuda_uuid;
+	string name;
+
+	uint32_t pcie_gen = 0;
+	uint32_t pcie_width = 0;
+
+	uint32_t encoder_sessions = 0;
+	uint32_t utilisation = 0;
+	uint32_t sample_period = 0;
+	uint32_t capacity_h264 = 0;
+	uint32_t capacity_hevc = 0;
+	uint32_t capacity_av1 = 0;
+
+	codec_caps_map caps;
+};
+
+/* RAII wrappers to make my life a little easier.
*/ +struct NVML { + NVML_INIT_V2 init; + NVML_SHUTDOWN shutdown; + NVML_GET_DRIVER_VER_FUNC getDriverVersion; + NVML_GET_HANDLE_BY_BUS_ID getDeviceHandleByPCIBusId; + NVML_GET_DEVICE_UUID getDeviceUUID; + NVML_GET_DEVICE_NAME getDeviceName; + NVML_GET_DEVICE_PCIE_GEN getDevicePCIeGen; + NVML_GET_DEVICE_PCIE_WIDTH getDevicePCIeWidth; + NVML_GET_ENCODER_SESSIONS getEncoderSessions; + NVML_GET_ENCODER_CAPACITY getEncoderCapacity; + NVML_GET_ENCODER_UTILISATION getEncoderUtilisation; + + NVML() = default; + + ~NVML() + { + if (initialised && shutdown) + shutdown(); + } + + bool Init() + { + if (!load_nvml_lib()) { + printf("reason=nvml_lib\n"); + return false; + } + + init = (NVML_INIT_V2)load_nvml_func("nvmlInit_v2"); + shutdown = (NVML_SHUTDOWN)load_nvml_func("nvmlShutdown"); + getDriverVersion = (NVML_GET_DRIVER_VER_FUNC)load_nvml_func( + "nvmlSystemGetDriverVersion"); + getDeviceHandleByPCIBusId = + (NVML_GET_HANDLE_BY_BUS_ID)load_nvml_func( + "nvmlDeviceGetHandleByPciBusId_v2"); + getDeviceUUID = (NVML_GET_DEVICE_UUID)load_nvml_func( + "nvmlDeviceGetUUID"); + getDeviceName = (NVML_GET_DEVICE_NAME)load_nvml_func( + "nvmlDeviceGetName"); + getDevicePCIeGen = (NVML_GET_DEVICE_PCIE_GEN)load_nvml_func( + "nvmlDeviceGetCurrPcieLinkGeneration"); + getDevicePCIeWidth = (NVML_GET_DEVICE_PCIE_WIDTH)load_nvml_func( + "nvmlDeviceGetCurrPcieLinkWidth"); + getEncoderSessions = (NVML_GET_ENCODER_SESSIONS)load_nvml_func( + "nvmlDeviceGetEncoderSessions"); + getEncoderCapacity = (NVML_GET_ENCODER_CAPACITY)load_nvml_func( + "nvmlDeviceGetEncoderCapacity"); + getEncoderUtilisation = + (NVML_GET_ENCODER_UTILISATION)load_nvml_func( + "nvmlDeviceGetEncoderUtilization"); + + if (!init || !shutdown || !getDriverVersion || + !getDeviceHandleByPCIBusId || !getDeviceUUID || + !getDeviceName || !getDevicePCIeGen || + !getDevicePCIeWidth || !getEncoderSessions || + !getEncoderCapacity || !getEncoderUtilisation) { + return false; + } + + nvmlReturn_t res = init(); + if (res != 0) { + 
printf("reason=nvml_init_%d\n", res); + return false; + } + + initialised = true; + return true; + } + +private: + bool initialised = false; + static inline void *nvml_lib = nullptr; + + bool load_nvml_lib() + { +#ifdef _WIN32 + nvml_lib = LoadLibraryA("nvml.dll"); +#else + nvml_lib = dlopen("libnvidia-ml.so.1", RTLD_LAZY); +#endif + return nvml_lib != nullptr; + } + + static void *load_nvml_func(const char *func) + { +#ifdef _WIN32 + void *func_ptr = + (void *)GetProcAddress((HMODULE)nvml_lib, func); +#else + void *func_ptr = dlsym(nvml_lib, func); +#endif + return func_ptr; + } +}; + +struct CUDACtx { + CUcontext ctx; + + CUDACtx() = default; + + ~CUDACtx() { cu->cuCtxDestroy(ctx); } + + bool Init(int adapter_idx) + { + CUdevice dev; + if (cu->cuDeviceGet(&dev, adapter_idx) != CUDA_SUCCESS) + return false; + + return cu->cuCtxCreate(&ctx, 0, dev) == CUDA_SUCCESS; + } + + string GetPCIBusId() + { + CUdevice dev; + string bus_id; + bus_id.resize(16); + + cu->cuCtxGetDevice(&dev); + cu->cuDeviceGetPCIBusId(bus_id.data(), (int)bus_id.capacity(), + dev); + return bus_id; + } + + string GetUUID() + { + CUdevice dev; + CUuuid uuid; + string uuid_str; + + cu->cuCtxGetDevice(&dev); + cu->cuDeviceGetUuid_v2(&uuid, dev); + + uuid_str.resize(32); + for (size_t idx = 0; idx < 16; idx++) { + sprintf(uuid_str.data() + idx * 2, "%02x", + uuid.bytes[idx] & 0xFF); + } + + return uuid_str; + } +}; + +struct NVSession { + void *ptr = nullptr; + + NVSession() = default; + + ~NVSession() { nv.nvEncDestroyEncoder(ptr); } + + bool OpenSession(const CUDACtx &ctx) + { + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = {}; + params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER; + params.apiVersion = NVENCAPI_VERSION; + params.device = ctx.ctx; + params.deviceType = NV_ENC_DEVICE_TYPE_CUDA; + + return nv.nvEncOpenEncodeSessionEx(¶ms, &ptr) == + NV_ENC_SUCCESS; + } +}; + +static bool init_nvenc() +{ + if (nvenc_load_functions(&nvenc, nullptr)) { + printf("reason=nvenc_lib\n"); + return 
false;
+	}
+
+	NVENCSTATUS res = nvenc->NvEncodeAPICreateInstance(&nv);
+	if (res != NV_ENC_SUCCESS) {
+		printf("reason=nvenc_init_%d\n", res);
+		return false;
+	}
+
+	return true;
+}
+/* Load the CUDA driver API and run cuInit(); prints reason= on failure. */
+static bool init_cuda()
+{
+	if (cuda_load_functions(&cu, nullptr)) {
+		printf("reason=cuda_lib\n");
+		return false;
+	}
+
+	CUresult res = cu->cuInit(0);
+	if (res != CUDA_SUCCESS) {
+		printf("reason=cuda_init_%d\n", res);
+		return false;
+	}
+
+	return true;
+}
+/* Query one CUDA device: fill device_info, raise caps to per-codec maxima. */
+static bool get_adapter_caps(int adapter_idx, codec_caps_map &caps,
+			     device_info &device_info, NVML &nvml)
+{
+	CUDACtx cudaCtx;
+	NVSession nvSession;
+
+	if (!cudaCtx.Init(adapter_idx))
+		return false;
+
+	device_info.pci_id = cudaCtx.GetPCIBusId();
+	device_info.cuda_uuid = cudaCtx.GetUUID();
+	/* NVML details are best-effort; a failed lookup is not fatal */
+	nvmlDevice_t dev;
+	if (nvml.getDeviceHandleByPCIBusId(device_info.pci_id.data(), &dev) ==
+	    NVML_SUCCESS) {
+		char uuid[NVML_DEVICE_UUID_V2_BUFFER_SIZE] = {};
+		nvml.getDeviceUUID(dev, uuid, sizeof(uuid));
+		device_info.nvml_uuid = uuid;
+		/* Zeroed buffers stay valid C strings if a query fails */
+		char name[NVML_DEVICE_NAME_V2_BUFFER_SIZE] = {};
+		nvml.getDeviceName(dev, name, sizeof(name));
+		device_info.name = name;
+
+		nvml.getDevicePCIeGen(dev, &device_info.pcie_gen);
+		nvml.getDevicePCIeWidth(dev, &device_info.pcie_width);
+		nvml.getEncoderSessions(dev, &device_info.encoder_sessions,
+					nullptr);
+		nvml.getEncoderUtilisation(dev, &device_info.utilisation,
+					   &device_info.sample_period);
+		nvml.getEncoderCapacity(dev, NVML_ENCODER_QUERY_H264,
+					&device_info.capacity_h264);
+		nvml.getEncoderCapacity(dev, NVML_ENCODER_QUERY_HEVC,
+					&device_info.capacity_hevc);
+		nvml.getEncoderCapacity(dev, NVML_ENCODER_QUERY_AV1,
+					&device_info.capacity_av1);
+	}
+
+	if (!nvSession.OpenSession(cudaCtx))
+		return false;
+
+	uint32_t guid_count = 0;
+	if (nv.nvEncGetEncodeGUIDCount(nvSession.ptr, &guid_count) !=
+	    NV_ENC_SUCCESS)
+		return false;
+
+	vector<GUID> guids;
+	guids.resize(guid_count);
+	NVENCSTATUS stat = nv.nvEncGetEncodeGUIDs(nvSession.ptr, guids.data(),
+						  guid_count, &guid_count);
+	if (stat != NV_ENC_SUCCESS)
+		return false;
+
+	NV_ENC_CAPS_PARAM param = {NV_ENC_CAPS_PARAM_VER};
+	/* Record capabilities for every codec GUID reported */
+	for (uint32_t i = 0; i < guid_count; i++) {
+		GUID *guid = &guids[i];
+		/* GUIDs not in the codecs table map to "unknown" */
+		std::string codec_name = "unknown";
+		for (const auto &[name, codec_guid] : codecs) {
+			if (memcmp(&codec_guid, guid, sizeof(GUID)) == 0) {
+				codec_name = name;
+				break;
+			}
+		}
+
+		caps[codec_name]["codec_supported"] = 1;
+		device_info.caps[codec_name]["codec_supported"] = 1;
+
+		for (const auto &[cap, name] : capabilities) {
+			int v;
+			param.capsToQuery = cap;
+			if (nv.nvEncGetEncodeCaps(nvSession.ptr, *guid, &param,
+						  &v) != NV_ENC_SUCCESS)
+				continue;
+			/* Device keeps its own value; caps keeps the max */
+			device_info.caps[codec_name][name] = v;
+			if (v > caps[codec_name][name])
+				caps[codec_name][name] = v;
+		}
+	}
+
+	return true;
+}
+/* Run all NVENC/CUDA/NVML checks, printing key=value results to stdout. */
+bool nvenc_checks(codec_caps_map &caps, vector<device_info> &device_infos)
+{
+	/* NVENC API init */
+	if (!init_nvenc())
+		return false;
+
+	/* CUDA init */
+	if (!init_cuda())
+		return false;
+
+	NVML nvml;
+	if (!nvml.Init())
+		return false;
+
+	/* --------------------------------------------------------- */
+	/* obtain adapter compatibility information */
+
+	uint32_t nvenc_ver;
+	int cuda_driver_ver;
+	int cuda_devices = 0;
+	int nvenc_devices = 0;
+	char driver_ver[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE];
+
+	/* NVIDIA driver version */
+	if (nvml.getDriverVersion(driver_ver, sizeof(driver_ver)) ==
+	    NVML_SUCCESS) {
+		printf("driver_ver=%s\n", driver_ver);
+	} else {
+		// Treat this as a non-fatal failure
+		printf("driver_ver=0.0\n");
+	}
+
+	/* CUDA driver version and devices */
+	if (cu->cuDriverGetVersion(&cuda_driver_ver) == CUDA_SUCCESS) {
+		printf("cuda_ver=%d.%d\n", cuda_driver_ver / 1000,
+		       cuda_driver_ver % 1000);
+	} else {
+		printf("reason=no_cuda_version\n");
+		return false;
+	}
+
+	if (cu->cuDeviceGetCount(&cuda_devices) == CUDA_SUCCESS &&
+	    cuda_devices) {
+		printf("cuda_devices=%d\n", cuda_devices);
+	} else {
+		printf("reason=no_devices\n");
+		return false;
+	}
+
+	/* NVENC API version */
+	if (nvenc->NvEncodeAPIGetMaxSupportedVersion(&nvenc_ver) ==
+	    NV_ENC_SUCCESS) {
+		printf("nvenc_ver=%d.%d\n", nvenc_ver >> 4, nvenc_ver & 0xf);
+	} else {
+		printf("reason=no_nvenc_version\n");
+		return false;
+	}
+
+	if (nvenc_ver < NVENC_CONFIGURED_VERSION) {
+		printf("reason=outdated_driver\n");
+		return false;
+	}
+	/* One slot per CUDA device, even those that fail the probe */
+	device_infos.resize(cuda_devices);
+	for (int idx = 0; idx < cuda_devices; idx++) {
+		if (get_adapter_caps(idx, caps, device_infos[idx], nvml))
+			nvenc_devices++;
+	}
+
+	printf("nvenc_devices=%d\n", nvenc_devices);
+	if (!nvenc_devices) {
+		printf("reason=no_supported_devices\n");
+		return false;
+	}
+
+	return true;
+}
+/* Run the checks and print an INI-style report; returns 0 when supported. */
+int check_thread()
+{
+	int ret = 0;
+	codec_caps_map caps;
+	vector<device_info> device_infos;
+	/* Pre-seed so these sections are printed even when nothing is found */
+	caps["h264"]["codec_supported"] = 0;
+	caps["hevc"]["codec_supported"] = 0;
+	caps["av1"]["codec_supported"] = 0;
+
+	printf("[general]\n");
+
+	if (nvenc_checks(caps, device_infos)) {
+		printf("nvenc_supported=true\n");
+	} else {
+		printf("nvenc_supported=false\n");
+		ret = 1;
+	}
+
+	/* Global capabilities, based on highest supported across all devices */
+	for (const auto &[codec, codec_caps] : caps) {
+		printf("\n[%s]\n", codec.c_str());
+
+		for (const auto &[name, value] : codec_caps) {
+			printf("%s=%d\n", name.c_str(), value);
+		}
+	}
+
+	/* Per-device info (mostly for debugging) */
+	for (size_t idx = 0; idx < device_infos.size(); idx++) {
+		const auto &info = device_infos[idx];
+
+		printf("\n[device.%zu]\n"
+		       "pci_id=%s\n"
+		       "nvml_uuid=%s\n"
+		       "cuda_uuid=%s\n"
+		       "name=%s\n"
+		       "pcie_link_width=%d\n"
+		       "pcie_link_gen=%d\n"
+		       "encoder_sessions=%u\n"
+		       "utilisation=%u\n"
+		       "sample_period=%u\n"
+		       "capacity_h264=%u\n"
+		       "capacity_hevc=%u\n"
+		       "capacity_av1=%u\n",
+		       idx, info.pci_id.c_str(), info.nvml_uuid.c_str(),
+		       info.cuda_uuid.c_str(), info.name.c_str(),
+		       info.pcie_width, info.pcie_gen, info.encoder_sessions,
+		       info.utilisation, info.sample_period, info.capacity_h264,
+		       info.capacity_hevc, info.capacity_av1);
+
+		for (const auto &[codec, codec_caps] : info.caps) {
+			printf("\n[device.%zu.%s]\n", idx, codec.c_str());
+
+			for (const auto &[name, value] : codec_caps) {
+				printf("%s=%d\n", name.c_str(), value);
+			}
+		}
+	}
+
+	return ret;
+}
+/* Run check_thread() asynchronously; exit with 1 if it takes over 2.5 s. */
+int main(int, char **)
+{
+	future<int> f = async(launch::async, check_thread);
+	future_status status = f.wait_for(2.5s);
+	/* exit() skips ~future(), which would wait for the thread */
+	if (status == future_status::timeout)
+		exit(1);
+
+	return f.get();
+}
diff --git a/plugins/obs-nvenc/obs-nvenc.c b/plugins/obs-nvenc/obs-nvenc.c
new file mode 100644
index 00000000000000..3e3735e56ff1a2
--- /dev/null
+++ b/plugins/obs-nvenc/obs-nvenc.c
@@ -0,0 +1,30 @@
+#include <obs-module.h>
+
+#include "obs-nvenc.h"
+
+OBS_DECLARE_MODULE()
+OBS_MODULE_USE_DEFAULT_LOCALE("obs-nvenc", "en-US")
+/* Description string exported for this module. */
+MODULE_EXPORT const char *obs_module_description(void)
+{
+	return "NVIDIA Encoder (NVENC) Plugin";
+}
+/* Module entry: bail out unless nvenc_supported(), then load NVENC + CUDA. */
+bool obs_module_load(void)
+{
+	if (!nvenc_supported()) {
+		blog(LOG_INFO, "NVENC not supported");
+		return false;
+	}
+
+	obs_nvenc_load();
+	obs_cuda_load();
+
+	return true;
+}
+/* Unload in the reverse order of obs_module_load(). */
+void obs_module_unload(void)
+{
+	obs_cuda_unload();
+	obs_nvenc_unload();
+}
diff --git a/plugins/obs-nvenc/obs-nvenc.h b/plugins/obs-nvenc/obs-nvenc.h
new file mode 100644
index 00000000000000..6ee96fae4b426d
--- /dev/null
+++ b/plugins/obs-nvenc/obs-nvenc.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <obs-module.h> /* NOTE(review): header name restored; confirm */
+/* Checked by obs_module_load() before anything else is loaded. */
+bool nvenc_supported(void);
+/* NVENC load/unload hooks called from obs_module_load()/unload(). */
+void obs_nvenc_load(void);
+void obs_nvenc_unload(void);
+/* CUDA load/unload hooks; loaded after NVENC and unloaded before it. */
+void obs_cuda_load(void);
+void obs_cuda_unload(void);