diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt index 8ecea6f9fc9..9f01e9c414d 100644 --- a/COPYRIGHT.txt +++ b/COPYRIGHT.txt @@ -397,6 +397,11 @@ Comment: Multi-channel signed distance field generator Copyright: 2016-2022, Viktor Chlumsky License: MIT +Files: ./thirdparty/nvapi/nvapi_minimal.h +Comment: Stripped down version of "nvapi.h" from the NVIDIA NVAPI SDK +Copyright: 2019-2022, NVIDIA Corporation +License: Expat + Files: ./thirdparty/oidn/ Comment: Intel Open Image Denoise Copyright: 2009-2019, Intel Corporation diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml index ef47e4a3d0e..ffb61ab20d3 100644 --- a/doc/classes/ProjectSettings.xml +++ b/doc/classes/ProjectSettings.xml @@ -2324,6 +2324,10 @@ Maximum number of canvas items commands that can be drawn in a single viewport update. If more render commands are issued they will be ignored. Decreasing this limit may improve performance on bandwidth limited devices. Increase this limit if you find that not all objects are being drawn in a frame. + + If [code]true[/code], disables the threaded optimization feature from the NVIDIA drivers, which is known to cause stuttering in most OpenGL applications. + [b]Note:[/b] This setting only works on Windows, as threaded optimization is disabled by default on other platforms. + If [code]true[/code], renders [VoxelGI] and SDFGI ([member Environment.sdfgi_enabled]) buffers at halved resolution (e.g. 960×540 when the viewport size is 1920×1080). This improves performance significantly when VoxelGI or SDFGI is enabled, at the cost of artifacts that may be visible on polygon edges. The loss in quality becomes less noticeable as the viewport resolution increases. [LightmapGI] rendering is not affected by this setting. [b]Note:[/b] This property is only read when the project starts. To set half-resolution GI at run-time, call [method RenderingServer.gi_set_use_half_resolution] instead. 
diff --git a/main/main.cpp b/main/main.cpp
index ef997e71a73..b759944ff03 100644
--- a/main/main.cpp
+++ b/main/main.cpp
@@ -1647,6 +1647,7 @@ Error Main::setup(const char *execpath, int argc, char *argv[], bool p_second_ph
 		GLOBAL_DEF(PropertyInfo(Variant::STRING, "rendering/gl_compatibility/driver.android", PROPERTY_HINT_ENUM, driver_hints), default_driver);
 		GLOBAL_DEF(PropertyInfo(Variant::STRING, "rendering/gl_compatibility/driver.ios", PROPERTY_HINT_ENUM, driver_hints), default_driver);
 		GLOBAL_DEF(PropertyInfo(Variant::STRING, "rendering/gl_compatibility/driver.macos", PROPERTY_HINT_ENUM, driver_hints), default_driver);
+		GLOBAL_DEF_RST("rendering/gl_compatibility/nvidia_disable_threaded_optimization", true);
 	}
 
 	// Start with RenderingDevice-based backends. Should be included if any RD driver present.
diff --git a/platform/windows/gl_manager_windows.cpp b/platform/windows/gl_manager_windows.cpp
index 1494e3ed1bc..ad7c637a370 100644
--- a/platform/windows/gl_manager_windows.cpp
+++ b/platform/windows/gl_manager_windows.cpp
@@ -32,6 +32,11 @@
 
 #if defined(WINDOWS_ENABLED) && defined(GLES3_ENABLED)
 
+#include "core/config/project_settings.h"
+#include "core/version.h"
+
+#include "thirdparty/nvapi/nvapi_minimal.h"
+
 #include
 #include
 #include
@@ -64,6 +69,204 @@ static String format_error_message(DWORD id) {
 	return msg;
 }
 
+const int OGL_THREAD_CONTROL_ID = 0x20C1221E;
+const int OGL_THREAD_CONTROL_DISABLE = 0x00000002;
+const int OGL_THREAD_CONTROL_ENABLE = 0x00000001;
+
+typedef int(__cdecl *NvAPI_Initialize_t)();
+typedef int(__cdecl *NvAPI_Unload_t)();
+typedef int(__cdecl *NvAPI_GetErrorMessage_t)(unsigned int, NvAPI_ShortString);
+typedef int(__cdecl *NvAPI_DRS_CreateSession_t)(NvDRSSessionHandle *);
+typedef int(__cdecl *NvAPI_DRS_DestroySession_t)(NvDRSSessionHandle);
+typedef int(__cdecl *NvAPI_DRS_LoadSettings_t)(NvDRSSessionHandle);
+typedef int(__cdecl *NvAPI_DRS_CreateProfile_t)(NvDRSSessionHandle, NVDRS_PROFILE *, NvDRSProfileHandle *);
+typedef int(__cdecl *NvAPI_DRS_CreateApplication_t)(NvDRSSessionHandle, NvDRSProfileHandle, NVDRS_APPLICATION *);
+typedef int(__cdecl *NvAPI_DRS_SaveSettings_t)(NvDRSSessionHandle);
+typedef int(__cdecl *NvAPI_DRS_SetSetting_t)(NvDRSSessionHandle, NvDRSProfileHandle, NVDRS_SETTING *);
+typedef int(__cdecl *NvAPI_DRS_FindProfileByName_t)(NvDRSSessionHandle, NvAPI_UnicodeString, NvDRSProfileHandle *);
+
+// Resolved by _nvapi_disable_threaded_optimization(); null until then.
+static NvAPI_GetErrorMessage_t NvAPI_GetErrorMessage__ = nullptr;
+
+// Returns true if `status` is 0. Otherwise returns false and, when verbose output is
+// enabled, prints `msg` together with the NVAPI error description and the status code.
+static bool nvapi_err_check(const char *msg, int status) {
+	if (status != 0) {
+		if (OS::get_singleton()->is_stdout_verbose()) {
+			NvAPI_ShortString err_desc = { 0 };
+			// The description stays empty if the error-message entry point is unavailable.
+			if (NvAPI_GetErrorMessage__ != nullptr) {
+				NvAPI_GetErrorMessage__(status, err_desc);
+			}
+			print_verbose(vformat("%s: %s(code %d)", msg, err_desc, status));
+		}
+		return false;
+	}
+	return true;
+}
+
+// Copies `p_src` into the fixed-size NVAPI string `r_dest`, truncating if needed so that
+// the destination is never overrun and is always null-terminated.
+static void nvapi_copy_string(NvAPI_UnicodeString r_dest, const Char16String &p_src) {
+	int count = p_src.size();
+	if (count > NVAPI_UNICODE_STRING_MAX - 1) {
+		count = NVAPI_UNICODE_STRING_MAX - 1;
+	}
+	memcpy(r_dest, p_src.get_data(), sizeof(char16_t) * count);
+	r_dest[count] = 0;
+}
+
+// On Windows we have to disable threaded optimization when using NVIDIA graphics cards
+// to avoid stuttering, see https://github.com/microsoft/vscode-cpptools/issues/6592
+// also see https://github.com/Ryujinx/Ryujinx/blob/master/Ryujinx.Common/GraphicsDriver/NVThreadedOptimization.cs
+//
+// Creates (or reuses) a driver profile named "<application name> Nvidia Profile" and stores
+// the OGL_THREAD_CONTROL setting in it, following the
+// "rendering/gl_compatibility/nvidia_disable_threaded_optimization" project setting.
+// Does nothing if the NVAPI library cannot be loaded; failures are only reported in verbose mode.
+void GLManager_Windows::_nvapi_disable_threaded_optimization() {
+#ifdef _WIN64
+	HMODULE nvapi = LoadLibraryA("nvapi64.dll");
+#else
+	HMODULE nvapi = LoadLibraryA("nvapi.dll");
+#endif
+
+	if (nvapi == nullptr) {
+		return;
+	}
+
+	void *(__cdecl * NvAPI_QueryInterface)(unsigned int interface_id) = (void *(__cdecl *)(unsigned int))GetProcAddress(nvapi, "nvapi_QueryInterface");
+
+	if (NvAPI_QueryInterface == nullptr) {
+		print_verbose("Error getting NVAPI NvAPI_QueryInterface");
+		return;
+	}
+
+	// Setup NVAPI function pointers
+	NvAPI_Initialize_t NvAPI_Initialize = (NvAPI_Initialize_t)NvAPI_QueryInterface(0x0150E828);
+	NvAPI_GetErrorMessage__ = (NvAPI_GetErrorMessage_t)NvAPI_QueryInterface(0x6C2D048C);
+	NvAPI_DRS_CreateSession_t NvAPI_DRS_CreateSession = (NvAPI_DRS_CreateSession_t)NvAPI_QueryInterface(0x0694D52E);
+	NvAPI_DRS_DestroySession_t NvAPI_DRS_DestroySession = (NvAPI_DRS_DestroySession_t)NvAPI_QueryInterface(0xDAD9CFF8);
+	NvAPI_Unload_t NvAPI_Unload = (NvAPI_Unload_t)NvAPI_QueryInterface(0xD22BDD7E);
+	NvAPI_DRS_LoadSettings_t NvAPI_DRS_LoadSettings = (NvAPI_DRS_LoadSettings_t)NvAPI_QueryInterface(0x375DBD6B);
+	NvAPI_DRS_CreateProfile_t NvAPI_DRS_CreateProfile = (NvAPI_DRS_CreateProfile_t)NvAPI_QueryInterface(0xCC176068);
+	NvAPI_DRS_CreateApplication_t NvAPI_DRS_CreateApplication = (NvAPI_DRS_CreateApplication_t)NvAPI_QueryInterface(0x4347A9DE);
+	NvAPI_DRS_SaveSettings_t NvAPI_DRS_SaveSettings = (NvAPI_DRS_SaveSettings_t)NvAPI_QueryInterface(0xFCBC7E14);
+	NvAPI_DRS_SetSetting_t NvAPI_DRS_SetSetting = (NvAPI_DRS_SetSetting_t)NvAPI_QueryInterface(0x577DD202);
+	NvAPI_DRS_FindProfileByName_t NvAPI_DRS_FindProfileByName = (NvAPI_DRS_FindProfileByName_t)NvAPI_QueryInterface(0x7E4A9A0B);
+
+	// A driver that does not export one of these entry points would otherwise make us call a null pointer.
+	if (!NvAPI_Initialize || !NvAPI_Unload || !NvAPI_DRS_CreateSession || !NvAPI_DRS_DestroySession || !NvAPI_DRS_LoadSettings || !NvAPI_DRS_CreateProfile || !NvAPI_DRS_CreateApplication || !NvAPI_DRS_SaveSettings || !NvAPI_DRS_SetSetting || !NvAPI_DRS_FindProfileByName) {
+		print_verbose("NVAPI: Error getting one or more NVAPI functions");
+		return;
+	}
+
+	if (!nvapi_err_check("NVAPI: Init failed", NvAPI_Initialize())) {
+		return;
+	}
+
+	print_verbose("NVAPI: Init OK!");
+
+	NvDRSSessionHandle session_handle = nullptr;
+
+	if (!nvapi_err_check("NVAPI: Error creating DRS session", NvAPI_DRS_CreateSession(&session_handle))) {
+		NvAPI_Unload();
+		return;
+	}
+
+	if (!nvapi_err_check("NVAPI: Error loading DRS settings", NvAPI_DRS_LoadSettings(session_handle))) {
+		NvAPI_DRS_DestroySession(session_handle);
+		NvAPI_Unload();
+		return;
+	}
+
+	String app_executable_name = OS::get_singleton()->get_executable_path().get_file();
+	String app_friendly_name = GLOBAL_GET("application/config/name");
+	// We need a name anyway, so let's use the engine name if an application name is not available
+	// (this is used mostly by the Project Manager)
+	if (app_friendly_name.is_empty()) {
+		app_friendly_name = VERSION_NAME;
+	}
+	String app_profile_name = app_friendly_name + " Nvidia Profile";
+	Char16String app_profile_name_u16 = app_profile_name.utf16();
+	Char16String app_executable_name_u16 = app_executable_name.utf16();
+	Char16String app_friendly_name_u16 = app_friendly_name.utf16();
+
+	NvDRSProfileHandle profile_handle = nullptr;
+
+	// Use a full-size NVAPI string buffer so lookup and creation see the same (possibly truncated) name.
+	NvAPI_UnicodeString profile_name = { 0 };
+	nvapi_copy_string(profile_name, app_profile_name_u16);
+
+	int status = NvAPI_DRS_FindProfileByName(session_handle, profile_name, &profile_handle);
+
+	if (status != 0) {
+		print_verbose("NVAPI: Profile not found, creating....");
+
+		// Zero-initialize so that the fields we do not set are not left indeterminate.
+		NVDRS_PROFILE profile_info = {};
+		profile_info.version = NVDRS_PROFILE_VER;
+		profile_info.isPredefined = 0;
+		memcpy(profile_info.profileName, profile_name, sizeof(profile_name));
+
+		if (!nvapi_err_check("NVAPI: Error creating profile", NvAPI_DRS_CreateProfile(session_handle, &profile_info, &profile_handle))) {
+			NvAPI_DRS_DestroySession(session_handle);
+			NvAPI_Unload();
+			return;
+		}
+
+		// Zero-initialize so `launcher`, `fileInFolder`, `commandLine` and `reserved` are empty/0 instead of indeterminate.
+		NVDRS_APPLICATION_V4 app = {};
+		app.version = NVDRS_APPLICATION_VER_V4;
+		app.isPredefined = 0;
+		app.isMetro = 1;
+		app.isCommandLine = 1;
+		nvapi_copy_string(app.appName, app_executable_name_u16);
+		nvapi_copy_string(app.userFriendlyName, app_friendly_name_u16);
+
+		if (!nvapi_err_check("NVAPI: Error creating application", NvAPI_DRS_CreateApplication(session_handle, profile_handle, &app))) {
+			NvAPI_DRS_DestroySession(session_handle);
+			NvAPI_Unload();
+			return;
+		}
+	}
+
+	// Zero-initialize so that `settingName` and the unused union bytes are not left indeterminate.
+	NVDRS_SETTING setting = {};
+	setting.version = NVDRS_SETTING_VER;
+	setting.settingId = OGL_THREAD_CONTROL_ID;
+	setting.settingType = NVDRS_DWORD_TYPE;
+	setting.settingLocation = NVDRS_CURRENT_PROFILE_LOCATION;
+	setting.isCurrentPredefined = 0;
+	setting.isPredefinedValid = 0;
+	int thread_control_val = OGL_THREAD_CONTROL_DISABLE;
+	if (!GLOBAL_GET("rendering/gl_compatibility/nvidia_disable_threaded_optimization")) {
+		thread_control_val = OGL_THREAD_CONTROL_ENABLE;
+	}
+	setting.u32CurrentValue = thread_control_val;
+	setting.u32PredefinedValue = thread_control_val;
+
+	if (!nvapi_err_check("NVAPI: Error calling NvAPI_DRS_SetSetting", NvAPI_DRS_SetSetting(session_handle, profile_handle, &setting))) {
+		NvAPI_DRS_DestroySession(session_handle);
+		NvAPI_Unload();
+		return;
+	}
+
+	if (!nvapi_err_check("NVAPI: Error saving settings", NvAPI_DRS_SaveSettings(session_handle))) {
+		NvAPI_DRS_DestroySession(session_handle);
+		NvAPI_Unload();
+		return;
+	}
+	if (thread_control_val == OGL_THREAD_CONTROL_DISABLE) {
+		print_verbose("NVAPI: Disabled OpenGL threaded optimization successfully");
+	} else {
+		print_verbose("NVAPI: Enabled OpenGL threaded optimization successfully");
+	}
+	NvAPI_DRS_DestroySession(session_handle);
+	// Release NVAPI on success as well, mirroring every error path above.
+	NvAPI_Unload();
+}
+
 int GLManager_Windows::_find_or_create_display(GLWindow &win) {
 	// find display NYI, only 1 supported so far
 	if (_displays.size()) {
@@ -295,6 +498,7 @@ void GLManager_Windows::swap_buffers() {
 }
 
 Error GLManager_Windows::initialize() {
+	_nvapi_disable_threaded_optimization();
 	return OK;
 }
 
diff --git a/platform/windows/gl_manager_windows.h b/platform/windows/gl_manager_windows.h
index a0d4b28c6f0..482b00a1bac 100644
--- a/platform/windows/gl_manager_windows.h
+++ b/platform/windows/gl_manager_windows.h
@@ -89,6 +89,7 @@ private:
 	ContextType context_type;
 
 private:
+	void _nvapi_disable_threaded_optimization();
 	int _find_or_create_display(GLWindow &win);
 	Error _create_context(GLWindow &win, GLDisplay &gl_display);
 
diff --git a/thirdparty/README.md b/thirdparty/README.md
index fc4ba57d665..fccb18b1e34 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -539,6 +539,15 @@ Files extracted from the upstream source:
 - `LICENSE.txt`
 
 
+## nvapi
+
+- Upstream: http://download.nvidia.com/XFree86/nvapi-open-source-sdk
+- Version: R525
+- License: MIT
+
+- `nvapi_minimal.h` was created by using `nvapi.h` from upstream and removing unnecessary code.
+
+
 ## oidn
 
 - Upstream: https://github.com/OpenImageDenoise/oidn
diff --git a/thirdparty/nvapi/nvapi_minimal.h b/thirdparty/nvapi/nvapi_minimal.h
new file mode 100644
index 00000000000..c0836edbfaf
--- /dev/null
+++ b/thirdparty/nvapi/nvapi_minimal.h
@@ -0,0 +1,184 @@
+#ifndef NVAPI_MINIMAL_H
+#define NVAPI_MINIMAL_H
+
+// Minimal subset of the declarations from NVIDIA's "nvapi.h" (driver settings types only).
+// Do not reorder or resize struct members: the `version` values built with
+// MAKE_NVAPI_VERSION() encode sizeof() of the corresponding struct.
+
+#include <stdint.h>
+
+typedef uint32_t NvU32;
+typedef uint16_t NvU16;
+typedef uint8_t NvU8;
+
+//! ORs sizeof(typeName) with the structure version `ver` shifted left by 16 bits.
+#define MAKE_NVAPI_VERSION(typeName,ver) (NvU32)(sizeof(typeName) | ((ver)<<16))
+
+//! Declares `name` as a pointer to a dummy struct, giving each handle its own distinct type.
+#define NV_DECLARE_HANDLE(name) struct name##__ { int unused; }; typedef struct name##__ *name
+
+NV_DECLARE_HANDLE(NvDRSSessionHandle);
+NV_DECLARE_HANDLE(NvDRSProfileHandle);
+
+#define NVAPI_UNICODE_STRING_MAX 2048
+#define NVAPI_BINARY_DATA_MAX 4096
+typedef NvU16 NvAPI_UnicodeString[NVAPI_UNICODE_STRING_MAX];
+typedef char NvAPI_ShortString[64];
+
+#define NVAPI_SETTING_MAX_VALUES 100
+
+typedef enum _NVDRS_SETTING_TYPE
+{
+    NVDRS_DWORD_TYPE,
+    NVDRS_BINARY_TYPE,
+    NVDRS_STRING_TYPE,
+    NVDRS_WSTRING_TYPE
+} NVDRS_SETTING_TYPE;
+
+typedef enum _NVDRS_SETTING_LOCATION
+{
+    NVDRS_CURRENT_PROFILE_LOCATION,
+    NVDRS_GLOBAL_PROFILE_LOCATION,
+    NVDRS_BASE_PROFILE_LOCATION,
+    NVDRS_DEFAULT_PROFILE_LOCATION
+} NVDRS_SETTING_LOCATION;
+
+typedef struct _NVDRS_GPU_SUPPORT
+{
+    NvU32 geforce : 1;
+    NvU32 quadro : 1;
+    NvU32 nvs : 1;
+    NvU32 reserved4 : 1;
+    NvU32 reserved5 : 1;
+    NvU32 reserved6 : 1;
+    NvU32 reserved7 : 1;
+    NvU32 reserved8 : 1;
+    NvU32 reserved9 : 1;
+    NvU32 reserved10 : 1;
+    NvU32 reserved11 : 1;
+    NvU32 reserved12 : 1;
+    NvU32 reserved13 : 1;
+    NvU32 reserved14 : 1;
+    NvU32 reserved15 : 1;
+    NvU32 reserved16 : 1;
+    NvU32 reserved17 : 1;
+    NvU32 reserved18 : 1;
+    NvU32 reserved19 : 1;
+    NvU32 reserved20 : 1;
+    NvU32 reserved21 : 1;
+    NvU32 reserved22 : 1;
+    NvU32 reserved23 : 1;
+    NvU32 reserved24 : 1;
+    NvU32 reserved25 : 1;
+    NvU32 reserved26 : 1;
+    NvU32 reserved27 : 1;
+    NvU32 reserved28 : 1;
+    NvU32 reserved29 : 1;
+    NvU32 reserved30 : 1;
+    NvU32 reserved31 : 1;
+    NvU32 reserved32 : 1;
+} NVDRS_GPU_SUPPORT;
+
+//! Binary setting value: a length in bytes followed by the raw data buffer.
+typedef struct _NVDRS_BINARY_SETTING
+{
+    NvU32 valueLength; //!< valueLength should always be in number of bytes.
+    NvU8 valueData[NVAPI_BINARY_DATA_MAX];
+} NVDRS_BINARY_SETTING;
+
+typedef struct _NVDRS_SETTING_VALUES
+{
+    NvU32 version; //!< Structure Version
+    NvU32 numSettingValues; //!< Total number of values available in a setting.
+    NVDRS_SETTING_TYPE settingType; //!< Type of setting value.
+    union //!< Setting can hold either DWORD or Binary value or string. Not mixed types.
+    {
+        NvU32 u32DefaultValue; //!< Accessing default DWORD value of this setting.
+        NVDRS_BINARY_SETTING binaryDefaultValue; //!< Accessing default Binary value of this setting.
+        //!< Must be allocated by caller with valueLength specifying buffer size, or only valueLength will be filled in.
+        NvAPI_UnicodeString wszDefaultValue; //!< Accessing default unicode string value of this setting.
+    };
+    union //!< Setting values can be of either DWORD, Binary values or String type,
+    { //!< NOT mixed types.
+        NvU32 u32Value; //!< All possible DWORD values for a setting
+        NVDRS_BINARY_SETTING binaryValue; //!< All possible Binary values for a setting
+        NvAPI_UnicodeString wszValue; //!< Accessing current unicode string value of this setting.
+    }settingValues[NVAPI_SETTING_MAX_VALUES];
+} NVDRS_SETTING_VALUES;
+
+//! Macro for constructing the version field of ::_NVDRS_SETTING_VALUES
+#define NVDRS_SETTING_VALUES_VER MAKE_NVAPI_VERSION(NVDRS_SETTING_VALUES,1)
+
+typedef struct _NVDRS_SETTING_V1
+{
+    NvU32 version; //!< Structure Version
+    NvAPI_UnicodeString settingName; //!< String name of setting
+    NvU32 settingId; //!< 32 bit setting Id
+    NVDRS_SETTING_TYPE settingType; //!< Type of setting value.
+    NVDRS_SETTING_LOCATION settingLocation; //!< Describes where the value in CurrentValue comes from.
+    NvU32 isCurrentPredefined; //!< It is different than 0 if the currentValue is a predefined Value,
+    //!< 0 if the currentValue is a user value.
+    NvU32 isPredefinedValid; //!< It is different than 0 if the PredefinedValue union contains a valid value.
+    union //!< Setting can hold either DWORD or Binary value or string. Not mixed types.
+    {
+        NvU32 u32PredefinedValue; //!< Accessing default DWORD value of this setting.
+        NVDRS_BINARY_SETTING binaryPredefinedValue; //!< Accessing default Binary value of this setting.
+        //!< Must be allocated by caller with valueLength specifying buffer size,
+        //!< or only valueLength will be filled in.
+        NvAPI_UnicodeString wszPredefinedValue; //!< Accessing default unicode string value of this setting.
+    };
+    union //!< Setting can hold either DWORD or Binary value or string. Not mixed types.
+    {
+        NvU32 u32CurrentValue; //!< Accessing current DWORD value of this setting.
+        NVDRS_BINARY_SETTING binaryCurrentValue; //!< Accessing current Binary value of this setting.
+        //!< Must be allocated by caller with valueLength specifying buffer size,
+        //!< or only valueLength will be filled in.
+        NvAPI_UnicodeString wszCurrentValue; //!< Accessing current unicode string value of this setting.
+    };
+} NVDRS_SETTING_V1;
+
+//! Macro for constructing the version field of ::_NVDRS_SETTING
+#define NVDRS_SETTING_VER1 MAKE_NVAPI_VERSION(NVDRS_SETTING_V1, 1)
+
+typedef NVDRS_SETTING_V1 NVDRS_SETTING;
+#define NVDRS_SETTING_VER NVDRS_SETTING_VER1
+
+typedef struct _NVDRS_APPLICATION_V4
+{
+    NvU32 version; //!< Structure Version
+    NvU32 isPredefined; //!< Is the application userdefined/predefined
+    NvAPI_UnicodeString appName; //!< String name of the Application
+    NvAPI_UnicodeString userFriendlyName; //!< UserFriendly name of the Application
+    NvAPI_UnicodeString launcher; //!< Indicates the name (if any) of the launcher that starts the Application
+    NvAPI_UnicodeString fileInFolder; //!< Select this application only if this file is found.
+    //!< When specifying multiple files, separate them using the ':' character.
+    NvU32 isMetro:1; //!< Windows 8 style app
+    NvU32 isCommandLine:1; //!< Command line parsing for the application name
+    NvU32 reserved:30; //!< Reserved. Should be 0.
+    NvAPI_UnicodeString commandLine; //!< If isCommandLine is set to 0 this must be an empty. If isCommandLine is set to 1
+    //!< this contains application's command line as if it was returned by GetCommandLineW.
+} NVDRS_APPLICATION_V4;
+
+#define NVDRS_APPLICATION_VER_V4 MAKE_NVAPI_VERSION(NVDRS_APPLICATION_V4,4)
+
+typedef NVDRS_APPLICATION_V4 NVDRS_APPLICATION;
+#define NVDRS_APPLICATION_VER NVDRS_APPLICATION_VER_V4
+
+typedef struct _NVDRS_PROFILE_V1
+{
+    NvU32 version; //!< Structure Version
+    NvAPI_UnicodeString profileName; //!< String name of the Profile
+    NVDRS_GPU_SUPPORT gpuSupport; //!< This read-only flag indicates the profile support on either
+    //!< Quadro, or Geforce, or both.
+    NvU32 isPredefined; //!< Is the Profile user-defined, or predefined
+    NvU32 numOfApps; //!< Total number of applications that belong to this profile. Read-only
+    NvU32 numOfSettings; //!< Total number of settings applied for this Profile. Read-only
+} NVDRS_PROFILE_V1;
+
+typedef NVDRS_PROFILE_V1 NVDRS_PROFILE;
+
+//! Macro for constructing the version field of ::NVDRS_PROFILE
+#define NVDRS_PROFILE_VER1 MAKE_NVAPI_VERSION(NVDRS_PROFILE_V1,1)
+#define NVDRS_PROFILE_VER NVDRS_PROFILE_VER1
+
+#endif