Disable NVIDIA's threaded optimization on Windows

This commit is contained in:
Álex Román Núñez 2023-01-15 10:42:49 +01:00
parent ae896bbd85
commit 938a837056
7 changed files with 366 additions and 0 deletions

View File

@ -397,6 +397,11 @@ Comment: Multi-channel signed distance field generator
Copyright: 2016-2022, Viktor Chlumsky
License: MIT
Files: ./thirdparty/nvapi/nvapi_minimal.h
Comment: Stripped down version of "nvapi.h" from the NVIDIA NVAPI SDK
Copyright: 2019-2022, NVIDIA Corporation
License: Expat
Files: ./thirdparty/oidn/
Comment: Intel Open Image Denoise
Copyright: 2009-2019, Intel Corporation

View File

@ -2324,6 +2324,10 @@
<member name="rendering/gl_compatibility/item_buffer_size" type="int" setter="" getter="" default="16384">
Maximum number of canvas items commands that can be drawn in a single viewport update. If more render commands are issued they will be ignored. Decreasing this limit may improve performance on bandwidth limited devices. Increase this limit if you find that not all objects are being drawn in a frame.
</member>
<member name="rendering/gl_compatibility/nvidia_disable_threaded_optimization" type="bool" setter="" getter="" default="true">
If [code]true[/code], disables the threaded optimization feature from the NVIDIA drivers, which is known to cause stuttering in most OpenGL applications.
[b]Note:[/b] This setting only works on Windows, as threaded optimization is disabled by default on other platforms.
</member>
<member name="rendering/global_illumination/gi/use_half_resolution" type="bool" setter="" getter="" default="false">
If [code]true[/code], renders [VoxelGI] and SDFGI ([member Environment.sdfgi_enabled]) buffers at halved resolution (e.g. 960×540 when the viewport size is 1920×1080). This improves performance significantly when VoxelGI or SDFGI is enabled, at the cost of artifacts that may be visible on polygon edges. The loss in quality becomes less noticeable as the viewport resolution increases. [LightmapGI] rendering is not affected by this setting.
[b]Note:[/b] This property is only read when the project starts. To set half-resolution GI at run-time, call [method RenderingServer.gi_set_use_half_resolution] instead.

View File

@ -1647,6 +1647,7 @@ Error Main::setup(const char *execpath, int argc, char *argv[], bool p_second_ph
GLOBAL_DEF(PropertyInfo(Variant::STRING, "rendering/gl_compatibility/driver.android", PROPERTY_HINT_ENUM, driver_hints), default_driver);
GLOBAL_DEF(PropertyInfo(Variant::STRING, "rendering/gl_compatibility/driver.ios", PROPERTY_HINT_ENUM, driver_hints), default_driver);
GLOBAL_DEF(PropertyInfo(Variant::STRING, "rendering/gl_compatibility/driver.macos", PROPERTY_HINT_ENUM, driver_hints), default_driver);
GLOBAL_DEF_RST("rendering/gl_compatibility/nvidia_disable_threaded_optimization", true);
}
// Start with RenderingDevice-based backends. Should be included if any RD driver present.

View File

@ -32,6 +32,11 @@
#if defined(WINDOWS_ENABLED) && defined(GLES3_ENABLED)
#include "core/config/project_settings.h"
#include "core/version.h"
#include "thirdparty/nvapi/nvapi_minimal.h"
#include <dwmapi.h>
#include <stdio.h>
#include <stdlib.h>
@ -64,6 +69,171 @@ static String format_error_message(DWORD id) {
return msg;
}
// Identifier of the driver setting written by _nvapi_disable_threaded_optimization()
// (stored in NVDRS_SETTING::settingId), followed by the two values it can be set to.
static constexpr int OGL_THREAD_CONTROL_ID = 0x20C1221E;
static constexpr int OGL_THREAD_CONTROL_ENABLE = 0x00000001;
static constexpr int OGL_THREAD_CONTROL_DISABLE = 0x00000002;
typedef int(__cdecl *NvAPI_Initialize_t)();
typedef int(__cdecl *NvAPI_Unload_t)();
typedef int(__cdecl *NvAPI_GetErrorMessage_t)(unsigned int, NvAPI_ShortString);
typedef int(__cdecl *NvAPI_DRS_CreateSession_t)(NvDRSSessionHandle *);
typedef int(__cdecl *NvAPI_DRS_DestroySession_t)(NvDRSSessionHandle);
typedef int(__cdecl *NvAPI_DRS_LoadSettings_t)(NvDRSSessionHandle);
typedef int(__cdecl *NvAPI_DRS_CreateProfile_t)(NvDRSSessionHandle, NVDRS_PROFILE *, NvDRSProfileHandle *);
typedef int(__cdecl *NvAPI_DRS_CreateApplication_t)(NvDRSSessionHandle, NvDRSProfileHandle, NVDRS_APPLICATION *);
typedef int(__cdecl *NvAPI_DRS_SaveSettings_t)(NvDRSSessionHandle);
typedef int(__cdecl *NvAPI_DRS_SetSetting_t)(NvDRSSessionHandle, NvDRSProfileHandle, NVDRS_SETTING *);
typedef int(__cdecl *NvAPI_DRS_FindProfileByName_t)(NvDRSSessionHandle, NvAPI_UnicodeString, NvDRSProfileHandle *);
NvAPI_GetErrorMessage_t NvAPI_GetErrorMessage__;
// Checks the status code returned by an NVAPI call.
//
// `msg` is a caller-supplied prefix describing the operation that was attempted and
// `status` is the value returned by the NVAPI function. A status of zero is treated
// as success and yields `true`. Any other value yields `false`; when verbose stdout
// is enabled, the failure is also logged together with the driver-provided error
// description (if the error-message entry point has been resolved).
static bool nvapi_err_check(const char *msg, int status) {
	if (status != 0) {
		if (OS::get_singleton()->is_stdout_verbose()) {
			NvAPI_ShortString err_desc = { 0 };
			// The pointer is filled in at runtime and may still be null if the lookup
			// failed or has not happened yet; log an empty description in that case
			// instead of calling through a null pointer.
			if (NvAPI_GetErrorMessage__ != nullptr) {
				NvAPI_GetErrorMessage__(status, err_desc);
			}
			print_verbose(vformat("%s: %s(code %d)", msg, err_desc, status));
		}
		return false;
	}
	return true;
}
// On windows we have to disable threaded optimization when using NVIDIA graphics cards
// to avoid stuttering, see https://github.com/microsoft/vscode-cpptools/issues/6592
// also see https://github.com/Ryujinx/Ryujinx/blob/master/Ryujinx.Common/GraphicsDriver/NVThreadedOptimization.cs
void GLManager_Windows::_nvapi_disable_threaded_optimization() {
HMODULE nvapi = 0;
#ifdef _WIN64
nvapi = LoadLibraryA("nvapi64.dll");
#else
nvapi = LoadLibraryA("nvapi.dll");
#endif
if (nvapi == NULL) {
return;
}
void *(__cdecl * NvAPI_QueryInterface)(unsigned int interface_id) = 0;
NvAPI_QueryInterface = (void *(__cdecl *)(unsigned int))GetProcAddress(nvapi, "nvapi_QueryInterface");
if (NvAPI_QueryInterface == NULL) {
print_verbose("Error getting NVAPI NvAPI_QueryInterface");
return;
}
// Setup NVAPI function pointers
NvAPI_Initialize_t NvAPI_Initialize = (NvAPI_Initialize_t)NvAPI_QueryInterface(0x0150E828);
NvAPI_GetErrorMessage__ = (NvAPI_GetErrorMessage_t)NvAPI_QueryInterface(0x6C2D048C);
NvAPI_DRS_CreateSession_t NvAPI_DRS_CreateSession = (NvAPI_DRS_CreateSession_t)NvAPI_QueryInterface(0x0694D52E);
NvAPI_DRS_DestroySession_t NvAPI_DRS_DestroySession = (NvAPI_DRS_DestroySession_t)NvAPI_QueryInterface(0xDAD9CFF8);
NvAPI_Unload_t NvAPI_Unload = (NvAPI_Unload_t)NvAPI_QueryInterface(0xD22BDD7E);
NvAPI_DRS_LoadSettings_t NvAPI_DRS_LoadSettings = (NvAPI_DRS_LoadSettings_t)NvAPI_QueryInterface(0x375DBD6B);
NvAPI_DRS_CreateProfile_t NvAPI_DRS_CreateProfile = (NvAPI_DRS_CreateProfile_t)NvAPI_QueryInterface(0xCC176068);
NvAPI_DRS_CreateApplication_t NvAPI_DRS_CreateApplication = (NvAPI_DRS_CreateApplication_t)NvAPI_QueryInterface(0x4347A9DE);
NvAPI_DRS_SaveSettings_t NvAPI_DRS_SaveSettings = (NvAPI_DRS_SaveSettings_t)NvAPI_QueryInterface(0xFCBC7E14);
NvAPI_DRS_SetSetting_t NvAPI_DRS_SetSetting = (NvAPI_DRS_SetSetting_t)NvAPI_QueryInterface(0x577DD202);
NvAPI_DRS_FindProfileByName_t NvAPI_DRS_FindProfileByName = (NvAPI_DRS_FindProfileByName_t)NvAPI_QueryInterface(0x7E4A9A0B);
if (!nvapi_err_check("NVAPI: Init failed", NvAPI_Initialize())) {
return;
}
print_verbose("NVAPI: Init OK!");
NvDRSSessionHandle session_handle;
if (!nvapi_err_check("NVAPI: Error creating DRS session", NvAPI_DRS_CreateSession(&session_handle))) {
NvAPI_Unload();
return;
}
if (!nvapi_err_check("NVAPI: Error loading DRS settings", NvAPI_DRS_LoadSettings(session_handle))) {
NvAPI_DRS_DestroySession(session_handle);
NvAPI_Unload();
return;
}
String app_executable_name = OS::get_singleton()->get_executable_path().get_file();
String app_friendly_name = GLOBAL_GET("application/config/name");
// We need a name anyways, so let's use the engine name if an application name is not available
// (this is used mostly by the Project Manager)
if (app_friendly_name.is_empty()) {
app_friendly_name = VERSION_NAME;
}
String app_profile_name = app_friendly_name + " Nvidia Profile";
Char16String app_profile_name_u16 = app_profile_name.utf16();
Char16String app_executable_name_u16 = app_executable_name.utf16();
Char16String app_friendly_name_u16 = app_friendly_name.utf16();
NvDRSProfileHandle profile_handle = 0;
int status = NvAPI_DRS_FindProfileByName(session_handle, (NvU16 *)(app_profile_name_u16.ptrw()), &profile_handle);
if (status != 0) {
print_verbose("NVAPI: Profile not found, creating....");
NVDRS_PROFILE profile_info;
profile_info.version = NVDRS_PROFILE_VER;
profile_info.isPredefined = 0;
memcpy(profile_info.profileName, app_profile_name_u16.get_data(), sizeof(char16_t) * app_profile_name_u16.size());
if (!nvapi_err_check("NVAPI: Error creating profile", NvAPI_DRS_CreateProfile(session_handle, &profile_info, &profile_handle))) {
NvAPI_DRS_DestroySession(session_handle);
NvAPI_Unload();
return;
}
NVDRS_APPLICATION_V4 app;
app.version = NVDRS_APPLICATION_VER_V4;
app.isPredefined = 0;
app.isMetro = 1;
app.isCommandLine = 1;
memcpy(app.appName, app_executable_name_u16.get_data(), sizeof(char16_t) * app_executable_name_u16.size());
memcpy(app.userFriendlyName, app_friendly_name_u16.get_data(), sizeof(char16_t) * app_friendly_name_u16.size());
memcpy(app.launcher, L"", 1);
memcpy(app.fileInFolder, L"", 1);
if (!nvapi_err_check("NVAPI: Error creating application", NvAPI_DRS_CreateApplication(session_handle, profile_handle, &app))) {
NvAPI_DRS_DestroySession(session_handle);
NvAPI_Unload();
return;
}
}
NVDRS_SETTING setting;
setting.version = NVDRS_SETTING_VER;
setting.settingId = OGL_THREAD_CONTROL_ID;
setting.settingType = NVDRS_DWORD_TYPE;
setting.settingLocation = NVDRS_CURRENT_PROFILE_LOCATION;
setting.isCurrentPredefined = 0;
setting.isPredefinedValid = 0;
int thread_control_val = OGL_THREAD_CONTROL_DISABLE;
if (!GLOBAL_GET("rendering/gl_compatibility/nvidia_disable_threaded_optimization")) {
thread_control_val = OGL_THREAD_CONTROL_ENABLE;
}
setting.u32CurrentValue = thread_control_val;
setting.u32PredefinedValue = thread_control_val;
if (!nvapi_err_check("NVAPI: Error calling NvAPI_DRS_SetSetting", NvAPI_DRS_SetSetting(session_handle, profile_handle, &setting))) {
NvAPI_DRS_DestroySession(session_handle);
NvAPI_Unload();
return;
}
if (!nvapi_err_check("NVAPI: Error saving settings", NvAPI_DRS_SaveSettings(session_handle))) {
NvAPI_DRS_DestroySession(session_handle);
NvAPI_Unload();
return;
}
if (thread_control_val == OGL_THREAD_CONTROL_DISABLE) {
print_verbose("NVAPI: Disabled OpenGL threaded optimization successfully");
} else {
print_verbose("NVAPI: Enabled OpenGL threaded optimization successfully");
}
NvAPI_DRS_DestroySession(session_handle);
}
int GLManager_Windows::_find_or_create_display(GLWindow &win) {
// find display NYI, only 1 supported so far
if (_displays.size()) {
@ -295,6 +465,7 @@ void GLManager_Windows::swap_buffers() {
}
// Initializes the GL manager. The only work done here is applying the NVIDIA
// threaded-optimization driver setting; the function always returns OK, regardless
// of whether that step succeeded.
Error GLManager_Windows::initialize() {
_nvapi_disable_threaded_optimization();
return OK;
}

View File

@ -89,6 +89,7 @@ private:
ContextType context_type;
private:
// Applies the NVIDIA OpenGL threaded-optimization setting to the driver profile of this application.
void _nvapi_disable_threaded_optimization();
int _find_or_create_display(GLWindow &win);
Error _create_context(GLWindow &win, GLDisplay &gl_display);

View File

@ -539,6 +539,15 @@ Files extracted from the upstream source:
- `LICENSE.txt`
## nvapi
- Upstream: http://download.nvidia.com/XFree86/nvapi-open-source-sdk
- Version: R525
- License: MIT
- `nvapi_minimal.h` was created by using `nvapi.h` from upstream and removing unnecessary code.
## oidn
- Upstream: https://github.com/OpenImageDenoise/oidn

175
thirdparty/nvapi/nvapi_minimal.h vendored Normal file
View File

@ -0,0 +1,175 @@
#ifndef NVAPI_MINIMAL_H
#define NVAPI_MINIMAL_H
// Fixed-width integer aliases used by every NVAPI structure below.
// <stdint.h> is included here so that this header is self-contained and does not
// rely on the including file having already declared the uint*_t types.
#include <stdint.h>

typedef uint32_t NvU32;
typedef uint16_t NvU16;
typedef uint8_t NvU8;
// Builds the value stored in the `version` field of an NVAPI structure:
// sizeof(typeName) OR-ed with `ver` shifted left by 16 bits.
#define MAKE_NVAPI_VERSION(typeName,ver) (NvU32)(sizeof(typeName) | ((ver)<<16))
// Declares an opaque handle type: `name` becomes a pointer to a dedicated dummy struct,
// so that different handle types are not interchangeable.
#define NV_DECLARE_HANDLE(name) struct name##__ { int unused; }; typedef struct name##__ *name
NV_DECLARE_HANDLE(NvDRSSessionHandle);
NV_DECLARE_HANDLE(NvDRSProfileHandle);
// Capacity, in 16-bit code units, of an NvAPI_UnicodeString buffer.
#define NVAPI_UNICODE_STRING_MAX 2048
// Capacity, in bytes, of the data buffer inside NVDRS_BINARY_SETTING.
#define NVAPI_BINARY_DATA_MAX 4096
// Fixed-size string buffers used in NVAPI structures and function signatures.
typedef NvU16 NvAPI_UnicodeString[NVAPI_UNICODE_STRING_MAX];
typedef char NvAPI_ShortString[64];
// Number of entries in the `settingValues` array of NVDRS_SETTING_VALUES.
#define NVAPI_SETTING_MAX_VALUES 100
//! Enum describing the data type of a setting value
//! (stored in the `settingType` field of the setting structures).
typedef enum _NVDRS_SETTING_TYPE
{
NVDRS_DWORD_TYPE,
NVDRS_BINARY_TYPE,
NVDRS_STRING_TYPE,
NVDRS_WSTRING_TYPE
} NVDRS_SETTING_TYPE;
//! Enum stored in the `settingLocation` field of NVDRS_SETTING, which describes
//! where the current value of a setting comes from.
typedef enum _NVDRS_SETTING_LOCATION
{
NVDRS_CURRENT_PROFILE_LOCATION,
NVDRS_GLOBAL_PROFILE_LOCATION,
NVDRS_BASE_PROFILE_LOCATION,
NVDRS_DEFAULT_PROFILE_LOCATION
} NVDRS_SETTING_LOCATION;
//! Set of 32 one-bit flags packed into NvU32 bit-fields. Only the first three are
//! named (geforce, quadro, nvs); the remaining 29 are reserved placeholders.
//! Used as the `gpuSupport` member of the profile structure.
typedef struct _NVDRS_GPU_SUPPORT
{
NvU32 geforce : 1;
NvU32 quadro : 1;
NvU32 nvs : 1;
NvU32 reserved4 : 1;
NvU32 reserved5 : 1;
NvU32 reserved6 : 1;
NvU32 reserved7 : 1;
NvU32 reserved8 : 1;
NvU32 reserved9 : 1;
NvU32 reserved10 : 1;
NvU32 reserved11 : 1;
NvU32 reserved12 : 1;
NvU32 reserved13 : 1;
NvU32 reserved14 : 1;
NvU32 reserved15 : 1;
NvU32 reserved16 : 1;
NvU32 reserved17 : 1;
NvU32 reserved18 : 1;
NvU32 reserved19 : 1;
NvU32 reserved20 : 1;
NvU32 reserved21 : 1;
NvU32 reserved22 : 1;
NvU32 reserved23 : 1;
NvU32 reserved24 : 1;
NvU32 reserved25 : 1;
NvU32 reserved26 : 1;
NvU32 reserved27 : 1;
NvU32 reserved28 : 1;
NvU32 reserved29 : 1;
NvU32 reserved30 : 1;
NvU32 reserved31 : 1;
NvU32 reserved32 : 1;
} NVDRS_GPU_SUPPORT;
//! Binary value of a setting: a byte count followed by a fixed-size data buffer
//! of NVAPI_BINARY_DATA_MAX bytes.
typedef struct _NVDRS_BINARY_SETTING
{
NvU32 valueLength; //!< valueLength should always be in number of bytes.
NvU8 valueData[NVAPI_BINARY_DATA_MAX];
} NVDRS_BINARY_SETTING;
//! Describes the values available for one setting: its type, its default value and
//! an array of up to NVAPI_SETTING_MAX_VALUES possible values. Both unions hold a
//! DWORD, a binary blob or a unicode string, selected by `settingType`.
typedef struct _NVDRS_SETTING_VALUES
{
NvU32 version; //!< Structure Version
NvU32 numSettingValues; //!< Total number of values available in a setting.
NVDRS_SETTING_TYPE settingType; //!< Type of setting value.
union //!< Setting can hold either DWORD or Binary value or string. Not mixed types.
{
NvU32 u32DefaultValue; //!< Accessing default DWORD value of this setting.
NVDRS_BINARY_SETTING binaryDefaultValue; //!< Accessing default Binary value of this setting.
//!< Must be allocated by caller with valueLength specifying buffer size, or only valueLength will be filled in.
NvAPI_UnicodeString wszDefaultValue; //!< Accessing default unicode string value of this setting.
};
union //!< Setting values can be of either DWORD, Binary values or String type,
{ //!< NOT mixed types.
NvU32 u32Value; //!< All possible DWORD values for a setting
NVDRS_BINARY_SETTING binaryValue; //!< All possible Binary values for a setting
NvAPI_UnicodeString wszValue; //!< Accessing current unicode string value of this setting.
}settingValues[NVAPI_SETTING_MAX_VALUES];
} NVDRS_SETTING_VALUES;
//! Macro for constructing the version field of ::_NVDRS_SETTING_VALUES
#define NVDRS_SETTING_VALUES_VER MAKE_NVAPI_VERSION(NVDRS_SETTING_VALUES,1)
//! A single driver setting: its name and ID, its value type, where the current value
//! comes from, and both its predefined and current values. Each value union holds a
//! DWORD, a binary blob or a unicode string, selected by `settingType`.
typedef struct _NVDRS_SETTING_V1
{
NvU32 version; //!< Structure Version
NvAPI_UnicodeString settingName; //!< String name of setting
NvU32 settingId; //!< 32 bit setting Id
NVDRS_SETTING_TYPE settingType; //!< Type of setting value.
NVDRS_SETTING_LOCATION settingLocation; //!< Describes where the value in CurrentValue comes from.
NvU32 isCurrentPredefined; //!< It is different than 0 if the currentValue is a predefined Value,
//!< 0 if the currentValue is a user value.
NvU32 isPredefinedValid; //!< It is different than 0 if the PredefinedValue union contains a valid value.
union //!< Setting can hold either DWORD or Binary value or string. Not mixed types.
{
NvU32 u32PredefinedValue; //!< Accessing default DWORD value of this setting.
NVDRS_BINARY_SETTING binaryPredefinedValue; //!< Accessing default Binary value of this setting.
//!< Must be allocated by caller with valueLength specifying buffer size,
//!< or only valueLength will be filled in.
NvAPI_UnicodeString wszPredefinedValue; //!< Accessing default unicode string value of this setting.
};
union //!< Setting can hold either DWORD or Binary value or string. Not mixed types.
{
NvU32 u32CurrentValue; //!< Accessing current DWORD value of this setting.
NVDRS_BINARY_SETTING binaryCurrentValue; //!< Accessing current Binary value of this setting.
//!< Must be allocated by caller with valueLength specifying buffer size,
//!< or only valueLength will be filled in.
NvAPI_UnicodeString wszCurrentValue; //!< Accessing current unicode string value of this setting.
};
} NVDRS_SETTING_V1;
//! Macro for constructing the version field of ::_NVDRS_SETTING
#define NVDRS_SETTING_VER1 MAKE_NVAPI_VERSION(NVDRS_SETTING_V1, 1)
//! Unversioned aliases: NVDRS_SETTING and NVDRS_SETTING_VER refer to version 1 of the structure.
typedef NVDRS_SETTING_V1 NVDRS_SETTING;
#define NVDRS_SETTING_VER NVDRS_SETTING_VER1
//! Describes an application attached to a driver profile: its executable and
//! user-friendly names, optional launcher / companion-file filters, and flags
//! controlling how the application is matched.
typedef struct _NVDRS_APPLICATION_V4
{
NvU32 version; //!< Structure Version
NvU32 isPredefined; //!< Is the application userdefined/predefined
NvAPI_UnicodeString appName; //!< String name of the Application
NvAPI_UnicodeString userFriendlyName; //!< UserFriendly name of the Application
NvAPI_UnicodeString launcher; //!< Indicates the name (if any) of the launcher that starts the Application
NvAPI_UnicodeString fileInFolder; //!< Select this application only if this file is found.
//!< When specifying multiple files, separate them using the ':' character.
NvU32 isMetro:1; //!< Windows 8 style app
NvU32 isCommandLine:1; //!< Command line parsing for the application name
NvU32 reserved:30; //!< Reserved. Should be 0.
NvAPI_UnicodeString commandLine; //!< If isCommandLine is set to 0 this must be an empty. If isCommandLine is set to 1
//!< this contains application's command line as if it was returned by GetCommandLineW.
} NVDRS_APPLICATION_V4;
//! Macro for constructing the version field of ::_NVDRS_APPLICATION_V4
#define NVDRS_APPLICATION_VER_V4 MAKE_NVAPI_VERSION(NVDRS_APPLICATION_V4,4)
//! Unversioned aliases: NVDRS_APPLICATION and NVDRS_APPLICATION_VER refer to version 4 of the structure.
typedef NVDRS_APPLICATION_V4 NVDRS_APPLICATION;
#define NVDRS_APPLICATION_VER NVDRS_APPLICATION_VER_V4
//! Describes a driver profile: its name, the GPU families it supports, whether it is
//! predefined, and read-only counters for the applications and settings it contains.
typedef struct _NVDRS_PROFILE_V1
{
NvU32 version; //!< Structure Version
NvAPI_UnicodeString profileName; //!< String name of the Profile
NVDRS_GPU_SUPPORT gpuSupport; //!< This read-only flag indicates the profile support on either
//!< Quadro, or Geforce, or both.
NvU32 isPredefined; //!< Is the Profile user-defined, or predefined
NvU32 numOfApps; //!< Total number of applications that belong to this profile. Read-only
NvU32 numOfSettings; //!< Total number of settings applied for this Profile. Read-only
} NVDRS_PROFILE_V1;
//! Unversioned alias: NVDRS_PROFILE refers to version 1 of the structure.
typedef NVDRS_PROFILE_V1 NVDRS_PROFILE;
//! Macro for constructing the version field of ::NVDRS_PROFILE
#define NVDRS_PROFILE_VER1 MAKE_NVAPI_VERSION(NVDRS_PROFILE_V1,1)
#define NVDRS_PROFILE_VER NVDRS_PROFILE_VER1
#endif