Disable extra memory tracking by default

PR #90993 added several debugging utilities.

Among them, advanced memory tracking through the use of custom
allocators and VK_EXT_device_memory_report.

However, as issue #95967 reveals, it is dangerous to leave it on by
default because drivers (or even the Vulkan loader) can too easily
accidentally break custom allocators by allocating memory through std
malloc but then requesting us to deallocate it (or vice versa).

This PR makes the following changes:
 - Adds the --extra-gpu-memory-tracking command line argument
 - Adds missing enum entries to
RenderingContextDriverVulkan::VkTrackedObjectType
 - Adds RenderingDevice::get_driver_and_device_memory_report
    - GDScript users can easily check via print(
RenderingServer.get_rendering_device().get_driver_and_device_memory_report()
)
 - Uses get_driver_and_device_memory_report on device lost for appending
further info.

Fixes #95967
This commit is contained in:
Matias N. Goldberg 2024-08-24 17:39:46 -03:00
parent e3550cb20f
commit 59d0422dcd
11 changed files with 109 additions and 13 deletions

View File

@ -263,6 +263,10 @@ bool Engine::is_generate_spirv_debug_info_enabled() const {
return generate_spirv_debug_info; return generate_spirv_debug_info;
} }
// Returns the current value of the extra_gpu_memory_tracking flag.
bool Engine::is_extra_gpu_memory_tracking_enabled() const {
	const bool tracking_enabled = extra_gpu_memory_tracking;
	return tracking_enabled;
}
void Engine::set_print_error_messages(bool p_enabled) { void Engine::set_print_error_messages(bool p_enabled) {
CoreGlobals::print_error_enabled = p_enabled; CoreGlobals::print_error_enabled = p_enabled;
} }

View File

@ -72,6 +72,7 @@ private:
bool abort_on_gpu_errors = false; bool abort_on_gpu_errors = false;
bool use_validation_layers = false; bool use_validation_layers = false;
bool generate_spirv_debug_info = false; bool generate_spirv_debug_info = false;
bool extra_gpu_memory_tracking = false;
int32_t gpu_idx = -1; int32_t gpu_idx = -1;
uint64_t _process_frames = 0; uint64_t _process_frames = 0;
@ -181,6 +182,7 @@ public:
bool is_abort_on_gpu_errors_enabled() const; bool is_abort_on_gpu_errors_enabled() const;
bool is_validation_layers_enabled() const; bool is_validation_layers_enabled() const;
bool is_generate_spirv_debug_info_enabled() const; bool is_generate_spirv_debug_info_enabled() const;
bool is_extra_gpu_memory_tracking_enabled() const;
int32_t get_gpu_index() const; int32_t get_gpu_index() const;
void increment_frames_drawn(); void increment_frames_drawn();

View File

@ -497,7 +497,7 @@
<return type="int" /> <return type="int" />
<description> <description>
Returns how many allocations the GPU has performed for internal driver structures. Returns how many allocations the GPU has performed for internal driver structures.
This is only used by Vulkan in Debug builds and can return 0 when this information is not tracked or unknown. This is only used by Vulkan in debug builds and can return 0 when this information is not tracked or unknown.
</description> </description>
</method> </method>
<method name="get_device_allocs_by_object_type" qualifiers="const"> <method name="get_device_allocs_by_object_type" qualifiers="const">
@ -506,7 +506,7 @@
<description> <description>
Same as [method get_device_allocation_count] but filtered for a given object type. Same as [method get_device_allocation_count] but filtered for a given object type.
The type argument must be in range [code][0; get_tracked_object_type_count - 1][/code]. If [method get_tracked_object_type_count] is 0, then type argument is ignored and always returns 0. The type argument must be in range [code][0; get_tracked_object_type_count - 1][/code]. If [method get_tracked_object_type_count] is 0, then type argument is ignored and always returns 0.
This is only used by Vulkan in Debug builds and can return 0 when this information is not tracked or unknown. This is only used by Vulkan in debug builds and can return 0 when this information is not tracked or unknown.
</description> </description>
</method> </method>
<method name="get_device_memory_by_object_type" qualifiers="const"> <method name="get_device_memory_by_object_type" qualifiers="const">
@ -515,7 +515,7 @@
<description> <description>
Same as [method get_device_total_memory] but filtered for a given object type. Same as [method get_device_total_memory] but filtered for a given object type.
The type argument must be in range [code][0; get_tracked_object_type_count - 1][/code]. If [method get_tracked_object_type_count] is 0, then type argument is ignored and always returns 0. The type argument must be in range [code][0; get_tracked_object_type_count - 1][/code]. If [method get_tracked_object_type_count] is 0, then type argument is ignored and always returns 0.
This is only used by Vulkan in Debug builds and can return 0 when this information is not tracked or unknown. This is only used by Vulkan in debug builds and can return 0 when this information is not tracked or unknown.
</description> </description>
</method> </method>
<method name="get_device_name" qualifiers="const"> <method name="get_device_name" qualifiers="const">
@ -534,7 +534,7 @@
<return type="int" /> <return type="int" />
<description> <description>
Returns how much bytes the GPU is using. Returns how much bytes the GPU is using.
This is only used by Vulkan in Debug builds and can return 0 when this information is not tracked or unknown. This is only used by Vulkan in debug builds and can return 0 when this information is not tracked or unknown.
</description> </description>
</method> </method>
<method name="get_device_vendor_name" qualifiers="const"> <method name="get_device_vendor_name" qualifiers="const">
@ -547,7 +547,7 @@
<return type="int" /> <return type="int" />
<description> <description>
Returns how many allocations the GPU driver has performed for internal driver structures. Returns how many allocations the GPU driver has performed for internal driver structures.
This is only used by Vulkan in Debug builds and can return 0 when this information is not tracked or unknown. This is only used by Vulkan in debug builds and can return 0 when this information is not tracked or unknown.
</description> </description>
</method> </method>
<method name="get_driver_allocs_by_object_type" qualifiers="const"> <method name="get_driver_allocs_by_object_type" qualifiers="const">
@ -556,7 +556,24 @@
<description> <description>
Same as [method get_driver_allocation_count] but filtered for a given object type. Same as [method get_driver_allocation_count] but filtered for a given object type.
The type argument must be in range [code][0; get_tracked_object_type_count - 1][/code]. If [method get_tracked_object_type_count] is 0, then type argument is ignored and always returns 0. The type argument must be in range [code][0; get_tracked_object_type_count - 1][/code]. If [method get_tracked_object_type_count] is 0, then type argument is ignored and always returns 0.
This is only used by Vulkan in Debug builds and can return 0 when this information is not tracked or unknown. This is only used by Vulkan in debug builds and can return 0 when this information is not tracked or unknown.
</description>
</method>
<method name="get_driver_and_device_memory_report" qualifiers="const">
<return type="String" />
<description>
Returns a string report in CSV format, built using the following methods:
- [method get_tracked_object_name]
- [method get_tracked_object_type_count]
- [method get_driver_total_memory]
- [method get_driver_allocation_count]
- [method get_driver_memory_by_object_type]
- [method get_driver_allocs_by_object_type]
- [method get_device_total_memory]
- [method get_device_allocation_count]
- [method get_device_memory_by_object_type]
- [method get_device_allocs_by_object_type]
This is only used by Vulkan in debug builds. Godot must also be started with the [code]--extra-gpu-memory-tracking[/code] [url=$DOCS_URL/tutorials/editor/command_line_tutorial.html]command line argument[/url].
</description> </description>
</method> </method>
<method name="get_driver_memory_by_object_type" qualifiers="const"> <method name="get_driver_memory_by_object_type" qualifiers="const">
@ -565,7 +582,7 @@
<description> <description>
Same as [method get_driver_total_memory] but filtered for a given object type. Same as [method get_driver_total_memory] but filtered for a given object type.
The type argument must be in range [code][0; get_tracked_object_type_count - 1][/code]. If [method get_tracked_object_type_count] is 0, then type argument is ignored and always returns 0. The type argument must be in range [code][0; get_tracked_object_type_count - 1][/code]. If [method get_tracked_object_type_count] is 0, then type argument is ignored and always returns 0.
This is only used by Vulkan in Debug builds and can return 0 when this information is not tracked or unknown. This is only used by Vulkan in debug builds and can return 0 when this information is not tracked or unknown.
</description> </description>
</method> </method>
<method name="get_driver_resource"> <method name="get_driver_resource">
@ -581,7 +598,7 @@
<return type="int" /> <return type="int" />
<description> <description>
Returns how much bytes the GPU driver is using for internal driver structures. Returns how much bytes the GPU driver is using for internal driver structures.
This is only used by Vulkan in Debug builds and can return 0 when this information is not tracked or unknown. This is only used by Vulkan in debug builds and can return 0 when this information is not tracked or unknown.
</description> </description>
</method> </method>
<method name="get_frame_delay" qualifiers="const"> <method name="get_frame_delay" qualifiers="const">
@ -614,14 +631,14 @@
- SWAPCHAIN_KHR - SWAPCHAIN_KHR
- COMMAND_POOL - COMMAND_POOL
Thus if e.g. [code]get_tracked_object_name(5)[/code] returns "COMMAND_POOL", then [code]get_device_memory_by_object_type(5)[/code] returns the bytes used by the GPU for command pools. Thus if e.g. [code]get_tracked_object_name(5)[/code] returns "COMMAND_POOL", then [code]get_device_memory_by_object_type(5)[/code] returns the bytes used by the GPU for command pools.
This is only used by Vulkan in Debug builds. This is only used by Vulkan in debug builds. Godot must also be started with the [code]--extra-gpu-memory-tracking[/code] [url=$DOCS_URL/tutorials/editor/command_line_tutorial.html]command line argument[/url].
</description> </description>
</method> </method>
<method name="get_tracked_object_type_count" qualifiers="const"> <method name="get_tracked_object_type_count" qualifiers="const">
<return type="int" /> <return type="int" />
<description> <description>
Returns how many types of trackable objects are. Returns how many types of trackable objects are.
This is only used by Vulkan in Debug builds. This is only used by Vulkan in debug builds. Godot must also be started with the [code]--extra-gpu-memory-tracking[/code] [url=$DOCS_URL/tutorials/editor/command_line_tutorial.html]command line argument[/url].
</description> </description>
</method> </method>
<method name="index_array_create"> <method name="index_array_create">

View File

@ -106,7 +106,7 @@ const char *RenderingContextDriverVulkan::get_tracked_object_name(uint32_t p_typ
return vkTrackedObjectTypeNames[p_type_index]; return vkTrackedObjectTypeNames[p_type_index];
#else #else
return "VK_TRACK_DRIVER_* disabled at build time"; return "VK_TRACK_*_MEMORY disabled at build time";
#endif #endif
} }
@ -120,6 +120,8 @@ uint64_t RenderingContextDriverVulkan::get_tracked_object_type_count() const {
RenderingContextDriverVulkan::VkTrackedObjectType vk_object_to_tracked_object(VkObjectType p_type) { RenderingContextDriverVulkan::VkTrackedObjectType vk_object_to_tracked_object(VkObjectType p_type) {
if (p_type > VK_OBJECT_TYPE_COMMAND_POOL && p_type != (VkObjectType)RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_VMA) { if (p_type > VK_OBJECT_TYPE_COMMAND_POOL && p_type != (VkObjectType)RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_VMA) {
switch (p_type) { switch (p_type) {
case VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE:
return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_DESCRIPTOR_UPDATE_TEMPLATE_KHR;
case VK_OBJECT_TYPE_SURFACE_KHR: case VK_OBJECT_TYPE_SURFACE_KHR:
return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_SURFACE; return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_SURFACE;
case VK_OBJECT_TYPE_SWAPCHAIN_KHR: case VK_OBJECT_TYPE_SWAPCHAIN_KHR:
@ -128,6 +130,9 @@ RenderingContextDriverVulkan::VkTrackedObjectType vk_object_to_tracked_object(Vk
return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT; return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT;
case VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT: case VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT:
return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT; return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT;
case VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR:
case VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV:
return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_ACCELERATION_STRUCTURE;
default: default:
_err_print_error(FUNCTION_STR, __FILE__, __LINE__, "Unknown VkObjectType enum value " + itos((uint32_t)p_type) + ".Please add it to VkTrackedObjectType, switch statement in " _err_print_error(FUNCTION_STR, __FILE__, __LINE__, "Unknown VkObjectType enum value " + itos((uint32_t)p_type) + ".Please add it to VkTrackedObjectType, switch statement in "
"vk_object_to_tracked_object and get_tracked_object_name.", "vk_object_to_tracked_object and get_tracked_object_name.",
@ -229,6 +234,9 @@ VkAllocationCallbacks *RenderingContextDriverVulkan::get_allocation_callbacks(Vk
#if !defined(VK_TRACK_DRIVER_MEMORY) #if !defined(VK_TRACK_DRIVER_MEMORY)
return nullptr; return nullptr;
#else #else
if (!Engine::get_singleton()->is_extra_gpu_memory_tracking_enabled()) {
return nullptr;
}
#ifdef _MSC_VER #ifdef _MSC_VER
#define LAMBDA_VK_CALL_CONV #define LAMBDA_VK_CALL_CONV

View File

@ -170,10 +170,12 @@ public:
#if defined(VK_TRACK_DRIVER_MEMORY) || defined(VK_TRACK_DEVICE_MEMORY) #if defined(VK_TRACK_DRIVER_MEMORY) || defined(VK_TRACK_DEVICE_MEMORY)
enum VkTrackedObjectType{ enum VkTrackedObjectType{
VK_TRACKED_OBJECT_TYPE_SURFACE = VK_OBJECT_TYPE_COMMAND_POOL + 1, VK_TRACKED_OBJECT_DESCRIPTOR_UPDATE_TEMPLATE_KHR = VK_OBJECT_TYPE_COMMAND_POOL + 1,
VK_TRACKED_OBJECT_TYPE_SURFACE,
VK_TRACKED_OBJECT_TYPE_SWAPCHAIN, VK_TRACKED_OBJECT_TYPE_SWAPCHAIN,
VK_TRACKED_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT, VK_TRACKED_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT,
VK_TRACKED_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT, VK_TRACKED_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT,
VK_TRACKED_OBJECT_TYPE_ACCELERATION_STRUCTURE,
VK_TRACKED_OBJECT_TYPE_VMA, VK_TRACKED_OBJECT_TYPE_VMA,
VK_TRACKED_OBJECT_TYPE_COUNT VK_TRACKED_OBJECT_TYPE_COUNT
}; };

View File

@ -503,7 +503,9 @@ Error RenderingDeviceDriverVulkan::_initialize_device_extensions() {
} }
#if defined(VK_TRACK_DEVICE_MEMORY) #if defined(VK_TRACK_DEVICE_MEMORY)
if (Engine::get_singleton()->is_extra_gpu_memory_tracking_enabled()) {
_register_requested_device_extension(VK_EXT_DEVICE_MEMORY_REPORT_EXTENSION_NAME, false); _register_requested_device_extension(VK_EXT_DEVICE_MEMORY_REPORT_EXTENSION_NAME, false);
}
#endif #endif
_register_requested_device_extension(VK_EXT_DEVICE_FAULT_EXTENSION_NAME, false); _register_requested_device_extension(VK_EXT_DEVICE_FAULT_EXTENSION_NAME, false);
@ -5044,6 +5046,8 @@ void RenderingDeviceDriverVulkan::on_device_lost() const {
if (fault_info.pAddressInfos) { if (fault_info.pAddressInfos) {
memfree(fault_info.pAddressInfos); memfree(fault_info.pAddressInfos);
} }
_err_print_error(FUNCTION_STR, __FILE__, __LINE__, context_driver->get_driver_and_device_memory_report());
} }
void RenderingDeviceDriverVulkan::print_lost_device_info() { void RenderingDeviceDriverVulkan::print_lost_device_info() {

View File

@ -605,6 +605,9 @@ void Main::print_help(const char *p_binary) {
print_help_option("--gpu-abort", "Abort on graphics API usage errors (usually validation layer errors). May help see the problem if your system freezes.\n", CLI_OPTION_AVAILABILITY_TEMPLATE_DEBUG); print_help_option("--gpu-abort", "Abort on graphics API usage errors (usually validation layer errors). May help see the problem if your system freezes.\n", CLI_OPTION_AVAILABILITY_TEMPLATE_DEBUG);
#endif #endif
print_help_option("--generate-spirv-debug-info", "Generate SPIR-V debug information. This allows source-level shader debugging with RenderDoc.\n"); print_help_option("--generate-spirv-debug-info", "Generate SPIR-V debug information. This allows source-level shader debugging with RenderDoc.\n");
#if defined(DEBUG_ENABLED) || defined(DEV_ENABLED)
print_help_option("--extra-gpu-memory-tracking", "Enables additional memory tracking (see class reference for `RenderingDevice.get_driver_and_device_memory_report()` and linked methods). Currently only implemented for Vulkan. Enabling this feature may cause crashes on some systems due to buggy drivers or bugs in the Vulkan Loader. See https://github.com/godotengine/godot/issues/95967\n");
#endif
print_help_option("--remote-debug <uri>", "Remote debug (<protocol>://<host/IP>[:<port>], e.g. tcp://127.0.0.1:6007).\n"); print_help_option("--remote-debug <uri>", "Remote debug (<protocol>://<host/IP>[:<port>], e.g. tcp://127.0.0.1:6007).\n");
print_help_option("--single-threaded-scene", "Force scene tree to run in single-threaded mode. Sub-thread groups are disabled and run on the main thread.\n"); print_help_option("--single-threaded-scene", "Force scene tree to run in single-threaded mode. Sub-thread groups are disabled and run on the main thread.\n");
#if defined(DEBUG_ENABLED) #if defined(DEBUG_ENABLED)
@ -1204,6 +1207,8 @@ Error Main::setup(const char *execpath, int argc, char *argv[], bool p_second_ph
#endif #endif
} else if (arg == "--generate-spirv-debug-info") { } else if (arg == "--generate-spirv-debug-info") {
Engine::singleton->generate_spirv_debug_info = true; Engine::singleton->generate_spirv_debug_info = true;
} else if (arg == "--extra-gpu-memory-tracking") {
Engine::singleton->extra_gpu_memory_tracking = true;
} else if (arg == "--tablet-driver") { } else if (arg == "--tablet-driver") {
if (N) { if (N) {
tablet_driver = N->get(); tablet_driver = N->get();

View File

@ -84,6 +84,51 @@ void RenderingContextDriver::window_destroy(DisplayServer::WindowID p_window) {
window_surface_map.erase(p_window); window_surface_map.erase(p_window);
} }
// Builds a human-readable report of driver and device memory usage.
//
// The report consists of a short header, the overall driver/device totals,
// and then one semicolon-separated row per tracked object type: name, driver
// memory in MB, driver allocation count, device memory in MB, device
// allocation count. Every figure is obtained from this context driver's own
// get_tracked_object_*, get_driver_* and get_device_* queries.
String RenderingContextDriver::get_driver_and_device_memory_report() const {
	// Memory figures are divided by this to be printed in MB.
	const double bytes_per_mb = 1024.0 * 1024.0;
	const uint32_t num_tracked_obj_types = static_cast<uint32_t>(get_tracked_object_type_count());

	String report;

	report += "=== Driver Memory Report ===";

	report += "\nLaunch with --extra-gpu-memory-tracking and build with "
			  "DEBUG_ENABLED for this functionality to work.";
	// The trailing space matters: the two adjacent literals are concatenated
	// by the compiler, so without it the text reads "support it(e.g. ...".
	report += "\nDevice memory may be unavailable if the API does not support it "
			  "(e.g. VK_EXT_device_memory_report is unsupported).";
	report += "\n";

	// Overall totals. Each label ends with ": " so the value is separated
	// from it consistently.
	report += "\nTotal Driver Memory: ";
	report += String::num_real(static_cast<double>(get_driver_total_memory()) / bytes_per_mb);
	report += " MB";
	report += "\nTotal Driver Num Allocations: ";
	report += String::num_uint64(get_driver_allocation_count());

	report += "\nTotal Device Memory: ";
	report += String::num_real(static_cast<double>(get_device_total_memory()) / bytes_per_mb);
	report += " MB";
	report += "\nTotal Device Num Allocations: ";
	report += String::num_uint64(get_device_allocation_count());

	// Per-object-type breakdown, one row per tracked type.
	report += "\n\nMemory use by object type (CSV format):";
	report += "\n\nCategory; Driver memory in MB; Driver Allocation Count; "
			  "Device memory in MB; Device Allocation Count";

	for (uint32_t i = 0u; i < num_tracked_obj_types; ++i) {
		report += "\n";
		report += get_tracked_object_name(i);
		report += ";";
		report += String::num_real(static_cast<double>(get_driver_memory_by_object_type(i)) / bytes_per_mb);
		report += ";";
		report += String::num_uint64(get_driver_allocs_by_object_type(i));
		report += ";";
		report += String::num_real(static_cast<double>(get_device_memory_by_object_type(i)) / bytes_per_mb);
		report += ";";
		report += String::num_uint64(get_device_allocs_by_object_type(i));
	}

	return report;
}
const char *RenderingContextDriver::get_tracked_object_name(uint32_t p_type_index) const { const char *RenderingContextDriver::get_tracked_object_name(uint32_t p_type_index) const {
return "Tracking Unsupported by API"; return "Tracking Unsupported by API";
} }

View File

@ -102,6 +102,8 @@ public:
virtual void surface_destroy(SurfaceID p_surface) = 0; virtual void surface_destroy(SurfaceID p_surface) = 0;
virtual bool is_debug_utils_enabled() const = 0; virtual bool is_debug_utils_enabled() const = 0;
String get_driver_and_device_memory_report() const;
virtual const char *get_tracked_object_name(uint32_t p_type_index) const; virtual const char *get_tracked_object_name(uint32_t p_type_index) const;
virtual uint64_t get_tracked_object_type_count() const; virtual uint64_t get_tracked_object_type_count() const;

View File

@ -5723,6 +5723,10 @@ uint64_t RenderingDevice::get_driver_resource(DriverResource p_resource, RID p_r
return driver->get_resource_native_handle(p_resource, driver_id); return driver->get_resource_native_handle(p_resource, driver_id);
} }
// Forwards to the rendering context driver and returns the report it produces.
String RenderingDevice::get_driver_and_device_memory_report() const {
	String memory_report = context->get_driver_and_device_memory_report();
	return memory_report;
}
String RenderingDevice::get_tracked_object_name(uint32_t p_type_index) const { String RenderingDevice::get_tracked_object_name(uint32_t p_type_index) const {
return context->get_tracked_object_name(p_type_index); return context->get_tracked_object_name(p_type_index);
} }
@ -6077,6 +6081,7 @@ void RenderingDevice::_bind_methods() {
ClassDB::bind_method(D_METHOD("get_perf_report"), &RenderingDevice::get_perf_report); ClassDB::bind_method(D_METHOD("get_perf_report"), &RenderingDevice::get_perf_report);
ClassDB::bind_method(D_METHOD("get_driver_and_device_memory_report"), &RenderingDevice::get_driver_and_device_memory_report);
ClassDB::bind_method(D_METHOD("get_tracked_object_name", "type_index"), &RenderingDevice::get_tracked_object_name); ClassDB::bind_method(D_METHOD("get_tracked_object_name", "type_index"), &RenderingDevice::get_tracked_object_name);
ClassDB::bind_method(D_METHOD("get_tracked_object_type_count"), &RenderingDevice::get_tracked_object_type_count); ClassDB::bind_method(D_METHOD("get_tracked_object_type_count"), &RenderingDevice::get_tracked_object_type_count);
ClassDB::bind_method(D_METHOD("get_driver_total_memory"), &RenderingDevice::get_driver_total_memory); ClassDB::bind_method(D_METHOD("get_driver_total_memory"), &RenderingDevice::get_driver_total_memory);

View File

@ -1417,6 +1417,8 @@ public:
uint64_t get_driver_resource(DriverResource p_resource, RID p_rid = RID(), uint64_t p_index = 0); uint64_t get_driver_resource(DriverResource p_resource, RID p_rid = RID(), uint64_t p_index = 0);
String get_driver_and_device_memory_report() const;
String get_tracked_object_name(uint32_t p_type_index) const; String get_tracked_object_name(uint32_t p_type_index) const;
uint64_t get_tracked_object_type_count() const; uint64_t get_tracked_object_type_count() const;