From 59d0422dcd59686689d915000a800ed6c7753f00 Mon Sep 17 00:00:00 2001 From: "Matias N. Goldberg" Date: Sat, 24 Aug 2024 17:39:46 -0300 Subject: [PATCH] Disable extra memory tracking by default PR #90993 added several debugging utilities. Among them is advanced memory tracking through the use of custom allocators and VK_EXT_device_memory_report. However, as issue #95967 reveals, it is dangerous to leave it on by default because drivers (or even the Vulkan loader) can too easily accidentally break custom allocators by allocating memory through std malloc but then asking us to deallocate it (or vice versa). This PR makes the following changes: - Adds --extra-gpu-memory-tracking command line argument - Adds missing enum entries to RenderingContextDriverVulkan::VkTrackedObjectType - Adds RenderingDevice::get_driver_and_device_memory_report - GDScript users can easily check via print( RenderingServer.get_rendering_device().get_driver_and_device_memory_report() ) - Uses get_driver_and_device_memory_report on device lost to append further info. 
Fixes #95967 --- core/config/engine.cpp | 4 ++ core/config/engine.h | 2 + doc/classes/RenderingDevice.xml | 37 ++++++++++----- .../rendering_context_driver_vulkan.cpp | 10 ++++- .../vulkan/rendering_context_driver_vulkan.h | 4 +- .../vulkan/rendering_device_driver_vulkan.cpp | 6 ++- main/main.cpp | 5 +++ .../rendering/rendering_context_driver.cpp | 45 +++++++++++++++++++ servers/rendering/rendering_context_driver.h | 2 + servers/rendering/rendering_device.cpp | 5 +++ servers/rendering/rendering_device.h | 2 + 11 files changed, 109 insertions(+), 13 deletions(-) diff --git a/core/config/engine.cpp b/core/config/engine.cpp index 3574430cf75..9cdc21fe8e2 100644 --- a/core/config/engine.cpp +++ b/core/config/engine.cpp @@ -263,6 +263,10 @@ bool Engine::is_generate_spirv_debug_info_enabled() const { return generate_spirv_debug_info; } +bool Engine::is_extra_gpu_memory_tracking_enabled() const { + return extra_gpu_memory_tracking; +} + void Engine::set_print_error_messages(bool p_enabled) { CoreGlobals::print_error_enabled = p_enabled; } diff --git a/core/config/engine.h b/core/config/engine.h index 7e617d8773a..f858eba3283 100644 --- a/core/config/engine.h +++ b/core/config/engine.h @@ -72,6 +72,7 @@ private: bool abort_on_gpu_errors = false; bool use_validation_layers = false; bool generate_spirv_debug_info = false; + bool extra_gpu_memory_tracking = false; int32_t gpu_idx = -1; uint64_t _process_frames = 0; @@ -181,6 +182,7 @@ public: bool is_abort_on_gpu_errors_enabled() const; bool is_validation_layers_enabled() const; bool is_generate_spirv_debug_info_enabled() const; + bool is_extra_gpu_memory_tracking_enabled() const; int32_t get_gpu_index() const; void increment_frames_drawn(); diff --git a/doc/classes/RenderingDevice.xml b/doc/classes/RenderingDevice.xml index 96c7d0d4d40..ddd52c6835b 100644 --- a/doc/classes/RenderingDevice.xml +++ b/doc/classes/RenderingDevice.xml @@ -497,7 +497,7 @@ Returns how many allocations the GPU has performed for internal driver 
structures. - This is only used by Vulkan in Debug builds and can return 0 when this information is not tracked or unknown. + This is only used by Vulkan in debug builds and can return 0 when this information is not tracked or unknown. @@ -506,7 +506,7 @@ Same as [method get_device_allocation_count] but filtered for a given object type. The type argument must be in range [code][0; get_tracked_object_type_count - 1][/code]. If [method get_tracked_object_type_count] is 0, then type argument is ignored and always returns 0. - This is only used by Vulkan in Debug builds and can return 0 when this information is not tracked or unknown. + This is only used by Vulkan in debug builds and can return 0 when this information is not tracked or unknown. @@ -515,7 +515,7 @@ Same as [method get_device_total_memory] but filtered for a given object type. The type argument must be in range [code][0; get_tracked_object_type_count - 1][/code]. If [method get_tracked_object_type_count] is 0, then type argument is ignored and always returns 0. - This is only used by Vulkan in Debug builds and can return 0 when this information is not tracked or unknown. + This is only used by Vulkan in debug builds and can return 0 when this information is not tracked or unknown. @@ -534,7 +534,7 @@ Returns how much bytes the GPU is using. - This is only used by Vulkan in Debug builds and can return 0 when this information is not tracked or unknown. + This is only used by Vulkan in debug builds and can return 0 when this information is not tracked or unknown. @@ -547,7 +547,7 @@ Returns how many allocations the GPU driver has performed for internal driver structures. - This is only used by Vulkan in Debug builds and can return 0 when this information is not tracked or unknown. + This is only used by Vulkan in debug builds and can return 0 when this information is not tracked or unknown. @@ -556,7 +556,24 @@ Same as [method get_driver_allocation_count] but filtered for a given object type. 
The type argument must be in range [code][0; get_tracked_object_type_count - 1][/code]. If [method get_tracked_object_type_count] is 0, then type argument is ignored and always returns 0. - This is only used by Vulkan in Debug builds and can return 0 when this information is not tracked or unknown. + This is only used by Vulkan in debug builds and can return 0 when this information is not tracked or unknown. + + + + + + Returns a string report in CSV format, built from the values returned by the following methods: + - [method get_tracked_object_name] + - [method get_tracked_object_type_count] + - [method get_driver_total_memory] + - [method get_driver_allocation_count] + - [method get_driver_memory_by_object_type] + - [method get_driver_allocs_by_object_type] + - [method get_device_total_memory] + - [method get_device_allocation_count] + - [method get_device_memory_by_object_type] + - [method get_device_allocs_by_object_type] + This is only used by Vulkan in debug builds. Godot must also be started with the [code]--extra-gpu-memory-tracking[/code] [url=$DOCS_URL/tutorials/editor/command_line_tutorial.html]command line argument[/url]. @@ -565,7 +582,7 @@ Same as [method get_driver_total_memory] but filtered for a given object type. The type argument must be in range [code][0; get_tracked_object_type_count - 1][/code]. If [method get_tracked_object_type_count] is 0, then type argument is ignored and always returns 0. - This is only used by Vulkan in Debug builds and can return 0 when this information is not tracked or unknown. + This is only used by Vulkan in debug builds and can return 0 when this information is not tracked or unknown. @@ -581,7 +598,7 @@ Returns how much bytes the GPU driver is using for internal driver structures. - This is only used by Vulkan in Debug builds and can return 0 when this information is not tracked or unknown. + This is only used by Vulkan in debug builds and can return 0 when this information is not tracked or unknown. 
@@ -614,14 +631,14 @@ - SWAPCHAIN_KHR - COMMAND_POOL Thus if e.g. [code]get_tracked_object_name(5)[/code] returns "COMMAND_POOL", then [code]get_device_memory_by_object_type(5)[/code] returns the bytes used by the GPU for command pools. - This is only used by Vulkan in Debug builds. + This is only used by Vulkan in debug builds. Godot must also be started with the [code]--extra-gpu-memory-tracking[/code] [url=$DOCS_URL/tutorials/editor/command_line_tutorial.html]command line argument[/url]. Returns how many types of trackable objects are. - This is only used by Vulkan in Debug builds. + This is only used by Vulkan in debug builds. Godot must also be started with the [code]--extra-gpu-memory-tracking[/code] [url=$DOCS_URL/tutorials/editor/command_line_tutorial.html]command line argument[/url]. diff --git a/drivers/vulkan/rendering_context_driver_vulkan.cpp b/drivers/vulkan/rendering_context_driver_vulkan.cpp index 7db79bddd3f..df9bd986240 100644 --- a/drivers/vulkan/rendering_context_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_context_driver_vulkan.cpp @@ -106,7 +106,7 @@ const char *RenderingContextDriverVulkan::get_tracked_object_name(uint32_t p_typ return vkTrackedObjectTypeNames[p_type_index]; #else - return "VK_TRACK_DRIVER_* disabled at build time"; + return "VK_TRACK_*_MEMORY disabled at build time"; #endif } @@ -120,6 +120,8 @@ uint64_t RenderingContextDriverVulkan::get_tracked_object_type_count() const { RenderingContextDriverVulkan::VkTrackedObjectType vk_object_to_tracked_object(VkObjectType p_type) { if (p_type > VK_OBJECT_TYPE_COMMAND_POOL && p_type != (VkObjectType)RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_VMA) { switch (p_type) { + case VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE: + return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_DESCRIPTOR_UPDATE_TEMPLATE_KHR; case VK_OBJECT_TYPE_SURFACE_KHR: return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_SURFACE; case VK_OBJECT_TYPE_SWAPCHAIN_KHR: @@ -128,6 +130,9 @@ 
RenderingContextDriverVulkan::VkTrackedObjectType vk_object_to_tracked_object(Vk return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT; case VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT: return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT; + case VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR: + case VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV: + return RenderingContextDriverVulkan::VK_TRACKED_OBJECT_TYPE_ACCELERATION_STRUCTURE; default: _err_print_error(FUNCTION_STR, __FILE__, __LINE__, "Unknown VkObjectType enum value " + itos((uint32_t)p_type) + ".Please add it to VkTrackedObjectType, switch statement in " "vk_object_to_tracked_object and get_tracked_object_name.", @@ -229,6 +234,9 @@ VkAllocationCallbacks *RenderingContextDriverVulkan::get_allocation_callbacks(Vk #if !defined(VK_TRACK_DRIVER_MEMORY) return nullptr; #else + if (!Engine::get_singleton()->is_extra_gpu_memory_tracking_enabled()) { + return nullptr; + } #ifdef _MSC_VER #define LAMBDA_VK_CALL_CONV diff --git a/drivers/vulkan/rendering_context_driver_vulkan.h b/drivers/vulkan/rendering_context_driver_vulkan.h index e70d17e1315..4fbca012c61 100644 --- a/drivers/vulkan/rendering_context_driver_vulkan.h +++ b/drivers/vulkan/rendering_context_driver_vulkan.h @@ -170,10 +170,12 @@ public: #if defined(VK_TRACK_DRIVER_MEMORY) || defined(VK_TRACK_DEVICE_MEMORY) enum VkTrackedObjectType{ - VK_TRACKED_OBJECT_TYPE_SURFACE = VK_OBJECT_TYPE_COMMAND_POOL + 1, + VK_TRACKED_OBJECT_DESCRIPTOR_UPDATE_TEMPLATE_KHR = VK_OBJECT_TYPE_COMMAND_POOL + 1, + VK_TRACKED_OBJECT_TYPE_SURFACE, VK_TRACKED_OBJECT_TYPE_SWAPCHAIN, VK_TRACKED_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT, VK_TRACKED_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT, + VK_TRACKED_OBJECT_TYPE_ACCELERATION_STRUCTURE, VK_TRACKED_OBJECT_TYPE_VMA, VK_TRACKED_OBJECT_TYPE_COUNT }; diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index 092af13b213..2ba353868b5 
100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -503,7 +503,9 @@ Error RenderingDeviceDriverVulkan::_initialize_device_extensions() { } #if defined(VK_TRACK_DEVICE_MEMORY) - _register_requested_device_extension(VK_EXT_DEVICE_MEMORY_REPORT_EXTENSION_NAME, false); + if (Engine::get_singleton()->is_extra_gpu_memory_tracking_enabled()) { + _register_requested_device_extension(VK_EXT_DEVICE_MEMORY_REPORT_EXTENSION_NAME, false); + } #endif _register_requested_device_extension(VK_EXT_DEVICE_FAULT_EXTENSION_NAME, false); @@ -5044,6 +5046,8 @@ void RenderingDeviceDriverVulkan::on_device_lost() const { if (fault_info.pAddressInfos) { memfree(fault_info.pAddressInfos); } + + _err_print_error(FUNCTION_STR, __FILE__, __LINE__, context_driver->get_driver_and_device_memory_report()); } void RenderingDeviceDriverVulkan::print_lost_device_info() { diff --git a/main/main.cpp b/main/main.cpp index f82df786bc2..af8f1c692a8 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -605,6 +605,9 @@ void Main::print_help(const char *p_binary) { print_help_option("--gpu-abort", "Abort on graphics API usage errors (usually validation layer errors). May help see the problem if your system freezes.\n", CLI_OPTION_AVAILABILITY_TEMPLATE_DEBUG); #endif print_help_option("--generate-spirv-debug-info", "Generate SPIR-V debug information. This allows source-level shader debugging with RenderDoc.\n"); +#if defined(DEBUG_ENABLED) || defined(DEV_ENABLED) + print_help_option("--extra-gpu-memory-tracking", "Enables additional memory tracking (see class reference for `RenderingDevice.get_driver_and_device_memory_report()` and linked methods). Currently only implemented for Vulkan. Enabling this feature may cause crashes on some systems due to buggy drivers or bugs in the Vulkan Loader. See https://github.com/godotengine/godot/issues/95967\n"); +#endif print_help_option("--remote-debug ", "Remote debug (://[:], e.g. 
tcp://127.0.0.1:6007).\n"); print_help_option("--single-threaded-scene", "Force scene tree to run in single-threaded mode. Sub-thread groups are disabled and run on the main thread.\n"); #if defined(DEBUG_ENABLED) @@ -1204,6 +1207,8 @@ Error Main::setup(const char *execpath, int argc, char *argv[], bool p_second_ph #endif } else if (arg == "--generate-spirv-debug-info") { Engine::singleton->generate_spirv_debug_info = true; + } else if (arg == "--extra-gpu-memory-tracking") { + Engine::singleton->extra_gpu_memory_tracking = true; } else if (arg == "--tablet-driver") { if (N) { tablet_driver = N->get(); diff --git a/servers/rendering/rendering_context_driver.cpp b/servers/rendering/rendering_context_driver.cpp index 23e091e00c8..b623be40980 100644 --- a/servers/rendering/rendering_context_driver.cpp +++ b/servers/rendering/rendering_context_driver.cpp @@ -84,6 +84,51 @@ void RenderingContextDriver::window_destroy(DisplayServer::WindowID p_window) { window_surface_map.erase(p_window); } +String RenderingContextDriver::get_driver_and_device_memory_report() const { + String report; + + const uint32_t num_tracked_obj_types = static_cast(get_tracked_object_type_count()); + + report += "=== Driver Memory Report ==="; + + report += "\nLaunch with --extra-gpu-memory-tracking and build with " + "DEBUG_ENABLED for this functionality to work."; + report += "\nDevice memory may be unavailable if the API does not support it" + "(e.g. 
VK_EXT_device_memory_report is unsupported)."; + report += "\n"; + + report += "\nTotal Driver Memory:"; + report += String::num_real(double(get_driver_total_memory()) / (1024.0 * 1024.0)); + report += " MB"; + report += "\nTotal Driver Num Allocations: "; + report += String::num_uint64(get_driver_allocation_count()); + + report += "\nTotal Device Memory:"; + report += String::num_real(double(get_device_total_memory()) / (1024.0 * 1024.0)); + report += " MB"; + report += "\nTotal Device Num Allocations: "; + report += String::num_uint64(get_device_allocation_count()); + + report += "\n\nMemory use by object type (CSV format):"; + report += "\n\nCategory; Driver memory in MB; Driver Allocation Count; " + "Device memory in MB; Device Allocation Count"; + + for (uint32_t i = 0u; i < num_tracked_obj_types; ++i) { + report += "\n"; + report += get_tracked_object_name(i); + report += ";"; + report += String::num_real(double(get_driver_memory_by_object_type(i)) / (1024.0 * 1024.0)); + report += ";"; + report += String::num_uint64(get_driver_allocs_by_object_type(i)); + report += ";"; + report += String::num_real(double(get_device_memory_by_object_type(i)) / (1024.0 * 1024.0)); + report += ";"; + report += String::num_uint64(get_device_allocs_by_object_type(i)); + } + + return report; +} + const char *RenderingContextDriver::get_tracked_object_name(uint32_t p_type_index) const { return "Tracking Unsupported by API"; } diff --git a/servers/rendering/rendering_context_driver.h b/servers/rendering/rendering_context_driver.h index 8449db442c3..2e5951ae4f8 100644 --- a/servers/rendering/rendering_context_driver.h +++ b/servers/rendering/rendering_context_driver.h @@ -102,6 +102,8 @@ public: virtual void surface_destroy(SurfaceID p_surface) = 0; virtual bool is_debug_utils_enabled() const = 0; + String get_driver_and_device_memory_report() const; + virtual const char *get_tracked_object_name(uint32_t p_type_index) const; virtual uint64_t get_tracked_object_type_count() const; 
diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 332e18bb683..9e3ab5da495 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -5723,6 +5723,10 @@ uint64_t RenderingDevice::get_driver_resource(DriverResource p_resource, RID p_r return driver->get_resource_native_handle(p_resource, driver_id); } +String RenderingDevice::get_driver_and_device_memory_report() const { + return context->get_driver_and_device_memory_report(); +} + String RenderingDevice::get_tracked_object_name(uint32_t p_type_index) const { return context->get_tracked_object_name(p_type_index); } @@ -6077,6 +6081,7 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("get_perf_report"), &RenderingDevice::get_perf_report); + ClassDB::bind_method(D_METHOD("get_driver_and_device_memory_report"), &RenderingDevice::get_driver_and_device_memory_report); ClassDB::bind_method(D_METHOD("get_tracked_object_name", "type_index"), &RenderingDevice::get_tracked_object_name); ClassDB::bind_method(D_METHOD("get_tracked_object_type_count"), &RenderingDevice::get_tracked_object_type_count); ClassDB::bind_method(D_METHOD("get_driver_total_memory"), &RenderingDevice::get_driver_total_memory); diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 362fe499e4a..d8f9e2c31a7 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -1417,6 +1417,8 @@ public: uint64_t get_driver_resource(DriverResource p_resource, RID p_rid = RID(), uint64_t p_index = 0); + String get_driver_and_device_memory_report() const; + String get_tracked_object_name(uint32_t p_type_index) const; uint64_t get_tracked_object_type_count() const;