From 4ad424234f761c6b8ae167d478f79f13545e13f2 Mon Sep 17 00:00:00 2001 From: Dario Date: Fri, 18 Oct 2024 14:23:37 -0300 Subject: [PATCH] Improve synchronization of rendering commands after changes from transfer queues. Fix an error where barriers are expected to be inserted for the swap chain textures. Add the relevant synchronization stages and accesses to resources between frames. Fix an error where debug labels weren't finished correctly between frames. Breadcrumbs are now behind an optional macro as they currently lead to synchronization errors which are harmless. --- .../vulkan/rendering_device_driver_vulkan.cpp | 42 +++++++++++++++---- .../vulkan/rendering_device_driver_vulkan.h | 9 ++++ servers/rendering/rendering_device_graph.cpp | 18 ++++++-- servers/rendering/rendering_device_graph.h | 5 ++- 4 files changed, 63 insertions(+), 11 deletions(-) diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index 154095552bf..32086515da3 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -2637,11 +2637,13 @@ bool RenderingDeviceDriverVulkan::command_buffer_begin(CommandBufferID p_cmd_buf bool RenderingDeviceDriverVulkan::command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) { // Reset is implicit (VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT). + Framebuffer *framebuffer = (Framebuffer *)(p_framebuffer.id); + VkCommandBufferInheritanceInfo inheritance_info = {}; inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; inheritance_info.renderPass = (VkRenderPass)p_render_pass.id; inheritance_info.subpass = p_subpass; - inheritance_info.framebuffer = (VkFramebuffer)p_framebuffer.id; + inheritance_info.framebuffer = framebuffer->vk_framebuffer; VkCommandBufferBeginInfo cmd_buf_begin_info = {}; cmd_buf_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; @@ -2951,12 +2953,16 @@ Error RenderingDeviceDriverVulkan::swap_chain_resize(CommandQueueID p_cmd_queue, fb_create_info.height = surface->height; fb_create_info.layers = 1; - VkFramebuffer framebuffer; + VkFramebuffer vk_framebuffer; for (uint32_t i = 0; i < image_count; i++) { fb_create_info.pAttachments = &swap_chain->image_views[i]; - err = vkCreateFramebuffer(vk_device, &fb_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FRAMEBUFFER), &framebuffer); + err = vkCreateFramebuffer(vk_device, &fb_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FRAMEBUFFER), &vk_framebuffer); ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE); + Framebuffer *framebuffer = memnew(Framebuffer); + framebuffer->vk_framebuffer = vk_framebuffer; + framebuffer->swap_chain_image = swap_chain->images[i]; + framebuffer->swap_chain_image_subresource_range = view_create_info.subresourceRange; swap_chain->framebuffers.push_back(RDD::FramebufferID(framebuffer)); } @@ -3025,7 +3031,10 @@ RDD::FramebufferID RenderingDeviceDriverVulkan::swap_chain_acquire_framebuffer(C command_queue->pending_semaphores_for_fence.push_back(semaphore_index); // Return the corresponding framebuffer to the new current image. - return swap_chain->framebuffers[swap_chain->image_index]; + FramebufferID framebuffer_id = swap_chain->framebuffers[swap_chain->image_index]; + Framebuffer *framebuffer = (Framebuffer *)(framebuffer_id.id); + framebuffer->swap_chain_acquired = true; + return framebuffer_id; } RDD::RenderPassID RenderingDeviceDriverVulkan::swap_chain_get_render_pass(SwapChainID p_swap_chain) { @@ -3094,11 +3103,15 @@ RDD::FramebufferID RenderingDeviceDriverVulkan::framebuffer_create(RenderPassID } #endif - return FramebufferID(vk_framebuffer); + Framebuffer *framebuffer = memnew(Framebuffer); + framebuffer->vk_framebuffer = vk_framebuffer; + return FramebufferID(framebuffer); } void RenderingDeviceDriverVulkan::framebuffer_free(FramebufferID p_framebuffer) { - vkDestroyFramebuffer(vk_device, (VkFramebuffer)p_framebuffer.id, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FRAMEBUFFER)); + Framebuffer *framebuffer = (Framebuffer *)(p_framebuffer.id); + vkDestroyFramebuffer(vk_device, framebuffer->vk_framebuffer, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FRAMEBUFFER)); + memdelete(framebuffer); } /****************/ @@ -4316,10 +4329,25 @@ void RenderingDeviceDriverVulkan::render_pass_free(RenderPassID p_render_pass) { static_assert(ARRAYS_COMPATIBLE_FIELDWISE(RDD::RenderPassClearValue, VkClearValue)); void RenderingDeviceDriverVulkan::command_begin_render_pass(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, FramebufferID p_framebuffer, CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView p_clear_values) { + Framebuffer *framebuffer = (Framebuffer *)(p_framebuffer.id); + if (framebuffer->swap_chain_acquired) { + // Insert a barrier to wait for the acquisition of the framebuffer before the render pass begins. + VkImageMemoryBarrier image_barrier = {}; + image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + image_barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.image = framebuffer->swap_chain_image; + image_barrier.subresourceRange = framebuffer->swap_chain_image_subresource_range; + vkCmdPipelineBarrier((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_barrier); + framebuffer->swap_chain_acquired = false; + } + VkRenderPassBeginInfo render_pass_begin = {}; render_pass_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; render_pass_begin.renderPass = (VkRenderPass)p_render_pass.id; - render_pass_begin.framebuffer = (VkFramebuffer)p_framebuffer.id; + render_pass_begin.framebuffer = framebuffer->vk_framebuffer; render_pass_begin.renderArea.offset.x = p_rect.position.x; render_pass_begin.renderArea.offset.y = p_rect.position.y; diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index 58f7a97ec0d..4d5de897cd9 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -366,6 +366,15 @@ public: /**** FRAMEBUFFER ****/ /*********************/ + struct Framebuffer { + VkFramebuffer vk_framebuffer = VK_NULL_HANDLE; + + // Only filled in by a framebuffer created by a swap chain. Unused otherwise. + VkImage swap_chain_image = VK_NULL_HANDLE; + VkImageSubresourceRange swap_chain_image_subresource_range = {}; + bool swap_chain_acquired = false; + }; + virtual FramebufferID framebuffer_create(RenderPassID p_render_pass, VectorView p_attachments, uint32_t p_width, uint32_t p_height) override final; virtual void framebuffer_free(FramebufferID p_framebuffer) override final; diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp index abcb76cd43a..0ecd818805d 100644 --- a/servers/rendering/rendering_device_graph.cpp +++ b/servers/rendering/rendering_device_graph.cpp @@ -34,6 +34,7 @@ #define FORCE_FULL_ACCESS_BITS 0 #define PRINT_RESOURCE_TRACKER_TOTAL 0 #define PRINT_COMMAND_RECORDING 0 +#define INSERT_BREADCRUMBS 1 RenderingDeviceGraph::RenderingDeviceGraph() { driver_honors_barriers = false; @@ -438,6 +439,15 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr // Always update the access of the tracker according to the latest usage. resource_tracker->usage_access = new_usage_access; + // Always accumulate the stages of the tracker with the commands that use it. + search_tracker->current_frame_stages = search_tracker->current_frame_stages | r_command->self_stages; + + if (!search_tracker->previous_frame_stages.is_empty()) { + // Add to the command the stages the tracker was used on in the previous frame. + r_command->previous_stages = r_command->previous_stages | search_tracker->previous_frame_stages; + search_tracker->previous_frame_stages.clear(); + } + if (different_usage) { // Even if the usage of the resource isn't a write usage explicitly, a different usage implies a transition and it should therefore be considered a write. write_usage = true; @@ -823,7 +833,7 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC const RecordedDrawListCommand *draw_list_command = reinterpret_cast(command); const VectorView clear_values(draw_list_command->clear_values(), draw_list_command->clear_values_count); -#if defined(DEBUG_ENABLED) || defined(DEV_ENABLED) +#if INSERT_BREADCRUMBS driver->command_insert_breadcrumb(r_command_buffer, draw_list_command->breadcrumb); #endif driver->command_begin_render_pass(r_command_buffer, draw_list_command->render_pass, draw_list_command->framebuffer, draw_list_command->command_buffer_type, draw_list_command->region, clear_values); @@ -874,7 +884,7 @@ void RenderingDeviceGraph::_run_label_command_change(RDD::CommandBufferID p_comm } if (p_ignore_previous_value || p_new_label_index != r_current_label_index || p_new_level != r_current_label_level) { - if (!p_ignore_previous_value && (p_use_label_for_empty || r_current_label_index >= 0)) { + if (!p_ignore_previous_value && (p_use_label_for_empty || r_current_label_index >= 0 || r_current_label_level >= 0)) { // End the current label. driver->command_end_label(p_command_buffer); } @@ -888,6 +898,8 @@ void RenderingDeviceGraph::_run_label_command_change(RDD::CommandBufferID p_comm } else if (p_use_label_for_empty) { label_name = "Command graph"; label_color = Color(1, 1, 1, 1); + } else { + return; } // Add the level to the name. @@ -2064,7 +2076,7 @@ void RenderingDeviceGraph::end(bool p_reorder_commands, bool p_full_barriers, RD } } - _run_label_command_change(r_command_buffer, -1, -1, true, false, nullptr, 0, current_label_index, current_label_level); + _run_label_command_change(r_command_buffer, -1, -1, false, false, nullptr, 0, current_label_index, current_label_level); #if PRINT_COMMAND_RECORDING print_line(vformat("Recorded %d commands", command_count)); diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h index e13e3a04298..0534d4ee1ea 100644 --- a/servers/rendering/rendering_device_graph.h +++ b/servers/rendering/rendering_device_graph.h @@ -151,6 +151,8 @@ public: struct ResourceTracker { uint32_t reference_count = 0; int64_t command_frame = -1; + BitField previous_frame_stages; + BitField current_frame_stages; int32_t read_full_command_list_index = -1; int32_t read_slice_command_list_index = -1; int32_t write_command_or_list_index = -1; @@ -174,8 +176,9 @@ public: _FORCE_INLINE_ void reset_if_outdated(int64_t new_command_frame) { if (new_command_frame != command_frame) { - usage_access.clear(); command_frame = new_command_frame; + previous_frame_stages = current_frame_stages; + current_frame_stages.clear(); read_full_command_list_index = -1; read_slice_command_list_index = -1; write_command_or_list_index = -1;