From cc4d39b0c1f0d4645d63120832a49e54242e3f5f Mon Sep 17 00:00:00 2001 From: Dario Date: Fri, 24 Nov 2023 08:23:22 -0300 Subject: [PATCH] Acyclic Command Graph for RenderingDevice. Adds a new system to automatically reorder commands, perform layout transitions and insert synchronization barriers based on the commands issued to RenderingDevice. --- doc/classes/ProjectSettings.xml | 2 - doc/classes/RenderingDevice.xml | 110 +- drivers/d3d12/d3d12_context.cpp | 36 +- drivers/d3d12/d3d12_context.h | 4 +- .../d3d12/rendering_device_driver_d3d12.cpp | 14 + drivers/d3d12/rendering_device_driver_d3d12.h | 7 + .../vulkan/rendering_device_driver_vulkan.cpp | 50 +- .../vulkan/rendering_device_driver_vulkan.h | 7 + drivers/vulkan/vulkan_context.cpp | 44 +- drivers/vulkan/vulkan_context.h | 4 +- .../4.2-stable.expected | 23 + modules/lightmapper_rd/lightmapper_rd.cpp | 4 +- .../rendering/renderer_rd/api_context_rd.h | 5 +- .../renderer_rd/cluster_builder_rd.cpp | 16 +- .../renderer_rd/effects/bokeh_dof.cpp | 12 +- .../renderer_rd/effects/copy_effects.cpp | 28 +- .../renderer_rd/effects/copy_effects.h | 2 +- .../renderer_rd/effects/debug_effects.cpp | 6 +- servers/rendering/renderer_rd/effects/fsr.cpp | 2 +- .../rendering/renderer_rd/effects/fsr2.cpp | 9 +- .../renderer_rd/effects/luminance.cpp | 2 +- .../rendering/renderer_rd/effects/resolve.cpp | 8 +- .../rendering/renderer_rd/effects/resolve.h | 4 +- .../renderer_rd/effects/ss_effects.cpp | 12 +- .../renderer_rd/effects/tone_mapper.cpp | 2 +- servers/rendering/renderer_rd/effects/vrs.cpp | 2 +- .../rendering/renderer_rd/environment/fog.cpp | 6 +- .../rendering/renderer_rd/environment/gi.cpp | 67 +- .../rendering/renderer_rd/environment/gi.h | 6 +- .../rendering/renderer_rd/environment/sky.cpp | 10 +- .../render_forward_clustered.cpp | 142 +- .../render_forward_clustered.h | 15 +- .../scene_shader_forward_clustered.cpp | 2 +- .../forward_mobile/render_forward_mobile.cpp | 80 +- .../forward_mobile/render_forward_mobile.h | 15 
+- .../scene_shader_forward_mobile.cpp | 2 +- .../renderer_rd/renderer_canvas_render_rd.cpp | 14 +- .../renderer_rd/renderer_scene_render_rd.cpp | 12 +- .../renderer_rd/renderer_scene_render_rd.h | 3 +- .../renderer_rd/shaders/effects/fsr2/SCsub | 5 + .../renderer_rd/storage_rd/light_storage.cpp | 8 +- .../storage_rd/material_storage.cpp | 4 +- .../renderer_rd/storage_rd/material_storage.h | 2 +- .../renderer_rd/storage_rd/mesh_storage.cpp | 6 +- .../storage_rd/particles_storage.cpp | 20 +- .../storage_rd/particles_storage.h | 3 + .../storage_rd/render_scene_data_rd.cpp | 2 +- .../storage_rd/texture_storage.cpp | 8 +- servers/rendering/rendering_device.compat.inc | 110 +- servers/rendering/rendering_device.cpp | 2704 ++++++----------- servers/rendering/rendering_device.h | 235 +- servers/rendering/rendering_device_commons.h | 2 + servers/rendering/rendering_device_driver.h | 11 + servers/rendering/rendering_device_graph.cpp | 1930 ++++++++++++ servers/rendering/rendering_device_graph.h | 668 ++++ servers/rendering_server.cpp | 1 - .../shaders/ffx_fsr2_depth_clip_pass.glsl | 1 - 57 files changed, 4128 insertions(+), 2381 deletions(-) create mode 100644 servers/rendering/rendering_device_graph.cpp create mode 100644 servers/rendering/rendering_device_graph.h diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml index b969e3bb8ea..8640879a4ec 100644 --- a/doc/classes/ProjectSettings.xml +++ b/doc/classes/ProjectSettings.xml @@ -2534,8 +2534,6 @@ Decreasing this value may improve GPU performance on certain setups, even if the maximum number of clustered elements is never reached in the project. [b]Note:[/b] This setting is only effective when using the Forward+ rendering method, not Mobile and Compatibility. 
- - diff --git a/doc/classes/RenderingDevice.xml b/doc/classes/RenderingDevice.xml index 5ca6d382679..495bec24dcc 100644 --- a/doc/classes/RenderingDevice.xml +++ b/doc/classes/RenderingDevice.xml @@ -14,12 +14,12 @@ $DOCS_URL/tutorials/shaders/compute_shaders.html - + - Puts a memory barrier in place. This is used for synchronization to avoid data races. See also [method full_barrier], which may be useful for debugging. + [i]Deprecated.[/i] Barriers are automatically inserted by RenderingDevice. @@ -27,9 +27,8 @@ - - Clears the contents of the [param buffer], clearing [param size_bytes] bytes, starting at [param offset]. Always raises a memory barrier. + Clears the contents of the [param buffer], clearing [param size_bytes] bytes, starting at [param offset]. Prints an error if: - the size isn't a multiple of four - the region specified by [param offset] + [param size_bytes] exceeds the buffer @@ -37,6 +36,21 @@ - a compute list is currently active (created by [method compute_list_begin]) + + + + + + + + + Copies [param size] bytes from the [param src_buffer] at [param src_offset] into [param dst_buffer] at [param dst_offset]. + Prints an error if: + - [param size] exceeds the size of either [param src_buffer] or [param dst_buffer] at their corresponding offsets + - a draw list is currently active (created by [method draw_list_begin]) + - a compute list is currently active (created by [method compute_list_begin]) + + @@ -52,9 +66,8 @@ - - Updates a region of [param size_bytes] bytes, starting at [param offset], in the buffer, with the specified [param data]. Raises a memory barrier except when [param post_barrier] is set to [constant BARRIER_MASK_NO_BARRIER]. + Updates a region of [param size_bytes] bytes, starting at [param offset], in the buffer, with the specified [param data]. 
Prints an error if: - the region specified by [param offset] + [param size_bytes] exceeds the buffer - a draw list is currently active (created by [method draw_list_begin]) @@ -77,10 +90,9 @@ - Starts a list of compute commands created with the [code]compute_*[/code] methods. The returned value should be passed to other [code]compute_list_*[/code] functions. - If [param allow_draw_overlap] is [code]true[/code], you may have one draw list running at the same time as one compute list. Multiple compute lists cannot be created at the same time; you must finish the previous compute list first using [method compute_list_end]. + Multiple compute lists cannot be created at the same time; you must finish the previous compute list first using [method compute_list_end]. A simple compute operation might look like this (code is not a complete example): [codeblock] var rd = RenderingDevice.new() @@ -128,7 +140,6 @@ - Finishes a list of compute commands created with the [code]compute_*[/code] methods. @@ -170,7 +181,7 @@ Create a command buffer debug label region that can be displayed in third-party tools such as [url=https://renderdoc.org/]RenderDoc[/url]. All regions must be ended with a [method draw_command_end_label] call. When viewed from the linear series of submissions to a single queue, calls to [method draw_command_begin_label] and [method draw_command_end_label] must be matched and balanced. - The [code]VK_EXT_DEBUG_UTILS_EXTENSION_NAME[/code] Vulkan extension must be available and enabled for command buffer debug label region to work. See also [method draw_command_insert_label] and [method draw_command_end_label]. + The [code]VK_EXT_DEBUG_UTILS_EXTENSION_NAME[/code] Vulkan extension must be available and enabled for command buffer debug label region to work. See also [method draw_command_end_label]. @@ -179,12 +190,12 @@ Ends the command buffer debug label region started by a [method draw_command_begin_label] call. 
- + - Inserts a command buffer debug label region in the current command buffer. Unlike [method draw_command_begin_label], this region should not be ended with a [method draw_command_end_label] call. + [i]Deprecated.[/i] Inserting labels no longer applies due to command reordering. @@ -198,7 +209,6 @@ - Starts a list of raster drawing commands created with the [code]draw_*[/code] methods. The returned value should be passed to other [code]draw_list_*[/code] functions. Multiple draw lists cannot be created at the same time; you must finish the previous draw list first using [method draw_list_end]. @@ -232,7 +242,7 @@ [b]Note:[/b] Cannot be used with local RenderingDevices, as these don't have a screen. If called on a local RenderingDevice, [method draw_list_begin_for_screen] returns [constant INVALID_ID]. - + @@ -246,7 +256,7 @@ - Variant of [method draw_list_begin] with support for multiple splits. The [param splits] parameter determines how many splits are created. + [i]Deprecated.[/i] Split draw lists are used automatically by RenderingDevice. @@ -310,7 +320,6 @@ - Finishes a list of raster drawing commands created with the [code]draw_*[/code] methods. @@ -335,14 +344,14 @@ - Switches to the next draw pass and returns the split's ID. Equivalent to [method draw_list_switch_to_next_pass_split] with [code]splits[/code] set to [code]1[/code]. + Switches to the next draw pass. - + - Switches to the next draw pass, with the number of splits allocated specified in [param splits]. The return value is an array containing the ID of each split. For single-split usage, see [method draw_list_switch_to_next_pass]. + [i]Deprecated.[/i] Split draw lists are used automatically by RenderingDevice. @@ -430,10 +439,10 @@ Tries to free an object in the RenderingDevice. To avoid memory leaks, this should be called after using an object as memory management does not occur automatically when using RenderingDevice directly. - + - Puts a [i]full[/i] memory barrier in place. 
This is a memory [method barrier] with all flags enabled. [method full_barrier] it should only be used for debugging as it can severely impact performance. + [i]Deprecated.[/i] Barriers are automatically inserted by RenderingDevice. @@ -704,7 +713,6 @@ - Clears the specified [param texture] by replacing all of its pixels with the specified [param color]. [param base_mipmap] and [param mipmap_count] determine which mipmaps of the texture are affected by this clear operation, while [param base_layer] and [param layer_count] determine which layers of a 3D texture (or texture array) are affected by this clear operation. For 2D textures (which only have one layer by design), [param base_layer] must be [code]0[/code] and [param layer_count] must be [code]1[/code]. [b]Note:[/b] [param texture] can't be cleared while a draw list that uses it as part of a framebuffer is being created. Ensure the draw list is finalized (and that the color/depth texture using it is not set to [constant FINAL_ACTION_CONTINUE]) to clear this texture. @@ -721,7 +729,6 @@ - Copies the [param from_texture] to [param to_texture] with the specified [param from_pos], [param to_pos] and [param size] coordinates. The Z axis of the [param from_pos], [param to_pos] and [param size] must be [code]0[/code] for 2-dimensional textures. Source and destination mipmaps/layers must also be specified, with these parameters being [code]0[/code] for textures without mipmaps or single-layer textures. Returns [constant @GlobalScope.OK] if the texture copy was successful or [constant @GlobalScope.ERR_INVALID_PARAMETER] otherwise. [b]Note:[/b] [param from_texture] texture can't be copied while a draw list that uses it as part of a framebuffer is being created. Ensure the draw list is finalized (and that the color/depth texture using it is not set to [constant FINAL_ACTION_CONTINUE]) to copy this texture. 
@@ -831,7 +838,6 @@ - Resolves the [param from_texture] texture onto [param to_texture] with multisample antialiasing enabled. This must be used when rendering a framebuffer for MSAA to work. Returns [constant @GlobalScope.OK] if successful, [constant @GlobalScope.ERR_INVALID_PARAMETER] otherwise. [b]Note:[/b] [param from_texture] and [param to_texture] textures must have the same dimension, format and type (color or depth). @@ -848,7 +854,6 @@ - Updates texture data with new data, replacing the previous data in place. The updated texture data must have the same dimensions and format. For 2D textures (which only have one layer), [param layer] must be [code]0[/code]. Returns [constant @GlobalScope.OK] if the update was successful, [constant @GlobalScope.ERR_INVALID_PARAMETER] otherwise. [b]Note:[/b] Updating textures is forbidden during creation of a draw or compute list. @@ -2150,39 +2155,48 @@ - - Start rendering and clear the whole framebuffer. + + Load the previous contents of the framebuffer. - - Start rendering and clear the framebuffer in the specified region. + + Clear the whole framebuffer or its specified region. - - Continue rendering and clear the framebuffer in the specified region. Framebuffer must have been left in [constant FINAL_ACTION_CONTINUE] state as the final action previously. + + Ignore the previous contents of the framebuffer. This is the fastest option if you'll overwrite all of the pixels and don't need to read any of them. - - Start rendering, but keep attached color texture contents. If the framebuffer was previously used to read in a shader, this will automatically insert a layout transition. - - - Start rendering, ignore what is there; write above it. In general, this is the fastest option when you will be writing every single pixel and you don't need a clear color. - - - Continue rendering. Framebuffer must have been left in [constant FINAL_ACTION_CONTINUE] state as the final action previously. 
- - + Represents the size of the [enum InitialAction] enum. - - Store the texture for reading and make it read-only if it has the [constant TEXTURE_USAGE_SAMPLING_BIT] bit (only applies to color, depth and stencil attachments). + + [i]Deprecated.[/i] Use [constant INITIAL_ACTION_CLEAR] instead. + + + [i]Deprecated.[/i] Use [constant INITIAL_ACTION_LOAD] instead. + + + [i]Deprecated.[/i] Use [constant INITIAL_ACTION_LOAD] instead. + + + [i]Deprecated.[/i] Use [constant INITIAL_ACTION_DISCARD] instead. + + + [i]Deprecated.[/i] Use [constant INITIAL_ACTION_LOAD] instead. + + + Store the result of the draw list in the framebuffer. This is generally what you want to do. - Discard the texture data and make it read-only if it has the [constant TEXTURE_USAGE_SAMPLING_BIT] bit (only applies to color, depth and stencil attachments). + Discard the contents of the framebuffer. This is the fastest option if you don't need to use the results of the draw list. - - Store the texture and continue for further processing. Similar to [constant FINAL_ACTION_READ], but does not make the texture read-only if it has the [constant TEXTURE_USAGE_SAMPLING_BIT] bit. - - + Represents the size of the [enum FinalAction] enum. + + [i]Deprecated.[/i] Use [constant FINAL_ACTION_STORE] instead. + + + [i]Deprecated.[/i] Use [constant FINAL_ACTION_STORE] instead. + Vertex shader stage. This can be used to manipulate vertices from a shader (but not create new vertices). 
diff --git a/drivers/d3d12/d3d12_context.cpp b/drivers/d3d12/d3d12_context.cpp index fa27b9bc559..37066a811d8 100644 --- a/drivers/d3d12/d3d12_context.cpp +++ b/drivers/d3d12/d3d12_context.cpp @@ -76,6 +76,8 @@ char godot_nir_arch_name[32]; #endif #endif +#define D3D12_DEBUG_LAYER_BREAK_ON_ERROR 0 + void D3D12Context::_debug_message_func( D3D12_MESSAGE_CATEGORY p_category, D3D12_MESSAGE_SEVERITY p_severity, @@ -563,6 +565,11 @@ Error D3D12Context::_create_device(DeviceBasics &r_basics) { res = info_queue->PushStorageFilter(&filter); ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); + +#if D3D12_DEBUG_LAYER_BREAK_ON_ERROR + res = info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, true); + ERR_FAIL_COND_V(!SUCCEEDED(res), ERR_CANT_CREATE); +#endif } return OK; @@ -1056,27 +1063,6 @@ void D3D12Context::local_device_free(RID p_local_device) { local_device_owner.free(p_local_device); } -void D3D12Context::command_begin_label(RDD::CommandBufferID p_command_buffer, String p_label_name, const Color &p_color) { -#ifdef PIX_ENABLED - const RenderingDeviceDriverD3D12::CommandBufferInfo *cmd_buf_info = (const RenderingDeviceDriverD3D12::CommandBufferInfo *)p_command_buffer.id; - PIXBeginEvent(cmd_buf_info->cmd_list.Get(), p_color.to_argb32(), p_label_name.utf8().get_data()); -#endif -} - -void D3D12Context::command_insert_label(RDD::CommandBufferID p_command_buffer, String p_label_name, const Color &p_color) { -#ifdef PIX_ENABLED - const RenderingDeviceDriverD3D12::CommandBufferInfo *cmd_buf_info = (const RenderingDeviceDriverD3D12::CommandBufferInfo *)p_command_buffer.id; - PIXSetMarker(cmd_buf_info->cmd_list.Get(), p_color.to_argb32(), p_label_name.utf8().get_data()); -#endif -} - -void D3D12Context::command_end_label(RDD::CommandBufferID p_command_buffer) { -#ifdef PIX_ENABLED - const RenderingDeviceDriverD3D12::CommandBufferInfo *cmd_buf_info = (const RenderingDeviceDriverD3D12::CommandBufferInfo *)p_command_buffer.id; - 
PIXEndEvent(cmd_buf_info->cmd_list.Get()); -#endif -} - void D3D12Context::set_object_name(ID3D12Object *p_object, String p_object_name) { ERR_FAIL_NULL(p_object); int name_len = p_object_name.size(); @@ -1125,6 +1111,14 @@ RenderingDeviceDriver *D3D12Context::get_driver(RID p_local_device) { } } +bool D3D12Context::is_debug_utils_enabled() const { +#ifdef PIX_ENABLED + return true; +#else + return false; +#endif +} + D3D12Context::D3D12Context() { command_list_queue.resize(1); // First one is always the setup command. command_list_queue[0] = nullptr; diff --git a/drivers/d3d12/d3d12_context.h b/drivers/d3d12/d3d12_context.h index a27c8f4320b..ac90d384674 100644 --- a/drivers/d3d12/d3d12_context.h +++ b/drivers/d3d12/d3d12_context.h @@ -240,9 +240,6 @@ public: virtual Error swap_buffers() override final; virtual Error initialize() override final; - virtual void command_begin_label(RDD::CommandBufferID p_command_buffer, String p_label_name, const Color &p_color) override final; - virtual void command_insert_label(RDD::CommandBufferID p_command_buffer, String p_label_name, const Color &p_color) override final; - virtual void command_end_label(RDD::CommandBufferID p_command_buffer) override final; void set_object_name(ID3D12Object *p_object, String p_object_name); virtual String get_device_vendor_name() const override final; @@ -255,6 +252,7 @@ public: virtual DisplayServer::VSyncMode get_vsync_mode(DisplayServer::WindowID p_window = 0) const override final; virtual RenderingDeviceDriver *get_driver(RID p_local_device = RID()) override final; + virtual bool is_debug_utils_enabled() const override final; D3D12Context(); virtual ~D3D12Context(); diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index 6a2a3c32b06..d3a7344c9d3 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -5162,6 +5162,20 @@ void 
RenderingDeviceDriverD3D12::command_timestamp_write(CommandBufferID p_cmd_b cmd_buf_info->cmd_list->ResolveQueryData(tqp_info->query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, p_index, tqp_info->query_count, results_buffer, p_index * sizeof(uint64_t)); } +void RenderingDeviceDriverD3D12::command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) { +#ifdef PIX_ENABLED + const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; + PIXBeginEvent(cmd_buf_info->cmd_list.Get(), p_color.to_argb32(), p_label_name); +#endif +} + +void RenderingDeviceDriverD3D12::command_end_label(CommandBufferID p_cmd_buffer) { +#ifdef PIX_ENABLED + const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; + PIXEndEvent(cmd_buf_info->cmd_list.Get()); +#endif +} + /****************/ /**** SCREEN ****/ /****************/ diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h index bd195728783..0da339c6fdb 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.h +++ b/drivers/d3d12/rendering_device_driver_d3d12.h @@ -770,6 +770,13 @@ public: virtual void command_timestamp_query_pool_reset(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_query_count) override final; virtual void command_timestamp_write(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_index) override final; + /****************/ + /**** LABELS ****/ + /****************/ + + virtual void command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) override final; + virtual void command_end_label(CommandBufferID p_cmd_buffer) override final; + /****************/ /**** SCREEN ****/ /****************/ diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index e18161c9748..34acc0cc15c 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ 
b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -733,6 +733,9 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared_from_slice(Tex case TEXTURE_SLICE_2D_ARRAY: { image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY; } break; + default: { + return TextureID(nullptr); + } } image_view_create_info.format = RD_TO_VK_FORMAT[p_view.format]; image_view_create_info.components.r = (VkComponentSwizzle)p_view.swizzle_r; @@ -1172,7 +1175,7 @@ bool RenderingDeviceDriverVulkan::command_buffer_begin_secondary(CommandBufferID VkCommandBufferBeginInfo cmd_buf_begin_info = {}; cmd_buf_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - cmd_buf_begin_info.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT | VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT; + cmd_buf_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT | VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT; cmd_buf_begin_info.pInheritanceInfo = &inheritance_info; VkResult err = vkBeginCommandBuffer((VkCommandBuffer)p_cmd_buffer.id, &cmd_buf_begin_info); @@ -1557,11 +1560,9 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec read_offset += sizeof(ShaderBinary::SpecializationConstant); } - struct Stage { - ShaderStage type = SHADER_STAGE_MAX; - Vector spirv; - }; - Vector stages; + Vector> stages_spirv; + stages_spirv.resize(binary_data.stage_count); + r_shader_desc.stages.resize(binary_data.stage_count); for (uint32_t i = 0; i < binary_data.stage_count; i++) { ERR_FAIL_COND_V(read_offset + sizeof(uint32_t) * 3 >= binsize, ShaderID()); @@ -1587,17 +1588,14 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec src_smolv = binptr + read_offset; } - Vector spirv; + Vector &spirv = stages_spirv.ptrw()[i]; uint32_t spirv_size = smolv::GetDecodedBufferSize(src_smolv, smolv_size); spirv.resize(spirv_size); if (!smolv::Decode(src_smolv, smolv_size, spirv.ptrw(), spirv_size)) { ERR_FAIL_V_MSG(ShaderID(), "Malformed 
smolv input uncompressing shader stage:" + String(SHADER_STAGE_NAMES[stage])); } - Stage stage_entry; - stage_entry.type = ShaderStage(stage); - stage_entry.spirv = spirv; - stages.push_back(stage_entry); + r_shader_desc.stages.set(i, ShaderStage(stage)); if (buf_size % 4 != 0) { buf_size += 4 - (buf_size % 4); @@ -1614,22 +1612,22 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec String error_text; - for (int i = 0; i < stages.size(); i++) { + for (int i = 0; i < r_shader_desc.stages.size(); i++) { VkShaderModuleCreateInfo shader_module_create_info = {}; shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - shader_module_create_info.codeSize = stages[i].spirv.size(); - shader_module_create_info.pCode = (const uint32_t *)stages[i].spirv.ptr(); + shader_module_create_info.codeSize = stages_spirv[i].size(); + shader_module_create_info.pCode = (const uint32_t *)stages_spirv[i].ptr(); VkShaderModule vk_module = VK_NULL_HANDLE; VkResult res = vkCreateShaderModule(vk_device, &shader_module_create_info, nullptr, &vk_module); if (res) { - error_text = "Error (" + itos(res) + ") creating shader module for stage: " + String(SHADER_STAGE_NAMES[stages[i].type]); + error_text = "Error (" + itos(res) + ") creating shader module for stage: " + String(SHADER_STAGE_NAMES[r_shader_desc.stages[i]]); break; } VkPipelineShaderStageCreateInfo create_info = {}; create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - create_info.stage = RD_STAGE_TO_VK_SHADER_STAGE_BITS[stages[i].type]; + create_info.stage = RD_STAGE_TO_VK_SHADER_STAGE_BITS[r_shader_desc.stages[i]]; create_info.module = vk_module; create_info.pName = "main"; @@ -3053,6 +3051,26 @@ void RenderingDeviceDriverVulkan::command_timestamp_write(CommandBufferID p_cmd_ vkCmdWriteTimestamp((VkCommandBuffer)p_cmd_buffer.id, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, (VkQueryPool)p_pool_id.id, p_index); } +/****************/ +/**** LABELS ****/ 
+/****************/ + +void RenderingDeviceDriverVulkan::command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) { + VkDebugUtilsLabelEXT label; + label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; + label.pNext = nullptr; + label.pLabelName = p_label_name; + label.color[0] = p_color[0]; + label.color[1] = p_color[1]; + label.color[2] = p_color[2]; + label.color[3] = p_color[3]; + vkCmdBeginDebugUtilsLabelEXT((VkCommandBuffer)p_cmd_buffer.id, &label); +} + +void RenderingDeviceDriverVulkan::command_end_label(CommandBufferID p_cmd_buffer) { + vkCmdEndDebugUtilsLabelEXT((VkCommandBuffer)p_cmd_buffer.id); +} + /****************/ /**** SCREEN ****/ /****************/ diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index 6d8f6fd0e03..1edee6b76e1 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -432,6 +432,13 @@ public: virtual void command_timestamp_query_pool_reset(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_query_count) override final; virtual void command_timestamp_write(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_index) override final; + /****************/ + /**** LABELS ****/ + /****************/ + + virtual void command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) override final; + virtual void command_end_label(CommandBufferID p_cmd_buffer) override final; + /****************/ /**** SCREEN ****/ /****************/ diff --git a/drivers/vulkan/vulkan_context.cpp b/drivers/vulkan/vulkan_context.cpp index d536b4455ab..7db2a9cd661 100644 --- a/drivers/vulkan/vulkan_context.cpp +++ b/drivers/vulkan/vulkan_context.cpp @@ -2822,46 +2822,6 @@ void VulkanContext::local_device_free(RID p_local_device) { local_device_owner.free(p_local_device); } -void VulkanContext::command_begin_label(RDD::CommandBufferID 
p_command_buffer, String p_label_name, const Color &p_color) { - if (!is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { - return; - } - - CharString cs = p_label_name.utf8(); - VkDebugUtilsLabelEXT label; - label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; - label.pNext = nullptr; - label.pLabelName = cs.get_data(); - label.color[0] = p_color[0]; - label.color[1] = p_color[1]; - label.color[2] = p_color[2]; - label.color[3] = p_color[3]; - CmdBeginDebugUtilsLabelEXT((VkCommandBuffer)p_command_buffer.id, &label); -} - -void VulkanContext::command_insert_label(RDD::CommandBufferID p_command_buffer, String p_label_name, const Color &p_color) { - if (!is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { - return; - } - CharString cs = p_label_name.utf8(); - VkDebugUtilsLabelEXT label; - label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; - label.pNext = nullptr; - label.pLabelName = cs.get_data(); - label.color[0] = p_color[0]; - label.color[1] = p_color[1]; - label.color[2] = p_color[2]; - label.color[3] = p_color[3]; - CmdInsertDebugUtilsLabelEXT((VkCommandBuffer)p_command_buffer.id, &label); -} - -void VulkanContext::command_end_label(RDD::CommandBufferID p_command_buffer) { - if (!is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { - return; - } - CmdEndDebugUtilsLabelEXT((VkCommandBuffer)p_command_buffer.id); -} - void VulkanContext::set_object_name(VkObjectType p_object_type, uint64_t p_object_handle, String p_object_name) { if (!is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) { return; @@ -2917,6 +2877,10 @@ RenderingDeviceDriver *VulkanContext::get_driver(RID p_local_device) { } } +bool VulkanContext::is_debug_utils_enabled() const { + return is_instance_extension_enabled(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); +} + VulkanContext::VulkanContext() { command_buffer_queue.resize(1); // First one is always the setup command. 
command_buffer_queue[0] = nullptr; diff --git a/drivers/vulkan/vulkan_context.h b/drivers/vulkan/vulkan_context.h index b914acf3a94..cbb6cf326fa 100644 --- a/drivers/vulkan/vulkan_context.h +++ b/drivers/vulkan/vulkan_context.h @@ -327,9 +327,6 @@ public: virtual Error swap_buffers() override final; virtual Error initialize() override final; - virtual void command_begin_label(RDD::CommandBufferID p_command_buffer, String p_label_name, const Color &p_color) override final; - virtual void command_insert_label(RDD::CommandBufferID p_command_buffer, String p_label_name, const Color &p_color) override final; - virtual void command_end_label(RDD::CommandBufferID p_command_buffer) override final; void set_object_name(VkObjectType p_object_type, uint64_t p_object_handle, String p_object_name); virtual String get_device_vendor_name() const override final; @@ -342,6 +339,7 @@ public: virtual DisplayServer::VSyncMode get_vsync_mode(DisplayServer::WindowID p_window = 0) const override final; virtual RenderingDeviceDriver *get_driver(RID p_local_device = RID()) override final; + virtual bool is_debug_utils_enabled() const override final; VulkanContext(); virtual ~VulkanContext(); diff --git a/misc/extension_api_validation/4.2-stable.expected b/misc/extension_api_validation/4.2-stable.expected index a8b3af78919..53303ebec42 100644 --- a/misc/extension_api_validation/4.2-stable.expected +++ b/misc/extension_api_validation/4.2-stable.expected @@ -28,3 +28,26 @@ GH-86687 Validate extension JSON: Error: Field 'classes/AnimationMixer/methods/_post_process_key_value/arguments/3': type changed value in new API, from "Object" to "int". Exposing the pointer was dangerous and it must be changed to avoid crash. Compatibility methods registered. + + +GH-84976 +-------- +Validate extension JSON: Error: Field 'classes/RenderingDevice/enums/FinalAction/values/FINAL_ACTION_CONTINUE': value changed value in new API, from 2.0 to 0. 
+Validate extension JSON: Error: Field 'classes/RenderingDevice/enums/FinalAction/values/FINAL_ACTION_MAX': value changed value in new API, from 3.0 to 2. +Validate extension JSON: Error: Field 'classes/RenderingDevice/enums/InitialAction/values/INITIAL_ACTION_CLEAR': value changed value in new API, from 0.0 to 1. +Validate extension JSON: Error: Field 'classes/RenderingDevice/enums/InitialAction/values/INITIAL_ACTION_CLEAR_REGION_CONTINUE': value changed value in new API, from 2.0 to 0. +Validate extension JSON: Error: Field 'classes/RenderingDevice/enums/InitialAction/values/INITIAL_ACTION_CONTINUE': value changed value in new API, from 5.0 to 0. +Validate extension JSON: Error: Field 'classes/RenderingDevice/enums/InitialAction/values/INITIAL_ACTION_DROP': value changed value in new API, from 4.0 to 2. +Validate extension JSON: Error: Field 'classes/RenderingDevice/enums/InitialAction/values/INITIAL_ACTION_KEEP': value changed value in new API, from 3.0 to 0. +Validate extension JSON: Error: Field 'classes/RenderingDevice/enums/InitialAction/values/INITIAL_ACTION_MAX': value changed value in new API, from 6.0 to 3. +Validate extension JSON: Error: Field 'classes/RenderingDevice/methods/buffer_clear/arguments': size changed value in new API, from 4 to 3. +Validate extension JSON: Error: Field 'classes/RenderingDevice/methods/buffer_update/arguments': size changed value in new API, from 5 to 4. +Validate extension JSON: Error: Field 'classes/RenderingDevice/methods/draw_list_begin/arguments': size changed value in new API, from 10 to 9. +Validate extension JSON: Error: Field 'classes/RenderingDevice/methods/texture_clear/arguments': size changed value in new API, from 7 to 6. +Validate extension JSON: Error: Field 'classes/RenderingDevice/methods/texture_copy/arguments': size changed value in new API, from 10 to 9. 
+Validate extension JSON: Error: Field 'classes/RenderingDevice/methods/texture_resolve_multisample/arguments': size changed value in new API, from 3 to 2. +Validate extension JSON: Error: Field 'classes/RenderingDevice/methods/texture_update/arguments': size changed value in new API, from 4 to 3. + +Barrier arguments have been removed from all relevant functions as they're no longer required. +Draw and compute list overlap no longer needs to be specified. +Initial and final actions have been simplified into fewer options. diff --git a/modules/lightmapper_rd/lightmapper_rd.cpp b/modules/lightmapper_rd/lightmapper_rd.cpp index 5c2c3f96ded..4746ffb79b9 100644 --- a/modules/lightmapper_rd/lightmapper_rd.cpp +++ b/modules/lightmapper_rd/lightmapper_rd.cpp @@ -703,7 +703,7 @@ void LightmapperRD::_raster_geometry(RenderingDevice *rd, Size2i atlas_size, int raster_push_constant.uv_offset[0] = -0.5f / float(atlas_size.x); raster_push_constant.uv_offset[1] = -0.5f / float(atlas_size.y); - RD::DrawListID draw_list = rd->draw_list_begin(framebuffers[i], RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_DISCARD, clear_colors); + RD::DrawListID draw_list = rd->draw_list_begin(framebuffers[i], RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_DISCARD, clear_colors); //draw opaque rd->draw_list_bind_render_pipeline(draw_list, raster_pipeline); rd->draw_list_bind_uniform_set(draw_list, raster_base_uniform, 0); @@ -1863,7 +1863,7 @@ LightmapperRD::BakeError LightmapperRD::bake(BakeQuality p_quality, bool p_use_d seams_push_constant.slice = uint32_t(i * subslices + k); seams_push_constant.debug = debug; - RD::DrawListID draw_list = rd->draw_list_begin(framebuffers[i], RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_DISCARD, clear_colors); + RD::DrawListID draw_list = rd->draw_list_begin(framebuffers[i], RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, 
RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_DISCARD, clear_colors); rd->draw_list_bind_uniform_set(draw_list, raster_base_uniform, 0); rd->draw_list_bind_uniform_set(draw_list, blendseams_raster_uniform, 1); diff --git a/servers/rendering/renderer_rd/api_context_rd.h b/servers/rendering/renderer_rd/api_context_rd.h index 16e877032cb..fd3be806052 100644 --- a/servers/rendering/renderer_rd/api_context_rd.h +++ b/servers/rendering/renderer_rd/api_context_rd.h @@ -64,10 +64,6 @@ public: virtual Error swap_buffers() = 0; virtual Error initialize() = 0; - virtual void command_begin_label(RDD::CommandBufferID p_command_buffer, String p_label_name, const Color &p_color) = 0; - virtual void command_insert_label(RDD::CommandBufferID p_command_buffer, String p_label_name, const Color &p_color) = 0; - virtual void command_end_label(RDD::CommandBufferID p_command_buffer) = 0; - virtual String get_device_vendor_name() const = 0; virtual String get_device_name() const = 0; virtual RDD::DeviceType get_device_type() const = 0; @@ -78,6 +74,7 @@ public: virtual DisplayServer::VSyncMode get_vsync_mode(DisplayServer::WindowID p_window = 0) const = 0; virtual RenderingDeviceDriver *get_driver(RID p_local_device = RID()) = 0; + virtual bool is_debug_utils_enabled() const = 0; virtual ~ApiContextRD(); }; diff --git a/servers/rendering/renderer_rd/cluster_builder_rd.cpp b/servers/rendering/renderer_rd/cluster_builder_rd.cpp index 0d1721039cb..e661fd92178 100644 --- a/servers/rendering/renderer_rd/cluster_builder_rd.cpp +++ b/servers/rendering/renderer_rd/cluster_builder_rd.cpp @@ -420,11 +420,11 @@ void ClusterBuilderRD::bake_cluster() { RD::get_singleton()->draw_command_begin_label("Bake Light Cluster"); // Clear cluster buffer. - RD::get_singleton()->buffer_clear(cluster_buffer, 0, cluster_buffer_size, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->buffer_clear(cluster_buffer, 0, cluster_buffer_size); if (render_element_count > 0) { // Clear render buffer. 
- RD::get_singleton()->buffer_clear(cluster_render_buffer, 0, cluster_render_buffer_size, RD::BARRIER_MASK_RASTER); + RD::get_singleton()->buffer_clear(cluster_render_buffer, 0, cluster_render_buffer_size); { // Fill state uniform. @@ -439,18 +439,18 @@ void ClusterBuilderRD::bake_cluster() { state.cluster_depth_offset = (render_element_max / 32); state.cluster_data_size = state.cluster_depth_offset + render_element_max; - RD::get_singleton()->buffer_update(state_uniform, 0, sizeof(StateUniform), &state, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->buffer_update(state_uniform, 0, sizeof(StateUniform), &state); } // Update instances. - RD::get_singleton()->buffer_update(element_buffer, 0, sizeof(RenderElementData) * render_element_count, render_elements, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->buffer_update(element_buffer, 0, sizeof(RenderElementData) * render_element_count, render_elements); RENDER_TIMESTAMP("Render 3D Cluster Elements"); // Render elements. { - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_DISCARD); ClusterBuilderSharedDataRD::ClusterRender::PushConstant push_constant = {}; RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, shared->cluster_render.shader_pipelines[use_msaa ? ClusterBuilderSharedDataRD::ClusterRender::PIPELINE_MSAA : ClusterBuilderSharedDataRD::ClusterRender::PIPELINE_NORMAL]); @@ -488,7 +488,7 @@ void ClusterBuilderRD::bake_cluster() { RD::get_singleton()->draw_list_draw(draw_list, true, instances); i += instances; } - RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->draw_list_end(); } // Store elements. 
RENDER_TIMESTAMP("Pack 3D Cluster Elements"); @@ -513,10 +513,8 @@ void ClusterBuilderRD::bake_cluster() { RD::get_singleton()->compute_list_dispatch_threads(compute_list, cluster_screen_size.x, cluster_screen_size.y, 1); - RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->compute_list_end(); } - } else { - RD::get_singleton()->barrier(RD::BARRIER_MASK_TRANSFER, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); } RENDER_TIMESTAMP("< Bake 3D Cluster"); RD::get_singleton()->draw_command_end_label(); diff --git a/servers/rendering/renderer_rd/effects/bokeh_dof.cpp b/servers/rendering/renderer_rd/effects/bokeh_dof.cpp index cc5031823ea..e6262c83e2b 100644 --- a/servers/rendering/renderer_rd/effects/bokeh_dof.cpp +++ b/servers/rendering/renderer_rd/effects/bokeh_dof.cpp @@ -356,7 +356,7 @@ void BokehDOF::bokeh_dof_raster(const BokehBuffers &p_buffers, RID p_camera_attr ERR_FAIL_COND(shader.is_null()); RID framebuffer = p_buffers.base_weight_fb; - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, bokeh.raster_pipelines[BOKEH_GEN_BLUR_SIZE].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_depth_texture), 0); @@ -388,7 +388,7 @@ void BokehDOF::bokeh_dof_raster(const BokehBuffers &p_buffers, RID p_camera_attr RID framebuffer = bokeh.push_constant.half_size ? 
p_buffers.half_fb[0] : p_buffers.secondary_fb; // Pass 1 - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, bokeh.raster_pipelines[mode].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_base_texture), 0); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 1, u_weight_texture0), 1); @@ -412,7 +412,7 @@ void BokehDOF::bokeh_dof_raster(const BokehBuffers &p_buffers, RID p_camera_attr RD::Uniform texture = bokeh.push_constant.half_size ? u_half_texture0 : u_secondary_texture; RD::Uniform weight = bokeh.push_constant.half_size ? 
u_weight_texture2 : u_weight_texture1; - draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, bokeh.raster_pipelines[mode].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, texture), 0); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 1, weight), 1); @@ -430,7 +430,7 @@ void BokehDOF::bokeh_dof_raster(const BokehBuffers &p_buffers, RID p_camera_attr framebuffer = p_buffers.base_fb; - draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, bokeh.raster_pipelines[mode].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_half_texture1), 0); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 1, u_weight_texture3), 1); @@ -463,7 +463,7 @@ void BokehDOF::bokeh_dof_raster(const BokehBuffers &p_buffers, RID p_camera_attr RID framebuffer = bokeh.push_constant.half_size ? 
p_buffers.half_fb[0] : p_buffers.secondary_fb; - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, bokeh.raster_pipelines[mode].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_base_texture), 0); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 1, u_weight_texture0), 1); @@ -481,7 +481,7 @@ void BokehDOF::bokeh_dof_raster(const BokehBuffers &p_buffers, RID p_camera_attr framebuffer = p_buffers.base_fb; - draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, bokeh.raster_pipelines[mode].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_half_texture0), 0); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 1, u_weight_texture2), 1); diff --git a/servers/rendering/renderer_rd/effects/copy_effects.cpp b/servers/rendering/renderer_rd/effects/copy_effects.cpp index bb584beb52c..fd6409d6bf1 100644 --- a/servers/rendering/renderer_rd/effects/copy_effects.cpp +++ 
b/servers/rendering/renderer_rd/effects/copy_effects.cpp @@ -583,7 +583,7 @@ void CopyEffects::copy_to_fb_rect(RID p_source_rd_texture, RID p_dest_framebuffe RID shader = copy_to_fb.shader.version_get_shader(copy_to_fb.shader_version, mode); ERR_FAIL_COND(shader.is_null()); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD, Vector(), 1.0, 0, p_rect); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD, Vector(), 1.0, 0, p_rect); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, copy_to_fb.pipelines[mode].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_source_rd_texture), 0); if (p_secondary.is_valid()) { @@ -650,7 +650,7 @@ void CopyEffects::copy_raster(RID p_source_texture, RID p_dest_framebuffer) { ERR_FAIL_COND(shader.is_null()); // Just copy it back (we use our blur raster shader here).. 
- RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, blur_raster.pipelines[BLUR_MODE_COPY].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_source_texture), 0); RD::get_singleton()->draw_list_bind_index_array(draw_list, material_storage->get_quad_index_array()); @@ -724,7 +724,7 @@ void CopyEffects::gaussian_blur_raster(RID p_source_rd_texture, RID p_dest_textu RID shader = blur_raster.shader.version_get_shader(blur_raster.shader_version, blur_mode); ERR_FAIL_COND(shader.is_null()); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(dest_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(dest_framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, blur_raster.pipelines[blur_mode].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(dest_framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_source_rd_texture), 0); @@ -826,7 +826,7 @@ void CopyEffects::gaussian_glow_raster(RID p_source_rd_texture, RID p_half_textu ERR_FAIL_COND(shader.is_null()); //HORIZONTAL - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(half_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, 
RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(half_framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, blur_raster.pipelines[blur_mode].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(half_framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_source_rd_texture), 0); if (p_auto_exposure.is_valid() && p_first_pass) { @@ -846,7 +846,7 @@ void CopyEffects::gaussian_glow_raster(RID p_source_rd_texture, RID p_half_textu ERR_FAIL_COND(shader.is_null()); //VERTICAL - draw_list = RD::get_singleton()->draw_list_begin(dest_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + draw_list = RD::get_singleton()->draw_list_begin(dest_framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, blur_raster.pipelines[blur_mode].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(dest_framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_half_texture), 0); @@ -916,7 +916,7 @@ void CopyEffects::make_mipmap_raster(RID p_source_rd_texture, RID p_dest_texture RID shader = blur_raster.shader.version_get_shader(blur_raster.shader_version, mode); ERR_FAIL_COND(shader.is_null()); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(dest_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(dest_framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); 
RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, blur_raster.pipelines[mode].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(dest_framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_source_rd_texture), 0); RD::get_singleton()->draw_list_bind_index_array(draw_list, material_storage->get_quad_index_array()); @@ -982,7 +982,7 @@ void CopyEffects::set_color_raster(RID p_dest_texture, const Color &p_color, con RID shader = copy_to_fb.shader.version_get_shader(copy_to_fb.shader_version, mode); ERR_FAIL_COND(shader.is_null()); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(dest_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD, Vector(), 1.0, 0, p_region); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(dest_framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD, Vector(), 1.0, 0, p_region); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, copy_to_fb.pipelines[mode].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(dest_framebuffer))); RD::get_singleton()->draw_list_bind_index_array(draw_list, material_storage->get_quad_index_array()); RD::get_singleton()->draw_list_set_push_constant(draw_list, &copy_to_fb.push_constant, sizeof(CopyToFbPushConstant)); @@ -990,7 +990,7 @@ void CopyEffects::set_color_raster(RID p_dest_texture, const Color &p_color, con RD::get_singleton()->draw_list_end(); } -void CopyEffects::copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuffer, const Rect2 &p_rect, const Vector2 &p_dst_size, float p_z_near, float p_z_far, bool p_dp_flip, BitField p_post_barrier) { +void CopyEffects::copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuffer, const Rect2 &p_rect, const Vector2 &p_dst_size, float p_z_near, float p_z_far, bool p_dp_flip) 
{ UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton(); ERR_FAIL_NULL(uniform_set_cache); MaterialStorage *material_storage = MaterialStorage::get_singleton(); @@ -1015,14 +1015,14 @@ void CopyEffects::copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuf RID shader = cube_to_dp.shader.version_get_shader(cube_to_dp.shader_version, 0); ERR_FAIL_COND(shader.is_null()); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dst_framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dst_framebuffer, RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, cube_to_dp.pipeline.get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dst_framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_source_rd_texture), 0); RD::get_singleton()->draw_list_bind_index_array(draw_list, material_storage->get_quad_index_array()); RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(CopyToDPPushConstant)); RD::get_singleton()->draw_list_draw(draw_list, true); - RD::get_singleton()->draw_list_end(p_post_barrier); + RD::get_singleton()->draw_list_end(); } void CopyEffects::cubemap_downsample(RID p_source_cubemap, RID p_dest_cubemap, const Size2i &p_size) { @@ -1080,7 +1080,7 @@ void CopyEffects::cubemap_downsample_raster(RID p_source_cubemap, RID p_dest_fra RID shader = cubemap_downsampler.raster_shader.version_get_shader(cubemap_downsampler.shader_version, 0); ERR_FAIL_COND(shader.is_null()); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + RD::DrawListID 
draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, cubemap_downsampler.raster_pipeline.get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_source_cubemap), 0); @@ -1159,7 +1159,7 @@ void CopyEffects::cubemap_filter_raster(RID p_source_cubemap, RID p_dest_framebu RID shader = filter.raster_shader.version_get_shader(filter.shader_version, mode); ERR_FAIL_COND(shader.is_null()); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, filter.raster_pipelines[mode].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_source_cubemap), 0); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, filter.uniform_set, 1); @@ -1237,7 +1237,7 @@ void CopyEffects::cubemap_roughness_raster(RID p_source_rd_texture, RID p_dest_f RID shader = roughness.raster_shader.version_get_shader(roughness.shader_version, 0); ERR_FAIL_COND(shader.is_null()); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, 
RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, roughness.raster_pipeline.get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_source_rd_texture), 0); @@ -1257,7 +1257,7 @@ void CopyEffects::merge_specular(RID p_dest_framebuffer, RID p_specular, RID p_b RD::get_singleton()->draw_command_begin_label("Merge specular"); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, Vector()); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, Vector()); int mode; if (p_reflection.is_valid()) { diff --git a/servers/rendering/renderer_rd/effects/copy_effects.h b/servers/rendering/renderer_rd/effects/copy_effects.h index 60272a2eab9..d18971a6767 100644 --- a/servers/rendering/renderer_rd/effects/copy_effects.h +++ b/servers/rendering/renderer_rd/effects/copy_effects.h @@ -345,7 +345,7 @@ public: void set_color(RID p_dest_texture, const Color &p_color, const Rect2i &p_region, bool p_8bit_dst = false); void set_color_raster(RID p_dest_texture, const Color &p_color, const Rect2i &p_region); - void copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuffer, const Rect2 &p_rect, const Vector2 &p_dst_size, float p_z_near, float p_z_far, bool p_dp_flip, BitField p_post_barrier = RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_TRANSFER); + void copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuffer, const Rect2 &p_rect, const Vector2 &p_dst_size, float p_z_near, float p_z_far, bool p_dp_flip); void cubemap_downsample(RID p_source_cubemap, RID 
p_dest_cubemap, const Size2i &p_size); void cubemap_downsample_raster(RID p_source_cubemap, RID p_dest_framebuffer, uint32_t p_face_id, const Size2i &p_size); void cubemap_filter(RID p_source_cubemap, Vector p_dest_cubemap, bool p_use_array); diff --git a/servers/rendering/renderer_rd/effects/debug_effects.cpp b/servers/rendering/renderer_rd/effects/debug_effects.cpp index 3033d423758..a57a65fd5a6 100644 --- a/servers/rendering/renderer_rd/effects/debug_effects.cpp +++ b/servers/rendering/renderer_rd/effects/debug_effects.cpp @@ -282,7 +282,7 @@ void DebugEffects::draw_shadow_frustum(RID p_light, const Projection &p_cam_proj // And draw our frustum. RD::FramebufferFormatID fb_format_id = RD::get_singleton()->framebuffer_get_format(p_dest_fb); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_fb, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD, Vector(), 1.0, 0, rect); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_fb, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD, Vector(), 1.0, 0, rect); RID pipeline = shadow_frustum.pipelines[SFP_TRANSPARENT].get_render_pipeline(frustum.vertex_format, fb_format_id); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, pipeline); @@ -326,7 +326,7 @@ void DebugEffects::draw_shadow_frustum(RID p_light, const Projection &p_cam_proj rect.size.x *= atlas_rect_norm.size.x; rect.size.y *= atlas_rect_norm.size.y; - draw_list = RD::get_singleton()->draw_list_begin(p_dest_fb, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD, Vector(), 1.0, 0, rect); + draw_list = RD::get_singleton()->draw_list_begin(p_dest_fb, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD, Vector(), 1.0, 0, rect); pipeline = shadow_frustum.pipelines[SFP_TRANSPARENT].get_render_pipeline(frustum.vertex_format, 
fb_format_id); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, pipeline); @@ -351,7 +351,7 @@ void DebugEffects::draw_motion_vectors(RID p_velocity, RID p_depth, RID p_dest_f RD::Uniform u_source_velocity(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_velocity })); RD::Uniform u_source_depth(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 1, Vector({ default_sampler, p_depth })); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_fb, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_fb, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_DISCARD); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, motion_vectors.pipeline.get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_fb), false, RD::get_singleton()->draw_list_get_current_pass())); Projection reprojection = p_previous_projection.flipped_y() * p_previous_transform.affine_inverse() * p_current_transform * p_current_projection.flipped_y().inverse(); diff --git a/servers/rendering/renderer_rd/effects/fsr.cpp b/servers/rendering/renderer_rd/effects/fsr.cpp index 0c51adf9ee3..5599a5a162d 100644 --- a/servers/rendering/renderer_rd/effects/fsr.cpp +++ b/servers/rendering/renderer_rd/effects/fsr.cpp @@ -124,5 +124,5 @@ void FSR::fsr_upscale(Ref p_render_buffers, RID p_source_r RD::get_singleton()->compute_list_dispatch(compute_list, dispatch_x, dispatch_y, 1); - RD::get_singleton()->compute_list_end(compute_list); + RD::get_singleton()->compute_list_end(); } diff --git a/servers/rendering/renderer_rd/effects/fsr2.cpp b/servers/rendering/renderer_rd/effects/fsr2.cpp index 0c389893049..bebbf51d51e 100644 --- a/servers/rendering/renderer_rd/effects/fsr2.cpp +++ b/servers/rendering/renderer_rd/effects/fsr2.cpp @@ -377,10 +377,7 @@ static FfxErrorCode 
execute_gpu_job_copy_rd(FSR2Context::Scratch &p_scratch, con ERR_FAIL_COND_V(dst_desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT); for (uint32_t mip_level = 0; mip_level < src_desc.mipCount; mip_level++) { - // Only push the barriers on the last copy. - // FIXME: This could be optimized if RenderingDevice was able to copy multiple mip levels in a single command. - BitField post_barrier = (mip_level == (src_desc.mipCount - 1)) ? RD::BARRIER_MASK_ALL_BARRIERS : RD::BARRIER_MASK_NO_BARRIER; - RD::get_singleton()->texture_copy(src, dst, Vector3(0, 0, 0), Vector3(0, 0, 0), Vector3(src_desc.width, src_desc.height, src_desc.depth), mip_level, mip_level, 0, 0, post_barrier); + RD::get_singleton()->texture_copy(src, dst, Vector3(0, 0, 0), Vector3(0, 0, 0), Vector3(src_desc.width, src_desc.height, src_desc.depth), mip_level, mip_level, 0, 0); } return FFX_OK; @@ -435,8 +432,7 @@ static FfxErrorCode execute_gpu_job_compute_rd(FSR2Context::Scratch &p_scratch, RID buffer_rid = p_scratch.ubo_ring_buffer[p_scratch.ubo_ring_buffer_index]; p_scratch.ubo_ring_buffer_index = (p_scratch.ubo_ring_buffer_index + 1) % FSR2_UBO_RING_BUFFER_SIZE; - BitField post_barrier = (i == (p_job.pipeline.constCount - 1)) ? 
RD::BARRIER_MASK_ALL_BARRIERS : RD::BARRIER_MASK_NO_BARRIER; - RD::get_singleton()->buffer_update(buffer_rid, 0, p_job.cbs[i].uint32Size * sizeof(uint32_t), p_job.cbs[i].data, post_barrier); + RD::get_singleton()->buffer_update(buffer_rid, 0, p_job.cbs[i].uint32Size * sizeof(uint32_t), p_job.cbs[i].data); RD::Uniform buffer_uniform(RD::UNIFORM_TYPE_UNIFORM_BUFFER, p_job.pipeline.cbResourceBindings[i].slotIndex, buffer_rid); compute_uniforms.push_back(buffer_uniform); @@ -566,7 +562,6 @@ FSR2Effect::FSR2Effect() { FfxResourceBinding{ 2, 0, L"r_dilatedDepth" }, FfxResourceBinding{ 3, 0, L"r_reactive_mask" }, FfxResourceBinding{ 4, 0, L"r_transparency_and_composition_mask" }, - FfxResourceBinding{ 5, 0, L"r_prepared_input_color" }, FfxResourceBinding{ 6, 0, L"r_previous_dilated_motion_vectors" }, FfxResourceBinding{ 7, 0, L"r_input_motion_vectors" }, FfxResourceBinding{ 8, 0, L"r_input_color_jittered" }, diff --git a/servers/rendering/renderer_rd/effects/luminance.cpp b/servers/rendering/renderer_rd/effects/luminance.cpp index 3aa5f5706e2..61b2248b5cd 100644 --- a/servers/rendering/renderer_rd/effects/luminance.cpp +++ b/servers/rendering/renderer_rd/effects/luminance.cpp @@ -184,7 +184,7 @@ void Luminance::luminance_reduction(RID p_source_texture, const Size2i p_source_ RD::Uniform u_source_texture(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, i == 0 ? 
p_source_texture : p_luminance_buffers->reduce[i - 1] })); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, luminance_reduce_raster.pipelines[mode].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_source_texture), 0); if (final) { diff --git a/servers/rendering/renderer_rd/effects/resolve.cpp b/servers/rendering/renderer_rd/effects/resolve.cpp index 18671d06e14..feb0e6ed1e2 100644 --- a/servers/rendering/renderer_rd/effects/resolve.cpp +++ b/servers/rendering/renderer_rd/effects/resolve.cpp @@ -54,7 +54,7 @@ Resolve::~Resolve() { resolve.shader.version_free(resolve.shader_version); } -void Resolve::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_voxel_gi, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_voxel_gi, Vector2i p_screen_size, int p_samples, uint32_t p_barrier) { +void Resolve::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_voxel_gi, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_voxel_gi, Vector2i p_screen_size, int p_samples) { UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton(); ERR_FAIL_NULL(uniform_set_cache); MaterialStorage *material_storage = MaterialStorage::get_singleton(); @@ -93,10 +93,10 @@ void Resolve::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.x, p_screen_size.y, 1); - RD::get_singleton()->compute_list_end(p_barrier); + 
RD::get_singleton()->compute_list_end(); } -void Resolve::resolve_depth(RID p_source_depth, RID p_dest_depth, Vector2i p_screen_size, int p_samples, uint32_t p_barrier) { +void Resolve::resolve_depth(RID p_source_depth, RID p_dest_depth, Vector2i p_screen_size, int p_samples) { UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton(); ERR_FAIL_NULL(uniform_set_cache); MaterialStorage *material_storage = MaterialStorage::get_singleton(); @@ -126,5 +126,5 @@ void Resolve::resolve_depth(RID p_source_depth, RID p_dest_depth, Vector2i p_scr RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.x, p_screen_size.y, 1); - RD::get_singleton()->compute_list_end(p_barrier); + RD::get_singleton()->compute_list_end(); } diff --git a/servers/rendering/renderer_rd/effects/resolve.h b/servers/rendering/renderer_rd/effects/resolve.h index fcc1021904e..14477f90e4a 100644 --- a/servers/rendering/renderer_rd/effects/resolve.h +++ b/servers/rendering/renderer_rd/effects/resolve.h @@ -65,8 +65,8 @@ public: Resolve(); ~Resolve(); - void resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_voxel_gi, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_voxel_gi, Vector2i p_screen_size, int p_samples, uint32_t p_barrier = RD::BARRIER_MASK_ALL_BARRIERS); - void resolve_depth(RID p_source_depth, RID p_dest_depth, Vector2i p_screen_size, int p_samples, uint32_t p_barrier = RD::BARRIER_MASK_ALL_BARRIERS); + void resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_voxel_gi, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_voxel_gi, Vector2i p_screen_size, int p_samples); + void resolve_depth(RID p_source_depth, RID p_dest_depth, Vector2i p_screen_size, int p_samples); }; } // namespace RendererRD diff --git a/servers/rendering/renderer_rd/effects/ss_effects.cpp b/servers/rendering/renderer_rd/effects/ss_effects.cpp index 628edc0127d..46fb0a75d68 100644 --- 
a/servers/rendering/renderer_rd/effects/ss_effects.cpp +++ b/servers/rendering/renderer_rd/effects/ss_effects.cpp @@ -525,7 +525,7 @@ void SSEffects::downsample_depth(Ref p_render_buffers, uin RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->draw_command_end_label(); - RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->compute_list_end(); ss_effects.used_full_mips_last_frame = use_full_mips; ss_effects.used_half_size_last_frame = use_half_size; @@ -950,10 +950,10 @@ void SSEffects::screen_space_indirect_lighting(Ref p_rende RD::get_singleton()->draw_command_end_label(); // SSIL - RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_TRANSFER); // Zeroing importance_map_load_counter depends on us. + RD::get_singleton()->compute_list_end(); int zero[1] = { 0 }; - RD::get_singleton()->buffer_update(ssil.importance_map_load_counter, 0, sizeof(uint32_t), &zero, 0); //no barrier + RD::get_singleton()->buffer_update(ssil.importance_map_load_counter, 0, sizeof(uint32_t), &zero); } /* SSAO */ @@ -1332,10 +1332,10 @@ void SSEffects::generate_ssao(Ref p_render_buffers, SSAORe RD::get_singleton()->draw_command_end_label(); // Interleave } RD::get_singleton()->draw_command_end_label(); //SSAO - RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_TRANSFER); // Zeroing importance_map_load_counter depends on us. 
+ RD::get_singleton()->compute_list_end(); int zero[1] = { 0 }; - RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero, 0); //no barrier + RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero); } /* Screen Space Reflection */ @@ -1394,7 +1394,7 @@ void SSEffects::screen_space_reflection(Ref p_render_buffe scene_data.eye_offset[v][3] = 0.0; } - RD::get_singleton()->buffer_update(ssr.ubo, 0, sizeof(ScreenSpaceReflectionSceneData), &scene_data, RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->buffer_update(ssr.ubo, 0, sizeof(ScreenSpaceReflectionSceneData), &scene_data); } uint32_t pipeline_specialization = 0; diff --git a/servers/rendering/renderer_rd/effects/tone_mapper.cpp b/servers/rendering/renderer_rd/effects/tone_mapper.cpp index 48c65114089..ee0b6adb4d6 100644 --- a/servers/rendering/renderer_rd/effects/tone_mapper.cpp +++ b/servers/rendering/renderer_rd/effects/tone_mapper.cpp @@ -166,7 +166,7 @@ void ToneMapper::tonemapper(RID p_source_color, RID p_dst_framebuffer, const Ton RID shader = tonemap.shader.version_get_shader(tonemap.shader_version, mode); ERR_FAIL_COND(shader.is_null()); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dst_framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dst_framebuffer, RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_DISCARD); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, tonemap.pipelines[mode].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dst_framebuffer), false, RD::get_singleton()->draw_list_get_current_pass())); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_source_color), 0); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, 
uniform_set_cache->get_cache(shader, 1, u_exposure_texture), 1); diff --git a/servers/rendering/renderer_rd/effects/vrs.cpp b/servers/rendering/renderer_rd/effects/vrs.cpp index 63c99facdd7..41a6b2d6221 100644 --- a/servers/rendering/renderer_rd/effects/vrs.cpp +++ b/servers/rendering/renderer_rd/effects/vrs.cpp @@ -82,7 +82,7 @@ void VRS::copy_vrs(RID p_source_rd_texture, RID p_dest_framebuffer, bool p_multi RID shader = vrs_shader.shader.version_get_shader(vrs_shader.shader_version, mode); ERR_FAIL_COND(shader.is_null()); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD, Vector()); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD, Vector()); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, vrs_shader.pipelines[mode].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_source_rd_texture), 0); // RD::get_singleton()->draw_list_set_push_constant(draw_list, &vrs_shader.push_constant, sizeof(VRSPushConstant)); diff --git a/servers/rendering/renderer_rd/environment/fog.cpp b/servers/rendering/renderer_rd/environment/fog.cpp index 2befb194f7b..78ab6f3650f 100644 --- a/servers/rendering/renderer_rd/environment/fog.cpp +++ b/servers/rendering/renderer_rd/environment/fog.cpp @@ -570,7 +570,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P RendererRD::MaterialStorage::store_transform(to_prev_cam_view, params.to_prev_view); RendererRD::MaterialStorage::store_transform(p_cam_transform, params.transform); - RD::get_singleton()->buffer_update(volumetric_fog.volume_ubo, 0, sizeof(VolumetricFogShader::VolumeUBO), 
&params, RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->buffer_update(volumetric_fog.volume_ubo, 0, sizeof(VolumetricFogShader::VolumeUBO), &params); if (fog->fog_uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(fog->fog_uniform_set)) { Vector uniforms; @@ -1086,7 +1086,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P RD::get_singleton()->draw_command_begin_label("Render Volumetric Fog"); RENDER_TIMESTAMP("Render Fog"); - RD::get_singleton()->buffer_update(volumetric_fog.params_ubo, 0, sizeof(VolumetricFogShader::ParamsUBO), &params, RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->buffer_update(volumetric_fog.params_ubo, 0, sizeof(VolumetricFogShader::ParamsUBO), &params); RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); @@ -1140,7 +1140,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P RD::get_singleton()->compute_list_bind_uniform_set(compute_list, fog->gi_dependent_sets.process_uniform_set, 0); RD::get_singleton()->compute_list_dispatch_threads(compute_list, fog->width, fog->height, 1); - RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_RASTER); + RD::get_singleton()->compute_list_end(); RENDER_TIMESTAMP("< Volumetric Fog"); RD::get_singleton()->draw_command_end_label(); diff --git a/servers/rendering/renderer_rd/environment/gi.cpp b/servers/rendering/renderer_rd/environment/gi.cpp index d968736037e..6da828df45e 100644 --- a/servers/rendering/renderer_rd/environment/gi.cpp +++ b/servers/rendering/renderer_rd/environment/gi.cpp @@ -583,7 +583,8 @@ void GI::SDFGI::create(RID p_env, const Vector3 &p_world_position, uint32_t p_re /* Buffers */ cascade.solid_cell_buffer = RD::get_singleton()->storage_buffer_create(sizeof(SDFGI::Cascade::SolidCell) * solid_cell_count); - cascade.solid_cell_dispatch_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 4, Vector(), RD::STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT); + 
cascade.solid_cell_dispatch_buffer_storage = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 4, Vector()); + cascade.solid_cell_dispatch_buffer_call = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 4, Vector(), RD::STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT); cascade.lights_buffer = RD::get_singleton()->storage_buffer_create(sizeof(SDFGIShader::Light) * MAX(SDFGI::MAX_STATIC_LIGHTS, SDFGI::MAX_DYNAMIC_LIGHTS)); { Vector uniforms; @@ -650,7 +651,7 @@ void GI::SDFGI::create(RID p_env, const Vector3 &p_world_position, uint32_t p_re RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.binding = 10; - u.append_id(cascade.solid_cell_dispatch_buffer); + u.append_id(cascade.solid_cell_dispatch_buffer_storage); uniforms.push_back(u); } { @@ -698,7 +699,7 @@ void GI::SDFGI::create(RID p_env, const Vector3 &p_world_position, uint32_t p_re RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.binding = 5; - u.append_id(cascade.solid_cell_dispatch_buffer); + u.append_id(cascade.solid_cell_dispatch_buffer_storage); uniforms.push_back(u); } { @@ -761,7 +762,7 @@ void GI::SDFGI::create(RID p_env, const Vector3 &p_world_position, uint32_t p_re RD::Uniform u; u.binding = 3; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.append_id(cascade.solid_cell_dispatch_buffer); + u.append_id(cascade.solid_cell_dispatch_buffer_storage); uniforms.push_back(u); } { @@ -1129,7 +1130,8 @@ GI::SDFGI::~SDFGI() { RD::get_singleton()->free(c.light_aniso_0_tex); RD::get_singleton()->free(c.light_aniso_1_tex); RD::get_singleton()->free(c.sdf_tex); - RD::get_singleton()->free(c.solid_cell_dispatch_buffer); + RD::get_singleton()->free(c.solid_cell_dispatch_buffer_storage); + RD::get_singleton()->free(c.solid_cell_dispatch_buffer_call); RD::get_singleton()->free(c.solid_cell_buffer); RD::get_singleton()->free(c.lightprobe_history_tex); RD::get_singleton()->free(c.lightprobe_average_tex); @@ -1238,6 +1240,10 @@ void GI::SDFGI::update(RID p_env, const 
Vector3 &p_world_position) { void GI::SDFGI::update_light() { RD::get_singleton()->draw_command_begin_label("SDFGI Update dynamic Light"); + for (uint32_t i = 0; i < cascades.size(); i++) { + RD::get_singleton()->buffer_copy(cascades[i].solid_cell_dispatch_buffer_storage, cascades[i].solid_cell_dispatch_buffer_call, 0, 0, sizeof(uint32_t) * 4); + } + /* Update dynamic light */ RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); @@ -1276,9 +1282,9 @@ void GI::SDFGI::update_light() { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascade.sdf_direct_light_dynamic_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::DirectLightPushConstant)); - RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cascade.solid_cell_dispatch_buffer, 0); + RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cascade.solid_cell_dispatch_buffer_call, 0); } - RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->compute_list_end(); RD::get_singleton()->draw_command_end_label(); } @@ -1351,7 +1357,7 @@ void GI::SDFGI::update_probes(RID p_env, SkyRD::Sky *p_sky) { render_pass++; - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(true); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.integrate_pipeline[SDFGIShader::INTEGRATE_MODE_PROCESS]); int32_t probe_divisor = cascade_size / SDFGI::PROBE_DIVISOR; @@ -1368,14 +1374,11 @@ void GI::SDFGI::update_probes(RID p_env, SkyRD::Sky *p_sky) { RD::get_singleton()->compute_list_dispatch_threads(compute_list, probe_axis_count * probe_axis_count, probe_axis_count, 1); } - //end later after raster to avoid barriering on layout changes - //RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); - + RD::get_singleton()->compute_list_end(); 
RD::get_singleton()->draw_command_end_label(); } void GI::SDFGI::store_probes() { - RD::get_singleton()->barrier(RD::BARRIER_MASK_COMPUTE, RD::BARRIER_MASK_COMPUTE); RD::get_singleton()->draw_command_begin_label("SDFGI Store Probes"); SDFGIShader::IntegratePushConstant push_constant; @@ -1414,7 +1417,7 @@ void GI::SDFGI::store_probes() { RD::get_singleton()->compute_list_dispatch_threads(compute_list, probe_axis_count * probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1); } - RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->compute_list_end(); RD::get_singleton()->draw_command_end_label(); } @@ -1493,7 +1496,7 @@ void GI::SDFGI::update_cascades() { cascade_data[i].pad = 0; } - RD::get_singleton()->buffer_update(cascades_ubo, 0, sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES, cascade_data, RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->buffer_update(cascades_ubo, 0, sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES, cascade_data); } void GI::SDFGI::debug_draw(uint32_t p_view_count, const Projection *p_projections, const Transform3D &p_transform, int p_width, int p_height, RID p_render_target, RID p_texture, const Vector &p_texture_views) { @@ -1636,7 +1639,7 @@ void GI::SDFGI::debug_draw(uint32_t p_view_count, const Projection *p_projection copy_effects->copy_to_fb_rect(p_texture, texture_storage->render_target_get_rd_framebuffer(p_render_target), Rect2i(Point2i(), rtsize), true, false, false, false, RID(), p_view_count > 1); } -void GI::SDFGI::debug_probes(RID p_framebuffer, const uint32_t p_view_count, const Projection *p_camera_with_transforms, bool p_will_continue_color, bool p_will_continue_depth) { +void GI::SDFGI::debug_probes(RID p_framebuffer, const uint32_t p_view_count, const Projection *p_camera_with_transforms) { RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); // setup scene data @@ -1651,7 +1654,7 @@ void 
GI::SDFGI::debug_probes(RID p_framebuffer, const uint32_t p_view_count, con RendererRD::MaterialStorage::store_camera(p_camera_with_transforms[v], scene_data.projection[v]); } - RD::get_singleton()->buffer_update(debug_probes_scene_data_ubo, 0, sizeof(SDFGIShader::DebugProbesSceneData), &scene_data, RD::BARRIER_MASK_RASTER); + RD::get_singleton()->buffer_update(debug_probes_scene_data_ubo, 0, sizeof(SDFGIShader::DebugProbesSceneData), &scene_data); } // setup push constant @@ -1718,7 +1721,7 @@ void GI::SDFGI::debug_probes(RID p_framebuffer, const uint32_t p_view_count, con SDFGIShader::ProbeDebugMode mode = p_view_count > 1 ? SDFGIShader::PROBE_DEBUG_PROBES_MULTIVIEW : SDFGIShader::PROBE_DEBUG_PROBES; - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, RD::INITIAL_ACTION_CONTINUE, p_will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, p_will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE); RD::get_singleton()->draw_command_begin_label("Debug SDFGI"); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, gi->sdfgi_shader.debug_probes_pipeline[mode].get_render_pipeline(RD::INVALID_FORMAT_ID, RD::get_singleton()->framebuffer_get_format(p_framebuffer))); @@ -1861,7 +1864,7 @@ void GI::SDFGI::pre_process_gi(const Transform3D &p_transform, RenderDataRD *p_r } } - RD::get_singleton()->buffer_update(gi->sdfgi_ubo, 0, sizeof(SDFGIData), &sdfgi_data, RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->buffer_update(gi->sdfgi_ubo, 0, sizeof(SDFGIData), &sdfgi_data); /* Update dynamic lights in SDFGI cascades */ @@ -1983,7 +1986,7 @@ void GI::SDFGI::pre_process_gi(const Transform3D &p_transform, RenderDataRD *p_r } if (idx > 0) { - RD::get_singleton()->buffer_update(cascade.lights_buffer, 0, idx 
* sizeof(SDFGIShader::Light), lights, RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->buffer_update(cascade.lights_buffer, 0, idx * sizeof(SDFGIShader::Light), lights); } cascade_dynamic_light_count[i] = idx; @@ -2046,6 +2049,8 @@ void GI::SDFGI::render_region(Ref p_render_buffers, int p_ push_constant.cascade = cascade; if (cascades[cascade].dirty_regions != SDFGI::Cascade::DIRTY_ALL) { + RD::get_singleton()->buffer_copy(cascades[cascade].solid_cell_dispatch_buffer_storage, cascades[cascade].solid_cell_dispatch_buffer_call, 0, 0, sizeof(uint32_t) * 4); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); //must pre scroll existing data because not all is dirty @@ -2053,7 +2058,7 @@ void GI::SDFGI::render_region(Ref p_render_buffers, int p_ RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascades[cascade].scroll_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cascades[cascade].solid_cell_dispatch_buffer, 0); + RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cascades[cascade].solid_cell_dispatch_buffer_call, 0); // no barrier do all together RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_SCROLL_OCCLUSION]); @@ -2142,7 +2147,7 @@ void GI::SDFGI::render_region(Ref p_render_buffers, int p_ //clear dispatch indirect data uint32_t dispatch_indirct_data[4] = { 0, 0, 0, 0 }; - RD::get_singleton()->buffer_update(cascades[cascade].solid_cell_dispatch_buffer, 0, sizeof(uint32_t) * 4, dispatch_indirct_data); + RD::get_singleton()->buffer_update(cascades[cascade].solid_cell_dispatch_buffer_storage, 0, sizeof(uint32_t) * 4, dispatch_indirct_data); RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); @@ -2451,6 +2456,15 @@ void 
GI::SDFGI::render_static_lights(RenderDataRD *p_render_data, Ref= cascades.size()); + + SDFGI::Cascade &cc = cascades[p_cascade_indices[i]]; + if (light_count[i] > 0) { + RD::get_singleton()->buffer_copy(cc.solid_cell_dispatch_buffer_storage, cc.solid_cell_dispatch_buffer_call, 0, 0, sizeof(uint32_t) * 4); + } + } + /* Static Lights */ RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); @@ -2482,7 +2496,7 @@ void GI::SDFGI::render_static_lights(RenderDataRD *p_render_data, Ref 0) { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cc.sdf_direct_light_static_uniform_set, 0); RD::get_singleton()->compute_list_set_push_constant(compute_list, &dl_push_constant, sizeof(SDFGIShader::DirectLightPushConstant)); - RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cc.solid_cell_dispatch_buffer, 0); + RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cc.solid_cell_dispatch_buffer_call, 0); } } @@ -3716,7 +3730,7 @@ void GI::setup_voxel_gi_instances(RenderDataRD *p_render_data, Ref 0) { RD::get_singleton()->draw_command_begin_label("VoxelGIs Setup"); - RD::get_singleton()->buffer_update(voxel_gi_buffer, 0, sizeof(VoxelGIData) * MIN((uint64_t)MAX_VOXEL_GI_INSTANCES, p_voxel_gi_instances.size()), voxel_gi_data, RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->buffer_update(voxel_gi_buffer, 0, sizeof(VoxelGIData) * MIN((uint64_t)MAX_VOXEL_GI_INSTANCES, p_voxel_gi_instances.size()), voxel_gi_data); RD::get_singleton()->draw_command_end_label(); } @@ -3804,11 +3818,11 @@ void GI::process_gi(Ref p_render_buffers, const RID *p_nor scene_data.screen_size[0] = internal_size.x; scene_data.screen_size[1] = internal_size.y; - RD::get_singleton()->buffer_update(rbgi->scene_data_ubo, 0, sizeof(SceneData), &scene_data, RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->buffer_update(rbgi->scene_data_ubo, 0, sizeof(SceneData), &scene_data); } // Now compute the contents of our buffers. 
- RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(true); + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); // Render each eye separately. // We need to look into whether we can make our compute shader use Multiview but not sure that works or makes a difference.. @@ -4038,8 +4052,7 @@ void GI::process_gi(Ref p_render_buffers, const RID *p_nor } } - //do barrier later to allow oeverlap - //RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); //no barriers, let other compute, raster and transfer happen at the same time + RD::get_singleton()->compute_list_end(); RD::get_singleton()->draw_command_end_label(); } diff --git a/servers/rendering/renderer_rd/environment/gi.h b/servers/rendering/renderer_rd/environment/gi.h index c46d4cbd25b..011493f1f6e 100644 --- a/servers/rendering/renderer_rd/environment/gi.h +++ b/servers/rendering/renderer_rd/environment/gi.h @@ -584,7 +584,9 @@ public: uint32_t static_light_aniso; }; - RID solid_cell_dispatch_buffer; //buffer for indirect compute dispatch + // Buffers for indirect compute dispatch. 
+ RID solid_cell_dispatch_buffer_storage; + RID solid_cell_dispatch_buffer_call; RID solid_cell_buffer; RID lightprobe_history_tex; @@ -686,7 +688,7 @@ public: void update_cascades(); void debug_draw(uint32_t p_view_count, const Projection *p_projections, const Transform3D &p_transform, int p_width, int p_height, RID p_render_target, RID p_texture, const Vector &p_texture_views); - void debug_probes(RID p_framebuffer, const uint32_t p_view_count, const Projection *p_camera_with_transforms, bool p_will_continue_color, bool p_will_continue_depth); + void debug_probes(RID p_framebuffer, const uint32_t p_view_count, const Projection *p_camera_with_transforms); void pre_process_gi(const Transform3D &p_transform, RenderDataRD *p_render_data); void render_region(Ref p_render_buffers, int p_region, const PagedArray &p_instances, float p_exposure_normalization); diff --git a/servers/rendering/renderer_rd/environment/sky.cpp b/servers/rendering/renderer_rd/environment/sky.cpp index ba72ab3b08b..41609dc74d3 100644 --- a/servers/rendering/renderer_rd/environment/sky.cpp +++ b/servers/rendering/renderer_rd/environment/sky.cpp @@ -1307,7 +1307,7 @@ void SkyRD::update_radiance_buffers(Ref p_render_buffers, Basis local_view = Basis::looking_at(view_normals[i], view_up[i]); RID texture_uniform_set = sky->get_textures(SKY_TEXTURE_SET_CUBEMAP_QUARTER_RES, sky_shader.default_shader_rd, p_render_buffers); - cubemap_draw_list = RD::get_singleton()->draw_list_begin(sky->reflection.layers[0].mipmaps[2].framebuffers[i], RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + cubemap_draw_list = RD::get_singleton()->draw_list_begin(sky->reflection.layers[0].mipmaps[2].framebuffers[i], RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); _render_sky(cubemap_draw_list, p_time, sky->reflection.layers[0].mipmaps[2].framebuffers[i], pipeline, material->uniform_set, texture_uniform_set, cm, local_view, 
p_global_pos, p_luminance_multiplier); RD::get_singleton()->draw_list_end(); } @@ -1328,7 +1328,7 @@ void SkyRD::update_radiance_buffers(Ref p_render_buffers, Basis local_view = Basis::looking_at(view_normals[i], view_up[i]); RID texture_uniform_set = sky->get_textures(SKY_TEXTURE_SET_CUBEMAP_HALF_RES, sky_shader.default_shader_rd, p_render_buffers); - cubemap_draw_list = RD::get_singleton()->draw_list_begin(sky->reflection.layers[0].mipmaps[1].framebuffers[i], RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + cubemap_draw_list = RD::get_singleton()->draw_list_begin(sky->reflection.layers[0].mipmaps[1].framebuffers[i], RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); _render_sky(cubemap_draw_list, p_time, sky->reflection.layers[0].mipmaps[1].framebuffers[i], pipeline, material->uniform_set, texture_uniform_set, cm, local_view, p_global_pos, p_luminance_multiplier); RD::get_singleton()->draw_list_end(); } @@ -1345,7 +1345,7 @@ void SkyRD::update_radiance_buffers(Ref p_render_buffers, Basis local_view = Basis::looking_at(view_normals[i], view_up[i]); RID texture_uniform_set = sky->get_textures(SKY_TEXTURE_SET_CUBEMAP, sky_shader.default_shader_rd, p_render_buffers); - cubemap_draw_list = RD::get_singleton()->draw_list_begin(sky->reflection.layers[0].mipmaps[0].framebuffers[i], RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + cubemap_draw_list = RD::get_singleton()->draw_list_begin(sky->reflection.layers[0].mipmaps[0].framebuffers[i], RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); _render_sky(cubemap_draw_list, p_time, sky->reflection.layers[0].mipmaps[0].framebuffers[i], pipeline, material->uniform_set, texture_uniform_set, cm, local_view, p_global_pos, p_luminance_multiplier); RD::get_singleton()->draw_list_end(); } @@ -1469,7 +1469,7 @@ void 
SkyRD::update_res_buffers(Ref p_render_buffers, RID p Vector clear_colors; clear_colors.push_back(Color(0.0, 0.0, 0.0)); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_DISCARD, clear_colors); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_DISCARD, clear_colors); _render_sky(draw_list, p_time, framebuffer, pipeline, material->uniform_set, texture_uniform_set, projection, sky_transform, sky_scene_state.cam_transform.origin, p_luminance_multiplier); RD::get_singleton()->draw_list_end(); } @@ -1488,7 +1488,7 @@ void SkyRD::update_res_buffers(Ref p_render_buffers, RID p Vector clear_colors; clear_colors.push_back(Color(0.0, 0.0, 0.0)); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_DISCARD, clear_colors); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_DISCARD, clear_colors); _render_sky(draw_list, p_time, framebuffer, pipeline, material->uniform_set, texture_uniform_set, projection, sky_transform, sky_scene_state.cam_transform.origin, p_luminance_multiplier); RD::get_singleton()->draw_list_end(); } diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp index 76e814e1ee6..0c0062662ac 100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp @@ -576,31 +576,13 @@ void RenderForwardClustered::_render_list(RenderingDevice::DrawListID p_draw_lis } } -void 
RenderForwardClustered::_render_list_thread_function(uint32_t p_thread, RenderListParameters *p_params) { - uint32_t render_total = p_params->element_count; - uint32_t total_threads = WorkerThreadPool::get_singleton()->get_thread_count(); - uint32_t render_from = p_thread * render_total / total_threads; - uint32_t render_to = (p_thread + 1 == total_threads) ? render_total : ((p_thread + 1) * render_total / total_threads); - _render_list(thread_draw_lists[p_thread], p_params->framebuffer_format, p_params, render_from, render_to); -} - -void RenderForwardClustered::_render_list_with_threads(RenderListParameters *p_params, RID p_framebuffer, RD::InitialAction p_initial_color_action, RD::FinalAction p_final_color_action, RD::InitialAction p_initial_depth_action, RD::FinalAction p_final_depth_action, const Vector &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const Vector &p_storage_textures) { +void RenderForwardClustered::_render_list_with_draw_list(RenderListParameters *p_params, RID p_framebuffer, RD::InitialAction p_initial_color_action, RD::FinalAction p_final_color_action, RD::InitialAction p_initial_depth_action, RD::FinalAction p_final_depth_action, const Vector &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region) { RD::FramebufferFormatID fb_format = RD::get_singleton()->framebuffer_get_format(p_framebuffer); p_params->framebuffer_format = fb_format; - if ((uint32_t)p_params->element_count > render_list_thread_threshold && false) { // secondary command buffers need more testing at this time - //multi threaded - thread_draw_lists.resize(WorkerThreadPool::get_singleton()->get_thread_count()); - RD::get_singleton()->draw_list_begin_split(p_framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures); - 
WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RenderForwardClustered::_render_list_thread_function, p_params, thread_draw_lists.size(), -1, true, SNAME("ForwardClusteredRenderList")); - WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task); - RD::get_singleton()->draw_list_end(p_params->barrier); - } else { - //single threaded - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures); - _render_list(draw_list, fb_format, p_params, 0, p_params->element_count); - RD::get_singleton()->draw_list_end(p_params->barrier); - } + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region); + _render_list(draw_list, fb_format, p_params, 0, p_params->element_count); + RD::get_singleton()->draw_list_end(); } void RenderForwardClustered::_setup_environment(const RenderDataRD *p_render_data, bool p_no_fog, const Size2i &p_screen_size, bool p_flip_y, const Color &p_default_bg_color, bool p_opaque_render_buffers, bool p_apply_alpha_multiplier, bool p_pancake_shadows, int p_index) { @@ -683,7 +665,7 @@ void RenderForwardClustered::_setup_environment(const RenderDataRD *p_render_dat } } - RD::get_singleton()->buffer_update(scene_state.implementation_uniform_buffers[p_index], 0, sizeof(SceneState::UBO), &scene_state.ubo, RD::BARRIER_MASK_RASTER); + RD::get_singleton()->buffer_update(scene_state.implementation_uniform_buffers[p_index], 0, sizeof(SceneState::UBO), &scene_state.ubo); } void RenderForwardClustered::_update_instance_data_buffer(RenderListType p_render_list) { @@ -696,7 +678,7 @@ void 
RenderForwardClustered::_update_instance_data_buffer(RenderListType p_rende scene_state.instance_buffer[p_render_list] = RD::get_singleton()->storage_buffer_create(new_size * sizeof(SceneState::InstanceData)); scene_state.instance_buffer_size[p_render_list] = new_size; } - RD::get_singleton()->buffer_update(scene_state.instance_buffer[p_render_list], 0, sizeof(SceneState::InstanceData) * scene_state.instance_data[p_render_list].size(), scene_state.instance_data[p_render_list].ptr(), RD::BARRIER_MASK_RASTER); + RD::get_singleton()->buffer_update(scene_state.instance_buffer[p_render_list], 0, sizeof(SceneState::InstanceData) * scene_state.instance_data[p_render_list].size(), scene_state.instance_data[p_render_list].ptr()); } } void RenderForwardClustered::_fill_instance_data(RenderListType p_render_list, int *p_render_info, uint32_t p_offset, int32_t p_max_elements, bool p_update_buffer) { @@ -1097,7 +1079,7 @@ void RenderForwardClustered::_fill_render_list(RenderListType p_render_list, con } if (p_render_list == RENDER_LIST_OPAQUE && lightmap_captures_used) { - RD::get_singleton()->buffer_update(scene_state.lightmap_capture_buffer, 0, sizeof(LightmapCaptureData) * lightmap_captures_used, scene_state.lightmap_captures, RD::BARRIER_MASK_RASTER); + RD::get_singleton()->buffer_update(scene_state.lightmap_capture_buffer, 0, sizeof(LightmapCaptureData) * lightmap_captures_used, scene_state.lightmap_captures); } } @@ -1135,7 +1117,7 @@ void RenderForwardClustered::_setup_lightmaps(const RenderDataRD *p_render_data, scene_state.lightmaps_used++; } if (scene_state.lightmaps_used > 0) { - RD::get_singleton()->buffer_update(scene_state.lightmap_buffer, 0, sizeof(LightmapData) * scene_state.lightmaps_used, scene_state.lightmaps, RD::BARRIER_MASK_RASTER); + RD::get_singleton()->buffer_update(scene_state.lightmap_buffer, 0, sizeof(LightmapData) * scene_state.lightmaps_used, scene_state.lightmaps); } } @@ -1427,7 +1409,7 @@ void 
RenderForwardClustered::_pre_opaque_render(RenderDataRD *p_render_data, boo if (p_render_data->directional_shadows.size()) { //open the pass for directional shadows light_storage->update_directional_shadow_atlas(); - RD::get_singleton()->draw_list_begin(light_storage->direction_shadow_get_fb(), RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_CONTINUE); + RD::get_singleton()->draw_list_begin(light_storage->direction_shadow_get_fb(), RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE); RD::get_singleton()->draw_list_end(); } } @@ -1461,18 +1443,12 @@ void RenderForwardClustered::_pre_opaque_render(RenderDataRD *p_render_data, boo _render_shadow_process(); } - //start GI if (render_gi) { gi.process_gi(rb, p_normal_roughness_slices, p_voxel_gi_buffer, p_render_data->environment, p_render_data->scene_data->view_count, p_render_data->scene_data->view_projection, p_render_data->scene_data->view_eye_offset, p_render_data->scene_data->cam_transform, *p_render_data->voxel_gi_instances); } - //Do shadow rendering (in parallel with GI) if (render_shadows) { - _render_shadow_end(RD::BARRIER_MASK_NO_BARRIER); - } - - if (render_gi) { - RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); //use a later barrier + _render_shadow_end(); } if (rb_data.is_valid() && ss_effects) { @@ -1496,9 +1472,6 @@ void RenderForwardClustered::_pre_opaque_render(RenderDataRD *p_render_data, boo } } - //full barrier here, we need raster, transfer and compute and it depends from the previous work - RD::get_singleton()->barrier(RD::BARRIER_MASK_ALL_BARRIERS, RD::BARRIER_MASK_ALL_BARRIERS); - if (current_cluster_builder) { // Note: when rendering stereoscopic (multiview) we are using our combined frustum projection to create // our cluster data. We use reprojection in the shader to adjust for our left/right eye. 
@@ -1814,7 +1787,7 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co float sky_energy_multiplier = 1.0 / _render_buffers_get_luminance_multiplier(); Color clear_color; - bool keep_color = false; + bool load_color = false; if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_OVERDRAW) { clear_color = Color(0, 0, 0, 1); //in overdraw mode, BG should always be black @@ -1857,10 +1830,10 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co bool convert_to_linear = !RendererRD::TextureStorage::get_singleton()->render_target_is_using_hdr(rb->get_render_target()); copy_effects->copy_to_fb_rect(texture, color_only_framebuffer, Rect2i(), false, false, false, false, RID(), false, false, convert_to_linear); } - keep_color = true; + load_color = true; } break; case RS::ENV_BG_KEEP: { - keep_color = true; + load_color = true; } break; case RS::ENV_BG_CAMERA_FEED: { } break; @@ -1912,7 +1885,6 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co bool depth_pre_pass = bool(GLOBAL_GET("rendering/driver/depth_prepass/enable")) && depth_framebuffer.is_valid(); bool using_ssao = depth_pre_pass && !is_reflection_probe && p_render_data->environment.is_valid() && environment_get_ssao_enabled(p_render_data->environment); - bool continue_depth = false; if (depth_pre_pass) { //depth pre pass bool needs_pre_resolve = _needs_post_prepass_render(p_render_data, using_sdfgi || using_voxelgi); @@ -1923,7 +1895,7 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co } if (needs_pre_resolve) { //pre clear the depth framebuffer, as AMD (and maybe others?) use compute for it, and barrier other compute shaders. 
- RD::get_singleton()->draw_list_begin(depth_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_CONTINUE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_CONTINUE, depth_pass_clear); + RD::get_singleton()->draw_list_begin(depth_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, depth_pass_clear); RD::get_singleton()->draw_list_end(); //start compute processes here, so they run at the same time as depth pre-pass _post_prepass_render(p_render_data, using_sdfgi || using_voxelgi); @@ -1935,21 +1907,14 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co bool finish_depth = using_ssao || using_ssil || using_sdfgi || using_voxelgi; RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].element_info.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), reverse_cull, depth_pass_mode, 0, rb_data.is_null(), p_render_data->directional_light_soft_shadows, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count); - _render_list_with_threads(&render_list_params, depth_framebuffer, needs_pre_resolve ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, needs_pre_resolve ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_CLEAR, finish_depth ? RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE, needs_pre_resolve ? Vector() : depth_pass_clear); + _render_list_with_draw_list(&render_list_params, depth_framebuffer, needs_pre_resolve ? RD::INITIAL_ACTION_LOAD : RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, needs_pre_resolve ? RD::INITIAL_ACTION_LOAD : RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, needs_pre_resolve ? 
Vector() : depth_pass_clear); RD::get_singleton()->draw_command_end_label(); - if (needs_pre_resolve) { - _pre_resolve_render(p_render_data, using_sdfgi || using_voxelgi); - } - if (rb->get_msaa_3d() != RS::VIEWPORT_MSAA_DISABLED) { RENDER_TIMESTAMP("Resolve Depth Pre-Pass (MSAA)"); RD::get_singleton()->draw_command_begin_label("Resolve Depth Pre-Pass (MSAA)"); if (depth_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS || depth_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS_VOXEL_GI) { - if (needs_pre_resolve) { - RD::get_singleton()->barrier(RD::BARRIER_MASK_RASTER, RD::BARRIER_MASK_COMPUTE); - } for (uint32_t v = 0; v < rb->get_view_count(); v++) { resolve_effects->resolve_gi(rb->get_depth_msaa(v), rb_data->get_normal_roughness_msaa(v), using_voxelgi ? rb_data->get_voxelgi_msaa(v) : RID(), rb->get_depth_texture(v), rb_data->get_normal_roughness(v), using_voxelgi ? rb_data->get_voxelgi(v) : RID(), rb->get_internal_size(), texture_multisamples[rb->get_msaa_3d()]); } @@ -1960,8 +1925,6 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co } RD::get_singleton()->draw_command_end_label(); } - - continue_depth = !finish_depth; } RID normal_roughness_views[RendererSceneRender::MAX_RENDER_VIEWS]; @@ -1990,10 +1953,6 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co { bool render_motion_pass = !render_list[RENDER_LIST_MOTION].elements.is_empty(); - bool will_continue_color = (can_continue_color || draw_sky || draw_sky_fog_only || debug_voxelgis || debug_sdfgi_probes); - bool will_continue_depth = (can_continue_depth || draw_sky || draw_sky_fog_only || debug_voxelgis || debug_sdfgi_probes); - RD::FinalAction final_color_action = will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ; - RD::FinalAction final_depth_action = will_continue_depth ? 
RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ; { Vector c; @@ -2014,7 +1973,7 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co uint32_t opaque_color_pass_flags = using_motion_pass ? (color_pass_flags & ~COLOR_PASS_FLAG_MOTION_VECTORS) : color_pass_flags; RID opaque_framebuffer = using_motion_pass ? rb_data->get_color_pass_fb(opaque_color_pass_flags) : color_framebuffer; RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].element_info.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), reverse_cull, PASS_MODE_COLOR, opaque_color_pass_flags, rb_data.is_null(), p_render_data->directional_light_soft_shadows, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count); - _render_list_with_threads(&render_list_params, opaque_framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, render_motion_pass ? RD::FINAL_ACTION_CONTINUE : final_color_action, depth_pre_pass ? (continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP) : RD::INITIAL_ACTION_CLEAR, render_motion_pass ? RD::FINAL_ACTION_CONTINUE : final_depth_action, c, 1.0, 0); + _render_list_with_draw_list(&render_list_params, opaque_framebuffer, load_color ? RD::INITIAL_ACTION_LOAD : RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, depth_pre_pass ? 
RD::INITIAL_ACTION_LOAD : RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, c, 1.0, 0); } RD::get_singleton()->draw_command_end_label(); @@ -2022,7 +1981,7 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co if (using_motion_pass) { Vector motion_vector_clear_colors; motion_vector_clear_colors.push_back(Color(-1, -1, 0, 0)); - RD::get_singleton()->draw_list_begin(rb_data->get_velocity_only_fb(), RD::INITIAL_ACTION_CLEAR, render_motion_pass ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_CONTINUE, motion_vector_clear_colors); + RD::get_singleton()->draw_list_begin(rb_data->get_velocity_only_fb(), RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, motion_vector_clear_colors); RD::get_singleton()->draw_list_end(); } @@ -2034,33 +1993,17 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_MOTION, p_render_data, radiance_texture, samplers, true); RenderListParameters render_list_params(render_list[RENDER_LIST_MOTION].elements.ptr(), render_list[RENDER_LIST_MOTION].element_info.ptr(), render_list[RENDER_LIST_MOTION].elements.size(), reverse_cull, PASS_MODE_COLOR, color_pass_flags, rb_data.is_null(), p_render_data->directional_light_soft_shadows, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count); - _render_list_with_threads(&render_list_params, color_framebuffer, RD::INITIAL_ACTION_CONTINUE, final_color_action, RD::INITIAL_ACTION_CONTINUE, final_depth_action); + _render_list_with_draw_list(&render_list_params, color_framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE); RD::get_singleton()->draw_command_end_label(); - 
- if (will_continue_color) { - // Close the motion vectors framebuffer as it'll no longer be used. - RD::get_singleton()->draw_list_begin(rb_data->get_velocity_only_fb(), RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_CONTINUE); - RD::get_singleton()->draw_list_end(); - } - } - - if (will_continue_color && using_separate_specular) { - // Close the specular framebuffer as it'll no longer be used. - RD::get_singleton()->draw_list_begin(rb_data->get_specular_only_fb(), RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_CONTINUE); - RD::get_singleton()->draw_list_end(); } } if (debug_voxelgis) { - //debug voxelgis - bool will_continue_color = (can_continue_color || draw_sky || draw_sky_fog_only); - bool will_continue_depth = (can_continue_depth || draw_sky || draw_sky_fog_only); - Projection dc; dc.set_depth_correction(true); Projection cm = (dc * p_render_data->scene_data->cam_projection) * Projection(p_render_data->scene_data->cam_transform.affine_inverse()); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(color_only_framebuffer, RD::INITIAL_ACTION_CONTINUE, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, will_continue_depth ? 
RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(color_only_framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE); RD::get_singleton()->draw_command_begin_label("Debug VoxelGIs"); for (int i = 0; i < (int)p_render_data->voxel_gi_instances->size(); i++) { gi.debug_voxel_gi((*p_render_data->voxel_gi_instances)[i], draw_list, color_only_framebuffer, cm, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_VOXEL_GI_LIGHTING, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_VOXEL_GI_EMISSION, 1.0); @@ -2070,24 +2013,20 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co } if (debug_sdfgi_probes) { - //debug sdfgi - bool will_continue_color = (can_continue_color || draw_sky || draw_sky_fog_only); - bool will_continue_depth = (can_continue_depth || draw_sky || draw_sky_fog_only); - Projection dc; dc.set_depth_correction(true); Projection cms[RendererSceneRender::MAX_RENDER_VIEWS]; for (uint32_t v = 0; v < p_render_data->scene_data->view_count; v++) { cms[v] = (dc * p_render_data->scene_data->view_projection[v]) * Projection(p_render_data->scene_data->cam_transform.affine_inverse()); } - _debug_sdfgi_probes(rb, color_only_framebuffer, p_render_data->scene_data->view_count, cms, will_continue_color, will_continue_depth); + _debug_sdfgi_probes(rb, color_only_framebuffer, p_render_data->scene_data->view_count, cms); } if (draw_sky || draw_sky_fog_only) { RENDER_TIMESTAMP("Render Sky"); RD::get_singleton()->draw_command_begin_label("Draw Sky"); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(color_only_framebuffer, RD::INITIAL_ACTION_CONTINUE, can_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, can_continue_depth ? 
RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(color_only_framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE); sky.draw_sky(draw_list, rb, p_render_data->environment, color_only_framebuffer, time, sky_energy_multiplier); @@ -2146,7 +2085,7 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co RENDER_TIMESTAMP("Clear Separate Specular (Canvas Background Mode)"); Vector blank_clear_color; blank_clear_color.push_back(Color(0.0, 0.0, 0.0)); - RD::get_singleton()->draw_list_begin(rb_data->get_specular_only_fb(), RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, blank_clear_color); + RD::get_singleton()->draw_list_begin(rb_data->get_specular_only_fb(), RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_DISCARD, blank_clear_color); RD::get_singleton()->draw_list_end(); } @@ -2187,7 +2126,7 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co RID alpha_framebuffer = rb_data.is_valid() ? rb_data->get_color_pass_fb(transparent_color_pass_flags) : color_only_framebuffer; RenderListParameters render_list_params(render_list[RENDER_LIST_ALPHA].elements.ptr(), render_list[RENDER_LIST_ALPHA].element_info.ptr(), render_list[RENDER_LIST_ALPHA].elements.size(), false, PASS_MODE_COLOR, transparent_color_pass_flags, rb_data.is_null(), p_render_data->directional_light_soft_shadows, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count); - _render_list_with_threads(&render_list_params, alpha_framebuffer, can_continue_color ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, can_continue_depth ? 
RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ); + _render_list_with_draw_list(&render_list_params, alpha_framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE); } RD::get_singleton()->draw_command_end_label(); @@ -2226,7 +2165,9 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co exposure = luminance->get_current_luminance_buffer(rb); } + RD::get_singleton()->draw_command_begin_label("FSR2"); RENDER_TIMESTAMP("FSR2"); + for (uint32_t v = 0; v < rb->get_view_count(); v++) { real_t fov = p_render_data->scene_data->cam_projection.get_fov(); real_t aspect = p_render_data->scene_data->cam_projection.get_aspect(); @@ -2257,9 +2198,13 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co fsr2_effect->upscale(params); } + + RD::get_singleton()->draw_command_end_label(); } else if (using_taa) { + RD::get_singleton()->draw_command_begin_label("TAA"); RENDER_TIMESTAMP("TAA"); taa->process(rb, _render_buffers_get_color_format(), p_render_data->scene_data->z_near, p_render_data->scene_data->z_far); + RD::get_singleton()->draw_command_end_label(); } } @@ -2571,8 +2516,7 @@ void RenderForwardClustered::_render_shadow_append(RID p_framebuffer, const Page shadow_pass.lod_distance_multiplier = scene_data.lod_distance_multiplier; shadow_pass.framebuffer = p_framebuffer; - shadow_pass.initial_depth_action = p_begin ? (p_clear_region ? RD::INITIAL_ACTION_CLEAR_REGION : RD::INITIAL_ACTION_CLEAR) : (p_clear_region ? RD::INITIAL_ACTION_CLEAR_REGION_CONTINUE : RD::INITIAL_ACTION_CONTINUE); - shadow_pass.final_depth_action = p_end ? RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE; + shadow_pass.initial_depth_action = p_begin ? RD::INITIAL_ACTION_CLEAR : (p_clear_region ? 
RD::INITIAL_ACTION_CLEAR : RD::INITIAL_ACTION_LOAD); shadow_pass.rect = p_rect; scene_state.shadow_passes.push_back(shadow_pass); @@ -2591,17 +2535,14 @@ void RenderForwardClustered::_render_shadow_process() { RD::get_singleton()->draw_command_end_label(); } -void RenderForwardClustered::_render_shadow_end(uint32_t p_barrier) { +void RenderForwardClustered::_render_shadow_end() { RD::get_singleton()->draw_command_begin_label("Shadow Render"); for (SceneState::ShadowPass &shadow_pass : scene_state.shadow_passes) { - RenderListParameters render_list_parameters(render_list[RENDER_LIST_SECONDARY].elements.ptr() + shadow_pass.element_from, render_list[RENDER_LIST_SECONDARY].element_info.ptr() + shadow_pass.element_from, shadow_pass.element_count, shadow_pass.flip_cull, shadow_pass.pass_mode, 0, true, false, shadow_pass.rp_uniform_set, false, Vector2(), shadow_pass.lod_distance_multiplier, shadow_pass.screen_mesh_lod_threshold, 1, shadow_pass.element_from, RD::BARRIER_MASK_NO_BARRIER); - _render_list_with_threads(&render_list_parameters, shadow_pass.framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, shadow_pass.initial_depth_action, shadow_pass.final_depth_action, Vector(), 1.0, 0, shadow_pass.rect); + RenderListParameters render_list_parameters(render_list[RENDER_LIST_SECONDARY].elements.ptr() + shadow_pass.element_from, render_list[RENDER_LIST_SECONDARY].element_info.ptr() + shadow_pass.element_from, shadow_pass.element_count, shadow_pass.flip_cull, shadow_pass.pass_mode, 0, true, false, shadow_pass.rp_uniform_set, false, Vector2(), shadow_pass.lod_distance_multiplier, shadow_pass.screen_mesh_lod_threshold, 1, shadow_pass.element_from); + _render_list_with_draw_list(&render_list_parameters, shadow_pass.framebuffer, RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_DISCARD, shadow_pass.initial_depth_action, RD::FINAL_ACTION_STORE, Vector(), 1.0, 0, shadow_pass.rect); } - if (p_barrier != RD::BARRIER_MASK_NO_BARRIER) { - 
RD::get_singleton()->barrier(RD::BARRIER_MASK_RASTER, p_barrier); - } RD::get_singleton()->draw_command_end_label(); } @@ -2644,7 +2585,7 @@ void RenderForwardClustered::_render_particle_collider_heightfield(RID p_fb, con { //regular forward for now RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), false, pass_mode, 0, true, false, rp_uniform_set); - _render_list_with_threads(&render_list_params, p_fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ); + _render_list_with_draw_list(&render_list_params, p_fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE); } RD::get_singleton()->draw_command_end_label(); } @@ -2697,7 +2638,7 @@ void RenderForwardClustered::_render_material(const Transform3D &p_cam_transform Color(0, 0, 0, 0) }; - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, clear, 1.0, 0, p_region); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, clear, 1.0, 0, p_region); _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), &render_list_params, 0, render_list_params.element_count); RD::get_singleton()->draw_list_end(); } @@ -2747,7 +2688,7 @@ void RenderForwardClustered::_render_uv2(const PagedArraydraw_list_begin(p_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, clear, 1.0, 0, p_region); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, 
clear, 1.0, 0, p_region); const int uv_offset_count = 9; static const Vector2 uv_offsets[uv_offset_count] = { @@ -2803,13 +2744,6 @@ void RenderForwardClustered::_render_sdfgi(Ref p_render_bu Vector3 half_size = p_bounds.size * 0.5; Vector3 center = p_bounds.position + half_size; - Vector sbs = { - p_albedo_texture, - p_emission_texture, - p_emission_aniso_texture, - p_geom_facing_texture - }; - //print_line("re-render " + p_from + " - " + p_size + " bounds " + p_bounds); for (int i = 0; i < 3; i++) { scene_state.ubo.sdf_offset[i] = p_from[i]; @@ -2860,7 +2794,7 @@ void RenderForwardClustered::_render_sdfgi(Ref p_render_bu } RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), true, pass_mode, 0, true, false, rp_uniform_set, false); - _render_list_with_threads(&render_list_params, E->value, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, Vector(), 1.0, 0, Rect2(), sbs); + _render_list_with_draw_list(&render_list_params, E->value, RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_DISCARD, Vector(), 1.0, 0, Rect2()); } RD::get_singleton()->draw_command_end_label(); @@ -4280,8 +4214,6 @@ RenderForwardClustered::RenderForwardClustered() { best_fit_normal.shader.version_free(best_fit_normal.shader_version); } - render_list_thread_threshold = GLOBAL_GET("rendering/limits/forward_renderer/threaded_render_minimum_instances"); - _update_shader_quality_settings(); resolve_effects = memnew(RendererRD::Resolve()); diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h index 5ff3d9f52a6..5af213bc02f 100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h +++ 
b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h @@ -209,10 +209,9 @@ class RenderForwardClustered : public RendererSceneRenderRD { float screen_mesh_lod_threshold = 0.0; RD::FramebufferFormatID framebuffer_format = 0; uint32_t element_offset = 0; - uint32_t barrier = RD::BARRIER_MASK_ALL_BARRIERS; bool use_directional_soft_shadow = false; - RenderListParameters(GeometryInstanceSurfaceDataCache **p_elements, RenderElementInfo *p_element_info, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, uint32_t p_color_pass_flags, bool p_no_gi, bool p_use_directional_soft_shadows, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), float p_lod_distance_multiplier = 0.0, float p_screen_mesh_lod_threshold = 0.0, uint32_t p_view_count = 1, uint32_t p_element_offset = 0, uint32_t p_barrier = RD::BARRIER_MASK_ALL_BARRIERS) { + RenderListParameters(GeometryInstanceSurfaceDataCache **p_elements, RenderElementInfo *p_element_info, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, uint32_t p_color_pass_flags, bool p_no_gi, bool p_use_directional_soft_shadows, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), float p_lod_distance_multiplier = 0.0, float p_screen_mesh_lod_threshold = 0.0, uint32_t p_view_count = 1, uint32_t p_element_offset = 0) { elements = p_elements; element_info = p_element_info; element_count = p_element_count; @@ -227,7 +226,6 @@ class RenderForwardClustered : public RendererSceneRenderRD { lod_distance_multiplier = p_lod_distance_multiplier; screen_mesh_lod_threshold = p_screen_mesh_lod_threshold; element_offset = p_element_offset; - barrier = p_barrier; use_directional_soft_shadow = p_use_directional_soft_shadows; } }; @@ -352,7 +350,6 @@ class RenderForwardClustered : public RendererSceneRenderRD { RID framebuffer; RD::InitialAction initial_depth_action; - RD::FinalAction final_depth_action; Rect2i rect; 
}; @@ -378,14 +375,8 @@ class RenderForwardClustered : public RendererSceneRenderRD { template _FORCE_INLINE_ void _render_list_template(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element); - void _render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element); - - LocalVector thread_draw_lists; - void _render_list_thread_function(uint32_t p_thread, RenderListParameters *p_params); - void _render_list_with_threads(RenderListParameters *p_params, RID p_framebuffer, RD::InitialAction p_initial_color_action, RD::FinalAction p_final_color_action, RD::InitialAction p_initial_depth_action, RD::FinalAction p_final_depth_action, const Vector &p_clear_color_values = Vector(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2(), const Vector &p_storage_textures = Vector()); - - uint32_t render_list_thread_threshold = 500; + void _render_list_with_draw_list(RenderListParameters *p_params, RID p_framebuffer, RD::InitialAction p_initial_color_action, RD::FinalAction p_final_color_action, RD::InitialAction p_initial_depth_action, RD::FinalAction p_final_depth_action, const Vector &p_clear_color_values = Vector(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2()); void _update_instance_data_buffer(RenderListType p_render_list); void _fill_instance_data(RenderListType p_render_list, int *p_render_info = nullptr, uint32_t p_offset = 0, int32_t p_max_elements = -1, bool p_update_buffer = true); @@ -604,7 +595,7 @@ class RenderForwardClustered : public RendererSceneRenderRD { void _render_shadow_begin(); void _render_shadow_append(RID p_framebuffer, const PagedArray &p_instances, const Projection &p_projection, const Transform3D &p_transform, float p_zfar, 
float p_bias, float p_normal_bias, bool p_reverse_cull_face, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_mesh_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true, RenderingMethod::RenderInfo *p_render_info = nullptr, const Size2i &p_viewport_size = Size2i(1, 1)); void _render_shadow_process(); - void _render_shadow_end(uint32_t p_barrier = RD::BARRIER_MASK_ALL_BARRIERS); + void _render_shadow_end(); /* Render Scene */ void _process_ssao(Ref p_render_buffers, RID p_environment, const RID *p_normal_buffers, const Projection *p_projections); diff --git a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp index bba1f620239..86852ce020f 100644 --- a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp @@ -446,7 +446,7 @@ void SceneShaderForwardClustered::MaterialData::set_next_pass(RID p_pass) { bool SceneShaderForwardClustered::MaterialData::update_parameters(const HashMap &p_parameters, bool p_uniform_dirty, bool p_textures_dirty) { SceneShaderForwardClustered *shader_singleton = (SceneShaderForwardClustered *)SceneShaderForwardClustered::singleton; - return update_parameters_uniform_set(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size, uniform_set, shader_singleton->shader.version_get_shader(shader_data->version, 0), RenderForwardClustered::MATERIAL_UNIFORM_SET, true, true, RD::BARRIER_MASK_RASTER); + return update_parameters_uniform_set(p_parameters, p_uniform_dirty, p_textures_dirty, 
shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size, uniform_set, shader_singleton->shader.version_get_shader(shader_data->version, 0), RenderForwardClustered::MATERIAL_UNIFORM_SET, true, true); } SceneShaderForwardClustered::MaterialData::~MaterialData() { diff --git a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp index b7d7105daa0..da04e6f9389 100644 --- a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp +++ b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp @@ -596,7 +596,7 @@ void RenderForwardMobile::_setup_lightmaps(const RenderDataRD *p_render_data, co scene_state.lightmaps_used++; } if (scene_state.lightmaps_used > 0) { - RD::get_singleton()->buffer_update(scene_state.lightmap_buffer, 0, sizeof(LightmapData) * scene_state.lightmaps_used, scene_state.lightmaps, RD::BARRIER_MASK_RASTER); + RD::get_singleton()->buffer_update(scene_state.lightmap_buffer, 0, sizeof(LightmapData) * scene_state.lightmaps_used, scene_state.lightmaps); } } @@ -631,7 +631,7 @@ void RenderForwardMobile::_pre_opaque_render(RenderDataRD *p_render_data) { if (p_render_data->directional_shadows.size()) { //open the pass for directional shadows light_storage->update_directional_shadow_atlas(); - RD::get_singleton()->draw_list_begin(light_storage->direction_shadow_get_fb(), RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_CONTINUE); + RD::get_singleton()->draw_list_begin(light_storage->direction_shadow_get_fb(), RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE); RD::get_singleton()->draw_list_end(); } } @@ -655,11 +655,8 @@ void RenderForwardMobile::_pre_opaque_render(RenderDataRD *p_render_data) { _render_shadow_process(); - 
_render_shadow_end(RD::BARRIER_MASK_NO_BARRIER); + _render_shadow_end(); } - - //full barrier here, we need raster, transfer and compute and it depends from the previous work - RD::get_singleton()->barrier(RD::BARRIER_MASK_ALL_BARRIERS, RD::BARRIER_MASK_ALL_BARRIERS); } void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color &p_default_bg_color) { @@ -811,7 +808,7 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color float sky_energy_multiplier = inverse_luminance_multiplier; Color clear_color = p_default_bg_color; - bool keep_color = false; + bool load_color = false; bool copy_canvas = false; if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_OVERDRAW) { @@ -855,7 +852,7 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color } } break; case RS::ENV_BG_KEEP: { - keep_color = true; + load_color = true; } break; case RS::ENV_BG_CAMERA_FEED: { } break; @@ -955,6 +952,7 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color if (rb_data.is_valid()) { cc.a = 0; // For transparent viewport backgrounds. } + c.push_back(cc); // Our render buffer. if (rb_data.is_valid()) { if (p_render_data->render_buffers->get_msaa_3d() != RS::VIEWPORT_MSAA_DISABLED) { @@ -966,7 +964,7 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color } } - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, merge_transparent_pass ? RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE, RD::INITIAL_ACTION_CLEAR, merge_transparent_pass ? RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE, c, 1.0, 0); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, load_color ? 
RD::INITIAL_ACTION_LOAD : RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, c, 1.0, 0); RD::FramebufferFormatID fb_format = RD::get_singleton()->framebuffer_get_format(framebuffer); if (copy_canvas) { @@ -1026,12 +1024,12 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color RD::get_singleton()->draw_command_end_label(); // Render 3D Pass / Render Reflection Probe Pass - RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_ALL_BARRIERS); + RD::get_singleton()->draw_list_end(); } else { // We're done with our subpasses so end our container pass // note, if MSAA is used we should get an automatic resolve here - RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_ALL_BARRIERS); + RD::get_singleton()->draw_list_end(); RD::get_singleton()->draw_command_end_label(); // Render 3D Pass / Render Reflection Probe Pass @@ -1062,9 +1060,9 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color render_list_params.framebuffer_format = fb_format; render_list_params.subpass = RD::get_singleton()->draw_list_get_current_pass(); // Should now always be 0. 
- draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ); + draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE); _render_list(draw_list, fb_format, &render_list_params, 0, render_list_params.element_count); - RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_ALL_BARRIERS); + RD::get_singleton()->draw_list_end(); RD::get_singleton()->draw_command_end_label(); // Render Transparent Pass } @@ -1248,15 +1246,15 @@ void RenderForwardMobile::_render_shadow_pass(RID p_light, RID p_shadow_atlas, i _render_shadow_append(render_fb, p_instances, light_projection, light_transform, zfar, 0, 0, false, false, use_pancake, p_camera_plane, p_lod_distance_multiplier, p_screen_mesh_lod_threshold, Rect2(), false, true, true, true, p_render_info); if (finalize_cubemap) { _render_shadow_process(); - _render_shadow_end(RD::BARRIER_MASK_FRAGMENT); + _render_shadow_end(); // reblit Rect2 atlas_rect_norm = atlas_rect; atlas_rect_norm.position /= float(atlas_size); atlas_rect_norm.size /= float(atlas_size); - copy_effects->copy_cubemap_to_dp(render_texture, atlas_fb, atlas_rect_norm, atlas_rect.size, light_projection.get_z_near(), light_projection.get_z_far(), false, RD::BARRIER_MASK_NO_BARRIER); + copy_effects->copy_cubemap_to_dp(render_texture, atlas_fb, atlas_rect_norm, atlas_rect.size, light_projection.get_z_near(), light_projection.get_z_far(), false); atlas_rect_norm.position += Vector2(dual_paraboloid_offset) * atlas_rect_norm.size; - copy_effects->copy_cubemap_to_dp(render_texture, atlas_fb, atlas_rect_norm, atlas_rect.size, light_projection.get_z_near(), light_projection.get_z_far(), true, RD::BARRIER_MASK_NO_BARRIER); + copy_effects->copy_cubemap_to_dp(render_texture, atlas_fb, atlas_rect_norm, atlas_rect.size, light_projection.get_z_near(), light_projection.get_z_far(), 
true); //restore transform so it can be properly used light_storage->light_instance_set_shadow_transform(p_light, Projection(), light_storage->light_instance_get_base_transform(p_light), zfar, 0, 0, 0); @@ -1337,8 +1335,7 @@ void RenderForwardMobile::_render_shadow_append(RID p_framebuffer, const PagedAr shadow_pass.lod_distance_multiplier = scene_data.lod_distance_multiplier; shadow_pass.framebuffer = p_framebuffer; - shadow_pass.initial_depth_action = p_begin ? (p_clear_region ? RD::INITIAL_ACTION_CLEAR_REGION : RD::INITIAL_ACTION_CLEAR) : (p_clear_region ? RD::INITIAL_ACTION_CLEAR_REGION_CONTINUE : RD::INITIAL_ACTION_CONTINUE); - shadow_pass.final_depth_action = p_end ? RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE; + shadow_pass.initial_depth_action = p_begin ? RD::INITIAL_ACTION_CLEAR : (p_clear_region ? RD::INITIAL_ACTION_CLEAR : RD::INITIAL_ACTION_LOAD); shadow_pass.rect = p_rect; scene_state.shadow_passes.push_back(shadow_pass); @@ -1357,17 +1354,14 @@ void RenderForwardMobile::_render_shadow_process() { RD::get_singleton()->draw_command_end_label(); } -void RenderForwardMobile::_render_shadow_end(uint32_t p_barrier) { +void RenderForwardMobile::_render_shadow_end() { RD::get_singleton()->draw_command_begin_label("Shadow Render"); for (SceneState::ShadowPass &shadow_pass : scene_state.shadow_passes) { - RenderListParameters render_list_parameters(render_list[RENDER_LIST_SECONDARY].elements.ptr() + shadow_pass.element_from, render_list[RENDER_LIST_SECONDARY].element_info.ptr() + shadow_pass.element_from, shadow_pass.element_count, shadow_pass.flip_cull, shadow_pass.pass_mode, shadow_pass.rp_uniform_set, 0, false, Vector2(), shadow_pass.lod_distance_multiplier, shadow_pass.screen_mesh_lod_threshold, 1, shadow_pass.element_from, RD::BARRIER_MASK_NO_BARRIER); - _render_list_with_threads(&render_list_parameters, shadow_pass.framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, shadow_pass.initial_depth_action, shadow_pass.final_depth_action, 
Vector(), 1.0, 0, shadow_pass.rect); + RenderListParameters render_list_parameters(render_list[RENDER_LIST_SECONDARY].elements.ptr() + shadow_pass.element_from, render_list[RENDER_LIST_SECONDARY].element_info.ptr() + shadow_pass.element_from, shadow_pass.element_count, shadow_pass.flip_cull, shadow_pass.pass_mode, shadow_pass.rp_uniform_set, 0, false, Vector2(), shadow_pass.lod_distance_multiplier, shadow_pass.screen_mesh_lod_threshold, 1, shadow_pass.element_from); + _render_list_with_draw_list(&render_list_parameters, shadow_pass.framebuffer, RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_DISCARD, shadow_pass.initial_depth_action, RD::FINAL_ACTION_STORE, Vector(), 1.0, 0, shadow_pass.rect); } - if (p_barrier != RD::BARRIER_MASK_NO_BARRIER) { - RD::get_singleton()->barrier(RD::BARRIER_MASK_FRAGMENT, p_barrier); - } RD::get_singleton()->draw_command_end_label(); } @@ -1416,7 +1410,7 @@ void RenderForwardMobile::_render_material(const Transform3D &p_cam_transform, c Color(0, 0, 0, 0), Color(0, 0, 0, 0) }; - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, clear, 1.0, 0, p_region); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, clear, 1.0, 0, p_region); _render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), &render_list_params, 0, render_list_params.element_count); RD::get_singleton()->draw_list_end(); } @@ -1462,7 +1456,7 @@ void RenderForwardMobile::_render_uv2(const PagedArray Color(0, 0, 0, 0) }; - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, clear, 1.0, 0, p_region); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, 
RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, clear, 1.0, 0, p_region); const int uv_offset_count = 9; static const Vector2 uv_offsets[uv_offset_count] = { @@ -1535,7 +1529,7 @@ void RenderForwardMobile::_render_particle_collider_heightfield(RID p_fb, const { //regular forward for now RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), false, pass_mode, rp_uniform_set, 0); - _render_list_with_threads(&render_list_params, p_fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ); + _render_list_with_draw_list(&render_list_params, p_fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE); } RD::get_singleton()->draw_command_end_label(); } @@ -1672,7 +1666,7 @@ void RenderForwardMobile::_update_instance_data_buffer(RenderListType p_render_l scene_state.instance_buffer[p_render_list] = RD::get_singleton()->storage_buffer_create(new_size * sizeof(SceneState::InstanceData)); scene_state.instance_buffer_size[p_render_list] = new_size; } - RD::get_singleton()->buffer_update(scene_state.instance_buffer[p_render_list], 0, sizeof(SceneState::InstanceData) * scene_state.instance_data[p_render_list].size(), scene_state.instance_data[p_render_list].ptr(), RD::BARRIER_MASK_RASTER); + RD::get_singleton()->buffer_update(scene_state.instance_buffer[p_render_list], 0, sizeof(SceneState::InstanceData) * scene_state.instance_data[p_render_list].size(), scene_state.instance_data[p_render_list].ptr()); } } @@ -1991,32 +1985,13 @@ void RenderForwardMobile::_render_list(RenderingDevice::DrawListID p_draw_list, } } -void RenderForwardMobile::_render_list_thread_function(uint32_t p_thread, RenderListParameters *p_params) { - uint32_t render_total = p_params->element_count; - uint32_t total_threads = 
WorkerThreadPool::get_singleton()->get_thread_count(); - uint32_t render_from = p_thread * render_total / total_threads; - uint32_t render_to = (p_thread + 1 == total_threads) ? render_total : ((p_thread + 1) * render_total / total_threads); - _render_list(thread_draw_lists[p_thread], p_params->framebuffer_format, p_params, render_from, render_to); -} - -void RenderForwardMobile::_render_list_with_threads(RenderListParameters *p_params, RID p_framebuffer, RD::InitialAction p_initial_color_action, RD::FinalAction p_final_color_action, RD::InitialAction p_initial_depth_action, RD::FinalAction p_final_depth_action, const Vector &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const Vector &p_storage_textures) { +void RenderForwardMobile::_render_list_with_draw_list(RenderListParameters *p_params, RID p_framebuffer, RD::InitialAction p_initial_color_action, RD::FinalAction p_final_color_action, RD::InitialAction p_initial_depth_action, RD::FinalAction p_final_depth_action, const Vector &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region) { RD::FramebufferFormatID fb_format = RD::get_singleton()->framebuffer_get_format(p_framebuffer); p_params->framebuffer_format = fb_format; - if ((uint32_t)p_params->element_count > render_list_thread_threshold && false) { // secondary command buffers need more testing at this time - //multi threaded - thread_draw_lists.resize(WorkerThreadPool::get_singleton()->get_thread_count()); - RD::get_singleton()->draw_list_begin_split(p_framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures); - WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RenderForwardMobile::_render_list_thread_function, p_params, 
thread_draw_lists.size(), -1, true, SNAME("ForwardMobileRenderSubpass")); - WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task); - - RD::get_singleton()->draw_list_end(p_params->barrier); - } else { - //single threaded - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures); - _render_list(draw_list, fb_format, p_params, 0, p_params->element_count); - RD::get_singleton()->draw_list_end(p_params->barrier); - } + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region); + _render_list(draw_list, fb_format, p_params, 0, p_params->element_count); + RD::get_singleton()->draw_list_end(); } template @@ -2813,9 +2788,6 @@ RenderForwardMobile::RenderForwardMobile() { scene_shader.init(defines); - // !BAS! maybe we need a mobile version of this setting? 
- render_list_thread_threshold = GLOBAL_GET("rendering/limits/forward_renderer/threaded_render_minimum_instances"); - _update_shader_quality_settings(); } diff --git a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h index da96ca2124e..f1f6bb3db4f 100644 --- a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h +++ b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h @@ -155,10 +155,9 @@ private: float screen_mesh_lod_threshold = 0.0; RD::FramebufferFormatID framebuffer_format = 0; uint32_t element_offset = 0; - uint32_t barrier = RD::BARRIER_MASK_ALL_BARRIERS; uint32_t subpass = 0; - RenderListParameters(GeometryInstanceSurfaceDataCache **p_elements, RenderElementInfo *p_element_info, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, RID p_render_pass_uniform_set, uint32_t p_spec_constant_base_flags = 0, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), float p_lod_distance_multiplier = 0.0, float p_screen_mesh_lod_threshold = 0.0, uint32_t p_view_count = 1, uint32_t p_element_offset = 0, uint32_t p_barrier = RD::BARRIER_MASK_ALL_BARRIERS) { + RenderListParameters(GeometryInstanceSurfaceDataCache **p_elements, RenderElementInfo *p_element_info, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, RID p_render_pass_uniform_set, uint32_t p_spec_constant_base_flags = 0, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), float p_lod_distance_multiplier = 0.0, float p_screen_mesh_lod_threshold = 0.0, uint32_t p_view_count = 1, uint32_t p_element_offset = 0) { elements = p_elements; element_info = p_element_info; element_count = p_element_count; @@ -172,7 +171,6 @@ private: lod_distance_multiplier = p_lod_distance_multiplier; screen_mesh_lod_threshold = p_screen_mesh_lod_threshold; element_offset = p_element_offset; - barrier = p_barrier; spec_constant_base_flags = 
p_spec_constant_base_flags; } }; @@ -183,7 +181,7 @@ private: void _render_shadow_begin(); void _render_shadow_append(RID p_framebuffer, const PagedArray &p_instances, const Projection &p_projection, const Transform3D &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_mesh_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true, RenderingMethod::RenderInfo *p_render_info = nullptr); void _render_shadow_process(); - void _render_shadow_end(uint32_t p_barrier = RD::BARRIER_MASK_ALL_BARRIERS); + void _render_shadow_end(); /* Render Scene */ @@ -277,7 +275,6 @@ private: RID framebuffer; RD::InitialAction initial_depth_action; - RD::FinalAction final_depth_action; Rect2i rect; }; @@ -351,14 +348,8 @@ private: template _FORCE_INLINE_ void _render_list_template(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element); - void _render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element); - - LocalVector thread_draw_lists; - void _render_list_thread_function(uint32_t p_thread, RenderListParameters *p_params); - void _render_list_with_threads(RenderListParameters *p_params, RID p_framebuffer, RD::InitialAction p_initial_color_action, RD::FinalAction p_final_color_action, RD::InitialAction p_initial_depth_action, RD::FinalAction p_final_depth_action, const Vector &p_clear_color_values = Vector(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2(), const Vector &p_storage_textures = Vector()); - - uint32_t render_list_thread_threshold = 500; + 
void _render_list_with_draw_list(RenderListParameters *p_params, RID p_framebuffer, RD::InitialAction p_initial_color_action, RD::FinalAction p_final_color_action, RD::InitialAction p_initial_depth_action, RD::FinalAction p_final_depth_action, const Vector &p_clear_color_values = Vector(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2()); RenderList render_list[RENDER_LIST_MAX]; diff --git a/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp b/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp index f1cec0e07cf..043cdbc8e5c 100644 --- a/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp +++ b/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp @@ -398,7 +398,7 @@ void SceneShaderForwardMobile::MaterialData::set_next_pass(RID p_pass) { bool SceneShaderForwardMobile::MaterialData::update_parameters(const HashMap &p_parameters, bool p_uniform_dirty, bool p_textures_dirty) { SceneShaderForwardMobile *shader_singleton = (SceneShaderForwardMobile *)SceneShaderForwardMobile::singleton; - return update_parameters_uniform_set(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size, uniform_set, shader_singleton->shader.version_get_shader(shader_data->version, 0), RenderForwardMobile::MATERIAL_UNIFORM_SET, true, true, RD::BARRIER_MASK_RASTER); + return update_parameters_uniform_set(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size, uniform_set, shader_singleton->shader.version_get_shader(shader_data->version, 0), RenderForwardMobile::MATERIAL_UNIFORM_SET, true, true); } SceneShaderForwardMobile::MaterialData::~MaterialData() { diff --git 
a/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp b/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp index 657628111ae..28fccbaf882 100644 --- a/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp @@ -1177,7 +1177,7 @@ void RendererCanvasRenderRD::_render_items(RID p_to_render_target, int p_item_co RD::FramebufferFormatID fb_format = RD::get_singleton()->framebuffer_get_format(framebuffer); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, clear ? RD::INITIAL_ACTION_CLEAR : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD, clear_colors); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, clear ? RD::INITIAL_ACTION_CLEAR : RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD, clear_colors); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, fb_uniform_set, BASE_UNIFORM_SET); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, state.default_transforms_uniform_set, TRANSFORMS_UNIFORM_SET); @@ -1721,8 +1721,8 @@ void RendererCanvasRenderRD::light_update_shadow(RID p_rid, int p_shadow_index, //light.basis.scale(Vector3(to_light.elements[0].length(),to_light.elements[1].length(),1)); Rect2i rect((state.shadow_texture_size / 4) * i, p_shadow_index * 2, (state.shadow_texture_size / 4), 2); - RD::InitialAction initial_action = i == 0 ? RD::INITIAL_ACTION_CLEAR_REGION : RD::INITIAL_ACTION_CLEAR_REGION_CONTINUE; - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(state.shadow_fb, initial_action, i != 3 ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, initial_action, RD::FINAL_ACTION_DISCARD, cc, 1.0, 0, rect); + RD::InitialAction initial_action = i == 0 ? 
RD::INITIAL_ACTION_CLEAR : RD::INITIAL_ACTION_LOAD; + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(state.shadow_fb, initial_action, RD::FINAL_ACTION_STORE, initial_action, RD::FINAL_ACTION_DISCARD, cc, 1.0, 0, rect); Projection projection; { @@ -1811,7 +1811,7 @@ void RendererCanvasRenderRD::light_update_directional_shadow(RID p_rid, int p_sh cc.push_back(Color(1, 1, 1, 1)); Rect2i rect(0, p_shadow_index * 2, state.shadow_texture_size, 2); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(state.shadow_fb, RD::INITIAL_ACTION_CLEAR_REGION, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR_REGION, RD::FINAL_ACTION_DISCARD, cc, 1.0, 0, rect); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(state.shadow_fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_DISCARD, cc, 1.0, 0, rect); Projection projection; projection.set_orthogonal(-half_size, half_size, -0.5, 0.5, 0.0, distance); @@ -1881,7 +1881,7 @@ void RendererCanvasRenderRD::render_sdf(RID p_render_target, LightOccluderInstan Vector cc; cc.push_back(Color(0, 0, 0, 0)); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_DISCARD, cc); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_DISCARD, cc); Projection projection; @@ -2371,8 +2371,8 @@ RendererRD::MaterialStorage::ShaderData *RendererCanvasRenderRD::_create_shader_ bool RendererCanvasRenderRD::CanvasMaterialData::update_parameters(const HashMap &p_parameters, bool p_uniform_dirty, bool p_textures_dirty) { RendererCanvasRenderRD *canvas_singleton = static_cast(RendererCanvasRender::singleton); - bool uniform_set_changed = update_parameters_uniform_set(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, 
shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size, uniform_set, canvas_singleton->shader.canvas_shader.version_get_shader(shader_data->version, 0), MATERIAL_UNIFORM_SET, true, false, RD::BARRIER_MASK_ALL_BARRIERS); - bool uniform_set_srgb_changed = update_parameters_uniform_set(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size, uniform_set_srgb, canvas_singleton->shader.canvas_shader.version_get_shader(shader_data->version, 0), MATERIAL_UNIFORM_SET, false, false, RD::BARRIER_MASK_ALL_BARRIERS); + bool uniform_set_changed = update_parameters_uniform_set(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size, uniform_set, canvas_singleton->shader.canvas_shader.version_get_shader(shader_data->version, 0), MATERIAL_UNIFORM_SET, true, false); + bool uniform_set_srgb_changed = update_parameters_uniform_set(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size, uniform_set_srgb, canvas_singleton->shader.canvas_shader.version_get_shader(shader_data->version, 0), MATERIAL_UNIFORM_SET, false, false); return uniform_set_changed || uniform_set_srgb_changed; } diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp index 30c9b97aa42..3b05431f4a3 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp @@ -220,7 +220,7 @@ void RendererSceneRenderRD::voxel_gi_update(RID p_probe, bool p_update_light_ins gi.voxel_gi_update(p_probe, 
p_update_light_instances, p_light_instances, p_dynamic_objects); } -void RendererSceneRenderRD::_debug_sdfgi_probes(Ref p_render_buffers, RID p_framebuffer, const uint32_t p_view_count, const Projection *p_camera_with_transforms, bool p_will_continue_color, bool p_will_continue_depth) { +void RendererSceneRenderRD::_debug_sdfgi_probes(Ref p_render_buffers, RID p_framebuffer, const uint32_t p_view_count, const Projection *p_camera_with_transforms) { ERR_FAIL_COND(p_render_buffers.is_null()); if (!p_render_buffers->has_custom_data(RB_SCOPE_SDFGI)) { @@ -229,7 +229,7 @@ void RendererSceneRenderRD::_debug_sdfgi_probes(Ref p_rend Ref sdfgi = p_render_buffers->get_custom_data(RB_SCOPE_SDFGI); - sdfgi->debug_probes(p_framebuffer, p_view_count, p_camera_with_transforms, p_will_continue_color, p_will_continue_depth); + sdfgi->debug_probes(p_framebuffer, p_view_count, p_camera_with_transforms); } //////////////////////////////// @@ -987,14 +987,6 @@ void RendererSceneRenderRD::_post_prepass_render(RenderDataRD *p_render_data, bo } } -void RendererSceneRenderRD::_pre_resolve_render(RenderDataRD *p_render_data, bool p_use_gi) { - if (p_render_data->render_buffers.is_valid()) { - if (p_use_gi) { - RD::get_singleton()->compute_list_end(); - } - } -} - void RendererSceneRenderRD::render_scene(const Ref &p_render_buffers, const CameraData *p_camera_data, const CameraData *p_prev_camera_data, const PagedArray &p_instances, const PagedArray &p_lights, const PagedArray &p_reflection_probes, const PagedArray &p_voxel_gi_instances, const PagedArray &p_decals, const PagedArray &p_lightmaps, const PagedArray &p_fog_volumes, RID p_environment, RID p_camera_attributes, RID p_shadow_atlas, RID p_occluder_debug_tex, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const 
RenderSDFGIUpdateData *p_sdfgi_update_data, RenderingMethod::RenderInfo *r_render_info) { RendererRD::LightStorage *light_storage = RendererRD::LightStorage::get_singleton(); RendererRD::TextureStorage *texture_storage = RendererRD::TextureStorage::get_singleton(); diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.h b/servers/rendering/renderer_rd/renderer_scene_render_rd.h index 211d191039d..4811ae3b44e 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.h +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.h @@ -137,14 +137,13 @@ protected: virtual void _render_sdfgi(Ref p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture, float p_exposure_normalization) = 0; virtual void _render_particle_collider_heightfield(RID p_fb, const Transform3D &p_cam_transform, const Projection &p_cam_projection, const PagedArray &p_instances) = 0; - void _debug_sdfgi_probes(Ref p_render_buffers, RID p_framebuffer, uint32_t p_view_count, const Projection *p_camera_with_transforms, bool p_will_continue_color, bool p_will_continue_depth); + void _debug_sdfgi_probes(Ref p_render_buffers, RID p_framebuffer, uint32_t p_view_count, const Projection *p_camera_with_transforms); virtual RID _render_buffers_get_normal_texture(Ref p_render_buffers) = 0; virtual RID _render_buffers_get_velocity_texture(Ref p_render_buffers) = 0; bool _needs_post_prepass_render(RenderDataRD *p_render_data, bool p_use_gi); void _post_prepass_render(RenderDataRD *p_render_data, bool p_use_gi); - void _pre_resolve_render(RenderDataRD *p_render_data, bool p_use_gi); void _render_buffers_copy_screen_texture(const RenderDataRD *p_render_data); void _render_buffers_copy_depth_texture(const RenderDataRD *p_render_data); diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/SCsub 
b/servers/rendering/renderer_rd/shaders/effects/fsr2/SCsub index f06a2d86e24..5b8bbc343b7 100644 --- a/servers/rendering/renderer_rd/shaders/effects/fsr2/SCsub +++ b/servers/rendering/renderer_rd/shaders/effects/fsr2/SCsub @@ -6,6 +6,11 @@ if "RD_GLSL" in env["BUILDERS"]: # find all include files gl_include_files = [str(f) for f in Glob("*_inc.glsl")] + [str(f) for f in Glob("../*_inc.glsl")] + # Add all FSR2 shader and header files. + fsr2_dir = "#thirdparty/amd-fsr2/shaders" + gl_include_files += [str(f) for f in Glob(fsr2_dir + "/*.h")] + gl_include_files += [str(f) for f in Glob(fsr2_dir + "/*.glsl")] + # find all shader code(all glsl files excluding our include files) glsl_files = [str(f) for f in Glob("*.glsl") if str(f) not in gl_include_files] diff --git a/servers/rendering/renderer_rd/storage_rd/light_storage.cpp b/servers/rendering/renderer_rd/storage_rd/light_storage.cpp index 5f4bf6c8ed4..21c6425a878 100644 --- a/servers/rendering/renderer_rd/storage_rd/light_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/light_storage.cpp @@ -996,15 +996,15 @@ void LightStorage::update_light_buffers(RenderDataRD *p_render_data, const Paged //update without barriers if (omni_light_count) { - RD::get_singleton()->buffer_update(omni_light_buffer, 0, sizeof(LightData) * omni_light_count, omni_lights, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->buffer_update(omni_light_buffer, 0, sizeof(LightData) * omni_light_count, omni_lights); } if (spot_light_count) { - RD::get_singleton()->buffer_update(spot_light_buffer, 0, sizeof(LightData) * spot_light_count, spot_lights, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->buffer_update(spot_light_buffer, 0, sizeof(LightData) * spot_light_count, spot_lights); } if (r_directional_light_count) { - RD::get_singleton()->buffer_update(directional_light_buffer, 0, sizeof(DirectionalLightData) * r_directional_light_count, directional_lights, RD::BARRIER_MASK_RASTER | 
RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->buffer_update(directional_light_buffer, 0, sizeof(DirectionalLightData) * r_directional_light_count, directional_lights); } } @@ -1722,7 +1722,7 @@ void LightStorage::update_reflection_probe_buffer(RenderDataRD *p_render_data, c } if (reflection_count) { - RD::get_singleton()->buffer_update(reflection_buffer, 0, reflection_count * sizeof(ReflectionData), reflections, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->buffer_update(reflection_buffer, 0, reflection_count * sizeof(ReflectionData), reflections); } } diff --git a/servers/rendering/renderer_rd/storage_rd/material_storage.cpp b/servers/rendering/renderer_rd/storage_rd/material_storage.cpp index 2e8c9d7f8ef..1c3076b128f 100644 --- a/servers/rendering/renderer_rd/storage_rd/material_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/material_storage.cpp @@ -986,7 +986,7 @@ void MaterialStorage::MaterialData::free_parameters_uniform_set(RID p_uniform_se } } -bool MaterialStorage::MaterialData::update_parameters_uniform_set(const HashMap &p_parameters, bool p_uniform_dirty, bool p_textures_dirty, const HashMap &p_uniforms, const uint32_t *p_uniform_offsets, const Vector &p_texture_uniforms, const HashMap> &p_default_texture_params, uint32_t p_ubo_size, RID &uniform_set, RID p_shader, uint32_t p_shader_uniform_set, bool p_use_linear_color, bool p_3d_material, uint32_t p_barrier) { +bool MaterialStorage::MaterialData::update_parameters_uniform_set(const HashMap &p_parameters, bool p_uniform_dirty, bool p_textures_dirty, const HashMap &p_uniforms, const uint32_t *p_uniform_offsets, const Vector &p_texture_uniforms, const HashMap> &p_default_texture_params, uint32_t p_ubo_size, RID &uniform_set, RID p_shader, uint32_t p_shader_uniform_set, bool p_use_linear_color, bool p_3d_material) { if ((uint32_t)ubo_data.size() != p_ubo_size) { p_uniform_dirty = true; if (uniform_buffer.is_valid()) { @@ -1011,7 +1011,7 @@ bool 
MaterialStorage::MaterialData::update_parameters_uniform_set(const HashMap< //check whether buffer changed if (p_uniform_dirty && ubo_data.size()) { update_uniform_buffer(p_uniforms, p_uniform_offsets, p_parameters, ubo_data.ptrw(), ubo_data.size(), p_use_linear_color); - RD::get_singleton()->buffer_update(uniform_buffer, 0, ubo_data.size(), ubo_data.ptrw(), p_barrier); + RD::get_singleton()->buffer_update(uniform_buffer, 0, ubo_data.size(), ubo_data.ptrw()); } uint32_t tex_uniform_count = 0U; diff --git a/servers/rendering/renderer_rd/storage_rd/material_storage.h b/servers/rendering/renderer_rd/storage_rd/material_storage.h index 403fd286b46..fe769a778d5 100644 --- a/servers/rendering/renderer_rd/storage_rd/material_storage.h +++ b/servers/rendering/renderer_rd/storage_rd/material_storage.h @@ -87,7 +87,7 @@ public: virtual ~MaterialData(); //to be used internally by update_parameters, in the most common configuration of material parameters - bool update_parameters_uniform_set(const HashMap &p_parameters, bool p_uniform_dirty, bool p_textures_dirty, const HashMap &p_uniforms, const uint32_t *p_uniform_offsets, const Vector &p_texture_uniforms, const HashMap> &p_default_texture_params, uint32_t p_ubo_size, RID &r_uniform_set, RID p_shader, uint32_t p_shader_uniform_set, bool p_use_linear_color, bool p_3d_material, uint32_t p_barrier = RD::BARRIER_MASK_ALL_BARRIERS); + bool update_parameters_uniform_set(const HashMap &p_parameters, bool p_uniform_dirty, bool p_textures_dirty, const HashMap &p_uniforms, const uint32_t *p_uniform_offsets, const Vector &p_texture_uniforms, const HashMap> &p_default_texture_params, uint32_t p_ubo_size, RID &r_uniform_set, RID p_shader, uint32_t p_shader_uniform_set, bool p_use_linear_color, bool p_3d_material); void free_parameters_uniform_set(RID p_uniform_set); private: diff --git a/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp b/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp index 01ee4f3c013..b97ce2d0066 
100644 --- a/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp @@ -1458,8 +1458,7 @@ void MeshStorage::_multimesh_enable_motion_vectors(MultiMesh *multimesh) { if (multimesh->buffer_set && multimesh->data_cache.is_empty()) { // If the buffer was set but there's no data cached in the CPU, we copy the buffer directly on the GPU. - RD::get_singleton()->barrier(); - RD::get_singleton()->buffer_copy(multimesh->buffer, new_buffer, 0, 0, buffer_size, RD::BARRIER_MASK_NO_BARRIER); + RD::get_singleton()->buffer_copy(multimesh->buffer, new_buffer, 0, 0, buffer_size); RD::get_singleton()->buffer_copy(multimesh->buffer, new_buffer, 0, buffer_size, buffer_size); } else if (!multimesh->data_cache.is_empty()) { // Simply upload the data cached in the CPU, which should already be doubled in size. @@ -2037,10 +2036,9 @@ void MeshStorage::_update_dirty_multimeshes() { uint32_t offset = i * region_size; uint32_t size = multimesh->stride_cache * (uint32_t)multimesh->instances * (uint32_t)sizeof(float); uint32_t region_start_index = multimesh->stride_cache * MULTIMESH_DIRTY_REGION_SIZE * i; - RD::get_singleton()->buffer_update(multimesh->buffer, buffer_offset * sizeof(float) + offset, MIN(region_size, size - offset), &data[region_start_index], RD::BARRIER_MASK_NO_BARRIER); + RD::get_singleton()->buffer_update(multimesh->buffer, buffer_offset * sizeof(float) + offset, MIN(region_size, size - offset), &data[region_start_index]); } } - RD::get_singleton()->barrier(RD::BARRIER_MASK_NO_BARRIER, RD::BARRIER_MASK_ALL_BARRIERS); } memcpy(multimesh->previous_data_cache_dirty_regions, multimesh->data_cache_dirty_regions, data_cache_dirty_region_count * sizeof(bool)); diff --git a/servers/rendering/renderer_rd/storage_rd/particles_storage.cpp b/servers/rendering/renderer_rd/storage_rd/particles_storage.cpp index 3d3cb585ac1..a854e78f536 100644 --- a/servers/rendering/renderer_rd/storage_rd/particles_storage.cpp +++ 
b/servers/rendering/renderer_rd/storage_rd/particles_storage.cpp @@ -307,6 +307,11 @@ void ParticlesStorage::_particles_free_data(Particles *particles) { particles->emission_storage_buffer = RID(); } + if (particles->unused_storage_buffer.is_valid()) { + RD::get_singleton()->free(particles->unused_storage_buffer); + particles->unused_storage_buffer = RID(); + } + if (RD::get_singleton()->uniform_set_is_valid(particles->particles_material_uniform_set)) { //will need to be re-created RD::get_singleton()->free(particles->particles_material_uniform_set); @@ -530,6 +535,12 @@ void ParticlesStorage::_particles_allocate_emission_buffer(Particles *particles) } } +void ParticlesStorage::_particles_ensure_unused_buffer(Particles *particles) { + if (particles->unused_storage_buffer.is_null()) { + particles->unused_storage_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 4); + } +} + void ParticlesStorage::particles_set_subemitter(RID p_particles, RID p_subemitter_particles) { Particles *particles = particles_owner.get_or_null(p_particles); ERR_FAIL_NULL(particles); @@ -757,7 +768,8 @@ void ParticlesStorage::_particles_process(Particles *p_particles, double p_delta if (p_particles->emission_storage_buffer.is_valid()) { u.append_id(p_particles->emission_storage_buffer); } else { - u.append_id(MeshStorage::get_singleton()->get_default_rd_storage_buffer()); + _particles_ensure_unused_buffer(p_particles); + u.append_id(p_particles->unused_storage_buffer); } uniforms.push_back(u); } @@ -772,7 +784,8 @@ void ParticlesStorage::_particles_process(Particles *p_particles, double p_delta } u.append_id(sub_emitter->emission_storage_buffer); } else { - u.append_id(MeshStorage::get_singleton()->get_default_rd_storage_buffer()); + _particles_ensure_unused_buffer(p_particles); + u.append_id(p_particles->unused_storage_buffer); } uniforms.push_back(u); } @@ -1463,7 +1476,8 @@ void ParticlesStorage::update_particles() { if (particles->trail_bind_pose_buffer.is_valid()) { 
u.append_id(particles->trail_bind_pose_buffer); } else { - u.append_id(MeshStorage::get_singleton()->get_default_rd_storage_buffer()); + _particles_ensure_unused_buffer(particles); + u.append_id(particles->unused_storage_buffer); } uniforms.push_back(u); } diff --git a/servers/rendering/renderer_rd/storage_rd/particles_storage.h b/servers/rendering/renderer_rd/storage_rd/particles_storage.h index a28d7b41540..33f44f3045a 100644 --- a/servers/rendering/renderer_rd/storage_rd/particles_storage.h +++ b/servers/rendering/renderer_rd/storage_rd/particles_storage.h @@ -247,6 +247,8 @@ private: ParticleEmissionBuffer *emission_buffer = nullptr; RID emission_storage_buffer; + RID unused_storage_buffer; + HashSet collisions; Dependency dependency; @@ -263,6 +265,7 @@ private: void _particles_process(Particles *p_particles, double p_delta); void _particles_allocate_emission_buffer(Particles *particles); + void _particles_ensure_unused_buffer(Particles *particles); void _particles_free_data(Particles *particles); void _particles_update_buffers(Particles *particles); diff --git a/servers/rendering/renderer_rd/storage_rd/render_scene_data_rd.cpp b/servers/rendering/renderer_rd/storage_rd/render_scene_data_rd.cpp index 40891f9a631..f2231664fa9 100644 --- a/servers/rendering/renderer_rd/storage_rd/render_scene_data_rd.cpp +++ b/servers/rendering/renderer_rd/storage_rd/render_scene_data_rd.cpp @@ -252,7 +252,7 @@ void RenderSceneDataRD::update_ubo(RID p_uniform_buffer, RS::ViewportDebugDraw p } uniform_buffer = p_uniform_buffer; - RD::get_singleton()->buffer_update(uniform_buffer, 0, sizeof(UBODATA), &ubo, RD::BARRIER_MASK_RASTER); + RD::get_singleton()->buffer_update(uniform_buffer, 0, sizeof(UBODATA), &ubo); } RID RenderSceneDataRD::get_uniform_buffer() { diff --git a/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp b/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp index 380e325ffa2..d8baf260f97 100644 --- 
a/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp @@ -2703,7 +2703,7 @@ void TextureStorage::update_decal_atlas() { Vector cc; cc.push_back(clear_color); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(mm.fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, cc); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(mm.fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_DISCARD, cc); for (const KeyValue &E : decal_atlas.textures) { DecalAtlas::Texture *t = decal_atlas.textures.getptr(E.key); @@ -2981,7 +2981,7 @@ void TextureStorage::update_decal_buffer(const PagedArray &p_decals, const } if (decal_count > 0) { - RD::get_singleton()->buffer_update(decal_buffer, 0, sizeof(DecalData) * decal_count, decals, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE); + RD::get_singleton()->buffer_update(decal_buffer, 0, sizeof(DecalData) * decal_count, decals); } } @@ -3384,7 +3384,7 @@ void TextureStorage::render_target_do_msaa_resolve(RID p_render_target) { if (!rt->msaa_needs_resolve) { return; } - RD::get_singleton()->draw_list_begin(rt->get_framebuffer(), RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD); + RD::get_singleton()->draw_list_begin(rt->get_framebuffer(), RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD); RD::get_singleton()->draw_list_end(); rt->msaa_needs_resolve = false; } @@ -3501,7 +3501,7 @@ void TextureStorage::render_target_do_clear_request(RID p_render_target) { } Vector clear_colors; clear_colors.push_back(rt->use_hdr ? 
rt->clear_color.srgb_to_linear() : rt->clear_color); - RD::get_singleton()->draw_list_begin(rt->get_framebuffer(), RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD, clear_colors); + RD::get_singleton()->draw_list_begin(rt->get_framebuffer(), RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD, clear_colors); RD::get_singleton()->draw_list_end(); rt->clear_requested = false; rt->msaa_needs_resolve = false; diff --git a/servers/rendering/rendering_device.compat.inc b/servers/rendering/rendering_device.compat.inc index 8e7beda6f31..f79c9c9f7c3 100644 --- a/servers/rendering/rendering_device.compat.inc +++ b/servers/rendering/rendering_device.compat.inc @@ -34,39 +34,87 @@ RID RenderingDevice::_shader_create_from_bytecode_bind_compat_79606(const Vector return shader_create_from_bytecode(p_shader_binary, RID()); } -BitField RenderingDevice::_convert_barrier_mask_81356(BitField p_old_barrier) { - if (p_old_barrier == 7) { - return BARRIER_MASK_ALL_BARRIERS; - } else if (p_old_barrier == 16) { - return BARRIER_MASK_NO_BARRIER; - } - - BitField new_barrier; - if (p_old_barrier & 1) { - new_barrier.set_flag(BARRIER_MASK_VERTEX); - } - if (p_old_barrier & 2) { - new_barrier.set_flag(BARRIER_MASK_FRAGMENT); - } - if (p_old_barrier & 4) { - new_barrier.set_flag(BARRIER_MASK_COMPUTE); - } - if (p_old_barrier & 8) { - new_barrier.set_flag(BARRIER_MASK_TRANSFER); - } - return new_barrier; -} - void RenderingDevice::_draw_list_end_bind_compat_81356(BitField p_post_barrier) { - draw_list_end(_convert_barrier_mask_81356(p_post_barrier)); + draw_list_end(); } void RenderingDevice::_compute_list_end_bind_compat_81356(BitField p_post_barrier) { - compute_list_end(_convert_barrier_mask_81356(p_post_barrier)); + compute_list_end(); } void RenderingDevice::_barrier_bind_compat_81356(BitField p_from, BitField p_to) { - barrier(_convert_barrier_mask_81356(p_from), 
_convert_barrier_mask_81356(p_to)); + // Does nothing. +} + +void RenderingDevice::_draw_list_end_bind_compat_84976(BitField p_post_barrier) { + draw_list_end(); +} + +void RenderingDevice::_compute_list_end_bind_compat_84976(BitField p_post_barrier) { + compute_list_end(); +} + +RenderingDevice::InitialAction RenderingDevice::_convert_initial_action_84976(InitialAction p_old_initial_action) { + switch (uint32_t(p_old_initial_action)) { + case 0: // INITIAL_ACTION_CLEAR + return INITIAL_ACTION_CLEAR; + case 1: // INITIAL_ACTION_CLEAR_REGION + case 2: // INITIAL_ACTION_CLEAR_REGION_CONTINUE + return INITIAL_ACTION_CLEAR; + case 3: // INITIAL_ACTION_KEEP + return INITIAL_ACTION_LOAD; + case 4: // INITIAL_ACTION_DROP + return INITIAL_ACTION_DISCARD; + case 5: // INITIAL_ACTION_CONTINUE + return INITIAL_ACTION_LOAD; + default: + return INITIAL_ACTION_LOAD; + } +} + +RenderingDevice::FinalAction RenderingDevice::_convert_final_action_84976(FinalAction p_old_final_action) { + switch (uint32_t(p_old_final_action)) { + case 0: // FINAL_ACTION_READ + return FINAL_ACTION_STORE; + case 1: // FINAL_ACTION_DISCARD + return FINAL_ACTION_DISCARD; + case 2: // FINAL_ACTION_CONTINUE + return FINAL_ACTION_STORE; + default: + return FINAL_ACTION_STORE; + } +} + +RenderingDevice::DrawListID RenderingDevice::_draw_list_begin_bind_compat_84976(RID p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const TypedArray &p_storage_textures) { + return draw_list_begin(p_framebuffer, _convert_initial_action_84976(p_initial_color_action), _convert_final_action_84976(p_final_color_action), _convert_initial_action_84976(p_initial_depth_action), _convert_final_action_84976(p_final_depth_action), p_clear_color_values, p_clear_depth, p_clear_stencil, p_region); +} +
+RenderingDevice::ComputeListID RenderingDevice::_compute_list_begin_bind_compat_84976(bool p_allow_draw_overlap) { + return compute_list_begin(); +} + +Error RenderingDevice::_buffer_update_bind_compat_84976(RID p_buffer, uint32_t p_offset, uint32_t p_size, const Vector &p_data, BitField p_post_barrier) { + return _buffer_update_bind(p_buffer, p_offset, p_size, p_data); +} + +Error RenderingDevice::_buffer_clear_bind_compat_84976(RID p_buffer, uint32_t p_offset, uint32_t p_size, BitField p_post_barrier) { + return buffer_clear(p_buffer, p_offset, p_size); +} + +Error RenderingDevice::_texture_update_bind_compat_84976(RID p_texture, uint32_t p_layer, const Vector &p_data, BitField p_post_barrier) { + return texture_update(p_texture, p_layer, p_data); +} + +Error RenderingDevice::_texture_copy_bind_compat_84976(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, BitField p_post_barrier) { + return texture_copy(p_from_texture, p_to_texture, p_from, p_to, p_size, p_src_mipmap, p_dst_mipmap, p_src_layer, p_dst_layer); +} + +Error RenderingDevice::_texture_clear_bind_compat_84976(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, BitField p_post_barrier) { + return texture_clear(p_texture, p_color, p_base_mipmap, p_mipmaps, p_base_layer, p_layers); +} + +Error RenderingDevice::_texture_resolve_multisample_bind_compat_84976(RID p_from_texture, RID p_to_texture, BitField p_post_barrier) { + return texture_resolve_multisample(p_from_texture, p_to_texture); } void RenderingDevice::_bind_compatibility_methods() { @@ -74,6 +122,16 @@ void RenderingDevice::_bind_compatibility_methods() { ClassDB::bind_compatibility_method(D_METHOD("draw_list_end", "post_barrier"), &RenderingDevice::_draw_list_end_bind_compat_81356, DEFVAL(7)); 
ClassDB::bind_compatibility_method(D_METHOD("compute_list_end", "post_barrier"), &RenderingDevice::_compute_list_end_bind_compat_81356, DEFVAL(7)); ClassDB::bind_compatibility_method(D_METHOD("barrier", "from", "to"), &RenderingDevice::_barrier_bind_compat_81356, DEFVAL(7), DEFVAL(7)); + ClassDB::bind_compatibility_method(D_METHOD("draw_list_end", "post_barrier"), &RenderingDevice::_draw_list_end_bind_compat_84976, DEFVAL(0x7FFF)); + ClassDB::bind_compatibility_method(D_METHOD("compute_list_end", "post_barrier"), &RenderingDevice::_compute_list_end_bind_compat_84976, DEFVAL(0x7FFF)); + ClassDB::bind_compatibility_method(D_METHOD("draw_list_begin", "framebuffer", "initial_color_action", "final_color_action", "initial_depth_action", "final_depth_action", "clear_color_values", "clear_depth", "clear_stencil", "region", "storage_textures"), &RenderingDevice::_draw_list_begin_bind_compat_84976, DEFVAL(Vector()), DEFVAL(1.0), DEFVAL(0), DEFVAL(Rect2()), DEFVAL(TypedArray())); + ClassDB::bind_compatibility_method(D_METHOD("compute_list_begin", "allow_draw_overlap"), &RenderingDevice::_compute_list_begin_bind_compat_84976, DEFVAL(false)); + ClassDB::bind_compatibility_method(D_METHOD("buffer_update", "buffer", "offset", "size_bytes", "data", "post_barrier"), &RenderingDevice::_buffer_update_bind_compat_84976, DEFVAL(0x7FFF)); + ClassDB::bind_compatibility_method(D_METHOD("buffer_clear", "buffer", "offset", "size_bytes", "post_barrier"), &RenderingDevice::_buffer_clear_bind_compat_84976, DEFVAL(0x7FFF)); + ClassDB::bind_compatibility_method(D_METHOD("texture_update", "texture", "layer", "data", "post_barrier"), &RenderingDevice::_texture_update_bind_compat_84976, DEFVAL(0x7FFF)); + ClassDB::bind_compatibility_method(D_METHOD("texture_copy", "from_texture", "to_texture", "from_pos", "to_pos", "size", "src_mipmap", "dst_mipmap", "src_layer", "dst_layer", "post_barrier"), &RenderingDevice::_texture_copy_bind_compat_84976, DEFVAL(0x7FFF)); + 
ClassDB::bind_compatibility_method(D_METHOD("texture_clear", "texture", "color", "base_mipmap", "mipmap_count", "base_layer", "layer_count", "post_barrier"), &RenderingDevice::_texture_clear_bind_compat_84976, DEFVAL(0x7FFF)); + ClassDB::bind_compatibility_method(D_METHOD("texture_resolve_multisample", "from_texture", "to_texture", "post_barrier"), &RenderingDevice::_texture_resolve_multisample_bind_compat_84976, DEFVAL(0x7FFF)); } #endif diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 9e7c32094bf..03ffe967e26 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -37,7 +37,26 @@ #include "core/io/dir_access.h" #include "servers/rendering/renderer_rd/api_context_rd.h" -//#define FORCE_FULL_BARRIER +// When true, the command graph will attempt to reorder the rendering commands submitted by the user based on the dependencies detected from +// the commands automatically. This should improve rendering performance in most scenarios at the cost of some extra CPU overhead. +// +// This behavior can be disabled if it's suspected that the graph is not detecting dependencies correctly and more control over the order of +// the commands is desired (e.g. debugging). + +#define RENDER_GRAPH_REORDER 1 + +// Synchronization barriers are issued between the graph's levels only with the necessary amount of detail to achieve the correct result. If +// it's suspected that the graph is not doing this correctly, full barriers can be issued instead that will block all types of operations +// between the synchronization levels. This setting will have a very negative impact on performance when enabled, so it's only intended for +// debugging purposes. + +#define RENDER_GRAPH_FULL_BARRIERS 0 + +// The command graph can automatically issue secondary command buffers and record them on background threads when they reach an arbitrary +// size threshold. 
This can be very beneficial towards reducing the time the main thread takes to record all the rendering commands. However, +// this setting is not enabled by default as it's been shown to cause some strange issues with certain IHVs that have yet to be understood. + +#define SECONDARY_COMMAND_BUFFERS_PER_FRAME 0 RenderingDevice *RenderingDevice::singleton = nullptr; @@ -131,127 +150,23 @@ RID RenderingDevice::shader_create_from_spirv(const Vector return shader_create_from_bytecode(bytecode); } -/******************/ -/**** BARRIERS ****/ -/******************/ - -void RenderingDevice::_full_barrier(bool p_sync_with_draw) { - // Used for debug. - - RDD::MemoryBarrier mb; - mb.src_access = (RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT | - RDD::BARRIER_ACCESS_INDEX_READ_BIT | - RDD::BARRIER_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | - RDD::BARRIER_ACCESS_UNIFORM_READ_BIT | - RDD::BARRIER_ACCESS_INPUT_ATTACHMENT_READ_BIT | - RDD::BARRIER_ACCESS_SHADER_READ_BIT | - RDD::BARRIER_ACCESS_SHADER_WRITE_BIT | - RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT | - RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | - RDD::BARRIER_ACCESS_TRANSFER_READ_BIT | - RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT | - RDD::BARRIER_ACCESS_HOST_READ_BIT | - RDD::BARRIER_ACCESS_HOST_WRITE_BIT); - mb.dst_access = (RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT | - RDD::BARRIER_ACCESS_INDEX_READ_BIT | - RDD::BARRIER_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | - RDD::BARRIER_ACCESS_UNIFORM_READ_BIT | - RDD::BARRIER_ACCESS_INPUT_ATTACHMENT_READ_BIT | - RDD::BARRIER_ACCESS_SHADER_READ_BIT | - RDD::BARRIER_ACCESS_SHADER_WRITE_BIT | - RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT | - RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | - RDD::BARRIER_ACCESS_TRANSFER_READ_BIT | - 
RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT | - RDD::BARRIER_ACCESS_HOST_READ_BIT | - RDD::BARRIER_ACCESS_HOST_WRITE_BIT); - - RDD::CommandBufferID cmd_buffer = p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer; - driver->command_pipeline_barrier(cmd_buffer, RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, mb, {}, {}); -} - /***************************/ /**** BUFFER MANAGEMENT ****/ /***************************/ -RenderingDevice::Buffer *RenderingDevice::_get_buffer_from_owner(RID p_buffer, BitField &r_stages, BitField &r_access, BitField p_post_barrier) { +RenderingDevice::Buffer *RenderingDevice::_get_buffer_from_owner(RID p_buffer) { Buffer *buffer = nullptr; - r_stages.clear(); - r_access.clear(); if (vertex_buffer_owner.owns(p_buffer)) { buffer = vertex_buffer_owner.get_or_null(p_buffer); - - r_stages.set_flag(RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT); - r_access.set_flag(RDD::BARRIER_ACCESS_VERTEX_ATTRIBUTE_READ_BIT); - if (buffer->usage & RDD::BUFFER_USAGE_STORAGE_BIT) { - if (p_post_barrier.has_flag(BARRIER_MASK_VERTEX)) { - r_access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - r_stages.set_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_FRAGMENT)) { - r_access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - r_stages.set_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { - r_access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - r_stages.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); - } - } } else if (index_buffer_owner.owns(p_buffer)) { - r_stages.set_flag(RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT); - r_access.set_flag(RDD::BARRIER_ACCESS_INDEX_READ_BIT); buffer = index_buffer_owner.get_or_null(p_buffer); } else if (uniform_buffer_owner.owns(p_buffer)) { - if 
(p_post_barrier.has_flag(BARRIER_MASK_VERTEX)) { - r_stages.set_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_FRAGMENT)) { - r_stages.set_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { - r_stages.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); - } - r_access.set_flag(RDD::BARRIER_ACCESS_UNIFORM_READ_BIT); buffer = uniform_buffer_owner.get_or_null(p_buffer); } else if (texture_buffer_owner.owns(p_buffer)) { - if (p_post_barrier.has_flag(BARRIER_MASK_VERTEX)) { - r_stages.set_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT); - r_access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_FRAGMENT)) { - r_stages.set_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT); - r_access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { - r_stages.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); - r_access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT); - } - - // FIXME: Broken. 
+ DEV_ASSERT(false && "FIXME: Broken."); //buffer = texture_buffer_owner.get_or_null(p_buffer)->buffer; } else if (storage_buffer_owner.owns(p_buffer)) { buffer = storage_buffer_owner.get_or_null(p_buffer); - if (p_post_barrier.has_flag(BARRIER_MASK_VERTEX)) { - r_stages.set_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT); - r_access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_FRAGMENT)) { - r_stages.set_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT); - r_access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { - r_stages.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); - r_access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - - if (buffer->usage.has_flag(RDD::BUFFER_USAGE_INDIRECT_BIT)) { - r_stages.set_flag(RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT); - r_access.set_flag(RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT); - } } return buffer; } @@ -269,10 +184,11 @@ Error RenderingDevice::_insert_staging_block() { return OK; } -Error RenderingDevice::_staging_buffer_allocate(uint32_t p_amount, uint32_t p_required_align, uint32_t &r_alloc_offset, uint32_t &r_alloc_size, bool p_can_segment) { +Error RenderingDevice::_staging_buffer_allocate(uint32_t p_amount, uint32_t p_required_align, uint32_t &r_alloc_offset, uint32_t &r_alloc_size, StagingRequiredAction &r_required_action, bool p_can_segment) { // Determine a block to use. r_alloc_size = p_amount; + r_required_action = STAGING_REQUIRED_ACTION_NONE; while (true) { r_alloc_offset = 0; @@ -324,23 +240,7 @@ Error RenderingDevice::_staging_buffer_allocate(uint32_t p_amount, uint32_t p_re // and this frame is not even done. // If this is the main thread, it means the user is likely loading a lot of resources at once,. 
// Otherwise, the thread should just be blocked until the next frame (currently unimplemented). - - if (false) { // Separate thread from render. - - //block_until_next_frame() - continue; - } else { - // Flush EVERYTHING including setup commands. IF not immediate, also need to flush the draw commands. - _flush(true); - - // Clear the whole staging buffer. - for (int i = 0; i < staging_buffer_blocks.size(); i++) { - staging_buffer_blocks.write[i].frame_used = 0; - staging_buffer_blocks.write[i].fill_amount = 0; - } - // Claim current. - staging_buffer_blocks.write[staging_buffer_current].frame_used = frames_drawn; - } + r_required_action = STAGING_REQUIRED_ACTION_FLUSH_CURRENT; } } else { @@ -368,28 +268,7 @@ Error RenderingDevice::_staging_buffer_allocate(uint32_t p_amount, uint32_t p_re // Let's flush older frames. // The logic here is that if a game is loading a lot of data from the main thread, it will need to be stalled anyway. // If loading from a separate thread, we can block that thread until next frame when more room is made (not currently implemented, though). - - if (false) { - // Separate thread from render. - //block_until_next_frame() - continue; // And try again. - } else { - _flush(false); - - for (int i = 0; i < staging_buffer_blocks.size(); i++) { - // Clear all blocks but the ones from this frame. - int block_idx = (i + staging_buffer_current) % staging_buffer_blocks.size(); - if (staging_buffer_blocks[block_idx].frame_used == frames_drawn) { - break; // Ok, we reached something from this frame, abort. - } - - staging_buffer_blocks.write[block_idx].frame_used = 0; - staging_buffer_blocks.write[block_idx].fill_amount = 0; - } - - // Claim for current frame. 
- staging_buffer_blocks.write[staging_buffer_current].frame_used = frames_drawn; - } + r_required_action = STAGING_REQUIRED_ACTION_FLUSH_OLDER; } } @@ -402,20 +281,78 @@ Error RenderingDevice::_staging_buffer_allocate(uint32_t p_amount, uint32_t p_re return OK; } -Error RenderingDevice::_buffer_update(Buffer *p_buffer, size_t p_offset, const uint8_t *p_data, size_t p_data_size, bool p_use_draw_command_buffer, uint32_t p_required_align) { +void RenderingDevice::_staging_buffer_execute_required_action(StagingRequiredAction p_required_action) { + switch (p_required_action) { + case STAGING_REQUIRED_ACTION_NONE: { + // Do nothing. + } break; + case STAGING_REQUIRED_ACTION_FLUSH_CURRENT: { + // Flush EVERYTHING including setup commands. IF not immediate, also need to flush the draw commands. + _flush(true); + + // Clear the whole staging buffer. + for (int i = 0; i < staging_buffer_blocks.size(); i++) { + staging_buffer_blocks.write[i].frame_used = 0; + staging_buffer_blocks.write[i].fill_amount = 0; + } + + // Claim for current frame. + staging_buffer_blocks.write[staging_buffer_current].frame_used = frames_drawn; + } break; + case STAGING_REQUIRED_ACTION_FLUSH_OLDER: { + _flush(false); + + for (int i = 0; i < staging_buffer_blocks.size(); i++) { + // Clear all blocks but the ones from this frame. + int block_idx = (i + staging_buffer_current) % staging_buffer_blocks.size(); + if (staging_buffer_blocks[block_idx].frame_used == frames_drawn) { + break; // Ok, we reached something from this frame, abort. + } + + staging_buffer_blocks.write[block_idx].frame_used = 0; + staging_buffer_blocks.write[block_idx].fill_amount = 0; + } + + // Claim for current frame. 
+ staging_buffer_blocks.write[staging_buffer_current].frame_used = frames_drawn; + } break; + default: { + DEV_ASSERT(false && "Unknown required action."); + } break; + } +} + +Error RenderingDevice::_buffer_update(Buffer *p_buffer, RID p_buffer_id, size_t p_offset, const uint8_t *p_data, size_t p_data_size, bool p_use_draw_queue, uint32_t p_required_align) { // Submitting may get chunked for various reasons, so convert this to a task. size_t to_submit = p_data_size; size_t submit_from = 0; + thread_local LocalVector command_buffer_copies_vector; + command_buffer_copies_vector.clear(); + while (to_submit > 0) { uint32_t block_write_offset; uint32_t block_write_amount; + StagingRequiredAction required_action; - Error err = _staging_buffer_allocate(MIN(to_submit, staging_buffer_block_size), p_required_align, block_write_offset, block_write_amount); + Error err = _staging_buffer_allocate(MIN(to_submit, staging_buffer_block_size), p_required_align, block_write_offset, block_write_amount, required_action); if (err) { return err; } + if (p_use_draw_queue && !command_buffer_copies_vector.is_empty() && required_action == STAGING_REQUIRED_ACTION_FLUSH_CURRENT) { + if (_buffer_make_mutable(p_buffer, p_buffer_id)) { + // The buffer must be mutable to be used as a copy destination. + draw_graph.add_synchronization(); + } + + // If we're using the draw queue and the staging buffer requires flushing everything, we submit the command early and clear the current vector. + draw_graph.add_buffer_update(p_buffer->driver_id, p_buffer->draw_tracker, command_buffer_copies_vector); + command_buffer_copies_vector.clear(); + } + + _staging_buffer_execute_required_action(required_action); + // Map staging buffer (It's CPU and coherent). 
uint8_t *data_ptr = driver->buffer_map(staging_buffer_blocks[staging_buffer_current].driver_id); ERR_FAIL_NULL_V(data_ptr, ERR_CANT_CREATE); @@ -427,12 +364,19 @@ Error RenderingDevice::_buffer_update(Buffer *p_buffer, size_t p_offset, const u driver->buffer_unmap(staging_buffer_blocks[staging_buffer_current].driver_id); // Insert a command to copy this. - RDD::BufferCopyRegion region; region.src_offset = block_write_offset; region.dst_offset = submit_from + p_offset; region.size = block_write_amount; - driver->command_copy_buffer(p_use_draw_command_buffer ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, staging_buffer_blocks[staging_buffer_current].driver_id, p_buffer->driver_id, region); + + if (p_use_draw_queue) { + RDG::RecordedBufferCopy buffer_copy; + buffer_copy.source = staging_buffer_blocks[staging_buffer_current].driver_id; + buffer_copy.region = region; + command_buffer_copies_vector.push_back(buffer_copy); + } else { + driver->command_copy_buffer(frames[frame].setup_command_buffer, staging_buffer_blocks[staging_buffer_current].driver_id, p_buffer->driver_id, region); + } staging_buffer_blocks.write[staging_buffer_current].fill_amount = block_write_offset + block_write_amount; @@ -440,10 +384,19 @@ Error RenderingDevice::_buffer_update(Buffer *p_buffer, size_t p_offset, const u submit_from += block_write_amount; } + if (p_use_draw_queue && !command_buffer_copies_vector.is_empty()) { + if (_buffer_make_mutable(p_buffer, p_buffer_id)) { + // The buffer must be mutable to be used as a copy destination. 
+ draw_graph.add_synchronization(); + } + + draw_graph.add_buffer_update(p_buffer->driver_id, p_buffer->draw_tracker, command_buffer_copies_vector); + } + return OK; } -Error RenderingDevice::buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t p_src_offset, uint32_t p_dst_offset, uint32_t p_size, BitField p_post_barrier) { +Error RenderingDevice::buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t p_src_offset, uint32_t p_dst_offset, uint32_t p_size) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER, @@ -451,25 +404,12 @@ Error RenderingDevice::buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER, "Copying buffers is forbidden during creation of a compute list"); - // This method assumes the barriers have been pushed prior to being called, therefore no barriers are pushed - // for the source or destination buffers before performing the copy. These masks are effectively ignored. - BitField src_stages; - BitField src_access; - Buffer *src_buffer = _get_buffer_from_owner(p_src_buffer, src_stages, src_access, BARRIER_MASK_NO_BARRIER); + Buffer *src_buffer = _get_buffer_from_owner(p_src_buffer); if (!src_buffer) { ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Source buffer argument is not a valid buffer of any type."); } - BitField dst_stages; - BitField dst_access; - if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { - // If the post barrier mask defines it, we indicate the destination buffer will require a barrier with these flags set - // after the copy command is queued. 
- dst_stages.set_flag(RDD::PIPELINE_STAGE_TRANSFER_BIT); - dst_access.set_flag(RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT); - } - - Buffer *dst_buffer = _get_buffer_from_owner(p_dst_buffer, dst_stages, dst_access, p_post_barrier); + Buffer *dst_buffer = _get_buffer_from_owner(p_dst_buffer); if (!dst_buffer) { ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Destination buffer argument is not a valid buffer of any type."); } @@ -483,31 +423,18 @@ Error RenderingDevice::buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t region.src_offset = p_src_offset; region.dst_offset = p_dst_offset; region.size = p_size; - driver->command_copy_buffer(frames[frame].draw_command_buffer, src_buffer->driver_id, dst_buffer->driver_id, region); -#ifdef FORCE_FULL_BARRIER - _full_barrier(true); -#else - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS) && p_post_barrier != RD::BARRIER_MASK_NO_BARRIER) { - if (dst_stages.is_empty()) { - dst_stages = RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - } - - // As indicated by the post barrier mask, push a new barrier. - RDD::BufferBarrier bb; - bb.buffer = dst_buffer->driver_id; - bb.src_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - bb.dst_access = dst_access; - bb.offset = p_dst_offset; - bb.size = p_size; - driver->command_pipeline_barrier(frames[frame].draw_command_buffer, RDD::PIPELINE_STAGE_TRANSFER_BIT, dst_stages, {}, bb, {}); + if (_buffer_make_mutable(dst_buffer, p_dst_buffer)) { + // The destination buffer must be mutable to be used as a copy destination. 
+ draw_graph.add_synchronization(); } -#endif + + draw_graph.add_buffer_copy(src_buffer->driver_id, src_buffer->draw_tracker, dst_buffer->driver_id, dst_buffer->draw_tracker, region); return OK; } -Error RenderingDevice::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, BitField p_post_barrier) { +Error RenderingDevice::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER, @@ -515,14 +442,7 @@ Error RenderingDevice::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER, "Updating buffers is forbidden during creation of a compute list"); - BitField dst_stages; - BitField dst_access; - if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { - // Protect subsequent updates. - dst_stages.set_flag(RDD::PIPELINE_STAGE_TRANSFER_BIT); - dst_access.set_flag(RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT); - } - Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stages, dst_access, p_post_barrier); + Buffer *buffer = _get_buffer_from_owner(p_buffer); if (!buffer) { ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Buffer argument is not a valid buffer of any type."); } @@ -530,33 +450,10 @@ Error RenderingDevice::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p ERR_FAIL_COND_V_MSG(p_offset + p_size > buffer->size, ERR_INVALID_PARAMETER, "Attempted to write buffer (" + itos((p_offset + p_size) - buffer->size) + " bytes) past the end."); - Error err = _buffer_update(buffer, p_offset, (uint8_t *)p_data, p_size, true); - if (err) { - return err; - } - -#ifdef FORCE_FULL_BARRIER - _full_barrier(true); -#else - if (dst_stages.is_empty()) { - dst_stages.set_flag(RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); - } - - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS) && p_post_barrier != RD::BARRIER_MASK_NO_BARRIER) { - RDD::BufferBarrier bb; - bb.buffer = buffer->driver_id; - 
bb.src_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - bb.dst_access = dst_access; - bb.offset = p_offset; - bb.size = p_size; - driver->command_pipeline_barrier(frames[frame].draw_command_buffer, RDD::PIPELINE_STAGE_TRANSFER_BIT, dst_stages, {}, bb, {}); - } - -#endif - return err; + return _buffer_update(buffer, p_buffer, p_offset, (uint8_t *)p_data, p_size, true); } -Error RenderingDevice::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, BitField p_post_barrier) { +Error RenderingDevice::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG((p_size % 4) != 0, ERR_INVALID_PARAMETER, @@ -566,15 +463,7 @@ Error RenderingDevice::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_ ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER, "Updating buffers is forbidden during creation of a compute list"); - BitField dst_stages; - BitField dst_access; - if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { - // Protect subsequent updates. 
- dst_stages.set_flag(RDD::PIPELINE_STAGE_TRANSFER_BIT); - dst_access.set_flag(RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT); - } - - Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stages, dst_access, p_post_barrier); + Buffer *buffer = _get_buffer_from_owner(p_buffer); if (!buffer) { ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Buffer argument is not a valid buffer of any type."); } @@ -582,51 +471,24 @@ Error RenderingDevice::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_ ERR_FAIL_COND_V_MSG(p_offset + p_size > buffer->size, ERR_INVALID_PARAMETER, "Attempted to write buffer (" + itos((p_offset + p_size) - buffer->size) + " bytes) past the end."); - driver->command_clear_buffer(frames[frame].draw_command_buffer, buffer->driver_id, p_offset, p_size); - -#ifdef FORCE_FULL_BARRIER - _full_barrier(true); -#else - if (dst_stages.is_empty()) { - dst_stages.set_flag(RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); + if (_buffer_make_mutable(buffer, p_buffer)) { + // The destination buffer must be mutable to be used as a clear destination. + draw_graph.add_synchronization(); } - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - RDD::BufferBarrier bb; - bb.buffer = buffer->driver_id; - bb.src_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - bb.dst_access = dst_access; - bb.offset = p_offset; - bb.size = p_size; - driver->command_pipeline_barrier(frames[frame].draw_command_buffer, RDD::PIPELINE_STAGE_TRANSFER_BIT, dst_stages, {}, bb, {}); - } + draw_graph.add_buffer_clear(buffer->driver_id, buffer->draw_tracker, p_offset, p_size); -#endif return OK; } Vector RenderingDevice::buffer_get_data(RID p_buffer, uint32_t p_offset, uint32_t p_size) { _THREAD_SAFE_METHOD_ - // It could be this buffer was just created. - BitField src_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; - BitField src_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - // Get the vulkan buffer and the potential stage/access possible. 
- Buffer *buffer = _get_buffer_from_owner(p_buffer, src_stages, src_access, BARRIER_MASK_ALL_BARRIERS); + Buffer *buffer = _get_buffer_from_owner(p_buffer); if (!buffer) { ERR_FAIL_V_MSG(Vector(), "Buffer is either invalid or this type of buffer can't be retrieved. Only Index and Vertex buffers allow retrieving."); } - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - // Make sure no one is using the buffer -- the "true" gets us to the same command buffer as below. - RDD::BufferBarrier bb; - bb.buffer = buffer->driver_id; - bb.src_access = src_access; - bb.dst_access = RDD::BARRIER_ACCESS_TRANSFER_READ_BIT; - bb.size = buffer->size; - driver->command_pipeline_barrier(frames[frame].draw_command_buffer, src_stages, RDD::PIPELINE_STAGE_TRANSFER_BIT, {}, bb, {}); - } - // Size of buffer to retrieve. if (!p_size) { p_size = buffer->size; @@ -641,7 +503,9 @@ Vector RenderingDevice::buffer_get_data(RID p_buffer, uint32_t p_offset RDD::BufferCopyRegion region; region.src_offset = p_offset; region.size = p_size; - driver->command_copy_buffer(frames[frame].draw_command_buffer, buffer->driver_id, tmp_buffer, region); + + draw_graph.add_buffer_get_data(buffer->driver_id, buffer->draw_tracker, tmp_buffer, region); + // Flush everything so memory can be safely mapped. _flush(true); @@ -676,23 +540,21 @@ RID RenderingDevice::storage_buffer_create(uint32_t p_size_bytes, const Vectorbuffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU); ERR_FAIL_COND_V(!buffer.driver_id, RID()); + // Storage buffers are assumed to be mutable. 
+ buffer.draw_tracker = RDG::resource_tracker_create(); + buffer.draw_tracker->buffer_driver_id = buffer.driver_id; + if (p_data.size()) { - uint64_t data_size = p_data.size(); - const uint8_t *r = p_data.ptr(); - _buffer_update(&buffer, 0, r, data_size); - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - RDD::BufferBarrier bb; - bb.buffer = buffer.driver_id; - bb.src_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - bb.dst_access = (RDD::BARRIER_ACCESS_SHADER_READ_BIT | RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - bb.size = data_size; - driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_TRANSFER_BIT, RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT | RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT | RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT, {}, bb, {}); - } + _buffer_update(&buffer, RID(), 0, p_data.ptr(), p_data.size()); } buffer_memory += buffer.size; - return storage_buffer_owner.make_rid(buffer); + RID id = storage_buffer_owner.make_rid(buffer); +#ifdef DEV_ENABLED + set_resource_name(id, "RID:" + itos(id.get_id())); +#endif + return id; } RID RenderingDevice::texture_buffer_create(uint32_t p_size_elements, DataFormat p_format, const Vector &p_data) { @@ -710,6 +572,12 @@ RID RenderingDevice::texture_buffer_create(uint32_t p_size_elements, DataFormat texture_buffer.driver_id = driver->buffer_create(size_bytes, usage, RDD::MEMORY_ALLOCATION_TYPE_GPU); ERR_FAIL_COND_V(!texture_buffer.driver_id, RID()); + // Texture buffers are assumed to be immutable unless they don't have initial data. 
+ if (p_data.is_empty()) { + texture_buffer.draw_tracker = RDG::resource_tracker_create(); + texture_buffer.draw_tracker->buffer_driver_id = texture_buffer.driver_id; + } + bool ok = driver->buffer_set_texel_format(texture_buffer.driver_id, p_format); if (!ok) { driver->buffer_free(texture_buffer.driver_id); @@ -717,15 +585,7 @@ RID RenderingDevice::texture_buffer_create(uint32_t p_size_elements, DataFormat } if (p_data.size()) { - _buffer_update(&texture_buffer, 0, p_data.ptr(), p_data.size()); - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - RDD::BufferBarrier bb; - bb.buffer = texture_buffer.driver_id; - bb.src_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - bb.dst_access = RDD::BARRIER_ACCESS_SHADER_READ_BIT; - bb.size = size_bytes; - driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_TRANSFER_BIT, (RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT | RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT | RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT), {}, bb, {}); - } + _buffer_update(&texture_buffer, RID(), 0, p_data.ptr(), p_data.size()); } buffer_memory += size_bytes; @@ -787,10 +647,8 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture ERR_FAIL_COND_V_MSG(required_mipmaps < format.mipmaps, RID(), "Too many mipmaps requested for texture format and dimensions (" + itos(format.mipmaps) + "), maximum allowed: (" + itos(required_mipmaps) + ")."); + uint32_t forced_usage_bits = 0; if (p_data.size()) { - ERR_FAIL_COND_V_MSG(!(format.usage_bits & TEXTURE_USAGE_CAN_UPDATE_BIT), RID(), - "Texture needs the TEXTURE_USAGE_CAN_UPDATE_BIT usage flag in order to be updated at initialization or later"); - ERR_FAIL_COND_V_MSG(p_data.size() != (int)format.array_layers, RID(), "Default supplied data for image format is of invalid length (" + itos(p_data.size()) + "), should be (" + itos(format.array_layers) + ")."); @@ -799,6 +657,10 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const 
Texture ERR_FAIL_COND_V_MSG((uint32_t)p_data[i].size() != required_size, RID(), "Data for slice index " + itos(i) + " (mapped to layer " + itos(i) + ") differs in size (supplied: " + itos(p_data[i].size()) + ") than what is required by the format (" + itos(required_size) + ")."); } + + if (!(format.usage_bits & TEXTURE_USAGE_CAN_UPDATE_BIT)) { + forced_usage_bits = TEXTURE_USAGE_CAN_UPDATE_BIT; + } } { @@ -849,7 +711,7 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture // Create. Texture texture; - + format.usage_bits |= forced_usage_bits; texture.driver_id = driver->texture_create(format, tv); ERR_FAIL_COND_V(!texture.driver_id, RID()); texture.type = format.texture_type; @@ -862,26 +724,10 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture texture.base_mipmap = 0; texture.base_layer = 0; texture.is_resolve_buffer = format.is_resolve_buffer; - texture.usage_flags = format.usage_bits; + texture.usage_flags = format.usage_bits & ~forced_usage_bits; texture.samples = format.samples; texture.allowed_shared_formats = format.shareable_formats; - - // Set base layout based on usage priority. - - if ((format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT)) { - // First priority, readable. - texture.layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - } else if ((format.usage_bits & TEXTURE_USAGE_STORAGE_BIT)) { - // Second priority, storage. - texture.layout = RDD::TEXTURE_LAYOUT_GENERAL; - } else if ((format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT)) { - // Third priority, color or depth. 
- texture.layout = RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - } else if ((format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) { - texture.layout = RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - } else { - texture.layout = RDD::TEXTURE_LAYOUT_GENERAL; - } + texture.has_initial_data = !p_data.is_empty(); if ((format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) { texture.read_aspect_flags.set_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT); @@ -896,18 +742,10 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture texture.bound = false; - // Barrier to set layout. - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - RDD::TextureBarrier tb; - tb.texture = texture.driver_id; - tb.dst_access = RDD::BARRIER_ACCESS_SHADER_READ_BIT; - tb.prev_layout = RDD::TEXTURE_LAYOUT_UNDEFINED; - tb.next_layout = texture.layout; - tb.subresources.aspect = texture.barrier_aspect_flags; - tb.subresources.mipmap_count = format.mipmaps; - tb.subresources.layer_count = format.array_layers; - - driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_TOP_OF_PIPE_BIT, RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT | RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT | RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT, {}, {}, tb); + // Textures are only assumed to be immutable if they have initial data and none of the other bits that indicate write usage are enabled. 
+ bool texture_mutable_by_default = texture.usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_STORAGE_BIT | TEXTURE_USAGE_STORAGE_ATOMIC_BIT | TEXTURE_USAGE_VRS_ATTACHMENT_BIT); + if (p_data.is_empty() || texture_mutable_by_default) { + _texture_make_mutable(&texture, RID()); } texture_memory += driver->texture_get_allocation_size(texture.driver_id); @@ -919,9 +757,15 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture if (p_data.size()) { for (uint32_t i = 0; i < p_format.array_layers; i++) { - _texture_update(id, i, p_data[i], BARRIER_MASK_ALL_BARRIERS, true); + _texture_update(id, i, p_data[i], true, false); + } + + if (texture.draw_tracker != nullptr) { + // Draw tracker can assume the texture will be in transfer destination. + texture.draw_tracker->usage = RDG::RESOURCE_USAGE_TRANSFER_TO; } } + return id; } @@ -959,6 +803,12 @@ RID RenderingDevice::texture_create_shared(const TextureView &p_view, RID p_with texture.driver_id = driver->texture_create_shared(texture.driver_id, tv); ERR_FAIL_COND_V(!texture.driver_id, RID()); + texture.slice_trackers.clear(); + + if (texture.draw_tracker != nullptr) { + texture.draw_tracker->reference_count++; + } + texture.owner = p_with_texture; RID id = texture_owner.make_rid(texture); #ifdef DEV_ENABLED @@ -988,23 +838,6 @@ RID RenderingDevice::texture_create_from_extension(TextureType p_type, DataForma texture.allowed_shared_formats.push_back(RD::DATA_FORMAT_R8G8B8A8_UNORM); texture.allowed_shared_formats.push_back(RD::DATA_FORMAT_R8G8B8A8_SRGB); - // Set base layout based on usage priority. - - if (p_usage.has_flag(TEXTURE_USAGE_SAMPLING_BIT)) { - // First priority, readable. - texture.layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - } else if (p_usage.has_flag(TEXTURE_USAGE_STORAGE_BIT)) { - // Second priority, storage. 
- texture.layout = RDD::TEXTURE_LAYOUT_GENERAL; - } else if (p_usage.has_flag(TEXTURE_USAGE_COLOR_ATTACHMENT_BIT)) { - // Third priority, color or depth. - texture.layout = RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - } else if (p_usage.has_flag(TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) { - texture.layout = RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - } else { - texture.layout = RDD::TEXTURE_LAYOUT_GENERAL; - } - if (p_usage.has_flag(TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) { texture.read_aspect_flags.set_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT); texture.barrier_aspect_flags.set_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT); @@ -1019,19 +852,7 @@ RID RenderingDevice::texture_create_from_extension(TextureType p_type, DataForma texture.driver_id = driver->texture_create_from_extension(p_image, p_type, p_format, p_layers, (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)); ERR_FAIL_COND_V(!texture.driver_id, RID()); - // Barrier to set layout. - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - RDD::TextureBarrier tb; - tb.texture = texture.driver_id; - tb.dst_access = RDD::BARRIER_ACCESS_SHADER_READ_BIT; - tb.prev_layout = RDD::TEXTURE_LAYOUT_UNDEFINED; - tb.next_layout = texture.layout; - tb.subresources.aspect = texture.barrier_aspect_flags; - tb.subresources.mipmap_count = texture.mipmaps; - tb.subresources.layer_count = texture.layers; - - driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_TOP_OF_PIPE_BIT, RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT, {}, {}, tb); - } + _texture_make_mutable(&texture, RID()); RID id = texture_owner.make_rid(texture); #ifdef DEV_ENABLED @@ -1081,6 +902,7 @@ RID RenderingDevice::texture_create_shared_from_slice(const TextureView &p_view, } Texture texture = *src_texture; + get_image_format_required_size(texture.format, texture.width, texture.height, texture.depth, p_mipmap + 1, &texture.width, &texture.height); texture.mipmaps = p_mipmaps; texture.layers = 
slice_layers; @@ -1118,7 +940,17 @@ RID RenderingDevice::texture_create_shared_from_slice(const TextureView &p_view, texture.driver_id = driver->texture_create_shared_from_slice(src_texture->driver_id, tv, p_slice_type, p_layer, slice_layers, p_mipmap, p_mipmaps); ERR_FAIL_COND_V(!texture.driver_id, RID()); + const Rect2i slice_rect(p_mipmap, p_layer, p_mipmaps, slice_layers); texture.owner = p_with_texture; + texture.slice_type = p_slice_type; + texture.slice_rect = slice_rect; + + // If parent is mutable, make slice mutable by default. + if (src_texture->draw_tracker != nullptr) { + texture.draw_tracker = nullptr; + _texture_make_mutable(&texture, RID()); + } + RID id = texture_owner.make_rid(texture); #ifdef DEV_ENABLED set_resource_name(id, "RID:" + itos(id.get_id())); @@ -1128,8 +960,8 @@ RID RenderingDevice::texture_create_shared_from_slice(const TextureView &p_view, return id; } -Error RenderingDevice::texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data, BitField p_post_barrier) { - return _texture_update(p_texture, p_layer, p_data, p_post_barrier, false); +Error RenderingDevice::texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data) { + return _texture_update(p_texture, p_layer, p_data, false, true); } static _ALWAYS_INLINE_ void _copy_region(uint8_t const *__restrict p_src, uint8_t *__restrict p_dst, uint32_t p_src_x, uint32_t p_src_y, uint32_t p_src_w, uint32_t p_src_h, uint32_t p_src_full_w, uint32_t p_dst_pitch, uint32_t p_unit_size) { @@ -1148,7 +980,7 @@ static _ALWAYS_INLINE_ void _copy_region(uint8_t const *__restrict p_src, uint8_ } } -Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data, BitField p_post_barrier, bool p_use_setup_queue) { +Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data, bool p_use_setup_queue, bool p_validate_can_update) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG((draw_list || compute_list) && !p_use_setup_queue, 
ERR_INVALID_PARAMETER, @@ -1166,7 +998,7 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve ERR_FAIL_COND_V_MSG(texture->bound, ERR_CANT_ACQUIRE_RESOURCE, "Texture can't be updated while a draw list that uses it as part of a framebuffer is being created. Ensure the draw list is finalized (and that the color/depth texture using it is not set to `RenderingDevice.FINAL_ACTION_CONTINUE`) to update this texture."); - ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_CAN_UPDATE_BIT), ERR_INVALID_PARAMETER, + ERR_FAIL_COND_V_MSG(p_validate_can_update && !(texture->usage_flags & TEXTURE_USAGE_CAN_UPDATE_BIT), ERR_INVALID_PARAMETER, "Texture requires the `RenderingDevice.TEXTURE_USAGE_CAN_UPDATE_BIT` to be set to be updatable."); uint32_t layer_count = texture->layers; @@ -1191,21 +1023,22 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve const uint8_t *r = p_data.ptr(); - RDD::CommandBufferID command_buffer = p_use_setup_queue ? frames[frame].setup_command_buffer : frames[frame].draw_command_buffer; + thread_local LocalVector command_buffer_to_texture_copies_vector; + command_buffer_to_texture_copies_vector.clear(); - // Barrier to transfer. - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { + if (p_use_setup_queue && driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { + // When using the setup queue directly, we transition the texture to the optimal layout. 
RDD::TextureBarrier tb; tb.texture = texture->driver_id; tb.dst_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - tb.prev_layout = texture->layout; + tb.prev_layout = RDD::TEXTURE_LAYOUT_UNDEFINED; tb.next_layout = RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL; tb.subresources.aspect = texture->barrier_aspect_flags; tb.subresources.mipmap_count = texture->mipmaps; tb.subresources.base_layer = p_layer; tb.subresources.layer_count = 1; - driver->command_pipeline_barrier(command_buffer, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, RDD::PIPELINE_STAGE_TRANSFER_BIT, {}, {}, tb); + driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, RDD::PIPELINE_STAGE_TRANSFER_BIT, {}, {}, tb); } uint32_t mipmap_offset = 0; @@ -1240,12 +1073,26 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve uint32_t pitch_step = driver->api_trait_get(RDD::API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP); region_pitch = STEPIFY(region_pitch, pitch_step); uint32_t to_allocate = region_pitch * region_h; - uint32_t alloc_offset = 0, alloc_size = 0; - Error err = _staging_buffer_allocate(to_allocate, required_align, alloc_offset, alloc_size, false); + StagingRequiredAction required_action; + Error err = _staging_buffer_allocate(to_allocate, required_align, alloc_offset, alloc_size, required_action, false); ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - uint8_t *write_ptr = nullptr; + if (!p_use_setup_queue && !command_buffer_to_texture_copies_vector.is_empty() && required_action == STAGING_REQUIRED_ACTION_FLUSH_CURRENT) { + if (_texture_make_mutable(texture, p_texture)) { + // The texture must be mutable to be used as a copy destination. + draw_graph.add_synchronization(); + } + + // If we're using the draw queue and the staging buffer requires flushing everything, we submit the command early and clear the current vector. 
+ draw_graph.add_texture_update(texture->driver_id, texture->draw_tracker, command_buffer_to_texture_copies_vector); + command_buffer_to_texture_copies_vector.clear(); + } + + _staging_buffer_execute_required_action(required_action); + + uint8_t *write_ptr; + { // Map. uint8_t *data_ptr = driver->buffer_map(staging_buffer_blocks[staging_buffer_current].driver_id); ERR_FAIL_NULL_V(data_ptr, ERR_CANT_CREATE); @@ -1288,7 +1135,14 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve copy_region.texture_offset = Vector3i(x, y, z); copy_region.texture_region_size = Vector3i(region_logic_w, region_logic_h, 1); - driver->command_copy_buffer_to_texture(command_buffer, staging_buffer_blocks[staging_buffer_current].driver_id, texture->driver_id, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, copy_region); + if (p_use_setup_queue) { + driver->command_copy_buffer_to_texture(frames[frame].setup_command_buffer, staging_buffer_blocks[staging_buffer_current].driver_id, texture->driver_id, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, copy_region); + } else { + RDG::RecordedBufferToTextureCopy buffer_to_texture_copy; + buffer_to_texture_copy.from_buffer = staging_buffer_blocks[staging_buffer_current].driver_id; + buffer_to_texture_copy.region = copy_region; + command_buffer_to_texture_copies_vector.push_back(buffer_to_texture_copy); + } staging_buffer_blocks.write[staging_buffer_current].fill_amount = alloc_offset + alloc_size; } @@ -1300,50 +1154,25 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve logic_height = MAX(1u, logic_height >> 1); } - // Barrier to restore layout. 
- if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - BitField stages; - BitField access; - if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { - stages.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); - access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_VERTEX)) { - stages.set_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT); - access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_FRAGMENT)) { - stages.set_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT); - access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { - stages.set_flag(RDD::PIPELINE_STAGE_TRANSFER_BIT); - access.set_flag(RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT); - } - - if (stages.is_empty()) { - stages.set_flag(RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); - } - + if (p_use_setup_queue && (texture->draw_tracker == nullptr) && driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { + // If the texture does not have a tracker, it means it must be transitioned to the sampling state. 
RDD::TextureBarrier tb; tb.texture = texture->driver_id; tb.src_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - tb.dst_access = access; tb.prev_layout = RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL; - tb.next_layout = texture->layout; + tb.next_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; tb.subresources.aspect = texture->barrier_aspect_flags; tb.subresources.mipmap_count = texture->mipmaps; tb.subresources.base_layer = p_layer; tb.subresources.layer_count = 1; - - driver->command_pipeline_barrier(command_buffer, RDD::PIPELINE_STAGE_TRANSFER_BIT, stages, {}, {}, tb); - - if (texture->used_in_frame != frames_drawn) { - texture->used_in_raster = false; - texture->used_in_compute = false; - texture->used_in_frame = frames_drawn; + driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_TRANSFER_BIT, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, {}, {}, tb); + } else if (!p_use_setup_queue && !command_buffer_to_texture_copies_vector.is_empty()) { + if (_texture_make_mutable(texture, p_texture)) { + // The texture must be mutable to be used as a copy destination. + draw_graph.add_synchronization(); } - texture->used_in_transfer = true; + + draw_graph.add_texture_update(texture->driver_id, texture->draw_tracker, command_buffer_to_texture_copies_vector); } return OK; @@ -1455,63 +1284,35 @@ Vector RenderingDevice::texture_get_data(RID p_texture, uint32_t p_laye RDD::BufferID tmp_buffer = driver->buffer_create(work_buffer_size, RDD::BUFFER_USAGE_TRANSFER_TO_BIT, RDD::MEMORY_ALLOCATION_TYPE_CPU); ERR_FAIL_COND_V(!tmp_buffer, Vector()); - RDD::CommandBufferID command_buffer = frames[frame].draw_command_buffer; // Makes more sense to retrieve. + thread_local LocalVector command_buffer_texture_copy_regions_vector; + command_buffer_texture_copy_regions_vector.clear(); - // Pre-copy barrier. 
- if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - RDD::TextureBarrier tb; - tb.texture = tex->driver_id; - tb.dst_access = RDD::BARRIER_ACCESS_TRANSFER_READ_BIT; - tb.prev_layout = tex->layout; - tb.next_layout = RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL; - tb.subresources.aspect = tex->barrier_aspect_flags; - tb.subresources.mipmap_count = tex->mipmaps; - tb.subresources.base_layer = p_layer; - tb.subresources.layer_count = 1; + uint32_t w = tex->width; + uint32_t h = tex->height; + uint32_t d = tex->depth; + for (uint32_t i = 0; i < tex->mipmaps; i++) { + RDD::BufferTextureCopyRegion copy_region; + copy_region.buffer_offset = mip_layouts[i].offset; + copy_region.texture_subresources.aspect = tex->read_aspect_flags; + copy_region.texture_subresources.mipmap = i; + copy_region.texture_subresources.base_layer = p_layer; + copy_region.texture_subresources.layer_count = 1; + copy_region.texture_region_size.x = w; + copy_region.texture_region_size.y = h; + copy_region.texture_region_size.z = d; + command_buffer_texture_copy_regions_vector.push_back(copy_region); - driver->command_pipeline_barrier(command_buffer, RDD::PIPELINE_STAGE_TOP_OF_PIPE_BIT, RDD::PIPELINE_STAGE_TRANSFER_BIT, {}, {}, tb); + w = MAX(1u, w >> 1); + h = MAX(1u, h >> 1); + d = MAX(1u, d >> 1); } - { - uint32_t w = tex->width; - uint32_t h = tex->height; - uint32_t d = tex->depth; - for (uint32_t i = 0; i < tex->mipmaps; i++) { - RDD::BufferTextureCopyRegion copy_region; - copy_region.buffer_offset = mip_layouts[i].offset; - copy_region.texture_subresources.aspect = tex->read_aspect_flags; - copy_region.texture_subresources.mipmap = i; - copy_region.texture_subresources.base_layer = p_layer; - copy_region.texture_subresources.layer_count = 1; - copy_region.texture_region_size.x = w; - copy_region.texture_region_size.y = h; - copy_region.texture_region_size.z = d; - driver->command_copy_texture_to_buffer(command_buffer, tex->driver_id, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, 
tmp_buffer, copy_region); - - w = MAX(1u, w >> 1); - h = MAX(1u, h >> 1); - d = MAX(1u, d >> 1); - } + if (_texture_make_mutable(tex, p_texture)) { + // The texture must be mutable to be used as a copy source due to layout transitions. + draw_graph.add_synchronization(); } - // Post-copy barrier. - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - RDD::TextureBarrier tb; - tb.texture = tex->driver_id; - tb.src_access = RDD::BARRIER_ACCESS_TRANSFER_READ_BIT; - tb.dst_access = RDD::BARRIER_ACCESS_SHADER_READ_BIT; - if ((tex->usage_flags & TEXTURE_USAGE_STORAGE_BIT)) { - tb.dst_access.set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - tb.prev_layout = RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL; - tb.next_layout = tex->layout; - tb.subresources.aspect = tex->barrier_aspect_flags; - tb.subresources.mipmap_count = tex->mipmaps; - tb.subresources.base_layer = p_layer; - tb.subresources.layer_count = 1; - - driver->command_pipeline_barrier(command_buffer, RDD::PIPELINE_STAGE_TRANSFER_BIT, RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT | RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT | RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT, {}, {}, tb); - } + draw_graph.add_texture_get_data(tex->driver_id, tex->draw_tracker, tmp_buffer, command_buffer_texture_copy_regions_vector); _flush(true); @@ -1519,39 +1320,35 @@ Vector RenderingDevice::texture_get_data(RID p_texture, uint32_t p_laye ERR_FAIL_NULL_V(read_ptr, Vector()); Vector buffer_data; - { - uint32_t tight_buffer_size = get_image_format_required_size(tex->format, tex->width, tex->height, tex->depth, tex->mipmaps); - buffer_data.resize(tight_buffer_size); + uint32_t tight_buffer_size = get_image_format_required_size(tex->format, tex->width, tex->height, tex->depth, tex->mipmaps); + buffer_data.resize(tight_buffer_size); - uint8_t *write_ptr = buffer_data.ptrw(); + uint8_t *write_ptr = buffer_data.ptrw(); - uint32_t w = tex->width; - uint32_t h = tex->height; - uint32_t d = tex->depth; - for (uint32_t i = 0; i < tex->mipmaps; 
i++) { - uint32_t width = 0, height = 0, depth = 0; - uint32_t tight_mip_size = get_image_format_required_size(tex->format, w, h, d, 1, &width, &height, &depth); - uint32_t block_w = 0, block_h = 0; - get_compressed_image_format_block_dimensions(tex->format, block_w, block_h); - uint32_t tight_row_pitch = tight_mip_size / ((height / block_h) * depth); + w = tex->width; + h = tex->height; + d = tex->depth; + for (uint32_t i = 0; i < tex->mipmaps; i++) { + uint32_t width = 0, height = 0, depth = 0; + uint32_t tight_mip_size = get_image_format_required_size(tex->format, w, h, d, 1, &width, &height, &depth); + uint32_t block_w = 0, block_h = 0; + get_compressed_image_format_block_dimensions(tex->format, block_w, block_h); + uint32_t tight_row_pitch = tight_mip_size / ((height / block_h) * depth); - { - // Copy row-by-row to erase padding due to alignments. - const uint8_t *rp = read_ptr; - uint8_t *wp = write_ptr; - for (uint32_t row = h * d / block_h; row != 0; row--) { - memcpy(wp, rp, tight_row_pitch); - rp += mip_layouts[i].row_pitch; - wp += tight_row_pitch; - } - } - - w = MAX(1u, w >> 1); - h = MAX(1u, h >> 1); - d = MAX(1u, d >> 1); - read_ptr += mip_layouts[i].size; - write_ptr += tight_mip_size; + // Copy row-by-row to erase padding due to alignments. 
+ const uint8_t *rp = read_ptr; + uint8_t *wp = write_ptr; + for (uint32_t row = h * d / block_h; row != 0; row--) { + memcpy(wp, rp, tight_row_pitch); + rp += mip_layouts[i].row_pitch; + wp += tight_row_pitch; } + + w = MAX(1u, w >> 1); + h = MAX(1u, h >> 1); + d = MAX(1u, d >> 1); + read_ptr += mip_layouts[i].size; + write_ptr += tight_mip_size; } driver->buffer_unmap(tmp_buffer); @@ -1610,7 +1407,7 @@ uint64_t RenderingDevice::texture_get_native_handle(RID p_texture) { } #endif -Error RenderingDevice::texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, BitField p_post_barrier) { +Error RenderingDevice::texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer) { _THREAD_SAFE_METHOD_ Texture *src_tex = texture_owner.get_or_null(p_from_texture); @@ -1658,133 +1455,34 @@ Error RenderingDevice::texture_copy(RID p_from_texture, RID p_to_texture, const ERR_FAIL_COND_V_MSG(src_tex->read_aspect_flags != dst_tex->read_aspect_flags, ERR_INVALID_PARAMETER, "Source and destination texture must be of the same type (color or depth)."); - RDD::CommandBufferID command_buffer = frames[frame].draw_command_buffer; + RDD::TextureCopyRegion copy_region; + copy_region.src_subresources.aspect = src_tex->read_aspect_flags; + copy_region.src_subresources.mipmap = p_src_mipmap; + copy_region.src_subresources.base_layer = p_src_layer; + copy_region.src_subresources.layer_count = 1; + copy_region.src_offset = p_from; - // PRE Copy the image. 
+ copy_region.dst_subresources.aspect = dst_tex->read_aspect_flags; + copy_region.dst_subresources.mipmap = p_dst_mipmap; + copy_region.dst_subresources.base_layer = p_dst_layer; + copy_region.dst_subresources.layer_count = 1; + copy_region.dst_offset = p_to; - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - { // Source. - RDD::TextureBarrier tb; - tb.texture = src_tex->driver_id; - tb.dst_access = RDD::BARRIER_ACCESS_TRANSFER_READ_BIT; - tb.prev_layout = src_tex->layout; - tb.next_layout = RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL; - tb.subresources.aspect = src_tex->barrier_aspect_flags; - tb.subresources.base_mipmap = p_src_mipmap; - tb.subresources.mipmap_count = 1; - tb.subresources.base_layer = p_src_layer; - tb.subresources.layer_count = 1; + copy_region.size = p_size; - driver->command_pipeline_barrier(command_buffer, RDD::PIPELINE_STAGE_TOP_OF_PIPE_BIT, RDD::PIPELINE_STAGE_TRANSFER_BIT, {}, {}, tb); - } - { // Dest. - RDD::TextureBarrier tb; - tb.texture = dst_tex->driver_id; - tb.dst_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - tb.prev_layout = dst_tex->layout; - tb.next_layout = RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL; - tb.subresources.aspect = dst_tex->read_aspect_flags; - tb.subresources.base_mipmap = p_dst_mipmap; - tb.subresources.mipmap_count = 1; - tb.subresources.base_layer = p_dst_layer; - tb.subresources.layer_count = 1; - - driver->command_pipeline_barrier(command_buffer, RDD::PIPELINE_STAGE_TOP_OF_PIPE_BIT, RDD::PIPELINE_STAGE_TRANSFER_BIT, {}, {}, tb); - } + // The textures must be mutable to be used in the copy operation. + bool src_made_mutable = _texture_make_mutable(src_tex, p_from_texture); + bool dst_made_mutable = _texture_make_mutable(dst_tex, p_to_texture); + if (src_made_mutable || dst_made_mutable) { + draw_graph.add_synchronization(); } - // COPY. 
- - { - RDD::TextureCopyRegion copy_region; - copy_region.src_subresources.aspect = src_tex->read_aspect_flags; - copy_region.src_subresources.mipmap = p_src_mipmap; - copy_region.src_subresources.base_layer = p_src_layer; - copy_region.src_subresources.layer_count = 1; - copy_region.src_offset = p_from; - - copy_region.dst_subresources.aspect = dst_tex->read_aspect_flags; - copy_region.dst_subresources.mipmap = p_dst_mipmap; - copy_region.dst_subresources.base_layer = p_dst_layer; - copy_region.dst_subresources.layer_count = 1; - copy_region.dst_offset = p_to; - - copy_region.size = p_size; - - driver->command_copy_texture(command_buffer, src_tex->driver_id, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_tex->driver_id, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, copy_region); - } - - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - // RESTORE LAYOUT for SRC and DST. - - BitField stages; - BitField access; - if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { - stages.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); - access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_VERTEX)) { - stages.set_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT); - access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_FRAGMENT)) { - stages.set_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT); - access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { - stages.set_flag(RDD::PIPELINE_STAGE_TRANSFER_BIT); - access.set_flag(RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT); - } - - if (stages.is_empty()) { - stages.set_flag(RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); - } - - { // Restore src. 
- RDD::TextureBarrier tb; - tb.texture = src_tex->driver_id; - tb.src_access = RDD::BARRIER_ACCESS_TRANSFER_READ_BIT; - tb.dst_access = access; - tb.prev_layout = RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL; - tb.next_layout = src_tex->layout; - tb.subresources.aspect = src_tex->barrier_aspect_flags; - tb.subresources.base_mipmap = p_src_mipmap; - tb.subresources.mipmap_count = 1; - tb.subresources.base_layer = p_src_layer; - tb.subresources.layer_count = 1; - - driver->command_pipeline_barrier(command_buffer, RDD::PIPELINE_STAGE_TRANSFER_BIT, stages, {}, {}, tb); - } - - { // Make dst readable. - - RDD::TextureBarrier tb; - tb.texture = dst_tex->driver_id; - tb.src_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - tb.dst_access = access; - tb.prev_layout = RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL; - tb.next_layout = dst_tex->layout; - tb.subresources.aspect = dst_tex->read_aspect_flags; - tb.subresources.base_mipmap = p_dst_mipmap; - tb.subresources.mipmap_count = 1; - tb.subresources.base_layer = p_dst_layer; - tb.subresources.layer_count = 1; - - driver->command_pipeline_barrier(command_buffer, RDD::PIPELINE_STAGE_TRANSFER_BIT, stages, {}, {}, tb); - } - - if (dst_tex->used_in_frame != frames_drawn) { - dst_tex->used_in_raster = false; - dst_tex->used_in_compute = false; - dst_tex->used_in_frame = frames_drawn; - } - dst_tex->used_in_transfer = true; - } + draw_graph.add_texture_copy(src_tex->driver_id, src_tex->draw_tracker, dst_tex->driver_id, dst_tex->draw_tracker, copy_region); return OK; } -Error RenderingDevice::texture_resolve_multisample(RID p_from_texture, RID p_to_texture, BitField p_post_barrier) { +Error RenderingDevice::texture_resolve_multisample(RID p_from_texture, RID p_to_texture) { _THREAD_SAFE_METHOD_ Texture *src_tex = texture_owner.get_or_null(p_from_texture); @@ -1815,108 +1513,19 @@ Error RenderingDevice::texture_resolve_multisample(RID p_from_texture, RID p_to_ ERR_FAIL_COND_V_MSG(src_tex->read_aspect_flags != dst_tex->read_aspect_flags, 
ERR_INVALID_PARAMETER, "Source and destination texture must be of the same type (color or depth)."); - RDD::CommandBufferID command_buffer = frames[frame].draw_command_buffer; - - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - // PRE Copy the image. - - { // Source. - RDD::TextureBarrier tb; - tb.texture = src_tex->driver_id; - tb.dst_access = RDD::BARRIER_ACCESS_TRANSFER_READ_BIT; - tb.prev_layout = src_tex->layout; - tb.next_layout = RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL; - tb.subresources.aspect = src_tex->barrier_aspect_flags; - tb.subresources.base_mipmap = src_tex->base_mipmap; - tb.subresources.mipmap_count = 1; - tb.subresources.base_layer = src_tex->base_layer; - tb.subresources.layer_count = 1; - - driver->command_pipeline_barrier(command_buffer, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, RDD::PIPELINE_STAGE_TRANSFER_BIT, {}, {}, tb); - } - { // Dest. - RDD::TextureBarrier tb; - tb.texture = dst_tex->driver_id; - tb.dst_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - tb.prev_layout = dst_tex->layout; - tb.next_layout = RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL; - tb.subresources.aspect = dst_tex->barrier_aspect_flags; - tb.subresources.base_mipmap = dst_tex->base_mipmap; - tb.subresources.mipmap_count = 1; - tb.subresources.base_layer = dst_tex->base_layer; - tb.subresources.layer_count = 1; - - driver->command_pipeline_barrier(command_buffer, RDD::PIPELINE_STAGE_TOP_OF_PIPE_BIT, RDD::PIPELINE_STAGE_TRANSFER_BIT, {}, {}, tb); - } + // The textures must be mutable to be used in the resolve operation. + bool src_made_mutable = _texture_make_mutable(src_tex, p_from_texture); + bool dst_made_mutable = _texture_make_mutable(dst_tex, p_to_texture); + if (src_made_mutable || dst_made_mutable) { + draw_graph.add_synchronization(); } - // RESOLVE. 
- driver->command_resolve_texture(command_buffer, src_tex->driver_id, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_tex->base_layer, src_tex->base_mipmap, dst_tex->driver_id, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_tex->base_layer, dst_tex->base_mipmap); - - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - // RESTORE LAYOUT for SRC and DST. - - BitField stages; - BitField access; - if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { - stages.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); - access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_VERTEX)) { - stages.set_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT); - access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_FRAGMENT)) { - stages.set_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT); - access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { - stages.set_flag(RDD::PIPELINE_STAGE_TRANSFER_BIT); - access.set_flag(RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT); - } - - if (stages.is_empty()) { - stages.set_flag(RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); - } - - { // Restore src. - RDD::TextureBarrier tb; - tb.texture = src_tex->driver_id; - tb.src_access = RDD::BARRIER_ACCESS_TRANSFER_READ_BIT; - tb.dst_access = access; - tb.prev_layout = RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL; - tb.next_layout = src_tex->layout; - tb.subresources.aspect = src_tex->barrier_aspect_flags; - tb.subresources.base_mipmap = src_tex->base_mipmap; - tb.subresources.mipmap_count = 1; - tb.subresources.base_layer = src_tex->base_layer; - tb.subresources.layer_count = 1; - - driver->command_pipeline_barrier(command_buffer, RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT, stages, {}, {}, tb); - } - - { // Make dst readable. 
- - RDD::TextureBarrier tb; - tb.texture = dst_tex->driver_id; - tb.src_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - tb.dst_access = access; - tb.prev_layout = RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL; - tb.next_layout = dst_tex->layout; - tb.subresources.aspect = RDD::TEXTURE_ASPECT_COLOR_BIT; - tb.subresources.base_mipmap = dst_tex->base_mipmap; - tb.subresources.mipmap_count = 1; - tb.subresources.base_layer = dst_tex->base_layer; - tb.subresources.layer_count = 1; - - driver->command_pipeline_barrier(command_buffer, RDD::PIPELINE_STAGE_TRANSFER_BIT, stages, {}, {}, tb); - } - } + draw_graph.add_texture_resolve(src_tex->driver_id, src_tex->draw_tracker, dst_tex->driver_id, dst_tex->draw_tracker, src_tex->base_layer, src_tex->base_mipmap, dst_tex->base_layer, dst_tex->base_mipmap); return OK; } -Error RenderingDevice::texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, BitField p_post_barrier) { +Error RenderingDevice::texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers) { _THREAD_SAFE_METHOD_ Texture *src_tex = texture_owner.get_or_null(p_texture); @@ -1939,33 +1548,6 @@ Error RenderingDevice::texture_clear(RID p_texture, const Color &p_color, uint32 ERR_FAIL_COND_V(p_base_mipmap + p_mipmaps > src_tex->mipmaps, ERR_INVALID_PARAMETER); ERR_FAIL_COND_V(p_base_layer + p_layers > src_layer_count, ERR_INVALID_PARAMETER); - RDD::CommandBufferID command_buffer = frames[frame].draw_command_buffer; - - RDD::TextureLayout clear_layout = (src_tex->layout == RDD::TEXTURE_LAYOUT_GENERAL) ? RDD::TEXTURE_LAYOUT_GENERAL : RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL; - - // NOTE: Perhaps the valid stages/accesses for a given owner should be a property of the owner. (Here and places like _get_buffer_from_owner.) 
- const BitField valid_texture_stages = RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT | RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT | RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT; - constexpr BitField read_access = RDD::BARRIER_ACCESS_SHADER_READ_BIT; - constexpr BitField read_write_access = RDD::BARRIER_ACCESS_SHADER_READ_BIT | RDD::BARRIER_ACCESS_SHADER_WRITE_BIT; - const BitField valid_texture_access = (src_tex->usage_flags & TEXTURE_USAGE_STORAGE_BIT) ? read_write_access : read_access; - - // Barrier from previous access with optional layout change (see clear_layout logic above). - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - RDD::TextureBarrier tb; - tb.texture = src_tex->driver_id; - tb.src_access = valid_texture_access; - tb.dst_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - tb.prev_layout = src_tex->layout; - tb.next_layout = clear_layout; - tb.subresources.aspect = src_tex->read_aspect_flags; - tb.subresources.base_mipmap = src_tex->base_mipmap + p_base_mipmap; - tb.subresources.mipmap_count = p_mipmaps; - tb.subresources.base_layer = src_tex->base_layer + p_base_layer; - tb.subresources.layer_count = p_layers; - - driver->command_pipeline_barrier(command_buffer, valid_texture_stages, RDD::PIPELINE_STAGE_TRANSFER_BIT, {}, {}, tb); - } - RDD::TextureSubresourceRange range; range.aspect = src_tex->read_aspect_flags; range.base_mipmap = src_tex->base_mipmap + p_base_mipmap; @@ -1973,55 +1555,13 @@ Error RenderingDevice::texture_clear(RID p_texture, const Color &p_color, uint32 range.base_layer = src_tex->base_layer + p_base_layer; range.layer_count = p_layers; - driver->command_clear_color_texture(command_buffer, src_tex->driver_id, clear_layout, p_color, range); - - // Barrier to post clear accesses (changing back the layout if needed). 
- if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - BitField stages; - BitField access; - if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { - stages.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); - access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_VERTEX)) { - stages.set_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT); - access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_FRAGMENT)) { - stages.set_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT); - access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { - stages.set_flag(RDD::PIPELINE_STAGE_TRANSFER_BIT); - access.set_flag(RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT); - } - - if (stages.is_empty()) { - stages.set_flag(RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); - } - - RDD::TextureBarrier tb; - tb.texture = src_tex->driver_id; - tb.src_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - tb.dst_access = access; - tb.prev_layout = clear_layout; - tb.next_layout = src_tex->layout; - tb.subresources.aspect = src_tex->read_aspect_flags; - tb.subresources.base_mipmap = src_tex->base_mipmap + p_base_mipmap; - tb.subresources.mipmap_count = p_mipmaps; - tb.subresources.base_layer = src_tex->base_layer + p_base_layer; - tb.subresources.layer_count = p_layers; - - driver->command_pipeline_barrier(command_buffer, RDD::PIPELINE_STAGE_TRANSFER_BIT, stages, {}, {}, tb); - - if (src_tex->used_in_frame != frames_drawn) { - src_tex->used_in_raster = false; - src_tex->used_in_compute = false; - src_tex->used_in_frame = frames_drawn; - } - src_tex->used_in_transfer = true; + if (_texture_make_mutable(src_tex, p_texture)) { + // The texture must be mutable to be used as a clear destination. 
+ draw_graph.add_synchronization(); } + draw_graph.add_texture_clear(src_tex->driver_id, src_tex->draw_tracker, p_color, range); + return OK; } @@ -2040,6 +1580,30 @@ bool RenderingDevice::texture_is_format_supported_for_usage(DataFormat p_format, /**** FRAMEBUFFER ****/ /*********************/ +static RDD::AttachmentLoadOp initial_action_to_load_op(RenderingDevice::InitialAction p_action) { + switch (p_action) { + case RenderingDevice::INITIAL_ACTION_LOAD: + return RDD::ATTACHMENT_LOAD_OP_LOAD; + case RenderingDevice::INITIAL_ACTION_CLEAR: + return RDD::ATTACHMENT_LOAD_OP_CLEAR; + case RenderingDevice::INITIAL_ACTION_DISCARD: + return RDD::ATTACHMENT_LOAD_OP_DONT_CARE; + default: + ERR_FAIL_V_MSG(RDD::ATTACHMENT_LOAD_OP_DONT_CARE, "Invalid initial action value (" + itos(p_action) + ")"); + } +} + +static RDD::AttachmentStoreOp final_action_to_store_op(RenderingDevice::FinalAction p_action) { + switch (p_action) { + case RenderingDevice::FINAL_ACTION_STORE: + return RDD::ATTACHMENT_STORE_OP_STORE; + case RenderingDevice::FINAL_ACTION_DISCARD: + return RDD::ATTACHMENT_STORE_OP_DONT_CARE; + default: + ERR_FAIL_V_MSG(RDD::ATTACHMENT_STORE_OP_DONT_CARE, "Invalid final action value (" + itos(p_action) + ")"); + } +} + RDD::RenderPassID RenderingDevice::_render_pass_create(const Vector &p_attachments, const Vector &p_passes, InitialAction p_initial_action, FinalAction p_final_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, uint32_t p_view_count, Vector *r_samples) { // NOTE: // Before the refactor to RenderingDevice-RenderingDeviceDriver, there was commented out code to @@ -2077,209 +1641,40 @@ RDD::RenderPassID RenderingDevice::_render_pass_create(const Vector samples; - RDD::RenderPassID render_pass = _render_pass_create(p_attachments, p_passes, INITIAL_ACTION_CLEAR, FINAL_ACTION_READ, INITIAL_ACTION_CLEAR, FINAL_ACTION_READ, p_view_count, &samples); // Actions don't matter for this use case. 
+ RDD::RenderPassID render_pass = _render_pass_create(p_attachments, p_passes, INITIAL_ACTION_CLEAR, FINAL_ACTION_STORE, INITIAL_ACTION_CLEAR, FINAL_ACTION_STORE, p_view_count, &samples); // Actions don't matter for this use case. if (!render_pass) { // Was likely invalid. return INVALID_ID; @@ -2701,18 +2096,14 @@ RID RenderingDevice::vertex_buffer_create(uint32_t p_size_bytes, const Vectorbuffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU); ERR_FAIL_COND_V(!buffer.driver_id, RID()); + // Vertex buffers are assumed to be immutable unless they don't have initial data or they've been marked for storage explicitly. + if (p_data.is_empty() || p_use_as_storage) { + buffer.draw_tracker = RDG::resource_tracker_create(); + buffer.draw_tracker->buffer_driver_id = buffer.driver_id; + } + if (p_data.size()) { - uint64_t data_size = p_data.size(); - const uint8_t *r = p_data.ptr(); - _buffer_update(&buffer, 0, r, data_size); - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - RDD::BufferBarrier bb; - bb.buffer = buffer.driver_id; - bb.src_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - bb.dst_access = RDD::BARRIER_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; - bb.size = data_size; - driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_TRANSFER_BIT, RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT, {}, bb, {}); - } + _buffer_update(&buffer, RID(), 0, p_data.ptr(), p_data.size()); } buffer_memory += buffer.size; @@ -2809,6 +2200,12 @@ RID RenderingDevice::vertex_array_create(uint32_t p_vertex_count, VertexFormatID } vertex_array.buffers.push_back(buffer->driver_id); + + if (buffer->draw_tracker != nullptr) { + vertex_array.draw_trackers.push_back(buffer->draw_tracker); + } else { + vertex_array.untracked_buffers.insert(p_src_buffers[i]); + } } RID id = vertex_array_owner.make_rid(vertex_array); @@ -2863,18 +2260,14 @@ RID RenderingDevice::index_buffer_create(uint32_t p_index_count, IndexBufferForm 
index_buffer.driver_id = driver->buffer_create(index_buffer.size, index_buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU); ERR_FAIL_COND_V(!index_buffer.driver_id, RID()); + // Index buffers are assumed to be immutable unless they don't have initial data. + if (p_data.is_empty()) { + index_buffer.draw_tracker = RDG::resource_tracker_create(); + index_buffer.draw_tracker->buffer_driver_id = index_buffer.driver_id; + } + if (p_data.size()) { - uint64_t data_size = p_data.size(); - const uint8_t *r = p_data.ptr(); - _buffer_update(&index_buffer, 0, r, data_size); - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - RDD::BufferBarrier bb; - bb.buffer = index_buffer.driver_id; - bb.src_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - bb.dst_access = RDD::BARRIER_ACCESS_INDEX_READ_BIT; - bb.size = data_size; - driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_TRANSFER_BIT, RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT, {}, bb, {}); - } + _buffer_update(&index_buffer, RID(), 0, p_data.ptr(), p_data.size()); } buffer_memory += index_buffer.size; @@ -2899,6 +2292,7 @@ RID RenderingDevice::index_array_create(RID p_index_buffer, uint32_t p_index_off IndexArray index_array; index_array.max_index = index_buffer->max_index; index_array.driver_id = index_buffer->driver_id; + index_array.draw_tracker = index_buffer->draw_tracker; index_array.offset = p_index_offset; index_array.indices = p_index_count; index_array.format = index_buffer->format; @@ -2991,6 +2385,29 @@ RID RenderingDevice::shader_create_from_bytecode(const Vector &p_shader shader->set_formats.push_back(format); } + for (ShaderStage stage : shader_desc.stages) { + switch (stage) { + case SHADER_STAGE_VERTEX: + shader->stage_bits.set_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT); + break; + case SHADER_STAGE_FRAGMENT: + shader->stage_bits.set_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + break; + case SHADER_STAGE_TESSELATION_CONTROL: + 
shader->stage_bits.set_flag(RDD::PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT); + break; + case SHADER_STAGE_TESSELATION_EVALUATION: + shader->stage_bits.set_flag(RDD::PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT); + break; + case SHADER_STAGE_COMPUTE: + shader->stage_bits.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); + break; + default: + DEV_ASSERT(false && "Unknown shader stage."); + break; + } + } + #ifdef DEV_ENABLED set_resource_name(id, "RID:" + itos(id.get_id())); #endif @@ -3025,18 +2442,14 @@ RID RenderingDevice::uniform_buffer_create(uint32_t p_size_bytes, const Vectorbuffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU); ERR_FAIL_COND_V(!buffer.driver_id, RID()); + // Uniform buffers are assumed to be immutable unless they don't have initial data. + if (p_data.is_empty()) { + buffer.draw_tracker = RDG::resource_tracker_create(); + buffer.draw_tracker->buffer_driver_id = buffer.driver_id; + } + if (p_data.size()) { - uint64_t data_size = p_data.size(); - const uint8_t *r = p_data.ptr(); - _buffer_update(&buffer, 0, r, data_size); - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - RDD::BufferBarrier bb; - bb.buffer = buffer.driver_id; - bb.src_access = RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; - bb.dst_access = RDD::BARRIER_ACCESS_UNIFORM_READ_BIT; - bb.size = data_size; - driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_TRANSFER_BIT, RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT | RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT | RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT, {}, bb, {}); - } + _buffer_update(&buffer, RID(), 0, p_data.ptr(), p_data.size()); } buffer_memory += buffer.size; @@ -3073,8 +2486,9 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p // Used for verification to make sure a uniform set does not use a framebuffer bound texture. 
LocalVector attachable_textures; - Vector mutable_sampled_textures; - Vector mutable_storage_textures; + Vector draw_trackers; + Vector draw_trackers_usage; + HashMap untracked_usage; for (uint32_t i = 0; i < set_uniform_count; i++) { const ShaderUniform &set_uniform = set_uniforms[i]; @@ -3126,7 +2540,8 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p RDD::SamplerID *sampler_driver_id = sampler_owner.get_or_null(uniform.get_id(j + 0)); ERR_FAIL_COND_V_MSG(!sampler_driver_id, RID(), "SamplerBuffer (binding: " + itos(uniform.binding) + ", index " + itos(j + 1) + ") is not a valid sampler."); - Texture *texture = texture_owner.get_or_null(uniform.get_id(j + 1)); + RID texture_id = uniform.get_id(j + 1); + Texture *texture = texture_owner.get_or_null(texture_id); ERR_FAIL_NULL_V_MSG(texture, RID(), "Texture (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid texture."); ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_SAMPLING_BIT), RID(), @@ -3139,8 +2554,11 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p attachable_textures.push_back(attachable_texture); } - if ((texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT)) { - mutable_sampled_textures.push_back(texture); + if (texture->draw_tracker != nullptr) { + draw_trackers.push_back(texture->draw_tracker); + draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_TEXTURE_SAMPLE); + } else { + untracked_usage[texture_id] = RDG::RESOURCE_USAGE_TEXTURE_SAMPLE; } DEV_ASSERT(!texture->owner.is_valid() || texture_owner.get_or_null(texture->owner)); @@ -3159,7 +2577,8 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p } for (uint32_t j = 0; j < uniform.get_id_count(); j++) { - Texture *texture = texture_owner.get_or_null(uniform.get_id(j)); + RID texture_id = uniform.get_id(j); + Texture *texture = texture_owner.get_or_null(texture_id); ERR_FAIL_NULL_V_MSG(texture, RID(), "Texture (binding: " + itos(uniform.binding) + ", 
index " + itos(j) + ") is not a valid texture."); ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_SAMPLING_BIT), RID(), @@ -3172,8 +2591,11 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p attachable_textures.push_back(attachable_texture); } - if ((texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT)) { - mutable_sampled_textures.push_back(texture); + if (texture->draw_tracker != nullptr) { + draw_trackers.push_back(texture->draw_tracker); + draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_TEXTURE_SAMPLE); + } else { + untracked_usage[texture_id] = RDG::RESOURCE_USAGE_TEXTURE_SAMPLE; } DEV_ASSERT(!texture->owner.is_valid() || texture_owner.get_or_null(texture->owner)); @@ -3191,7 +2613,8 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p } for (uint32_t j = 0; j < uniform.get_id_count(); j++) { - Texture *texture = texture_owner.get_or_null(uniform.get_id(j)); + RID texture_id = uniform.get_id(j); + Texture *texture = texture_owner.get_or_null(texture_id); ERR_FAIL_NULL_V_MSG(texture, RID(), "Image (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid texture."); @@ -3199,8 +2622,19 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT), RID(), "Image (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") needs the TEXTURE_USAGE_STORAGE_BIT usage flag set in order to be used as uniform."); - if ((texture->usage_flags & TEXTURE_USAGE_SAMPLING_BIT)) { - mutable_storage_textures.push_back(texture); + if (_texture_make_mutable(texture, texture_id)) { + // The texture must be mutable as a layout transition will be required. 
+ draw_graph.add_synchronization(); + } + + if (texture->draw_tracker != nullptr) { + draw_trackers.push_back(texture->draw_tracker); + + if (set_uniform.writable) { + draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE); + } else { + draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_STORAGE_IMAGE_READ); + } } DEV_ASSERT(!texture->owner.is_valid() || texture_owner.get_or_null(texture->owner)); @@ -3218,9 +2652,27 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p } for (uint32_t j = 0; j < uniform.get_id_count(); j++) { - Buffer *buffer = texture_buffer_owner.get_or_null(uniform.get_id(j)); + RID buffer_id = uniform.get_id(j); + Buffer *buffer = texture_buffer_owner.get_or_null(buffer_id); ERR_FAIL_NULL_V_MSG(buffer, RID(), "Texture Buffer (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid texture buffer."); + if (set_uniform.writable && _buffer_make_mutable(buffer, buffer_id)) { + // The buffer must be mutable if it's used for writing. 
+ draw_graph.add_synchronization(); + } + + if (buffer->draw_tracker != nullptr) { + draw_trackers.push_back(buffer->draw_tracker); + + if (set_uniform.writable) { + draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_TEXTURE_BUFFER_READ_WRITE); + } else { + draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_TEXTURE_BUFFER_READ); + } + } else { + untracked_usage[buffer_id] = RDG::RESOURCE_USAGE_TEXTURE_BUFFER_READ; + } + driver_uniform.ids.push_back(buffer->driver_id); } } break; @@ -3237,9 +2689,17 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p RDD::SamplerID *sampler_driver_id = sampler_owner.get_or_null(uniform.get_id(j + 0)); ERR_FAIL_COND_V_MSG(!sampler_driver_id, RID(), "SamplerBuffer (binding: " + itos(uniform.binding) + ", index " + itos(j + 1) + ") is not a valid sampler."); - Buffer *buffer = texture_buffer_owner.get_or_null(uniform.get_id(j + 1)); + RID buffer_id = uniform.get_id(j + 1); + Buffer *buffer = texture_buffer_owner.get_or_null(buffer_id); ERR_FAIL_NULL_V_MSG(buffer, RID(), "SamplerBuffer (binding: " + itos(uniform.binding) + ", index " + itos(j + 1) + ") is not a valid texture buffer."); + if (buffer->draw_tracker != nullptr) { + draw_trackers.push_back(buffer->draw_tracker); + draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_TEXTURE_BUFFER_READ); + } else { + untracked_usage[buffer_id] = RDG::RESOURCE_USAGE_TEXTURE_BUFFER_READ; + } + driver_uniform.ids.push_back(*sampler_driver_id); driver_uniform.ids.push_back(buffer->driver_id); } @@ -3251,12 +2711,20 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p ERR_FAIL_COND_V_MSG(uniform.get_id_count() != 1, RID(), "Uniform buffer supplied (binding: " + itos(uniform.binding) + ") must provide one ID (" + itos(uniform.get_id_count()) + " provided)."); - Buffer *buffer = uniform_buffer_owner.get_or_null(uniform.get_id(0)); + RID buffer_id = uniform.get_id(0); + Buffer *buffer = uniform_buffer_owner.get_or_null(buffer_id); ERR_FAIL_NULL_V_MSG(buffer, 
RID(), "Uniform buffer supplied (binding: " + itos(uniform.binding) + ") is invalid."); ERR_FAIL_COND_V_MSG(buffer->size < (uint32_t)set_uniform.length, RID(), "Uniform buffer supplied (binding: " + itos(uniform.binding) + ") size (" + itos(buffer->size) + " is smaller than size of shader uniform: (" + itos(set_uniform.length) + ")."); + if (buffer->draw_tracker != nullptr) { + draw_trackers.push_back(buffer->draw_tracker); + draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_UNIFORM_BUFFER_READ); + } else { + untracked_usage[buffer_id] = RDG::RESOURCE_USAGE_UNIFORM_BUFFER_READ; + } + driver_uniform.ids.push_back(buffer->driver_id); } break; case UNIFORM_TYPE_STORAGE_BUFFER: { @@ -3265,10 +2733,11 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p Buffer *buffer = nullptr; - if (storage_buffer_owner.owns(uniform.get_id(0))) { - buffer = storage_buffer_owner.get_or_null(uniform.get_id(0)); - } else if (vertex_buffer_owner.owns(uniform.get_id(0))) { - buffer = vertex_buffer_owner.get_or_null(uniform.get_id(0)); + RID buffer_id = uniform.get_id(0); + if (storage_buffer_owner.owns(buffer_id)) { + buffer = storage_buffer_owner.get_or_null(buffer_id); + } else if (vertex_buffer_owner.owns(buffer_id)) { + buffer = vertex_buffer_owner.get_or_null(buffer_id); ERR_FAIL_COND_V_MSG(!(buffer->usage.has_flag(RDD::BUFFER_USAGE_STORAGE_BIT)), RID(), "Vertex buffer supplied (binding: " + itos(uniform.binding) + ") was not created with storage flag."); } @@ -3278,6 +2747,23 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p ERR_FAIL_COND_V_MSG(set_uniform.length > 0 && buffer->size != (uint32_t)set_uniform.length, RID(), "Storage buffer supplied (binding: " + itos(uniform.binding) + ") size (" + itos(buffer->size) + " does not match size of shader uniform: (" + itos(set_uniform.length) + ")."); + if (set_uniform.writable && _buffer_make_mutable(buffer, buffer_id)) { + // The buffer must be mutable if it's used for writing. 
+ draw_graph.add_synchronization(); + } + + if (buffer->draw_tracker != nullptr) { + draw_trackers.push_back(buffer->draw_tracker); + + if (set_uniform.writable) { + draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_STORAGE_BUFFER_READ_WRITE); + } else { + draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_STORAGE_BUFFER_READ); + } + } else { + untracked_usage[buffer_id] = RDG::RESOURCE_USAGE_STORAGE_BUFFER_READ; + } + driver_uniform.ids.push_back(buffer->driver_id); } break; case UNIFORM_TYPE_INPUT_ATTACHMENT: { @@ -3292,7 +2778,8 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p } for (uint32_t j = 0; j < uniform.get_id_count(); j++) { - Texture *texture = texture_owner.get_or_null(uniform.get_id(j)); + RID texture_id = uniform.get_id(j); + Texture *texture = texture_owner.get_or_null(texture_id); ERR_FAIL_NULL_V_MSG(texture, RID(), "InputAttachment (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid texture."); @@ -3302,6 +2789,17 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p DEV_ASSERT(!texture->owner.is_valid() || texture_owner.get_or_null(texture->owner)); + if (_texture_make_mutable(texture, texture_id)) { + // The texture must be mutable as a layout transition will be required. + draw_graph.add_synchronization(); + } + + if (texture->draw_tracker != nullptr) { + bool depth_stencil_read = (texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT); + draw_trackers.push_back(texture->draw_tracker); + draw_trackers_usage.push_back(depth_stencil_read ? 
RDG::RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ : RDG::RESOURCE_USAGE_ATTACHMENT_COLOR_READ); + } + driver_uniform.ids.push_back(texture->driver_id); } } break; @@ -3317,8 +2815,9 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p uniform_set.driver_id = driver_uniform_set; uniform_set.format = shader->set_formats[p_shader_set]; uniform_set.attachable_textures = attachable_textures; - uniform_set.mutable_sampled_textures = mutable_sampled_textures; - uniform_set.mutable_storage_textures = mutable_storage_textures; + uniform_set.draw_trackers = draw_trackers; + uniform_set.draw_trackers_usage = draw_trackers_usage; + uniform_set.untracked_usage = untracked_usage; uniform_set.shader_set = p_shader_set; uniform_set.shader_id = p_shader; @@ -3486,6 +2985,7 @@ RID RenderingDevice::render_pipeline_create(RID p_shader, FramebufferFormatID p_ pipeline.shader_layout_hash = shader->layout_hash; pipeline.set_formats = shader->set_formats; pipeline.push_constant_size = shader->push_constant_size; + pipeline.stage_bits = shader->stage_bits; #ifdef DEBUG_ENABLED pipeline.validation.dynamic_state = p_dynamic_state_flags; @@ -3623,15 +3123,13 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin_for_screen(DisplayS ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time."); ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time."); - RDD::CommandBufferID command_buffer = frames[frame].draw_command_buffer; - if (!context->window_is_valid_swapchain(p_screen)) { return INVALID_ID; } - Size2i size = Size2i(context->window_get_width(p_screen), context->window_get_height(p_screen)); + Rect2i viewport = Rect2i(0, 0, context->window_get_width(p_screen), context->window_get_height(p_screen)); - _draw_list_allocate(Rect2i(Vector2i(), size), 0, 0); + _draw_list_allocate(viewport, 0); #ifdef DEBUG_ENABLED draw_list_framebuffer_format = 
screen_get_framebuffer_format(); #endif @@ -3639,16 +3137,11 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin_for_screen(DisplayS RDD::RenderPassClearValue clear_value; clear_value.color = p_clear_color; - driver->command_begin_render_pass( - command_buffer, - context->window_get_render_pass(p_screen), - context->window_get_framebuffer(p_screen), - RDD::COMMAND_BUFFER_TYPE_PRIMARY, - Rect2i(0, 0, size.width, size.height), - VectorView(&clear_value, 1)); - driver->command_render_set_viewport(command_buffer, Rect2i(Point2i(), size)); - driver->command_render_set_scissor(command_buffer, Rect2i(Point2i(), size)); + draw_graph.add_draw_list_begin(context->window_get_render_pass(p_screen), context->window_get_framebuffer(p_screen), viewport, clear_value, true, false); + + _draw_list_set_viewport(viewport); + _draw_list_set_scissor(viewport); return int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT; } @@ -3694,8 +3187,12 @@ Error RenderingDevice::_draw_list_setup_framebuffer(Framebuffer *p_framebuffer, return OK; } -Error RenderingDevice::_draw_list_render_pass_begin(Framebuffer *p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_colors, float p_clear_depth, uint32_t p_clear_stencil, Point2i p_viewport_offset, Point2i p_viewport_size, RDD::FramebufferID p_framebuffer_driver_id, RDD::RenderPassID p_render_pass, RDD::CommandBufferID p_command_buffer, RDD::CommandBufferType p_cmd_buffer_mode, const Vector &p_storage_textures, bool p_constrained_to_region) { +Error RenderingDevice::_draw_list_render_pass_begin(Framebuffer *p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_colors, float p_clear_depth, uint32_t p_clear_stencil, Point2i p_viewport_offset, Point2i p_viewport_size, RDD::FramebufferID 
p_framebuffer_driver_id, RDD::RenderPassID p_render_pass) { LocalVector clear_values; + LocalVector resource_trackers; + LocalVector resource_usages; + bool uses_color = false; + bool uses_depth = false; clear_values.resize(p_framebuffer->texture_ids.size()); int clear_values_count = 0; { @@ -3709,69 +3206,33 @@ Error RenderingDevice::_draw_list_render_pass_begin(Framebuffer *p_framebuffer, continue; } - if (color_index < p_clear_colors.size() && texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { - ERR_FAIL_INDEX_V(color_index, p_clear_colors.size(), ERR_BUG); // A bug. - clear_value.color = p_clear_colors[color_index]; - color_index++; + if (texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { + if (color_index < p_clear_colors.size()) { + ERR_FAIL_INDEX_V(color_index, p_clear_colors.size(), ERR_BUG); // A bug. + clear_value.color = p_clear_colors[color_index]; + color_index++; + } + + resource_trackers.push_back(texture->draw_tracker); + resource_usages.push_back(RDG::RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE); + uses_color = true; } else if (texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { clear_value.depth = p_clear_depth; clear_value.stencil = p_clear_stencil; + resource_trackers.push_back(texture->draw_tracker); + resource_usages.push_back(RDG::RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE); + uses_depth = true; } clear_values[clear_values_count++] = clear_value; } } - for (int i = 0; i < p_storage_textures.size(); i++) { - Texture *texture = texture_owner.get_or_null(p_storage_textures[i]); - if (!texture) { - continue; - } - ERR_CONTINUE_MSG(!(texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT), "Supplied storage texture " + itos(i) + " for draw list is not set to be used for storage."); - - if (texture->usage_flags & TEXTURE_USAGE_SAMPLING_BIT) { - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - // Must change layout to general. 
- RDD::TextureBarrier tb; - tb.texture = texture->driver_id; - tb.src_access = (RDD::BARRIER_ACCESS_SHADER_READ_BIT | RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - tb.dst_access = (RDD::BARRIER_ACCESS_SHADER_READ_BIT | RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - tb.prev_layout = texture->layout; - tb.next_layout = RDD::TEXTURE_LAYOUT_GENERAL; - tb.subresources.aspect = texture->read_aspect_flags; - tb.subresources.base_mipmap = texture->base_mipmap; - tb.subresources.mipmap_count = texture->mipmaps; - tb.subresources.base_layer = texture->base_layer; - tb.subresources.layer_count = texture->layers; - - driver->command_pipeline_barrier(p_command_buffer, RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT | RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT, RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT | RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT, {}, {}, tb); - - texture->layout = RDD::TEXTURE_LAYOUT_GENERAL; - } - - draw_list_storage_textures.push_back(p_storage_textures[i]); - } - } - - Rect2i region; - if (p_constrained_to_region) { - region = Rect2i(p_viewport_offset, p_viewport_size); - } else { - region = Rect2i(Point2i(), p_framebuffer->size); - } - - driver->command_begin_render_pass( - p_command_buffer, - p_render_pass, - p_framebuffer_driver_id, - p_cmd_buffer_mode, - region, - clear_values); + draw_graph.add_draw_list_begin(p_render_pass, p_framebuffer_driver_id, Rect2i(p_viewport_offset, p_viewport_size), clear_values, uses_color, uses_depth); + draw_graph.add_draw_list_usages(resource_trackers, resource_usages); // Mark textures as bound. 
draw_list_bound_textures.clear(); - draw_list_unbind_color_textures = p_final_color_action != FINAL_ACTION_CONTINUE; - draw_list_unbind_depth_textures = p_final_depth_action != FINAL_ACTION_CONTINUE; for (int i = 0; i < p_framebuffer->texture_ids.size(); i++) { Texture *texture = texture_owner.get_or_null(p_framebuffer->texture_ids[i]); @@ -3785,6 +3246,14 @@ Error RenderingDevice::_draw_list_render_pass_begin(Framebuffer *p_framebuffer, return OK; } +void RenderingDevice::_draw_list_set_viewport(Rect2i p_rect) { + draw_graph.add_draw_list_set_viewport(p_rect); +} + +void RenderingDevice::_draw_list_set_scissor(Rect2i p_rect) { + draw_graph.add_draw_list_set_scissor(p_rect); +} + void RenderingDevice::_draw_list_insert_clear_region(DrawList *p_draw_list, Framebuffer *p_framebuffer, Point2i p_viewport_offset, Point2i p_viewport_size, bool p_clear_color, const Vector &p_clear_colors, bool p_clear_depth, float p_depth, uint32_t p_stencil) { LocalVector clear_attachments; int color_index = 0; @@ -3818,24 +3287,19 @@ void RenderingDevice::_draw_list_insert_clear_region(DrawList *p_draw_list, Fram } Rect2i rect = Rect2i(p_viewport_offset, p_viewport_size); - - driver->command_render_clear_attachments(p_draw_list->command_buffer, clear_attachments, rect); + draw_graph.add_draw_list_clear_attachments(clear_attachments, rect); } -RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const Vector &p_storage_textures) { +RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values, float p_clear_depth, uint32_t 
p_clear_stencil, const Rect2 &p_region) { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time."); - ERR_FAIL_COND_V_MSG(compute_list != nullptr && !compute_list->state.allow_draw_overlap, INVALID_ID, "Only one draw/compute list can be active at the same time."); Framebuffer *framebuffer = framebuffer_owner.get_or_null(p_framebuffer); ERR_FAIL_NULL_V(framebuffer, INVALID_ID); Point2i viewport_offset; Point2i viewport_size = framebuffer->size; - bool constrained_to_region = false; - bool needs_clear_color = false; - bool needs_clear_depth = false; if (p_region != Rect2() && p_region != Rect2(Vector2(), viewport_size)) { // Check custom region. Rect2i viewport(viewport_offset, viewport_size); @@ -3848,34 +3312,9 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, viewport_offset = regioni.position; viewport_size = regioni.size; - - // If clearing regions both in color and depth, we can switch to a fast path where we let Vulkan to the clears - // and we constrain the render area to the region. 
- if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION && p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION) { - constrained_to_region = true; - p_initial_color_action = INITIAL_ACTION_CLEAR; - p_initial_depth_action = INITIAL_ACTION_CLEAR; - } else { - if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION_CONTINUE) { - needs_clear_color = true; - p_initial_color_action = INITIAL_ACTION_CONTINUE; - } - if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION_CONTINUE) { - needs_clear_depth = true; - p_initial_depth_action = INITIAL_ACTION_CONTINUE; - } - if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION) { - needs_clear_color = true; - p_initial_color_action = INITIAL_ACTION_KEEP; - } - if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION) { - needs_clear_depth = true; - p_initial_depth_action = INITIAL_ACTION_KEEP; - } - } } - if (p_initial_color_action == INITIAL_ACTION_CLEAR || needs_clear_color) { // Check clear values. + if (p_initial_color_action == INITIAL_ACTION_CLEAR) { // Check clear values. 
int color_count = 0; for (int i = 0; i < framebuffer->texture_ids.size(); i++) { Texture *texture = texture_owner.get_or_null(framebuffer->texture_ids[i]); @@ -3897,8 +3336,7 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, Error err = _draw_list_setup_framebuffer(framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, &fb_driver_id, &render_pass, &draw_list_subpass_count); ERR_FAIL_COND_V(err != OK, INVALID_ID); - RDD::CommandBufferID command_buffer = frames[frame].draw_command_buffer; - err = _draw_list_render_pass_begin(framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, viewport_offset, viewport_size, fb_driver_id, render_pass, command_buffer, RDD::COMMAND_BUFFER_TYPE_PRIMARY, p_storage_textures, constrained_to_region); + err = _draw_list_render_pass_begin(framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, viewport_offset, viewport_size, fb_driver_id, render_pass); if (err != OK) { return INVALID_ID; @@ -3907,135 +3345,23 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, draw_list_render_pass = render_pass; draw_list_vkframebuffer = fb_driver_id; - _draw_list_allocate(Rect2i(viewport_offset, viewport_size), 0, 0); + _draw_list_allocate(Rect2i(viewport_offset, viewport_size), 0); #ifdef DEBUG_ENABLED draw_list_framebuffer_format = framebuffer->format_id; #endif draw_list_current_subpass = 0; - if (needs_clear_color || needs_clear_depth) { - DEV_ASSERT(!constrained_to_region); - _draw_list_insert_clear_region(draw_list, framebuffer, viewport_offset, viewport_size, needs_clear_color, p_clear_color_values, needs_clear_depth, p_clear_depth, p_clear_stencil); - } - - driver->command_render_set_viewport(command_buffer, Rect2i(viewport_offset, 
viewport_size)); - driver->command_render_set_scissor(command_buffer, Rect2i(viewport_offset, viewport_size)); + _draw_list_set_viewport(Rect2i(viewport_offset, viewport_size)); + _draw_list_set_scissor(Rect2i(viewport_offset, viewport_size)); return int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT; } +#ifndef DISABLE_DEPRECATED Error RenderingDevice::draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, DrawListID *r_split_ids, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const Vector &p_storage_textures) { - _THREAD_SAFE_METHOD_ - - ERR_FAIL_COND_V_MSG(draw_list != nullptr, ERR_BUSY, "Only one draw list can be active at the same time."); - ERR_FAIL_COND_V_MSG(compute_list != nullptr && !compute_list->state.allow_draw_overlap, ERR_BUSY, "Only one draw/compute list can be active at the same time."); - - ERR_FAIL_COND_V(p_splits < 1, ERR_INVALID_DECLARATION); - - Framebuffer *framebuffer = framebuffer_owner.get_or_null(p_framebuffer); - ERR_FAIL_NULL_V(framebuffer, ERR_INVALID_DECLARATION); - - Point2i viewport_offset; - Point2i viewport_size = framebuffer->size; - bool constrained_to_region = false; - bool needs_clear_color = false; - bool needs_clear_depth = false; - - if (p_region != Rect2() && p_region != Rect2(Vector2(), viewport_size)) { // Check custom region. 
- Rect2i viewport(viewport_offset, viewport_size); - Rect2i regioni = p_region; - if (!(regioni.position.x >= viewport.position.x) && (regioni.position.y >= viewport.position.y) && - ((regioni.position.x + regioni.size.x) <= (viewport.position.x + viewport.size.x)) && - ((regioni.position.y + regioni.size.y) <= (viewport.position.y + viewport.size.y))) { - ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "When supplying a custom region, it must be contained within the framebuffer rectangle"); - } - - viewport_offset = regioni.position; - viewport_size = regioni.size; - - // If clearing regions both in color and depth, we can switch to a fast path where we let Vulkan to the clears - // and we constrain the render area to the region. - if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION && p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION) { - constrained_to_region = true; - p_initial_color_action = INITIAL_ACTION_CLEAR; - p_initial_depth_action = INITIAL_ACTION_CLEAR; - } else { - if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION_CONTINUE) { - needs_clear_color = true; - p_initial_color_action = INITIAL_ACTION_CONTINUE; - } - if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION_CONTINUE) { - needs_clear_depth = true; - p_initial_depth_action = INITIAL_ACTION_CONTINUE; - } - if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION) { - needs_clear_color = true; - p_initial_color_action = INITIAL_ACTION_KEEP; - } - if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION) { - needs_clear_depth = true; - p_initial_depth_action = INITIAL_ACTION_KEEP; - } - } - } - - if (p_initial_color_action == INITIAL_ACTION_CLEAR || needs_clear_color) { // Check clear values. 
- - int color_count = 0; - for (int i = 0; i < framebuffer->texture_ids.size(); i++) { - Texture *texture = texture_owner.get_or_null(framebuffer->texture_ids[i]); - - if (!texture || !(texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) { - color_count++; - } - } - - ERR_FAIL_COND_V_MSG(p_clear_color_values.size() != color_count, ERR_INVALID_PARAMETER, - "Clear color values supplied (" + itos(p_clear_color_values.size()) + ") differ from the amount required for framebuffer (" + itos(color_count) + ")."); - } - - RDD::FramebufferID fb_driver_id; - RDD::RenderPassID render_pass; - - Error err = _draw_list_setup_framebuffer(framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, &fb_driver_id, &render_pass, &draw_list_subpass_count); - ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE); - - RDD::CommandBufferID frame_command_buffer = frames[frame].draw_command_buffer; - err = _draw_list_render_pass_begin(framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, viewport_offset, viewport_size, fb_driver_id, render_pass, frame_command_buffer, RDD::COMMAND_BUFFER_TYPE_SECONDARY, p_storage_textures, constrained_to_region); - - if (err != OK) { - return ERR_CANT_CREATE; - } - - draw_list_current_subpass = 0; - -#ifdef DEBUG_ENABLED - draw_list_framebuffer_format = framebuffer->format_id; -#endif - draw_list_render_pass = render_pass; - draw_list_vkframebuffer = fb_driver_id; - - err = _draw_list_allocate(Rect2i(viewport_offset, viewport_size), p_splits, 0); - if (err != OK) { - return err; - } - - if (needs_clear_color || needs_clear_depth) { - DEV_ASSERT(!constrained_to_region); - _draw_list_insert_clear_region(&draw_list[0], framebuffer, viewport_offset, viewport_size, needs_clear_color, p_clear_color_values, needs_clear_depth, p_clear_depth, p_clear_stencil); - } - - bool secondary_viewport_scissor = 
driver->api_trait_get(RDD::API_TRAIT_SECONDARY_VIEWPORT_SCISSOR); - for (uint32_t i = 0; i < p_splits; i++) { - if (secondary_viewport_scissor) { - driver->command_render_set_viewport(draw_list[i].command_buffer, Rect2i(viewport_offset, viewport_size)); - driver->command_render_set_scissor(draw_list[i].command_buffer, Rect2i(viewport_offset, viewport_size)); - } - r_split_ids[i] = (int64_t(ID_TYPE_SPLIT_DRAW_LIST) << ID_BASE_SHIFT) + i; - } - - return OK; + ERR_FAIL_V_MSG(ERR_UNAVAILABLE, "Deprecated. Split draw lists are used automatically by RenderingDevice."); } +#endif RenderingDevice::DrawList *RenderingDevice::_get_draw_list_ptr(DrawListID p_id) { if (p_id < 0) { @@ -4045,22 +3371,7 @@ RenderingDevice::DrawList *RenderingDevice::_get_draw_list_ptr(DrawListID p_id) if (!draw_list) { return nullptr; } else if (p_id == (int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT)) { - if (draw_list_split) { - return nullptr; - } return draw_list; - } else if (p_id >> DrawListID(ID_BASE_SHIFT) == ID_TYPE_SPLIT_DRAW_LIST) { - if (!draw_list_split) { - return nullptr; - } - - uint64_t index = p_id & ((DrawListID(1) << DrawListID(ID_BASE_SHIFT)) - 1); // Mask. 
- - if (index >= draw_list_count) { - return nullptr; - } - - return &draw_list[index]; } else { return nullptr; } @@ -4073,7 +3384,7 @@ void RenderingDevice::draw_list_set_blend_constants(DrawListID p_list, const Col ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified."); #endif - driver->command_render_set_blend_constants(dl->command_buffer, p_color); + draw_graph.add_draw_list_set_blend_constants(p_color); } void RenderingDevice::draw_list_bind_render_pipeline(DrawListID p_list, RID p_render_pipeline) { @@ -4095,7 +3406,7 @@ void RenderingDevice::draw_list_bind_render_pipeline(DrawListID p_list, RID p_re dl->state.pipeline = p_render_pipeline; - driver->command_bind_render_pipeline(dl->command_buffer, pipeline->driver_id); + draw_graph.add_draw_list_bind_pipeline(pipeline->driver_id, pipeline->stage_bits); if (dl->state.pipeline_shader != pipeline->shader) { // Shader changed, so descriptor sets may become incompatible. @@ -4183,21 +3494,6 @@ void RenderingDevice::draw_list_bind_uniform_set(DrawListID p_list, RID p_unifor dl->state.sets[p_index].uniform_set_format = uniform_set->format; dl->state.sets[p_index].uniform_set = p_uniform_set; - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - uint32_t mst_count = uniform_set->mutable_storage_textures.size(); - if (mst_count) { - Texture **mst_textures = const_cast(uniform_set)->mutable_storage_textures.ptrw(); - for (uint32_t i = 0; i < mst_count; i++) { - if (mst_textures[i]->used_in_frame != frames_drawn) { - mst_textures[i]->used_in_frame = frames_drawn; - mst_textures[i]->used_in_transfer = false; - mst_textures[i]->used_in_compute = false; - } - mst_textures[i]->used_in_raster = true; - } - } - } - #ifdef DEBUG_ENABLED { // Validate that textures bound are not attached as framebuffer bindings. 
uint32_t attachable_count = uniform_set->attachable_textures.size(); @@ -4235,7 +3531,12 @@ void RenderingDevice::draw_list_bind_vertex_array(DrawListID p_list, RID p_verte dl->validation.vertex_max_instances_allowed = vertex_array->max_instances_allowed; #endif dl->validation.vertex_array_size = vertex_array->vertex_count; - driver->command_render_bind_vertex_buffers(dl->command_buffer, vertex_array->buffers.size(), vertex_array->buffers.ptr(), vertex_array->offsets.ptr()); + + draw_graph.add_draw_list_bind_vertex_buffers(vertex_array->buffers, vertex_array->offsets); + + for (int i = 0; i < vertex_array->draw_trackers.size(); i++) { + draw_graph.add_draw_list_usage(vertex_array->draw_trackers[i], RDG::RESOURCE_USAGE_VERTEX_BUFFER_READ); + } } void RenderingDevice::draw_list_bind_index_array(DrawListID p_list, RID p_index_array) { @@ -4259,7 +3560,11 @@ void RenderingDevice::draw_list_bind_index_array(DrawListID p_list, RID p_index_ dl->validation.index_array_size = index_array->indices; dl->validation.index_array_offset = index_array->offset; - driver->command_render_bind_index_buffer(dl->command_buffer, index_array->driver_id, index_array->format, index_array->offset); + draw_graph.add_draw_list_bind_index_buffer(index_array->driver_id, index_array->format, index_array->offset); + + if (index_array->draw_tracker != nullptr) { + draw_graph.add_draw_list_usage(index_array->draw_tracker, RDG::RESOURCE_USAGE_INDEX_BUFFER_READ); + } } void RenderingDevice::draw_list_set_line_width(DrawListID p_list, float p_width) { @@ -4269,7 +3574,7 @@ void RenderingDevice::draw_list_set_line_width(DrawListID p_list, float p_width) ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified."); #endif - driver->command_render_set_line_width(dl->command_buffer, p_width); + draw_graph.add_draw_list_set_line_width(p_width); } void RenderingDevice::draw_list_set_push_constant(DrawListID p_list, const void *p_data, uint32_t p_data_size) { @@ -4284,7 +3589,9 
@@ void RenderingDevice::draw_list_set_push_constant(DrawListID p_list, const void ERR_FAIL_COND_MSG(p_data_size != dl->validation.pipeline_push_constant_size, "This render pipeline requires (" + itos(dl->validation.pipeline_push_constant_size) + ") bytes of push constant data, supplied: (" + itos(p_data_size) + ")"); #endif - driver->command_bind_push_constants(dl->command_buffer, dl->state.pipeline_shader_driver_id, 0, VectorView((const uint32_t *)p_data, p_data_size / sizeof(uint32_t))); + + draw_graph.add_draw_list_set_push_constant(dl->state.pipeline_shader_driver_id, p_data, p_data_size); + #ifdef DEBUG_ENABLED dl->validation.pipeline_push_constant_supplied = true; #endif @@ -4338,14 +3645,19 @@ void RenderingDevice::draw_list_draw(DrawListID p_list, bool p_use_indices, uint } } #endif - driver->command_uniform_set_prepare_for_use(dl->command_buffer, dl->state.sets[i].uniform_set_driver_id, dl->state.pipeline_shader_driver_id, i); + draw_graph.add_draw_list_uniform_set_prepare_for_use(dl->state.pipeline_shader_driver_id, dl->state.sets[i].uniform_set_driver_id, i); } for (uint32_t i = 0; i < dl->state.set_count; i++) { if (dl->state.sets[i].pipeline_expected_format == 0) { continue; // Nothing expected by this pipeline. } if (!dl->state.sets[i].bound) { - driver->command_bind_render_uniform_set(dl->command_buffer, dl->state.sets[i].uniform_set_driver_id, dl->state.pipeline_shader_driver_id, i); + // All good, see if this requires re-binding. 
+ draw_graph.add_draw_list_bind_uniform_set(dl->state.pipeline_shader_driver_id, dl->state.sets[i].uniform_set_driver_id, i); + + UniformSet *uniform_set = uniform_set_owner.get_or_null(dl->state.sets[i].uniform_set); + draw_graph.add_draw_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage); + dl->state.sets[i].bound = true; } } @@ -4370,7 +3682,8 @@ void RenderingDevice::draw_list_draw(DrawListID p_list, bool p_use_indices, uint ERR_FAIL_COND_MSG((to_draw % dl->validation.pipeline_primitive_divisor) != 0, "Index amount (" + itos(to_draw) + ") must be a multiple of the amount of indices required by the render primitive (" + itos(dl->validation.pipeline_primitive_divisor) + ")."); #endif - driver->command_render_draw_indexed(dl->command_buffer, to_draw, p_instances, dl->validation.index_array_offset, 0, 0); + + draw_graph.add_draw_list_draw_indexed(to_draw, p_instances, dl->validation.index_array_offset); } else { uint32_t to_draw; @@ -4396,7 +3709,7 @@ void RenderingDevice::draw_list_draw(DrawListID p_list, bool p_use_indices, uint "Vertex amount (" + itos(to_draw) + ") must be a multiple of the amount of vertices required by the render primitive (" + itos(dl->validation.pipeline_primitive_divisor) + ")."); #endif - driver->command_render_draw(dl->command_buffer, to_draw, p_instances, 0, 0); + draw_graph.add_draw_list_draw(to_draw, p_instances); } } @@ -4416,7 +3729,7 @@ void RenderingDevice::draw_list_enable_scissor(DrawListID p_list, const Rect2 &p return; } - driver->command_render_set_scissor(dl->command_buffer, rect); + _draw_list_set_scissor(rect); } void RenderingDevice::draw_list_disable_scissor(DrawListID p_list) { @@ -4426,7 +3739,7 @@ void RenderingDevice::draw_list_disable_scissor(DrawListID p_list) { ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified."); #endif - driver->command_render_set_scissor(dl->command_buffer, dl->viewport); + _draw_list_set_scissor(dl->viewport); } uint32_t 
RenderingDevice::draw_list_get_current_pass() { @@ -4443,230 +3756,80 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_switch_to_next_pass() { Rect2i viewport; _draw_list_free(&viewport); - driver->command_next_render_subpass(frames[frame].draw_command_buffer, RDD::COMMAND_BUFFER_TYPE_PRIMARY); + draw_graph.add_draw_list_next_subpass(RDD::COMMAND_BUFFER_TYPE_PRIMARY); - _draw_list_allocate(viewport, 0, draw_list_current_subpass); + _draw_list_allocate(viewport, draw_list_current_subpass); return int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT; } + +#ifndef DISABLE_DEPRECATED Error RenderingDevice::draw_list_switch_to_next_pass_split(uint32_t p_splits, DrawListID *r_split_ids) { - _THREAD_SAFE_METHOD_ - ERR_FAIL_COND_V(draw_list == nullptr, ERR_INVALID_PARAMETER); - ERR_FAIL_COND_V(draw_list_current_subpass >= draw_list_subpass_count - 1, ERR_INVALID_PARAMETER); - - draw_list_current_subpass++; - - Rect2i viewport; - _draw_list_free(&viewport); - - driver->command_next_render_subpass(frames[frame].draw_command_buffer, RDD::COMMAND_BUFFER_TYPE_PRIMARY); - - _draw_list_allocate(viewport, p_splits, draw_list_current_subpass); - - for (uint32_t i = 0; i < p_splits; i++) { - r_split_ids[i] = (int64_t(ID_TYPE_SPLIT_DRAW_LIST) << ID_BASE_SHIFT) + i; - } - - return OK; + ERR_FAIL_V_MSG(ERR_UNAVAILABLE, "Deprecated. Split draw lists are used automatically by RenderingDevice."); } +#endif -Error RenderingDevice::_draw_list_allocate(const Rect2i &p_viewport, uint32_t p_splits, uint32_t p_subpass) { +Error RenderingDevice::_draw_list_allocate(const Rect2i &p_viewport, uint32_t p_subpass) { // Lock while draw_list is active. 
_THREAD_SAFE_LOCK_ - if (p_splits == 0) { - draw_list = memnew(DrawList); - draw_list->command_buffer = frames[frame].draw_command_buffer; - draw_list->viewport = p_viewport; - draw_list_count = 0; - draw_list_split = false; - } else { - if (p_splits > (uint32_t)split_draw_list_allocators.size()) { - uint32_t from = split_draw_list_allocators.size(); - split_draw_list_allocators.resize(p_splits); - for (uint32_t i = from; i < p_splits; i++) { - RDD::CommandPoolID cmd_pool = driver->command_pool_create(RDD::COMMAND_BUFFER_TYPE_SECONDARY); - ERR_FAIL_COND_V(!cmd_pool, ERR_CANT_CREATE); - split_draw_list_allocators.write[i].command_pool = cmd_pool; - - for (int j = 0; j < frame_count; j++) { - RDD::CommandBufferID cmd_buffer = driver->command_buffer_create(RDD::COMMAND_BUFFER_TYPE_SECONDARY, cmd_pool); - ERR_FAIL_COND_V(!cmd_buffer, ERR_CANT_CREATE); - split_draw_list_allocators.write[i].command_buffers.push_back(cmd_buffer); - } - } - } - draw_list = memnew_arr(DrawList, p_splits); - draw_list_count = p_splits; - draw_list_split = true; - - for (uint32_t i = 0; i < p_splits; i++) { - // Take a command buffer and initialize it. - RDD::CommandBufferID cmd_buffer = split_draw_list_allocators[i].command_buffers[frame]; - - bool ok = driver->command_buffer_begin_secondary(cmd_buffer, draw_list_render_pass, p_subpass, draw_list_vkframebuffer); - if (!ok) { - memdelete_arr(draw_list); - draw_list = nullptr; - ERR_FAIL_V(ERR_CANT_CREATE); - } - - draw_list[i].command_buffer = cmd_buffer; - draw_list[i].viewport = p_viewport; - } - } + draw_list = memnew(DrawList); + draw_list->viewport = p_viewport; + draw_list_count = 0; return OK; } void RenderingDevice::_draw_list_free(Rect2i *r_last_viewport) { - if (draw_list_split) { - // Send all command buffers. 
- RDD::CommandBufferID *command_buffers = (RDD::CommandBufferID *)alloca(sizeof(RDD::CommandBufferID) * draw_list_count); - for (uint32_t i = 0; i < draw_list_count; i++) { - driver->command_buffer_end(draw_list[i].command_buffer); - command_buffers[i] = draw_list[i].command_buffer; - if (r_last_viewport) { - if (i == 0 || draw_list[i].viewport_set) { - *r_last_viewport = draw_list[i].viewport; - } - } - } - - driver->command_buffer_execute_secondary(frames[frame].draw_command_buffer, VectorView(command_buffers, draw_list_count)); - memdelete_arr(draw_list); - draw_list = nullptr; - - } else { - if (r_last_viewport) { - *r_last_viewport = draw_list->viewport; - } - // Just end the list. - memdelete(draw_list); - draw_list = nullptr; + if (r_last_viewport) { + *r_last_viewport = draw_list->viewport; } + // Just end the list. + memdelete(draw_list); + draw_list = nullptr; // Draw_list is no longer active. _THREAD_SAFE_UNLOCK_ } -void RenderingDevice::draw_list_end(BitField p_post_barrier) { +void RenderingDevice::draw_list_end() { _THREAD_SAFE_METHOD_ ERR_FAIL_COND_MSG(!draw_list, "Immediate draw list is already inactive."); - _draw_list_free(); + draw_graph.add_draw_list_end(); - driver->command_end_render_pass(frames[frame].draw_command_buffer); + _draw_list_free(); for (int i = 0; i < draw_list_bound_textures.size(); i++) { Texture *texture = texture_owner.get_or_null(draw_list_bound_textures[i]); ERR_CONTINUE(!texture); // Wtf. 
- if (draw_list_unbind_color_textures && (texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT)) { + if (texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) { texture->bound = false; } - if (draw_list_unbind_depth_textures && (texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) { + if (texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { texture->bound = false; } } + draw_list_bound_textures.clear(); - - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - BitField dst_stages; - BitField dst_access; - if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { - dst_stages.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); - dst_access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_VERTEX)) { - dst_stages.set_flag(RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT).set_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT); // RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT - dst_access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT).set_flag(RDD::BARRIER_ACCESS_INDEX_READ_BIT).set_flag(RDD::BARRIER_ACCESS_VERTEX_ATTRIBUTE_READ_BIT); // RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT - } - if (p_post_barrier.has_flag(BARRIER_MASK_FRAGMENT)) { - dst_stages.set_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT); // RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT - dst_access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); // RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT - } - if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { - dst_stages.set_flag(RDD::PIPELINE_STAGE_TRANSFER_BIT); - dst_access.set_flag(RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT).set_flag(RDD::BARRIER_ACCESS_TRANSFER_READ_BIT); - } - - if (dst_stages.is_empty()) { - dst_stages.set_flag(RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); - } - - RDD::TextureBarrier *texture_barriers = nullptr; - - uint32_t texture_barrier_count 
= draw_list_storage_textures.size(); - - if (texture_barrier_count) { - texture_barriers = (RDD::TextureBarrier *)alloca(sizeof(RDD::TextureBarrier) * draw_list_storage_textures.size()); - } - - BitField src_stage(RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | - RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT); - BitField src_access( - RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); - - if (texture_barrier_count) { - src_stage.set_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT).set_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT); - src_access.set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - - for (uint32_t i = 0; i < texture_barrier_count; i++) { - Texture *texture = texture_owner.get_or_null(draw_list_storage_textures[i]); - - RDD::TextureBarrier &tb = texture_barriers[i]; - tb.texture = texture->driver_id; - tb.src_access = src_access; - tb.dst_access = dst_access; - tb.prev_layout = texture->layout; - tb.next_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - tb.subresources.aspect = texture->read_aspect_flags; - tb.subresources.base_mipmap = texture->base_mipmap; - tb.subresources.mipmap_count = texture->mipmaps; - tb.subresources.base_layer = texture->base_layer; - tb.subresources.layer_count = texture->layers; - - texture->layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - } - - // To ensure proper synchronization, we must make sure rendering is done before: - // * Some buffer is copied. - // * Another render pass happens (since we may be done). 
- - RDD::MemoryBarrier mb; - mb.src_access = src_access; - mb.dst_access = dst_access; - - if (texture_barrier_count > 0 || p_post_barrier != BARRIER_MASK_NO_BARRIER) { - driver->command_pipeline_barrier(frames[frame].draw_command_buffer, src_stage, dst_stages, mb, {}, VectorView(texture_barriers, texture_barrier_count)); - } - } - - draw_list_storage_textures.clear(); - -#ifdef FORCE_FULL_BARRIER - _full_barrier(true); -#endif } /***********************/ /**** COMPUTE LISTS ****/ /***********************/ -RenderingDevice::ComputeListID RenderingDevice::compute_list_begin(bool p_allow_draw_overlap) { +RenderingDevice::ComputeListID RenderingDevice::compute_list_begin() { _THREAD_SAFE_METHOD_ - ERR_FAIL_COND_V_MSG(!p_allow_draw_overlap && draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time."); ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time."); // Lock while compute_list is active. _THREAD_SAFE_LOCK_ compute_list = memnew(ComputeList); - compute_list->command_buffer = frames[frame].draw_command_buffer; - compute_list->state.allow_draw_overlap = p_allow_draw_overlap; + + draw_graph.add_compute_list_begin(); return ID_TYPE_COMPUTE_LIST; } @@ -4688,7 +3851,7 @@ void RenderingDevice::compute_list_bind_compute_pipeline(ComputeListID p_list, R cl->state.pipeline = p_compute_pipeline; - driver->command_bind_compute_pipeline(cl->command_buffer, pipeline->driver_id); + draw_graph.add_compute_list_bind_pipeline(pipeline->driver_id); if (cl->state.pipeline_shader != pipeline->shader) { // Shader changed, so descriptor sets may become incompatible. 
@@ -4779,109 +3942,6 @@ void RenderingDevice::compute_list_bind_uniform_set(ComputeListID p_list, RID p_ cl->state.sets[p_index].uniform_set_format = uniform_set->format; cl->state.sets[p_index].uniform_set = p_uniform_set; - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - uint32_t textures_to_sampled_count = uniform_set->mutable_sampled_textures.size(); - uint32_t textures_to_storage_count = uniform_set->mutable_storage_textures.size(); - - Texture **textures_to_sampled = uniform_set->mutable_sampled_textures.ptrw(); - - RDD::TextureBarrier *texture_barriers = nullptr; - - if (textures_to_sampled_count + textures_to_storage_count) { - texture_barriers = (RDD::TextureBarrier *)alloca(sizeof(RDD::TextureBarrier) * (textures_to_sampled_count + textures_to_storage_count)); - } - uint32_t texture_barrier_count = 0; - - BitField src_stages; - - for (uint32_t i = 0; i < textures_to_sampled_count; i++) { - if (textures_to_sampled[i]->layout != RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { - src_stages.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); - - RDD::TextureBarrier &tb = texture_barriers[texture_barrier_count++]; - tb.texture = textures_to_sampled[i]->driver_id; - tb.src_access = (RDD::BARRIER_ACCESS_SHADER_READ_BIT | RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - tb.dst_access = (RDD::BARRIER_ACCESS_SHADER_READ_BIT | RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - tb.prev_layout = textures_to_sampled[i]->layout; - tb.next_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - tb.subresources.aspect = textures_to_sampled[i]->read_aspect_flags; - tb.subresources.base_mipmap = textures_to_sampled[i]->base_mipmap; - tb.subresources.mipmap_count = textures_to_sampled[i]->mipmaps; - tb.subresources.base_layer = textures_to_sampled[i]->base_layer; - tb.subresources.layer_count = textures_to_sampled[i]->layers; - - textures_to_sampled[i]->layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - - 
cl->state.textures_to_sampled_layout.erase(textures_to_sampled[i]); - } - - if (textures_to_sampled[i]->used_in_frame != frames_drawn) { - textures_to_sampled[i]->used_in_frame = frames_drawn; - textures_to_sampled[i]->used_in_transfer = false; - textures_to_sampled[i]->used_in_raster = false; - } - textures_to_sampled[i]->used_in_compute = true; - } - - Texture **textures_to_storage = uniform_set->mutable_storage_textures.ptrw(); - - for (uint32_t i = 0; i < textures_to_storage_count; i++) { - if (textures_to_storage[i]->layout != RDD::TEXTURE_LAYOUT_GENERAL) { - BitField src_access; - - if (textures_to_storage[i]->used_in_frame == frames_drawn) { - if (textures_to_storage[i]->used_in_compute) { - src_stages.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); - src_access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (textures_to_storage[i]->used_in_raster) { - src_stages.set_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT).set_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT); - src_access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (textures_to_storage[i]->used_in_transfer) { - src_stages.set_flag(RDD::PIPELINE_STAGE_TRANSFER_BIT); - src_access.set_flag(RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT).set_flag(RDD::BARRIER_ACCESS_TRANSFER_READ_BIT); - } - - textures_to_storage[i]->used_in_compute = false; - textures_to_storage[i]->used_in_raster = false; - textures_to_storage[i]->used_in_transfer = false; - - } else { - src_access.clear(); - textures_to_storage[i]->used_in_compute = false; - textures_to_storage[i]->used_in_raster = false; - textures_to_storage[i]->used_in_transfer = false; - textures_to_storage[i]->used_in_frame = frames_drawn; - } - - RDD::TextureBarrier &tb = texture_barriers[texture_barrier_count++]; - tb.texture = textures_to_storage[i]->driver_id; - tb.src_access = src_access; - tb.dst_access = (RDD::BARRIER_ACCESS_SHADER_READ_BIT | 
RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - tb.prev_layout = textures_to_storage[i]->layout; - tb.next_layout = RDD::TEXTURE_LAYOUT_GENERAL; - tb.subresources.aspect = textures_to_storage[i]->read_aspect_flags; - tb.subresources.base_mipmap = textures_to_storage[i]->base_mipmap; - tb.subresources.mipmap_count = textures_to_storage[i]->mipmaps; - tb.subresources.base_layer = textures_to_storage[i]->base_layer; - tb.subresources.layer_count = textures_to_storage[i]->layers; - - textures_to_storage[i]->layout = RDD::TEXTURE_LAYOUT_GENERAL; - - cl->state.textures_to_sampled_layout.insert(textures_to_storage[i]); // Needs to go back to sampled layout afterwards. - } - } - - if (texture_barrier_count) { - if (src_stages.is_empty()) { - src_stages.set_flag(RDD::PIPELINE_STAGE_TOP_OF_PIPE_BIT); - } - - driver->command_pipeline_barrier(cl->command_buffer, src_stages, RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT, {}, {}, VectorView(texture_barriers, texture_barrier_count)); - } - } - #if 0 { // Validate that textures bound are not attached as framebuffer bindings. 
uint32_t attachable_count = uniform_set->attachable_textures.size(); @@ -4901,6 +3961,7 @@ void RenderingDevice::compute_list_bind_uniform_set(ComputeListID p_list, RID p_ void RenderingDevice::compute_list_set_push_constant(ComputeListID p_list, const void *p_data, uint32_t p_data_size) { ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); ERR_FAIL_NULL(compute_list); + ERR_FAIL_COND_MSG(p_data_size > MAX_PUSH_CONSTANT_SIZE, "Push constants can't be bigger than 128 bytes to maintain compatibility."); ComputeList *cl = compute_list; @@ -4912,7 +3973,13 @@ void RenderingDevice::compute_list_set_push_constant(ComputeListID p_list, const ERR_FAIL_COND_MSG(p_data_size != cl->validation.pipeline_push_constant_size, "This compute pipeline requires (" + itos(cl->validation.pipeline_push_constant_size) + ") bytes of push constant data, supplied: (" + itos(p_data_size) + ")"); #endif - driver->command_bind_push_constants(cl->command_buffer, cl->state.pipeline_shader_driver_id, 0, VectorView((const uint32_t *)p_data, p_data_size / sizeof(uint32_t))); + + draw_graph.add_compute_list_set_push_constant(cl->state.pipeline_shader_driver_id, p_data, p_data_size); + + // Store it in the state in case we need to restart the compute list. + memcpy(cl->state.push_constant_data, p_data, p_data_size); + cl->state.push_constant_size = p_data_size; + #ifdef DEBUG_ENABLED cl->validation.pipeline_push_constant_supplied = true; #endif @@ -4970,19 +4037,24 @@ void RenderingDevice::compute_list_dispatch(ComputeListID p_list, uint32_t p_x_g } } #endif - driver->command_uniform_set_prepare_for_use(cl->command_buffer, cl->state.sets[i].uniform_set_driver_id, cl->state.pipeline_shader_driver_id, i); + draw_graph.add_compute_list_uniform_set_prepare_for_use(cl->state.pipeline_shader_driver_id, cl->state.sets[i].uniform_set_driver_id, i); } for (uint32_t i = 0; i < cl->state.set_count; i++) { if (cl->state.sets[i].pipeline_expected_format == 0) { continue; // Nothing expected by this pipeline. 
} if (!cl->state.sets[i].bound) { - driver->command_bind_compute_uniform_set(cl->command_buffer, cl->state.sets[i].uniform_set_driver_id, cl->state.pipeline_shader_driver_id, i); + // All good, see if this requires re-binding. + draw_graph.add_compute_list_bind_uniform_set(cl->state.pipeline_shader_driver_id, cl->state.sets[i].uniform_set_driver_id, i); + + UniformSet *uniform_set = uniform_set_owner.get_or_null(cl->state.sets[i].uniform_set); + draw_graph.add_compute_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage); + cl->state.sets[i].bound = true; } } - driver->command_compute_dispatch(cl->command_buffer, p_x_groups, p_y_groups, p_z_groups); + draw_graph.add_compute_list_dispatch(p_x_groups, p_y_groups, p_z_groups); } void RenderingDevice::compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads) { @@ -5058,106 +4130,56 @@ void RenderingDevice::compute_list_dispatch_indirect(ComputeListID p_list, RID p } } #endif - driver->command_uniform_set_prepare_for_use(cl->command_buffer, cl->state.sets[i].uniform_set_driver_id, cl->state.pipeline_shader_driver_id, i); + draw_graph.add_compute_list_uniform_set_prepare_for_use(cl->state.pipeline_shader_driver_id, cl->state.sets[i].uniform_set_driver_id, i); } for (uint32_t i = 0; i < cl->state.set_count; i++) { if (cl->state.sets[i].pipeline_expected_format == 0) { continue; // Nothing expected by this pipeline. } if (!cl->state.sets[i].bound) { - driver->command_bind_compute_uniform_set(cl->command_buffer, cl->state.sets[i].uniform_set_driver_id, cl->state.pipeline_shader_driver_id, i); + // All good, see if this requires re-binding. 
+ draw_graph.add_compute_list_bind_uniform_set(cl->state.pipeline_shader_driver_id, cl->state.sets[i].uniform_set_driver_id, i); + + UniformSet *uniform_set = uniform_set_owner.get_or_null(cl->state.sets[i].uniform_set); + draw_graph.add_compute_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage); + cl->state.sets[i].bound = true; } } - driver->command_compute_dispatch_indirect(cl->command_buffer, buffer->driver_id, p_offset); + draw_graph.add_compute_list_dispatch_indirect(buffer->driver_id, p_offset); + + if (buffer->draw_tracker != nullptr) { + draw_graph.add_compute_list_usage(buffer->draw_tracker, RDG::RESOURCE_USAGE_INDIRECT_BUFFER_READ); + } } void RenderingDevice::compute_list_add_barrier(ComputeListID p_list) { // Must be called within a compute list, the class mutex is locked during that time - BitField stages(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); - BitField access(RDD::BARRIER_ACCESS_SHADER_READ_BIT); - _compute_list_add_barrier(BARRIER_MASK_COMPUTE, stages, access); + compute_list_barrier_state = compute_list->state; + compute_list_end(); + compute_list_begin(); + + if (compute_list_barrier_state.pipeline.is_valid()) { + compute_list_bind_compute_pipeline(p_list, compute_list_barrier_state.pipeline); + } + + for (uint32_t i = 0; i < compute_list_barrier_state.set_count; i++) { + if (compute_list_barrier_state.sets[i].uniform_set.is_valid()) { + compute_list_bind_uniform_set(p_list, compute_list_barrier_state.sets[i].uniform_set, i); + } + } + + if (compute_list_barrier_state.push_constant_size > 0) { + compute_list_set_push_constant(p_list, compute_list_barrier_state.push_constant_data, compute_list_barrier_state.push_constant_size); + } } -void RenderingDevice::_compute_list_add_barrier(BitField p_post_barrier, BitField p_stages, BitField p_access) { +void RenderingDevice::compute_list_end() { ERR_FAIL_NULL(compute_list); - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - RDD::TextureBarrier 
*texture_barriers = nullptr; - - uint32_t texture_barrier_count = compute_list->state.textures_to_sampled_layout.size(); - - if (texture_barrier_count) { - texture_barriers = (RDD::TextureBarrier *)alloca(sizeof(RDD::TextureBarrier) * texture_barrier_count); - } - - texture_barrier_count = 0; // We'll count how many we end up issuing. - - for (Texture *E : compute_list->state.textures_to_sampled_layout) { - if (E->layout != RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { - RDD::TextureBarrier &tb = texture_barriers[texture_barrier_count++]; - tb.texture = E->driver_id; - tb.src_access = RDD::BARRIER_ACCESS_SHADER_READ_BIT | RDD::BARRIER_ACCESS_SHADER_WRITE_BIT; - tb.dst_access = p_access; - tb.prev_layout = E->layout; - tb.next_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - tb.subresources.aspect = E->read_aspect_flags; - tb.subresources.base_mipmap = E->base_mipmap; - tb.subresources.mipmap_count = E->mipmaps; - tb.subresources.base_layer = E->base_layer; - tb.subresources.layer_count = E->layers; - - E->layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - } - - if (E->used_in_frame != frames_drawn) { - E->used_in_transfer = false; - E->used_in_raster = false; - E->used_in_compute = false; - E->used_in_frame = frames_drawn; - } - } - - if (p_stages) { - RDD::MemoryBarrier mb; - mb.src_access = RDD::BARRIER_ACCESS_SHADER_WRITE_BIT; - mb.dst_access = p_access; - driver->command_pipeline_barrier(compute_list->command_buffer, RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT, p_stages, mb, {}, VectorView(texture_barriers, texture_barrier_count)); - - } else if (texture_barrier_count) { - driver->command_pipeline_barrier(compute_list->command_buffer, RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, {}, {}, VectorView(texture_barriers, texture_barrier_count)); - } - } - -#ifdef FORCE_FULL_BARRIER - _full_barrier(true); -#endif -} - -void RenderingDevice::compute_list_end(BitField p_post_barrier) { - ERR_FAIL_NULL(compute_list); - - 
BitField stages; - BitField access; - if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) { - stages.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); - access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_VERTEX)) { - stages.set_flag(RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT).set_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT).set_flag(RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT); - access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT).set_flag(RDD::BARRIER_ACCESS_INDEX_READ_BIT).set_flag(RDD::BARRIER_ACCESS_VERTEX_ATTRIBUTE_READ_BIT).set_flag(RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_FRAGMENT)) { - stages.set_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT).set_flag(RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT); - access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT).set_flag(RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT); - } - if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { - stages.set_flag(RDD::PIPELINE_STAGE_TRANSFER_BIT); - access.set_flag(RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT).set_flag(RDD::BARRIER_ACCESS_TRANSFER_READ_BIT); - } - _compute_list_add_barrier(p_post_barrier, stages, access); + draw_graph.add_compute_list_end(); memdelete(compute_list); compute_list = nullptr; @@ -5166,66 +4188,168 @@ void RenderingDevice::compute_list_end(BitField p_post_barrier) { _THREAD_SAFE_UNLOCK_ } +#ifndef DISABLE_DEPRECATED void RenderingDevice::barrier(BitField p_from, BitField p_to) { - if (!driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - return; - } - - BitField src_stages; - BitField src_access; - - if (p_from == 0) { - src_stages.set_flag(RDD::PIPELINE_STAGE_TOP_OF_PIPE_BIT); - } else { - if (p_from.has_flag(BARRIER_MASK_COMPUTE)) { - src_stages.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); - 
src_access.set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (p_from.has_flag(BARRIER_MASK_FRAGMENT)) { - src_stages.set_flag(RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT).set_flag(RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT).set_flag(RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT); - src_access.set_flag(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT).set_flag(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); - } - if (p_from.has_flag(BARRIER_MASK_TRANSFER)) { - src_stages.set_flag(RDD::PIPELINE_STAGE_TRANSFER_BIT); - src_access.set_flag(RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT); - } - } - - BitField dst_stages; - BitField dst_access; - - if (p_to == 0) { - dst_stages.set_flag(RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); - } else { - if (p_to.has_flag(BARRIER_MASK_COMPUTE)) { - dst_stages.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); - dst_access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); - } - if (p_to.has_flag(BARRIER_MASK_VERTEX)) { - dst_stages.set_flag(RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT).set_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT).set_flag(RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT); - dst_access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT).set_flag(RDD::BARRIER_ACCESS_INDEX_READ_BIT).set_flag(RDD::BARRIER_ACCESS_VERTEX_ATTRIBUTE_READ_BIT).set_flag(RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT); - } - if (p_to.has_flag(BARRIER_MASK_FRAGMENT)) { - dst_stages.set_flag(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT).set_flag(RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT); - dst_access.set_flag(RDD::BARRIER_ACCESS_SHADER_READ_BIT).set_flag(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT).set_flag(RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT); - } - if (p_to.has_flag(BARRIER_MASK_TRANSFER)) { - dst_stages.set_flag(RDD::PIPELINE_STAGE_TRANSFER_BIT); - dst_access.set_flag(RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT).set_flag(RDD::BARRIER_ACCESS_TRANSFER_READ_BIT); - } - } - - 
RDD::MemoryBarrier mb; - mb.src_access = src_access; - mb.dst_access = dst_access; - driver->command_pipeline_barrier(frames[frame].draw_command_buffer, src_stages, dst_stages, mb, {}, {}); + WARN_PRINT("Deprecated. Barriers are automatically inserted by RenderingDevice."); } void RenderingDevice::full_barrier() { -#ifndef DEBUG_ENABLED - ERR_PRINT("Full barrier is debug-only, should not be used in production"); + WARN_PRINT("Deprecated. Barriers are automatically inserted by RenderingDevice."); +} #endif - _full_barrier(true); + +/***********************/ +/**** COMMAND GRAPH ****/ +/***********************/ + +bool RenderingDevice::_texture_make_mutable(Texture *p_texture, RID p_texture_id) { + if (p_texture->draw_tracker != nullptr) { + // Texture already has a tracker. + return false; + } else { + if (p_texture->owner.is_valid()) { + // Texture has an owner. + Texture *owner_texture = texture_owner.get_or_null(p_texture->owner); + ERR_FAIL_COND_V(!owner_texture, false); + + if (owner_texture->draw_tracker != nullptr) { + // Create a tracker for this dependency in particular. + if (p_texture->slice_type == TEXTURE_SLICE_MAX) { + // Shared texture. + p_texture->draw_tracker = owner_texture->draw_tracker; + p_texture->draw_tracker->reference_count++; + } else { + // Slice texture. + HashMap::ConstIterator draw_tracker_iterator = owner_texture->slice_trackers.find(p_texture->slice_rect); + RDG::ResourceTracker *draw_tracker = nullptr; + if (draw_tracker_iterator != owner_texture->slice_trackers.end()) { + // Reuse the tracker at the matching rectangle. + draw_tracker = draw_tracker_iterator->value; + } else { + // Create a new tracker and store it on the map. 
+ draw_tracker = RDG::resource_tracker_create(); + draw_tracker->parent = owner_texture->draw_tracker; + draw_tracker->texture_driver_id = p_texture->driver_id; + draw_tracker->texture_subresources = p_texture->barrier_range(); + draw_tracker->texture_slice_or_dirty_rect = p_texture->slice_rect; + owner_texture->slice_trackers[p_texture->slice_rect] = draw_tracker; + } + + p_texture->slice_trackers.clear(); + p_texture->draw_tracker = draw_tracker; + p_texture->draw_tracker->reference_count++; + } + + if (p_texture_id.is_valid()) { + _dependencies_make_mutable(p_texture_id, p_texture->draw_tracker); + } + } else { + // Delegate this to the owner instead, as it'll make all its dependencies mutable. + _texture_make_mutable(owner_texture, p_texture->owner); + } + } else { + // Regular texture. + p_texture->draw_tracker = RDG::resource_tracker_create(); + p_texture->draw_tracker->texture_driver_id = p_texture->driver_id; + p_texture->draw_tracker->texture_subresources = p_texture->barrier_range(); + p_texture->draw_tracker->reference_count = 1; + + if (p_texture_id.is_valid()) { + if (p_texture->has_initial_data) { + // If the texture was initialized with initial data but wasn't made mutable from the start, assume the texture sampling usage. + p_texture->draw_tracker->usage = RDG::RESOURCE_USAGE_TEXTURE_SAMPLE; + } + + _dependencies_make_mutable(p_texture_id, p_texture->draw_tracker); + } + } + + return true; + } +} + +bool RenderingDevice::_buffer_make_mutable(Buffer *p_buffer, RID p_buffer_id) { + if (p_buffer->draw_tracker != nullptr) { + // Buffer already has a tracker. + return false; + } else { + // Create a tracker for the buffer and make all its dependencies mutable. 
+ p_buffer->draw_tracker = RDG::resource_tracker_create(); + p_buffer->draw_tracker->buffer_driver_id = p_buffer->driver_id; + if (p_buffer_id.is_valid()) { + _dependencies_make_mutable(p_buffer_id, p_buffer->draw_tracker); + } + + return true; + } +} + +bool RenderingDevice::_vertex_array_make_mutable(VertexArray *p_vertex_array, RID p_resource_id, RDG::ResourceTracker *p_resource_tracker) { + if (!p_vertex_array->untracked_buffers.has(p_resource_id)) { + // Vertex array thinks the buffer is already tracked or does not use it. + return false; + } else { + // Vertex array is aware of the buffer but it isn't being tracked. + p_vertex_array->draw_trackers.push_back(p_resource_tracker); + p_vertex_array->untracked_buffers.erase(p_resource_id); + return true; + } +} + +bool RenderingDevice::_index_array_make_mutable(IndexArray *p_index_array, RDG::ResourceTracker *p_resource_tracker) { + if (p_index_array->draw_tracker != nullptr) { + // Index array already has a tracker. + return false; + } else { + // Index array should assign the tracker from the buffer. + p_index_array->draw_tracker = p_resource_tracker; + return true; + } +} + +bool RenderingDevice::_uniform_set_make_mutable(UniformSet *p_uniform_set, RID p_resource_id, RDG::ResourceTracker *p_resource_tracker) { + HashMap::Iterator E = p_uniform_set->untracked_usage.find(p_resource_id); + if (!E) { + // Uniform set thinks the resource is already tracked or does not use it. + return false; + } else { + // Uniform set has seen the resource but hasn't added its tracker yet. 
+ p_uniform_set->draw_trackers.push_back(p_resource_tracker); + p_uniform_set->draw_trackers_usage.push_back(E->value); + p_uniform_set->untracked_usage.remove(E); + return true; + } +} + +bool RenderingDevice::_dependency_make_mutable(RID p_id, RID p_resource_id, RDG::ResourceTracker *p_resource_tracker) { + if (texture_owner.owns(p_id)) { + Texture *texture = texture_owner.get_or_null(p_id); + return _texture_make_mutable(texture, p_id); + } else if (vertex_array_owner.owns(p_id)) { + VertexArray *vertex_array = vertex_array_owner.get_or_null(p_id); + return _vertex_array_make_mutable(vertex_array, p_resource_id, p_resource_tracker); + } else if (index_array_owner.owns(p_id)) { + IndexArray *index_array = index_array_owner.get_or_null(p_id); + return _index_array_make_mutable(index_array, p_resource_tracker); + } else if (uniform_set_owner.owns(p_id)) { + UniformSet *uniform_set = uniform_set_owner.get_or_null(p_id); + return _uniform_set_make_mutable(uniform_set, p_resource_id, p_resource_tracker); + } else { + DEV_ASSERT(false && "Unknown resource type to make mutable."); + return false; + } +} + +bool RenderingDevice::_dependencies_make_mutable(RID p_id, RDG::ResourceTracker *p_resource_tracker) { + bool made_mutable = false; + HashMap>::Iterator E = dependency_map.find(p_id); + if (E) { + for (RID rid : E->value) { + made_mutable = _dependency_make_mutable(rid, p_id, p_resource_tracker) || made_mutable; + } + } + + return made_mutable; } /**************************/ @@ -5251,6 +4375,22 @@ void RenderingDevice::_free_internal(RID p_id) { // Push everything so it's disposed of next time this frame index is processed (means, it's safe to do it). 
if (texture_owner.owns(p_id)) { Texture *texture = texture_owner.get_or_null(p_id); + RDG::ResourceTracker *draw_tracker = texture->draw_tracker; + if (draw_tracker != nullptr) { + draw_tracker->reference_count--; + if (draw_tracker->reference_count == 0) { + RDG::resource_tracker_free(draw_tracker); + + if (texture->owner.is_valid() && (texture->slice_type != TEXTURE_SLICE_MAX)) { + // If this was a texture slice, erase the tracker from the map. + Texture *owner_texture = texture_owner.get_or_null(texture->owner); + if (owner_texture != nullptr) { + owner_texture->slice_trackers.erase(texture->slice_rect); + } + } + } + } + frames[frame].textures_to_dispose_of.push_back(*texture); texture_owner.free(p_id); } else if (framebuffer_owner.owns(p_id)) { @@ -5268,12 +4408,14 @@ void RenderingDevice::_free_internal(RID p_id) { sampler_owner.free(p_id); } else if (vertex_buffer_owner.owns(p_id)) { Buffer *vertex_buffer = vertex_buffer_owner.get_or_null(p_id); + RDG::resource_tracker_free(vertex_buffer->draw_tracker); frames[frame].buffers_to_dispose_of.push_back(*vertex_buffer); vertex_buffer_owner.free(p_id); } else if (vertex_array_owner.owns(p_id)) { vertex_array_owner.free(p_id); } else if (index_buffer_owner.owns(p_id)) { IndexBuffer *index_buffer = index_buffer_owner.get_or_null(p_id); + RDG::resource_tracker_free(index_buffer->draw_tracker); frames[frame].buffers_to_dispose_of.push_back(*index_buffer); index_buffer_owner.free(p_id); } else if (index_array_owner.owns(p_id)) { @@ -5286,14 +4428,17 @@ void RenderingDevice::_free_internal(RID p_id) { shader_owner.free(p_id); } else if (uniform_buffer_owner.owns(p_id)) { Buffer *uniform_buffer = uniform_buffer_owner.get_or_null(p_id); + RDG::resource_tracker_free(uniform_buffer->draw_tracker); frames[frame].buffers_to_dispose_of.push_back(*uniform_buffer); uniform_buffer_owner.free(p_id); } else if (texture_buffer_owner.owns(p_id)) { Buffer *texture_buffer = texture_buffer_owner.get_or_null(p_id); + 
RDG::resource_tracker_free(texture_buffer->draw_tracker); frames[frame].buffers_to_dispose_of.push_back(*texture_buffer); texture_buffer_owner.free(p_id); } else if (storage_buffer_owner.owns(p_id)) { Buffer *storage_buffer = storage_buffer_owner.get_or_null(p_id); + RDG::resource_tracker_free(storage_buffer->draw_tracker); frames[frame].buffers_to_dispose_of.push_back(*storage_buffer); storage_buffer_owner.free(p_id); } else if (uniform_set_owner.owns(p_id)) { @@ -5370,18 +4515,21 @@ void RenderingDevice::set_resource_name(RID p_id, const String &p_name) { } void RenderingDevice::draw_command_begin_label(String p_label_name, const Color &p_color) { - _THREAD_SAFE_METHOD_ - context->command_begin_label(frames[frame].draw_command_buffer, p_label_name, p_color); + if (!context->is_debug_utils_enabled()) { + return; + } + + draw_graph.begin_label(p_label_name, p_color); } +#ifndef DISABLE_DEPRECATED void RenderingDevice::draw_command_insert_label(String p_label_name, const Color &p_color) { - _THREAD_SAFE_METHOD_ - context->command_insert_label(frames[frame].draw_command_buffer, p_label_name, p_color); + WARN_PRINT("Deprecated. 
Inserting labels no longer applies due to command reordering."); } +#endif void RenderingDevice::draw_command_end_label() { - _THREAD_SAFE_METHOD_ - context->command_end_label(frames[frame].draw_command_buffer); + draw_graph.end_label(); } String RenderingDevice::get_device_vendor_name() const { @@ -5404,7 +4552,7 @@ String RenderingDevice::get_device_pipeline_cache_uuid() const { return context->get_device_pipeline_cache_uuid(); } -void RenderingDevice::_finalize_command_bufers() { +void RenderingDevice::_finalize_command_buffers(bool p_postpare) { if (draw_list) { ERR_PRINT("Found open draw list at the end of the frame, this should never happen (further drawing will likely not work)."); } @@ -5413,7 +4561,13 @@ void RenderingDevice::_finalize_command_bufers() { ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work)."); } - { + { // Complete the setup buffer (that needs to be processed before anything else). + draw_graph.end(frames[frame].draw_command_buffer, RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS); + + if (p_postpare) { + context->postpare_buffers(frames[frame].draw_command_buffer); + } + driver->end_segment(); driver->command_buffer_end(frames[frame].setup_command_buffer); driver->command_buffer_end(frames[frame].draw_command_buffer); @@ -5421,6 +4575,8 @@ void RenderingDevice::_finalize_command_bufers() { } void RenderingDevice::_begin_frame() { + draw_graph.begin(); + // Erase pending resources. _free_pending_resources(frame); @@ -5464,8 +4620,7 @@ void RenderingDevice::swap_buffers() { ERR_FAIL_COND_MSG(local_device.is_valid(), "Local devices can't swap buffers."); _THREAD_SAFE_METHOD_ - context->postpare_buffers(frames[frame].draw_command_buffer); - _finalize_command_bufers(); + _finalize_command_buffers(true); // Swap buffers. 
if (!screen_prepared) { @@ -5486,7 +4641,7 @@ void RenderingDevice::submit() { ERR_FAIL_COND_MSG(local_device.is_null(), "Only local devices can submit and sync."); ERR_FAIL_COND_MSG(local_device_processing, "device already submitted, call sync to wait until done."); - _finalize_command_bufers(); + _finalize_command_buffers(false); RDD::CommandBufferID command_buffers[2] = { frames[frame].setup_command_buffer, frames[frame].draw_command_buffer }; context->local_device_push_command_buffers(local_device, command_buffers, 2); @@ -5618,11 +4773,14 @@ void RenderingDevice::_flush(bool p_current_frame) { if (local_device.is_valid() && !p_current_frame) { return; // Flushing previous frames has no effect with local device. } + // Not doing this crashes RADV (undefined behavior). if (p_current_frame) { + draw_graph.end(frames[frame].draw_command_buffer, RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS); driver->end_segment(); driver->command_buffer_end(frames[frame].setup_command_buffer); driver->command_buffer_end(frames[frame].draw_command_buffer); + draw_graph.begin(); } if (local_device.is_valid()) { @@ -5743,7 +4901,6 @@ void RenderingDevice::initialize(ApiContextRD *p_context, bool p_local_device) { draw_list = nullptr; draw_list_count = 0; - draw_list_split = false; compute_list = nullptr; @@ -5760,6 +4917,8 @@ void RenderingDevice::initialize(ApiContextRD *p_context, bool p_local_device) { pipelines_cache_size = driver->pipeline_cache_query_size(); print_verbose(vformat("Startup PSO cache (%.1f MiB)", pipelines_cache_size / (1024.0f * 1024.0f))); } + + draw_graph.initialize(driver, frame_count, SECONDARY_COMMAND_BUFFERS_PER_FRAME); } Vector RenderingDevice::_load_pipeline_cache() { @@ -5857,46 +5016,11 @@ void RenderingDevice::_free_rids(T &p_owner, const char *p_type) { void RenderingDevice::capture_timestamp(const String &p_name) { ERR_FAIL_COND_MSG(draw_list != nullptr, "Capturing timestamps during draw list creation is not allowed. 
Offending timestamp was: " + p_name); + ERR_FAIL_COND_MSG(compute_list != nullptr, "Capturing timestamps during compute list creation is not allowed. Offending timestamp was: " + p_name); ERR_FAIL_COND(frames[frame].timestamp_count >= max_timestamp_query_elements); - // This should be optional for profiling, else it will slow things down. - if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - RDD::MemoryBarrier mb; - mb.src_access = (RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT | - RDD::BARRIER_ACCESS_INDEX_READ_BIT | - RDD::BARRIER_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | - RDD::BARRIER_ACCESS_UNIFORM_READ_BIT | - RDD::BARRIER_ACCESS_INPUT_ATTACHMENT_READ_BIT | - RDD::BARRIER_ACCESS_SHADER_READ_BIT | - RDD::BARRIER_ACCESS_SHADER_WRITE_BIT | - RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT | - RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | - RDD::BARRIER_ACCESS_TRANSFER_READ_BIT | - RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT | - RDD::BARRIER_ACCESS_HOST_READ_BIT | - RDD::BARRIER_ACCESS_HOST_WRITE_BIT); - mb.dst_access = (RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT | - RDD::BARRIER_ACCESS_INDEX_READ_BIT | - RDD::BARRIER_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | - RDD::BARRIER_ACCESS_UNIFORM_READ_BIT | - RDD::BARRIER_ACCESS_INPUT_ATTACHMENT_READ_BIT | - RDD::BARRIER_ACCESS_SHADER_READ_BIT | - RDD::BARRIER_ACCESS_SHADER_WRITE_BIT | - RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT | - RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | - RDD::BARRIER_ACCESS_TRANSFER_READ_BIT | - RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT | - RDD::BARRIER_ACCESS_HOST_READ_BIT | - RDD::BARRIER_ACCESS_HOST_WRITE_BIT); + draw_graph.add_capture_timestamp(frames[frame].timestamp_pool, frames[frame].timestamp_count); - 
driver->command_pipeline_barrier(frames[frame].draw_command_buffer, RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, mb, {}, {}); - } - - driver->command_timestamp_write(frames[frame].draw_command_buffer, frames[frame].timestamp_pool, frames[frame].timestamp_count); frames[frame].timestamp_names[frames[frame].timestamp_count] = p_name; frames[frame].timestamp_cpu_values[frames[frame].timestamp_count] = OS::get_singleton()->get_ticks_usec(); frames[frame].timestamp_count++; @@ -6064,10 +5188,6 @@ void RenderingDevice::finalize() { driver->pipeline_cache_free(); } - for (int i = 0; i < split_draw_list_allocators.size(); i++) { - driver->command_pool_free(split_draw_list_allocators[i].command_pool); - } - frames.clear(); for (int i = 0; i < staging_buffer_blocks.size(); i++) { @@ -6106,7 +5226,7 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("texture_create_shared_from_slice", "view", "with_texture", "layer", "mipmap", "mipmaps", "slice_type"), &RenderingDevice::_texture_create_shared_from_slice, DEFVAL(1), DEFVAL(TEXTURE_SLICE_2D)); ClassDB::bind_method(D_METHOD("texture_create_from_extension", "type", "format", "samples", "usage_flags", "image", "width", "height", "depth", "layers"), &RenderingDevice::texture_create_from_extension); - ClassDB::bind_method(D_METHOD("texture_update", "texture", "layer", "data", "post_barrier"), &RenderingDevice::texture_update, DEFVAL(BARRIER_MASK_ALL_BARRIERS)); + ClassDB::bind_method(D_METHOD("texture_update", "texture", "layer", "data"), &RenderingDevice::texture_update); ClassDB::bind_method(D_METHOD("texture_get_data", "texture", "layer"), &RenderingDevice::texture_get_data); ClassDB::bind_method(D_METHOD("texture_is_format_supported_for_usage", "format", "usage_flags"), &RenderingDevice::texture_is_format_supported_for_usage); @@ -6114,9 +5234,9 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("texture_is_shared", "texture"), 
&RenderingDevice::texture_is_shared); ClassDB::bind_method(D_METHOD("texture_is_valid", "texture"), &RenderingDevice::texture_is_valid); - ClassDB::bind_method(D_METHOD("texture_copy", "from_texture", "to_texture", "from_pos", "to_pos", "size", "src_mipmap", "dst_mipmap", "src_layer", "dst_layer", "post_barrier"), &RenderingDevice::texture_copy, DEFVAL(BARRIER_MASK_ALL_BARRIERS)); - ClassDB::bind_method(D_METHOD("texture_clear", "texture", "color", "base_mipmap", "mipmap_count", "base_layer", "layer_count", "post_barrier"), &RenderingDevice::texture_clear, DEFVAL(BARRIER_MASK_ALL_BARRIERS)); - ClassDB::bind_method(D_METHOD("texture_resolve_multisample", "from_texture", "to_texture", "post_barrier"), &RenderingDevice::texture_resolve_multisample, DEFVAL(BARRIER_MASK_ALL_BARRIERS)); + ClassDB::bind_method(D_METHOD("texture_copy", "from_texture", "to_texture", "from_pos", "to_pos", "size", "src_mipmap", "dst_mipmap", "src_layer", "dst_layer"), &RenderingDevice::texture_copy); + ClassDB::bind_method(D_METHOD("texture_clear", "texture", "color", "base_mipmap", "mipmap_count", "base_layer", "layer_count"), &RenderingDevice::texture_clear); + ClassDB::bind_method(D_METHOD("texture_resolve_multisample", "from_texture", "to_texture"), &RenderingDevice::texture_resolve_multisample); ClassDB::bind_method(D_METHOD("texture_get_format", "texture"), &RenderingDevice::_texture_get_format); #ifndef DISABLE_DEPRECATED @@ -6158,8 +5278,9 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("uniform_set_create", "uniforms", "shader", "shader_set"), &RenderingDevice::_uniform_set_create); ClassDB::bind_method(D_METHOD("uniform_set_is_valid", "uniform_set"), &RenderingDevice::uniform_set_is_valid); - ClassDB::bind_method(D_METHOD("buffer_update", "buffer", "offset", "size_bytes", "data", "post_barrier"), &RenderingDevice::_buffer_update_bind, DEFVAL(BARRIER_MASK_ALL_BARRIERS)); - ClassDB::bind_method(D_METHOD("buffer_clear", "buffer", "offset", "size_bytes", 
"post_barrier"), &RenderingDevice::buffer_clear, DEFVAL(BARRIER_MASK_ALL_BARRIERS)); + ClassDB::bind_method(D_METHOD("buffer_copy", "src_buffer", "dst_buffer", "src_offset", "dst_offset", "size"), &RenderingDevice::buffer_copy); + ClassDB::bind_method(D_METHOD("buffer_update", "buffer", "offset", "size_bytes", "data"), &RenderingDevice::_buffer_update_bind); + ClassDB::bind_method(D_METHOD("buffer_clear", "buffer", "offset", "size_bytes"), &RenderingDevice::buffer_clear); ClassDB::bind_method(D_METHOD("buffer_get_data", "buffer", "offset_bytes", "size_bytes"), &RenderingDevice::buffer_get_data, DEFVAL(0), DEFVAL(0)); ClassDB::bind_method(D_METHOD("render_pipeline_create", "shader", "framebuffer_format", "vertex_format", "primitive", "rasterization_state", "multisample_state", "stencil_state", "color_blend_state", "dynamic_state_flags", "for_render_pass", "specialization_constants"), &RenderingDevice::_render_pipeline_create, DEFVAL(0), DEFVAL(0), DEFVAL(TypedArray())); @@ -6174,8 +5295,10 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("draw_list_begin_for_screen", "screen", "clear_color"), &RenderingDevice::draw_list_begin_for_screen, DEFVAL(DisplayServer::MAIN_WINDOW_ID), DEFVAL(Color())); - ClassDB::bind_method(D_METHOD("draw_list_begin", "framebuffer", "initial_color_action", "final_color_action", "initial_depth_action", "final_depth_action", "clear_color_values", "clear_depth", "clear_stencil", "region", "storage_textures"), &RenderingDevice::_draw_list_begin, DEFVAL(Vector()), DEFVAL(1.0), DEFVAL(0), DEFVAL(Rect2()), DEFVAL(TypedArray())); + ClassDB::bind_method(D_METHOD("draw_list_begin", "framebuffer", "initial_color_action", "final_color_action", "initial_depth_action", "final_depth_action", "clear_color_values", "clear_depth", "clear_stencil", "region"), &RenderingDevice::draw_list_begin, DEFVAL(Vector()), DEFVAL(1.0), DEFVAL(0), DEFVAL(Rect2())); +#ifndef DISABLE_DEPRECATED ClassDB::bind_method(D_METHOD("draw_list_begin_split", 
"framebuffer", "splits", "initial_color_action", "final_color_action", "initial_depth_action", "final_depth_action", "clear_color_values", "clear_depth", "clear_stencil", "region", "storage_textures"), &RenderingDevice::_draw_list_begin_split, DEFVAL(Vector()), DEFVAL(1.0), DEFVAL(0), DEFVAL(Rect2()), DEFVAL(TypedArray())); +#endif ClassDB::bind_method(D_METHOD("draw_list_set_blend_constants", "draw_list", "color"), &RenderingDevice::draw_list_set_blend_constants); ClassDB::bind_method(D_METHOD("draw_list_bind_render_pipeline", "draw_list", "render_pipeline"), &RenderingDevice::draw_list_bind_render_pipeline); @@ -6190,17 +5313,19 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("draw_list_disable_scissor", "draw_list"), &RenderingDevice::draw_list_disable_scissor); ClassDB::bind_method(D_METHOD("draw_list_switch_to_next_pass"), &RenderingDevice::draw_list_switch_to_next_pass); +#ifndef DISABLE_DEPRECATED ClassDB::bind_method(D_METHOD("draw_list_switch_to_next_pass_split", "splits"), &RenderingDevice::_draw_list_switch_to_next_pass_split); +#endif - ClassDB::bind_method(D_METHOD("draw_list_end", "post_barrier"), &RenderingDevice::draw_list_end, DEFVAL(BARRIER_MASK_ALL_BARRIERS)); + ClassDB::bind_method(D_METHOD("draw_list_end"), &RenderingDevice::draw_list_end); - ClassDB::bind_method(D_METHOD("compute_list_begin", "allow_draw_overlap"), &RenderingDevice::compute_list_begin, DEFVAL(false)); + ClassDB::bind_method(D_METHOD("compute_list_begin"), &RenderingDevice::compute_list_begin); ClassDB::bind_method(D_METHOD("compute_list_bind_compute_pipeline", "compute_list", "compute_pipeline"), &RenderingDevice::compute_list_bind_compute_pipeline); ClassDB::bind_method(D_METHOD("compute_list_set_push_constant", "compute_list", "buffer", "size_bytes"), &RenderingDevice::_compute_list_set_push_constant); ClassDB::bind_method(D_METHOD("compute_list_bind_uniform_set", "compute_list", "uniform_set", "set_index"), 
&RenderingDevice::compute_list_bind_uniform_set); ClassDB::bind_method(D_METHOD("compute_list_dispatch", "compute_list", "x_groups", "y_groups", "z_groups"), &RenderingDevice::compute_list_dispatch); ClassDB::bind_method(D_METHOD("compute_list_add_barrier", "compute_list"), &RenderingDevice::compute_list_add_barrier); - ClassDB::bind_method(D_METHOD("compute_list_end", "post_barrier"), &RenderingDevice::compute_list_end, DEFVAL(BARRIER_MASK_ALL_BARRIERS)); + ClassDB::bind_method(D_METHOD("compute_list_end"), &RenderingDevice::compute_list_end); ClassDB::bind_method(D_METHOD("free_rid", "rid"), &RenderingDevice::free); @@ -6216,15 +5341,19 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("submit"), &RenderingDevice::submit); ClassDB::bind_method(D_METHOD("sync"), &RenderingDevice::sync); +#ifndef DISABLE_DEPRECATED ClassDB::bind_method(D_METHOD("barrier", "from", "to"), &RenderingDevice::barrier, DEFVAL(BARRIER_MASK_ALL_BARRIERS), DEFVAL(BARRIER_MASK_ALL_BARRIERS)); ClassDB::bind_method(D_METHOD("full_barrier"), &RenderingDevice::full_barrier); +#endif ClassDB::bind_method(D_METHOD("create_local_device"), &RenderingDevice::create_local_device); ClassDB::bind_method(D_METHOD("set_resource_name", "id", "name"), &RenderingDevice::set_resource_name); ClassDB::bind_method(D_METHOD("draw_command_begin_label", "name", "color"), &RenderingDevice::draw_command_begin_label); +#ifndef DISABLE_DEPRECATED ClassDB::bind_method(D_METHOD("draw_command_insert_label", "name", "color"), &RenderingDevice::draw_command_insert_label); +#endif ClassDB::bind_method(D_METHOD("draw_command_end_label"), &RenderingDevice::draw_command_end_label); ClassDB::bind_method(D_METHOD("get_device_vendor_name"), &RenderingDevice::get_device_vendor_name); @@ -6491,6 +5620,7 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(DATA_FORMAT_G16_B16_R16_3PLANE_444_UNORM); BIND_ENUM_CONSTANT(DATA_FORMAT_MAX); +#ifndef DISABLE_DEPRECATED BIND_BITFIELD_FLAG(BARRIER_MASK_VERTEX); 
BIND_BITFIELD_FLAG(BARRIER_MASK_FRAGMENT); BIND_BITFIELD_FLAG(BARRIER_MASK_COMPUTE); @@ -6498,6 +5628,7 @@ void RenderingDevice::_bind_methods() { BIND_BITFIELD_FLAG(BARRIER_MASK_RASTER); BIND_BITFIELD_FLAG(BARRIER_MASK_ALL_BARRIERS); BIND_BITFIELD_FLAG(BARRIER_MASK_NO_BARRIER); +#endif BIND_ENUM_CONSTANT(TEXTURE_TYPE_1D); BIND_ENUM_CONSTANT(TEXTURE_TYPE_2D); @@ -6672,18 +5803,25 @@ void RenderingDevice::_bind_methods() { BIND_BITFIELD_FLAG(DYNAMIC_STATE_STENCIL_WRITE_MASK); BIND_BITFIELD_FLAG(DYNAMIC_STATE_STENCIL_REFERENCE); - BIND_ENUM_CONSTANT(INITIAL_ACTION_CLEAR); //start rendering and clear the framebuffer (supply params) - BIND_ENUM_CONSTANT(INITIAL_ACTION_CLEAR_REGION); //start rendering and clear the framebuffer (supply params) - BIND_ENUM_CONSTANT(INITIAL_ACTION_CLEAR_REGION_CONTINUE); //continue rendering and clear the framebuffer (supply params) - BIND_ENUM_CONSTANT(INITIAL_ACTION_KEEP); //start rendering); but keep attached color texture contents (depth will be cleared) - BIND_ENUM_CONSTANT(INITIAL_ACTION_DROP); //start rendering); ignore what is there); just write above it - BIND_ENUM_CONSTANT(INITIAL_ACTION_CONTINUE); //continue rendering (framebuffer must have been left in "continue" state as final action previously) + BIND_ENUM_CONSTANT(INITIAL_ACTION_LOAD); + BIND_ENUM_CONSTANT(INITIAL_ACTION_CLEAR); + BIND_ENUM_CONSTANT(INITIAL_ACTION_DISCARD); BIND_ENUM_CONSTANT(INITIAL_ACTION_MAX); +#ifndef DISABLE_DEPRECATED + BIND_ENUM_CONSTANT(INITIAL_ACTION_CLEAR_REGION); + BIND_ENUM_CONSTANT(INITIAL_ACTION_CLEAR_REGION_CONTINUE); + BIND_ENUM_CONSTANT(INITIAL_ACTION_KEEP); + BIND_ENUM_CONSTANT(INITIAL_ACTION_DROP); + BIND_ENUM_CONSTANT(INITIAL_ACTION_CONTINUE); +#endif - BIND_ENUM_CONSTANT(FINAL_ACTION_READ); //will no longer render to it); allows attached textures to be read again); but depth buffer contents will be dropped (Can't be read from) - BIND_ENUM_CONSTANT(FINAL_ACTION_DISCARD); // discard contents after rendering - 
BIND_ENUM_CONSTANT(FINAL_ACTION_CONTINUE); //will continue rendering later); attached textures can't be read until re-bound with "finish" + BIND_ENUM_CONSTANT(FINAL_ACTION_STORE); + BIND_ENUM_CONSTANT(FINAL_ACTION_DISCARD); BIND_ENUM_CONSTANT(FINAL_ACTION_MAX); +#ifndef DISABLE_DEPRECATED + BIND_ENUM_CONSTANT(FINAL_ACTION_READ); + BIND_ENUM_CONSTANT(FINAL_ACTION_CONTINUE); +#endif BIND_ENUM_CONSTANT(SHADER_STAGE_VERTEX); BIND_ENUM_CONSTANT(SHADER_STAGE_FRAGMENT); @@ -6950,8 +6088,8 @@ RID RenderingDevice::_uniform_set_create(const TypedArray &p_uniforms return uniform_set_create(uniforms, p_shader, p_shader_set); } -Error RenderingDevice::_buffer_update_bind(RID p_buffer, uint32_t p_offset, uint32_t p_size, const Vector &p_data, BitField p_post_barrier) { - return buffer_update(p_buffer, p_offset, p_size, p_data.ptr(), p_post_barrier); +Error RenderingDevice::_buffer_update_bind(RID p_buffer, uint32_t p_offset, uint32_t p_size, const Vector &p_data) { + return buffer_update(p_buffer, p_offset, p_size, p_data.ptr()); } static Vector _get_spec_constants(const TypedArray &p_constants) { @@ -7022,47 +6160,15 @@ RID RenderingDevice::_compute_pipeline_create(RID p_shader, const TypedArray &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const TypedArray &p_storage_textures) { - Vector stextures; - for (int i = 0; i < p_storage_textures.size(); i++) { - stextures.push_back(p_storage_textures[i]); - } - return draw_list_begin(p_framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, stextures); -} - +#ifndef DISABLE_DEPRECATED Vector RenderingDevice::_draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values, float p_clear_depth, uint32_t 
p_clear_stencil, const Rect2 &p_region, const TypedArray &p_storage_textures) { - Vector splits; - splits.resize(p_splits); - Vector stextures; - for (int i = 0; i < p_storage_textures.size(); i++) { - stextures.push_back(p_storage_textures[i]); - } - draw_list_begin_split(p_framebuffer, p_splits, splits.ptrw(), p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, stextures); - - Vector split_ids; - split_ids.resize(splits.size()); - for (int i = 0; i < splits.size(); i++) { - split_ids.write[i] = splits[i]; - } - - return split_ids; + ERR_FAIL_V_MSG(Vector(), "Deprecated. Split draw lists are used automatically by RenderingDevice."); } Vector RenderingDevice::_draw_list_switch_to_next_pass_split(uint32_t p_splits) { - Vector splits; - splits.resize(p_splits); - - Error err = draw_list_switch_to_next_pass_split(p_splits, splits.ptrw()); - ERR_FAIL_COND_V(err != OK, Vector()); - - Vector split_ids; - split_ids.resize(splits.size()); - for (int i = 0; i < splits.size(); i++) { - split_ids.write[i] = splits[i]; - } - - return split_ids; + ERR_FAIL_V_MSG(Vector(), "Deprecated. 
Split draw lists are used automatically by RenderingDevice."); } +#endif void RenderingDevice::_draw_list_set_push_constant(DrawListID p_list, const Vector &p_data, uint32_t p_data_size) { ERR_FAIL_COND((uint32_t)p_data.size() > p_data_size); diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 90ed2828c07..50353fa267b 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -42,6 +42,7 @@ #include "servers/rendering/rendering_device.h" #include "servers/rendering/rendering_device_commons.h" #include "servers/rendering/rendering_device_driver.h" +#include "servers/rendering/rendering_device_graph.h" class RDTextureFormat; class RDTextureView; @@ -93,6 +94,9 @@ public: uint32_t version_minor = 0; }; + typedef int64_t DrawListID; + typedef int64_t ComputeListID; + typedef String (*ShaderSPIRVGetCacheKeyFunction)(const RenderingDevice *p_render_device); typedef Vector (*ShaderCompileToSPIRVFunction)(ShaderStage p_stage, const String &p_source_code, ShaderLanguage p_language, String *r_error, const RenderingDevice *p_render_device); typedef Vector (*ShaderCacheFunction)(ShaderStage p_stage, const String &p_source_code, ShaderLanguage p_language); @@ -131,8 +135,7 @@ public: ID_TYPE_FRAMEBUFFER_FORMAT, ID_TYPE_VERTEX_FORMAT, ID_TYPE_DRAW_LIST, - ID_TYPE_SPLIT_DRAW_LIST, - ID_TYPE_COMPUTE_LIST, + ID_TYPE_COMPUTE_LIST = 4, ID_TYPE_MAX, ID_BASE_SHIFT = 58, // 5 bits for ID types. 
ID_MASK = (ID_BASE_SHIFT - 1), @@ -145,25 +148,7 @@ private: void _add_dependency(RID p_id, RID p_depends_on); void _free_dependencies(RID p_id); - /*****************/ - /**** BARRIER ****/ - /*****************/ - -public: - enum BarrierMask { - BARRIER_MASK_VERTEX = 1, - BARRIER_MASK_FRAGMENT = 8, - BARRIER_MASK_COMPUTE = 2, - BARRIER_MASK_TRANSFER = 4, - - BARRIER_MASK_RASTER = BARRIER_MASK_VERTEX | BARRIER_MASK_FRAGMENT, // 9, - BARRIER_MASK_ALL_BARRIERS = 0x7FFF, // all flags set - BARRIER_MASK_NO_BARRIER = 0x8000, - }; - private: - void _full_barrier(bool p_sync_with_draw); - /***************************/ /**** BUFFER MANAGEMENT ****/ /***************************/ @@ -201,26 +186,34 @@ private: uint64_t staging_buffer_max_size = 0; bool staging_buffer_used = false; - Error _staging_buffer_allocate(uint32_t p_amount, uint32_t p_required_align, uint32_t &r_alloc_offset, uint32_t &r_alloc_size, bool p_can_segment = true); + enum StagingRequiredAction { + STAGING_REQUIRED_ACTION_NONE, + STAGING_REQUIRED_ACTION_FLUSH_CURRENT, + STAGING_REQUIRED_ACTION_FLUSH_OLDER + }; + + Error _staging_buffer_allocate(uint32_t p_amount, uint32_t p_required_align, uint32_t &r_alloc_offset, uint32_t &r_alloc_size, StagingRequiredAction &r_required_action, bool p_can_segment = true); + void _staging_buffer_execute_required_action(StagingRequiredAction p_required_action); Error _insert_staging_block(); struct Buffer { RDD::BufferID driver_id; uint32_t size = 0; BitField usage; + RDG::ResourceTracker *draw_tracker = nullptr; }; - Buffer *_get_buffer_from_owner(RID p_buffer, BitField &r_stages, BitField &r_access, BitField p_post_barrier); - Error _buffer_update(Buffer *p_buffer, size_t p_offset, const uint8_t *p_data, size_t p_data_size, bool p_use_draw_command_buffer = false, uint32_t p_required_align = 32); + Buffer *_get_buffer_from_owner(RID p_buffer); + Error _buffer_update(Buffer *p_buffer, RID p_buffer_id, size_t p_offset, const uint8_t *p_data, size_t p_data_size, bool 
p_use_draw_queue = false, uint32_t p_required_align = 32); RID_Owner uniform_buffer_owner; RID_Owner storage_buffer_owner; RID_Owner texture_buffer_owner; public: - Error buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t p_src_offset, uint32_t p_dst_offset, uint32_t p_size, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS); - Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS); - Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS); + Error buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t p_src_offset, uint32_t p_dst_offset, uint32_t p_size); + Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data); + Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size); Vector buffer_get_data(RID p_buffer, uint32_t p_offset = 0, uint32_t p_size = 0); // This causes stall, only use to retrieve large buffers for saving. /*****************/ @@ -245,6 +238,8 @@ public: TextureType type = TEXTURE_TYPE_MAX; DataFormat format = DATA_FORMAT_MAX; TextureSamples samples = TEXTURE_SAMPLES_MAX; + TextureSliceType slice_type = TEXTURE_SLICE_MAX; + Rect2i slice_rect; uint32_t width = 0; uint32_t height = 0; uint32_t depth = 0; @@ -256,26 +251,33 @@ public: Vector allowed_shared_formats; - RDD::TextureLayout layout = RDD::TEXTURE_LAYOUT_UNDEFINED; - - uint64_t used_in_frame = 0; - bool used_in_transfer = false; - bool used_in_raster = false; - bool used_in_compute = false; - bool is_resolve_buffer = false; + bool has_initial_data = false; BitField read_aspect_flags; BitField barrier_aspect_flags; - bool bound = false; // Bound to framebffer. + bool bound = false; // Bound to framebuffer. 
RID owner; + + RDG::ResourceTracker *draw_tracker = nullptr; + HashMap slice_trackers; + + RDD::TextureSubresourceRange barrier_range() const { + RDD::TextureSubresourceRange r; + r.aspect = barrier_aspect_flags; + r.base_mipmap = base_mipmap; + r.mipmap_count = mipmaps; + r.base_layer = base_layer; + r.layer_count = layers; + return r; + } }; RID_Owner texture_owner; uint32_t texture_upload_region_size_px = 0; Vector _texture_get_data(Texture *tex, uint32_t p_layer, bool p_2d = false); - Error _texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data, BitField p_post_barrier, bool p_use_setup_queue); + Error _texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data, bool p_use_setup_queue, bool p_validate_can_update); public: struct TextureView { @@ -306,7 +308,7 @@ public: RID texture_create_shared(const TextureView &p_view, RID p_with_texture); RID texture_create_from_extension(TextureType p_type, DataFormat p_format, TextureSamples p_samples, BitField p_usage, uint64_t p_image, uint64_t p_width, uint64_t p_height, uint64_t p_depth, uint64_t p_layers); RID texture_create_shared_from_slice(const TextureView &p_view, RID p_with_texture, uint32_t p_layer, uint32_t p_mipmap, uint32_t p_mipmaps = 1, TextureSliceType p_slice_type = TEXTURE_SLICE_2D, uint32_t p_layers = 0); - Error texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS); + Error texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data); Vector texture_get_data(RID p_texture, uint32_t p_layer); // CPU textures will return immediately, while GPU textures will most likely force a flush bool texture_is_format_supported_for_usage(DataFormat p_format, BitField p_usage) const; @@ -318,29 +320,36 @@ public: uint64_t texture_get_native_handle(RID p_texture); #endif - Error texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t 
p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS); - Error texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS); - Error texture_resolve_multisample(RID p_from_texture, RID p_to_texture, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS); + Error texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer); + Error texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers); + Error texture_resolve_multisample(RID p_from_texture, RID p_to_texture); /************************/ /**** DRAW LISTS (I) ****/ /************************/ enum InitialAction { - INITIAL_ACTION_CLEAR, // Start rendering and clear the whole framebuffer. - INITIAL_ACTION_CLEAR_REGION, // Start rendering and clear the framebuffer in the specified region. - INITIAL_ACTION_CLEAR_REGION_CONTINUE, // Continue rendering and clear the framebuffer in the specified region. Framebuffer must have been left in `FINAL_ACTION_CONTINUE` state as the final action previously. - INITIAL_ACTION_KEEP, // Start rendering, but keep attached color texture contents. If the framebuffer was previously used to read in a shader, this will automatically insert a layout transition. - INITIAL_ACTION_DROP, // Start rendering, ignore what is there; write above it. In general, this is the fastest option when you will be writing every single pixel and you don't need a clear color. - INITIAL_ACTION_CONTINUE, // Continue rendering. Framebuffer must have been left in `FINAL_ACTION_CONTINUE` state as the final action previously. 
- INITIAL_ACTION_MAX + INITIAL_ACTION_LOAD, + INITIAL_ACTION_CLEAR, + INITIAL_ACTION_DISCARD, + INITIAL_ACTION_MAX, +#ifndef DISABLE_DEPRECATED + INITIAL_ACTION_CLEAR_REGION = INITIAL_ACTION_CLEAR, + INITIAL_ACTION_CLEAR_REGION_CONTINUE = INITIAL_ACTION_LOAD, + INITIAL_ACTION_KEEP = INITIAL_ACTION_LOAD, + INITIAL_ACTION_DROP = INITIAL_ACTION_DISCARD, + INITIAL_ACTION_CONTINUE = INITIAL_ACTION_LOAD, +#endif }; enum FinalAction { - FINAL_ACTION_READ, // Store the texture for reading and make it read-only if it has the `TEXTURE_USAGE_SAMPLING_BIT` bit (only applies to color, depth and stencil attachments). - FINAL_ACTION_DISCARD, // Discard the texture data and make it read-only if it has the `TEXTURE_USAGE_SAMPLING_BIT` bit (only applies to color, depth and stencil attachments). - FINAL_ACTION_CONTINUE, // Store the texture and continue for further processing. Similar to `FINAL_ACTION_READ`, but does not make the texture read-only if it has the `TEXTURE_USAGE_SAMPLING_BIT` bit. - FINAL_ACTION_MAX + FINAL_ACTION_STORE, + FINAL_ACTION_DISCARD, + FINAL_ACTION_MAX, +#ifndef DISABLE_DEPRECATED + FINAL_ACTION_READ = FINAL_ACTION_STORE, + FINAL_ACTION_CONTINUE = FINAL_ACTION_STORE, +#endif }; /*********************/ @@ -668,7 +677,9 @@ private: uint32_t max_instances_allowed = 0; Vector buffers; // Not owned, just referenced. + Vector draw_trackers; // Not owned, just referenced. Vector offsets; + HashSet untracked_buffers; }; RID_Owner vertex_array_owner; @@ -685,6 +696,7 @@ private: struct IndexArray { uint32_t max_index = 0; // Remember the maximum index here too, for validation. RDD::BufferID driver_id; // Not owned, inherited from index buffer. + RDG::ResourceTracker *draw_tracker = nullptr; // Not owned, inherited from index buffer. uint32_t offset = 0; uint32_t indices = 0; IndexBufferFormat format = INDEX_BUFFER_FORMAT_UINT16; @@ -762,6 +774,7 @@ private: String name; // Used for debug. 
RDD::ShaderID driver_id; uint32_t layout_hash = 0; + BitField stage_bits; Vector set_formats; }; @@ -770,10 +783,42 @@ private: RID_Owner shader_owner; #ifndef DISABLE_DEPRECATED - BitField _convert_barrier_mask_81356(BitField p_old_barrier); +public: + enum BarrierMask{ + BARRIER_MASK_VERTEX = 1, + BARRIER_MASK_FRAGMENT = 8, + BARRIER_MASK_COMPUTE = 2, + BARRIER_MASK_TRANSFER = 4, + + BARRIER_MASK_RASTER = BARRIER_MASK_VERTEX | BARRIER_MASK_FRAGMENT, // 9, + BARRIER_MASK_ALL_BARRIERS = 0x7FFF, // all flags set + BARRIER_MASK_NO_BARRIER = 0x8000, + }; + + void barrier(BitField p_from = BARRIER_MASK_ALL_BARRIERS, BitField p_to = BARRIER_MASK_ALL_BARRIERS); + void full_barrier(); + void draw_command_insert_label(String p_label_name, const Color &p_color = Color(1, 1, 1, 1)); + Error draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, DrawListID *r_split_ids, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values = Vector(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2(), const Vector &p_storage_textures = Vector()); + Error draw_list_switch_to_next_pass_split(uint32_t p_splits, DrawListID *r_split_ids); + Vector _draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values = Vector(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2(), const TypedArray &p_storage_textures = TypedArray()); + Vector _draw_list_switch_to_next_pass_split(uint32_t p_splits); + +private: void _draw_list_end_bind_compat_81356(BitField p_post_barrier); void _compute_list_end_bind_compat_81356(BitField p_post_barrier); void _barrier_bind_compat_81356(BitField p_from, BitField p_to); + void 
_draw_list_end_bind_compat_84976(BitField p_post_barrier); + void _compute_list_end_bind_compat_84976(BitField p_post_barrier); + InitialAction _convert_initial_action_84976(InitialAction p_old_initial_action); + FinalAction _convert_final_action_84976(FinalAction p_old_final_action); + DrawListID _draw_list_begin_bind_compat_84976(RID p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const TypedArray &p_storage_textures); + ComputeListID _compute_list_begin_bind_compat_84976(bool p_allow_draw_overlap); + Error _buffer_update_bind_compat_84976(RID p_buffer, uint32_t p_offset, uint32_t p_size, const Vector &p_data, BitField p_post_barrier); + Error _buffer_clear_bind_compat_84976(RID p_buffer, uint32_t p_offset, uint32_t p_size, BitField p_post_barrier); + Error _texture_update_bind_compat_84976(RID p_texture, uint32_t p_layer, const Vector &p_data, BitField p_post_barrier); + Error _texture_copy_bind_compat_84976(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, BitField p_post_barrier); + Error _texture_clear_bind_compat_84976(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, BitField p_post_barrier); + Error _texture_resolve_multisample_bind_compat_84976(RID p_from_texture, RID p_to_texture, BitField p_post_barrier); #endif public: @@ -875,6 +920,9 @@ public: }; private: + static const uint32_t MAX_UNIFORM_SETS = 16; + static const uint32_t MAX_PUSH_CONSTANT_SIZE = 128; + // This structure contains the descriptor set. 
They _need_ to be allocated // for a shader (and will be erased when this shader is erased), but should // work for other shaders as long as the hash matches. This covers using @@ -894,8 +942,9 @@ private: }; LocalVector attachable_textures; // Used for validation. - Vector mutable_sampled_textures; // Used for layout change. - Vector mutable_storage_textures; // Used for layout change. + Vector draw_trackers; + Vector draw_trackers_usage; + HashMap untracked_usage; InvalidationCallback invalidated_callback = nullptr; void *invalidated_callback_userdata = nullptr; }; @@ -941,6 +990,7 @@ private: uint32_t shader_layout_hash = 0; Vector set_formats; RDD::PipelineID driver_id; + BitField stage_bits; uint32_t push_constant_size = 0; }; @@ -986,8 +1036,6 @@ public: /**** DRAW LISTS (II) ****/ /*************************/ - typedef int64_t DrawListID; - private: // Draw list contains both the command buffer // used for drawing as well as a LOT of @@ -995,20 +1043,7 @@ private: // validation is cheap so most of it can // also run in release builds. - // When using split command lists, this is - // implemented internally using secondary command - // buffers. As they can be created in threads, - // each needs its own command pool. - - struct SplitDrawListAllocator { - RDD::CommandPoolID command_pool; - Vector command_buffers; // One for each frame. - }; - - Vector split_draw_list_allocators; - struct DrawList { - RDD::CommandBufferID command_buffer; // If persistent, this is owned, otherwise it's shared with the ringbuffer. Rect2i viewport; bool viewport_set = false; @@ -1066,7 +1101,7 @@ private: #endif }; - DrawList *draw_list = nullptr; // One for regular draw lists, multiple for split. 
+ DrawList *draw_list = nullptr; uint32_t draw_list_subpass_count = 0; uint32_t draw_list_count = 0; RDD::RenderPassID draw_list_render_pass; @@ -1076,23 +1111,20 @@ private: #endif uint32_t draw_list_current_subpass = 0; - bool draw_list_split = false; Vector draw_list_bound_textures; - Vector draw_list_storage_textures; - bool draw_list_unbind_color_textures = false; - bool draw_list_unbind_depth_textures = false; void _draw_list_insert_clear_region(DrawList *p_draw_list, Framebuffer *p_framebuffer, Point2i p_viewport_offset, Point2i p_viewport_size, bool p_clear_color, const Vector &p_clear_colors, bool p_clear_depth, float p_depth, uint32_t p_stencil); Error _draw_list_setup_framebuffer(Framebuffer *p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, RDD::FramebufferID *r_framebuffer, RDD::RenderPassID *r_render_pass, uint32_t *r_subpass_count); - Error _draw_list_render_pass_begin(Framebuffer *p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_colors, float p_clear_depth, uint32_t p_clear_stencil, Point2i p_viewport_offset, Point2i p_viewport_size, RDD::FramebufferID p_framebuffer_driver_id, RDD::RenderPassID p_render_pass, RDD::CommandBufferID p_command_buffer, RDD::CommandBufferType p_cmd_buffer_mode, const Vector &p_storage_textures, bool p_constrained_to_region); + Error _draw_list_render_pass_begin(Framebuffer *p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_colors, float p_clear_depth, uint32_t p_clear_stencil, Point2i p_viewport_offset, Point2i p_viewport_size, RDD::FramebufferID p_framebuffer_driver_id, RDD::RenderPassID p_render_pass); + void _draw_list_set_viewport(Rect2i p_rect); + void 
_draw_list_set_scissor(Rect2i p_rect); _FORCE_INLINE_ DrawList *_get_draw_list_ptr(DrawListID p_id); - Error _draw_list_allocate(const Rect2i &p_viewport, uint32_t p_splits, uint32_t p_subpass); + Error _draw_list_allocate(const Rect2i &p_viewport, uint32_t p_subpass); void _draw_list_free(Rect2i *r_last_viewport = nullptr); public: DrawListID draw_list_begin_for_screen(DisplayServer::WindowID p_screen = 0, const Color &p_clear_color = Color()); - DrawListID draw_list_begin(RID p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values = Vector(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2(), const Vector &p_storage_textures = Vector()); - Error draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, DrawListID *r_split_ids, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values = Vector(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2(), const Vector &p_storage_textures = Vector()); + DrawListID draw_list_begin(RID p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values = Vector(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2()); void draw_list_set_blend_constants(DrawListID p_list, const Color &p_color); void draw_list_bind_render_pipeline(DrawListID p_list, RID p_render_pipeline); @@ -1109,20 +1141,15 @@ public: uint32_t draw_list_get_current_pass(); DrawListID draw_list_switch_to_next_pass(); - Error draw_list_switch_to_next_pass_split(uint32_t p_splits, DrawListID *r_split_ids); - void draw_list_end(BitField p_post_barrier = 
BARRIER_MASK_ALL_BARRIERS); + void draw_list_end(); +private: /***********************/ /**** COMPUTE LISTS ****/ /***********************/ - typedef int64_t ComputeListID; - -private: struct ComputeList { - RDD::CommandBufferID command_buffer; // If persistent, this is owned, otherwise it's shared with the ringbuffer. - struct SetState { uint32_t pipeline_expected_format = 0; uint32_t uniform_set_format = 0; @@ -1132,7 +1159,6 @@ private: }; struct State { - HashSet textures_to_sampled_layout; SetState sets[MAX_UNIFORM_SETS]; uint32_t set_count = 0; RID pipeline; @@ -1140,7 +1166,8 @@ private: RDD::ShaderID pipeline_shader_driver_id; uint32_t pipeline_shader_layout_hash = 0; uint32_t local_group_size[3] = { 0, 0, 0 }; - bool allow_draw_overlap; + uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE] = {}; + uint32_t push_constant_size = 0; } state; #ifdef DEBUG_ENABLED @@ -1160,11 +1187,10 @@ private: }; ComputeList *compute_list = nullptr; - - void _compute_list_add_barrier(BitField p_post_barrier, BitField p_stages, BitField p_access); + ComputeList::State compute_list_barrier_state; public: - ComputeListID compute_list_begin(bool p_allow_draw_overlap = false); + ComputeListID compute_list_begin(); void compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline); void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index); void compute_list_set_push_constant(ComputeListID p_list, const void *p_data, uint32_t p_data_size); @@ -1173,10 +1199,22 @@ public: void compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset); void compute_list_add_barrier(ComputeListID p_list); - void compute_list_end(BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS); + void compute_list_end(); - void barrier(BitField p_from = BARRIER_MASK_ALL_BARRIERS, BitField p_to = BARRIER_MASK_ALL_BARRIERS); - void full_barrier(); +private: + /***********************/ + /**** COMMAND GRAPH ****/ + 
/***********************/ + + bool _texture_make_mutable(Texture *p_texture, RID p_texture_id); + bool _buffer_make_mutable(Buffer *p_buffer, RID p_buffer_id); + bool _vertex_array_make_mutable(VertexArray *p_vertex_array, RID p_resource_id, RDG::ResourceTracker *p_resource_tracker); + bool _index_array_make_mutable(IndexArray *p_index_array, RDG::ResourceTracker *p_resource_tracker); + bool _uniform_set_make_mutable(UniformSet *p_uniform_set, RID p_resource_id, RDG::ResourceTracker *p_resource_tracker); + bool _dependency_make_mutable(RID p_id, RID p_resource_id, RDG::ResourceTracker *p_resource_tracker); + bool _dependencies_make_mutable(RID p_id, RDG::ResourceTracker *p_resource_tracker); + + RenderingDeviceGraph draw_graph; /**************************/ /**** FRAME MANAGEMENT ****/ @@ -1258,7 +1296,7 @@ private: template void _free_rids(T &p_owner, const char *p_type); - void _finalize_command_bufers(); + void _finalize_command_buffers(bool p_postpare); void _begin_frame(); #ifdef DEV_ENABLED @@ -1311,7 +1349,6 @@ public: void set_resource_name(RID p_id, const String &p_name); void draw_command_begin_label(String p_label_name, const Color &p_color = Color(1, 1, 1, 1)); - void draw_command_insert_label(String p_label_name, const Color &p_color = Color(1, 1, 1, 1)); void draw_command_end_label(); String get_device_vendor_name() const; @@ -1353,16 +1390,13 @@ private: RID _uniform_set_create(const TypedArray &p_uniforms, RID p_shader, uint32_t p_shader_set); - Error _buffer_update_bind(RID p_buffer, uint32_t p_offset, uint32_t p_size, const Vector &p_data, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS); + Error _buffer_update_bind(RID p_buffer, uint32_t p_offset, uint32_t p_size, const Vector &p_data); RID _render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const Ref &p_rasterization_state, const Ref &p_multisample_state, const Ref &p_depth_stencil_state, const Ref 
&p_blend_state, BitField p_dynamic_state_flags, uint32_t p_for_render_pass, const TypedArray &p_specialization_constants); RID _compute_pipeline_create(RID p_shader, const TypedArray &p_specialization_constants); - DrawListID _draw_list_begin(RID p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values = Vector(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2(), const TypedArray &p_storage_textures = TypedArray()); - Vector _draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values = Vector(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2(), const TypedArray &p_storage_textures = TypedArray()); void _draw_list_set_push_constant(DrawListID p_list, const Vector &p_data, uint32_t p_data_size); void _compute_list_set_push_constant(ComputeListID p_list, const Vector &p_data, uint32_t p_data_size); - Vector _draw_list_switch_to_next_pass_split(uint32_t p_splits); }; VARIANT_ENUM_CAST(RenderingDevice::DeviceType) @@ -1371,7 +1405,6 @@ VARIANT_ENUM_CAST(RenderingDevice::ShaderStage) VARIANT_ENUM_CAST(RenderingDevice::ShaderLanguage) VARIANT_ENUM_CAST(RenderingDevice::CompareOperator) VARIANT_ENUM_CAST(RenderingDevice::DataFormat) -VARIANT_BITFIELD_CAST(RenderingDevice::BarrierMask); VARIANT_ENUM_CAST(RenderingDevice::TextureType) VARIANT_ENUM_CAST(RenderingDevice::TextureSamples) VARIANT_BITFIELD_CAST(RenderingDevice::TextureUsageBits) @@ -1399,6 +1432,10 @@ VARIANT_ENUM_CAST(RenderingDevice::Limit) VARIANT_ENUM_CAST(RenderingDevice::MemoryType) VARIANT_ENUM_CAST(RenderingDevice::Features) +#ifndef DISABLE_DEPRECATED +VARIANT_BITFIELD_CAST(RenderingDevice::BarrierMask); +#endif + 
typedef RenderingDevice RD; #endif // RENDERING_DEVICE_H diff --git a/servers/rendering/rendering_device_commons.h b/servers/rendering/rendering_device_commons.h index dabd0c08678..a8936f8cca1 100644 --- a/servers/rendering/rendering_device_commons.h +++ b/servers/rendering/rendering_device_commons.h @@ -379,6 +379,7 @@ public: TEXTURE_SLICE_CUBEMAP, TEXTURE_SLICE_3D, TEXTURE_SLICE_2D_ARRAY, + TEXTURE_SLICE_MAX }; /*****************/ @@ -910,6 +911,7 @@ protected: Vector> uniform_sets; Vector specialization_constants; + Vector stages; }; struct ShaderReflection : public ShaderDescription { diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index 3fe3c8ac5e7..663222e69da 100644 --- a/servers/rendering/rendering_device_driver.h +++ b/servers/rendering/rendering_device_driver.h @@ -181,6 +181,10 @@ public: BUFFER_USAGE_INDIRECT_BIT = (1 << 8), }; + enum { + BUFFER_WHOLE_SIZE = ~0ULL + }; + virtual BufferID buffer_create(uint64_t p_size, BitField p_usage, MemoryAllocationType p_allocation_type) = 0; // Only for a buffer with BUFFER_USAGE_TEXEL_BIT. 
virtual bool buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) = 0; @@ -622,6 +626,13 @@ public: virtual void command_timestamp_query_pool_reset(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_query_count) = 0; virtual void command_timestamp_write(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_index) = 0; + /****************/ + /**** LABELS ****/ + /****************/ + + virtual void command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) = 0; + virtual void command_end_label(CommandBufferID p_cmd_buffer) = 0; + /****************/ /**** SCREEN ****/ /****************/ diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp new file mode 100644 index 00000000000..bfacd38065e --- /dev/null +++ b/servers/rendering/rendering_device_graph.cpp @@ -0,0 +1,1930 @@ +/**************************************************************************/ +/* rendering_device_graph.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "rendering_device_graph.h" + +#define PRINT_RENDER_GRAPH 0 +#define FORCE_FULL_ACCESS_BITS 0 +#define PRINT_RESOURCE_TRACKER_TOTAL 0 + +RenderingDeviceGraph::RenderingDeviceGraph() { + // Default initialization. 
+} + +RenderingDeviceGraph::~RenderingDeviceGraph() { + _wait_for_secondary_command_buffer_tasks(); + + for (Frame &f : frames) { + for (SecondaryCommandBuffer &secondary : f.secondary_command_buffers) { + if (secondary.command_pool.id != 0) { + driver->command_pool_free(secondary.command_pool); + } + } + } +} + +bool RenderingDeviceGraph::_is_write_usage(ResourceUsage p_usage) { + switch (p_usage) { + case RESOURCE_USAGE_TRANSFER_FROM: + case RESOURCE_USAGE_UNIFORM_BUFFER_READ: + case RESOURCE_USAGE_INDIRECT_BUFFER_READ: + case RESOURCE_USAGE_TEXTURE_BUFFER_READ: + case RESOURCE_USAGE_STORAGE_BUFFER_READ: + case RESOURCE_USAGE_VERTEX_BUFFER_READ: + case RESOURCE_USAGE_INDEX_BUFFER_READ: + case RESOURCE_USAGE_TEXTURE_SAMPLE: + case RESOURCE_USAGE_STORAGE_IMAGE_READ: + case RESOURCE_USAGE_ATTACHMENT_COLOR_READ: + case RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ: + return false; + case RESOURCE_USAGE_TRANSFER_TO: + case RESOURCE_USAGE_TEXTURE_BUFFER_READ_WRITE: + case RESOURCE_USAGE_STORAGE_BUFFER_READ_WRITE: + case RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE: + case RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE: + case RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE: + return true; + default: + DEV_ASSERT(false && "Invalid resource tracker usage."); + return false; + } +} + +RDD::TextureLayout RenderingDeviceGraph::_usage_to_image_layout(ResourceUsage p_usage) { + switch (p_usage) { + case RESOURCE_USAGE_TRANSFER_FROM: + return RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL; + case RESOURCE_USAGE_TRANSFER_TO: + return RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL; + case RESOURCE_USAGE_TEXTURE_SAMPLE: + return RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + case RESOURCE_USAGE_STORAGE_IMAGE_READ: + case RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE: + return RDD::TEXTURE_LAYOUT_GENERAL; + case RESOURCE_USAGE_ATTACHMENT_COLOR_READ: + case RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE: + return RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + case 
RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ: + return RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; + case RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE: + return RDD::TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + case RESOURCE_USAGE_NONE: + return RDD::TEXTURE_LAYOUT_UNDEFINED; + default: + DEV_ASSERT(false && "Invalid resource tracker usage or not an image usage."); + return RDD::TEXTURE_LAYOUT_UNDEFINED; + } +} + +RDD::BarrierAccessBits RenderingDeviceGraph::_usage_to_access_bits(ResourceUsage p_usage) { +#if FORCE_FULL_ACCESS_BITS + return RDD::BarrierAccessBits(RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT); +#else + switch (p_usage) { + case RESOURCE_USAGE_NONE: + return RDD::BarrierAccessBits(0); + case RESOURCE_USAGE_TRANSFER_FROM: + return RDD::BARRIER_ACCESS_TRANSFER_READ_BIT; + case RESOURCE_USAGE_TRANSFER_TO: + return RDD::BARRIER_ACCESS_TRANSFER_WRITE_BIT; + case RESOURCE_USAGE_UNIFORM_BUFFER_READ: + return RDD::BARRIER_ACCESS_UNIFORM_READ_BIT; + case RESOURCE_USAGE_INDIRECT_BUFFER_READ: + return RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT; + case RESOURCE_USAGE_STORAGE_BUFFER_READ: + case RESOURCE_USAGE_STORAGE_IMAGE_READ: + case RESOURCE_USAGE_TEXTURE_BUFFER_READ: + case RESOURCE_USAGE_TEXTURE_SAMPLE: + return RDD::BARRIER_ACCESS_SHADER_READ_BIT; + case RESOURCE_USAGE_TEXTURE_BUFFER_READ_WRITE: + case RESOURCE_USAGE_STORAGE_BUFFER_READ_WRITE: + case RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE: + return RDD::BarrierAccessBits(RDD::BARRIER_ACCESS_SHADER_READ_BIT | RDD::BARRIER_ACCESS_SHADER_WRITE_BIT); + case RESOURCE_USAGE_VERTEX_BUFFER_READ: + return RDD::BARRIER_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + case RESOURCE_USAGE_INDEX_BUFFER_READ: + return RDD::BARRIER_ACCESS_INDEX_READ_BIT; + case RESOURCE_USAGE_ATTACHMENT_COLOR_READ: + return RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT; + case RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE: + return RDD::BarrierAccessBits(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT 
| RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); + case RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ: + return RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + case RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE: + return RDD::BarrierAccessBits(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); + default: + DEV_ASSERT(false && "Invalid usage."); + return RDD::BarrierAccessBits(0); + } +#endif +} + +int32_t RenderingDeviceGraph::_add_to_command_list(int32_t p_command_index, int32_t p_list_index) { + DEV_ASSERT(p_command_index < int32_t(command_count)); + DEV_ASSERT(p_list_index < int32_t(command_list_nodes.size())); + + int32_t next_index = int32_t(command_list_nodes.size()); + command_list_nodes.resize(next_index + 1); + + RecordedCommandListNode &new_node = command_list_nodes[next_index]; + new_node.command_index = p_command_index; + new_node.next_list_index = p_list_index; + return next_index; +} + +void RenderingDeviceGraph::_add_adjacent_command(int32_t p_previous_command_index, int32_t p_command_index, RecordedCommand *r_command) { + const uint32_t previous_command_data_offset = command_data_offsets[p_previous_command_index]; + RecordedCommand &previous_command = *reinterpret_cast(&command_data[previous_command_data_offset]); + previous_command.adjacent_command_list_index = _add_to_command_list(p_command_index, previous_command.adjacent_command_list_index); + r_command->src_stages = r_command->src_stages | previous_command.dst_stages; +} + +int32_t RenderingDeviceGraph::_add_to_write_list(int32_t p_command_index, Rect2i suberesources, int32_t p_list_index) { + DEV_ASSERT(p_command_index < int32_t(command_count)); + DEV_ASSERT(p_list_index < int32_t(write_list_nodes.size())); + + int32_t next_index = int32_t(write_list_nodes.size()); + write_list_nodes.resize(next_index + 1); + + RecordedWriteListNode &new_node = write_list_nodes[next_index]; + new_node.command_index = p_command_index; 
+ new_node.next_list_index = p_list_index; + new_node.subresources = suberesources; + return next_index; +} + +RenderingDeviceGraph::RecordedCommand *RenderingDeviceGraph::_allocate_command(uint32_t p_command_size, int32_t &r_command_index) { + uint32_t command_data_offset = command_data.size(); + command_data_offsets.push_back(command_data_offset); + command_data.resize(command_data_offset + p_command_size); + r_command_index = command_count++; + RecordedCommand *new_command = reinterpret_cast(&command_data[command_data_offset]); + *new_command = RecordedCommand(); + return new_command; +} + +RenderingDeviceGraph::DrawListInstruction *RenderingDeviceGraph::_allocate_draw_list_instruction(uint32_t p_instruction_size) { + uint32_t draw_list_data_offset = draw_instruction_list.data.size(); + draw_instruction_list.data.resize(draw_list_data_offset + p_instruction_size); + return reinterpret_cast(&draw_instruction_list.data[draw_list_data_offset]); +} + +RenderingDeviceGraph::ComputeListInstruction *RenderingDeviceGraph::_allocate_compute_list_instruction(uint32_t p_instruction_size) { + uint32_t compute_list_data_offset = compute_instruction_list.data.size(); + compute_instruction_list.data.resize(compute_list_data_offset + p_instruction_size); + return reinterpret_cast(&compute_instruction_list.data[compute_list_data_offset]); +} + +void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_trackers, ResourceUsage *p_resource_usages, uint32_t p_resource_count, int32_t p_command_index, RecordedCommand *r_command) { + if (command_label_index >= 0) { + // If a label is active, tag the command with the label. + r_command->label_index = command_label_index; + } + + if (r_command->type == RecordedCommand::TYPE_CAPTURE_TIMESTAMP) { + // All previous commands starting from the previous timestamp should be adjacent to this command. 
+ int32_t start_command_index = uint32_t(MAX(command_timestamp_index, 0)); + for (int32_t i = start_command_index; i < p_command_index; i++) { + _add_adjacent_command(i, p_command_index, r_command); + } + + // Make this command the new active timestamp command. + command_timestamp_index = p_command_index; + } else if (command_timestamp_index >= 0) { + // Timestamp command should be adjacent to this command. + _add_adjacent_command(command_timestamp_index, p_command_index, r_command); + } + + if (command_synchronization_pending) { + // All previous commands should be adjacent to this command. + int32_t start_command_index = uint32_t(MAX(command_synchronization_index, 0)); + for (int32_t i = start_command_index; i < p_command_index; i++) { + _add_adjacent_command(i, p_command_index, r_command); + } + + command_synchronization_index = p_command_index; + command_synchronization_pending = false; + } else if (command_synchronization_index >= 0) { + // Synchronization command should be adjacent to this command. + _add_adjacent_command(command_synchronization_index, p_command_index, r_command); + } + + for (uint32_t i = 0; i < p_resource_count; i++) { + ResourceTracker *resource_tracker = p_resource_trackers[i]; + DEV_ASSERT(resource_tracker != nullptr); + + resource_tracker->reset_if_outdated(tracking_frame); + + ResourceUsage new_resource_usage = p_resource_usages[i]; + bool write_usage = _is_write_usage(new_resource_usage); + BitField new_usage_access = _usage_to_access_bits(new_resource_usage); + bool is_resource_a_slice = resource_tracker->parent != nullptr; + if (is_resource_a_slice) { + // This resource depends on a parent resource. + resource_tracker->parent->reset_if_outdated(tracking_frame); + + if (resource_tracker->texture_slice_command_index != p_command_index) { + // Indicate this slice has been used by this command. 
+ resource_tracker->texture_slice_command_index = p_command_index; + } + + if (resource_tracker->parent->usage == RESOURCE_USAGE_NONE) { + if (resource_tracker->parent->texture_driver_id != 0) { + // If the resource is a texture, we transition it entirely to the layout determined by the first slice that uses it. + _add_texture_barrier_to_command(resource_tracker->parent->texture_driver_id, RDD::BarrierAccessBits(0), new_usage_access, RDG::RESOURCE_USAGE_NONE, new_resource_usage, resource_tracker->parent->texture_subresources, command_normalization_barriers, r_command->normalization_barrier_index, r_command->normalization_barrier_count); + } + + // If the parent hasn't been used yet, we assign the usage of the slice to the entire resource. + resource_tracker->parent->usage = new_resource_usage; + + // Also assign the usage to the slice and consider it a write operation. + resource_tracker->usage = new_resource_usage; + write_usage = true; + } else if (resource_tracker->in_parent_dirty_list) { + if (resource_tracker->parent->usage == new_resource_usage) { + // The slice will be transitioned to the resource of the parent and can be deleted from the dirty list. 
+ ResourceTracker *previous_tracker = nullptr; + ResourceTracker *current_tracker = resource_tracker->parent->dirty_shared_list; + bool initialized_dirty_rect = false; + while (current_tracker != nullptr) { + if (current_tracker == resource_tracker) { + current_tracker->in_parent_dirty_list = false; + + if (previous_tracker != nullptr) { + previous_tracker->next_shared = current_tracker->next_shared; + } else { + resource_tracker->parent->dirty_shared_list = current_tracker->next_shared; + } + + current_tracker = current_tracker->next_shared; + } else { + if (initialized_dirty_rect) { + resource_tracker->parent->texture_slice_or_dirty_rect = resource_tracker->parent->texture_slice_or_dirty_rect.merge(current_tracker->texture_slice_or_dirty_rect); + } else { + resource_tracker->parent->texture_slice_or_dirty_rect = current_tracker->texture_slice_or_dirty_rect; + initialized_dirty_rect = true; + } + + previous_tracker = current_tracker; + current_tracker = current_tracker->next_shared; + } + } + } + } else { + if (resource_tracker->parent->dirty_shared_list != nullptr && resource_tracker->parent->texture_slice_or_dirty_rect.intersects(resource_tracker->texture_slice_or_dirty_rect)) { + // There's an intersection with the current dirty area of the parent and the slice. We must verify if the intersection is against a slice + // that was used in this command or not. Any slice we can find that wasn't used by this command must be reverted to the layout of the parent. 
+ ResourceTracker *previous_tracker = nullptr; + ResourceTracker *current_tracker = resource_tracker->parent->dirty_shared_list; + bool initialized_dirty_rect = false; + while (current_tracker != nullptr) { + if (current_tracker->texture_slice_or_dirty_rect.intersects(resource_tracker->texture_slice_or_dirty_rect)) { + if (current_tracker->command_frame == tracking_frame && current_tracker->texture_slice_command_index == p_command_index) { + ERR_FAIL_MSG("Texture slices that overlap can't be used in the same command."); + } else { + // Delete the slice from the dirty list and revert it to the usage of the parent. + if (current_tracker->texture_driver_id != 0) { + _add_texture_barrier_to_command(current_tracker->texture_driver_id, current_tracker->usage_access, new_usage_access, current_tracker->usage, resource_tracker->parent->usage, current_tracker->texture_subresources, command_normalization_barriers, r_command->normalization_barrier_index, r_command->normalization_barrier_count); + } + + current_tracker->in_parent_dirty_list = false; + + if (previous_tracker != nullptr) { + previous_tracker->next_shared = current_tracker->next_shared; + } else { + resource_tracker->parent->dirty_shared_list = current_tracker->next_shared; + } + + current_tracker = current_tracker->next_shared; + } + } else { + // Recalculate the dirty rect of the parent so the deleted slices are excluded. + if (initialized_dirty_rect) { + resource_tracker->parent->texture_slice_or_dirty_rect = resource_tracker->parent->texture_slice_or_dirty_rect.merge(current_tracker->texture_slice_or_dirty_rect); + } else { + resource_tracker->parent->texture_slice_or_dirty_rect = current_tracker->texture_slice_or_dirty_rect; + initialized_dirty_rect = true; + } + + previous_tracker = current_tracker; + current_tracker = current_tracker->next_shared; + } + } + } + + // If it wasn't in the list, assume the usage is the same as the parent. 
+ resource_tracker->usage = resource_tracker->parent->usage; + + if (resource_tracker->usage != new_resource_usage) { + // Insert to the dirty list if the requested usage is different. + resource_tracker->next_shared = resource_tracker->parent->dirty_shared_list; + resource_tracker->parent->dirty_shared_list = resource_tracker; + resource_tracker->in_parent_dirty_list = true; + if (resource_tracker->parent->dirty_shared_list != nullptr) { + resource_tracker->parent->texture_slice_or_dirty_rect = resource_tracker->parent->texture_slice_or_dirty_rect.merge(resource_tracker->texture_slice_or_dirty_rect); + } else { + resource_tracker->parent->texture_slice_or_dirty_rect = resource_tracker->texture_slice_or_dirty_rect; + } + } + } + } else { + if (resource_tracker->dirty_shared_list != nullptr) { + // Consider the usage as write if we must transition any of the slices. + write_usage = true; + } + + while (resource_tracker->dirty_shared_list != nullptr) { + if (resource_tracker->dirty_shared_list->texture_driver_id != 0) { + // Transition all slices to the layout of the parent resource. + _add_texture_barrier_to_command(resource_tracker->dirty_shared_list->texture_driver_id, resource_tracker->dirty_shared_list->usage_access, new_usage_access, resource_tracker->dirty_shared_list->usage, resource_tracker->usage, resource_tracker->dirty_shared_list->texture_subresources, command_normalization_barriers, r_command->normalization_barrier_index, r_command->normalization_barrier_count); + } + + resource_tracker->dirty_shared_list->in_parent_dirty_list = false; + resource_tracker->dirty_shared_list = resource_tracker->dirty_shared_list->next_shared; + } + } + + // Use the resource's parent tracker directly for all search operations. + bool resource_has_parent = resource_tracker->parent != nullptr; + ResourceTracker *search_tracker = resource_has_parent ? 
resource_tracker->parent : resource_tracker; + const RDD::TextureSubresourceRange &subresources = resource_tracker->texture_subresources; + Rect2i resource_tracker_rect(subresources.base_mipmap, subresources.base_layer, subresources.mipmap_count, subresources.layer_count); + bool different_usage = resource_tracker->usage != new_resource_usage; + bool write_usage_after_write = (write_usage && search_tracker->write_command_or_list_index >= 0); + if (different_usage || write_usage_after_write) { + // A barrier must be pushed if the usage is different of it's a write usage and there was already a command that wrote to this resource previously. + if (resource_tracker->texture_driver_id.id != 0) { + if (resource_tracker->usage_access.is_empty()) { + // FIXME: If the tracker does not know the previous type of usage, assume the generic memory write one. + // Tracking access bits across texture slices can be tricky, so this failsafe can be removed once that's improved. + resource_tracker->usage_access = RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT; + } + + _add_texture_barrier_to_command(resource_tracker->texture_driver_id, resource_tracker->usage_access, new_usage_access, resource_tracker->usage, new_resource_usage, resource_tracker->texture_subresources, command_transition_barriers, r_command->transition_barrier_index, r_command->transition_barrier_count); + } else if (resource_tracker->buffer_driver_id.id != 0) { +#if USE_BUFFER_BARRIERS + _add_buffer_barrier_to_command(resource_tracker->buffer_driver_id, resource_tracker->usage_access, new_usage_access, r_command->buffer_barrier_index, r_command->buffer_barrier_count); +#endif + // FIXME: Memory barriers are currently pushed regardless of whether buffer barriers are being used or not. Refer to the comment on the + // definition of USE_BUFFER_BARRIERS for the reason behind this. This can be fixed to be one case or the other once it's been confirmed + // the buffer and memory barrier behavior discrepancy has been solved. 
+ r_command->memory_barrier.src_access = resource_tracker->usage_access; + r_command->memory_barrier.dst_access = new_usage_access; + } else { + DEV_ASSERT(false && "Resource tracker does not contain a valid buffer or texture ID."); + } + } + + // Always update the access of the tracker according to the latest usage. + resource_tracker->usage_access = new_usage_access; + + if (different_usage) { + // Even if the usage of the resource isn't a write usage explicitly, a different usage implies a transition and it should therefore be considered a write. + write_usage = true; + resource_tracker->usage = new_resource_usage; + } + + if (search_tracker->write_command_or_list_index >= 0) { + if (search_tracker->write_command_list_enabled) { + // Make this command adjacent to any commands that wrote to this resource and intersect with the slice if it applies. + // For buffers or textures that never use slices, this list will only be one element long at most. + int32_t previous_write_list_index = -1; + int32_t write_list_index = search_tracker->write_command_or_list_index; + while (write_list_index >= 0) { + const RecordedWriteListNode &write_list_node = write_list_nodes[write_list_index]; + if (!resource_has_parent || resource_tracker_rect.intersects(write_list_node.subresources)) { + if (write_list_node.command_index == p_command_index) { + ERR_FAIL_COND_MSG(!resource_has_parent, "Command can't have itself as a dependency."); + } else { + // Command is dependent on this command. Add this command to the adjacency list of the write command. + _add_adjacent_command(write_list_node.command_index, p_command_index, r_command); + + if (resource_has_parent && write_usage && resource_tracker_rect.encloses(write_list_node.subresources)) { + // Eliminate redundant writes from the list. 
+ if (previous_write_list_index >= 0) { + RecordedWriteListNode &previous_list_node = write_list_nodes[previous_write_list_index]; + previous_list_node.next_list_index = write_list_node.next_list_index; + } else { + search_tracker->write_command_or_list_index = write_list_node.next_list_index; + } + + write_list_index = write_list_node.next_list_index; + continue; + } + } + } + + previous_write_list_index = write_list_index; + write_list_index = write_list_node.next_list_index; + } + } else { + // The index is just the latest command index that wrote to the resource. + if (search_tracker->write_command_or_list_index == p_command_index) { + ERR_FAIL_MSG("Command can't have itself as a dependency."); + } else { + _add_adjacent_command(search_tracker->write_command_or_list_index, p_command_index, r_command); + } + } + } + + if (write_usage) { + if (resource_has_parent) { + if (!search_tracker->write_command_list_enabled && search_tracker->write_command_or_list_index >= 0) { + // Write command list was not being used but there was a write command recorded. Add a new node with the entire parent resource's subresources and the recorded command index to the list. 
+ const RDD::TextureSubresourceRange &tracker_subresources = search_tracker->texture_subresources; + Rect2i tracker_rect(tracker_subresources.base_mipmap, tracker_subresources.base_layer, tracker_subresources.mipmap_count, tracker_subresources.layer_count); + search_tracker->write_command_or_list_index = _add_to_write_list(search_tracker->write_command_or_list_index, tracker_rect, -1); + } + + search_tracker->write_command_or_list_index = _add_to_write_list(p_command_index, resource_tracker_rect, search_tracker->write_command_or_list_index); + search_tracker->write_command_list_enabled = true; + } else { + search_tracker->write_command_or_list_index = p_command_index; + search_tracker->write_command_list_enabled = false; + } + + // We add this command to the adjacency list of all commands that were reading from this resource. We clear the list in the process. + int32_t previous_command_list_index = -1; + int32_t read_command_list_index = search_tracker->read_command_list_index; + while (read_command_list_index >= 0) { + const RecordedCommandListNode &command_list_node = command_list_nodes[read_command_list_index]; + if (command_list_node.command_index == p_command_index) { + if (!resource_has_parent) { + // Slices are allowed to be in different usages in the same command as they are guaranteed to have no overlap in the same command. + ERR_FAIL_MSG("Command can't have itself as a dependency."); + } else { + // Advance to the next element. + read_command_list_index = command_list_node.next_list_index; + previous_command_list_index = read_command_list_index; + } + } else { + if (previous_command_list_index >= 0) { + // Erase this element and connect the previous one to the next element. + command_list_nodes[previous_command_list_index].next_list_index = command_list_node.next_list_index; + read_command_list_index = command_list_node.next_list_index; + previous_command_list_index = read_command_list_index; + } else { + // Erase this element from the head of the list. 
+ DEV_ASSERT(search_tracker->read_command_list_index == read_command_list_index); + read_command_list_index = command_list_node.next_list_index; + search_tracker->read_command_list_index = read_command_list_index; + } + + // Add this command to the adjacency list of each command that was reading this resource. + _add_adjacent_command(command_list_node.command_index, p_command_index, r_command); + } + } + } else { + // We add a read dependency to the tracker to indicate this command reads from the resource. + search_tracker->read_command_list_index = _add_to_command_list(p_command_index, search_tracker->read_command_list_index); + } + } +} + +void RenderingDeviceGraph::_add_texture_barrier_to_command(RDD::TextureID p_texture_id, BitField p_src_access, BitField p_dst_access, ResourceUsage p_prev_usage, ResourceUsage p_next_usage, RDD::TextureSubresourceRange p_subresources, LocalVector &r_barrier_vector, int32_t &r_barrier_index, int32_t &r_barrier_count) { + if (!driver_honors_barriers) { + return; + } + + if (r_barrier_index < 0) { + r_barrier_index = r_barrier_vector.size(); + } + + RDD::TextureBarrier texture_barrier; + texture_barrier.texture = p_texture_id; + texture_barrier.src_access = p_src_access; + texture_barrier.dst_access = p_dst_access; + texture_barrier.prev_layout = _usage_to_image_layout(p_prev_usage); + texture_barrier.next_layout = _usage_to_image_layout(p_next_usage); + texture_barrier.subresources = p_subresources; + r_barrier_vector.push_back(texture_barrier); + r_barrier_count++; +} + +#if USE_BUFFER_BARRIERS +void RenderingDeviceGraph::_add_buffer_barrier_to_command(RDD::BufferID p_buffer_id, BitField p_src_access, BitField p_dst_access, int32_t &r_barrier_index, int32_t &r_barrier_count) { + if (!driver_honors_barriers) { + return; + } + + if (r_barrier_index < 0) { + r_barrier_index = command_buffer_barriers.size(); + } + + RDD::BufferBarrier buffer_barrier; + buffer_barrier.buffer = p_buffer_id; + buffer_barrier.src_access = p_src_access; 
+ buffer_barrier.dst_access = p_dst_access; + buffer_barrier.offset = 0; + buffer_barrier.size = RDD::BUFFER_WHOLE_SIZE; + command_buffer_barriers.push_back(buffer_barrier); + r_barrier_count++; +} +#endif + +void RenderingDeviceGraph::_run_compute_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) { + uint32_t instruction_data_cursor = 0; + while (instruction_data_cursor < p_instruction_data_size) { + DEV_ASSERT((instruction_data_cursor + sizeof(ComputeListInstruction)) <= p_instruction_data_size); + + const ComputeListInstruction *instruction = reinterpret_cast(&p_instruction_data[instruction_data_cursor]); + switch (instruction->type) { + case ComputeListInstruction::TYPE_BIND_PIPELINE: { + const ComputeListBindPipelineInstruction *bind_pipeline_instruction = reinterpret_cast(instruction); + driver->command_bind_compute_pipeline(p_command_buffer, bind_pipeline_instruction->pipeline); + instruction_data_cursor += sizeof(ComputeListBindPipelineInstruction); + } break; + case ComputeListInstruction::TYPE_BIND_UNIFORM_SET: { + const ComputeListBindUniformSetInstruction *bind_uniform_set_instruction = reinterpret_cast(instruction); + driver->command_bind_compute_uniform_set(p_command_buffer, bind_uniform_set_instruction->uniform_set, bind_uniform_set_instruction->shader, bind_uniform_set_instruction->set_index); + instruction_data_cursor += sizeof(ComputeListBindUniformSetInstruction); + } break; + case ComputeListInstruction::TYPE_DISPATCH: { + const ComputeListDispatchInstruction *dispatch_instruction = reinterpret_cast(instruction); + driver->command_compute_dispatch(p_command_buffer, dispatch_instruction->x_groups, dispatch_instruction->y_groups, dispatch_instruction->z_groups); + instruction_data_cursor += sizeof(ComputeListDispatchInstruction); + } break; + case ComputeListInstruction::TYPE_DISPATCH_INDIRECT: { + const ComputeListDispatchIndirectInstruction *dispatch_indirect_instruction = 
reinterpret_cast(instruction); + driver->command_compute_dispatch_indirect(p_command_buffer, dispatch_indirect_instruction->buffer, dispatch_indirect_instruction->offset); + instruction_data_cursor += sizeof(ComputeListDispatchIndirectInstruction); + } break; + case ComputeListInstruction::TYPE_SET_PUSH_CONSTANT: { + const ComputeListSetPushConstantInstruction *set_push_constant_instruction = reinterpret_cast(instruction); + const VectorView push_constant_data_view(reinterpret_cast(set_push_constant_instruction->data()), set_push_constant_instruction->size / sizeof(uint32_t)); + driver->command_bind_push_constants(p_command_buffer, set_push_constant_instruction->shader, 0, push_constant_data_view); + instruction_data_cursor += sizeof(ComputeListSetPushConstantInstruction); + instruction_data_cursor += set_push_constant_instruction->size; + } break; + case ComputeListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE: { + const ComputeListUniformSetPrepareForUseInstruction *uniform_set_prepare_for_use_instruction = reinterpret_cast(instruction); + driver->command_uniform_set_prepare_for_use(p_command_buffer, uniform_set_prepare_for_use_instruction->uniform_set, uniform_set_prepare_for_use_instruction->shader, uniform_set_prepare_for_use_instruction->set_index); + instruction_data_cursor += sizeof(ComputeListUniformSetPrepareForUseInstruction); + } break; + default: + DEV_ASSERT(false && "Unknown compute list instruction type."); + return; + } + } +} + +void RenderingDeviceGraph::_run_draw_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) { + uint32_t instruction_data_cursor = 0; + while (instruction_data_cursor < p_instruction_data_size) { + DEV_ASSERT((instruction_data_cursor + sizeof(DrawListInstruction)) <= p_instruction_data_size); + + const DrawListInstruction *instruction = reinterpret_cast(&p_instruction_data[instruction_data_cursor]); + switch (instruction->type) { + case 
DrawListInstruction::TYPE_BIND_INDEX_BUFFER: { + const DrawListBindIndexBufferInstruction *bind_index_buffer_instruction = reinterpret_cast(instruction); + driver->command_render_bind_index_buffer(p_command_buffer, bind_index_buffer_instruction->buffer, bind_index_buffer_instruction->format, bind_index_buffer_instruction->offset); + instruction_data_cursor += sizeof(DrawListBindIndexBufferInstruction); + } break; + case DrawListInstruction::TYPE_BIND_PIPELINE: { + const DrawListBindPipelineInstruction *bind_pipeline_instruction = reinterpret_cast(instruction); + driver->command_bind_render_pipeline(p_command_buffer, bind_pipeline_instruction->pipeline); + instruction_data_cursor += sizeof(DrawListBindPipelineInstruction); + } break; + case DrawListInstruction::TYPE_BIND_UNIFORM_SET: { + const DrawListBindUniformSetInstruction *bind_uniform_set_instruction = reinterpret_cast(instruction); + driver->command_bind_render_uniform_set(p_command_buffer, bind_uniform_set_instruction->uniform_set, bind_uniform_set_instruction->shader, bind_uniform_set_instruction->set_index); + instruction_data_cursor += sizeof(DrawListBindUniformSetInstruction); + } break; + case DrawListInstruction::TYPE_BIND_VERTEX_BUFFERS: { + const DrawListBindVertexBuffersInstruction *bind_vertex_buffers_instruction = reinterpret_cast(instruction); + driver->command_render_bind_vertex_buffers(p_command_buffer, bind_vertex_buffers_instruction->vertex_buffers_count, bind_vertex_buffers_instruction->vertex_buffers(), bind_vertex_buffers_instruction->vertex_buffer_offsets()); + instruction_data_cursor += sizeof(DrawListBindVertexBuffersInstruction); + instruction_data_cursor += sizeof(RDD::BufferID) * bind_vertex_buffers_instruction->vertex_buffers_count; + instruction_data_cursor += sizeof(uint64_t) * bind_vertex_buffers_instruction->vertex_buffers_count; + } break; + case DrawListInstruction::TYPE_CLEAR_ATTACHMENTS: { + const DrawListClearAttachmentsInstruction *clear_attachments_instruction = 
reinterpret_cast(instruction); + const VectorView attachments_clear_view(clear_attachments_instruction->attachments_clear(), clear_attachments_instruction->attachments_clear_count); + const VectorView attachments_clear_rect_view(clear_attachments_instruction->attachments_clear_rect(), clear_attachments_instruction->attachments_clear_rect_count); + driver->command_render_clear_attachments(p_command_buffer, attachments_clear_view, attachments_clear_rect_view); + instruction_data_cursor += sizeof(DrawListClearAttachmentsInstruction); + instruction_data_cursor += sizeof(RDD::AttachmentClear) * clear_attachments_instruction->attachments_clear_count; + instruction_data_cursor += sizeof(Rect2i) * clear_attachments_instruction->attachments_clear_rect_count; + } break; + case DrawListInstruction::TYPE_DRAW: { + const DrawListDrawInstruction *draw_instruction = reinterpret_cast(instruction); + driver->command_render_draw(p_command_buffer, draw_instruction->vertex_count, draw_instruction->instance_count, 0, 0); + instruction_data_cursor += sizeof(DrawListDrawInstruction); + } break; + case DrawListInstruction::TYPE_DRAW_INDEXED: { + const DrawListDrawIndexedInstruction *draw_indexed_instruction = reinterpret_cast(instruction); + driver->command_render_draw_indexed(p_command_buffer, draw_indexed_instruction->index_count, draw_indexed_instruction->instance_count, draw_indexed_instruction->first_index, 0, 0); + instruction_data_cursor += sizeof(DrawListDrawIndexedInstruction); + } break; + case DrawListInstruction::TYPE_EXECUTE_COMMANDS: { + const DrawListExecuteCommandsInstruction *execute_commands_instruction = reinterpret_cast(instruction); + driver->command_buffer_execute_secondary(p_command_buffer, execute_commands_instruction->command_buffer); + instruction_data_cursor += sizeof(DrawListExecuteCommandsInstruction); + } break; + case DrawListInstruction::TYPE_NEXT_SUBPASS: { + const DrawListNextSubpassInstruction *next_subpass_instruction = reinterpret_cast(instruction); + 
driver->command_next_render_subpass(p_command_buffer, next_subpass_instruction->command_buffer_type); + instruction_data_cursor += sizeof(DrawListNextSubpassInstruction); + } break; + case DrawListInstruction::TYPE_SET_BLEND_CONSTANTS: { + const DrawListSetBlendConstantsInstruction *set_blend_constants_instruction = reinterpret_cast(instruction); + driver->command_render_set_blend_constants(p_command_buffer, set_blend_constants_instruction->color); + instruction_data_cursor += sizeof(DrawListSetBlendConstantsInstruction); + } break; + case DrawListInstruction::TYPE_SET_LINE_WIDTH: { + const DrawListSetLineWidthInstruction *set_line_width_instruction = reinterpret_cast(instruction); + driver->command_render_set_line_width(p_command_buffer, set_line_width_instruction->width); + instruction_data_cursor += sizeof(DrawListSetLineWidthInstruction); + } break; + case DrawListInstruction::TYPE_SET_PUSH_CONSTANT: { + const DrawListSetPushConstantInstruction *set_push_constant_instruction = reinterpret_cast(instruction); + const VectorView push_constant_data_view(reinterpret_cast(set_push_constant_instruction->data()), set_push_constant_instruction->size / sizeof(uint32_t)); + driver->command_bind_push_constants(p_command_buffer, set_push_constant_instruction->shader, 0, push_constant_data_view); + instruction_data_cursor += sizeof(DrawListSetPushConstantInstruction); + instruction_data_cursor += set_push_constant_instruction->size; + } break; + case DrawListInstruction::TYPE_SET_SCISSOR: { + const DrawListSetScissorInstruction *set_scissor_instruction = reinterpret_cast(instruction); + driver->command_render_set_scissor(p_command_buffer, set_scissor_instruction->rect); + instruction_data_cursor += sizeof(DrawListSetScissorInstruction); + } break; + case DrawListInstruction::TYPE_SET_VIEWPORT: { + const DrawListSetViewportInstruction *set_viewport_instruction = reinterpret_cast(instruction); + driver->command_render_set_viewport(p_command_buffer, 
set_viewport_instruction->rect); + instruction_data_cursor += sizeof(DrawListSetViewportInstruction); + } break; + case DrawListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE: { + const DrawListUniformSetPrepareForUseInstruction *uniform_set_prepare_for_use_instruction = reinterpret_cast(instruction); + driver->command_uniform_set_prepare_for_use(p_command_buffer, uniform_set_prepare_for_use_instruction->uniform_set, uniform_set_prepare_for_use_instruction->shader, uniform_set_prepare_for_use_instruction->set_index); + instruction_data_cursor += sizeof(DrawListUniformSetPrepareForUseInstruction); + } break; + default: + DEV_ASSERT(false && "Unknown draw list instruction type."); + return; + } + } +} + +void RenderingDeviceGraph::_run_secondary_command_buffer_task(const SecondaryCommandBuffer *p_secondary) { + driver->command_buffer_begin_secondary(p_secondary->command_buffer, p_secondary->render_pass, 0, p_secondary->framebuffer); + _run_draw_list_command(p_secondary->command_buffer, p_secondary->instruction_data.ptr(), p_secondary->instruction_data.size()); + driver->command_buffer_end(p_secondary->command_buffer); +} + +void RenderingDeviceGraph::_wait_for_secondary_command_buffer_tasks() { + for (uint32_t i = 0; i < frames[frame].secondary_command_buffers_used; i++) { + WorkerThreadPool::TaskID &task = frames[frame].secondary_command_buffers[i].task; + if (task != WorkerThreadPool::INVALID_TASK_ID) { + WorkerThreadPool::get_singleton()->wait_for_task_completion(task); + task = WorkerThreadPool::INVALID_TASK_ID; + } + } +} + +void RenderingDeviceGraph::_run_render_commands(RDD::CommandBufferID p_command_buffer, int32_t p_level, const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count, int32_t &r_current_label_index, int32_t &r_current_label_level) { + for (uint32_t i = 0; i < p_sorted_commands_count; i++) { + const uint32_t command_index = p_sorted_commands[i].index; + const uint32_t command_data_offset = command_data_offsets[command_index]; 
+ const RecordedCommand *command = reinterpret_cast(&command_data[command_data_offset]); + _run_label_command_change(p_command_buffer, command->label_index, p_level, false, true, &p_sorted_commands[i], p_sorted_commands_count - i, r_current_label_index, r_current_label_level); + + switch (command->type) { + case RecordedCommand::TYPE_BUFFER_CLEAR: { + const RecordedBufferClearCommand *buffer_clear_command = reinterpret_cast(command); + driver->command_clear_buffer(p_command_buffer, buffer_clear_command->buffer, buffer_clear_command->offset, buffer_clear_command->size); + } break; + case RecordedCommand::TYPE_BUFFER_COPY: { + const RecordedBufferCopyCommand *buffer_copy_command = reinterpret_cast(command); + driver->command_copy_buffer(p_command_buffer, buffer_copy_command->source, buffer_copy_command->destination, buffer_copy_command->region); + } break; + case RecordedCommand::TYPE_BUFFER_GET_DATA: { + const RecordedBufferGetDataCommand *buffer_get_data_command = reinterpret_cast(command); + driver->command_copy_buffer(p_command_buffer, buffer_get_data_command->source, buffer_get_data_command->destination, buffer_get_data_command->region); + } break; + case RecordedCommand::TYPE_BUFFER_UPDATE: { + const RecordedBufferUpdateCommand *buffer_update_command = reinterpret_cast(command); + const RecordedBufferCopy *command_buffer_copies = buffer_update_command->buffer_copies(); + for (uint32_t j = 0; j < buffer_update_command->buffer_copies_count; j++) { + driver->command_copy_buffer(p_command_buffer, command_buffer_copies[j].source, buffer_update_command->destination, command_buffer_copies[j].region); + } + } break; + case RecordedCommand::TYPE_COMPUTE_LIST: { + const RecordedComputeListCommand *compute_list_command = reinterpret_cast(command); + _run_compute_list_command(p_command_buffer, compute_list_command->instruction_data(), compute_list_command->instruction_data_size); + } break; + case RecordedCommand::TYPE_DRAW_LIST: { + const RecordedDrawListCommand 
*draw_list_command = reinterpret_cast(command); + const VectorView clear_values(draw_list_command->clear_values(), draw_list_command->clear_values_count); + driver->command_begin_render_pass(p_command_buffer, draw_list_command->render_pass, draw_list_command->framebuffer, draw_list_command->command_buffer_type, draw_list_command->region, clear_values); + _run_draw_list_command(p_command_buffer, draw_list_command->instruction_data(), draw_list_command->instruction_data_size); + driver->command_end_render_pass(p_command_buffer); + } break; + case RecordedCommand::TYPE_TEXTURE_CLEAR: { + const RecordedTextureClearCommand *texture_clear_command = reinterpret_cast(command); + driver->command_clear_color_texture(p_command_buffer, texture_clear_command->texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, texture_clear_command->color, texture_clear_command->range); + } break; + case RecordedCommand::TYPE_TEXTURE_COPY: { + const RecordedTextureCopyCommand *texture_copy_command = reinterpret_cast(command); + driver->command_copy_texture(p_command_buffer, texture_copy_command->from_texture, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture_copy_command->to_texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, texture_copy_command->region); + } break; + case RecordedCommand::TYPE_TEXTURE_GET_DATA: { + const RecordedTextureGetDataCommand *texture_get_data_command = reinterpret_cast(command); + const VectorView command_buffer_texture_copy_regions_view(texture_get_data_command->buffer_texture_copy_regions(), texture_get_data_command->buffer_texture_copy_regions_count); + driver->command_copy_texture_to_buffer(p_command_buffer, texture_get_data_command->from_texture, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture_get_data_command->to_buffer, command_buffer_texture_copy_regions_view); + } break; + case RecordedCommand::TYPE_TEXTURE_RESOLVE: { + const RecordedTextureResolveCommand *texture_resolve_command = reinterpret_cast(command); + 
driver->command_resolve_texture(p_command_buffer, texture_resolve_command->from_texture, RDD::TEXTURE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture_resolve_command->src_layer, texture_resolve_command->src_mipmap, texture_resolve_command->to_texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, texture_resolve_command->dst_layer, texture_resolve_command->dst_mipmap); + } break; + case RecordedCommand::TYPE_TEXTURE_UPDATE: { + const RecordedTextureUpdateCommand *texture_update_command = reinterpret_cast(command); + const RecordedBufferToTextureCopy *command_buffer_to_texture_copies = texture_update_command->buffer_to_texture_copies(); + for (uint32_t j = 0; j < texture_update_command->buffer_to_texture_copies_count; j++) { + driver->command_copy_buffer_to_texture(p_command_buffer, command_buffer_to_texture_copies[j].from_buffer, texture_update_command->to_texture, RDD::TEXTURE_LAYOUT_TRANSFER_DST_OPTIMAL, command_buffer_to_texture_copies[j].region); + } + } break; + case RecordedCommand::TYPE_CAPTURE_TIMESTAMP: { + const RecordedCaptureTimestampCommand *texture_capture_timestamp_command = reinterpret_cast(command); + driver->command_timestamp_write(p_command_buffer, texture_capture_timestamp_command->pool, texture_capture_timestamp_command->index); + } break; + default: { + DEV_ASSERT(false && "Unknown recorded command type."); + return; + } + } + } +} + +void RenderingDeviceGraph::_run_label_command_change(RDD::CommandBufferID p_command_buffer, int32_t p_new_label_index, int32_t p_new_level, bool p_ignore_previous_value, bool p_use_label_for_empty, const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count, int32_t &r_current_label_index, int32_t &r_current_label_level) { + if (command_label_count == 0) { + // Ignore any label operations if no labels were pushed. 
+ return; + } + + if (p_ignore_previous_value || p_new_label_index != r_current_label_index || p_new_level != r_current_label_level) { + if (!p_ignore_previous_value && (p_use_label_for_empty || r_current_label_index >= 0)) { + // End the current label. + driver->command_end_label(p_command_buffer); + } + + String label_name; + Color label_color; + if (p_new_label_index >= 0) { + const char *label_chars = &command_label_chars[command_label_offsets[p_new_label_index]]; + label_name.parse_utf8(label_chars); + label_color = command_label_colors[p_new_label_index]; + } else if (p_use_label_for_empty) { + label_name = "Command graph"; + label_color = Color(1, 1, 1, 1); + } + + // Add the level to the name. + label_name += " (L" + itos(p_new_level) + ")"; + + if (p_sorted_commands != nullptr && p_sorted_commands_count > 0) { + // Analyze the commands in the level that have the same label to detect what type of operations are performed. + bool copy_commands = false; + bool compute_commands = false; + bool draw_commands = false; + for (uint32_t i = 0; i < p_sorted_commands_count; i++) { + const uint32_t command_index = p_sorted_commands[i].index; + const uint32_t command_data_offset = command_data_offsets[command_index]; + const RecordedCommand *command = reinterpret_cast(&command_data[command_data_offset]); + if (command->label_index != p_new_label_index) { + break; + } + + switch (command->type) { + case RecordedCommand::TYPE_BUFFER_CLEAR: + case RecordedCommand::TYPE_BUFFER_COPY: + case RecordedCommand::TYPE_BUFFER_GET_DATA: + case RecordedCommand::TYPE_BUFFER_UPDATE: + case RecordedCommand::TYPE_TEXTURE_CLEAR: + case RecordedCommand::TYPE_TEXTURE_COPY: + case RecordedCommand::TYPE_TEXTURE_GET_DATA: + case RecordedCommand::TYPE_TEXTURE_RESOLVE: + case RecordedCommand::TYPE_TEXTURE_UPDATE: { + copy_commands = true; + } break; + case RecordedCommand::TYPE_COMPUTE_LIST: { + compute_commands = true; + } break; + case RecordedCommand::TYPE_DRAW_LIST: { + draw_commands = 
true;
+					} break;
+					default: {
+						// Ignore command.
+					} break;
+				}
+
+				if (copy_commands && compute_commands && draw_commands) {
+					// There's no more command types to find.
+					break;
+				}
+			}
+
+			if (copy_commands || compute_commands || draw_commands) {
+				// Add the operations to the name.
+				bool plus_after_copy = copy_commands && (compute_commands || draw_commands);
+				bool plus_after_compute = compute_commands && draw_commands;
+				label_name += " (";
+				label_name += copy_commands ? "Copy" : "";
+				label_name += plus_after_copy ? "+" : "";
+				label_name += compute_commands ? "Compute" : "";
+				label_name += plus_after_compute ? "+" : "";
+				label_name += draw_commands ? "Draw" : "";
+				label_name += ")";
+			}
+		}
+
+		// Start the new label.
+		CharString label_name_utf8 = label_name.utf8();
+		driver->command_begin_label(p_command_buffer, label_name_utf8.get_data(), label_color);
+
+		r_current_label_index = p_new_label_index;
+		r_current_label_level = p_new_level;
+	}
+}
+
+// Moves the commands that carry the priority remembered in r_boosted_priority to the front of the level.
+//
+// When r_boosted_priority is greater than zero, every command in p_sorted_commands whose priority equals
+// it is rewritten to priority 0 and, if at least one command changed, the array is sorted again.
+// Afterwards, r_boosted_priority is updated to the priority of the last command in the array, unless
+// that priority is 0 (in which case the previous value is kept).
+//
+// Does nothing when p_sorted_commands_count is 0.
+void RenderingDeviceGraph::_boost_priority_for_render_commands(RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count, uint32_t &r_boosted_priority) {
+	if (p_sorted_commands_count == 0) {
+		return;
+	}
+
+	const uint32_t boosted_priority_value = 0;
+	if (r_boosted_priority > 0) {
+		bool perform_sort = false;
+		for (uint32_t j = 0; j < p_sorted_commands_count; j++) {
+			if (p_sorted_commands[j].priority == r_boosted_priority) {
+				p_sorted_commands[j].priority = boosted_priority_value;
+				perform_sort = true;
+			}
+		}
+
+		// Only pay for the sort if at least one priority was actually rewritten.
+		if (perform_sort) {
+			SortArray<RecordedCommandSort> command_sorter;
+			command_sorter.sort(p_sorted_commands, p_sorted_commands_count);
+		}
+	}
+
+	if (p_sorted_commands[p_sorted_commands_count - 1].priority != boosted_priority_value) {
+		r_boosted_priority = p_sorted_commands[p_sorted_commands_count - 1].priority;
+	}
+}
+
+void RenderingDeviceGraph::_group_barriers_for_render_commands(RDD::CommandBufferID p_command_buffer, const RecordedCommandSort *p_sorted_commands, uint32_t
p_sorted_commands_count, bool p_full_memory_barrier) { + if (!driver_honors_barriers) { + return; + } + + barrier_group.clear(); + barrier_group.src_stages = RDD::PIPELINE_STAGE_TOP_OF_PIPE_BIT; + barrier_group.dst_stages = RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + + for (uint32_t i = 0; i < p_sorted_commands_count; i++) { + const uint32_t command_index = p_sorted_commands[i].index; + const uint32_t command_data_offset = command_data_offsets[command_index]; + const RecordedCommand *command = reinterpret_cast(&command_data[command_data_offset]); + + // Merge command's stage bits with the barrier group. + barrier_group.src_stages = barrier_group.src_stages | command->src_stages; + barrier_group.dst_stages = barrier_group.dst_stages | command->dst_stages; + + // Merge command's memory barrier bits with the barrier group. + barrier_group.memory_barrier.src_access = barrier_group.memory_barrier.src_access | command->memory_barrier.src_access; + barrier_group.memory_barrier.dst_access = barrier_group.memory_barrier.dst_access | command->memory_barrier.dst_access; + + // Gather texture barriers. + for (int32_t j = 0; j < command->normalization_barrier_count; j++) { + const RDD::TextureBarrier &recorded_barrier = command_normalization_barriers[command->normalization_barrier_index + j]; + barrier_group.normalization_barriers.push_back(recorded_barrier); + } + + for (int32_t j = 0; j < command->transition_barrier_count; j++) { + const RDD::TextureBarrier &recorded_barrier = command_transition_barriers[command->transition_barrier_index + j]; + barrier_group.transition_barriers.push_back(recorded_barrier); + } + +#if USE_BUFFER_BARRIERS + // Gather buffer barriers. 
+ for (int32_t j = 0; j < command->buffer_barrier_count; j++) { + const RDD::BufferBarrier &recorded_barrier = command_buffer_barriers[command->buffer_barrier_index + j]; + barrier_group.buffer_barriers.push_back(recorded_barrier); + } +#endif + } + + if (p_full_memory_barrier) { + barrier_group.src_stages = RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT; + barrier_group.dst_stages = RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT; + barrier_group.memory_barrier.src_access = RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT; + barrier_group.memory_barrier.dst_access = RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT; + } + + const bool is_memory_barrier_empty = barrier_group.memory_barrier.src_access.is_empty() && barrier_group.memory_barrier.dst_access.is_empty(); + const bool are_texture_barriers_empty = barrier_group.normalization_barriers.is_empty() && barrier_group.transition_barriers.is_empty(); +#if USE_BUFFER_BARRIERS + const bool are_buffer_barriers_empty = barrier_group.buffer_barriers.is_empty(); +#else + const bool are_buffer_barriers_empty = true; +#endif + if (is_memory_barrier_empty && are_texture_barriers_empty && are_buffer_barriers_empty) { + // Commands don't require synchronization. + return; + } + + const VectorView memory_barriers = !is_memory_barrier_empty ? barrier_group.memory_barrier : VectorView(); + const VectorView texture_barriers = barrier_group.normalization_barriers.is_empty() ? barrier_group.transition_barriers : barrier_group.normalization_barriers; +#if USE_BUFFER_BARRIERS + const VectorView buffer_barriers = !are_buffer_barriers_empty ? 
barrier_group.buffer_barriers : VectorView(); +#else + const VectorView buffer_barriers = VectorView(); +#endif + + driver->command_pipeline_barrier(p_command_buffer, barrier_group.src_stages, barrier_group.dst_stages, memory_barriers, buffer_barriers, texture_barriers); + + bool separate_texture_barriers = !barrier_group.normalization_barriers.is_empty() && !barrier_group.transition_barriers.is_empty(); + if (separate_texture_barriers) { + driver->command_pipeline_barrier(p_command_buffer, barrier_group.src_stages, barrier_group.dst_stages, VectorView(), VectorView(), barrier_group.transition_barriers); + } +} + +void RenderingDeviceGraph::_print_render_commands(const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count) { + for (uint32_t i = 0; i < p_sorted_commands_count; i++) { + const uint32_t command_index = p_sorted_commands[i].index; + const uint32_t command_level = p_sorted_commands[i].level; + const uint32_t command_data_offset = command_data_offsets[command_index]; + const RecordedCommand *command = reinterpret_cast(&command_data[command_data_offset]); + switch (command->type) { + case RecordedCommand::TYPE_BUFFER_CLEAR: { + const RecordedBufferClearCommand *buffer_clear_command = reinterpret_cast(command); + print_line(command_index, "LEVEL", command_level, "BUFFER CLEAR DESTINATION", itos(buffer_clear_command->buffer.id)); + } break; + case RecordedCommand::TYPE_BUFFER_COPY: { + const RecordedBufferCopyCommand *buffer_copy_command = reinterpret_cast(command); + print_line(command_index, "LEVEL", command_level, "BUFFER COPY SOURCE", itos(buffer_copy_command->source.id), "DESTINATION", itos(buffer_copy_command->destination.id)); + } break; + case RecordedCommand::TYPE_BUFFER_GET_DATA: { + const RecordedBufferGetDataCommand *buffer_get_data_command = reinterpret_cast(command); + print_line(command_index, "LEVEL", command_level, "BUFFER GET DATA DESTINATION", itos(buffer_get_data_command->destination.id)); + } break; + case 
RecordedCommand::TYPE_BUFFER_UPDATE: { + const RecordedBufferUpdateCommand *buffer_update_command = reinterpret_cast(command); + print_line(command_index, "LEVEL", command_level, "BUFFER UPDATE DESTINATION", itos(buffer_update_command->destination.id), "COPIES", buffer_update_command->buffer_copies_count); + } break; + case RecordedCommand::TYPE_COMPUTE_LIST: { + const RecordedComputeListCommand *compute_list_command = reinterpret_cast(command); + print_line(command_index, "LEVEL", command_level, "COMPUTE LIST SIZE", compute_list_command->instruction_data_size); + } break; + case RecordedCommand::TYPE_DRAW_LIST: { + const RecordedDrawListCommand *draw_list_command = reinterpret_cast(command); + print_line(command_index, "LEVEL", command_level, "DRAW LIST SIZE", draw_list_command->instruction_data_size); + } break; + case RecordedCommand::TYPE_TEXTURE_CLEAR: { + const RecordedTextureClearCommand *texture_clear_command = reinterpret_cast(command); + print_line(command_index, "LEVEL", command_level, "TEXTURE CLEAR", itos(texture_clear_command->texture.id), "COLOR", texture_clear_command->color); + } break; + case RecordedCommand::TYPE_TEXTURE_COPY: { + const RecordedTextureCopyCommand *texture_copy_command = reinterpret_cast(command); + print_line(command_index, "LEVEL", command_level, "TEXTURE COPY FROM", itos(texture_copy_command->from_texture.id), "TO", itos(texture_copy_command->to_texture.id)); + } break; + case RecordedCommand::TYPE_TEXTURE_GET_DATA: { + print_line(command_index, "LEVEL", command_level, "TEXTURE GET DATA"); + } break; + case RecordedCommand::TYPE_TEXTURE_RESOLVE: { + const RecordedTextureResolveCommand *texture_resolve_command = reinterpret_cast(command); + print_line(command_index, "LEVEL", command_level, "TEXTURE RESOLVE FROM", itos(texture_resolve_command->from_texture.id), "TO", itos(texture_resolve_command->to_texture.id)); + } break; + case RecordedCommand::TYPE_TEXTURE_UPDATE: { + const RecordedTextureUpdateCommand *texture_update_command 
= reinterpret_cast(command); + print_line(command_index, "LEVEL", command_level, "TEXTURE UPDATE TO", itos(texture_update_command->to_texture.id)); + } break; + case RecordedCommand::TYPE_CAPTURE_TIMESTAMP: { + const RecordedCaptureTimestampCommand *texture_capture_timestamp_command = reinterpret_cast(command); + print_line(command_index, "LEVEL", command_level, "CAPTURE TIMESTAMP POOL", itos(texture_capture_timestamp_command->pool.id), "INDEX", texture_capture_timestamp_command->index); + } break; + default: + DEV_ASSERT(false && "Unknown recorded command type."); + return; + } + } +} + +void RenderingDeviceGraph::_print_draw_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) { + uint32_t instruction_data_cursor = 0; + while (instruction_data_cursor < p_instruction_data_size) { + DEV_ASSERT((instruction_data_cursor + sizeof(DrawListInstruction)) <= p_instruction_data_size); + + const DrawListInstruction *instruction = reinterpret_cast(&p_instruction_data[instruction_data_cursor]); + switch (instruction->type) { + case DrawListInstruction::TYPE_BIND_INDEX_BUFFER: { + const DrawListBindIndexBufferInstruction *bind_index_buffer_instruction = reinterpret_cast(instruction); + print_line("\tBIND INDEX BUFFER ID", itos(bind_index_buffer_instruction->buffer.id), "FORMAT", bind_index_buffer_instruction->format, "OFFSET", bind_index_buffer_instruction->offset); + instruction_data_cursor += sizeof(DrawListBindIndexBufferInstruction); + } break; + case DrawListInstruction::TYPE_BIND_PIPELINE: { + const DrawListBindPipelineInstruction *bind_pipeline_instruction = reinterpret_cast(instruction); + print_line("\tBIND PIPELINE ID", itos(bind_pipeline_instruction->pipeline.id)); + instruction_data_cursor += sizeof(DrawListBindPipelineInstruction); + } break; + case DrawListInstruction::TYPE_BIND_UNIFORM_SET: { + const DrawListBindUniformSetInstruction *bind_uniform_set_instruction = reinterpret_cast(instruction); + print_line("\tBIND UNIFORM SET ID", 
itos(bind_uniform_set_instruction->uniform_set.id), "SET INDEX", bind_uniform_set_instruction->set_index); + instruction_data_cursor += sizeof(DrawListBindUniformSetInstruction); + } break; + case DrawListInstruction::TYPE_BIND_VERTEX_BUFFERS: { + const DrawListBindVertexBuffersInstruction *bind_vertex_buffers_instruction = reinterpret_cast(instruction); + print_line("\tBIND VERTEX BUFFERS COUNT", bind_vertex_buffers_instruction->vertex_buffers_count); + instruction_data_cursor += sizeof(DrawListBindVertexBuffersInstruction); + instruction_data_cursor += sizeof(RDD::BufferID) * bind_vertex_buffers_instruction->vertex_buffers_count; + instruction_data_cursor += sizeof(uint64_t) * bind_vertex_buffers_instruction->vertex_buffers_count; + } break; + case DrawListInstruction::TYPE_CLEAR_ATTACHMENTS: { + const DrawListClearAttachmentsInstruction *clear_attachments_instruction = reinterpret_cast(instruction); + print_line("\tATTACHMENTS CLEAR COUNT", clear_attachments_instruction->attachments_clear_count, "RECT COUNT", clear_attachments_instruction->attachments_clear_rect_count); + instruction_data_cursor += sizeof(DrawListClearAttachmentsInstruction); + instruction_data_cursor += sizeof(RDD::AttachmentClear) * clear_attachments_instruction->attachments_clear_count; + instruction_data_cursor += sizeof(Rect2i) * clear_attachments_instruction->attachments_clear_rect_count; + } break; + case DrawListInstruction::TYPE_DRAW: { + const DrawListDrawInstruction *draw_instruction = reinterpret_cast(instruction); + print_line("\tDRAW VERTICES", draw_instruction->vertex_count, "INSTANCES", draw_instruction->instance_count); + instruction_data_cursor += sizeof(DrawListDrawInstruction); + } break; + case DrawListInstruction::TYPE_DRAW_INDEXED: { + const DrawListDrawIndexedInstruction *draw_indexed_instruction = reinterpret_cast(instruction); + print_line("\tDRAW INDICES", draw_indexed_instruction->index_count, "INSTANCES", draw_indexed_instruction->instance_count, "FIRST INDEX", 
draw_indexed_instruction->first_index); + instruction_data_cursor += sizeof(DrawListDrawIndexedInstruction); + } break; + case DrawListInstruction::TYPE_EXECUTE_COMMANDS: { + print_line("\tEXECUTE COMMANDS"); + instruction_data_cursor += sizeof(DrawListExecuteCommandsInstruction); + } break; + case DrawListInstruction::TYPE_NEXT_SUBPASS: { + print_line("\tNEXT SUBPASS"); + instruction_data_cursor += sizeof(DrawListNextSubpassInstruction); + } break; + case DrawListInstruction::TYPE_SET_BLEND_CONSTANTS: { + const DrawListSetBlendConstantsInstruction *set_blend_constants_instruction = reinterpret_cast(instruction); + print_line("\tSET BLEND CONSTANTS COLOR", set_blend_constants_instruction->color); + instruction_data_cursor += sizeof(DrawListSetBlendConstantsInstruction); + } break; + case DrawListInstruction::TYPE_SET_LINE_WIDTH: { + const DrawListSetLineWidthInstruction *set_line_width_instruction = reinterpret_cast(instruction); + print_line("\tSET LINE WIDTH", set_line_width_instruction->width); + instruction_data_cursor += sizeof(DrawListSetLineWidthInstruction); + } break; + case DrawListInstruction::TYPE_SET_PUSH_CONSTANT: { + const DrawListSetPushConstantInstruction *set_push_constant_instruction = reinterpret_cast(instruction); + print_line("\tSET PUSH CONSTANT SIZE", set_push_constant_instruction->size); + instruction_data_cursor += sizeof(DrawListSetPushConstantInstruction); + instruction_data_cursor += set_push_constant_instruction->size; + } break; + case DrawListInstruction::TYPE_SET_SCISSOR: { + const DrawListSetScissorInstruction *set_scissor_instruction = reinterpret_cast(instruction); + print_line("\tSET SCISSOR", set_scissor_instruction->rect); + instruction_data_cursor += sizeof(DrawListSetScissorInstruction); + } break; + case DrawListInstruction::TYPE_SET_VIEWPORT: { + const DrawListSetViewportInstruction *set_viewport_instruction = reinterpret_cast(instruction); + print_line("\tSET VIEWPORT", set_viewport_instruction->rect); + 
instruction_data_cursor += sizeof(DrawListSetViewportInstruction); + } break; + case DrawListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE: { + const DrawListUniformSetPrepareForUseInstruction *uniform_set_prepare_for_use_instruction = reinterpret_cast(instruction); + print_line("\tUNIFORM SET PREPARE FOR USE ID", itos(uniform_set_prepare_for_use_instruction->uniform_set.id), "SHADER ID", itos(uniform_set_prepare_for_use_instruction->shader.id), "INDEX", uniform_set_prepare_for_use_instruction->set_index); + instruction_data_cursor += sizeof(DrawListUniformSetPrepareForUseInstruction); + } break; + default: + DEV_ASSERT(false && "Unknown draw list instruction type."); + return; + } + } +} + +void RenderingDeviceGraph::_print_compute_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) { + uint32_t instruction_data_cursor = 0; + while (instruction_data_cursor < p_instruction_data_size) { + DEV_ASSERT((instruction_data_cursor + sizeof(ComputeListInstruction)) <= p_instruction_data_size); + + const ComputeListInstruction *instruction = reinterpret_cast(&p_instruction_data[instruction_data_cursor]); + switch (instruction->type) { + case ComputeListInstruction::TYPE_BIND_PIPELINE: { + const ComputeListBindPipelineInstruction *bind_pipeline_instruction = reinterpret_cast(instruction); + print_line("\tBIND PIPELINE ID", itos(bind_pipeline_instruction->pipeline.id)); + instruction_data_cursor += sizeof(ComputeListBindPipelineInstruction); + } break; + case ComputeListInstruction::TYPE_BIND_UNIFORM_SET: { + const ComputeListBindUniformSetInstruction *bind_uniform_set_instruction = reinterpret_cast(instruction); + print_line("\tBIND UNIFORM SET ID", itos(bind_uniform_set_instruction->uniform_set.id), "SHADER ID", itos(bind_uniform_set_instruction->shader.id)); + instruction_data_cursor += sizeof(ComputeListBindUniformSetInstruction); + } break; + case ComputeListInstruction::TYPE_DISPATCH: { + const ComputeListDispatchInstruction *dispatch_instruction = 
reinterpret_cast(instruction); + print_line("\tDISPATCH", dispatch_instruction->x_groups, dispatch_instruction->y_groups, dispatch_instruction->z_groups); + instruction_data_cursor += sizeof(ComputeListDispatchInstruction); + } break; + case ComputeListInstruction::TYPE_DISPATCH_INDIRECT: { + const ComputeListDispatchIndirectInstruction *dispatch_indirect_instruction = reinterpret_cast(instruction); + print_line("\tDISPATCH INDIRECT BUFFER ID", itos(dispatch_indirect_instruction->buffer.id), "OFFSET", dispatch_indirect_instruction->offset); + instruction_data_cursor += sizeof(ComputeListDispatchIndirectInstruction); + } break; + case ComputeListInstruction::TYPE_SET_PUSH_CONSTANT: { + const ComputeListSetPushConstantInstruction *set_push_constant_instruction = reinterpret_cast(instruction); + print_line("\tSET PUSH CONSTANT SIZE", set_push_constant_instruction->size); + instruction_data_cursor += sizeof(ComputeListSetPushConstantInstruction); + instruction_data_cursor += set_push_constant_instruction->size; + } break; + case ComputeListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE: { + const ComputeListUniformSetPrepareForUseInstruction *uniform_set_prepare_for_use_instruction = reinterpret_cast(instruction); + print_line("\tUNIFORM SET PREPARE FOR USE ID", itos(uniform_set_prepare_for_use_instruction->uniform_set.id), "SHADER ID", itos(uniform_set_prepare_for_use_instruction->shader.id), "INDEX", itos(uniform_set_prepare_for_use_instruction->set_index)); + instruction_data_cursor += sizeof(ComputeListUniformSetPrepareForUseInstruction); + } break; + default: + DEV_ASSERT(false && "Unknown compute list instruction type."); + return; + } + } +} + +void RenderingDeviceGraph::initialize(RDD *p_driver, uint32_t p_frame_count, uint32_t p_secondary_command_buffers_per_frame) { + driver = p_driver; + frames.resize(p_frame_count); + + for (uint32_t i = 0; i < p_frame_count; i++) { + frames[i].secondary_command_buffers.resize(p_secondary_command_buffers_per_frame); + + for 
(uint32_t j = 0; j < p_secondary_command_buffers_per_frame; j++) { + SecondaryCommandBuffer &secondary = frames[i].secondary_command_buffers[j]; + secondary.command_pool = driver->command_pool_create(RDD::COMMAND_BUFFER_TYPE_SECONDARY); + secondary.command_buffer = driver->command_buffer_create(RDD::COMMAND_BUFFER_TYPE_SECONDARY, secondary.command_pool); + secondary.task = WorkerThreadPool::INVALID_TASK_ID; + } + } + + driver_honors_barriers = driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS); +} + +void RenderingDeviceGraph::begin() { + command_data.clear(); + command_data_offsets.clear(); + command_normalization_barriers.clear(); + command_transition_barriers.clear(); + command_label_chars.clear(); + command_label_colors.clear(); + command_label_offsets.clear(); + command_list_nodes.clear(); + write_list_nodes.clear(); + command_count = 0; + command_label_count = 0; + command_timestamp_index = -1; + command_synchronization_index = -1; + command_synchronization_pending = false; + command_label_index = -1; + frames[frame].secondary_command_buffers_used = 0; + draw_instruction_list.index = 0; + compute_instruction_list.index = 0; + tracking_frame++; + +#ifdef DEV_ENABLED + write_dependency_counters.clear(); +#endif +} + +void RenderingDeviceGraph::add_buffer_clear(RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, uint32_t p_offset, uint32_t p_size) { + DEV_ASSERT(p_dst_tracker != nullptr); + + int32_t command_index; + RecordedBufferClearCommand *command = static_cast(_allocate_command(sizeof(RecordedBufferClearCommand), command_index)); + command->type = RecordedCommand::TYPE_BUFFER_CLEAR; + command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->buffer = p_dst; + command->offset = p_offset; + command->size = p_size; + + ResourceUsage usage = RESOURCE_USAGE_TRANSFER_TO; + _add_command_to_graph(&p_dst_tracker, &usage, 1, command_index, command); +} + +void RenderingDeviceGraph::add_buffer_copy(RDD::BufferID p_src, ResourceTracker 
*p_src_tracker, RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, RDD::BufferCopyRegion p_region) { + // Source tracker is allowed to be null as it could be a read-only buffer. + DEV_ASSERT(p_dst_tracker != nullptr); + + int32_t command_index; + RecordedBufferCopyCommand *command = static_cast(_allocate_command(sizeof(RecordedBufferCopyCommand), command_index)); + command->type = RecordedCommand::TYPE_BUFFER_COPY; + command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->source = p_src; + command->destination = p_dst; + command->region = p_region; + + ResourceTracker *trackers[2] = { p_dst_tracker, p_src_tracker }; + ResourceUsage usages[2] = { RESOURCE_USAGE_TRANSFER_TO, RESOURCE_USAGE_TRANSFER_FROM }; + _add_command_to_graph(trackers, usages, p_src_tracker != nullptr ? 2 : 1, command_index, command); +} + +void RenderingDeviceGraph::add_buffer_get_data(RDD::BufferID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, RDD::BufferCopyRegion p_region) { + // Source tracker is allowed to be null as it could be a read-only buffer. 
+ int32_t command_index; + RecordedBufferGetDataCommand *command = static_cast(_allocate_command(sizeof(RecordedBufferGetDataCommand), command_index)); + command->type = RecordedCommand::TYPE_BUFFER_GET_DATA; + command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->source = p_src; + command->destination = p_dst; + command->region = p_region; + + if (p_src_tracker != nullptr) { + ResourceUsage usage = RESOURCE_USAGE_TRANSFER_FROM; + _add_command_to_graph(&p_src_tracker, &usage, 1, command_index, command); + } else { + _add_command_to_graph(nullptr, nullptr, 0, command_index, command); + } +} + +void RenderingDeviceGraph::add_buffer_update(RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, VectorView p_buffer_copies) { + DEV_ASSERT(p_dst_tracker != nullptr); + + size_t buffer_copies_size = p_buffer_copies.size() * sizeof(RecordedBufferCopy); + uint64_t command_size = sizeof(RecordedBufferUpdateCommand) + buffer_copies_size; + int32_t command_index; + RecordedBufferUpdateCommand *command = static_cast(_allocate_command(command_size, command_index)); + command->type = RecordedCommand::TYPE_BUFFER_UPDATE; + command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->destination = p_dst; + command->buffer_copies_count = p_buffer_copies.size(); + + RecordedBufferCopy *buffer_copies = command->buffer_copies(); + for (uint32_t i = 0; i < command->buffer_copies_count; i++) { + buffer_copies[i] = p_buffer_copies[i]; + } + + ResourceUsage buffer_usage = RESOURCE_USAGE_TRANSFER_TO; + _add_command_to_graph(&p_dst_tracker, &buffer_usage, 1, command_index, command); +} + +void RenderingDeviceGraph::add_compute_list_begin() { + compute_instruction_list.clear(); + compute_instruction_list.index++; +} + +void RenderingDeviceGraph::add_compute_list_bind_pipeline(RDD::PipelineID p_pipeline) { + ComputeListBindPipelineInstruction *instruction = reinterpret_cast(_allocate_compute_list_instruction(sizeof(ComputeListBindPipelineInstruction))); + instruction->type = 
ComputeListInstruction::TYPE_BIND_PIPELINE; + instruction->pipeline = p_pipeline; + compute_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); +} + +void RenderingDeviceGraph::add_compute_list_bind_uniform_set(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index) { + ComputeListBindUniformSetInstruction *instruction = reinterpret_cast(_allocate_compute_list_instruction(sizeof(ComputeListBindUniformSetInstruction))); + instruction->type = ComputeListInstruction::TYPE_BIND_UNIFORM_SET; + instruction->shader = p_shader; + instruction->uniform_set = p_uniform_set; + instruction->set_index = set_index; +} + +void RenderingDeviceGraph::add_compute_list_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) { + ComputeListDispatchInstruction *instruction = reinterpret_cast(_allocate_compute_list_instruction(sizeof(ComputeListDispatchInstruction))); + instruction->type = ComputeListInstruction::TYPE_DISPATCH; + instruction->x_groups = p_x_groups; + instruction->y_groups = p_y_groups; + instruction->z_groups = p_z_groups; +} + +void RenderingDeviceGraph::add_compute_list_dispatch_indirect(RDD::BufferID p_buffer, uint32_t p_offset) { + ComputeListDispatchIndirectInstruction *instruction = reinterpret_cast(_allocate_compute_list_instruction(sizeof(ComputeListDispatchIndirectInstruction))); + instruction->type = ComputeListInstruction::TYPE_DISPATCH_INDIRECT; + instruction->buffer = p_buffer; + instruction->offset = p_offset; + compute_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT); +} + +void RenderingDeviceGraph::add_compute_list_set_push_constant(RDD::ShaderID p_shader, const void *p_data, uint32_t p_data_size) { + uint32_t instruction_size = sizeof(ComputeListSetPushConstantInstruction) + p_data_size; + ComputeListSetPushConstantInstruction *instruction = reinterpret_cast(_allocate_compute_list_instruction(instruction_size)); + instruction->type = 
ComputeListInstruction::TYPE_SET_PUSH_CONSTANT; + instruction->size = p_data_size; + instruction->shader = p_shader; + memcpy(instruction->data(), p_data, p_data_size); +} + +void RenderingDeviceGraph::add_compute_list_uniform_set_prepare_for_use(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index) { + ComputeListUniformSetPrepareForUseInstruction *instruction = reinterpret_cast(_allocate_compute_list_instruction(sizeof(ComputeListUniformSetPrepareForUseInstruction))); + instruction->type = ComputeListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE; + instruction->shader = p_shader; + instruction->uniform_set = p_uniform_set; + instruction->set_index = set_index; +} + +void RenderingDeviceGraph::add_compute_list_usage(ResourceTracker *p_tracker, ResourceUsage p_usage) { + DEV_ASSERT(p_tracker != nullptr); + + p_tracker->reset_if_outdated(tracking_frame); + + if (p_tracker->compute_list_index != compute_instruction_list.index) { + compute_instruction_list.command_trackers.push_back(p_tracker); + compute_instruction_list.command_tracker_usages.push_back(p_usage); + p_tracker->compute_list_index = compute_instruction_list.index; + } +} + +void RenderingDeviceGraph::add_compute_list_usages(VectorView p_trackers, VectorView p_usages) { + DEV_ASSERT(p_trackers.size() == p_usages.size()); + + for (uint32_t i = 0; i < p_trackers.size(); i++) { + add_compute_list_usage(p_trackers[i], p_usages[i]); + } +} + +void RenderingDeviceGraph::add_compute_list_end() { + int32_t command_index; + uint32_t instruction_data_size = compute_instruction_list.data.size(); + uint32_t command_size = sizeof(RecordedComputeListCommand) + instruction_data_size; + RecordedComputeListCommand *command = static_cast(_allocate_command(command_size, command_index)); + command->type = RecordedCommand::TYPE_COMPUTE_LIST; + command->dst_stages = compute_instruction_list.stages; + command->instruction_data_size = instruction_data_size; + memcpy(command->instruction_data(), 
compute_instruction_list.data.ptr(), instruction_data_size); + _add_command_to_graph(compute_instruction_list.command_trackers.ptr(), compute_instruction_list.command_tracker_usages.ptr(), compute_instruction_list.command_trackers.size(), command_index, command); +} + +void RenderingDeviceGraph::add_draw_list_begin(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_clear_values, bool p_uses_color, bool p_uses_depth) { + draw_instruction_list.clear(); + draw_instruction_list.index++; + draw_instruction_list.render_pass = p_render_pass; + draw_instruction_list.framebuffer = p_framebuffer; + draw_instruction_list.region = p_region; + draw_instruction_list.clear_values.resize(p_clear_values.size()); + for (uint32_t i = 0; i < p_clear_values.size(); i++) { + draw_instruction_list.clear_values[i] = p_clear_values[i]; + } + + if (p_uses_color) { + draw_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); + } + + if (p_uses_depth) { + draw_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT); + draw_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT); + } +} + +void RenderingDeviceGraph::add_draw_list_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint32_t p_offset) { + DrawListBindIndexBufferInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(sizeof(DrawListBindIndexBufferInstruction))); + instruction->type = DrawListInstruction::TYPE_BIND_INDEX_BUFFER; + instruction->buffer = p_buffer; + instruction->format = p_format; + instruction->offset = p_offset; + + if (instruction->buffer.id != 0) { + draw_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT); + } +} + +void RenderingDeviceGraph::add_draw_list_bind_pipeline(RDD::PipelineID p_pipeline, BitField p_pipeline_stage_bits) { + DrawListBindPipelineInstruction *instruction = 
reinterpret_cast(_allocate_draw_list_instruction(sizeof(DrawListBindPipelineInstruction))); + instruction->type = DrawListInstruction::TYPE_BIND_PIPELINE; + instruction->pipeline = p_pipeline; + draw_instruction_list.stages = draw_instruction_list.stages | p_pipeline_stage_bits; +} + +void RenderingDeviceGraph::add_draw_list_bind_uniform_set(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index) { + DrawListBindUniformSetInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(sizeof(DrawListBindUniformSetInstruction))); + instruction->type = DrawListInstruction::TYPE_BIND_UNIFORM_SET; + instruction->shader = p_shader; + instruction->uniform_set = p_uniform_set; + instruction->set_index = set_index; +} + +void RenderingDeviceGraph::add_draw_list_bind_vertex_buffers(VectorView p_vertex_buffers, VectorView p_vertex_buffer_offsets) { + DEV_ASSERT(p_vertex_buffers.size() == p_vertex_buffer_offsets.size()); + + uint32_t instruction_size = sizeof(DrawListBindVertexBuffersInstruction) + sizeof(RDD::BufferID) * p_vertex_buffers.size() + sizeof(uint64_t) * p_vertex_buffer_offsets.size(); + DrawListBindVertexBuffersInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(instruction_size)); + instruction->type = DrawListInstruction::TYPE_BIND_VERTEX_BUFFERS; + instruction->vertex_buffers_count = p_vertex_buffers.size(); + + RDD::BufferID *vertex_buffers = instruction->vertex_buffers(); + uint64_t *vertex_buffer_offsets = instruction->vertex_buffer_offsets(); + for (uint32_t i = 0; i < instruction->vertex_buffers_count; i++) { + vertex_buffers[i] = p_vertex_buffers[i]; + vertex_buffer_offsets[i] = p_vertex_buffer_offsets[i]; + } + + if (instruction->vertex_buffers_count > 0) { + draw_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT); + } +} + +void RenderingDeviceGraph::add_draw_list_clear_attachments(VectorView p_attachments_clear, VectorView p_attachments_clear_rect) { + uint32_t 
instruction_size = sizeof(DrawListClearAttachmentsInstruction) + sizeof(RDD::AttachmentClear) * p_attachments_clear.size() + sizeof(Rect2i) * p_attachments_clear_rect.size(); + DrawListClearAttachmentsInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(instruction_size)); + instruction->type = DrawListInstruction::TYPE_CLEAR_ATTACHMENTS; + instruction->attachments_clear_count = p_attachments_clear.size(); + instruction->attachments_clear_rect_count = p_attachments_clear_rect.size(); + + RDD::AttachmentClear *attachments_clear = instruction->attachments_clear(); + Rect2i *attachments_clear_rect = instruction->attachments_clear_rect(); + for (uint32_t i = 0; i < instruction->attachments_clear_count; i++) { + attachments_clear[i] = p_attachments_clear[i]; + } + + for (uint32_t i = 0; i < instruction->attachments_clear_rect_count; i++) { + attachments_clear_rect[i] = p_attachments_clear_rect[i]; + } +} + +void RenderingDeviceGraph::add_draw_list_draw(uint32_t p_vertex_count, uint32_t p_instance_count) { + DrawListDrawInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(sizeof(DrawListDrawInstruction))); + instruction->type = DrawListInstruction::TYPE_DRAW; + instruction->vertex_count = p_vertex_count; + instruction->instance_count = p_instance_count; +} + +void RenderingDeviceGraph::add_draw_list_draw_indexed(uint32_t p_index_count, uint32_t p_instance_count, uint32_t p_first_index) { + DrawListDrawIndexedInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(sizeof(DrawListDrawIndexedInstruction))); + instruction->type = DrawListInstruction::TYPE_DRAW_INDEXED; + instruction->index_count = p_index_count; + instruction->instance_count = p_instance_count; + instruction->first_index = p_first_index; +} + +void RenderingDeviceGraph::add_draw_list_execute_commands(RDD::CommandBufferID p_command_buffer) { + DrawListExecuteCommandsInstruction *instruction = 
reinterpret_cast(_allocate_draw_list_instruction(sizeof(DrawListExecuteCommandsInstruction))); + instruction->type = DrawListInstruction::TYPE_EXECUTE_COMMANDS; + instruction->command_buffer = p_command_buffer; +} + +void RenderingDeviceGraph::add_draw_list_next_subpass(RDD::CommandBufferType p_command_buffer_type) { + DrawListNextSubpassInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(sizeof(DrawListNextSubpassInstruction))); + instruction->type = DrawListInstruction::TYPE_NEXT_SUBPASS; + instruction->command_buffer_type = p_command_buffer_type; +} + +void RenderingDeviceGraph::add_draw_list_set_blend_constants(const Color &p_color) { + DrawListSetBlendConstantsInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(sizeof(DrawListSetBlendConstantsInstruction))); + instruction->type = DrawListInstruction::TYPE_SET_BLEND_CONSTANTS; + instruction->color = p_color; +} + +void RenderingDeviceGraph::add_draw_list_set_line_width(float p_width) { + DrawListSetLineWidthInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(sizeof(DrawListSetLineWidthInstruction))); + instruction->type = DrawListInstruction::TYPE_SET_LINE_WIDTH; + instruction->width = p_width; +} + +void RenderingDeviceGraph::add_draw_list_set_push_constant(RDD::ShaderID p_shader, const void *p_data, uint32_t p_data_size) { + uint32_t instruction_size = sizeof(DrawListSetPushConstantInstruction) + p_data_size; + DrawListSetPushConstantInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(instruction_size)); + instruction->type = DrawListInstruction::TYPE_SET_PUSH_CONSTANT; + instruction->size = p_data_size; + instruction->shader = p_shader; + memcpy(instruction->data(), p_data, p_data_size); +} + +void RenderingDeviceGraph::add_draw_list_set_scissor(Rect2i p_rect) { + DrawListSetScissorInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(sizeof(DrawListSetScissorInstruction))); + 
instruction->type = DrawListInstruction::TYPE_SET_SCISSOR; + instruction->rect = p_rect; +} + +void RenderingDeviceGraph::add_draw_list_set_viewport(Rect2i p_rect) { + DrawListSetViewportInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(sizeof(DrawListSetViewportInstruction))); + instruction->type = DrawListInstruction::TYPE_SET_VIEWPORT; + instruction->rect = p_rect; +} + +void RenderingDeviceGraph::add_draw_list_uniform_set_prepare_for_use(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index) { + DrawListUniformSetPrepareForUseInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(sizeof(DrawListUniformSetPrepareForUseInstruction))); + instruction->type = DrawListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE; + instruction->shader = p_shader; + instruction->uniform_set = p_uniform_set; + instruction->set_index = set_index; +} + +// Registers p_tracker, used as p_usage, with the draw list that is currently being recorded. +// The tracker is reset first when its command frame differs from tracking_frame. It is appended to the +// list only if its draw_list_index does not already match this list, so a repeated registration within +// the same draw list is ignored (and its usage is not updated). +void RenderingDeviceGraph::add_draw_list_usage(ResourceTracker *p_tracker, ResourceUsage p_usage) { + // The tracker is dereferenced right below; assert on null trackers the same way add_compute_list_usage() does. + DEV_ASSERT(p_tracker != nullptr); + + p_tracker->reset_if_outdated(tracking_frame); + + if (p_tracker->draw_list_index != draw_instruction_list.index) { + draw_instruction_list.command_trackers.push_back(p_tracker); + draw_instruction_list.command_tracker_usages.push_back(p_usage); + p_tracker->draw_list_index = draw_instruction_list.index; + } +} + +void RenderingDeviceGraph::add_draw_list_usages(VectorView p_trackers, VectorView p_usages) { + DEV_ASSERT(p_trackers.size() == p_usages.size()); + + for (uint32_t i = 0; i < p_trackers.size(); i++) { + add_draw_list_usage(p_trackers[i], p_usages[i]); + } +} + +void RenderingDeviceGraph::add_draw_list_end() { + // Arbitrary size threshold to evaluate if it'd be best to record the draw list on the background as a secondary buffer. 
+ const uint32_t instruction_data_threshold_for_secondary = 16384; + RDD::CommandBufferType command_buffer_type; + uint32_t &secondary_buffers_used = frames[frame].secondary_command_buffers_used; + if (draw_instruction_list.data.size() > instruction_data_threshold_for_secondary && secondary_buffers_used < frames[frame].secondary_command_buffers.size()) { + // Copy the current instruction list data into another array that will be used by the secondary command buffer worker. + SecondaryCommandBuffer &secondary = frames[frame].secondary_command_buffers[secondary_buffers_used]; + secondary.render_pass = draw_instruction_list.render_pass; + secondary.framebuffer = draw_instruction_list.framebuffer; + secondary.instruction_data.resize(draw_instruction_list.data.size()); + memcpy(secondary.instruction_data.ptr(), draw_instruction_list.data.ptr(), draw_instruction_list.data.size()); + + // Run a background task for recording the secondary command buffer. + secondary.task = WorkerThreadPool::get_singleton()->add_template_task(this, &RenderingDeviceGraph::_run_secondary_command_buffer_task, &secondary, true); + + // Clear the instruction list and add a single command for executing the secondary command buffer instead. 
+ draw_instruction_list.data.clear(); + add_draw_list_execute_commands(secondary.command_buffer); + secondary_buffers_used++; + + command_buffer_type = RDD::COMMAND_BUFFER_TYPE_SECONDARY; + } else { + command_buffer_type = RDD::COMMAND_BUFFER_TYPE_PRIMARY; + } + + int32_t command_index; + uint32_t clear_values_size = sizeof(RDD::RenderPassClearValue) * draw_instruction_list.clear_values.size(); + uint32_t instruction_data_size = draw_instruction_list.data.size(); + uint32_t command_size = sizeof(RecordedDrawListCommand) + clear_values_size + instruction_data_size; + RecordedDrawListCommand *command = static_cast(_allocate_command(command_size, command_index)); + command->type = RecordedCommand::TYPE_DRAW_LIST; + command->dst_stages = draw_instruction_list.stages; + command->instruction_data_size = instruction_data_size; + command->render_pass = draw_instruction_list.render_pass; + command->framebuffer = draw_instruction_list.framebuffer; + command->command_buffer_type = command_buffer_type; + command->region = draw_instruction_list.region; + command->clear_values_count = draw_instruction_list.clear_values.size(); + + RDD::RenderPassClearValue *clear_values = command->clear_values(); + for (uint32_t i = 0; i < command->clear_values_count; i++) { + clear_values[i] = draw_instruction_list.clear_values[i]; + } + + memcpy(command->instruction_data(), draw_instruction_list.data.ptr(), instruction_data_size); + _add_command_to_graph(draw_instruction_list.command_trackers.ptr(), draw_instruction_list.command_tracker_usages.ptr(), draw_instruction_list.command_trackers.size(), command_index, command); +} + +void RenderingDeviceGraph::add_texture_clear(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, const Color &p_color, const RDD::TextureSubresourceRange &p_range) { + DEV_ASSERT(p_dst_tracker != nullptr); + + int32_t command_index; + RecordedTextureClearCommand *command = static_cast(_allocate_command(sizeof(RecordedTextureClearCommand), command_index)); + 
command->type = RecordedCommand::TYPE_TEXTURE_CLEAR; + command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->texture = p_dst; + command->color = p_color; + command->range = p_range; + + ResourceUsage usage = RESOURCE_USAGE_TRANSFER_TO; + _add_command_to_graph(&p_dst_tracker, &usage, 1, command_index, command); +} + +void RenderingDeviceGraph::add_texture_copy(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, RDD::TextureCopyRegion p_region) { + DEV_ASSERT(p_src_tracker != nullptr); + DEV_ASSERT(p_dst_tracker != nullptr); + + int32_t command_index; + RecordedTextureCopyCommand *command = static_cast(_allocate_command(sizeof(RecordedTextureCopyCommand), command_index)); + command->type = RecordedCommand::TYPE_TEXTURE_COPY; + command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->from_texture = p_src; + command->to_texture = p_dst; + command->region = p_region; + + ResourceTracker *trackers[2] = { p_dst_tracker, p_src_tracker }; + ResourceUsage usages[2] = { RESOURCE_USAGE_TRANSFER_TO, RESOURCE_USAGE_TRANSFER_FROM }; + _add_command_to_graph(trackers, usages, 2, command_index, command); +} + +void RenderingDeviceGraph::add_texture_get_data(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, VectorView p_buffer_texture_copy_regions) { + DEV_ASSERT(p_src_tracker != nullptr); + + int32_t command_index; + uint64_t command_size = sizeof(RecordedTextureGetDataCommand) + p_buffer_texture_copy_regions.size() * sizeof(RDD::BufferTextureCopyRegion); + RecordedTextureGetDataCommand *command = static_cast(_allocate_command(command_size, command_index)); + command->type = RecordedCommand::TYPE_TEXTURE_GET_DATA; + command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->from_texture = p_src; + command->to_buffer = p_dst; + command->buffer_texture_copy_regions_count = p_buffer_texture_copy_regions.size(); + + RDD::BufferTextureCopyRegion *buffer_texture_copy_regions = 
command->buffer_texture_copy_regions(); + for (uint32_t i = 0; i < command->buffer_texture_copy_regions_count; i++) { + buffer_texture_copy_regions[i] = p_buffer_texture_copy_regions[i]; + } + + ResourceUsage usage = RESOURCE_USAGE_TRANSFER_FROM; + _add_command_to_graph(&p_src_tracker, &usage, 1, command_index, command); +} + +void RenderingDeviceGraph::add_texture_resolve(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, uint32_t p_src_layer, uint32_t p_src_mipmap, uint32_t p_dst_layer, uint32_t p_dst_mipmap) { + DEV_ASSERT(p_src_tracker != nullptr); + DEV_ASSERT(p_dst_tracker != nullptr); + + int32_t command_index; + RecordedTextureResolveCommand *command = static_cast(_allocate_command(sizeof(RecordedTextureResolveCommand), command_index)); + command->type = RecordedCommand::TYPE_TEXTURE_RESOLVE; + command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->from_texture = p_src; + command->to_texture = p_dst; + command->src_layer = p_src_layer; + command->src_mipmap = p_src_mipmap; + command->dst_layer = p_dst_layer; + command->dst_mipmap = p_dst_mipmap; + + ResourceTracker *trackers[2] = { p_dst_tracker, p_src_tracker }; + ResourceUsage usages[2] = { RESOURCE_USAGE_TRANSFER_TO, RESOURCE_USAGE_TRANSFER_FROM }; + _add_command_to_graph(trackers, usages, 2, command_index, command); +} + +void RenderingDeviceGraph::add_texture_update(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView p_buffer_copies) { + DEV_ASSERT(p_dst_tracker != nullptr); + + int32_t command_index; + uint64_t command_size = sizeof(RecordedTextureUpdateCommand) + p_buffer_copies.size() * sizeof(RecordedBufferToTextureCopy); + RecordedTextureUpdateCommand *command = static_cast(_allocate_command(command_size, command_index)); + command->type = RecordedCommand::TYPE_TEXTURE_UPDATE; + command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT; + command->to_texture = p_dst; + command->buffer_to_texture_copies_count = 
p_buffer_copies.size(); + + RecordedBufferToTextureCopy *buffer_to_texture_copies = command->buffer_to_texture_copies(); + for (uint32_t i = 0; i < command->buffer_to_texture_copies_count; i++) { + buffer_to_texture_copies[i] = p_buffer_copies[i]; + } + + ResourceUsage usage = RESOURCE_USAGE_TRANSFER_TO; + _add_command_to_graph(&p_dst_tracker, &usage, 1, command_index, command); +} + +void RenderingDeviceGraph::add_capture_timestamp(RDD::QueryPoolID p_query_pool, uint32_t p_index) { + int32_t command_index; + RecordedCaptureTimestampCommand *command = static_cast(_allocate_command(sizeof(RecordedCaptureTimestampCommand), command_index)); + command->type = RecordedCommand::TYPE_CAPTURE_TIMESTAMP; + command->dst_stages = 0; + command->pool = p_query_pool; + command->index = p_index; + _add_command_to_graph(nullptr, nullptr, 0, command_index, command); +} + +void RenderingDeviceGraph::add_synchronization() { + // Synchronization is only acknowledged if commands have been recorded on the graph already. 
+ if (command_count > 0) { + command_synchronization_pending = true; + } +} + +// Starts a new label: appends the UTF-8 bytes of p_label_name plus a null terminator to command_label_chars, +// records the label's offset and color, and makes the new label the current label index. +void RenderingDeviceGraph::begin_label(const String &p_label_name, const Color &p_color) { + uint32_t command_label_offset = command_label_chars.size(); + PackedByteArray command_label_utf8 = p_label_name.to_utf8_buffer(); + int command_label_utf8_size = command_label_utf8.size(); + command_label_chars.resize(command_label_offset + command_label_utf8_size + 1); + if (command_label_utf8_size > 0) { + // An empty name produces an empty buffer whose ptr() may be null; memcpy() must not be given a null source, even to copy zero bytes. + memcpy(&command_label_chars[command_label_offset], command_label_utf8.ptr(), command_label_utf8_size); + } + + command_label_chars[command_label_offset + command_label_utf8_size] = '\0'; + command_label_colors.push_back(p_color); + command_label_offsets.push_back(command_label_offset); + command_label_index = command_label_count; + command_label_count++; +} + +void RenderingDeviceGraph::end_label() { + command_label_index = -1; +} + +void RenderingDeviceGraph::end(RDD::CommandBufferID p_command_buffer, bool p_reorder_commands, bool p_full_barriers) { + if (command_count == 0) { + // No commands have been logged, do nothing. + return; + } + + thread_local LocalVector commands_sorted; + if (p_reorder_commands) { + thread_local LocalVector command_stack; + thread_local LocalVector sorted_command_indices; + thread_local LocalVector command_degrees; + int32_t adjacency_list_index = 0; + int32_t command_index; + + // Count all the incoming connections to every node by traversing their adjacency list. 
+ command_degrees.resize(command_count); + memset(command_degrees.ptr(), 0, sizeof(uint32_t) * command_degrees.size()); + for (uint32_t i = 0; i < command_count; i++) { + const RecordedCommand &recorded_command = *reinterpret_cast(&command_data[command_data_offsets[i]]); + adjacency_list_index = recorded_command.adjacent_command_list_index; + while (adjacency_list_index >= 0) { + const RecordedCommandListNode &command_list_node = command_list_nodes[adjacency_list_index]; + DEV_ASSERT((command_list_node.command_index != int32_t(i)) && "Command can't have itself as a dependency."); + command_degrees[command_list_node.command_index] += 1; + adjacency_list_index = command_list_node.next_list_index; + } + } + + // Push to the stack all nodes that have no incoming connections. + command_stack.clear(); + for (uint32_t i = 0; i < command_count; i++) { + if (command_degrees[i] == 0) { + command_stack.push_back(i); + } + } + + sorted_command_indices.clear(); + while (!command_stack.is_empty()) { + // Pop command from the stack. + command_index = command_stack[command_stack.size() - 1]; + command_stack.resize(command_stack.size() - 1); + + // Add it to the sorted commands. + sorted_command_indices.push_back(command_index); + + // Search for its adjacents and lower their degree for every visit. If the degree reaches zero, we push the command to the stack. 
+ const uint32_t command_data_offset = command_data_offsets[command_index]; + const RecordedCommand &recorded_command = *reinterpret_cast(&command_data[command_data_offset]); + adjacency_list_index = recorded_command.adjacent_command_list_index; + while (adjacency_list_index >= 0) { + const RecordedCommandListNode &command_list_node = command_list_nodes[adjacency_list_index]; + uint32_t &command_degree = command_degrees[command_list_node.command_index]; + DEV_ASSERT(command_degree > 0); + command_degree--; + if (command_degree == 0) { + command_stack.push_back(command_list_node.command_index); + } + + adjacency_list_index = command_list_node.next_list_index; + } + } + + // The level pass below reads sorted_command_indices[i] for every recorded command, so the sort above must have reached all of them. + DEV_ASSERT((sorted_command_indices.size() == command_count) && "Every command must be sorted; the command graph can't contain cycles."); + + // Batch buffer, texture, draw lists and compute operations together. + // One entry per RecordedCommand::Type. The last entry used to be left to implicit zero-initialization; it is spelled out to keep the table in sync with the enum. + const uint32_t PriorityTable[RecordedCommand::TYPE_MAX] = { + 0, // TYPE_NONE + 1, // TYPE_BUFFER_CLEAR + 1, // TYPE_BUFFER_COPY + 1, // TYPE_BUFFER_GET_DATA + 1, // TYPE_BUFFER_UPDATE + 4, // TYPE_COMPUTE_LIST + 3, // TYPE_DRAW_LIST + 2, // TYPE_TEXTURE_CLEAR + 2, // TYPE_TEXTURE_COPY + 2, // TYPE_TEXTURE_GET_DATA + 2, // TYPE_TEXTURE_RESOLVE + 2, // TYPE_TEXTURE_UPDATE + 0, // TYPE_CAPTURE_TIMESTAMP + }; + + commands_sorted.clear(); + commands_sorted.resize(command_count); + + // Visit the commands in sorted order and push every adjacent command at least one level past the command being visited. + for (uint32_t i = 0; i < command_count; i++) { + const int32_t sorted_command_index = sorted_command_indices[i]; + const uint32_t command_data_offset = command_data_offsets[sorted_command_index]; + // Bind by reference like the pass above: only the type and the adjacency list head are read, so the command doesn't need to be copied. + const RecordedCommand &recorded_command = *reinterpret_cast(&command_data[command_data_offset]); + const uint32_t next_command_level = commands_sorted[sorted_command_index].level + 1; + adjacency_list_index = recorded_command.adjacent_command_list_index; + while (adjacency_list_index >= 0) { + const RecordedCommandListNode &command_list_node = command_list_nodes[adjacency_list_index]; + uint32_t &adjacent_command_level = commands_sorted[command_list_node.command_index].level; + if (adjacent_command_level < next_command_level) { + adjacent_command_level = next_command_level; + } + + 
adjacency_list_index = command_list_node.next_list_index; + } + + commands_sorted[sorted_command_index].index = sorted_command_index; + commands_sorted[sorted_command_index].priority = PriorityTable[recorded_command.type]; + } + } else { + commands_sorted.clear(); + commands_sorted.resize(command_count); + + for (uint32_t i = 0; i < command_count; i++) { + commands_sorted[i].index = i; + } + } + + _wait_for_secondary_command_buffer_tasks(); + + if (command_count > 0) { + int32_t current_label_index = -1; + int32_t current_label_level = -1; + _run_label_command_change(p_command_buffer, -1, -1, true, true, nullptr, 0, current_label_index, current_label_level); + + if (p_reorder_commands) { +#if PRINT_RENDER_GRAPH + print_line("BEFORE SORT"); + _print_render_commands(commands_sorted.ptr(), command_count); +#endif + + commands_sorted.sort(); + +#if PRINT_RENDER_GRAPH + print_line("AFTER SORT"); + _print_render_commands(commands_sorted.ptr(), command_count); +#endif + + uint32_t boosted_priority = 0; + uint32_t current_level = commands_sorted[0].level; + uint32_t current_level_start = 0; + for (uint32_t i = 0; i < command_count; i++) { + if (current_level != commands_sorted[i].level) { + RecordedCommandSort *level_command_ptr = &commands_sorted[current_level_start]; + uint32_t level_command_count = i - current_level_start; + _boost_priority_for_render_commands(level_command_ptr, level_command_count, boosted_priority); + _group_barriers_for_render_commands(p_command_buffer, level_command_ptr, level_command_count, p_full_barriers); + _run_render_commands(p_command_buffer, current_level, level_command_ptr, level_command_count, current_label_index, current_label_level); + current_level = commands_sorted[i].level; + current_level_start = i; + } + } + + RecordedCommandSort *level_command_ptr = &commands_sorted[current_level_start]; + uint32_t level_command_count = command_count - current_level_start; + _boost_priority_for_render_commands(level_command_ptr, 
level_command_count, boosted_priority); + _group_barriers_for_render_commands(p_command_buffer, level_command_ptr, level_command_count, p_full_barriers); + _run_render_commands(p_command_buffer, current_level, level_command_ptr, level_command_count, current_label_index, current_label_level); + +#if PRINT_RENDER_GRAPH + print_line("COMMANDS", command_count, "LEVELS", current_level + 1); +#endif + } else { + for (uint32_t i = 0; i < command_count; i++) { + _group_barriers_for_render_commands(p_command_buffer, &commands_sorted[i], 1, p_full_barriers); + _run_render_commands(p_command_buffer, i, &commands_sorted[i], 1, current_label_index, current_label_level); + } + } + + _run_label_command_change(p_command_buffer, -1, -1, true, false, nullptr, 0, current_label_index, current_label_level); + } + + // Advance the frame counter. It's not necessary to do this if no commands are recorded because that means no secondary command buffers were used. + frame = (frame + 1) % frames.size(); +} + +#if PRINT_RESOURCE_TRACKER_TOTAL +static uint32_t resource_tracker_total = 0; +#endif + +RenderingDeviceGraph::ResourceTracker *RenderingDeviceGraph::resource_tracker_create() { +#if PRINT_RESOURCE_TRACKER_TOTAL + print_line("Resource trackers:", ++resource_tracker_total); +#endif + return memnew(ResourceTracker); +} + +void RenderingDeviceGraph::resource_tracker_free(ResourceTracker *tracker) { + if (tracker == nullptr) { + return; + } + + if (tracker->in_parent_dirty_list) { + // Delete the tracker from the parent's dirty linked list. 
+ if (tracker->parent->dirty_shared_list == tracker) { + tracker->parent->dirty_shared_list = tracker->next_shared; + } else { + ResourceTracker *node = tracker->parent->dirty_shared_list; + while (node != nullptr) { + if (node->next_shared == tracker) { + node->next_shared = tracker->next_shared; + node = nullptr; + } else { + node = node->next_shared; + } + } + } + } + + memdelete(tracker); + +#if PRINT_RESOURCE_TRACKER_TOTAL + print_line("Resource trackers:", --resource_tracker_total); +#endif +} diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h new file mode 100644 index 00000000000..84fbe020955 --- /dev/null +++ b/servers/rendering/rendering_device_graph.h @@ -0,0 +1,668 @@ +/**************************************************************************/ +/* rendering_device_graph.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. 
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifndef RENDERING_DEVICE_GRAPH_H +#define RENDERING_DEVICE_GRAPH_H + +#include "core/object/worker_thread_pool.h" +#include "rendering_device_commons.h" +#include "rendering_device_driver.h" + +// Buffer barriers have not shown any significant improvement or shown to be +// even detrimental to performance. However, there are currently some known +// cases where using them can solve problems that using singular memory +// barriers does not, probably due to driver issues (see comment on PR #84976 +// https://github.com/godotengine/godot/pull/84976#issuecomment-1878566830). 
+ +#define USE_BUFFER_BARRIERS 1 + +class RenderingDeviceGraph { +public: + struct ComputeListInstruction { + enum Type { + TYPE_NONE, + TYPE_BIND_PIPELINE, + TYPE_BIND_UNIFORM_SET, + TYPE_DISPATCH, + TYPE_DISPATCH_INDIRECT, + TYPE_SET_PUSH_CONSTANT, + TYPE_UNIFORM_SET_PREPARE_FOR_USE + }; + + Type type = TYPE_NONE; + }; + + struct DrawListInstruction { + enum Type { + TYPE_NONE, + TYPE_BIND_INDEX_BUFFER, + TYPE_BIND_PIPELINE, + TYPE_BIND_UNIFORM_SET, + TYPE_BIND_VERTEX_BUFFERS, + TYPE_CLEAR_ATTACHMENTS, + TYPE_DRAW, + TYPE_DRAW_INDEXED, + TYPE_EXECUTE_COMMANDS, + TYPE_NEXT_SUBPASS, + TYPE_SET_BLEND_CONSTANTS, + TYPE_SET_LINE_WIDTH, + TYPE_SET_PUSH_CONSTANT, + TYPE_SET_SCISSOR, + TYPE_SET_VIEWPORT, + TYPE_UNIFORM_SET_PREPARE_FOR_USE + }; + + Type type = TYPE_NONE; + }; + + struct RecordedCommand { + enum Type { + TYPE_NONE, + TYPE_BUFFER_CLEAR, + TYPE_BUFFER_COPY, + TYPE_BUFFER_GET_DATA, + TYPE_BUFFER_UPDATE, + TYPE_COMPUTE_LIST, + TYPE_DRAW_LIST, + TYPE_TEXTURE_CLEAR, + TYPE_TEXTURE_COPY, + TYPE_TEXTURE_GET_DATA, + TYPE_TEXTURE_RESOLVE, + TYPE_TEXTURE_UPDATE, + TYPE_CAPTURE_TIMESTAMP, + TYPE_MAX + }; + + Type type = TYPE_NONE; + int32_t adjacent_command_list_index = -1; + RDD::MemoryBarrier memory_barrier; + int32_t normalization_barrier_index = -1; + int normalization_barrier_count = 0; + int32_t transition_barrier_index = -1; + int32_t transition_barrier_count = 0; +#if USE_BUFFER_BARRIERS + int32_t buffer_barrier_index = -1; + int32_t buffer_barrier_count = 0; +#endif + int32_t label_index = -1; + BitField src_stages; + BitField dst_stages; + }; + + struct RecordedBufferCopy { + RDD::BufferID source; + RDD::BufferCopyRegion region; + }; + + struct RecordedBufferToTextureCopy { + RDD::BufferID from_buffer; + RDD::BufferTextureCopyRegion region; + }; + + enum ResourceUsage { + RESOURCE_USAGE_NONE, + RESOURCE_USAGE_TRANSFER_FROM, + RESOURCE_USAGE_TRANSFER_TO, + RESOURCE_USAGE_UNIFORM_BUFFER_READ, + RESOURCE_USAGE_INDIRECT_BUFFER_READ, + 
RESOURCE_USAGE_TEXTURE_BUFFER_READ, + RESOURCE_USAGE_TEXTURE_BUFFER_READ_WRITE, + RESOURCE_USAGE_STORAGE_BUFFER_READ, + RESOURCE_USAGE_STORAGE_BUFFER_READ_WRITE, + RESOURCE_USAGE_VERTEX_BUFFER_READ, + RESOURCE_USAGE_INDEX_BUFFER_READ, + RESOURCE_USAGE_TEXTURE_SAMPLE, + RESOURCE_USAGE_STORAGE_IMAGE_READ, + RESOURCE_USAGE_STORAGE_IMAGE_READ_WRITE, + RESOURCE_USAGE_ATTACHMENT_COLOR_READ, + RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE, + RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ, + RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE + }; + + struct ResourceTracker { + uint32_t reference_count = 0; + int64_t command_frame = -1; + int32_t read_command_list_index = -1; + int32_t write_command_or_list_index = -1; + int32_t draw_list_index = -1; + int32_t compute_list_index = -1; + ResourceUsage usage = RESOURCE_USAGE_NONE; + BitField usage_access; + RDD::BufferID buffer_driver_id; + RDD::TextureID texture_driver_id; + RDD::TextureSubresourceRange texture_subresources; + int32_t texture_slice_command_index = -1; + ResourceTracker *parent = nullptr; + ResourceTracker *dirty_shared_list = nullptr; + ResourceTracker *next_shared = nullptr; + Rect2i texture_slice_or_dirty_rect; + bool in_parent_dirty_list = false; + bool write_command_list_enabled = false; + + _FORCE_INLINE_ void reset_if_outdated(int64_t new_command_frame) { + if (new_command_frame != command_frame) { + usage_access.clear(); + command_frame = new_command_frame; + read_command_list_index = -1; + write_command_or_list_index = -1; + draw_list_index = -1; + compute_list_index = -1; + texture_slice_command_index = -1; + write_command_list_enabled = false; + } + } + }; + +private: + struct InstructionList { + LocalVector data; + LocalVector command_trackers; + LocalVector command_tracker_usages; + BitField stages; + int32_t index = 0; + + void clear() { + data.clear(); + command_trackers.clear(); + command_tracker_usages.clear(); + stages.clear(); + } + }; + + struct ComputeInstructionList : InstructionList { + // 
No extra contents. + }; + + struct DrawInstructionList : InstructionList { + RDD::RenderPassID render_pass; + RDD::FramebufferID framebuffer; + Rect2i region; + LocalVector clear_values; + }; + + struct RecordedCommandSort { + uint32_t level = 0; + uint32_t priority = 0; + int32_t index = -1; + + RecordedCommandSort() = default; + + bool operator<(const RecordedCommandSort &p_other) const { + if (level < p_other.level) { + return true; + } else if (level > p_other.level) { + return false; + } + + if (priority < p_other.priority) { + return true; + } else if (priority > p_other.priority) { + return false; + } + + return index < p_other.index; + } + }; + + struct RecordedCommandListNode { + int32_t command_index = -1; + int32_t next_list_index = -1; + }; + + struct RecordedWriteListNode { + int32_t command_index = -1; + int32_t next_list_index = -1; + Rect2i subresources; + }; + + struct RecordedBufferClearCommand : RecordedCommand { + RDD::BufferID buffer; + uint32_t offset = 0; + uint32_t size = 0; + }; + + struct RecordedBufferCopyCommand : RecordedCommand { + RDD::BufferID source; + RDD::BufferID destination; + RDD::BufferCopyRegion region; + }; + + struct RecordedBufferGetDataCommand : RecordedCommand { + RDD::BufferID source; + RDD::BufferID destination; + RDD::BufferCopyRegion region; + }; + + struct RecordedBufferUpdateCommand : RecordedCommand { + RDD::BufferID destination; + uint32_t buffer_copies_count = 0; + + _FORCE_INLINE_ RecordedBufferCopy *buffer_copies() { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ const RecordedBufferCopy *buffer_copies() const { + return reinterpret_cast(&this[1]); + } + }; + + struct RecordedComputeListCommand : RecordedCommand { + uint32_t instruction_data_size = 0; + + _FORCE_INLINE_ uint8_t *instruction_data() { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ const uint8_t *instruction_data() const { + return reinterpret_cast(&this[1]); + } + }; + + struct RecordedDrawListCommand : RecordedCommand 
{ + uint32_t instruction_data_size = 0; + RDD::RenderPassID render_pass; + RDD::FramebufferID framebuffer; + RDD::CommandBufferType command_buffer_type; + Rect2i region; + uint32_t clear_values_count = 0; + + _FORCE_INLINE_ RDD::RenderPassClearValue *clear_values() { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ const RDD::RenderPassClearValue *clear_values() const { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ uint8_t *instruction_data() { + return reinterpret_cast(&clear_values()[clear_values_count]); + } + + _FORCE_INLINE_ const uint8_t *instruction_data() const { + return reinterpret_cast(&clear_values()[clear_values_count]); + } + }; + + struct RecordedTextureClearCommand : RecordedCommand { + RDD::TextureID texture; + RDD::TextureSubresourceRange range; + Color color; + }; + + struct RecordedTextureCopyCommand : RecordedCommand { + RDD::TextureID from_texture; + RDD::TextureID to_texture; + RDD::TextureCopyRegion region; + }; + + struct RecordedTextureGetDataCommand : RecordedCommand { + RDD::TextureID from_texture; + RDD::BufferID to_buffer; + uint32_t buffer_texture_copy_regions_count = 0; + + _FORCE_INLINE_ RDD::BufferTextureCopyRegion *buffer_texture_copy_regions() { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ const RDD::BufferTextureCopyRegion *buffer_texture_copy_regions() const { + return reinterpret_cast(&this[1]); + } + }; + + struct RecordedTextureResolveCommand : RecordedCommand { + RDD::TextureID from_texture; + RDD::TextureID to_texture; + uint32_t src_layer = 0; + uint32_t src_mipmap = 0; + uint32_t dst_layer = 0; + uint32_t dst_mipmap = 0; + }; + + struct RecordedTextureUpdateCommand : RecordedCommand { + RDD::TextureID to_texture; + uint32_t buffer_to_texture_copies_count = 0; + + _FORCE_INLINE_ RecordedBufferToTextureCopy *buffer_to_texture_copies() { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ const RecordedBufferToTextureCopy *buffer_to_texture_copies() const { + return 
reinterpret_cast(&this[1]); + } + }; + + struct RecordedCaptureTimestampCommand : RecordedCommand { + RDD::QueryPoolID pool; + uint32_t index = 0; + }; + + struct DrawListBindIndexBufferInstruction : DrawListInstruction { + RDD::BufferID buffer; + RenderingDeviceCommons::IndexBufferFormat format; + uint32_t offset = 0; + }; + + struct DrawListBindPipelineInstruction : DrawListInstruction { + RDD::PipelineID pipeline; + }; + + struct DrawListBindUniformSetInstruction : DrawListInstruction { + RDD::UniformSetID uniform_set; + RDD::ShaderID shader; + uint32_t set_index = 0; + }; + + struct DrawListBindVertexBuffersInstruction : DrawListInstruction { + uint32_t vertex_buffers_count = 0; + + _FORCE_INLINE_ RDD::BufferID *vertex_buffers() { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ const RDD::BufferID *vertex_buffers() const { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ uint64_t *vertex_buffer_offsets() { + return reinterpret_cast(&vertex_buffers()[vertex_buffers_count]); + } + + _FORCE_INLINE_ const uint64_t *vertex_buffer_offsets() const { + return reinterpret_cast(&vertex_buffers()[vertex_buffers_count]); + } + }; + + struct DrawListClearAttachmentsInstruction : DrawListInstruction { + uint32_t attachments_clear_count = 0; + uint32_t attachments_clear_rect_count = 0; + + _FORCE_INLINE_ RDD::AttachmentClear *attachments_clear() { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ const RDD::AttachmentClear *attachments_clear() const { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ Rect2i *attachments_clear_rect() { + return reinterpret_cast(&attachments_clear()[attachments_clear_count]); + } + + _FORCE_INLINE_ const Rect2i *attachments_clear_rect() const { + return reinterpret_cast(&attachments_clear()[attachments_clear_count]); + } + }; + + struct DrawListDrawInstruction : DrawListInstruction { + uint32_t vertex_count = 0; + uint32_t instance_count = 0; + }; + + struct DrawListDrawIndexedInstruction : 
DrawListInstruction { + uint32_t index_count = 0; + uint32_t instance_count = 0; + uint32_t first_index = 0; + }; + + struct DrawListEndRenderPassInstruction : DrawListInstruction { + // No contents. + }; + + struct DrawListExecuteCommandsInstruction : DrawListInstruction { + RDD::CommandBufferID command_buffer; + }; + + struct DrawListSetPushConstantInstruction : DrawListInstruction { + uint32_t size = 0; + RDD::ShaderID shader; + + _FORCE_INLINE_ uint8_t *data() { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ const uint8_t *data() const { + return reinterpret_cast(&this[1]); + } + }; + + struct DrawListNextSubpassInstruction : DrawListInstruction { + RDD::CommandBufferType command_buffer_type; + }; + + struct DrawListSetBlendConstantsInstruction : DrawListInstruction { + Color color; + }; + + struct DrawListSetLineWidthInstruction : DrawListInstruction { + float width; + }; + + struct DrawListSetScissorInstruction : DrawListInstruction { + Rect2i rect; + }; + + struct DrawListSetViewportInstruction : DrawListInstruction { + Rect2i rect; + }; + + struct DrawListUniformSetPrepareForUseInstruction : DrawListInstruction { + RDD::UniformSetID uniform_set; + RDD::ShaderID shader; + uint32_t set_index = 0; + }; + + struct ComputeListBindPipelineInstruction : ComputeListInstruction { + RDD::PipelineID pipeline; + }; + + struct ComputeListBindUniformSetInstruction : ComputeListInstruction { + RDD::UniformSetID uniform_set; + RDD::ShaderID shader; + uint32_t set_index = 0; + }; + + struct ComputeListDispatchInstruction : ComputeListInstruction { + uint32_t x_groups = 0; + uint32_t y_groups = 0; + uint32_t z_groups = 0; + }; + + struct ComputeListDispatchIndirectInstruction : ComputeListInstruction { + RDD::BufferID buffer; + uint32_t offset = 0; + }; + + struct ComputeListSetPushConstantInstruction : ComputeListInstruction { + uint32_t size = 0; + RDD::ShaderID shader; + + _FORCE_INLINE_ uint8_t *data() { + return reinterpret_cast(&this[1]); + } + + 
_FORCE_INLINE_ const uint8_t *data() const { + return reinterpret_cast(&this[1]); + } + }; + + struct ComputeListUniformSetPrepareForUseInstruction : ComputeListInstruction { + RDD::UniformSetID uniform_set; + RDD::ShaderID shader; + uint32_t set_index = 0; + }; + + struct BarrierGroup { + BitField src_stages; + BitField dst_stages; + RDD::MemoryBarrier memory_barrier; + LocalVector normalization_barriers; + LocalVector transition_barriers; +#if USE_BUFFER_BARRIERS + LocalVector buffer_barriers; +#endif + + void clear() { + src_stages.clear(); + dst_stages.clear(); + memory_barrier.src_access.clear(); + memory_barrier.dst_access.clear(); + normalization_barriers.clear(); + transition_barriers.clear(); +#if USE_BUFFER_BARRIERS + buffer_barriers.clear(); +#endif + } + }; + + struct SecondaryCommandBuffer { + LocalVector instruction_data; + RDD::CommandBufferID command_buffer; + RDD::CommandPoolID command_pool; + RDD::RenderPassID render_pass; + RDD::FramebufferID framebuffer; + WorkerThreadPool::TaskID task; + }; + + struct Frame { + TightLocalVector secondary_command_buffers; + uint32_t secondary_command_buffers_used = 0; + }; + + RDD *driver = nullptr; + int64_t tracking_frame = 0; + LocalVector command_data; + LocalVector command_data_offsets; + LocalVector command_normalization_barriers; + LocalVector command_transition_barriers; + LocalVector command_buffer_barriers; + LocalVector command_label_chars; + LocalVector command_label_colors; + LocalVector command_label_offsets; + int32_t command_label_index = -1; + DrawInstructionList draw_instruction_list; + ComputeInstructionList compute_instruction_list; + uint32_t command_count = 0; + uint32_t command_label_count = 0; + LocalVector command_list_nodes; + LocalVector write_list_nodes; + int32_t command_timestamp_index = -1; + int32_t command_synchronization_index = -1; + bool command_synchronization_pending = false; + BarrierGroup barrier_group; + bool driver_honors_barriers = false; + TightLocalVector frames; + 
uint32_t frame = 0; + +#ifdef DEV_ENABLED + RBMap write_dependency_counters; +#endif + + static bool _is_write_usage(ResourceUsage p_usage); + static RDD::TextureLayout _usage_to_image_layout(ResourceUsage p_usage); + static RDD::BarrierAccessBits _usage_to_access_bits(ResourceUsage p_usage); + int32_t _add_to_command_list(int32_t p_command_index, int32_t p_list_index); + void _add_adjacent_command(int32_t p_previous_command_index, int32_t p_command_index, RecordedCommand *r_command); + int32_t _add_to_write_list(int32_t p_command_index, Rect2i suberesources, int32_t p_list_index); + RecordedCommand *_allocate_command(uint32_t p_command_size, int32_t &r_command_index); + DrawListInstruction *_allocate_draw_list_instruction(uint32_t p_instruction_size); + ComputeListInstruction *_allocate_compute_list_instruction(uint32_t p_instruction_size); + void _add_command_to_graph(ResourceTracker **p_resource_trackers, ResourceUsage *p_resource_usages, uint32_t p_resource_count, int32_t p_command_index, RecordedCommand *r_command); + void _add_texture_barrier_to_command(RDD::TextureID p_texture_id, BitField p_src_access, BitField p_dst_access, ResourceUsage p_prev_usage, ResourceUsage p_next_usage, RDD::TextureSubresourceRange p_subresources, LocalVector &r_barrier_vector, int32_t &r_barrier_index, int32_t &r_barrier_count); +#if USE_BUFFER_BARRIERS + void _add_buffer_barrier_to_command(RDD::BufferID p_buffer_id, BitField p_src_access, BitField p_dst_access, int32_t &r_barrier_index, int32_t &r_barrier_count); +#endif + void _run_compute_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); + void _run_draw_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); + void _run_secondary_command_buffer_task(const SecondaryCommandBuffer *p_secondary); + void _wait_for_secondary_command_buffer_tasks(); + void _run_render_commands(RDD::CommandBufferID 
p_command_buffer, int32_t p_level, const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count, int32_t &r_current_label_index, int32_t &r_current_label_level); + void _run_label_command_change(RDD::CommandBufferID p_command_buffer, int32_t p_new_label_index, int32_t p_new_level, bool p_ignore_previous_value, bool p_use_label_for_empty, const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count, int32_t &r_current_label_index, int32_t &r_current_label_level); + void _boost_priority_for_render_commands(RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count, uint32_t &r_boosted_priority); + void _group_barriers_for_render_commands(RDD::CommandBufferID p_command_buffer, const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count, bool p_full_memory_barrier); + void _print_render_commands(const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count); + void _print_draw_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); + void _print_compute_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); + +public: + RenderingDeviceGraph(); + ~RenderingDeviceGraph(); + void initialize(RDD *p_driver, uint32_t p_frame_count, uint32_t p_secondary_command_buffers_per_frame); + void begin(); + void add_buffer_clear(RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, uint32_t p_offset, uint32_t p_size); + void add_buffer_copy(RDD::BufferID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, RDD::BufferCopyRegion p_region); + void add_buffer_get_data(RDD::BufferID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, RDD::BufferCopyRegion p_region); + void add_buffer_update(RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, VectorView p_buffer_copies); + void add_compute_list_begin(); + void add_compute_list_bind_pipeline(RDD::PipelineID p_pipeline); + void 
add_compute_list_bind_uniform_set(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index); + void add_compute_list_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups); + void add_compute_list_dispatch_indirect(RDD::BufferID p_buffer, uint32_t p_offset); + void add_compute_list_set_push_constant(RDD::ShaderID p_shader, const void *p_data, uint32_t p_data_size); + void add_compute_list_uniform_set_prepare_for_use(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index); + void add_compute_list_usage(ResourceTracker *p_tracker, ResourceUsage p_usage); + void add_compute_list_usages(VectorView p_trackers, VectorView p_usages); + void add_compute_list_end(); + void add_draw_list_begin(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_clear_values, bool p_uses_color, bool p_uses_depth); + void add_draw_list_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint32_t p_offset); + void add_draw_list_bind_pipeline(RDD::PipelineID p_pipeline, BitField p_pipeline_stage_bits); + void add_draw_list_bind_uniform_set(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index); + void add_draw_list_bind_vertex_buffers(VectorView p_vertex_buffers, VectorView p_vertex_buffer_offsets); + void add_draw_list_clear_attachments(VectorView p_attachments_clear, VectorView p_attachments_clear_rect); + void add_draw_list_draw(uint32_t p_vertex_count, uint32_t p_instance_count); + void add_draw_list_draw_indexed(uint32_t p_index_count, uint32_t p_instance_count, uint32_t p_first_index); + void add_draw_list_execute_commands(RDD::CommandBufferID p_command_buffer); + void add_draw_list_next_subpass(RDD::CommandBufferType p_command_buffer_type); + void add_draw_list_set_blend_constants(const Color &p_color); + void add_draw_list_set_line_width(float p_width); + void add_draw_list_set_push_constant(RDD::ShaderID p_shader, const void *p_data, uint32_t 
p_data_size); + void add_draw_list_set_scissor(Rect2i p_rect); + void add_draw_list_set_viewport(Rect2i p_rect); + void add_draw_list_uniform_set_prepare_for_use(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index); + void add_draw_list_usage(ResourceTracker *p_tracker, ResourceUsage p_usage); + void add_draw_list_usages(VectorView p_trackers, VectorView p_usages); + void add_draw_list_end(); + void add_texture_clear(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, const Color &p_color, const RDD::TextureSubresourceRange &p_range); + void add_texture_copy(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, RDD::TextureCopyRegion p_region); + void add_texture_get_data(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, VectorView p_buffer_texture_copy_regions); + void add_texture_resolve(RDD::TextureID p_src, ResourceTracker *p_src_tracker, RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, uint32_t p_src_layer, uint32_t p_src_mipmap, uint32_t p_dst_layer, uint32_t p_dst_mipmap); + void add_texture_update(RDD::TextureID p_dst, ResourceTracker *p_dst_tracker, VectorView p_buffer_copies); + void add_capture_timestamp(RDD::QueryPoolID p_query_pool, uint32_t p_index); + void add_synchronization(); + void begin_label(const String &p_label_name, const Color &p_color); + void end_label(); + void end(RDD::CommandBufferID p_command_buffer, bool p_reorder_commands, bool p_full_barriers); + static ResourceTracker *resource_tracker_create(); + static void resource_tracker_free(ResourceTracker *tracker); +}; + +using RDG = RenderingDeviceGraph; + +#endif // RENDERING_DEVICE_GRAPH_H diff --git a/servers/rendering_server.cpp b/servers/rendering_server.cpp index 00015b74a1d..e8708e8da8b 100644 --- a/servers/rendering_server.cpp +++ b/servers/rendering_server.cpp @@ -3552,7 +3552,6 @@ void RenderingServer::init() { GLOBAL_DEF_RST(PropertyInfo(Variant::INT, 
"rendering/limits/spatial_indexer/update_iterations_per_frame", PROPERTY_HINT_RANGE, "0,1024,1"), 10); GLOBAL_DEF_RST(PropertyInfo(Variant::INT, "rendering/limits/spatial_indexer/threaded_cull_minimum_instances", PROPERTY_HINT_RANGE, "32,65536,1"), 1000); - GLOBAL_DEF_RST(PropertyInfo(Variant::INT, "rendering/limits/forward_renderer/threaded_render_minimum_instances", PROPERTY_HINT_RANGE, "32,65536,1"), 500); GLOBAL_DEF(PropertyInfo(Variant::FLOAT, "rendering/limits/cluster_builder/max_clustered_elements", PROPERTY_HINT_RANGE, "32,8192,1"), 512); diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl index 65cc8b67eff..d989416eb38 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl +++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl @@ -29,7 +29,6 @@ #define FSR2_BIND_SRV_DILATED_DEPTH 2 #define FSR2_BIND_SRV_REACTIVE_MASK 3 #define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 4 -#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 5 #define FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS 6 #define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 7 #define FSR2_BIND_SRV_INPUT_COLOR 8