From d36a8724324b5c8f1e4679717d009ae3210afd20 Mon Sep 17 00:00:00 2001 From: thimenesup Date: Fri, 20 Sep 2024 21:05:50 +0200 Subject: [PATCH] Add draw indirect to Rendering Device --- doc/classes/RenderingDevice.xml | 12 ++ servers/rendering/rendering_device.cpp | 112 +++++++++++++++++++ servers/rendering/rendering_device.h | 1 + servers/rendering/rendering_device_graph.cpp | 40 +++++++ servers/rendering/rendering_device_graph.h | 18 +++ 5 files changed, 183 insertions(+) diff --git a/doc/classes/RenderingDevice.xml b/doc/classes/RenderingDevice.xml index 2ff7e934e9c..fe23f791192 100644 --- a/doc/classes/RenderingDevice.xml +++ b/doc/classes/RenderingDevice.xml @@ -324,6 +324,18 @@ Submits [param draw_list] for rendering on the GPU. This is the raster equivalent to [method compute_list_dispatch]. + + + + + + + + + + Submits [param draw_list] for rendering on the GPU with the given parameters stored in the [param buffer] at [param offset]. The parameters are integers: vertex count, instance count, first vertex, first instance. When using indices, they are instead: index count, instance count, first index, vertex offset, first instance. The buffer must have been created with the [constant STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT] flag. 
+ + diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index e322bba7687..e0271309016 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -4191,6 +4191,117 @@ void RenderingDevice::draw_list_draw(DrawListID p_list, bool p_use_indices, uint dl->state.draw_count++; } +void RenderingDevice::draw_list_draw_indirect(DrawListID p_list, bool p_use_indices, RID p_buffer, uint32_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { // Records p_draw_count draws whose arguments are read from p_buffer, starting at byte p_offset and spaced p_stride bytes apart; p_use_indices selects the indexed form. + ERR_RENDER_THREAD_GUARD(); + + DrawList *dl = _get_draw_list_ptr(p_list); + ERR_FAIL_NULL(dl); + + Buffer *buffer = storage_buffer_owner.get_or_null(p_buffer); // Only storage buffers are accepted as the argument source. + ERR_FAIL_NULL(buffer); + + ERR_FAIL_COND_MSG(!buffer->usage.has_flag(RDD::BUFFER_USAGE_INDIRECT_BIT), "Buffer provided was not created to do indirect dispatch."); + +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified."); +#endif + +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_MSG(!dl->validation.pipeline_active, + "No render pipeline was set before attempting to draw."); + if (dl->validation.pipeline_vertex_format != INVALID_ID) { + // Pipeline uses vertices, validate format. + ERR_FAIL_COND_MSG(dl->validation.vertex_format == INVALID_ID, + "No vertex array was bound, and render pipeline expects vertices."); + // Make sure format is right. + ERR_FAIL_COND_MSG(dl->validation.pipeline_vertex_format != dl->validation.vertex_format, + "The vertex format used to create the pipeline does not match the vertex format bound."); + } + + if (dl->validation.pipeline_push_constant_size > 0) { + // Using push constants, check that they were supplied. 
+ ERR_FAIL_COND_MSG(!dl->validation.pipeline_push_constant_supplied, + "The shader in this pipeline requires a push constant to be set before drawing, but it's not present."); + } +#endif + +#ifdef DEBUG_ENABLED + for (uint32_t i = 0; i < dl->state.set_count; i++) { + if (dl->state.sets[i].pipeline_expected_format == 0) { + // Nothing expected by this pipeline. + continue; + } + + if (dl->state.sets[i].pipeline_expected_format != dl->state.sets[i].uniform_set_format) { + if (dl->state.sets[i].uniform_set_format == 0) { + ERR_FAIL_MSG(vformat("Uniforms were never supplied for set (%d) at the time of drawing, which are required by the pipeline.", i)); + } else if (uniform_set_owner.owns(dl->state.sets[i].uniform_set)) { + UniformSet *us = uniform_set_owner.get_or_null(dl->state.sets[i].uniform_set); + ERR_FAIL_MSG(vformat("Uniforms supplied for set (%d):\n%s\nare not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n%s", i, _shader_uniform_debug(us->shader_id, us->shader_set), _shader_uniform_debug(dl->state.pipeline_shader))); + } else { + ERR_FAIL_MSG(vformat("Uniforms supplied for set (%s, which was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n%s", i, _shader_uniform_debug(dl->state.pipeline_shader))); + } + } + } +#endif + + // Prepare descriptor sets if the API doesn't use pipeline barriers. + if (!driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { + for (uint32_t i = 0; i < dl->state.set_count; i++) { + if (dl->state.sets[i].pipeline_expected_format == 0) { + // Nothing expected by this pipeline. + continue; + } + + draw_graph.add_draw_list_uniform_set_prepare_for_use(dl->state.pipeline_shader_driver_id, dl->state.sets[i].uniform_set_driver_id, i); + } + } + + // Bind descriptor sets. 
+ for (uint32_t i = 0; i < dl->state.set_count; i++) { + if (dl->state.sets[i].pipeline_expected_format == 0) { + continue; // Nothing expected by this pipeline. + } + if (!dl->state.sets[i].bound) { + // All good, see if this requires re-binding. + draw_graph.add_draw_list_bind_uniform_set(dl->state.pipeline_shader_driver_id, dl->state.sets[i].uniform_set_driver_id, i); + + UniformSet *uniform_set = uniform_set_owner.get_or_null(dl->state.sets[i].uniform_set); + _uniform_set_update_shared(uniform_set); + + draw_graph.add_draw_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage); + + dl->state.sets[i].bound = true; + } + } + + if (p_use_indices) { +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_MSG(!dl->validation.index_array_count, + "Draw command requested indices, but no index buffer was set."); + + ERR_FAIL_COND_MSG(dl->validation.pipeline_uses_restart_indices != dl->validation.index_buffer_uses_restart_indices, + "The usage of restart indices in index buffer does not match the render primitive in the pipeline."); +#endif + + ERR_FAIL_COND_MSG((uint64_t)p_offset + (uint64_t)(p_draw_count > 1 ? p_draw_count - 1 : 0) * p_stride + 20 > buffer->size, "Indirect draw commands (offset, draw count and stride, +20) reach past the end of buffer."); // Five 32-bit arguments per indexed command; 64-bit math keeps a large offset or stride from wrapping past the check, and the last command is validated, not just the first. + + draw_graph.add_draw_list_draw_indexed_indirect(buffer->driver_id, p_offset, p_draw_count, p_stride); + } else { + ERR_FAIL_COND_MSG((uint64_t)p_offset + (uint64_t)(p_draw_count > 1 ? p_draw_count - 1 : 0) * p_stride + 16 > buffer->size, "Indirect draw commands (offset, draw count and stride, +16) reach past the end of buffer."); // Four 32-bit arguments per non-indexed command; same overflow-safe extent check as the indexed path. 
+ + draw_graph.add_draw_list_draw_indirect(buffer->driver_id, p_offset, p_draw_count, p_stride); + } + + dl->state.draw_count++; + + if (buffer->draw_tracker != nullptr) { // Register the buffer with the graph as an indirect-argument read for this draw list. + draw_graph.add_draw_list_usage(buffer->draw_tracker, RDG::RESOURCE_USAGE_INDIRECT_BUFFER_READ); + } + + _check_transfer_worker_buffer(buffer); +} + void RenderingDevice::draw_list_enable_scissor(DrawListID p_list, const Rect2 &p_rect) { DrawList *dl = _get_draw_list_ptr(p_list); @@ -6023,6 +6134,7 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("draw_list_set_push_constant", "draw_list", "buffer", "size_bytes"), 
&RenderingDevice::_draw_list_set_push_constant); ClassDB::bind_method(D_METHOD("draw_list_draw", "draw_list", "use_indices", "instances", "procedural_vertex_count"), &RenderingDevice::draw_list_draw, DEFVAL(0)); + ClassDB::bind_method(D_METHOD("draw_list_draw_indirect", "draw_list", "use_indices", "buffer", "offset", "draw_count", "stride"), &RenderingDevice::draw_list_draw_indirect, DEFVAL(0), DEFVAL(1), DEFVAL(0)); /* Binds draw_list_draw_indirect for scripting; the three DEFVALs are the defaults for offset (0), draw_count (1) and stride (0), matching the header declaration. */ ClassDB::bind_method(D_METHOD("draw_list_enable_scissor", "draw_list", "rect"), &RenderingDevice::draw_list_enable_scissor, DEFVAL(Rect2())); ClassDB::bind_method(D_METHOD("draw_list_disable_scissor", "draw_list"), &RenderingDevice::draw_list_disable_scissor); diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 1405f585b24..f4adcc1b79c 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -1172,6 +1172,7 @@ public: void draw_list_set_push_constant(DrawListID p_list, const void *p_data, uint32_t p_data_size); void draw_list_draw(DrawListID p_list, bool p_use_indices, uint32_t p_instances = 1, uint32_t p_procedural_vertices = 0); + void draw_list_draw_indirect(DrawListID p_list, bool p_use_indices, RID p_buffer, uint32_t p_offset = 0, uint32_t p_draw_count = 1, uint32_t p_stride = 0); /* Indirect counterpart of draw_list_draw: the draw arguments are taken from p_buffer at p_offset instead of being passed in. */ void draw_list_enable_scissor(DrawListID p_list, const Rect2 &p_rect); void draw_list_disable_scissor(DrawListID p_list); diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp index abcb76cd43a..b6b33b28091 100644 --- a/servers/rendering/rendering_device_graph.cpp +++ b/servers/rendering/rendering_device_graph.cpp @@ -699,6 +699,16 @@ void RenderingDeviceGraph::_run_draw_list_command(RDD::CommandBufferID p_command driver->command_render_draw_indexed(p_command_buffer, draw_indexed_instruction->index_count, draw_indexed_instruction->instance_count, draw_indexed_instruction->first_index, 0, 0); instruction_data_cursor += 
sizeof(DrawListDrawIndexedInstruction); } break; + case DrawListInstruction::TYPE_DRAW_INDIRECT: { /* Replays a recorded non-indexed indirect draw: passes buffer, offset, draw count and stride to the driver, then advances the cursor past this instruction. */ + const DrawListDrawIndirectInstruction *draw_indirect_instruction = reinterpret_cast(instruction); /* NOTE(review): reinterpret_cast has no target type here and in the similar casts below; the angle-bracket part looks lost from this text - confirm against the real commit. */ + driver->command_render_draw_indirect(p_command_buffer, draw_indirect_instruction->buffer, draw_indirect_instruction->offset, draw_indirect_instruction->draw_count, draw_indirect_instruction->stride); + instruction_data_cursor += sizeof(DrawListDrawIndirectInstruction); + } break; + case DrawListInstruction::TYPE_DRAW_INDEXED_INDIRECT: { /* Indexed variant: same four recorded fields, forwarded to the indexed indirect driver call. */ + const DrawListDrawIndexedIndirectInstruction *draw_indexed_indirect_instruction = reinterpret_cast(instruction); + driver->command_render_draw_indexed_indirect(p_command_buffer, draw_indexed_indirect_instruction->buffer, draw_indexed_indirect_instruction->offset, draw_indexed_indirect_instruction->draw_count, draw_indexed_indirect_instruction->stride); + instruction_data_cursor += sizeof(DrawListDrawIndexedIndirectInstruction); + } break; case DrawListInstruction::TYPE_EXECUTE_COMMANDS: { const DrawListExecuteCommandsInstruction *execute_commands_instruction = reinterpret_cast(instruction); driver->command_buffer_execute_secondary(p_command_buffer, execute_commands_instruction->command_buffer); @@ -1178,6 +1188,16 @@ void RenderingDeviceGraph::_print_draw_list(const uint8_t *p_instruction_data, u print_line("\tDRAW INDICES", draw_indexed_instruction->index_count, "INSTANCES", draw_indexed_instruction->instance_count, "FIRST INDEX", draw_indexed_instruction->first_index); instruction_data_cursor += sizeof(DrawListDrawIndexedInstruction); } break; + case DrawListInstruction::TYPE_DRAW_INDIRECT: { /* Prints the fields of a recorded non-indexed indirect draw and steps the cursor past it. */ + const DrawListDrawIndirectInstruction *draw_indirect_instruction = reinterpret_cast(instruction); + print_line("\tDRAW INDIRECT BUFFER ID", itos(draw_indirect_instruction->buffer.id), "OFFSET", draw_indirect_instruction->offset, "DRAW COUNT", draw_indirect_instruction->draw_count, "STRIDE", draw_indirect_instruction->stride); + 
instruction_data_cursor += sizeof(DrawListDrawIndirectInstruction); + } break; + case DrawListInstruction::TYPE_DRAW_INDEXED_INDIRECT: { /* Prints the fields of a recorded indexed indirect draw and steps the cursor past it. */ + const DrawListDrawIndexedIndirectInstruction *draw_indexed_indirect_instruction = reinterpret_cast(instruction); + print_line("\tDRAW INDEXED INDIRECT BUFFER ID", itos(draw_indexed_indirect_instruction->buffer.id), "OFFSET", draw_indexed_indirect_instruction->offset, "DRAW COUNT", draw_indexed_indirect_instruction->draw_count, "STRIDE", draw_indexed_indirect_instruction->stride); + instruction_data_cursor += sizeof(DrawListDrawIndexedIndirectInstruction); + } break; case DrawListInstruction::TYPE_EXECUTE_COMMANDS: { print_line("\tEXECUTE COMMANDS"); instruction_data_cursor += sizeof(DrawListExecuteCommandsInstruction); @@ -1596,6 +1616,26 @@ void RenderingDeviceGraph::add_draw_list_draw_indexed(uint32_t p_index_count, ui instruction->first_index = p_first_index; } +void RenderingDeviceGraph::add_draw_list_draw_indirect(RDD::BufferID p_buffer, uint32_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { /* Allocates a TYPE_DRAW_INDIRECT instruction, stores the four arguments in it and sets the draw-indirect stage flag on the instruction list. */ + DrawListDrawIndirectInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(sizeof(DrawListDrawIndirectInstruction))); + instruction->type = DrawListInstruction::TYPE_DRAW_INDIRECT; + instruction->buffer = p_buffer; + instruction->offset = p_offset; + instruction->draw_count = p_draw_count; + instruction->stride = p_stride; + draw_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT); +} + +void RenderingDeviceGraph::add_draw_list_draw_indexed_indirect(RDD::BufferID p_buffer, uint32_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { /* Indexed variant: identical recording, tagged TYPE_DRAW_INDEXED_INDIRECT. */ + DrawListDrawIndexedIndirectInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(sizeof(DrawListDrawIndexedIndirectInstruction))); + instruction->type = DrawListInstruction::TYPE_DRAW_INDEXED_INDIRECT; + instruction->buffer = p_buffer; + instruction->offset = p_offset; + instruction->draw_count = p_draw_count; + instruction->stride 
= p_stride; + draw_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT); +} + void RenderingDeviceGraph::add_draw_list_execute_commands(RDD::CommandBufferID p_command_buffer) { DrawListExecuteCommandsInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(sizeof(DrawListExecuteCommandsInstruction))); instruction->type = DrawListInstruction::TYPE_EXECUTE_COMMANDS; diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h index e13e3a04298..f8502c721f8 100644 --- a/servers/rendering/rendering_device_graph.h +++ b/servers/rendering/rendering_device_graph.h @@ -69,6 +69,8 @@ public: TYPE_CLEAR_ATTACHMENTS, TYPE_DRAW, TYPE_DRAW_INDEXED, + TYPE_DRAW_INDIRECT, /* Tag for DrawListDrawIndirectInstruction. */ + TYPE_DRAW_INDEXED_INDIRECT, /* Tag for DrawListDrawIndexedIndirectInstruction. */ TYPE_EXECUTE_COMMANDS, TYPE_NEXT_SUBPASS, TYPE_SET_BLEND_CONSTANTS, @@ -463,6 +465,20 @@ private: uint32_t first_index = 0; }; + struct DrawListDrawIndirectInstruction : DrawListInstruction { /* Payload of TYPE_DRAW_INDIRECT: the argument buffer plus the offset, draw count and stride that are later handed to the driver. */ + RDD::BufferID buffer; + uint32_t offset = 0; + uint32_t draw_count = 0; + uint32_t stride = 0; + }; + + struct DrawListDrawIndexedIndirectInstruction : DrawListInstruction { /* Payload of TYPE_DRAW_INDEXED_INDIRECT; same fields as the non-indexed payload. */ + RDD::BufferID buffer; + uint32_t offset = 0; + uint32_t draw_count = 0; + uint32_t stride = 0; + }; + struct DrawListEndRenderPassInstruction : DrawListInstruction { // No contents. 
}; @@ -675,6 +691,8 @@ public: void add_draw_list_clear_attachments(VectorView p_attachments_clear, VectorView p_attachments_clear_rect); void add_draw_list_draw(uint32_t p_vertex_count, uint32_t p_instance_count); void add_draw_list_draw_indexed(uint32_t p_index_count, uint32_t p_instance_count, uint32_t p_first_index); + void add_draw_list_draw_indirect(RDD::BufferID p_buffer, uint32_t p_offset, uint32_t p_draw_count, uint32_t p_stride); /* Records a non-indexed indirect draw; the arguments are stored in the instruction as given. */ + void add_draw_list_draw_indexed_indirect(RDD::BufferID p_buffer, uint32_t p_offset, uint32_t p_draw_count, uint32_t p_stride); /* Records an indexed indirect draw; same parameters as the non-indexed form. */ void add_draw_list_execute_commands(RDD::CommandBufferID p_command_buffer); void add_draw_list_next_subpass(RDD::CommandBufferType p_command_buffer_type); void add_draw_list_set_blend_constants(const Color &p_color);