diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index ab270e5e821..0cc89dfacaa 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -6177,6 +6177,8 @@ uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) { return false; case API_TRAIT_USE_GENERAL_IN_COPY_QUEUES: return true; + case API_TRAIT_BUFFERS_REQUIRE_TRANSITIONS: + return !barrier_capabilities.enhanced_barriers_supported; default: return RenderingDeviceDriver::api_trait_get(p_trait); } diff --git a/servers/rendering/renderer_rd/effects/copy_effects.cpp b/servers/rendering/renderer_rd/effects/copy_effects.cpp index c7a7532d769..808c82f712a 100644 --- a/servers/rendering/renderer_rd/effects/copy_effects.cpp +++ b/servers/rendering/renderer_rd/effects/copy_effects.cpp @@ -1002,15 +1002,19 @@ void CopyEffects::copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuf MaterialStorage *material_storage = MaterialStorage::get_singleton(); ERR_FAIL_NULL(material_storage); + Rect2i screen_rect; + float atlas_width = p_dst_size.width / p_rect.size.width; + float atlas_height = p_dst_size.height / p_rect.size.height; + screen_rect.position.x = (int32_t)(Math::round(p_rect.position.x * atlas_width)); + screen_rect.position.y = (int32_t)(Math::round(p_rect.position.y * atlas_height)); + screen_rect.size.width = (int32_t)(Math::round(p_dst_size.width)); + screen_rect.size.height = (int32_t)(Math::round(p_dst_size.height)); + CopyToDPPushConstant push_constant; - push_constant.screen_rect[0] = p_rect.position.x; - push_constant.screen_rect[1] = p_rect.position.y; - push_constant.screen_rect[2] = p_rect.size.width; - push_constant.screen_rect[3] = p_rect.size.height; push_constant.z_far = p_z_far; push_constant.z_near = p_z_near; - push_constant.texel_size[0] = 1.0f / p_dst_size.x; - push_constant.texel_size[1] = 1.0f / p_dst_size.y; + push_constant.texel_size[0] = 1.0f / p_dst_size.width; + push_constant.texel_size[1] = 1.0f / p_dst_size.height; push_constant.texel_size[0] *= p_dp_flip ? -1.0f : 1.0f; // Encode dp flip as x size sign // setup our uniforms @@ -1021,7 +1025,7 @@ void CopyEffects::copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuf RID shader = cube_to_dp.shader.version_get_shader(cube_to_dp.shader_version, 0); ERR_FAIL_COND(shader.is_null()); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dst_framebuffer, RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dst_framebuffer, RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, Vector(), 1.0f, 0, screen_rect); RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, cube_to_dp.pipeline.get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dst_framebuffer))); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_source_rd_texture), 0); RD::get_singleton()->draw_list_bind_index_array(draw_list, material_storage->get_quad_index_array()); diff --git a/servers/rendering/renderer_rd/effects/copy_effects.h b/servers/rendering/renderer_rd/effects/copy_effects.h index 014f78e2b9b..b6fc95d0f11 100644 --- a/servers/rendering/renderer_rd/effects/copy_effects.h +++ b/servers/rendering/renderer_rd/effects/copy_effects.h @@ -217,7 +217,6 @@ private: float z_far; float z_near; float texel_size[2]; - float screen_rect[4]; }; struct CopyToDP { diff --git a/servers/rendering/renderer_rd/shaders/effects/cube_to_dp.glsl b/servers/rendering/renderer_rd/shaders/effects/cube_to_dp.glsl index 3fb93dda356..b45e310b613 100644 --- a/servers/rendering/renderer_rd/shaders/effects/cube_to_dp.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/cube_to_dp.glsl @@ -8,7 +8,6 @@ layout(push_constant, std430) uniform Params { float z_far; float z_near; vec2 texel_size; - vec4 screen_rect; } params; @@ -17,8 +16,7 @@ layout(location = 0) out vec2 uv_interp; void main() { vec2 base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0)); uv_interp = base_arr[gl_VertexIndex]; - vec2 screen_pos = uv_interp * params.screen_rect.zw + params.screen_rect.xy; - gl_Position = vec4(screen_pos * 2.0 - 1.0, 0.0, 1.0); + gl_Position = vec4(uv_interp * 2.0 - 1.0, 0.0, 1.0); } #[fragment] @@ -35,7 +33,6 @@ layout(push_constant, std430) uniform Params { float z_far; float z_near; vec2 texel_size; - vec4 screen_rect; } params; diff --git a/servers/rendering/renderer_rd/storage_rd/particles_storage.cpp b/servers/rendering/renderer_rd/storage_rd/particles_storage.cpp index 314cbf9aa91..6efb8c176fc 100644 --- a/servers/rendering/renderer_rd/storage_rd/particles_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/particles_storage.cpp @@ -306,9 +306,14 @@ void ParticlesStorage::_particles_free_data(Particles *particles) { particles->emission_storage_buffer = RID(); } - if (particles->unused_storage_buffer.is_valid()) { - RD::get_singleton()->free(particles->unused_storage_buffer); - particles->unused_storage_buffer = RID(); + if (particles->unused_emission_storage_buffer.is_valid()) { + RD::get_singleton()->free(particles->unused_emission_storage_buffer); + particles->unused_emission_storage_buffer = RID(); + } + + if (particles->unused_trail_storage_buffer.is_valid()) { + RD::get_singleton()->free(particles->unused_trail_storage_buffer); + particles->unused_trail_storage_buffer = RID(); } if (RD::get_singleton()->uniform_set_is_valid(particles->particles_material_uniform_set)) { @@ -534,9 +539,15 @@ void ParticlesStorage::_particles_allocate_emission_buffer(Particles *particles) } } -void ParticlesStorage::_particles_ensure_unused_buffer(Particles *particles) { - if (particles->unused_storage_buffer.is_null()) { - particles->unused_storage_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 4); +void ParticlesStorage::_particles_ensure_unused_emission_buffer(Particles *particles) { + if (particles->unused_emission_storage_buffer.is_null()) { + particles->unused_emission_storage_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 4); + } +} + +void ParticlesStorage::_particles_ensure_unused_trail_buffer(Particles *particles) { + if (particles->unused_trail_storage_buffer.is_null()) { + particles->unused_trail_storage_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 4); } } @@ -763,8 +774,8 @@ void ParticlesStorage::_particles_process(Particles *p_particles, double p_delta if (p_particles->emission_storage_buffer.is_valid()) { u.append_id(p_particles->emission_storage_buffer); } else { - _particles_ensure_unused_buffer(p_particles); - u.append_id(p_particles->unused_storage_buffer); + _particles_ensure_unused_emission_buffer(p_particles); + u.append_id(p_particles->unused_emission_storage_buffer); } uniforms.push_back(u); } @@ -779,8 +790,8 @@ void ParticlesStorage::_particles_process(Particles *p_particles, double p_delta } u.append_id(sub_emitter->emission_storage_buffer); } else { - _particles_ensure_unused_buffer(p_particles); - u.append_id(p_particles->unused_storage_buffer); + _particles_ensure_unused_emission_buffer(p_particles); + u.append_id(p_particles->unused_emission_storage_buffer); } uniforms.push_back(u); } @@ -1481,8 +1492,8 @@ void ParticlesStorage::update_particles() { if (particles->trail_bind_pose_buffer.is_valid()) { u.append_id(particles->trail_bind_pose_buffer); } else { - _particles_ensure_unused_buffer(particles); - u.append_id(particles->unused_storage_buffer); + _particles_ensure_unused_trail_buffer(particles); + u.append_id(particles->unused_trail_storage_buffer); } uniforms.push_back(u); } diff --git a/servers/rendering/renderer_rd/storage_rd/particles_storage.h b/servers/rendering/renderer_rd/storage_rd/particles_storage.h index 33f44f3045a..98c693f4773 100644 --- a/servers/rendering/renderer_rd/storage_rd/particles_storage.h +++ b/servers/rendering/renderer_rd/storage_rd/particles_storage.h @@ -247,7 +247,8 @@ private: ParticleEmissionBuffer *emission_buffer = nullptr; RID emission_storage_buffer; - RID unused_storage_buffer; + RID unused_emission_storage_buffer; + RID unused_trail_storage_buffer; HashSet collisions; @@ -265,7 +266,8 @@ private: void _particles_process(Particles *p_particles, double p_delta); void _particles_allocate_emission_buffer(Particles *particles); - void _particles_ensure_unused_buffer(Particles *particles); + void _particles_ensure_unused_emission_buffer(Particles *particles); + void _particles_ensure_unused_trail_buffer(Particles *particles); void _particles_free_data(Particles *particles); void _particles_update_buffers(Particles *particles); diff --git a/servers/rendering/rendering_device_driver.cpp b/servers/rendering/rendering_device_driver.cpp index 9ff7b83215e..c1a3f34af89 100644 --- a/servers/rendering/rendering_device_driver.cpp +++ b/servers/rendering/rendering_device_driver.cpp @@ -376,6 +376,8 @@ uint64_t RenderingDeviceDriver::api_trait_get(ApiTrait p_trait) { return true; case API_TRAIT_USE_GENERAL_IN_COPY_QUEUES: return false; + case API_TRAIT_BUFFERS_REQUIRE_TRANSITIONS: + return false; default: ERR_FAIL_V(0); } diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index 953401e9bdf..001e7e7eeba 100644 --- a/servers/rendering/rendering_device_driver.h +++ b/servers/rendering/rendering_device_driver.h @@ -756,6 +756,7 @@ public: API_TRAIT_SECONDARY_VIEWPORT_SCISSOR, API_TRAIT_CLEARS_WITH_COPY_ENGINE, API_TRAIT_USE_GENERAL_IN_COPY_QUEUES, + API_TRAIT_BUFFERS_REQUIRE_TRANSITIONS, }; enum ShaderChangeInvalidation { diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp index 750753d796b..ec2f336f3c0 100644 --- a/servers/rendering/rendering_device_graph.cpp +++ b/servers/rendering/rendering_device_graph.cpp @@ -140,6 +140,25 @@ RDD::BarrierAccessBits RenderingDeviceGraph::_usage_to_access_bits(ResourceUsage #endif } +bool RenderingDeviceGraph::_check_command_intersection(ResourceTracker *p_resource_tracker, int32_t p_previous_command_index, int32_t p_command_index) const { + if (p_resource_tracker->usage != RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE && p_resource_tracker->usage != RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE) { + // We don't check possible intersections for usages that aren't consecutive color or depth writes. + return true; + } + + const uint32_t previous_command_data_offset = command_data_offsets[p_previous_command_index]; + const uint32_t current_command_data_offset = command_data_offsets[p_command_index]; + const RecordedDrawListCommand &previous_draw_list_command = *reinterpret_cast(&command_data[previous_command_data_offset]); + const RecordedDrawListCommand ¤t_draw_list_command = *reinterpret_cast(&command_data[current_command_data_offset]); + if (previous_draw_list_command.type != RecordedCommand::TYPE_DRAW_LIST || current_draw_list_command.type != RecordedCommand::TYPE_DRAW_LIST) { + // We don't check possible intersections if both commands aren't draw lists. + return true; + } + + // We check if the region used by both draw lists have an intersection. + return previous_draw_list_command.region.intersects(current_draw_list_command.region); +} + int32_t RenderingDeviceGraph::_add_to_command_list(int32_t p_command_index, int32_t p_list_index) { DEV_ASSERT(p_command_index < int32_t(command_count)); DEV_ASSERT(p_list_index < int32_t(command_list_nodes.size())); @@ -425,11 +444,9 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr #if USE_BUFFER_BARRIERS _add_buffer_barrier_to_command(resource_tracker->buffer_driver_id, resource_tracker->usage_access, new_usage_access, r_command->buffer_barrier_index, r_command->buffer_barrier_count); #endif - // FIXME: Memory barriers are currently pushed regardless of whether buffer barriers are being used or not. Refer to the comment on the - // definition of USE_BUFFER_BARRIERS for the reason behind this. This can be fixed to be one case or the other once it's been confirmed - // the buffer and memory barrier behavior discrepancy has been solved. - r_command->memory_barrier.src_access = resource_tracker->usage_access; - r_command->memory_barrier.dst_access = new_usage_access; + // Memory barriers are pushed regardless of buffer barriers being used or not. + r_command->memory_barrier.src_access = r_command->memory_barrier.src_access | resource_tracker->usage_access; + r_command->memory_barrier.dst_access = r_command->memory_barrier.dst_access | new_usage_access; } else { DEV_ASSERT(false && "Resource tracker does not contain a valid buffer or texture ID."); } @@ -449,10 +466,12 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr if (different_usage) { // Even if the usage of the resource isn't a write usage explicitly, a different usage implies a transition and it should therefore be considered a write. - write_usage = true; + // In the case of buffers however, this is not exactly necessary if the driver does not consider different buffer usages as different states. + write_usage = write_usage || bool(resource_tracker->texture_driver_id) || driver_buffers_require_transitions; resource_tracker->usage = new_resource_usage; } + bool command_intersection_failed = false; if (search_tracker->write_command_or_list_index >= 0) { if (search_tracker->write_command_list_enabled) { // Make this command adjacent to any commands that wrote to this resource and intersect with the slice if it applies. @@ -464,7 +483,7 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr if (!resource_has_parent || search_tracker_rect.intersects(write_list_node.subresources)) { if (write_list_node.command_index == p_command_index) { ERR_FAIL_COND_MSG(!resource_has_parent, "Command can't have itself as a dependency."); - } else { + } else if (_check_command_intersection(resource_tracker, write_list_node.command_index, p_command_index)) { // Command is dependent on this command. Add this command to the adjacency list of the write command. _add_adjacent_command(write_list_node.command_index, p_command_index, r_command); @@ -480,6 +499,8 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr write_list_index = write_list_node.next_list_index; continue; } + } else { + command_intersection_failed = true; } } @@ -490,14 +511,16 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr // The index is just the latest command index that wrote to the resource. if (search_tracker->write_command_or_list_index == p_command_index) { ERR_FAIL_MSG("Command can't have itself as a dependency."); - } else { + } else if (_check_command_intersection(resource_tracker, search_tracker->write_command_or_list_index, p_command_index)) { _add_adjacent_command(search_tracker->write_command_or_list_index, p_command_index, r_command); + } else { + command_intersection_failed = true; } } } if (write_usage) { - if (resource_has_parent) { + if (resource_has_parent || command_intersection_failed) { if (!search_tracker->write_command_list_enabled && search_tracker->write_command_or_list_index >= 0) { // Write command list was not being used but there was a write command recorded. Add a new node with the entire parent resource's subresources and the recorded command index to the list. const RDD::TextureSubresourceRange &tracker_subresources = search_tracker->texture_subresources; @@ -1318,6 +1341,7 @@ void RenderingDeviceGraph::initialize(RDD *p_driver, RenderingContextDriver::Dev driver_honors_barriers = driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS); driver_clears_with_copy_engine = driver->api_trait_get(RDD::API_TRAIT_CLEARS_WITH_COPY_ENGINE); + driver_buffers_require_transitions = driver->api_trait_get(RDD::API_TRAIT_BUFFERS_REQUIRE_TRANSITIONS); } void RenderingDeviceGraph::finalize() { diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h index e52ab0c2f50..9ddd70bc80d 100644 --- a/servers/rendering/rendering_device_graph.h +++ b/servers/rendering/rendering_device_graph.h @@ -637,6 +637,7 @@ private: BarrierGroup barrier_group; bool driver_honors_barriers : 1; bool driver_clears_with_copy_engine : 1; + bool driver_buffers_require_transitions : 1; WorkaroundsState workarounds_state; TightLocalVector frames; uint32_t frame = 0; @@ -648,6 +649,7 @@ private: static bool _is_write_usage(ResourceUsage p_usage); static RDD::TextureLayout _usage_to_image_layout(ResourceUsage p_usage); static RDD::BarrierAccessBits _usage_to_access_bits(ResourceUsage p_usage); + bool _check_command_intersection(ResourceTracker *p_resource_tracker, int32_t p_previous_command_index, int32_t p_command_index) const; int32_t _add_to_command_list(int32_t p_command_index, int32_t p_list_index); void _add_adjacent_command(int32_t p_previous_command_index, int32_t p_command_index, RecordedCommand *r_command); int32_t _add_to_slice_read_list(int32_t p_command_index, Rect2i p_subresources, int32_t p_list_index);