mirror of
https://github.com/godotengine/godot.git
synced 2024-11-21 19:42:43 +00:00
Merge pull request #98271 from DarioSamo/d3d12-enhanced-barrier-fix
Some checks are pending
🔗 GHA / 📊 Static checks (push) Waiting to run
🔗 GHA / 🤖 Android (push) Blocked by required conditions
🔗 GHA / 🍏 iOS (push) Blocked by required conditions
🔗 GHA / 🐧 Linux (push) Blocked by required conditions
🔗 GHA / 🍎 macOS (push) Blocked by required conditions
🔗 GHA / 🏁 Windows (push) Blocked by required conditions
🔗 GHA / 🌐 Web (push) Blocked by required conditions
🔗 GHA / 🪲 Godot CPP (push) Blocked by required conditions
Some checks are pending
🔗 GHA / 📊 Static checks (push) Waiting to run
🔗 GHA / 🤖 Android (push) Blocked by required conditions
🔗 GHA / 🍏 iOS (push) Blocked by required conditions
🔗 GHA / 🐧 Linux (push) Blocked by required conditions
🔗 GHA / 🍎 macOS (push) Blocked by required conditions
🔗 GHA / 🏁 Windows (push) Blocked by required conditions
🔗 GHA / 🌐 Web (push) Blocked by required conditions
🔗 GHA / 🪲 Godot CPP (push) Blocked by required conditions
Move transitions of textures from transfer workers to the graphics queue.
This commit is contained in:
commit
58a7f9b4d8
@ -2003,6 +2003,8 @@ static D3D12_BARRIER_LAYOUT _rd_texture_layout_to_d3d12_barrier_layout(RDD::Text
|
|||||||
switch (p_texture_layout) {
|
switch (p_texture_layout) {
|
||||||
case RDD::TEXTURE_LAYOUT_UNDEFINED:
|
case RDD::TEXTURE_LAYOUT_UNDEFINED:
|
||||||
return D3D12_BARRIER_LAYOUT_UNDEFINED;
|
return D3D12_BARRIER_LAYOUT_UNDEFINED;
|
||||||
|
case RDD::TEXTURE_LAYOUT_GENERAL:
|
||||||
|
return D3D12_BARRIER_LAYOUT_COMMON;
|
||||||
case RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL:
|
case RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL:
|
||||||
return D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS;
|
return D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS;
|
||||||
case RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
|
case RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
|
||||||
@ -6175,6 +6177,8 @@ uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) {
|
|||||||
return false;
|
return false;
|
||||||
case API_TRAIT_CLEARS_WITH_COPY_ENGINE:
|
case API_TRAIT_CLEARS_WITH_COPY_ENGINE:
|
||||||
return false;
|
return false;
|
||||||
|
case API_TRAIT_USE_GENERAL_IN_COPY_QUEUES:
|
||||||
|
return true;
|
||||||
default:
|
default:
|
||||||
return RenderingDeviceDriver::api_trait_get(p_trait);
|
return RenderingDeviceDriver::api_trait_get(p_trait);
|
||||||
}
|
}
|
||||||
|
@ -266,6 +266,7 @@ static const VkFormat RD_TO_VK_FORMAT[RDD::DATA_FORMAT_MAX] = {
|
|||||||
|
|
||||||
static VkImageLayout RD_TO_VK_LAYOUT[RDD::TEXTURE_LAYOUT_MAX] = {
|
static VkImageLayout RD_TO_VK_LAYOUT[RDD::TEXTURE_LAYOUT_MAX] = {
|
||||||
VK_IMAGE_LAYOUT_UNDEFINED, // TEXTURE_LAYOUT_UNDEFINED
|
VK_IMAGE_LAYOUT_UNDEFINED, // TEXTURE_LAYOUT_UNDEFINED
|
||||||
|
VK_IMAGE_LAYOUT_GENERAL, // TEXTURE_LAYOUT_GENERAL
|
||||||
VK_IMAGE_LAYOUT_GENERAL, // TEXTURE_LAYOUT_STORAGE_OPTIMAL
|
VK_IMAGE_LAYOUT_GENERAL, // TEXTURE_LAYOUT_STORAGE_OPTIMAL
|
||||||
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
|
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
|
||||||
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, // TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
|
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, // TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
|
||||||
|
@ -1243,6 +1243,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
|
|||||||
TransferWorker *transfer_worker = nullptr;
|
TransferWorker *transfer_worker = nullptr;
|
||||||
const uint8_t *read_ptr = p_data.ptr();
|
const uint8_t *read_ptr = p_data.ptr();
|
||||||
uint8_t *write_ptr = nullptr;
|
uint8_t *write_ptr = nullptr;
|
||||||
|
const RDD::TextureLayout copy_dst_layout = driver->api_trait_get(RDD::API_TRAIT_USE_GENERAL_IN_COPY_QUEUES) ? RDD::TEXTURE_LAYOUT_GENERAL : RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
|
||||||
for (uint32_t pass = 0; pass < 2; pass++) {
|
for (uint32_t pass = 0; pass < 2; pass++) {
|
||||||
const bool copy_pass = (pass == 1);
|
const bool copy_pass = (pass == 1);
|
||||||
if (copy_pass) {
|
if (copy_pass) {
|
||||||
@ -1267,7 +1268,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
|
|||||||
tb.texture = texture->driver_id;
|
tb.texture = texture->driver_id;
|
||||||
tb.dst_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT;
|
tb.dst_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT;
|
||||||
tb.prev_layout = RDD::TEXTURE_LAYOUT_UNDEFINED;
|
tb.prev_layout = RDD::TEXTURE_LAYOUT_UNDEFINED;
|
||||||
tb.next_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
|
tb.next_layout = copy_dst_layout;
|
||||||
tb.subresources.aspect = texture->barrier_aspect_flags;
|
tb.subresources.aspect = texture->barrier_aspect_flags;
|
||||||
tb.subresources.mipmap_count = texture->mipmaps;
|
tb.subresources.mipmap_count = texture->mipmaps;
|
||||||
tb.subresources.base_layer = p_layer;
|
tb.subresources.base_layer = p_layer;
|
||||||
@ -1313,7 +1314,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
|
|||||||
copy_region.texture_subresources.layer_count = 1;
|
copy_region.texture_subresources.layer_count = 1;
|
||||||
copy_region.texture_offset = Vector3i(0, 0, z);
|
copy_region.texture_offset = Vector3i(0, 0, z);
|
||||||
copy_region.texture_region_size = Vector3i(logic_width, logic_height, 1);
|
copy_region.texture_region_size = Vector3i(logic_width, logic_height, 1);
|
||||||
driver->command_copy_buffer_to_texture(transfer_worker->command_buffer, transfer_worker->staging_buffer, texture->driver_id, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, copy_region);
|
driver->command_copy_buffer_to_texture(transfer_worker->command_buffer, transfer_worker->staging_buffer, texture->driver_id, copy_dst_layout, copy_region);
|
||||||
}
|
}
|
||||||
|
|
||||||
staging_local_offset += to_allocate;
|
staging_local_offset += to_allocate;
|
||||||
@ -1332,14 +1333,13 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
|
|||||||
RDD::TextureBarrier tb;
|
RDD::TextureBarrier tb;
|
||||||
tb.texture = texture->driver_id;
|
tb.texture = texture->driver_id;
|
||||||
tb.src_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT;
|
tb.src_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT;
|
||||||
tb.prev_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
|
tb.prev_layout = copy_dst_layout;
|
||||||
tb.next_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
tb.next_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
||||||
tb.subresources.aspect = texture->barrier_aspect_flags;
|
tb.subresources.aspect = texture->barrier_aspect_flags;
|
||||||
tb.subresources.mipmap_count = texture->mipmaps;
|
tb.subresources.mipmap_count = texture->mipmaps;
|
||||||
tb.subresources.base_layer = p_layer;
|
tb.subresources.base_layer = p_layer;
|
||||||
tb.subresources.layer_count = 1;
|
tb.subresources.layer_count = 1;
|
||||||
|
transfer_worker->texture_barriers.push_back(tb);
|
||||||
driver->command_pipeline_barrier(transfer_worker->command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, {}, {}, tb);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_release_transfer_worker(transfer_worker);
|
_release_transfer_worker(transfer_worker);
|
||||||
@ -5152,6 +5152,21 @@ void RenderingDevice::_wait_for_transfer_worker(TransferWorker *p_transfer_worke
|
|||||||
MutexLock lock(p_transfer_worker->operations_mutex);
|
MutexLock lock(p_transfer_worker->operations_mutex);
|
||||||
p_transfer_worker->operations_processed = p_transfer_worker->operations_submitted;
|
p_transfer_worker->operations_processed = p_transfer_worker->operations_submitted;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!p_transfer_worker->texture_barriers.is_empty()) {
|
||||||
|
MutexLock transfer_worker_lock(transfer_worker_pool_mutex);
|
||||||
|
_flush_barriers_for_transfer_worker(p_transfer_worker);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Moves every texture barrier queued on the given transfer worker into the
// pool-wide transfer_worker_pool_texture_barriers list and then empties the
// worker's own list. The pool-wide list is what _submit_transfer_workers()
// later passes to command_pipeline_barrier() on the draw command buffer.
// This function takes no lock itself; both call sites visible in this diff
// hold transfer_worker_pool_mutex around the call.
void RenderingDevice::_flush_barriers_for_transfer_worker(TransferWorker *p_transfer_worker) {
|
||||||
|
// Nothing to move when the worker has no pending barriers.
if (!p_transfer_worker->texture_barriers.is_empty()) {
|
||||||
|
// Append in index order so the pool list keeps the worker's barrier order.
for (uint32_t i = 0; i < p_transfer_worker->texture_barriers.size(); i++) {
|
||||||
|
transfer_worker_pool_texture_barriers.push_back(p_transfer_worker->texture_barriers[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The barriers have been copied to the pool list; drop the worker's copies.
p_transfer_worker->texture_barriers.clear();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RenderingDevice::_check_transfer_worker_operation(uint32_t p_transfer_worker_index, uint64_t p_transfer_worker_operation) {
|
void RenderingDevice::_check_transfer_worker_operation(uint32_t p_transfer_worker_index, uint64_t p_transfer_worker_operation) {
|
||||||
@ -5193,11 +5208,11 @@ void RenderingDevice::_check_transfer_worker_index_array(IndexArray *p_index_arr
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RenderingDevice::_submit_transfer_workers(bool p_operations_used_by_draw) {
|
void RenderingDevice::_submit_transfer_workers(RDD::CommandBufferID p_draw_command_buffer) {
|
||||||
MutexLock transfer_worker_lock(transfer_worker_pool_mutex);
|
MutexLock transfer_worker_lock(transfer_worker_pool_mutex);
|
||||||
for (uint32_t i = 0; i < transfer_worker_pool.size(); i++) {
|
for (uint32_t i = 0; i < transfer_worker_pool.size(); i++) {
|
||||||
TransferWorker *worker = transfer_worker_pool[i];
|
TransferWorker *worker = transfer_worker_pool[i];
|
||||||
if (p_operations_used_by_draw) {
|
if (p_draw_command_buffer) {
|
||||||
MutexLock lock(worker->operations_mutex);
|
MutexLock lock(worker->operations_mutex);
|
||||||
if (worker->operations_processed >= transfer_worker_operation_used_by_draw[worker->index]) {
|
if (worker->operations_processed >= transfer_worker_operation_used_by_draw[worker->index]) {
|
||||||
// The operation used by the draw has already been processed, we don't need to wait on the worker.
|
// The operation used by the draw has already been processed, we don't need to wait on the worker.
|
||||||
@ -5208,12 +5223,21 @@ void RenderingDevice::_submit_transfer_workers(bool p_operations_used_by_draw) {
|
|||||||
{
|
{
|
||||||
MutexLock lock(worker->thread_mutex);
|
MutexLock lock(worker->thread_mutex);
|
||||||
if (worker->recording) {
|
if (worker->recording) {
|
||||||
VectorView<RDD::SemaphoreID> semaphores = p_operations_used_by_draw ? frames[frame].transfer_worker_semaphores[i] : VectorView<RDD::SemaphoreID>();
|
VectorView<RDD::SemaphoreID> semaphores = p_draw_command_buffer ? frames[frame].transfer_worker_semaphores[i] : VectorView<RDD::SemaphoreID>();
|
||||||
_end_transfer_worker(worker);
|
_end_transfer_worker(worker);
|
||||||
_submit_transfer_worker(worker, semaphores);
|
_submit_transfer_worker(worker, semaphores);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (p_draw_command_buffer) {
|
||||||
|
_flush_barriers_for_transfer_worker(worker);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (p_draw_command_buffer && !transfer_worker_pool_texture_barriers.is_empty()) {
|
||||||
|
driver->command_pipeline_barrier(p_draw_command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, {}, {}, transfer_worker_pool_texture_barriers);
|
||||||
|
transfer_worker_pool_texture_barriers.clear();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RenderingDevice::_wait_for_transfer_workers() {
|
void RenderingDevice::_wait_for_transfer_workers() {
|
||||||
@ -5807,10 +5831,10 @@ void RenderingDevice::_end_frame() {
|
|||||||
ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work).");
|
ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work).");
|
||||||
}
|
}
|
||||||
|
|
||||||
_submit_transfer_workers(true);
|
|
||||||
|
|
||||||
// The command buffer must be copied into a stack variable as the driver workarounds can change the command buffer in use.
|
// The command buffer must be copied into a stack variable as the driver workarounds can change the command buffer in use.
|
||||||
RDD::CommandBufferID command_buffer = frames[frame].command_buffer;
|
RDD::CommandBufferID command_buffer = frames[frame].command_buffer;
|
||||||
|
_submit_transfer_workers(command_buffer);
|
||||||
|
|
||||||
draw_graph.end(RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS, command_buffer, frames[frame].command_buffer_pool);
|
draw_graph.end(RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS, command_buffer, frames[frame].command_buffer_pool);
|
||||||
driver->command_buffer_end(command_buffer);
|
driver->command_buffer_end(command_buffer);
|
||||||
driver->end_segment();
|
driver->end_segment();
|
||||||
@ -6387,7 +6411,7 @@ void RenderingDevice::finalize() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Wait for transfer workers to finish.
|
// Wait for transfer workers to finish.
|
||||||
_submit_transfer_workers(false);
|
_submit_transfer_workers();
|
||||||
_wait_for_transfer_workers();
|
_wait_for_transfer_workers();
|
||||||
|
|
||||||
// Delete everything the graph has created.
|
// Delete everything the graph has created.
|
||||||
|
@ -1267,6 +1267,7 @@ private:
|
|||||||
RDD::CommandBufferID command_buffer;
|
RDD::CommandBufferID command_buffer;
|
||||||
RDD::CommandPoolID command_pool;
|
RDD::CommandPoolID command_pool;
|
||||||
RDD::FenceID command_fence;
|
RDD::FenceID command_fence;
|
||||||
|
LocalVector<RDD::TextureBarrier> texture_barriers;
|
||||||
bool recording = false;
|
bool recording = false;
|
||||||
bool submitted = false;
|
bool submitted = false;
|
||||||
BinaryMutex thread_mutex;
|
BinaryMutex thread_mutex;
|
||||||
@ -1280,6 +1281,7 @@ private:
|
|||||||
uint32_t transfer_worker_pool_max_size = 1;
|
uint32_t transfer_worker_pool_max_size = 1;
|
||||||
LocalVector<uint64_t> transfer_worker_operation_used_by_draw;
|
LocalVector<uint64_t> transfer_worker_operation_used_by_draw;
|
||||||
LocalVector<uint32_t> transfer_worker_pool_available_list;
|
LocalVector<uint32_t> transfer_worker_pool_available_list;
|
||||||
|
LocalVector<RDD::TextureBarrier> transfer_worker_pool_texture_barriers;
|
||||||
BinaryMutex transfer_worker_pool_mutex;
|
BinaryMutex transfer_worker_pool_mutex;
|
||||||
ConditionVariable transfer_worker_pool_condition;
|
ConditionVariable transfer_worker_pool_condition;
|
||||||
|
|
||||||
@ -1288,12 +1290,13 @@ private:
|
|||||||
void _end_transfer_worker(TransferWorker *p_transfer_worker);
|
void _end_transfer_worker(TransferWorker *p_transfer_worker);
|
||||||
void _submit_transfer_worker(TransferWorker *p_transfer_worker, VectorView<RDD::SemaphoreID> p_signal_semaphores = VectorView<RDD::SemaphoreID>());
|
void _submit_transfer_worker(TransferWorker *p_transfer_worker, VectorView<RDD::SemaphoreID> p_signal_semaphores = VectorView<RDD::SemaphoreID>());
|
||||||
void _wait_for_transfer_worker(TransferWorker *p_transfer_worker);
|
void _wait_for_transfer_worker(TransferWorker *p_transfer_worker);
|
||||||
|
void _flush_barriers_for_transfer_worker(TransferWorker *p_transfer_worker);
|
||||||
void _check_transfer_worker_operation(uint32_t p_transfer_worker_index, uint64_t p_transfer_worker_operation);
|
void _check_transfer_worker_operation(uint32_t p_transfer_worker_index, uint64_t p_transfer_worker_operation);
|
||||||
void _check_transfer_worker_buffer(Buffer *p_buffer);
|
void _check_transfer_worker_buffer(Buffer *p_buffer);
|
||||||
void _check_transfer_worker_texture(Texture *p_texture);
|
void _check_transfer_worker_texture(Texture *p_texture);
|
||||||
void _check_transfer_worker_vertex_array(VertexArray *p_vertex_array);
|
void _check_transfer_worker_vertex_array(VertexArray *p_vertex_array);
|
||||||
void _check_transfer_worker_index_array(IndexArray *p_index_array);
|
void _check_transfer_worker_index_array(IndexArray *p_index_array);
|
||||||
void _submit_transfer_workers(bool p_operations_used_by_draw);
|
void _submit_transfer_workers(RDD::CommandBufferID p_draw_command_buffer = RDD::CommandBufferID());
|
||||||
void _wait_for_transfer_workers();
|
void _wait_for_transfer_workers();
|
||||||
void _free_transfer_workers();
|
void _free_transfer_workers();
|
||||||
|
|
||||||
|
@ -374,6 +374,8 @@ uint64_t RenderingDeviceDriver::api_trait_get(ApiTrait p_trait) {
|
|||||||
return 1;
|
return 1;
|
||||||
case API_TRAIT_CLEARS_WITH_COPY_ENGINE:
|
case API_TRAIT_CLEARS_WITH_COPY_ENGINE:
|
||||||
return true;
|
return true;
|
||||||
|
case API_TRAIT_USE_GENERAL_IN_COPY_QUEUES:
|
||||||
|
return false;
|
||||||
default:
|
default:
|
||||||
ERR_FAIL_V(0);
|
ERR_FAIL_V(0);
|
||||||
}
|
}
|
||||||
|
@ -220,6 +220,7 @@ public:
|
|||||||
|
|
||||||
enum TextureLayout {
|
enum TextureLayout {
|
||||||
TEXTURE_LAYOUT_UNDEFINED,
|
TEXTURE_LAYOUT_UNDEFINED,
|
||||||
|
TEXTURE_LAYOUT_GENERAL,
|
||||||
TEXTURE_LAYOUT_STORAGE_OPTIMAL,
|
TEXTURE_LAYOUT_STORAGE_OPTIMAL,
|
||||||
TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
|
TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
|
||||||
TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
|
TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
|
||||||
@ -750,6 +751,7 @@ public:
|
|||||||
API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP,
|
API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP,
|
||||||
API_TRAIT_SECONDARY_VIEWPORT_SCISSOR,
|
API_TRAIT_SECONDARY_VIEWPORT_SCISSOR,
|
||||||
API_TRAIT_CLEARS_WITH_COPY_ENGINE,
|
API_TRAIT_CLEARS_WITH_COPY_ENGINE,
|
||||||
|
API_TRAIT_USE_GENERAL_IN_COPY_QUEUES,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum ShaderChangeInvalidation {
|
enum ShaderChangeInvalidation {
|
||||||
|
Loading…
Reference in New Issue
Block a user