diff --git a/doc/classes/RenderingDevice.xml b/doc/classes/RenderingDevice.xml
index 6ec7918d3b2..3daa463f47b 100644
--- a/doc/classes/RenderingDevice.xml
+++ b/doc/classes/RenderingDevice.xml
@@ -191,6 +191,51 @@
Sets the push constant data to [param buffer] for the specified [param compute_list]. The shader determines how this binary data is used. The buffer's size in bytes must also be specified in [param size_bytes] (this can be obtained by calling the [method PackedByteArray.size] method on the passed [param buffer]).
+
+
+
+
+
+ Explicitly sets the semaphores to be signaled by the specified [param compute_list]. For a usage example, see [method compute_list_set_wait_semaphores].
+
+
+
+
+
+
+
+ Explicitly sets the semaphores that the specified [param compute_list] waits on. Each of these semaphores must already have been set as an explicit signal semaphore on a previously created compute list.
+ [b]Note:[/b] A semaphore can be used only once as a signal semaphore and only once as a wait semaphore before syncing.
+ The following simplified example (not a complete program) shows the intended usage:
+ [codeblock]
+ var rd = RenderingServer.create_local_rendering_device()
+ var prev_compute_list = rd.compute_list_begin()
+ var prev_semaphore1 = rd.semaphore_create()
+ var prev_semaphore2 = rd.semaphore_create()
+ rd.compute_list_set_signal_semaphores(prev_compute_list, [prev_semaphore1, prev_semaphore2])
+ rd.compute_list_end()
+
+ var cur_compute_list1 = rd.compute_list_begin()
+ var cur_semaphore1 = rd.semaphore_create()
+ rd.compute_list_set_wait_semaphores(cur_compute_list1, [prev_semaphore1])
+ rd.compute_list_set_signal_semaphores(cur_compute_list1, [cur_semaphore1])
+ # Replace with the custom code
+ rd.compute_list_end()
+
+ var cur_compute_list2 = rd.compute_list_begin()
+ var cur_semaphore2 = rd.semaphore_create()
+ rd.compute_list_set_wait_semaphores(cur_compute_list2, [prev_semaphore2])
+ rd.compute_list_set_signal_semaphores(cur_compute_list2, [cur_semaphore2])
+ # Replace with the custom code
+ rd.compute_list_end()
+
+ var next_compute_list = rd.compute_list_begin()
+ rd.compute_list_set_wait_semaphores(next_compute_list, [cur_semaphore1, cur_semaphore2])
+ # Replace with the custom code
+ rd.compute_list_end()
+ [/codeblock]
+
+
@@ -395,6 +440,22 @@
Sets the push constant data to [param buffer] for the specified [param draw_list]. The shader determines how this binary data is used. The buffer's size in bytes must also be specified in [param size_bytes] (this can be obtained by calling the [method PackedByteArray.size] method on the passed [param buffer]).
+
+
+
+
+
+ Explicitly sets the semaphores to be signaled by the specified [param draw_list]. Usage is the same as for [method compute_list_set_signal_semaphores].
+
+
+
+
+
+
+
+ Explicitly sets the semaphores that the specified [param draw_list] waits on. Usage is the same as for [method compute_list_set_wait_semaphores].
+
+
@@ -783,6 +844,27 @@
[b]Note:[/b] Only the main [RenderingDevice] returned by [method RenderingServer.get_rendering_device] has a width. If called on a local [RenderingDevice], this method prints an error and returns [constant INVALID_ID].
+
+
+
+ Creates a new semaphore. It can be accessed with the RID that is returned.
+ Once finished with your RID, you will want to free the RID using the RenderingDevice's [method free_rid] method.
+
+
+
+
+
+
+ Returns an RID that wraps an existing native semaphore passed as [param semaphore] (a [code]VkSemaphore[/code] on Vulkan, an [code]ID3D12Fence[/code] on Direct3D 12). This allows foreign semaphores to participate in Godot's rendering pipeline.
+
+
+
+
+
+
+ Returns [code]true[/code] if the semaphore specified by the [param semaphore] RID is valid, [code]false[/code] otherwise.
+
+
@@ -1193,6 +1275,9 @@
- Vulkan: [code]VkPipeline[/code].
+
+ - Vulkan: [code]VkSemaphore[/code].
+
diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp
index 88d0ac28ebe..95bc00e7cbe 100644
--- a/drivers/d3d12/rendering_device_driver_d3d12.cpp
+++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp
@@ -2194,6 +2194,14 @@ RDD::SemaphoreID RenderingDeviceDriverD3D12::semaphore_create() {
return SemaphoreID(semaphore);
}
+RDD::SemaphoreID RenderingDeviceDriverD3D12::semaphore_create_from_extension(uint64_t p_native_semaphore) {
+ ComPtr d3d_fence((ID3D12Fence *)p_native_semaphore); // The native handle is reinterpreted as an ID3D12Fence pointer; it is not validated here.
+ // Wrap the fence in a heap-allocated SemaphoreInfo, which semaphore_free() later releases with memdelete().
+ SemaphoreInfo *semaphore = memnew(SemaphoreInfo);
+ semaphore->d3d_fence = d3d_fence;
+ return SemaphoreID(semaphore);
+}
+
void RenderingDeviceDriverD3D12::semaphore_free(SemaphoreID p_semaphore) {
SemaphoreInfo *semaphore = (SemaphoreInfo *)(p_semaphore.id);
memdelete(semaphore);
@@ -6122,6 +6130,10 @@ void RenderingDeviceDriverD3D12::set_object_name(ObjectType p_type, ID p_driver_
const PipelineInfo *pipeline_info = (const PipelineInfo *)p_driver_id.id;
_set_object_name(pipeline_info->pso, p_name);
} break;
+ case OBJECT_TYPE_SEMAPHORE: {
+ const SemaphoreInfo *semaphore_info = (const SemaphoreInfo *)p_driver_id.id;
+ _set_object_name(semaphore_info->d3d_fence.Get(), p_name);
+ } break;
default: {
DEV_ASSERT(false);
}
@@ -6167,6 +6179,13 @@ uint64_t RenderingDeviceDriverD3D12::get_resource_native_handle(DriverResource p
case DRIVER_RESOURCE_COMPUTE_PIPELINE:
case DRIVER_RESOURCE_RENDER_PIPELINE: {
return p_driver_id.id;
+ } break;
+ case DRIVER_RESOURCE_SEMAPHORE: {
+ // Note: Only the underlying D3D12 fence is returned here.
+ // Anyone using this resource directly must understand how the engine changes the associated `fence_value`
+ // while waiting on and signaling the semaphore.
+ const SemaphoreInfo *semaphore_info = (const SemaphoreInfo *)p_driver_id.id;
+ return reinterpret_cast(semaphore_info->d3d_fence.Get());
}
default: {
return 0;
diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h
index 50d0f2cddf8..dc5eb5a30bc 100644
--- a/drivers/d3d12/rendering_device_driver_d3d12.h
+++ b/drivers/d3d12/rendering_device_driver_d3d12.h
@@ -404,6 +404,7 @@ private:
};
virtual SemaphoreID semaphore_create() override;
+ virtual SemaphoreID semaphore_create_from_extension(uint64_t p_native_semaphore) override final;
virtual void semaphore_free(SemaphoreID p_semaphore) override;
/******************/
diff --git a/drivers/metal/rendering_device_driver_metal.h b/drivers/metal/rendering_device_driver_metal.h
index 5ac1bab4c1b..3ffbe29f380 100644
--- a/drivers/metal/rendering_device_driver_metal.h
+++ b/drivers/metal/rendering_device_driver_metal.h
@@ -169,6 +169,7 @@ public:
public:
virtual SemaphoreID semaphore_create() override final;
+ virtual SemaphoreID semaphore_create_from_extension(uint64_t p_native_semaphore) override final;
virtual void semaphore_free(SemaphoreID p_semaphore) override final;
#pragma mark - Commands
diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm
index 2e32e99c2eb..3c6d891920a 100644
--- a/drivers/metal/rendering_device_driver_metal.mm
+++ b/drivers/metal/rendering_device_driver_metal.mm
@@ -840,6 +840,11 @@ RDD::SemaphoreID RenderingDeviceDriverMetal::semaphore_create() {
return SemaphoreID(1);
}
+RDD::SemaphoreID RenderingDeviceDriverMetal::semaphore_create_from_extension(uint64_t p_native_semaphore) {
+ // TODO: Use a wrapper of MTLFence as the Metal semaphore. Until then, p_native_semaphore is ignored and a placeholder ID is returned.
+ return SemaphoreID(1);
+}
+
void RenderingDeviceDriverMetal::semaphore_free(SemaphoreID p_semaphore) {
}
@@ -3833,6 +3838,9 @@ void RenderingDeviceDriverMetal::set_object_name(ObjectType p_type, ID p_driver_
case OBJECT_TYPE_PIPELINE: {
// Can't set label after creation.
} break;
+ case OBJECT_TYPE_SEMAPHORE: {
+ // Semaphore is not currently implemented on the Metal backend
+ } break;
default: {
DEV_ASSERT(false);
}
@@ -3882,6 +3890,9 @@ uint64_t RenderingDeviceDriverMetal::get_resource_native_handle(DriverResource p
MDRenderPipeline *pipeline = (MDRenderPipeline *)(p_driver_id.id);
return (uint64_t)(uintptr_t)(__bridge void *)pipeline->state;
}
+ case DRIVER_RESOURCE_SEMAPHORE: {
+ return p_driver_id.id;
+ }
default: {
return 0;
}
diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp
index 45c574914d6..253c75bf20d 100644
--- a/drivers/vulkan/rendering_device_driver_vulkan.cpp
+++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp
@@ -515,6 +515,17 @@ Error RenderingDeviceDriverVulkan::_initialize_device_extensions() {
_register_requested_device_extension(VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME, false);
_register_requested_device_extension(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, false);
+ // Enable external memory and synchronization to facilitate the use of `texture_create_from_extension` and `semaphore_create_from_extension`.
+ _register_requested_device_extension(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME, false);
+ _register_requested_device_extension(VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME, false);
+#ifdef _WIN64
+ _register_requested_device_extension(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, false);
+ _register_requested_device_extension(VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, false);
+#else
+ _register_requested_device_extension(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, false);
+ _register_requested_device_extension(VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, false);
+#endif
+
if (Engine::get_singleton()->is_generate_spirv_debug_info_enabled()) {
_register_requested_device_extension(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, true);
}
@@ -2424,6 +2435,10 @@ RDD::SemaphoreID RenderingDeviceDriverVulkan::semaphore_create() {
return SemaphoreID(semaphore);
}
+RDD::SemaphoreID RenderingDeviceDriverVulkan::semaphore_create_from_extension(uint64_t p_native_semaphore) {
+ return SemaphoreID((VkSemaphore)p_native_semaphore); // Adopts the handle as-is, without validation. NOTE(review): semaphore_free() passes every SemaphoreID to vkDestroySemaphore, so a wrapped handle is destroyed there too; confirm this ownership transfer is intended.
+}
+
void RenderingDeviceDriverVulkan::semaphore_free(SemaphoreID p_semaphore) {
vkDestroySemaphore(vk_device, VkSemaphore(p_semaphore.id), VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE));
}
@@ -5732,6 +5747,9 @@ void RenderingDeviceDriverVulkan::set_object_name(ObjectType p_type, ID p_driver
case OBJECT_TYPE_PIPELINE: {
_set_object_name(VK_OBJECT_TYPE_PIPELINE, (uint64_t)p_driver_id.id, p_name);
} break;
+ case OBJECT_TYPE_SEMAPHORE: {
+ _set_object_name(VK_OBJECT_TYPE_SEMAPHORE, (uint64_t)p_driver_id.id, p_name);
+ } break;
default: {
DEV_ASSERT(false);
}
@@ -5772,7 +5790,8 @@ uint64_t RenderingDeviceDriverVulkan::get_resource_native_handle(DriverResource
case DRIVER_RESOURCE_UNIFORM_SET:
case DRIVER_RESOURCE_BUFFER:
case DRIVER_RESOURCE_COMPUTE_PIPELINE:
- case DRIVER_RESOURCE_RENDER_PIPELINE: {
+ case DRIVER_RESOURCE_RENDER_PIPELINE:
+ case DRIVER_RESOURCE_SEMAPHORE: {
return p_driver_id.id;
}
default: {
diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h
index ea12450a4cd..3dadb0360ef 100644
--- a/drivers/vulkan/rendering_device_driver_vulkan.h
+++ b/drivers/vulkan/rendering_device_driver_vulkan.h
@@ -296,6 +296,7 @@ public:
/********************/
virtual SemaphoreID semaphore_create() override final;
+ virtual SemaphoreID semaphore_create_from_extension(uint64_t p_native_semaphore) override final;
virtual void semaphore_free(SemaphoreID p_semaphore) override final;
/******************/
diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp
index 9716a42c97b..8ad15be4f59 100644
--- a/servers/rendering/rendering_device.cpp
+++ b/servers/rendering/rendering_device.cpp
@@ -4086,6 +4086,34 @@ bool RenderingDevice::compute_pipeline_is_valid(RID p_pipeline) {
return compute_pipeline_owner.owns(p_pipeline);
}
+/*******************/
+/**** SEMAPHORE ****/
+/*******************/
+
+// Creates a driver semaphore and registers it with semaphore_owner. Fails with an empty RID when the Metal driver is active.
+RID RenderingDevice::semaphore_create() {
+	ERR_FAIL_COND_V_MSG(::OS::get_singleton()->get_current_rendering_driver_name() == "metal", RID(), "The current metal backend does not use Semaphore.");
+
+	Semaphore new_semaphore;
+	new_semaphore.driver_id = driver->semaphore_create();
+	return semaphore_owner.make_rid(new_semaphore);
+}
+
+// Wraps an existing native semaphore handle in an RID. The parameter keeps its declared name, p_image, but carries the native semaphore handle.
+RID RenderingDevice::semaphore_create_from_extension(uint64_t p_image) {
+	ERR_FAIL_COND_V_MSG(::OS::get_singleton()->get_current_rendering_driver_name() == "metal", RID(), "The current metal backend does not use Semaphore.");
+	Semaphore semaphore;
+	// Forward the handle to the driver so the RID refers to the caller's semaphore rather than to a newly created one.
+	semaphore.driver_id = driver->semaphore_create_from_extension(p_image);
+	return semaphore_owner.make_rid(semaphore);
+}
+
+bool RenderingDevice::semaphore_is_valid(RID p_semaphore) {
+ _THREAD_SAFE_METHOD_
+ // A semaphore RID is valid for as long as semaphore_owner owns it.
+ return semaphore_owner.owns(p_semaphore);
+}
+
/****************/
/**** SCREEN ****/
/****************/
@@ -4636,6 +4664,38 @@ void RenderingDevice::draw_list_set_push_constant(DrawListID p_list, const void
#endif
}
+void RenderingDevice::draw_list_set_signal_semaphores(DrawListID p_list, const TypedArray &p_signal_semaphores) {
+ ERR_RENDER_THREAD_GUARD();
+ // Records explicit signal semaphores in the draw graph. p_list is only used to check that it resolves to a draw list.
+ DrawList *dl = _get_draw_list_ptr(p_list);
+ ERR_FAIL_NULL(dl);
+ // Resolve every entry of p_signal_semaphores to its driver ID. The first entry that semaphore_owner cannot resolve aborts the call before anything is recorded.
+ LocalVector signal_semaphores;
+ for (int i = 0; i < p_signal_semaphores.size(); i++) {
+ const Semaphore *signal_semaphore = semaphore_owner.get_or_null(p_signal_semaphores[i]);
+ ERR_FAIL_NULL(signal_semaphore);
+ signal_semaphores.push_back(signal_semaphore->driver_id);
+ }
+ // Hand the resolved driver IDs to the draw graph.
+ draw_graph.add_draw_list_set_signal_semaphores(signal_semaphores);
+}
+
+void RenderingDevice::draw_list_set_wait_semaphores(DrawListID p_list, const TypedArray &p_wait_semaphores) {
+ ERR_RENDER_THREAD_GUARD();
+ // Records explicit wait semaphores in the draw graph. p_list is only used to check that it resolves to a draw list.
+ DrawList *dl = _get_draw_list_ptr(p_list);
+ ERR_FAIL_NULL(dl);
+ // Resolve every entry of p_wait_semaphores to its driver ID. The first entry that semaphore_owner cannot resolve aborts the call before anything is recorded.
+ LocalVector wait_semaphores;
+ for (int i = 0; i < p_wait_semaphores.size(); i++) {
+ const Semaphore *wait_semaphore = semaphore_owner.get_or_null(p_wait_semaphores[i]);
+ ERR_FAIL_NULL(wait_semaphore);
+ wait_semaphores.push_back(wait_semaphore->driver_id);
+ }
+ // Hand the resolved driver IDs to the draw graph.
+ draw_graph.add_draw_list_set_wait_semaphores(wait_semaphores);
+}
+
void RenderingDevice::draw_list_draw(DrawListID p_list, bool p_use_indices, uint32_t p_instances, uint32_t p_procedural_vertices) {
ERR_RENDER_THREAD_GUARD();
@@ -5201,6 +5261,46 @@ void RenderingDevice::compute_list_set_push_constant(ComputeListID p_list, const
#endif
}
+void RenderingDevice::compute_list_set_signal_semaphores(ComputeListID p_list, const TypedArray &p_signal_semaphores) {
+ ERR_RENDER_THREAD_GUARD();
+ // Records explicit signal semaphores for the active compute list. p_list must equal ID_TYPE_COMPUTE_LIST and a compute list must currently exist.
+ ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
+ ERR_FAIL_NULL(compute_list);
+ ComputeList *cl = compute_list;
+ // Resolve every entry of p_signal_semaphores to its driver ID. The first entry that semaphore_owner cannot resolve aborts the call before anything is recorded or stored.
+ LocalVector signal_semaphores;
+ for (int i = 0; i < p_signal_semaphores.size(); i++) {
+ const Semaphore *signal_semaphore = semaphore_owner.get_or_null(p_signal_semaphores[i]);
+ ERR_FAIL_NULL(signal_semaphore);
+ signal_semaphores.push_back(signal_semaphore->driver_id);
+ }
+ // Hand the resolved driver IDs to the draw graph.
+ draw_graph.add_compute_list_set_signal_semaphores(signal_semaphores);
+
+ // Store it in the state in case we need to restart the compute list.
+ cl->state.signal_semaphores = p_signal_semaphores;
+}
+
+void RenderingDevice::compute_list_set_wait_semaphores(ComputeListID p_list, const TypedArray &p_wait_semaphores) {
+ ERR_RENDER_THREAD_GUARD();
+ // Records explicit wait semaphores for the active compute list. p_list must equal ID_TYPE_COMPUTE_LIST and a compute list must currently exist.
+ ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
+ ERR_FAIL_NULL(compute_list);
+ ComputeList *cl = compute_list;
+ // Resolve every entry of p_wait_semaphores to its driver ID. The first entry that semaphore_owner cannot resolve aborts the call before anything is recorded or stored.
+ LocalVector wait_semaphores;
+ for (int i = 0; i < p_wait_semaphores.size(); i++) {
+ const Semaphore *wait_semaphore = semaphore_owner.get_or_null(p_wait_semaphores[i]);
+ ERR_FAIL_NULL(wait_semaphore);
+ wait_semaphores.push_back(wait_semaphore->driver_id);
+ }
+ // Hand the resolved driver IDs to the draw graph.
+ draw_graph.add_compute_list_set_wait_semaphores(wait_semaphores);
+
+ // Store it in the state in case we need to restart the compute list.
+ cl->state.wait_semaphores = p_wait_semaphores;
+}
+
void RenderingDevice::compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
ERR_RENDER_THREAD_GUARD();
@@ -5483,6 +5583,9 @@ void RenderingDevice::compute_list_add_barrier(ComputeListID p_list) {
compute_list_bind_compute_pipeline(p_list, compute_list_barrier_state.pipeline);
}
+ ERR_FAIL_COND_MSG(compute_list_barrier_state.signal_semaphores.size() > 0, "Barriers cannot be added to a compute list that contains explicit signal semaphores.");
+ ERR_FAIL_COND_MSG(compute_list_barrier_state.wait_semaphores.size() > 0, "Barriers cannot be added to a compute list that contains explicit wait semaphores.");
+
for (uint32_t i = 0; i < compute_list_barrier_state.set_count; i++) {
if (compute_list_barrier_state.sets[i].uniform_set.is_valid()) {
compute_list_bind_uniform_set(p_list, compute_list_barrier_state.sets[i].uniform_set, i);
@@ -6099,6 +6202,10 @@ void RenderingDevice::_free_internal(RID p_id) {
ComputePipeline *pipeline = compute_pipeline_owner.get_or_null(p_id);
frames[frame].compute_pipelines_to_dispose_of.push_back(*pipeline);
compute_pipeline_owner.free(p_id);
+ } else if (semaphore_owner.owns(p_id)) {
+ Semaphore *semaphore = semaphore_owner.get_or_null(p_id);
+ frames[frame].semaphores_to_dispose_of.push_back(*semaphore);
+ semaphore_owner.free(p_id);
} else {
#ifdef DEV_ENABLED
ERR_PRINT("Attempted to free invalid ID: " + itos(p_id.get_id()) + " " + resource_name);
@@ -6151,6 +6258,9 @@ void RenderingDevice::set_resource_name(RID p_id, const String &p_name) {
} else if (compute_pipeline_owner.owns(p_id)) {
ComputePipeline *pipeline = compute_pipeline_owner.get_or_null(p_id);
driver->set_object_name(RDD::OBJECT_TYPE_PIPELINE, pipeline->driver_id, p_name);
+ } else if (semaphore_owner.owns(p_id)) {
+ Semaphore *semaphore = semaphore_owner.get_or_null(p_id);
+ driver->set_object_name(RDD::OBJECT_TYPE_SEMAPHORE, semaphore->driver_id, p_name);
} else {
ERR_PRINT("Attempted to name invalid ID: " + itos(p_id.get_id()));
return;
@@ -6243,6 +6353,15 @@ void RenderingDevice::sync() {
void RenderingDevice::_free_pending_resources(int p_frame) {
// Free in dependency usage order, so nothing weird happens.
+ // Semaphores
+ while (frames[p_frame].semaphores_to_dispose_of.front()) {
+ Semaphore *semaphore = &frames[p_frame].semaphores_to_dispose_of.front()->get();
+
+ driver->semaphore_free(semaphore->driver_id);
+
+ frames[p_frame].semaphores_to_dispose_of.pop_front();
+ }
+
// Pipelines.
while (frames[p_frame].render_pipelines_to_dispose_of.front()) {
RenderPipeline *pipeline = &frames[p_frame].render_pipelines_to_dispose_of.front()->get();
@@ -6432,10 +6551,31 @@ void RenderingDevice::execute_chained_cmds(bool p_present_swap_chain, RenderingD
thread_local LocalVector wait_semaphores;
wait_semaphores = frames[frame].semaphores_to_wait_on;
+ uint32_t semaphores_list_count = buffer_pool.signal_semaphores_list.size();
+ if (semaphores_list_count < command_buffer_count) {
+ semaphores_list_count = command_buffer_count;
+ }
+ buffer_pool.signal_semaphores_list.resize(semaphores_list_count);
+ buffer_pool.wait_semaphores_list.resize(semaphores_list_count);
+
+ bool has_explicit_semaphores = false;
+ for (uint32_t i = 0; i < semaphores_list_count; i++) {
+ const LocalVector &_signal_semaphores = buffer_pool.signal_semaphores_list[i];
+ if (_signal_semaphores.size() > 0) {
+ has_explicit_semaphores = true;
+ break;
+ }
+ }
+ has_explicit_semaphores = has_explicit_semaphores && command_buffer_count == semaphores_list_count;
+
for (uint32_t i = 0; i < command_buffer_count; i++) {
RDD::CommandBufferID command_buffer;
RDD::SemaphoreID signal_semaphore;
RDD::FenceID signal_fence;
+
+ LocalVector signal_semaphores;
+ const LocalVector &_signal_semaphores = buffer_pool.signal_semaphores_list[i];
+
if (i > 0) {
command_buffer = buffer_pool.buffers[i - 1];
} else {
@@ -6455,14 +6595,26 @@ void RenderingDevice::execute_chained_cmds(bool p_present_swap_chain, RenderingD
signal_semaphore = buffer_pool.semaphores[i];
// Semaphores always need to be signaled if it's not the last command buffer.
}
+ if (has_explicit_semaphores && _signal_semaphores.size() > 0) {
+ signal_semaphores = _signal_semaphores;
+ buffer_pool.signal_semaphores_list[i].resize(0);
+ } else if (signal_semaphore) {
+ signal_semaphores.push_back(signal_semaphore);
+ }
driver->command_queue_execute_and_present(main_queue, wait_semaphores, command_buffer,
- signal_semaphore ? signal_semaphore : VectorView(), signal_fence,
+ signal_semaphores, signal_fence,
swap_chains);
- // Make the next command buffer wait on the semaphore signaled by this one.
- wait_semaphores.resize(1);
- wait_semaphores[0] = signal_semaphore;
+ const LocalVector &_wait_semaphores = buffer_pool.wait_semaphores_list[i];
+ if (has_explicit_semaphores && _wait_semaphores.size() > 0) {
+ wait_semaphores = _wait_semaphores;
+ buffer_pool.wait_semaphores_list[i].resize(0);
+ } else {
+ // Make the next command buffer wait on the semaphore signaled by this one.
+ wait_semaphores.resize(1);
+ wait_semaphores[0] = signal_semaphore;
+ }
}
frames[frame].semaphores_to_wait_on.clear();
@@ -7000,6 +7152,12 @@ uint64_t RenderingDevice::get_driver_resource(DriverResource p_resource, RID p_r
driver_id = render_pipeline->driver_id.id;
} break;
+ case DRIVER_RESOURCE_SEMAPHORE: {
+ Semaphore *semaphore = semaphore_owner.get_or_null(p_rid);
+ ERR_FAIL_NULL_V(semaphore, 0);
+
+ driver_id = semaphore->driver_id.id;
+ } break;
default: {
ERR_FAIL_V(0);
} break;
@@ -7099,6 +7257,7 @@ void RenderingDevice::finalize() {
draw_graph.finalize();
// Free all resources.
+ _free_rids(semaphore_owner, "Semaphore");
_free_rids(render_pipeline_owner, "Pipeline");
_free_rids(compute_pipeline_owner, "Compute");
_free_rids(uniform_set_owner, "UniformSet");
@@ -7163,6 +7322,7 @@ void RenderingDevice::finalize() {
RDG::CommandBufferPool &buffer_pool = frames[i].command_buffer_pool;
for (uint32_t j = 0; j < buffer_pool.buffers.size(); j++) {
driver->semaphore_free(buffer_pool.semaphores[j]);
+ // Explicitly set wait_semaphores and signal_semaphores are not freed here; they need to be freed manually.
}
for (uint32_t j = 0; j < frames[i].transfer_worker_semaphores.size(); j++) {
@@ -7329,6 +7489,9 @@ void RenderingDevice::_bind_methods() {
ClassDB::bind_method(D_METHOD("compute_pipeline_create", "shader", "specialization_constants"), &RenderingDevice::_compute_pipeline_create, DEFVAL(TypedArray()));
ClassDB::bind_method(D_METHOD("compute_pipeline_is_valid", "compute_pipeline"), &RenderingDevice::compute_pipeline_is_valid);
+ ClassDB::bind_method(D_METHOD("semaphore_create"), &RenderingDevice::semaphore_create);
+ ClassDB::bind_method(D_METHOD("semaphore_is_valid", "semaphore"), &RenderingDevice::semaphore_is_valid);
+
ClassDB::bind_method(D_METHOD("screen_get_width", "screen"), &RenderingDevice::screen_get_width, DEFVAL(DisplayServer::MAIN_WINDOW_ID));
ClassDB::bind_method(D_METHOD("screen_get_height", "screen"), &RenderingDevice::screen_get_height, DEFVAL(DisplayServer::MAIN_WINDOW_ID));
ClassDB::bind_method(D_METHOD("screen_get_framebuffer_format", "screen"), &RenderingDevice::screen_get_framebuffer_format, DEFVAL(DisplayServer::MAIN_WINDOW_ID));
@@ -7346,6 +7509,8 @@ void RenderingDevice::_bind_methods() {
ClassDB::bind_method(D_METHOD("draw_list_bind_vertex_array", "draw_list", "vertex_array"), &RenderingDevice::draw_list_bind_vertex_array);
ClassDB::bind_method(D_METHOD("draw_list_bind_index_array", "draw_list", "index_array"), &RenderingDevice::draw_list_bind_index_array);
ClassDB::bind_method(D_METHOD("draw_list_set_push_constant", "draw_list", "buffer", "size_bytes"), &RenderingDevice::_draw_list_set_push_constant);
+ ClassDB::bind_method(D_METHOD("draw_list_set_signal_semaphores", "draw_list", "signal_semaphores"), &RenderingDevice::draw_list_set_signal_semaphores);
+ ClassDB::bind_method(D_METHOD("draw_list_set_wait_semaphores", "draw_list", "wait_semaphores"), &RenderingDevice::draw_list_set_wait_semaphores);
ClassDB::bind_method(D_METHOD("draw_list_draw", "draw_list", "use_indices", "instances", "procedural_vertex_count"), &RenderingDevice::draw_list_draw, DEFVAL(0));
ClassDB::bind_method(D_METHOD("draw_list_draw_indirect", "draw_list", "use_indices", "buffer", "offset", "draw_count", "stride"), &RenderingDevice::draw_list_draw_indirect, DEFVAL(0), DEFVAL(1), DEFVAL(0));
@@ -7363,6 +7528,8 @@ void RenderingDevice::_bind_methods() {
ClassDB::bind_method(D_METHOD("compute_list_begin"), &RenderingDevice::compute_list_begin);
ClassDB::bind_method(D_METHOD("compute_list_bind_compute_pipeline", "compute_list", "compute_pipeline"), &RenderingDevice::compute_list_bind_compute_pipeline);
ClassDB::bind_method(D_METHOD("compute_list_set_push_constant", "compute_list", "buffer", "size_bytes"), &RenderingDevice::_compute_list_set_push_constant);
+ ClassDB::bind_method(D_METHOD("compute_list_set_signal_semaphores", "compute_list", "signal_semaphores"), &RenderingDevice::compute_list_set_signal_semaphores);
+ ClassDB::bind_method(D_METHOD("compute_list_set_wait_semaphores", "compute_list", "wait_semaphores"), &RenderingDevice::compute_list_set_wait_semaphores);
ClassDB::bind_method(D_METHOD("compute_list_bind_uniform_set", "compute_list", "uniform_set", "set_index"), &RenderingDevice::compute_list_bind_uniform_set);
ClassDB::bind_method(D_METHOD("compute_list_dispatch", "compute_list", "x_groups", "y_groups", "z_groups"), &RenderingDevice::compute_list_dispatch);
ClassDB::bind_method(D_METHOD("compute_list_dispatch_indirect", "compute_list", "buffer", "offset"), &RenderingDevice::compute_list_dispatch_indirect);
@@ -7441,6 +7608,7 @@ void RenderingDevice::_bind_methods() {
BIND_ENUM_CONSTANT(DRIVER_RESOURCE_BUFFER);
BIND_ENUM_CONSTANT(DRIVER_RESOURCE_COMPUTE_PIPELINE);
BIND_ENUM_CONSTANT(DRIVER_RESOURCE_RENDER_PIPELINE);
+ BIND_ENUM_CONSTANT(DRIVER_RESOURCE_SEMAPHORE);
#ifndef DISABLE_DEPRECATED
BIND_ENUM_CONSTANT(DRIVER_RESOURCE_VULKAN_DEVICE);
BIND_ENUM_CONSTANT(DRIVER_RESOURCE_VULKAN_PHYSICAL_DEVICE);
diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h
index 10723392a1e..0ff5d5f9693 100644
--- a/servers/rendering/rendering_device.h
+++ b/servers/rendering/rendering_device.h
@@ -1138,6 +1138,23 @@ public:
RID compute_pipeline_create(RID p_shader, const Vector &p_specialization_constants = Vector());
bool compute_pipeline_is_valid(RID p_pipeline);
+ /*******************/
+ /**** SEMAPHORE ****/
+ /*******************/
+
+private:
+ struct Semaphore {
+ RDD::SemaphoreID driver_id;
+ };
+
+ RID_Owner semaphore_owner;
+
+public:
+ RID semaphore_create();
+ RID semaphore_create_from_extension(uint64_t p_image);
+
+ bool semaphore_is_valid(RID p_semaphore);
+
private:
/****************/
/**** SCREEN ****/
@@ -1279,6 +1296,8 @@ public:
void draw_list_bind_index_array(DrawListID p_list, RID p_index_array);
void draw_list_set_line_width(DrawListID p_list, float p_width);
void draw_list_set_push_constant(DrawListID p_list, const void *p_data, uint32_t p_data_size);
+ void draw_list_set_signal_semaphores(DrawListID p_list, const TypedArray &p_signal_semaphores);
+ void draw_list_set_wait_semaphores(DrawListID p_list, const TypedArray &p_wait_semaphores);
void draw_list_draw(DrawListID p_list, bool p_use_indices, uint32_t p_instances = 1, uint32_t p_procedural_vertices = 0);
void draw_list_draw_indirect(DrawListID p_list, bool p_use_indices, RID p_buffer, uint32_t p_offset = 0, uint32_t p_draw_count = 1, uint32_t p_stride = 0);
@@ -1316,6 +1335,8 @@ private:
uint32_t local_group_size[3] = { 0, 0, 0 };
uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE] = {};
uint32_t push_constant_size = 0;
+ TypedArray signal_semaphores;
+ TypedArray wait_semaphores;
uint32_t dispatch_count = 0;
} state;
@@ -1343,6 +1364,8 @@ public:
void compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline);
void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index);
void compute_list_set_push_constant(ComputeListID p_list, const void *p_data, uint32_t p_data_size);
+ void compute_list_set_signal_semaphores(ComputeListID p_list, const TypedArray &p_signal_semaphores);
+ void compute_list_set_wait_semaphores(ComputeListID p_list, const TypedArray &p_wait_semaphores);
void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups);
void compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads);
void compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset);
@@ -1453,6 +1476,7 @@ private:
List uniform_sets_to_dispose_of;
List render_pipelines_to_dispose_of;
List compute_pipelines_to_dispose_of;
+ List semaphores_to_dispose_of;
// Pending asynchronous data transfer for buffers.
LocalVector download_buffer_staging_buffers;
diff --git a/servers/rendering/rendering_device_commons.h b/servers/rendering/rendering_device_commons.h
index 67946fc4875..84e3562aa73 100644
--- a/servers/rendering/rendering_device_commons.h
+++ b/servers/rendering/rendering_device_commons.h
@@ -809,6 +809,7 @@ public:
DRIVER_RESOURCE_BUFFER,
DRIVER_RESOURCE_COMPUTE_PIPELINE,
DRIVER_RESOURCE_RENDER_PIPELINE,
+ DRIVER_RESOURCE_SEMAPHORE,
#ifndef DISABLE_DEPRECATED
DRIVER_RESOURCE_VULKAN_DEVICE = DRIVER_RESOURCE_LOGICAL_DEVICE,
DRIVER_RESOURCE_VULKAN_PHYSICAL_DEVICE = DRIVER_RESOURCE_PHYSICAL_DEVICE,
diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h
index 1f65cdcaa9a..8d58f8fe276 100644
--- a/servers/rendering/rendering_device_driver.h
+++ b/servers/rendering/rendering_device_driver.h
@@ -404,6 +404,7 @@ public:
/********************/
virtual SemaphoreID semaphore_create() = 0;
+ virtual SemaphoreID semaphore_create_from_extension(uint64_t p_native_semaphore) = 0;
virtual void semaphore_free(SemaphoreID p_semaphore) = 0;
/*************************/
@@ -777,6 +778,7 @@ public:
OBJECT_TYPE_SHADER,
OBJECT_TYPE_UNIFORM_SET,
OBJECT_TYPE_PIPELINE,
+ OBJECT_TYPE_SEMAPHORE,
};
struct MultiviewCapabilities {
diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp
index ae91e1b3e4a..9b2bdaeadd0 100644
--- a/servers/rendering/rendering_device_graph.cpp
+++ b/servers/rendering/rendering_device_graph.cpp
@@ -733,6 +733,54 @@ void RenderingDeviceGraph::_add_buffer_barrier_to_command(RDD::BufferID p_buffer
}
#endif
+// Scans a recorded compute list instruction stream and returns the first instruction of type p_type, or nullptr when none is found.
+const RenderingDeviceGraph::ComputeListInstruction *RenderingDeviceGraph::_get_compute_list_command_instruction(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size, ComputeListInstruction::Type p_type) {
+	uint32_t instruction_data_cursor = 0;
+	while (instruction_data_cursor < p_instruction_data_size) {
+		DEV_ASSERT((instruction_data_cursor + sizeof(ComputeListInstruction)) <= p_instruction_data_size);
+		const ComputeListInstruction *instruction = reinterpret_cast<const ComputeListInstruction *>(&p_instruction_data[instruction_data_cursor]);
+		if (instruction->type == p_type) {
+			return instruction;
+		}
+		// Not a match: advance the cursor by the full encoded size of this instruction, including any trailing payload.
+		switch (instruction->type) {
+			case ComputeListInstruction::TYPE_BIND_PIPELINE: {
+				instruction_data_cursor += sizeof(ComputeListBindPipelineInstruction);
+			} break;
+			case ComputeListInstruction::TYPE_BIND_UNIFORM_SETS: {
+				const ComputeListBindUniformSetsInstruction *bind_uniform_sets_instruction = reinterpret_cast<const ComputeListBindUniformSetsInstruction *>(instruction);
+				instruction_data_cursor += sizeof(ComputeListBindUniformSetsInstruction) + sizeof(RDD::UniformSetID) * bind_uniform_sets_instruction->set_count;
+			} break;
+			case ComputeListInstruction::TYPE_DISPATCH: {
+				instruction_data_cursor += sizeof(ComputeListDispatchInstruction);
+			} break;
+			case ComputeListInstruction::TYPE_DISPATCH_INDIRECT: {
+				instruction_data_cursor += sizeof(ComputeListDispatchIndirectInstruction);
+			} break;
+			case ComputeListInstruction::TYPE_SET_PUSH_CONSTANT: {
+				const ComputeListSetPushConstantInstruction *set_push_constant_instruction = reinterpret_cast<const ComputeListSetPushConstantInstruction *>(instruction);
+				instruction_data_cursor += sizeof(ComputeListSetPushConstantInstruction);
+				instruction_data_cursor += set_push_constant_instruction->size;
+			} break;
+			case ComputeListInstruction::TYPE_SET_SIGNAL_SEMAPHORES: {
+				const ComputeListSemaphoresInstruction *signal_semaphore_instruction = reinterpret_cast<const ComputeListSemaphoresInstruction *>(instruction);
+				instruction_data_cursor += sizeof(ComputeListSemaphoresInstruction) + sizeof(RDD::SemaphoreID) * signal_semaphore_instruction->count; // NOTE(review): assumes the payload holds `count` RDD::SemaphoreID entries; confirm against the recording code.
+			} break;
+			case ComputeListInstruction::TYPE_SET_WAIT_SEMAPHORES: {
+				const ComputeListSemaphoresInstruction *wait_semaphore_instruction = reinterpret_cast<const ComputeListSemaphoresInstruction *>(instruction);
+				instruction_data_cursor += sizeof(ComputeListSemaphoresInstruction) + sizeof(RDD::SemaphoreID) * wait_semaphore_instruction->count;
+			} break;
+			case ComputeListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE: {
+				instruction_data_cursor += sizeof(ComputeListUniformSetPrepareForUseInstruction);
+			} break;
+			default:
+				DEV_ASSERT(false && "Unknown compute list instruction type.");
+				return nullptr;
+		}
+	}
+	return nullptr;
+}
+
void RenderingDeviceGraph::_run_compute_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) {
uint32_t instruction_data_cursor = 0;
while (instruction_data_cursor < p_instruction_data_size) {
@@ -767,6 +815,14 @@ void RenderingDeviceGraph::_run_compute_list_command(RDD::CommandBufferID p_comm
instruction_data_cursor += sizeof(ComputeListSetPushConstantInstruction);
instruction_data_cursor += set_push_constant_instruction->size;
} break;
+ case ComputeListInstruction::TYPE_SET_SIGNAL_SEMAPHORES: {
+ const ComputeListSemaphoresInstruction *signal_semaphore_instruction = reinterpret_cast(instruction);
+ instruction_data_cursor += sizeof(ComputeListSemaphoresInstruction) + sizeof(RDD::UniformSetID) * signal_semaphore_instruction->count;
+ } break;
+ case ComputeListInstruction::TYPE_SET_WAIT_SEMAPHORES: {
+ const ComputeListSemaphoresInstruction *wait_semaphore_instruction = reinterpret_cast(instruction);
+ instruction_data_cursor += sizeof(ComputeListSemaphoresInstruction) + sizeof(RDD::UniformSetID) * wait_semaphore_instruction->count;
+ } break;
case ComputeListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE: {
const ComputeListUniformSetPrepareForUseInstruction *uniform_set_prepare_for_use_instruction = reinterpret_cast(instruction);
driver->command_uniform_set_prepare_for_use(p_command_buffer, uniform_set_prepare_for_use_instruction->uniform_set, uniform_set_prepare_for_use_instruction->shader, uniform_set_prepare_for_use_instruction->set_index);
@@ -811,6 +867,92 @@ void RenderingDeviceGraph::_get_draw_list_render_pass_and_framebuffer(const Reco
r_framebuffer = it->value.framebuffer;
}
+// Walks the packed instruction stream of a draw list and returns the first instruction whose type equals p_type.
+// Returns nullptr when no such instruction is present, or when an unknown instruction type is encountered.
+// No instruction is executed here; each one is only measured so the cursor can step over it.
+const RenderingDeviceGraph::DrawListInstruction *RenderingDeviceGraph::_get_draw_list_command_instruction(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size, DrawListInstruction::Type p_type) {
+	uint32_t instruction_data_cursor = 0;
+	while (instruction_data_cursor < p_instruction_data_size) {
+		DEV_ASSERT((instruction_data_cursor + sizeof(DrawListInstruction)) <= p_instruction_data_size);
+
+		const DrawListInstruction *instruction = reinterpret_cast<const DrawListInstruction *>(&p_instruction_data[instruction_data_cursor]);
+		if (instruction->type == p_type) {
+			return instruction;
+		}
+
+		// Not a match: advance past this instruction, including any variable-length payload stored after its header.
+		switch (instruction->type) {
+			case DrawListInstruction::TYPE_BIND_INDEX_BUFFER: {
+				instruction_data_cursor += sizeof(DrawListBindIndexBufferInstruction);
+			} break;
+			case DrawListInstruction::TYPE_BIND_PIPELINE: {
+				instruction_data_cursor += sizeof(DrawListBindPipelineInstruction);
+			} break;
+			case DrawListInstruction::TYPE_BIND_UNIFORM_SETS: {
+				const DrawListBindUniformSetsInstruction *bind_uniform_sets_instruction = reinterpret_cast<const DrawListBindUniformSetsInstruction *>(instruction);
+				instruction_data_cursor += sizeof(DrawListBindUniformSetsInstruction) + sizeof(RDD::UniformSetID) * bind_uniform_sets_instruction->set_count;
+			} break;
+			case DrawListInstruction::TYPE_BIND_VERTEX_BUFFERS: {
+				const DrawListBindVertexBuffersInstruction *bind_vertex_buffers_instruction = reinterpret_cast<const DrawListBindVertexBuffersInstruction *>(instruction);
+				instruction_data_cursor += sizeof(DrawListBindVertexBuffersInstruction);
+				instruction_data_cursor += sizeof(RDD::BufferID) * bind_vertex_buffers_instruction->vertex_buffers_count;
+				instruction_data_cursor += sizeof(uint64_t) * bind_vertex_buffers_instruction->vertex_buffers_count;
+			} break;
+			case DrawListInstruction::TYPE_CLEAR_ATTACHMENTS: {
+				const DrawListClearAttachmentsInstruction *clear_attachments_instruction = reinterpret_cast<const DrawListClearAttachmentsInstruction *>(instruction);
+				instruction_data_cursor += sizeof(DrawListClearAttachmentsInstruction);
+				instruction_data_cursor += sizeof(RDD::AttachmentClear) * clear_attachments_instruction->attachments_clear_count;
+				instruction_data_cursor += sizeof(Rect2i) * clear_attachments_instruction->attachments_clear_rect_count;
+			} break;
+			case DrawListInstruction::TYPE_DRAW: {
+				instruction_data_cursor += sizeof(DrawListDrawInstruction);
+			} break;
+			case DrawListInstruction::TYPE_DRAW_INDEXED: {
+				instruction_data_cursor += sizeof(DrawListDrawIndexedInstruction);
+			} break;
+			case DrawListInstruction::TYPE_DRAW_INDIRECT: {
+				instruction_data_cursor += sizeof(DrawListDrawIndirectInstruction);
+			} break;
+			case DrawListInstruction::TYPE_DRAW_INDEXED_INDIRECT: {
+				instruction_data_cursor += sizeof(DrawListDrawIndexedIndirectInstruction);
+			} break;
+			case DrawListInstruction::TYPE_EXECUTE_COMMANDS: {
+				instruction_data_cursor += sizeof(DrawListExecuteCommandsInstruction);
+			} break;
+			case DrawListInstruction::TYPE_NEXT_SUBPASS: {
+				instruction_data_cursor += sizeof(DrawListNextSubpassInstruction);
+			} break;
+			case DrawListInstruction::TYPE_SET_BLEND_CONSTANTS: {
+				instruction_data_cursor += sizeof(DrawListSetBlendConstantsInstruction);
+			} break;
+			case DrawListInstruction::TYPE_SET_LINE_WIDTH: {
+				instruction_data_cursor += sizeof(DrawListSetLineWidthInstruction);
+			} break;
+			case DrawListInstruction::TYPE_SET_PUSH_CONSTANT: {
+				const DrawListSetPushConstantInstruction *set_push_constant_instruction = reinterpret_cast<const DrawListSetPushConstantInstruction *>(instruction);
+				instruction_data_cursor += sizeof(DrawListSetPushConstantInstruction);
+				instruction_data_cursor += set_push_constant_instruction->size;
+			} break;
+			case DrawListInstruction::TYPE_SET_SCISSOR: {
+				instruction_data_cursor += sizeof(DrawListSetScissorInstruction);
+			} break;
+			case DrawListInstruction::TYPE_SET_SIGNAL_SEMAPHORES: {
+				// The payload is an array of RDD::SemaphoreID, so the stride is computed from that type.
+				// NOTE(review): assumes the recording side sizes the payload identically - TODO confirm all RDD ID types share one size.
+				const DrawListSemaphoresInstruction *signal_semaphore_instruction = reinterpret_cast<const DrawListSemaphoresInstruction *>(instruction);
+				instruction_data_cursor += sizeof(DrawListSemaphoresInstruction) + sizeof(RDD::SemaphoreID) * signal_semaphore_instruction->count;
+			} break;
+			case DrawListInstruction::TYPE_SET_VIEWPORT: {
+				instruction_data_cursor += sizeof(DrawListSetViewportInstruction);
+			} break;
+			case DrawListInstruction::TYPE_SET_WAIT_SEMAPHORES: {
+				const DrawListSemaphoresInstruction *wait_semaphore_instruction = reinterpret_cast<const DrawListSemaphoresInstruction *>(instruction);
+				instruction_data_cursor += sizeof(DrawListSemaphoresInstruction) + sizeof(RDD::SemaphoreID) * wait_semaphore_instruction->count;
+			} break;
+			case DrawListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE: {
+				instruction_data_cursor += sizeof(DrawListUniformSetPrepareForUseInstruction);
+			} break;
+			default:
+				// The size of an unknown instruction cannot be determined, so the scan cannot continue.
+				DEV_ASSERT(false && "Unknown draw list instruction type.");
+				return nullptr;
+		}
+	}
+	return nullptr;
+}
+
void RenderingDeviceGraph::_run_draw_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) {
uint32_t instruction_data_cursor = 0;
while (instruction_data_cursor < p_instruction_data_size) {
@@ -901,11 +1043,19 @@ void RenderingDeviceGraph::_run_draw_list_command(RDD::CommandBufferID p_command
driver->command_render_set_scissor(p_command_buffer, set_scissor_instruction->rect);
instruction_data_cursor += sizeof(DrawListSetScissorInstruction);
} break;
+ case DrawListInstruction::TYPE_SET_SIGNAL_SEMAPHORES: {
+ const DrawListSemaphoresInstruction *signal_semaphore_instruction = reinterpret_cast(instruction);
+ instruction_data_cursor += sizeof(DrawListSemaphoresInstruction) + sizeof(RDD::UniformSetID) * signal_semaphore_instruction->count;
+ } break;
case DrawListInstruction::TYPE_SET_VIEWPORT: {
const DrawListSetViewportInstruction *set_viewport_instruction = reinterpret_cast(instruction);
driver->command_render_set_viewport(p_command_buffer, set_viewport_instruction->rect);
instruction_data_cursor += sizeof(DrawListSetViewportInstruction);
} break;
+ case DrawListInstruction::TYPE_SET_WAIT_SEMAPHORES: {
+ const DrawListSemaphoresInstruction *wait_semaphore_instruction = reinterpret_cast(instruction);
+ instruction_data_cursor += sizeof(DrawListSemaphoresInstruction) + sizeof(RDD::UniformSetID) * wait_semaphore_instruction->count;
+ } break;
case DrawListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE: {
const DrawListUniformSetPrepareForUseInstruction *uniform_set_prepare_for_use_instruction = reinterpret_cast(instruction);
driver->command_uniform_set_prepare_for_use(p_command_buffer, uniform_set_prepare_for_use_instruction->uniform_set, uniform_set_prepare_for_use_instruction->shader, uniform_set_prepare_for_use_instruction->set_index);
@@ -968,6 +1118,11 @@ void RenderingDeviceGraph::_wait_for_secondary_command_buffer_tasks() {
}
void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count, RDD::CommandBufferID &r_command_buffer, CommandBufferPool &r_command_buffer_pool, int32_t &r_current_label_index, int32_t &r_current_label_level) {
+ // signal_semaphores_list[0] stores signal_semaphores of frames[frame].command_buffer
+ if (r_command_buffer_pool.signal_semaphores_list.size() == 0) {
+ r_command_buffer_pool.signal_semaphores_list.resize(1);
+ }
+
for (uint32_t i = 0; i < p_sorted_commands_count; i++) {
const uint32_t command_index = p_sorted_commands[i].index;
const uint32_t command_data_offset = command_data_offsets[command_index];
@@ -999,9 +1154,21 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC
driver_callback_command->callback(driver, r_command_buffer, driver_callback_command->userdata);
} break;
case RecordedCommand::TYPE_COMPUTE_LIST: {
- if (device.workarounds.avoid_compute_after_draw && workarounds_state.draw_list_found) {
- // Avoid compute after draw workaround. Refer to the comment that enables this in the Vulkan driver for more information.
- workarounds_state.draw_list_found = false;
+ const RecordedComputeListCommand *compute_list_command = reinterpret_cast(command);
+ const ComputeListSemaphoresInstruction *signal_semaphores_instruction = reinterpret_cast(_get_compute_list_command_instruction(compute_list_command->instruction_data(), compute_list_command->instruction_data_size, ComputeListInstruction::TYPE_SET_SIGNAL_SEMAPHORES));
+ const ComputeListSemaphoresInstruction *wait_semaphores_instruction = reinterpret_cast(_get_compute_list_command_instruction(compute_list_command->instruction_data(), compute_list_command->instruction_data_size, ComputeListInstruction::TYPE_SET_WAIT_SEMAPHORES));
+
+ bool has_signal_semaphores = signal_semaphores_instruction != nullptr && signal_semaphores_instruction->count;
+ bool has_wait_semaphores = wait_semaphores_instruction != nullptr && wait_semaphores_instruction->count;
+ bool split_cmd_buffer = has_wait_semaphores;
+ bool avoid_compute_after_draw = device.workarounds.avoid_compute_after_draw && workarounds_state.draw_list_found;
+ bool has_new_cmd_buffer = split_cmd_buffer || avoid_compute_after_draw;
+
+ if (has_new_cmd_buffer) {
+ if (avoid_compute_after_draw) {
+ // Avoid compute after draw workaround. Refer to the comment that enables this in the Vulkan driver for more information.
+ workarounds_state.draw_list_found = false;
+ }
// Create or reuse a command buffer and finish recording the current one.
driver->command_buffer_end(r_command_buffer);
@@ -1011,15 +1178,44 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC
RDD::SemaphoreID command_semaphore = driver->semaphore_create();
r_command_buffer_pool.buffers.push_back(command_buffer);
r_command_buffer_pool.semaphores.push_back(command_semaphore);
+ r_command_buffer_pool.signal_semaphores_list.push_back(LocalVector());
+ r_command_buffer_pool.wait_semaphores_list.push_back(LocalVector());
}
// Start recording on the next usable command buffer from the pool.
uint32_t command_buffer_index = r_command_buffer_pool.buffers_used++;
r_command_buffer = r_command_buffer_pool.buffers[command_buffer_index];
driver->command_buffer_begin(r_command_buffer);
+
+ // wait_semaphores of buffers[i] are stored in wait_semaphores_list[i]
+ if (has_wait_semaphores) {
+ const RDD::SemaphoreID *ids = wait_semaphores_instruction->semaphore_ids();
+ LocalVector wait_semaphores;
+ for (uint32_t j = 0; j < wait_semaphores_instruction->count; j++) {
+ wait_semaphores.push_back(ids[j]);
+ }
+ r_command_buffer_pool.wait_semaphores_list[command_buffer_index] = wait_semaphores;
+ }
+ }
+
+ if (has_signal_semaphores) {
+ // signal_semaphores of buffers[i] are stored in signal_semaphores_list[i + 1]
+ // signal_semaphores_list[0] stores signal_semaphores of frames[frame].command_buffer
+ uint32_t signal_index = r_command_buffer_pool.buffers_used;
+ const RDD::SemaphoreID *ids = signal_semaphores_instruction->semaphore_ids();
+ LocalVector signal_semaphores;
+ const LocalVector &_signal_semaphores = r_command_buffer_pool.signal_semaphores_list[signal_index];
+
+ for (uint32_t j = 0; j < signal_semaphores_instruction->count; ++j) {
+ signal_semaphores.push_back(ids[j]);
+ }
+ for (uint32_t j = 0; j < _signal_semaphores.size(); j++) {
+ signal_semaphores.push_back(_signal_semaphores[j]);
+ }
+
+ r_command_buffer_pool.signal_semaphores_list[signal_index] = signal_semaphores;
}
- const RecordedComputeListCommand *compute_list_command = reinterpret_cast(command);
_run_compute_list_command(r_command_buffer, compute_list_command->instruction_data(), compute_list_command->instruction_data_size);
} break;
case RecordedCommand::TYPE_DRAW_LIST: {
@@ -1029,8 +1225,14 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC
}
const RecordedDrawListCommand *draw_list_command = reinterpret_cast(command);
+ const DrawListSemaphoresInstruction *signal_semaphores_instruction = reinterpret_cast(_get_draw_list_command_instruction(draw_list_command->instruction_data(), draw_list_command->instruction_data_size, DrawListInstruction::TYPE_SET_SIGNAL_SEMAPHORES));
+ const DrawListSemaphoresInstruction *wait_semaphores_instruction = reinterpret_cast(_get_draw_list_command_instruction(draw_list_command->instruction_data(), draw_list_command->instruction_data_size, DrawListInstruction::TYPE_SET_WAIT_SEMAPHORES));
- if (draw_list_command->split_cmd_buffer) {
+ bool has_signal_semaphores = signal_semaphores_instruction != nullptr && signal_semaphores_instruction->count;
+ bool has_wait_semaphores = wait_semaphores_instruction != nullptr && wait_semaphores_instruction->count;
+ bool split_cmd_buffer = has_wait_semaphores || draw_list_command->split_cmd_buffer;
+
+ if (split_cmd_buffer) {
// Create or reuse a command buffer and finish recording the current one.
driver->command_buffer_end(r_command_buffer);
@@ -1039,12 +1241,42 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC
RDD::SemaphoreID command_semaphore = driver->semaphore_create();
r_command_buffer_pool.buffers.push_back(command_buffer);
r_command_buffer_pool.semaphores.push_back(command_semaphore);
+ r_command_buffer_pool.signal_semaphores_list.push_back(LocalVector());
+ r_command_buffer_pool.wait_semaphores_list.push_back(LocalVector());
}
// Start recording on the next usable command buffer from the pool.
uint32_t command_buffer_index = r_command_buffer_pool.buffers_used++;
r_command_buffer = r_command_buffer_pool.buffers[command_buffer_index];
driver->command_buffer_begin(r_command_buffer);
+
+ // wait_semaphores of buffers[i] are stored in wait_semaphores_list[i]
+ if (has_wait_semaphores) {
+ const RDD::SemaphoreID *ids = wait_semaphores_instruction->semaphore_ids();
+ LocalVector wait_semaphores;
+ for (uint32_t j = 0; j < wait_semaphores_instruction->count; j++) {
+ wait_semaphores.push_back(ids[j]);
+ }
+ r_command_buffer_pool.wait_semaphores_list[command_buffer_index] = wait_semaphores;
+ }
+ }
+
+ if (has_signal_semaphores) {
+ // signal_semaphores of buffers[i] are stored in signal_semaphores_list[i + 1]
+ // signal_semaphores_list[0] stores signal_semaphores of frames[frame].command_buffer
+ uint32_t signal_index = r_command_buffer_pool.buffers_used;
+ const RDD::SemaphoreID *ids = signal_semaphores_instruction->semaphore_ids();
+ LocalVector signal_semaphores;
+ const LocalVector &_signal_semaphores = r_command_buffer_pool.signal_semaphores_list[signal_index];
+
+ for (uint32_t j = 0; j < signal_semaphores_instruction->count; ++j) {
+ signal_semaphores.push_back(ids[j]);
+ }
+ for (uint32_t j = 0; j < _signal_semaphores.size(); j++) {
+ signal_semaphores.push_back(_signal_semaphores[j]);
+ }
+
+ r_command_buffer_pool.signal_semaphores_list[signal_index] = signal_semaphores;
}
const VectorView clear_values(draw_list_command->clear_values(), draw_list_command->clear_values_count);
@@ -1743,6 +1975,30 @@ void RenderingDeviceGraph::add_compute_list_set_push_constant(RDD::ShaderID p_sh
memcpy(instruction->data(), p_data, p_data_size);
}
+// Allocates a TYPE_SET_SIGNAL_SEMAPHORES instruction through _allocate_compute_list_instruction() and copies
+// every ID from p_signal_semaphores into the storage that directly follows the instruction header.
+void RenderingDeviceGraph::add_compute_list_set_signal_semaphores(VectorView<RDD::SemaphoreID> p_signal_semaphores) {
+	// The trailing array holds RDD::SemaphoreID elements, so the allocation is sized with that type.
+	// NOTE(review): code that walks the instruction stream must use the same stride - TODO confirm all RDD ID types share one size.
+	uint32_t instruction_size = sizeof(ComputeListSemaphoresInstruction) + sizeof(RDD::SemaphoreID) * p_signal_semaphores.size();
+	ComputeListSemaphoresInstruction *instruction = reinterpret_cast<ComputeListSemaphoresInstruction *>(_allocate_compute_list_instruction(instruction_size));
+	instruction->type = ComputeListInstruction::TYPE_SET_SIGNAL_SEMAPHORES;
+	instruction->count = p_signal_semaphores.size();
+
+	RDD::SemaphoreID *ids = instruction->semaphore_ids();
+	for (uint32_t i = 0; i < p_signal_semaphores.size(); i++) {
+		ids[i] = p_signal_semaphores[i];
+	}
+}
+
+// Allocates a TYPE_SET_WAIT_SEMAPHORES instruction through _allocate_compute_list_instruction() and copies
+// every ID from p_wait_semaphores into the storage that directly follows the instruction header.
+void RenderingDeviceGraph::add_compute_list_set_wait_semaphores(VectorView<RDD::SemaphoreID> p_wait_semaphores) {
+	// The trailing array holds RDD::SemaphoreID elements, so the allocation is sized with that type.
+	// NOTE(review): code that walks the instruction stream must use the same stride - TODO confirm all RDD ID types share one size.
+	uint32_t instruction_size = sizeof(ComputeListSemaphoresInstruction) + sizeof(RDD::SemaphoreID) * p_wait_semaphores.size();
+	ComputeListSemaphoresInstruction *instruction = reinterpret_cast<ComputeListSemaphoresInstruction *>(_allocate_compute_list_instruction(instruction_size));
+	instruction->type = ComputeListInstruction::TYPE_SET_WAIT_SEMAPHORES;
+	instruction->count = p_wait_semaphores.size();
+
+	RDD::SemaphoreID *ids = instruction->semaphore_ids();
+	for (uint32_t i = 0; i < p_wait_semaphores.size(); i++) {
+		ids[i] = p_wait_semaphores[i];
+	}
+}
+
void RenderingDeviceGraph::add_compute_list_uniform_set_prepare_for_use(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index) {
ComputeListUniformSetPrepareForUseInstruction *instruction = reinterpret_cast(_allocate_compute_list_instruction(sizeof(ComputeListUniformSetPrepareForUseInstruction)));
instruction->type = ComputeListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE;
@@ -1941,6 +2197,30 @@ void RenderingDeviceGraph::add_draw_list_set_push_constant(RDD::ShaderID p_shade
memcpy(instruction->data(), p_data, p_data_size);
}
+// Allocates a TYPE_SET_SIGNAL_SEMAPHORES instruction through _allocate_draw_list_instruction() and copies
+// every ID from p_signal_semaphores into the storage that directly follows the instruction header.
+void RenderingDeviceGraph::add_draw_list_set_signal_semaphores(VectorView<RDD::SemaphoreID> p_signal_semaphores) {
+	// The trailing array holds RDD::SemaphoreID elements, so the allocation is sized with that type.
+	// NOTE(review): code that walks the instruction stream must use the same stride - TODO confirm all RDD ID types share one size.
+	uint32_t instruction_size = sizeof(DrawListSemaphoresInstruction) + sizeof(RDD::SemaphoreID) * p_signal_semaphores.size();
+	DrawListSemaphoresInstruction *instruction = reinterpret_cast<DrawListSemaphoresInstruction *>(_allocate_draw_list_instruction(instruction_size));
+	instruction->type = DrawListInstruction::TYPE_SET_SIGNAL_SEMAPHORES;
+	instruction->count = p_signal_semaphores.size();
+
+	RDD::SemaphoreID *ids = instruction->semaphore_ids();
+	for (uint32_t i = 0; i < p_signal_semaphores.size(); i++) {
+		ids[i] = p_signal_semaphores[i];
+	}
+}
+
+// Allocates a TYPE_SET_WAIT_SEMAPHORES instruction through _allocate_draw_list_instruction() and copies
+// every ID from p_wait_semaphores into the storage that directly follows the instruction header.
+void RenderingDeviceGraph::add_draw_list_set_wait_semaphores(VectorView<RDD::SemaphoreID> p_wait_semaphores) {
+	// The trailing array holds RDD::SemaphoreID elements, so the allocation is sized with that type.
+	// NOTE(review): code that walks the instruction stream must use the same stride - TODO confirm all RDD ID types share one size.
+	uint32_t instruction_size = sizeof(DrawListSemaphoresInstruction) + sizeof(RDD::SemaphoreID) * p_wait_semaphores.size();
+	DrawListSemaphoresInstruction *instruction = reinterpret_cast<DrawListSemaphoresInstruction *>(_allocate_draw_list_instruction(instruction_size));
+	instruction->type = DrawListInstruction::TYPE_SET_WAIT_SEMAPHORES;
+	instruction->count = p_wait_semaphores.size();
+
+	RDD::SemaphoreID *ids = instruction->semaphore_ids();
+	for (uint32_t i = 0; i < p_wait_semaphores.size(); i++) {
+		ids[i] = p_wait_semaphores[i];
+	}
+}
+
void RenderingDeviceGraph::add_draw_list_set_scissor(Rect2i p_rect) {
DrawListSetScissorInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(sizeof(DrawListSetScissorInstruction)));
instruction->type = DrawListInstruction::TYPE_SET_SCISSOR;
diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h
index 567893e6fe8..82fb24876ca 100644
--- a/servers/rendering/rendering_device_graph.h
+++ b/servers/rendering/rendering_device_graph.h
@@ -53,6 +53,8 @@ public:
TYPE_DISPATCH,
TYPE_DISPATCH_INDIRECT,
TYPE_SET_PUSH_CONSTANT,
+ TYPE_SET_SIGNAL_SEMAPHORES,
+ TYPE_SET_WAIT_SEMAPHORES,
TYPE_UNIFORM_SET_PREPARE_FOR_USE
};
@@ -77,7 +79,9 @@ public:
TYPE_SET_LINE_WIDTH,
TYPE_SET_PUSH_CONSTANT,
TYPE_SET_SCISSOR,
+ TYPE_SET_SIGNAL_SEMAPHORES,
TYPE_SET_VIEWPORT,
+ TYPE_SET_WAIT_SEMAPHORES,
TYPE_UNIFORM_SET_PREPARE_FOR_USE
};
@@ -218,7 +222,10 @@ public:
// Created internally by RenderingDeviceGraph.
LocalVector buffers;
+ // buffers[i - 1] waits on semaphores[i - 1] and wait_semaphores_list[i - 1], and signals semaphores[i] and signal_semaphores_list[i].
LocalVector semaphores;
+ LocalVector> signal_semaphores_list;
+ LocalVector> wait_semaphores_list;
uint32_t buffers_used = 0;
};
@@ -610,6 +617,18 @@ private:
uint32_t set_index = 0;
};
+	// Header of a draw list instruction that carries a variable number of semaphore IDs.
+	// Shared by TYPE_SET_SIGNAL_SEMAPHORES and TYPE_SET_WAIT_SEMAPHORES. The IDs are not a member:
+	// semaphore_ids() points at the memory immediately following this struct.
+	struct DrawListSemaphoresInstruction : DrawListInstruction {
+		// Number of RDD::SemaphoreID entries stored right after this header.
+		uint32_t count = 0;
+
+		// Mutable access to the trailing ID array; used while filling in a newly allocated instruction.
+		_FORCE_INLINE_ RDD::SemaphoreID *semaphore_ids() {
+			return reinterpret_cast<RDD::SemaphoreID *>(&this[1]);
+		}
+
+		// Read-only access to the trailing ID array.
+		_FORCE_INLINE_ const RDD::SemaphoreID *semaphore_ids() const {
+			return reinterpret_cast<const RDD::SemaphoreID *>(&this[1]);
+		}
+	};
+
struct ComputeListBindPipelineInstruction : ComputeListInstruction {
RDD::PipelineID pipeline;
};
@@ -658,6 +677,18 @@ private:
uint32_t set_index = 0;
};
+	// Header of a compute list instruction that carries a variable number of semaphore IDs.
+	// Shared by TYPE_SET_SIGNAL_SEMAPHORES and TYPE_SET_WAIT_SEMAPHORES. The IDs are not a member:
+	// semaphore_ids() points at the memory immediately following this struct.
+	struct ComputeListSemaphoresInstruction : ComputeListInstruction {
+		// Number of RDD::SemaphoreID entries stored right after this header.
+		uint32_t count = 0;
+
+		// Mutable access to the trailing ID array; used while filling in a newly allocated instruction.
+		_FORCE_INLINE_ RDD::SemaphoreID *semaphore_ids() {
+			return reinterpret_cast<RDD::SemaphoreID *>(&this[1]);
+		}
+
+		// Read-only access to the trailing ID array.
+		_FORCE_INLINE_ const RDD::SemaphoreID *semaphore_ids() const {
+			return reinterpret_cast<const RDD::SemaphoreID *>(&this[1]);
+		}
+	};
+
struct BarrierGroup {
BitField src_stages;
BitField dst_stages;
@@ -749,8 +780,10 @@ private:
#if USE_BUFFER_BARRIERS
void _add_buffer_barrier_to_command(RDD::BufferID p_buffer_id, BitField p_src_access, BitField p_dst_access, int32_t &r_barrier_index, int32_t &r_barrier_count);
#endif
+ const ComputeListInstruction *_get_compute_list_command_instruction(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size, ComputeListInstruction::Type p_type);
void _run_compute_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size);
void _get_draw_list_render_pass_and_framebuffer(const RecordedDrawListCommand *p_draw_list_command, RDD::RenderPassID &r_render_pass, RDD::FramebufferID &r_framebuffer);
+ const DrawListInstruction *_get_draw_list_command_instruction(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size, DrawListInstruction::Type p_type);
void _run_draw_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size);
void _add_draw_list_begin(FramebufferCache *p_framebuffer_cache, RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, bool p_uses_color, bool p_uses_depth, uint32_t p_breadcrumb, bool p_split_cmd_buffer);
void _run_secondary_command_buffer_task(const SecondaryCommandBuffer *p_secondary);
@@ -781,6 +814,8 @@ public:
void add_compute_list_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups);
void add_compute_list_dispatch_indirect(RDD::BufferID p_buffer, uint32_t p_offset);
void add_compute_list_set_push_constant(RDD::ShaderID p_shader, const void *p_data, uint32_t p_data_size);
+ void add_compute_list_set_signal_semaphores(VectorView p_signal_semaphore);
+ void add_compute_list_set_wait_semaphores(VectorView p_wait_semaphore);
void add_compute_list_uniform_set_prepare_for_use(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index);
void add_compute_list_usage(ResourceTracker *p_tracker, ResourceUsage p_usage);
void add_compute_list_usages(VectorView p_trackers, VectorView p_usages);
@@ -802,6 +837,8 @@ public:
void add_draw_list_set_blend_constants(const Color &p_color);
void add_draw_list_set_line_width(float p_width);
void add_draw_list_set_push_constant(RDD::ShaderID p_shader, const void *p_data, uint32_t p_data_size);
+ void add_draw_list_set_signal_semaphores(VectorView p_signal_semaphore);
+ void add_draw_list_set_wait_semaphores(VectorView p_wait_semaphore);
void add_draw_list_set_scissor(Rect2i p_rect);
void add_draw_list_set_viewport(Rect2i p_rect);
void add_draw_list_uniform_set_prepare_for_use(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index);