diff --git a/doc/classes/RenderingDevice.xml b/doc/classes/RenderingDevice.xml index 6ec7918d3b2..3daa463f47b 100644 --- a/doc/classes/RenderingDevice.xml +++ b/doc/classes/RenderingDevice.xml @@ -191,6 +191,51 @@ Sets the push constant data to [param buffer] for the specified [param compute_list]. The shader determines how this binary data is used. The buffer's size in bytes must also be specified in [param size_bytes] (this can be obtained by calling the [method PackedByteArray.size] method on the passed [param buffer]). + + + + + + Sets the signal semaphores explicitly for the specified [param compute_list]. For usage, refer to [method compute_list_set_wait_semaphores]. + + + + + + + + Sets the wait semaphores explicitly for the specified [param compute_list]. Before using it, some previously created compute lists must have an explicit signal semaphore. + [b]Note:[/b] A semaphore can only be used once as a signal semaphore and once as a wait semaphore before syncing. + A simple code example for this situation (code is not a complete example): + [codeblock] + var rd = RenderingDevice.new() + var prev_compute_list = rd.compute_list_begin() + var prev_semaphore1 = rd.semaphore_create() + var prev_semaphore2 = rd.semaphore_create() + rd.compute_list_set_signal_semaphores(prev_compute_list, [prev_semaphore1, prev_semaphore2]) + rd.compute_list_end() + + var cur_compute_list1 = rd.compute_list_begin() + var cur_semaphore1 = rd.semaphore_create() + rd.compute_list_set_wait_semaphores(cur_compute_list1, [prev_semaphore1]) + rd.compute_list_set_signal_semaphores(cur_compute_list1, [cur_semaphore1]) + # Replace with the custom code + rd.compute_list_end() + + var cur_compute_list2 = rd.compute_list_begin() + var cur_semaphore2 = rd.semaphore_create() + rd.compute_list_set_wait_semaphores(cur_compute_list2, [prev_semaphore2]) + rd.compute_list_set_signal_semaphores(cur_compute_list2, [cur_semaphore2]) + # Replace with the custom code + rd.compute_list_end() + + var 
next_compute_list = rd.compute_list_begin() + rd.compute_list_set_wait_semaphores(next_compute_list, [cur_semaphore1, cur_semaphore2]) + # Replace with the custom code + rd.compute_list_end() + [/codeblock] + + @@ -395,6 +440,22 @@ Sets the push constant data to [param buffer] for the specified [param draw_list]. The shader determines how this binary data is used. The buffer's size in bytes must also be specified in [param size_bytes] (this can be obtained by calling the [method PackedByteArray.size] method on the passed [param buffer]). + + + + + + Sets the signal semaphores explicitly for the specified [param draw_list]. Its usage is similar to [method compute_list_set_signal_semaphores]. + + + + + + + + Sets the wait semaphores explicitly for the specified [param draw_list]. Its usage is similar to [method compute_list_set_wait_semaphores]. + + @@ -783,6 +844,27 @@ [b]Note:[/b] Only the main [RenderingDevice] returned by [method RenderingServer.get_rendering_device] has a width. If called on a local [RenderingDevice], this method prints an error and returns [constant INVALID_ID]. + + + + Creates a new semaphore. It can be accessed with the RID that is returned. + Once finished with your RID, you will want to free the RID using the RenderingDevice's [method free_rid] method. + + + + + + + Returns an RID for an existing [param semaphore] ([code]VkSemaphore[/code]). This can be used to allow foreign semaphores to participate in Godot's rendering pipeline. + + + + + + + Returns [code]true[/code] if the semaphore specified by the [param semaphore] RID is valid, [code]false[/code] otherwise. + + @@ -1193,6 +1275,9 @@ - Vulkan: [code]VkPipeline[/code]. + + - Vulkan: [code]VkSemaphore[/code]. 
+ diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index 88d0ac28ebe..95bc00e7cbe 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -2194,6 +2194,14 @@ RDD::SemaphoreID RenderingDeviceDriverD3D12::semaphore_create() { return SemaphoreID(semaphore); } +RDD::SemaphoreID RenderingDeviceDriverD3D12::semaphore_create_from_extension(uint64_t p_native_semaphore) { + ComPtr d3d_fence((ID3D12Fence *)p_native_semaphore); + + SemaphoreInfo *semaphore = memnew(SemaphoreInfo); + semaphore->d3d_fence = d3d_fence; + return SemaphoreID(semaphore); +} + void RenderingDeviceDriverD3D12::semaphore_free(SemaphoreID p_semaphore) { SemaphoreInfo *semaphore = (SemaphoreInfo *)(p_semaphore.id); memdelete(semaphore); @@ -6122,6 +6130,10 @@ void RenderingDeviceDriverD3D12::set_object_name(ObjectType p_type, ID p_driver_ const PipelineInfo *pipeline_info = (const PipelineInfo *)p_driver_id.id; _set_object_name(pipeline_info->pso, p_name); } break; + case OBJECT_TYPE_SEMAPHORE: { + const SemaphoreInfo *semaphore_info = (const SemaphoreInfo *)p_driver_id.id; + _set_object_name(semaphore_info->d3d_fence.Get(), p_name); + } break; default: { DEV_ASSERT(false); } @@ -6167,6 +6179,13 @@ uint64_t RenderingDeviceDriverD3D12::get_resource_native_handle(DriverResource p case DRIVER_RESOURCE_COMPUTE_PIPELINE: case DRIVER_RESOURCE_RENDER_PIPELINE: { return p_driver_id.id; + } break; + case DRIVER_RESOURCE_SEMAPHORE: { + // Note: Here only the fence of d3d12 is returned. + // When using this resource explicitly, we need to clearly understand the changes of the `fence_value` + // inside the engine during the wait and signal process. 
+ const SemaphoreInfo *semaphore_info = (const SemaphoreInfo *)p_driver_id.id; + return reinterpret_cast(semaphore_info->d3d_fence.Get()); } default: { return 0; diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h index 50d0f2cddf8..dc5eb5a30bc 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.h +++ b/drivers/d3d12/rendering_device_driver_d3d12.h @@ -404,6 +404,7 @@ private: }; virtual SemaphoreID semaphore_create() override; + virtual SemaphoreID semaphore_create_from_extension(uint64_t p_native_semaphore) override final; virtual void semaphore_free(SemaphoreID p_semaphore) override; /******************/ diff --git a/drivers/metal/rendering_device_driver_metal.h b/drivers/metal/rendering_device_driver_metal.h index 5ac1bab4c1b..3ffbe29f380 100644 --- a/drivers/metal/rendering_device_driver_metal.h +++ b/drivers/metal/rendering_device_driver_metal.h @@ -169,6 +169,7 @@ public: public: virtual SemaphoreID semaphore_create() override final; + virtual SemaphoreID semaphore_create_from_extension(uint64_t p_native_semaphore) override final; virtual void semaphore_free(SemaphoreID p_semaphore) override final; #pragma mark - Commands diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm index 2e32e99c2eb..3c6d891920a 100644 --- a/drivers/metal/rendering_device_driver_metal.mm +++ b/drivers/metal/rendering_device_driver_metal.mm @@ -840,6 +840,11 @@ RDD::SemaphoreID RenderingDeviceDriverMetal::semaphore_create() { return SemaphoreID(1); } +RDD::SemaphoreID RenderingDeviceDriverMetal::semaphore_create_from_extension(uint64_t p_native_semaphore) { + // TODO: Use a wrapper of MTLFence as Metal Semaphore. 
+ return SemaphoreID(1); +} + void RenderingDeviceDriverMetal::semaphore_free(SemaphoreID p_semaphore) { } @@ -3833,6 +3838,9 @@ void RenderingDeviceDriverMetal::set_object_name(ObjectType p_type, ID p_driver_ case OBJECT_TYPE_PIPELINE: { // Can't set label after creation. } break; + case OBJECT_TYPE_SEMAPHORE: { + // Semaphore is not currently implemented on the Metal backend + } break; default: { DEV_ASSERT(false); } @@ -3882,6 +3890,9 @@ uint64_t RenderingDeviceDriverMetal::get_resource_native_handle(DriverResource p MDRenderPipeline *pipeline = (MDRenderPipeline *)(p_driver_id.id); return (uint64_t)(uintptr_t)(__bridge void *)pipeline->state; } + case DRIVER_RESOURCE_SEMAPHORE: { + return p_driver_id.id; + } default: { return 0; } diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index 45c574914d6..253c75bf20d 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -515,6 +515,17 @@ Error RenderingDeviceDriverVulkan::_initialize_device_extensions() { _register_requested_device_extension(VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME, false); _register_requested_device_extension(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, false); + // Enable external memory and synchronization to facilitate the use of `texture_create_from_extension` and `semaphore_create_from_extension`. 
+ _register_requested_device_extension(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME, false); +#ifdef _WIN64 + _register_requested_device_extension(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, false); +#else + _register_requested_device_extension(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, false); +#endif + if (Engine::get_singleton()->is_generate_spirv_debug_info_enabled()) { _register_requested_device_extension(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, true); } @@ -2424,6 +2435,10 @@ RDD::SemaphoreID RenderingDeviceDriverVulkan::semaphore_create() { return SemaphoreID(semaphore); } +RDD::SemaphoreID RenderingDeviceDriverVulkan::semaphore_create_from_extension(uint64_t p_native_semaphore) { + return SemaphoreID((VkSemaphore)p_native_semaphore); +} + void RenderingDeviceDriverVulkan::semaphore_free(SemaphoreID p_semaphore) { vkDestroySemaphore(vk_device, VkSemaphore(p_semaphore.id), VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE)); } @@ -5732,6 +5747,9 @@ void RenderingDeviceDriverVulkan::set_object_name(ObjectType p_type, ID p_driver case OBJECT_TYPE_PIPELINE: { _set_object_name(VK_OBJECT_TYPE_PIPELINE, (uint64_t)p_driver_id.id, p_name); } break; + case OBJECT_TYPE_SEMAPHORE: { + _set_object_name(VK_OBJECT_TYPE_SEMAPHORE, (uint64_t)p_driver_id.id, p_name); + } break; default: { DEV_ASSERT(false); } @@ -5772,7 +5790,8 @@ uint64_t RenderingDeviceDriverVulkan::get_resource_native_handle(DriverResource case DRIVER_RESOURCE_UNIFORM_SET: case DRIVER_RESOURCE_BUFFER: case DRIVER_RESOURCE_COMPUTE_PIPELINE: - case DRIVER_RESOURCE_RENDER_PIPELINE: { + case DRIVER_RESOURCE_RENDER_PIPELINE: + case DRIVER_RESOURCE_SEMAPHORE: { return p_driver_id.id; } default: { diff --git 
a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index ea12450a4cd..3dadb0360ef 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -296,6 +296,7 @@ public: /********************/ virtual SemaphoreID semaphore_create() override final; + virtual SemaphoreID semaphore_create_from_extension(uint64_t p_native_semaphore) override final; virtual void semaphore_free(SemaphoreID p_semaphore) override final; /******************/ diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 9716a42c97b..8ad15be4f59 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -4086,6 +4086,34 @@ bool RenderingDevice::compute_pipeline_is_valid(RID p_pipeline) { return compute_pipeline_owner.owns(p_pipeline); } +/*******************/ +/**** SEMAPHORE ****/ +/*******************/ + +RID RenderingDevice::semaphore_create() { + ERR_FAIL_COND_V_MSG(::OS::get_singleton()->get_current_rendering_driver_name() == "metal", RID(), "The current metal backend does not use Semaphore."); + Semaphore semaphore; + semaphore.driver_id = driver->semaphore_create(); + RID id = semaphore_owner.make_rid(semaphore); + + return id; +} + +RID RenderingDevice::semaphore_create_from_extension(uint64_t p_image) { + ERR_FAIL_COND_V_MSG(::OS::get_singleton()->get_current_rendering_driver_name() == "metal", RID(), "The current metal backend does not use Semaphore."); + Semaphore semaphore; + semaphore.driver_id = driver->semaphore_create_from_extension(p_image); /* Wrap the caller-supplied native handle instead of allocating a new semaphore. */ + RID id = semaphore_owner.make_rid(semaphore); + + return id; +} + +bool RenderingDevice::semaphore_is_valid(RID p_semaphore) { + _THREAD_SAFE_METHOD_ + + return semaphore_owner.owns(p_semaphore); +} + /****************/ /**** SCREEN ****/ /****************/ @@ -4636,6 +4664,38 @@ void RenderingDevice::draw_list_set_push_constant(DrawListID p_list, const void #endif } +void
RenderingDevice::draw_list_set_signal_semaphores(DrawListID p_list, const TypedArray &p_signal_semaphores) { + ERR_RENDER_THREAD_GUARD(); + + DrawList *dl = _get_draw_list_ptr(p_list); + ERR_FAIL_NULL(dl); + + LocalVector signal_semaphores; + for (int i = 0; i < p_signal_semaphores.size(); i++) { + const Semaphore *signal_semaphore = semaphore_owner.get_or_null(p_signal_semaphores[i]); + ERR_FAIL_NULL(signal_semaphore); + signal_semaphores.push_back(signal_semaphore->driver_id); + } + + draw_graph.add_draw_list_set_signal_semaphores(signal_semaphores); +} + +void RenderingDevice::draw_list_set_wait_semaphores(DrawListID p_list, const TypedArray &p_wait_semaphores) { + ERR_RENDER_THREAD_GUARD(); + + DrawList *dl = _get_draw_list_ptr(p_list); + ERR_FAIL_NULL(dl); + + LocalVector wait_semaphores; + for (int i = 0; i < p_wait_semaphores.size(); i++) { + const Semaphore *wait_semaphore = semaphore_owner.get_or_null(p_wait_semaphores[i]); + ERR_FAIL_NULL(wait_semaphore); + wait_semaphores.push_back(wait_semaphore->driver_id); + } + + draw_graph.add_draw_list_set_wait_semaphores(wait_semaphores); +} + void RenderingDevice::draw_list_draw(DrawListID p_list, bool p_use_indices, uint32_t p_instances, uint32_t p_procedural_vertices) { ERR_RENDER_THREAD_GUARD(); @@ -5201,6 +5261,46 @@ void RenderingDevice::compute_list_set_push_constant(ComputeListID p_list, const #endif } +void RenderingDevice::compute_list_set_signal_semaphores(ComputeListID p_list, const TypedArray &p_signal_semaphores) { + ERR_RENDER_THREAD_GUARD(); + + ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); + ERR_FAIL_NULL(compute_list); + ComputeList *cl = compute_list; + + LocalVector signal_semaphores; + for (int i = 0; i < p_signal_semaphores.size(); i++) { + const Semaphore *signal_semaphore = semaphore_owner.get_or_null(p_signal_semaphores[i]); + ERR_FAIL_NULL(signal_semaphore); + signal_semaphores.push_back(signal_semaphore->driver_id); + } + + 
draw_graph.add_compute_list_set_signal_semaphores(signal_semaphores); + + // Store it in the state in case we need to restart the compute list. + cl->state.signal_semaphores = p_signal_semaphores; +} + +void RenderingDevice::compute_list_set_wait_semaphores(ComputeListID p_list, const TypedArray &p_wait_semaphores) { + ERR_RENDER_THREAD_GUARD(); + + ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); + ERR_FAIL_NULL(compute_list); + ComputeList *cl = compute_list; + + LocalVector wait_semaphores; + for (int i = 0; i < p_wait_semaphores.size(); i++) { + const Semaphore *wait_semaphore = semaphore_owner.get_or_null(p_wait_semaphores[i]); + ERR_FAIL_NULL(wait_semaphore); + wait_semaphores.push_back(wait_semaphore->driver_id); + } + + draw_graph.add_compute_list_set_wait_semaphores(wait_semaphores); + + // Store it in the state in case we need to restart the compute list. + cl->state.wait_semaphores = p_wait_semaphores; +} + void RenderingDevice::compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) { ERR_RENDER_THREAD_GUARD(); @@ -5483,6 +5583,9 @@ void RenderingDevice::compute_list_add_barrier(ComputeListID p_list) { compute_list_bind_compute_pipeline(p_list, compute_list_barrier_state.pipeline); } + ERR_FAIL_COND_MSG(compute_list_barrier_state.signal_semaphores.size() > 0, "Barriers cannot be added to a compute list that contains explicit signal semaphores."); + ERR_FAIL_COND_MSG(compute_list_barrier_state.wait_semaphores.size() > 0, "Barriers cannot be added to a compute list that contains explicit wait semaphores."); + for (uint32_t i = 0; i < compute_list_barrier_state.set_count; i++) { if (compute_list_barrier_state.sets[i].uniform_set.is_valid()) { compute_list_bind_uniform_set(p_list, compute_list_barrier_state.sets[i].uniform_set, i); @@ -6099,6 +6202,10 @@ void RenderingDevice::_free_internal(RID p_id) { ComputePipeline *pipeline = compute_pipeline_owner.get_or_null(p_id); 
frames[frame].compute_pipelines_to_dispose_of.push_back(*pipeline); compute_pipeline_owner.free(p_id); + } else if (semaphore_owner.owns(p_id)) { + Semaphore *semaphore = semaphore_owner.get_or_null(p_id); + frames[frame].semaphores_to_dispose_of.push_back(*semaphore); + semaphore_owner.free(p_id); } else { #ifdef DEV_ENABLED ERR_PRINT("Attempted to free invalid ID: " + itos(p_id.get_id()) + " " + resource_name); @@ -6151,6 +6258,9 @@ void RenderingDevice::set_resource_name(RID p_id, const String &p_name) { } else if (compute_pipeline_owner.owns(p_id)) { ComputePipeline *pipeline = compute_pipeline_owner.get_or_null(p_id); driver->set_object_name(RDD::OBJECT_TYPE_PIPELINE, pipeline->driver_id, p_name); + } else if (semaphore_owner.owns(p_id)) { + Semaphore *semaphore = semaphore_owner.get_or_null(p_id); + driver->set_object_name(RDD::OBJECT_TYPE_SEMAPHORE, semaphore->driver_id, p_name); } else { ERR_PRINT("Attempted to name invalid ID: " + itos(p_id.get_id())); return; @@ -6243,6 +6353,15 @@ void RenderingDevice::sync() { void RenderingDevice::_free_pending_resources(int p_frame) { // Free in dependency usage order, so nothing weird happens. + // Semaphores + while (frames[p_frame].semaphores_to_dispose_of.front()) { + Semaphore *semaphore = &frames[p_frame].semaphores_to_dispose_of.front()->get(); + + driver->semaphore_free(semaphore->driver_id); + + frames[p_frame].semaphores_to_dispose_of.pop_front(); + } + // Pipelines. 
while (frames[p_frame].render_pipelines_to_dispose_of.front()) { RenderPipeline *pipeline = &frames[p_frame].render_pipelines_to_dispose_of.front()->get(); @@ -6432,10 +6551,31 @@ void RenderingDevice::execute_chained_cmds(bool p_present_swap_chain, RenderingD thread_local LocalVector wait_semaphores; wait_semaphores = frames[frame].semaphores_to_wait_on; + uint32_t semaphores_list_count = buffer_pool.signal_semaphores_list.size(); + if (semaphores_list_count < command_buffer_count) { + semaphores_list_count = command_buffer_count; + } + buffer_pool.signal_semaphores_list.resize(semaphores_list_count); + buffer_pool.wait_semaphores_list.resize(semaphores_list_count); + + bool has_explicit_semaphores = false; + for (uint32_t i = 0; i < semaphores_list_count; i++) { + const LocalVector &_signal_semaphores = buffer_pool.signal_semaphores_list[i]; + if (_signal_semaphores.size() > 0) { + has_explicit_semaphores = true; + break; + } + } + has_explicit_semaphores = has_explicit_semaphores && command_buffer_count == semaphores_list_count; + for (uint32_t i = 0; i < command_buffer_count; i++) { RDD::CommandBufferID command_buffer; RDD::SemaphoreID signal_semaphore; RDD::FenceID signal_fence; + + LocalVector signal_semaphores; + const LocalVector &_signal_semaphores = buffer_pool.signal_semaphores_list[i]; + if (i > 0) { command_buffer = buffer_pool.buffers[i - 1]; } else { @@ -6455,14 +6595,26 @@ void RenderingDevice::execute_chained_cmds(bool p_present_swap_chain, RenderingD signal_semaphore = buffer_pool.semaphores[i]; // Semaphores always need to be signaled if it's not the last command buffer. } + if (has_explicit_semaphores && _signal_semaphores.size() > 0) { + signal_semaphores = _signal_semaphores; + buffer_pool.signal_semaphores_list[i].resize(0); + } else if (signal_semaphore) { + signal_semaphores.push_back(signal_semaphore); + } driver->command_queue_execute_and_present(main_queue, wait_semaphores, command_buffer, - signal_semaphore ? 
signal_semaphore : VectorView(), signal_fence, + signal_semaphores, signal_fence, swap_chains); - // Make the next command buffer wait on the semaphore signaled by this one. - wait_semaphores.resize(1); - wait_semaphores[0] = signal_semaphore; + const LocalVector &_wait_semaphores = buffer_pool.wait_semaphores_list[i]; + if (has_explicit_semaphores && _wait_semaphores.size() > 0) { + wait_semaphores = _wait_semaphores; + buffer_pool.wait_semaphores_list[i].resize(0); + } else { + // Make the next command buffer wait on the semaphore signaled by this one. + wait_semaphores.resize(1); + wait_semaphores[0] = signal_semaphore; + } } frames[frame].semaphores_to_wait_on.clear(); @@ -7000,6 +7152,12 @@ uint64_t RenderingDevice::get_driver_resource(DriverResource p_resource, RID p_r driver_id = render_pipeline->driver_id.id; } break; + case DRIVER_RESOURCE_SEMAPHORE: { + Semaphore *semaphore = semaphore_owner.get_or_null(p_rid); + ERR_FAIL_NULL_V(semaphore, 0); + + driver_id = semaphore->driver_id.id; + } break; default: { ERR_FAIL_V(0); } break; @@ -7099,6 +7257,7 @@ void RenderingDevice::finalize() { draw_graph.finalize(); // Free all resources. + _free_rids(semaphore_owner, "Semaphore"); _free_rids(render_pipeline_owner, "Pipeline"); _free_rids(compute_pipeline_owner, "Compute"); _free_rids(uniform_set_owner, "UniformSet"); @@ -7163,6 +7322,7 @@ void RenderingDevice::finalize() { RDG::CommandBufferPool &buffer_pool = frames[i].command_buffer_pool; for (uint32_t j = 0; j < buffer_pool.buffers.size(); j++) { driver->semaphore_free(buffer_pool.semaphores[j]); + // For explicitly set wait_semaphores and signal_semaphores, they need to be free manually. 
} for (uint32_t j = 0; j < frames[i].transfer_worker_semaphores.size(); j++) { @@ -7329,6 +7489,9 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("compute_pipeline_create", "shader", "specialization_constants"), &RenderingDevice::_compute_pipeline_create, DEFVAL(TypedArray())); ClassDB::bind_method(D_METHOD("compute_pipeline_is_valid", "compute_pipeline"), &RenderingDevice::compute_pipeline_is_valid); + ClassDB::bind_method(D_METHOD("semaphore_create"), &RenderingDevice::semaphore_create); + ClassDB::bind_method(D_METHOD("semaphore_is_valid", "semaphore"), &RenderingDevice::semaphore_is_valid); + ClassDB::bind_method(D_METHOD("screen_get_width", "screen"), &RenderingDevice::screen_get_width, DEFVAL(DisplayServer::MAIN_WINDOW_ID)); ClassDB::bind_method(D_METHOD("screen_get_height", "screen"), &RenderingDevice::screen_get_height, DEFVAL(DisplayServer::MAIN_WINDOW_ID)); ClassDB::bind_method(D_METHOD("screen_get_framebuffer_format", "screen"), &RenderingDevice::screen_get_framebuffer_format, DEFVAL(DisplayServer::MAIN_WINDOW_ID)); @@ -7346,6 +7509,8 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("draw_list_bind_vertex_array", "draw_list", "vertex_array"), &RenderingDevice::draw_list_bind_vertex_array); ClassDB::bind_method(D_METHOD("draw_list_bind_index_array", "draw_list", "index_array"), &RenderingDevice::draw_list_bind_index_array); ClassDB::bind_method(D_METHOD("draw_list_set_push_constant", "draw_list", "buffer", "size_bytes"), &RenderingDevice::_draw_list_set_push_constant); + ClassDB::bind_method(D_METHOD("draw_list_set_signal_semaphores", "draw_list", "signal_semaphores"), &RenderingDevice::draw_list_set_signal_semaphores); + ClassDB::bind_method(D_METHOD("draw_list_set_wait_semaphores", "draw_list", "wait_semaphores"), &RenderingDevice::draw_list_set_wait_semaphores); ClassDB::bind_method(D_METHOD("draw_list_draw", "draw_list", "use_indices", "instances", "procedural_vertex_count"), 
&RenderingDevice::draw_list_draw, DEFVAL(0)); ClassDB::bind_method(D_METHOD("draw_list_draw_indirect", "draw_list", "use_indices", "buffer", "offset", "draw_count", "stride"), &RenderingDevice::draw_list_draw_indirect, DEFVAL(0), DEFVAL(1), DEFVAL(0)); @@ -7363,6 +7528,8 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("compute_list_begin"), &RenderingDevice::compute_list_begin); ClassDB::bind_method(D_METHOD("compute_list_bind_compute_pipeline", "compute_list", "compute_pipeline"), &RenderingDevice::compute_list_bind_compute_pipeline); ClassDB::bind_method(D_METHOD("compute_list_set_push_constant", "compute_list", "buffer", "size_bytes"), &RenderingDevice::_compute_list_set_push_constant); + ClassDB::bind_method(D_METHOD("compute_list_set_signal_semaphores", "compute_list", "signal_semaphores"), &RenderingDevice::compute_list_set_signal_semaphores); + ClassDB::bind_method(D_METHOD("compute_list_set_wait_semaphores", "compute_list", "wait_semaphores"), &RenderingDevice::compute_list_set_wait_semaphores); ClassDB::bind_method(D_METHOD("compute_list_bind_uniform_set", "compute_list", "uniform_set", "set_index"), &RenderingDevice::compute_list_bind_uniform_set); ClassDB::bind_method(D_METHOD("compute_list_dispatch", "compute_list", "x_groups", "y_groups", "z_groups"), &RenderingDevice::compute_list_dispatch); ClassDB::bind_method(D_METHOD("compute_list_dispatch_indirect", "compute_list", "buffer", "offset"), &RenderingDevice::compute_list_dispatch_indirect); @@ -7441,6 +7608,7 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(DRIVER_RESOURCE_BUFFER); BIND_ENUM_CONSTANT(DRIVER_RESOURCE_COMPUTE_PIPELINE); BIND_ENUM_CONSTANT(DRIVER_RESOURCE_RENDER_PIPELINE); + BIND_ENUM_CONSTANT(DRIVER_RESOURCE_SEMAPHORE); #ifndef DISABLE_DEPRECATED BIND_ENUM_CONSTANT(DRIVER_RESOURCE_VULKAN_DEVICE); BIND_ENUM_CONSTANT(DRIVER_RESOURCE_VULKAN_PHYSICAL_DEVICE); diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 
10723392a1e..0ff5d5f9693 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -1138,6 +1138,23 @@ public: RID compute_pipeline_create(RID p_shader, const Vector &p_specialization_constants = Vector()); bool compute_pipeline_is_valid(RID p_pipeline); + /*******************/ + /**** SEMAPHORE ****/ + /*******************/ + +private: + struct Semaphore { + RDD::SemaphoreID driver_id; + }; + + RID_Owner semaphore_owner; + +public: + RID semaphore_create(); + RID semaphore_create_from_extension(uint64_t p_image); + + bool semaphore_is_valid(RID p_semaphore); + private: /****************/ /**** SCREEN ****/ @@ -1279,6 +1296,8 @@ public: void draw_list_bind_index_array(DrawListID p_list, RID p_index_array); void draw_list_set_line_width(DrawListID p_list, float p_width); void draw_list_set_push_constant(DrawListID p_list, const void *p_data, uint32_t p_data_size); + void draw_list_set_signal_semaphores(DrawListID p_list, const TypedArray &p_signal_semaphores); + void draw_list_set_wait_semaphores(DrawListID p_list, const TypedArray &p_wait_semaphores); void draw_list_draw(DrawListID p_list, bool p_use_indices, uint32_t p_instances = 1, uint32_t p_procedural_vertices = 0); void draw_list_draw_indirect(DrawListID p_list, bool p_use_indices, RID p_buffer, uint32_t p_offset = 0, uint32_t p_draw_count = 1, uint32_t p_stride = 0); @@ -1316,6 +1335,8 @@ private: uint32_t local_group_size[3] = { 0, 0, 0 }; uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE] = {}; uint32_t push_constant_size = 0; + TypedArray signal_semaphores; + TypedArray wait_semaphores; uint32_t dispatch_count = 0; } state; @@ -1343,6 +1364,8 @@ public: void compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline); void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index); void compute_list_set_push_constant(ComputeListID p_list, const void *p_data, uint32_t p_data_size); + void 
compute_list_set_signal_semaphores(ComputeListID p_list, const TypedArray &p_signal_semaphores); + void compute_list_set_wait_semaphores(ComputeListID p_list, const TypedArray &p_wait_semaphores); void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups); void compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads); void compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset); @@ -1453,6 +1476,7 @@ private: List uniform_sets_to_dispose_of; List render_pipelines_to_dispose_of; List compute_pipelines_to_dispose_of; + List semaphores_to_dispose_of; // Pending asynchronous data transfer for buffers. LocalVector download_buffer_staging_buffers; diff --git a/servers/rendering/rendering_device_commons.h b/servers/rendering/rendering_device_commons.h index 67946fc4875..84e3562aa73 100644 --- a/servers/rendering/rendering_device_commons.h +++ b/servers/rendering/rendering_device_commons.h @@ -809,6 +809,7 @@ public: DRIVER_RESOURCE_BUFFER, DRIVER_RESOURCE_COMPUTE_PIPELINE, DRIVER_RESOURCE_RENDER_PIPELINE, + DRIVER_RESOURCE_SEMAPHORE, #ifndef DISABLE_DEPRECATED DRIVER_RESOURCE_VULKAN_DEVICE = DRIVER_RESOURCE_LOGICAL_DEVICE, DRIVER_RESOURCE_VULKAN_PHYSICAL_DEVICE = DRIVER_RESOURCE_PHYSICAL_DEVICE, diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index 1f65cdcaa9a..8d58f8fe276 100644 --- a/servers/rendering/rendering_device_driver.h +++ b/servers/rendering/rendering_device_driver.h @@ -404,6 +404,7 @@ public: /********************/ virtual SemaphoreID semaphore_create() = 0; + virtual SemaphoreID semaphore_create_from_extension(uint64_t p_native_semaphore) = 0; virtual void semaphore_free(SemaphoreID p_semaphore) = 0; /*************************/ @@ -777,6 +778,7 @@ public: OBJECT_TYPE_SHADER, OBJECT_TYPE_UNIFORM_SET, OBJECT_TYPE_PIPELINE, + OBJECT_TYPE_SEMAPHORE, }; struct 
MultiviewCapabilities { diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp index ae91e1b3e4a..9b2bdaeadd0 100644 --- a/servers/rendering/rendering_device_graph.cpp +++ b/servers/rendering/rendering_device_graph.cpp @@ -733,6 +733,54 @@ void RenderingDeviceGraph::_add_buffer_barrier_to_command(RDD::BufferID p_buffer } #endif +const RenderingDeviceGraph::ComputeListInstruction *RenderingDeviceGraph::_get_compute_list_command_instruction(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size, ComputeListInstruction::Type p_type) { + uint32_t instruction_data_cursor = 0; + while (instruction_data_cursor < p_instruction_data_size) { + DEV_ASSERT((instruction_data_cursor + sizeof(ComputeListInstruction)) <= p_instruction_data_size); + + const ComputeListInstruction *instruction = reinterpret_cast(&p_instruction_data[instruction_data_cursor]); + if (instruction->type == p_type) { + return instruction; + } + switch (instruction->type) { + case ComputeListInstruction::TYPE_BIND_PIPELINE: { + instruction_data_cursor += sizeof(ComputeListBindPipelineInstruction); + } break; + case ComputeListInstruction::TYPE_BIND_UNIFORM_SETS: { + const ComputeListBindUniformSetsInstruction *bind_uniform_sets_instruction = reinterpret_cast(instruction); + instruction_data_cursor += sizeof(ComputeListBindUniformSetsInstruction) + sizeof(RDD::UniformSetID) * bind_uniform_sets_instruction->set_count; + } break; + case ComputeListInstruction::TYPE_DISPATCH: { + instruction_data_cursor += sizeof(ComputeListDispatchInstruction); + } break; + case ComputeListInstruction::TYPE_DISPATCH_INDIRECT: { + instruction_data_cursor += sizeof(ComputeListDispatchIndirectInstruction); + } break; + case ComputeListInstruction::TYPE_SET_PUSH_CONSTANT: { + const ComputeListSetPushConstantInstruction *set_push_constant_instruction = reinterpret_cast(instruction); + const VectorView 
push_constant_data_view(reinterpret_cast(set_push_constant_instruction->data()), set_push_constant_instruction->size / sizeof(uint32_t)); + instruction_data_cursor += sizeof(ComputeListSetPushConstantInstruction); + instruction_data_cursor += set_push_constant_instruction->size; + } break; + case ComputeListInstruction::TYPE_SET_SIGNAL_SEMAPHORES: { + const ComputeListSemaphoresInstruction *signal_semaphore_instruction = reinterpret_cast(instruction); + instruction_data_cursor += sizeof(ComputeListSemaphoresInstruction) + sizeof(RDD::SemaphoreID) * signal_semaphore_instruction->count; + } break; + case ComputeListInstruction::TYPE_SET_WAIT_SEMAPHORES: { + const ComputeListSemaphoresInstruction *wait_semaphore_instruction = reinterpret_cast(instruction); + instruction_data_cursor += sizeof(ComputeListSemaphoresInstruction) + sizeof(RDD::SemaphoreID) * wait_semaphore_instruction->count; + } break; + case ComputeListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE: { + instruction_data_cursor += sizeof(ComputeListUniformSetPrepareForUseInstruction); + } break; + default: + DEV_ASSERT(false && "Unknown compute list instruction type."); + return nullptr; + } + } + return nullptr; +} + void RenderingDeviceGraph::_run_compute_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) { uint32_t instruction_data_cursor = 0; while (instruction_data_cursor < p_instruction_data_size) { @@ -767,6 +815,14 @@ void RenderingDeviceGraph::_run_compute_list_command(RDD::CommandBufferID p_comm instruction_data_cursor += sizeof(ComputeListSetPushConstantInstruction); instruction_data_cursor += set_push_constant_instruction->size; } break; + case ComputeListInstruction::TYPE_SET_SIGNAL_SEMAPHORES: { + const ComputeListSemaphoresInstruction *signal_semaphore_instruction = reinterpret_cast(instruction); + instruction_data_cursor += sizeof(ComputeListSemaphoresInstruction) + sizeof(RDD::SemaphoreID) *
signal_semaphore_instruction->count; + } break; + case ComputeListInstruction::TYPE_SET_WAIT_SEMAPHORES: { + const ComputeListSemaphoresInstruction *wait_semaphore_instruction = reinterpret_cast(instruction); + instruction_data_cursor += sizeof(ComputeListSemaphoresInstruction) + sizeof(RDD::SemaphoreID) * wait_semaphore_instruction->count; + } break; case ComputeListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE: { const ComputeListUniformSetPrepareForUseInstruction *uniform_set_prepare_for_use_instruction = reinterpret_cast(instruction); driver->command_uniform_set_prepare_for_use(p_command_buffer, uniform_set_prepare_for_use_instruction->uniform_set, uniform_set_prepare_for_use_instruction->shader, uniform_set_prepare_for_use_instruction->set_index); @@ -811,6 +867,92 @@ void RenderingDeviceGraph::_get_draw_list_render_pass_and_framebuffer(const Reco r_framebuffer = it->value.framebuffer; } +const RenderingDeviceGraph::DrawListInstruction *RenderingDeviceGraph::_get_draw_list_command_instruction(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size, DrawListInstruction::Type p_type) { + uint32_t instruction_data_cursor = 0; + while (instruction_data_cursor < p_instruction_data_size) { + DEV_ASSERT((instruction_data_cursor + sizeof(DrawListInstruction)) <= p_instruction_data_size); + + const DrawListInstruction *instruction = reinterpret_cast(&p_instruction_data[instruction_data_cursor]); + if (instruction->type == p_type) { + return instruction; + } + switch (instruction->type) { + case DrawListInstruction::TYPE_BIND_INDEX_BUFFER: { + instruction_data_cursor += sizeof(DrawListBindIndexBufferInstruction); + } break; + case DrawListInstruction::TYPE_BIND_PIPELINE: { + instruction_data_cursor += sizeof(DrawListBindPipelineInstruction); + } break; + case DrawListInstruction::TYPE_BIND_UNIFORM_SETS: { + const DrawListBindUniformSetsInstruction *bind_uniform_sets_instruction = reinterpret_cast(instruction); + instruction_data_cursor +=
sizeof(DrawListBindUniformSetsInstruction) + sizeof(RDD::UniformSetID) * bind_uniform_sets_instruction->set_count; + } break; + case DrawListInstruction::TYPE_BIND_VERTEX_BUFFERS: { + const DrawListBindVertexBuffersInstruction *bind_vertex_buffers_instruction = reinterpret_cast(instruction); + instruction_data_cursor += sizeof(DrawListBindVertexBuffersInstruction); + instruction_data_cursor += sizeof(RDD::BufferID) * bind_vertex_buffers_instruction->vertex_buffers_count; + instruction_data_cursor += sizeof(uint64_t) * bind_vertex_buffers_instruction->vertex_buffers_count; + } break; + case DrawListInstruction::TYPE_CLEAR_ATTACHMENTS: { + const DrawListClearAttachmentsInstruction *clear_attachments_instruction = reinterpret_cast(instruction); + instruction_data_cursor += sizeof(DrawListClearAttachmentsInstruction); + instruction_data_cursor += sizeof(RDD::AttachmentClear) * clear_attachments_instruction->attachments_clear_count; + instruction_data_cursor += sizeof(Rect2i) * clear_attachments_instruction->attachments_clear_rect_count; + } break; + case DrawListInstruction::TYPE_DRAW: { + instruction_data_cursor += sizeof(DrawListDrawInstruction); + } break; + case DrawListInstruction::TYPE_DRAW_INDEXED: { + instruction_data_cursor += sizeof(DrawListDrawIndexedInstruction); + } break; + case DrawListInstruction::TYPE_DRAW_INDIRECT: { + instruction_data_cursor += sizeof(DrawListDrawIndirectInstruction); + } break; + case DrawListInstruction::TYPE_DRAW_INDEXED_INDIRECT: { + instruction_data_cursor += sizeof(DrawListDrawIndexedIndirectInstruction); + } break; + case DrawListInstruction::TYPE_EXECUTE_COMMANDS: { + instruction_data_cursor += sizeof(DrawListExecuteCommandsInstruction); + } break; + case DrawListInstruction::TYPE_NEXT_SUBPASS: { + instruction_data_cursor += sizeof(DrawListNextSubpassInstruction); + } break; + case DrawListInstruction::TYPE_SET_BLEND_CONSTANTS: { + instruction_data_cursor += sizeof(DrawListSetBlendConstantsInstruction); + } break; + case 
DrawListInstruction::TYPE_SET_LINE_WIDTH: { + instruction_data_cursor += sizeof(DrawListSetLineWidthInstruction); + } break; + case DrawListInstruction::TYPE_SET_PUSH_CONSTANT: { + const DrawListSetPushConstantInstruction *set_push_constant_instruction = reinterpret_cast(instruction); + instruction_data_cursor += sizeof(DrawListSetPushConstantInstruction); + instruction_data_cursor += set_push_constant_instruction->size; + } break; + case DrawListInstruction::TYPE_SET_SCISSOR: { + instruction_data_cursor += sizeof(DrawListSetScissorInstruction); + } break; + case DrawListInstruction::TYPE_SET_SIGNAL_SEMAPHORES: { + const DrawListSemaphoresInstruction *signal_semaphore_instruction = reinterpret_cast(instruction); + instruction_data_cursor += sizeof(DrawListSemaphoresInstruction) + sizeof(RDD::UniformSetID) * signal_semaphore_instruction->count; + } break; + case DrawListInstruction::TYPE_SET_VIEWPORT: { + instruction_data_cursor += sizeof(DrawListSetViewportInstruction); + } break; + case DrawListInstruction::TYPE_SET_WAIT_SEMAPHORES: { + const DrawListSemaphoresInstruction *wait_semaphore_instruction = reinterpret_cast(instruction); + instruction_data_cursor += sizeof(DrawListSemaphoresInstruction) + sizeof(RDD::UniformSetID) * wait_semaphore_instruction->count; + } break; + case DrawListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE: { + instruction_data_cursor += sizeof(DrawListUniformSetPrepareForUseInstruction); + } break; + default: + DEV_ASSERT(false && "Unknown draw list instruction type."); + return nullptr; + } + } + return nullptr; +} + void RenderingDeviceGraph::_run_draw_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) { uint32_t instruction_data_cursor = 0; while (instruction_data_cursor < p_instruction_data_size) { @@ -901,11 +1043,19 @@ void RenderingDeviceGraph::_run_draw_list_command(RDD::CommandBufferID p_command driver->command_render_set_scissor(p_command_buffer, 
set_scissor_instruction->rect); instruction_data_cursor += sizeof(DrawListSetScissorInstruction); } break; + case DrawListInstruction::TYPE_SET_SIGNAL_SEMAPHORES: { + const DrawListSemaphoresInstruction *signal_semaphore_instruction = reinterpret_cast(instruction); + instruction_data_cursor += sizeof(DrawListSemaphoresInstruction) + sizeof(RDD::UniformSetID) * signal_semaphore_instruction->count; + } break; case DrawListInstruction::TYPE_SET_VIEWPORT: { const DrawListSetViewportInstruction *set_viewport_instruction = reinterpret_cast(instruction); driver->command_render_set_viewport(p_command_buffer, set_viewport_instruction->rect); instruction_data_cursor += sizeof(DrawListSetViewportInstruction); } break; + case DrawListInstruction::TYPE_SET_WAIT_SEMAPHORES: { + const DrawListSemaphoresInstruction *wait_semaphore_instruction = reinterpret_cast(instruction); + instruction_data_cursor += sizeof(DrawListSemaphoresInstruction) + sizeof(RDD::UniformSetID) * wait_semaphore_instruction->count; + } break; case DrawListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE: { const DrawListUniformSetPrepareForUseInstruction *uniform_set_prepare_for_use_instruction = reinterpret_cast(instruction); driver->command_uniform_set_prepare_for_use(p_command_buffer, uniform_set_prepare_for_use_instruction->uniform_set, uniform_set_prepare_for_use_instruction->shader, uniform_set_prepare_for_use_instruction->set_index); @@ -968,6 +1118,11 @@ void RenderingDeviceGraph::_wait_for_secondary_command_buffer_tasks() { } void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count, RDD::CommandBufferID &r_command_buffer, CommandBufferPool &r_command_buffer_pool, int32_t &r_current_label_index, int32_t &r_current_label_level) { + // signal_semaphores_list[0] stores signal_semaphores of frames[frame].command_buffer + if (r_command_buffer_pool.signal_semaphores_list.size() == 0) { + 
r_command_buffer_pool.signal_semaphores_list.resize(1); + } + for (uint32_t i = 0; i < p_sorted_commands_count; i++) { const uint32_t command_index = p_sorted_commands[i].index; const uint32_t command_data_offset = command_data_offsets[command_index]; @@ -999,9 +1154,21 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC driver_callback_command->callback(driver, r_command_buffer, driver_callback_command->userdata); } break; case RecordedCommand::TYPE_COMPUTE_LIST: { - if (device.workarounds.avoid_compute_after_draw && workarounds_state.draw_list_found) { - // Avoid compute after draw workaround. Refer to the comment that enables this in the Vulkan driver for more information. - workarounds_state.draw_list_found = false; + const RecordedComputeListCommand *compute_list_command = reinterpret_cast(command); + const ComputeListSemaphoresInstruction *signal_semaphores_instruction = reinterpret_cast(_get_compute_list_command_instruction(compute_list_command->instruction_data(), compute_list_command->instruction_data_size, ComputeListInstruction::TYPE_SET_SIGNAL_SEMAPHORES)); + const ComputeListSemaphoresInstruction *wait_semaphores_instruction = reinterpret_cast(_get_compute_list_command_instruction(compute_list_command->instruction_data(), compute_list_command->instruction_data_size, ComputeListInstruction::TYPE_SET_WAIT_SEMAPHORES)); + + bool has_signal_semaphores = signal_semaphores_instruction != nullptr && signal_semaphores_instruction->count; + bool has_wait_semaphores = wait_semaphores_instruction != nullptr && wait_semaphores_instruction->count; + bool split_cmd_buffer = has_wait_semaphores; + bool avoid_compute_after_draw = device.workarounds.avoid_compute_after_draw && workarounds_state.draw_list_found; + bool has_new_cmd_buffer = split_cmd_buffer || avoid_compute_after_draw; + + if (has_new_cmd_buffer) { + if (avoid_compute_after_draw) { + // Avoid compute after draw workaround. 
Refer to the comment that enables this in the Vulkan driver for more information. + workarounds_state.draw_list_found = false; + } // Create or reuse a command buffer and finish recording the current one. driver->command_buffer_end(r_command_buffer); @@ -1011,15 +1178,44 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC RDD::SemaphoreID command_semaphore = driver->semaphore_create(); r_command_buffer_pool.buffers.push_back(command_buffer); r_command_buffer_pool.semaphores.push_back(command_semaphore); + r_command_buffer_pool.signal_semaphores_list.push_back(LocalVector()); + r_command_buffer_pool.wait_semaphores_list.push_back(LocalVector()); } // Start recording on the next usable command buffer from the pool. uint32_t command_buffer_index = r_command_buffer_pool.buffers_used++; r_command_buffer = r_command_buffer_pool.buffers[command_buffer_index]; driver->command_buffer_begin(r_command_buffer); + + // wait_semaphores of buffers[i] are stored in wait_semaphores_list[i] + if (has_wait_semaphores) { + const RDD::SemaphoreID *ids = wait_semaphores_instruction->semaphore_ids(); + LocalVector wait_semaphores; + for (uint32_t j = 0; j < wait_semaphores_instruction->count; j++) { + wait_semaphores.push_back(ids[j]); + } + r_command_buffer_pool.wait_semaphores_list[command_buffer_index] = wait_semaphores; + } + } + + if (has_signal_semaphores) { + // signal_semaphores of buffers[i] are stored in signal_semaphores_list[i + 1] + // signal_semaphores_list[0] stores signal_semaphores of frames[frame].command_buffer + uint32_t signal_index = r_command_buffer_pool.buffers_used; + const RDD::SemaphoreID *ids = signal_semaphores_instruction->semaphore_ids(); + LocalVector signal_semaphores; + const LocalVector &_signal_semaphores = r_command_buffer_pool.signal_semaphores_list[signal_index]; + + for (uint32_t j = 0; j < signal_semaphores_instruction->count; ++j) { + signal_semaphores.push_back(ids[j]); + } + for (uint32_t j = 0; j < 
_signal_semaphores.size(); j++) { + signal_semaphores.push_back(_signal_semaphores[j]); + } + + r_command_buffer_pool.signal_semaphores_list[signal_index] = signal_semaphores; } - const RecordedComputeListCommand *compute_list_command = reinterpret_cast(command); _run_compute_list_command(r_command_buffer, compute_list_command->instruction_data(), compute_list_command->instruction_data_size); } break; case RecordedCommand::TYPE_DRAW_LIST: { @@ -1029,8 +1225,14 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC } const RecordedDrawListCommand *draw_list_command = reinterpret_cast(command); + const DrawListSemaphoresInstruction *signal_semaphores_instruction = reinterpret_cast(_get_draw_list_command_instruction(draw_list_command->instruction_data(), draw_list_command->instruction_data_size, DrawListInstruction::TYPE_SET_SIGNAL_SEMAPHORES)); + const DrawListSemaphoresInstruction *wait_semaphores_instruction = reinterpret_cast(_get_draw_list_command_instruction(draw_list_command->instruction_data(), draw_list_command->instruction_data_size, DrawListInstruction::TYPE_SET_WAIT_SEMAPHORES)); - if (draw_list_command->split_cmd_buffer) { + bool has_signal_semaphores = signal_semaphores_instruction != nullptr && signal_semaphores_instruction->count; + bool has_wait_semaphores = wait_semaphores_instruction != nullptr && wait_semaphores_instruction->count; + bool split_cmd_buffer = has_wait_semaphores || draw_list_command->split_cmd_buffer; + + if (split_cmd_buffer) { // Create or reuse a command buffer and finish recording the current one. 
driver->command_buffer_end(r_command_buffer); @@ -1039,12 +1241,42 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC RDD::SemaphoreID command_semaphore = driver->semaphore_create(); r_command_buffer_pool.buffers.push_back(command_buffer); r_command_buffer_pool.semaphores.push_back(command_semaphore); + r_command_buffer_pool.signal_semaphores_list.push_back(LocalVector()); + r_command_buffer_pool.wait_semaphores_list.push_back(LocalVector()); } // Start recording on the next usable command buffer from the pool. uint32_t command_buffer_index = r_command_buffer_pool.buffers_used++; r_command_buffer = r_command_buffer_pool.buffers[command_buffer_index]; driver->command_buffer_begin(r_command_buffer); + + // wait_semaphores of buffers[i] are stored in wait_semaphores_list[i] + if (has_wait_semaphores) { + const RDD::SemaphoreID *ids = wait_semaphores_instruction->semaphore_ids(); + LocalVector wait_semaphores; + for (uint32_t j = 0; j < wait_semaphores_instruction->count; j++) { + wait_semaphores.push_back(ids[j]); + } + r_command_buffer_pool.wait_semaphores_list[command_buffer_index] = wait_semaphores; + } + } + + if (has_signal_semaphores) { + // signal_semaphores of buffers[i] are stored in signal_semaphores_list[i + 1] + // signal_semaphores_list[0] stores signal_semaphores of frames[frame].command_buffer + uint32_t signal_index = r_command_buffer_pool.buffers_used; + const RDD::SemaphoreID *ids = signal_semaphores_instruction->semaphore_ids(); + LocalVector signal_semaphores; + const LocalVector &_signal_semaphores = r_command_buffer_pool.signal_semaphores_list[signal_index]; + + for (uint32_t j = 0; j < signal_semaphores_instruction->count; ++j) { + signal_semaphores.push_back(ids[j]); + } + for (uint32_t j = 0; j < _signal_semaphores.size(); j++) { + signal_semaphores.push_back(_signal_semaphores[j]); + } + + r_command_buffer_pool.signal_semaphores_list[signal_index] = signal_semaphores; } const VectorView 
clear_values(draw_list_command->clear_values(), draw_list_command->clear_values_count); @@ -1743,6 +1975,30 @@ void RenderingDeviceGraph::add_compute_list_set_push_constant(RDD::ShaderID p_sh memcpy(instruction->data(), p_data, p_data_size); } +void RenderingDeviceGraph::add_compute_list_set_signal_semaphores(VectorView p_signal_semaphores) { + uint32_t instruction_size = sizeof(ComputeListSemaphoresInstruction) + sizeof(RDD::UniformSetID) * p_signal_semaphores.size(); + ComputeListSemaphoresInstruction *instruction = reinterpret_cast(_allocate_compute_list_instruction(instruction_size)); + instruction->type = ComputeListInstruction::TYPE_SET_SIGNAL_SEMAPHORES; + instruction->count = p_signal_semaphores.size(); + + RDD::SemaphoreID *ids = instruction->semaphore_ids(); + for (uint32_t i = 0; i < p_signal_semaphores.size(); i++) { + ids[i] = p_signal_semaphores[i]; + } +} + +void RenderingDeviceGraph::add_compute_list_set_wait_semaphores(VectorView p_wait_semaphores) { + uint32_t instruction_size = sizeof(ComputeListSemaphoresInstruction) + sizeof(RDD::UniformSetID) * p_wait_semaphores.size(); + ComputeListSemaphoresInstruction *instruction = reinterpret_cast(_allocate_compute_list_instruction(instruction_size)); + instruction->type = ComputeListInstruction::TYPE_SET_WAIT_SEMAPHORES; + instruction->count = p_wait_semaphores.size(); + + RDD::SemaphoreID *ids = instruction->semaphore_ids(); + for (uint32_t i = 0; i < p_wait_semaphores.size(); i++) { + ids[i] = p_wait_semaphores[i]; + } +} + void RenderingDeviceGraph::add_compute_list_uniform_set_prepare_for_use(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index) { ComputeListUniformSetPrepareForUseInstruction *instruction = reinterpret_cast(_allocate_compute_list_instruction(sizeof(ComputeListUniformSetPrepareForUseInstruction))); instruction->type = ComputeListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE; @@ -1941,6 +2197,30 @@ void 
RenderingDeviceGraph::add_draw_list_set_push_constant(RDD::ShaderID p_shade memcpy(instruction->data(), p_data, p_data_size); } +void RenderingDeviceGraph::add_draw_list_set_signal_semaphores(VectorView p_signal_semaphores) { + uint32_t instruction_size = sizeof(DrawListSemaphoresInstruction) + sizeof(RDD::UniformSetID) * p_signal_semaphores.size(); + DrawListSemaphoresInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(instruction_size)); + instruction->type = DrawListInstruction::TYPE_SET_SIGNAL_SEMAPHORES; + instruction->count = p_signal_semaphores.size(); + + RDD::SemaphoreID *ids = instruction->semaphore_ids(); + for (uint32_t i = 0; i < p_signal_semaphores.size(); i++) { + ids[i] = p_signal_semaphores[i]; + } +} + +void RenderingDeviceGraph::add_draw_list_set_wait_semaphores(VectorView p_wait_semaphores) { + uint32_t instruction_size = sizeof(DrawListSemaphoresInstruction) + sizeof(RDD::UniformSetID) * p_wait_semaphores.size(); + DrawListSemaphoresInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(instruction_size)); + instruction->type = DrawListInstruction::TYPE_SET_WAIT_SEMAPHORES; + instruction->count = p_wait_semaphores.size(); + + RDD::SemaphoreID *ids = instruction->semaphore_ids(); + for (uint32_t i = 0; i < p_wait_semaphores.size(); i++) { + ids[i] = p_wait_semaphores[i]; + } +} + void RenderingDeviceGraph::add_draw_list_set_scissor(Rect2i p_rect) { DrawListSetScissorInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(sizeof(DrawListSetScissorInstruction))); instruction->type = DrawListInstruction::TYPE_SET_SCISSOR; diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h index 567893e6fe8..82fb24876ca 100644 --- a/servers/rendering/rendering_device_graph.h +++ b/servers/rendering/rendering_device_graph.h @@ -53,6 +53,8 @@ public: TYPE_DISPATCH, TYPE_DISPATCH_INDIRECT, TYPE_SET_PUSH_CONSTANT, + TYPE_SET_SIGNAL_SEMAPHORES, + 
TYPE_SET_WAIT_SEMAPHORES, TYPE_UNIFORM_SET_PREPARE_FOR_USE }; @@ -77,7 +79,9 @@ public: TYPE_SET_LINE_WIDTH, TYPE_SET_PUSH_CONSTANT, TYPE_SET_SCISSOR, + TYPE_SET_SIGNAL_SEMAPHORES, TYPE_SET_VIEWPORT, + TYPE_SET_WAIT_SEMAPHORES, TYPE_UNIFORM_SET_PREPARE_FOR_USE }; @@ -218,7 +222,10 @@ public: // Created internally by RenderingDeviceGraph. LocalVector buffers; + // buffers[i-1] wait semaphores[i-1] and wait_semaphores_list[i-1], signal semaphores[i] and signal_semaphores_list[i]. LocalVector semaphores; + LocalVector> signal_semaphores_list; + LocalVector> wait_semaphores_list; uint32_t buffers_used = 0; }; @@ -610,6 +617,18 @@ private: uint32_t set_index = 0; }; + struct DrawListSemaphoresInstruction : DrawListInstruction { + uint32_t count = 0; + + _FORCE_INLINE_ RDD::SemaphoreID *semaphore_ids() { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ const RDD::SemaphoreID *semaphore_ids() const { + return reinterpret_cast(&this[1]); + } + }; + struct ComputeListBindPipelineInstruction : ComputeListInstruction { RDD::PipelineID pipeline; }; @@ -658,6 +677,18 @@ private: uint32_t set_index = 0; }; + struct ComputeListSemaphoresInstruction : ComputeListInstruction { + uint32_t count = 0; + + _FORCE_INLINE_ RDD::SemaphoreID *semaphore_ids() { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ const RDD::SemaphoreID *semaphore_ids() const { + return reinterpret_cast(&this[1]); + } + }; + struct BarrierGroup { BitField src_stages; BitField dst_stages; @@ -749,8 +780,10 @@ private: #if USE_BUFFER_BARRIERS void _add_buffer_barrier_to_command(RDD::BufferID p_buffer_id, BitField p_src_access, BitField p_dst_access, int32_t &r_barrier_index, int32_t &r_barrier_count); #endif + const ComputeListInstruction *_get_compute_list_command_instruction(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size, ComputeListInstruction::Type p_type); void _run_compute_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, 
uint32_t p_instruction_data_size); void _get_draw_list_render_pass_and_framebuffer(const RecordedDrawListCommand *p_draw_list_command, RDD::RenderPassID &r_render_pass, RDD::FramebufferID &r_framebuffer); + const DrawListInstruction *_get_draw_list_command_instruction(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size, DrawListInstruction::Type p_type); void _run_draw_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); void _add_draw_list_begin(FramebufferCache *p_framebuffer_cache, RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, bool p_uses_color, bool p_uses_depth, uint32_t p_breadcrumb, bool p_split_cmd_buffer); void _run_secondary_command_buffer_task(const SecondaryCommandBuffer *p_secondary); @@ -781,6 +814,8 @@ public: void add_compute_list_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups); void add_compute_list_dispatch_indirect(RDD::BufferID p_buffer, uint32_t p_offset); void add_compute_list_set_push_constant(RDD::ShaderID p_shader, const void *p_data, uint32_t p_data_size); + void add_compute_list_set_signal_semaphores(VectorView p_signal_semaphore); + void add_compute_list_set_wait_semaphores(VectorView p_wait_semaphore); void add_compute_list_uniform_set_prepare_for_use(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index); void add_compute_list_usage(ResourceTracker *p_tracker, ResourceUsage p_usage); void add_compute_list_usages(VectorView p_trackers, VectorView p_usages); @@ -802,6 +837,8 @@ public: void add_draw_list_set_blend_constants(const Color &p_color); void add_draw_list_set_line_width(float p_width); void add_draw_list_set_push_constant(RDD::ShaderID p_shader, const void *p_data, uint32_t p_data_size); + void add_draw_list_set_signal_semaphores(VectorView p_signal_semaphore); + void 
add_draw_list_set_wait_semaphores(VectorView p_wait_semaphore); void add_draw_list_set_scissor(Rect2i p_rect); void add_draw_list_set_viewport(Rect2i p_rect); void add_draw_list_uniform_set_prepare_for_use(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index);