From 62335d1ac78f3cffd17bfb4f4337c89005364423 Mon Sep 17 00:00:00 2001
From: Skyth <19259897+blueskythlikesclouds@users.noreply.github.com>
Date: Fri, 5 Dec 2025 13:17:42 +0300
Subject: [PATCH] Make each SPIR-V -> DXIL conversion thread allocate from its own heap.

---
 drivers/d3d12/d3d12_godot_nir_bridge.h        |   4 +
 .../rendering_shader_container_d3d12.cpp      | 148 ++++++++++++++++--
 misc/scripts/install_d3d12_sdk_windows.py     |   2 +-
 3 files changed, 145 insertions(+), 9 deletions(-)

diff --git a/drivers/d3d12/d3d12_godot_nir_bridge.h b/drivers/d3d12/d3d12_godot_nir_bridge.h
index d35e3cd7493..59992bee030 100644
--- a/drivers/d3d12/d3d12_godot_nir_bridge.h
+++ b/drivers/d3d12/d3d12_godot_nir_bridge.h
@@ -52,6 +52,10 @@ typedef struct GodotNirCallbacks {
 	void (*report_bitcode_bit_offset_fn)(uint64_t p_bit_offset, void *p_data);
 } GodotNirCallbacks;
 
+extern void *godot_nir_malloc(size_t p_size);
+extern void *godot_nir_realloc(void *p_block, size_t p_size);
+extern void godot_nir_free(void *p_block);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/drivers/d3d12/rendering_shader_container_d3d12.cpp b/drivers/d3d12/rendering_shader_container_d3d12.cpp
index 61493dcd616..df96f4f8b11 100644
--- a/drivers/d3d12/rendering_shader_container_d3d12.cpp
+++ b/drivers/d3d12/rendering_shader_container_d3d12.cpp
@@ -72,6 +72,136 @@ GODOT_GCC_WARNING_POP
 GODOT_CLANG_WARNING_POP
 GODOT_MSVC_WARNING_POP
 
+// SPIR-V to DXIL does way too many allocations, which causes worker threads
+// to bottleneck each other due to sharing the same global process heap.
+// This can be solved by making each thread allocate from its own heap.
+#define SPIRV_TO_DXIL_ENABLE_HEAP_PER_THREAD
+
+#ifdef SPIRV_TO_DXIL_ENABLE_HEAP_PER_THREAD
+
+namespace {
+// A private Win32 heap shared by the thread that created it and by every
+// allocation still living in it; destroyed when the last reference is dropped.
+struct Win32Heap {
+	HANDLE handle = nullptr;
+	SafeRefCount ref_count;
+
+	Win32Heap() {
+		// Flags are 0 (serialized access) since blocks can be freed from other threads.
+		handle = HeapCreate(0, 0, 0);
+		ref_count.init();
+	}
+
+	~Win32Heap() {
+		if (handle != nullptr) {
+			HeapDestroy(handle);
+		}
+	}
+};
+
+// Size of the header placed in front of every block; it stores the owning heap.
+constexpr size_t ALLOC_HEADER_SIZE = sizeof(Win32Heap *) * 2;
+} //namespace
+
+extern "C" {
+// malloc() replacement that allocates from the calling thread's own heap.
+// Returns nullptr when the heap is unavailable or the allocation fails.
+void *godot_nir_malloc(size_t p_size) {
+	// This RAII helper is for allowing the heap to be destroyed when the thread quits.
+	struct Win32HeapHolder {
+		Win32Heap *win32_heap = nullptr;
+
+		Win32HeapHolder() {
+			win32_heap = memnew(Win32Heap);
+		}
+
+		~Win32HeapHolder() {
+			if (win32_heap->ref_count.unref()) {
+				memdelete(win32_heap);
+			}
+		}
+	};
+
+	thread_local Win32HeapHolder holder;
+
+	// Reject sizes that would overflow once the header is added, or a heap that failed to be created.
+	const size_t total_size = p_size + ALLOC_HEADER_SIZE;
+	if (total_size < p_size || holder.win32_heap->handle == nullptr) {
+		return nullptr;
+	}
+
+	void *block = HeapAlloc(holder.win32_heap->handle, 0, total_size);
+	if (block == nullptr) {
+		return nullptr;
+	}
+
+	// Store the heap in the allocation for the realloc/free operations.
+	*(Win32Heap **)block = holder.win32_heap;
+	holder.win32_heap->ref_count.ref();
+
+	return (uint8_t *)block + ALLOC_HEADER_SIZE;
+}
+
+// realloc() replacement. A null p_block behaves like godot_nir_malloc(); on
+// failure nullptr is returned and the original block remains valid.
+void *godot_nir_realloc(void *p_block, size_t p_size) {
+	if (p_block == nullptr) {
+		return godot_nir_malloc(p_size);
+	}
+
+	const size_t total_size = p_size + ALLOC_HEADER_SIZE;
+	if (total_size < p_size) {
+		return nullptr;
+	}
+
+	uint8_t *actual_block = (uint8_t *)p_block - ALLOC_HEADER_SIZE;
+	Win32Heap *win32_heap = *(Win32Heap **)actual_block;
+
+	// HeapReAlloc preserves the block contents, so the header keeps pointing at the owning heap.
+	void *new_block = HeapReAlloc(win32_heap->handle, 0, actual_block, total_size);
+	if (new_block == nullptr) {
+		return nullptr;
+	}
+
+	return (uint8_t *)new_block + ALLOC_HEADER_SIZE;
+}
+
+// free() replacement. Releases the block and drops its reference on the heap
+// it was allocated from. A null p_block is ignored.
+void godot_nir_free(void *p_block) {
+	if (p_block == nullptr) {
+		return;
+	}
+
+	uint8_t *actual_block = (uint8_t *)p_block - ALLOC_HEADER_SIZE;
+	Win32Heap *win32_heap = *(Win32Heap **)actual_block;
+	HeapFree(win32_heap->handle, 0, actual_block);
+
+	// Allocations can outlive the threads they were created in if they were stored globally.
+	if (win32_heap->ref_count.unref()) {
+		memdelete(win32_heap);
+	}
+}
+}
+
+#else
+
+extern "C" {
+void *godot_nir_malloc(size_t p_size) {
+	return malloc(p_size);
+}
+
+void *godot_nir_realloc(void *p_block, size_t p_size) {
+	return realloc(p_block, p_size);
+}
+
+void godot_nir_free(void *p_block) {
+	free(p_block);
+}
+}
+
+#endif
+
 static D3D12_SHADER_VISIBILITY stages_to_d3d12_visibility(uint32_t p_stages_mask) {
 	switch (p_stages_mask) {
 		case RenderingDeviceCommons::SHADER_STAGE_VERTEX_BIT:
@@ -274,8 +404,8 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span