mirror of https://github.com/godotengine/godot
Merge pull request #113618 from blueskythlikesclouds/spirv-to-dxil-memory-alloc-opt
Update Mesa NIR to 25.3.1 + Make each SPIR-V -> DXIL conversion thread allocate from its own heap
This commit is contained in:
commit
a6f313c755
|
|
@ -52,6 +52,10 @@ typedef struct GodotNirCallbacks {
|
|||
void (*report_bitcode_bit_offset_fn)(uint64_t p_bit_offset, void *p_data);
|
||||
} GodotNirCallbacks;
|
||||
|
||||
extern void *godot_nir_malloc(size_t p_size);
|
||||
extern void *godot_nir_realloc(void *p_block, size_t p_size);
|
||||
extern void godot_nir_free(void *p_block);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -72,6 +72,97 @@ GODOT_GCC_WARNING_POP
|
|||
GODOT_CLANG_WARNING_POP
|
||||
GODOT_MSVC_WARNING_POP
|
||||
|
||||
// SPIR-V to DXIL does way too many allocations, which causes worker threads
|
||||
// to bottleneck each other due to sharing the same global process heap.
|
||||
// This can be solved by making each thread allocate from its own heap.
|
||||
#define SPIRV_TO_DXIL_ENABLE_HEAP_PER_THREAD
|
||||
|
||||
#ifdef SPIRV_TO_DXIL_ENABLE_HEAP_PER_THREAD
|
||||
|
||||
namespace {
|
||||
struct Win32Heap {
|
||||
HANDLE handle;
|
||||
SafeRefCount ref_count;
|
||||
|
||||
Win32Heap() {
|
||||
handle = HeapCreate(0, 0, 0);
|
||||
ref_count.init();
|
||||
}
|
||||
|
||||
~Win32Heap() {
|
||||
HeapDestroy(handle);
|
||||
}
|
||||
};
|
||||
|
||||
constexpr size_t ALLOC_HEADER_SIZE = sizeof(Win32Heap *) * 2;
|
||||
} //namespace
|
||||
|
||||
extern "C" {
|
||||
void *godot_nir_malloc(size_t p_size) {
|
||||
// This RAII helper is for allowing the heap to be destroyed when the thread quits.
|
||||
struct Win32HeapHolder {
|
||||
Win32Heap *win32_heap = nullptr;
|
||||
|
||||
Win32HeapHolder() {
|
||||
win32_heap = memnew(Win32Heap);
|
||||
}
|
||||
|
||||
~Win32HeapHolder() {
|
||||
if (win32_heap->ref_count.unref()) {
|
||||
memdelete(win32_heap);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
thread_local Win32HeapHolder holder;
|
||||
|
||||
void *block = HeapAlloc(holder.win32_heap->handle, 0, p_size + ALLOC_HEADER_SIZE);
|
||||
|
||||
// Store the heap in the allocation for the realloc/free operations.
|
||||
*(Win32Heap **)block = holder.win32_heap;
|
||||
holder.win32_heap->ref_count.ref();
|
||||
|
||||
return (uint8_t *)block + ALLOC_HEADER_SIZE;
|
||||
}
|
||||
|
||||
void *godot_nir_realloc(void *p_block, size_t p_size) {
|
||||
uint8_t *actual_block = (uint8_t *)p_block - ALLOC_HEADER_SIZE;
|
||||
Win32Heap *win32_heap = *(Win32Heap **)actual_block;
|
||||
return (uint8_t *)HeapReAlloc(win32_heap->handle, 0, actual_block, p_size + ALLOC_HEADER_SIZE) + ALLOC_HEADER_SIZE;
|
||||
}
|
||||
|
||||
void godot_nir_free(void *p_block) {
|
||||
if (p_block != nullptr) {
|
||||
uint8_t *actual_block = (uint8_t *)p_block - ALLOC_HEADER_SIZE;
|
||||
Win32Heap *win32_heap = *(Win32Heap **)actual_block;
|
||||
HeapFree(win32_heap->handle, 0, actual_block);
|
||||
|
||||
// Allocations can outlive the threads they were created in if they were stored globally.
|
||||
if (win32_heap->ref_count.unref()) {
|
||||
memdelete(win32_heap);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
extern "C" {
|
||||
// Fallback when the per-thread heap is compiled out: defer to the C runtime allocator.
void *godot_nir_malloc(size_t p_size) {
	void *block = malloc(p_size);
	return block;
}
|
||||
|
||||
// Fallback when the per-thread heap is compiled out: defer to the C runtime allocator.
void *godot_nir_realloc(void *p_block, size_t p_size) {
	void *resized = realloc(p_block, p_size);
	return resized;
}
|
||||
|
||||
// Fallback when the per-thread heap is compiled out: defer to the C runtime allocator.
void godot_nir_free(void *p_block) {
	free(p_block);
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static D3D12_SHADER_VISIBILITY stages_to_d3d12_visibility(uint32_t p_stages_mask) {
|
||||
switch (p_stages_mask) {
|
||||
case RenderingDeviceCommons::SHADER_STAGE_VERTEX_BIT:
|
||||
|
|
@ -274,8 +365,8 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span<ReflectShaderStag
|
|||
dxil_spirv_runtime_conf dxil_runtime_conf = {};
|
||||
dxil_runtime_conf.runtime_data_cbv.base_shader_register = RUNTIME_DATA_REGISTER;
|
||||
dxil_runtime_conf.push_constant_cbv.base_shader_register = ROOT_CONSTANT_REGISTER;
|
||||
dxil_runtime_conf.zero_based_vertex_instance_id = true;
|
||||
dxil_runtime_conf.zero_based_compute_workgroup_id = true;
|
||||
dxil_runtime_conf.first_vertex_and_base_instance_mode = DXIL_SPIRV_SYSVAL_TYPE_ZERO;
|
||||
dxil_runtime_conf.workgroup_id_mode = DXIL_SPIRV_SYSVAL_TYPE_ZERO;
|
||||
|
||||
// Explicitly keeping these false because converting UAV descriptors to SRVs does not seem to have real performance benefits on desktop GPUs.
|
||||
// It also makes it easier to implement descriptor heaps and enhanced barriers.
|
||||
|
|
@ -290,7 +381,7 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span<ReflectShaderStag
|
|||
r_stages_processed.set_flag(stage_flag);
|
||||
|
||||
const char *entry_point = "main";
|
||||
static const gl_shader_stage SPIRV_TO_MESA_STAGES[RenderingDeviceCommons::SHADER_STAGE_MAX] = {
|
||||
static const mesa_shader_stage SPIRV_TO_MESA_STAGES[RenderingDeviceCommons::SHADER_STAGE_MAX] = {
|
||||
MESA_SHADER_VERTEX, // SHADER_STAGE_VERTEX
|
||||
MESA_SHADER_FRAGMENT, // SHADER_STAGE_FRAGMENT
|
||||
MESA_SHADER_TESS_CTRL, // SHADER_STAGE_TESSELATION_CONTROL
|
||||
|
|
@ -320,8 +411,8 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span<ReflectShaderStag
|
|||
}
|
||||
|
||||
dxil_spirv_nir_prep(shader);
|
||||
bool requires_runtime_data = false;
|
||||
dxil_spirv_nir_passes(shader, &dxil_runtime_conf, &requires_runtime_data);
|
||||
dxil_spirv_metadata dxil_metadata = {};
|
||||
dxil_spirv_nir_passes(shader, &dxil_runtime_conf, &dxil_metadata);
|
||||
|
||||
r_stages_nir_shaders[stage] = shader;
|
||||
}
|
||||
|
|
@ -360,8 +451,8 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span<ReflectShaderStag
|
|||
}
|
||||
}
|
||||
if (prev_shader) {
|
||||
bool requires_runtime_data = {};
|
||||
dxil_spirv_nir_link(shader, prev_shader, &dxil_runtime_conf, &requires_runtime_data);
|
||||
dxil_spirv_metadata dxil_metadata = {};
|
||||
dxil_spirv_nir_link(shader, prev_shader, &dxil_runtime_conf, &dxil_metadata);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -434,7 +525,9 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_dxil(Span<ReflectShaderSta
|
|||
};
|
||||
|
||||
// This structure must live as long as the shaders are alive.
|
||||
nir_shader_compiler_options compiler_options = *dxil_get_nir_compiler_options();
|
||||
nir_shader_compiler_options compiler_options = {};
|
||||
const unsigned supported_bit_sizes = 16 | 32 | 64;
|
||||
dxil_get_nir_compiler_options(&compiler_options, shader_model_d3d_to_dxil(D3D_SHADER_MODEL(REQUIRED_SHADER_MODEL)), supported_bit_sizes, supported_bit_sizes);
|
||||
compiler_options.lower_base_vertex = false;
|
||||
|
||||
// This is based on spirv2dxil.c. May need updates when it changes.
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ else:
|
|||
|
||||
# Mesa NIR
|
||||
# Check for latest version: https://github.com/godotengine/godot-nir-static/releases/latest
|
||||
mesa_version = "23.1.9-2"
|
||||
mesa_version = "25.3.1"
|
||||
# WinPixEventRuntime
|
||||
# Check for latest version: https://www.nuget.org/api/v2/package/WinPixEventRuntime (check downloaded filename)
|
||||
pix_version = "1.0.240308001"
|
||||
|
|
|
|||
Loading…
Reference in New Issue