From e674379764e0864a001a0859bf3ee36eef4a8340 Mon Sep 17 00:00:00 2001 From: HP van Braam Date: Thu, 12 Dec 2024 17:21:15 +0100 Subject: [PATCH] Fix several ubsan reported misaligned accesses These misaligned accesses are shown in all of our CI hooks. It turned out to not be difficult to fix. It is likely that this will improve performance for aarch64. --- drivers/vulkan/rendering_device_driver_vulkan.cpp | 11 +++++++---- drivers/vulkan/rendering_device_driver_vulkan.h | 3 ++- servers/rendering/rendering_device_graph.cpp | 1 + 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index ae6cddc987f..5917c41de86 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -3441,7 +3441,7 @@ Vector RenderingDeviceDriverVulkan::shader_compile_binary_from_spirv(Ve binary_data.shader_name_len = shader_name_utf.length(); - uint32_t total_size = sizeof(uint32_t) * 3; // Header + version + main datasize;. + uint32_t total_size = sizeof(uint32_t) * 4; // Header + version + pad + main datasize;. total_size += sizeof(ShaderBinary::Data); total_size += STEPIFY(binary_data.shader_name_len, 4); @@ -3470,6 +3470,8 @@ Vector RenderingDeviceDriverVulkan::shader_compile_binary_from_spirv(Ve offset += sizeof(uint32_t); encode_uint32(sizeof(ShaderBinary::Data), binptr + offset); offset += sizeof(uint32_t); + encode_uint32(0, binptr + offset); // Pad to align ShaderBinary::Data to 8 bytes. + offset += sizeof(uint32_t); memcpy(binptr + offset, &binary_data, sizeof(ShaderBinary::Data)); offset += sizeof(ShaderBinary::Data); @@ -3528,7 +3530,7 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec uint32_t read_offset = 0; // Consistency check. - ERR_FAIL_COND_V(binsize < sizeof(uint32_t) * 3 + sizeof(ShaderBinary::Data), ShaderID()); + ERR_FAIL_COND_V(binsize < sizeof(uint32_t) * 4 + sizeof(ShaderBinary::Data), ShaderID()); ERR_FAIL_COND_V(binptr[0] != 'G' || binptr[1] != 'S' || binptr[2] != 'B' || binptr[3] != 'D', ShaderID()); uint32_t bin_version = decode_uint32(binptr + 4); @@ -3536,7 +3538,8 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec uint32_t bin_data_size = decode_uint32(binptr + 8); - const ShaderBinary::Data &binary_data = *(reinterpret_cast(binptr + 12)); + // 16, not 12, to skip alignment padding. + const ShaderBinary::Data &binary_data = *(reinterpret_cast(binptr + 16)); r_shader_desc.push_constant_size = binary_data.push_constant_size; shader_info.vk_push_constant_stages = binary_data.vk_push_constant_stages_mask; @@ -3549,7 +3552,7 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec r_shader_desc.compute_local_size[1] = binary_data.compute_local_size[1]; r_shader_desc.compute_local_size[2] = binary_data.compute_local_size[2]; - read_offset += sizeof(uint32_t) * 3 + bin_data_size; + read_offset += sizeof(uint32_t) * 4 + bin_data_size; if (binary_data.shader_name_len) { r_name.parse_utf8((const char *)(binptr + read_offset), binary_data.shader_name_len); diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index ba478483450..4eec7547f50 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -405,7 +405,8 @@ private: // Version 2: Added shader name. // Version 3: Added writable. // Version 4: 64-bit vertex input mask. - static const uint32_t VERSION = 4; + // Version 5: Add 4 bytes padding to align the Data struct after the change in version 4. + static const uint32_t VERSION = 5; struct DataBinding { uint32_t type = 0; diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp index 5b17657d9f7..6f17fde2534 100644 --- a/servers/rendering/rendering_device_graph.cpp +++ b/servers/rendering/rendering_device_graph.cpp @@ -228,6 +228,7 @@ int32_t RenderingDeviceGraph::_add_to_write_list(int32_t p_command_index, Rect2i RenderingDeviceGraph::RecordedCommand *RenderingDeviceGraph::_allocate_command(uint32_t p_command_size, int32_t &r_command_index) { uint32_t command_data_offset = command_data.size(); + command_data_offset = STEPIFY(command_data_offset, 8); command_data_offsets.push_back(command_data_offset); command_data.resize(command_data_offset + p_command_size); r_command_index = command_count++;