diff --git a/modules/betsy/CrossPlatformSettings_piece_all.glsl b/modules/betsy/CrossPlatformSettings_piece_all.glsl index 001d8e63b23..663278ad6ba 100644 --- a/modules/betsy/CrossPlatformSettings_piece_all.glsl +++ b/modules/betsy/CrossPlatformSettings_piece_all.glsl @@ -61,9 +61,9 @@ #define OGRE_Load3D(tex, iuv, lod) texelFetch(tex, ivec3(iuv), lod) -#define OGRE_GatherRed(tex, sampler, uv) textureGather(tex, uv, 0) -#define OGRE_GatherGreen(tex, sampler, uv) textureGather(tex, uv, 1) -#define OGRE_GatherBlue(tex, sampler, uv) textureGather(tex, uv, 2) +#define OGRE_GatherRed(tex, uv) textureGather(tex, uv, 0) +#define OGRE_GatherGreen(tex, uv) textureGather(tex, uv, 1) +#define OGRE_GatherBlue(tex, uv) textureGather(tex, uv, 2) #define bufferFetch1(buffer, idx) texelFetch(buffer, idx).x diff --git a/modules/betsy/alpha_stitch.glsl b/modules/betsy/alpha_stitch.glsl index 4a9ee58d97b..69a87d64c05 100644 --- a/modules/betsy/alpha_stitch.glsl +++ b/modules/betsy/alpha_stitch.glsl @@ -8,17 +8,24 @@ #include "CrossPlatformSettings_piece_all.glsl" #include "UavCrossPlatform_piece_all.glsl" +layout(binding = 0) uniform texture2D srcRGB[32]; +layout(binding = 1) uniform texture2D srcAlpha[32]; +layout(binding = 2) uniform sampler SAMPLER_NEAREST_CLAMP; +layout(binding = 3, rgba32ui) uniform restrict writeonly uimage2D dstTextures[32]; + +layout(push_constant, std430) uniform Params { + uint p_textureIndex; + uint p_padding[3]; +} +params; + layout(local_size_x = 8, // local_size_y = 8, // local_size_z = 1) in; -layout(binding = 0) uniform usampler2D srcRGB; -layout(binding = 1) uniform usampler2D srcAlpha; -layout(binding = 2, rgba32ui) uniform restrict writeonly uimage2D dstTexture; - void main() { - uint2 rgbBlock = OGRE_Load2D(srcRGB, int2(gl_GlobalInvocationID.xy), 0).xy; - uint2 alphaBlock = OGRE_Load2D(srcAlpha, int2(gl_GlobalInvocationID.xy), 0).xy; + float2 rgbBlock = OGRE_Load2D(sampler2D(srcRGB[params.p_textureIndex], SAMPLER_NEAREST_CLAMP), int2(gl_GlobalInvocationID.xy), 0).xy; + float2 alphaBlock = OGRE_Load2D(sampler2D(srcAlpha[params.p_textureIndex], SAMPLER_NEAREST_CLAMP), int2(gl_GlobalInvocationID.xy), 0).xy; - imageStore(dstTexture, int2(gl_GlobalInvocationID.xy), uint4(rgbBlock.xy, alphaBlock.xy)); + imageStore(dstTextures[params.p_textureIndex], int2(gl_GlobalInvocationID.xy), floatBitsToUint(float4(rgbBlock.xy, alphaBlock.xy))); } diff --git a/modules/betsy/bc1.glsl b/modules/betsy/bc1.glsl index f1b2c282547..6f9eed2de59 100644 --- a/modules/betsy/bc1.glsl +++ b/modules/betsy/bc1.glsl @@ -11,17 +11,19 @@ dithered = "#define BC1_DITHER"; #define FLT_MAX 340282346638528859811704183484516925440.0f -layout(binding = 0) uniform sampler2D srcTex; -layout(binding = 1, rg32ui) uniform restrict writeonly uimage2D dstTexture; +layout(binding = 0) uniform texture2D srcTextures[32]; +layout(binding = 1) uniform sampler SAMPLER_NEAREST_CLAMP; +layout(binding = 2, rgba32ui) uniform restrict writeonly uimage2D dstTextures[32]; -layout(std430, binding = 2) readonly restrict buffer globalBuffer { +layout(std430, binding = 3) readonly restrict buffer globalBuffer { float2 c_oMatch5[256]; float2 c_oMatch6[256]; }; layout(push_constant, std430) uniform Params { uint p_numRefinements; - uint p_padding[3]; + uint p_textureIndex; + uint p_padding[2]; } params; @@ -414,7 +416,7 @@ void main() { const uint2 pixelsToLoadBase = gl_GlobalInvocationID.xy << 2u; for (uint i = 0u; i < 16u; ++i) { const uint2 pixelsToLoad = pixelsToLoadBase + uint2(i & 0x03u, i >> 2u); - const float3 srcPixels0 = OGRE_Load2D(srcTex, int2(pixelsToLoad), 0).xyz; + const float3 srcPixels0 = OGRE_Load2D(sampler2D(srcTextures[params.p_textureIndex], SAMPLER_NEAREST_CLAMP), int2(pixelsToLoad), 0).xyz; srcPixelsBlock[i] = packUnorm4x8(float4(srcPixels0, 1.0f)); bAllColorsEqual = bAllColorsEqual && srcPixelsBlock[0] == srcPixelsBlock[i]; } @@ -479,5 +481,5 @@ void main() { outputBytes.y = mask; uint2 dstUV = gl_GlobalInvocationID.xy; - imageStore(dstTexture, int2(dstUV), uint4(outputBytes.xy, 0u, 0u)); + imageStore(dstTextures[params.p_textureIndex], int2(dstUV), uint4(outputBytes.xy, 0u, 0u)); } diff --git a/modules/betsy/bc4.glsl b/modules/betsy/bc4.glsl index b7a5f6a6867..efa99eaa979 100644 --- a/modules/betsy/bc4.glsl +++ b/modules/betsy/bc4.glsl @@ -14,12 +14,14 @@ signed = "#define SNORM"; shared float2 g_minMaxValues[4u * 4u * 4u]; shared uint2 g_mask[4u * 4u]; -layout(binding = 0) uniform sampler2D srcTex; -layout(binding = 1, rg32ui) uniform restrict writeonly uimage2D dstTexture; +layout(binding = 0) uniform texture2D srcTextures[32]; +layout(binding = 1) uniform sampler SAMPLER_NEAREST_CLAMP; +layout(binding = 2, rgba32ui) uniform restrict writeonly uimage2D dstTextures[32]; layout(push_constant, std430) uniform Params { uint p_channelIdx; - uint p_padding[3]; + uint p_textureIndex; + uint p_padding[2]; } params; @@ -50,7 +52,7 @@ void main() { for (uint i = 0u; i < 4u; ++i) { const uint2 pixelsToLoad = pixelsToLoadBase + uint2(i, blockThreadId); - const float4 value = OGRE_Load2D(srcTex, int2(pixelsToLoad), 0).xyzw; + const float4 value = OGRE_Load2D(sampler2D(srcTextures[params.p_textureIndex], SAMPLER_NEAREST_CLAMP), int2(pixelsToLoad), 0).xyzw; srcPixel[i] = params.p_channelIdx == 0 ? value.x : (params.p_channelIdx == 1 ? value.y : value.w); srcPixel[i] *= 255.0f; } @@ -146,6 +148,6 @@ void main() { outputBytes.y = g_mask[maskIdxBase].y; uint2 dstUV = gl_GlobalInvocationID.yz; - imageStore(dstTexture, int2(dstUV), uint4(outputBytes.xy, 0u, 0u)); + imageStore(dstTextures[params.p_textureIndex], int2(dstUV), uint4(outputBytes.xy, 0u, 0u)); } } diff --git a/modules/betsy/bc6h.glsl b/modules/betsy/bc6h.glsl index 37e7591aea2..ca5a7a7d3a1 100644 --- a/modules/betsy/bc6h.glsl +++ b/modules/betsy/bc6h.glsl @@ -31,12 +31,13 @@ float f16tof32(uint value) { return unpackHalf2x16(value.x).x; } -layout(binding = 0) uniform sampler2D srcTexture; -layout(binding = 1, rgba32ui) uniform restrict writeonly uimage2D dstTexture; +layout(binding = 0) uniform texture2D srcTextures[32]; +layout(binding = 1) uniform sampler SAMPLER_NEAREST_CLAMP; +layout(binding = 2, rgba32ui) uniform restrict writeonly uimage2D dstTextures[32]; layout(push_constant, std430) uniform Params { float2 p_textureSizeRcp; - uint padding0; + uint p_textureIndex; uint padding1; } params; @@ -663,18 +664,18 @@ void main() { float2 block1UV = uv + float2(2.0f * params.p_textureSizeRcp.x, 0.0f); float2 block2UV = uv + float2(0.0f, 2.0f * params.p_textureSizeRcp.y); float2 block3UV = uv + float2(2.0f * params.p_textureSizeRcp.x, 2.0f * params.p_textureSizeRcp.y); - float4 block0X = OGRE_GatherRed(srcTexture, pointSampler, block0UV); - float4 block1X = OGRE_GatherRed(srcTexture, pointSampler, block1UV); - float4 block2X = OGRE_GatherRed(srcTexture, pointSampler, block2UV); - float4 block3X = OGRE_GatherRed(srcTexture, pointSampler, block3UV); - float4 block0Y = OGRE_GatherGreen(srcTexture, pointSampler, block0UV); - float4 block1Y = OGRE_GatherGreen(srcTexture, pointSampler, block1UV); - float4 block2Y = OGRE_GatherGreen(srcTexture, pointSampler, block2UV); - float4 block3Y = OGRE_GatherGreen(srcTexture, pointSampler, block3UV); - float4 block0Z = OGRE_GatherBlue(srcTexture, pointSampler, block0UV); - float4 block1Z = OGRE_GatherBlue(srcTexture, pointSampler, block1UV); - float4 block2Z = OGRE_GatherBlue(srcTexture, pointSampler, block2UV); - float4 block3Z = OGRE_GatherBlue(srcTexture, pointSampler, block3UV); + float4 block0X = OGRE_GatherRed(sampler2D(srcTextures[params.p_textureIndex], SAMPLER_NEAREST_CLAMP), block0UV); + float4 block1X = OGRE_GatherRed(sampler2D(srcTextures[params.p_textureIndex], SAMPLER_NEAREST_CLAMP), block1UV); + float4 block2X = OGRE_GatherRed(sampler2D(srcTextures[params.p_textureIndex], SAMPLER_NEAREST_CLAMP), block2UV); + float4 block3X = OGRE_GatherRed(sampler2D(srcTextures[params.p_textureIndex], SAMPLER_NEAREST_CLAMP), block3UV); + float4 block0Y = OGRE_GatherGreen(sampler2D(srcTextures[params.p_textureIndex], SAMPLER_NEAREST_CLAMP), block0UV); + float4 block1Y = OGRE_GatherGreen(sampler2D(srcTextures[params.p_textureIndex], SAMPLER_NEAREST_CLAMP), block1UV); + float4 block2Y = OGRE_GatherGreen(sampler2D(srcTextures[params.p_textureIndex], SAMPLER_NEAREST_CLAMP), block2UV); + float4 block3Y = OGRE_GatherGreen(sampler2D(srcTextures[params.p_textureIndex], SAMPLER_NEAREST_CLAMP), block3UV); + float4 block0Z = OGRE_GatherBlue(sampler2D(srcTextures[params.p_textureIndex], SAMPLER_NEAREST_CLAMP), block0UV); + float4 block1Z = OGRE_GatherBlue(sampler2D(srcTextures[params.p_textureIndex], SAMPLER_NEAREST_CLAMP), block1UV); + float4 block2Z = OGRE_GatherBlue(sampler2D(srcTextures[params.p_textureIndex], SAMPLER_NEAREST_CLAMP), block2UV); + float4 block3Z = OGRE_GatherBlue(sampler2D(srcTextures[params.p_textureIndex], SAMPLER_NEAREST_CLAMP), block3UV); float3 texels[16]; texels[0] = float3(block0X.w, block0Y.w, block0Z.w); @@ -715,5 +716,5 @@ void main() { EncodeP2Pattern(block, blockMSLE, bestPattern, texels); #endif - imageStore(dstTexture, int2(gl_GlobalInvocationID.xy), block); + imageStore(dstTextures[params.p_textureIndex], int2(gl_GlobalInvocationID.xy), block); } diff --git a/modules/betsy/image_compress_betsy.cpp b/modules/betsy/image_compress_betsy.cpp index 4138ad7679b..0e5ea8c204c 100644 --- a/modules/betsy/image_compress_betsy.cpp +++ b/modules/betsy/image_compress_betsy.cpp @@ -133,6 +133,28 @@ void BetsyCompressor::_init() { src_sampler = compress_rd->sampler_create(src_sampler_state); + RD::TextureFormat default_format; + { + default_format.array_layers = 1; + default_format.width = 1; + default_format.height = 1; + default_format.depth = 1; + default_format.mipmaps = 1; + default_format.texture_type = RD::TEXTURE_TYPE_2D; + default_format.format = RD::DATA_FORMAT_R8_UINT; + default_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; + } + + Vector> default_data; + default_data.resize(1); + default_data.write[0].resize(1); + + default_tex = compress_rd->texture_create(default_format, RD::TextureView(), default_data); + + default_format.format = RD::DATA_FORMAT_R32G32B32A32_UINT; + default_format.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT; + default_image = compress_rd->texture_create(default_format, RD::TextureView()); + // Initialize RDShaderFiles. { Ref bc1_shader; @@ -252,6 +274,8 @@ void BetsyCompressor::_thread_exit() { } compress_rd->free(src_sampler); + compress_rd->free(default_image); + compress_rd->free(default_tex); // Clear the shader cache, pipelines will be unreferenced automatically. for (int i = 0; i < BETSY_SHADER_MAX; i++) { @@ -282,7 +306,7 @@ void BetsyCompressor::finish() { // Helper functions. -static int get_next_multiple(int n, int m) { +static inline int get_next_multiple(int n, int m) { return n + (m - (n % m)); } @@ -417,19 +441,6 @@ Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) { dst_texture_format.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT; dst_texture_format.format = dst_rd_format; - RD::TextureFormat dst_texture_format_alpha; - RD::TextureFormat dst_texture_format_combined; - - if (needs_alpha_block) { - dst_texture_format_combined = dst_texture_format; - dst_texture_format_combined.format = RD::DATA_FORMAT_R32G32B32A32_UINT; - - dst_texture_format.usage_bits |= RD::TEXTURE_USAGE_SAMPLING_BIT; - - dst_texture_format_alpha = dst_texture_format; - dst_texture_format_alpha.format = RD::DATA_FORMAT_R32G32_UINT; - } - // Encoding table setup. if ((dest_format == Image::FORMAT_DXT1 || dest_format == Image::FORMAT_DXT5) && dxt1_encoding_table_buffer.is_null()) { Vector data; @@ -441,70 +452,120 @@ Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) { const int mip_count = r_img->get_mipmap_count() + 1; - // Container for the compressed data. - Vector dst_data; - dst_data.resize(Image::get_image_data_size(r_img->get_width(), r_img->get_height(), dest_format, r_img->has_mipmaps())); - uint8_t *dst_data_ptr = dst_data.ptrw(); + Vector mipmaps; - Vector> src_images; - src_images.push_back(Vector()); - Vector *src_image_ptr = src_images.ptrw(); + // First pass: Prepare the mipmaps. + { + Vector> src_images; + src_images.push_back(Vector()); + Vector *src_image_ptr = src_images.ptrw(); - // Compress each mipmap. - for (int i = 0; i < mip_count; i++) { - int64_t ofs, size; - int width, height; - r_img->get_mipmap_offset_size_and_dimensions(i, ofs, size, width, height); + RD::TextureFormat dst_texture_format_combined = dst_texture_format; + dst_texture_format_combined.format = RD::DATA_FORMAT_R32G32B32A32_UINT; - // Set the source texture width and size. - src_texture_format.height = height; - src_texture_format.width = width; + if (needs_alpha_block) { + dst_texture_format.usage_bits |= RD::TEXTURE_USAGE_SAMPLING_BIT; + } - // Set the destination texture width and size. - dst_texture_format.height = (height + 3) >> 2; - dst_texture_format.width = (width + 3) >> 2; + RD::TextureFormat dst_texture_format_alpha = dst_texture_format; + dst_texture_format_alpha.format = RD::DATA_FORMAT_R32G32_UINT; - // Create a buffer filled with the source mip layer data. - src_image_ptr[0].resize(size); - memcpy(src_image_ptr[0].ptrw(), r_img->ptr() + ofs, size); + for (int i = 0; i < mip_count; i++) { + int64_t ofs, size; + int width, height; + r_img->get_mipmap_offset_size_and_dimensions(i, ofs, size, width, height); - // Create the textures on the GPU. - RID src_texture = compress_rd->texture_create(src_texture_format, RD::TextureView(), src_images); - RID dst_texture_primary = compress_rd->texture_create(dst_texture_format, RD::TextureView()); + // Set the source texture width and size. + src_texture_format.height = height; + src_texture_format.width = width; - { - Vector uniforms; + // Set the destination texture width and size. + dst_texture_format.height = (height + 3) >> 2; + dst_texture_format.width = (width + 3) >> 2; + + dst_texture_format_combined.height = dst_texture_format.height; + dst_texture_format_combined.width = dst_texture_format.width; + + dst_texture_format_alpha.height = dst_texture_format.height; + dst_texture_format_alpha.width = dst_texture_format.width; + + // Create a buffer filled with the source mip layer data. + src_image_ptr[0].resize(size); + memcpy(src_image_ptr[0].ptrw(), r_img->ptr() + ofs, size); + + // Create the textures on the GPU. + BetsyMipmap mipmap; { - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; - u.binding = 0; - u.append_id(src_sampler); - u.append_id(src_texture); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 1; - u.append_id(dst_texture_primary); - uniforms.push_back(u); - } - - if (dest_format == Image::FORMAT_DXT1 || dest_format == Image::FORMAT_DXT5) { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 2; - u.append_id(dxt1_encoding_table_buffer); - uniforms.push_back(u); + mipmap.src_texture = compress_rd->texture_create(src_texture_format, RD::TextureView(), src_images); + if (needs_alpha_block) { + mipmap.dst_temp_primary_texture = compress_rd->texture_create(dst_texture_format, RD::TextureView()); + mipmap.dst_temp_second_texture = compress_rd->texture_create(dst_texture_format_alpha, RD::TextureView()); + mipmap.dst_texture = compress_rd->texture_create(dst_texture_format_combined, RD::TextureView()); + } else { + mipmap.dst_texture = compress_rd->texture_create(dst_texture_format, RD::TextureView()); } + mipmap.width = width; + mipmap.height = height; } - RID uniform_set = compress_rd->uniform_set_create(uniforms, shader.compiled, 0); - RD::ComputeListID compute_list = compress_rd->compute_list_begin(); + mipmaps.push_back(mipmap); + } + } - compress_rd->compute_list_bind_compute_pipeline(compute_list, shader.pipeline); - compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0); + // Second pass: Compress the mipmaps concurrently. + { + Vector uniforms; + { + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 0; + for (int i = 0; i < 32; i++) { + u.append_id(i < mip_count ? mipmaps[i].src_texture : default_tex); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 1; + u.append_id(src_sampler); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 2; + if (needs_alpha_block) { + for (int i = 0; i < 32; i++) { + u.append_id(i < mip_count ? mipmaps[i].dst_temp_primary_texture : default_image); + } + } else { + for (int i = 0; i < 32; i++) { + u.append_id(i < mip_count ? mipmaps[i].dst_texture : default_image); + } + } + uniforms.push_back(u); + } + + if (dest_format == Image::FORMAT_DXT1 || dest_format == Image::FORMAT_DXT5) { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 3; + u.append_id(dxt1_encoding_table_buffer); + uniforms.push_back(u); + } + } + + RID uniform_set = compress_rd->uniform_set_create(uniforms, shader.compiled, 0); + RD::ComputeListID compute_list = compress_rd->compute_list_begin(); + + compress_rd->compute_list_bind_compute_pipeline(compute_list, shader.pipeline); + compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0); + + for (int i = 0; i < mip_count; i++) { + const int width = mipmaps[i].width; + const int height = mipmaps[i].height; switch (shader_type) { case BETSY_SHADER_BC6_SIGNED: @@ -512,6 +573,7 @@ Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) { BC6PushConstant push_constant; push_constant.sizeX = 1.0f / width; push_constant.sizeY = 1.0f / height; + push_constant.index = i; compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC6PushConstant)); compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1); @@ -520,6 +582,7 @@ Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) { case BETSY_SHADER_BC1_STANDARD: { BC1PushConstant push_constant; push_constant.num_refines = 2; + push_constant.index = i; compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC1PushConstant)); compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1); @@ -528,6 +591,7 @@ Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) { case BETSY_SHADER_BC4_UNSIGNED: { BC4PushConstant push_constant; push_constant.channel_idx = 0; + push_constant.index = i; compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC4PushConstant)); compress_rd->compute_list_dispatch(compute_list, 1, get_next_multiple(width, 16) / 16, get_next_multiple(height, 16) / 16); @@ -536,126 +600,161 @@ Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) { default: { } break; } - - compress_rd->compute_list_end(); - - if (!needs_alpha_block) { - compress_rd->submit(); - compress_rd->sync(); - } } - RID dst_texture_rid = dst_texture_primary; - - if (needs_alpha_block) { - // Set the destination texture width and size. - dst_texture_format_alpha.height = (height + 3) >> 2; - dst_texture_format_alpha.width = (width + 3) >> 2; - - RID dst_texture_alpha = compress_rd->texture_create(dst_texture_format_alpha, RD::TextureView()); + compress_rd->compute_list_end(); + } + if (needs_alpha_block) { + // Third pass: Compress the alpha channel. + { + Vector uniforms; { - Vector uniforms; { - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; - u.binding = 0; - u.append_id(src_sampler); - u.append_id(src_texture); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 1; - u.append_id(dst_texture_alpha); - uniforms.push_back(u); + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 0; + for (int i = 0; i < 32; i++) { + u.append_id(i < mip_count ? mipmaps[i].src_texture : default_tex); } + uniforms.push_back(u); } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 1; + u.append_id(src_sampler); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 2; + for (int i = 0; i < 32; i++) { + u.append_id(i < mip_count ? mipmaps[i].dst_temp_second_texture : default_image); + } + uniforms.push_back(u); + } + } - RID uniform_set = compress_rd->uniform_set_create(uniforms, secondary_shader.compiled, 0); - RD::ComputeListID compute_list = compress_rd->compute_list_begin(); + RID uniform_set = compress_rd->uniform_set_create(uniforms, secondary_shader.compiled, 0); + RD::ComputeListID compute_list = compress_rd->compute_list_begin(); - compress_rd->compute_list_bind_compute_pipeline(compute_list, secondary_shader.pipeline); - compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0); + compress_rd->compute_list_bind_compute_pipeline(compute_list, secondary_shader.pipeline); + compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0); + + for (int i = 0; i < mip_count; i++) { + const int width = mipmaps[i].width; + const int height = mipmaps[i].height; BC4PushConstant push_constant; push_constant.channel_idx = dest_format == Image::FORMAT_DXT5 ? 3 : 1; + push_constant.index = i; compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC4PushConstant)); compress_rd->compute_list_dispatch(compute_list, 1, get_next_multiple(width, 16) / 16, get_next_multiple(height, 16) / 16); - - compress_rd->compute_list_end(); } - // Stitching - - // Set the destination texture width and size. - dst_texture_format_combined.height = (height + 3) >> 2; - dst_texture_format_combined.width = (width + 3) >> 2; - - RID dst_texture_combined = compress_rd->texture_create(dst_texture_format_combined, RD::TextureView()); - - { - Vector uniforms; - { - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; - u.binding = 0; - u.append_id(src_sampler); - u.append_id(dest_format == Image::FORMAT_DXT5 ? dst_texture_alpha : dst_texture_primary); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; - u.binding = 1; - u.append_id(src_sampler); - u.append_id(dest_format == Image::FORMAT_DXT5 ? dst_texture_primary : dst_texture_alpha); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 2; - u.append_id(dst_texture_combined); - uniforms.push_back(u); - } - } - - RID uniform_set = compress_rd->uniform_set_create(uniforms, stitch_shader.compiled, 0); - RD::ComputeListID compute_list = compress_rd->compute_list_begin(); - - compress_rd->compute_list_bind_compute_pipeline(compute_list, stitch_shader.pipeline); - compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0); - compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1); - - compress_rd->compute_list_end(); - - compress_rd->submit(); - compress_rd->sync(); - } - - dst_texture_rid = dst_texture_combined; - - compress_rd->free(dst_texture_primary); - compress_rd->free(dst_texture_alpha); + compress_rd->compute_list_end(); } - // Copy data from the GPU to the buffer. - const Vector texture_data = compress_rd->texture_get_data(dst_texture_rid, 0); + // Fourth pass: Stitch the base and alpha channels. + { + Vector uniforms; + { + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 0; + if (dest_format == Image::FORMAT_DXT5) { + for (int i = 0; i < 32; i++) { + u.append_id(i < mip_count ? mipmaps[i].dst_temp_second_texture : default_tex); + } + } else { + for (int i = 0; i < 32; i++) { + u.append_id(i < mip_count ? mipmaps[i].dst_temp_primary_texture : default_tex); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 1; + if (dest_format == Image::FORMAT_DXT5) { + for (int i = 0; i < 32; i++) { + u.append_id(i < mip_count ? mipmaps[i].dst_temp_primary_texture : default_tex); + } + } else { + for (int i = 0; i < 32; i++) { + u.append_id(i < mip_count ? mipmaps[i].dst_temp_second_texture : default_tex); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 2; + u.append_id(src_sampler); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 3; + for (int i = 0; i < 32; i++) { + u.append_id(i < mip_count ? mipmaps[i].dst_texture : default_image); + } + uniforms.push_back(u); + } + } + + RID uniform_set = compress_rd->uniform_set_create(uniforms, stitch_shader.compiled, 0); + RD::ComputeListID compute_list = compress_rd->compute_list_begin(); + + compress_rd->compute_list_bind_compute_pipeline(compute_list, stitch_shader.pipeline); + compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0); + + for (int i = 0; i < mip_count; i++) { + const int width = mipmaps[i].width; + const int height = mipmaps[i].height; + + StitchPushConstant push_constant; + push_constant.index = i; + + compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(StitchPushConstant)); + compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1); + } + + compress_rd->compute_list_end(); + } + } + + compress_rd->submit(); + compress_rd->sync(); + + // Container for the compressed data. + Vector dst_data; + dst_data.resize(Image::get_image_data_size(r_img->get_width(), r_img->get_height(), dest_format, r_img->has_mipmaps())); + uint8_t *dst_data_ptr = dst_data.ptrw(); + + // Copy data from the GPU to the buffer. + for (int i = 0; i < mip_count; i++) { + const Vector texture_data = compress_rd->texture_get_data(mipmaps[i].dst_texture, 0); int64_t dst_ofs = Image::get_image_mipmap_offset(r_img->get_width(), r_img->get_height(), dest_format, i); memcpy(dst_data_ptr + dst_ofs, texture_data.ptr(), texture_data.size()); - // Free the source and dest texture. - compress_rd->free(src_texture); - compress_rd->free(dst_texture_rid); - } + // Clear the textures. + compress_rd->free(mipmaps[i].src_texture); + compress_rd->free(mipmaps[i].dst_texture); - src_images.clear(); + if (needs_alpha_block) { + compress_rd->free(mipmaps[i].dst_temp_primary_texture); + compress_rd->free(mipmaps[i].dst_temp_second_texture); + } + } // Set the compressed data to the image. r_img->set_data(r_img->get_width(), r_img->get_height(), r_img->has_mipmaps(), dest_format, dst_data); diff --git a/modules/betsy/image_compress_betsy.h b/modules/betsy/image_compress_betsy.h index 841b6f61f9c..ff02683837d 100644 --- a/modules/betsy/image_compress_betsy.h +++ b/modules/betsy/image_compress_betsy.h @@ -73,16 +73,24 @@ enum BetsyShaderType { struct BC6PushConstant { float sizeX; float sizeY; - uint32_t padding[2] = { 0 }; + uint32_t index; + uint32_t padding = 0; }; struct BC1PushConstant { uint32_t num_refines; - uint32_t padding[3] = { 0 }; + uint32_t index; + uint32_t padding[2] = { 0 }; }; struct BC4PushConstant { uint32_t channel_idx; + uint32_t index; + uint32_t padding[2] = { 0 }; +}; + +struct StitchPushConstant { + uint32_t index; uint32_t padding[3] = { 0 }; }; @@ -101,11 +109,22 @@ class BetsyCompressor : public Object { RID pipeline; }; + struct BetsyMipmap { + RID src_texture; + RID dst_temp_primary_texture; + RID dst_temp_second_texture; + RID dst_texture; + uint32_t width; + uint32_t height; + }; + // Resources shared by all compression formats. RenderingDevice *compress_rd = nullptr; RenderingContextDriver *compress_rcd = nullptr; BetsyShader cached_shaders[BETSY_SHADER_MAX]; RID src_sampler; + RID default_tex; + RID default_image; // Format-specific resources. RID dxt1_encoding_table_buffer;