1
0
Fork 0

Merge pull request #70065 from clayjohn/GLES3-attribs

Use instanced array buffer instead of UBO for canvas item batching
This commit is contained in:
Rémi Verschelde 2022-12-15 18:45:07 +01:00 committed by GitHub
commit 47ef0549ee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 235 additions and 207 deletions

View File

@ -568,9 +568,8 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou
_new_batch(batch_broken, index);
// Override the start position and index as we want to start from where we finished off last time.
state.canvas_instance_batches[state.current_batch_index].start = r_last_index * sizeof(InstanceData);
state.canvas_instance_batches[state.current_batch_index].start = r_last_index;
index = 0;
_align_instance_data_buffer(index);
for (int i = 0; i < p_item_count; i++) {
Item *ci = items[i];
@ -630,14 +629,14 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou
}
// Copy over all data needed for rendering.
glBindBuffer(GL_UNIFORM_BUFFER, state.canvas_instance_data_buffers[state.current_buffer].ubo);
glBindBuffer(GL_ARRAY_BUFFER, state.canvas_instance_data_buffers[state.current_buffer].buffer);
#ifdef WEB_ENABLED
glBufferSubData(GL_UNIFORM_BUFFER, r_last_index * sizeof(InstanceData), sizeof(InstanceData) * index, state.instance_data_array);
glBufferSubData(GL_ARRAY_BUFFER, r_last_index * sizeof(InstanceData), sizeof(InstanceData) * index, state.instance_data_array);
#else
// On Desktop and mobile we map the memory without synchronizing for maximum speed.
void *ubo = glMapBufferRange(GL_UNIFORM_BUFFER, r_last_index * sizeof(InstanceData), index * sizeof(InstanceData), GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
memcpy(ubo, state.instance_data_array, index * sizeof(InstanceData));
glUnmapBuffer(GL_UNIFORM_BUFFER);
void *buffer = glMapBufferRange(GL_ARRAY_BUFFER, r_last_index * sizeof(InstanceData), index * sizeof(InstanceData), GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
memcpy(buffer, state.instance_data_array, index * sizeof(InstanceData));
glUnmapBuffer(GL_ARRAY_BUFFER);
#endif
glDisable(GL_SCISSOR_TEST);
@ -664,7 +663,17 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou
uint64_t specialization = 0;
specialization |= uint64_t(state.canvas_instance_batches[i].lights_disabled);
specialization |= uint64_t(!GLES3::Config::get_singleton()->float_texture_supported) << 1;
bool success = _bind_material(material_data, variant, specialization);
RID shader_version = data.canvas_shader_default_version;
if (material_data) {
if (material_data->shader_data->version.is_valid() && material_data->shader_data->valid) {
// Bind uniform buffer and textures
material_data->bind_uniforms();
shader_version = material_data->shader_data->version;
}
}
bool success = GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(shader_version, variant, specialization);
if (!success) {
continue;
}
@ -1217,26 +1226,15 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) {
_bind_canvas_texture(state.canvas_instance_batches[p_index].tex, state.canvas_instance_batches[p_index].filter, state.canvas_instance_batches[p_index].repeat);
// Bind the region of the UBO used by this batch.
// If region exceeds the boundary of the UBO, just ignore.
uint32_t range_bytes = data.max_instances_per_batch * sizeof(InstanceData);
if (state.canvas_instance_batches[p_index].start >= (data.max_instances_per_ubo - 1) * sizeof(InstanceData)) {
return;
} else if (state.canvas_instance_batches[p_index].start >= (data.max_instances_per_ubo - data.max_instances_per_batch) * sizeof(InstanceData)) {
// If we have less than a full batch at the end, we can just draw it anyway.
// OpenGL will complain about the UBO being smaller than expected, but it should render fine.
range_bytes = (data.max_instances_per_ubo - 1) * sizeof(InstanceData) - state.canvas_instance_batches[p_index].start;
}
uint32_t range_start = state.canvas_instance_batches[p_index].start;
glBindBufferRange(GL_UNIFORM_BUFFER, INSTANCE_UNIFORM_LOCATION, state.canvas_instance_data_buffers[state.current_buffer].ubo, range_start, range_bytes);
switch (state.canvas_instance_batches[p_index].command_type) {
case Item::Command::TYPE_RECT:
case Item::Command::TYPE_NINEPATCH: {
glBindVertexArray(data.indexed_quad_array);
glDrawElements(GL_TRIANGLES, state.canvas_instance_batches[p_index].instance_count * 6, GL_UNSIGNED_INT, 0);
glBindBuffer(GL_UNIFORM_BUFFER, 0);
glBindBuffer(GL_ARRAY_BUFFER, state.canvas_instance_data_buffers[state.current_buffer].buffer);
uint32_t range_start = state.canvas_instance_batches[p_index].start * sizeof(InstanceData);
_enable_attributes(range_start, false);
glDrawElementsInstanced(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0, state.canvas_instance_batches[p_index].instance_count);
glBindVertexArray(0);
} break;
@ -1248,18 +1246,21 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) {
ERR_FAIL_COND(!pb);
glBindVertexArray(pb->vertex_array);
glBindBuffer(GL_ARRAY_BUFFER, state.canvas_instance_data_buffers[state.current_buffer].buffer);
uint32_t range_start = state.canvas_instance_batches[p_index].start * sizeof(InstanceData);
_enable_attributes(range_start, false);
if (pb->color_disabled && pb->color != Color(1.0, 1.0, 1.0, 1.0)) {
glVertexAttrib4f(RS::ARRAY_COLOR, pb->color.r, pb->color.g, pb->color.b, pb->color.a);
}
if (pb->index_buffer != 0) {
glDrawElements(prim[polygon->primitive], pb->count, GL_UNSIGNED_INT, nullptr);
glDrawElementsInstanced(prim[polygon->primitive], pb->count, GL_UNSIGNED_INT, nullptr, 1);
} else {
glDrawArrays(prim[polygon->primitive], 0, pb->count);
glDrawArraysInstanced(prim[polygon->primitive], 0, pb->count, 1);
}
glBindVertexArray(0);
glBindBuffer(GL_UNIFORM_BUFFER, 0);
if (pb->color_disabled && pb->color != Color(1.0, 1.0, 1.0, 1.0)) {
// Reset so this doesn't pollute other draw calls.
@ -1269,14 +1270,16 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) {
case Item::Command::TYPE_PRIMITIVE: {
glBindVertexArray(data.canvas_quad_array);
glBindBuffer(GL_ARRAY_BUFFER, state.canvas_instance_data_buffers[state.current_buffer].buffer);
uint32_t range_start = state.canvas_instance_batches[p_index].start * sizeof(InstanceData);
_enable_attributes(range_start, true);
const GLenum primitive[5] = { GL_POINTS, GL_POINTS, GL_LINES, GL_TRIANGLES, GL_TRIANGLES };
int instance_count = state.canvas_instance_batches[p_index].instance_count;
if (instance_count > 1) {
ERR_FAIL_COND(instance_count <= 0);
if (instance_count >= 1) {
glDrawArraysInstanced(primitive[state.canvas_instance_batches[p_index].primitive_points], 0, state.canvas_instance_batches[p_index].primitive_points, instance_count);
} else {
glDrawArrays(primitive[state.canvas_instance_batches[p_index].primitive_points], 0, state.canvas_instance_batches[p_index].primitive_points);
}
glBindBuffer(GL_UNIFORM_BUFFER, 0);
} break;
@ -1370,6 +1373,11 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) {
index_array_gl = mesh_storage->mesh_surface_get_index_buffer(surface, 0);
bool use_index_buffer = false;
glBindVertexArray(vertex_array_gl);
glBindBuffer(GL_ARRAY_BUFFER, state.canvas_instance_data_buffers[state.current_buffer].buffer);
uint32_t range_start = state.canvas_instance_batches[p_index].start * sizeof(InstanceData);
_enable_attributes(range_start, false, instance_count);
if (index_array_gl != 0) {
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_array_gl);
use_index_buffer = true;
@ -1396,20 +1404,13 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) {
}
GLenum primitive_gl = prim[int(primitive)];
if (instance_count == 1) {
if (use_index_buffer) {
glDrawElements(primitive_gl, mesh_storage->mesh_surface_get_vertices_drawn_count(surface), mesh_storage->mesh_surface_get_index_type(surface), 0);
} else {
glDrawArrays(primitive_gl, 0, mesh_storage->mesh_surface_get_vertices_drawn_count(surface));
}
} else if (instance_count > 1) {
if (use_index_buffer) {
glDrawElementsInstanced(primitive_gl, mesh_storage->mesh_surface_get_vertices_drawn_count(surface), mesh_storage->mesh_surface_get_index_type(surface), 0, instance_count);
} else {
glDrawArraysInstanced(primitive_gl, 0, mesh_storage->mesh_surface_get_vertices_drawn_count(surface), instance_count);
}
}
if (use_index_buffer) {
glDrawElementsInstanced(primitive_gl, mesh_storage->mesh_surface_get_vertices_drawn_count(surface), mesh_storage->mesh_surface_get_index_type(surface), 0, instance_count);
} else {
glDrawArraysInstanced(primitive_gl, 0, mesh_storage->mesh_surface_get_vertices_drawn_count(surface), instance_count);
}
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
if (instance_count > 1) {
glDisableVertexAttribArray(5);
@ -1429,7 +1430,7 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) {
}
void RasterizerCanvasGLES3::_add_to_batch(uint32_t &r_index, bool &r_batch_broken) {
if (r_index >= data.max_instances_per_ubo - 1) {
if (r_index >= data.max_instances_per_buffer - 1) {
ERR_PRINT_ONCE("Trying to draw too many items. Please increase maximum number of items in the project settings 'rendering/gl_compatibility/item_buffer_size'");
return;
}
@ -1457,27 +1458,25 @@ void RasterizerCanvasGLES3::_new_batch(bool &r_batch_broken, uint32_t &r_index)
// Copy the properties of the current batch, we will manually update the things that changed.
Batch new_batch = state.canvas_instance_batches[state.current_batch_index];
new_batch.instance_count = 0;
new_batch.start = state.canvas_instance_batches[state.current_batch_index].start + state.canvas_instance_batches[state.current_batch_index].instance_count * sizeof(InstanceData);
new_batch.start = state.canvas_instance_batches[state.current_batch_index].start + state.canvas_instance_batches[state.current_batch_index].instance_count;
state.current_batch_index++;
state.canvas_instance_batches.push_back(new_batch);
_align_instance_data_buffer(r_index);
}
bool RasterizerCanvasGLES3::_bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant, uint64_t p_specialization) {
if (p_material_data) {
if (p_material_data->shader_data->version.is_valid() && p_material_data->shader_data->valid) {
// Bind uniform buffer and textures
p_material_data->bind_uniforms();
return GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(p_material_data->shader_data->version, p_variant, p_specialization);
} else {
return GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, p_variant, p_specialization);
}
} else {
return GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, p_variant, p_specialization);
void RasterizerCanvasGLES3::_enable_attributes(uint32_t p_start, bool p_primitive, uint32_t p_rate) {
uint32_t split = p_primitive ? 11 : 12;
for (uint32_t i = 6; i < split; i++) {
glEnableVertexAttribArray(i);
glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, sizeof(InstanceData), CAST_INT_TO_UCHAR_PTR(p_start + (i - 6) * 4 * sizeof(float)));
glVertexAttribDivisor(i, p_rate);
}
for (uint32_t i = split; i <= 13; i++) {
glEnableVertexAttribArray(i);
glVertexAttribIPointer(i, 4, GL_UNSIGNED_INT, sizeof(InstanceData), CAST_INT_TO_UCHAR_PTR(p_start + (i - 6) * 4 * sizeof(float)));
glVertexAttribDivisor(i, p_rate);
}
}
RID RasterizerCanvasGLES3::light_create() {
CanvasLight canvas_light;
return canvas_light_owner.make_rid(canvas_light);
@ -2416,8 +2415,8 @@ void RasterizerCanvasGLES3::_allocate_instance_data_buffer() {
GLuint new_buffers[3];
glGenBuffers(3, new_buffers);
// Batch UBO.
glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[0]);
glBufferData(GL_UNIFORM_BUFFER, data.max_instance_buffer_size, nullptr, GL_STREAM_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, new_buffers[0]);
glBufferData(GL_ARRAY_BUFFER, data.max_instance_buffer_size, nullptr, GL_STREAM_DRAW);
// Light uniform buffer.
glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[1]);
glBufferData(GL_UNIFORM_BUFFER, sizeof(LightUniform) * data.max_lights_per_render, nullptr, GL_STREAM_DRAW);
@ -2427,36 +2426,16 @@ void RasterizerCanvasGLES3::_allocate_instance_data_buffer() {
state.current_buffer = (state.current_buffer + 1);
DataBuffer db;
db.ubo = new_buffers[0];
db.buffer = new_buffers[0];
db.light_ubo = new_buffers[1];
db.state_ubo = new_buffers[2];
db.last_frame_used = RSG::rasterizer->get_frame_number();
state.canvas_instance_data_buffers.insert(state.current_buffer, db);
state.current_buffer = state.current_buffer % state.canvas_instance_data_buffers.size();
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindBuffer(GL_UNIFORM_BUFFER, 0);
}
// Batch start positions need to be aligned to the device's GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT.
// This needs to be called anytime a new batch is created.
void RasterizerCanvasGLES3::_align_instance_data_buffer(uint32_t &r_index) {
if (GLES3::Config::get_singleton()->uniform_buffer_offset_alignment > int(sizeof(InstanceData))) {
uint32_t offset = state.canvas_instance_batches[state.current_batch_index].start % GLES3::Config::get_singleton()->uniform_buffer_offset_alignment;
if (offset > 0) {
// uniform_buffer_offset_alignment can be 4, 16, 32, or 256. Our instance batches are 128 bytes.
// Accordingly, this branch is only triggered if we are 128 bytes off.
uint32_t offset_bytes = GLES3::Config::get_singleton()->uniform_buffer_offset_alignment - offset;
state.canvas_instance_batches[state.current_batch_index].start += offset_bytes;
// Offset the instance array so it stays in sync with batch start points.
// This creates gaps in the instance buffer with wasted space, but we can't help it.
r_index += offset_bytes / sizeof(InstanceData);
if (r_index > 0) {
// In this case we need to copy over the basic data.
state.instance_data_array[r_index] = state.instance_data_array[r_index - 1];
}
}
}
}
void RasterizerCanvasGLES3::set_time(double p_time) {
state.time = p_time;
}
@ -2601,12 +2580,12 @@ RasterizerCanvasGLES3::RasterizerCanvasGLES3() {
data.max_instances_per_batch = 128;
} else {
data.max_lights_per_render = 256;
data.max_instances_per_batch = 512;
data.max_instances_per_batch = 2048;
}
// Reserve 3 Uniform Buffers for instance data Frame N, N+1 and N+2
data.max_instances_per_ubo = MAX(data.max_instances_per_batch, uint32_t(GLOBAL_GET("rendering/gl_compatibility/item_buffer_size")));
data.max_instance_buffer_size = data.max_instances_per_ubo * sizeof(InstanceData); // 16,384 instances * 128 bytes = 2,097,152 bytes = 2,048 kb
data.max_instances_per_buffer = MAX(data.max_instances_per_batch, uint32_t(GLOBAL_GET("rendering/gl_compatibility/item_buffer_size")));
data.max_instance_buffer_size = data.max_instances_per_buffer * sizeof(InstanceData); // 16,384 instances * 128 bytes = 2,097,152 bytes = 2,048 kb
state.canvas_instance_data_buffers.resize(3);
state.canvas_instance_batches.reserve(200);
@ -2614,8 +2593,8 @@ RasterizerCanvasGLES3::RasterizerCanvasGLES3() {
GLuint new_buffers[3];
glGenBuffers(3, new_buffers);
// Batch UBO.
glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[0]);
glBufferData(GL_UNIFORM_BUFFER, data.max_instance_buffer_size, nullptr, GL_STREAM_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, new_buffers[0]);
glBufferData(GL_ARRAY_BUFFER, data.max_instance_buffer_size, nullptr, GL_STREAM_DRAW);
// Light uniform buffer.
glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[1]);
glBufferData(GL_UNIFORM_BUFFER, sizeof(LightUniform) * data.max_lights_per_render, nullptr, GL_STREAM_DRAW);
@ -2623,41 +2602,28 @@ RasterizerCanvasGLES3::RasterizerCanvasGLES3() {
glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[2]);
glBufferData(GL_UNIFORM_BUFFER, sizeof(StateBuffer), nullptr, GL_STREAM_DRAW);
DataBuffer db;
db.ubo = new_buffers[0];
db.buffer = new_buffers[0];
db.light_ubo = new_buffers[1];
db.state_ubo = new_buffers[2];
db.last_frame_used = 0;
db.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
state.canvas_instance_data_buffers[i] = db;
}
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindBuffer(GL_UNIFORM_BUFFER, 0);
state.instance_data_array = memnew_arr(InstanceData, data.max_instances_per_ubo);
state.instance_data_array = memnew_arr(InstanceData, data.max_instances_per_buffer);
state.light_uniforms = memnew_arr(LightUniform, data.max_lights_per_render);
{
const uint32_t no_of_instances = data.max_instances_per_batch;
const uint32_t indices[6] = { 0, 2, 1, 3, 2, 0 };
glGenVertexArrays(1, &data.indexed_quad_array);
glBindVertexArray(data.indexed_quad_array);
glBindBuffer(GL_ARRAY_BUFFER, data.canvas_quad_vertices);
const uint32_t num_indices = 6;
const uint32_t quad_indices[num_indices] = { 0, 2, 1, 3, 2, 0 };
const uint32_t total_indices = no_of_instances * num_indices;
uint32_t *indices = new uint32_t[total_indices];
for (uint32_t i = 0; i < total_indices; i++) {
uint32_t quad = i / num_indices;
uint32_t quad_local = i % num_indices;
indices[i] = quad_indices[quad_local] + quad * num_indices;
}
glGenBuffers(1, &data.indexed_quad_buffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.indexed_quad_buffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(uint32_t) * total_indices, indices, GL_STATIC_DRAW);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(uint32_t) * 6, indices, GL_STATIC_DRAW);
glBindVertexArray(0);
delete[] indices;
}
String global_defines;

View File

@ -245,7 +245,7 @@ public:
uint32_t max_lights_per_render = 256;
uint32_t max_lights_per_item = 16;
uint32_t max_instances_per_batch = 512;
uint32_t max_instances_per_ubo = 16384;
uint32_t max_instances_per_buffer = 16384;
uint32_t max_instance_buffer_size = 16384 * 128;
} data;
@ -278,7 +278,7 @@ public:
// We track them and ensure that they don't get reused until at least 2 frames have passed
// to avoid the GPU stalling to wait for a resource to become available.
struct DataBuffer {
GLuint ubo = 0;
GLuint buffer = 0;
GLuint light_ubo = 0;
GLuint state_ubo = 0;
uint64_t last_frame_used = -3;
@ -359,6 +359,7 @@ public:
void _add_to_batch(uint32_t &r_index, bool &r_batch_broken);
void _allocate_instance_data_buffer();
void _align_instance_data_buffer(uint32_t &r_index);
void _enable_attributes(uint32_t p_start, bool p_primitive, uint32_t p_rate = 1);
void set_time(double p_time);

View File

@ -11,6 +11,7 @@ mode_instanced = #define USE_ATTRIBUTES \n#define USE_INSTANCING
DISABLE_LIGHTING = false
USE_RGBA_SHADOWS = false
SINGLE_INSTANCE = false
#[vertex]
@ -25,10 +26,75 @@ layout(location = 1) in highp vec4 instance_xform0;
layout(location = 2) in highp vec4 instance_xform1;
layout(location = 5) in highp uvec4 instance_color_custom_data; // Color packed into xy, custom_data packed into zw for compatibility with 3D
#endif // USE_INSTANCING
#endif // USE_ATTRIBUTES
#include "stdlib_inc.glsl"
layout(location = 6) in highp vec4 attrib_A;
layout(location = 7) in highp vec4 attrib_B;
layout(location = 8) in highp vec4 attrib_C;
layout(location = 9) in highp vec4 attrib_D;
layout(location = 10) in highp vec4 attrib_E;
#ifdef USE_PRIMITIVE
layout(location = 11) in highp uvec4 attrib_F;
#else
layout(location = 11) in highp vec4 attrib_F;
#endif
layout(location = 12) in highp uvec4 attrib_G;
layout(location = 13) in highp uvec4 attrib_H;
#define read_draw_data_world_x attrib_A.xy
#define read_draw_data_world_y attrib_A.zw
#define read_draw_data_world_ofs attrib_B.xy
#define read_draw_data_color_texture_pixel_size attrib_B.zw
#ifdef USE_PRIMITIVE
#define read_draw_data_point_a attrib_C.xy
#define read_draw_data_point_b attrib_C.zw
#define read_draw_data_point_c attrib_D.xy
#define read_draw_data_uv_a attrib_D.zw
#define read_draw_data_uv_b attrib_E.xy
#define read_draw_data_uv_c attrib_E.zw
#define read_draw_data_color_a_rg attrib_F.x
#define read_draw_data_color_a_ba attrib_F.y
#define read_draw_data_color_b_rg attrib_F.z
#define read_draw_data_color_b_ba attrib_F.w
#define read_draw_data_color_c_rg attrib_G.x
#define read_draw_data_color_c_ba attrib_G.y
#else
#define read_draw_data_modulation attrib_C
#define read_draw_data_ninepatch_margins attrib_D
#define read_draw_data_dst_rect attrib_E
#define read_draw_data_src_rect attrib_F
#endif
#define read_draw_data_flags attrib_G.z
#define read_draw_data_specular_shininess attrib_G.w
#define read_draw_data_lights attrib_H
// Varyings so the per-instance info can be used in the fragment shader
flat out vec4 varying_A;
flat out vec2 varying_B;
#ifndef USE_PRIMITIVE
flat out vec4 varying_C;
#ifndef USE_ATTRIBUTES
#ifdef USE_NINEPATCH
flat out vec2 varying_D;
#endif
flat out vec4 varying_E;
#endif
#endif
flat out uvec2 varying_F;
flat out uvec4 varying_G;
// This needs to be outside clang-format so the ubo comment is in the right place
#ifdef MATERIAL_UNIFORMS_USED
layout(std140) uniform MaterialUniforms{ //ubo:4
@ -39,12 +105,10 @@ layout(std140) uniform MaterialUniforms{ //ubo:4
#endif
/* clang-format on */
#include "canvas_uniforms_inc.glsl"
#include "stdlib_inc.glsl"
out vec2 uv_interp;
out vec4 color_interp;
out vec2 vertex_interp;
flat out int draw_data_instance;
#ifdef USE_NINEPATCH
@ -55,35 +119,46 @@ out vec2 pixel_size_interp;
#GLOBALS
void main() {
varying_A = vec4(read_draw_data_world_x, read_draw_data_world_y);
varying_B = read_draw_data_color_texture_pixel_size;
#ifndef USE_PRIMITIVE
varying_C = read_draw_data_ninepatch_margins;
#ifndef USE_ATTRIBUTES
#ifdef USE_NINEPATCH
varying_D = vec2(read_draw_data_dst_rect.z, read_draw_data_dst_rect.w);
#endif // USE_NINEPATCH
varying_E = read_draw_data_src_rect;
#endif // !USE_ATTRIBUTES
#endif // USE_PRIMITIVE
varying_F = uvec2(read_draw_data_flags, read_draw_data_specular_shininess);
varying_G = read_draw_data_lights;
vec4 instance_custom = vec4(0.0);
#ifdef USE_PRIMITIVE
draw_data_instance = gl_InstanceID;
vec2 vertex;
vec2 uv;
vec4 color;
if (gl_VertexID % 3 == 0) {
vertex = draw_data[draw_data_instance].point_a;
uv = draw_data[draw_data_instance].uv_a;
color = vec4(unpackHalf2x16(draw_data[draw_data_instance].color_a_rg), unpackHalf2x16(draw_data[draw_data_instance].color_a_ba));
vertex = read_draw_data_point_a;
uv = read_draw_data_uv_a;
color = vec4(unpackHalf2x16(read_draw_data_color_a_rg), unpackHalf2x16(read_draw_data_color_a_ba));
} else if (gl_VertexID % 3 == 1) {
vertex = draw_data[draw_data_instance].point_b;
uv = draw_data[draw_data_instance].uv_b;
color = vec4(unpackHalf2x16(draw_data[draw_data_instance].color_b_rg), unpackHalf2x16(draw_data[draw_data_instance].color_b_ba));
vertex = read_draw_data_point_b;
uv = read_draw_data_uv_b;
color = vec4(unpackHalf2x16(read_draw_data_color_b_rg), unpackHalf2x16(read_draw_data_color_b_ba));
} else {
vertex = draw_data[draw_data_instance].point_c;
uv = draw_data[draw_data_instance].uv_c;
color = vec4(unpackHalf2x16(draw_data[draw_data_instance].color_c_rg), unpackHalf2x16(draw_data[draw_data_instance].color_c_ba));
vertex = read_draw_data_point_c;
uv = read_draw_data_uv_c;
color = vec4(unpackHalf2x16(read_draw_data_color_c_rg), unpackHalf2x16(read_draw_data_color_c_ba));
}
#elif defined(USE_ATTRIBUTES)
draw_data_instance = gl_InstanceID;
#ifdef USE_INSTANCING
draw_data_instance = 0;
#endif
vec2 vertex = vertex_attrib;
vec4 color = color_attrib * draw_data[draw_data_instance].modulation;
vec4 color = color_attrib * read_draw_data_modulation;
vec2 uv = uv_attrib;
#ifdef USE_INSTANCING
@ -93,30 +168,29 @@ void main() {
#endif
#else
draw_data_instance = gl_VertexID / 6;
vec2 vertex_base_arr[6] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0), vec2(0.0, 0.0), vec2(1.0, 1.0));
vec2 vertex_base = vertex_base_arr[gl_VertexID % 6];
vec2 uv = draw_data[draw_data_instance].src_rect.xy + abs(draw_data[draw_data_instance].src_rect.zw) * ((draw_data[draw_data_instance].flags & FLAGS_TRANSPOSE_RECT) != uint(0) ? vertex_base.yx : vertex_base.xy);
vec4 color = draw_data[draw_data_instance].modulation;
vec2 vertex = draw_data[draw_data_instance].dst_rect.xy + abs(draw_data[draw_data_instance].dst_rect.zw) * mix(vertex_base, vec2(1.0, 1.0) - vertex_base, lessThan(draw_data[draw_data_instance].src_rect.zw, vec2(0.0, 0.0)));
vec2 uv = read_draw_data_src_rect.xy + abs(read_draw_data_src_rect.zw) * ((read_draw_data_flags & FLAGS_TRANSPOSE_RECT) != uint(0) ? vertex_base.yx : vertex_base.xy);
vec4 color = read_draw_data_modulation;
vec2 vertex = read_draw_data_dst_rect.xy + abs(read_draw_data_dst_rect.zw) * mix(vertex_base, vec2(1.0, 1.0) - vertex_base, lessThan(read_draw_data_src_rect.zw, vec2(0.0, 0.0)));
#endif
mat4 model_matrix = mat4(vec4(draw_data[draw_data_instance].world_x, 0.0, 0.0), vec4(draw_data[draw_data_instance].world_y, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(draw_data[draw_data_instance].world_ofs, 0.0, 1.0));
mat4 model_matrix = mat4(vec4(read_draw_data_world_x, 0.0, 0.0), vec4(read_draw_data_world_y, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(read_draw_data_world_ofs, 0.0, 1.0));
#ifdef USE_INSTANCING
model_matrix = model_matrix * transpose(mat4(instance_xform0, instance_xform1, vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)));
#endif // USE_INSTANCING
#if !defined(USE_ATTRIBUTES) && !defined(USE_PRIMITIVE)
if (bool(draw_data[draw_data_instance].flags & FLAGS_USING_PARTICLES)) {
if (bool(read_draw_data_flags & FLAGS_USING_PARTICLES)) {
//scale by texture size
vertex /= draw_data[draw_data_instance].color_texture_pixel_size;
vertex /= read_draw_data_color_texture_pixel_size;
}
#endif
vec2 color_texture_pixel_size = draw_data[draw_data_instance].color_texture_pixel_size.xy;
vec2 color_texture_pixel_size = read_draw_data_color_texture_pixel_size;
#ifdef USE_POINT_SIZE
float point_size = 1.0;
@ -126,7 +200,7 @@ void main() {
}
#ifdef USE_NINEPATCH
pixel_size_interp = abs(draw_data[draw_data_instance].dst_rect.zw) * vertex_base;
pixel_size_interp = abs(read_draw_data_dst_rect.zw) * vertex_base;
#endif
#if !defined(SKIP_TRANSFORM_USED)
@ -159,6 +233,46 @@ void main() {
#include "canvas_uniforms_inc.glsl"
#include "stdlib_inc.glsl"
in vec2 uv_interp;
in vec2 vertex_interp;
in vec4 color_interp;
#ifdef USE_NINEPATCH
in vec2 pixel_size_interp;
#endif
// Can all be flat as they are the same for the whole batched instance
flat in vec4 varying_A;
flat in vec2 varying_B;
#define read_draw_data_world_x varying_A.xy
#define read_draw_data_world_y varying_A.zw
#define read_draw_data_color_texture_pixel_size varying_B
#ifndef USE_PRIMITIVE
flat in vec4 varying_C;
#define read_draw_data_ninepatch_margins varying_C
#ifndef USE_ATTRIBUTES
#ifdef USE_NINEPATCH
flat in vec2 varying_D;
#define read_draw_data_dst_rect_z varying_D.x
#define read_draw_data_dst_rect_w varying_D.y
#endif
flat in vec4 varying_E;
#define read_draw_data_src_rect varying_E
#endif // USE_ATTRIBUTES
#endif // USE_PRIMITIVE
flat in uvec2 varying_F;
flat in uvec4 varying_G;
#define read_draw_data_flags varying_F.x
#define read_draw_data_specular_shininess varying_F.y
#define read_draw_data_lights varying_G
#ifndef DISABLE_LIGHTING
uniform sampler2D atlas_texture; //texunit:-2
uniform sampler2D shadow_atlas_texture; //texunit:-3
@ -170,17 +284,6 @@ uniform sampler2D specular_texture; //texunit:-7
uniform sampler2D color_texture; //texunit:0
in vec2 uv_interp;
in vec4 color_interp;
in vec2 vertex_interp;
flat in int draw_data_instance;
#ifdef USE_NINEPATCH
in vec2 pixel_size_interp;
#endif
layout(location = 0) out vec4 frag_color;
#ifdef MATERIAL_UNIFORMS_USED
@ -366,7 +469,7 @@ float map_ninepatch_axis(float pixel, float draw_size, float tex_pixel_size, flo
} else if (pixel >= draw_size - margin_end) {
return (tex_size - (draw_size - pixel)) * tex_pixel_size;
} else {
if (!bool(draw_data[draw_data_instance].flags & FLAGS_NINEPACH_DRAW_CENTER)) {
if (!bool(read_draw_data_flags & FLAGS_NINEPACH_DRAW_CENTER)) {
draw_center--;
}
@ -414,28 +517,26 @@ void main() {
int draw_center = 2;
uv = vec2(
map_ninepatch_axis(pixel_size_interp.x, abs(draw_data[draw_data_instance].dst_rect.z), draw_data[draw_data_instance].color_texture_pixel_size.x, draw_data[draw_data_instance].ninepatch_margins.x, draw_data[draw_data_instance].ninepatch_margins.z, int(draw_data[draw_data_instance].flags >> FLAGS_NINEPATCH_H_MODE_SHIFT) & 0x3, draw_center),
map_ninepatch_axis(pixel_size_interp.y, abs(draw_data[draw_data_instance].dst_rect.w), draw_data[draw_data_instance].color_texture_pixel_size.y, draw_data[draw_data_instance].ninepatch_margins.y, draw_data[draw_data_instance].ninepatch_margins.w, int(draw_data[draw_data_instance].flags >> FLAGS_NINEPATCH_V_MODE_SHIFT) & 0x3, draw_center));
map_ninepatch_axis(pixel_size_interp.x, abs(read_draw_data_dst_rect_z), read_draw_data_color_texture_pixel_size.x, read_draw_data_ninepatch_margins.x, read_draw_data_ninepatch_margins.z, int(read_draw_data_flags >> FLAGS_NINEPATCH_H_MODE_SHIFT) & 0x3, draw_center),
map_ninepatch_axis(pixel_size_interp.y, abs(read_draw_data_dst_rect_w), read_draw_data_color_texture_pixel_size.y, read_draw_data_ninepatch_margins.y, read_draw_data_ninepatch_margins.w, int(read_draw_data_flags >> FLAGS_NINEPATCH_V_MODE_SHIFT) & 0x3, draw_center));
if (draw_center == 0) {
color.a = 0.0;
}
uv = uv * draw_data[draw_data_instance].src_rect.zw + draw_data[draw_data_instance].src_rect.xy; //apply region if needed
uv = uv * read_draw_data_src_rect.zw + read_draw_data_src_rect.xy; //apply region if needed
#endif
if (bool(draw_data[draw_data_instance].flags & FLAGS_CLIP_RECT_UV)) {
uv = clamp(uv, draw_data[draw_data_instance].src_rect.xy, draw_data[draw_data_instance].src_rect.xy + abs(draw_data[draw_data_instance].src_rect.zw));
if (bool(read_draw_data_flags & FLAGS_CLIP_RECT_UV)) {
uv = clamp(uv, read_draw_data_src_rect.xy, read_draw_data_src_rect.xy + abs(read_draw_data_src_rect.zw));
}
#endif
#ifndef USE_PRIMITIVE
if (bool(draw_data[draw_data_instance].flags & FLAGS_USE_MSDF)) {
float px_range = draw_data[draw_data_instance].ninepatch_margins.x;
float outline_thickness = draw_data[draw_data_instance].ninepatch_margins.y;
//float reserved1 = draw_data[draw_data_instance].ninepatch_margins.z;
//float reserved2 = draw_data[draw_data_instance].ninepatch_margins.w;
if (bool(read_draw_data_flags & FLAGS_USE_MSDF)) {
float px_range = read_draw_data_ninepatch_margins.x;
float outline_thickness = read_draw_data_ninepatch_margins.y;
vec4 msdf_sample = texture(color_texture, uv);
vec2 msdf_size = vec2(textureSize(color_texture, 0));
@ -451,7 +552,7 @@ void main() {
float a = clamp(d * px_size + 0.5, 0.0, 1.0);
color.a = a * color.a;
}
} else if (bool(draw_data[draw_data_instance].flags & FLAGS_USE_LCD)) {
} else if (bool(read_draw_data_flags & FLAGS_USE_LCD)) {
vec4 lcd_sample = texture(color_texture, uv);
if (lcd_sample.a == 1.0) {
color.rgb = lcd_sample.rgb * color.a;
@ -465,7 +566,7 @@ void main() {
color *= texture(color_texture, uv);
}
uint light_count = (draw_data[draw_data_instance].flags >> uint(FLAGS_LIGHT_COUNT_SHIFT)) & uint(0xF); //max 16 lights
uint light_count = (read_draw_data_flags >> uint(FLAGS_LIGHT_COUNT_SHIFT)) & uint(0xF); //max 16 lights
bool using_light = light_count > 0u || directional_light_count > 0u;
vec3 normal;
@ -476,7 +577,7 @@ void main() {
bool normal_used = false;
#endif
if (normal_used || (using_light && bool(draw_data[draw_data_instance].flags & FLAGS_DEFAULT_NORMAL_MAP_USED))) {
if (normal_used || (using_light && bool(read_draw_data_flags & FLAGS_DEFAULT_NORMAL_MAP_USED))) {
normal.xy = texture(normal_texture, uv).xy * vec2(2.0, -2.0) - vec2(1.0, -1.0);
normal.z = sqrt(1.0 - dot(normal.xy, normal.xy));
normal_used = true;
@ -493,9 +594,9 @@ void main() {
bool specular_shininess_used = false;
#endif
if (specular_shininess_used || (using_light && normal_used && bool(draw_data[draw_data_instance].flags & FLAGS_DEFAULT_SPECULAR_MAP_USED))) {
if (specular_shininess_used || (using_light && normal_used && bool(read_draw_data_flags & FLAGS_DEFAULT_SPECULAR_MAP_USED))) {
specular_shininess = texture(specular_texture, uv);
specular_shininess *= godot_unpackUnorm4x8(draw_data[draw_data_instance].specular_shininess);
specular_shininess *= godot_unpackUnorm4x8(read_draw_data_specular_shininess);
specular_shininess_used = true;
} else {
specular_shininess = vec4(1.0);
@ -507,7 +608,7 @@ void main() {
vec2 screen_uv = vec2(0.0);
#endif
vec2 color_texture_pixel_size = draw_data[draw_data_instance].color_texture_pixel_size.xy;
vec2 color_texture_pixel_size = read_draw_data_color_texture_pixel_size.xy;
vec3 light_vertex = vec3(vertex, 0.0);
vec2 shadow_vertex = vertex;
@ -529,7 +630,7 @@ void main() {
if (normal_used) {
//convert by item transform
normal.xy = mat2(normalize(draw_data[draw_data_instance].world_x), normalize(draw_data[draw_data_instance].world_y)) * normal.xy;
normal.xy = mat2(normalize(read_draw_data_world_x), normalize(read_draw_data_world_y)) * normal.xy;
//convert by canvas transform
normal = normalize((canvas_normal_transform * vec4(normal, 0.0)).xyz);
}
@ -591,15 +692,15 @@ void main() {
uint light_base;
if (i < 8u) {
if (i < 4u) {
light_base = draw_data[draw_data_instance].lights[0];
light_base = read_draw_data_lights[0];
} else {
light_base = draw_data[draw_data_instance].lights[1];
light_base = read_draw_data_lights[1];
}
} else {
if (i < 12u) {
light_base = draw_data[draw_data_instance].lights[2];
light_base = read_draw_data_lights[2];
} else {
light_base = draw_data[draw_data_instance].lights[3];
light_base = read_draw_data_lights[3];
}
}
light_base >>= (i & 3u) * 8u;

View File

@ -27,38 +27,6 @@
#define FLAGS_USE_MSDF uint(1 << 28)
#define FLAGS_USE_LCD uint(1 << 29)
// must be always 128 bytes long
struct DrawData {
vec2 world_x;
vec2 world_y;
vec2 world_ofs;
vec2 color_texture_pixel_size;
#ifdef USE_PRIMITIVE
vec2 point_a;
vec2 point_b;
vec2 point_c;
vec2 uv_a;
vec2 uv_b;
vec2 uv_c;
uint color_a_rg;
uint color_a_ba;
uint color_b_rg;
uint color_b_ba;
uint color_c_rg;
uint color_c_ba;
#else
vec4 modulation;
vec4 ninepatch_margins;
vec4 dst_rect; //for built-in rect and UV
vec4 src_rect;
uint pad;
uint pad2;
#endif
uint flags;
uint specular_shininess;
uvec4 lights;
};
layout(std140) uniform GlobalShaderUniformData { //ubo:1
vec4 global_shader_uniforms[MAX_GLOBAL_SHADER_UNIFORMS];
};
@ -116,7 +84,3 @@ layout(std140) uniform LightData { //ubo:2
Light light_array[MAX_LIGHTS];
};
#endif // DISABLE_LIGHTING
layout(std140) uniform DrawDataInstances { //ubo:3
DrawData draw_data[MAX_DRAW_DATA_INSTANCES];
};

View File

@ -90,8 +90,6 @@ Config::Config() {
glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &max_uniform_buffer_size);
glGetIntegerv(GL_MAX_VIEWPORT_DIMS, max_viewport_size);
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_offset_alignment);
support_anisotropic_filter = extensions.has("GL_EXT_texture_filter_anisotropic");
if (support_anisotropic_filter) {
glGetFloatv(_GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &anisotropic_level);

View File

@ -67,8 +67,6 @@ public:
int max_renderable_lights = 0;
int max_lights_per_object = 0;
int uniform_buffer_offset_alignment = 0;
// TODO implement wireframe in OpenGL
// bool generate_wireframes;