Merge pull request #92797 from stuartcarnie/sgc/canvas_batching
Some checks are pending
🔗 GHA / 📊 Static checks (push) Waiting to run
🔗 GHA / 🤖 Android (push) Blocked by required conditions
🔗 GHA / 🍏 iOS (push) Blocked by required conditions
🔗 GHA / 🐧 Linux (push) Blocked by required conditions
🔗 GHA / 🍎 macOS (push) Blocked by required conditions
🔗 GHA / 🏁 Windows (push) Blocked by required conditions
🔗 GHA / 🌐 Web (push) Blocked by required conditions
🔗 GHA / 🪲 Godot CPP (push) Blocked by required conditions

Add batching to `RendererCanvasRenderRD`
This commit is contained in:
Rémi Verschelde 2024-09-11 21:56:36 +02:00 committed by GitHub
commit 2c136e6170
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 1301 additions and 919 deletions

View File

@ -2341,6 +2341,9 @@
[b]Note:[/b] This property is only read when the project starts. To change the physics FPS at runtime, set [member Engine.physics_ticks_per_second] instead.
[b]Note:[/b] Only [member physics/common/max_physics_steps_per_frame] physics ticks may be simulated per rendered frame at most. If more physics ticks have to be simulated per rendered frame to keep up with rendering, the project will appear to slow down (even if [code]delta[/code] is used consistently in physics calculations). Therefore, it is recommended to also increase [member physics/common/max_physics_steps_per_frame] if increasing [member physics/common/physics_ticks_per_second] significantly above its default value.
</member>
<member name="rendering/2d/batching/item_buffer_size" type="int" setter="" getter="" default="16384">
Maximum number of canvas item commands that can be batched into a single draw call.
</member>
<member name="rendering/2d/sdf/oversize" type="int" setter="" getter="" default="1">
Controls how much of the original viewport size should be covered by the 2D signed distance field. This SDF can be sampled in [CanvasItem] shaders and is used for [GPUParticles2D] collision. Higher values allow portions of occluders located outside the viewport to still be taken into account in the generated signed distance field, at the cost of performance. If you notice particles falling through [LightOccluder2D]s as the occluders leave the viewport, increase this setting.
The percentage specified is added on each axis and on both sides. For example, with the default setting of 120%, the signed distance field will cover 20% of the viewport's size outside the viewport on each side (top, right, bottom, left).

File diff suppressed because it is too large Load Diff

View File

@ -46,6 +46,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
MATERIAL_UNIFORM_SET = 1,
TRANSFORMS_UNIFORM_SET = 2,
CANVAS_TEXTURE_UNIFORM_SET = 3,
INSTANCE_DATA_UNIFORM_SET = 4,
};
const int SAMPLERS_BINDING_FIRST_INDEX = 10;
@ -335,48 +336,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
//state that does not vary across rendering all items
struct State {
//state buffer
struct Buffer {
float canvas_transform[16];
float screen_transform[16];
float canvas_normal_transform[16];
float canvas_modulate[4];
float screen_pixel_size[2];
float time;
uint32_t use_pixel_snap;
float sdf_to_tex[4];
float sdf_to_screen[2];
float screen_to_sdf[2];
uint32_t directional_light_count;
float tex_to_sdf;
uint32_t pad1;
uint32_t pad2;
};
LightUniform *light_uniforms = nullptr;
RID lights_uniform_buffer;
RID canvas_state_buffer;
RID shadow_sampler;
RID shadow_texture;
RID shadow_depth_texture;
RID shadow_fb;
int shadow_texture_size = 2048;
RID default_transforms_uniform_set;
uint32_t max_lights_per_render;
uint32_t max_lights_per_item;
double time;
} state;
struct PushConstant {
struct InstanceData {
float world[6];
uint32_t flags;
uint32_t specular_shininess;
@ -403,6 +363,173 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
uint32_t lights[4];
};
struct PushConstant {
uint32_t base_instance_index;
uint32_t pad1;
uint32_t pad2;
uint32_t pad3;
};
// TextureState is used to determine when a new batch is required due to a change of texture state.
struct TextureState {
static const uint32_t FILTER_SHIFT = 0;
static const uint32_t FILTER_BITS = 3;
static const uint32_t FILTER_MASK = (1 << FILTER_BITS) - 1;
static const uint32_t REPEAT_SHIFT = FILTER_BITS;
static const uint32_t REPEAT_BITS = 2;
static const uint32_t REPEAT_MASK = (1 << REPEAT_BITS) - 1;
static const uint32_t TEXTURE_IS_DATA_SHIFT = REPEAT_SHIFT + REPEAT_BITS;
static const uint32_t TEXTURE_IS_DATA_BITS = 1;
static const uint32_t TEXTURE_IS_DATA_MASK = (1 << TEXTURE_IS_DATA_BITS) - 1;
static const uint32_t LINEAR_COLORS_SHIFT = TEXTURE_IS_DATA_SHIFT + TEXTURE_IS_DATA_BITS;
static const uint32_t LINEAR_COLORS_BITS = 1;
static const uint32_t LINEAR_COLORS_MASK = (1 << LINEAR_COLORS_BITS) - 1;
RID texture;
uint32_t other = 0;
TextureState() {}
TextureState(RID p_texture, RS::CanvasItemTextureFilter p_base_filter, RS::CanvasItemTextureRepeat p_base_repeat, bool p_texture_is_data, bool p_use_linear_colors) {
texture = p_texture;
other = (((uint32_t)p_base_filter & FILTER_MASK) << FILTER_SHIFT) |
(((uint32_t)p_base_repeat & REPEAT_MASK) << REPEAT_SHIFT) |
(((uint32_t)p_texture_is_data & TEXTURE_IS_DATA_MASK) << TEXTURE_IS_DATA_SHIFT) |
(((uint32_t)p_use_linear_colors & LINEAR_COLORS_MASK) << LINEAR_COLORS_SHIFT);
}
_FORCE_INLINE_ RS::CanvasItemTextureFilter texture_filter() const {
return (RS::CanvasItemTextureFilter)((other >> FILTER_SHIFT) & FILTER_MASK);
}
_FORCE_INLINE_ RS::CanvasItemTextureRepeat texture_repeat() const {
return (RS::CanvasItemTextureRepeat)((other >> REPEAT_SHIFT) & REPEAT_MASK);
}
_FORCE_INLINE_ bool linear_colors() const {
return (other >> LINEAR_COLORS_SHIFT) & LINEAR_COLORS_MASK;
}
_FORCE_INLINE_ bool texture_is_data() const {
return (other >> TEXTURE_IS_DATA_SHIFT) & TEXTURE_IS_DATA_MASK;
}
bool operator==(const TextureState &p_val) const {
return (texture == p_val.texture) && (other == p_val.other);
}
bool operator!=(const TextureState &p_val) const {
return (texture != p_val.texture) || (other != p_val.other);
}
};
struct Batch {
// Position in the UBO measured in bytes
uint32_t start = 0;
uint32_t instance_count = 0;
uint32_t instance_buffer_index = 0;
TextureState tex_state;
RID tex_uniform_set;
// The following tex_ prefixed fields are used to cache the texture data for the current batch.
// These values are applied to new InstanceData for the batch
// The cached specular shininess derived from the current texture.
uint32_t tex_specular_shininess = 0;
// The cached texture flags, such as FLAGS_DEFAULT_SPECULAR_MAP_USED and FLAGS_DEFAULT_NORMAL_MAP_USED
uint32_t tex_flags = 0;
// The cached texture pixel size.
Vector2 tex_texpixel_size;
Color modulate = Color(1.0, 1.0, 1.0, 1.0);
Item *clip = nullptr;
RID material;
CanvasMaterialData *material_data = nullptr;
PipelineLightMode light_mode = PipelineLightMode::PIPELINE_LIGHT_MODE_DISABLED;
PipelineVariant pipeline_variant = PipelineVariant::PIPELINE_VARIANT_QUAD;
const Item::Command *command = nullptr;
Item::Command::Type command_type = Item::Command::TYPE_ANIMATION_SLICE; // Can default to any type that doesn't form a batch.
// batch-specific data
union {
// TYPE_PRIMITIVE
uint32_t primitive_points = 0;
// TYPE_PARTICLES
uint32_t mesh_instance_count;
};
bool has_blend = false;
void set_tex_state(TextureState &p_tex_state) {
tex_state = p_tex_state;
tex_uniform_set = RID();
tex_texpixel_size = Size2();
tex_specular_shininess = 0;
tex_flags = 0;
}
};
struct DataBuffer {
LocalVector<RID> instance_buffers;
};
struct State {
//state buffer
struct Buffer {
float canvas_transform[16];
float screen_transform[16];
float canvas_normal_transform[16];
float canvas_modulate[4];
float screen_pixel_size[2];
float time;
uint32_t use_pixel_snap;
float sdf_to_tex[4];
float sdf_to_screen[2];
float screen_to_sdf[2];
uint32_t directional_light_count;
float tex_to_sdf;
uint32_t pad1;
uint32_t pad2;
};
LocalVector<DataBuffer> canvas_instance_data_buffers;
LocalVector<Batch> canvas_instance_batches;
uint32_t current_data_buffer_index = 0;
uint32_t current_instance_buffer_index = 0;
uint32_t current_batch_index = 0;
uint32_t last_instance_index = 0;
InstanceData *instance_data_array = nullptr;
uint32_t max_instances_per_buffer = 16384;
uint32_t max_instance_buffer_size = 16384 * sizeof(InstanceData);
RID current_tex_uniform_set;
LightUniform *light_uniforms = nullptr;
RID lights_uniform_buffer;
RID canvas_state_buffer;
RID shadow_sampler;
RID shadow_texture;
RID shadow_depth_texture;
RID shadow_fb;
int shadow_texture_size = 2048;
RID default_transforms_uniform_set;
uint32_t max_lights_per_render;
uint32_t max_lights_per_item;
double time;
} state;
Item *items[MAX_RENDER_ITEMS];
bool using_directional_lights = false;
@ -422,9 +549,23 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
Color debug_redraw_color;
double debug_redraw_time = 1.0;
inline void _bind_canvas_texture(RD::DrawListID p_draw_list, RID p_texture, RS::CanvasItemTextureFilter p_base_filter, RS::CanvasItemTextureRepeat p_base_repeat, RID &r_last_texture, PushConstant &push_constant, Size2 &r_texpixel_size, bool p_texture_is_data = false); //recursive, so regular inline used instead.
void _render_item(RenderingDevice::DrawListID p_draw_list, RID p_render_target, const Item *p_item, RenderingDevice::FramebufferFormatID p_framebuffer_format, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, Light *p_lights, PipelineVariants *p_pipeline_variants, bool &r_sdf_used, const Point2 &p_repeat_offset, RenderingMethod::RenderInfo *r_render_info = nullptr);
void _render_items(RID p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, bool &r_sdf_used, bool p_to_backbuffer = false, RenderingMethod::RenderInfo *r_render_info = nullptr);
// A structure to store cached render target information
struct RenderTarget {
// Current render target for the canvas.
RID render_target;
// The base flags for each InstanceData, derived from the render target.
// Either FLAGS_CONVERT_ATTRIBUTES_TO_LINEAR or 0
uint32_t base_flags = 0;
};
void _render_batch_items(RenderTarget p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, bool &r_sdf_used, bool p_to_backbuffer = false, RenderingMethod::RenderInfo *r_render_info = nullptr);
void _record_item_commands(const Item *p_item, RenderTarget p_render_target, const Transform2D &p_base_transform, Item *&r_current_clip, Light *p_lights, uint32_t &r_index, bool &r_batch_broken, bool &r_sdf_used);
void _render_batch(RD::DrawListID p_draw_list, PipelineVariants *p_pipeline_variants, RenderingDevice::FramebufferFormatID p_framebuffer_format, Light *p_lights, Batch const *p_batch, RenderingMethod::RenderInfo *r_render_info = nullptr);
void _prepare_batch_texture(Batch *p_current_batch, RID p_texture) const;
void _bind_canvas_texture(RD::DrawListID p_draw_list, RID p_uniform_set);
[[nodiscard]] Batch *_new_batch(bool &r_batch_broken);
void _add_to_batch(uint32_t &r_index, bool &r_batch_broken, Batch *&r_current_batch);
void _allocate_instance_buffer();
_FORCE_INLINE_ void _update_transform_2d_to_mat2x4(const Transform2D &p_transform, float *p_mat2x4);
_FORCE_INLINE_ void _update_transform_2d_to_mat2x3(const Transform2D &p_transform, float *p_mat2x3);

View File

@ -24,6 +24,12 @@ layout(location = 11) in vec4 weight_attrib;
#include "canvas_uniforms_inc.glsl"
#ifndef USE_ATTRIBUTES
layout(location = 4) out flat uint instance_index_interp;
#endif // USE_ATTRIBUTES
layout(location = 0) out vec2 uv_interp;
layout(location = 1) out vec4 color_interp;
layout(location = 2) out vec2 vertex_interp;
@ -59,6 +65,14 @@ void main() {
vec4 custom1 = vec4(0.0);
#endif
#ifdef USE_ATTRIBUTES
uint instance_index = params.base_instance_index;
#else
uint instance_index = gl_InstanceIndex + params.base_instance_index;
instance_index_interp = instance_index;
#endif // USE_ATTRIBUTES
const InstanceData draw_data = instances.data[instance_index];
#ifdef USE_PRIMITIVE
//weird bug,
@ -117,13 +131,10 @@ void main() {
mat4 model_matrix = mat4(vec4(draw_data.world_x, 0.0, 0.0), vec4(draw_data.world_y, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(draw_data.world_ofs, 0.0, 1.0));
#define FLAGS_INSTANCING_MASK 0x7F
#define FLAGS_INSTANCING_HAS_COLORS (1 << 7)
#define FLAGS_INSTANCING_HAS_CUSTOM_DATA (1 << 8)
#ifdef USE_ATTRIBUTES
uint instancing = draw_data.flags & FLAGS_INSTANCING_MASK;
#ifdef USE_ATTRIBUTES
if (instancing > 1) {
// trails
@ -160,38 +171,27 @@ void main() {
vertex = new_vertex;
color *= pcolor;
} else
#endif // USE_ATTRIBUTES
{
if (instancing == 1) {
uint stride = 2;
{
if (bool(draw_data.flags & FLAGS_INSTANCING_HAS_COLORS)) {
stride += 1;
}
if (bool(draw_data.flags & FLAGS_INSTANCING_HAS_CUSTOM_DATA)) {
stride += 1;
}
}
} else if (instancing == 1) {
uint stride = 2 + bitfieldExtract(draw_data.flags, FLAGS_INSTANCING_HAS_COLORS_SHIFT, 1) + bitfieldExtract(draw_data.flags, FLAGS_INSTANCING_HAS_CUSTOM_DATA_SHIFT, 1);
uint offset = stride * gl_InstanceIndex;
uint offset = stride * gl_InstanceIndex;
mat4 matrix = mat4(transforms.data[offset + 0], transforms.data[offset + 1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
offset += 2;
mat4 matrix = mat4(transforms.data[offset + 0], transforms.data[offset + 1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
offset += 2;
if (bool(draw_data.flags & FLAGS_INSTANCING_HAS_COLORS)) {
color *= transforms.data[offset];
offset += 1;
}
if (bool(draw_data.flags & FLAGS_INSTANCING_HAS_CUSTOM_DATA)) {
instance_custom = transforms.data[offset];
}
matrix = transpose(matrix);
model_matrix = model_matrix * matrix;
if (bool(draw_data.flags & FLAGS_INSTANCING_HAS_COLORS)) {
color *= transforms.data[offset];
offset += 1;
}
if (bool(draw_data.flags & FLAGS_INSTANCING_HAS_CUSTOM_DATA)) {
instance_custom = transforms.data[offset];
}
matrix = transpose(matrix);
model_matrix = model_matrix * matrix;
}
#endif // USE_ATTRIBUTES
#ifdef USE_POINT_SIZE
float point_size = 1.0;
@ -241,6 +241,10 @@ void main() {
#include "canvas_uniforms_inc.glsl"
#ifndef USE_ATTRIBUTES
layout(location = 4) in flat uint instance_index;
#endif // USE_ATTRIBUTES
layout(location = 0) in vec2 uv_interp;
layout(location = 1) in vec4 color_interp;
layout(location = 2) in vec2 vertex_interp;
@ -320,6 +324,12 @@ vec4 light_compute(
#ifdef USE_NINEPATCH
float map_ninepatch_axis(float pixel, float draw_size, float tex_pixel_size, float margin_begin, float margin_end, int np_repeat, inout int draw_center) {
#ifdef USE_ATTRIBUTES
const InstanceData draw_data = instances.data[params.base_instance_index];
#else
const InstanceData draw_data = instances.data[instance_index];
#endif // USE_ATTRIBUTES
float tex_size = 1.0 / tex_pixel_size;
if (pixel < margin_begin) {
@ -327,9 +337,7 @@ float map_ninepatch_axis(float pixel, float draw_size, float tex_pixel_size, flo
} else if (pixel >= draw_size - margin_end) {
return (tex_size - (draw_size - pixel)) * tex_pixel_size;
} else {
if (!bool(draw_data.flags & FLAGS_NINEPACH_DRAW_CENTER)) {
draw_center--;
}
draw_center -= 1 - int(bitfieldExtract(draw_data.flags, FLAGS_NINEPACH_DRAW_CENTER_SHIFT, 1));
// np_repeat is passed as uniform using NinePatchRect::AxisStretchMode enum.
if (np_repeat == 0) { // Stretch.
@ -462,14 +470,20 @@ void main() {
vec2 uv = uv_interp;
vec2 vertex = vertex_interp;
#ifdef USE_ATTRIBUTES
const InstanceData draw_data = instances.data[params.base_instance_index];
#else
const InstanceData draw_data = instances.data[instance_index];
#endif // USE_ATTRIBUTES
#if !defined(USE_ATTRIBUTES) && !defined(USE_PRIMITIVE)
#ifdef USE_NINEPATCH
int draw_center = 2;
uv = vec2(
map_ninepatch_axis(pixel_size_interp.x, abs(draw_data.dst_rect.z), draw_data.color_texture_pixel_size.x, draw_data.ninepatch_margins.x, draw_data.ninepatch_margins.z, int(draw_data.flags >> FLAGS_NINEPATCH_H_MODE_SHIFT) & 0x3, draw_center),
map_ninepatch_axis(pixel_size_interp.y, abs(draw_data.dst_rect.w), draw_data.color_texture_pixel_size.y, draw_data.ninepatch_margins.y, draw_data.ninepatch_margins.w, int(draw_data.flags >> FLAGS_NINEPATCH_V_MODE_SHIFT) & 0x3, draw_center));
map_ninepatch_axis(pixel_size_interp.x, abs(draw_data.dst_rect.z), draw_data.color_texture_pixel_size.x, draw_data.ninepatch_margins.x, draw_data.ninepatch_margins.z, int(bitfieldExtract(draw_data.flags, FLAGS_NINEPATCH_H_MODE_SHIFT, 2)), draw_center),
map_ninepatch_axis(pixel_size_interp.y, abs(draw_data.dst_rect.w), draw_data.color_texture_pixel_size.y, draw_data.ninepatch_margins.y, draw_data.ninepatch_margins.w, int(bitfieldExtract(draw_data.flags, FLAGS_NINEPATCH_V_MODE_SHIFT, 2)), draw_center));
if (draw_center == 0) {
color.a = 0.0;
@ -519,8 +533,8 @@ void main() {
color *= texture(sampler2D(color_texture, texture_sampler), uv);
}
uint light_count = (draw_data.flags >> FLAGS_LIGHT_COUNT_SHIFT) & 0xF; //max 16 lights
bool using_light = light_count > 0 || canvas_data.directional_light_count > 0;
uint light_count = bitfieldExtract(draw_data.flags, FLAGS_LIGHT_COUNT_SHIFT, 4); //max 16 lights
bool using_light = (light_count + canvas_data.directional_light_count) > 0;
vec3 normal;
@ -652,9 +666,7 @@ void main() {
if (i >= light_count) {
break;
}
uint light_base = draw_data.lights[i >> 2];
light_base >>= (i & 3) * 8;
light_base &= 0xFF;
uint light_base = bitfieldExtract(draw_data.lights[i >> 2], (int(i) & 0x3) * 8, 8);
vec2 tex_uv = (vec4(vertex, 0.0, 1.0) * mat4(light_array.data[light_base].texture_matrix[0], light_array.data[light_base].texture_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations.
vec2 tex_uv_atlas = tex_uv * light_array.data[light_base].atlas_rect.zw + light_array.data[light_base].atlas_rect.xy;

View File

@ -7,13 +7,16 @@
//1 means enabled, 2+ means trails in use
#define FLAGS_INSTANCING_MASK 0x7F
#define FLAGS_INSTANCING_HAS_COLORS (1 << 7)
#define FLAGS_INSTANCING_HAS_CUSTOM_DATA (1 << 8)
#define FLAGS_INSTANCING_HAS_COLORS_SHIFT 7
#define FLAGS_INSTANCING_HAS_COLORS (1 << FLAGS_INSTANCING_HAS_COLORS_SHIFT)
#define FLAGS_INSTANCING_HAS_CUSTOM_DATA_SHIFT 8
#define FLAGS_INSTANCING_HAS_CUSTOM_DATA (1 << FLAGS_INSTANCING_HAS_CUSTOM_DATA_SHIFT)
#define FLAGS_CLIP_RECT_UV (1 << 9)
#define FLAGS_TRANSPOSE_RECT (1 << 10)
#define FLAGS_CONVERT_ATTRIBUTES_TO_LINEAR (1 << 11)
#define FLAGS_NINEPACH_DRAW_CENTER (1 << 12)
#define FLAGS_NINEPACH_DRAW_CENTER_SHIFT 12
#define FLAGS_NINEPACH_DRAW_CENTER (1 << FLAGS_NINEPACH_DRAW_CENTER_SHIFT)
#define FLAGS_NINEPATCH_H_MODE_SHIFT 16
#define FLAGS_NINEPATCH_V_MODE_SHIFT 18
@ -29,9 +32,7 @@
#define FLAGS_FLIP_H (1 << 30)
#define FLAGS_FLIP_V (1 << 31)
// Push Constant
layout(push_constant, std430) uniform DrawData {
struct InstanceData {
vec2 world_x;
vec2 world_y;
vec2 world_ofs;
@ -51,8 +52,20 @@ layout(push_constant, std430) uniform DrawData {
#endif
vec2 color_texture_pixel_size;
uint lights[4];
};
layout(set = 4, binding = 0, std430) restrict readonly buffer DrawData {
InstanceData data[];
}
draw_data;
instances;
layout(push_constant, std430) uniform Params {
uint base_instance_index; // base index to instance data
uint pad1;
uint pad2;
uint pad3;
}
params;
// In vulkan, sets should always be ordered using the following logic:
// Lower Sets: Sets that change format and layout less often

View File

@ -3548,6 +3548,7 @@ void RenderingServer::init() {
GLOBAL_DEF("rendering/lights_and_shadows/positional_shadow/soft_shadow_filter_quality.mobile", 0);
GLOBAL_DEF(PropertyInfo(Variant::INT, "rendering/2d/shadow_atlas/size", PROPERTY_HINT_RANGE, "128,16384"), 2048);
GLOBAL_DEF_RST(PropertyInfo(Variant::INT, "rendering/2d/batching/item_buffer_size", PROPERTY_HINT_RANGE, "128,1048576,1"), 16384);
// Number of commands that can be drawn per frame.
GLOBAL_DEF_RST(PropertyInfo(Variant::INT, "rendering/gl_compatibility/item_buffer_size", PROPERTY_HINT_RANGE, "128,1048576,1"), 16384);