From 65a83785fd0eded2a8b4811f27b9c965f8af5294 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20J=2E=20Est=C3=A9banez?= Date: Sat, 2 Jul 2022 13:19:12 +0200 Subject: [PATCH] Avoid GL undefined behavior in ubershaders --- drivers/gles3/rasterizer_scene_gles3.cpp | 2 +- drivers/gles3/rasterizer_storage_gles3.cpp | 11 ++++++++- drivers/gles3/rasterizer_storage_gles3.h | 1 + drivers/gles3/shader_gles3.cpp | 28 ++++++++++++++++++++-- drivers/gles3/shader_gles3.h | 7 ++++-- gles_builders.py | 16 +++++++++++++ 6 files changed, 59 insertions(+), 6 deletions(-) diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp index 391bbf0d8e9..87a629a0144 100644 --- a/drivers/gles3/rasterizer_scene_gles3.cpp +++ b/drivers/gles3/rasterizer_scene_gles3.cpp @@ -5297,7 +5297,7 @@ void RasterizerSceneGLES3::initialize() { glFrontFace(GL_CW); if (storage->config.async_compilation_enabled) { - state.scene_shader.init_async_compilation(); + state.scene_shader.init_async_compilation(storage->resources.depth_tex); } } diff --git a/drivers/gles3/rasterizer_storage_gles3.cpp b/drivers/gles3/rasterizer_storage_gles3.cpp index b7b5ae03ed9..ca05e0f83b6 100644 --- a/drivers/gles3/rasterizer_storage_gles3.cpp +++ b/drivers/gles3/rasterizer_storage_gles3.cpp @@ -8241,6 +8241,14 @@ void RasterizerStorageGLES3::initialize() { glGenerateMipmap(GL_TEXTURE_2D); glBindTexture(GL_TEXTURE_2D, 0); + glGenTextures(1, &resources.depth_tex); + unsigned char depthtexdata[8 * 8 * 2] = {}; + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, resources.depth_tex); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT16, 8, 8, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, depthtexdata); + glBindTexture(GL_TEXTURE_2D, 0); + glGenTextures(1, &resources.white_tex_3d); glActiveTexture(GL_TEXTURE0); @@ -8331,7 +8339,7 @@ void RasterizerStorageGLES3::initialize() { shaders.cubemap_filter.set_conditional(CubemapFilterShaderGLES3::LOW_QUALITY, !ggx_hq); shaders.particles.init(); if (config.async_compilation_enabled) { - shaders.particles.init_async_compilation(); + shaders.particles.init_async_compilation(resources.depth_tex); } #ifdef GLES_OVER_GL @@ -8389,6 +8397,7 @@ void RasterizerStorageGLES3::finalize() { glDeleteTextures(1, &resources.white_tex); glDeleteTextures(1, &resources.black_tex); glDeleteTextures(1, &resources.normal_tex); + glDeleteTextures(1, &resources.depth_tex); } void RasterizerStorageGLES3::update_dirty_resources() { diff --git a/drivers/gles3/rasterizer_storage_gles3.h b/drivers/gles3/rasterizer_storage_gles3.h index a1c5e4774ae..a40285b5586 100644 --- a/drivers/gles3/rasterizer_storage_gles3.h +++ b/drivers/gles3/rasterizer_storage_gles3.h @@ -142,6 +142,7 @@ public: GLuint black_tex; GLuint normal_tex; GLuint aniso_tex; + GLuint depth_tex; GLuint white_tex_3d; GLuint white_tex_array; diff --git a/drivers/gles3/shader_gles3.cpp b/drivers/gles3/shader_gles3.cpp index 0bf4585d8c2..388c3762ffc 100644 --- a/drivers/gles3/shader_gles3.cpp +++ b/drivers/gles3/shader_gles3.cpp @@ -195,6 +195,27 @@ bool ShaderGLES3::_bind_ubershader() { CRASH_COND(new_conditional_version.version >= 0x80000000); #endif glUniform1i(conditionals_uniform, new_conditional_version.version); + + // This is done to avoid running into the GL UB message id 131222. Long explanation: + // If an ubershader has shadow samplers, they are generally not used if the current material has shadowing disabled, + // but that also implies the rasterizer won't do any preparation to the relevant shadow samplers (which won't really exist, + // so that's the correct way in a conditioned shader). + // However, in the case of the ubershader those shadow samplers are unconditionally declared, although potentially unused and + // thus "uninitialized". Sampling in that situation (compare disabled, no depth texture bound) is undefined behavior for GL. + // And that's a problem for us because, even if dynamic branching will serve to avoid using the unprepared sampler when shadowing + // is not enabled, the GPU may still run the other branch. And it's not just that the results from the sampling are undefined + // (that wouldn't be a problem and we could just ignore the warning); the problem is that sampling in that state is fully UB. + for (int i = 0; i < shadow_texunit_count; i++) { + int unit = shadow_texunits[i]; + if (unit >= 0) { + glActiveTexture(GL_TEXTURE0 + unit); + } else { + glActiveTexture(GL_TEXTURE0 + max_image_units + unit); + } + glBindTexture(GL_TEXTURE_2D, depth_tex); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); + } + return bound; } @@ -1107,7 +1128,7 @@ GLint ShaderGLES3::get_uniform_location(const String &p_name) const { return glGetUniformLocation(version->ids.main, p_name.ascii().get_data()); } -void ShaderGLES3::setup(const char **p_conditional_defines, int p_conditional_count, const char **p_uniform_names, int p_uniform_count, const AttributePair *p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count, const Feedback *p_feedback, int p_feedback_count, const char *p_vertex_code, const char *p_fragment_code, int p_vertex_code_start, int p_fragment_code_start) { +void ShaderGLES3::setup(const char **p_conditional_defines, int p_conditional_count, const char **p_uniform_names, int p_uniform_count, const AttributePair *p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const int *p_shadow_texunits, int p_shadow_texunit_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count, const Feedback *p_feedback, int p_feedback_count, const char *p_vertex_code, const char *p_fragment_code, int p_vertex_code_start, int p_fragment_code_start) { ERR_FAIL_COND(version); conditional_version.key = 0; new_conditional_version.key = 0; @@ -1119,6 +1140,8 @@ void ShaderGLES3::setup(const char **p_conditional_defines, int p_conditional_co fragment_code = p_fragment_code; texunit_pairs = p_texunit_pairs; texunit_pair_count = p_texunit_pair_count; + shadow_texunits = p_shadow_texunits; + shadow_texunit_count = p_shadow_texunit_count; vertex_code_start = p_vertex_code_start; fragment_code_start = p_fragment_code_start; attribute_pairs = p_attribute_pairs; @@ -1209,7 +1232,8 @@ void ShaderGLES3::setup(const char **p_conditional_defines, int p_conditional_co glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS, &max_image_units); } -void ShaderGLES3::init_async_compilation() { +void ShaderGLES3::init_async_compilation(GLuint p_depth_tex) { + depth_tex = p_depth_tex; if (is_async_compilation_supported() && get_ubershader_flags_uniform() != -1) { // Warm up the ubershader for the case of no custom code new_conditional_version.code_version = 0; diff --git a/drivers/gles3/shader_gles3.h b/drivers/gles3/shader_gles3.h index 490a95c9621..1823ce06dde 100644 --- a/drivers/gles3/shader_gles3.h +++ b/drivers/gles3/shader_gles3.h @@ -96,6 +96,7 @@ private: //@TODO Optimize to a fixed set of shader pools and use a LRU int uniform_count; int texunit_pair_count; + int shadow_texunit_count; int conditional_count; int ubo_count; int feedback_count; @@ -245,6 +246,7 @@ private: const char **uniform_names; const AttributePair *attribute_pairs; const TexUnitPair *texunit_pairs; + const int *shadow_texunits; const UBOPair *ubo_pairs; const Feedback *feedbacks; const char *vertex_code; @@ -278,6 +280,7 @@ private: static ShaderGLES3 *active; int max_image_units; + GLuint depth_tex = 0; _FORCE_INLINE_ void _set_uniform_variant(GLint p_uniform, const Variant &p_value) { if (p_uniform < 0) { @@ -375,7 +378,7 @@ protected: _FORCE_INLINE_ int _get_uniform(int p_which) const; _FORCE_INLINE_ void _set_conditional(int p_which, bool p_value); - void setup(const char **p_conditional_defines, int p_conditional_count, const char **p_uniform_names, int p_uniform_count, const AttributePair *p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count, const Feedback *p_feedback, int p_feedback_count, const char *p_vertex_code, const char *p_fragment_code, int p_vertex_code_start, int p_fragment_code_start); + void setup(const char **p_conditional_defines, int p_conditional_count, const char **p_uniform_names, int p_uniform_count, const AttributePair *p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const int *p_shadow_texunits, int p_shadow_texunit_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count, const Feedback *p_feedback, int p_feedback_count, const char *p_vertex_code, const char *p_fragment_code, int p_vertex_code_start, int p_fragment_code_start); ShaderGLES3(); @@ -404,7 +407,7 @@ public: _FORCE_INLINE_ bool is_version_valid() const { return version && version->compile_status == Version::COMPILE_STATUS_OK; } virtual void init() = 0; - void init_async_compilation(); + void init_async_compilation(GLuint p_depth_tex); bool is_async_compilation_supported(); void finish(); diff --git a/gles_builders.py b/gles_builders.py index ef0cb811f57..8afc9a49929 100644 --- a/gles_builders.py +++ b/gles_builders.py @@ -19,6 +19,7 @@ class LegacyGLHeaderStruct: self.enums = {} self.texunits = [] self.texunit_names = [] + self.shadow_texunits = [] self.ubos = [] self.ubo_names = [] @@ -106,6 +107,8 @@ def include_file_in_legacygl_header(filename, header_data, depth): if not x in header_data.texunit_names: header_data.texunits += [(x, texunit)] + if line.find("sampler2DShadow") != -1: + header_data.shadow_texunits += [texunit] header_data.texunit_names += [x] elif line.find("uniform") != -1 and line.lower().find("ubo:") != -1: @@ -483,6 +486,15 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs, gles2 else: fd.write("\t\tstatic TexUnitPair *_texunit_pairs=NULL;\n") + if not gles2: + if header_data.shadow_texunits: + fd.write("\t\tstatic int _shadow_texunits[]={") + for x in header_data.shadow_texunits: + fd.write(str(x) + ',') + fd.write("};\n\n") + else: + fd.write("\t\tstatic int *_shadow_texunits=NULL;\n") + if not gles2 and header_data.ubos: fd.write("\t\tstatic UBOPair _ubo_pairs[]={\n") for x in header_data.ubos: @@ -537,6 +549,8 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs, gles2 + str(len(header_data.attributes)) + ", _texunit_pairs," + str(len(header_data.texunits)) + + ", _shadow_texunits," + + str(len(header_data.shadow_texunits)) + ",_ubo_pairs," + str(len(header_data.ubos)) + ",_feedbacks," @@ -566,6 +580,8 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs, gles2 + str(len(header_data.uniforms)) + ",_texunit_pairs," + str(len(header_data.texunits)) + + ",_shadow_texunits," + + str(len(header_data.shadow_texunits)) + ",_enums," + str(len(header_data.enums)) + ",_enum_values,"