diff --git a/drivers/metal/metal_objects.h b/drivers/metal/metal_objects.h index fd7d93bbbd5..5eda826d2aa 100644 --- a/drivers/metal/metal_objects.h +++ b/drivers/metal/metal_objects.h @@ -82,6 +82,9 @@ MTL_CLASS(Texture) } //namespace MTL +/// Metal buffer index for the view mask when rendering multi-view. +const uint32_t VIEW_MASK_BUFFER_INDEX = 24; + enum ShaderStageUsage : uint32_t { None = 0, Vertex = RDD::SHADER_STAGE_VERTEX_BIT, @@ -142,6 +145,12 @@ struct ClearAttKey { const static uint32_t STENCIL_INDEX = DEPTH_INDEX + 1; const static uint32_t ATTACHMENT_COUNT = STENCIL_INDEX + 1; + enum Flags : uint16_t { + CLEAR_FLAGS_NONE = 0, + CLEAR_FLAGS_LAYERED = 1 << 0, + }; + + Flags flags = CLEAR_FLAGS_NONE; uint16_t sample_count = 0; uint16_t pixel_formats[ATTACHMENT_COUNT] = { 0 }; @@ -150,19 +159,22 @@ struct ClearAttKey { _FORCE_INLINE_ void set_stencil_format(MTLPixelFormat p_fmt) { pixel_formats[STENCIL_INDEX] = p_fmt; } _FORCE_INLINE_ MTLPixelFormat depth_format() const { return (MTLPixelFormat)pixel_formats[DEPTH_INDEX]; } _FORCE_INLINE_ MTLPixelFormat stencil_format() const { return (MTLPixelFormat)pixel_formats[STENCIL_INDEX]; } + _FORCE_INLINE_ void enable_layered_rendering() { flags::set(flags, CLEAR_FLAGS_LAYERED); } _FORCE_INLINE_ bool is_enabled(uint32_t p_idx) const { return pixel_formats[p_idx] != 0; } _FORCE_INLINE_ bool is_depth_enabled() const { return pixel_formats[DEPTH_INDEX] != 0; } _FORCE_INLINE_ bool is_stencil_enabled() const { return pixel_formats[STENCIL_INDEX] != 0; } + _FORCE_INLINE_ bool is_layered_rendering_enabled() const { return flags::any(flags, CLEAR_FLAGS_LAYERED); } _FORCE_INLINE_ bool operator==(const ClearAttKey &p_rhs) const { return memcmp(this, &p_rhs, sizeof(ClearAttKey)) == 0; } uint32_t hash() const { - uint32_t h = hash_murmur3_one_32(sample_count); + uint32_t h = hash_murmur3_one_32(flags); + h = hash_murmur3_one_32(sample_count, h); h = hash_murmur3_buffer(pixel_formats, ATTACHMENT_COUNT * sizeof(pixel_formats[0]), h); - return h; + return hash_fmix32(h); } }; @@ -206,6 +218,97 @@ public: ~MDResourceCache() = default; }; +enum class MDAttachmentType : uint8_t { + None = 0, + Color = 1 << 0, + Depth = 1 << 1, + Stencil = 1 << 2, +}; + +_FORCE_INLINE_ MDAttachmentType &operator|=(MDAttachmentType &p_a, MDAttachmentType p_b) { + flags::set(p_a, p_b); + return p_a; +} + +_FORCE_INLINE_ bool operator&(MDAttachmentType p_a, MDAttachmentType p_b) { + return uint8_t(p_a) & uint8_t(p_b); +} + +struct MDSubpass { + uint32_t subpass_index = 0; + uint32_t view_count = 0; + LocalVector input_references; + LocalVector color_references; + RDD::AttachmentReference depth_stencil_reference; + LocalVector resolve_references; + + MTLFmtCaps getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const; +}; + +struct API_AVAILABLE(macos(11.0), ios(14.0)) MDAttachment { +private: + uint32_t index = 0; + uint32_t firstUseSubpassIndex = 0; + uint32_t lastUseSubpassIndex = 0; + +public: + MTLPixelFormat format = MTLPixelFormatInvalid; + MDAttachmentType type = MDAttachmentType::None; + MTLLoadAction loadAction = MTLLoadActionDontCare; + MTLStoreAction storeAction = MTLStoreActionDontCare; + MTLLoadAction stencilLoadAction = MTLLoadActionDontCare; + MTLStoreAction stencilStoreAction = MTLStoreActionDontCare; + uint32_t samples = 1; + + /*! + * @brief Returns true if this attachment is first used in the given subpass. + * @param p_subpass + * @return + */ + _FORCE_INLINE_ bool isFirstUseOf(MDSubpass const &p_subpass) const { + return p_subpass.subpass_index == firstUseSubpassIndex; + } + + /*! + * @brief Returns true if this attachment is last used in the given subpass. + * @param p_subpass + * @return + */ + _FORCE_INLINE_ bool isLastUseOf(MDSubpass const &p_subpass) const { + return p_subpass.subpass_index == lastUseSubpassIndex; + } + + void linkToSubpass(MDRenderPass const &p_pass); + + MTLStoreAction getMTLStoreAction(MDSubpass const &p_subpass, + bool p_is_rendering_entire_area, + bool p_has_resolve, + bool p_can_resolve, + bool p_is_stencil) const; + bool configureDescriptor(MTLRenderPassAttachmentDescriptor *p_desc, + PixelFormats &p_pf, + MDSubpass const &p_subpass, + id p_attachment, + bool p_is_rendering_entire_area, + bool p_has_resolve, + bool p_can_resolve, + bool p_is_stencil) const; + /** Returns whether this attachment should be cleared in the subpass. */ + bool shouldClear(MDSubpass const &p_subpass, bool p_is_stencil) const; +}; + +class API_AVAILABLE(macos(11.0), ios(14.0)) MDRenderPass { +public: + Vector attachments; + Vector subpasses; + + uint32_t get_sample_count() const { + return attachments.is_empty() ? 1 : attachments[0].samples; + } + + MDRenderPass(Vector &p_attachments, Vector &p_subpasses); +}; + class API_AVAILABLE(macos(11.0), ios(14.0)) MDCommandBuffer { private: RenderingDeviceDriverMetal *device_driver = nullptr; @@ -220,8 +323,8 @@ private: void _render_set_dirty_state(); void _render_bind_uniform_sets(); - static void _populate_vertices(simd::float4 *p_vertices, Size2i p_fb_size, VectorView p_rects); - static uint32_t _populate_vertices(simd::float4 *p_vertices, uint32_t p_index, Rect2i const &p_rect, Size2i p_fb_size); + void _populate_vertices(simd::float4 *p_vertices, Size2i p_fb_size, VectorView p_rects); + uint32_t _populate_vertices(simd::float4 *p_vertices, uint32_t p_index, Rect2i const &p_rect, Size2i p_fb_size); void _end_render_pass(); void _render_clear_render_area(); @@ -268,34 +371,14 @@ public: // Bit mask of the uniform sets that are dirty, to prevent redundant binding. uint64_t uniform_set_mask = 0; - _FORCE_INLINE_ void reset() { - pass = nil; - frameBuffer = nil; - pipeline = nil; - current_subpass = UINT32_MAX; - render_area = {}; - is_rendering_entire_area = false; - desc = nil; - encoder = nil; - index_buffer = nil; - index_type = MTLIndexTypeUInt16; - dirty = DIRTY_NONE; - uniform_sets.clear(); - uniform_set_mask = 0; - clear_values.clear(); - viewports.clear(); - scissors.clear(); - blend_constants.reset(); - vertex_buffers.clear(); - vertex_offsets.clear(); - // Keep the keys, as they are likely to be used again. - for (KeyValue>> &kv : resource_usage) { - kv.value.clear(); - } - } - + _FORCE_INLINE_ void reset(); void end_encoding(); + _ALWAYS_INLINE_ const MDSubpass &get_subpass() const { + DEV_ASSERT(pass != nullptr); + return pass->subpasses[current_subpass]; + } + _FORCE_INLINE_ void mark_viewport_dirty() { if (viewports.is_empty()) { return; @@ -649,6 +732,7 @@ public: uint32_t size = 0; } frag; } push_constants; + bool needs_view_mask_buffer = false; MDLibrary *vert; MDLibrary *frag; @@ -659,7 +743,10 @@ public: void encode_push_constant_data(VectorView p_data, MDCommandBuffer *p_cb) final; - MDRenderShader(CharString p_name, Vector p_sets, MDLibrary *p_vert, MDLibrary *p_frag); + MDRenderShader(CharString p_name, + bool p_needs_view_mask_buffer, + Vector p_sets, + MDLibrary *p_vert, MDLibrary *p_frag); }; _FORCE_INLINE_ StageResourceUsage &operator|=(StageResourceUsage &p_a, uint32_t p_b) { @@ -702,96 +789,6 @@ public: BoundUniformSet &boundUniformSetForShader(MDShader *p_shader, id p_device); }; -enum class MDAttachmentType : uint8_t { - None = 0, - Color = 1 << 0, - Depth = 1 << 1, - Stencil = 1 << 2, -}; - -_FORCE_INLINE_ MDAttachmentType &operator|=(MDAttachmentType &p_a, MDAttachmentType p_b) { - flags::set(p_a, p_b); - return p_a; -} - -_FORCE_INLINE_ bool operator&(MDAttachmentType p_a, MDAttachmentType p_b) { - return uint8_t(p_a) & uint8_t(p_b); -} - -struct MDSubpass { - uint32_t subpass_index = 0; - LocalVector input_references; - LocalVector color_references; - RDD::AttachmentReference depth_stencil_reference; - LocalVector resolve_references; - - MTLFmtCaps getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const; -}; - -struct API_AVAILABLE(macos(11.0), ios(14.0)) MDAttachment { -private: - uint32_t index = 0; - uint32_t firstUseSubpassIndex = 0; - uint32_t lastUseSubpassIndex = 0; - -public: - MTLPixelFormat format = MTLPixelFormatInvalid; - MDAttachmentType type = MDAttachmentType::None; - MTLLoadAction loadAction = MTLLoadActionDontCare; - MTLStoreAction storeAction = MTLStoreActionDontCare; - MTLLoadAction stencilLoadAction = MTLLoadActionDontCare; - MTLStoreAction stencilStoreAction = MTLStoreActionDontCare; - uint32_t samples = 1; - - /*! - * @brief Returns true if this attachment is first used in the given subpass. - * @param p_subpass - * @return - */ - _FORCE_INLINE_ bool isFirstUseOf(MDSubpass const &p_subpass) const { - return p_subpass.subpass_index == firstUseSubpassIndex; - } - - /*! - * @brief Returns true if this attachment is last used in the given subpass. - * @param p_subpass - * @return - */ - _FORCE_INLINE_ bool isLastUseOf(MDSubpass const &p_subpass) const { - return p_subpass.subpass_index == lastUseSubpassIndex; - } - - void linkToSubpass(MDRenderPass const &p_pass); - - MTLStoreAction getMTLStoreAction(MDSubpass const &p_subpass, - bool p_is_rendering_entire_area, - bool p_has_resolve, - bool p_can_resolve, - bool p_is_stencil) const; - bool configureDescriptor(MTLRenderPassAttachmentDescriptor *p_desc, - PixelFormats &p_pf, - MDSubpass const &p_subpass, - id p_attachment, - bool p_is_rendering_entire_area, - bool p_has_resolve, - bool p_can_resolve, - bool p_is_stencil) const; - /** Returns whether this attachment should be cleared in the subpass. */ - bool shouldClear(MDSubpass const &p_subpass, bool p_is_stencil) const; -}; - -class API_AVAILABLE(macos(11.0), ios(14.0)) MDRenderPass { -public: - Vector attachments; - Vector subpasses; - - uint32_t get_sample_count() const { - return attachments.is_empty() ? 1 : attachments[0].samples; - } - - MDRenderPass(Vector &p_attachments, Vector &p_subpasses); -}; - class API_AVAILABLE(macos(11.0), ios(14.0)) MDPipeline { public: MDPipelineType type; @@ -892,13 +889,39 @@ public: }; class API_AVAILABLE(macos(11.0), ios(14.0)) MDFrameBuffer { -public: Vector textures; + +public: Size2i size; MDFrameBuffer(Vector p_textures, Size2i p_size) : textures(p_textures), size(p_size) {} MDFrameBuffer() {} + /// Returns the texture at the given index. + _ALWAYS_INLINE_ MTL::Texture get_texture(uint32_t p_idx) const { + return textures[p_idx]; + } + + /// Returns true if the texture at the given index is not nil. + _ALWAYS_INLINE_ bool has_texture(uint32_t p_idx) const { + return textures[p_idx] != nil; + } + + /// Set the texture at the given index. + _ALWAYS_INLINE_ void set_texture(uint32_t p_idx, MTL::Texture p_texture) { + textures.write[p_idx] = p_texture; + } + + /// Unset or nil the texture at the given index. + _ALWAYS_INLINE_ void unset_texture(uint32_t p_idx) { + textures.write[p_idx] = nil; + } + + /// Resizes buffers to the specified size. + _ALWAYS_INLINE_ void set_texture_count(uint32_t p_size) { + textures.resize(p_size); + } + virtual ~MDFrameBuffer() = default; }; diff --git a/drivers/metal/metal_objects.mm b/drivers/metal/metal_objects.mm index c3906af1596..11ab209d605 100644 --- a/drivers/metal/metal_objects.mm +++ b/drivers/metal/metal_objects.mm @@ -96,6 +96,9 @@ void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) { MDRenderPipeline *rp = (MDRenderPipeline *)p; if (render.encoder == nil) { + // This error would happen if the render pass failed. + ERR_FAIL_NULL_MSG(render.desc, "Render pass descriptor is null."); + // This condition occurs when there are no attachments when calling render_next_subpass() // and is due to the SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS flag. render.desc.defaultRasterSampleCount = static_cast(rp->sample_count); @@ -223,8 +226,9 @@ void MDCommandBuffer::render_bind_uniform_set(RDD::UniformSetID p_uniform_set, R void MDCommandBuffer::render_clear_attachments(VectorView p_attachment_clears, VectorView p_rects) { DEV_ASSERT(type == MDCommandBufferStateType::Render); - uint32_t vertex_count = p_rects.size() * 6; + const MDSubpass &subpass = render.get_subpass(); + uint32_t vertex_count = p_rects.size() * 6 * subpass.view_count; simd::float4 vertices[vertex_count]; simd::float4 clear_colors[ClearAttKey::ATTACHMENT_COUNT]; @@ -235,6 +239,9 @@ void MDCommandBuffer::render_clear_attachments(VectorView ClearAttKey key; key.sample_count = render.pass->get_sample_count(); + if (subpass.view_count > 1) { + key.enable_layered_rendering(); + } float depth_value = 0; uint32_t stencil_value = 0; @@ -245,7 +252,7 @@ void MDCommandBuffer::render_clear_attachments(VectorView if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_COLOR_BIT)) { attachment_index = attClear.color_attachment; } else { - attachment_index = render.pass->subpasses[render.current_subpass].depth_stencil_reference.attachment; + attachment_index = subpass.depth_stencil_reference.attachment; } MDAttachment const &mda = render.pass->attachments[attachment_index]; @@ -310,6 +317,13 @@ void MDCommandBuffer::render_clear_attachments(VectorView void MDCommandBuffer::_render_set_dirty_state() { _render_bind_uniform_sets(); + MDSubpass const &subpass = render.get_subpass(); + if (subpass.view_count > 1) { + uint32_t view_range[2] = { 0, subpass.view_count }; + [render.encoder setVertexBytes:view_range length:sizeof(view_range) atIndex:VIEW_MASK_BUFFER_INDEX]; + [render.encoder setFragmentBytes:view_range length:sizeof(view_range) atIndex:VIEW_MASK_BUFFER_INDEX]; + } + if (render.dirty.has_flag(RenderState::DIRTY_PIPELINE)) { [render.encoder setRenderPipelineState:render.pipeline->state]; } @@ -492,36 +506,40 @@ uint32_t MDCommandBuffer::_populate_vertices(simd::float4 *p_vertices, uint32_t simd::float4 vtx; uint32_t idx = p_index; - vtx.z = 0.0; - vtx.w = (float)1; + uint32_t endLayer = render.get_subpass().view_count; - // Top left vertex - First triangle. - vtx.y = topPos; - vtx.x = leftPos; - p_vertices[idx++] = vtx; + for (uint32_t layer = 0; layer < endLayer; layer++) { + vtx.z = 0.0; + vtx.w = (float)layer; - // Bottom left vertex. - vtx.y = bottomPos; - vtx.x = leftPos; - p_vertices[idx++] = vtx; + // Top left vertex - First triangle. + vtx.y = topPos; + vtx.x = leftPos; + p_vertices[idx++] = vtx; - // Bottom right vertex. - vtx.y = bottomPos; - vtx.x = rightPos; - p_vertices[idx++] = vtx; + // Bottom left vertex. + vtx.y = bottomPos; + vtx.x = leftPos; + p_vertices[idx++] = vtx; - // Bottom right vertex - Second triangle. - p_vertices[idx++] = vtx; + // Bottom right vertex. + vtx.y = bottomPos; + vtx.x = rightPos; + p_vertices[idx++] = vtx; - // Top right vertex. - vtx.y = topPos; - vtx.x = rightPos; - p_vertices[idx++] = vtx; + // Bottom right vertex - Second triangle. + p_vertices[idx++] = vtx; - // Top left vertex. - vtx.y = topPos; - vtx.x = leftPos; - p_vertices[idx++] = vtx; + // Top right vertex. + vtx.y = topPos; + vtx.x = rightPos; + p_vertices[idx++] = vtx; + + // Top left vertex. + vtx.y = topPos; + vtx.x = leftPos; + p_vertices[idx++] = vtx; + } return idx; } @@ -548,8 +566,7 @@ void MDCommandBuffer::render_begin_pass(RDD::RenderPassID p_render_pass, RDD::Fr void MDCommandBuffer::_end_render_pass() { MDFrameBuffer const &fb_info = *render.frameBuffer; - MDRenderPass const &pass_info = *render.pass; - MDSubpass const &subpass = pass_info.subpasses[render.current_subpass]; + MDSubpass const &subpass = render.get_subpass(); PixelFormats &pf = device_driver->get_pixel_formats(); @@ -557,11 +574,11 @@ void MDCommandBuffer::_end_render_pass() { uint32_t color_index = subpass.color_references[i].attachment; uint32_t resolve_index = subpass.resolve_references[i].attachment; DEV_ASSERT((color_index == RDD::AttachmentReference::UNUSED) == (resolve_index == RDD::AttachmentReference::UNUSED)); - if (color_index == RDD::AttachmentReference::UNUSED || !fb_info.textures[color_index]) { + if (color_index == RDD::AttachmentReference::UNUSED || !fb_info.has_texture(color_index)) { continue; } - id resolve_tex = fb_info.textures[resolve_index]; + id resolve_tex = fb_info.get_texture(resolve_index); CRASH_COND_MSG(!flags::all(pf.getCapabilities(resolve_tex.pixelFormat), kMTLFmtCapsResolve), "not implemented: unresolvable texture types"); // see: https://github.com/KhronosGroup/MoltenVK/blob/d20d13fe2735adb845636a81522df1b9d89c0fba/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm#L407 @@ -572,7 +589,7 @@ void MDCommandBuffer::_end_render_pass() { void MDCommandBuffer::_render_clear_render_area() { MDRenderPass const &pass = *render.pass; - MDSubpass const &subpass = pass.subpasses[render.current_subpass]; + MDSubpass const &subpass = render.get_subpass(); // First determine attachments that should be cleared. LocalVector clears; @@ -619,9 +636,14 @@ void MDCommandBuffer::render_next_subpass() { MDFrameBuffer const &fb = *render.frameBuffer; MDRenderPass const &pass = *render.pass; - MDSubpass const &subpass = pass.subpasses[render.current_subpass]; + MDSubpass const &subpass = render.get_subpass(); MTLRenderPassDescriptor *desc = MTLRenderPassDescriptor.renderPassDescriptor; + + if (subpass.view_count > 1) { + desc.renderTargetArrayLength = subpass.view_count; + } + PixelFormats &pf = device_driver->get_pixel_formats(); uint32_t attachmentCount = 0; @@ -638,7 +660,7 @@ void MDCommandBuffer::render_next_subpass() { bool has_resolve = resolveIdx != RDD::AttachmentReference::UNUSED; bool can_resolve = true; if (resolveIdx != RDD::AttachmentReference::UNUSED) { - id resolve_tex = fb.textures[resolveIdx]; + id resolve_tex = fb.get_texture(resolveIdx); can_resolve = flags::all(pf.getCapabilities(resolve_tex.pixelFormat), kMTLFmtCapsResolve); if (can_resolve) { ca.resolveTexture = resolve_tex; @@ -649,7 +671,9 @@ void MDCommandBuffer::render_next_subpass() { MDAttachment const &attachment = pass.attachments[idx]; - id tex = fb.textures[idx]; + id tex = fb.get_texture(idx); + ERR_FAIL_NULL_MSG(tex, "Frame buffer color texture is null."); + if ((attachment.type & MDAttachmentType::Color)) { if (attachment.configureDescriptor(ca, pf, subpass, tex, render.is_rendering_entire_area, has_resolve, can_resolve, false)) { Color clearColor = render.clear_values[idx].color; @@ -662,7 +686,8 @@ void MDCommandBuffer::render_next_subpass() { attachmentCount += 1; uint32_t idx = subpass.depth_stencil_reference.attachment; MDAttachment const &attachment = pass.attachments[idx]; - id tex = fb.textures[idx]; + id tex = fb.get_texture(idx); + ERR_FAIL_NULL_MSG(tex, "Frame buffer depth / stencil texture is null."); if (attachment.type & MDAttachmentType::Depth) { MTLRenderPassDepthAttachmentDescriptor *da = desc.depthAttachment; if (attachment.configureDescriptor(da, pf, subpass, tex, render.is_rendering_entire_area, false, false, false)) { @@ -702,8 +727,15 @@ void MDCommandBuffer::render_draw(uint32_t p_vertex_count, uint32_t p_base_vertex, uint32_t p_first_instance) { DEV_ASSERT(type == MDCommandBufferStateType::Render); + ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer."); + _render_set_dirty_state(); + MDSubpass const &subpass = render.get_subpass(); + if (subpass.view_count > 1) { + p_instance_count *= subpass.view_count; + } + DEV_ASSERT(render.dirty == 0); id enc = render.encoder; @@ -751,8 +783,15 @@ void MDCommandBuffer::render_draw_indexed(uint32_t p_index_count, int32_t p_vertex_offset, uint32_t p_first_instance) { DEV_ASSERT(type == MDCommandBufferStateType::Render); + ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer."); + _render_set_dirty_state(); + MDSubpass const &subpass = render.get_subpass(); + if (subpass.view_count > 1) { + p_instance_count *= subpass.view_count; + } + id enc = render.encoder; uint32_t index_offset = render.index_offset; @@ -770,6 +809,8 @@ void MDCommandBuffer::render_draw_indexed(uint32_t p_index_count, void MDCommandBuffer::render_draw_indexed_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { DEV_ASSERT(type == MDCommandBufferStateType::Render); + ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer."); + _render_set_dirty_state(); id enc = render.encoder; @@ -794,6 +835,8 @@ void MDCommandBuffer::render_draw_indexed_indirect_count(RDD::BufferID p_indirec void MDCommandBuffer::render_draw_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { DEV_ASSERT(type == MDCommandBufferStateType::Render); + ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer."); + _render_set_dirty_state(); id enc = render.encoder; @@ -813,6 +856,42 @@ void MDCommandBuffer::render_draw_indirect_count(RDD::BufferID p_indirect_buffer ERR_FAIL_MSG("not implemented"); } +void MDCommandBuffer::render_end_pass() { + DEV_ASSERT(type == MDCommandBufferStateType::Render); + + render.end_encoding(); + render.reset(); + type = MDCommandBufferStateType::None; +} + +#pragma mark - RenderState + +void MDCommandBuffer::RenderState::reset() { + pass = nil; + frameBuffer = nil; + pipeline = nil; + current_subpass = UINT32_MAX; + render_area = {}; + is_rendering_entire_area = false; + desc = nil; + encoder = nil; + index_buffer = nil; + index_type = MTLIndexTypeUInt16; + dirty = DIRTY_NONE; + uniform_sets.clear(); + uniform_set_mask = 0; + clear_values.clear(); + viewports.clear(); + scissors.clear(); + blend_constants.reset(); + vertex_buffers.clear(); + vertex_offsets.clear(); + // Keep the keys, as they are likely to be used again. + for (KeyValue>> &kv : resource_usage) { + kv.value.clear(); + } +} + void MDCommandBuffer::RenderState::end_encoding() { if (encoder == nil) { return; @@ -842,6 +921,8 @@ void MDCommandBuffer::RenderState::end_encoding() { encoder = nil; } +#pragma mark - ComputeState + void MDCommandBuffer::ComputeState::end_encoding() { if (encoder == nil) { return; @@ -862,14 +943,6 @@ void MDCommandBuffer::ComputeState::end_encoding() { encoder = nil; } -void MDCommandBuffer::render_end_pass() { - DEV_ASSERT(type == MDCommandBufferStateType::Render); - - render.end_encoding(); - render.reset(); - type = MDCommandBufferStateType::None; -} - #pragma mark - Compute void MDCommandBuffer::compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index) { @@ -943,8 +1016,11 @@ void MDComputeShader::encode_push_constant_data(VectorView p_data, MDC [enc setBytes:ptr length:length atIndex:push_constants.binding]; } -MDRenderShader::MDRenderShader(CharString p_name, Vector p_sets, MDLibrary *_Nonnull p_vert, MDLibrary *_Nonnull p_frag) : - MDShader(p_name, p_sets), vert(p_vert), frag(p_frag) { +MDRenderShader::MDRenderShader(CharString p_name, + bool p_needs_view_mask_buffer, + Vector p_sets, + MDLibrary *_Nonnull p_vert, MDLibrary *_Nonnull p_frag) : + MDShader(p_name, p_sets), needs_view_mask_buffer(p_needs_view_mask_buffer), vert(p_vert), frag(p_frag) { } void MDRenderShader::encode_push_constant_data(VectorView p_data, MDCommandBuffer *p_cb) { @@ -1279,7 +1355,7 @@ typedef struct { typedef struct { float4 v_position [[position]]; - uint layer; + uint layer%s; } VaryingsPos; vertex VaryingsPos vertClear(AttributesPos attributes [[stage_in]], constant ClearColorsIn& ccIn [[buffer(0)]]) { @@ -1288,7 +1364,7 @@ vertex VaryingsPos vertClear(AttributesPos attributes [[stage_in]], constant Cle varyings.layer = uint(attributes.a_position.w); return varyings; } -)", ClearAttKey::DEPTH_INDEX]; +)", p_key.is_layered_rendering_enabled() ? " [[render_target_array_index]]" : "", ClearAttKey::DEPTH_INDEX]; return new_func(msl, @"vertClear", nil); } @@ -1578,7 +1654,7 @@ void ShaderCacheEntry::notify_free() const { self->_library = library; self->_error = error; if (error) { - ERR_PRINT(String(U"Error compiling shader %s: %s").format(entry->name.get_data(), error.localizedDescription.UTF8String)); + ERR_PRINT(vformat(U"Error compiling shader %s: %s", entry->name.get_data(), error.localizedDescription.UTF8String)); } { diff --git a/drivers/metal/rendering_context_driver_metal.mm b/drivers/metal/rendering_context_driver_metal.mm index b97b5863528..cf8c7e1c833 100644 --- a/drivers/metal/rendering_context_driver_metal.mm +++ b/drivers/metal/rendering_context_driver_metal.mm @@ -134,7 +134,7 @@ public: frame_buffers.resize(p_desired_framebuffer_count); for (uint32_t i = 0; i < p_desired_framebuffer_count; i++) { // Reserve space for the drawable texture. - frame_buffers[i].textures.resize(1); + frame_buffers[i].set_texture_count(1); } return OK; @@ -154,7 +154,7 @@ public: id drawable = layer.nextDrawable; ERR_FAIL_NULL_V_MSG(drawable, RDD::FramebufferID(), "no drawable available"); drawables[rear] = drawable; - frame_buffer.textures.write[0] = drawable.texture; + frame_buffer.set_texture(0, drawable.texture); return RDD::FramebufferID(&frame_buffer); } @@ -165,7 +165,7 @@ public: } // Release texture and drawable. - frame_buffers[front].textures.write[0] = nil; + frame_buffers[front].unset_texture(0); id drawable = drawables[front]; drawables[front] = nil; diff --git a/drivers/metal/rendering_device_driver_metal.h b/drivers/metal/rendering_device_driver_metal.h index f62a164ef9f..e238de958ea 100644 --- a/drivers/metal/rendering_device_driver_metal.h +++ b/drivers/metal/rendering_device_driver_metal.h @@ -239,7 +239,13 @@ private: friend struct PushConstantData; private: - Error _reflect_spirv16(VectorView p_spirv, ShaderReflection &r_reflection); + /// Contains additional metadata about the shader. + struct ShaderMeta { + /// Indicates whether the shader uses multiview. + bool has_multiview = false; + }; + + Error _reflect_spirv16(VectorView p_spirv, ShaderReflection &r_reflection, ShaderMeta &r_shader_meta); public: virtual String shader_get_binary_cache_key() override final; diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm index 4da11ecd219..d90f528a146 100644 --- a/drivers/metal/rendering_device_driver_metal.mm +++ b/drivers/metal/rendering_device_driver_metal.mm @@ -1026,7 +1026,7 @@ void RenderingDeviceDriverMetal::framebuffer_free(FramebufferID p_framebuffer) { #pragma mark - Shader -const uint32_t SHADER_BINARY_VERSION = 1; +const uint32_t SHADER_BINARY_VERSION = 2; // region Serialization @@ -1503,6 +1503,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData { uint32_t fragment_output_mask = UINT32_MAX; uint32_t spirv_specialization_constants_ids_mask = UINT32_MAX; uint32_t is_compute = UINT32_MAX; + uint32_t needs_view_mask_buffer = UINT32_MAX; ComputeSize compute_local_size; PushConstantData push_constant; LocalVector stages; @@ -1523,6 +1524,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData { size += sizeof(uint32_t); // fragment_output_mask size += sizeof(uint32_t); // spirv_specialization_constants_ids_mask size += sizeof(uint32_t); // is_compute + size += sizeof(uint32_t); // needs_view_mask_buffer size += compute_local_size.serialize_size(); // compute_local_size size += push_constant.serialize_size(); // push_constant size += sizeof(uint32_t); // stages.size() @@ -1547,6 +1549,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData { p_writer.write(fragment_output_mask); p_writer.write(spirv_specialization_constants_ids_mask); p_writer.write(is_compute); + p_writer.write(needs_view_mask_buffer); p_writer.write(compute_local_size); p_writer.write(push_constant); p_writer.write(VectorView(stages)); @@ -1561,6 +1564,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData { p_reader.read(fragment_output_mask); p_reader.read(spirv_specialization_constants_ids_mask); p_reader.read(is_compute); + p_reader.read(needs_view_mask_buffer); p_reader.read(compute_local_size); p_reader.read(push_constant); p_reader.read(stages); @@ -1572,14 +1576,16 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData { // endregion String RenderingDeviceDriverMetal::shader_get_binary_cache_key() { - return "Metal-SV" + uitos(SHADER_BINARY_VERSION); + static const String cache_key = "Metal-SV" + uitos(SHADER_BINARY_VERSION); + return cache_key; } -Error RenderingDeviceDriverMetal::_reflect_spirv16(VectorView p_spirv, ShaderReflection &r_reflection) { +Error RenderingDeviceDriverMetal::_reflect_spirv16(VectorView p_spirv, ShaderReflection &r_reflection, ShaderMeta &r_shader_meta) { using namespace spirv_cross; using spirv_cross::Resource; r_reflection = {}; + r_shader_meta = {}; for (uint32_t i = 0; i < p_spirv.size(); i++) { ShaderStageSPIRVData const &v = p_spirv[i]; @@ -1811,6 +1817,20 @@ Error RenderingDeviceDriverMetal::_reflect_spirv16(VectorView RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(Vec using spirv_cross::Resource; ShaderReflection spirv_data; - ERR_FAIL_COND_V(_reflect_spirv16(p_spirv, spirv_data), Result()); + ShaderMeta shader_meta; + ERR_FAIL_COND_V(_reflect_spirv16(p_spirv, spirv_data, shader_meta), Result()); ShaderBinaryData bin_data{}; if (!p_shader_name.is_empty()) { @@ -1893,6 +1914,7 @@ Vector RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(Vec bin_data.is_compute = spirv_data.is_compute; bin_data.push_constant.size = spirv_data.push_constant_size; bin_data.push_constant.stages = (ShaderStageUsage)(uint8_t)spirv_data.push_constant_stages; + bin_data.needs_view_mask_buffer = shader_meta.has_multiview ? 1 : 0; for (uint32_t i = 0; i < spirv_data.uniform_sets.size(); i++) { const ::Vector &spirv_set = spirv_data.uniform_sets[i]; @@ -1947,6 +1969,11 @@ Vector RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(Vec msl_options.pad_fragment_output_components = true; msl_options.r32ui_alignment_constant_id = R32UI_ALIGNMENT_CONSTANT_ID; msl_options.agx_manual_cube_grad_fixup = true; + if (shader_meta.has_multiview) { + msl_options.multiview = true; + msl_options.multiview_layered_rendering = true; + msl_options.view_mask_buffer_index = VIEW_MASK_BUFFER_INDEX; + } CompilerGLSL::Options options{}; options.vertex.flip_vert_y = true; @@ -2448,7 +2475,7 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vect #endif shader = cs; } else { - MDRenderShader *rs = new MDRenderShader(binary_data.shader_name, uniform_sets, libraries[ShaderStage::SHADER_STAGE_VERTEX], libraries[ShaderStage::SHADER_STAGE_FRAGMENT]); + MDRenderShader *rs = new MDRenderShader(binary_data.shader_name, (bool)binary_data.needs_view_mask_buffer, uniform_sets, libraries[ShaderStage::SHADER_STAGE_VERTEX], libraries[ShaderStage::SHADER_STAGE_FRAGMENT]); uint32_t *vert_binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_VERTEX); if (vert_binding) { @@ -2956,6 +2983,7 @@ RDD::RenderPassID RenderingDeviceDriverMetal::render_pass_create(VectorViewstate; + } + case DRIVER_RESOURCE_RENDER_PIPELINE: { + MDRenderPipeline *pipeline = (MDRenderPipeline *)(p_driver_id.id); + return (uint64_t)(uintptr_t)(__bridge void *)pipeline->state; + } default: { return 0; } @@ -3842,7 +3874,7 @@ uint64_t RenderingDeviceDriverMetal::api_trait_get(ApiTrait p_trait) { bool RenderingDeviceDriverMetal::has_feature(Features p_feature) { switch (p_feature) { case SUPPORTS_MULTIVIEW: - return false; + return multiview_capabilities.is_supported; case SUPPORTS_FSR_HALF_FLOAT: return true; case SUPPORTS_ATTACHMENT_VRS: @@ -3951,6 +3983,18 @@ Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p metal_device_properties = memnew(MetalDeviceProperties(device)); pixel_formats = memnew(PixelFormats(device)); + if (metal_device_properties->features.layeredRendering) { + multiview_capabilities.is_supported = true; + multiview_capabilities.max_view_count = metal_device_properties->limits.maxViewports; + // NOTE: I'm not sure what the limit is as I don't see it referenced anywhere + multiview_capabilities.max_instance_count = UINT32_MAX; + + print_verbose("- Metal multiview supported:"); + print_verbose(" max view count: " + itos(multiview_capabilities.max_view_count)); + print_verbose(" max instances: " + itos(multiview_capabilities.max_instance_count)); + } else { + print_verbose("- Metal multiview not supported"); + } // Check required features and abort if any of them is missing. if (!metal_device_properties->features.imageCubeArray) {