Merge pull request #98803 from stuartcarnie/metal_multiview

Metal: Multiview support
This commit is contained in:
Thaddeus Crews 2024-11-10 12:13:02 -06:00
commit 246e8e9ecc
No known key found for this signature in database
GPG Key ID: 62181B86FE9E5D84
5 changed files with 337 additions and 188 deletions

View File

@ -82,6 +82,9 @@ MTL_CLASS(Texture)
} //namespace MTL
/// Metal buffer argument index reserved for the view mask when rendering multi-view.
/// The command buffer binds a two-element view range at this index for both the vertex and
/// fragment stages, and the same value is handed to the MSL compiler options as
/// `view_mask_buffer_index`, so the generated shaders and the encoder must agree on it.
const uint32_t VIEW_MASK_BUFFER_INDEX = 24;
enum ShaderStageUsage : uint32_t {
None = 0,
Vertex = RDD::SHADER_STAGE_VERTEX_BIT,
@ -142,6 +145,12 @@ struct ClearAttKey {
const static uint32_t STENCIL_INDEX = DEPTH_INDEX + 1;
const static uint32_t ATTACHMENT_COUNT = STENCIL_INDEX + 1;
/// Optional state bits stored in the `flags` member of the clear key.
enum Flags : uint16_t {
	/// No optional behavior requested.
	CLEAR_FLAGS_NONE = 0x0,
	/// Set by enable_layered_rendering() and tested by is_layered_rendering_enabled().
	CLEAR_FLAGS_LAYERED = 0x1,
};
Flags flags = CLEAR_FLAGS_NONE;
uint16_t sample_count = 0;
uint16_t pixel_formats[ATTACHMENT_COUNT] = { 0 };
@ -150,19 +159,22 @@ struct ClearAttKey {
// Stores the stencil pixel format in the STENCIL_INDEX slot of pixel_formats.
_FORCE_INLINE_ void set_stencil_format(MTLPixelFormat p_fmt) { pixel_formats[STENCIL_INDEX] = p_fmt; }
// Returns the pixel format stored in the DEPTH_INDEX slot.
_FORCE_INLINE_ MTLPixelFormat depth_format() const { return (MTLPixelFormat)pixel_formats[DEPTH_INDEX]; }
// Returns the pixel format stored in the STENCIL_INDEX slot.
_FORCE_INLINE_ MTLPixelFormat stencil_format() const { return (MTLPixelFormat)pixel_formats[STENCIL_INDEX]; }
// Marks this key as targeting layered rendering by setting CLEAR_FLAGS_LAYERED.
_FORCE_INLINE_ void enable_layered_rendering() { flags::set(flags, CLEAR_FLAGS_LAYERED); }
// A slot counts as enabled when a non-zero pixel format has been stored in it.
_FORCE_INLINE_ bool is_enabled(uint32_t p_idx) const { return pixel_formats[p_idx] != 0; }
// True when a depth format has been stored.
_FORCE_INLINE_ bool is_depth_enabled() const { return pixel_formats[DEPTH_INDEX] != 0; }
// True when a stencil format has been stored.
_FORCE_INLINE_ bool is_stencil_enabled() const { return pixel_formats[STENCIL_INDEX] != 0; }
// True when CLEAR_FLAGS_LAYERED is set in flags.
_FORCE_INLINE_ bool is_layered_rendering_enabled() const { return flags::any(flags, CLEAR_FLAGS_LAYERED); }
// Two keys are equal when their object representations match byte for byte.
// NOTE(review): a raw memcmp is only reliable if the struct has no padding bytes; the fields
// visible here (flags, sample_count, pixel_formats) are all uint16_t-based — confirm that no
// other members are declared outside this view.
_FORCE_INLINE_ bool operator==(const ClearAttKey &p_rhs) const {
return memcmp(this, &p_rhs, sizeof(ClearAttKey)) == 0;
}
// Computes a 32-bit hash of the key from flags, sample_count and the pixel_formats array.
// NOTE(review): this listing declares `h` twice (seeded from sample_count, then from flags) and
// contains two consecutive return statements, so it appears to show both the pre-change and the
// post-change lines of the diff — confirm which lines are live before relying on it.
uint32_t hash() const {
uint32_t h = hash_murmur3_one_32(sample_count);
uint32_t h = hash_murmur3_one_32(flags);
h = hash_murmur3_one_32(sample_count, h);
h = hash_murmur3_buffer(pixel_formats, ATTACHMENT_COUNT * sizeof(pixel_formats[0]), h);
return h;
return hash_fmix32(h);
}
};
@ -206,6 +218,97 @@ public:
~MDResourceCache() = default;
};
/// Aspects an attachment can provide. Each value occupies its own bit so that several
/// aspects can be combined in a single MDAttachmentType value.
enum class MDAttachmentType : uint8_t {
	None = 0x0,
	Color = 0x1,
	Depth = 0x2,
	Stencil = 0x4,
};
/// Merges p_b into p_a in place through flags::set and returns p_a so the expression can be chained.
/// NOTE(review): flags::set is presumably a bitwise OR on the underlying value — confirm against the helper.
_FORCE_INLINE_ MDAttachmentType &operator|=(MDAttachmentType &p_a, MDAttachmentType p_b) {
flags::set(p_a, p_b);
return p_a;
}
/// Tests whether p_a and p_b have at least one bit in common.
/// The result is a bool (not an MDAttachmentType) so it can be used directly as a condition.
_FORCE_INLINE_ bool operator&(MDAttachmentType p_a, MDAttachmentType p_b) {
	const uint8_t common_bits = uint8_t(p_a) & uint8_t(p_b);
	return common_bits != 0;
}
/// Describes one subpass of a render pass: its position, view count and attachment references.
struct MDSubpass {
// Position of this subpass within its render pass; compared against an attachment's
// first/last-use indices by MDAttachment::isFirstUseOf / isLastUseOf.
uint32_t subpass_index = 0;
// Number of views rendered by this subpass; render_pass_create fills it from p_view_count.
// Values greater than 1 switch the command buffer to layered (multi-view) rendering.
uint32_t view_count = 0;
// Attachment references for this subpass; each entry's `attachment` field is used as an index
// into the render pass attachments and the framebuffer textures.
LocalVector<RDD::AttachmentReference> input_references;
LocalVector<RDD::AttachmentReference> color_references;
RDD::AttachmentReference depth_stencil_reference;
// Indexed in parallel with color_references when resolving.
LocalVector<RDD::AttachmentReference> resolve_references;
// Declared here, defined elsewhere.
// NOTE(review): presumably returns the format capabilities the attachment at p_index needs — confirm in the definition.
MTLFmtCaps getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const;
};
/// Describes a single render pass attachment: its format, aspect type, load/store actions and
/// sample count, plus the range of subpasses in which it is used.
struct API_AVAILABLE(macos(11.0), ios(14.0)) MDAttachment {
private:
// NOTE(review): presumably this attachment's index within the render pass — it is not read in the visible code.
uint32_t index = 0;
// Subpass indices compared by isFirstUseOf() / isLastUseOf().
// NOTE(review): presumably filled in by linkToSubpass() — confirm in the definition.
uint32_t firstUseSubpassIndex = 0;
uint32_t lastUseSubpassIndex = 0;
public:
MTLPixelFormat format = MTLPixelFormatInvalid;
// Aspect bits (color / depth / stencil) this attachment provides.
MDAttachmentType type = MDAttachmentType::None;
MTLLoadAction loadAction = MTLLoadActionDontCare;
MTLStoreAction storeAction = MTLStoreActionDontCare;
MTLLoadAction stencilLoadAction = MTLLoadActionDontCare;
MTLStoreAction stencilStoreAction = MTLStoreActionDontCare;
// Sample count; MDRenderPass::get_sample_count() reports the value of attachment 0.
uint32_t samples = 1;
/*!
* @brief Returns true if this attachment is first used in the given subpass.
* @param p_subpass Subpass whose subpass_index is compared with the first-use index.
* @return true when p_subpass.subpass_index equals firstUseSubpassIndex.
*/
_FORCE_INLINE_ bool isFirstUseOf(MDSubpass const &p_subpass) const {
return p_subpass.subpass_index == firstUseSubpassIndex;
}
/*!
* @brief Returns true if this attachment is last used in the given subpass.
* @param p_subpass Subpass whose subpass_index is compared with the last-use index.
* @return true when p_subpass.subpass_index equals lastUseSubpassIndex.
*/
_FORCE_INLINE_ bool isLastUseOf(MDSubpass const &p_subpass) const {
return p_subpass.subpass_index == lastUseSubpassIndex;
}
// Declared here, defined elsewhere.
void linkToSubpass(MDRenderPass const &p_pass);
// Declared here, defined elsewhere.
// NOTE(review): presumably selects the store action for this attachment in p_subpass from the flags passed in — confirm.
MTLStoreAction getMTLStoreAction(MDSubpass const &p_subpass,
bool p_is_rendering_entire_area,
bool p_has_resolve,
bool p_can_resolve,
bool p_is_stencil) const;
// Configures p_desc for this attachment using p_attachment as the texture.
// The caller in render_next_subpass() assigns the clear value only when this returns true.
bool configureDescriptor(MTLRenderPassAttachmentDescriptor *p_desc,
PixelFormats &p_pf,
MDSubpass const &p_subpass,
id<MTLTexture> p_attachment,
bool p_is_rendering_entire_area,
bool p_has_resolve,
bool p_can_resolve,
bool p_is_stencil) const;
/** Returns whether this attachment should be cleared in the subpass. */
bool shouldClear(MDSubpass const &p_subpass, bool p_is_stencil) const;
};
/// A render pass: the attachments it uses and the subpasses that reference them.
class API_AVAILABLE(macos(11.0), ios(14.0)) MDRenderPass {
public:
	Vector<MDAttachment> attachments;
	Vector<MDSubpass> subpasses;

	MDRenderPass(Vector<MDAttachment> &p_attachments, Vector<MDSubpass> &p_subpasses);

	/// Sample count of the pass, taken from the first attachment.
	/// A pass without attachments reports a single sample.
	uint32_t get_sample_count() const {
		if (attachments.is_empty()) {
			return 1;
		}
		return attachments[0].samples;
	}
};
class API_AVAILABLE(macos(11.0), ios(14.0)) MDCommandBuffer {
private:
RenderingDeviceDriverMetal *device_driver = nullptr;
@ -220,8 +323,8 @@ private:
void _render_set_dirty_state();
void _render_bind_uniform_sets();
static void _populate_vertices(simd::float4 *p_vertices, Size2i p_fb_size, VectorView<Rect2i> p_rects);
static uint32_t _populate_vertices(simd::float4 *p_vertices, uint32_t p_index, Rect2i const &p_rect, Size2i p_fb_size);
void _populate_vertices(simd::float4 *p_vertices, Size2i p_fb_size, VectorView<Rect2i> p_rects);
uint32_t _populate_vertices(simd::float4 *p_vertices, uint32_t p_index, Rect2i const &p_rect, Size2i p_fb_size);
void _end_render_pass();
void _render_clear_render_area();
@ -268,34 +371,14 @@ public:
// Bit mask of the uniform sets that are dirty, to prevent redundant binding.
uint64_t uniform_set_mask = 0;
// Returns the render state to its idle defaults so it can be reused for the next pass.
_FORCE_INLINE_ void reset() {
pass = nil;
frameBuffer = nil;
pipeline = nil;
// UINT32_MAX marks "no current subpass".
current_subpass = UINT32_MAX;
render_area = {};
is_rendering_entire_area = false;
desc = nil;
encoder = nil;
index_buffer = nil;
index_type = MTLIndexTypeUInt16;
dirty = DIRTY_NONE;
uniform_sets.clear();
uniform_set_mask = 0;
clear_values.clear();
viewports.clear();
scissors.clear();
blend_constants.reset();
vertex_buffers.clear();
vertex_offsets.clear();
// Keep the keys, as they are likely to be used again.
for (KeyValue<StageResourceUsage, LocalVector<__unsafe_unretained id<MTLResource>>> &kv : resource_usage) {
kv.value.clear();
}
}
_FORCE_INLINE_ void reset();
void end_encoding();
// Returns the subpass currently being encoded.
// Only valid while a pass is set: reset() leaves `pass` nil and `current_subpass` at UINT32_MAX,
// which is not a usable index into `pass->subpasses`.
_ALWAYS_INLINE_ const MDSubpass &get_subpass() const {
DEV_ASSERT(pass != nullptr);
return pass->subpasses[current_subpass];
}
_FORCE_INLINE_ void mark_viewport_dirty() {
if (viewports.is_empty()) {
return;
@ -649,6 +732,7 @@ public:
uint32_t size = 0;
} frag;
} push_constants;
bool needs_view_mask_buffer = false;
MDLibrary *vert;
MDLibrary *frag;
@ -659,7 +743,10 @@ public:
void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;
MDRenderShader(CharString p_name, Vector<UniformSet> p_sets, MDLibrary *p_vert, MDLibrary *p_frag);
MDRenderShader(CharString p_name,
bool p_needs_view_mask_buffer,
Vector<UniformSet> p_sets,
MDLibrary *p_vert, MDLibrary *p_frag);
};
_FORCE_INLINE_ StageResourceUsage &operator|=(StageResourceUsage &p_a, uint32_t p_b) {
@ -702,96 +789,6 @@ public:
BoundUniformSet &boundUniformSetForShader(MDShader *p_shader, id<MTLDevice> p_device);
};
/// Aspects an attachment can provide. Each value occupies its own bit so that several
/// aspects can be combined in a single MDAttachmentType value.
enum class MDAttachmentType : uint8_t {
	None = 0x0,
	Color = 0x1,
	Depth = 0x2,
	Stencil = 0x4,
};
/// Merges p_b into p_a in place through flags::set and returns p_a so the expression can be chained.
/// NOTE(review): flags::set is presumably a bitwise OR on the underlying value — confirm against the helper.
_FORCE_INLINE_ MDAttachmentType &operator|=(MDAttachmentType &p_a, MDAttachmentType p_b) {
flags::set(p_a, p_b);
return p_a;
}
/// Tests whether p_a and p_b have at least one bit in common.
/// The result is a bool (not an MDAttachmentType) so it can be used directly as a condition.
_FORCE_INLINE_ bool operator&(MDAttachmentType p_a, MDAttachmentType p_b) {
	const uint8_t common_bits = uint8_t(p_a) & uint8_t(p_b);
	return common_bits != 0;
}
/// Describes one subpass of a render pass: its position and attachment references.
struct MDSubpass {
// Position of this subpass within its render pass; compared against an attachment's
// first/last-use indices by MDAttachment::isFirstUseOf / isLastUseOf.
uint32_t subpass_index = 0;
// Attachment references for this subpass; each entry's `attachment` field is used as an index
// into the render pass attachments and the framebuffer textures.
LocalVector<RDD::AttachmentReference> input_references;
LocalVector<RDD::AttachmentReference> color_references;
RDD::AttachmentReference depth_stencil_reference;
// Indexed in parallel with color_references when resolving.
LocalVector<RDD::AttachmentReference> resolve_references;
// Declared here, defined elsewhere.
// NOTE(review): presumably returns the format capabilities the attachment at p_index needs — confirm in the definition.
MTLFmtCaps getRequiredFmtCapsForAttachmentAt(uint32_t p_index) const;
};
/// Describes a single render pass attachment: its format, aspect type, load/store actions and
/// sample count, plus the range of subpasses in which it is used.
struct API_AVAILABLE(macos(11.0), ios(14.0)) MDAttachment {
private:
// NOTE(review): presumably this attachment's index within the render pass — it is not read in the visible code.
uint32_t index = 0;
// Subpass indices compared by isFirstUseOf() / isLastUseOf().
// NOTE(review): presumably filled in by linkToSubpass() — confirm in the definition.
uint32_t firstUseSubpassIndex = 0;
uint32_t lastUseSubpassIndex = 0;
public:
MTLPixelFormat format = MTLPixelFormatInvalid;
// Aspect bits (color / depth / stencil) this attachment provides.
MDAttachmentType type = MDAttachmentType::None;
MTLLoadAction loadAction = MTLLoadActionDontCare;
MTLStoreAction storeAction = MTLStoreActionDontCare;
MTLLoadAction stencilLoadAction = MTLLoadActionDontCare;
MTLStoreAction stencilStoreAction = MTLStoreActionDontCare;
// Sample count; MDRenderPass::get_sample_count() reports the value of attachment 0.
uint32_t samples = 1;
/*!
* @brief Returns true if this attachment is first used in the given subpass.
* @param p_subpass Subpass whose subpass_index is compared with the first-use index.
* @return true when p_subpass.subpass_index equals firstUseSubpassIndex.
*/
_FORCE_INLINE_ bool isFirstUseOf(MDSubpass const &p_subpass) const {
return p_subpass.subpass_index == firstUseSubpassIndex;
}
/*!
* @brief Returns true if this attachment is last used in the given subpass.
* @param p_subpass Subpass whose subpass_index is compared with the last-use index.
* @return true when p_subpass.subpass_index equals lastUseSubpassIndex.
*/
_FORCE_INLINE_ bool isLastUseOf(MDSubpass const &p_subpass) const {
return p_subpass.subpass_index == lastUseSubpassIndex;
}
// Declared here, defined elsewhere.
void linkToSubpass(MDRenderPass const &p_pass);
// Declared here, defined elsewhere.
// NOTE(review): presumably selects the store action for this attachment in p_subpass from the flags passed in — confirm.
MTLStoreAction getMTLStoreAction(MDSubpass const &p_subpass,
bool p_is_rendering_entire_area,
bool p_has_resolve,
bool p_can_resolve,
bool p_is_stencil) const;
// Configures p_desc for this attachment using p_attachment as the texture.
// The caller in render_next_subpass() assigns the clear value only when this returns true.
bool configureDescriptor(MTLRenderPassAttachmentDescriptor *p_desc,
PixelFormats &p_pf,
MDSubpass const &p_subpass,
id<MTLTexture> p_attachment,
bool p_is_rendering_entire_area,
bool p_has_resolve,
bool p_can_resolve,
bool p_is_stencil) const;
/** Returns whether this attachment should be cleared in the subpass. */
bool shouldClear(MDSubpass const &p_subpass, bool p_is_stencil) const;
};
/// A render pass: the attachments it uses and the subpasses that reference them.
class API_AVAILABLE(macos(11.0), ios(14.0)) MDRenderPass {
public:
	Vector<MDAttachment> attachments;
	Vector<MDSubpass> subpasses;

	MDRenderPass(Vector<MDAttachment> &p_attachments, Vector<MDSubpass> &p_subpasses);

	/// Sample count of the pass, taken from the first attachment.
	/// A pass without attachments reports a single sample.
	uint32_t get_sample_count() const {
		if (attachments.is_empty()) {
			return 1;
		}
		return attachments[0].samples;
	}
};
class API_AVAILABLE(macos(11.0), ios(14.0)) MDPipeline {
public:
MDPipelineType type;
@ -892,13 +889,39 @@ public:
};
/// Holds the textures and pixel size of a framebuffer.
/// Textures are looked up by attachment index (see the get_texture() calls in render_next_subpass()).
/// None of the accessors below perform their own bounds check on p_idx.
// NOTE(review): this listing shows a `public:` label both before and after `textures`; it looks
// like the pre- and post-change lines of the diff are both present — confirm whether `textures`
// is meant to be reachable only through the accessors.
class API_AVAILABLE(macos(11.0), ios(14.0)) MDFrameBuffer {
public:
Vector<MTL::Texture> textures;
public:
Size2i size;
/// Creates a framebuffer from an existing texture list and size.
MDFrameBuffer(Vector<MTL::Texture> p_textures, Size2i p_size) :
textures(p_textures), size(p_size) {}
/// Creates an empty framebuffer; call set_texture_count() before set_texture().
MDFrameBuffer() {}
/// Returns the texture at the given index.
_ALWAYS_INLINE_ MTL::Texture get_texture(uint32_t p_idx) const {
return textures[p_idx];
}
/// Returns true if the texture at the given index is not nil.
_ALWAYS_INLINE_ bool has_texture(uint32_t p_idx) const {
return textures[p_idx] != nil;
}
/// Sets the texture at the given index.
_ALWAYS_INLINE_ void set_texture(uint32_t p_idx, MTL::Texture p_texture) {
textures.write[p_idx] = p_texture;
}
/// Sets the texture at the given index to nil; the slot itself is kept.
_ALWAYS_INLINE_ void unset_texture(uint32_t p_idx) {
textures.write[p_idx] = nil;
}
/// Resizes the texture list to hold p_size entries.
_ALWAYS_INLINE_ void set_texture_count(uint32_t p_size) {
textures.resize(p_size);
}
virtual ~MDFrameBuffer() = default;
};

View File

@ -96,6 +96,9 @@ void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) {
MDRenderPipeline *rp = (MDRenderPipeline *)p;
if (render.encoder == nil) {
// This error would happen if the render pass failed.
ERR_FAIL_NULL_MSG(render.desc, "Render pass descriptor is null.");
// This condition occurs when there are no attachments when calling render_next_subpass()
// and is due to the SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS flag.
render.desc.defaultRasterSampleCount = static_cast<NSUInteger>(rp->sample_count);
@ -223,8 +226,9 @@ void MDCommandBuffer::render_bind_uniform_set(RDD::UniformSetID p_uniform_set, R
void MDCommandBuffer::render_clear_attachments(VectorView<RDD::AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects) {
DEV_ASSERT(type == MDCommandBufferStateType::Render);
uint32_t vertex_count = p_rects.size() * 6;
const MDSubpass &subpass = render.get_subpass();
uint32_t vertex_count = p_rects.size() * 6 * subpass.view_count;
simd::float4 vertices[vertex_count];
simd::float4 clear_colors[ClearAttKey::ATTACHMENT_COUNT];
@ -235,6 +239,9 @@ void MDCommandBuffer::render_clear_attachments(VectorView<RDD::AttachmentClear>
ClearAttKey key;
key.sample_count = render.pass->get_sample_count();
if (subpass.view_count > 1) {
key.enable_layered_rendering();
}
float depth_value = 0;
uint32_t stencil_value = 0;
@ -245,7 +252,7 @@ void MDCommandBuffer::render_clear_attachments(VectorView<RDD::AttachmentClear>
if (attClear.aspect.has_flag(RDD::TEXTURE_ASPECT_COLOR_BIT)) {
attachment_index = attClear.color_attachment;
} else {
attachment_index = render.pass->subpasses[render.current_subpass].depth_stencil_reference.attachment;
attachment_index = subpass.depth_stencil_reference.attachment;
}
MDAttachment const &mda = render.pass->attachments[attachment_index];
@ -310,6 +317,13 @@ void MDCommandBuffer::render_clear_attachments(VectorView<RDD::AttachmentClear>
void MDCommandBuffer::_render_set_dirty_state() {
_render_bind_uniform_sets();
MDSubpass const &subpass = render.get_subpass();
if (subpass.view_count > 1) {
uint32_t view_range[2] = { 0, subpass.view_count };
[render.encoder setVertexBytes:view_range length:sizeof(view_range) atIndex:VIEW_MASK_BUFFER_INDEX];
[render.encoder setFragmentBytes:view_range length:sizeof(view_range) atIndex:VIEW_MASK_BUFFER_INDEX];
}
if (render.dirty.has_flag(RenderState::DIRTY_PIPELINE)) {
[render.encoder setRenderPipelineState:render.pipeline->state];
}
@ -492,36 +506,40 @@ uint32_t MDCommandBuffer::_populate_vertices(simd::float4 *p_vertices, uint32_t
simd::float4 vtx;
uint32_t idx = p_index;
vtx.z = 0.0;
vtx.w = (float)1;
uint32_t endLayer = render.get_subpass().view_count;
// Top left vertex - First triangle.
vtx.y = topPos;
vtx.x = leftPos;
p_vertices[idx++] = vtx;
for (uint32_t layer = 0; layer < endLayer; layer++) {
vtx.z = 0.0;
vtx.w = (float)layer;
// Bottom left vertex.
vtx.y = bottomPos;
vtx.x = leftPos;
p_vertices[idx++] = vtx;
// Top left vertex - First triangle.
vtx.y = topPos;
vtx.x = leftPos;
p_vertices[idx++] = vtx;
// Bottom right vertex.
vtx.y = bottomPos;
vtx.x = rightPos;
p_vertices[idx++] = vtx;
// Bottom left vertex.
vtx.y = bottomPos;
vtx.x = leftPos;
p_vertices[idx++] = vtx;
// Bottom right vertex - Second triangle.
p_vertices[idx++] = vtx;
// Bottom right vertex.
vtx.y = bottomPos;
vtx.x = rightPos;
p_vertices[idx++] = vtx;
// Top right vertex.
vtx.y = topPos;
vtx.x = rightPos;
p_vertices[idx++] = vtx;
// Bottom right vertex - Second triangle.
p_vertices[idx++] = vtx;
// Top left vertex.
vtx.y = topPos;
vtx.x = leftPos;
p_vertices[idx++] = vtx;
// Top right vertex.
vtx.y = topPos;
vtx.x = rightPos;
p_vertices[idx++] = vtx;
// Top left vertex.
vtx.y = topPos;
vtx.x = leftPos;
p_vertices[idx++] = vtx;
}
return idx;
}
@ -548,8 +566,7 @@ void MDCommandBuffer::render_begin_pass(RDD::RenderPassID p_render_pass, RDD::Fr
void MDCommandBuffer::_end_render_pass() {
MDFrameBuffer const &fb_info = *render.frameBuffer;
MDRenderPass const &pass_info = *render.pass;
MDSubpass const &subpass = pass_info.subpasses[render.current_subpass];
MDSubpass const &subpass = render.get_subpass();
PixelFormats &pf = device_driver->get_pixel_formats();
@ -557,11 +574,11 @@ void MDCommandBuffer::_end_render_pass() {
uint32_t color_index = subpass.color_references[i].attachment;
uint32_t resolve_index = subpass.resolve_references[i].attachment;
DEV_ASSERT((color_index == RDD::AttachmentReference::UNUSED) == (resolve_index == RDD::AttachmentReference::UNUSED));
if (color_index == RDD::AttachmentReference::UNUSED || !fb_info.textures[color_index]) {
if (color_index == RDD::AttachmentReference::UNUSED || !fb_info.has_texture(color_index)) {
continue;
}
id<MTLTexture> resolve_tex = fb_info.textures[resolve_index];
id<MTLTexture> resolve_tex = fb_info.get_texture(resolve_index);
CRASH_COND_MSG(!flags::all(pf.getCapabilities(resolve_tex.pixelFormat), kMTLFmtCapsResolve), "not implemented: unresolvable texture types");
// see: https://github.com/KhronosGroup/MoltenVK/blob/d20d13fe2735adb845636a81522df1b9d89c0fba/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm#L407
@ -572,7 +589,7 @@ void MDCommandBuffer::_end_render_pass() {
void MDCommandBuffer::_render_clear_render_area() {
MDRenderPass const &pass = *render.pass;
MDSubpass const &subpass = pass.subpasses[render.current_subpass];
MDSubpass const &subpass = render.get_subpass();
// First determine attachments that should be cleared.
LocalVector<RDD::AttachmentClear> clears;
@ -619,9 +636,14 @@ void MDCommandBuffer::render_next_subpass() {
MDFrameBuffer const &fb = *render.frameBuffer;
MDRenderPass const &pass = *render.pass;
MDSubpass const &subpass = pass.subpasses[render.current_subpass];
MDSubpass const &subpass = render.get_subpass();
MTLRenderPassDescriptor *desc = MTLRenderPassDescriptor.renderPassDescriptor;
if (subpass.view_count > 1) {
desc.renderTargetArrayLength = subpass.view_count;
}
PixelFormats &pf = device_driver->get_pixel_formats();
uint32_t attachmentCount = 0;
@ -638,7 +660,7 @@ void MDCommandBuffer::render_next_subpass() {
bool has_resolve = resolveIdx != RDD::AttachmentReference::UNUSED;
bool can_resolve = true;
if (resolveIdx != RDD::AttachmentReference::UNUSED) {
id<MTLTexture> resolve_tex = fb.textures[resolveIdx];
id<MTLTexture> resolve_tex = fb.get_texture(resolveIdx);
can_resolve = flags::all(pf.getCapabilities(resolve_tex.pixelFormat), kMTLFmtCapsResolve);
if (can_resolve) {
ca.resolveTexture = resolve_tex;
@ -649,7 +671,9 @@ void MDCommandBuffer::render_next_subpass() {
MDAttachment const &attachment = pass.attachments[idx];
id<MTLTexture> tex = fb.textures[idx];
id<MTLTexture> tex = fb.get_texture(idx);
ERR_FAIL_NULL_MSG(tex, "Frame buffer color texture is null.");
if ((attachment.type & MDAttachmentType::Color)) {
if (attachment.configureDescriptor(ca, pf, subpass, tex, render.is_rendering_entire_area, has_resolve, can_resolve, false)) {
Color clearColor = render.clear_values[idx].color;
@ -662,7 +686,8 @@ void MDCommandBuffer::render_next_subpass() {
attachmentCount += 1;
uint32_t idx = subpass.depth_stencil_reference.attachment;
MDAttachment const &attachment = pass.attachments[idx];
id<MTLTexture> tex = fb.textures[idx];
id<MTLTexture> tex = fb.get_texture(idx);
ERR_FAIL_NULL_MSG(tex, "Frame buffer depth / stencil texture is null.");
if (attachment.type & MDAttachmentType::Depth) {
MTLRenderPassDepthAttachmentDescriptor *da = desc.depthAttachment;
if (attachment.configureDescriptor(da, pf, subpass, tex, render.is_rendering_entire_area, false, false, false)) {
@ -702,8 +727,15 @@ void MDCommandBuffer::render_draw(uint32_t p_vertex_count,
uint32_t p_base_vertex,
uint32_t p_first_instance) {
DEV_ASSERT(type == MDCommandBufferStateType::Render);
ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer.");
_render_set_dirty_state();
MDSubpass const &subpass = render.get_subpass();
if (subpass.view_count > 1) {
p_instance_count *= subpass.view_count;
}
DEV_ASSERT(render.dirty == 0);
id<MTLRenderCommandEncoder> enc = render.encoder;
@ -751,8 +783,15 @@ void MDCommandBuffer::render_draw_indexed(uint32_t p_index_count,
int32_t p_vertex_offset,
uint32_t p_first_instance) {
DEV_ASSERT(type == MDCommandBufferStateType::Render);
ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer.");
_render_set_dirty_state();
MDSubpass const &subpass = render.get_subpass();
if (subpass.view_count > 1) {
p_instance_count *= subpass.view_count;
}
id<MTLRenderCommandEncoder> enc = render.encoder;
uint32_t index_offset = render.index_offset;
@ -770,6 +809,8 @@ void MDCommandBuffer::render_draw_indexed(uint32_t p_index_count,
void MDCommandBuffer::render_draw_indexed_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {
DEV_ASSERT(type == MDCommandBufferStateType::Render);
ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer.");
_render_set_dirty_state();
id<MTLRenderCommandEncoder> enc = render.encoder;
@ -794,6 +835,8 @@ void MDCommandBuffer::render_draw_indexed_indirect_count(RDD::BufferID p_indirec
void MDCommandBuffer::render_draw_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {
DEV_ASSERT(type == MDCommandBufferStateType::Render);
ERR_FAIL_NULL_MSG(render.pipeline, "No pipeline set for render command buffer.");
_render_set_dirty_state();
id<MTLRenderCommandEncoder> enc = render.encoder;
@ -813,6 +856,42 @@ void MDCommandBuffer::render_draw_indirect_count(RDD::BufferID p_indirect_buffer
ERR_FAIL_MSG("not implemented");
}
// Ends the current render pass: finishes encoding, resets the cached render state and puts the
// command buffer back into the None state. Expects the command buffer to be in the Render state.
void MDCommandBuffer::render_end_pass() {
DEV_ASSERT(type == MDCommandBufferStateType::Render);
// The encoder must be ended before reset() drops the reference to it.
render.end_encoding();
render.reset();
type = MDCommandBufferStateType::None;
}
#pragma mark - RenderState
// Returns the render state to its idle defaults so it can be reused for the next pass.
void MDCommandBuffer::RenderState::reset() {
pass = nil;
frameBuffer = nil;
pipeline = nil;
// UINT32_MAX marks "no current subpass"; get_subpass() must not be called in this state.
current_subpass = UINT32_MAX;
render_area = {};
is_rendering_entire_area = false;
desc = nil;
encoder = nil;
index_buffer = nil;
index_type = MTLIndexTypeUInt16;
dirty = DIRTY_NONE;
uniform_sets.clear();
uniform_set_mask = 0;
clear_values.clear();
viewports.clear();
scissors.clear();
blend_constants.reset();
vertex_buffers.clear();
vertex_offsets.clear();
// Keep the keys, as they are likely to be used again.
for (KeyValue<StageResourceUsage, LocalVector<__unsafe_unretained id<MTLResource>>> &kv : resource_usage) {
kv.value.clear();
}
}
void MDCommandBuffer::RenderState::end_encoding() {
if (encoder == nil) {
return;
@ -842,6 +921,8 @@ void MDCommandBuffer::RenderState::end_encoding() {
encoder = nil;
}
#pragma mark - ComputeState
void MDCommandBuffer::ComputeState::end_encoding() {
if (encoder == nil) {
return;
@ -862,14 +943,6 @@ void MDCommandBuffer::ComputeState::end_encoding() {
encoder = nil;
}
// Ends the current render pass: finishes encoding, resets the cached render state and puts the
// command buffer back into the None state. Expects the command buffer to be in the Render state.
void MDCommandBuffer::render_end_pass() {
DEV_ASSERT(type == MDCommandBufferStateType::Render);
// The encoder must be ended before reset() drops the reference to it.
render.end_encoding();
render.reset();
type = MDCommandBufferStateType::None;
}
#pragma mark - Compute
void MDCommandBuffer::compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index) {
@ -943,8 +1016,11 @@ void MDComputeShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDC
[enc setBytes:ptr length:length atIndex:push_constants.binding];
}
MDRenderShader::MDRenderShader(CharString p_name, Vector<UniformSet> p_sets, MDLibrary *_Nonnull p_vert, MDLibrary *_Nonnull p_frag) :
MDShader(p_name, p_sets), vert(p_vert), frag(p_frag) {
MDRenderShader::MDRenderShader(CharString p_name,
bool p_needs_view_mask_buffer,
Vector<UniformSet> p_sets,
MDLibrary *_Nonnull p_vert, MDLibrary *_Nonnull p_frag) :
MDShader(p_name, p_sets), needs_view_mask_buffer(p_needs_view_mask_buffer), vert(p_vert), frag(p_frag) {
}
void MDRenderShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) {
@ -1279,7 +1355,7 @@ typedef struct {
typedef struct {
float4 v_position [[position]];
uint layer;
uint layer%s;
} VaryingsPos;
vertex VaryingsPos vertClear(AttributesPos attributes [[stage_in]], constant ClearColorsIn& ccIn [[buffer(0)]]) {
@ -1288,7 +1364,7 @@ vertex VaryingsPos vertClear(AttributesPos attributes [[stage_in]], constant Cle
varyings.layer = uint(attributes.a_position.w);
return varyings;
}
)", ClearAttKey::DEPTH_INDEX];
)", p_key.is_layered_rendering_enabled() ? " [[render_target_array_index]]" : "", ClearAttKey::DEPTH_INDEX];
return new_func(msl, @"vertClear", nil);
}
@ -1578,7 +1654,7 @@ void ShaderCacheEntry::notify_free() const {
self->_library = library;
self->_error = error;
if (error) {
ERR_PRINT(String(U"Error compiling shader %s: %s").format(entry->name.get_data(), error.localizedDescription.UTF8String));
ERR_PRINT(vformat(U"Error compiling shader %s: %s", entry->name.get_data(), error.localizedDescription.UTF8String));
}
{

View File

@ -134,7 +134,7 @@ public:
frame_buffers.resize(p_desired_framebuffer_count);
for (uint32_t i = 0; i < p_desired_framebuffer_count; i++) {
// Reserve space for the drawable texture.
frame_buffers[i].textures.resize(1);
frame_buffers[i].set_texture_count(1);
}
return OK;
@ -154,7 +154,7 @@ public:
id<CAMetalDrawable> drawable = layer.nextDrawable;
ERR_FAIL_NULL_V_MSG(drawable, RDD::FramebufferID(), "no drawable available");
drawables[rear] = drawable;
frame_buffer.textures.write[0] = drawable.texture;
frame_buffer.set_texture(0, drawable.texture);
return RDD::FramebufferID(&frame_buffer);
}
@ -165,7 +165,7 @@ public:
}
// Release texture and drawable.
frame_buffers[front].textures.write[0] = nil;
frame_buffers[front].unset_texture(0);
id<MTLDrawable> drawable = drawables[front];
drawables[front] = nil;

View File

@ -239,7 +239,13 @@ private:
friend struct PushConstantData;
private:
Error _reflect_spirv16(VectorView<ShaderStageSPIRVData> p_spirv, ShaderReflection &r_reflection);
/// Extra information collected while reflecting SPIR-V that is not carried by ShaderReflection.
struct ShaderMeta {
	/// Set when reflection finds a view-index or viewport-index built-in among the shader's
	/// inputs or outputs. When true, the multiview MSL options are enabled for the shader and
	/// the compiled binary is flagged as needing the view mask buffer.
	bool has_multiview{ false };
};
Error _reflect_spirv16(VectorView<ShaderStageSPIRVData> p_spirv, ShaderReflection &r_reflection, ShaderMeta &r_shader_meta);
public:
virtual String shader_get_binary_cache_key() override final;

View File

@ -1026,7 +1026,7 @@ void RenderingDeviceDriverMetal::framebuffer_free(FramebufferID p_framebuffer) {
#pragma mark - Shader
const uint32_t SHADER_BINARY_VERSION = 1;
const uint32_t SHADER_BINARY_VERSION = 2;
// region Serialization
@ -1503,6 +1503,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData {
uint32_t fragment_output_mask = UINT32_MAX;
uint32_t spirv_specialization_constants_ids_mask = UINT32_MAX;
uint32_t is_compute = UINT32_MAX;
uint32_t needs_view_mask_buffer = UINT32_MAX;
ComputeSize compute_local_size;
PushConstantData push_constant;
LocalVector<ShaderStageData> stages;
@ -1523,6 +1524,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData {
size += sizeof(uint32_t); // fragment_output_mask
size += sizeof(uint32_t); // spirv_specialization_constants_ids_mask
size += sizeof(uint32_t); // is_compute
size += sizeof(uint32_t); // needs_view_mask_buffer
size += compute_local_size.serialize_size(); // compute_local_size
size += push_constant.serialize_size(); // push_constant
size += sizeof(uint32_t); // stages.size()
@ -1547,6 +1549,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData {
p_writer.write(fragment_output_mask);
p_writer.write(spirv_specialization_constants_ids_mask);
p_writer.write(is_compute);
p_writer.write(needs_view_mask_buffer);
p_writer.write(compute_local_size);
p_writer.write(push_constant);
p_writer.write(VectorView(stages));
@ -1561,6 +1564,7 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData {
p_reader.read(fragment_output_mask);
p_reader.read(spirv_specialization_constants_ids_mask);
p_reader.read(is_compute);
p_reader.read(needs_view_mask_buffer);
p_reader.read(compute_local_size);
p_reader.read(push_constant);
p_reader.read(stages);
@ -1572,14 +1576,16 @@ struct API_AVAILABLE(macos(11.0), ios(14.0)) ShaderBinaryData {
// endregion
// Returns the cache key for compiled shader binaries: "Metal-SV" followed by SHADER_BINARY_VERSION,
// so bumping the version yields a different key.
// NOTE(review): this listing contains two return paths (a direct return followed by a cached
// static String); it appears to show both the pre- and post-change lines of the diff — confirm
// which one is live.
String RenderingDeviceDriverMetal::shader_get_binary_cache_key() {
return "Metal-SV" + uitos(SHADER_BINARY_VERSION);
static const String cache_key = "Metal-SV" + uitos(SHADER_BINARY_VERSION);
return cache_key;
}
Error RenderingDeviceDriverMetal::_reflect_spirv16(VectorView<ShaderStageSPIRVData> p_spirv, ShaderReflection &r_reflection) {
Error RenderingDeviceDriverMetal::_reflect_spirv16(VectorView<ShaderStageSPIRVData> p_spirv, ShaderReflection &r_reflection, ShaderMeta &r_shader_meta) {
using namespace spirv_cross;
using spirv_cross::Resource;
r_reflection = {};
r_shader_meta = {};
for (uint32_t i = 0; i < p_spirv.size(); i++) {
ShaderStageSPIRVData const &v = p_spirv[i];
@ -1811,6 +1817,20 @@ Error RenderingDeviceDriverMetal::_reflect_spirv16(VectorView<ShaderStageSPIRVDa
}
}
for (const BuiltInResource &res : resources.builtin_inputs) {
if (res.builtin == spv::BuiltInViewIndex || res.builtin == spv::BuiltInViewportIndex) {
r_shader_meta.has_multiview = true;
}
}
if (!r_shader_meta.has_multiview) {
for (const BuiltInResource &res : resources.builtin_outputs) {
if (res.builtin == spv::BuiltInViewIndex || res.builtin == spv::BuiltInViewportIndex) {
r_shader_meta.has_multiview = true;
}
}
}
// Specialization constants.
for (SpecializationConstant const &constant : compiler.get_specialization_constants()) {
int32_t existing = -1;
@ -1874,7 +1894,8 @@ Vector<uint8_t> RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(Vec
using spirv_cross::Resource;
ShaderReflection spirv_data;
ERR_FAIL_COND_V(_reflect_spirv16(p_spirv, spirv_data), Result());
ShaderMeta shader_meta;
ERR_FAIL_COND_V(_reflect_spirv16(p_spirv, spirv_data, shader_meta), Result());
ShaderBinaryData bin_data{};
if (!p_shader_name.is_empty()) {
@ -1893,6 +1914,7 @@ Vector<uint8_t> RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(Vec
bin_data.is_compute = spirv_data.is_compute;
bin_data.push_constant.size = spirv_data.push_constant_size;
bin_data.push_constant.stages = (ShaderStageUsage)(uint8_t)spirv_data.push_constant_stages;
bin_data.needs_view_mask_buffer = shader_meta.has_multiview ? 1 : 0;
for (uint32_t i = 0; i < spirv_data.uniform_sets.size(); i++) {
const ::Vector<ShaderUniform> &spirv_set = spirv_data.uniform_sets[i];
@ -1947,6 +1969,11 @@ Vector<uint8_t> RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(Vec
msl_options.pad_fragment_output_components = true;
msl_options.r32ui_alignment_constant_id = R32UI_ALIGNMENT_CONSTANT_ID;
msl_options.agx_manual_cube_grad_fixup = true;
if (shader_meta.has_multiview) {
msl_options.multiview = true;
msl_options.multiview_layered_rendering = true;
msl_options.view_mask_buffer_index = VIEW_MASK_BUFFER_INDEX;
}
CompilerGLSL::Options options{};
options.vertex.flip_vert_y = true;
@ -2448,7 +2475,7 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vect
#endif
shader = cs;
} else {
MDRenderShader *rs = new MDRenderShader(binary_data.shader_name, uniform_sets, libraries[ShaderStage::SHADER_STAGE_VERTEX], libraries[ShaderStage::SHADER_STAGE_FRAGMENT]);
MDRenderShader *rs = new MDRenderShader(binary_data.shader_name, (bool)binary_data.needs_view_mask_buffer, uniform_sets, libraries[ShaderStage::SHADER_STAGE_VERTEX], libraries[ShaderStage::SHADER_STAGE_FRAGMENT]);
uint32_t *vert_binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_VERTEX);
if (vert_binding) {
@ -2956,6 +2983,7 @@ RDD::RenderPassID RenderingDeviceDriverMetal::render_pass_create(VectorView<Atta
for (uint32_t i = 0; i < subpass_count; i++) {
MDSubpass &subpass = subpasses.write[i];
subpass.subpass_index = i;
subpass.view_count = p_view_count;
subpass.input_references = p_subpasses[i].input_references;
subpass.color_references = p_subpasses[i].color_references;
subpass.depth_stencil_reference = p_subpasses[i].depth_stencil_reference;
@ -3675,8 +3703,7 @@ void RenderingDeviceDriverMetal::set_object_name(ObjectType p_type, ID p_driver_
uint64_t RenderingDeviceDriverMetal::get_resource_native_handle(DriverResource p_type, ID p_driver_id) {
switch (p_type) {
case DRIVER_RESOURCE_LOGICAL_DEVICE: {
uintptr_t devicePtr = (uintptr_t)(__bridge void *)device;
return (uint64_t)devicePtr;
return (uint64_t)(uintptr_t)(__bridge void *)device;
}
case DRIVER_RESOURCE_PHYSICAL_DEVICE: {
return 0;
@ -3685,7 +3712,7 @@ uint64_t RenderingDeviceDriverMetal::get_resource_native_handle(DriverResource p
return 0;
}
case DRIVER_RESOURCE_COMMAND_QUEUE: {
return 0;
return (uint64_t)(uintptr_t)(__bridge void *)device_queue;
}
case DRIVER_RESOURCE_QUEUE_FAMILY: {
return 0;
@ -3702,15 +3729,20 @@ uint64_t RenderingDeviceDriverMetal::get_resource_native_handle(DriverResource p
case DRIVER_RESOURCE_SAMPLER: {
return p_driver_id.id;
}
case DRIVER_RESOURCE_UNIFORM_SET:
case DRIVER_RESOURCE_UNIFORM_SET: {
return 0;
}
case DRIVER_RESOURCE_BUFFER: {
return p_driver_id.id;
}
case DRIVER_RESOURCE_COMPUTE_PIPELINE:
return 0;
case DRIVER_RESOURCE_RENDER_PIPELINE:
return 0;
case DRIVER_RESOURCE_COMPUTE_PIPELINE: {
MDComputePipeline *pipeline = (MDComputePipeline *)(p_driver_id.id);
return (uint64_t)(uintptr_t)(__bridge void *)pipeline->state;
}
case DRIVER_RESOURCE_RENDER_PIPELINE: {
MDRenderPipeline *pipeline = (MDRenderPipeline *)(p_driver_id.id);
return (uint64_t)(uintptr_t)(__bridge void *)pipeline->state;
}
default: {
return 0;
}
@ -3842,7 +3874,7 @@ uint64_t RenderingDeviceDriverMetal::api_trait_get(ApiTrait p_trait) {
bool RenderingDeviceDriverMetal::has_feature(Features p_feature) {
switch (p_feature) {
case SUPPORTS_MULTIVIEW:
return false;
return multiview_capabilities.is_supported;
case SUPPORTS_FSR_HALF_FLOAT:
return true;
case SUPPORTS_ATTACHMENT_VRS:
@ -3951,6 +3983,18 @@ Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p
metal_device_properties = memnew(MetalDeviceProperties(device));
pixel_formats = memnew(PixelFormats(device));
if (metal_device_properties->features.layeredRendering) {
multiview_capabilities.is_supported = true;
multiview_capabilities.max_view_count = metal_device_properties->limits.maxViewports;
// NOTE: No documented upper bound for the instance count was found, so it is left unlimited.
multiview_capabilities.max_instance_count = UINT32_MAX;
print_verbose("- Metal multiview supported:");
print_verbose(" max view count: " + itos(multiview_capabilities.max_view_count));
print_verbose(" max instances: " + itos(multiview_capabilities.max_instance_count));
} else {
print_verbose("- Metal multiview not supported");
}
// Check required features and abort if any of them is missing.
if (!metal_device_properties->features.imageCubeArray) {