HW resources are divided across the active CCS engines at the compute slice
level, with each CCS having priority on one of the cslices. If a compute
slice has no enabled DSS, its paired compute engine is not usable in full
parallel execution because the other engines already fully saturate the HW,
so consider it fused off.

v2 (José):
 - moved it to its own function
 - fixed definition of ccs_mask
v3 (Matt):
 - Replace fls() condition with a simple IP version test
v4 (Matt):
 - Don't try to calculate a ccs_mask using intel_slicemask_from_dssmask()
   until we've determined that we're running on an Xe_HP platform where
   the logic makes sense (and won't overflow).

Cc: Stuart Summers <stuart.summers@intel.com>
Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Cc: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Stuart Summers <stuart.summers@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220302052008.1884985-1-matthew.d.roper@intel.com
125 lines
3.3 KiB
C
125 lines
3.3 KiB
C
/* SPDX-License-Identifier: MIT */
|
|
/*
|
|
* Copyright © 2019 Intel Corporation
|
|
*/
|
|
|
|
#ifndef __INTEL_SSEU_H__
|
|
#define __INTEL_SSEU_H__
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/kernel.h>
|
|
|
|
#include "i915_gem.h"
|
|
|
|
/*
 * Forward declarations: this header only names these types (or takes
 * pointers to them), so their full definitions are not needed here.
 */
struct drm_i915_private;
struct drm_printer;
struct intel_gt;
|
|
|
|
/*
 * Upper bounds used to size the SSEU mask arrays in struct sseu_dev_info.
 * Masks are stored as byte arrays, so each *_STRIDE is the number of bytes
 * needed to hold one bit per entry.
 */
#define GEN_MAX_SLICES			(3) /* SKL upper bound */
#define GEN_MAX_SUBSLICES		(32) /* XEHPSDV upper bound */
#define GEN_SSEU_STRIDE(nbits)		DIV_ROUND_UP(nbits, BITS_PER_BYTE)
#define GEN_MAX_SUBSLICE_STRIDE		GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
#define GEN_MAX_EUS			(16) /* TGL upper bound */
#define GEN_MAX_EU_STRIDE		GEN_SSEU_STRIDE(GEN_MAX_EUS)

/* Number of DSS grouped into each gslice / cslice / mslice. */
#define GEN_DSS_PER_GSLICE		4
#define GEN_DSS_PER_CSLICE		8
#define GEN_DSS_PER_MSLICE		8

/* Maximum gslice / cslice counts, derived from the subslice upper bound. */
#define GEN_MAX_GSLICES			(GEN_MAX_SUBSLICES / GEN_DSS_PER_GSLICE)
#define GEN_MAX_CSLICES			(GEN_MAX_SUBSLICES / GEN_DSS_PER_CSLICE)
|
|
|
|
struct sseu_dev_info {
|
|
u8 slice_mask;
|
|
u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
|
|
u8 geometry_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
|
|
u8 compute_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
|
|
u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
|
|
u16 eu_total;
|
|
u8 eu_per_subslice;
|
|
u8 min_eu_in_pool;
|
|
/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
|
|
u8 subslice_7eu[3];
|
|
u8 has_slice_pg:1;
|
|
u8 has_subslice_pg:1;
|
|
u8 has_eu_pg:1;
|
|
|
|
/* Topology fields */
|
|
u8 max_slices;
|
|
u8 max_subslices;
|
|
u8 max_eus_per_subslice;
|
|
|
|
u8 ss_stride;
|
|
u8 eu_stride;
|
|
};
|
|
|
|
/*
|
|
* Powergating configuration for a particular (context,engine).
|
|
*/
|
|
struct intel_sseu {
|
|
u8 slice_mask;
|
|
u8 subslice_mask;
|
|
u8 min_eus_per_subslice;
|
|
u8 max_eus_per_subslice;
|
|
};
|
|
|
|
static inline struct intel_sseu
|
|
intel_sseu_from_device_info(const struct sseu_dev_info *sseu)
|
|
{
|
|
struct intel_sseu value = {
|
|
.slice_mask = sseu->slice_mask,
|
|
.subslice_mask = sseu->subslice_mask[0],
|
|
.min_eus_per_subslice = sseu->max_eus_per_subslice,
|
|
.max_eus_per_subslice = sseu->max_eus_per_subslice,
|
|
};
|
|
|
|
return value;
|
|
}
|
|
|
|
static inline bool
|
|
intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice,
|
|
int subslice)
|
|
{
|
|
u8 mask;
|
|
int ss_idx = subslice / BITS_PER_BYTE;
|
|
|
|
if (slice >= sseu->max_slices ||
|
|
subslice >= sseu->max_subslices)
|
|
return false;
|
|
|
|
GEM_BUG_ON(ss_idx >= sseu->ss_stride);
|
|
|
|
mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx];
|
|
|
|
return mask & BIT(subslice % BITS_PER_BYTE);
|
|
}
|
|
|
|
void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
|
|
u8 max_subslices, u8 max_eus_per_subslice);
|
|
|
|
unsigned int
|
|
intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
|
|
|
|
unsigned int
|
|
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
|
|
|
|
u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
|
|
|
|
u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu);
|
|
|
|
void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
|
|
u8 *subslice_mask, u32 ss_mask);
|
|
|
|
void intel_sseu_info_init(struct intel_gt *gt);
|
|
|
|
u32 intel_sseu_make_rpcs(struct intel_gt *gt,
|
|
const struct intel_sseu *req_sseu);
|
|
|
|
void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p);
|
|
void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
|
|
struct drm_printer *p);
|
|
|
|
u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice);
|
|
|
|
#endif /* __INTEL_SSEU_H__ */
|