diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 026999b34abd..c0e476fcd1fa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1401,10 +1401,10 @@ static int get_ringsize(struct i915_gem_context *ctx, return 0; } -static int -user_to_context_sseu(struct drm_i915_private *i915, - const struct drm_i915_gem_context_param_sseu *user, - struct intel_sseu *context) +int +i915_gem_user_to_context_sseu(struct drm_i915_private *i915, + const struct drm_i915_gem_context_param_sseu *user, + struct intel_sseu *context) { const struct sseu_dev_info *device = &RUNTIME_INFO(i915)->sseu; @@ -1539,7 +1539,7 @@ static int set_sseu(struct i915_gem_context *ctx, goto out_ce; } - ret = user_to_context_sseu(i915, &user_sseu, &sseu); + ret = i915_gem_user_to_context_sseu(i915, &user_sseu, &sseu); if (ret) goto out_ce; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index f1d884d304bd..3702b2fb27ab 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -225,4 +225,8 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it); struct i915_lut_handle *i915_lut_handle_alloc(void); void i915_lut_handle_free(struct i915_lut_handle *lut); +int i915_gem_user_to_context_sseu(struct drm_i915_private *i915, + const struct drm_i915_gem_context_param_sseu *user, + struct intel_sseu *context); + #endif /* !__I915_GEM_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 74f793423231..d173271c7397 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -65,7 +65,6 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, { const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; bool subslice_pg = sseu->has_subslice_pg; - struct intel_sseu ctx_sseu; u8 slices, subslices; u32 rpcs = 0; @@ -78,31 +77,13 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, /* * If i915/perf is active, we want a stable powergating configuration - * on the system. - * - * We could choose full enablement, but on ICL we know there are use - * cases which disable slices for functional, apart for performance - * reasons. So in this case we select a known stable subset. + * on the system. Use the configuration pinned by i915/perf. */ - if (!i915->perf.exclusive_stream) { - ctx_sseu = *req_sseu; - } else { - ctx_sseu = intel_sseu_from_device_info(sseu); + if (i915->perf.exclusive_stream) + req_sseu = &i915->perf.sseu; - if (IS_GEN(i915, 11)) { - /* - * We only need subslice count so it doesn't matter - * which ones we select - just turn off low bits in the - * amount of half of all available subslices per slice. - */ - ctx_sseu.subslice_mask = - ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2)); - ctx_sseu.slice_mask = 0x1; - } - } - - slices = hweight8(ctx_sseu.slice_mask); - subslices = hweight8(ctx_sseu.subslice_mask); + slices = hweight8(req_sseu->slice_mask); + subslices = hweight8(req_sseu->subslice_mask); /* * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits @@ -175,13 +156,13 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, if (sseu->has_eu_pg) { u32 val; - val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; + val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK); val &= GEN8_RPCS_EU_MIN_MASK; rpcs |= val; - val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; + val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK); val &= GEN8_RPCS_EU_MAX_MASK; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 86c6abaa3e0e..4611f72cd036 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -344,6 +344,10 @@ static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = { * @oa_periodic: Whether to enable periodic OA unit sampling * @oa_period_exponent: The OA unit sampling period is derived from this * @engine: The engine (typically rcs0) being monitored by the OA unit + * @has_sseu: Whether @sseu was specified by userspace + * @sseu: internal SSEU configuration computed either from the userspace + * specified configuration in the opening parameters or a default value + * (see get_default_sseu_config()) * * As read_properties_unlocked() enumerates and validates the properties given * to open a stream of metrics the configuration is built up in the structure @@ -363,6 +367,9 @@ struct perf_open_properties { int oa_period_exponent; struct intel_engine_cs *engine; + + bool has_sseu; + struct intel_sseu sseu; }; struct i915_oa_config_bo { @@ -2720,6 +2727,39 @@ static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream) return 0; } +static void +get_default_sseu_config(struct intel_sseu *out_sseu, + struct intel_engine_cs *engine) +{ + const struct sseu_dev_info *devinfo_sseu = + &RUNTIME_INFO(engine->i915)->sseu; + + *out_sseu = intel_sseu_from_device_info(devinfo_sseu); + + if (IS_GEN(engine->i915, 11)) { + /* + * We only need subslice count so it doesn't matter which ones + * we select - just turn off low bits in the amount of half of + * all available subslices per slice. + */ + out_sseu->subslice_mask = + ~(~0 << (hweight8(out_sseu->subslice_mask) / 2)); + out_sseu->slice_mask = 0x1; + } +} + +static int +get_sseu_config(struct intel_sseu *out_sseu, + struct intel_engine_cs *engine, + const struct drm_i915_gem_context_param_sseu *drm_sseu) +{ + if (drm_sseu->engine.engine_class != engine->uabi_class || + drm_sseu->engine.engine_instance != engine->uabi_instance) + return -EINVAL; + + return i915_gem_user_to_context_sseu(engine->i915, drm_sseu, out_sseu); +} + /** * i915_oa_stream_init - validate combined props for OA stream and init * @stream: An i915 perf stream @@ -2852,6 +2892,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, goto err_oa_buf_alloc; stream->ops = &i915_oa_stream_ops; + + perf->sseu = props->sseu; WRITE_ONCE(perf->exclusive_stream, stream); ret = i915_perf_stream_enable_sync(stream); @@ -3397,6 +3439,14 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, privileged_op = true; } + /* + * Asking for SSEU configuration is a priviliged operation. + */ + if (props->has_sseu) + privileged_op = true; + else + get_default_sseu_config(&props->sseu, props->engine); + /* Similar to perf's kernel.perf_paranoid_cpu sysctl option * we check a dev.i915.perf_stream_paranoid sysctl option * to determine if it's ok to access system wide OA counters @@ -3492,6 +3542,7 @@ static int read_properties_unlocked(struct i915_perf *perf, { u64 __user *uprop = uprops; u32 i; + int ret; memset(props, 0, sizeof(struct perf_open_properties)); @@ -3523,7 +3574,6 @@ static int read_properties_unlocked(struct i915_perf *perf, for (i = 0; i < n_props; i++) { u64 oa_period, oa_freq_hz; u64 id, value; - int ret; ret = get_user(id, uprop); if (ret) @@ -3609,6 +3659,24 @@ static int read_properties_unlocked(struct i915_perf *perf, case DRM_I915_PERF_PROP_HOLD_PREEMPTION: props->hold_preemption = !!value; break; + case DRM_I915_PERF_PROP_GLOBAL_SSEU: { + struct drm_i915_gem_context_param_sseu user_sseu; + + if (copy_from_user(&user_sseu, + u64_to_user_ptr(value), + sizeof(user_sseu))) { + DRM_DEBUG("Unable to copy global sseu parameter\n"); + return -EFAULT; + } + + ret = get_sseu_config(&props->sseu, props->engine, &user_sseu); + if (ret) { + DRM_DEBUG("Invalid SSEU configuration\n"); + return ret; + } + props->has_sseu = true; + break; + } case DRM_I915_PERF_PROP_MAX: MISSING_CASE(id); return -EINVAL; @@ -4382,8 +4450,12 @@ int i915_perf_ioctl_version(void) * preemption on a particular context so that performance data is * accessible from a delta of MI_RPC reports without looking at the * OA buffer. + * + * 4: Add DRM_I915_PERF_PROP_ALLOWED_SSEU to limit what contexts can + * be run for the duration of the performance recording based on + * their SSEU configuration. */ - return 3; + return 4; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index f4ccd2adfee6..32289cbda648 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -16,6 +16,7 @@ #include #include +#include "gt/intel_sseu.h" #include "i915_reg.h" #include "intel_wakeref.h" @@ -407,6 +408,12 @@ struct i915_perf { */ struct i915_perf_stream *exclusive_stream; + /** + * @sseu: sseu configuration selected to run while perf is active, + * applies to all contexts. + */ + struct intel_sseu sseu; + /** * For rate limiting any notifications of spurious * invalid OA reports diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 2813e579b480..db649d03ab52 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1969,6 +1969,17 @@ enum drm_i915_perf_property_id { */ DRM_I915_PERF_PROP_HOLD_PREEMPTION, + /** + * Specifying this pins all contexts to the specified SSEU power + * configuration for the duration of the recording. + * + * This parameter's value is a pointer to a struct + * drm_i915_gem_context_param_sseu. + * + * This property is available in perf revision 4. + */ + DRM_I915_PERF_PROP_GLOBAL_SSEU, + DRM_I915_PERF_PROP_MAX /* non-ABI */ };