fb5cfcaa2e
UAPI Changes: - Deprecate I915_PMU_LAST and optimize state tracking (Tvrtko) Avoid relying on last item ABI marker in i915_drm.h, add a comment to mark as deprecated. Cross-subsystem Changes: Core Changes: Driver Changes: - Restore clear residuals security mitigations for Ivybridge and Baytrail (Chris) - Close #1858: Allow sysadmin to choose applied GPU security mitigations through i915.mitigations=... similar to CPU (Chris) - Fix for #2024: GPU hangs on HSW GT1 (Chris) - Fix for #2707: Driver hang when editing UVs in Blender (Chris, Ville) - Fix for #2797: False positive GuC loading error message (Chris) - Fix for #2859: Missing GuC firmware for older Cometlakes (Chris) - Lessen probability of GPU hang due to DMAR faults [reason 7, next page table ptr is invalid] on Tigerlake (Chris) - Fix REVID macros for TGL to fetch correct stepping (Aditya) - Limit frequency drop to RPe on parking (Chris, Edward) - Limit W/A 1406941453 to TGL, RKL and DG1 (Swathi) - Make W/A 22010271021 permanent on DG1 (Lucas) - Implement W/A 16011163337 to prevent a HS/DS hang on DG1 (Swathi) - Only disable preemption on gen8 render engines (Chris) - Disable arbitration around Braswell's PDP updates (Chris) - Disable arbitration on no-preempt requests (Chris) - Check for arbitration after writing start seqno before busywaiting (Chris) - Retain default context state across shrinking (Venkata, CQ) - Fix mismatch between misplaced vma check and vma insert for 32-bit addressing userspaces (Chris, CQ) - Propagate error for vmap() failure instead kernel NULL deref (Chris) - Propagate error from cancelled submit due to context closure immediately (Chris) - Fix RCU race on HWSP tracking per request (Chris) - Clear CMD parser shadow and GPU reloc batches (Matt A) - Populate logical context during first pin (Maarten) - Optimistically prune dma-resv from the shrinker (Chris) - Fix for virtual engine ownership race (Chris) - Remove timeslice suppression to restore fairness for virtual engines (Chris) - 
Rearrange IVB/HSW workarounds properly between GT and engine (Chris) - Taint the reset mutex with the shrinker (Chris) - Replace direct submit with direct call to tasklet (Chris) - Multiple corrections to virtual engine dequeue and breadcrumbs code (Chris) - Avoid wakeref from potentially hard IRQ context in PMU (Tvrtko) - Use raw clock for RC6 time estimation in PMU (Tvrtko) - Differentiate OOM failures from invalid map types (Chris) - Fix Gen9 to have 64 MOCS entries similar to Gen11 (Chris) - Ignore repeated attempts to suspend request flow across reset (Chris) - Remove livelock from "do_idle_maps" VT-d W/A (Chris) - Cancel the preemption timeout early in case engine reset fails (Chris) - Code flow optimization in the scheduling code (Chris) - Clear the execlists timers upon reset (Chris) - Drain the breadcrumbs just once (Chris, Matt A) - Track the overall GT awake/busy time (Chris) - Tweak submission tasklet flushing to avoid starvation (Chris) - Track timelines created using the HWSP to restore on resume (Chris) - Use cmpxchg64 for 32b compatibility for active tracking (Chris) - Prefer recycling an idle GGTT fence to avoid GPU wait (Chris) - Restructure GT code organization for clearer split between GuC and execlists (Chris, Daniele, John, Matt A) - Remove GuC code that will remain unused by new interfaces (Matt B) - Restructure the CS timestamp clocks code to local to GT (Chris) - Fix error return paths in perf code (Zhang) - Replace idr_init() by idr_init_base() in perf (Deepak) - Fix shmem_pin_map error path (Colin) - Drop redundant free_work worker for GEM contexts (Chris, Mika) - Increase readability and understandability of intel_workarounds.c (Lucas) - Defer enabling the breadcrumb interrupt to after submission (Chris) - Deal with buddy alloc block sizes beyond 4G (Venkata, Chris) - Encode fence specific waitqueue behaviour into the wait.flags (Chris) - Don't cancel the breadcrumb interrupt shadow too early (Chris) - Cancel submitted requests upon 
context reset (Chris) - Use correct locks in GuC code (Tvrtko) - Prevent use of engine->wa_ctx after error (Chris, Matt R) - Fix build warning on 32-bit (Arnd) - Avoid memory leak if platform would have more than 16 W/A (Tvrtko) - Avoid unnecessary #if CONFIG_PM in PMU code (Chris, Tvrtko) - Improve debugging output (Chris, Tvrtko, Matt R) - Make file local variables static (Jani) - Avoid uint*_t types in i915 (Jani) - Selftest improvements (Chris, Matt A, Dan) - Documentation fixes (Chris, Jose) Signed-off-by: Dave Airlie <airlied@redhat.com> # Conflicts: # drivers/gpu/drm/i915/gt/intel_breadcrumbs.c # drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h # drivers/gpu/drm/i915/gt/intel_lrc.c # drivers/gpu/drm/i915/gvt/mmio_context.h # drivers/gpu/drm/i915/i915_drv.h From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20210114152232.GA21588@jlahtine-mobl.ger.corp.intel.com
166 lines
4.1 KiB
C
166 lines
4.1 KiB
C
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */
|
|
|
|
#ifndef __I915_PMU_H__
|
|
#define __I915_PMU_H__
|
|
|
|
#include <linux/hrtimer.h>
|
|
#include <linux/perf_event.h>
|
|
#include <linux/spinlock_types.h>
|
|
#include <uapi/drm/i915_drm.h>
|
|
|
|
/* Forward declaration: this header only refers to the type through pointers. */
struct drm_i915_private;
|
|
|
|
/**
 * enum i915_pmu_tracked_events - Non-engine events with tracked state.
 *
 * Non-engine events that we need to track enabled-disabled transition and
 * current state.
 *
 * Values start at zero and are contiguous, so the trailing count marker equals
 * the number of tracked events.
 */
enum i915_pmu_tracked_events {
	__I915_PMU_ACTUAL_FREQUENCY_ENABLED = 0,
	__I915_PMU_REQUESTED_FREQUENCY_ENABLED,
	__I915_PMU_RC6_RESIDENCY_ENABLED,
	__I915_PMU_TRACKED_EVENT_COUNT, /* count marker */
};
|
|
|
|
/*
 * Slots used from the sampling timer (non-engine events) with some extras for
 * convenience.
 *
 * Values start at zero and are contiguous; __I915_NUM_PMU_SAMPLERS is the
 * number of slots and must stay last.
 */
enum {
	__I915_SAMPLE_FREQ_ACT = 0,
	__I915_SAMPLE_FREQ_REQ,
	__I915_SAMPLE_RC6,
	__I915_SAMPLE_RC6_LAST_REPORTED,
	__I915_NUM_PMU_SAMPLERS
};
|
|
|
|
/*
 * Number of engine sample types.
 *
 * NOTE(review): assumes I915_SAMPLE_SEMA is the highest-numbered engine sample
 * type in the uAPI header - confirm when new sample types are added.
 */
#define I915_ENGINE_SAMPLE_COUNT (I915_SAMPLE_SEMA + 1)

/*
 * How many different events we track in the global PMU mask.
 *
 * It is also used to know the needed number of event reference counters.
 */
#define I915_PMU_MASK_BITS \
	(I915_ENGINE_SAMPLE_COUNT + __I915_PMU_TRACKED_EVENT_COUNT)
|
|
|
|
struct i915_pmu_sample {
|
|
u64 cur;
|
|
};
|
|
|
|
struct i915_pmu {
|
|
/**
|
|
* @cpuhp: Struct used for CPU hotplug handling.
|
|
*/
|
|
struct {
|
|
struct hlist_node node;
|
|
unsigned int cpu;
|
|
} cpuhp;
|
|
/**
|
|
* @base: PMU base.
|
|
*/
|
|
struct pmu base;
|
|
/**
|
|
* @closed: i915 is unregistering.
|
|
*/
|
|
bool closed;
|
|
/**
|
|
* @name: Name as registered with perf core.
|
|
*/
|
|
const char *name;
|
|
/**
|
|
* @lock: Lock protecting enable mask and ref count handling.
|
|
*/
|
|
spinlock_t lock;
|
|
/**
|
|
* @timer: Timer for internal i915 PMU sampling.
|
|
*/
|
|
struct hrtimer timer;
|
|
/**
|
|
* @enable: Bitmask of specific enabled events.
|
|
*
|
|
* For some events we need to track their state and do some internal
|
|
* house keeping.
|
|
*
|
|
* Each engine event sampler type and event listed in enum
|
|
* i915_pmu_tracked_events gets a bit in this field.
|
|
*
|
|
* Low bits are engine samplers and other events continue from there.
|
|
*/
|
|
u32 enable;
|
|
|
|
/**
|
|
* @timer_last:
|
|
*
|
|
* Timestmap of the previous timer invocation.
|
|
*/
|
|
ktime_t timer_last;
|
|
|
|
/**
|
|
* @enable_count: Reference counts for the enabled events.
|
|
*
|
|
* Array indices are mapped in the same way as bits in the @enable field
|
|
* and they are used to control sampling on/off when multiple clients
|
|
* are using the PMU API.
|
|
*/
|
|
unsigned int enable_count[I915_PMU_MASK_BITS];
|
|
/**
|
|
* @timer_enabled: Should the internal sampling timer be running.
|
|
*/
|
|
bool timer_enabled;
|
|
/**
|
|
* @sample: Current and previous (raw) counters for sampling events.
|
|
*
|
|
* These counters are updated from the i915 PMU sampling timer.
|
|
*
|
|
* Only global counters are held here, while the per-engine ones are in
|
|
* struct intel_engine_cs.
|
|
*/
|
|
struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
|
|
/**
|
|
* @sleep_last: Last time GT parked for RC6 estimation.
|
|
*/
|
|
ktime_t sleep_last;
|
|
/**
|
|
* @irq_count: Number of interrupts
|
|
*
|
|
* Intentionally unsigned long to avoid atomics or heuristics on 32bit.
|
|
* 4e9 interrupts are a lot and postprocessing can really deal with an
|
|
* occasional wraparound easily. It's 32bit after all.
|
|
*/
|
|
unsigned long irq_count;
|
|
/**
|
|
* @events_attr_group: Device events attribute group.
|
|
*/
|
|
struct attribute_group events_attr_group;
|
|
/**
|
|
* @i915_attr: Memory block holding device attributes.
|
|
*/
|
|
void *i915_attr;
|
|
/**
|
|
* @pmu_attr: Memory block holding device attributes.
|
|
*/
|
|
void *pmu_attr;
|
|
};
|
|
|
|
/*
 * PMU entry points.
 *
 * With CONFIG_PERF_EVENTS set these are declared here and defined elsewhere.
 * Without it they collapse to empty inline stubs, so callers need no
 * conditional compilation of their own.
 */
#ifdef CONFIG_PERF_EVENTS
void i915_pmu_init(void);
void i915_pmu_exit(void);
void i915_pmu_register(struct drm_i915_private *i915);
void i915_pmu_unregister(struct drm_i915_private *i915);
void i915_pmu_gt_parked(struct drm_i915_private *i915);
void i915_pmu_gt_unparked(struct drm_i915_private *i915);
#else
static inline void i915_pmu_init(void) {}
static inline void i915_pmu_exit(void) {}
static inline void i915_pmu_register(struct drm_i915_private *i915) {}
static inline void i915_pmu_unregister(struct drm_i915_private *i915) {}
static inline void i915_pmu_gt_parked(struct drm_i915_private *i915) {}
static inline void i915_pmu_gt_unparked(struct drm_i915_private *i915) {}
#endif
|
|
|
|
#endif
|