drm/i915/gt: Always enable busy-stats for execlists
In the near future, we will utilize the busy-stats on each engine to approximate the C0 cycles of each, and use that as an input to a manual RPS mechanism. That entails having busy-stats always enabled and so we can remove the enable/disable routines and simplify the pmu setup. As a consequence of always having the stats enabled, we can also show the current active time via sysfs/engine/xcs/active_time_ns. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200429205446.3259-1-chris@chris-wilson.co.uk
This commit is contained in:
parent
be1cb55a07
commit
426d0073fb
@ -310,9 +310,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
|
||||
struct drm_printer *m,
|
||||
const char *header, ...);
|
||||
|
||||
int intel_enable_engine_stats(struct intel_engine_cs *engine);
|
||||
void intel_disable_engine_stats(struct intel_engine_cs *engine);
|
||||
|
||||
ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
|
||||
|
||||
struct i915_request *
|
||||
|
@ -1589,58 +1589,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
|
||||
intel_engine_print_breadcrumbs(engine, m);
|
||||
}
|
||||
|
||||
/**
|
||||
* intel_enable_engine_stats() - Enable engine busy tracking on engine
|
||||
* @engine: engine to enable stats collection
|
||||
*
|
||||
* Start collecting the engine busyness data for @engine.
|
||||
*
|
||||
* Returns 0 on success or a negative error code.
|
||||
*/
|
||||
int intel_enable_engine_stats(struct intel_engine_cs *engine)
|
||||
{
|
||||
struct intel_engine_execlists *execlists = &engine->execlists;
|
||||
unsigned long flags;
|
||||
int err = 0;
|
||||
|
||||
if (!intel_engine_supports_stats(engine))
|
||||
return -ENODEV;
|
||||
|
||||
execlists_active_lock_bh(execlists);
|
||||
write_seqlock_irqsave(&engine->stats.lock, flags);
|
||||
|
||||
if (unlikely(engine->stats.enabled == ~0)) {
|
||||
err = -EBUSY;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (engine->stats.enabled++ == 0) {
|
||||
struct i915_request * const *port;
|
||||
struct i915_request *rq;
|
||||
|
||||
engine->stats.enabled_at = ktime_get();
|
||||
|
||||
/* XXX submission method oblivious? */
|
||||
for (port = execlists->active; (rq = *port); port++)
|
||||
engine->stats.active++;
|
||||
|
||||
for (port = execlists->pending; (rq = *port); port++) {
|
||||
/* Exclude any contexts already counted in active */
|
||||
if (!intel_context_inflight_count(rq->context))
|
||||
engine->stats.active++;
|
||||
}
|
||||
|
||||
if (engine->stats.active)
|
||||
engine->stats.start = engine->stats.enabled_at;
|
||||
}
|
||||
|
||||
unlock:
|
||||
write_sequnlock_irqrestore(&engine->stats.lock, flags);
|
||||
execlists_active_unlock_bh(execlists);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
|
||||
{
|
||||
ktime_t total = engine->stats.total;
|
||||
@ -1649,7 +1597,7 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
|
||||
* If the engine is executing something at the moment
|
||||
* add it to the total.
|
||||
*/
|
||||
if (engine->stats.active)
|
||||
if (atomic_read(&engine->stats.active))
|
||||
total = ktime_add(total,
|
||||
ktime_sub(ktime_get(), engine->stats.start));
|
||||
|
||||
@ -1675,28 +1623,6 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
|
||||
return total;
|
||||
}
|
||||
|
||||
/**
|
||||
* intel_disable_engine_stats() - Disable engine busy tracking on engine
|
||||
* @engine: engine to disable stats collection
|
||||
*
|
||||
* Stops collecting the engine busyness data for @engine.
|
||||
*/
|
||||
void intel_disable_engine_stats(struct intel_engine_cs *engine)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (!intel_engine_supports_stats(engine))
|
||||
return;
|
||||
|
||||
write_seqlock_irqsave(&engine->stats.lock, flags);
|
||||
WARN_ON_ONCE(engine->stats.enabled == 0);
|
||||
if (--engine->stats.enabled == 0) {
|
||||
engine->stats.total = __intel_engine_get_busy_time(engine);
|
||||
engine->stats.active = 0;
|
||||
}
|
||||
write_sequnlock_irqrestore(&engine->stats.lock, flags);
|
||||
}
|
||||
|
||||
static bool match_ring(struct i915_request *rq)
|
||||
{
|
||||
u32 ring = ENGINE_READ(rq->engine, RING_START);
|
||||
|
@ -531,28 +531,16 @@ struct intel_engine_cs {
|
||||
u32 (*get_cmd_length_mask)(u32 cmd_header);
|
||||
|
||||
struct {
|
||||
/**
|
||||
* @active: Number of contexts currently scheduled in.
|
||||
*/
|
||||
atomic_t active;
|
||||
|
||||
/**
|
||||
* @lock: Lock protecting the below fields.
|
||||
*/
|
||||
seqlock_t lock;
|
||||
/**
|
||||
* @enabled: Reference count indicating number of listeners.
|
||||
*/
|
||||
unsigned int enabled;
|
||||
/**
|
||||
* @active: Number of contexts currently scheduled in.
|
||||
*/
|
||||
unsigned int active;
|
||||
/**
|
||||
* @enabled_at: Timestamp when busy stats were enabled.
|
||||
*/
|
||||
ktime_t enabled_at;
|
||||
/**
|
||||
* @start: Timestamp of the last idle to active transition.
|
||||
*
|
||||
* Idle is defined as active == 0, active is active > 0.
|
||||
*/
|
||||
ktime_t start;
|
||||
|
||||
/**
|
||||
* @total: Total time this engine was busy.
|
||||
*
|
||||
@ -560,6 +548,13 @@ struct intel_engine_cs {
|
||||
* where engine is currently busy (active > 0).
|
||||
*/
|
||||
ktime_t total;
|
||||
|
||||
/**
|
||||
* @start: Timestamp of the last idle to active transition.
|
||||
*
|
||||
* Idle is defined as active == 0, active is active > 0.
|
||||
*/
|
||||
ktime_t start;
|
||||
} stats;
|
||||
|
||||
struct {
|
||||
|
@ -1199,17 +1199,14 @@ static void intel_engine_context_in(struct intel_engine_cs *engine)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (READ_ONCE(engine->stats.enabled) == 0)
|
||||
if (atomic_add_unless(&engine->stats.active, 1, 0))
|
||||
return;
|
||||
|
||||
write_seqlock_irqsave(&engine->stats.lock, flags);
|
||||
|
||||
if (engine->stats.enabled > 0) {
|
||||
if (engine->stats.active++ == 0)
|
||||
engine->stats.start = ktime_get();
|
||||
GEM_BUG_ON(engine->stats.active == 0);
|
||||
if (!atomic_add_unless(&engine->stats.active, 1, 0)) {
|
||||
engine->stats.start = ktime_get();
|
||||
atomic_inc(&engine->stats.active);
|
||||
}
|
||||
|
||||
write_sequnlock_irqrestore(&engine->stats.lock, flags);
|
||||
}
|
||||
|
||||
@ -1217,36 +1214,17 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (READ_ONCE(engine->stats.enabled) == 0)
|
||||
GEM_BUG_ON(!atomic_read(&engine->stats.active));
|
||||
|
||||
if (atomic_add_unless(&engine->stats.active, -1, 1))
|
||||
return;
|
||||
|
||||
write_seqlock_irqsave(&engine->stats.lock, flags);
|
||||
|
||||
if (engine->stats.enabled > 0) {
|
||||
ktime_t last;
|
||||
|
||||
if (engine->stats.active && --engine->stats.active == 0) {
|
||||
/*
|
||||
* Decrement the active context count and in case GPU
|
||||
* is now idle add up to the running total.
|
||||
*/
|
||||
last = ktime_sub(ktime_get(), engine->stats.start);
|
||||
|
||||
engine->stats.total = ktime_add(engine->stats.total,
|
||||
last);
|
||||
} else if (engine->stats.active == 0) {
|
||||
/*
|
||||
* After turning on engine stats, context out might be
|
||||
* the first event in which case we account from the
|
||||
* time stats gathering was turned on.
|
||||
*/
|
||||
last = ktime_sub(ktime_get(), engine->stats.enabled_at);
|
||||
|
||||
engine->stats.total = ktime_add(engine->stats.total,
|
||||
last);
|
||||
}
|
||||
if (atomic_dec_and_test(&engine->stats.active)) {
|
||||
engine->stats.total =
|
||||
ktime_add(engine->stats.total,
|
||||
ktime_sub(ktime_get(), engine->stats.start));
|
||||
}
|
||||
|
||||
write_sequnlock_irqrestore(&engine->stats.lock, flags);
|
||||
}
|
||||
|
||||
|
@ -439,29 +439,9 @@ static u64 count_interrupts(struct drm_i915_private *i915)
|
||||
return sum;
|
||||
}
|
||||
|
||||
static void engine_event_destroy(struct perf_event *event)
|
||||
{
|
||||
struct drm_i915_private *i915 =
|
||||
container_of(event->pmu, typeof(*i915), pmu.base);
|
||||
struct intel_engine_cs *engine;
|
||||
|
||||
engine = intel_engine_lookup_user(i915,
|
||||
engine_event_class(event),
|
||||
engine_event_instance(event));
|
||||
if (drm_WARN_ON_ONCE(&i915->drm, !engine))
|
||||
return;
|
||||
|
||||
if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
|
||||
intel_engine_supports_stats(engine))
|
||||
intel_disable_engine_stats(engine);
|
||||
}
|
||||
|
||||
static void i915_pmu_event_destroy(struct perf_event *event)
|
||||
{
|
||||
WARN_ON(event->parent);
|
||||
|
||||
if (is_engine_event(event))
|
||||
engine_event_destroy(event);
|
||||
}
|
||||
|
||||
static int
|
||||
@ -514,23 +494,13 @@ static int engine_event_init(struct perf_event *event)
|
||||
struct drm_i915_private *i915 =
|
||||
container_of(event->pmu, typeof(*i915), pmu.base);
|
||||
struct intel_engine_cs *engine;
|
||||
u8 sample;
|
||||
int ret;
|
||||
|
||||
engine = intel_engine_lookup_user(i915, engine_event_class(event),
|
||||
engine_event_instance(event));
|
||||
if (!engine)
|
||||
return -ENODEV;
|
||||
|
||||
sample = engine_event_sample(event);
|
||||
ret = engine_event_status(engine, sample);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
|
||||
ret = intel_enable_engine_stats(engine);
|
||||
|
||||
return ret;
|
||||
return engine_event_status(engine, engine_event_sample(event));
|
||||
}
|
||||
|
||||
static int i915_pmu_event_init(struct perf_event *event)
|
||||
|
@ -1679,8 +1679,7 @@ static int perf_series_engines(void *arg)
|
||||
p->engine = ps->ce[idx]->engine;
|
||||
intel_engine_pm_get(p->engine);
|
||||
|
||||
if (intel_engine_supports_stats(p->engine) &&
|
||||
!intel_enable_engine_stats(p->engine))
|
||||
if (intel_engine_supports_stats(p->engine))
|
||||
p->busy = intel_engine_get_busy_time(p->engine) + 1;
|
||||
p->runtime = -intel_context_get_total_runtime_ns(ce);
|
||||
p->time = ktime_get();
|
||||
@ -1700,7 +1699,6 @@ static int perf_series_engines(void *arg)
|
||||
if (p->busy) {
|
||||
p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
|
||||
p->busy - 1);
|
||||
intel_disable_engine_stats(p->engine);
|
||||
}
|
||||
|
||||
err = switch_to_kernel_sync(ce, err);
|
||||
@ -1762,8 +1760,7 @@ static int p_sync0(void *arg)
|
||||
}
|
||||
|
||||
busy = false;
|
||||
if (intel_engine_supports_stats(engine) &&
|
||||
!intel_enable_engine_stats(engine)) {
|
||||
if (intel_engine_supports_stats(engine)) {
|
||||
p->busy = intel_engine_get_busy_time(engine);
|
||||
busy = true;
|
||||
}
|
||||
@ -1796,7 +1793,6 @@ static int p_sync0(void *arg)
|
||||
if (busy) {
|
||||
p->busy = ktime_sub(intel_engine_get_busy_time(engine),
|
||||
p->busy);
|
||||
intel_disable_engine_stats(engine);
|
||||
}
|
||||
|
||||
err = switch_to_kernel_sync(ce, err);
|
||||
@ -1830,8 +1826,7 @@ static int p_sync1(void *arg)
|
||||
}
|
||||
|
||||
busy = false;
|
||||
if (intel_engine_supports_stats(engine) &&
|
||||
!intel_enable_engine_stats(engine)) {
|
||||
if (intel_engine_supports_stats(engine)) {
|
||||
p->busy = intel_engine_get_busy_time(engine);
|
||||
busy = true;
|
||||
}
|
||||
@ -1866,7 +1861,6 @@ static int p_sync1(void *arg)
|
||||
if (busy) {
|
||||
p->busy = ktime_sub(intel_engine_get_busy_time(engine),
|
||||
p->busy);
|
||||
intel_disable_engine_stats(engine);
|
||||
}
|
||||
|
||||
err = switch_to_kernel_sync(ce, err);
|
||||
@ -1899,8 +1893,7 @@ static int p_many(void *arg)
|
||||
}
|
||||
|
||||
busy = false;
|
||||
if (intel_engine_supports_stats(engine) &&
|
||||
!intel_enable_engine_stats(engine)) {
|
||||
if (intel_engine_supports_stats(engine)) {
|
||||
p->busy = intel_engine_get_busy_time(engine);
|
||||
busy = true;
|
||||
}
|
||||
@ -1924,7 +1917,6 @@ static int p_many(void *arg)
|
||||
if (busy) {
|
||||
p->busy = ktime_sub(intel_engine_get_busy_time(engine),
|
||||
p->busy);
|
||||
intel_disable_engine_stats(engine);
|
||||
}
|
||||
|
||||
err = switch_to_kernel_sync(ce, err);
|
||||
|
Loading…
Reference in New Issue
Block a user