Merge tag 'drm-intel-next-fixes-2019-11-28' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
- Important fix to uAPI alignment on query IOCTL
- Fixes for the power regression introduced by the previous security patches
- Avoid regressing super heavy benchmarks by increasing the default request
  pre-emption timeout from 100 ms to 640 ms
- Resulting set of smaller fixes done while the problem was inspected
- Display fixes for EHL voltage level programming and TGL DKL PHY vswing for HDMI

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191128141524.GA11992@jlahtine-desk.ger.corp.intel.com
commit 3e25dbca8b
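The power fixes in this pull revolve around letting atomic-context callers drop the last engine/GT wakeref without sleeping: the per-ops INTEL_WAKEREF_PUT_ASYNC flag becomes a per-call argument, and new *_put_async() wrappers defer the final release to a worker. As a rough illustration only (not code from this series; the demo_* names and the plain "deferred" flag standing in for the kernel work item are invented), the shape of that split looks like:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define DEMO_PUT_ASYNC 0x1              /* stand-in for INTEL_WAKEREF_PUT_ASYNC */

    struct demo_wakeref {
            atomic_int count;
            bool release_deferred;          /* stand-in for schedule_work(&wf->work) */
    };

    static void demo_release(struct demo_wakeref *wf)
    {
            printf("final release for %p: power down\n", (void *)wf);
    }

    static void demo_put_last(struct demo_wakeref *wf, unsigned long flags)
    {
            if (flags & DEMO_PUT_ASYNC) {
                    /* may not sleep here, so hand the release to a worker */
                    wf->release_deferred = true;
                    return;
            }
            demo_release(wf);               /* sync path: allowed to sleep */
    }

    static void demo_put(struct demo_wakeref *wf, unsigned long flags)
    {
            /* drop one reference; only the last one enters the release path */
            if (atomic_fetch_sub(&wf->count, 1) == 1)
                    demo_put_last(wf, flags);
    }

    int main(void)
    {
            struct demo_wakeref wf = { .count = 2, .release_deferred = false };

            demo_put(&wf, DEMO_PUT_ASYNC);  /* not the last ref: nothing happens */
            demo_put(&wf, 0);               /* last ref, sync: releases immediately */
            return 0;
    }

The sync variant may sleep and releases immediately on the final reference, while the async variant only marks the release as pending; that is why the hunks below switch PMU sampling, engine reset and engine parking over to the async form.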
@@ -25,7 +25,7 @@ config DRM_I915_HEARTBEAT_INTERVAL
 
 config DRM_I915_PREEMPT_TIMEOUT
         int "Preempt timeout (ms, jiffy granularity)"
-        default 100 # milliseconds
+        default 640 # milliseconds
         help
           How long to wait (in milliseconds) for a preemption event to occur
           when submitting a new context via execlists. If the current context
@@ -1273,7 +1273,9 @@ static u8 icl_calc_voltage_level(int cdclk)
 
 static u8 ehl_calc_voltage_level(int cdclk)
 {
-        if (cdclk > 312000)
+        if (cdclk > 326400)
+                return 3;
+        else if (cdclk > 312000)
                 return 2;
         else if (cdclk > 180000)
                 return 1;
@@ -593,7 +593,7 @@ struct tgl_dkl_phy_ddi_buf_trans {
         u32 dkl_de_emphasis_control;
 };
 
-static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = {
+static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_dp_ddi_trans[] = {
                                 /* VS   pre-emp Non-trans mV    Pre-emph dB */
         { 0x7, 0x0, 0x00 },     /* 0    0       400mV           0 dB */
         { 0x5, 0x0, 0x03 },     /* 0    1       400mV           3.5 dB */
@@ -607,6 +607,20 @@ static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = {
         { 0x0, 0x0, 0x00 },     /* 3    0       1200mV          0 dB HDMI default */
 };
 
+static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_hdmi_ddi_trans[] = {
+                                /* HDMI Preset  VS      Pre-emph */
+        { 0x7, 0x0, 0x0 },      /* 1            400mV   0dB */
+        { 0x6, 0x0, 0x0 },      /* 2            500mV   0dB */
+        { 0x4, 0x0, 0x0 },      /* 3            650mV   0dB */
+        { 0x2, 0x0, 0x0 },      /* 4            800mV   0dB */
+        { 0x0, 0x0, 0x0 },      /* 5            1000mV  0dB */
+        { 0x0, 0x0, 0x5 },      /* 6            Full    -1.5 dB */
+        { 0x0, 0x0, 0x6 },      /* 7            Full    -1.8 dB */
+        { 0x0, 0x0, 0x7 },      /* 8            Full    -2 dB */
+        { 0x0, 0x0, 0x8 },      /* 9            Full    -2.5 dB */
+        { 0x0, 0x0, 0xA },      /* 10           Full    -3 dB */
+};
+
 static const struct ddi_buf_trans *
 bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries)
 {
@@ -898,7 +912,7 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por
                         icl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI,
                                                 0, &n_entries);
                 else
-                        n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations);
+                        n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans);
                 default_entry = n_entries - 1;
         } else if (INTEL_GEN(dev_priv) == 11) {
                 if (intel_phy_is_combo(dev_priv, phy))
@@ -2371,7 +2385,7 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder)
                         icl_get_combo_buf_trans(dev_priv, encoder->type,
                                                 intel_dp->link_rate, &n_entries);
                 else
-                        n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations);
+                        n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans);
         } else if (INTEL_GEN(dev_priv) == 11) {
                 if (intel_phy_is_combo(dev_priv, phy))
                         icl_get_combo_buf_trans(dev_priv, encoder->type,
@@ -2823,8 +2837,13 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock,
         const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations;
         u32 n_entries, val, ln, dpcnt_mask, dpcnt_val;
 
-        n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations);
-        ddi_translations = tgl_dkl_phy_ddi_translations;
+        if (encoder->type == INTEL_OUTPUT_HDMI) {
+                n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans);
+                ddi_translations = tgl_dkl_phy_hdmi_ddi_trans;
+        } else {
+                n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans);
+                ddi_translations = tgl_dkl_phy_dp_ddi_trans;
+        }
 
         if (level >= n_entries)
                 level = n_entries - 1;
@@ -310,10 +310,23 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
         GEM_BUG_ON(rq->hw_context == ce);
 
         if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
-                err = mutex_lock_interruptible_nested(&tl->mutex,
-                                                      SINGLE_DEPTH_NESTING);
-                if (err)
-                        return err;
+                /*
+                 * Ideally, we just want to insert our foreign fence as
+                 * a barrier into the remove context, such that this operation
+                 * occurs after all current operations in that context, and
+                 * all future operations must occur after this.
+                 *
+                 * Currently, the timeline->last_request tracking is guarded
+                 * by its mutex and so we must obtain that to atomically
+                 * insert our barrier. However, since we already hold our
+                 * timeline->mutex, we must be careful against potential
+                 * inversion if we are the kernel_context as the remote context
+                 * will itself poke at the kernel_context when it needs to
+                 * unpin. Ergo, if already locked, we drop both locks and
+                 * try again (through the magic of userspace repeating EAGAIN).
+                 */
+                if (!mutex_trylock(&tl->mutex))
+                        return -EAGAIN;
 
                 /* Queue this switch after current activity by this context. */
                 err = i915_active_fence_set(&tl->last_request, rq);
@@ -100,9 +100,7 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists)
 static inline struct i915_request *
 execlists_active(const struct intel_engine_execlists *execlists)
 {
-        GEM_BUG_ON(execlists->active - execlists->inflight >
-                   execlists_num_ports(execlists));
-        return READ_ONCE(*execlists->active);
+        return *READ_ONCE(execlists->active);
 }
 
 static inline void
@@ -28,13 +28,13 @@
 
 #include "i915_drv.h"
 
-#include "gt/intel_gt.h"
-
+#include "intel_context.h"
 #include "intel_engine.h"
 #include "intel_engine_pm.h"
 #include "intel_engine_pool.h"
 #include "intel_engine_user.h"
-#include "intel_context.h"
+#include "intel_gt.h"
+#include "intel_gt_requests.h"
 #include "intel_lrc.h"
 #include "intel_reset.h"
 #include "intel_ring.h"
@@ -616,6 +616,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
         intel_engine_init_execlists(engine);
         intel_engine_init_cmd_parser(engine);
         intel_engine_init__pm(engine);
+        intel_engine_init_retire(engine);
 
         intel_engine_pool_init(&engine->pool);
 
@@ -838,6 +839,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 
         cleanup_status_page(engine);
 
+        intel_engine_fini_retire(engine);
         intel_engine_pool_fini(&engine->pool);
         intel_engine_fini_breadcrumbs(engine);
         intel_engine_cleanup_cmd_parser(engine);
@@ -73,8 +73,42 @@ static inline void __timeline_mark_unlock(struct intel_context *ce,
 
 #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
 
+static void
+__queue_and_release_pm(struct i915_request *rq,
+                       struct intel_timeline *tl,
+                       struct intel_engine_cs *engine)
+{
+        struct intel_gt_timelines *timelines = &engine->gt->timelines;
+
+        GEM_TRACE("%s\n", engine->name);
+
+        /*
+         * We have to serialise all potential retirement paths with our
+         * submission, as we don't want to underflow either the
+         * engine->wakeref.counter or our timeline->active_count.
+         *
+         * Equally, we cannot allow a new submission to start until
+         * after we finish queueing, nor could we allow that submitter
+         * to retire us before we are ready!
+         */
+        spin_lock(&timelines->lock);
+
+        /* Let intel_gt_retire_requests() retire us (acquired under lock) */
+        if (!atomic_fetch_inc(&tl->active_count))
+                list_add_tail(&tl->link, &timelines->active_list);
+
+        /* Hand the request over to HW and so engine_retire() */
+        __i915_request_queue(rq, NULL);
+
+        /* Let new submissions commence (and maybe retire this timeline) */
+        __intel_wakeref_defer_park(&engine->wakeref);
+
+        spin_unlock(&timelines->lock);
+}
+
 static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 {
+        struct intel_context *ce = engine->kernel_context;
         struct i915_request *rq;
         unsigned long flags;
         bool result = true;
@@ -98,16 +132,31 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
          * This should hold true as we can only park the engine after
          * retiring the last request, thus all rings should be empty and
          * all timelines idle.
+         *
+         * For unlocking, there are 2 other parties and the GPU who have a
+         * stake here.
+         *
+         * A new gpu user will be waiting on the engine-pm to start their
+         * engine_unpark. New waiters are predicated on engine->wakeref.count
+         * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
+         * engine->wakeref.
+         *
+         * The other party is intel_gt_retire_requests(), which is walking the
+         * list of active timelines looking for completions. Meanwhile as soon
+         * as we call __i915_request_queue(), the GPU may complete our request.
+         * Ergo, if we put ourselves on the timelines.active_list
+         * (se intel_timeline_enter()) before we increment the
+         * engine->wakeref.count, we may see the request completion and retire
+         * it causing an undeflow of the engine->wakeref.
          */
-        flags = __timeline_mark_lock(engine->kernel_context);
+        flags = __timeline_mark_lock(ce);
+        GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
 
-        rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
+        rq = __i915_request_create(ce, GFP_NOWAIT);
         if (IS_ERR(rq))
                 /* Context switch failed, hope for the best! Maybe reset? */
                 goto out_unlock;
 
-        intel_timeline_enter(i915_request_timeline(rq));
-
         /* Check again on the next retirement. */
         engine->wakeref_serial = engine->serial + 1;
         i915_request_add_active_barriers(rq);
@@ -116,13 +165,12 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
         rq->sched.attr.priority = I915_PRIORITY_BARRIER;
         __i915_request_commit(rq);
 
-        /* Release our exclusive hold on the engine */
-        __intel_wakeref_defer_park(&engine->wakeref);
-        __i915_request_queue(rq, NULL);
+        /* Expose ourselves to the world */
+        __queue_and_release_pm(rq, ce->timeline, engine);
 
         result = false;
 out_unlock:
-        __timeline_mark_unlock(engine->kernel_context, flags);
+        __timeline_mark_unlock(ce, flags);
         return result;
 }
 
@@ -177,7 +225,8 @@ static int __engine_park(struct intel_wakeref *wf)
 
         engine->execlists.no_priolist = false;
 
-        intel_gt_pm_put(engine->gt);
+        /* While gt calls i915_vma_parked(), we have to break the lock cycle */
+        intel_gt_pm_put_async(engine->gt);
         return 0;
 }
 
@@ -31,6 +31,16 @@ static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
         intel_wakeref_put(&engine->wakeref);
 }
 
+static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine)
+{
+        intel_wakeref_put_async(&engine->wakeref);
+}
+
+static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
+{
+        intel_wakeref_unlock_wait(&engine->wakeref);
+}
+
 void intel_engine_init__pm(struct intel_engine_cs *engine);
 
 #endif /* INTEL_ENGINE_PM_H */
@@ -451,6 +451,14 @@ struct intel_engine_cs {
 
         struct intel_engine_execlists execlists;
 
+        /*
+         * Keep track of completed timelines on this engine for early
+         * retirement with the goal of quickly enabling powersaving as
+         * soon as the engine is idle.
+         */
+        struct intel_timeline *retire;
+        struct work_struct retire_work;
+
         /* status_notifier: list of callbacks for context-switch changes */
         struct atomic_notifier_head context_status_notifier;
 
@@ -105,7 +105,6 @@ static int __gt_park(struct intel_wakeref *wf)
 static const struct intel_wakeref_ops wf_ops = {
         .get = __gt_unpark,
         .put = __gt_park,
-        .flags = INTEL_WAKEREF_PUT_ASYNC,
 };
 
 void intel_gt_pm_init_early(struct intel_gt *gt)
@@ -272,7 +271,7 @@ void intel_gt_suspend_prepare(struct intel_gt *gt)
 
 static suspend_state_t pm_suspend_target(void)
 {
-#if IS_ENABLED(CONFIG_PM_SLEEP)
+#if IS_ENABLED(CONFIG_SUSPEND) && IS_ENABLED(CONFIG_PM_SLEEP)
         return pm_suspend_target_state;
 #else
         return PM_SUSPEND_TO_IDLE;
@@ -32,6 +32,11 @@ static inline void intel_gt_pm_put(struct intel_gt *gt)
         intel_wakeref_put(&gt->wakeref);
 }
 
+static inline void intel_gt_pm_put_async(struct intel_gt *gt)
+{
+        intel_wakeref_put_async(&gt->wakeref);
+}
+
 static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
 {
         return intel_wakeref_wait_for_idle(&gt->wakeref);
@@ -4,6 +4,8 @@
  * Copyright © 2019 Intel Corporation
  */
 
+#include <linux/workqueue.h>
+
 #include "i915_drv.h" /* for_each_engine() */
 #include "i915_request.h"
 #include "intel_gt.h"
@@ -29,6 +31,79 @@ static void flush_submission(struct intel_gt *gt)
                 intel_engine_flush_submission(engine);
 }
 
+static void engine_retire(struct work_struct *work)
+{
+        struct intel_engine_cs *engine =
+                container_of(work, typeof(*engine), retire_work);
+        struct intel_timeline *tl = xchg(&engine->retire, NULL);
+
+        do {
+                struct intel_timeline *next = xchg(&tl->retire, NULL);
+
+                /*
+                 * Our goal here is to retire _idle_ timelines as soon as
+                 * possible (as they are idle, we do not expect userspace
+                 * to be cleaning up anytime soon).
+                 *
+                 * If the timeline is currently locked, either it is being
+                 * retired elsewhere or about to be!
+                 */
+                if (mutex_trylock(&tl->mutex)) {
+                        retire_requests(tl);
+                        mutex_unlock(&tl->mutex);
+                }
+                intel_timeline_put(tl);
+
+                GEM_BUG_ON(!next);
+                tl = ptr_mask_bits(next, 1);
+        } while (tl);
+}
+
+static bool add_retire(struct intel_engine_cs *engine,
+                       struct intel_timeline *tl)
+{
+        struct intel_timeline *first;
+
+        /*
+         * We open-code a llist here to include the additional tag [BIT(0)]
+         * so that we know when the timeline is already on a
+         * retirement queue: either this engine or another.
+         *
+         * However, we rely on that a timeline can only be active on a single
+         * engine at any one time and that add_retire() is called before the
+         * engine releases the timeline and transferred to another to retire.
+         */
+
+        if (READ_ONCE(tl->retire)) /* already queued */
+                return false;
+
+        intel_timeline_get(tl);
+        first = READ_ONCE(engine->retire);
+        do
+                tl->retire = ptr_pack_bits(first, 1, 1);
+        while (!try_cmpxchg(&engine->retire, &first, tl));
+
+        return !first;
+}
+
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+                             struct intel_timeline *tl)
+{
+        if (add_retire(engine, tl))
+                schedule_work(&engine->retire_work);
+}
+
+void intel_engine_init_retire(struct intel_engine_cs *engine)
+{
+        INIT_WORK(&engine->retire_work, engine_retire);
+}
+
+void intel_engine_fini_retire(struct intel_engine_cs *engine)
+{
+        flush_work(&engine->retire_work);
+        GEM_BUG_ON(engine->retire);
+}
+
 long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
 {
         struct intel_gt_timelines *timelines = &gt->timelines;
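add_retire() above open-codes a singly linked retirement list in which BIT(0) of the stored pointer marks "already queued", so a timeline is pushed at most once and without taking any lock. A user-space sketch of the same push pattern follows; it is only an illustration under C11 atomics, with hypothetical demo_* names and uintptr_t arithmetic standing in for ptr_pack_bits()/ptr_mask_bits():

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    struct demo_timeline {
            struct demo_timeline *retire;   /* next pointer; low bit doubles as a tag */
    };

    struct demo_engine {
            _Atomic(struct demo_timeline *) retire; /* head of the retirement list */
    };

    /* Push tl onto engine->retire; true means the list was empty and work is needed. */
    static bool demo_add_retire(struct demo_engine *engine, struct demo_timeline *tl)
    {
            struct demo_timeline *first;

            if (tl->retire)                 /* tag bit set: already queued somewhere */
                    return false;

            first = atomic_load(&engine->retire);
            do {
                    /* store the previous head with bit 0 set as the "queued" tag */
                    tl->retire = (struct demo_timeline *)((uintptr_t)first | 1);
            } while (!atomic_compare_exchange_weak(&engine->retire, &first, tl));

            return !first;
    }

    /* Unpack: mask the tag bit off again to recover the next element. */
    static struct demo_timeline *demo_next(const struct demo_timeline *tl)
    {
            return (struct demo_timeline *)((uintptr_t)tl->retire & ~(uintptr_t)1);
    }

    int main(void)
    {
            struct demo_engine engine = { NULL };
            struct demo_timeline a = { NULL }, b = { NULL };

            demo_add_retire(&engine, &a);   /* true: first entry, worker gets scheduled */
            demo_add_retire(&engine, &b);   /* false: worker already pending */
            (void)demo_next(&b);            /* a worker would walk b -> a via demo_next() */
            return 0;
    }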
@@ -52,8 +127,8 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
                 }
 
                 intel_timeline_get(tl);
-                GEM_BUG_ON(!tl->active_count);
-                tl->active_count++; /* pin the list element */
+                GEM_BUG_ON(!atomic_read(&tl->active_count));
+                atomic_inc(&tl->active_count); /* pin the list element */
                 spin_unlock_irqrestore(&timelines->lock, flags);
 
                 if (timeout > 0) {
@@ -74,7 +149,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
 
                 /* Resume iteration after dropping lock */
                 list_safe_reset_next(tl, tn, link);
-                if (!--tl->active_count)
+                if (atomic_dec_and_test(&tl->active_count))
                         list_del(&tl->link);
                 else
                         active_count += !!rcu_access_pointer(tl->last_request.fence);
@@ -83,7 +158,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
 
                 /* Defer the final release to after the spinlock */
                 if (refcount_dec_and_test(&tl->kref.refcount)) {
-                        GEM_BUG_ON(tl->active_count);
+                        GEM_BUG_ON(atomic_read(&tl->active_count));
                         list_add(&tl->link, &free);
                 }
         }
@@ -7,7 +9,9 @@
 #ifndef INTEL_GT_REQUESTS_H
 #define INTEL_GT_REQUESTS_H
 
+struct intel_engine_cs;
 struct intel_gt;
+struct intel_timeline;
 
 long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
 static inline void intel_gt_retire_requests(struct intel_gt *gt)
@@ -15,6 +17,11 @@ static inline void intel_gt_retire_requests(struct intel_gt *gt)
         intel_gt_retire_requests_timeout(gt, 0);
 }
 
+void intel_engine_init_retire(struct intel_engine_cs *engine);
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+                             struct intel_timeline *tl);
+void intel_engine_fini_retire(struct intel_engine_cs *engine);
+
 int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
 
 void intel_gt_init_requests(struct intel_gt *gt);
@@ -142,6 +142,7 @@
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_reset.h"
@@ -1115,9 +1116,17 @@ __execlists_schedule_out(struct i915_request *rq,
          * refrain from doing non-trivial work here.
          */
 
+        /*
+         * If we have just completed this context, the engine may now be
+         * idle and we want to re-enter powersaving.
+         */
+        if (list_is_last(&rq->link, &ce->timeline->requests) &&
+            i915_request_completed(rq))
+                intel_engine_add_retire(engine, ce->timeline);
+
         intel_engine_context_out(engine);
         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
-        intel_gt_pm_put(engine->gt);
+        intel_gt_pm_put_async(engine->gt);
 
         /*
          * If this is part of a virtual engine, its next request may
@@ -1937,16 +1946,17 @@ skip_submit:
 static void
 cancel_port_requests(struct intel_engine_execlists * const execlists)
 {
-        struct i915_request * const *port, *rq;
+        struct i915_request * const *port;
 
-        for (port = execlists->pending; (rq = *port); port++)
-                execlists_schedule_out(rq);
+        for (port = execlists->pending; *port; port++)
+                execlists_schedule_out(*port);
         memset(execlists->pending, 0, sizeof(execlists->pending));
 
-        for (port = execlists->active; (rq = *port); port++)
-                execlists_schedule_out(rq);
-        execlists->active =
-                memset(execlists->inflight, 0, sizeof(execlists->inflight));
+        /* Mark the end of active before we overwrite *active */
+        for (port = xchg(&execlists->active, execlists->pending); *port; port++)
+                execlists_schedule_out(*port);
+        WRITE_ONCE(execlists->active,
+                   memset(execlists->inflight, 0, sizeof(execlists->inflight)));
 }
 
 static inline void
@@ -2099,23 +2109,27 @@ static void process_csb(struct intel_engine_cs *engine)
                 else
                         promote = gen8_csb_parse(execlists, buf + 2 * head);
                 if (promote) {
+                        struct i915_request * const *old = execlists->active;
+
+                        /* Point active to the new ELSP; prevent overwriting */
+                        WRITE_ONCE(execlists->active, execlists->pending);
+                        set_timeslice(engine);
+
                         if (!inject_preempt_hang(execlists))
                                 ring_set_paused(engine, 0);
 
                         /* cancel old inflight, prepare for switch */
-                        trace_ports(execlists, "preempted", execlists->active);
-                        while (*execlists->active)
-                                execlists_schedule_out(*execlists->active++);
+                        trace_ports(execlists, "preempted", old);
+                        while (*old)
+                                execlists_schedule_out(*old++);
 
                         /* switch pending to inflight */
                         GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
-                        execlists->active =
-                                memcpy(execlists->inflight,
-                                       execlists->pending,
-                                       execlists_num_ports(execlists) *
-                                       sizeof(*execlists->pending));
-
-                        set_timeslice(engine);
+                        WRITE_ONCE(execlists->active,
+                                   memcpy(execlists->inflight,
+                                          execlists->pending,
+                                          execlists_num_ports(execlists) *
+                                          sizeof(*execlists->pending)));
 
                         WRITE_ONCE(execlists->pending[0], NULL);
                 } else {
@@ -1114,7 +1114,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
 out:
         intel_engine_cancel_stop_cs(engine);
         reset_finish_engine(engine);
-        intel_engine_pm_put(engine);
+        intel_engine_pm_put_async(engine);
         return ret;
 }
 
@@ -57,9 +57,10 @@ int intel_ring_pin(struct intel_ring *ring)
 
         i915_vma_make_unshrinkable(vma);
 
-        GEM_BUG_ON(ring->vaddr);
-        ring->vaddr = addr;
+        /* Discard any unused bytes beyond that submitted to hw. */
+        intel_ring_reset(ring, ring->emit);
 
+        ring->vaddr = addr;
         return 0;
 
 err_ring:
@@ -85,20 +86,14 @@ void intel_ring_unpin(struct intel_ring *ring)
         if (!atomic_dec_and_test(&ring->pin_count))
                 return;
 
-        /* Discard any unused bytes beyond that submitted to hw. */
-        intel_ring_reset(ring, ring->emit);
-
         i915_vma_unset_ggtt_write(vma);
         if (i915_vma_is_map_and_fenceable(vma))
                 i915_vma_unpin_iomap(vma);
         else
                 i915_gem_object_unpin_map(vma->obj);
 
-        GEM_BUG_ON(!ring->vaddr);
-        ring->vaddr = NULL;
-
-        i915_vma_unpin(vma);
         i915_vma_make_purgeable(vma);
+        i915_vma_unpin(vma);
 }
 
 static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
@@ -282,6 +282,7 @@ void intel_timeline_fini(struct intel_timeline *timeline)
 {
         GEM_BUG_ON(atomic_read(&timeline->pin_count));
         GEM_BUG_ON(!list_empty(&timeline->requests));
+        GEM_BUG_ON(timeline->retire);
 
         if (timeline->hwsp_cacheline)
                 cacheline_free(timeline->hwsp_cacheline);
@@ -339,15 +340,33 @@ void intel_timeline_enter(struct intel_timeline *tl)
         struct intel_gt_timelines *timelines = &tl->gt->timelines;
         unsigned long flags;
 
+        /*
+         * Pretend we are serialised by the timeline->mutex.
+         *
+         * While generally true, there are a few exceptions to the rule
+         * for the engine->kernel_context being used to manage power
+         * transitions. As the engine_park may be called from under any
+         * timeline, it uses the power mutex as a global serialisation
+         * lock to prevent any other request entering its timeline.
+         *
+         * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
+         *
+         * However, intel_gt_retire_request() does not know which engine
+         * it is retiring along and so cannot partake in the engine-pm
+         * barrier, and there we use the tl->active_count as a means to
+         * pin the timeline in the active_list while the locks are dropped.
+         * Ergo, as that is outside of the engine-pm barrier, we need to
+         * use atomic to manipulate tl->active_count.
+         */
         lockdep_assert_held(&tl->mutex);
 
         GEM_BUG_ON(!atomic_read(&tl->pin_count));
-        if (tl->active_count++)
+
+        if (atomic_add_unless(&tl->active_count, 1, 0))
                 return;
-        GEM_BUG_ON(!tl->active_count); /* overflow? */
 
         spin_lock_irqsave(&timelines->lock, flags);
-        list_add(&tl->link, &timelines->active_list);
+        if (!atomic_fetch_inc(&tl->active_count))
+                list_add_tail(&tl->link, &timelines->active_list);
         spin_unlock_irqrestore(&timelines->lock, flags);
 }
@@ -356,14 +375,16 @@ void intel_timeline_exit(struct intel_timeline *tl)
         struct intel_gt_timelines *timelines = &tl->gt->timelines;
         unsigned long flags;
 
+        /* See intel_timeline_enter() */
         lockdep_assert_held(&tl->mutex);
 
-        GEM_BUG_ON(!tl->active_count);
-        if (--tl->active_count)
+        GEM_BUG_ON(!atomic_read(&tl->active_count));
+        if (atomic_add_unless(&tl->active_count, -1, 1))
                 return;
 
         spin_lock_irqsave(&timelines->lock, flags);
-        list_del(&tl->link);
+        if (atomic_dec_and_test(&tl->active_count))
+                list_del(&tl->link);
         spin_unlock_irqrestore(&timelines->lock, flags);
 
         /*
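The intel_timeline_enter()/exit() conversion above keeps the common case lock-free: atomic_add_unless() skips the boundary values 0 and 1, so only the first enter and the last exit fall through to the spinlock-protected list manipulation. A small illustrative sketch only (not from the patch; demo_* names are invented and a compare-exchange loop stands in for the kernel's atomic_add_unless()):

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Add delta to *v unless it currently equals 'unless'; like atomic_add_unless(). */
    static bool demo_add_unless(atomic_int *v, int delta, int unless)
    {
            int cur = atomic_load(v);

            do {
                    if (cur == unless)
                            return false;   /* boundary value: take the locked slow path */
            } while (!atomic_compare_exchange_weak(v, &cur, cur + delta));

            return true;
    }

    static void demo_timeline_enter(atomic_int *active_count)
    {
            if (demo_add_unless(active_count, 1, 0))
                    return;                 /* fast path: already active, no lock needed */

            /* slow path: the real code holds timelines->lock around this transition */
            if (atomic_fetch_add(active_count, 1) == 0) {
                    /* 0 -> 1: this is where list_add_tail() happens */
            }
    }

    static void demo_timeline_exit(atomic_int *active_count)
    {
            if (demo_add_unless(active_count, -1, 1))
                    return;                 /* fast path: still active afterwards */

            /* slow path: the real code holds timelines->lock around this transition */
            if (atomic_fetch_sub(active_count, 1) == 1) {
                    /* 1 -> 0: this is where list_del() happens */
            }
    }

    int main(void)
    {
            atomic_int active = 0;

            demo_timeline_enter(&active);   /* slow path: 0 -> 1, would add to the list */
            demo_timeline_enter(&active);   /* fast path: 1 -> 2, no lock taken */
            demo_timeline_exit(&active);    /* fast path: 2 -> 1 */
            demo_timeline_exit(&active);    /* slow path: 1 -> 0, would remove from the list */
            return 0;
    }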
@@ -42,7 +42,7 @@ struct intel_timeline {
          * from the intel_context caller plus internal atomicity.
          */
         atomic_t pin_count;
-        unsigned int active_count;
+        atomic_t active_count;
 
         const u32 *hwsp_seqno;
         struct i915_vma *hwsp_ggtt;
@@ -66,6 +66,9 @@ struct intel_timeline {
          */
         struct i915_active_fence last_request;
 
+        /** A chain of completed timelines ready for early retirement. */
+        struct intel_timeline *retire;
+
         /**
          * We track the most recent seqno that we wait on in every context so
          * that we only have to emit a new await and dependency on a more
@@ -51,11 +51,12 @@ static int live_engine_pm(void *arg)
                         pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n",
                                engine->name, p->name);
                 else
-                        intel_engine_pm_put(engine);
-                intel_engine_pm_put(engine);
+                        intel_engine_pm_put_async(engine);
+                intel_engine_pm_put_async(engine);
                 p->critical_section_end();
 
-                /* engine wakeref is sync (instant) */
+                intel_engine_pm_flush(engine);
+
                 if (intel_engine_pm_is_awake(engine)) {
                         pr_err("%s is still awake after flushing pm\n",
                                engine->name);
@@ -672,12 +672,13 @@ void i915_active_acquire_barrier(struct i915_active *ref)
          * populated by i915_request_add_active_barriers() to point to the
          * request that will eventually release them.
          */
-        spin_lock_irqsave_nested(&ref->tree_lock, flags, SINGLE_DEPTH_NESTING);
         llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
                 struct active_node *node = barrier_from_ll(pos);
                 struct intel_engine_cs *engine = barrier_to_engine(node);
                 struct rb_node **p, *parent;
 
+                spin_lock_irqsave_nested(&ref->tree_lock, flags,
+                                         SINGLE_DEPTH_NESTING);
                 parent = NULL;
                 p = &ref->tree.rb_node;
                 while (*p) {
@@ -693,12 +694,12 @@ void i915_active_acquire_barrier(struct i915_active *ref)
                 }
                 rb_link_node(&node->node, parent, p);
                 rb_insert_color(&node->node, &ref->tree);
+                spin_unlock_irqrestore(&ref->tree_lock, flags);
 
                 GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
                 llist_add(barrier_to_ll(node), &engine->barrier_tasks);
                 intel_engine_pm_put(engine);
         }
-        spin_unlock_irqrestore(&ref->tree_lock, flags);
 }
 
 void i915_request_add_active_barriers(struct i915_request *rq)
@@ -190,7 +190,7 @@ static u64 get_rc6(struct intel_gt *gt)
         val = 0;
         if (intel_gt_pm_get_if_awake(gt)) {
                 val = __get_rc6(gt);
-                intel_gt_pm_put(gt);
+                intel_gt_pm_put_async(gt);
         }
 
         spin_lock_irqsave(&pmu->lock, flags);
@@ -343,7 +343,7 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
 
 skip:
                 spin_unlock_irqrestore(&engine->uncore->lock, flags);
-                intel_engine_pm_put(engine);
+                intel_engine_pm_put_async(engine);
         }
 }
 
@@ -368,7 +368,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
                 if (intel_gt_pm_get_if_awake(gt)) {
                         val = intel_uncore_read_notrace(uncore, GEN6_RPSTAT1);
                         val = intel_get_cagf(rps, val);
-                        intel_gt_pm_put(gt);
+                        intel_gt_pm_put_async(gt);
                 }
 
                 add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
@@ -103,15 +103,18 @@ query_engine_info(struct drm_i915_private *i915,
         struct drm_i915_engine_info __user *info_ptr;
         struct drm_i915_query_engine_info query;
         struct drm_i915_engine_info info = { };
+        unsigned int num_uabi_engines = 0;
         struct intel_engine_cs *engine;
         int len, ret;
 
         if (query_item->flags)
                 return -EINVAL;
 
+        for_each_uabi_engine(engine, i915)
+                num_uabi_engines++;
+
         len = sizeof(struct drm_i915_query_engine_info) +
-                RUNTIME_INFO(i915)->num_engines *
-                sizeof(struct drm_i915_engine_info);
+              num_uabi_engines * sizeof(struct drm_i915_engine_info);
 
         ret = copy_query_item(&query, sizeof(query), len, query_item);
         if (ret != 0)
@@ -54,7 +54,8 @@ int __intel_wakeref_get_first(struct intel_wakeref *wf)
 
 static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
 {
-        if (!atomic_dec_and_test(&wf->count))
+        INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
+        if (unlikely(!atomic_dec_and_test(&wf->count)))
                 goto unlock;
 
         /* ops->put() must reschedule its own release on error/deferral */
@@ -67,13 +68,12 @@ unlock:
         mutex_unlock(&wf->mutex);
 }
 
-void __intel_wakeref_put_last(struct intel_wakeref *wf)
+void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags)
 {
         INTEL_WAKEREF_BUG_ON(work_pending(&wf->work));
 
         /* Assume we are not in process context and so cannot sleep. */
-        if (wf->ops->flags & INTEL_WAKEREF_PUT_ASYNC ||
-            !mutex_trylock(&wf->mutex)) {
+        if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) {
                 schedule_work(&wf->work);
                 return;
         }
@@ -109,8 +109,17 @@ void __intel_wakeref_init(struct intel_wakeref *wf,
 
 int intel_wakeref_wait_for_idle(struct intel_wakeref *wf)
 {
-        return wait_var_event_killable(&wf->wakeref,
-                                       !intel_wakeref_is_active(wf));
+        int err;
+
+        might_sleep();
+
+        err = wait_var_event_killable(&wf->wakeref,
+                                      !intel_wakeref_is_active(wf));
+        if (err)
+                return err;
+
+        intel_wakeref_unlock_wait(wf);
+        return 0;
 }
 
 static void wakeref_auto_timeout(struct timer_list *t)
@@ -9,6 +9,7 @@
 
 #include <linux/atomic.h>
+#include <linux/bits.h>
 #include <linux/lockdep.h>
 #include <linux/mutex.h>
 #include <linux/refcount.h>
 #include <linux/stackdepot.h>
@@ -29,9 +30,6 @@ typedef depot_stack_handle_t intel_wakeref_t;
 struct intel_wakeref_ops {
         int (*get)(struct intel_wakeref *wf);
         int (*put)(struct intel_wakeref *wf);
-
-        unsigned long flags;
-#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
 };
 
 struct intel_wakeref {
@@ -57,7 +55,7 @@ void __intel_wakeref_init(struct intel_wakeref *wf,
 } while (0)
 
 int __intel_wakeref_get_first(struct intel_wakeref *wf);
-void __intel_wakeref_put_last(struct intel_wakeref *wf);
+void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags);
 
 /**
  * intel_wakeref_get: Acquire the wakeref
@@ -100,10 +98,9 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
 }
 
 /**
- * intel_wakeref_put: Release the wakeref
- * @i915: the drm_i915_private device
+ * intel_wakeref_put_flags: Release the wakeref
  * @wf: the wakeref
- * @fn: callback for releasing the wakeref, called only on final release.
+ * @flags: control flags
  *
  * Release our hold on the wakeref. When there are no more users,
 * the runtime pm wakeref will be released after the @fn callback is called
@@ -116,11 +113,25 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
 * code otherwise.
 */
 static inline void
-intel_wakeref_put(struct intel_wakeref *wf)
+__intel_wakeref_put(struct intel_wakeref *wf, unsigned long flags)
+#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
 {
         INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
         if (unlikely(!atomic_add_unless(&wf->count, -1, 1)))
-                __intel_wakeref_put_last(wf);
+                __intel_wakeref_put_last(wf, flags);
 }
 
+static inline void
+intel_wakeref_put(struct intel_wakeref *wf)
+{
+        might_sleep();
+        __intel_wakeref_put(wf, 0);
+}
+
+static inline void
+intel_wakeref_put_async(struct intel_wakeref *wf)
+{
+        __intel_wakeref_put(wf, INTEL_WAKEREF_PUT_ASYNC);
+}
+
 /**
@@ -151,6 +162,21 @@ intel_wakeref_unlock(struct intel_wakeref *wf)
         mutex_unlock(&wf->mutex);
 }
 
+/**
+ * intel_wakeref_unlock_wait: Wait until the active callback is complete
+ * @wf: the wakeref
+ *
+ * Waits for the active callback (under the @wf->mutex or another CPU) is
+ * complete.
+ */
+static inline void
+intel_wakeref_unlock_wait(struct intel_wakeref *wf)
+{
+        mutex_lock(&wf->mutex);
+        mutex_unlock(&wf->mutex);
+        flush_work(&wf->work);
+}
+
 /**
  * intel_wakeref_is_active: Query whether the wakeref is currently held
  * @wf: the wakeref
@@ -170,6 +196,7 @@ intel_wakeref_is_active(const struct intel_wakeref *wf)
 static inline void
 __intel_wakeref_defer_park(struct intel_wakeref *wf)
 {
+        lockdep_assert_held(&wf->mutex);
         INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count));
         atomic_set_release(&wf->count, 1);
 }