- Selftest fix (Andrzej)
- TTM fix sg_table construction (Matt Auld) - Error return fixes (Dan) - Fix a performance regression related to waitboost (Chris) - Fix GT resets (Chris) - Fix a possible refcount leak in DP MST connector (Hangyu) - Fix on loading guc on ADL-N (Daniele) - Fix vm use-after-free in vma destruction (Thomas) -----BEGIN PGP SIGNATURE----- iQEzBAABCAAdFiEEbSBwaO7dZQkcLOKj+mJfZA7rE8oFAmLPOyAACgkQ+mJfZA7r E8qL6AgAuNzYjzklhE4thI2D7kkJSAj/S9xteEHJ+Pfuu0U+mvAO2w7lxUPOXdE1 erqa9mVzlsxe9coxVel/4WclIInyMj9N0qqp/aXisJpzOMNqDT0qBBFWHCb4mvJI ta80pYb7ITCk3vQ/BsfARUFxDVuxPg/nGBaXq0Pe9MmSDuwn/doy3P+q/U2eOebe 5woUubC9/cTbB2ORnNVSBk7iFrkM19D7YH2N8cDBPpGJ1azUrsB5xS6xiz0/VNyl 9+NdjQ5/wfdmOqya7GLHDDED8kXx/cVyuN83sy4xJGXMUyBSQYJyIAm2CL6rK4nL hHLwM2QXcHUNwf03R5HO5r8LxZkiAg== =+20T -----END PGP SIGNATURE----- Merge tag 'drm-intel-fixes-2022-07-13' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixes - Selftest fix (Andrzej) - TTM fix sg_table construction (Matt Auld) - Error return fixes (Dan) - Fix a performance regression related to waitboost (Chris) - Fix GT resets (Chris) Signed-off-by: Dave Airlie <airlied@redhat.com> From: Rodrigo Vivi <rodrigo.vivi@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/Ys87yMujcG2sJC1R@intel.com
This commit is contained in:
commit
5bde069b30
@ -620,10 +620,15 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
|
||||
struct ttm_resource *res)
|
||||
{
|
||||
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
|
||||
u64 page_alignment;
|
||||
|
||||
if (!i915_ttm_gtt_binds_lmem(res))
|
||||
return i915_ttm_tt_get_st(bo->ttm);
|
||||
|
||||
page_alignment = bo->page_alignment << PAGE_SHIFT;
|
||||
if (!page_alignment)
|
||||
page_alignment = obj->mm.region->min_page_size;
|
||||
|
||||
/*
|
||||
* If CPU mapping differs, we need to add the ttm_tt pages to
|
||||
* the resulting st. Might make sense for GGTT.
|
||||
@ -634,7 +639,8 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
|
||||
struct i915_refct_sgt *rsgt;
|
||||
|
||||
rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region,
|
||||
res);
|
||||
res,
|
||||
page_alignment);
|
||||
if (IS_ERR(rsgt))
|
||||
return rsgt;
|
||||
|
||||
@ -643,7 +649,8 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
|
||||
return i915_refct_sgt_get(obj->ttm.cached_io_rsgt);
|
||||
}
|
||||
|
||||
return intel_region_ttm_resource_to_rsgt(obj->mm.region, res);
|
||||
return intel_region_ttm_resource_to_rsgt(obj->mm.region, res,
|
||||
page_alignment);
|
||||
}
|
||||
|
||||
static int i915_ttm_truncate(struct drm_i915_gem_object *obj)
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <linux/jiffies.h>
|
||||
|
||||
#include "gt/intel_engine.h"
|
||||
#include "gt/intel_rps.h"
|
||||
|
||||
#include "i915_gem_ioctls.h"
|
||||
#include "i915_gem_object.h"
|
||||
@ -31,6 +32,37 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
|
||||
timeout);
|
||||
}
|
||||
|
||||
static void
|
||||
i915_gem_object_boost(struct dma_resv *resv, unsigned int flags)
|
||||
{
|
||||
struct dma_resv_iter cursor;
|
||||
struct dma_fence *fence;
|
||||
|
||||
/*
|
||||
* Prescan all fences for potential boosting before we begin waiting.
|
||||
*
|
||||
* When we wait, we wait on outstanding fences serially. If the
|
||||
* dma-resv contains a sequence such as 1:1, 1:2 instead of a reduced
|
||||
* form 1:2, then as we look at each wait in turn we see that each
|
||||
* request is currently executing and not worthy of boosting. But if
|
||||
* we only happen to look at the final fence in the sequence (because
|
||||
* of request coalescing or splitting between read/write arrays by
|
||||
* the iterator), then we would boost. As such our decision to boost
|
||||
* or not is delicately balanced on the order we wait on fences.
|
||||
*
|
||||
* So instead of looking for boosts sequentially, look for all boosts
|
||||
* upfront and then wait on the outstanding fences.
|
||||
*/
|
||||
|
||||
dma_resv_iter_begin(&cursor, resv,
|
||||
dma_resv_usage_rw(flags & I915_WAIT_ALL));
|
||||
dma_resv_for_each_fence_unlocked(&cursor, fence)
|
||||
if (dma_fence_is_i915(fence) &&
|
||||
!i915_request_started(to_request(fence)))
|
||||
intel_rps_boost(to_request(fence));
|
||||
dma_resv_iter_end(&cursor);
|
||||
}
|
||||
|
||||
static long
|
||||
i915_gem_object_wait_reservation(struct dma_resv *resv,
|
||||
unsigned int flags,
|
||||
@ -40,6 +72,8 @@ i915_gem_object_wait_reservation(struct dma_resv *resv,
|
||||
struct dma_fence *fence;
|
||||
long ret = timeout ?: 1;
|
||||
|
||||
i915_gem_object_boost(resv, flags);
|
||||
|
||||
dma_resv_iter_begin(&cursor, resv,
|
||||
dma_resv_usage_rw(flags & I915_WAIT_ALL));
|
||||
dma_resv_for_each_fence_unlocked(&cursor, fence) {
|
||||
|
@ -1209,6 +1209,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
|
||||
mutex_lock(&gt->tlb_invalidate_lock);
|
||||
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
|
||||
|
||||
spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
|
||||
|
||||
for_each_engine(engine, gt, id) {
|
||||
struct reg_and_bit rb;
|
||||
|
||||
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
|
||||
if (!i915_mmio_reg_offset(rb.reg))
|
||||
continue;
|
||||
|
||||
intel_uncore_write_fw(uncore, rb.reg, rb.bit);
|
||||
}
|
||||
|
||||
spin_unlock_irq(&uncore->lock);
|
||||
|
||||
for_each_engine(engine, gt, id) {
|
||||
/*
|
||||
* HW architecture suggest typical invalidation time at 40us,
|
||||
@ -1223,7 +1237,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
|
||||
if (!i915_mmio_reg_offset(rb.reg))
|
||||
continue;
|
||||
|
||||
intel_uncore_write_fw(uncore, rb.reg, rb.bit);
|
||||
if (__intel_wait_for_register_fw(uncore,
|
||||
rb.reg, rb.bit, 0,
|
||||
timeout_us, timeout_ms,
|
||||
|
@ -300,9 +300,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
|
||||
return err;
|
||||
}
|
||||
|
||||
static int gen6_reset_engines(struct intel_gt *gt,
|
||||
intel_engine_mask_t engine_mask,
|
||||
unsigned int retry)
|
||||
static int __gen6_reset_engines(struct intel_gt *gt,
|
||||
intel_engine_mask_t engine_mask,
|
||||
unsigned int retry)
|
||||
{
|
||||
struct intel_engine_cs *engine;
|
||||
u32 hw_mask;
|
||||
@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt,
|
||||
return gen6_hw_domain_reset(gt, hw_mask);
|
||||
}
|
||||
|
||||
static int gen6_reset_engines(struct intel_gt *gt,
|
||||
intel_engine_mask_t engine_mask,
|
||||
unsigned int retry)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
spin_lock_irqsave(>->uncore->lock, flags);
|
||||
ret = __gen6_reset_engines(gt, engine_mask, retry);
|
||||
spin_unlock_irqrestore(>->uncore->lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine)
|
||||
{
|
||||
int vecs_id;
|
||||
@ -487,9 +501,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine)
|
||||
rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit);
|
||||
}
|
||||
|
||||
static int gen11_reset_engines(struct intel_gt *gt,
|
||||
intel_engine_mask_t engine_mask,
|
||||
unsigned int retry)
|
||||
static int __gen11_reset_engines(struct intel_gt *gt,
|
||||
intel_engine_mask_t engine_mask,
|
||||
unsigned int retry)
|
||||
{
|
||||
struct intel_engine_cs *engine;
|
||||
intel_engine_mask_t tmp;
|
||||
@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt,
|
||||
struct intel_engine_cs *engine;
|
||||
const bool reset_non_ready = retry >= 1;
|
||||
intel_engine_mask_t tmp;
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
spin_lock_irqsave(&gt->uncore->lock, flags);
|
||||
|
||||
for_each_engine_masked(engine, gt, engine_mask, tmp) {
|
||||
ret = gen8_engine_reset_prepare(engine);
|
||||
if (ret && !reset_non_ready)
|
||||
@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt,
|
||||
* This is best effort, so ignore any error from the initial reset.
|
||||
*/
|
||||
if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES)
|
||||
gen11_reset_engines(gt, gt->info.engine_mask, 0);
|
||||
__gen11_reset_engines(gt, gt->info.engine_mask, 0);
|
||||
|
||||
if (GRAPHICS_VER(gt->i915) >= 11)
|
||||
ret = gen11_reset_engines(gt, engine_mask, retry);
|
||||
ret = __gen11_reset_engines(gt, engine_mask, retry);
|
||||
else
|
||||
ret = gen6_reset_engines(gt, engine_mask, retry);
|
||||
ret = __gen6_reset_engines(gt, engine_mask, retry);
|
||||
|
||||
skip_reset:
|
||||
for_each_engine_masked(engine, gt, engine_mask, tmp)
|
||||
gen8_engine_reset_cancel(engine);
|
||||
|
||||
spin_unlock_irqrestore(&gt->uncore->lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -176,8 +176,8 @@ static int live_lrc_layout(void *arg)
|
||||
continue;
|
||||
|
||||
hw = shmem_pin_map(engine->default_state);
|
||||
if (IS_ERR(hw)) {
|
||||
err = PTR_ERR(hw);
|
||||
if (!hw) {
|
||||
err = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
hw += LRC_STATE_OFFSET / sizeof(*hw);
|
||||
@ -365,8 +365,8 @@ static int live_lrc_fixed(void *arg)
|
||||
continue;
|
||||
|
||||
hw = shmem_pin_map(engine->default_state);
|
||||
if (IS_ERR(hw)) {
|
||||
err = PTR_ERR(hw);
|
||||
if (!hw) {
|
||||
err = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
hw += LRC_STATE_OFFSET / sizeof(*hw);
|
||||
|
@ -3117,9 +3117,9 @@ void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu)
|
||||
continue;
|
||||
|
||||
vaddr = shmem_pin_map(engine->default_state);
|
||||
if (IS_ERR(vaddr)) {
|
||||
gvt_err("failed to map %s->default state, err:%zd\n",
|
||||
engine->name, PTR_ERR(vaddr));
|
||||
if (!vaddr) {
|
||||
gvt_err("failed to map %s->default state\n",
|
||||
engine->name);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -68,6 +68,7 @@ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size)
|
||||
* drm_mm_node
|
||||
* @node: The drm_mm_node.
|
||||
* @region_start: An offset to add to the dma addresses of the sg list.
|
||||
* @page_alignment: Required page alignment for each sg entry. Power of two.
|
||||
*
|
||||
* Create a struct sg_table, initializing it from a struct drm_mm_node,
|
||||
* taking a maximum segment length into account, splitting into segments
|
||||
@ -77,15 +78,18 @@ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size)
|
||||
* error code cast to an error pointer on failure.
|
||||
*/
|
||||
struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
|
||||
u64 region_start)
|
||||
u64 region_start,
|
||||
u64 page_alignment)
|
||||
{
|
||||
const u64 max_segment = SZ_1G; /* Do we have a limit on this? */
|
||||
const u64 max_segment = round_down(UINT_MAX, page_alignment);
|
||||
u64 segment_pages = max_segment >> PAGE_SHIFT;
|
||||
u64 block_size, offset, prev_end;
|
||||
struct i915_refct_sgt *rsgt;
|
||||
struct sg_table *st;
|
||||
struct scatterlist *sg;
|
||||
|
||||
GEM_BUG_ON(!max_segment);
|
||||
|
||||
rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
|
||||
if (!rsgt)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
@ -112,6 +116,8 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
|
||||
sg = __sg_next(sg);
|
||||
|
||||
sg_dma_address(sg) = region_start + offset;
|
||||
GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg),
|
||||
page_alignment));
|
||||
sg_dma_len(sg) = 0;
|
||||
sg->length = 0;
|
||||
st->nents++;
|
||||
@ -138,6 +144,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
|
||||
* i915_buddy_block list
|
||||
* @res: The struct i915_ttm_buddy_resource.
|
||||
* @region_start: An offset to add to the dma addresses of the sg list.
|
||||
* @page_alignment: Required page alignment for each sg entry. Power of two.
|
||||
*
|
||||
* Create a struct sg_table, initializing it from struct i915_buddy_block list,
|
||||
* taking a maximum segment length into account, splitting into segments
|
||||
@ -147,11 +154,12 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
|
||||
* error code cast to an error pointer on failure.
|
||||
*/
|
||||
struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
|
||||
u64 region_start)
|
||||
u64 region_start,
|
||||
u64 page_alignment)
|
||||
{
|
||||
struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
|
||||
const u64 size = res->num_pages << PAGE_SHIFT;
|
||||
const u64 max_segment = rounddown(UINT_MAX, PAGE_SIZE);
|
||||
const u64 max_segment = round_down(UINT_MAX, page_alignment);
|
||||
struct drm_buddy *mm = bman_res->mm;
|
||||
struct list_head *blocks = &bman_res->blocks;
|
||||
struct drm_buddy_block *block;
|
||||
@ -161,6 +169,7 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
|
||||
resource_size_t prev_end;
|
||||
|
||||
GEM_BUG_ON(list_empty(blocks));
|
||||
GEM_BUG_ON(!max_segment);
|
||||
|
||||
rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
|
||||
if (!rsgt)
|
||||
@ -191,6 +200,8 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
|
||||
sg = __sg_next(sg);
|
||||
|
||||
sg_dma_address(sg) = region_start + offset;
|
||||
GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg),
|
||||
page_alignment));
|
||||
sg_dma_len(sg) = 0;
|
||||
sg->length = 0;
|
||||
st->nents++;
|
||||
|
@ -213,9 +213,11 @@ static inline void __i915_refct_sgt_init(struct i915_refct_sgt *rsgt,
|
||||
void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size);
|
||||
|
||||
struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
|
||||
u64 region_start);
|
||||
u64 region_start,
|
||||
u64 page_alignment);
|
||||
|
||||
struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
|
||||
u64 region_start);
|
||||
u64 region_start,
|
||||
u64 page_alignment);
|
||||
|
||||
#endif
|
||||
|
@ -152,6 +152,7 @@ int intel_region_ttm_fini(struct intel_memory_region *mem)
|
||||
* Convert an opaque TTM resource manager resource to a refcounted sg_table.
|
||||
* @mem: The memory region.
|
||||
* @res: The resource manager resource obtained from the TTM resource manager.
|
||||
* @page_alignment: Required page alignment for each sg entry. Power of two.
|
||||
*
|
||||
* The gem backends typically use sg-tables for operations on the underlying
|
||||
* io_memory. So provide a way for the backends to translate the
|
||||
@ -161,16 +162,19 @@ int intel_region_ttm_fini(struct intel_memory_region *mem)
|
||||
*/
|
||||
struct i915_refct_sgt *
|
||||
intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
|
||||
struct ttm_resource *res)
|
||||
struct ttm_resource *res,
|
||||
u64 page_alignment)
|
||||
{
|
||||
if (mem->is_range_manager) {
|
||||
struct ttm_range_mgr_node *range_node =
|
||||
to_ttm_range_mgr_node(res);
|
||||
|
||||
return i915_rsgt_from_mm_node(&range_node->mm_nodes[0],
|
||||
mem->region.start);
|
||||
mem->region.start,
|
||||
page_alignment);
|
||||
} else {
|
||||
return i915_rsgt_from_buddy_resource(res, mem->region.start);
|
||||
return i915_rsgt_from_buddy_resource(res, mem->region.start,
|
||||
page_alignment);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -24,7 +24,8 @@ int intel_region_ttm_fini(struct intel_memory_region *mem);
|
||||
|
||||
struct i915_refct_sgt *
|
||||
intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
|
||||
struct ttm_resource *res);
|
||||
struct ttm_resource *res,
|
||||
u64 page_alignment);
|
||||
|
||||
void intel_region_ttm_resource_free(struct intel_memory_region *mem,
|
||||
struct ttm_resource *res);
|
||||
|
@ -742,7 +742,7 @@ static int pot_hole(struct i915_address_space *vm,
|
||||
u64 addr;
|
||||
|
||||
for (addr = round_up(hole_start + min_alignment, step) - min_alignment;
|
||||
addr <= round_down(hole_end - (2 * min_alignment), step) - min_alignment;
|
||||
hole_end > addr && hole_end - addr >= 2 * min_alignment;
|
||||
addr += step) {
|
||||
err = i915_vma_pin(vma, 0, 0, addr | flags);
|
||||
if (err) {
|
||||
|
@ -451,7 +451,6 @@ out_put:
|
||||
|
||||
static int igt_mock_max_segment(void *arg)
|
||||
{
|
||||
const unsigned int max_segment = rounddown(UINT_MAX, PAGE_SIZE);
|
||||
struct intel_memory_region *mem = arg;
|
||||
struct drm_i915_private *i915 = mem->i915;
|
||||
struct i915_ttm_buddy_resource *res;
|
||||
@ -460,7 +459,10 @@ static int igt_mock_max_segment(void *arg)
|
||||
struct drm_buddy *mm;
|
||||
struct list_head *blocks;
|
||||
struct scatterlist *sg;
|
||||
I915_RND_STATE(prng);
|
||||
LIST_HEAD(objects);
|
||||
unsigned int max_segment;
|
||||
unsigned int ps;
|
||||
u64 size;
|
||||
int err = 0;
|
||||
|
||||
@ -472,7 +474,13 @@ static int igt_mock_max_segment(void *arg)
|
||||
*/
|
||||
|
||||
size = SZ_8G;
|
||||
mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0);
|
||||
ps = PAGE_SIZE;
|
||||
if (i915_prandom_u64_state(&prng) & 1)
|
||||
ps = SZ_64K; /* For something like DG2 */
|
||||
|
||||
max_segment = round_down(UINT_MAX, ps);
|
||||
|
||||
mem = mock_region_create(i915, 0, size, ps, 0, 0);
|
||||
if (IS_ERR(mem))
|
||||
return PTR_ERR(mem);
|
||||
|
||||
@ -498,12 +506,21 @@ static int igt_mock_max_segment(void *arg)
|
||||
}
|
||||
|
||||
for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) {
|
||||
dma_addr_t daddr = sg_dma_address(sg);
|
||||
|
||||
if (sg->length > max_segment) {
|
||||
pr_err("%s: Created an oversized scatterlist entry, %u > %u\n",
|
||||
__func__, sg->length, max_segment);
|
||||
err = -EINVAL;
|
||||
goto out_close;
|
||||
}
|
||||
|
||||
if (!IS_ALIGNED(daddr, ps)) {
|
||||
pr_err("%s: Created an unaligned scatterlist entry, addr=%pa, ps=%u\n",
|
||||
__func__, &daddr, ps);
|
||||
err = -EINVAL;
|
||||
goto out_close;
|
||||
}
|
||||
}
|
||||
|
||||
out_close:
|
||||
|
@ -33,7 +33,8 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj)
|
||||
return PTR_ERR(obj->mm.res);
|
||||
|
||||
obj->mm.rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region,
|
||||
obj->mm.res);
|
||||
obj->mm.res,
|
||||
obj->mm.region->min_page_size);
|
||||
if (IS_ERR(obj->mm.rsgt)) {
|
||||
err = PTR_ERR(obj->mm.rsgt);
|
||||
goto err_free_resource;
|
||||
|
Loading…
Reference in New Issue
Block a user