Driver Changes:

- Fix ccs_mode setting for Xe2 and later (Balasubramani) - Synchronize ccs_mode setting with client creation (Balasubramani) - Apply scheduling WA for LNL in additional places as needed (Nirmoy) - Fix leak and lock handling in error paths of xe_exec ioctl (Matthew Brost) - Fix GGTT allocation leak leading to eventual crash in SR-IOV (Michal Wajdeczko) - Move run_ticks update out of job handling to avoid synchronization with reader (Lucas) -----BEGIN PGP SIGNATURE----- iQJNBAABCAA3FiEE6rM8lpABPHM5FqyDm6KlpjDL6lMFAmcuLUQZHGx1Y2FzLmRl bWFyY2hpQGludGVsLmNvbQAKCRCboqWmMMvqU3HEEACI04W0GFdk1ix9MS2vuK7U r0phWoaP4+29vfWG9BRN3S6jisAt2Y1+DBHCP78C7naV/FvHW8aJQ78cHEJ9DjF+ /OpzpErfd+BD2DSSOBRNcQJ3eqe7JVjy2Q4kV41+qkKqqOaN99L33vFexCXOu7Je wFOXn33pmrsFCp9mb6xBhLYGo8tFS5IctHvBajGFHoUQzqfA/WkD/g67x9qKSzYv BThYOm066jcWGX0LaKHCw6slQ/OeZE78q2gaQraEpCyw4dJg7QBTlrVU5662xCnI RXDv+adzsHGWah77vrnBKFBRAD66qBHj4ntecHesnhuBv31EJdqWlLb5baoiE5RN wuPm57hPJ39tsB/KxqUlTmFiBCWQ74Px4o6nRKcnme2N0m/H8xFTp7c45SRwJZ+X RhQPnv14fkLN3e4TaWbr8at89Gapk1R8cXRytZG4gwkrfFfLEGT1ufBV6lP3Pn0n 6DQvtNrKR2qGO1JM+R5MXIzzdFFqRuI/8+GX9dvs/5fFTLbTUCckyU9ZM9JXFKIl 2ShK7eSFhUPqw2p3zbQzyxQqkopWORQhRkzUmCm8e9WQLE9uBpaFbkeIzu36atY2 pjRElc6OtTlxCN1KcBnOk5UvTbEC8MxNdWjB5MZJd8zaxm5DVbgGbxp5YmU8K/gG GV1YXZlniVpdhixJFFoypQ== =jirT -----END PGP SIGNATURE----- Merge tag 'drm-xe-fixes-2024-11-08' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes Driver Changes: - Fix ccs_mode setting for Xe2 and later (Balasubramani) - Synchronize ccs_mode setting with client creation (Balasubramani) - Apply scheduling WA for LNL in additional places as needed (Nirmoy) - Fix leak and lock handling in error paths of xe_exec ioctl (Matthew Brost) - Fix GGTT allocation leak leading to eventual crash in SR-IOV (Michal Wajdeczko) - Move run_ticks update out of job handling to avoid synchronization with reader (Lucas) Signed-off-by: Dave Airlie <airlied@redhat.com> From: Lucas De Marchi <lucas.demarchi@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/4ffcebtluaaaohquxfyf5babpihmtscxwad3jjmt5nggwh2xpm@ztw67ucywttg
2024-11-25 13:41:51 +00:00 · 2024-11-09 05:14:28 +10:00 · 2024-11-09 05:14:28 +10:00 · 1a6bbc4d9e
commit 1a6bbc4d9e
parent 9b984a71c2 514447a121
12 changed files with 54 additions and 41 deletions
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@ -517,7 +517,7 @@
 *   [4-6]     RSVD
 *   [7]       Disabled
 */
-#define CCS_MODE				XE_REG(0x14804)
+#define CCS_MODE				XE_REG(0x14804, XE_REG_OPTION_MASKED)
 #define   CCS_MODE_CSLICE_0_3_MASK		REG_GENMASK(11, 0) /* 3 bits per cslice */
 #define   CCS_MODE_CSLICE_MASK			0x7 /* CCS0-3 + rsvd */
 #define   CCS_MODE_CSLICE_WIDTH			ilog2(CCS_MODE_CSLICE_MASK + 1)
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@ -87,10 +87,6 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
 	mutex_init(&xef->exec_queue.lock);
 	xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);

-	spin_lock(&xe->clients.lock);
-	xe->clients.count++;
-	spin_unlock(&xe->clients.lock);
-
 	file->driver_priv = xef;
 	kref_init(&xef->refcount);

@ -107,17 +103,12 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
 static void xe_file_destroy(struct kref *ref)
 {
 	struct xe_file *xef = container_of(ref, struct xe_file, refcount);
-	struct xe_device *xe = xef->xe;

 	xa_destroy(&xef->exec_queue.xa);
 	mutex_destroy(&xef->exec_queue.lock);
 	xa_destroy(&xef->vm.xa);
 	mutex_destroy(&xef->vm.lock);

-	spin_lock(&xe->clients.lock);
-	xe->clients.count--;
-	spin_unlock(&xe->clients.lock);
-
 	xe_drm_client_put(xef->client);
 	kfree(xef->process_name);
 	kfree(xef);
@ -333,7 +324,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 	xe->info.force_execlist = xe_modparam.force_execlist;

 	spin_lock_init(&xe->irq.lock);
-	spin_lock_init(&xe->clients.lock);

 	init_waitqueue_head(&xe->ufence_wq);

--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@ -178,4 +178,18 @@ void xe_device_declare_wedged(struct xe_device *xe);
 struct xe_file *xe_file_get(struct xe_file *xef);
 void xe_file_put(struct xe_file *xef);

+/*
+ * Occasionally it is seen that the G2H worker starts running after a delay of more than
+ * a second even after being queued and activated by the Linux workqueue subsystem. This
+ * leads to G2H timeout error. The root cause of issue lies with scheduling latency of
+ * Lunarlake Hybrid CPU. Issue disappears if we disable Lunarlake atom cores from BIOS
+ * and this is beyond xe kmd.
+ *
+ * TODO: Drop this change once workqueue scheduling delay issue is fixed on LNL Hybrid CPU.
+ */
+#define LNL_FLUSH_WORKQUEUE(wq__) \
+	flush_workqueue(wq__)
+#define LNL_FLUSH_WORK(wrk__) \
+	flush_work(wrk__)
+
 #endif
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@ -353,15 +353,6 @@ struct xe_device {
 		struct workqueue_struct *wq;
 	} sriov;

-	/** @clients: drm clients info */
-	struct {
-		/** @clients.lock: Protects drm clients info */
-		spinlock_t lock;
-
-		/** @clients.count: number of drm clients */
-		u64 count;
-	} clients;
-
 	/** @usm: unified memory state */
 	struct {
 		/** @usm.asid: convert a ASID to VM */
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@ -132,12 +132,16 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (XE_IOCTL_DBG(xe, !q))
 		return -ENOENT;

-	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM))
-		return -EINVAL;
+	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM)) {
+		err = -EINVAL;
+		goto err_exec_queue;
+	}

 	if (XE_IOCTL_DBG(xe, args->num_batch_buffer &&
-			 q->width != args->num_batch_buffer))
-		return -EINVAL;
+			 q->width != args->num_batch_buffer)) {
+		err = -EINVAL;
+		goto err_exec_queue;
+	}

 	if (XE_IOCTL_DBG(xe, q->ops->reset_status(q))) {
 		err = -ECANCELED;
@ -220,6 +224,7 @@ retry:
 			fence = xe_sync_in_fence_get(syncs, num_syncs, q, vm);
 			if (IS_ERR(fence)) {
 				err = PTR_ERR(fence);
+				xe_vm_unlock(vm);
 				goto err_unlock_list;
 			}
 			for (i = 0; i < num_syncs; i++)
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@ -260,8 +260,14 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
 {
 	int i;

+	/*
+	 * Before releasing our ref to lrc and xef, accumulate our run ticks
+	 */
+	xe_exec_queue_update_run_ticks(q);
+
 	for (i = 0; i < q->width; ++i)
 		xe_lrc_put(q->lrc[i]);
+
 	__xe_exec_queue_free(q);
 }

--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@ -68,6 +68,12 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines)
 		}
 	}

+	/*
+	 * Mask bits need to be set for the register. Though only Xe2+
+	 * platforms require setting of mask bits, it won't harm for older
+	 * platforms as these bits are unused there.
+	 */
+	mode |= CCS_MODE_CSLICE_0_3_MASK << 16;
 	xe_mmio_write32(gt, CCS_MODE, mode);

 	xe_gt_dbg(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n",
@ -133,9 +139,10 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr,
 	}

 	/* CCS mode can only be updated when there are no drm clients */
-	spin_lock(&xe->clients.lock);
-	if (xe->clients.count) {
-		spin_unlock(&xe->clients.lock);
+	mutex_lock(&xe->drm.filelist_mutex);
+	if (!list_empty(&xe->drm.filelist)) {
+		mutex_unlock(&xe->drm.filelist_mutex);
+		xe_gt_dbg(gt, "Rejecting compute mode change as there are active drm clients\n");
 		return -EBUSY;
 	}

@ -146,7 +153,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr,
 		xe_gt_reset_async(gt);
 	}

-	spin_unlock(&xe->clients.lock);
+	mutex_unlock(&xe->drm.filelist_mutex);

 	return count;
 }
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@ -387,6 +387,8 @@ static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node)
 		 * the xe_ggtt_clear() called by below xe_ggtt_remove_node().
 		 */
 		xe_ggtt_node_remove(node, false);
+	} else {
+		xe_ggtt_node_fini(node);
 	}
 }

@ -442,7 +444,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size)
 	config->ggtt_region = node;
 	return 0;
 err:
-	xe_ggtt_node_fini(node);
+	pf_release_ggtt(tile, node);
 	return err;
 }

--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@ -72,6 +72,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
 	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_gt_tlb_invalidation_fence *fence, *next;

+	LNL_FLUSH_WORK(&gt->uc.guc.ct.g2h_worker);
+
 	spin_lock_irq(&gt->tlb_invalidation.pending_lock);
 	list_for_each_entry_safe(fence, next,
 				 &gt->tlb_invalidation.pending_fences, link) {
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@ -897,17 +897,8 @@ retry_same_fence:

 	ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);

-	/*
-	 * Occasionally it is seen that the G2H worker starts running after a delay of more than
-	 * a second even after being queued and activated by the Linux workqueue subsystem. This
-	 * leads to G2H timeout error. The root cause of issue lies with scheduling latency of
-	 * Lunarlake Hybrid CPU. Issue dissappears if we disable Lunarlake atom cores from BIOS
-	 * and this is beyond xe kmd.
-	 *
-	 * TODO: Drop this change once workqueue scheduling delay issue is fixed on LNL Hybrid CPU.
-	 */
 	if (!ret) {
-		flush_work(&ct->g2h_worker);
+		LNL_FLUSH_WORK(&ct->g2h_worker);
 		if (g2h_fence.done) {
 			xe_gt_warn(gt, "G2H fence %u, action %04x, done\n",
 				   g2h_fence.seqno, action[0]);
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@ -745,8 +745,6 @@ static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
 {
 	struct xe_sched_job *job = to_xe_sched_job(drm_job);

-	xe_exec_queue_update_run_ticks(job->q);
-
 	trace_xe_sched_job_free(job);
 	xe_sched_job_put(job);
 }
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@ -155,6 +155,13 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 		}

 		if (!timeout) {
+			LNL_FLUSH_WORKQUEUE(xe->ordered_wq);
+			err = do_compare(addr, args->value, args->mask,
+					 args->op);
+			if (err <= 0) {
+				drm_dbg(&xe->drm, "LNL_FLUSH_WORKQUEUE resolved ufence timeout\n");
+				break;
+			}
 			err = -ETIME;
 			break;
 		}