From 121b78e679ee3ffab780115e260b2775d0cc1f73 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 26 May 2016 08:41:08 +0300 Subject: [PATCH 1/3] drm/amdkfd: unbind only existing processes When unbinding a process from a device (initiated by amd_iommu_v2), the driver needs to make sure that process still exists in the process table. There is a possibility that amdkfd's own notifier handler - kfd_process_notifier_release() - was called before the unbind function and it already removed the process from the process table. v2: Because there can be only one process with the specified pasid, and because *p can't be NULL inside the hash_for_each_rcu macro, it is more reasonable to just put the whole code inside the if statement that compares the pasid value. That way, when we exit hash_for_each_rcu, we simply exit the function as well. Signed-off-by: Oded Gabbay CC: Stable --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 76 ++++++++++++++---------- 1 file changed, 43 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index ac005796b71c..a64bc619ed82 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -404,42 +404,52 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) idx = srcu_read_lock(&kfd_processes_srcu); + /* + * Look for the process that matches the pasid. If there is no such + * process, we either released it in amdkfd's own notifier, or there + * is a bug. Unfortunately, there is no way to tell... + */ hash_for_each_rcu(kfd_processes_table, i, p, kfd_processes) - if (p->pasid == pasid) - break; + if (p->pasid == pasid) { + + srcu_read_unlock(&kfd_processes_srcu, idx); + + pr_debug("Unbinding process %d from IOMMU\n", pasid); + + mutex_lock(&p->mutex); + + if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid)) + kfd_dbgmgr_destroy(dev->dbgmgr); + + pqm_uninit(&p->pqm); + + pdd = kfd_get_process_device_data(dev, p); + + if (!pdd) { + mutex_unlock(&p->mutex); + return; + } + + if (pdd->reset_wavefronts) { + dbgdev_wave_reset_wavefronts(pdd->dev, p); + pdd->reset_wavefronts = false; + } + + /* + * Just mark pdd as unbound, because we still need it + * to call amd_iommu_unbind_pasid() in when the + * process exits. + * We don't call amd_iommu_unbind_pasid() here + * because the IOMMU called us. + */ + pdd->bound = false; + + mutex_unlock(&p->mutex); + + return; + } srcu_read_unlock(&kfd_processes_srcu, idx); - - BUG_ON(p->pasid != pasid); - - mutex_lock(&p->mutex); - - if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid)) - kfd_dbgmgr_destroy(dev->dbgmgr); - - pqm_uninit(&p->pqm); - - pdd = kfd_get_process_device_data(dev, p); - - if (!pdd) { - mutex_unlock(&p->mutex); - return; - } - - if (pdd->reset_wavefronts) { - dbgdev_wave_reset_wavefronts(pdd->dev, p); - pdd->reset_wavefronts = false; - } - - /* - * Just mark pdd as unbound, because we still need it to call - * amd_iommu_unbind_pasid() in when the process exits. - * We don't call amd_iommu_unbind_pasid() here - * because the IOMMU called us. - */ - pdd->bound = false; - - mutex_unlock(&p->mutex); } struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p) From bc4755a4bd1845ef6e88ac8c62f12e05bb530256 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 26 May 2016 08:41:48 +0300 Subject: [PATCH 2/3] drm/amdkfd: destroy dbgmgr in notifier release amdkfd need to destroy the debug manager in case amdkfd's notifier function is called before the unbind function, because in that case, the unbind function will exit without destroying debug manager. Signed-off-by: Oded Gabbay CC: Stable --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index a64bc619ed82..7708d90b9da9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -242,13 +242,19 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, pqm_uninit(&p->pqm); /* Iterate over all process device data structure and check - * if we should reset all wavefronts */ - list_for_each_entry(pdd, &p->per_device_data, per_device_list) + * if we should delete debug managers and reset all wavefronts + */ + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + if ((pdd->dev->dbgmgr) && + (pdd->dev->dbgmgr->pasid == p->pasid)) + kfd_dbgmgr_destroy(pdd->dev->dbgmgr); + if (pdd->reset_wavefronts) { pr_warn("amdkfd: Resetting all wave fronts\n"); dbgdev_wave_reset_wavefronts(pdd->dev, p); pdd->reset_wavefronts = false; } + } mutex_unlock(&p->mutex); From 0fbbbf8b599ff840ff1a3c0cc00dd67ba8a52c9c Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 29 May 2016 08:21:53 +0300 Subject: [PATCH 3/3] drm/amdkfd: print once about mem_banks truncation This print can really spam the kernel log in case we are truncating mem_banks, so just print this info once. It should also not be classified as warning. Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 74909e72a009..884c96f50c3d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -666,7 +666,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.simd_count); if (dev->mem_bank_count < dev->node_props.mem_banks_count) { - pr_warn("kfd: mem_banks_count truncated from %d to %d\n", + pr_info_once("kfd: mem_banks_count truncated from %d to %d\n", dev->node_props.mem_banks_count, dev->mem_bank_count); sysfs_show_32bit_prop(buffer, "mem_banks_count",