drm/amdkfd: simplify drain retry fault
unmap range always increases the atomic svms->drain_pagefaults to simplify both parent range and child range unmap. The page fault handler ignores retry faults while svms->drain_pagefaults is set, to speed up interrupt handling. svm_range_drain_retry_fault restarts draining if another range is unmapped from the CPU in the meantime.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
commit 6946be2443
parent 0cc53cb450
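The change boils down to a counter-and-restart drain pattern: every unmap from the CPU bumps an atomic counter, the deferred worker drains retry faults whenever the counter is non-zero, and the counter is only cleared with a compare-and-exchange, so a concurrent unmap forces another drain pass. Below is a minimal standalone sketch of that pattern using C11 atomics; it is an illustration, not the kernel code, and flush_fault_ring() is a hypothetical stand-in for waiting on the interrupt ring checkpoint (amdgpu_ih_wait_on_checkpoint_process in the patch).

/* Minimal sketch of the drain-and-restart pattern using C11 atomics.
 * Names only loosely mirror the kernel code; flush_fault_ring() is a
 * hypothetical stand-in for the IH ring checkpoint wait.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int drain_pagefaults;	/* mirrors svms->drain_pagefaults */

/* Unmap side: every unmap from the CPU just bumps the counter. */
static void unmap_from_cpu(void)
{
	atomic_fetch_add(&drain_pagefaults, 1);
}

/* Stand-in for draining one GPU's pending retry-fault interrupts. */
static void flush_fault_ring(void)
{
	puts("draining pending retry faults");
}

/* Deferred-worker side: drain, then clear the counter with a
 * compare-and-exchange. If another unmap raced in and bumped the
 * counter while we were draining, the exchange fails and we restart.
 */
static void drain_retry_faults(void)
{
	int drain;

restart:
	drain = atomic_load(&drain_pagefaults);
	if (!drain)
		return;

	flush_fault_ring();

	if (!atomic_compare_exchange_strong(&drain_pagefaults, &drain, 0))
		goto restart;
}

/* Fault-handler side: drop retry faults while a drain is pending. */
static int restore_pages(unsigned long addr)
{
	if (atomic_load(&drain_pagefaults)) {
		printf("draining, drop fault 0x%lx\n", addr);
		return 0;
	}
	/* ... recover the fault ... */
	return 1;
}

int main(void)
{
	unmap_from_cpu();
	restore_pages(0x1000);	/* dropped: drain pending */
	drain_retry_faults();
	restore_pages(0x1000);	/* handled: counter is zero again */
	return 0;
}

Using a counter instead of the previous bool means back-to-back unmaps cannot be lost: each increment is observed either by the drain pass already in flight or by the restart its failed compare-and-exchange forces.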
@@ -766,7 +766,7 @@ struct svm_range_list {
 	struct list_head		deferred_range_list;
 	spinlock_t			deferred_list_lock;
 	atomic_t			evicted_ranges;
-	bool				drain_pagefaults;
+	atomic_t			drain_pagefaults;
 	struct delayed_work		restore_work;
 	DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
 	struct task_struct		*faulting_task;
@@ -1968,10 +1968,16 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
 	struct kfd_process_device *pdd;
 	struct amdgpu_device *adev;
 	struct kfd_process *p;
+	int drain;
 	uint32_t i;
 
 	p = container_of(svms, struct kfd_process, svms);
 
+restart:
+	drain = atomic_read(&svms->drain_pagefaults);
+	if (!drain)
+		return;
+
 	for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
 		pdd = p->pdds[i];
 		if (!pdd)
@@ -1983,6 +1989,8 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
 		amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1);
 		pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
 	}
+	if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain)
+		goto restart;
 }
 
 static void svm_range_deferred_list_work(struct work_struct *work)
@@ -2008,8 +2016,7 @@ retry:
 	/* Checking for the need to drain retry faults must be inside
 	 * mmap write lock to serialize with munmap notifiers.
 	 */
-	if (unlikely(READ_ONCE(svms->drain_pagefaults))) {
-		WRITE_ONCE(svms->drain_pagefaults, false);
+	if (unlikely(atomic_read(&svms->drain_pagefaults))) {
 		mmap_write_unlock(mm);
 		svm_range_drain_retry_fault(svms);
 		goto retry;
@@ -2056,12 +2063,6 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
 			struct mm_struct *mm, enum svm_work_list_ops op)
 {
 	spin_lock(&svms->deferred_list_lock);
-	/* Make sure pending page faults are drained in the deferred worker
-	 * before the range is freed to avoid straggler interrupts on
-	 * unmapped memory causing "phantom faults".
-	 */
-	if (op == SVM_OP_UNMAP_RANGE)
-		svms->drain_pagefaults = true;
 	/* if prange is on the deferred list */
 	if (!list_empty(&prange->deferred_list)) {
 		pr_debug("update exist prange 0x%p work op %d\n", prange, op);
@@ -2140,6 +2141,12 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
 	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
 		 prange, prange->start, prange->last, start, last);
 
+	/* Make sure pending page faults are drained in the deferred worker
+	 * before the range is freed to avoid straggler interrupts on
+	 * unmapped memory causing "phantom faults".
+	 */
+	atomic_inc(&svms->drain_pagefaults);
+
 	unmap_parent = start <= prange->start && last >= prange->last;
 
 	list_for_each_entry(pchild, &prange->child_list, child_list) {
@@ -2605,6 +2612,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 
 	pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
 
+	if (atomic_read(&svms->drain_pagefaults)) {
+		pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+		goto out;
+	}
+
 	/* p->lead_thread is available as kfd_process_wq_release flush the work
 	 * before releasing task ref.
 	 */
@@ -2751,6 +2763,7 @@ void svm_range_list_fini(struct kfd_process *p)
 	 * Ensure no retry fault comes in afterwards, as page fault handler will
 	 * not find kfd process and take mm lock to recover fault.
 	 */
+	atomic_inc(&p->svms.drain_pagefaults);
 	svm_range_drain_retry_fault(&p->svms);
 
@@ -2774,6 +2787,7 @@ int svm_range_list_init(struct kfd_process *p)
 	mutex_init(&svms->lock);
 	INIT_LIST_HEAD(&svms->list);
 	atomic_set(&svms->evicted_ranges, 0);
+	atomic_set(&svms->drain_pagefaults, 0);
 	INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
 	INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
 	INIT_LIST_HEAD(&svms->deferred_range_list);