drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
We hit a soft hang while doing a memory pressure test on one NUMA system. After a quick look, this is because kfd invalidates/validates userptr memory frequently with the process_info lock held. It looks like updating the page table mapping uses too much CPU time. perf top says below, 75.81% [kernel] [k] __srcu_read_unlock 6.19% [amdgpu] [k] amdgpu_gmc_set_pte_pde 3.56% [kernel] [k] __srcu_read_lock 2.20% [amdgpu] [k] amdgpu_vm_cpu_update 2.20% [kernel] [k] __sg_page_iter_dma_next 2.15% [drm] [k] drm_dev_enter 1.70% [drm] [k] drm_prime_sg_to_dma_addr_array 1.18% [kernel] [k] __sg_alloc_table_from_pages 1.09% [drm] [k] drm_dev_exit So move drm_dev_enter/exit outside the gmc code; instead, let the callers do it. They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and gmc_init_pdb0. vm_bo_update_mapping already calls it. Signed-off-by: xinhui pan <xinhui.pan@amd.com> Reviewed-and-tested-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
@@ -34,6 +34,7 @@
|
|||||||
#include <asm/set_memory.h>
|
#include <asm/set_memory.h>
|
||||||
#endif
|
#endif
|
||||||
#include "amdgpu.h"
|
#include "amdgpu.h"
|
||||||
|
#include <drm/drm_drv.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* GART
|
* GART
|
||||||
@@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
|
|||||||
u64 page_base;
|
u64 page_base;
|
||||||
/* Starting from VEGA10, system bit must be 0 to mean invalid. */
|
/* Starting from VEGA10, system bit must be 0 to mean invalid. */
|
||||||
uint64_t flags = 0;
|
uint64_t flags = 0;
|
||||||
|
int idx;
|
||||||
|
|
||||||
if (!adev->gart.ready) {
|
if (!adev->gart.ready) {
|
||||||
WARN(1, "trying to unbind memory from uninitialized GART !\n");
|
WARN(1, "trying to unbind memory from uninitialized GART !\n");
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!drm_dev_enter(&adev->ddev, &idx))
|
||||||
|
return 0;
|
||||||
|
|
||||||
t = offset / AMDGPU_GPU_PAGE_SIZE;
|
t = offset / AMDGPU_GPU_PAGE_SIZE;
|
||||||
p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
|
p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
|
||||||
for (i = 0; i < pages; i++, p++) {
|
for (i = 0; i < pages; i++, p++) {
|
||||||
@@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
|
|||||||
for (i = 0; i < adev->num_vmhubs; i++)
|
for (i = 0; i < adev->num_vmhubs; i++)
|
||||||
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
|
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
|
||||||
|
|
||||||
|
drm_dev_exit(idx);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
|
|||||||
{
|
{
|
||||||
uint64_t page_base;
|
uint64_t page_base;
|
||||||
unsigned i, j, t;
|
unsigned i, j, t;
|
||||||
|
int idx;
|
||||||
|
|
||||||
if (!adev->gart.ready) {
|
if (!adev->gart.ready) {
|
||||||
WARN(1, "trying to bind memory to uninitialized GART !\n");
|
WARN(1, "trying to bind memory to uninitialized GART !\n");
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!drm_dev_enter(&adev->ddev, &idx))
|
||||||
|
return 0;
|
||||||
|
|
||||||
t = offset / AMDGPU_GPU_PAGE_SIZE;
|
t = offset / AMDGPU_GPU_PAGE_SIZE;
|
||||||
|
|
||||||
for (i = 0; i < pages; i++) {
|
for (i = 0; i < pages; i++) {
|
||||||
@@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
|
|||||||
page_base += AMDGPU_GPU_PAGE_SIZE;
|
page_base += AMDGPU_GPU_PAGE_SIZE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
drm_dev_exit(idx);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
|
|||||||
{
|
{
|
||||||
void __iomem *ptr = (void *)cpu_pt_addr;
|
void __iomem *ptr = (void *)cpu_pt_addr;
|
||||||
uint64_t value;
|
uint64_t value;
|
||||||
int idx;
|
|
||||||
|
|
||||||
if (!drm_dev_enter(&adev->ddev, &idx))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The following is for PTE only. GART does not have PDEs.
|
* The following is for PTE only. GART does not have PDEs.
|
||||||
@@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
|
|||||||
value |= flags;
|
value |= flags;
|
||||||
writeq(value, ptr + (gpu_page_idx * 8));
|
writeq(value, ptr + (gpu_page_idx * 8));
|
||||||
|
|
||||||
drm_dev_exit(idx);
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -749,6 +743,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
|
|||||||
adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
|
adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
|
||||||
u64 vram_end = vram_addr + vram_size;
|
u64 vram_end = vram_addr + vram_size;
|
||||||
u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
|
u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
|
||||||
|
int idx;
|
||||||
|
|
||||||
|
if (!drm_dev_enter(&adev->ddev, &idx))
|
||||||
|
return;
|
||||||
|
|
||||||
flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
|
flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
|
||||||
flags |= AMDGPU_PTE_WRITEABLE;
|
flags |= AMDGPU_PTE_WRITEABLE;
|
||||||
@@ -770,6 +768,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
|
|||||||
flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
|
flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
|
||||||
/* Requires gart_ptb_gpu_pa to be 4K aligned */
|
/* Requires gart_ptb_gpu_pa to be 4K aligned */
|
||||||
amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
|
amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
|
||||||
|
drm_dev_exit(idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
|
|||||||
struct amdgpu_bo *bo = &vmbo->bo;
|
struct amdgpu_bo *bo = &vmbo->bo;
|
||||||
unsigned entries, ats_entries;
|
unsigned entries, ats_entries;
|
||||||
uint64_t addr;
|
uint64_t addr;
|
||||||
int r;
|
int r, idx;
|
||||||
|
|
||||||
/* Figure out our place in the hierarchy */
|
/* Figure out our place in the hierarchy */
|
||||||
if (ancestor->parent) {
|
if (ancestor->parent) {
|
||||||
@@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!drm_dev_enter(&adev->ddev, &idx))
|
||||||
|
return -ENODEV;
|
||||||
|
|
||||||
r = vm->update_funcs->map_table(vmbo);
|
r = vm->update_funcs->map_table(vmbo);
|
||||||
if (r)
|
if (r)
|
||||||
return r;
|
goto exit;
|
||||||
|
|
||||||
memset(¶ms, 0, sizeof(params));
|
memset(¶ms, 0, sizeof(params));
|
||||||
params.adev = adev;
|
params.adev = adev;
|
||||||
@@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
|
|||||||
|
|
||||||
r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT);
|
r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT);
|
||||||
if (r)
|
if (r)
|
||||||
return r;
|
goto exit;
|
||||||
|
|
||||||
addr = 0;
|
addr = 0;
|
||||||
if (ats_entries) {
|
if (ats_entries) {
|
||||||
@@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
|
|||||||
r = vm->update_funcs->update(¶ms, vmbo, addr, 0, ats_entries,
|
r = vm->update_funcs->update(¶ms, vmbo, addr, 0, ats_entries,
|
||||||
value, flags);
|
value, flags);
|
||||||
if (r)
|
if (r)
|
||||||
return r;
|
goto exit;
|
||||||
|
|
||||||
addr += ats_entries * 8;
|
addr += ats_entries * 8;
|
||||||
}
|
}
|
||||||
@@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
|
|||||||
r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries,
|
r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries,
|
||||||
value, flags);
|
value, flags);
|
||||||
if (r)
|
if (r)
|
||||||
return r;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
return vm->update_funcs->commit(¶ms, NULL);
|
r = vm->update_funcs->commit(¶ms, NULL);
|
||||||
|
exit:
|
||||||
|
drm_dev_exit(idx);
|
||||||
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
|
|||||||
struct amdgpu_vm *vm, bool immediate)
|
struct amdgpu_vm *vm, bool immediate)
|
||||||
{
|
{
|
||||||
struct amdgpu_vm_update_params params;
|
struct amdgpu_vm_update_params params;
|
||||||
int r;
|
int r, idx;
|
||||||
|
|
||||||
if (list_empty(&vm->relocated))
|
if (list_empty(&vm->relocated))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
if (!drm_dev_enter(&adev->ddev, &idx))
|
||||||
|
return -ENODEV;
|
||||||
|
|
||||||
memset(¶ms, 0, sizeof(params));
|
memset(¶ms, 0, sizeof(params));
|
||||||
params.adev = adev;
|
params.adev = adev;
|
||||||
params.vm = vm;
|
params.vm = vm;
|
||||||
@@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
|
|||||||
|
|
||||||
r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT);
|
r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT);
|
||||||
if (r)
|
if (r)
|
||||||
return r;
|
goto exit;
|
||||||
|
|
||||||
while (!list_empty(&vm->relocated)) {
|
while (!list_empty(&vm->relocated)) {
|
||||||
struct amdgpu_vm_bo_base *entry;
|
struct amdgpu_vm_bo_base *entry;
|
||||||
@@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
|
|||||||
r = vm->update_funcs->commit(¶ms, &vm->last_update);
|
r = vm->update_funcs->commit(¶ms, &vm->last_update);
|
||||||
if (r)
|
if (r)
|
||||||
goto error;
|
goto error;
|
||||||
|
drm_dev_exit(idx);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
amdgpu_vm_invalidate_pds(adev, vm);
|
amdgpu_vm_invalidate_pds(adev, vm);
|
||||||
|
exit:
|
||||||
|
drm_dev_exit(idx);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user