drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
We hit a soft hang while doing a memory pressure test on one NUMA system. After a quick look, this is because kfd invalidates/validates userptr memory frequently with the process_info lock held. It looks like updating the page table mapping uses too much CPU time. perf top says below, 75.81% [kernel] [k] __srcu_read_unlock 6.19% [amdgpu] [k] amdgpu_gmc_set_pte_pde 3.56% [kernel] [k] __srcu_read_lock 2.20% [amdgpu] [k] amdgpu_vm_cpu_update 2.20% [kernel] [k] __sg_page_iter_dma_next 2.15% [drm] [k] drm_dev_enter 1.70% [drm] [k] drm_prime_sg_to_dma_addr_array 1.18% [kernel] [k] __sg_alloc_table_from_pages 1.09% [drm] [k] drm_dev_exit So move drm_dev_enter/exit outside the gmc code; instead, let the callers do it. They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and gmc_init_pdb0. vm_bo_update_mapping already calls it. Signed-off-by: xinhui pan <xinhui.pan@amd.com> Reviewed-and-tested-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
		
							parent
							
								
									006c26a0f1
								
							
						
					
					
						commit
						b2fe31cf64
					
				| @ -34,6 +34,7 @@ | ||||
| #include <asm/set_memory.h> | ||||
| #endif | ||||
| #include "amdgpu.h" | ||||
| #include <drm/drm_drv.h> | ||||
| 
 | ||||
| /*
 | ||||
|  * GART | ||||
| @ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, | ||||
| 	u64 page_base; | ||||
| 	/* Starting from VEGA10, system bit must be 0 to mean invalid. */ | ||||
| 	uint64_t flags = 0; | ||||
| 	int idx; | ||||
| 
 | ||||
| 	if (!adev->gart.ready) { | ||||
| 		WARN(1, "trying to unbind memory from uninitialized GART !\n"); | ||||
| 		return -EINVAL; | ||||
| 	} | ||||
| 
 | ||||
| 	if (!drm_dev_enter(&adev->ddev, &idx)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	t = offset / AMDGPU_GPU_PAGE_SIZE; | ||||
| 	p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE; | ||||
| 	for (i = 0; i < pages; i++, p++) { | ||||
| @ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, | ||||
| 	for (i = 0; i < adev->num_vmhubs; i++) | ||||
| 		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); | ||||
| 
 | ||||
| 	drm_dev_exit(idx); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| @ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, | ||||
| { | ||||
| 	uint64_t page_base; | ||||
| 	unsigned i, j, t; | ||||
| 	int idx; | ||||
| 
 | ||||
| 	if (!adev->gart.ready) { | ||||
| 		WARN(1, "trying to bind memory to uninitialized GART !\n"); | ||||
| 		return -EINVAL; | ||||
| 	} | ||||
| 
 | ||||
| 	if (!drm_dev_enter(&adev->ddev, &idx)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	t = offset / AMDGPU_GPU_PAGE_SIZE; | ||||
| 
 | ||||
| 	for (i = 0; i < pages; i++) { | ||||
| @ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, | ||||
| 			page_base += AMDGPU_GPU_PAGE_SIZE; | ||||
| 		} | ||||
| 	} | ||||
| 	drm_dev_exit(idx); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, | ||||
| { | ||||
| 	void __iomem *ptr = (void *)cpu_pt_addr; | ||||
| 	uint64_t value; | ||||
| 	int idx; | ||||
| 
 | ||||
| 	if (!drm_dev_enter(&adev->ddev, &idx)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * The following is for PTE only. GART does not have PDEs. | ||||
| @ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, | ||||
| 	value |= flags; | ||||
| 	writeq(value, ptr + (gpu_page_idx * 8)); | ||||
| 
 | ||||
| 	drm_dev_exit(idx); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| @ -749,6 +743,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev) | ||||
| 		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; | ||||
| 	u64 vram_end = vram_addr + vram_size; | ||||
| 	u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo); | ||||
| 	int idx; | ||||
| 
 | ||||
| 	if (!drm_dev_enter(&adev->ddev, &idx)) | ||||
| 		return; | ||||
| 
 | ||||
| 	flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE; | ||||
| 	flags |= AMDGPU_PTE_WRITEABLE; | ||||
| @ -770,6 +768,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev) | ||||
| 	flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED; | ||||
| 	/* Requires gart_ptb_gpu_pa to be 4K aligned */ | ||||
| 	amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags); | ||||
| 	drm_dev_exit(idx); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  | ||||
| @ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, | ||||
| 	struct amdgpu_bo *bo = &vmbo->bo; | ||||
| 	unsigned entries, ats_entries; | ||||
| 	uint64_t addr; | ||||
| 	int r; | ||||
| 	int r, idx; | ||||
| 
 | ||||
| 	/* Figure out our place in the hierarchy */ | ||||
| 	if (ancestor->parent) { | ||||
| @ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, | ||||
| 			return r; | ||||
| 	} | ||||
| 
 | ||||
| 	if (!drm_dev_enter(&adev->ddev, &idx)) | ||||
| 		return -ENODEV; | ||||
| 
 | ||||
| 	r = vm->update_funcs->map_table(vmbo); | ||||
| 	if (r) | ||||
| 		return r; | ||||
| 		goto exit; | ||||
| 
 | ||||
| 	memset(¶ms, 0, sizeof(params)); | ||||
| 	params.adev = adev; | ||||
| @ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, | ||||
| 
 | ||||
| 	r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); | ||||
| 	if (r) | ||||
| 		return r; | ||||
| 		goto exit; | ||||
| 
 | ||||
| 	addr = 0; | ||||
| 	if (ats_entries) { | ||||
| @ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, | ||||
| 		r = vm->update_funcs->update(¶ms, vmbo, addr, 0, ats_entries, | ||||
| 					     value, flags); | ||||
| 		if (r) | ||||
| 			return r; | ||||
| 			goto exit; | ||||
| 
 | ||||
| 		addr += ats_entries * 8; | ||||
| 	} | ||||
| @ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, | ||||
| 		r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries, | ||||
| 					     value, flags); | ||||
| 		if (r) | ||||
| 			return r; | ||||
| 			goto exit; | ||||
| 	} | ||||
| 
 | ||||
| 	return vm->update_funcs->commit(¶ms, NULL); | ||||
| 	r = vm->update_funcs->commit(¶ms, NULL); | ||||
| exit: | ||||
| 	drm_dev_exit(idx); | ||||
| 	return r; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
| @ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, | ||||
| 			  struct amdgpu_vm *vm, bool immediate) | ||||
| { | ||||
| 	struct amdgpu_vm_update_params params; | ||||
| 	int r; | ||||
| 	int r, idx; | ||||
| 
 | ||||
| 	if (list_empty(&vm->relocated)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	if (!drm_dev_enter(&adev->ddev, &idx)) | ||||
| 		return -ENODEV; | ||||
| 
 | ||||
| 	memset(¶ms, 0, sizeof(params)); | ||||
| 	params.adev = adev; | ||||
| 	params.vm = vm; | ||||
| @ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, | ||||
| 
 | ||||
| 	r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); | ||||
| 	if (r) | ||||
| 		return r; | ||||
| 		goto exit; | ||||
| 
 | ||||
| 	while (!list_empty(&vm->relocated)) { | ||||
| 		struct amdgpu_vm_bo_base *entry; | ||||
| @ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, | ||||
| 	r = vm->update_funcs->commit(¶ms, &vm->last_update); | ||||
| 	if (r) | ||||
| 		goto error; | ||||
| 	drm_dev_exit(idx); | ||||
| 	return 0; | ||||
| 
 | ||||
| error: | ||||
| 	amdgpu_vm_invalidate_pds(adev, vm); | ||||
| exit: | ||||
| 	drm_dev_exit(idx); | ||||
| 	return r; | ||||
| } | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user