drm/amdgpu: Move reset domain init before calling RREG32
amdgpu_detect_virtualization reads a register, and amdgpu_device_rreg accesses
adev->reset_domain->sem if the kernel is built with CONFIG_LOCKDEP. At that
point in amdgpu_device_init the reset domain has not been created yet, which
causes a random boot hang on Vega10; the backtrace is shown below. A random
NULL pointer access backtrace may also occur if amdgpu_sriov_runtime is true.
Move the amdgpu_reset_create_reset_domain call ahead of any call to RREG32.
BUG: kernel NULL pointer dereference, address:
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP NOPTI
Workqueue: events work_for_cpu_fn
RIP: 0010:down_read_trylock+0x13/0xf0
Call Trace:
<TASK>
amdgpu_device_skip_hw_access+0x38/0x80 [amdgpu]
amdgpu_device_rreg+0x1b/0x170 [amdgpu]
amdgpu_detect_virtualization+0x73/0x100 [amdgpu]
amdgpu_device_init.cold.60+0xbe/0x16b1 [amdgpu]
? pci_bus_read_config_word+0x43/0x70
amdgpu_driver_load_kms+0x15/0x120 [amdgpu]
amdgpu_pci_probe+0x1a1/0x3a0 [amdgpu]
Fixes: d0fb18b535 ("drm/amdgpu: Move reset sem into reset_domain")
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
committed by
Alex Deucher
parent
72a98763b4
commit
436afdfa35
@@ -3666,6 +3666,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
||||
if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
|
||||
adev->enable_mes = true;
|
||||
|
||||
/*
|
||||
* Reset domain needs to be present early, before XGMI hive discovered
|
||||
* (if any) and intitialized to use reset sem and in_gpu reset flag
|
||||
* early on during init and before calling to RREG32.
|
||||
*/
|
||||
adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
|
||||
if (!adev->reset_domain)
|
||||
return -ENOMEM;
|
||||
|
||||
/* detect hw virtualization here */
|
||||
amdgpu_detect_virtualization(adev);
|
||||
|
||||
@@ -3675,15 +3684,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset domain needs to be present early, before XGMI hive discovered
|
||||
* (if any) and intitialized to use reset sem and in_gpu reset flag
|
||||
* early on during init.
|
||||
*/
|
||||
adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE ,"amdgpu-reset-dev");
|
||||
if (!adev->reset_domain)
|
||||
return -ENOMEM;
|
||||
|
||||
/* early init functions */
|
||||
r = amdgpu_device_ip_early_init(adev);
|
||||
if (r)
|
||||
|
||||
Reference in New Issue
Block a user