drm/amdgpu: refine create and release logic of hive info

Change to dynamically create and release hive info object,
which help driver support more hives in the future.

v2:
Change to save hive object pointer in adev, to avoid locking
xgmi_mutex every time when calling amdgpu_get_xgmi_hive.

v3:
1. Change type of hive object pointer in adev from void* to
amdgpu_hive_info*.
2. remove unnecessary variable initialization.

Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Dennis Li
2020-08-18 18:44:17 +08:00
committed by Alex Deucher
parent aac891685d
commit d95e8e97e2
5 changed files with 132 additions and 107 deletions

View File

@@ -2857,7 +2857,7 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
{
struct amdgpu_device *adev =
container_of(__work, struct amdgpu_device, xgmi_reset_work);
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
/* It's a bug to not have a hive within this function */
if (WARN_ON(!hive))
@@ -2895,6 +2895,7 @@ fail:
if (adev->asic_reset_res)
DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
adev->asic_reset_res, adev->ddev->unique);
amdgpu_put_xgmi_hive(hive);
}
static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
@@ -4339,11 +4340,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
* We always reset all schedulers for device and all devices for XGMI
* hive so that should take care of them too.
*/
hive = amdgpu_get_xgmi_hive(adev, false);
hive = amdgpu_get_xgmi_hive(adev);
if (hive) {
if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
job ? job->base.id : -1, hive->hive_id);
amdgpu_put_xgmi_hive(hive);
return 0;
}
mutex_lock(&hive->hive_lock);
@@ -4509,6 +4511,7 @@ skip_recovery:
if (hive) {
atomic_set(&hive->in_reset, 0);
mutex_unlock(&hive->hive_lock);
amdgpu_put_xgmi_hive(hive);
}
if (r)