drm/amdgpu: Revert commit ce316fa55e.

In preparation for doing XGMI reset synchronization using a task barrier,
revert the per-CPU BACO reset scheduling introduced by ce316fa55e.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Le Ma <Le.Ma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Parent: f06a58db92
Commit: 041a62bc06
				| @ -994,8 +994,6 @@ struct amdgpu_device { | ||||
| 
 | ||||
| 	bool                            pm_sysfs_en; | ||||
| 	bool                            ucode_sysfs_en; | ||||
| 
 | ||||
| 	bool				in_baco; | ||||
| }; | ||||
| 
 | ||||
| static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) | ||||
|  | ||||
| @ -3797,18 +3797,13 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, | ||||
| 	return r; | ||||
| } | ||||
| 
 | ||||
| static int amdgpu_do_asic_reset(struct amdgpu_device *adev, | ||||
| 			       struct amdgpu_hive_info *hive, | ||||
| static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, | ||||
| 			       struct list_head *device_list_handle, | ||||
| 			       bool *need_full_reset_arg) | ||||
| { | ||||
| 	struct amdgpu_device *tmp_adev = NULL; | ||||
| 	bool need_full_reset = *need_full_reset_arg, vram_lost = false; | ||||
| 	int r = 0; | ||||
| 	int cpu = smp_processor_id(); | ||||
| 	bool use_baco = | ||||
| 		(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ? | ||||
| 		true : false; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * ASIC reset has to be done on all HGMI hive nodes ASAP | ||||
| @ -3816,24 +3811,21 @@ static int amdgpu_do_asic_reset(struct amdgpu_device *adev, | ||||
| 	 */ | ||||
| 	if (need_full_reset) { | ||||
| 		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { | ||||
| 			/*
 | ||||
| 			 * For XGMI run all resets in parallel to speed up the | ||||
| 			 * process by scheduling the highpri wq on different | ||||
| 			 * cpus. For XGMI with baco reset, all nodes must enter | ||||
| 			 * baco within close proximity before anyone exit. | ||||
| 			 */ | ||||
| 			/* For XGMI run all resets in parallel to speed up the process */ | ||||
| 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { | ||||
| 				if (!queue_work_on(cpu, system_highpri_wq, | ||||
| 						   &tmp_adev->xgmi_reset_work)) | ||||
| 				if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work)) | ||||
| 					r = -EALREADY; | ||||
| 				cpu = cpumask_next(cpu, cpu_online_mask); | ||||
| 			} else | ||||
| 				r = amdgpu_asic_reset(tmp_adev); | ||||
| 			if (r) | ||||
| 
 | ||||
| 			if (r) { | ||||
| 				DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s", | ||||
| 					 r, tmp_adev->ddev->unique); | ||||
| 				break; | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		/* For XGMI wait for all work to complete before proceed */ | ||||
| 		/* For XGMI wait for all resets to complete before proceed */ | ||||
| 		if (!r) { | ||||
| 			list_for_each_entry(tmp_adev, device_list_handle, | ||||
| 					    gmc.xgmi.head) { | ||||
| @ -3842,53 +3834,9 @@ static int amdgpu_do_asic_reset(struct amdgpu_device *adev, | ||||
| 					r = tmp_adev->asic_reset_res; | ||||
| 					if (r) | ||||
| 						break; | ||||
| 					if (use_baco) | ||||
| 						tmp_adev->in_baco = true; | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * For XGMI with baco reset, need exit baco phase by scheduling | ||||
| 		 * xgmi_reset_work one more time. PSP reset and sGPU skips this | ||||
| 		 * phase. Not assume the situation that PSP reset and baco reset | ||||
| 		 * coexist within an XGMI hive. | ||||
| 		 */ | ||||
| 
 | ||||
| 		if (!r && use_baco) { | ||||
| 			cpu = smp_processor_id(); | ||||
| 			list_for_each_entry(tmp_adev, device_list_handle, | ||||
| 					    gmc.xgmi.head) { | ||||
| 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { | ||||
| 					if (!queue_work_on(cpu, | ||||
| 						system_highpri_wq, | ||||
| 						&tmp_adev->xgmi_reset_work)) | ||||
| 						r = -EALREADY; | ||||
| 					if (r) | ||||
| 						break; | ||||
| 					cpu = cpumask_next(cpu, cpu_online_mask); | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		if (!r && use_baco) { | ||||
| 			list_for_each_entry(tmp_adev, device_list_handle, | ||||
| 					    gmc.xgmi.head) { | ||||
| 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { | ||||
| 					flush_work(&tmp_adev->xgmi_reset_work); | ||||
| 					r = tmp_adev->asic_reset_res; | ||||
| 					if (r) | ||||
| 						break; | ||||
| 					tmp_adev->in_baco = false; | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		if (r) { | ||||
| 			DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s", | ||||
| 				 r, tmp_adev->ddev->unique); | ||||
| 			goto end; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if (!r && amdgpu_ras_intr_triggered()) | ||||
| @ -4182,8 +4130,7 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */ | ||||
| 		if (r) | ||||
| 			adev->asic_reset_res = r; | ||||
| 	} else { | ||||
| 		r  = amdgpu_do_asic_reset(adev, hive, device_list_handle, | ||||
| 					  &need_full_reset); | ||||
| 		r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset); | ||||
| 		if (r && r == -EAGAIN) | ||||
| 			goto retry; | ||||
| 	} | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user