From f1403342ebdfcff3c3cf57ae476f19d3078f2767 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 12 Aug 2020 17:48:26 +0200 Subject: [PATCH] drm/amdgpu: revert "fix system hang issue during GPU reset" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The whole approach wasn't thought through till the end. We already had a reset lock like this in the past and it caused the same problems like this one. Completely revert the patch for now and add individual trylock protection to the hardware access functions as necessary. This reverts commit df9c8d1aa278c435c30a69b8f2418b4a52fcb929. Signed-off-by: Christian König Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 9 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 40 +- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 2 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 2 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 2 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 2 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 7 - drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 - drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 4 - drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 57 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 - drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 4 - drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 353 ++++-------------- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 11 +- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 3 +- drivers/gpu/drm/amd/amdgpu/atom.c | 1 - drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 10 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 10 +- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 +- drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 13 +- drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 13 +- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 16 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 4 - .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 +- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 2 +- .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 2 +- 39 files changed, 184 insertions(+), 469 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3e82a11577d9..08f80ca3b296 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -949,9 +949,9 @@ struct amdgpu_device { bool in_suspend; bool in_hibernate; - atomic_t in_gpu_reset; + bool in_gpu_reset; enum pp_mp1_state mp1_state; - struct rw_semaphore reset_sem; + struct mutex lock_reset; struct amdgpu_doorbell_index doorbell_index; struct mutex notifier_lock; @@ -1266,9 +1266,4 @@ static inline bool amdgpu_is_tmz(struct amdgpu_device *adev) return adev->gmc.tmz_enabled; } -static inline bool amdgpu_in_reset(struct amdgpu_device *adev) -{ - return atomic_read(&adev->in_gpu_reset) ? true : false; -} - #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 9738dccb1c2c..0effc1d46824 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -244,14 +244,11 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, if (cp_mqd_gfx9) bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9; - if (!down_read_trylock(&adev->reset_sem)) - return -EIO; - r = amdgpu_bo_create(adev, &bp, &bo); if (r) { dev_err(adev->dev, "failed to allocate BO for amdkfd (%d)\n", r); - goto err; + return r; } /* map the buffer */ @@ -286,7 +283,6 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, amdgpu_bo_unreserve(bo); - up_read(&adev->reset_sem); return 0; allocate_mem_kmap_bo_failed: @@ -295,25 +291,19 @@ allocate_mem_pin_bo_failed: amdgpu_bo_unreserve(bo); allocate_mem_reserve_bo_failed: amdgpu_bo_unref(&bo); -err: - up_read(&adev->reset_sem); + return r; } void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; - down_read(&adev->reset_sem); - amdgpu_bo_reserve(bo, true); amdgpu_bo_kunmap(bo); amdgpu_bo_unpin(bo); amdgpu_bo_unreserve(bo); amdgpu_bo_unref(&(bo)); - - up_read(&adev->reset_sem); } int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, @@ -345,14 +335,9 @@ int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj) { - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj; - down_read(&adev->reset_sem); - amdgpu_bo_unref(&bo); - - up_read(&adev->reset_sem); } uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, @@ -626,15 +611,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, /* This works for NO_HWS. TODO: need to handle without knowing VMID */ job->vmid = vmid; - if (!down_read_trylock(&adev->reset_sem)) { - ret = -EIO; - goto err_ib_sched; - } - ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); - up_read(&adev->reset_sem); - if (ret) { DRM_ERROR("amdgpu: failed to schedule IB.\n"); goto err_ib_sched; @@ -670,9 +648,6 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - if (!down_read_trylock(&adev->reset_sem)) - return -EIO; - if (adev->family == AMDGPU_FAMILY_AI) { int i; @@ -682,8 +657,6 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid) amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); } - up_read(&adev->reset_sem); - return 0; } @@ -692,18 +665,11 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid) struct amdgpu_device *adev = (struct amdgpu_device *)kgd; const uint32_t flush_type = 0; bool all_hub = false; - int ret = -EIO; if (adev->family == AMDGPU_FAMILY_AI) all_hub = true; - if (down_read_trylock(&adev->reset_sem)) { - ret = amdgpu_gmc_flush_gpu_tlb_pasid(adev, - pasid, flush_type, all_hub); - up_read(&adev->reset_sem); - } - - return ret; + return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); } bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index b0dcc800251e..bf927f432506 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -542,7 +542,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t temp; struct v10_compute_mqd *m = get_mqd(mqd); - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EIO; #if 0 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 275f20399373..744366c7ee85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -423,7 +423,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, unsigned long flags, end_jiffies; int retry; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EIO; acquire_queue(kgd, pipe_id, queue_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 4997189d8b36..feab4cc6e836 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -419,7 +419,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, int retry; struct vi_mqd *m = get_mqd(mqd); - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EIO; acquire_queue(kgd, pipe_id, queue_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index d5d997fe6aa4..e4c274bd35c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -539,7 +539,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t temp; struct v9_mqd *m = get_mqd(mqd); - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EIO; acquire_queue(kgd, pipe_id, queue_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index fcf72f337785..62cb510e2cc4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1194,9 +1194,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( return -EINVAL; } - if (!down_read_trylock(&adev->reset_sem)) - return -EIO; - *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); if (!*mem) { ret = -ENOMEM; @@ -1263,7 +1260,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (offset) *offset = amdgpu_bo_mmap_offset(bo); - up_read(&adev->reset_sem); return 0; allocate_init_user_pages_failed: @@ -1281,9 +1277,6 @@ err: sg_free_table(sg); kfree(sg); } - - up_read(&adev->reset_sem); - return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index a3b150304dae..a512ccbc4dea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1292,8 +1292,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) parser.adev = adev; parser.filp = filp; - down_read(&adev->reset_sem); - r = amdgpu_cs_parser_init(&parser, data); if (r) { DRM_ERROR("Failed to initialize parser %d!\n", r); @@ -1333,8 +1331,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); - up_read(&adev->reset_sem); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index d85d13f7a043..8842c55d4490 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -358,8 +358,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, if (atomic_read(&ctx->guilty)) out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY; - down_read(&adev->reset_sem); - /*query ue count*/ ras_counter = amdgpu_ras_query_error_count(adev, false); /*ras counter is monotonic increasing*/ @@ -375,8 +373,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, ctx->ras_counter_ce = ras_counter; } - up_read(&adev->reset_sem); - mutex_unlock(&mgr->lock); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 0af249a1e35b..35fed75a4397 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -101,14 +101,14 @@ static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file) file->private_data = adev; - down_read(&adev->reset_sem); + mutex_lock(&adev->lock_reset); if (adev->autodump.dumping.done) { reinit_completion(&adev->autodump.dumping); ret = 0; } else { ret = -EBUSY; } - up_read(&adev->reset_sem); + mutex_unlock(&adev->lock_reset); return ret; } @@ -127,7 +127,7 @@ static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_ poll_wait(file, &adev->autodump.gpu_hang, poll_table); - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return POLLIN | POLLRDNORM | POLLWRNORM; return 0; @@ -1242,7 +1242,7 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) } /* Avoid accidently unparking the sched thread during GPU reset */ - down_read(&adev->reset_sem); + mutex_lock(&adev->lock_reset); /* hold on the scheduler */ for (i = 0; i < AMDGPU_MAX_RINGS; i++) { @@ -1269,7 +1269,7 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) kthread_unpark(ring->sched.thread); } - up_read(&adev->reset_sem); + mutex_unlock(&adev->lock_reset); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); @@ -1459,7 +1459,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) return -ENOMEM; /* Avoid accidently unparking the sched thread during GPU reset */ - down_read(&adev->reset_sem); + mutex_lock(&adev->lock_reset); /* stop the scheduler */ kthread_park(ring->sched.thread); @@ -1500,7 +1500,7 @@ failure: /* restart the scheduler */ kthread_unpark(ring->sched.thread); - up_read(&adev->reset_sem); + mutex_unlock(&adev->lock_reset); ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bb7f0c8611f9..415e1a32b98c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1940,7 +1940,7 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) if (adev->ip_blocks[i].status.hw == true) break; - if (amdgpu_in_reset(adev) || adev->in_suspend) { + if (adev->in_gpu_reset || adev->in_suspend) { r = adev->ip_blocks[i].version->funcs->resume(adev); if (r) { DRM_ERROR("resume of IP block <%s> failed %d\n", @@ -2117,7 +2117,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) AMDGPU_RESET_MAGIC_NUM)) return true; - if (!amdgpu_in_reset(adev)) + if (!adev->in_gpu_reset) return false; /* @@ -3053,8 +3053,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->mn_lock); mutex_init(&adev->virt.vf_errors.lock); hash_init(adev->mn_hash); - init_rwsem(&adev->reset_sem); - atomic_set(&adev->in_gpu_reset, 0); + mutex_init(&adev->lock_reset); mutex_init(&adev->psp.mutex); mutex_init(&adev->notifier_lock); @@ -4082,11 +4081,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { if (need_full_reset) { /* post card */ - if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context)) { - dev_warn(tmp_adev->dev, "asic atom init failed!"); - r = -EAGAIN; - goto out; - } + if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context)) + DRM_WARN("asic atom init failed!"); if (!r) { dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); @@ -4176,18 +4172,16 @@ end: return r; } -static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive) +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock) { - if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0) - return false; - - if (hive) { - down_write_nest_lock(&adev->reset_sem, &hive->hive_lock); - } else { - down_write(&adev->reset_sem); - } + if (trylock) { + if (!mutex_trylock(&adev->lock_reset)) + return false; + } else + mutex_lock(&adev->lock_reset); atomic_inc(&adev->gpu_reset_counter); + adev->in_gpu_reset = true; switch (amdgpu_asic_reset_method(adev)) { case AMD_RESET_METHOD_MODE1: adev->mp1_state = PP_MP1_STATE_SHUTDOWN; @@ -4207,8 +4201,8 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) { amdgpu_vf_error_trans_all(adev); adev->mp1_state = PP_MP1_STATE_NONE; - atomic_set(&adev->in_gpu_reset, 0); - up_write(&adev->reset_sem); + adev->in_gpu_reset = false; + mutex_unlock(&adev->lock_reset); } static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev) @@ -4318,14 +4312,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * We always reset all schedulers for device and all devices for XGMI * hive so that should take care of them too. */ - hive = amdgpu_get_xgmi_hive(adev, false); - if (hive) { - if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) { - DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", - job ? job->base.id : -1, hive->hive_id); - return 0; - } - mutex_lock(&hive->hive_lock); + hive = amdgpu_get_xgmi_hive(adev, true); + if (hive && !mutex_trylock(&hive->reset_lock)) { + DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", + job ? job->base.id : -1, hive->hive_id); + mutex_unlock(&hive->hive_lock); + return 0; } /* @@ -4347,11 +4339,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* block all schedulers and reset given job's ring */ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { - if (!amdgpu_device_lock_adev(tmp_adev, hive)) { + if (!amdgpu_device_lock_adev(tmp_adev, !hive)) { DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress", job ? job->base.id : -1); - r = 0; - goto skip_recovery; + mutex_unlock(&hive->hive_lock); + return 0; } /* @@ -4484,9 +4476,8 @@ skip_sched_resume: amdgpu_device_unlock_adev(tmp_adev); } -skip_recovery: if (hive) { - atomic_set(&hive->in_reset, 0); + mutex_unlock(&hive->reset_lock); mutex_unlock(&hive->hive_lock); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 73cc68ab53d0..7f9e50247413 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -671,8 +671,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, bo_va = NULL; } - down_read(&adev->reset_sem); - switch (args->operation) { case AMDGPU_VA_OP_MAP: va_flags = amdgpu_gem_va_map_flags(adev, args->flags); @@ -702,8 +700,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, args->operation); - up_read(&adev->reset_sem); - error_backoff: ttm_eu_backoff_reservation(&ticket, &list); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 8ccd17d02cc6..a819360a4b6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -719,7 +719,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) * * also don't wait anymore for IRQ context * */ - if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt())) + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) goto failed_kiq_read; might_sleep(); @@ -777,7 +777,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) * * also don't wait anymore for IRQ context * */ - if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt())) + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) goto failed_kiq_write; might_sleep(); @@ -796,5 +796,5 @@ failed_undo: amdgpu_ring_undo(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); failed_kiq_write: - dev_warn(adev->dev, "failed to write reg:%x\n", reg); + pr_err("failed to write reg:%x\n", reg); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 75d37dfb51aa..937029ad5271 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -220,17 +220,17 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) trace_amdgpu_sched_run_job(job); - if (down_read_trylock(&ring->adev->reset_sem)) { + if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter)) + dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */ + + if (finished->error < 0) { + DRM_INFO("Skip scheduling IBs!\n"); + } else { r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, - &fence); - up_read(&ring->adev->reset_sem); + &fence); if (r) DRM_ERROR("Error scheduling IBs (%d)\n", r); - } else { - dma_fence_set_error(finished, -ECANCELED); - DRM_INFO("Skip scheduling IBs!\n"); } - /* if gpu reset, hw fence will be replaced here */ dma_fence_put(job->fence); job->fence = dma_fence_get(fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 58580a48b648..7619f1c3084d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1087,8 +1087,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, if (!fpriv) return; - down_read(&adev->reset_sem); - pm_runtime_get_sync(dev->dev); if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD) != NULL) @@ -1127,8 +1125,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); - - up_read(&adev->reset_sem); } /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 1705e328c6fc..65ad174bb976 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -163,7 +163,7 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev, enum amd_pm_state_type pm; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -172,8 +172,6 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) { if (adev->smu.ppt_funcs->get_current_power_state) pm = smu_get_current_power_state(&adev->smu); @@ -185,8 +183,6 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev, pm = adev->pm.dpm.user_state; } - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -205,7 +201,7 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev, enum amd_pm_state_type state; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; if (strncmp("battery", buf, strlen("battery")) == 0) @@ -223,8 +219,6 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) { mutex_lock(&adev->pm.mutex); adev->pm.dpm.user_state = state; @@ -238,9 +232,6 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev, amdgpu_pm_compute_clocks(adev); } - - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -316,7 +307,7 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev, enum amd_dpm_forced_level level = 0xff; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -325,8 +316,6 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) level = smu_get_performance_level(&adev->smu); else if (adev->powerplay.pp_funcs->get_performance_level) @@ -334,8 +323,6 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev, else level = adev->pm.dpm.forced_level; - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -362,7 +349,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, enum amd_dpm_forced_level current_level = 0xff; int ret = 0; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; if (strncmp("low", buf, strlen("low")) == 0) { @@ -393,8 +380,6 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) current_level = smu_get_performance_level(&adev->smu); else if (adev->powerplay.pp_funcs->get_performance_level) @@ -403,8 +388,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, if (current_level == level) { pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - ret = count; - goto pro_end; + return count; } if (adev->asic_type == CHIP_RAVEN) { @@ -425,8 +409,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, pr_err("Currently not in any profile mode!\n"); pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - ret = -EINVAL; - goto pro_end; + return -EINVAL; } if (is_support_sw_smu(adev)) { @@ -434,8 +417,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, if (ret) { pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - ret = -EINVAL; - goto pro_end; + return -EINVAL; } } else if (adev->powerplay.pp_funcs->force_performance_level) { mutex_lock(&adev->pm.mutex); @@ -443,16 +425,14 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, mutex_unlock(&adev->pm.mutex); pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - ret = -EINVAL; - goto pro_end; + return -EINVAL; } ret = amdgpu_dpm_force_performance_level(adev, level); if (ret) { mutex_unlock(&adev->pm.mutex); pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - ret = -EINVAL; - goto pro_end; + return -EINVAL; } else { adev->pm.dpm.forced_level = level; } @@ -461,9 +441,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); -pro_end: - up_read(&adev->reset_sem); - return ret; + return count; } static ssize_t amdgpu_get_pp_num_states(struct device *dev, @@ -475,7 +453,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, struct pp_states_info data; int i, buf_len, ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -519,7 +497,7 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev, enum amd_pm_state_type pm = 0; int i = 0, ret = 0; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -560,7 +538,7 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; if (adev->pp_force_state_enabled) @@ -580,7 +558,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, unsigned long idx; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; if (strlen(buf) == 1) @@ -606,7 +584,6 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, return ret; } - down_read(&adev->reset_sem); /* only set user selected power states */ if (state != POWER_STATE_TYPE_INTERNAL_BOOT && state != POWER_STATE_TYPE_DEFAULT) { @@ -614,8 +591,6 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, AMD_PP_TASK_ENABLE_USER_STATE, &state); adev->pp_force_state_enabled = true; } - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); } @@ -643,7 +618,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev, char *table = NULL; int size, ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -687,7 +662,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int ret = 0; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -696,21 +671,16 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) { ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count); if (ret) { pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - up_read(&adev->reset_sem); return ret; } } else if (adev->powerplay.pp_funcs->set_pp_table) amdgpu_dpm_set_pp_table(adev, buf, count); - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -845,7 +815,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, const char delimiter[3] = {' ', '\n', '\0'}; uint32_t type; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; if (count > 127) @@ -889,10 +859,6 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, return ret; } - ret = count; - - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) { ret = smu_od_edit_dpm_table(&adev->smu, type, parameter, parameter_size); @@ -900,8 +866,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, if (ret) { pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - ret = -EINVAL; - goto pro_end; + return -EINVAL; } } else { if (adev->powerplay.pp_funcs->odn_edit_dpm_table) { @@ -910,8 +875,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, if (ret) { pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - ret = -EINVAL; - goto pro_end; + return -EINVAL; } } @@ -922,22 +886,18 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, NULL); pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - ret = count; - goto pro_end; + return count; } else { pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - ret = -EINVAL; - goto pro_end; + return -EINVAL; } } } pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); -pro_end: - up_read(&adev->reset_sem); - return ret; + return count; } static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, @@ -949,7 +909,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, ssize_t size; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1003,7 +963,7 @@ static ssize_t amdgpu_set_pp_features(struct device *dev, uint64_t featuremask; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = kstrtou64(buf, 0, &featuremask); @@ -1018,13 +978,11 @@ static ssize_t amdgpu_set_pp_features(struct device *dev, return ret; } - down_read(&adev->reset_sem); if (is_support_sw_smu(adev)) { ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask); if (ret) { pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - up_read(&adev->reset_sem); return -EINVAL; } } else if (adev->powerplay.pp_funcs->set_ppfeature_status) { @@ -1032,12 +990,9 @@ static ssize_t amdgpu_set_pp_features(struct device *dev, if (ret) { pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); - up_read(&adev->reset_sem); return -EINVAL; } } - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1053,7 +1008,7 @@ static ssize_t amdgpu_get_pp_features(struct device *dev, ssize_t size; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1062,8 +1017,6 @@ static ssize_t amdgpu_get_pp_features(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) size = smu_sys_get_pp_feature_mask(&adev->smu, buf); else if (adev->powerplay.pp_funcs->get_ppfeature_status) @@ -1071,8 +1024,6 @@ static ssize_t amdgpu_get_pp_features(struct device *dev, else size = snprintf(buf, PAGE_SIZE, "\n"); - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1118,7 +1069,7 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, ssize_t size; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1127,8 +1078,6 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) size = smu_print_clk_levels(&adev->smu, SMU_SCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) @@ -1136,8 +1085,6 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, else size = snprintf(buf, PAGE_SIZE, "\n"); - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1190,7 +1137,7 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, int ret; uint32_t mask = 0; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = amdgpu_read_mask(buf, count, &mask); @@ -1203,15 +1150,11 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1230,7 +1173,7 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, ssize_t size; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1239,8 +1182,6 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) size = smu_print_clk_levels(&adev->smu, SMU_MCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) @@ -1248,8 +1189,6 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, else size = snprintf(buf, PAGE_SIZE, "\n"); - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1266,7 +1205,7 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, uint32_t mask = 0; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = amdgpu_read_mask(buf, count, &mask); @@ -1279,15 +1218,11 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1306,7 +1241,7 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev, ssize_t size; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1315,8 +1250,6 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) size = smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) @@ -1324,8 +1257,6 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev, else size = snprintf(buf, PAGE_SIZE, "\n"); - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1342,7 +1273,7 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, int ret; uint32_t mask = 0; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = amdgpu_read_mask(buf, count, &mask); @@ -1355,8 +1286,6 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) @@ -1364,8 +1293,6 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, else ret = 0; - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1384,7 +1311,7 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev, ssize_t size; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1393,8 +1320,6 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) size = smu_print_clk_levels(&adev->smu, SMU_FCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) @@ -1402,8 +1327,6 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev, else size = snprintf(buf, PAGE_SIZE, "\n"); - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1420,7 +1343,7 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, int ret; uint32_t mask = 0; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = amdgpu_read_mask(buf, count, &mask); @@ -1433,8 +1356,6 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) @@ -1442,8 +1363,6 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, else ret = 0; - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1462,7 +1381,7 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev, ssize_t size; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1471,8 +1390,6 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) size = smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) @@ -1480,8 +1397,6 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev, else size = snprintf(buf, PAGE_SIZE, "\n"); - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1498,7 +1413,7 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, int ret; uint32_t mask = 0; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = amdgpu_read_mask(buf, count, &mask); @@ -1511,8 +1426,6 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) @@ -1520,8 +1433,6 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, else ret = 0; - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1540,7 +1451,7 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, ssize_t size; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1549,8 +1460,6 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) size = smu_print_clk_levels(&adev->smu, SMU_PCIE, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) @@ -1558,8 +1467,6 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, else size = snprintf(buf, PAGE_SIZE, "\n"); - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1576,7 +1483,7 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, int ret; uint32_t mask = 0; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = amdgpu_read_mask(buf, count, &mask); @@ -1589,8 +1496,6 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask); else if (adev->powerplay.pp_funcs->force_clock_level) @@ -1598,8 +1503,6 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, else ret = 0; - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1618,7 +1521,7 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev, uint32_t value = 0; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1627,15 +1530,11 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK); else if (adev->powerplay.pp_funcs->get_sclk_od) value = amdgpu_dpm_get_sclk_od(adev); - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1652,7 +1551,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, int ret; long int value; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = kstrtol(buf, 0, &value); @@ -1666,8 +1565,6 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) { value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value); } else { @@ -1682,8 +1579,6 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, } } - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1699,7 +1594,7 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev, uint32_t value = 0; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1708,15 +1603,11 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK); else if (adev->powerplay.pp_funcs->get_mclk_od) value = amdgpu_dpm_get_mclk_od(adev); - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1733,7 +1624,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, int ret; long int value; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = kstrtol(buf, 0, &value); @@ -1747,8 +1638,6 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) { value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value); } else { @@ -1763,8 +1652,6 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, } } - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1800,7 +1687,7 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, ssize_t size; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -1809,8 +1696,6 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) size = smu_get_power_profile_mode(&adev->smu, buf); else if (adev->powerplay.pp_funcs->get_power_profile_mode) @@ -1818,8 +1703,6 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, else size = snprintf(buf, PAGE_SIZE, "\n"); - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1844,7 +1727,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, long int profile_mode = 0; const char delimiter[3] = {' ', '\n', '\0'}; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; tmp[0] = *(buf); @@ -1878,15 +1761,11 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true); else if (adev->powerplay.pp_funcs->set_power_profile_mode) ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size); - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1912,7 +1791,7 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int r, value, size = sizeof(value); - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; r = pm_runtime_get_sync(ddev->dev); @@ -1921,11 +1800,9 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev, return r; } - down_read(&adev->reset_sem); /* read the IP busy sensor */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size); - up_read(&adev->reset_sem); pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1952,7 +1829,7 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int r, value, size = sizeof(value); - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; r = pm_runtime_get_sync(ddev->dev); @@ -1961,14 +1838,10 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev, return r; } - down_read(&adev->reset_sem); - /* read the IP busy sensor */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size); - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -1999,7 +1872,7 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev, uint64_t count0 = 0, count1 = 0; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; if (adev->flags & AMD_IS_APU) @@ -2014,12 +1887,8 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev, return ret; } - down_read(&adev->reset_sem); - amdgpu_asic_get_pcie_usage(adev, &count0, &count1); - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); @@ -2044,7 +1913,7 @@ static ssize_t amdgpu_get_unique_id(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; if (adev->unique_id) @@ -2142,7 +2011,7 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev, ssize_t size = 0; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(ddev->dev); @@ -2151,12 +2020,10 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev, return ret; } - down_read(&adev->reset_sem); if (is_support_sw_smu(adev)) size = smu_sys_get_gpu_metrics(&adev->smu, &gpu_metrics); else if (adev->powerplay.pp_funcs->get_gpu_metrics) size = amdgpu_dpm_get_gpu_metrics(adev, &gpu_metrics); - up_read(&adev->reset_sem); if (size <= 0) goto out; @@ -2368,7 +2235,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, int channel = to_sensor_dev_attr(attr)->index; int r, temp = 0, size = sizeof(temp); - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; if (channel >= PP_TEMP_MAX) @@ -2380,8 +2247,6 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, return r; } - down_read(&adev->reset_sem); - switch (channel) { case PP_TEMP_JUNCTION: /* get current junction temperature */ @@ -2403,8 +2268,6 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, break; } - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2508,7 +2371,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, u32 pwm_mode = 0; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(adev->ddev->dev); @@ -2517,23 +2380,18 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) { pwm_mode = smu_get_fan_control_mode(&adev->smu); } else { if (!adev->powerplay.pp_funcs->get_fan_control_mode) { pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); - up_read(&adev->reset_sem); return -EINVAL; } pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); } - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2549,7 +2407,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, int err, ret; int value; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; err = kstrtoint(buf, 10, &value); @@ -2562,23 +2420,18 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) { smu_set_fan_control_mode(&adev->smu, value); } else { if (!adev->powerplay.pp_funcs->set_fan_control_mode) { pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); - up_read(&adev->reset_sem); return -EINVAL; } amdgpu_dpm_set_fan_control_mode(adev, value); } - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2608,7 +2461,7 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, u32 value; u32 pwm_mode; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; err = pm_runtime_get_sync(adev->ddev->dev); @@ -2617,15 +2470,11 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, return err; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) pwm_mode = smu_get_fan_control_mode(&adev->smu); else pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); - up_read(&adev->reset_sem); - if (pwm_mode != AMD_FAN_CTRL_MANUAL) { pr_info("manual fan speed control should be enabled first\n"); pm_runtime_mark_last_busy(adev->ddev->dev); @@ -2666,7 +2515,7 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, int err; u32 speed = 0; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; err = pm_runtime_get_sync(adev->ddev->dev); @@ -2675,8 +2524,6 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, return err; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) err = smu_get_fan_speed_percent(&adev->smu, &speed); else if (adev->powerplay.pp_funcs->get_fan_speed_percent) @@ -2684,8 +2531,6 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, else err = -EINVAL; - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2705,7 +2550,7 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, int err; u32 speed = 0; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; err = pm_runtime_get_sync(adev->ddev->dev); @@ -2714,8 +2559,6 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, return err; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) err = smu_get_fan_speed_rpm(&adev->smu, &speed); else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) @@ -2723,8 +2566,6 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, else err = -EINVAL; - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2743,7 +2584,7 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev, u32 size = sizeof(min_rpm); int r; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); @@ -2752,13 +2593,9 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev, return r; } - down_read(&adev->reset_sem); - r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM, (void *)&min_rpm, &size); - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2777,7 +2614,7 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev, u32 size = sizeof(max_rpm); int r; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); @@ -2786,13 +2623,9 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev, return r; } - down_read(&adev->reset_sem); - r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM, (void *)&max_rpm, &size); - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2810,7 +2643,7 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev, int err; u32 rpm = 0; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; err = pm_runtime_get_sync(adev->ddev->dev); @@ -2819,8 +2652,6 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev, return err; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) err = smu_get_fan_speed_rpm(&adev->smu, &rpm); else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) @@ -2828,8 +2659,6 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev, else err = -EINVAL; - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2848,7 +2677,7 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, u32 value; u32 pwm_mode; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; err = pm_runtime_get_sync(adev->ddev->dev); @@ -2857,15 +2686,11 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, return err; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) pwm_mode = smu_get_fan_control_mode(&adev->smu); else pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); - up_read(&adev->reset_sem); - if (pwm_mode != AMD_FAN_CTRL_MANUAL) { pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2879,8 +2704,6 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, return err; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) err = smu_set_fan_speed_rpm(&adev->smu, value); else if (adev->powerplay.pp_funcs->set_fan_speed_rpm) @@ -2888,8 +2711,6 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, else err = -EINVAL; - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2907,7 +2728,7 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev, u32 pwm_mode = 0; int ret; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; ret = pm_runtime_get_sync(adev->ddev->dev); @@ -2916,23 +2737,18 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev, return ret; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) { pwm_mode = smu_get_fan_control_mode(&adev->smu); } else { if (!adev->powerplay.pp_funcs->get_fan_control_mode) { pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); - up_read(&adev->reset_sem); return -EINVAL; } pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); } - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2949,7 +2765,7 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, int value; u32 pwm_mode; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; err = kstrtoint(buf, 10, &value); @@ -2969,22 +2785,17 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, return err; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) { smu_set_fan_control_mode(&adev->smu, pwm_mode); } else { if (!adev->powerplay.pp_funcs->set_fan_control_mode) { pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); - up_read(&adev->reset_sem); return -EINVAL; } amdgpu_dpm_set_fan_control_mode(adev, pwm_mode); } - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -2999,7 +2810,7 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, u32 vddgfx; int r, size = sizeof(vddgfx); - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); @@ -3008,11 +2819,9 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, return r; } - down_read(&adev->reset_sem); /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&vddgfx, &size); - up_read(&adev->reset_sem); pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -3038,7 +2847,7 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, u32 vddnb; int r, size = sizeof(vddnb); - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; /* only APUs have vddnb */ @@ -3051,11 +2860,9 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, return r; } - down_read(&adev->reset_sem); /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&vddnb, &size); - up_read(&adev->reset_sem); pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -3082,7 +2889,7 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, int r, size = sizeof(u32); unsigned uw; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); @@ -3091,11 +2898,9 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, return r; } - down_read(&adev->reset_sem); /* get the voltage */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size); - up_read(&adev->reset_sem); pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -3125,7 +2930,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, ssize_t size; int r; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); @@ -3134,8 +2939,6 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, return r; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) { smu_get_power_limit(&adev->smu, &limit, true); size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); @@ -3146,8 +2949,6 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, size = snprintf(buf, PAGE_SIZE, "\n"); } - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -3163,7 +2964,7 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, ssize_t size; int r; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); @@ -3172,8 +2973,6 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, return r; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) { smu_get_power_limit(&adev->smu, &limit, false); size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); @@ -3184,8 +2983,6 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, size = snprintf(buf, PAGE_SIZE, "\n"); } - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -3202,7 +2999,7 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, int err; u32 value; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; if (amdgpu_sriov_vf(adev)) @@ -3221,8 +3018,6 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, return err; } - down_read(&adev->reset_sem); - if (is_support_sw_smu(adev)) err = smu_set_power_limit(&adev->smu, value); else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) @@ -3230,8 +3025,6 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, else err = -EINVAL; - up_read(&adev->reset_sem); - pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -3249,7 +3042,7 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, uint32_t sclk; int r, size = sizeof(sclk); - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); @@ -3258,11 +3051,9 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, return r; } - down_read(&adev->reset_sem); /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&sclk, &size); - up_read(&adev->reset_sem); pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -3288,7 +3079,7 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, uint32_t mclk; int r, size = sizeof(mclk); - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; r = pm_runtime_get_sync(adev->ddev->dev); @@ -3297,11 +3088,9 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, return r; } - down_read(&adev->reset_sem); /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK, (void *)&mclk, &size); - up_read(&adev->reset_sem); pm_runtime_mark_last_busy(adev->ddev->dev); pm_runtime_put_autosuspend(adev->ddev->dev); @@ -4188,7 +3977,7 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) u32 flags = 0; int r; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EPERM; r = pm_runtime_get_sync(dev->dev); @@ -4204,7 +3993,6 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) return 0; } - down_read(&adev->reset_sem); if (!is_support_sw_smu(adev) && adev->powerplay.pp_funcs->debugfs_print_current_performance_level) { mutex_lock(&adev->pm.mutex); @@ -4217,13 +4005,10 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) } else { r = amdgpu_debugfs_pm_info_pp(m, adev); } - up_read(&adev->reset_sem); if (r) goto out; - down_read(&adev->reset_sem); amdgpu_device_ip_get_clockgating_state(adev, &flags); - up_read(&adev->reset_sem); seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags); amdgpu_parse_cg_state(m, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 116a89990f39..aa1e77c60c0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1869,7 +1869,7 @@ static int psp_load_smu_fw(struct psp_context *psp) return 0; - if (amdgpu_in_reset(adev) && ras && ras->supported) { + if (adev->in_gpu_reset && ras && ras->supported) { ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD); if (ret) { DRM_WARN("Failed to set MP1 state prepare for reload\n"); @@ -1984,7 +1984,7 @@ static int psp_load_fw(struct amdgpu_device *adev) int ret; struct psp_context *psp = &adev->psp; - if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) { + if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) { psp_ring_stop(psp, PSP_RING_TYPE__KM); /* should not destroy ring, only stop */ goto skip_memalloc; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index cd1403f83dcf..f09082578865 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2079,7 +2079,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, amdgpu_ras_request_reset_on_boot(adev, ras_block->block); return 0; - } else if (adev->in_suspend || amdgpu_in_reset(adev)) { + } else if (adev->in_suspend || adev->in_gpu_reset) { /* in resume phase, if fail to enable ras, * clean up all ras fs nodes, and disable ras */ goto cleanup; @@ -2088,7 +2088,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, } /* in resume phase, no need to create ras fs node */ - if (adev->in_suspend || amdgpu_in_reset(adev)) + if (adev->in_suspend || adev->in_gpu_reset) return 0; if (ih_info->cb) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index c7421aa32946..2d502e98fad0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -2098,7 +2098,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) uint64_t size; int r; - if (!adev->mman.initialized || amdgpu_in_reset(adev) || + if (!adev->mman.initialized || adev->in_gpu_reset || adev->mman.buffer_funcs_enabled == enable) return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 039245c98ff8..183743c5fb7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -628,8 +628,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) struct amdgpu_firmware_info *ucode = NULL; /* for baremetal, the ucode is allocated in gtt, so don't need to fill the bo when reset/suspend */ - if (!amdgpu_sriov_vf(adev) && - (amdgpu_in_reset(adev) || adev->in_suspend)) + if (!amdgpu_sriov_vf(adev) && (adev->in_gpu_reset || adev->in_suspend)) return 0; /* * if SMU loaded firmware, it needn't add SMC, UVD, and VCE diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 1e211544f2dc..ae720a6dc5a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -93,7 +93,7 @@ failed_undo: amdgpu_ring_undo(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); failed_kiq: - dev_warn(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1); + pr_err("failed to write reg %x wait reg %x\n", reg0, reg1); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index b2046c3a404d..f826945989c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -325,9 +325,9 @@ static inline bool is_virtual_machine(void) #define amdgpu_sriov_is_pp_one_vf(adev) \ ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF) #define amdgpu_sriov_is_debug(adev) \ - ((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug) + ((!adev->in_gpu_reset) && adev->virt.tdr_debug) #define amdgpu_sriov_is_normal(adev) \ - ((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug)) + ((!adev->in_gpu_reset) && (!adev->virt.tdr_debug)) bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 67a756f4337b..cd6e6eb7d966 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -372,7 +372,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo tmp->hive_id = adev->gmc.xgmi.hive_id; INIT_LIST_HEAD(&tmp->device_list); mutex_init(&tmp->hive_lock); - atomic_set(&tmp->in_reset, 0); + mutex_init(&tmp->reset_lock); task_barrier_init(&tmp->tb); if (lock) @@ -397,7 +397,6 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) hive->hi_req_gpu : adev; bool is_hi_req = pstate == AMDGPU_XGMI_PSTATE_MAX_VEGA20; bool init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN; - bool locked; /* fw bug so temporarily disable pstate switching */ return 0; @@ -405,9 +404,7 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) if (!hive || adev->asic_type != CHIP_VEGA20) return 0; - locked = atomic_read(&hive->in_reset) ? false : true; - if (locked) - mutex_lock(&hive->hive_lock); + mutex_lock(&hive->hive_lock); if (is_hi_req) hive->hi_req_count++; @@ -442,8 +439,7 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) adev : NULL; } out: - if (locked) - mutex_unlock(&hive->hive_lock); + mutex_unlock(&hive->hive_lock); return ret; } @@ -598,6 +594,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) if(!(--hive->number_devices)){ amdgpu_xgmi_sysfs_destroy(adev, hive); mutex_destroy(&hive->hive_lock); + mutex_destroy(&hive->reset_lock); } return psp_xgmi_terminate(&adev->psp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 61720cd4a1ee..6999eab16a72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -30,8 +30,7 @@ struct amdgpu_hive_info { uint64_t hive_id; struct list_head device_list; int number_devices; - struct mutex hive_lock; - atomic_t in_reset; + struct mutex hive_lock, reset_lock; struct kobject *kobj; struct device_attribute dev_attr; struct amdgpu_device *adev; diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 8341bd965202..4cfc786699c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -755,7 +755,6 @@ static void atom_op_jump(atom_exec_context *ctx, int *ptr, int arg) /* jiffies wrap around we will just wait a little longer */ ctx->last_jump_jiffies = jiffies; } - schedule(); } else { ctx->last_jump = ctx->start + target; ctx->last_jump_jiffies = jiffies; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index de6e6de41867..e87d43537013 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6201,7 +6201,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) struct v10_gfx_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.gfx_ring[0]; - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!adev->in_gpu_reset && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); @@ -6213,7 +6213,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.me.mqd_backup[mqd_idx]) memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); - } else if (amdgpu_in_reset(adev)) { + } else if (adev->in_gpu_reset) { /* reset mqd with the backup copy */ if (adev->gfx.me.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); @@ -6566,7 +6566,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) gfx_v10_0_kiq_setting(ring); - if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ + if (adev->in_gpu_reset) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); @@ -6602,7 +6602,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring) struct v10_compute_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!adev->in_gpu_reset && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); @@ -6612,7 +6612,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring) if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); - } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ + } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 7df567a6656d..14fd04b699da 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4633,7 +4633,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) gfx_v8_0_kiq_setting(ring); - if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ + if (adev->in_gpu_reset) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); @@ -4670,7 +4670,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) struct vi_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!adev->in_gpu_reset && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; @@ -4682,7 +4682,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); - } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ + } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 93c63ff3b35e..2c5bb282cc01 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3686,7 +3686,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) gfx_v9_0_kiq_setting(ring); - if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ + if (adev->in_gpu_reset) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); @@ -3724,7 +3724,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) struct v9_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!adev->in_gpu_reset && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; @@ -3736,7 +3736,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); - } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ + } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); @@ -3930,7 +3930,7 @@ static int gfx_v9_0_hw_fini(void *handle) /* Use deinitialize sequence from CAIL when unbinding device from driver, * otherwise KIQ is hanging when binding back */ - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!adev->in_gpu_reset && !adev->in_suspend) { mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, adev->gfx.kiq.ring.me, adev->gfx.kiq.ring.pipe, @@ -4088,7 +4088,7 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) * * also don't wait anymore for IRQ context * */ - if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt())) + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) goto failed_kiq_read; might_sleep(); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 9d3b1245a339..ec8c0af39553 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -287,7 +287,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, */ if (adev->gfx.kiq.ring.sched.ready && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && - !amdgpu_in_reset(adev)) { + !adev->in_gpu_reset) { struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; const unsigned eng = 17; @@ -312,7 +312,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if (!adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready || - amdgpu_in_reset(adev) || + adev->in_gpu_reset || ring->sched.ready == false) { gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0); mutex_unlock(&adev->mman.gtt_window_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 80c146df338a..3ce5c1d2fdf2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -434,7 +434,7 @@ static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, int vmid; unsigned int tmp; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EIO; for (vmid = 1; vmid < 16; vmid++) { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 9ab65ca7df77..3e6615f9d39c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -635,7 +635,7 @@ static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, int vmid; unsigned int tmp; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EIO; for (vmid = 1; vmid < 16; vmid++) { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 773ee11b3d17..6a780b674018 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -501,7 +501,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, */ if (adev->gfx.kiq.ring.sched.ready && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && - !amdgpu_in_reset(adev)) { + !adev->in_gpu_reset) { uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng; uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; @@ -596,7 +596,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, struct amdgpu_ring *ring = &adev->gfx.kiq.ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq; - if (amdgpu_in_reset(adev)) + if (adev->in_gpu_reset) return -EIO; if (ring->sched.ready) { @@ -633,8 +633,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, spin_unlock(&adev->gfx.kiq.ring_lock); r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); if (r < 1) { - dev_info(adev->dev, - "wait for kiq fence error: %ld\n", r); + DRM_ERROR("wait for kiq fence error: %ld.\n", r); return -ETIME; } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index fe31cbeccfe9..5fd67e1cc2a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -238,16 +238,20 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT; + int locked; /* block amdgpu_gpu_recover till msg FLR COMPLETE received, * otherwise the mailbox msg will be ruined/reseted by * the VF FLR. * - * we can unlock the reset_sem to allow "amdgpu_job_timedout" + * we can unlock the lock_reset to allow "amdgpu_job_timedout" * to run gpu_recover() after FLR_NOTIFICATION_CMPL received * which means host side had finished this VF's FLR. */ - down_read(&adev->reset_sem); + locked = mutex_trylock(&adev->lock_reset); + if (locked) + adev->in_gpu_reset = true; + do { if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) goto flr_done; @@ -257,7 +261,10 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) } while (timeout > 1); flr_done: - up_read(&adev->reset_sem); + if (locked) { + adev->in_gpu_reset = false; + mutex_unlock(&adev->lock_reset); + } /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 6f55172e8337..ce2bf1fb79ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -259,16 +259,20 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT; + int locked; /* block amdgpu_gpu_recover till msg FLR COMPLETE received, * otherwise the mailbox msg will be ruined/reseted by * the VF FLR. * - * we can unlock the reset_sem to allow "amdgpu_job_timedout" + * we can unlock the lock_reset to allow "amdgpu_job_timedout" * to run gpu_recover() after FLR_NOTIFICATION_CMPL received * which means host side had finished this VF's FLR. */ - down_read(&adev->reset_sem); + locked = mutex_trylock(&adev->lock_reset); + if (locked) + adev->in_gpu_reset = true; + do { if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) goto flr_done; @@ -278,7 +282,10 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) } while (timeout > 1); flr_done: - up_read(&adev->reset_sem); + if (locked) { + adev->in_gpu_reset = false; + mutex_unlock(&adev->lock_reset); + } /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 7ad1537820b5..e0e60b0d0669 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -304,17 +304,15 @@ static void deallocate_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) { - if (!dqm->is_resetting) { - /* On GFX v7, CP doesn't flush TC at dequeue */ - if (q->device->device_info->asic_family == CHIP_HAWAII) - if (flush_texture_cache_nocpsch(q->device, qpd)) - pr_err("Failed to flush TC\n"); + /* On GFX v7, CP doesn't flush TC at dequeue */ + if (q->device->device_info->asic_family == CHIP_HAWAII) + if (flush_texture_cache_nocpsch(q->device, qpd)) + pr_err("Failed to flush TC\n"); - kfd_flush_tlb(qpd_to_pdd(qpd)); + kfd_flush_tlb(qpd_to_pdd(qpd)); - /* Release the vmid mapping */ - set_pasid_vmid_mapping(dqm, 0, qpd->vmid); - } + /* Release the vmid mapping */ + set_pasid_vmid_mapping(dqm, 0, qpd->vmid); dqm->vmid_pasid[qpd->vmid] = 0; qpd->vmid = 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index ee2258404c8f..40695d52e9a8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1551,10 +1551,6 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, void kfd_flush_tlb(struct kfd_process_device *pdd) { struct kfd_dev *dev = pdd->dev; - struct device_queue_manager *dqm = dev->dqm; - - if (dqm->is_resetting) - return; if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { /* Nothing to flush until a VMID is assigned, which diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 031b7eb55356..653b4a0d51d8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1658,7 +1658,7 @@ static int dm_suspend(void *handle) struct amdgpu_display_manager *dm = &adev->dm; int ret = 0; - if (amdgpu_in_reset(adev)) { + if (adev->in_gpu_reset) { mutex_lock(&dm->dc_lock); dm->cached_dc_state = dc_copy_state(dm->dc->current_state); @@ -1844,7 +1844,7 @@ static int dm_resume(void *handle) struct dc_state *dc_state; int i, r, j; - if (amdgpu_in_reset(adev)) { + if (adev->in_gpu_reset) { dc_state = dm->cached_dc_state; r = dm_dmub_hw_init(adev); diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index f87a73cb7ec5..7d17c4f1b489 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -1110,7 +1110,7 @@ static int smu_disable_dpms(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; int ret = 0; bool use_baco = !smu->is_apu && - ((amdgpu_in_reset(adev) && + ((adev->in_gpu_reset && (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) || ((adev->in_runpm || adev->in_hibernate) && amdgpu_asic_supports_baco(adev))); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index da84012b7fd5..c7216362b68d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -489,7 +489,7 @@ static int vega20_setup_asic_task(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); int ret = 0; - bool use_baco = (amdgpu_in_reset(adev) && + bool use_baco = (adev->in_gpu_reset && (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) || (adev->in_runpm && amdgpu_asic_supports_baco(adev));