diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index ce5ca304dba9..fa572ba7f9fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1439,8 +1439,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, list_add_tail(&vm->vm_list_node, &(vm->process_info->vm_list_head)); vm->process_info->n_vms++; - - *ef = dma_fence_get(&vm->process_info->eviction_fence->base); + if (ef) + *ef = dma_fence_get(&vm->process_info->eviction_fence->base); mutex_unlock(&vm->process_info->lock); return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d07acf1b2f93..d665ecdcd12f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1702,12 +1702,15 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm, &p->kgd_process_info, - &ef); + p->ef ? NULL : &ef); if (ret) { dev_err(dev->adev->dev, "Failed to create process VM object\n"); return ret; } - RCU_INIT_POINTER(p->ef, ef); + + if (!p->ef) + RCU_INIT_POINTER(p->ef, ef); + pdd->drm_priv = drm_file->private_data; ret = kfd_process_device_reserve_ib_mem(pdd); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 60c617fcc97e..6b5e2206e687 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2972,10 +2972,11 @@ static int dm_suspend(void *handle) hpd_rx_irq_work_suspend(dm); - if (adev->dm.dc->caps.ips_support) - dc_allow_idle_optimizations(adev->dm.dc, true); - dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); + + if (dm->dc->caps.ips_support && adev->in_s0ix) + dc_allow_idle_optimizations(dm->dc, true); + dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3); return 0; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 5c39390ecbd5..a88f1b6ea64c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5065,11 +5065,26 @@ static bool update_planes_and_stream_v3(struct dc *dc, return true; } +static void clear_update_flags(struct dc_surface_update *srf_updates, + int surface_count, struct dc_stream_state *stream) +{ + int i; + + if (stream) + stream->update_flags.raw = 0; + + for (i = 0; i < surface_count; i++) + if (srf_updates[i].surface) + srf_updates[i].surface->update_flags.raw = 0; +} + bool dc_update_planes_and_stream(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, struct dc_stream_state *stream, struct dc_stream_update *stream_update) { + bool ret = false; + dc_exit_ips_for_hw_access(dc); /* * update planes and stream version 3 separates FULL and FAST updates @@ -5086,10 +5101,16 @@ bool dc_update_planes_and_stream(struct dc *dc, * features as they are now transparent to the new sequence. */ if (dc->ctx->dce_version >= DCN_VERSION_4_01) - return update_planes_and_stream_v3(dc, srf_updates, + ret = update_planes_and_stream_v3(dc, srf_updates, surface_count, stream, stream_update); - return update_planes_and_stream_v2(dc, srf_updates, + else + ret = update_planes_and_stream_v2(dc, srf_updates, surface_count, stream, stream_update); + + if (ret) + clear_update_flags(srf_updates, surface_count, stream); + + return ret; } void dc_commit_updates_for_stream(struct dc *dc, @@ -5099,6 +5120,8 @@ void dc_commit_updates_for_stream(struct dc *dc, struct dc_stream_update *stream_update, struct dc_state *state) { + bool ret = false; + dc_exit_ips_for_hw_access(dc); /* TODO: Since change commit sequence can have a huge impact, * we decided to only enable it for DCN3x. However, as soon as @@ -5106,17 +5129,17 @@ void dc_commit_updates_for_stream(struct dc *dc, * the new sequence for all ASICs. */ if (dc->ctx->dce_version >= DCN_VERSION_4_01) { - update_planes_and_stream_v3(dc, srf_updates, surface_count, + ret = update_planes_and_stream_v3(dc, srf_updates, surface_count, stream, stream_update); - return; - } - if (dc->ctx->dce_version >= DCN_VERSION_3_2) { - update_planes_and_stream_v2(dc, srf_updates, surface_count, + } else if (dc->ctx->dce_version >= DCN_VERSION_3_2) { + ret = update_planes_and_stream_v2(dc, srf_updates, surface_count, stream, stream_update); - return; - } - update_planes_and_stream_v1(dc, srf_updates, surface_count, stream, - stream_update, state); + } else + ret = update_planes_and_stream_v1(dc, srf_updates, surface_count, stream, + stream_update, state); + + if (ret) + clear_update_flags(srf_updates, surface_count, stream); } uint8_t dc_get_current_stream_count(struct dc *dc) diff --git a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h index 9118fcddbf11..227bf0e84a13 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h @@ -60,7 +60,7 @@ struct vi_dpm_level { struct vi_dpm_table { uint32_t count; - struct vi_dpm_level dpm_level[] __counted_by(count); + struct vi_dpm_level dpm_level[]; }; #define PCIE_PERF_REQ_REMOVE_REGISTRY 0 @@ -91,7 +91,7 @@ struct phm_set_power_state_input { struct phm_clock_array { uint32_t count; - uint32_t values[] __counted_by(count); + uint32_t values[]; }; struct phm_clock_voltage_dependency_record { @@ -123,7 +123,7 @@ struct phm_acpclock_voltage_dependency_record { struct phm_clock_voltage_dependency_table { uint32_t count; - struct phm_clock_voltage_dependency_record entries[] __counted_by(count); + struct phm_clock_voltage_dependency_record entries[]; }; struct phm_phase_shedding_limits_record { @@ -140,7 +140,7 @@ struct phm_uvd_clock_voltage_dependency_record { struct phm_uvd_clock_voltage_dependency_table { uint8_t count; - struct phm_uvd_clock_voltage_dependency_record entries[] __counted_by(count); + struct phm_uvd_clock_voltage_dependency_record entries[]; }; struct phm_acp_clock_voltage_dependency_record { @@ -150,7 +150,7 @@ struct phm_acp_clock_voltage_dependency_record { struct phm_acp_clock_voltage_dependency_table { uint32_t count; - struct phm_acp_clock_voltage_dependency_record entries[] __counted_by(count); + struct phm_acp_clock_voltage_dependency_record entries[]; }; struct phm_vce_clock_voltage_dependency_record { @@ -161,32 +161,32 @@ struct phm_vce_clock_voltage_dependency_record { struct phm_phase_shedding_limits_table { uint32_t count; - struct phm_phase_shedding_limits_record entries[] __counted_by(count); + struct phm_phase_shedding_limits_record entries[]; }; struct phm_vceclock_voltage_dependency_table { uint8_t count; - struct phm_vceclock_voltage_dependency_record entries[] __counted_by(count); + struct phm_vceclock_voltage_dependency_record entries[]; }; struct phm_uvdclock_voltage_dependency_table { uint8_t count; - struct phm_uvdclock_voltage_dependency_record entries[] __counted_by(count); + struct phm_uvdclock_voltage_dependency_record entries[]; }; struct phm_samuclock_voltage_dependency_table { uint8_t count; - struct phm_samuclock_voltage_dependency_record entries[] __counted_by(count); + struct phm_samuclock_voltage_dependency_record entries[]; }; struct phm_acpclock_voltage_dependency_table { uint32_t count; - struct phm_acpclock_voltage_dependency_record entries[] __counted_by(count); + struct phm_acpclock_voltage_dependency_record entries[]; }; struct phm_vce_clock_voltage_dependency_table { uint8_t count; - struct phm_vce_clock_voltage_dependency_record entries[] __counted_by(count); + struct phm_vce_clock_voltage_dependency_record entries[]; }; @@ -393,7 +393,7 @@ union phm_cac_leakage_record { struct phm_cac_leakage_table { uint32_t count; - union phm_cac_leakage_record entries[] __counted_by(count); + union phm_cac_leakage_record entries[]; }; struct phm_samu_clock_voltage_dependency_record { @@ -404,7 +404,7 @@ struct phm_samu_clock_voltage_dependency_record { struct phm_samu_clock_voltage_dependency_table { uint8_t count; - struct phm_samu_clock_voltage_dependency_record entries[] __counted_by(count); + struct phm_samu_clock_voltage_dependency_record entries[]; }; struct phm_cac_tdp_table { diff --git a/drivers/gpu/drm/drm_fbdev_dma.c b/drivers/gpu/drm/drm_fbdev_dma.c index b0602c4f3628..51c2d742d199 100644 --- a/drivers/gpu/drm/drm_fbdev_dma.c +++ b/drivers/gpu/drm/drm_fbdev_dma.c @@ -50,7 +50,8 @@ static void drm_fbdev_dma_fb_destroy(struct fb_info *info) if (!fb_helper->dev) return; - fb_deferred_io_cleanup(info); + if (info->fbdefio) + fb_deferred_io_cleanup(info); drm_fb_helper_fini(fb_helper); drm_client_buffer_vunmap(fb_helper->buffer); diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 6980b98792c2..377939de0ff4 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -1094,7 +1094,8 @@ static void intel_hdcp_update_value(struct intel_connector *connector, hdcp->value = value; if (update_property) { drm_connector_get(&connector->base); - queue_work(i915->unordered_wq, &hdcp->prop_work); + if (!queue_work(i915->unordered_wq, &hdcp->prop_work)) + drm_connector_put(&connector->base); } } @@ -2524,7 +2525,8 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state, mutex_lock(&hdcp->mutex); hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; drm_connector_get(&connector->base); - queue_work(i915->unordered_wq, &hdcp->prop_work); + if (!queue_work(i915->unordered_wq, &hdcp->prop_work)) + drm_connector_put(&connector->base); mutex_unlock(&hdcp->mutex); } @@ -2541,7 +2543,9 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state, */ if (!desired_and_not_enabled && !content_protection_type_changed) { drm_connector_get(&connector->base); - queue_work(i915->unordered_wq, &hdcp->prop_work); + if (!queue_work(i915->unordered_wq, &hdcp->prop_work)) + drm_connector_put(&connector->base); + } } diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h index 9e6f39912368..a2055f2a014a 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h @@ -210,7 +210,7 @@ struct nvkm_gsp { } *rm; struct { - struct mutex mutex;; + struct mutex mutex; struct idr idr; } client_id; diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 1f2d649f4b96..1a072568cef6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -193,7 +193,7 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) if (!spage || !(src & MIGRATE_PFN_MIGRATE)) goto done; - dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address); + dpage = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vmf->vma, vmf->address); if (!dpage) goto done; diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index f6e78dba594f..34985771b2a2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -331,7 +331,7 @@ nouveau_accel_ce_init(struct nouveau_drm *drm) return; } - ret = nouveau_channel_new(&drm->client, false, runm, NvDmaFB, NvDmaTT, &drm->cechan); + ret = nouveau_channel_new(&drm->client, true, runm, NvDmaFB, NvDmaTT, &drm->cechan); if (ret) NV_ERROR(drm, "failed to create ce channel, %d\n", ret); } diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index fca8b08535a5..6328627b7c34 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -228,10 +228,8 @@ void radeon_dp_aux_init(struct radeon_connector *radeon_connector) { struct drm_device *dev = radeon_connector->base.dev; struct radeon_device *rdev = dev->dev_private; - int ret; radeon_connector->ddc_bus->rec.hpd = radeon_connector->hpd.hpd; - radeon_connector->ddc_bus->aux.dev = radeon_connector->base.kdev; radeon_connector->ddc_bus->aux.drm_dev = radeon_connector->base.dev; if (ASIC_IS_DCE5(rdev)) { if (radeon_auxch) @@ -242,11 +240,8 @@ void radeon_dp_aux_init(struct radeon_connector *radeon_connector) radeon_connector->ddc_bus->aux.transfer = radeon_dp_aux_transfer_atom; } - ret = drm_dp_aux_register(&radeon_connector->ddc_bus->aux); - if (!ret) - radeon_connector->ddc_bus->has_aux = true; - - WARN(ret, "drm_dp_aux_register() failed with error %d\n", ret); + drm_dp_aux_init(&radeon_connector->ddc_bus->aux); + radeon_connector->ddc_bus->has_aux = true; } /***** general DP utility functions *****/ diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 528a8f3677c2..f9c73c55f04f 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1786,6 +1786,20 @@ static enum drm_mode_status radeon_dp_mode_valid(struct drm_connector *connector return MODE_OK; } +static int +radeon_connector_late_register(struct drm_connector *connector) +{ + struct radeon_connector *radeon_connector = to_radeon_connector(connector); + int r = 0; + + if (radeon_connector->ddc_bus->has_aux) { + radeon_connector->ddc_bus->aux.dev = radeon_connector->base.kdev; + r = drm_dp_aux_register(&radeon_connector->ddc_bus->aux); + } + + return r; +} + static const struct drm_connector_helper_funcs radeon_dp_connector_helper_funcs = { .get_modes = radeon_dp_get_modes, .mode_valid = radeon_dp_mode_valid, @@ -1800,6 +1814,7 @@ static const struct drm_connector_funcs radeon_dp_connector_funcs = { .early_unregister = radeon_connector_unregister, .destroy = radeon_connector_destroy, .force = radeon_dvi_force, + .late_register = radeon_connector_late_register, }; static const struct drm_connector_funcs radeon_edp_connector_funcs = { @@ -1810,6 +1825,7 @@ static const struct drm_connector_funcs radeon_edp_connector_funcs = { .early_unregister = radeon_connector_unregister, .destroy = radeon_connector_destroy, .force = radeon_dvi_force, + .late_register = radeon_connector_late_register, }; static const struct drm_connector_funcs radeon_lvds_bridge_connector_funcs = { @@ -1820,6 +1836,7 @@ static const struct drm_connector_funcs radeon_lvds_bridge_connector_funcs = { .early_unregister = radeon_connector_unregister, .destroy = radeon_connector_destroy, .force = radeon_dvi_force, + .late_register = radeon_connector_late_register, }; void diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 9735f4968b86..bf2d4b16dc2a 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -44,8 +44,6 @@ struct sg_table *radeon_gem_prime_get_sg_table(struct drm_gem_object *obj); int radeon_gem_prime_pin(struct drm_gem_object *obj); void radeon_gem_prime_unpin(struct drm_gem_object *obj); -const struct drm_gem_object_funcs radeon_gem_object_funcs; - static vm_fault_t radeon_gem_fault(struct vm_fault *vmf) { struct ttm_buffer_object *bo = vmf->vma->vm_private_data; @@ -132,7 +130,6 @@ retry: return r; } *obj = &robj->tbo.base; - (*obj)->funcs = &radeon_gem_object_funcs; robj->pid = task_pid_nr(current); mutex_lock(&rdev->gem.mutex); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index d0e4b43d155c..7672404fdb29 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -151,6 +151,7 @@ int radeon_bo_create(struct radeon_device *rdev, if (bo == NULL) return -ENOMEM; drm_gem_private_object_init(rdev_to_drm(rdev), &bo->tbo.base, size); + bo->tbo.base.funcs = &radeon_gem_object_funcs; bo->rdev = rdev; bo->surface_reg = -1; INIT_LIST_HEAD(&bo->list); diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 6f27cab0b76d..eaef20f41786 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -87,6 +87,12 @@ #define CREATE_TRACE_POINTS #include "gpu_scheduler_trace.h" +#ifdef CONFIG_LOCKDEP +static struct lockdep_map drm_sched_lockdep_map = { + .name = "drm_sched_lockdep_map" +}; +#endif + #define to_drm_sched_job(sched_job) \ container_of((sched_job), struct drm_sched_job, queue_node) @@ -1269,7 +1275,12 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, sched->submit_wq = submit_wq; sched->own_submit_wq = false; } else { +#ifdef CONFIG_LOCKDEP + sched->submit_wq = alloc_ordered_workqueue_lockdep_map(name, 0, + &drm_sched_lockdep_map); +#else sched->submit_wq = alloc_ordered_workqueue(name, 0); +#endif if (!sched->submit_wq) return -ENOMEM; diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c index cd7f1eedf17f..00cd081d7873 100644 --- a/drivers/gpu/drm/v3d/v3d_perfmon.c +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c @@ -306,6 +306,11 @@ void v3d_perfmon_open_file(struct v3d_file_priv *v3d_priv) static int v3d_perfmon_idr_del(int id, void *elem, void *data) { struct v3d_perfmon *perfmon = elem; + struct v3d_dev *v3d = (struct v3d_dev *)data; + + /* If the active perfmon is being destroyed, stop it first */ + if (perfmon == v3d->active_perfmon) + v3d_perfmon_stop(v3d, perfmon, false); v3d_perfmon_put(perfmon); @@ -314,8 +319,10 @@ static int v3d_perfmon_idr_del(int id, void *elem, void *data) void v3d_perfmon_close_file(struct v3d_file_priv *v3d_priv) { + struct v3d_dev *v3d = v3d_priv->v3d; + mutex_lock(&v3d_priv->perfmon.lock); - idr_for_each(&v3d_priv->perfmon.idr, v3d_perfmon_idr_del, NULL); + idr_for_each(&v3d_priv->perfmon.idr, v3d_perfmon_idr_del, v3d); idr_destroy(&v3d_priv->perfmon.idr); mutex_unlock(&v3d_priv->perfmon.lock); mutex_destroy(&v3d_priv->perfmon.lock); diff --git a/drivers/gpu/drm/vc4/vc4_perfmon.c b/drivers/gpu/drm/vc4/vc4_perfmon.c index c4ac2c946238..c00a5cc2316d 100644 --- a/drivers/gpu/drm/vc4/vc4_perfmon.c +++ b/drivers/gpu/drm/vc4/vc4_perfmon.c @@ -116,6 +116,11 @@ void vc4_perfmon_open_file(struct vc4_file *vc4file) static int vc4_perfmon_idr_del(int id, void *elem, void *data) { struct vc4_perfmon *perfmon = elem; + struct vc4_dev *vc4 = (struct vc4_dev *)data; + + /* If the active perfmon is being destroyed, stop it first */ + if (perfmon == vc4->active_perfmon) + vc4_perfmon_stop(vc4, perfmon, false); vc4_perfmon_put(perfmon); @@ -130,7 +135,7 @@ void vc4_perfmon_close_file(struct vc4_file *vc4file) return; mutex_lock(&vc4file->perfmon.lock); - idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, NULL); + idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, vc4); idr_destroy(&vc4file->perfmon.idr); mutex_unlock(&vc4file->perfmon.lock); mutex_destroy(&vc4file->perfmon.lock); diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 668615c6b172..fe4319eb13fd 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -187,7 +187,7 @@ void xe_debugfs_register(struct xe_device *xe) debugfs_create_file("forcewake_all", 0400, root, xe, &forcewake_all_fops); - debugfs_create_file("wedged_mode", 0400, root, xe, + debugfs_create_file("wedged_mode", 0600, root, xe, &wedged_mode_fops); for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index f0dc2bf24c7b..ea65cf59372c 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -874,7 +874,9 @@ int xe_gt_sanitize_freq(struct xe_gt *gt) int ret = 0; if ((!xe_uc_fw_is_available(>->uc.gsc.fw) || - xe_uc_fw_is_loaded(>->uc.gsc.fw)) && XE_WA(gt, 22019338487)) + xe_uc_fw_is_loaded(>->uc.gsc.fw) || + xe_uc_fw_is_in_error_state(>->uc.gsc.fw)) && + XE_WA(gt, 22019338487)) ret = xe_guc_pc_restore_stashed_freq(>->uc.guc.pc); return ret; diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index f24dd5223926..d16eb9ab49fb 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -667,16 +667,12 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, num_g2h = 1; if (g2h_fence_needs_alloc(g2h_fence)) { - void *ptr; - g2h_fence->seqno = next_ct_seqno(ct, true); - ptr = xa_store(&ct->fence_lookup, - g2h_fence->seqno, - g2h_fence, GFP_ATOMIC); - if (IS_ERR(ptr)) { - ret = PTR_ERR(ptr); + ret = xa_err(xa_store(&ct->fence_lookup, + g2h_fence->seqno, g2h_fence, + GFP_ATOMIC)); + if (ret) goto out; - } } seqno = g2h_fence->seqno; @@ -879,14 +875,11 @@ retry: retry_same_fence: ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence); if (unlikely(ret == -ENOMEM)) { - void *ptr; - /* Retry allocation /w GFP_KERNEL */ - ptr = xa_store(&ct->fence_lookup, - g2h_fence.seqno, - &g2h_fence, GFP_KERNEL); - if (IS_ERR(ptr)) - return PTR_ERR(ptr); + ret = xa_err(xa_store(&ct->fence_lookup, g2h_fence.seqno, + &g2h_fence, GFP_KERNEL)); + if (ret) + return ret; goto retry_same_fence; } else if (unlikely(ret)) { @@ -903,16 +896,26 @@ retry_same_fence: } ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ); + + /* + * Ensure we serialize with completion side to prevent UAF with fence going out of scope on + * the stack, since we have no clue if it will fire after the timeout before we can erase + * from the xa. Also we have some dependent loads and stores below for which we need the + * correct ordering, and we lack the needed barriers. + */ + mutex_lock(&ct->lock); if (!ret) { - xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x", - g2h_fence.seqno, action[0]); + xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s", + g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done)); xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno); + mutex_unlock(&ct->lock); return -ETIME; } if (g2h_fence.retry) { xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n", action[0], g2h_fence.reason); + mutex_unlock(&ct->lock); goto retry; } if (g2h_fence.fail) { @@ -921,7 +924,12 @@ retry_same_fence: ret = -EIO; } - return ret > 0 ? response_buffer ? g2h_fence.response_len : g2h_fence.response_data : ret; + if (ret > 0) + ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data; + + mutex_unlock(&ct->lock); + + return ret; } /** diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 80062e1d3f66..63495007f336 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -224,80 +224,11 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) EXEC_QUEUE_STATE_BANNED)); } -#ifdef CONFIG_PROVE_LOCKING -static int alloc_submit_wq(struct xe_guc *guc) -{ - int i; - - for (i = 0; i < NUM_SUBMIT_WQ; ++i) { - guc->submission_state.submit_wq_pool[i] = - alloc_ordered_workqueue("submit_wq", 0); - if (!guc->submission_state.submit_wq_pool[i]) - goto err_free; - } - - return 0; - -err_free: - while (i) - destroy_workqueue(guc->submission_state.submit_wq_pool[--i]); - - return -ENOMEM; -} - -static void free_submit_wq(struct xe_guc *guc) -{ - int i; - - for (i = 0; i < NUM_SUBMIT_WQ; ++i) - destroy_workqueue(guc->submission_state.submit_wq_pool[i]); -} - -static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) -{ - int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ; - - return guc->submission_state.submit_wq_pool[idx]; -} -#else -static int alloc_submit_wq(struct xe_guc *guc) -{ - return 0; -} - -static void free_submit_wq(struct xe_guc *guc) -{ - -} - -static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) -{ - return NULL; -} -#endif - -static void xe_guc_submit_fini(struct xe_guc *guc) -{ - struct xe_device *xe = guc_to_xe(guc); - struct xe_gt *gt = guc_to_gt(guc); - int ret; - - ret = wait_event_timeout(guc->submission_state.fini_wq, - xa_empty(&guc->submission_state.exec_queue_lookup), - HZ * 5); - - drain_workqueue(xe->destroy_wq); - - xe_gt_assert(gt, ret); -} - static void guc_submit_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; - xe_guc_submit_fini(guc); xa_destroy(&guc->submission_state.exec_queue_lookup); - free_submit_wq(guc); } static void guc_submit_wedged_fini(void *arg) @@ -359,10 +290,6 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) if (err) return err; - err = alloc_submit_wq(guc); - if (err) - return err; - gt->exec_queue_ops = &guc_exec_queue_ops; xa_init(&guc->submission_state.exec_queue_lookup); @@ -393,7 +320,6 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) { int ret; - void *ptr; int i; /* @@ -413,12 +339,10 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) q->guc->id = ret; for (i = 0; i < q->width; ++i) { - ptr = xa_store(&guc->submission_state.exec_queue_lookup, - q->guc->id + i, q, GFP_NOWAIT); - if (IS_ERR(ptr)) { - ret = PTR_ERR(ptr); + ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup, + q->guc->id + i, q, GFP_NOWAIT)); + if (ret) goto err_release; - } } return 0; @@ -1482,8 +1406,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(q->sched_props.job_timeout_ms); err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, - get_submit_wq(guc), - q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64, + NULL, q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64, timeout, guc_to_gt(guc)->ordered_wq, NULL, q->name, gt_to_xe(q->gt)->drm.dev); if (err) diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 69046f698271..ed150fc09ad0 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -72,13 +72,6 @@ struct xe_guc { atomic_t stopped; /** @submission_state.lock: protects submission state */ struct mutex lock; -#ifdef CONFIG_PROVE_LOCKING -#define NUM_SUBMIT_WQ 256 - /** @submission_state.submit_wq_pool: submission ordered workqueues pool */ - struct workqueue_struct *submit_wq_pool[NUM_SUBMIT_WQ]; - /** @submission_state.submit_wq_idx: submission ordered workqueue index */ - int submit_wq_idx; -#endif /** @submission_state.enabled: submission is enabled */ bool enabled; /** @submission_state.fini_wq: submit fini wait queue */