Merge tag 'amd-drm-next-5.18-2022-03-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-5.18-2022-03-09:

amdgpu:
- Misc code cleanups
- Misc display fixes
- PSR display fixes
- More RAS cleanup
- Hotplug fix
- Bump minor version for hotplug tests
- SR-IOV fixes
- GC 10.3.7 updates
- Remove some firmware binaries which are no longer used
- Mode2 reset refactor
- Aldebaran fixes
- Add VCN fwlog feature for VCN debugging
- CS code cleanup
- Fix clang warning
- Fix CS clean up rebase breakage

amdkfd:
- SVM fixes
- SMI event fixes and cleanups
- vmid_pasid mapping fix for gfx10.3

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220309224439.2178877-1-alexander.deucher@amd.com
Committed by Dave Airlie on 2022-03-10 09:28:38 +10:00 (commit 955ad0c8ba).
84 changed files with 984 additions and 672 deletions.


@ -31,6 +31,17 @@
#include "amdgpu_psp.h"
#include "amdgpu_xgmi.h"
static bool aldebaran_is_mode2_default(struct amdgpu_reset_control *reset_ctl)
{
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
adev->gmc.xgmi.connected_to_cpu))
return true;
return false;
}
static struct amdgpu_reset_handler *
aldebaran_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
struct amdgpu_reset_context *reset_context)
@ -48,7 +59,7 @@ aldebaran_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
}
}
if (adev->gmc.xgmi.connected_to_cpu) {
if (aldebaran_is_mode2_default(reset_ctl)) {
list_for_each_entry(handler, &reset_ctl->reset_handlers,
handler_list) {
if (handler->reset_method == AMD_RESET_METHOD_MODE2) {
@ -136,18 +147,31 @@ static int
aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
struct amdgpu_reset_context *reset_context)
{
struct amdgpu_device *tmp_adev = NULL;
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
struct amdgpu_device *tmp_adev = NULL;
struct list_head reset_device_list;
int r = 0;
dev_dbg(adev->dev, "aldebaran perform hw reset\n");
if (reset_context->hive == NULL) {
if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
reset_context->hive == NULL) {
/* Wrong context, return error */
return -EINVAL;
}
list_for_each_entry(tmp_adev, &reset_context->hive->device_list,
gmc.xgmi.head) {
INIT_LIST_HEAD(&reset_device_list);
if (reset_context->hive) {
list_for_each_entry (tmp_adev,
&reset_context->hive->device_list,
gmc.xgmi.head)
list_add_tail(&tmp_adev->reset_list,
&reset_device_list);
} else {
list_add_tail(&reset_context->reset_req_dev->reset_list,
&reset_device_list);
}
list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
mutex_lock(&tmp_adev->reset_cntl->reset_lock);
tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_MODE2;
}
@ -155,8 +179,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
* Mode2 reset doesn't need any sync between nodes in XGMI hive, instead launch
* them together so that they can be completed asynchronously on multiple nodes
*/
list_for_each_entry(tmp_adev, &reset_context->hive->device_list,
gmc.xgmi.head) {
list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
/* For XGMI run all resets in parallel to speed up the process */
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
if (!queue_work(system_unbound_wq,
@ -174,9 +197,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
/* For XGMI wait for all resets to complete before proceed */
if (!r) {
list_for_each_entry(tmp_adev,
&reset_context->hive->device_list,
gmc.xgmi.head) {
list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
flush_work(&tmp_adev->reset_cntl->reset_work);
r = tmp_adev->asic_reset_res;
@ -186,8 +207,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
}
}
list_for_each_entry(tmp_adev, &reset_context->hive->device_list,
gmc.xgmi.head) {
list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
mutex_unlock(&tmp_adev->reset_cntl->reset_lock);
tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE;
}
@ -319,16 +339,30 @@ static int
aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
struct amdgpu_reset_context *reset_context)
{
int r;
struct amdgpu_device *tmp_adev = NULL;
struct list_head reset_device_list;
int r;
if (reset_context->hive == NULL) {
if (reset_context->reset_req_dev->ip_versions[MP1_HWIP][0] ==
IP_VERSION(13, 0, 2) &&
reset_context->hive == NULL) {
/* Wrong context, return error */
return -EINVAL;
}
list_for_each_entry(tmp_adev, &reset_context->hive->device_list,
gmc.xgmi.head) {
INIT_LIST_HEAD(&reset_device_list);
if (reset_context->hive) {
list_for_each_entry (tmp_adev,
&reset_context->hive->device_list,
gmc.xgmi.head)
list_add_tail(&tmp_adev->reset_list,
&reset_device_list);
} else {
list_add_tail(&reset_context->reset_req_dev->reset_list,
&reset_device_list);
}
list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
dev_info(tmp_adev->dev,
"GPU reset succeeded, trying to resume\n");
r = aldebaran_mode2_restore_ip(tmp_adev);


@ -60,7 +60,6 @@
#include <drm/amdgpu_drm.h>
#include <drm/drm_gem.h>
#include <drm/drm_ioctl.h>
#include <drm/gpu_scheduler.h>
#include <kgd_kfd_interface.h>
#include "dm_pp_interface.h"
@ -233,6 +232,9 @@ extern int amdgpu_cik_support;
#endif
extern int amdgpu_num_kcq;
#define AMDGPU_VCNFW_LOG_SIZE (32 * 1024)
extern int amdgpu_vcnfw_log;
#define AMDGPU_VM_MAX_NUM_CTX 4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
@ -274,9 +276,6 @@ extern int amdgpu_num_kcq;
#define AMDGPU_SMARTSHIFT_MIN_BIAS (-100)
struct amdgpu_device;
struct amdgpu_ib;
struct amdgpu_cs_parser;
struct amdgpu_job;
struct amdgpu_irq_src;
struct amdgpu_fpriv;
struct amdgpu_bo_va_mapping;
@ -464,20 +463,6 @@ struct amdgpu_flip_work {
};
/*
* CP & rings.
*/
struct amdgpu_ib {
struct amdgpu_sa_bo *sa_bo;
uint32_t length_dw;
uint64_t gpu_addr;
uint32_t *ptr;
uint32_t flags;
};
extern const struct drm_sched_backend_ops amdgpu_sched_ops;
/*
* file private structure
*/
@ -493,79 +478,6 @@ struct amdgpu_fpriv {
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size,
enum amdgpu_ib_pool_type pool,
struct amdgpu_ib *ib);
void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
struct dma_fence *f);
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
struct amdgpu_ib *ibs, struct amdgpu_job *job,
struct dma_fence **f);
int amdgpu_ib_pool_init(struct amdgpu_device *adev);
void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
/*
* CS.
*/
struct amdgpu_cs_chunk {
uint32_t chunk_id;
uint32_t length_dw;
void *kdata;
};
struct amdgpu_cs_post_dep {
struct drm_syncobj *syncobj;
struct dma_fence_chain *chain;
u64 point;
};
struct amdgpu_cs_parser {
struct amdgpu_device *adev;
struct drm_file *filp;
struct amdgpu_ctx *ctx;
/* chunks */
unsigned nchunks;
struct amdgpu_cs_chunk *chunks;
/* scheduler job object */
struct amdgpu_job *job;
struct drm_sched_entity *entity;
/* buffer objects */
struct ww_acquire_ctx ticket;
struct amdgpu_bo_list *bo_list;
struct amdgpu_mn *mn;
struct amdgpu_bo_list_entry vm_pd;
struct list_head validated;
struct dma_fence *fence;
uint64_t bytes_moved_threshold;
uint64_t bytes_moved_vis_threshold;
uint64_t bytes_moved;
uint64_t bytes_moved_vis;
/* user fence */
struct amdgpu_bo_list_entry uf_entry;
unsigned num_post_deps;
struct amdgpu_cs_post_dep *post_deps;
};
static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
uint32_t ib_idx, int idx)
{
return p->job->ibs[ib_idx].ptr[idx];
}
static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
uint32_t ib_idx, int idx,
uint32_t value)
{
p->job->ibs[ib_idx].ptr[idx] = value;
}
/*
* Writeback
*/
@ -1436,10 +1348,6 @@ static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { retu
static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
#endif
int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
uint64_t addr, struct amdgpu_bo **bo,
struct amdgpu_bo_va_mapping **mapping);
#if defined(CONFIG_DRM_AMD_DC)
int amdgpu_dm_display_resume(struct amdgpu_device *adev );
#else


@ -26,6 +26,8 @@
#include "gc/gc_10_3_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
#include "oss/osssys_5_0_0_sh_mask.h"
#include "athub/athub_2_1_0_offset.h"
#include "athub/athub_2_1_0_sh_mask.h"
#include "soc15_common.h"
#include "v10_structs.h"
#include "nv.h"
@ -606,6 +608,18 @@ static int wave_control_execute_v10_3(struct amdgpu_device *adev,
return 0;
}
static bool get_atc_vmid_pasid_mapping_info_v10_3(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid)
{
uint32_t value;
value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ vmid);
*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev,
uint32_t vmid, uint64_t page_table_base)
{
@ -788,7 +802,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.hqd_destroy = hqd_destroy_v10_3,
.hqd_sdma_destroy = hqd_sdma_destroy_v10_3,
.wave_control_execute = wave_control_execute_v10_3,
.get_atc_vmid_pasid_mapping_info = NULL,
.get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3,
.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
.program_trap_handler_settings = program_trap_handler_settings_v10_3,
#if 0


@ -32,6 +32,7 @@
#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
#include "amdgpu_cs.h"
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_gmc.h"
@ -782,12 +783,12 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
amdgpu_bo_kunmap(aobj);
r = amdgpu_ring_parse_cs(ring, p, j);
r = amdgpu_ring_parse_cs(ring, p, p->job, ib);
if (r)
return r;
} else {
ib->ptr = (uint32_t *)kptr;
r = amdgpu_ring_patch_cs_in_place(ring, p, j);
r = amdgpu_ring_patch_cs_in_place(ring, p, p->job, ib);
amdgpu_bo_kunmap(aobj);
if (r)
return r;


@ -0,0 +1,80 @@
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_CS_H__
#define __AMDGPU_CS_H__
#include "amdgpu_job.h"
#include "amdgpu_bo_list.h"
#include "amdgpu_ring.h"
struct amdgpu_bo_va_mapping;
struct amdgpu_cs_chunk {
uint32_t chunk_id;
uint32_t length_dw;
void *kdata;
};
struct amdgpu_cs_post_dep {
struct drm_syncobj *syncobj;
struct dma_fence_chain *chain;
u64 point;
};
struct amdgpu_cs_parser {
struct amdgpu_device *adev;
struct drm_file *filp;
struct amdgpu_ctx *ctx;
/* chunks */
unsigned nchunks;
struct amdgpu_cs_chunk *chunks;
/* scheduler job object */
struct amdgpu_job *job;
struct drm_sched_entity *entity;
/* buffer objects */
struct ww_acquire_ctx ticket;
struct amdgpu_bo_list *bo_list;
struct amdgpu_mn *mn;
struct amdgpu_bo_list_entry vm_pd;
struct list_head validated;
struct dma_fence *fence;
uint64_t bytes_moved_threshold;
uint64_t bytes_moved_vis_threshold;
uint64_t bytes_moved;
uint64_t bytes_moved_vis;
/* user fence */
struct amdgpu_bo_list_entry uf_entry;
unsigned num_post_deps;
struct amdgpu_cs_post_dep *post_deps;
};
int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
uint64_t addr, struct amdgpu_bo **bo,
struct amdgpu_bo_va_mapping **mapping);
#endif


@ -23,6 +23,7 @@
*/
#include <drm/drm_auth.h>
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"
@ -204,9 +205,15 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
if (r)
goto error_free_entity;
ctx->entities[hw_ip][ring] = entity;
/* It's not an error if we fail to install the new entity */
if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
goto cleanup_entity;
return 0;
cleanup_entity:
drm_sched_entity_fini(&entity->entity);
error_free_entity:
kfree(entity);
@ -261,9 +268,6 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
struct amdgpu_device *adev = ctx->adev;
enum amd_dpm_forced_level current_level;
if (!ctx)
return -EINVAL;
current_level = amdgpu_dpm_get_performance_level(adev);
switch (current_level) {
@ -293,9 +297,6 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
enum amd_dpm_forced_level level;
int r;
if (!ctx)
return -EINVAL;
mutex_lock(&adev->pm.stable_pstate_ctx_lock);
if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
r = -EBUSY;
@ -339,7 +340,7 @@ static void amdgpu_ctx_fini(struct kref *ref)
{
struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
struct amdgpu_device *adev = ctx->adev;
unsigned i, j;
unsigned i, j, idx;
if (!adev)
return;
@ -350,7 +351,12 @@ static void amdgpu_ctx_fini(struct kref *ref)
ctx->entities[i][j] = NULL;
}
}
amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE);
if (drm_dev_enter(&adev->ddev, &idx)) {
amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE);
drm_dev_exit(idx);
}
kfree(ctx);
}
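
The drm_dev_enter()/drm_dev_exit() pair added to amdgpu_ctx_fini() above guards late hardware access against device hot-unplug. A minimal sketch of that pattern, with illustrative names rather than the driver's own call sites:

#include <drm/drm_drv.h>

static void example_fini(struct drm_device *drm)
{
	int idx;

	/* drm_dev_enter() fails once drm_dev_unplug() has run, so the
	 * hardware access below is skipped for an already-removed device. */
	if (drm_dev_enter(drm, &idx)) {
		/* ... register writes / pstate restore would go here ... */
		drm_dev_exit(idx);
	}
}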


@ -1678,7 +1678,7 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
{
struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
char reg_offset[11];
uint32_t *tmp;
uint32_t *new, *tmp = NULL;
int ret, i = 0, len = 0;
do {
@ -1689,7 +1689,12 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
goto error_free;
}
tmp = krealloc_array(tmp, i + 1, sizeof(uint32_t), GFP_KERNEL);
new = krealloc_array(tmp, i + 1, sizeof(uint32_t), GFP_KERNEL);
if (!new) {
ret = -ENOMEM;
goto error_free;
}
tmp = new;
if (sscanf(reg_offset, "%X %n", &tmp[i], &ret) != 1) {
ret = -EINVAL;
goto error_free;
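
The hunk above switches to the leak-safe reallocation idiom: the result of krealloc_array() goes into a separate pointer so that, on failure, the original buffer is still reachable and is released on the error path. A minimal, self-contained sketch of the same pattern with illustrative names:

#include <linux/slab.h>
#include <linux/types.h>

/* Grow *arr by one element; on allocation failure *arr is left untouched
 * and still owned by the caller. */
static int append_u32(u32 **arr, int *count, u32 value)
{
	u32 *new;

	new = krealloc_array(*arr, *count + 1, sizeof(**arr), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	new[*count] = value;
	*count += 1;
	*arr = new;
	return 0;
}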
@ -1773,6 +1778,16 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
amdgpu_debugfs_ring_init(adev, ring);
}
for ( i = 0; i < adev->vcn.num_vcn_inst; i++) {
if (!amdgpu_vcnfw_log)
break;
if (adev->vcn.harvest_config & (1 << i))
continue;
amdgpu_debugfs_vcn_fwlog_init(adev, i, &adev->vcn.inst[i]);
}
amdgpu_ras_debugfs_create_all(adev);
amdgpu_rap_debugfs_init(adev);
amdgpu_securedisplay_debugfs_init(adev);


@ -80,12 +80,7 @@ MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/yellow_carp_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000
#define AMDGPU_MAX_RETRY_LIMIT 2
@ -1554,7 +1549,6 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
amdgpu_gmc_tmz_set(adev);
amdgpu_gmc_noretry_set(adev);
return 0;
}
@ -1992,27 +1986,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
case CHIP_ARCTURUS:
chip_name = "arcturus";
break;
case CHIP_RENOIR:
if (adev->apu_flags & AMD_APU_IS_RENOIR)
chip_name = "renoir";
else
chip_name = "green_sardine";
break;
case CHIP_NAVI10:
chip_name = "navi10";
break;
case CHIP_NAVI14:
chip_name = "navi14";
break;
case CHIP_NAVI12:
chip_name = "navi12";
break;
case CHIP_VANGOGH:
chip_name = "vangogh";
break;
case CHIP_YELLOW_CARP:
chip_name = "yellow_carp";
break;
}
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
@ -3711,6 +3687,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
amdgpu_gmc_noretry_set(adev);
/* Need to get xgmi info early to decide the reset behavior*/
if (adev->gmc.xgmi.supported) {
r = adev->gfxhub.funcs->get_xgmi_info(adev);


@ -691,9 +691,9 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
return -EINVAL;
}
if (adev->asic_type >= CHIP_SIENNA_CICHLID)
if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
version = AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS;
else if (adev->family == AMDGPU_FAMILY_NV)
else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0))
version = AMD_FMT_MOD_TILE_VER_GFX10;
else
version = AMD_FMT_MOD_TILE_VER_GFX9;
@ -787,7 +787,7 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
if (adev->family >= AMDGPU_FAMILY_NV) {
int extra_pipe = 0;
if (adev->asic_type >= CHIP_SIENNA_CICHLID &&
if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) &&
pipes == packers && pipes > 1)
extra_pipe = 1;


@ -100,9 +100,10 @@
* - 3.43.0 - Add device hot plug/unplug support
* - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B
* - 3.45.0 - Add context ioctl stable pstate interface
* * 3.46.0 - To enable hot plug amdgpu tests in libdrm
*/
#define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 45
#define KMS_DRIVER_MINOR 46
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit;
@ -177,6 +178,7 @@ int amdgpu_reset_method = -1; /* auto */
int amdgpu_num_kcq = -1;
int amdgpu_smartshift_bias;
int amdgpu_use_xgmi_p2p = 1;
int amdgpu_vcnfw_log;
static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
@ -854,6 +856,13 @@ module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444);
MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
/**
* DOC: vcnfw_log (int)
* Enable vcnfw log output for debugging, the default is disabled.
*/
MODULE_PARM_DESC(vcnfw_log, "Enable vcnfw log(0 = disable (default value), 1 = enable)");
module_param_named(vcnfw_log, amdgpu_vcnfw_log, int, 0444);
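
Since the parameter is registered read-only (0444), it has to be set at load time. Assuming only the standard module-parameter mechanism (nothing amdgpu-specific beyond what the hunk above adds), that means booting with amdgpu.vcnfw_log=1 on the kernel command line, or passing vcnfw_log=1 to modprobe when loading amdgpu manually. The captured log is then exposed through the per-instance amdgpu_vcn_<n>_fwlog debugfs file added elsewhere in this series.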
/**
* DOC: smu_pptable_id (int)
* Used to override pptable id. id = 0 use VBIOS pptable.


@ -644,13 +644,6 @@ late_fini:
return r;
}
void amdgpu_gfx_ras_fini(struct amdgpu_device *adev)
{
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
adev->gfx.ras_if)
amdgpu_ras_block_late_fini(adev, adev->gfx.ras_if);
}
int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry)


@ -387,7 +387,6 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me,
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value);
int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry);


@ -454,17 +454,7 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
{
if (adev->umc.ras && adev->umc.ras->ras_block.ras_fini)
adev->umc.ras->ras_block.ras_fini(adev);
if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_fini)
adev->mmhub.ras->ras_block.ras_fini(adev);
if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_fini)
adev->gmc.xgmi.ras->ras_block.ras_fini(adev);
if (adev->hdp.ras && adev->hdp.ras->ras_block.ras_fini)
adev->hdp.ras->ras_block.ras_fini(adev);
}
/*
@ -569,11 +559,11 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
{
struct amdgpu_gmc *gmc = &adev->gmc;
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA20:
case CHIP_ARCTURUS:
case CHIP_ALDEBARAN:
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 4, 0):
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
/*
* noretry = 0 will cause kfd page fault tests fail
* for some ASICs, so set default to 1 for these ASICs.
@ -583,7 +573,6 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
else
gmc->noretry = amdgpu_noretry;
break;
case CHIP_RAVEN:
default:
/* Raven currently has issues with noretry
* regardless of what we decide for other


@ -24,9 +24,7 @@
#include "amdgpu.h"
#include "amdgpu_ras.h"
void amdgpu_hdp_ras_fini(struct amdgpu_device *adev)
void amdgpu_hdp_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) &&
adev->hdp.ras_if)
amdgpu_ras_block_late_fini(adev, adev->hdp.ras_if);
}


@ -44,5 +44,4 @@ struct amdgpu_hdp {
};
int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
void amdgpu_hdp_ras_fini(struct amdgpu_device *adev);
#endif /* __AMDGPU_HDP_H__ */


@ -204,7 +204,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
unsigned i;
int r;
if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
if (!dma_fence_is_signaled(ring->vmid_wait))
return amdgpu_sync_fence(sync, ring->vmid_wait);
fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL);


@ -81,14 +81,10 @@ exit:
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
struct amdgpu_job **job, struct amdgpu_vm *vm)
{
size_t size = sizeof(struct amdgpu_job);
if (num_ibs == 0)
return -EINVAL;
size += sizeof(struct amdgpu_ib) * num_ibs;
*job = kzalloc(size, GFP_KERNEL);
*job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL);
if (!*job)
return -ENOMEM;
@ -98,7 +94,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
*/
(*job)->base.sched = &adev->rings[0]->sched;
(*job)->vm = vm;
(*job)->ibs = (void *)&(*job)[1];
(*job)->num_ibs = num_ibs;
amdgpu_sync_create(&(*job)->sync);
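
Together with the flexible ibs[] array that replaces the ibs pointer in struct amdgpu_job (see the header change further below), the allocation above moves to the kernel's struct_size() idiom for trailing-array allocations. A minimal, self-contained sketch of that pattern, using illustrative types rather than the driver's own:

#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/types.h>

struct demo_ib {
	u64 gpu_addr;
	u32 length_dw;
};

struct demo_job {
	unsigned int num_ibs;
	struct demo_ib ibs[];	/* flexible array member, must be last */
};

static struct demo_job *demo_job_alloc(unsigned int num_ibs)
{
	struct demo_job *job;

	/* struct_size() computes sizeof(*job) + num_ibs * sizeof(job->ibs[0])
	 * and saturates on overflow, so a huge num_ibs fails the allocation
	 * instead of producing an undersized buffer. */
	job = kzalloc(struct_size(job, ibs, num_ibs), GFP_KERNEL);
	if (!job)
		return NULL;

	job->num_ibs = num_ibs;
	return job;
}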


@ -23,6 +23,10 @@
#ifndef __AMDGPU_JOB_H__
#define __AMDGPU_JOB_H__
#include <drm/gpu_scheduler.h>
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
/* bit set means command submit involves a preamble IB */
#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0)
/* bit set means preamble IB is first presented in belonging context */
@ -45,12 +49,10 @@ struct amdgpu_job {
struct amdgpu_vm *vm;
struct amdgpu_sync sync;
struct amdgpu_sync sched_sync;
struct amdgpu_ib *ibs;
struct dma_fence hw_fence;
struct dma_fence *external_hw_fence;
uint32_t preamble_status;
uint32_t preemption_status;
uint32_t num_ibs;
bool vm_needs_flush;
uint64_t vm_pd_addr;
unsigned vmid;
@ -66,6 +68,9 @@ struct amdgpu_job {
/* job_run_counter >= 1 means a resubmit job */
uint32_t job_run_counter;
uint32_t num_ibs;
struct amdgpu_ib ibs[];
};
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,


@ -70,9 +70,3 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
amdgpu_mca_reset_error_count(adev, mc_status_addr);
}
void amdgpu_mca_ras_fini(struct amdgpu_device *adev,
struct amdgpu_mca_ras *mca_dev)
{
amdgpu_ras_block_late_fini(adev, mca_dev->ras_if);
}


@ -56,7 +56,4 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
uint64_t mc_status_addr,
void *ras_error_status);
void amdgpu_mca_ras_fini(struct amdgpu_device *adev,
struct amdgpu_mca_ras *mca_dev);
#endif


@ -24,9 +24,7 @@
#include "amdgpu.h"
#include "amdgpu_ras.h"
void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev)
void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) &&
adev->mmhub.ras_if)
amdgpu_ras_block_late_fini(adev, adev->mmhub.ras_if);
}


@ -47,6 +47,5 @@ struct amdgpu_mmhub {
struct amdgpu_mmhub_ras *ras;
};
void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev);
#endif


@ -43,10 +43,3 @@ late_fini:
amdgpu_ras_block_late_fini(adev, ras_block);
return r;
}
void amdgpu_nbio_ras_fini(struct amdgpu_device *adev)
{
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF) &&
adev->nbio.ras_if)
amdgpu_ras_block_late_fini(adev, adev->nbio.ras_if);
}


@ -105,5 +105,4 @@ struct amdgpu_nbio {
};
int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
void amdgpu_nbio_ras_fini(struct amdgpu_device *adev);
#endif


@ -277,7 +277,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
ret = psp_init_cap_microcode(psp, "sienna_cichlid");
break;
case IP_VERSION(13, 0, 2):
ret = psp_init_ta_microcode(psp, "aldebaran");
ret = psp_init_cap_microcode(psp, "aldebaran");
break;
default:
BUG();


@ -2477,6 +2477,12 @@ void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
amdgpu_ras_interrupt_remove_handler(adev, ras_block);
}
static void amdgpu_ras_block_late_fini_default(struct amdgpu_device *adev,
struct ras_common_if *ras_block)
{
return amdgpu_ras_block_late_fini(adev, ras_block);
}
/* do some init work after IP late init as dependence.
* and it runs in resume/gpu reset/booting up cases.
*/
@ -2572,11 +2578,27 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
int amdgpu_ras_fini(struct amdgpu_device *adev)
{
struct amdgpu_ras_block_list *ras_node, *tmp;
struct amdgpu_ras_block_object *obj = NULL;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
if (!adev->ras_enabled || !con)
return 0;
list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) {
if (ras_node->ras_obj) {
obj = ras_node->ras_obj;
if (amdgpu_ras_is_supported(adev, obj->ras_comm.block) &&
obj->ras_fini)
obj->ras_fini(adev, &obj->ras_comm);
else
amdgpu_ras_block_late_fini_default(adev, &obj->ras_comm);
}
/* Clear ras blocks from ras_list and free ras block list node */
list_del(&ras_node->node);
kfree(ras_node);
}
amdgpu_ras_fs_fini(adev);
amdgpu_ras_interrupt_remove_all(adev);
@ -2590,12 +2612,6 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
amdgpu_ras_set_context(adev, NULL);
kfree(con);
/* Clear ras blocks from ras_list and free ras block list node */
list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) {
list_del(&ras_node->node);
kfree(ras_node);
}
return 0;
}


@ -491,7 +491,7 @@ struct amdgpu_ras_block_object {
int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj,
enum amdgpu_ras_block block, uint32_t sub_block_index);
int (*ras_late_init)(struct amdgpu_device *adev, struct ras_common_if *ras_block);
void (*ras_fini)(struct amdgpu_device *adev);
void (*ras_fini)(struct amdgpu_device *adev, struct ras_common_if *ras_block);
ras_ih_cb ras_cb;
const struct amdgpu_ras_block_hw_ops *hw_ops;
};


@ -36,8 +36,8 @@ int amdgpu_reset_init(struct amdgpu_device *adev)
{
int ret = 0;
switch (adev->asic_type) {
case CHIP_ALDEBARAN:
switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 2):
ret = aldebaran_reset_init(adev);
break;
default:
@ -51,8 +51,8 @@ int amdgpu_reset_fini(struct amdgpu_device *adev)
{
int ret = 0;
switch (adev->asic_type) {
case CHIP_ALDEBARAN:
switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 2):
ret = aldebaran_reset_fini(adev);
break;
default:


@ -193,6 +193,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
adev->rings[ring->idx] = ring;
ring->num_hw_submission = sched_hw_submission;
ring->sched_score = sched_score;
ring->vmid_wait = dma_fence_get_stub();
r = amdgpu_fence_driver_init_ring(ring);
if (r)
return r;


@ -28,6 +28,13 @@
#include <drm/gpu_scheduler.h>
#include <drm/drm_print.h>
struct amdgpu_device;
struct amdgpu_ring;
struct amdgpu_ib;
struct amdgpu_cs_parser;
struct amdgpu_job;
struct amdgpu_vm;
/* max number of rings */
#define AMDGPU_MAX_RINGS 28
#define AMDGPU_MAX_HWIP_RINGS 8
@ -82,11 +89,13 @@ enum amdgpu_ib_pool_type {
AMDGPU_IB_POOL_MAX
};
struct amdgpu_device;
struct amdgpu_ring;
struct amdgpu_ib;
struct amdgpu_cs_parser;
struct amdgpu_job;
struct amdgpu_ib {
struct amdgpu_sa_bo *sa_bo;
uint32_t length_dw;
uint64_t gpu_addr;
uint32_t *ptr;
uint32_t flags;
};
struct amdgpu_sched {
u32 num_scheds;
@ -111,6 +120,8 @@ struct amdgpu_fence_driver {
struct dma_fence **fences;
};
extern const struct drm_sched_backend_ops amdgpu_sched_ops;
void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
@ -152,8 +163,12 @@ struct amdgpu_ring_funcs {
u64 (*get_wptr)(struct amdgpu_ring *ring);
void (*set_wptr)(struct amdgpu_ring *ring);
/* validating and patching of IBs */
int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
int (*patch_cs_in_place)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
int (*parse_cs)(struct amdgpu_cs_parser *p,
struct amdgpu_job *job,
struct amdgpu_ib *ib);
int (*patch_cs_in_place)(struct amdgpu_cs_parser *p,
struct amdgpu_job *job,
struct amdgpu_ib *ib);
/* constants to calculate how many DW are needed for an emit */
unsigned emit_frame_size;
unsigned emit_ib_size;
@ -253,8 +268,8 @@ struct amdgpu_ring {
atomic_t *sched_score;
};
#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib)))
#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
#define amdgpu_ring_patch_cs_in_place(r, p, job, ib) ((r)->funcs->patch_cs_in_place((p), (job), (ib)))
#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
@ -352,4 +367,29 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
static inline u32 amdgpu_ib_get_value(struct amdgpu_ib *ib, int idx)
{
return ib->ptr[idx];
}
static inline void amdgpu_ib_set_value(struct amdgpu_ib *ib, int idx,
uint32_t value)
{
ib->ptr[idx] = value;
}
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size,
enum amdgpu_ib_pool_type pool,
struct amdgpu_ib *ib);
void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
struct dma_fence *f);
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
struct amdgpu_ib *ibs, struct amdgpu_job *job,
struct dma_fence **f);
int amdgpu_ib_pool_init(struct amdgpu_device *adev);
void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
#endif


@ -111,13 +111,6 @@ late_fini:
return r;
}
void amdgpu_sdma_ras_fini(struct amdgpu_device *adev)
{
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) &&
adev->sdma.ras_if)
amdgpu_ras_block_late_fini(adev, adev->sdma.ras_if);
}
int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry)


@ -118,7 +118,6 @@ int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index);
uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid);
int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
struct ras_common_if *ras_block);
void amdgpu_sdma_ras_fini(struct amdgpu_device *adev);
int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry);


@ -23,6 +23,7 @@
*/
#include <drm/amdgpu_drm.h>
#include "amdgpu_cs.h"
#include "amdgpu.h"
#define CREATE_TRACE_POINTS


@ -343,7 +343,8 @@ union amdgpu_firmware_header {
* fw loading support
*/
enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_SDMA0 = 0,
AMDGPU_UCODE_ID_CAP = 0,
AMDGPU_UCODE_ID_SDMA0,
AMDGPU_UCODE_ID_SDMA1,
AMDGPU_UCODE_ID_SDMA2,
AMDGPU_UCODE_ID_SDMA3,
@ -378,7 +379,6 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_VCN0_RAM,
AMDGPU_UCODE_ID_VCN1_RAM,
AMDGPU_UCODE_ID_DMCUB,
AMDGPU_UCODE_ID_CAP,
AMDGPU_UCODE_ID_MAXIMUM,
};


@ -162,13 +162,6 @@ late_fini:
return r;
}
void amdgpu_umc_ras_fini(struct amdgpu_device *adev)
{
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) &&
adev->umc.ras_if)
amdgpu_ras_block_late_fini(adev, adev->umc.ras_if);
}
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)


@ -73,7 +73,6 @@ struct amdgpu_umc {
};
int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
void amdgpu_umc_ras_fini(struct amdgpu_device *adev);
int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
void *ras_error_status,
bool reset);


@ -37,6 +37,7 @@
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_uvd.h"
#include "amdgpu_cs.h"
#include "cikd.h"
#include "uvd/uvd_4_2_d.h"
@ -98,7 +99,7 @@ struct amdgpu_uvd_cs_ctx {
unsigned reg, count;
unsigned data0, data1;
unsigned idx;
unsigned ib_idx;
struct amdgpu_ib *ib;
/* does the IB has a msg command */
bool has_msg_cmd;
@ -557,8 +558,8 @@ static u64 amdgpu_uvd_get_addr_from_ctx(struct amdgpu_uvd_cs_ctx *ctx)
uint32_t lo, hi;
uint64_t addr;
lo = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data0);
hi = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data1);
lo = amdgpu_ib_get_value(ctx->ib, ctx->data0);
hi = amdgpu_ib_get_value(ctx->ib, ctx->data1);
addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);
return addr;
@ -589,7 +590,7 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
if (!ctx->parser->adev->uvd.address_64_bit) {
/* check if it's a message or feedback command */
cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1;
if (cmd == 0x0 || cmd == 0x3) {
/* yes, force it into VRAM */
uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
@ -927,12 +928,10 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
start += addr;
amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0,
lower_32_bits(start));
amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data1,
upper_32_bits(start));
amdgpu_ib_set_value(ctx->ib, ctx->data0, lower_32_bits(start));
amdgpu_ib_set_value(ctx->ib, ctx->data1, upper_32_bits(start));
cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1;
if (cmd < 0x4) {
if ((end - start) < ctx->buf_sizes[cmd]) {
DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd,
@ -992,14 +991,13 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
{
struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
int i, r;
ctx->idx++;
for (i = 0; i <= ctx->count; ++i) {
unsigned reg = ctx->reg + i;
if (ctx->idx >= ib->length_dw) {
if (ctx->idx >= ctx->ib->length_dw) {
DRM_ERROR("Register command after end of CS!\n");
return -EINVAL;
}
@ -1039,11 +1037,10 @@ static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
{
struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
int r;
for (ctx->idx = 0 ; ctx->idx < ib->length_dw; ) {
uint32_t cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx);
for (ctx->idx = 0 ; ctx->idx < ctx->ib->length_dw; ) {
uint32_t cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx);
unsigned type = CP_PACKET_GET_TYPE(cmd);
switch (type) {
case PACKET_TYPE0:
@ -1068,11 +1065,14 @@ static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
* amdgpu_uvd_ring_parse_cs - UVD command submission parser
*
* @parser: Command submission parser context
* @ib_idx: Which indirect buffer to use
* @job: the job to parse
* @ib: the IB to patch
*
* Parse the command stream, patch in addresses as necessary.
*/
int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser,
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
struct amdgpu_uvd_cs_ctx ctx = {};
unsigned buf_sizes[] = {
@ -1082,10 +1082,9 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
[0x00000003] = 2048,
[0x00000004] = 0xFFFFFFFF,
};
struct amdgpu_ib *ib = &parser->job->ibs[ib_idx];
int r;
parser->job->vm = NULL;
job->vm = NULL;
ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
if (ib->length_dw % 16) {
@ -1096,7 +1095,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
ctx.parser = parser;
ctx.buf_sizes = buf_sizes;
ctx.ib_idx = ib_idx;
ctx.ib = ib;
/* first round only required on chips without UVD 64 bit address support */
if (!parser->adev->uvd.address_64_bit) {


@ -82,7 +82,9 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
bool direct, struct dma_fence **fence);
void amdgpu_uvd_free_handles(struct amdgpu_device *adev,
struct drm_file *filp);
int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx);
int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser,
struct amdgpu_job *job,
struct amdgpu_ib *ib);
void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring);
void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring);
int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout);


@ -34,6 +34,7 @@
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vce.h"
#include "amdgpu_cs.h"
#include "cikd.h"
/* 1 second timeout */
@ -587,8 +588,7 @@ err:
/**
* amdgpu_vce_validate_bo - make sure not to cross 4GB boundary
*
* @p: parser context
* @ib_idx: indirect buffer to use
* @ib: indirect buffer to use
* @lo: address of lower dword
* @hi: address of higher dword
* @size: minimum size
@ -596,8 +596,9 @@ err:
*
* Make sure that no BO cross a 4GB boundary.
*/
static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
int lo, int hi, unsigned size, int32_t index)
static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p,
struct amdgpu_ib *ib, int lo, int hi,
unsigned size, int32_t index)
{
int64_t offset = ((uint64_t)size) * ((int64_t)index);
struct ttm_operation_ctx ctx = { false, false };
@ -607,8 +608,8 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
uint64_t addr;
int r;
addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) |
((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32;
if (index >= 0) {
addr += offset;
fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT;
@ -638,7 +639,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
* amdgpu_vce_cs_reloc - command submission relocation
*
* @p: parser context
* @ib_idx: indirect buffer to use
* @ib: indirect buffer to use
* @lo: address of lower dword
* @hi: address of higher dword
* @size: minimum size
@ -646,7 +647,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
*
* Patch relocation inside command stream with real buffer address
*/
static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib,
int lo, int hi, unsigned size, uint32_t index)
{
struct amdgpu_bo_va_mapping *mapping;
@ -657,8 +658,8 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
if (index == 0xffffffff)
index = 0;
addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) |
((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32;
addr += ((uint64_t)size) * ((uint64_t)index);
r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
@ -679,8 +680,8 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
addr += amdgpu_bo_gpu_offset(bo);
addr -= ((uint64_t)size) * ((uint64_t)index);
amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr));
amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr));
amdgpu_ib_set_value(ib, lo, lower_32_bits(addr));
amdgpu_ib_set_value(ib, hi, upper_32_bits(addr));
return 0;
}
@ -729,11 +730,13 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
* amdgpu_vce_ring_parse_cs - parse and validate the command stream
*
* @p: parser context
* @ib_idx: indirect buffer to use
* @job: the job to parse
* @ib: the IB to patch
*/
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p,
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
unsigned fb_idx = 0, bs_idx = 0;
int session_idx = -1;
uint32_t destroyed = 0;
@ -744,12 +747,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
unsigned idx;
int i, r = 0;
p->job->vm = NULL;
job->vm = NULL;
ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
for (idx = 0; idx < ib->length_dw;) {
uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
uint32_t len = amdgpu_ib_get_value(ib, idx);
uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);
if ((len < 8) || (len & 3)) {
DRM_ERROR("invalid VCE command length (%d)!\n", len);
@ -759,52 +762,52 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
switch (cmd) {
case 0x00000002: /* task info */
fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
fb_idx = amdgpu_ib_get_value(ib, idx + 6);
bs_idx = amdgpu_ib_get_value(ib, idx + 7);
break;
case 0x03000001: /* encode */
r = amdgpu_vce_validate_bo(p, ib_idx, idx + 10,
idx + 9, 0, 0);
r = amdgpu_vce_validate_bo(p, ib, idx + 10, idx + 9,
0, 0);
if (r)
goto out;
r = amdgpu_vce_validate_bo(p, ib_idx, idx + 12,
idx + 11, 0, 0);
r = amdgpu_vce_validate_bo(p, ib, idx + 12, idx + 11,
0, 0);
if (r)
goto out;
break;
case 0x05000001: /* context buffer */
r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
idx + 2, 0, 0);
r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
0, 0);
if (r)
goto out;
break;
case 0x05000004: /* video bitstream buffer */
tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
tmp = amdgpu_ib_get_value(ib, idx + 4);
r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
tmp, bs_idx);
if (r)
goto out;
break;
case 0x05000005: /* feedback buffer */
r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
4096, fb_idx);
if (r)
goto out;
break;
case 0x0500000d: /* MV buffer */
r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
idx + 2, 0, 0);
r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
0, 0);
if (r)
goto out;
r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8,
idx + 7, 0, 0);
r = amdgpu_vce_validate_bo(p, ib, idx + 8, idx + 7,
0, 0);
if (r)
goto out;
break;
@ -814,12 +817,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
}
for (idx = 0; idx < ib->length_dw;) {
uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
uint32_t len = amdgpu_ib_get_value(ib, idx);
uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);
switch (cmd) {
case 0x00000001: /* session */
handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
handle = amdgpu_ib_get_value(ib, idx + 2);
session_idx = amdgpu_vce_validate_handle(p, handle,
&allocated);
if (session_idx < 0) {
@ -830,8 +833,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
break;
case 0x00000002: /* task info */
fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
fb_idx = amdgpu_ib_get_value(ib, idx + 6);
bs_idx = amdgpu_ib_get_value(ib, idx + 7);
break;
case 0x01000001: /* create */
@ -846,8 +849,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
goto out;
}
*size = amdgpu_get_ib_value(p, ib_idx, idx + 8) *
amdgpu_get_ib_value(p, ib_idx, idx + 10) *
*size = amdgpu_ib_get_value(ib, idx + 8) *
amdgpu_ib_get_value(ib, idx + 10) *
8 * 3 / 2;
break;
@ -876,12 +879,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
break;
case 0x03000001: /* encode */
r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
r = amdgpu_vce_cs_reloc(p, ib, idx + 10, idx + 9,
*size, 0);
if (r)
goto out;
r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11,
r = amdgpu_vce_cs_reloc(p, ib, idx + 12, idx + 11,
*size / 3, 0);
if (r)
goto out;
@ -892,35 +895,35 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
break;
case 0x05000001: /* context buffer */
r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
*size * 2, 0);
if (r)
goto out;
break;
case 0x05000004: /* video bitstream buffer */
tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
tmp = amdgpu_ib_get_value(ib, idx + 4);
r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
tmp, bs_idx);
if (r)
goto out;
break;
case 0x05000005: /* feedback buffer */
r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
4096, fb_idx);
if (r)
goto out;
break;
case 0x0500000d: /* MV buffer */
r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3,
idx + 2, *size, 0);
r = amdgpu_vce_cs_reloc(p, ib, idx + 3,
idx + 2, *size, 0);
if (r)
goto out;
r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8,
idx + 7, *size / 12, 0);
r = amdgpu_vce_cs_reloc(p, ib, idx + 8,
idx + 7, *size / 12, 0);
if (r)
goto out;
break;
@ -965,11 +968,13 @@ out:
* amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode
*
* @p: parser context
* @ib_idx: indirect buffer to use
* @job: the job to parse
* @ib: the IB to patch
*/
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
int session_idx = -1;
uint32_t destroyed = 0;
uint32_t created = 0;
@ -978,8 +983,8 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
int i, r = 0, idx = 0;
while (idx < ib->length_dw) {
uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
uint32_t len = amdgpu_ib_get_value(ib, idx);
uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);
if ((len < 8) || (len & 3)) {
DRM_ERROR("invalid VCE command length (%d)!\n", len);
@ -989,7 +994,7 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
switch (cmd) {
case 0x00000001: /* session */
handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
handle = amdgpu_ib_get_value(ib, idx + 2);
session_idx = amdgpu_vce_validate_handle(p, handle,
&allocated);
if (session_idx < 0) {


@ -59,8 +59,11 @@ int amdgpu_vce_entity_init(struct amdgpu_device *adev);
int amdgpu_vce_suspend(struct amdgpu_device *adev);
int amdgpu_vce_resume(struct amdgpu_device *adev);
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
struct amdgpu_ib *ib);
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
struct amdgpu_job *job,
struct amdgpu_ib *ib);
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
struct amdgpu_ib *ib, uint32_t flags);
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,


@ -27,6 +27,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/debugfs.h>
#include <drm/drm_drv.h>
#include "amdgpu.h"
@ -79,6 +80,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
const char *fw_name;
const struct common_firmware_header *hdr;
unsigned char fw_check;
unsigned int fw_shared_size, log_offset;
int i, r;
INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
@ -226,7 +228,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
bo_size += AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
log_offset = offsetof(struct amdgpu_fw_shared, fw_log);
bo_size += fw_shared_size;
if (amdgpu_vcnfw_log)
bo_size += AMDGPU_VCNFW_LOG_SIZE;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
if (adev->vcn.harvest_config & (1 << i))
@ -240,10 +247,18 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
return r;
}
adev->vcn.inst[i].fw_shared_cpu_addr = adev->vcn.inst[i].cpu_addr +
bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
adev->vcn.inst[i].fw_shared_gpu_addr = adev->vcn.inst[i].gpu_addr +
bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr +
bo_size - fw_shared_size;
adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr +
bo_size - fw_shared_size;
adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size;
if (amdgpu_vcnfw_log) {
adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
adev->vcn.inst[i].fw_shared.log_offset = log_offset;
}
if (adev->vcn.indirect_sram) {
r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
@ -979,3 +994,112 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
dev_info(adev->dev, "Will use PSP to load VCN firmware\n");
}
}
/*
* debugfs for mapping vcn firmware log buffer.
*/
#if defined(CONFIG_DEBUG_FS)
static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
struct amdgpu_vcn_inst *vcn;
void *log_buf;
volatile struct amdgpu_vcn_fwlog *plog;
unsigned int read_pos, write_pos, available, i, read_bytes = 0;
unsigned int read_num[2] = {0};
vcn = file_inode(f)->i_private;
if (!vcn)
return -ENODEV;
if (!vcn->fw_shared.cpu_addr || !amdgpu_vcnfw_log)
return -EFAULT;
log_buf = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
plog = (volatile struct amdgpu_vcn_fwlog *)log_buf;
read_pos = plog->rptr;
write_pos = plog->wptr;
if (read_pos > AMDGPU_VCNFW_LOG_SIZE || write_pos > AMDGPU_VCNFW_LOG_SIZE)
return -EFAULT;
if (!size || (read_pos == write_pos))
return 0;
if (write_pos > read_pos) {
available = write_pos - read_pos;
read_num[0] = min(size, (size_t)available);
} else {
read_num[0] = AMDGPU_VCNFW_LOG_SIZE - read_pos;
available = read_num[0] + write_pos - plog->header_size;
if (size > available)
read_num[1] = write_pos - plog->header_size;
else if (size > read_num[0])
read_num[1] = size - read_num[0];
else
read_num[0] = size;
}
for (i = 0; i < 2; i++) {
if (read_num[i]) {
if (read_pos == AMDGPU_VCNFW_LOG_SIZE)
read_pos = plog->header_size;
if (read_num[i] == copy_to_user((buf + read_bytes),
(log_buf + read_pos), read_num[i]))
return -EFAULT;
read_bytes += read_num[i];
read_pos += read_num[i];
}
}
plog->rptr = read_pos;
*pos += read_bytes;
return read_bytes;
}
static const struct file_operations amdgpu_debugfs_vcnfwlog_fops = {
.owner = THIS_MODULE,
.read = amdgpu_debugfs_vcn_fwlog_read,
.llseek = default_llseek
};
#endif
void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i,
struct amdgpu_vcn_inst *vcn)
{
#if defined(CONFIG_DEBUG_FS)
struct drm_minor *minor = adev_to_drm(adev)->primary;
struct dentry *root = minor->debugfs_root;
char name[32];
sprintf(name, "amdgpu_vcn_%d_fwlog", i);
debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, vcn,
&amdgpu_debugfs_vcnfwlog_fops,
AMDGPU_VCNFW_LOG_SIZE);
#endif
}
void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn)
{
#if defined(CONFIG_DEBUG_FS)
volatile uint32_t *flag = vcn->fw_shared.cpu_addr;
void *fw_log_cpu_addr = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size;
volatile struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr;
volatile struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr
+ vcn->fw_shared.log_offset;
*flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG);
fw_log->is_enabled = 1;
fw_log->addr_lo = cpu_to_le32(fw_log_gpu_addr & 0xFFFFFFFF);
fw_log->addr_hi = cpu_to_le32(fw_log_gpu_addr >> 32);
fw_log->size = cpu_to_le32(AMDGPU_VCNFW_LOG_SIZE);
log_buf->header_size = sizeof(struct amdgpu_vcn_fwlog);
log_buf->buffer_size = AMDGPU_VCNFW_LOG_SIZE;
log_buf->rptr = log_buf->header_size;
log_buf->wptr = log_buf->header_size;
log_buf->wrapped = 0;
#endif
}
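
A sketch of the per-instance buffer layout that the helpers above assume, reconstructed from the offset arithmetic in amdgpu_vcn_sw_init() and amdgpu_vcn_fwlog_init() (a reading aid, not an authoritative map):

/*
 * vcpu_bo, per VCN instance, when amdgpu_vcnfw_log is enabled:
 *
 *  [ firmware / stack / context ][ fw_shared (mem_size) ][ fw log (AMDGPU_VCNFW_LOG_SIZE) ]
 *                                ^ fw_shared.cpu_addr    ^ fw_shared.cpu_addr + mem_size
 *
 * The log region begins with a struct amdgpu_vcn_fwlog header (rptr, wptr,
 * buffer_size, header_size, wrapped); the firmware appends log bytes after
 * the header, and the debugfs reader drains the span between rptr and wptr,
 * wrapping back to header_size at the end of the buffer.
 */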


@ -158,6 +158,7 @@
#define AMDGPU_VCN_FW_SHARED_FLAG_0_RB (1 << 6)
#define AMDGPU_VCN_MULTI_QUEUE_FLAG (1 << 8)
#define AMDGPU_VCN_SW_RING_FLAG (1 << 9)
#define AMDGPU_VCN_FW_LOGGING_FLAG (1 << 10)
#define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001
#define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001
@ -205,6 +206,13 @@ struct amdgpu_vcn_reg{
unsigned scratch9;
};
struct amdgpu_vcn_fw_shared {
void *cpu_addr;
uint64_t gpu_addr;
uint32_t mem_size;
uint32_t log_offset;
};
struct amdgpu_vcn_inst {
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;
@ -221,8 +229,7 @@ struct amdgpu_vcn_inst {
uint64_t dpg_sram_gpu_addr;
uint32_t *dpg_sram_curr_addr;
atomic_t dpg_enc_submission_cnt;
void *fw_shared_cpu_addr;
uint64_t fw_shared_gpu_addr;
struct amdgpu_vcn_fw_shared fw_shared;
};
struct amdgpu_vcn {
@ -265,6 +272,13 @@ struct amdgpu_fw_shared_sw_ring {
uint8_t padding[3];
};
struct amdgpu_fw_shared_fw_logging {
uint8_t is_enabled;
uint32_t addr_lo;
uint32_t addr_hi;
uint32_t size;
};
struct amdgpu_fw_shared {
uint32_t present_flag_0;
uint8_t pad[44];
@ -272,6 +286,15 @@ struct amdgpu_fw_shared {
uint8_t pad1[1];
struct amdgpu_fw_shared_multi_queue multi_queue;
struct amdgpu_fw_shared_sw_ring sw_ring;
struct amdgpu_fw_shared_fw_logging fw_log;
};
struct amdgpu_vcn_fwlog {
uint32_t rptr;
uint32_t wptr;
uint32_t buffer_size;
uint32_t header_size;
uint8_t wrapped;
};
struct amdgpu_vcn_decode_buffer {
@ -313,4 +336,7 @@ enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring);
void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev);
void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn);
void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev,
uint8_t i, struct amdgpu_vcn_inst *vcn);
#endif


@ -779,7 +779,8 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
amdgpu_vm_eviction_lock(vm);
ret = !vm->evicting;
amdgpu_vm_eviction_unlock(vm);
return ret;
return ret && list_empty(&vm->evicted);
}
/**


@ -768,13 +768,6 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_comm
return amdgpu_ras_block_late_init(adev, ras_block);
}
static void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev)
{
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) &&
adev->gmc.xgmi.ras_if)
amdgpu_ras_block_late_fini(adev, adev->gmc.xgmi.ras_if);
}
uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
uint64_t addr)
{
@ -982,6 +975,5 @@ struct amdgpu_xgmi_ras xgmi_ras = {
},
.hw_ops = &xgmi_ras_hw_ops,
.ras_late_init = amdgpu_xgmi_ras_late_init,
.ras_fini = amdgpu_xgmi_ras_fini,
},
};


@ -250,13 +250,6 @@ MODULE_FIRMWARE("amdgpu/yellow_carp_mec.bin");
MODULE_FIRMWARE("amdgpu/yellow_carp_mec2.bin");
MODULE_FIRMWARE("amdgpu/yellow_carp_rlc.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish_ce.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish_pfp.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish_me.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish_mec.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish_mec2.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish_rlc.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish2_ce.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish2_pfp.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish2_me.bin");
@ -4043,10 +4036,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
break;
case IP_VERSION(10, 1, 3):
case IP_VERSION(10, 1, 4):
if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2)
chip_name = "cyan_skillfish2";
else
chip_name = "cyan_skillfish";
chip_name = "cyan_skillfish2";
break;
case IP_VERSION(10, 3, 7):
chip_name = "gc_10_3_7";
@ -6557,6 +6547,7 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
case IP_VERSION(10, 3, 5):
case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 7):
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
@ -7857,6 +7848,7 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev)
case IP_VERSION(10, 3, 5):
case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 7):
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);
/* wait for RLC_SAFE_MODE */
@ -7894,6 +7886,7 @@ static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev)
case IP_VERSION(10, 3, 5):
case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 7):
WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);
break;
default:
@ -8348,6 +8341,7 @@ static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable)
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 7):
data = 0x4E20 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh;
WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data);
break;
@ -8417,6 +8411,7 @@ static int gfx_v10_0_set_powergating_state(void *handle,
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 7):
gfx_v10_cntl_pg(adev, enable);
amdgpu_gfx_off_ctrl(adev, enable);
break;
@ -8445,6 +8440,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
case IP_VERSION(10, 3, 5):
case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 7):
gfx_v10_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
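The GC 10.3.7 entries here and in the hunks above simply add the new part to existing switch arms so it follows the same Sienna Cichlid/Vangogh register paths. Switching on IP versions works because the (major, minor, revision) triple is packed into one integer; a small standalone sketch, assuming the usual amdgpu encoding of (maj << 16 | min << 8 | rev):

#include <stdint.h>

#define IP_VERSION(maj, min, rev)  (((maj) << 16) | ((min) << 8) | (rev))

static const char *gc_register_path(uint32_t gc_ver)
{
	switch (gc_ver) {
	case IP_VERSION(10, 3, 1):
	case IP_VERSION(10, 3, 3):
	case IP_VERSION(10, 3, 6):
	case IP_VERSION(10, 3, 7):	/* the part added throughout this file */
		return "gfx10.3 APU register path";
	default:
		return "other";
	}
}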

View File

@ -2204,10 +2204,6 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
if (!adev->gfx.ras->ras_block.ras_late_init)
adev->gfx.ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
/* If not define special ras_fini function, use gfx default ras_fini */
if (!adev->gfx.ras->ras_block.ras_fini)
adev->gfx.ras->ras_block.ras_fini = amdgpu_gfx_ras_fini;
/* If not defined special ras_cb function, use default ras_cb */
if (!adev->gfx.ras->ras_block.ras_cb)
adev->gfx.ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
@ -2432,9 +2428,6 @@ static int gfx_v9_0_sw_fini(void *handle)
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->gfx.ras && adev->gfx.ras->ras_block.ras_fini)
adev->gfx.ras->ras_block.ras_fini(adev);
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
for (i = 0; i < adev->gfx.num_compute_rings; i++)

View File

@ -683,10 +683,6 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
if (!adev->umc.ras->ras_block.ras_late_init)
adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
/* If don't define special ras_fini function, use default ras_fini */
if (!adev->umc.ras->ras_block.ras_fini)
adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini;
/* If not defined special ras_cb function, use default ras_cb */
if (!adev->umc.ras->ras_block.ras_cb)
adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb;

View File

@ -1243,10 +1243,6 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
if (!adev->umc.ras->ras_block.ras_late_init)
adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
/* If don't define special ras_fini function, use default ras_fini */
if (!adev->umc.ras->ras_block.ras_fini)
adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini;
/* If not defined special ras_cb function, use default ras_cb */
if (!adev->umc.ras->ras_block.ras_cb)
adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb;
@ -1292,10 +1288,6 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
adev->mmhub.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MMHUB;
adev->mmhub.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
adev->mmhub.ras_if = &adev->mmhub.ras->ras_block.ras_comm;
/* If don't define special ras_fini function, use default ras_fini */
if (!adev->mmhub.ras->ras_block.ras_fini)
adev->mmhub.ras->ras_block.ras_fini = amdgpu_mmhub_ras_fini;
}
}
@ -1561,7 +1553,7 @@ static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
static int gmc_v9_0_sw_init(void *handle)
{
int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->gfxhub.funcs->init(adev);
@ -1677,12 +1669,13 @@ static int gmc_v9_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
dma_addr_bits = adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ? 48:44;
r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits));
if (r) {
printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
return r;
}
adev->need_swiotlb = drm_need_swiotlb(44);
adev->need_swiotlb = drm_need_swiotlb(dma_addr_bits);
r = gmc_v9_0_mc_init(adev);
if (r)
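The mask change above widens DMA addressing from 44 to 48 bits when GC is IP 9.4.2. DMA_BIT_MASK(n) is just an n-bit all-ones value, so the only difference is how much of the 48-bit MC address space the device is allowed to DMA into; a quick standalone illustration using the standard macro definition:

#include <stdint.h>
#include <stdio.h>

/* Same semantics as the kernel's DMA_BIT_MASK() */
#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

int main(void)
{
	printf("44-bit mask: 0x%llx\n", (unsigned long long)DMA_BIT_MASK(44));
	printf("48-bit mask: 0x%llx\n", (unsigned long long)DMA_BIT_MASK(48));
	return 0;
}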

View File

@ -166,7 +166,6 @@ struct amdgpu_hdp_ras hdp_v4_0_ras = {
.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
},
.hw_ops = &hdp_v4_0_ras_hw_ops,
.ras_fini = amdgpu_hdp_ras_fini,
},
};

View File

@ -37,11 +37,6 @@ static void mca_v3_0_mp0_query_ras_error_count(struct amdgpu_device *adev,
ras_error_status);
}
static void mca_v3_0_mp0_ras_fini(struct amdgpu_device *adev)
{
amdgpu_mca_ras_fini(adev, &adev->mca.mp0);
}
static int mca_v3_0_ras_block_match(struct amdgpu_ras_block_object *block_obj,
enum amdgpu_ras_block block, uint32_t sub_block_index)
{
@ -71,7 +66,6 @@ struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = {
},
.hw_ops = &mca_v3_0_mp0_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
.ras_fini = mca_v3_0_mp0_ras_fini,
},
};
@ -83,11 +77,6 @@ static void mca_v3_0_mp1_query_ras_error_count(struct amdgpu_device *adev,
ras_error_status);
}
static void mca_v3_0_mp1_ras_fini(struct amdgpu_device *adev)
{
amdgpu_mca_ras_fini(adev, &adev->mca.mp1);
}
const struct amdgpu_ras_block_hw_ops mca_v3_0_mp1_hw_ops = {
.query_ras_error_count = mca_v3_0_mp1_query_ras_error_count,
.query_ras_error_address = NULL,
@ -103,7 +92,6 @@ struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = {
},
.hw_ops = &mca_v3_0_mp1_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
.ras_fini = mca_v3_0_mp1_ras_fini,
},
};
@ -115,11 +103,6 @@ static void mca_v3_0_mpio_query_ras_error_count(struct amdgpu_device *adev,
ras_error_status);
}
static void mca_v3_0_mpio_ras_fini(struct amdgpu_device *adev)
{
amdgpu_mca_ras_fini(adev, &adev->mca.mpio);
}
const struct amdgpu_ras_block_hw_ops mca_v3_0_mpio_hw_ops = {
.query_ras_error_count = mca_v3_0_mpio_query_ras_error_count,
.query_ras_error_address = NULL,
@ -135,7 +118,6 @@ struct amdgpu_mca_ras_block mca_v3_0_mpio_ras = {
},
.hw_ops = &mca_v3_0_mpio_hw_ops,
.ras_block_match = mca_v3_0_ras_block_match,
.ras_fini = mca_v3_0_mpio_ras_fini,
},
};

View File

@ -671,7 +671,6 @@ struct amdgpu_nbio_ras nbio_v7_4_ras = {
},
.hw_ops = &nbio_v7_4_ras_hw_ops,
.ras_late_init = amdgpu_nbio_ras_late_init,
.ras_fini = amdgpu_nbio_ras_fini,
},
.handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring,
.handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring,

View File

@ -946,10 +946,21 @@ static int nv_common_early_init(void *handle)
AMD_CG_SUPPORT_GFX_3D_CGLS |
AMD_CG_SUPPORT_GFX_RLC_LS |
AMD_CG_SUPPORT_GFX_CP_LS |
AMD_CG_SUPPORT_GFX_FGCG;
AMD_CG_SUPPORT_GFX_FGCG |
AMD_CG_SUPPORT_MC_MGCG |
AMD_CG_SUPPORT_MC_LS |
AMD_CG_SUPPORT_SDMA_LS |
AMD_CG_SUPPORT_HDP_MGCG |
AMD_CG_SUPPORT_HDP_LS |
AMD_CG_SUPPORT_ATHUB_MGCG |
AMD_CG_SUPPORT_ATHUB_LS |
AMD_CG_SUPPORT_IH_CG |
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG;
adev->pg_flags = AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
AMD_PG_SUPPORT_JPEG;
AMD_PG_SUPPORT_JPEG |
AMD_PG_SUPPORT_GFX_PG;
adev->external_rev_id = adev->rev_id + 0x01;
break;
default:

View File

@ -31,6 +31,7 @@
MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin");
MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin");
MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin");
MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin");

View File

@ -1995,10 +1995,6 @@ static int sdma_v4_0_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i;
if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops &&
adev->sdma.ras->ras_block.ras_fini)
adev->sdma.ras->ras_block.ras_fini(adev);
for (i = 0; i < adev->sdma.num_instances; i++) {
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
if (adev->sdma.has_page_queue)
@ -2826,10 +2822,6 @@ static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
if (!adev->sdma.ras->ras_block.ras_late_init)
adev->sdma.ras->ras_block.ras_late_init = amdgpu_sdma_ras_late_init;
/* If don't define special ras_fini function, use default ras_fini */
if (!adev->sdma.ras->ras_block.ras_fini)
adev->sdma.ras->ras_block.ras_fini = amdgpu_sdma_ras_fini;
/* If not defined special ras_cb function, use default ras_cb */
if (!adev->sdma.ras->ras_block.ras_cb)
adev->sdma.ras->ras_block.ras_cb = amdgpu_sdma_process_ras_data_cb;

View File

@ -51,9 +51,6 @@ MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin");
MODULE_FIRMWARE("amdgpu/navi12_sdma.bin");
MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish_sdma.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish_sdma1.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma.bin");
MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin");
@ -264,10 +261,7 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev)
chip_name = "navi12";
break;
case IP_VERSION(5, 0, 1):
if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2)
chip_name = "cyan_skillfish2";
else
chip_name = "cyan_skillfish";
chip_name = "cyan_skillfish2";
break;
default:
BUG();

View File

@ -1214,9 +1214,6 @@ static int soc15_common_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->nbio.ras && adev->nbio.ras->ras_block.ras_fini)
adev->nbio.ras->ras_block.ras_fini(adev);
if (adev->df.funcs &&
adev->df.funcs->sw_fini)
adev->df.funcs->sw_fini(adev);

View File

@ -25,6 +25,7 @@
#include "amdgpu.h"
#include "amdgpu_uvd.h"
#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
@ -1275,14 +1276,15 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
* uvd_v7_0_ring_patch_cs_in_place - Patch the IB for command submission.
*
* @p: the CS parser with the IBs
* @ib_idx: which IB to patch
* @job: which job this ib is in
* @ib: which IB to patch
*
*/
static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
uint32_t ib_idx)
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
unsigned i;
/* No patching necessary for the first instance */
@ -1290,12 +1292,12 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
return 0;
for (i = 0; i < ib->length_dw; i += 2) {
uint32_t reg = amdgpu_get_ib_value(p, ib_idx, i);
uint32_t reg = amdgpu_ib_get_value(ib, i);
reg -= p->adev->reg_offset[UVD_HWIP][0][1];
reg += p->adev->reg_offset[UVD_HWIP][1][1];
amdgpu_set_ib_value(p, ib_idx, i, reg);
amdgpu_ib_set_value(ib, i, reg);
}
return 0;
}
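This is the same CS cleanup applied elsewhere in the series: the parser/ib_idx based amdgpu_get_ib_value()/amdgpu_set_ib_value() helpers are replaced by accessors that take the IB directly, so patch callbacks only need the job and the IB. Conceptually the new accessors are plain dword reads and writes over the IB's CPU mapping; a sketch of the shape with illustrative names (not copied from the driver headers):

#include <stdint.h>

/* Illustrative only: dword accessors over an indirect buffer mapping. */
struct example_ib {
	uint32_t *ptr;		/* CPU mapping of the IB */
	uint32_t length_dw;	/* size in dwords */
};

static inline uint32_t example_ib_get_value(const struct example_ib *ib, int idx)
{
	return ib->ptr[idx];
}

static inline void example_ib_set_value(struct example_ib *ib, int idx, uint32_t value)
{
	ib->ptr[idx] = value;
}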

View File

@ -148,6 +148,13 @@ static int vcn_v1_0_sw_init(void *handle)
adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
if (amdgpu_vcnfw_log) {
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
fw_shared->present_flag_0 = 0;
amdgpu_vcn_fwlog_init(adev->vcn.inst);
}
r = jpeg_v1_0_sw_init(handle);
return r;

View File

@ -172,8 +172,12 @@ static int vcn_v2_0_sw_init(void *handle)
if (r)
return r;
fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
if (amdgpu_vcnfw_log)
amdgpu_vcn_fwlog_init(adev->vcn.inst);
return 0;
}
@ -188,7 +192,7 @@ static int vcn_v2_0_sw_fini(void *handle)
{
int r, idx;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
fw_shared->present_flag_0 = 0;
@ -364,9 +368,9 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
/* non-cache window */
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr));
lower_32_bits(adev->vcn.inst->fw_shared.gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr));
upper_32_bits(adev->vcn.inst->fw_shared.gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
WREG32_SOC15(UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0,
AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
@ -455,10 +459,10 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
/* non-cache window */
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect);
lower_32_bits(adev->vcn.inst->fw_shared.gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect);
upper_32_bits(adev->vcn.inst->fw_shared.gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
@ -784,7 +788,7 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
{
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
@ -921,7 +925,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
static int vcn_v2_0_start(struct amdgpu_device *adev)
{
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
@ -1207,7 +1211,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
if (!ret_code) {
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
/* pause DPG */
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);

View File

@ -196,8 +196,11 @@ static int vcn_v2_5_sw_init(void *handle)
return r;
}
fw_shared = adev->vcn.inst[j].fw_shared_cpu_addr;
fw_shared = adev->vcn.inst[j].fw_shared.cpu_addr;
fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
if (amdgpu_vcnfw_log)
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
}
if (amdgpu_sriov_vf(adev)) {
@ -229,7 +232,7 @@ static int vcn_v2_5_sw_fini(void *handle)
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
if (adev->vcn.harvest_config & (1 << i))
continue;
fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
fw_shared->present_flag_0 = 0;
}
drm_dev_exit(idx);
@ -423,9 +426,9 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
/* non-cache window */
WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
lower_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr));
lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
upper_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr));
upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_SIZE0,
AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
@ -513,10 +516,10 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
/* non-cache window */
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
@ -757,7 +760,7 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
@ -981,7 +984,7 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
vcn_v2_5_mc_resume(adev);
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
if (adev->vcn.harvest_config & (1 << i))
continue;
/* VCN global tiling registers */
@ -1403,7 +1406,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
if (!ret_code) {
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
/* pause DPG */
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;

View File

@ -25,6 +25,7 @@
#include "amdgpu.h"
#include "amdgpu_vcn.h"
#include "amdgpu_pm.h"
#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "vcn_v2_0.h"
@ -213,11 +214,14 @@ static int vcn_v3_0_sw_init(void *handle)
return r;
}
fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SW_RING_FLAG) |
cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG) |
cpu_to_le32(AMDGPU_VCN_FW_SHARED_FLAG_0_RB);
fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED);
if (amdgpu_vcnfw_log)
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
}
if (amdgpu_sriov_vf(adev)) {
@ -249,7 +253,7 @@ static int vcn_v3_0_sw_fini(void *handle)
if (adev->vcn.harvest_config & (1 << i))
continue;
fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
fw_shared->present_flag_0 = 0;
fw_shared->sw_ring.is_enabled = false;
}
@ -295,6 +299,7 @@ static int vcn_v3_0_hw_init(void *handle)
ring = &adev->vcn.inst[i].ring_dec;
if (amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, i)) {
ring->sched.ready = false;
ring->no_scheduler = true;
dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name);
} else {
ring->wptr = 0;
@ -307,6 +312,7 @@ static int vcn_v3_0_hw_init(void *handle)
ring = &adev->vcn.inst[i].ring_enc[j];
if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) {
ring->sched.ready = false;
ring->no_scheduler = true;
dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name);
} else {
ring->wptr = 0;
@ -469,9 +475,9 @@ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst)
/* non-cache window */
WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
lower_32_bits(adev->vcn.inst[inst].fw_shared_gpu_addr));
lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
upper_32_bits(adev->vcn.inst[inst].fw_shared_gpu_addr));
upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_SIZE0,
AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
@ -558,10 +564,10 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
/* non-cache window */
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
@ -923,7 +929,7 @@ static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
@ -1220,7 +1226,7 @@ static int vcn_v3_0_start(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
/* programm the RB_BASE for ring buffer */
@ -1611,7 +1617,7 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(3, 0, 33)) {
/* Restore */
fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
ring = &adev->vcn.inst[inst_idx].ring_enc[0];
ring->wptr = 0;
@ -1700,7 +1706,7 @@ static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
/*whenever update RBC_RB_WPTR, we save the wptr in shared rb.wptr and scratch2 */
fw_shared = adev->vcn.inst[ring->me].fw_shared_cpu_addr;
fw_shared = adev->vcn.inst[ring->me].fw_shared.cpu_addr;
fw_shared->rb.wptr = lower_32_bits(ring->wptr);
WREG32_SOC15(VCN, ring->me, mmUVD_SCRATCH2,
lower_32_bits(ring->wptr));
@ -1806,21 +1812,23 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = {
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p)
static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p,
struct amdgpu_job *job)
{
struct drm_gpu_scheduler **scheds;
/* The create msg must be in the first IB submitted */
if (atomic_read(&p->entity->fence_seq))
if (atomic_read(&job->base.entity->fence_seq))
return -EINVAL;
scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]
[AMDGPU_RING_PRIO_DEFAULT].sched;
drm_sched_entity_modify_sched(p->entity, scheds, 1);
drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
return 0;
}
static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
uint64_t addr)
{
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_bo_va_mapping *map;
@ -1891,7 +1899,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
continue;
r = vcn_v3_0_limit_sched(p);
r = vcn_v3_0_limit_sched(p, job);
if (r)
goto out;
}
@ -1902,10 +1910,10 @@ out:
}
static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
uint32_t ib_idx)
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
uint32_t msg_lo = 0, msg_hi = 0;
unsigned i;
int r;
@ -1915,8 +1923,8 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
return 0;
for (i = 0; i < ib->length_dw; i += 2) {
uint32_t reg = amdgpu_get_ib_value(p, ib_idx, i);
uint32_t val = amdgpu_get_ib_value(p, ib_idx, i + 1);
uint32_t reg = amdgpu_ib_get_value(ib, i);
uint32_t val = amdgpu_ib_get_value(ib, i + 1);
if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) {
msg_lo = val;
@ -1924,7 +1932,8 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
msg_hi = val;
} else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) &&
val == 0) {
r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo);
r = vcn_v3_0_dec_msg(p, job,
((u64)msg_hi) << 32 | msg_lo);
if (r)
return r;
}
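The scan above treats the IB as alternating (register, value) dwords: data0/data1 accumulate the low and high halves of the message address, and a zero write to the cmd register marks the point where the decode message can be inspected. A simplified standalone version of the same walk; the real code matches PACKET0-encoded headers against adev->vcn.internal.* rather than raw offsets, so the constants below are purely hypothetical:

#include <stdint.h>

#define REG_DATA0 0x10   /* hypothetical offsets; the driver uses the     */
#define REG_DATA1 0x11   /* internal data0/data1/cmd register numbers     */
#define REG_CMD   0x12   /* wrapped in PACKET0 headers                    */

/* Walk an IB of (reg, value) pairs and return the 64-bit message address
 * latched when the cmd register is written with 0; returns 0 if not found.
 */
static uint64_t find_decode_msg_addr(const uint32_t *ib, uint32_t length_dw)
{
	uint32_t msg_lo = 0, msg_hi = 0;
	uint32_t i;

	for (i = 0; i + 1 < length_dw; i += 2) {
		uint32_t reg = ib[i];
		uint32_t val = ib[i + 1];

		if (reg == REG_DATA0)
			msg_lo = val;
		else if (reg == REG_DATA1)
			msg_hi = val;
		else if (reg == REG_CMD && val == 0)
			return ((uint64_t)msg_hi << 32) | msg_lo;
	}
	return 0;
}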

View File

@ -500,6 +500,11 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process
pr_debug("Killing all process wavefronts\n");
if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
pr_err("no vmid pasid mapping supported \n");
return -EOPNOTSUPP;
}
/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
* ATC_VMID15_PASID_MAPPING
* to check which VMID the current process is mapped to.

View File

@ -82,7 +82,8 @@ static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
struct kfd_smi_client *client = filep->private_data;
unsigned char *buf;
buf = kmalloc_array(MAX_KFIFO_SIZE, sizeof(*buf), GFP_KERNEL);
size = min_t(size_t, size, MAX_KFIFO_SIZE);
buf = kmalloc(size, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@ -96,7 +97,7 @@ static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
ret = -EAGAIN;
goto ret_err;
}
to_copy = min3(size, sizeof(buf), to_copy);
to_copy = min(size, to_copy);
ret = kfifo_out(&client->fifo, buf, to_copy);
spin_unlock(&client->lock);
if (ret <= 0) {
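Besides bounding the allocation, this also fixes a sizing bug: buf is a pointer, so the old min3(size, sizeof(buf), to_copy) capped each read at the size of a pointer (8 bytes on LP64) rather than at the buffer size. A two-line refresher on why sizeof behaves that way:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned char stack_buf[64];
	unsigned char *heap_buf = malloc(64);

	printf("%zu\n", sizeof(stack_buf)); /* 64: arrays keep their size   */
	printf("%zu\n", sizeof(heap_buf));  /* 8 on LP64: size of a pointer */
	free(heap_buf);
	return 0;
}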
@ -175,22 +176,29 @@ static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
rcu_read_unlock();
}
void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
__printf(3, 4)
static void kfd_smi_event_add(struct kfd_dev *dev, unsigned int event,
char *fmt, ...)
{
/*
* GpuReset msg = Reset seq number (incremented for
* every reset message sent before GPU reset).
* 1 byte event + 1 byte space + 8 bytes seq num +
* 1 byte \n + 1 byte \0 = 12
*/
char fifo_in[12];
char fifo_in[KFD_SMI_EVENT_MSG_SIZE];
int len;
unsigned int event;
va_list args;
if (list_empty(&dev->smi_clients))
return;
memset(fifo_in, 0x0, sizeof(fifo_in));
len = snprintf(fifo_in, sizeof(fifo_in), "%x ", event);
va_start(args, fmt);
len += vsnprintf(fifo_in + len, sizeof(fifo_in) - len, fmt, args);
va_end(args);
add_event_to_kfifo(dev, event, fifo_in, len);
}
void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
{
unsigned int event;
if (post_reset) {
event = KFD_SMI_EVENT_GPU_POST_RESET;
@ -198,48 +206,20 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
event = KFD_SMI_EVENT_GPU_PRE_RESET;
++(dev->reset_seq_num);
}
len = snprintf(fifo_in, sizeof(fifo_in), "%x %x\n", event,
dev->reset_seq_num);
add_event_to_kfifo(dev, event, fifo_in, len);
kfd_smi_event_add(dev, event, "%x\n", dev->reset_seq_num);
}
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
uint64_t throttle_bitmask)
{
/*
* ThermalThrottle msg = throttle_bitmask(8):
* thermal_interrupt_count(16):
* 1 byte event + 1 byte space + 16 byte throttle_bitmask +
* 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n +
* 1 byte \0 = 37
*/
char fifo_in[37];
int len;
if (list_empty(&dev->smi_clients))
return;
len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n",
KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
amdgpu_dpm_get_thermal_throttling_counter(dev->adev));
add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
kfd_smi_event_add(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n",
throttle_bitmask,
amdgpu_dpm_get_thermal_throttling_counter(dev->adev));
}
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
{
struct amdgpu_task_info task_info;
/* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */
/* 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n +
* 1 byte \0 = 29
*/
char fifo_in[29];
int len;
if (list_empty(&dev->smi_clients))
return;
memset(&task_info, 0, sizeof(struct amdgpu_task_info));
amdgpu_vm_get_task_info(dev->adev, pasid, &task_info);
@ -247,10 +227,8 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
if (!task_info.pid)
return;
len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
task_info.pid, task_info.task_name);
add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
kfd_smi_event_add(dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
task_info.pid, task_info.task_name);
}
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
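With kfd_smi_event_add() every event reaches the client FIFO as one text line: a hex event id, a space, and an event-specific payload (the "%x\n", "%llx:%llx\n" and "%x:%s\n" formats above). A small userspace-style sketch of splitting such a line; the event ids in the example calls are made up for illustration, and the payload meaning follows the per-event formats in this file rather than any documented ABI:

#include <stdio.h>

/* Parse one SMI event line of the form "<hex event id> <payload>\n". */
static void parse_smi_line(const char *line)
{
	char payload[128] = {0};
	unsigned int event = 0;

	if (sscanf(line, "%x %127[^\n]", &event, payload) < 1)
		return;

	printf("event 0x%x payload \"%s\"\n", event, payload);
}

int main(void)
{
	/* Example lines mirroring the formats used above (ids are invented). */
	parse_smi_line("5 3\n");            /* reset-style event, seq num 3     */
	parse_smi_line("2 1f:40\n");        /* throttle-style mask:counter      */
	parse_smi_line("1 12ab:my_app\n");  /* vmfault-style pid:task name      */
	return 0;
}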

View File

@ -1629,6 +1629,7 @@ retry_flush_work:
static void svm_range_restore_work(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
struct amdkfd_process_info *process_info;
struct svm_range_list *svms;
struct svm_range *prange;
struct kfd_process *p;
@ -1645,6 +1646,7 @@ static void svm_range_restore_work(struct work_struct *work)
pr_debug("restore svm ranges\n");
p = container_of(svms, struct kfd_process, svms);
process_info = p->kgd_process_info;
/* Keep mm reference when svm_range_validate_and_map ranges */
mm = get_task_mm(p->lead_thread);
@ -1653,6 +1655,7 @@ static void svm_range_restore_work(struct work_struct *work)
return;
}
mutex_lock(&process_info->lock);
svm_range_list_lock_and_flush_work(svms, mm);
mutex_lock(&svms->lock);
@ -1705,6 +1708,7 @@ static void svm_range_restore_work(struct work_struct *work)
out_reschedule:
mutex_unlock(&svms->lock);
mmap_write_unlock(mm);
mutex_unlock(&process_info->lock);
mmput(mm);
/* If validation failed, reschedule another attempt */
@ -3209,6 +3213,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
uint64_t start, uint64_t size, uint32_t nattr,
struct kfd_ioctl_svm_attribute *attrs)
{
struct amdkfd_process_info *process_info = p->kgd_process_info;
struct list_head update_list;
struct list_head insert_list;
struct list_head remove_list;
@ -3226,6 +3231,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
svms = &p->svms;
mutex_lock(&process_info->lock);
svm_range_list_lock_and_flush_work(svms, mm);
r = svm_range_is_valid(p, start, size);
@ -3300,6 +3307,8 @@ out_unlock_range:
mutex_unlock(&svms->lock);
mmap_read_unlock(mm);
out:
mutex_unlock(&process_info->lock);
pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
&p->svms, start, start + size - 1, r);
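Both paths now nest the locks the same way: process_info->lock outermost, then the list flush helper (which leaves the mmap lock held), then svms->lock, released in reverse order. A hypothetical skeleton restating that nesting with the names used in this diff; it summarizes the ordering and is not the real function:

static void example_svm_update(struct kfd_process *p, struct mm_struct *mm)
{
	struct amdkfd_process_info *process_info = p->kgd_process_info;

	mutex_lock(&process_info->lock);	/* outermost: serializes with restore/eviction */
	svm_range_list_lock_and_flush_work(&p->svms, mm); /* flushes deferred work, leaves mm locked */
	mutex_lock(&p->svms.lock);		/* innermost: protects the range list */

	/* ... validate and map ranges ... */

	mutex_unlock(&p->svms.lock);
	mmap_read_unlock(mm);			/* write-unlock in the restore path */
	mutex_unlock(&process_info->lock);
}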

View File

@ -1198,11 +1198,11 @@ void pre_validate_dsc(struct drm_atomic_state *state,
struct dc_state *local_dc_state = NULL;
if (!is_dsc_precompute_needed(state)) {
DRM_DEBUG_DRIVER("DSC precompute is not needed.\n");
DRM_INFO_ONCE("DSC precompute is not needed.\n");
return;
}
if (dm_atomic_get_state(state, dm_state_ptr)) {
DRM_DEBUG_DRIVER("dm_atomic_get_state() failed\n");
DRM_INFO_ONCE("dm_atomic_get_state() failed\n");
return;
}
dm_state = *dm_state_ptr;
@ -1245,7 +1245,7 @@ void pre_validate_dsc(struct drm_atomic_state *state,
}
if (!pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars)) {
DRM_DEBUG_DRIVER("pre_compute_mst_dsc_configs_for_state() failed\n");
DRM_INFO_ONCE("pre_compute_mst_dsc_configs_for_state() failed\n");
goto clean_exit;
}
@ -1258,7 +1258,7 @@ void pre_validate_dsc(struct drm_atomic_state *state,
if (local_dc_state->streams[i] &&
is_timing_changed(stream, local_dc_state->streams[i])) {
DRM_DEBUG_DRIVER("crtc[%d] needs mode_changed\n", i);
DRM_INFO_ONCE("crtc[%d] needs mode_changed\n", i);
} else {
int ind = find_crtc_index_in_state_by_stream(state, stream);

View File

@ -981,7 +981,8 @@ static bool should_verify_link_capability_destructively(struct dc_link *link,
destrictive = false;
}
}
}
} else if (dc_is_hdmi_signal(link->local_sink->sink_signal))
destrictive = true;
return destrictive;
}
@ -2801,6 +2802,17 @@ static bool dp_active_dongle_validate_timing(
return false;
}
/* Check 3D format */
switch (timing->timing_3d_format) {
case TIMING_3D_FORMAT_NONE:
case TIMING_3D_FORMAT_FRAME_ALTERNATE:
/*Only frame alternate 3D is supported on active dongle*/
break;
default:
/*other 3D formats are not supported due to bad infoframe translation */
return false;
}
#if defined(CONFIG_DRM_AMD_DC_DCN)
if (dongle_caps->dp_hdmi_frl_max_link_bw_in_kbps > 0) { // DP to HDMI FRL converter
struct dc_crtc_timing outputTiming = *timing;

View File

@ -505,17 +505,24 @@ static void vendor_specific_lttpr_wa_four(
}
}
static void vendor_specific_lttpr_wa_five(
static void dp_fixed_vs_pe_set_retimer_lane_settings(
struct dc_link *link,
const union dpcd_training_lane dpcd_lane_adjust[LANE_COUNT_DP_MAX],
uint8_t lane_count)
{
const uint32_t vendor_lttpr_write_address = 0xF004F;
const uint8_t offset = dp_convert_to_count(
link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
const uint8_t vendor_lttpr_write_data_reset[4] = {0x1, 0x50, 0x63, 0xFF};
uint32_t vendor_lttpr_write_address = 0xF004F;
uint8_t vendor_lttpr_write_data_vs[4] = {0x1, 0x51, 0x63, 0x0};
uint8_t vendor_lttpr_write_data_pe[4] = {0x1, 0x52, 0x63, 0x0};
uint8_t lane = 0;
if (offset != 0xFF) {
vendor_lttpr_write_address +=
((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (offset - 1));
}
for (lane = 0; lane < lane_count; lane++) {
vendor_lttpr_write_data_vs[3] |=
dpcd_lane_adjust[lane].bits.VOLTAGE_SWING_SET << (2 * lane);
@ -5989,15 +5996,14 @@ bool dc_link_dp_set_test_pattern(
if (link->dc->debug.apply_vendor_specific_lttpr_wa &&
(link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
link->lttpr_mode == LTTPR_MODE_TRANSPARENT) {
dpcd_set_lane_settings(link, p_link_settings, DPRX);
vendor_specific_lttpr_wa_five(
dp_fixed_vs_pe_set_retimer_lane_settings(
link,
p_link_settings->dpcd_lane_settings,
p_link_settings->link_settings.lane_count);
} else {
dp_set_hw_lane_settings(link, &pipe_ctx->link_res, p_link_settings, DPRX);
dpcd_set_lane_settings(link, p_link_settings, DPRX);
}
dpcd_set_lane_settings(link, p_link_settings, DPRX);
}
/* Blank stream if running test pattern */

View File

@ -2615,6 +2615,8 @@ static void set_avi_info_frame(
hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE;
///VIC
if (pipe_ctx->stream->timing.hdmi_vic != 0)
vic = 0;
format = stream->timing.timing_3d_format;
/*todo, add 3DStereo support*/
if (format != TIMING_3D_FORMAT_NONE) {

View File

@ -47,7 +47,7 @@ struct aux_payload;
struct set_config_cmd_payload;
struct dmub_notification;
#define DC_VER "3.2.174"
#define DC_VER "3.2.175"
#define MAX_SURFACES 3
#define MAX_PLANES 6
@ -354,6 +354,7 @@ enum dc_psr_power_opts {
psr_power_opt_invalid = 0x0,
psr_power_opt_smu_opt_static_screen = 0x1,
psr_power_opt_z10_static_screen = 0x10,
psr_power_opt_ds_disable_allow = 0x100,
};
enum dcc_option {
@ -710,6 +711,7 @@ struct dc_debug_options {
#endif
bool apply_vendor_specific_lttpr_wa;
bool ignore_dpref_ss;
uint8_t psr_power_use_phy_fsm;
};
struct gpu_info_soc_bounding_box_v1_0;

View File

@ -320,6 +320,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub,
copy_settings_data->otg_inst = 0;
// Misc
copy_settings_data->use_phy_fsm = link->ctx->dc->debug.psr_power_use_phy_fsm;
copy_settings_data->psr_level = psr_context->psr_level.u32all;
copy_settings_data->smu_optimizations_en = psr_context->allow_smu_optimizations;
copy_settings_data->multi_disp_optimizations_en = psr_context->allow_multi_disp_optimizations;

View File

@ -1509,6 +1509,31 @@ static enum dc_status apply_single_controller_ctx_to_hw(
if (!pipe_ctx->stream->apply_seamless_boot_optimization && dc->config.use_pipe_ctx_sync_logic)
check_syncd_pipes_for_disabled_master_pipe(dc, context, pipe_ctx->pipe_idx);
pipe_ctx->stream_res.opp->funcs->opp_program_fmt(
pipe_ctx->stream_res.opp,
&stream->bit_depth_params,
&stream->clamping);
pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion(
pipe_ctx->stream_res.opp,
COLOR_SPACE_YCBCR601,
stream->timing.display_color_depth,
stream->signal);
while (odm_pipe) {
odm_pipe->stream_res.opp->funcs->opp_set_dyn_expansion(
odm_pipe->stream_res.opp,
COLOR_SPACE_YCBCR601,
stream->timing.display_color_depth,
stream->signal);
odm_pipe->stream_res.opp->funcs->opp_program_fmt(
odm_pipe->stream_res.opp,
&stream->bit_depth_params,
&stream->clamping);
odm_pipe = odm_pipe->next_odm_pipe;
}
/* DCN3.1 FPGA Workaround
* Need to enable HPO DP Stream Encoder before setting OTG master enable.
* To do so, move calling function enable_stream_timing to only be done AFTER calling
@ -1548,30 +1573,6 @@ static enum dc_status apply_single_controller_ctx_to_hw(
if (dc_is_dp_signal(pipe_ctx->stream->signal))
dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_OTG);
pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion(
pipe_ctx->stream_res.opp,
COLOR_SPACE_YCBCR601,
stream->timing.display_color_depth,
stream->signal);
pipe_ctx->stream_res.opp->funcs->opp_program_fmt(
pipe_ctx->stream_res.opp,
&stream->bit_depth_params,
&stream->clamping);
while (odm_pipe) {
odm_pipe->stream_res.opp->funcs->opp_set_dyn_expansion(
odm_pipe->stream_res.opp,
COLOR_SPACE_YCBCR601,
stream->timing.display_color_depth,
stream->signal);
odm_pipe->stream_res.opp->funcs->opp_program_fmt(
odm_pipe->stream_res.opp,
&stream->bit_depth_params,
&stream->clamping);
odm_pipe = odm_pipe->next_odm_pipe;
}
if (!stream->dpms_off)
core_link_enable_stream(context, pipe_ctx);

View File

@ -107,7 +107,7 @@ void dcn10_lock_all_pipes(struct dc *dc,
* (un)locking. Also skip if pipe is disabled.
*/
if (pipe_ctx->top_pipe ||
!pipe_ctx->stream || !pipe_ctx->plane_state ||
!pipe_ctx->stream ||
!tg->funcs->is_tg_enabled(tg))
continue;
@ -3093,7 +3093,8 @@ static void dcn10_config_stereo_parameters(
flags->PROGRAM_STEREO = 1;
flags->PROGRAM_POLARITY = 1;
if (timing_3d_format == TIMING_3D_FORMAT_INBAND_FA ||
if (timing_3d_format == TIMING_3D_FORMAT_FRAME_ALTERNATE ||
timing_3d_format == TIMING_3D_FORMAT_INBAND_FA ||
timing_3d_format == TIMING_3D_FORMAT_DP_HDMI_INBAND_FA ||
timing_3d_format == TIMING_3D_FORMAT_SIDEBAND_FA) {
enum display_dongle_type dongle = \

View File

@ -707,17 +707,6 @@ bool hubp2_program_surface_flip_and_addr(
REG_UPDATE(VMID_SETTINGS_0,
VMID, address->vmid);
if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) {
REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x1);
REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1);
} else {
// turn off stereo if not in stereo
REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x0);
REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x0);
}
/* HW automatically latch rest of address register on write to
* DCSURF_PRIMARY_SURFACE_ADDRESS if SURFACE_UPDATE_LOCK is not used
@ -942,10 +931,6 @@ void hubp2_set_blank_regs(struct hubp *hubp, bool blank)
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
uint32_t blank_en = blank ? 1 : 0;
REG_UPDATE_2(DCHUBP_CNTL,
HUBP_BLANK_EN, blank_en,
HUBP_TTU_DISABLE, blank_en);
if (blank) {
uint32_t reg_val = REG_READ(DCHUBP_CNTL);
@ -958,9 +943,13 @@ void hubp2_set_blank_regs(struct hubp *hubp, bool blank)
*/
REG_WAIT(DCHUBP_CNTL,
HUBP_NO_OUTSTANDING_REQ, 1,
1, 200);
1, 100000);
}
}
REG_UPDATE_2(DCHUBP_CNTL,
HUBP_BLANK_EN, blank_en,
HUBP_TTU_DISABLE, 0);
}
void hubp2_cursor_set_position(

View File

@ -240,18 +240,9 @@ static void enc31_hw_init(struct link_encoder *enc)
// 100MHz -> 0x32
// 48MHz -> 0x18
#ifdef CLEANUP_FIXME
/*from display_init*/
REG_WRITE(RDPCSTX_DEBUG_CONFIG, 0);
#endif
// Set TMDS_CTL0 to 1. This is a legacy setting.
REG_UPDATE(TMDS_CTL_BITS, TMDS_CTL0, 1);
/*HW default is 5*/
REG_UPDATE(RDPCSTX_CNTL,
RDPCS_TX_FIFO_RD_START_DELAY, 4);
dcn10_aux_initialize(enc10);
}

View File

@ -1777,7 +1777,7 @@ static bool is_dual_plane(enum surface_pixel_format format)
return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
}
static int dcn31_populate_dml_pipes_from_context(
int dcn31_populate_dml_pipes_from_context(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
bool fast_validate)
@ -1810,6 +1810,7 @@ static int dcn31_populate_dml_pipes_from_context(
pipes[pipe_cnt].pipe.src.immediate_flip = true;
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active;
pipes[pipe_cnt].pipe.src.gpuvm = true;
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
@ -2068,7 +2069,7 @@ static struct dc_cap_funcs cap_funcs = {
.get_dcc_compression_cap = dcn20_get_dcc_compression_cap
};
static void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
{
struct clk_limit_table *clk_table = &bw_params->clk_table;
struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];

View File

@ -43,6 +43,11 @@ void dcn31_calculate_wm_and_dlg(
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
int vlevel);
int dcn31_populate_dml_pipes_from_context(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
bool fast_validate);
void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context);
struct resource_pool *dcn31_create_resource_pool(

View File

@ -1035,6 +1035,7 @@ static const struct dc_debug_options debug_defaults_drv = {
},
.optimize_edp_link_rate = true,
.enable_sw_cntl_psr = true,
.psr_power_use_phy_fsm = 0,
};
static const struct dc_debug_options debug_defaults_diags = {

View File

@ -184,4 +184,7 @@ int dm_helpers_dmub_set_config_sync(struct dc_context *ctx,
const struct dc_link *link,
struct set_config_cmd_payload *payload,
enum set_config_status *operation_result);
enum dc_edid_status dm_helpers_get_sbios_edid(struct dc_link *link, struct dc_edid *edid);
#endif /* __DM_HELPERS__ */

View File

@ -46,10 +46,10 @@
/* Firmware versioning. */
#ifdef DMUB_EXPOSE_VERSION
#define DMUB_FW_VERSION_GIT_HASH 0x1422ef84
#define DMUB_FW_VERSION_GIT_HASH 0x082bd4c8
#define DMUB_FW_VERSION_MAJOR 0
#define DMUB_FW_VERSION_MINOR 0
#define DMUB_FW_VERSION_REVISION 104
#define DMUB_FW_VERSION_REVISION 106
#define DMUB_FW_VERSION_TEST 0
#define DMUB_FW_VERSION_VBIOS 0
#define DMUB_FW_VERSION_HOTFIX 0
@ -1560,10 +1560,14 @@ struct dmub_cmd_psr_copy_settings_data {
* DSC enable status in driver
*/
uint8_t dsc_enable_status;
/**
* Explicit padding to 3 byte boundary.
/*
* Use FSM state for PSR power up/down
*/
uint8_t pad3[3];
uint8_t use_phy_fsm;
/**
* Explicit padding to 2 byte boundary.
*/
uint8_t pad3[2];
};
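The new use_phy_fsm byte is carved out of the old 3-byte pad, so the structure size and therefore the driver/firmware layout stay the same. When trading padding for fields like this, a compile-time size check can document the expectation; a generic sketch (the struct and the size value are illustrative, not the DMUB definition):

#include <stdint.h>
#include <assert.h>

struct example_copy_settings {
	uint8_t dsc_enable_status;
	uint8_t use_phy_fsm;   /* carved out of the old 3-byte pad */
	uint8_t pad3[2];       /* explicit padding back to the old boundary */
};

/* Catch accidental size/ABI changes at compile time (4 is illustrative). */
static_assert(sizeof(struct example_copy_settings) == 4,
	      "copy settings layout changed unexpectedly");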
/**

View File

@ -3095,7 +3095,7 @@ static int vega10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
void *pp_table, uint32_t classification_flag)
{
ATOM_Vega10_GFXCLK_Dependency_Record_V2 *patom_record_V2;
struct vega10_power_state *vega10_power_state =
struct vega10_power_state *vega10_ps =
cast_phw_vega10_power_state(&(power_state->hardware));
struct vega10_performance_level *performance_level;
ATOM_Vega10_State *state_entry = (ATOM_Vega10_State *)state;
@ -3145,17 +3145,17 @@ static int vega10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
power_state->temperatures.min = 0;
power_state->temperatures.max = 0;
performance_level = &(vega10_power_state->performance_levels
[vega10_power_state->performance_level_count++]);
performance_level = &(vega10_ps->performance_levels
[vega10_ps->performance_level_count++]);
PP_ASSERT_WITH_CODE(
(vega10_power_state->performance_level_count <
(vega10_ps->performance_level_count <
NUM_GFXCLK_DPM_LEVELS),
"Performance levels exceeds SMC limit!",
return -1);
PP_ASSERT_WITH_CODE(
(vega10_power_state->performance_level_count <=
(vega10_ps->performance_level_count <=
hwmgr->platform_descriptor.
hardwareActivityPerformanceLevels),
"Performance levels exceeds Driver limit!",
@ -3169,8 +3169,8 @@ static int vega10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
performance_level->mem_clock = mclk_dep_table->entries
[state_entry->ucMemClockIndexLow].ulMemClk;
performance_level = &(vega10_power_state->performance_levels
[vega10_power_state->performance_level_count++]);
performance_level = &(vega10_ps->performance_levels
[vega10_ps->performance_level_count++]);
performance_level->soc_clock = socclk_dep_table->entries
[state_entry->ucSocClockIndexHigh].ulClk;
if (gfxclk_dep_table->ucRevId == 0) {
@ -3201,11 +3201,11 @@ static int vega10_get_pp_table_entry(struct pp_hwmgr *hwmgr,
unsigned long entry_index, struct pp_power_state *state)
{
int result;
struct vega10_power_state *ps;
struct vega10_power_state *vega10_ps;
state->hardware.magic = PhwVega10_Magic;
ps = cast_phw_vega10_power_state(&state->hardware);
vega10_ps = cast_phw_vega10_power_state(&state->hardware);
result = vega10_get_powerplay_table_entry(hwmgr, entry_index, state,
vega10_get_pp_table_entry_callback_func);
@ -3218,10 +3218,10 @@ static int vega10_get_pp_table_entry(struct pp_hwmgr *hwmgr,
*/
/* set DC compatible flag if this state supports DC */
if (!state->validation.disallowOnDC)
ps->dc_compatible = true;
vega10_ps->dc_compatible = true;
ps->uvd_clks.vclk = state->uvd_clocks.VCLK;
ps->uvd_clks.dclk = state->uvd_clocks.DCLK;
vega10_ps->uvd_clks.vclk = state->uvd_clocks.VCLK;
vega10_ps->uvd_clks.dclk = state->uvd_clocks.DCLK;
return 0;
}
@ -4823,33 +4823,41 @@ static int vega10_check_states_equal(struct pp_hwmgr *hwmgr,
const struct pp_hw_power_state *pstate1,
const struct pp_hw_power_state *pstate2, bool *equal)
{
const struct vega10_power_state *psa;
const struct vega10_power_state *psb;
const struct vega10_power_state *vega10_psa;
const struct vega10_power_state *vega10_psb;
int i;
if (pstate1 == NULL || pstate2 == NULL || equal == NULL)
return -EINVAL;
psa = cast_const_phw_vega10_power_state(pstate1);
psb = cast_const_phw_vega10_power_state(pstate2);
/* If the two states don't even have the same number of performance levels they cannot be the same state. */
if (psa->performance_level_count != psb->performance_level_count) {
vega10_psa = cast_const_phw_vega10_power_state(pstate1);
vega10_psb = cast_const_phw_vega10_power_state(pstate2);
/* If the two states don't even have the same number of performance levels
* they cannot be the same state.
*/
if (vega10_psa->performance_level_count != vega10_psb->performance_level_count) {
*equal = false;
return 0;
}
for (i = 0; i < psa->performance_level_count; i++) {
if (!vega10_are_power_levels_equal(&(psa->performance_levels[i]), &(psb->performance_levels[i]))) {
/* If we have found even one performance level pair that is different the states are different. */
for (i = 0; i < vega10_psa->performance_level_count; i++) {
if (!vega10_are_power_levels_equal(&(vega10_psa->performance_levels[i]),
&(vega10_psb->performance_levels[i]))) {
/* If we have found even one performance level pair
* that is different the states are different.
*/
*equal = false;
return 0;
}
}
/* If all performance levels are the same try to use the UVD clocks to break the tie.*/
*equal = ((psa->uvd_clks.vclk == psb->uvd_clks.vclk) && (psa->uvd_clks.dclk == psb->uvd_clks.dclk));
*equal &= ((psa->vce_clks.evclk == psb->vce_clks.evclk) && (psa->vce_clks.ecclk == psb->vce_clks.ecclk));
*equal &= (psa->sclk_threshold == psb->sclk_threshold);
*equal = ((vega10_psa->uvd_clks.vclk == vega10_psb->uvd_clks.vclk) &&
(vega10_psa->uvd_clks.dclk == vega10_psb->uvd_clks.dclk));
*equal &= ((vega10_psa->vce_clks.evclk == vega10_psb->vce_clks.evclk) &&
(vega10_psa->vce_clks.ecclk == vega10_psb->vce_clks.ecclk));
*equal &= (vega10_psa->sclk_threshold == vega10_psb->sclk_threshold);
return 0;
}
@ -5444,19 +5452,19 @@ static int vega10_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_
PHM_PerformanceLevelDesignation designation, uint32_t index,
PHM_PerformanceLevel *level)
{
const struct vega10_power_state *ps;
const struct vega10_power_state *vega10_ps;
uint32_t i;
if (level == NULL || hwmgr == NULL || state == NULL)
return -EINVAL;
ps = cast_const_phw_vega10_power_state(state);
vega10_ps = cast_const_phw_vega10_power_state(state);
i = index > ps->performance_level_count - 1 ?
ps->performance_level_count - 1 : index;
i = index > vega10_ps->performance_level_count - 1 ?
vega10_ps->performance_level_count - 1 : index;
level->coreClock = ps->performance_levels[i].gfx_clock;
level->memory_clock = ps->performance_levels[i].mem_clock;
level->coreClock = vega10_ps->performance_levels[i].gfx_clock;
level->memory_clock = vega10_ps->performance_levels[i].mem_clock;
return 0;
}

View File

@ -1480,10 +1480,68 @@ typedef struct {
} SmuMetrics_V2_t;
typedef struct {
uint32_t CurrClock[PPCLK_COUNT];
uint16_t AverageGfxclkFrequencyPreDs;
uint16_t AverageGfxclkFrequencyPostDs;
uint16_t AverageFclkFrequencyPreDs;
uint16_t AverageFclkFrequencyPostDs;
uint16_t AverageUclkFrequencyPreDs;
uint16_t AverageUclkFrequencyPostDs;
uint16_t AverageGfxActivity;
uint16_t AverageUclkActivity;
uint8_t CurrSocVoltageOffset;
uint8_t CurrGfxVoltageOffset;
uint8_t CurrMemVidOffset;
uint8_t Padding8;
uint16_t AverageSocketPower;
uint16_t TemperatureEdge;
uint16_t TemperatureHotspot;
uint16_t TemperatureMem;
uint16_t TemperatureVrGfx;
uint16_t TemperatureVrMem0;
uint16_t TemperatureVrMem1;
uint16_t TemperatureVrSoc;
uint16_t TemperatureLiquid0;
uint16_t TemperatureLiquid1;
uint16_t TemperaturePlx;
uint16_t Padding16;
uint32_t AccCnt;
uint8_t ThrottlingPercentage[THROTTLER_COUNT];
uint8_t LinkDpmLevel;
uint8_t CurrFanPwm;
uint16_t CurrFanSpeed;
//BACO metrics, PMFW-1721
//metrics for D3hot entry/exit and driver ARM msgs
uint8_t D3HotEntryCountPerMode[D3HOT_SEQUENCE_COUNT];
uint8_t D3HotExitCountPerMode[D3HOT_SEQUENCE_COUNT];
uint8_t ArmMsgReceivedCountPerMode[D3HOT_SEQUENCE_COUNT];
//PMFW-4362
uint32_t EnergyAccumulator;
uint16_t AverageVclk0Frequency;
uint16_t AverageDclk0Frequency;
uint16_t AverageVclk1Frequency;
uint16_t AverageDclk1Frequency;
uint16_t VcnUsagePercentage0;
uint16_t VcnUsagePercentage1;
uint8_t PcieRate;
uint8_t PcieWidth;
uint16_t AverageGfxclkFrequencyTarget;
} SmuMetrics_V3_t;
typedef struct {
union {
SmuMetrics_t SmuMetrics;
SmuMetrics_V2_t SmuMetrics_V2;
SmuMetrics_V3_t SmuMetrics_V3;
};
uint32_t Spare[1];

View File

@ -554,6 +554,11 @@ static uint32_t sienna_cichlid_get_throttler_status_locked(struct smu_context *s
int i;
if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
(smu->smc_fw_version >= 0x3A4900)) {
for (i = 0; i < THROTTLER_COUNT; i++)
throttler_status |=
(metrics_ext->SmuMetrics_V3.ThrottlingPercentage[i] ? 1U << i : 0);
} else if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
(smu->smc_fw_version >= 0x3A4300)) {
for (i = 0; i < THROTTLER_COUNT; i++)
throttler_status |=
@ -574,11 +579,20 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu,
&(((SmuMetricsExternal_t *)(smu_table->metrics_table))->SmuMetrics);
SmuMetrics_V2_t *metrics_v2 =
&(((SmuMetricsExternal_t *)(smu_table->metrics_table))->SmuMetrics_V2);
bool use_metrics_v2 = ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
(smu->smc_fw_version >= 0x3A4300)) ? true : false;
SmuMetrics_V3_t *metrics_v3 =
&(((SmuMetricsExternal_t *)(smu_table->metrics_table))->SmuMetrics_V3);
bool use_metrics_v2 = false;
bool use_metrics_v3 = false;
uint16_t average_gfx_activity;
int ret = 0;
if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
(smu->smc_fw_version >= 0x3A4900))
use_metrics_v3 = true;
else if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
(smu->smc_fw_version >= 0x3A4300))
use_metrics_v2 = true;
ret = smu_cmn_get_metrics_table(smu,
NULL,
false);
@ -587,96 +601,119 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu,
switch (member) {
case METRICS_CURR_GFXCLK:
*value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_GFXCLK] :
*value = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_GFXCLK] :
use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_GFXCLK] :
metrics->CurrClock[PPCLK_GFXCLK];
break;
case METRICS_CURR_SOCCLK:
*value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_SOCCLK] :
*value = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_SOCCLK] :
use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_SOCCLK] :
metrics->CurrClock[PPCLK_SOCCLK];
break;
case METRICS_CURR_UCLK:
*value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_UCLK] :
*value = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_UCLK] :
use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_UCLK] :
metrics->CurrClock[PPCLK_UCLK];
break;
case METRICS_CURR_VCLK:
*value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_VCLK_0] :
*value = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_VCLK_0] :
use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_VCLK_0] :
metrics->CurrClock[PPCLK_VCLK_0];
break;
case METRICS_CURR_VCLK1:
*value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_VCLK_1] :
*value = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_VCLK_1] :
use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_VCLK_1] :
metrics->CurrClock[PPCLK_VCLK_1];
break;
case METRICS_CURR_DCLK:
*value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_0] :
*value = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_DCLK_0] :
use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_0] :
metrics->CurrClock[PPCLK_DCLK_0];
break;
case METRICS_CURR_DCLK1:
*value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_1] :
*value = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_DCLK_1] :
use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_1] :
metrics->CurrClock[PPCLK_DCLK_1];
break;
case METRICS_CURR_DCEFCLK:
*value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCEFCLK] :
*value = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_DCEFCLK] :
use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCEFCLK] :
metrics->CurrClock[PPCLK_DCEFCLK];
break;
case METRICS_CURR_FCLK:
*value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_FCLK] :
*value = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_FCLK] :
use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_FCLK] :
metrics->CurrClock[PPCLK_FCLK];
break;
case METRICS_AVERAGE_GFXCLK:
average_gfx_activity = use_metrics_v2 ? metrics_v2->AverageGfxActivity :
average_gfx_activity = use_metrics_v3 ? metrics_v3->AverageGfxActivity :
use_metrics_v2 ? metrics_v2->AverageGfxActivity :
metrics->AverageGfxActivity;
if (average_gfx_activity <= SMU_11_0_7_GFX_BUSY_THRESHOLD)
*value = use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPostDs :
*value = use_metrics_v3 ? metrics_v3->AverageGfxclkFrequencyPostDs :
use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPostDs :
metrics->AverageGfxclkFrequencyPostDs;
else
*value = use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPreDs :
*value = use_metrics_v3 ? metrics_v3->AverageGfxclkFrequencyPreDs :
use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPreDs :
metrics->AverageGfxclkFrequencyPreDs;
break;
case METRICS_AVERAGE_FCLK:
*value = use_metrics_v2 ? metrics_v2->AverageFclkFrequencyPostDs :
*value = use_metrics_v3 ? metrics_v3->AverageFclkFrequencyPostDs :
use_metrics_v2 ? metrics_v2->AverageFclkFrequencyPostDs :
metrics->AverageFclkFrequencyPostDs;
break;
case METRICS_AVERAGE_UCLK:
*value = use_metrics_v2 ? metrics_v2->AverageUclkFrequencyPostDs :
*value = use_metrics_v3 ? metrics_v3->AverageUclkFrequencyPostDs :
use_metrics_v2 ? metrics_v2->AverageUclkFrequencyPostDs :
metrics->AverageUclkFrequencyPostDs;
break;
case METRICS_AVERAGE_GFXACTIVITY:
*value = use_metrics_v2 ? metrics_v2->AverageGfxActivity :
*value = use_metrics_v3 ? metrics_v3->AverageGfxActivity :
use_metrics_v2 ? metrics_v2->AverageGfxActivity :
metrics->AverageGfxActivity;
break;
case METRICS_AVERAGE_MEMACTIVITY:
*value = use_metrics_v2 ? metrics_v2->AverageUclkActivity :
*value = use_metrics_v3 ? metrics_v3->AverageUclkActivity :
use_metrics_v2 ? metrics_v2->AverageUclkActivity :
metrics->AverageUclkActivity;
break;
case METRICS_AVERAGE_SOCKETPOWER:
*value = use_metrics_v2 ? metrics_v2->AverageSocketPower << 8 :
*value = use_metrics_v3 ? metrics_v3->AverageSocketPower << 8 :
use_metrics_v2 ? metrics_v2->AverageSocketPower << 8 :
metrics->AverageSocketPower << 8;
break;
case METRICS_TEMPERATURE_EDGE:
*value = (use_metrics_v2 ? metrics_v2->TemperatureEdge : metrics->TemperatureEdge) *
SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
*value = (use_metrics_v3 ? metrics_v3->TemperatureEdge :
use_metrics_v2 ? metrics_v2->TemperatureEdge :
metrics->TemperatureEdge) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
break;
case METRICS_TEMPERATURE_HOTSPOT:
- *value = (use_metrics_v2 ? metrics_v2->TemperatureHotspot : metrics->TemperatureHotspot) *
- SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ *value = (use_metrics_v3 ? metrics_v3->TemperatureHotspot :
+ use_metrics_v2 ? metrics_v2->TemperatureHotspot :
+ metrics->TemperatureHotspot) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
break;
case METRICS_TEMPERATURE_MEM:
- *value = (use_metrics_v2 ? metrics_v2->TemperatureMem : metrics->TemperatureMem) *
- SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ *value = (use_metrics_v3 ? metrics_v3->TemperatureMem :
+ use_metrics_v2 ? metrics_v2->TemperatureMem :
+ metrics->TemperatureMem) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
break;
case METRICS_TEMPERATURE_VRGFX:
- *value = (use_metrics_v2 ? metrics_v2->TemperatureVrGfx : metrics->TemperatureVrGfx) *
- SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ *value = (use_metrics_v3 ? metrics_v3->TemperatureVrGfx :
+ use_metrics_v2 ? metrics_v2->TemperatureVrGfx :
+ metrics->TemperatureVrGfx) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
break;
case METRICS_TEMPERATURE_VRSOC:
- *value = (use_metrics_v2 ? metrics_v2->TemperatureVrSoc : metrics->TemperatureVrSoc) *
- SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ *value = (use_metrics_v3 ? metrics_v3->TemperatureVrSoc :
+ use_metrics_v2 ? metrics_v2->TemperatureVrSoc :
+ metrics->TemperatureVrSoc) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
break;
case METRICS_THROTTLER_STATUS:
*value = sienna_cichlid_get_throttler_status_locked(smu);
break;
case METRICS_CURR_FANSPEED:
- *value = use_metrics_v2 ? metrics_v2->CurrFanSpeed : metrics->CurrFanSpeed;
+ *value = use_metrics_v3 ? metrics_v3->CurrFanSpeed :
+ use_metrics_v2 ? metrics_v2->CurrFanSpeed : metrics->CurrFanSpeed;
break;
default:
*value = UINT_MAX;
@@ -3656,12 +3693,22 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
&(metrics_external.SmuMetrics);
SmuMetrics_V2_t *metrics_v2 =
&(metrics_external.SmuMetrics_V2);
+ SmuMetrics_V3_t *metrics_v3 =
+ &(metrics_external.SmuMetrics_V3);
struct amdgpu_device *adev = smu->adev;
- bool use_metrics_v2 = ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
- (smu->smc_fw_version >= 0x3A4300)) ? true : false;
+ bool use_metrics_v2 = false;
+ bool use_metrics_v3 = false;
uint16_t average_gfx_activity;
int ret = 0;
+ if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
+ (smu->smc_fw_version >= 0x3A4900))
+ use_metrics_v3 = true;
+ else if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
+ (smu->smc_fw_version >= 0x3A4300))
+ use_metrics_v2 = true;
ret = smu_cmn_get_metrics_table(smu,
&metrics_external,
true);
@@ -3670,29 +3717,30 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3);
- gpu_metrics->temperature_edge =
+ gpu_metrics->temperature_edge = use_metrics_v3 ? metrics_v3->TemperatureEdge :
use_metrics_v2 ? metrics_v2->TemperatureEdge : metrics->TemperatureEdge;
- gpu_metrics->temperature_hotspot =
+ gpu_metrics->temperature_hotspot = use_metrics_v3 ? metrics_v3->TemperatureHotspot :
use_metrics_v2 ? metrics_v2->TemperatureHotspot : metrics->TemperatureHotspot;
- gpu_metrics->temperature_mem =
+ gpu_metrics->temperature_mem = use_metrics_v3 ? metrics_v3->TemperatureMem :
use_metrics_v2 ? metrics_v2->TemperatureMem : metrics->TemperatureMem;
- gpu_metrics->temperature_vrgfx =
+ gpu_metrics->temperature_vrgfx = use_metrics_v3 ? metrics_v3->TemperatureVrGfx :
use_metrics_v2 ? metrics_v2->TemperatureVrGfx : metrics->TemperatureVrGfx;
- gpu_metrics->temperature_vrsoc =
+ gpu_metrics->temperature_vrsoc = use_metrics_v3 ? metrics_v3->TemperatureVrSoc :
use_metrics_v2 ? metrics_v2->TemperatureVrSoc : metrics->TemperatureVrSoc;
- gpu_metrics->temperature_vrmem =
+ gpu_metrics->temperature_vrmem = use_metrics_v3 ? metrics_v3->TemperatureVrMem0 :
use_metrics_v2 ? metrics_v2->TemperatureVrMem0 : metrics->TemperatureVrMem0;
- gpu_metrics->average_gfx_activity =
+ gpu_metrics->average_gfx_activity = use_metrics_v3 ? metrics_v3->AverageGfxActivity :
use_metrics_v2 ? metrics_v2->AverageGfxActivity : metrics->AverageGfxActivity;
- gpu_metrics->average_umc_activity =
+ gpu_metrics->average_umc_activity = use_metrics_v3 ? metrics_v3->AverageUclkActivity :
use_metrics_v2 ? metrics_v2->AverageUclkActivity : metrics->AverageUclkActivity;
- gpu_metrics->average_mm_activity =
+ gpu_metrics->average_mm_activity = use_metrics_v3 ?
+ (metrics_v3->VcnUsagePercentage0 + metrics_v3->VcnUsagePercentage1) / 2 :
use_metrics_v2 ? metrics_v2->VcnActivityPercentage : metrics->VcnActivityPercentage;
- gpu_metrics->average_socket_power =
+ gpu_metrics->average_socket_power = use_metrics_v3 ? metrics_v3->AverageSocketPower :
use_metrics_v2 ? metrics_v2->AverageSocketPower : metrics->AverageSocketPower;
- gpu_metrics->energy_accumulator =
+ gpu_metrics->energy_accumulator = use_metrics_v3 ? metrics_v3->EnergyAccumulator :
use_metrics_v2 ? metrics_v2->EnergyAccumulator : metrics->EnergyAccumulator;
if (metrics->CurrGfxVoltageOffset)
@@ -3705,37 +3753,45 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
gpu_metrics->voltage_soc =
(155000 - 625 * metrics->CurrSocVoltageOffset) / 100;
- average_gfx_activity = use_metrics_v2 ? metrics_v2->AverageGfxActivity : metrics->AverageGfxActivity;
+ average_gfx_activity = use_metrics_v3 ? metrics_v3->AverageGfxActivity :
+ use_metrics_v2 ? metrics_v2->AverageGfxActivity : metrics->AverageGfxActivity;
if (average_gfx_activity <= SMU_11_0_7_GFX_BUSY_THRESHOLD)
gpu_metrics->average_gfxclk_frequency =
- use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPostDs : metrics->AverageGfxclkFrequencyPostDs;
+ use_metrics_v3 ? metrics_v3->AverageGfxclkFrequencyPostDs :
+ use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPostDs :
+ metrics->AverageGfxclkFrequencyPostDs;
else
gpu_metrics->average_gfxclk_frequency =
- use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPreDs : metrics->AverageGfxclkFrequencyPreDs;
+ use_metrics_v3 ? metrics_v3->AverageGfxclkFrequencyPreDs :
+ use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPreDs :
+ metrics->AverageGfxclkFrequencyPreDs;
gpu_metrics->average_uclk_frequency =
- use_metrics_v2 ? metrics_v2->AverageUclkFrequencyPostDs : metrics->AverageUclkFrequencyPostDs;
- gpu_metrics->average_vclk0_frequency =
+ use_metrics_v3 ? metrics_v3->AverageUclkFrequencyPostDs :
+ use_metrics_v2 ? metrics_v2->AverageUclkFrequencyPostDs :
+ metrics->AverageUclkFrequencyPostDs;
+ gpu_metrics->average_vclk0_frequency = use_metrics_v3 ? metrics_v3->AverageVclk0Frequency :
use_metrics_v2 ? metrics_v2->AverageVclk0Frequency : metrics->AverageVclk0Frequency;
- gpu_metrics->average_dclk0_frequency =
+ gpu_metrics->average_dclk0_frequency = use_metrics_v3 ? metrics_v3->AverageDclk0Frequency :
use_metrics_v2 ? metrics_v2->AverageDclk0Frequency : metrics->AverageDclk0Frequency;
- gpu_metrics->average_vclk1_frequency =
+ gpu_metrics->average_vclk1_frequency = use_metrics_v3 ? metrics_v3->AverageVclk1Frequency :
use_metrics_v2 ? metrics_v2->AverageVclk1Frequency : metrics->AverageVclk1Frequency;
- gpu_metrics->average_dclk1_frequency =
+ gpu_metrics->average_dclk1_frequency = use_metrics_v3 ? metrics_v3->AverageDclk1Frequency :
use_metrics_v2 ? metrics_v2->AverageDclk1Frequency : metrics->AverageDclk1Frequency;
- gpu_metrics->current_gfxclk =
+ gpu_metrics->current_gfxclk = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_GFXCLK] :
use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_GFXCLK] : metrics->CurrClock[PPCLK_GFXCLK];
- gpu_metrics->current_socclk =
+ gpu_metrics->current_socclk = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_SOCCLK] :
use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_SOCCLK] : metrics->CurrClock[PPCLK_SOCCLK];
- gpu_metrics->current_uclk =
+ gpu_metrics->current_uclk = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_UCLK] :
use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_UCLK] : metrics->CurrClock[PPCLK_UCLK];
- gpu_metrics->current_vclk0 =
+ gpu_metrics->current_vclk0 = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_VCLK_0] :
use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_VCLK_0] : metrics->CurrClock[PPCLK_VCLK_0];
- gpu_metrics->current_dclk0 =
+ gpu_metrics->current_dclk0 = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_DCLK_0] :
use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_0] : metrics->CurrClock[PPCLK_DCLK_0];
- gpu_metrics->current_vclk1 =
+ gpu_metrics->current_vclk1 = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_VCLK_1] :
use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_VCLK_1] : metrics->CurrClock[PPCLK_VCLK_1];
- gpu_metrics->current_dclk1 =
+ gpu_metrics->current_dclk1 = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_DCLK_1] :
use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_1] : metrics->CurrClock[PPCLK_DCLK_1];
gpu_metrics->throttle_status = sienna_cichlid_get_throttler_status_locked(smu);
@@ -3743,12 +3799,15 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
smu_cmn_get_indep_throttler_status(gpu_metrics->throttle_status,
sienna_cichlid_throttler_map);
- gpu_metrics->current_fan_speed = use_metrics_v2 ? metrics_v2->CurrFanSpeed : metrics->CurrFanSpeed;
+ gpu_metrics->current_fan_speed = use_metrics_v3 ? metrics_v3->CurrFanSpeed :
+ use_metrics_v2 ? metrics_v2->CurrFanSpeed : metrics->CurrFanSpeed;
if (((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) && smu->smc_fw_version > 0x003A1E00) ||
((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 11)) && smu->smc_fw_version > 0x00410400)) {
- gpu_metrics->pcie_link_width = use_metrics_v2 ? metrics_v2->PcieWidth : metrics->PcieWidth;
- gpu_metrics->pcie_link_speed = link_speed[use_metrics_v2 ? metrics_v2->PcieRate : metrics->PcieRate];
+ gpu_metrics->pcie_link_width = use_metrics_v3 ? metrics_v3->PcieWidth :
+ use_metrics_v2 ? metrics_v2->PcieWidth : metrics->PcieWidth;
+ gpu_metrics->pcie_link_speed = link_speed[use_metrics_v3 ? metrics_v3->PcieRate :
+ use_metrics_v2 ? metrics_v2->PcieRate : metrics->PcieRate];
} else {
gpu_metrics->pcie_link_width =
smu_v11_0_get_current_pcie_link_width(smu);

View File

@@ -752,6 +752,7 @@ int smu_v13_0_gfx_off_control(struct smu_context *smu, bool enable)
case IP_VERSION(13, 0, 1):
case IP_VERSION(13, 0, 3):
case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 8):
if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
return 0;
if (enable)

View File

@@ -463,6 +463,7 @@ enum kfd_smi_event {
};
#define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
+ #define KFD_SMI_EVENT_MSG_SIZE 96
struct kfd_ioctl_smi_events_args {
__u32 gpuid; /* to KFD */