drm/amdkfd: fix set kfd node ras properties value
The ctx->features are new RAS implementation which is only available for Vega20 and onwards, it is not available for vega10, vega10 should follow legacy ECC implementation. Changed from V1: wrap function to initialize kfd node properties Changed from V2: remove wrap function and SDMA SRAM ECC check Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com> Reviewed-by: Guchun Chen <guchun.chen@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
1887544d4d
commit
5436ab94cd
@ -986,6 +986,7 @@ struct amdgpu_device {
|
||||
|
||||
atomic_t throttling_logging_enabled;
|
||||
struct ratelimit_state throttling_logging_rs;
|
||||
uint32_t ras_features;
|
||||
};
|
||||
|
||||
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
|
||||
|
@ -1963,6 +1963,17 @@ int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int amdgpu_ras_check_asic_type(struct amdgpu_device *adev)
|
||||
{
|
||||
if (adev->asic_type != CHIP_VEGA10 &&
|
||||
adev->asic_type != CHIP_VEGA20 &&
|
||||
adev->asic_type != CHIP_ARCTURUS &&
|
||||
adev->asic_type != CHIP_SIENNA_CICHLID)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* check hardware's ras ability which will be saved in hw_supported.
|
||||
* if hardware does not support ras, we can skip some ras initializtion and
|
||||
@ -1979,9 +1990,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
|
||||
*supported = 0;
|
||||
|
||||
if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
|
||||
(adev->asic_type != CHIP_VEGA20 &&
|
||||
adev->asic_type != CHIP_ARCTURUS &&
|
||||
adev->asic_type != CHIP_SIENNA_CICHLID))
|
||||
amdgpu_ras_check_asic_type(adev))
|
||||
return;
|
||||
|
||||
if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
|
||||
@ -2003,6 +2012,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
|
||||
|
||||
*supported = amdgpu_ras_enable == 0 ?
|
||||
0 : *hw_supported & amdgpu_ras_mask;
|
||||
adev->ras_features = *supported;
|
||||
}
|
||||
|
||||
int amdgpu_ras_init(struct amdgpu_device *adev)
|
||||
@ -2025,9 +2035,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
|
||||
|
||||
amdgpu_ras_check_supported(adev, &con->hw_supported,
|
||||
&con->supported);
|
||||
if (!con->hw_supported) {
|
||||
if (!con->hw_supported || (adev->asic_type == CHIP_VEGA10)) {
|
||||
r = 0;
|
||||
goto err_out;
|
||||
goto release_con;
|
||||
}
|
||||
|
||||
con->features = 0;
|
||||
@ -2038,25 +2048,25 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
|
||||
if (adev->nbio.funcs->init_ras_controller_interrupt) {
|
||||
r = adev->nbio.funcs->init_ras_controller_interrupt(adev);
|
||||
if (r)
|
||||
goto err_out;
|
||||
goto release_con;
|
||||
}
|
||||
|
||||
if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) {
|
||||
r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev);
|
||||
if (r)
|
||||
goto err_out;
|
||||
goto release_con;
|
||||
}
|
||||
|
||||
if (amdgpu_ras_fs_init(adev)) {
|
||||
r = -EINVAL;
|
||||
goto err_out;
|
||||
goto release_con;
|
||||
}
|
||||
|
||||
dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
|
||||
"hardware ability[%x] ras_mask[%x]\n",
|
||||
con->hw_supported, con->supported);
|
||||
return 0;
|
||||
err_out:
|
||||
release_con:
|
||||
amdgpu_ras_set_context(adev, NULL);
|
||||
kfree(con);
|
||||
|
||||
|
@ -1239,7 +1239,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
|
||||
void *crat_image = NULL;
|
||||
size_t image_size = 0;
|
||||
int proximity_domain;
|
||||
struct amdgpu_ras *ctx;
|
||||
struct amdgpu_device *adev;
|
||||
|
||||
INIT_LIST_HEAD(&temp_topology_device_list);
|
||||
|
||||
@ -1404,19 +1404,17 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
|
||||
dev->node_props.max_waves_per_simd = 10;
|
||||
}
|
||||
|
||||
ctx = amdgpu_ras_get_context((struct amdgpu_device *)(dev->gpu->kgd));
|
||||
if (ctx) {
|
||||
/* kfd only concerns sram ecc on GFX/SDMA and HBM ecc on UMC */
|
||||
dev->node_props.capability |=
|
||||
(((ctx->features & BIT(AMDGPU_RAS_BLOCK__SDMA)) != 0) ||
|
||||
((ctx->features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0)) ?
|
||||
HSA_CAP_SRAM_EDCSUPPORTED : 0;
|
||||
dev->node_props.capability |= ((ctx->features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
|
||||
HSA_CAP_MEM_EDCSUPPORTED : 0;
|
||||
adev = (struct amdgpu_device *)(dev->gpu->kgd);
|
||||
/* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
|
||||
dev->node_props.capability |=
|
||||
((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
|
||||
HSA_CAP_SRAM_EDCSUPPORTED : 0;
|
||||
dev->node_props.capability |= ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
|
||||
HSA_CAP_MEM_EDCSUPPORTED : 0;
|
||||
|
||||
dev->node_props.capability |= (ctx->features != 0) ?
|
||||
if (adev->asic_type != CHIP_VEGA10)
|
||||
dev->node_props.capability |= (adev->ras_features != 0) ?
|
||||
HSA_CAP_RASEVENTNOTIFY : 0;
|
||||
}
|
||||
|
||||
kfd_debug_print_topology();
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user