mirror of
https://github.com/torvalds/linux.git
synced 2024-11-27 14:41:39 +00:00
drm/amdgpu: add RAS page retirement functions for MCA
Define page retirement functions for MCA platform. v2: remove page retirement handling from MCA poison handler, let MCA notifier do page retirement. v3: remove specific poison handler for MCA to simplify code. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
2513574853
commit
cbe4d43ea5
@ -22,6 +22,59 @@
|
||||
*/
|
||||
|
||||
#include "amdgpu.h"
|
||||
#include "umc_v6_7.h"
|
||||
|
||||
/*
 * Dispatch the UMC-address-to-physical-address translation to the
 * converter that matches this device's UMC IP version.
 *
 * Only UMC 6.7.0 has a converter here (umc_v6_7_convert_error_address());
 * for every other version a warning is logged and nothing is converted.
 *
 * Returns AMDGPU_RAS_SUCCESS once the converter has been called, or
 * AMDGPU_RAS_FAIL when the UMC IP version is not handled.
 */
static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev,
				struct ras_err_data *err_data, uint64_t err_addr,
				uint32_t ch_inst, uint32_t umc_inst)
{
	/* Guard clause: bail out early for any UMC version we cannot handle. */
	if (adev->ip_versions[UMC_HWIP][0] != IP_VERSION(6, 7, 0)) {
		dev_warn(adev->dev,
			 "UMC address to Physical address translation is not supported\n");
		return AMDGPU_RAS_FAIL;
	}

	umc_v6_7_convert_error_address(adev, err_data, err_addr,
				       ch_inst, umc_inst);

	return AMDGPU_RAS_SUCCESS;
}
|
||||
|
||||
int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
|
||||
uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst)
|
||||
{
|
||||
struct ras_err_data err_data = {0, 0, 0, NULL};
|
||||
int ret = AMDGPU_RAS_FAIL;
|
||||
|
||||
err_data.err_addr =
|
||||
kcalloc(adev->umc.max_ras_err_cnt_per_query,
|
||||
sizeof(struct eeprom_table_record), GFP_KERNEL);
|
||||
if (!err_data.err_addr) {
|
||||
dev_warn(adev->dev,
|
||||
"Failed to alloc memory for umc error record in MCA notifier!\n");
|
||||
return AMDGPU_RAS_FAIL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Translate UMC channel address to Physical address
|
||||
*/
|
||||
ret = amdgpu_umc_convert_error_address(adev, &err_data, err_addr,
|
||||
ch_inst, umc_inst);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (amdgpu_bad_page_threshold != 0) {
|
||||
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
|
||||
err_data.err_addr_cnt);
|
||||
amdgpu_ras_save_bad_pages(adev);
|
||||
}
|
||||
|
||||
out:
|
||||
kfree(err_data.err_addr);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
|
||||
void *ras_error_status,
|
||||
|
@ -98,4 +98,6 @@ void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
|
||||
int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
|
||||
void *ras_error_status,
|
||||
struct amdgpu_iv_entry *entry);
|
||||
/*
 * Page retirement for an error address reported on MCA: translates the
 * UMC channel address (err_addr on channel ch_inst of UMC umc_inst) and,
 * when amdgpu_bad_page_threshold is non-zero, adds and saves the
 * resulting bad page records.
 * Returns AMDGPU_RAS_SUCCESS, or AMDGPU_RAS_FAIL on allocation failure
 * or when address translation is not supported for the UMC version.
 */
int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
|
||||
uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst);
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user