drm/amdgpu: update algorithm of umc address conversion
On ALDEBARAN, we need to traverse all column bits higher than BIT11 (C4 C3 C2) in a row, and the shift of the R14 bit should also be taken into account. Retire all pages we find. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
498d46fe7a
commit
e63fa4dcea
@ -119,7 +119,7 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
|
|||||||
uint32_t ch_inst,
|
uint32_t ch_inst,
|
||||||
uint32_t umc_inst)
|
uint32_t umc_inst)
|
||||||
{
|
{
|
||||||
uint64_t mc_umc_status, err_addr, retired_page;
|
uint64_t mc_umc_status, err_addr, soc_pa, retired_page, column;
|
||||||
uint32_t channel_index;
|
uint32_t channel_index;
|
||||||
uint32_t eccinfo_table_idx;
|
uint32_t eccinfo_table_idx;
|
||||||
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
||||||
@ -145,15 +145,27 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
|
|||||||
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
|
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
|
||||||
|
|
||||||
/* translate umc channel address to soc pa, 3 parts are included */
|
/* translate umc channel address to soc pa, 3 parts are included */
|
||||||
retired_page = ADDR_OF_8KB_BLOCK(err_addr) |
|
soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
|
||||||
ADDR_OF_256B_BLOCK(channel_index) |
|
ADDR_OF_256B_BLOCK(channel_index) |
|
||||||
OFFSET_IN_256B_BLOCK(err_addr);
|
OFFSET_IN_256B_BLOCK(err_addr);
|
||||||
|
/* clear [C4 C3 C2] in soc physical address */
|
||||||
|
soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT);
|
||||||
|
|
||||||
/* we only save ue error information currently, ce is skipped */
|
/* we only save ue error information currently, ce is skipped */
|
||||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
||||||
== 1)
|
== 1) {
|
||||||
amdgpu_umc_fill_error_record(err_data, err_addr,
|
/* loop for all possibilities of [C4 C3 C2] */
|
||||||
|
for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) {
|
||||||
|
retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT);
|
||||||
|
amdgpu_umc_fill_error_record(err_data, err_addr,
|
||||||
retired_page, channel_index, umc_inst);
|
retired_page, channel_index, umc_inst);
|
||||||
|
|
||||||
|
/* shift R14 bit */
|
||||||
|
retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT);
|
||||||
|
amdgpu_umc_fill_error_record(err_data, err_addr,
|
||||||
|
retired_page, channel_index, umc_inst);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -332,8 +344,9 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
|
|||||||
uint32_t umc_inst)
|
uint32_t umc_inst)
|
||||||
{
|
{
|
||||||
uint32_t mc_umc_status_addr;
|
uint32_t mc_umc_status_addr;
|
||||||
uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
|
|
||||||
uint32_t channel_index;
|
uint32_t channel_index;
|
||||||
|
uint64_t mc_umc_status, mc_umc_addrt0;
|
||||||
|
uint64_t err_addr, soc_pa, retired_page, column;
|
||||||
|
|
||||||
mc_umc_status_addr =
|
mc_umc_status_addr =
|
||||||
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
|
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
|
||||||
@ -363,15 +376,27 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
|
|||||||
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
|
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
|
||||||
|
|
||||||
/* translate umc channel address to soc pa, 3 parts are included */
|
/* translate umc channel address to soc pa, 3 parts are included */
|
||||||
retired_page = ADDR_OF_8KB_BLOCK(err_addr) |
|
soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
|
||||||
ADDR_OF_256B_BLOCK(channel_index) |
|
ADDR_OF_256B_BLOCK(channel_index) |
|
||||||
OFFSET_IN_256B_BLOCK(err_addr);
|
OFFSET_IN_256B_BLOCK(err_addr);
|
||||||
|
/* clear [C4 C3 C2] in soc physical address */
|
||||||
|
soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT);
|
||||||
|
|
||||||
/* we only save ue error information currently, ce is skipped */
|
/* we only save ue error information currently, ce is skipped */
|
||||||
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
|
||||||
== 1)
|
== 1) {
|
||||||
amdgpu_umc_fill_error_record(err_data, err_addr,
|
/* loop for all possibilities of [C4 C3 C2] */
|
||||||
|
for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) {
|
||||||
|
retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT);
|
||||||
|
amdgpu_umc_fill_error_record(err_data, err_addr,
|
||||||
retired_page, channel_index, umc_inst);
|
retired_page, channel_index, umc_inst);
|
||||||
|
|
||||||
|
/* shift R14 bit */
|
||||||
|
retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT);
|
||||||
|
amdgpu_umc_fill_error_record(err_data, err_addr,
|
||||||
|
retired_page, channel_index, umc_inst);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* clear umc status */
|
/* clear umc status */
|
||||||
|
|||||||
@ -45,6 +45,10 @@
|
|||||||
#define UMC_V6_7_NA_MAP_PA_NUM 8
|
#define UMC_V6_7_NA_MAP_PA_NUM 8
|
||||||
/* R14 bit shift should be considered, double the number */
|
/* R14 bit shift should be considered, double the number */
|
||||||
#define UMC_V6_7_BAD_PAGE_NUM_PER_CHANNEL (UMC_V6_7_NA_MAP_PA_NUM * 2)
|
#define UMC_V6_7_BAD_PAGE_NUM_PER_CHANNEL (UMC_V6_7_NA_MAP_PA_NUM * 2)
|
||||||
|
/* The C2 bit in SOC physical address */
|
||||||
|
#define UMC_V6_7_PA_C2_BIT 17
|
||||||
|
/* The R14 bit in SOC physical address */
|
||||||
|
#define UMC_V6_7_PA_R14_BIT 34
|
||||||
/* UMC register per channel offset */
|
/* UMC register per channel offset */
|
||||||
#define UMC_V6_7_PER_CHANNEL_OFFSET 0x400
|
#define UMC_V6_7_PER_CHANNEL_OFFSET 0x400
|
||||||
extern struct amdgpu_umc_ras umc_v6_7_ras;
|
extern struct amdgpu_umc_ras umc_v6_7_ras;
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user