drm/amdgpu: add reset_ras_error_count function for SDMA
SDMA RAS error counters are dirty after a cold reboot. A read operation is needed to reset them to 0.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
committed by
Alex Deucher
parent
e7429606bb
commit
86153f1be2
@@ -56,6 +56,7 @@ struct amdgpu_sdma_ras_funcs {
|
|||||||
void (*ras_fini)(struct amdgpu_device *adev);
|
void (*ras_fini)(struct amdgpu_device *adev);
|
||||||
int (*query_ras_error_count)(struct amdgpu_device *adev,
|
int (*query_ras_error_count)(struct amdgpu_device *adev,
|
||||||
uint32_t instance, void *ras_error_status);
|
uint32_t instance, void *ras_error_status);
|
||||||
|
void (*reset_ras_error_count)(struct amdgpu_device *adev);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct amdgpu_sdma {
|
struct amdgpu_sdma {
|
||||||
|
|||||||
@@ -1801,13 +1801,9 @@ static int sdma_v4_0_late_init(void *handle)
|
|||||||
struct ras_ih_if ih_info = {
|
struct ras_ih_if ih_info = {
|
||||||
.cb = sdma_v4_0_process_ras_data_cb,
|
.cb = sdma_v4_0_process_ras_data_cb,
|
||||||
};
|
};
|
||||||
int i;
|
|
||||||
|
|
||||||
/* read back edc counter registers to clear the counters */
|
if (adev->sdma.funcs && adev->sdma.funcs->reset_ras_error_count)
|
||||||
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
|
adev->sdma.funcs->reset_ras_error_count(adev);
|
||||||
for (i = 0; i < adev->sdma.num_instances; i++)
|
|
||||||
RREG32_SDMA(i, mmSDMA0_EDC_COUNTER);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
|
if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
|
||||||
return adev->sdma.funcs->ras_late_init(adev, &ih_info);
|
return adev->sdma.funcs->ras_late_init(adev, &ih_info);
|
||||||
@@ -2572,10 +2568,22 @@ static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
|
|||||||
return 0;
|
return 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* read back edc counter registers to clear the counters */
|
||||||
|
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
|
||||||
|
for (i = 0; i < adev->sdma.num_instances; i++)
|
||||||
|
RREG32_SDMA(i, mmSDMA0_EDC_COUNTER);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = {
|
static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = {
|
||||||
.ras_late_init = amdgpu_sdma_ras_late_init,
|
.ras_late_init = amdgpu_sdma_ras_late_init,
|
||||||
.ras_fini = amdgpu_sdma_ras_fini,
|
.ras_fini = amdgpu_sdma_ras_fini,
|
||||||
.query_ras_error_count = sdma_v4_0_query_ras_error_count,
|
.query_ras_error_count = sdma_v4_0_query_ras_error_count,
|
||||||
|
.reset_ras_error_count = sdma_v4_0_reset_ras_error_count,
|
||||||
};
|
};
|
||||||
|
|
||||||
static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
|
static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
|
||||||
|
|||||||
Reference in New Issue
Block a user