habanalabs/gaudi: add support for NIC DERR

Add support for NIC DERR ECC error events. If such an error is
received, a device reset is performed.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Ofir Bitton 2021-06-17 09:52:55 +03:00 committed by Oded Gabbay
parent 3817b352aa
commit 6c31f494d8
3 changed files with 16 additions and 5 deletions

View File

@ -7870,6 +7870,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
case GAUDI_EVENT_MMU_DERR:
case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
gaudi_print_irq_info(hdev, event_type, true);
gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
goto reset_device;

View File

@ -252,6 +252,11 @@ enum gaudi_async_event_id {
GAUDI_EVENT_HBM3_SPI_0 = 407,
GAUDI_EVENT_HBM3_SPI_1 = 408,
GAUDI_EVENT_PSOC_GPIO_U16_0 = 421,
GAUDI_EVENT_NIC0_CS_DBG_DERR = 483,
GAUDI_EVENT_NIC1_CS_DBG_DERR = 487,
GAUDI_EVENT_NIC2_CS_DBG_DERR = 491,
GAUDI_EVENT_NIC3_CS_DBG_DERR = 495,
GAUDI_EVENT_NIC4_CS_DBG_DERR = 499,
GAUDI_EVENT_RAZWI_OR_ADC = 548,
GAUDI_EVENT_TPC0_QM = 572,
GAUDI_EVENT_TPC1_QM = 573,

View File

@ -507,23 +507,28 @@ static struct gaudi_async_events_ids_map gaudi_irq_map_table[] = {
{ .fc_id = 480, .cpu_id = 329, .valid = 0, .name = "" },
{ .fc_id = 481, .cpu_id = 330, .valid = 0, .name = "" },
{ .fc_id = 482, .cpu_id = 331, .valid = 0, .name = "" },
{ .fc_id = 483, .cpu_id = 332, .valid = 0, .name = "" },
{ .fc_id = 483, .cpu_id = 332, .valid = 1,
.name = "NIC0_CS_DBG_DERR" },
{ .fc_id = 484, .cpu_id = 333, .valid = 0, .name = "" },
{ .fc_id = 485, .cpu_id = 334, .valid = 0, .name = "" },
{ .fc_id = 486, .cpu_id = 335, .valid = 0, .name = "" },
{ .fc_id = 487, .cpu_id = 336, .valid = 0, .name = "" },
{ .fc_id = 487, .cpu_id = 336, .valid = 1,
.name = "NIC1_CS_DBG_DERR" },
{ .fc_id = 488, .cpu_id = 337, .valid = 0, .name = "" },
{ .fc_id = 489, .cpu_id = 338, .valid = 0, .name = "" },
{ .fc_id = 490, .cpu_id = 339, .valid = 0, .name = "" },
{ .fc_id = 491, .cpu_id = 340, .valid = 0, .name = "" },
{ .fc_id = 491, .cpu_id = 340, .valid = 1,
.name = "NIC2_CS_DBG_DERR" },
{ .fc_id = 492, .cpu_id = 341, .valid = 0, .name = "" },
{ .fc_id = 493, .cpu_id = 342, .valid = 0, .name = "" },
{ .fc_id = 494, .cpu_id = 343, .valid = 0, .name = "" },
{ .fc_id = 495, .cpu_id = 344, .valid = 0, .name = "" },
{ .fc_id = 495, .cpu_id = 344, .valid = 1,
.name = "NIC3_CS_DBG_DERR" },
{ .fc_id = 496, .cpu_id = 345, .valid = 0, .name = "" },
{ .fc_id = 497, .cpu_id = 346, .valid = 0, .name = "" },
{ .fc_id = 498, .cpu_id = 347, .valid = 0, .name = "" },
{ .fc_id = 499, .cpu_id = 348, .valid = 0, .name = "" },
{ .fc_id = 499, .cpu_id = 348, .valid = 1,
.name = "NIC4_CS_DBG_DERR" },
{ .fc_id = 500, .cpu_id = 349, .valid = 0, .name = "" },
{ .fc_id = 501, .cpu_id = 350, .valid = 0, .name = "" },
{ .fc_id = 502, .cpu_id = 351, .valid = 0, .name = "" },