diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index d8b6a2a4b210..fb30b7de4aab 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -735,11 +735,10 @@ static void cs_timedout(struct work_struct *work) hdev = cs->ctx->hdev; /* Save only the first CS timeout parameters */ - rc = atomic_cmpxchg(&hdev->last_error.cs_write_disable, 0, 1); + rc = atomic_cmpxchg(&hdev->last_error.cs_timeout.write_disable, 0, 1); if (!rc) { - hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime; - hdev->last_error.cs_timeout_timestamp = ktime_get(); - hdev->last_error.cs_timeout_seq = cs->sequence; + hdev->last_error.cs_timeout.timestamp = ktime_get(); + hdev->last_error.cs_timeout.seq = cs->sequence; } switch (cs->type) { diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 99cbed5ef0e6..b0b0f3f89865 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2568,37 +2568,50 @@ struct hl_clk_throttle { }; /** - * struct last_error_session_info - info about last session in which CS timeout or - * razwi error occurred. - * @open_dev_timestamp: device open timestamp. - * @cs_timeout_timestamp: CS timeout timestamp. - * @razwi_timestamp: razwi timestamp. - * @cs_write_disable: if set writing to CS parameters in the structure is disabled so the - * first (root cause) CS timeout will not be overwritten. - * @razwi_write_disable: if set writing to razwi parameters in the structure is disabled so the - * first (root cause) razwi will not be overwritten. - * @cs_timeout_seq: CS timeout sequence number. - * @razwi_addr: address that caused razwi. - * @razwi_engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does - * not have engine id it will be set to U16_MAX. - * @razwi_engine_id_2: second engine id of razwi initiator. Might happen that razwi have 2 possible - * engines which one them caused the razwi. In that case, it will contain the - * second possible engine id, otherwise it will be set to U16_MAX. - * @razwi_non_engine_initiator: in case the initiator of the razwi does not have engine id. - * @razwi_type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX. + * struct cs_timeout_info - info of last CS timeout occurred. + * @timestamp: CS timeout timestamp. + * @write_disable: if set writing to CS parameters in the structure is disabled so, + * the first (root cause) CS timeout will not be overwritten. + * @seq: CS timeout sequence number. + */ +struct cs_timeout_info { + ktime_t timestamp; + atomic_t write_disable; + u64 seq; +}; + +/** + * struct razwi_info - info about last razwi error occurred. + * @timestamp: razwi timestamp. + * @write_disable: if set writing to razwi parameters in the structure is disabled so the + * first (root cause) razwi will not be overwritten. + * @addr: address that caused razwi. + * @engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does + * not have engine id it will be set to U16_MAX. + * @engine_id_2: second engine id of razwi initiator. Might happen that razwi have 2 possible + * engines which one them caused the razwi. In that case, it will contain the + * second possible engine id, otherwise it will be set to U16_MAX. + * @non_engine_initiator: in case the initiator of the razwi does not have engine id. + * @type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX. + */ +struct razwi_info { + ktime_t timestamp; + atomic_t write_disable; + u64 addr; + u16 engine_id_1; + u16 engine_id_2; + u8 non_engine_initiator; + u8 type; +}; + +/** + * struct last_error_session_info - info about last session errors occurred. + * @cs_timeout: CS timeout error last information. + * @razwi: razwi last information. */ struct last_error_session_info { - ktime_t open_dev_timestamp; - ktime_t cs_timeout_timestamp; - ktime_t razwi_timestamp; - atomic_t cs_write_disable; - atomic_t razwi_write_disable; - u64 cs_timeout_seq; - u64 razwi_addr; - u16 razwi_engine_id_1; - u16 razwi_engine_id_2; - u8 razwi_non_engine_initiator; - u8 razwi_type; + struct cs_timeout_info cs_timeout; + struct razwi_info razwi; }; /** diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 9ead0927208d..37edb69a7255 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -195,8 +195,8 @@ int hl_device_open(struct inode *inode, struct file *filp) hl_debugfs_add_file(hpriv); - atomic_set(&hdev->last_error.cs_write_disable, 0); - atomic_set(&hdev->last_error.razwi_write_disable, 0); + atomic_set(&hdev->last_error.cs_timeout.write_disable, 0); + atomic_set(&hdev->last_error.razwi.write_disable, 0); hdev->open_counter++; hdev->last_successful_open_jif = jiffies; diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 8fd2b427863f..c7864d6bb0a1 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -569,7 +569,7 @@ static int last_err_open_dev_info(struct hl_fpriv *hpriv, struct hl_info_args *a if ((!max_size) || (!out)) return -EINVAL; - info.timestamp = ktime_to_ns(hdev->last_error.open_dev_timestamp); + info.timestamp = ktime_to_ns(hdev->last_successful_open_ktime); return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; } @@ -584,8 +584,8 @@ static int cs_timeout_info(struct hl_fpriv *hpriv, struct hl_info_args *args) if ((!max_size) || (!out)) return -EINVAL; - info.seq = hdev->last_error.cs_timeout_seq; - info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout_timestamp); + info.seq = hdev->last_error.cs_timeout.seq; + info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout.timestamp); return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; } @@ -600,12 +600,12 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args) if ((!max_size) || (!out)) return -EINVAL; - info.timestamp = ktime_to_ns(hdev->last_error.razwi_timestamp); - info.addr = hdev->last_error.razwi_addr; - info.engine_id_1 = hdev->last_error.razwi_engine_id_1; - info.engine_id_2 = hdev->last_error.razwi_engine_id_2; - info.no_engine_id = hdev->last_error.razwi_non_engine_initiator; - info.error_type = hdev->last_error.razwi_type; + info.timestamp = ktime_to_ns(hdev->last_error.razwi.timestamp); + info.addr = hdev->last_error.razwi.addr; + info.engine_id_1 = hdev->last_error.razwi.engine_id_1; + info.engine_id_2 = hdev->last_error.razwi.engine_id_2; + info.no_engine_id = hdev->last_error.razwi.non_engine_initiator; + info.error_type = hdev->last_error.razwi.type; return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; } diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 96a83317b302..fba322241096 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7404,19 +7404,18 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type); /* In case it's the first razwi, save its parameters*/ - rc = atomic_cmpxchg(&hdev->last_error.razwi_write_disable, 0, 1); + rc = atomic_cmpxchg(&hdev->last_error.razwi.write_disable, 0, 1); if (!rc) { - hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime; - hdev->last_error.razwi_timestamp = ktime_get(); - hdev->last_error.razwi_addr = razwi_addr; - hdev->last_error.razwi_engine_id_1 = engine_id_1; - hdev->last_error.razwi_engine_id_2 = engine_id_2; + hdev->last_error.razwi.timestamp = ktime_get(); + hdev->last_error.razwi.addr = razwi_addr; + hdev->last_error.razwi.engine_id_1 = engine_id_1; + hdev->last_error.razwi.engine_id_2 = engine_id_2; /* * If first engine id holds non valid value the razwi initiator * does not have engine id */ - hdev->last_error.razwi_non_engine_initiator = (engine_id_1 == U16_MAX); - hdev->last_error.razwi_type = razwi_type; + hdev->last_error.razwi.non_engine_initiator = (engine_id_1 == U16_MAX); + hdev->last_error.razwi.type = razwi_type; } }