bnxt_en: implement hw health reporter
This reporter reports NVM errors, which are non-fatal. When we receive one of these NVM error events, we report it through this new hw health reporter. Reviewed-by: Edwin Peer <edwin.peer@broadcom.com> Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com> Signed-off-by: Michael Chan <michael.chan@broadcom.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
f16a916928
commit
bafed3f231
@ -2061,6 +2061,22 @@ static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
|
||||
case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD:
|
||||
netdev_warn(bp->dev, "One or more MMIO doorbells dropped by the device!\n");
|
||||
break;
|
||||
case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM: {
|
||||
struct bnxt_hw_health *hw_health = &bp->hw_health;
|
||||
|
||||
hw_health->nvm_err_address = EVENT_DATA2_NVM_ERR_ADDR(data2);
|
||||
if (EVENT_DATA1_NVM_ERR_TYPE_WRITE(data1)) {
|
||||
hw_health->synd = BNXT_HW_STATUS_NVM_WRITE_ERR;
|
||||
hw_health->nvm_write_errors++;
|
||||
} else if (EVENT_DATA1_NVM_ERR_TYPE_ERASE(data1)) {
|
||||
hw_health->synd = BNXT_HW_STATUS_NVM_ERASE_ERR;
|
||||
hw_health->nvm_erase_errors++;
|
||||
} else {
|
||||
hw_health->synd = BNXT_HW_STATUS_NVM_UNKNOWN_ERR;
|
||||
}
|
||||
set_bit(BNXT_FW_NVM_ERR_SP_EVENT, &bp->sp_event);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
netdev_err(bp->dev, "FW reported unknown error type %u\n",
|
||||
err_type);
|
||||
@ -11887,6 +11903,9 @@ static void bnxt_sp_task(struct work_struct *work)
|
||||
if (test_and_clear_bit(BNXT_FW_ECHO_REQUEST_SP_EVENT, &bp->sp_event))
|
||||
bnxt_fw_echo_reply(bp);
|
||||
|
||||
if (test_and_clear_bit(BNXT_FW_NVM_ERR_SP_EVENT, &bp->sp_event))
|
||||
bnxt_devlink_health_hw_report(bp);
|
||||
|
||||
/* These functions below will clear BNXT_STATE_IN_SP_TASK. They
|
||||
* must be the last functions to be called before exiting.
|
||||
*/
|
||||
|
@ -516,6 +516,21 @@ struct rx_tpa_end_cmp_ext {
|
||||
ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA2_PIN_ID_MASK) >>\
|
||||
ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA2_PIN_ID_SFT)
|
||||
|
||||
/* Extract the NVM error address field from the error-report event's data2. */
#define EVENT_DATA2_NVM_ERR_ADDR(data2)						\
	(((data2) & ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA2_ERR_ADDR_MASK) >>	\
	 ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA2_ERR_ADDR_SFT)
|
||||
|
||||
/* True when the NVM error type field of data1 indicates a write error. */
#define EVENT_DATA1_NVM_ERR_TYPE_WRITE(data1)						\
	(((data1) & ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_MASK) ==	\
	 ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_WRITE)
|
||||
|
||||
/* True when the NVM error type field of data1 indicates an erase error. */
#define EVENT_DATA1_NVM_ERR_TYPE_ERASE(data1)						\
	(((data1) & ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_MASK) ==	\
	 ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_ERASE)
|
||||
|
||||
struct nqe_cn {
|
||||
__le16 type;
|
||||
#define NQ_CN_TYPE_MASK 0x3fUL
|
||||
@ -1528,6 +1543,21 @@ struct bnxt_ctx_mem_info {
|
||||
struct bnxt_mem_init mem_init[BNXT_CTX_MEM_INIT_MAX];
|
||||
};
|
||||
|
||||
/*
 * Hardware error syndrome recorded in struct bnxt_hw_health.synd and
 * translated to text by hw_err_str().
 */
enum bnxt_hw_err {
	BNXT_HW_STATUS_HEALTHY		= 0,	/* no error recorded */
	BNXT_HW_STATUS_NVM_WRITE_ERR	= 1,	/* NVM write error event */
	BNXT_HW_STATUS_NVM_ERASE_ERR	= 2,	/* NVM erase error event */
	BNXT_HW_STATUS_NVM_UNKNOWN_ERR	= 3,	/* NVM error, neither write nor erase */
};
|
||||
|
||||
struct bnxt_hw_health {
|
||||
u32 nvm_err_address;
|
||||
u32 nvm_write_errors;
|
||||
u32 nvm_erase_errors;
|
||||
u8 synd;
|
||||
struct devlink_health_reporter *hw_reporter;
|
||||
};
|
||||
|
||||
enum bnxt_health_severity {
|
||||
SEVERITY_NORMAL = 0,
|
||||
SEVERITY_WARNING,
|
||||
@ -2045,6 +2075,7 @@ struct bnxt {
|
||||
#define BNXT_FW_EXCEPTION_SP_EVENT 19
|
||||
#define BNXT_LINK_CFG_CHANGE_SP_EVENT 21
|
||||
#define BNXT_FW_ECHO_REQUEST_SP_EVENT 23
|
||||
#define BNXT_FW_NVM_ERR_SP_EVENT 25
|
||||
|
||||
struct delayed_work fw_reset_task;
|
||||
int fw_reset_state;
|
||||
@ -2145,6 +2176,8 @@ struct bnxt {
|
||||
struct dentry *debugfs_pdev;
|
||||
struct device *hwmon_dev;
|
||||
enum board_idx board_idx;
|
||||
|
||||
struct bnxt_hw_health hw_health;
|
||||
};
|
||||
|
||||
#define BNXT_NUM_RX_RING_STATS 8
|
||||
|
@ -241,6 +241,69 @@ static const struct devlink_health_reporter_ops bnxt_dl_fw_reporter_ops = {
|
||||
.recover = bnxt_fw_recover,
|
||||
};
|
||||
|
||||
static int bnxt_hw_recover(struct devlink_health_reporter *reporter,
|
||||
void *priv_ctx,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
struct bnxt *bp = devlink_health_reporter_priv(reporter);
|
||||
struct bnxt_hw_health *hw_health = &bp->hw_health;
|
||||
|
||||
hw_health->synd = BNXT_HW_STATUS_HEALTHY;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Map a hardware error syndrome (enum bnxt_hw_err value) to a human-readable
 * string. Any value without a table entry yields "unknown hw error".
 */
static const char *hw_err_str(u8 synd)
{
	static const char * const synd_names[] = {
		[BNXT_HW_STATUS_HEALTHY]	 = "healthy",
		[BNXT_HW_STATUS_NVM_WRITE_ERR]	 = "nvm write error",
		[BNXT_HW_STATUS_NVM_ERASE_ERR]	 = "nvm erase error",
		[BNXT_HW_STATUS_NVM_UNKNOWN_ERR] = "unrecognized nvm error",
	};

	if (synd < sizeof(synd_names) / sizeof(synd_names[0]) && synd_names[synd])
		return synd_names[synd];

	return "unknown hw error";
}
|
||||
|
||||
static int bnxt_hw_diagnose(struct devlink_health_reporter *reporter,
|
||||
struct devlink_fmsg *fmsg,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
struct bnxt *bp = devlink_health_reporter_priv(reporter);
|
||||
struct bnxt_hw_health *h = &bp->hw_health;
|
||||
int rc;
|
||||
|
||||
rc = devlink_fmsg_string_pair_put(fmsg, "Status", hw_err_str(h->synd));
|
||||
if (rc)
|
||||
return rc;
|
||||
rc = devlink_fmsg_u32_pair_put(fmsg, "nvm_write_errors", h->nvm_write_errors);
|
||||
if (rc)
|
||||
return rc;
|
||||
rc = devlink_fmsg_u32_pair_put(fmsg, "nvm_erase_errors", h->nvm_erase_errors);
|
||||
if (rc)
|
||||
return rc;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bnxt_devlink_health_hw_report(struct bnxt *bp)
|
||||
{
|
||||
struct bnxt_hw_health *hw_health = &bp->hw_health;
|
||||
|
||||
netdev_warn(bp->dev, "%s reported at address 0x%x\n", hw_err_str(hw_health->synd),
|
||||
hw_health->nvm_err_address);
|
||||
|
||||
devlink_health_report(hw_health->hw_reporter, hw_err_str(hw_health->synd), NULL);
|
||||
}
|
||||
|
||||
static const struct devlink_health_reporter_ops bnxt_dl_hw_reporter_ops = {
|
||||
.name = "hw",
|
||||
.diagnose = bnxt_hw_diagnose,
|
||||
.recover = bnxt_hw_recover,
|
||||
};
|
||||
|
||||
static struct devlink_health_reporter *
|
||||
__bnxt_dl_reporter_create(struct bnxt *bp,
|
||||
const struct devlink_health_reporter_ops *ops)
|
||||
@ -260,6 +323,10 @@ __bnxt_dl_reporter_create(struct bnxt *bp,
|
||||
void bnxt_dl_fw_reporters_create(struct bnxt *bp)
|
||||
{
|
||||
struct bnxt_fw_health *fw_health = bp->fw_health;
|
||||
struct bnxt_hw_health *hw_health = &bp->hw_health;
|
||||
|
||||
if (!hw_health->hw_reporter)
|
||||
hw_health->hw_reporter = __bnxt_dl_reporter_create(bp, &bnxt_dl_hw_reporter_ops);
|
||||
|
||||
if (fw_health && !fw_health->fw_reporter)
|
||||
fw_health->fw_reporter = __bnxt_dl_reporter_create(bp, &bnxt_dl_fw_reporter_ops);
|
||||
@ -268,6 +335,12 @@ void bnxt_dl_fw_reporters_create(struct bnxt *bp)
|
||||
void bnxt_dl_fw_reporters_destroy(struct bnxt *bp)
|
||||
{
|
||||
struct bnxt_fw_health *fw_health = bp->fw_health;
|
||||
struct bnxt_hw_health *hw_health = &bp->hw_health;
|
||||
|
||||
if (hw_health->hw_reporter) {
|
||||
devlink_health_reporter_destroy(hw_health->hw_reporter);
|
||||
hw_health->hw_reporter = NULL;
|
||||
}
|
||||
|
||||
if (fw_health && fw_health->fw_reporter) {
|
||||
devlink_health_reporter_destroy(fw_health->fw_reporter);
|
||||
|
@ -74,6 +74,7 @@ enum bnxt_dl_version_type {
|
||||
void bnxt_devlink_health_fw_report(struct bnxt *bp);
|
||||
void bnxt_dl_health_fw_status_update(struct bnxt *bp, bool healthy);
|
||||
void bnxt_dl_health_fw_recovery_done(struct bnxt *bp);
|
||||
void bnxt_devlink_health_hw_report(struct bnxt *bp);
|
||||
void bnxt_dl_fw_reporters_create(struct bnxt *bp);
|
||||
void bnxt_dl_fw_reporters_destroy(struct bnxt *bp);
|
||||
int bnxt_dl_register(struct bnxt *bp);
|
||||
|
Loading…
Reference in New Issue
Block a user