From d41f26b5ef8fb4d5ae6f9b51526eefa62ec53348 Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Tue, 2 Mar 2021 10:12:06 -0800 Subject: [PATCH 01/15] ice: use kernel definitions for IANA protocol ports and ether-types The well-known IANA protocol port 3260 (iscsi-target 0x0cbc) and the ether-types 0x8906 (ETH_P_FCOE) and 0x8914 (ETH_P_FIP) are already defined in kernel header files. Use those definitions instead of open-coding the same. Signed-off-by: Bruce Allan Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 3 +++ drivers/net/ethernet/intel/ice/ice_dcb.c | 8 ++++---- drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 2 +- drivers/net/ethernet/intel/ice/ice_type.h | 3 --- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 07777ac4f098..dd2e75d15558 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -44,6 +44,9 @@ #include #include #include +#if IS_ENABLED(CONFIG_DCB) +#include +#endif /* CONFIG_DCB */ #include "ice_devids.h" #include "ice_type.h" #include "ice_txrx.h" diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c index 43c6af42de8a..34c1aba050b8 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@ -804,7 +804,7 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_FCOE_M; ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_FCOE_S; ice_app_sel_type = ICE_APP_SEL_ETHTYPE; - ice_app_prot_id_type = ICE_APP_PROT_ID_FCOE; + ice_app_prot_id_type = ETH_P_FCOE; } else if (i == 1) { /* iSCSI APP */ ice_aqc_cee_status_mask = ICE_AQC_CEE_ISCSI_STATUS_M; @@ -812,14 +812,14 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_ISCSI_M; ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_ISCSI_S; ice_app_sel_type = ICE_APP_SEL_TCPIP; - ice_app_prot_id_type = ICE_APP_PROT_ID_ISCSI; + ice_app_prot_id_type = ISCSI_LISTEN_PORT; for (j = 0; j < cmp_dcbcfg->numapps; j++) { u16 prot_id = cmp_dcbcfg->app[j].prot_id; u8 sel = cmp_dcbcfg->app[j].selector; if (sel == ICE_APP_SEL_TCPIP && - (prot_id == ICE_APP_PROT_ID_ISCSI || + (prot_id == ISCSI_LISTEN_PORT || prot_id == ICE_APP_PROT_ID_ISCSI_860)) { ice_app_prot_id_type = prot_id; break; @@ -832,7 +832,7 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_FIP_M; ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_FIP_S; ice_app_sel_type = ICE_APP_SEL_ETHTYPE; - ice_app_prot_id_type = ICE_APP_PROT_ID_FIP; + ice_app_prot_id_type = ETH_P_FIP; } status = (tlv_status & ice_aqc_cee_status_mask) >> diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 1e8f71ffc8ce..df02cffdf209 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -563,7 +563,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked) dcbcfg->numapps = 1; dcbcfg->app[0].selector = ICE_APP_SEL_ETHTYPE; dcbcfg->app[0].priority = 3; - dcbcfg->app[0].prot_id = ICE_APP_PROT_ID_FCOE; + dcbcfg->app[0].prot_id = ETH_P_FCOE; ret = ice_pf_dcb_cfg(pf, dcbcfg, locked); kfree(dcbcfg); diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 7ead1c13f16f..9b80962ff92f 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ 
b/drivers/net/ethernet/intel/ice/ice_type.h @@ -551,10 +551,7 @@ struct ice_dcb_app_priority_table { #define ICE_TLV_STATUS_OPER 0x1 #define ICE_TLV_STATUS_SYNC 0x2 #define ICE_TLV_STATUS_ERR 0x4 -#define ICE_APP_PROT_ID_FCOE 0x8906 -#define ICE_APP_PROT_ID_ISCSI 0x0cbc #define ICE_APP_PROT_ID_ISCSI_860 0x035c -#define ICE_APP_PROT_ID_FIP 0x8914 #define ICE_APP_SEL_ETHTYPE 0x1 #define ICE_APP_SEL_TCPIP 0x2 #define ICE_CEE_APP_SEL_ETHTYPE 0x0 From 7e408e07b42dceba4bc6630ff9ce9a55fcb043e0 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Tue, 2 Mar 2021 10:15:38 -0800 Subject: [PATCH 02/15] ice: Drop leading underscores in enum ice_pf_state Remove the leading underscores in enum ice_pf_state. This is not really communicating anything and is unnecessary. No functional change. Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 68 ++--- drivers/net/ethernet/intel/ice/ice_ethtool.c | 18 +- .../net/ethernet/intel/ice/ice_ethtool_fdir.c | 2 +- drivers/net/ethernet/intel/ice/ice_lib.c | 18 +- drivers/net/ethernet/intel/ice/ice_main.c | 234 +++++++++--------- .../ethernet/intel/ice/ice_virtchnl_fdir.c | 6 +- .../net/ethernet/intel/ice/ice_virtchnl_pf.c | 22 +- drivers/net/ethernet/intel/ice/ice_xsk.c | 6 +- 8 files changed, 187 insertions(+), 187 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index dd2e75d15558..2cb09af3148c 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -197,45 +197,45 @@ struct ice_sw { }; enum ice_pf_state { - __ICE_TESTING, - __ICE_DOWN, - __ICE_NEEDS_RESTART, - __ICE_PREPARED_FOR_RESET, /* set by driver when prepared */ - __ICE_RESET_OICR_RECV, /* set by driver after rcv reset OICR */ - __ICE_PFR_REQ, /* set by driver and peers */ - __ICE_CORER_REQ, /* set by driver and peers */ - __ICE_GLOBR_REQ, /* set by driver and peers */ - __ICE_CORER_RECV, /* set by OICR handler */ - __ICE_GLOBR_RECV, /* set by OICR handler */ - __ICE_EMPR_RECV, /* set by OICR handler */ - __ICE_SUSPENDED, /* set on module remove path */ - __ICE_RESET_FAILED, /* set by reset/rebuild */ + ICE_TESTING, + ICE_DOWN, + ICE_NEEDS_RESTART, + ICE_PREPARED_FOR_RESET, /* set by driver when prepared */ + ICE_RESET_OICR_RECV, /* set by driver after rcv reset OICR */ + ICE_PFR_REQ, /* set by driver and peers */ + ICE_CORER_REQ, /* set by driver and peers */ + ICE_GLOBR_REQ, /* set by driver and peers */ + ICE_CORER_RECV, /* set by OICR handler */ + ICE_GLOBR_RECV, /* set by OICR handler */ + ICE_EMPR_RECV, /* set by OICR handler */ + ICE_SUSPENDED, /* set on module remove path */ + ICE_RESET_FAILED, /* set by reset/rebuild */ /* When checking for the PF to be in a nominal operating state, the * bits that are grouped at the beginning of the list need to be - * checked. Bits occurring before __ICE_STATE_NOMINAL_CHECK_BITS will + * checked. Bits occurring before ICE_STATE_NOMINAL_CHECK_BITS will * be checked. If you need to add a bit into consideration for nominal * operating state, it must be added before - * __ICE_STATE_NOMINAL_CHECK_BITS. Do not move this entry's position + * ICE_STATE_NOMINAL_CHECK_BITS. Do not move this entry's position * without appropriate consideration. 
*/ - __ICE_STATE_NOMINAL_CHECK_BITS, - __ICE_ADMINQ_EVENT_PENDING, - __ICE_MAILBOXQ_EVENT_PENDING, - __ICE_MDD_EVENT_PENDING, - __ICE_VFLR_EVENT_PENDING, - __ICE_FLTR_OVERFLOW_PROMISC, - __ICE_VF_DIS, - __ICE_CFG_BUSY, - __ICE_SERVICE_SCHED, - __ICE_SERVICE_DIS, - __ICE_FD_FLUSH_REQ, - __ICE_OICR_INTR_DIS, /* Global OICR interrupt disabled */ - __ICE_MDD_VF_PRINT_PENDING, /* set when MDD event handle */ - __ICE_VF_RESETS_DISABLED, /* disable resets during ice_remove */ - __ICE_LINK_DEFAULT_OVERRIDE_PENDING, - __ICE_PHY_INIT_COMPLETE, - __ICE_FD_VF_FLUSH_CTX, /* set at FD Rx IRQ or timeout */ - __ICE_STATE_NBITS /* must be last */ + ICE_STATE_NOMINAL_CHECK_BITS, + ICE_ADMINQ_EVENT_PENDING, + ICE_MAILBOXQ_EVENT_PENDING, + ICE_MDD_EVENT_PENDING, + ICE_VFLR_EVENT_PENDING, + ICE_FLTR_OVERFLOW_PROMISC, + ICE_VF_DIS, + ICE_CFG_BUSY, + ICE_SERVICE_SCHED, + ICE_SERVICE_DIS, + ICE_FD_FLUSH_REQ, + ICE_OICR_INTR_DIS, /* Global OICR interrupt disabled */ + ICE_MDD_VF_PRINT_PENDING, /* set when MDD event handle */ + ICE_VF_RESETS_DISABLED, /* disable resets during ice_remove */ + ICE_LINK_DEFAULT_OVERRIDE_PENDING, + ICE_PHY_INIT_COMPLETE, + ICE_FD_VF_FLUSH_CTX, /* set at FD Rx IRQ or timeout */ + ICE_STATE_NBITS /* must be last */ }; enum ice_vsi_state { @@ -421,7 +421,7 @@ struct ice_pf { u16 num_msix_per_vf; /* used to ratelimit the MDD event logging */ unsigned long last_printed_mdd_jiffies; - DECLARE_BITMAP(state, __ICE_STATE_NBITS); + DECLARE_BITMAP(state, ICE_STATE_NBITS); DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS); unsigned long *avail_txqs; /* bitmap to track PF Tx queue usage */ unsigned long *avail_rxqs; /* bitmap to track PF Rx queue usage */ diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index a39e890100d9..f2bc8f1e86cc 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -806,7 +806,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test, if (eth_test->flags == ETH_TEST_FL_OFFLINE) { netdev_info(netdev, "offline testing starting\n"); - set_bit(__ICE_TESTING, pf->state); + set_bit(ICE_TESTING, pf->state); if (ice_active_vfs(pf)) { dev_warn(dev, "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n"); @@ -816,7 +816,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test, data[ICE_ETH_TEST_LOOP] = 1; data[ICE_ETH_TEST_LINK] = 1; eth_test->flags |= ETH_TEST_FL_FAILED; - clear_bit(__ICE_TESTING, pf->state); + clear_bit(ICE_TESTING, pf->state); goto skip_ol_tests; } /* If the device is online then take it offline */ @@ -837,7 +837,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test, data[ICE_ETH_TEST_REG]) eth_test->flags |= ETH_TEST_FL_FAILED; - clear_bit(__ICE_TESTING, pf->state); + clear_bit(ICE_TESTING, pf->state); if (if_running) { int status = ice_open(netdev); @@ -1097,7 +1097,7 @@ static int ice_nway_reset(struct net_device *netdev) int err; /* If VSI state is up, then restart autoneg with link up */ - if (!test_bit(__ICE_DOWN, vsi->back->state)) + if (!test_bit(ICE_DOWN, vsi->back->state)) err = ice_set_link(vsi, true); else err = ice_set_link(vsi, false); @@ -2282,7 +2282,7 @@ ice_set_link_ksettings(struct net_device *netdev, goto done; } - while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) { + while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) { timeout--; if (!timeout) { err = -EBUSY; @@ -2392,7 +2392,7 @@ ice_set_link_ksettings(struct net_device *netdev, 
pi->phy.curr_user_speed_req = adv_link_speed; done: kfree(phy_caps); - clear_bit(__ICE_CFG_BUSY, pf->state); + clear_bit(ICE_CFG_BUSY, pf->state); return err; } @@ -2748,7 +2748,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) if (ice_xsk_any_rx_ring_ena(vsi)) return -EBUSY; - while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) { + while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) { timeout--; if (!timeout) return -EBUSY; @@ -2927,7 +2927,7 @@ free_tx: } done: - clear_bit(__ICE_CFG_BUSY, pf->state); + clear_bit(ICE_CFG_BUSY, pf->state); return err; } @@ -3046,7 +3046,7 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) } /* If we have link and don't have autoneg */ - if (!test_bit(__ICE_DOWN, pf->state) && + if (!test_bit(ICE_DOWN, pf->state) && !(hw_link_info->an_info & ICE_AQ_AN_COMPLETED)) { /* Send message that it might not necessarily work*/ netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n"); diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 440964defa4a..16de603b280c 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1452,7 +1452,7 @@ int ice_del_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd) return -EBUSY; } - if (test_bit(__ICE_FD_FLUSH_REQ, pf->state)) + if (test_bit(ICE_FD_FLUSH_REQ, pf->state)) return -EBUSY; mutex_lock(&hw->fdir_fltr_lock); diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 16d0ee5b48a5..162f01b42147 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1502,13 +1502,13 @@ static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi) */ bool ice_pf_state_is_nominal(struct ice_pf *pf) { - DECLARE_BITMAP(check_bits, __ICE_STATE_NBITS) = { 0 }; + DECLARE_BITMAP(check_bits, ICE_STATE_NBITS) = { 0 }; if (!pf) return false; - bitmap_set(check_bits, 0, __ICE_STATE_NOMINAL_CHECK_BITS); - if (bitmap_intersects(pf->state, check_bits, __ICE_STATE_NBITS)) + bitmap_set(check_bits, 0, ICE_STATE_NOMINAL_CHECK_BITS); + if (bitmap_intersects(pf->state, check_bits, ICE_STATE_NBITS)) return false; return true; @@ -2811,7 +2811,7 @@ int ice_vsi_release(struct ice_vsi *vsi) ice_vsi_delete(vsi); ice_vsi_free_q_vectors(vsi); - /* make sure unregister_netdev() was called by checking __ICE_DOWN */ + /* make sure unregister_netdev() was called by checking ICE_DOWN */ if (vsi->netdev && test_bit(ICE_VSI_DOWN, vsi->state)) { free_netdev(vsi->netdev); vsi->netdev = NULL; @@ -3140,7 +3140,7 @@ err_rings: } err_vsi: ice_vsi_clear(vsi); - set_bit(__ICE_RESET_FAILED, pf->state); + set_bit(ICE_RESET_FAILED, pf->state); kfree(coalesce); return ret; } @@ -3151,10 +3151,10 @@ err_vsi: */ bool ice_is_reset_in_progress(unsigned long *state) { - return test_bit(__ICE_RESET_OICR_RECV, state) || - test_bit(__ICE_PFR_REQ, state) || - test_bit(__ICE_CORER_REQ, state) || - test_bit(__ICE_GLOBR_REQ, state); + return test_bit(ICE_RESET_OICR_RECV, state) || + test_bit(ICE_PFR_REQ, state) || + test_bit(ICE_CORER_REQ, state) || + test_bit(ICE_GLOBR_REQ, state); } #ifdef CONFIG_DCB diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 1b2f1e258e5c..032101680e09 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -257,7 +257,7 @@ static 
int ice_vsi_sync_fltr(struct ice_vsi *vsi) if (!vsi->netdev) return -EINVAL; - while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state)) + while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) usleep_range(1000, 2000); changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags; @@ -307,7 +307,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) * space reserved for promiscuous filters. */ if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC && - !test_and_set_bit(__ICE_FLTR_OVERFLOW_PROMISC, + !test_and_set_bit(ICE_FLTR_OVERFLOW_PROMISC, vsi->state)) { promisc_forced_on = true; netdev_warn(netdev, "Reached MAC filter limit, forcing promisc mode on VSI %d\n", @@ -391,7 +391,7 @@ out: set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state); set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state); exit: - clear_bit(__ICE_CFG_BUSY, vsi->state); + clear_bit(ICE_CFG_BUSY, vsi->state); return err; } @@ -451,7 +451,7 @@ ice_prepare_for_reset(struct ice_pf *pf) unsigned int i; /* already prepared for reset */ - if (test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) + if (test_bit(ICE_PREPARED_FOR_RESET, pf->state)) return; /* Notify VFs of impending reset */ @@ -472,7 +472,7 @@ ice_prepare_for_reset(struct ice_pf *pf) ice_shutdown_all_ctrlq(hw); - set_bit(__ICE_PREPARED_FOR_RESET, pf->state); + set_bit(ICE_PREPARED_FOR_RESET, pf->state); } /** @@ -493,12 +493,12 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) /* trigger the reset */ if (ice_reset(hw, reset_type)) { dev_err(dev, "reset %d failed\n", reset_type); - set_bit(__ICE_RESET_FAILED, pf->state); - clear_bit(__ICE_RESET_OICR_RECV, pf->state); - clear_bit(__ICE_PREPARED_FOR_RESET, pf->state); - clear_bit(__ICE_PFR_REQ, pf->state); - clear_bit(__ICE_CORER_REQ, pf->state); - clear_bit(__ICE_GLOBR_REQ, pf->state); + set_bit(ICE_RESET_FAILED, pf->state); + clear_bit(ICE_RESET_OICR_RECV, pf->state); + clear_bit(ICE_PREPARED_FOR_RESET, pf->state); + clear_bit(ICE_PFR_REQ, pf->state); + clear_bit(ICE_CORER_REQ, pf->state); + clear_bit(ICE_GLOBR_REQ, pf->state); return; } @@ -509,8 +509,8 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) if (reset_type == ICE_RESET_PFR) { pf->pfr_count++; ice_rebuild(pf, reset_type); - clear_bit(__ICE_PREPARED_FOR_RESET, pf->state); - clear_bit(__ICE_PFR_REQ, pf->state); + clear_bit(ICE_PREPARED_FOR_RESET, pf->state); + clear_bit(ICE_PFR_REQ, pf->state); ice_reset_all_vfs(pf, true); } } @@ -526,20 +526,20 @@ static void ice_reset_subtask(struct ice_pf *pf) /* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an * OICR interrupt. The OICR handler (ice_misc_intr) determines what type * of reset is pending and sets bits in pf->state indicating the reset - * type and __ICE_RESET_OICR_RECV. So, if the latter bit is set + * type and ICE_RESET_OICR_RECV. So, if the latter bit is set * prepare for pending reset if not already (for PF software-initiated * global resets the software should already be prepared for it as - * indicated by __ICE_PREPARED_FOR_RESET; for global resets initiated + * indicated by ICE_PREPARED_FOR_RESET; for global resets initiated * by firmware or software on other PFs, that bit is not set so prepare * for the reset now), poll for reset done, rebuild and return. 
*/ - if (test_bit(__ICE_RESET_OICR_RECV, pf->state)) { + if (test_bit(ICE_RESET_OICR_RECV, pf->state)) { /* Perform the largest reset requested */ - if (test_and_clear_bit(__ICE_CORER_RECV, pf->state)) + if (test_and_clear_bit(ICE_CORER_RECV, pf->state)) reset_type = ICE_RESET_CORER; - if (test_and_clear_bit(__ICE_GLOBR_RECV, pf->state)) + if (test_and_clear_bit(ICE_GLOBR_RECV, pf->state)) reset_type = ICE_RESET_GLOBR; - if (test_and_clear_bit(__ICE_EMPR_RECV, pf->state)) + if (test_and_clear_bit(ICE_EMPR_RECV, pf->state)) reset_type = ICE_RESET_EMPR; /* return if no valid reset type requested */ if (reset_type == ICE_RESET_INVAL) @@ -548,7 +548,7 @@ static void ice_reset_subtask(struct ice_pf *pf) /* make sure we are ready to rebuild */ if (ice_check_reset(&pf->hw)) { - set_bit(__ICE_RESET_FAILED, pf->state); + set_bit(ICE_RESET_FAILED, pf->state); } else { /* done with reset. start rebuild */ pf->hw.reset_ongoing = false; @@ -556,11 +556,11 @@ static void ice_reset_subtask(struct ice_pf *pf) /* clear bit to resume normal operations, but * ICE_NEEDS_RESTART bit is set in case rebuild failed */ - clear_bit(__ICE_RESET_OICR_RECV, pf->state); - clear_bit(__ICE_PREPARED_FOR_RESET, pf->state); - clear_bit(__ICE_PFR_REQ, pf->state); - clear_bit(__ICE_CORER_REQ, pf->state); - clear_bit(__ICE_GLOBR_REQ, pf->state); + clear_bit(ICE_RESET_OICR_RECV, pf->state); + clear_bit(ICE_PREPARED_FOR_RESET, pf->state); + clear_bit(ICE_PFR_REQ, pf->state); + clear_bit(ICE_CORER_REQ, pf->state); + clear_bit(ICE_GLOBR_REQ, pf->state); ice_reset_all_vfs(pf, true); } @@ -568,19 +568,19 @@ static void ice_reset_subtask(struct ice_pf *pf) } /* No pending resets to finish processing. Check for new resets */ - if (test_bit(__ICE_PFR_REQ, pf->state)) + if (test_bit(ICE_PFR_REQ, pf->state)) reset_type = ICE_RESET_PFR; - if (test_bit(__ICE_CORER_REQ, pf->state)) + if (test_bit(ICE_CORER_REQ, pf->state)) reset_type = ICE_RESET_CORER; - if (test_bit(__ICE_GLOBR_REQ, pf->state)) + if (test_bit(ICE_GLOBR_REQ, pf->state)) reset_type = ICE_RESET_GLOBR; /* If no valid reset type requested just return */ if (reset_type == ICE_RESET_INVAL) return; /* reset if not already down or busy */ - if (!test_bit(__ICE_DOWN, pf->state) && - !test_bit(__ICE_CFG_BUSY, pf->state)) { + if (!test_bit(ICE_DOWN, pf->state) && + !test_bit(ICE_CFG_BUSY, pf->state)) { ice_do_reset(pf, reset_type); } } @@ -937,8 +937,8 @@ static void ice_watchdog_subtask(struct ice_pf *pf) int i; /* if interface is down do nothing */ - if (test_bit(__ICE_DOWN, pf->state) || - test_bit(__ICE_CFG_BUSY, pf->state)) + if (test_bit(ICE_DOWN, pf->state) || + test_bit(ICE_CFG_BUSY, pf->state)) return; /* make sure we don't do these things too often */ @@ -1182,7 +1182,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) u32 oldval, val; /* Do not clean control queue if/when PF reset fails */ - if (test_bit(__ICE_RESET_FAILED, pf->state)) + if (test_bit(ICE_RESET_FAILED, pf->state)) return 0; switch (q_type) { @@ -1317,13 +1317,13 @@ static void ice_clean_adminq_subtask(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; - if (!test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state)) + if (!test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state)) return; if (__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN)) return; - clear_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state); + clear_bit(ICE_ADMINQ_EVENT_PENDING, pf->state); /* There might be a situation where new messages arrive to a control * queue between processing the last message and clearing the @@ -1344,13 +1344,13 @@ static void 
ice_clean_mailboxq_subtask(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; - if (!test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state)) + if (!test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state)) return; if (__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX)) return; - clear_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state); + clear_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state); if (ice_ctrlq_pending(hw, &hw->mailboxq)) __ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX); @@ -1366,9 +1366,9 @@ static void ice_clean_mailboxq_subtask(struct ice_pf *pf) */ void ice_service_task_schedule(struct ice_pf *pf) { - if (!test_bit(__ICE_SERVICE_DIS, pf->state) && - !test_and_set_bit(__ICE_SERVICE_SCHED, pf->state) && - !test_bit(__ICE_NEEDS_RESTART, pf->state)) + if (!test_bit(ICE_SERVICE_DIS, pf->state) && + !test_and_set_bit(ICE_SERVICE_SCHED, pf->state) && + !test_bit(ICE_NEEDS_RESTART, pf->state)) queue_work(ice_wq, &pf->serv_task); } @@ -1378,32 +1378,32 @@ void ice_service_task_schedule(struct ice_pf *pf) */ static void ice_service_task_complete(struct ice_pf *pf) { - WARN_ON(!test_bit(__ICE_SERVICE_SCHED, pf->state)); + WARN_ON(!test_bit(ICE_SERVICE_SCHED, pf->state)); /* force memory (pf->state) to sync before next service task */ smp_mb__before_atomic(); - clear_bit(__ICE_SERVICE_SCHED, pf->state); + clear_bit(ICE_SERVICE_SCHED, pf->state); } /** * ice_service_task_stop - stop service task and cancel works * @pf: board private structure * - * Return 0 if the __ICE_SERVICE_DIS bit was not already set, + * Return 0 if the ICE_SERVICE_DIS bit was not already set, * 1 otherwise. */ static int ice_service_task_stop(struct ice_pf *pf) { int ret; - ret = test_and_set_bit(__ICE_SERVICE_DIS, pf->state); + ret = test_and_set_bit(ICE_SERVICE_DIS, pf->state); if (pf->serv_tmr.function) del_timer_sync(&pf->serv_tmr); if (pf->serv_task.func) cancel_work_sync(&pf->serv_task); - clear_bit(__ICE_SERVICE_SCHED, pf->state); + clear_bit(ICE_SERVICE_SCHED, pf->state); return ret; } @@ -1415,7 +1415,7 @@ static int ice_service_task_stop(struct ice_pf *pf) */ static void ice_service_task_restart(struct ice_pf *pf) { - clear_bit(__ICE_SERVICE_DIS, pf->state); + clear_bit(ICE_SERVICE_DIS, pf->state); ice_service_task_schedule(pf); } @@ -1448,7 +1448,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) unsigned int i; u32 reg; - if (!test_and_clear_bit(__ICE_MDD_EVENT_PENDING, pf->state)) { + if (!test_and_clear_bit(ICE_MDD_EVENT_PENDING, pf->state)) { /* Since the VF MDD event logging is rate limited, check if * there are pending MDD events. 
*/ @@ -1540,7 +1540,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_TX_PQM_VALID_M) { wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF); vf->mdd_tx_events.count++; - set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state); + set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n", i); @@ -1550,7 +1550,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_TX_TCLAN_VALID_M) { wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF); vf->mdd_tx_events.count++; - set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state); + set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n", i); @@ -1560,7 +1560,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_TX_TDPU_VALID_M) { wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF); vf->mdd_tx_events.count++; - set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state); + set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n", i); @@ -1570,7 +1570,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_RX_VALID_M) { wr32(hw, VP_MDET_RX(i), 0xFFFF); vf->mdd_rx_events.count++; - set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state); + set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_rx_err(pf)) dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n", i); @@ -1735,7 +1735,7 @@ static void ice_init_link_dflt_override(struct ice_port_info *pi) * settings using the default override mask from the NVM. * * The PHY should only be configured with the default override settings the - * first time media is available. The __ICE_LINK_DEFAULT_OVERRIDE_PENDING state + * first time media is available. The ICE_LINK_DEFAULT_OVERRIDE_PENDING state * is used to indicate that the user PHY cfg default override is initialized * and the PHY has not been configured with the default override settings. The * state is set here, and cleared in ice_configure_phy the first time the PHY is @@ -1767,7 +1767,7 @@ static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi) cfg->link_fec_opt = ldo->fec_options; phy->curr_user_fec_req = ICE_FEC_AUTO; - set_bit(__ICE_LINK_DEFAULT_OVERRIDE_PENDING, pf->state); + set_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING, pf->state); } /** @@ -1839,7 +1839,7 @@ static int ice_init_phy_user_cfg(struct ice_port_info *pi) out: phy->curr_user_speed_req = ICE_AQ_LINK_SPEED_M; - set_bit(__ICE_PHY_INIT_COMPLETE, pf->state); + set_bit(ICE_PHY_INIT_COMPLETE, pf->state); err_out: kfree(pcaps); return err; @@ -1923,7 +1923,7 @@ static int ice_configure_phy(struct ice_vsi *vsi) /* Speed - If default override pending, use curr_user_phy_cfg set in * ice_init_phy_user_cfg_ldo. 
*/ - if (test_and_clear_bit(__ICE_LINK_DEFAULT_OVERRIDE_PENDING, + if (test_and_clear_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING, vsi->back->state)) { cfg->phy_type_low = phy->curr_user_phy_cfg.phy_type_low; cfg->phy_type_high = phy->curr_user_phy_cfg.phy_type_high; @@ -2002,7 +2002,7 @@ static void ice_check_media_subtask(struct ice_pf *pf) return; if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { - if (!test_bit(__ICE_PHY_INIT_COMPLETE, pf->state)) + if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) ice_init_phy_user_cfg(pi); /* PHY settings are reset on media insertion, reconfigure @@ -2038,8 +2038,8 @@ static void ice_service_task(struct work_struct *work) /* bail if a reset/recovery cycle is pending or rebuild failed */ if (ice_is_reset_in_progress(pf->state) || - test_bit(__ICE_SUSPENDED, pf->state) || - test_bit(__ICE_NEEDS_RESTART, pf->state)) { + test_bit(ICE_SUSPENDED, pf->state) || + test_bit(ICE_NEEDS_RESTART, pf->state)) { ice_service_task_complete(pf); return; } @@ -2060,7 +2060,8 @@ static void ice_service_task(struct work_struct *work) ice_clean_mailboxq_subtask(pf); ice_sync_arfs_fltrs(pf); ice_flush_fdir_ctx(pf); - /* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */ + + /* Clear ICE_SERVICE_SCHED flag to allow scheduling next event */ ice_service_task_complete(pf); /* If the tasks have taken longer than one service timer period @@ -2068,11 +2069,11 @@ static void ice_service_task(struct work_struct *work) * schedule the service task now. */ if (time_after(jiffies, (start_time + pf->serv_tmr_period)) || - test_bit(__ICE_MDD_EVENT_PENDING, pf->state) || - test_bit(__ICE_VFLR_EVENT_PENDING, pf->state) || - test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state) || - test_bit(__ICE_FD_VF_FLUSH_CTX, pf->state) || - test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state)) + test_bit(ICE_MDD_EVENT_PENDING, pf->state) || + test_bit(ICE_VFLR_EVENT_PENDING, pf->state) || + test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state) || + test_bit(ICE_FD_VF_FLUSH_CTX, pf->state) || + test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state)) mod_timer(&pf->serv_tmr, jiffies); } @@ -2102,7 +2103,7 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset) struct device *dev = ice_pf_to_dev(pf); /* bail out if earlier reset has failed */ - if (test_bit(__ICE_RESET_FAILED, pf->state)) { + if (test_bit(ICE_RESET_FAILED, pf->state)) { dev_dbg(dev, "earlier reset has failed\n"); return -EIO; } @@ -2114,13 +2115,13 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset) switch (reset) { case ICE_RESET_PFR: - set_bit(__ICE_PFR_REQ, pf->state); + set_bit(ICE_PFR_REQ, pf->state); break; case ICE_RESET_CORER: - set_bit(__ICE_CORER_REQ, pf->state); + set_bit(ICE_CORER_REQ, pf->state); break; case ICE_RESET_GLOBR: - set_bit(__ICE_GLOBR_REQ, pf->state); + set_bit(ICE_GLOBR_REQ, pf->state); break; default: return -EINVAL; @@ -2625,8 +2626,8 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) u32 oicr, ena_mask; dev = ice_pf_to_dev(pf); - set_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state); - set_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state); + set_bit(ICE_ADMINQ_EVENT_PENDING, pf->state); + set_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state); oicr = rd32(hw, PFINT_OICR); ena_mask = rd32(hw, PFINT_OICR_ENA); @@ -2638,18 +2639,18 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & PFINT_OICR_MAL_DETECT_M) { ena_mask &= ~PFINT_OICR_MAL_DETECT_M; - set_bit(__ICE_MDD_EVENT_PENDING, pf->state); + set_bit(ICE_MDD_EVENT_PENDING, pf->state); } if (oicr & 
PFINT_OICR_VFLR_M) { /* disable any further VFLR event notifications */ - if (test_bit(__ICE_VF_RESETS_DISABLED, pf->state)) { + if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) { u32 reg = rd32(hw, PFINT_OICR_ENA); reg &= ~PFINT_OICR_VFLR_M; wr32(hw, PFINT_OICR_ENA, reg); } else { ena_mask &= ~PFINT_OICR_VFLR_M; - set_bit(__ICE_VFLR_EVENT_PENDING, pf->state); + set_bit(ICE_VFLR_EVENT_PENDING, pf->state); } } @@ -2675,13 +2676,13 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) * We also make note of which reset happened so that peer * devices/drivers can be informed. */ - if (!test_and_set_bit(__ICE_RESET_OICR_RECV, pf->state)) { + if (!test_and_set_bit(ICE_RESET_OICR_RECV, pf->state)) { if (reset == ICE_RESET_CORER) - set_bit(__ICE_CORER_RECV, pf->state); + set_bit(ICE_CORER_RECV, pf->state); else if (reset == ICE_RESET_GLOBR) - set_bit(__ICE_GLOBR_RECV, pf->state); + set_bit(ICE_GLOBR_RECV, pf->state); else - set_bit(__ICE_EMPR_RECV, pf->state); + set_bit(ICE_EMPR_RECV, pf->state); /* There are couple of different bits at play here. * hw->reset_ongoing indicates whether the hardware is @@ -2689,7 +2690,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) * is received and set back to false after the driver * has determined that the hardware is out of reset. * - * __ICE_RESET_OICR_RECV in pf->state indicates + * ICE_RESET_OICR_RECV in pf->state indicates * that a post reset rebuild is required before the * driver is operational again. This is set above. * @@ -2717,7 +2718,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_PCI_EXCEPTION_M | PFINT_OICR_ECC_ERR_M)) { - set_bit(__ICE_PFR_REQ, pf->state); + set_bit(ICE_PFR_REQ, pf->state); ice_service_task_schedule(pf); } } @@ -3321,7 +3322,7 @@ static int ice_init_pf(struct ice_pf *pf) timer_setup(&pf->serv_tmr, ice_service_timer, 0); pf->serv_tmr_period = HZ; INIT_WORK(&pf->serv_task, ice_service_task); - clear_bit(__ICE_SERVICE_SCHED, pf->state); + clear_bit(ICE_SERVICE_SCHED, pf->state); mutex_init(&pf->avail_q_mutex); pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL); @@ -3530,7 +3531,7 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx) if (!new_rx && !new_tx) return -EINVAL; - while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) { + while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) { timeout--; if (!timeout) return -EBUSY; @@ -3554,7 +3555,7 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx) ice_pf_dcb_recfg(pf); ice_vsi_open(vsi); done: - clear_bit(__ICE_CFG_BUSY, pf->state); + clear_bit(ICE_CFG_BUSY, pf->state); return err; } @@ -4020,9 +4021,9 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) pf->pdev = pdev; pci_set_drvdata(pdev, pf); - set_bit(__ICE_DOWN, pf->state); + set_bit(ICE_DOWN, pf->state); /* Disable service task until DOWN bit is cleared */ - set_bit(__ICE_SERVICE_DIS, pf->state); + set_bit(ICE_SERVICE_DIS, pf->state); hw = &pf->hw; hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0]; @@ -4162,7 +4163,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) goto err_alloc_sw_unroll; } - clear_bit(__ICE_SERVICE_DIS, pf->state); + clear_bit(ICE_SERVICE_DIS, pf->state); /* tell the firmware we are up */ err = ice_send_version(pf); @@ -4256,16 +4257,15 @@ probe_done: goto err_netdev_reg; /* ready to go, so clear down state bit */ - clear_bit(__ICE_DOWN, pf->state); - + clear_bit(ICE_DOWN, pf->state); return 
0; err_netdev_reg: err_send_version_unroll: ice_vsi_release_all(pf); err_alloc_sw_unroll: - set_bit(__ICE_SERVICE_DIS, pf->state); - set_bit(__ICE_DOWN, pf->state); + set_bit(ICE_SERVICE_DIS, pf->state); + set_bit(ICE_DOWN, pf->state); devm_kfree(dev, pf->first_sw); err_msix_misc_unroll: ice_free_irq_msix_misc(pf); @@ -4365,11 +4365,11 @@ static void ice_remove(struct pci_dev *pdev) } if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) { - set_bit(__ICE_VF_RESETS_DISABLED, pf->state); + set_bit(ICE_VF_RESETS_DISABLED, pf->state); ice_free_vfs(pf); } - set_bit(__ICE_DOWN, pf->state); + set_bit(ICE_DOWN, pf->state); ice_service_task_stop(pf); ice_aq_cancel_waiting_tasks(pf); @@ -4529,13 +4529,13 @@ static int __maybe_unused ice_suspend(struct device *dev) disabled = ice_service_task_stop(pf); /* Already suspended?, then there is nothing to do */ - if (test_and_set_bit(__ICE_SUSPENDED, pf->state)) { + if (test_and_set_bit(ICE_SUSPENDED, pf->state)) { if (!disabled) ice_service_task_restart(pf); return 0; } - if (test_bit(__ICE_DOWN, pf->state) || + if (test_bit(ICE_DOWN, pf->state) || ice_is_reset_in_progress(pf->state)) { dev_err(dev, "can't suspend device in reset or already down\n"); if (!disabled) @@ -4607,16 +4607,16 @@ static int __maybe_unused ice_resume(struct device *dev) if (ret) dev_err(dev, "Cannot restore interrupt scheme: %d\n", ret); - clear_bit(__ICE_DOWN, pf->state); + clear_bit(ICE_DOWN, pf->state); /* Now perform PF reset and rebuild */ reset_type = ICE_RESET_PFR; /* re-enable service task for reset, but allow reset to schedule it */ - clear_bit(__ICE_SERVICE_DIS, pf->state); + clear_bit(ICE_SERVICE_DIS, pf->state); if (ice_schedule_reset(pf, reset_type)) dev_err(dev, "Reset during resume failed.\n"); - clear_bit(__ICE_SUSPENDED, pf->state); + clear_bit(ICE_SUSPENDED, pf->state); ice_service_task_restart(pf); /* Restart the service task */ @@ -4645,11 +4645,11 @@ ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err) return PCI_ERS_RESULT_DISCONNECT; } - if (!test_bit(__ICE_SUSPENDED, pf->state)) { + if (!test_bit(ICE_SUSPENDED, pf->state)) { ice_service_task_stop(pf); - if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) { - set_bit(__ICE_PFR_REQ, pf->state); + if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) { + set_bit(ICE_PFR_REQ, pf->state); ice_prepare_for_reset(pf); } } @@ -4716,7 +4716,7 @@ static void ice_pci_err_resume(struct pci_dev *pdev) return; } - if (test_bit(__ICE_SUSPENDED, pf->state)) { + if (test_bit(ICE_SUSPENDED, pf->state)) { dev_dbg(&pdev->dev, "%s failed to resume normal operations!\n", __func__); return; @@ -4737,11 +4737,11 @@ static void ice_pci_err_reset_prepare(struct pci_dev *pdev) { struct ice_pf *pf = pci_get_drvdata(pdev); - if (!test_bit(__ICE_SUSPENDED, pf->state)) { + if (!test_bit(ICE_SUSPENDED, pf->state)) { ice_service_task_stop(pf); - if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) { - set_bit(__ICE_PFR_REQ, pf->state); + if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) { + set_bit(ICE_PFR_REQ, pf->state); ice_prepare_for_reset(pf); } } @@ -4888,7 +4888,7 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) return 0; } - if (test_bit(__ICE_DOWN, pf->state) || + if (test_bit(ICE_DOWN, pf->state) || ice_is_reset_in_progress(pf->state)) { netdev_err(netdev, "can't set mac %pM. 
device not ready\n", mac); @@ -5373,7 +5373,7 @@ void ice_update_vsi_stats(struct ice_vsi *vsi) struct ice_pf *pf = vsi->back; if (test_bit(ICE_VSI_DOWN, vsi->state) || - test_bit(__ICE_CFG_BUSY, pf->state)) + test_bit(ICE_CFG_BUSY, pf->state)) return; /* get stats as recorded by Tx/Rx rings */ @@ -5625,7 +5625,7 @@ int ice_down(struct ice_vsi *vsi) int i, tx_err, rx_err, link_err = 0; /* Caller of this function is expected to set the - * vsi->state __ICE_DOWN bit + * vsi->state ICE_DOWN bit */ if (vsi->netdev) { netif_carrier_off(vsi->netdev); @@ -5973,7 +5973,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) enum ice_status ret; int err; - if (test_bit(__ICE_DOWN, pf->state)) + if (test_bit(ICE_DOWN, pf->state)) goto clear_recovery; dev_dbg(dev, "rebuilding PF after reset_type=%d\n", reset_type); @@ -6089,7 +6089,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) ice_replay_post(hw); /* if we get here, reset flow is successful */ - clear_bit(__ICE_RESET_FAILED, pf->state); + clear_bit(ICE_RESET_FAILED, pf->state); return; err_vsi_rebuild: @@ -6097,10 +6097,10 @@ err_sched_init_port: ice_sched_cleanup_all(hw); err_init_ctrlq: ice_shutdown_all_ctrlq(hw); - set_bit(__ICE_RESET_FAILED, pf->state); + set_bit(ICE_RESET_FAILED, pf->state); clear_recovery: /* set this bit in PF state to control service task scheduling */ - set_bit(__ICE_NEEDS_RESTART, pf->state); + set_bit(ICE_NEEDS_RESTART, pf->state); dev_err(dev, "Rebuild failed, unload and reload driver\n"); } @@ -6622,19 +6622,19 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) switch (pf->tx_timeout_recovery_level) { case 1: - set_bit(__ICE_PFR_REQ, pf->state); + set_bit(ICE_PFR_REQ, pf->state); break; case 2: - set_bit(__ICE_CORER_REQ, pf->state); + set_bit(ICE_CORER_REQ, pf->state); break; case 3: - set_bit(__ICE_GLOBR_REQ, pf->state); + set_bit(ICE_GLOBR_REQ, pf->state); break; default: netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n"); - set_bit(__ICE_DOWN, pf->state); + set_bit(ICE_DOWN, pf->state); set_bit(ICE_VSI_NEEDS_RESTART, vsi->state); - set_bit(__ICE_SERVICE_DIS, pf->state); + set_bit(ICE_SERVICE_DIS, pf->state); break; } @@ -6685,7 +6685,7 @@ int ice_open_internal(struct net_device *netdev) enum ice_status status; int err; - if (test_bit(__ICE_NEEDS_RESTART, pf->state)) { + if (test_bit(ICE_NEEDS_RESTART, pf->state)) { netdev_err(netdev, "driver needs to be unloaded and reloaded\n"); return -EIO; } @@ -6703,7 +6703,7 @@ int ice_open_internal(struct net_device *netdev) /* Set PHY if there is media, otherwise, turn off PHY */ if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { clear_bit(ICE_FLAG_NO_MEDIA, pf->flags); - if (!test_bit(__ICE_PHY_INIT_COMPLETE, pf->state)) { + if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) { err = ice_init_phy_user_cfg(pi); if (err) { netdev_err(netdev, "Failed to initialize PHY settings, error %d\n", diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index 1f4ba38b1599..eee180d8c024 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -1548,7 +1548,7 @@ static void ice_vf_fdir_timer(struct timer_list *t) ctx_done->v_opcode = ctx_irq->v_opcode; spin_unlock_irqrestore(&fdir->ctx_lock, flags); - set_bit(__ICE_FD_VF_FLUSH_CTX, pf->state); + set_bit(ICE_FD_VF_FLUSH_CTX, pf->state); ice_service_task_schedule(pf); } @@ -1596,7 
+1596,7 @@ ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi, if (!ret) dev_err(dev, "VF %d: Unexpected inactive timer!\n", vf->vf_id); - set_bit(__ICE_FD_VF_FLUSH_CTX, pf->state); + set_bit(ICE_FD_VF_FLUSH_CTX, pf->state); ice_service_task_schedule(pf); } @@ -1847,7 +1847,7 @@ void ice_flush_fdir_ctx(struct ice_pf *pf) { int i; - if (!test_and_clear_bit(__ICE_FD_VF_FLUSH_CTX, pf->state)) + if (!test_and_clear_bit(ICE_FD_VF_FLUSH_CTX, pf->state)) return; ice_for_each_vf(pf, i) { diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index e68d52a6b11d..a3d8d06b1e3f 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -371,7 +371,7 @@ void ice_free_vfs(struct ice_pf *pf) if (!pf->vf) return; - while (test_and_set_bit(__ICE_VF_DIS, pf->state)) + while (test_and_set_bit(ICE_VF_DIS, pf->state)) usleep_range(1000, 2000); /* Disable IOV before freeing resources. This lets any VF drivers @@ -424,7 +424,7 @@ void ice_free_vfs(struct ice_pf *pf) wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx)); } } - clear_bit(__ICE_VF_DIS, pf->state); + clear_bit(ICE_VF_DIS, pf->state); clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags); } @@ -1258,7 +1258,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) return false; /* If VFs have been disabled, there is no need to reset */ - if (test_and_set_bit(__ICE_VF_DIS, pf->state)) + if (test_and_set_bit(ICE_VF_DIS, pf->state)) return false; /* Begin reset on all VFs at once */ @@ -1314,7 +1314,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) } ice_flush(hw); - clear_bit(__ICE_VF_DIS, pf->state); + clear_bit(ICE_VF_DIS, pf->state); return true; } @@ -1334,7 +1334,7 @@ static bool ice_is_vf_disabled(struct ice_vf *vf) * means something else is resetting the VF, so we shouldn't continue. * Otherwise, set disable VF state bit for actual reset, and continue. */ - return (test_bit(__ICE_VF_DIS, pf->state) || + return (test_bit(ICE_VF_DIS, pf->state) || test_bit(ICE_VF_STATE_DIS, vf->vf_states)); } @@ -1359,7 +1359,7 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) dev = ice_pf_to_dev(pf); - if (test_bit(__ICE_VF_RESETS_DISABLED, pf->state)) { + if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) { dev_dbg(dev, "Trying to reset VF %d, but all VF resets are disabled\n", vf->vf_id); return true; @@ -1651,7 +1651,7 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) /* Disable global interrupt 0 so we don't try to handle the VFLR. 
*/ wr32(hw, GLINT_DYN_CTL(pf->oicr_idx), ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S); - set_bit(__ICE_OICR_INTR_DIS, pf->state); + set_bit(ICE_OICR_INTR_DIS, pf->state); ice_flush(hw); ret = pci_enable_sriov(pf->pdev, num_vfs); @@ -1679,7 +1679,7 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) goto err_unroll_sriov; } - clear_bit(__ICE_VF_DIS, pf->state); + clear_bit(ICE_VF_DIS, pf->state); return 0; err_unroll_sriov: @@ -1691,7 +1691,7 @@ err_pci_disable_sriov: err_unroll_intr: /* rearm interrupts here */ ice_irq_dynamic_ena(hw, NULL, NULL); - clear_bit(__ICE_OICR_INTR_DIS, pf->state); + clear_bit(ICE_OICR_INTR_DIS, pf->state); return ret; } @@ -1809,7 +1809,7 @@ void ice_process_vflr_event(struct ice_pf *pf) unsigned int vf_id; u32 reg; - if (!test_and_clear_bit(__ICE_VFLR_EVENT_PENDING, pf->state) || + if (!test_and_clear_bit(ICE_VFLR_EVENT_PENDING, pf->state) || !pf->num_alloc_vfs) return; @@ -4194,7 +4194,7 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf) int i; /* check that there are pending MDD events to print */ - if (!test_and_clear_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state)) + if (!test_and_clear_bit(ICE_MDD_VF_PRINT_PENDING, pf->state)) return; /* VF MDD event logs are rate limited to one second intervals */ diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 17ab8ef024ad..b881b650af98 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -159,7 +159,7 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) rx_ring = vsi->rx_rings[q_idx]; q_vector = rx_ring->q_vector; - while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state)) { + while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) { timeout--; if (!timeout) return -EBUSY; @@ -249,7 +249,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) if (err) goto free_buf; - clear_bit(__ICE_CFG_BUSY, vsi->state); + clear_bit(ICE_CFG_BUSY, vsi->state); ice_qvec_toggle_napi(vsi, q_vector, true); ice_qvec_ena_irq(vsi, q_vector); @@ -758,7 +758,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, struct ice_vsi *vsi = np->vsi; struct ice_ring *ring; - if (test_bit(__ICE_DOWN, vsi->state)) + if (test_bit(ICE_DOWN, vsi->state)) return -ENETDOWN; if (!ice_is_xdp_ena_vsi(vsi)) From a476d72abe6cdd2cccc3dbf5a844286cfe9684ed Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Tue, 2 Mar 2021 10:15:41 -0800 Subject: [PATCH 03/15] ice: Add new VSI states to track netdev alloc/registration Add two new VSI states, one to track if a netdev for the VSI has been allocated and the other to track if the netdev has been registered. Call unregister_netdev/free_netdev only when the corresponding state bits are set. 
Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 2 ++ drivers/net/ethernet/intel/ice/ice_lib.c | 21 +++++++++++++++------ drivers/net/ethernet/intel/ice/ice_main.c | 5 +++++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 2cb09af3148c..5e1a680e8a8e 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -241,6 +241,8 @@ enum ice_pf_state { enum ice_vsi_state { ICE_VSI_DOWN, ICE_VSI_NEEDS_RESTART, + ICE_VSI_NETDEV_ALLOCD, + ICE_VSI_NETDEV_REGISTERED, ICE_VSI_UMAC_FLTR_CHANGED, ICE_VSI_MMAC_FLTR_CHANGED, ICE_VSI_VLAN_FLTR_CHANGED, diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 162f01b42147..40a9b034d73b 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2752,11 +2752,14 @@ int ice_vsi_release(struct ice_vsi *vsi) * PF that is running the work queue items currently. This is done to * avoid check_flush_dependency() warning on this wq */ - if (vsi->netdev && !ice_is_reset_in_progress(pf->state)) { + if (vsi->netdev && !ice_is_reset_in_progress(pf->state) && + (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state))) { unregister_netdev(vsi->netdev); - ice_devlink_destroy_port(vsi); + clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); } + ice_devlink_destroy_port(vsi); + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) ice_rss_clean(vsi); @@ -2811,10 +2814,16 @@ int ice_vsi_release(struct ice_vsi *vsi) ice_vsi_delete(vsi); ice_vsi_free_q_vectors(vsi); - /* make sure unregister_netdev() was called by checking ICE_DOWN */ - if (vsi->netdev && test_bit(ICE_VSI_DOWN, vsi->state)) { - free_netdev(vsi->netdev); - vsi->netdev = NULL; + if (vsi->netdev) { + if (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state)) { + unregister_netdev(vsi->netdev); + clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); + } + if (test_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state)) { + free_netdev(vsi->netdev); + vsi->netdev = NULL; + clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); + } } if (vsi->type == ICE_VSI_VF && diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 032101680e09..035c593bce61 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2977,6 +2977,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) if (!netdev) return -ENOMEM; + set_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); vsi->netdev = netdev; np = netdev_priv(netdev); np->vsi = vsi; @@ -3189,6 +3190,7 @@ unroll_napi_add: if (vsi) { ice_napi_del(vsi); if (vsi->netdev) { + clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); free_netdev(vsi->netdev); vsi->netdev = NULL; } @@ -3958,6 +3960,7 @@ static int ice_register_netdev(struct ice_pf *pf) if (err) goto err_register_netdev; + set_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); netif_carrier_off(vsi->netdev); netif_tx_stop_all_queues(vsi->netdev); err = ice_devlink_create_port(vsi); @@ -3969,9 +3972,11 @@ static int ice_register_netdev(struct ice_pf *pf) return 0; err_devlink_create: unregister_netdev(vsi->netdev); + clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); err_register_netdev: free_netdev(vsi->netdev); vsi->netdev = NULL; + clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); return err; } From b8b4772377dd8a916479796c8a8c5425f937fcaf Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Wed, 31 Mar 2021 14:16:56 
-0700 Subject: [PATCH 04/15] ice: refactor interrupt moderation writes Introduce several new helpers for writing ITR and GLINT_RATE registers, and refactor the code calling them. This resulted in removal of several duplicate functions and rolled a bunch of simple code back into the calling routines. In particular this removes some code that was doing both a store and a set in a helper function, which seems better done as separate tasks in the caller (and generally takes less lines of code even with a tiny bit of repetition). Signed-off-by: Jesse Brandeburg Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_base.c | 22 +-- drivers/net/ethernet/intel/ice/ice_ethtool.c | 17 +- drivers/net/ethernet/intel/ice/ice_lib.c | 167 +++++++++++-------- drivers/net/ethernet/intel/ice/ice_lib.h | 3 +- drivers/net/ethernet/intel/ice/ice_xsk.c | 3 - 5 files changed, 110 insertions(+), 102 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index be26775a7dfe..21eb5d447d31 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -740,25 +740,13 @@ void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector) { ice_cfg_itr_gran(hw); - if (q_vector->num_ring_rx) { - struct ice_ring_container *rc = &q_vector->rx; + if (q_vector->num_ring_rx) + ice_write_itr(&q_vector->rx, q_vector->rx.itr_setting); - rc->target_itr = ITR_TO_REG(rc->itr_setting); - rc->next_update = jiffies + 1; - rc->current_itr = rc->target_itr; - wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); - } + if (q_vector->num_ring_tx) + ice_write_itr(&q_vector->tx, q_vector->tx.itr_setting); - if (q_vector->num_ring_tx) { - struct ice_ring_container *rc = &q_vector->tx; - - rc->target_itr = ITR_TO_REG(rc->itr_setting); - rc->next_update = jiffies + 1; - rc->current_itr = rc->target_itr; - wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); - } + ice_write_intrl(q_vector, q_vector->intrl); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index f2bc8f1e86cc..e273560d9e6e 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3636,9 +3636,8 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, } if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl) { rc->ring->q_vector->intrl = ec->rx_coalesce_usecs_high; - wr32(&pf->hw, GLINT_RATE(rc->ring->q_vector->reg_idx), - ice_intrl_usec_to_reg(ec->rx_coalesce_usecs_high, - pf->hw.intrl_gran)); + ice_write_intrl(rc->ring->q_vector, + ec->rx_coalesce_usecs_high); } use_adaptive_coalesce = ec->use_adaptive_rx_coalesce; @@ -3672,10 +3671,15 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, if (use_adaptive_coalesce) { rc->itr_setting |= ICE_ITR_DYNAMIC; } else { - /* save the user set usecs */ + /* store user facing value how it was set */ rc->itr_setting = coalesce_usecs; - /* device ITR granularity is in 2 usec increments */ - rc->target_itr = ITR_REG_ALIGN(rc->itr_setting); + /* write the change to the register */ + ice_write_itr(rc, coalesce_usecs); + /* force writes to take effect immediately, the flush shouldn't + * be done in the functions above because the intent is for + * them to do lazy writes. 
+ */ + ice_flush(&pf->hw); } return 0; @@ -3793,7 +3797,6 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, return -EINVAL; set_complete: - return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 40a9b034d73b..bb47376386f4 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1773,7 +1773,7 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi) * This function converts a decimal interrupt rate limit in usecs to the format * expected by firmware. */ -u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran) +static u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran) { u32 val = intrl / gran; @@ -1782,6 +1782,58 @@ u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran) return 0; } +/** + * ice_write_intrl - write throttle rate limit to interrupt specific register + * @q_vector: pointer to interrupt specific structure + * @intrl: throttle rate limit in microseconds to write + */ +void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl) +{ + struct ice_hw *hw = &q_vector->vsi->back->hw; + + wr32(hw, GLINT_RATE(q_vector->reg_idx), + ice_intrl_usec_to_reg(intrl, ICE_INTRL_GRAN_ABOVE_25)); +} + +/** + * __ice_write_itr - write throttle rate to register + * @q_vector: pointer to interrupt data structure + * @rc: pointer to ring container + * @itr: throttle rate in microseconds to write + */ +static void __ice_write_itr(struct ice_q_vector *q_vector, + struct ice_ring_container *rc, u16 itr) +{ + struct ice_hw *hw = &q_vector->vsi->back->hw; + + wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), + ITR_REG_ALIGN(itr) >> ICE_ITR_GRAN_S); +} + +/** + * ice_write_itr - write throttle rate to queue specific register + * @rc: pointer to ring container + * @itr: throttle rate in microseconds to write + * + * This function is resilient to having the 0x8000 bit set which + * is indicating that an ITR value is "DYNAMIC", and will write + * the correct value to the register. 
+ */ +void ice_write_itr(struct ice_ring_container *rc, u16 itr) +{ + struct ice_q_vector *q_vector; + + if (!rc->ring) + return; + + q_vector = rc->ring->q_vector; + + /* clear the "DYNAMIC" bit */ + itr = ITR_TO_REG(itr); + + __ice_write_itr(q_vector, rc, itr); +} + /** * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW * @vsi: the VSI being configured @@ -1802,9 +1854,6 @@ void ice_vsi_cfg_msix(struct ice_vsi *vsi) ice_cfg_itr(hw, q_vector); - wr32(hw, GLINT_RATE(reg_idx), - ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran)); - /* Both Transmit Queue Interrupt Cause Control register * and Receive Queue Interrupt Cause control register * expects MSIX_INDX field to be the vector index @@ -2492,11 +2541,10 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi) for (i = 0; i < vsi->num_q_vectors; i++) { struct ice_q_vector *q_vector = vsi->q_vectors[i]; - u16 reg_idx = q_vector->reg_idx; - wr32(hw, GLINT_ITR(ICE_IDX_ITR0, reg_idx), 0); - wr32(hw, GLINT_ITR(ICE_IDX_ITR1, reg_idx), 0); + ice_write_intrl(q_vector, 0); for (q = 0; q < q_vector->num_ring_tx; q++) { + ice_write_itr(&q_vector->tx, 0); wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0); if (ice_is_xdp_ena_vsi(vsi)) { u32 xdp_txq = txq + vsi->num_xdp_txq; @@ -2507,6 +2555,7 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi) } for (q = 0; q < q_vector->num_ring_rx; q++) { + ice_write_itr(&q_vector->rx, 0); wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), 0); rxq++; } @@ -2843,47 +2892,6 @@ int ice_vsi_release(struct ice_vsi *vsi) return 0; } -/** - * ice_vsi_rebuild_update_coalesce_intrl - set interrupt rate limit for a q_vector - * @q_vector: pointer to q_vector which is being updated - * @stored_intrl_setting: original INTRL setting - * - * Set coalesce param in q_vector and update these parameters in HW. - */ -static void -ice_vsi_rebuild_update_coalesce_intrl(struct ice_q_vector *q_vector, - u16 stored_intrl_setting) -{ - struct ice_hw *hw = &q_vector->vsi->back->hw; - - q_vector->intrl = stored_intrl_setting; - wr32(hw, GLINT_RATE(q_vector->reg_idx), - ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran)); -} - -/** - * ice_vsi_rebuild_update_coalesce_itr - set coalesce for a q_vector - * @q_vector: pointer to q_vector which is being updated - * @rc: pointer to ring container - * @stored_itr_setting: original ITR setting - * - * Set coalesce param in q_vector and update these parameters in HW. - */ -static void -ice_vsi_rebuild_update_coalesce_itr(struct ice_q_vector *q_vector, - struct ice_ring_container *rc, - u16 stored_itr_setting) -{ - struct ice_hw *hw = &q_vector->vsi->back->hw; - - rc->itr_setting = stored_itr_setting; - - /* dynamic ITR values will be updated during Tx/Rx */ - if (!ITR_IS_DYNAMIC(rc->itr_setting)) - wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(rc->itr_setting) >> ICE_ITR_GRAN_S); -} - /** * ice_vsi_rebuild_get_coalesce - get coalesce from all q_vectors * @vsi: VSI connected with q_vectors @@ -2927,6 +2935,7 @@ static void ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, struct ice_coalesce_stored *coalesce, int size) { + struct ice_ring_container *rc; int i; if ((size && !coalesce) || !vsi) @@ -2949,41 +2958,51 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, * rings is less than are allocated (this means the number of * rings increased from previously), then write out the * values in the first element + * + * Also, always write the ITR, even if in ITR_IS_DYNAMIC + * as there is no harm because the dynamic algorithm + * will just overwrite. 
*/ - if (i < vsi->alloc_rxq && coalesce[i].rx_valid) - ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], - &vsi->q_vectors[i]->rx, - coalesce[i].itr_rx); - else if (i < vsi->alloc_rxq) - ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], - &vsi->q_vectors[i]->rx, - coalesce[0].itr_rx); + if (i < vsi->alloc_rxq && coalesce[i].rx_valid) { + rc = &vsi->q_vectors[i]->rx; + rc->itr_setting = coalesce[i].itr_rx; + ice_write_itr(rc, rc->itr_setting); + } else if (i < vsi->alloc_rxq) { + rc = &vsi->q_vectors[i]->rx; + rc->itr_setting = coalesce[0].itr_rx; + ice_write_itr(rc, rc->itr_setting); + } - if (i < vsi->alloc_txq && coalesce[i].tx_valid) - ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], - &vsi->q_vectors[i]->tx, - coalesce[i].itr_tx); - else if (i < vsi->alloc_txq) - ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], - &vsi->q_vectors[i]->tx, - coalesce[0].itr_tx); + if (i < vsi->alloc_txq && coalesce[i].tx_valid) { + rc = &vsi->q_vectors[i]->tx; + rc->itr_setting = coalesce[i].itr_tx; + ice_write_itr(rc, rc->itr_setting); + } else if (i < vsi->alloc_txq) { + rc = &vsi->q_vectors[i]->tx; + rc->itr_setting = coalesce[0].itr_tx; + ice_write_itr(rc, rc->itr_setting); + } - ice_vsi_rebuild_update_coalesce_intrl(vsi->q_vectors[i], - coalesce[i].intrl); + vsi->q_vectors[i]->intrl = coalesce[i].intrl; + ice_write_intrl(vsi->q_vectors[i], coalesce[i].intrl); } /* the number of queue vectors increased so write whatever is in * the first element */ for (; i < vsi->num_q_vectors; i++) { - ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], - &vsi->q_vectors[i]->tx, - coalesce[0].itr_tx); - ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], - &vsi->q_vectors[i]->rx, - coalesce[0].itr_rx); - ice_vsi_rebuild_update_coalesce_intrl(vsi->q_vectors[i], - coalesce[0].intrl); + /* transmit */ + rc = &vsi->q_vectors[i]->tx; + rc->itr_setting = coalesce[0].itr_tx; + ice_write_itr(rc, rc->itr_setting); + + /* receive */ + rc = &vsi->q_vectors[i]->rx; + rc->itr_setting = coalesce[0].itr_rx; + ice_write_itr(rc, rc->itr_setting); + + vsi->q_vectors[i]->intrl = coalesce[0].intrl; + ice_write_intrl(vsi->q_vectors[i], coalesce[0].intrl); } } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 462c3ab7abad..f48c5ccb8036 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -95,7 +95,8 @@ void ice_vsi_cfg_frame_size(struct ice_vsi *vsi); int ice_status_to_errno(enum ice_status err); -u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran); +void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl); +void ice_write_itr(struct ice_ring_container *rc, u16 itr); enum ice_status ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index b881b650af98..faa7b8d96adb 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -108,9 +108,6 @@ ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector) ice_cfg_itr(hw, q_vector); - wr32(hw, GLINT_RATE(reg_idx), - ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran)); - ice_for_each_ring(ring, q_vector->tx) ice_cfg_txq_interrupt(vsi, ring->reg_idx, reg_idx, q_vector->tx.itr_idx); From cdf1f1f169179659621bb540575b3a9d1cd38072 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 31 Mar 2021 14:16:57 -0700 Subject: [PATCH 05/15] ice: replace custom AIM algorithm with kernel's DIM library The ice 
driver has support for adaptive interrupt moderation, an algorithm for tuning the interrupt rate dynamically. This algorithm is based on various assumptions about ring size, socket buffer size, link speed, SKB overhead, ethernet frame overhead and more. The Linux kernel has support for a dynamic interrupt moderation algorithm known as "dimlib". Replace the custom driver-specific implementation of dynamic interrupt moderation with the kernel's algorithm. The Intel hardware has a different hardware implementation than the originators of the dimlib code had to work with, which requires the driver to use a slightly different set of inputs for the actual moderation values, while getting all the advice from dimlib of better/worse, shift left or right. The change made for this implementation is to use a pair of values for each of the 5 "slots" that the dimlib moderation expects, and the driver will program those pairs when dimlib recommends a slot to use. The current implementation uses two tables, one for receive and one for transmit, and the pairs of values in each slot set the maximum delay of an interrupt and the interrupt rate limit (both expressed in microseconds). There are two separate kinds of bugs fixed by using DIMLIB: one is that UDP single-stream send was too slow, and the other is that 8K ping-pong was going to the most aggressive moderation and had much too high latency. The overall result of using DIMLIB is that we meet or exceed our performance expectations set based on the old algorithm. Co-developed-by: Jesse Brandeburg Signed-off-by: Jesse Brandeburg Signed-off-by: Jacob Keller Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/Kconfig | 1 + drivers/net/ethernet/intel/ice/ice.h | 5 +- drivers/net/ethernet/intel/ice/ice_ethtool.c | 6 + drivers/net/ethernet/intel/ice/ice_lib.c | 13 +- drivers/net/ethernet/intel/ice/ice_main.c | 108 +++++++ drivers/net/ethernet/intel/ice/ice_txrx.c | 311 ++++--------------- drivers/net/ethernet/intel/ice/ice_txrx.h | 6 +- 7 files changed, 179 insertions(+), 271 deletions(-) diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 5aa86318ed3e..c1d155690341 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -294,6 +294,7 @@ config ICE tristate "Intel(R) Ethernet Connection E800 Series Support" default n depends on PCI_MSI + select DIMLIB select NET_DEVLINK select PLDMFW help diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 5e1a680e8a8e..7ae10fd87265 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -351,7 +352,7 @@ struct ice_q_vector { u16 reg_idx; u8 num_ring_rx; /* total number of Rx rings in vector */ u8 num_ring_tx; /* total number of Tx rings in vector */ - u8 itr_countdown; /* when 0 should adjust adaptive ITR */ + u8 wb_on_itr:1; /* if true, WB on ITR is enabled */ /* in usecs, need to use ice_intrl_to_usecs_reg() before writing this * value to the device */ @@ -366,6 +367,8 @@ struct ice_q_vector { struct irq_affinity_notify affinity_notify; char name[ICE_INT_NAME_STR_LEN]; + + u16 total_events; /* net_dim(): number of interrupts processed */ } ____cacheline_internodealigned_in_smp; enum ice_pf_flags { diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index e273560d9e6e..6c057eb6cc45 100644 ---
a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3634,6 +3634,12 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, ICE_MAX_INTRL); return -EINVAL; } + if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl && + (ec->use_adaptive_rx_coalesce || ec->use_adaptive_tx_coalesce)) { + netdev_info(vsi->netdev, "Invalid value, %s-usecs-high cannot be changed if adaptive-tx or adaptive-rx is enabled\n", + c_type_str); + return -EINVAL; + } if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl) { rc->ring->q_vector->intrl = ec->rx_coalesce_usecs_high; ice_write_intrl(rc->ring->q_vector, diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index bb47376386f4..a080cf7bc2d1 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -385,6 +385,8 @@ static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data) if (!q_vector->tx.ring && !q_vector->rx.ring) return IRQ_HANDLED; + q_vector->total_events++; + napi_schedule(&q_vector->napi); return IRQ_HANDLED; @@ -3270,20 +3272,15 @@ out: /** * ice_update_ring_stats - Update ring statistics * @ring: ring to update - * @cont: used to increment per-vector counters * @pkts: number of processed packets * @bytes: number of processed bytes * * This function assumes that caller has acquired a u64_stats_sync lock. */ -static void -ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont, - u64 pkts, u64 bytes) +static void ice_update_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes) { ring->stats.bytes += bytes; ring->stats.pkts += pkts; - cont->total_bytes += bytes; - cont->total_pkts += pkts; } /** @@ -3295,7 +3292,7 @@ ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont, void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes) { u64_stats_update_begin(&tx_ring->syncp); - ice_update_ring_stats(tx_ring, &tx_ring->q_vector->tx, pkts, bytes); + ice_update_ring_stats(tx_ring, pkts, bytes); u64_stats_update_end(&tx_ring->syncp); } @@ -3308,7 +3305,7 @@ void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes) void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes) { u64_stats_update_begin(&rx_ring->syncp); - ice_update_ring_stats(rx_ring, &rx_ring->q_vector->rx, pkts, bytes); + ice_update_ring_stats(rx_ring, pkts, bytes); u64_stats_update_end(&rx_ring->syncp); } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 035c593bce61..1e023d163447 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5196,6 +5196,105 @@ int ice_vsi_cfg(struct ice_vsi *vsi) return err; } +/* THEORY OF MODERATION: + * The below code creates custom DIM profiles for use by this driver, because + * the ice driver hardware works differently than the hardware that DIMLIB was + * originally made for. ice hardware doesn't have packet count limits that + * can trigger an interrupt, but it *does* have interrupt rate limit support, + * and this code adds that capability to be used by the driver when it's using + * DIMLIB. The DIMLIB code was always designed to be a suggestion to the driver + * for how to "respond" to traffic and interrupts, so this driver uses a + * slightly different set of moderation parameters to get best performance. 
+ */ +struct ice_dim { + /* the throttle rate for interrupts, basically worst case delay before + * an initial interrupt fires, value is stored in microseconds. + */ + u16 itr; + /* the rate limit for interrupts, which can cap a delay from a small + * ITR at a certain amount of interrupts per second. f.e. a 2us ITR + * could yield as much as 500,000 interrupts per second, but with a + * 10us rate limit, it limits to 100,000 interrupts per second. Value + * is stored in microseconds. + */ + u16 intrl; +}; + +/* Make a different profile for Rx that doesn't allow quite so aggressive + * moderation at the high end (it maxes out at 128us or about 8k interrupts a + * second. The INTRL/rate parameters here are only useful to cap small ITR + * values, which is why for larger ITR's - like 128, which can only generate + * 8k interrupts per second, there is no point to rate limit and the values + * are set to zero. The rate limit values do affect latency, and so must + * be reasonably small so to not impact latency sensitive tests. + */ +static const struct ice_dim rx_profile[] = { + {2, 10}, + {8, 16}, + {32, 0}, + {96, 0}, + {128, 0} +}; + +/* The transmit profile, which has the same sorts of values + * as the previous struct + */ +static const struct ice_dim tx_profile[] = { + {2, 10}, + {8, 16}, + {64, 0}, + {128, 0}, + {256, 0} +}; + +static void ice_tx_dim_work(struct work_struct *work) +{ + struct ice_ring_container *rc; + struct ice_q_vector *q_vector; + struct dim *dim; + u16 itr, intrl; + + dim = container_of(work, struct dim, work); + rc = container_of(dim, struct ice_ring_container, dim); + q_vector = container_of(rc, struct ice_q_vector, tx); + + if (dim->profile_ix >= ARRAY_SIZE(tx_profile)) + dim->profile_ix = ARRAY_SIZE(tx_profile) - 1; + + /* look up the values in our local table */ + itr = tx_profile[dim->profile_ix].itr; + intrl = tx_profile[dim->profile_ix].intrl; + + ice_write_itr(rc, itr); + ice_write_intrl(q_vector, intrl); + + dim->state = DIM_START_MEASURE; +} + +static void ice_rx_dim_work(struct work_struct *work) +{ + struct ice_ring_container *rc; + struct ice_q_vector *q_vector; + struct dim *dim; + u16 itr, intrl; + + dim = container_of(work, struct dim, work); + rc = container_of(dim, struct ice_ring_container, dim); + q_vector = container_of(rc, struct ice_q_vector, rx); + + if (dim->profile_ix >= ARRAY_SIZE(rx_profile)) + dim->profile_ix = ARRAY_SIZE(rx_profile) - 1; + + /* look up the values in our local table */ + itr = rx_profile[dim->profile_ix].itr; + intrl = rx_profile[dim->profile_ix].intrl; + + ice_write_itr(rc, itr); + ice_write_intrl(q_vector, intrl); + + dim->state = DIM_START_MEASURE; +} + /** * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI * @vsi: the VSI being configured @@ -5210,6 +5309,12 @@ static void ice_napi_enable_all(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, q_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; + INIT_WORK(&q_vector->tx.dim.work, ice_tx_dim_work); + q_vector->tx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + + INIT_WORK(&q_vector->rx.dim.work, ice_rx_dim_work); + q_vector->rx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + if (q_vector->rx.ring || q_vector->tx.ring) napi_enable(&q_vector->napi); } @@ -5618,6 +5723,9 @@ static void ice_napi_disable_all(struct ice_vsi *vsi) if (q_vector->rx.ring || q_vector->tx.ring) napi_disable(&q_vector->napi); + + cancel_work_sync(&q_vector->tx.dim.work); + cancel_work_sync(&q_vector->rx.dim.work); } } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c 
b/drivers/net/ethernet/intel/ice/ice_txrx.c index dfdf2c1fa9d3..97c3efb932dc 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1223,216 +1223,50 @@ construct_skb: } /** - * ice_adjust_itr_by_size_and_speed - Adjust ITR based on current traffic - * @port_info: port_info structure containing the current link speed - * @avg_pkt_size: average size of Tx or Rx packets based on clean routine - * @itr: ITR value to update + * ice_net_dim - Update net DIM algorithm + * @q_vector: the vector associated with the interrupt * - * Calculate how big of an increment should be applied to the ITR value passed - * in based on wmem_default, SKB overhead, ethernet overhead, and the current - * link speed. + * Create a DIM sample and notify net_dim() so that it can possibly decide + * a new ITR value based on incoming packets, bytes, and interrupts. * - * The following is a calculation derived from: - * wmem_default / (size + overhead) = desired_pkts_per_int - * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate - * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value - * - * Assuming wmem_default is 212992 and overhead is 640 bytes per - * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the - * formula down to: - * - * wmem_default * bits_per_byte * usecs_per_sec pkt_size + 24 - * ITR = -------------------------------------------- * -------------- - * rate pkt_size + 640 + * This function is a no-op if the ring is not configured to dynamic ITR. */ -static unsigned int -ice_adjust_itr_by_size_and_speed(struct ice_port_info *port_info, - unsigned int avg_pkt_size, - unsigned int itr) +static void ice_net_dim(struct ice_q_vector *q_vector) { - switch (port_info->phy.link_info.link_speed) { - case ICE_AQ_LINK_SPEED_100GB: - itr += DIV_ROUND_UP(17 * (avg_pkt_size + 24), - avg_pkt_size + 640); - break; - case ICE_AQ_LINK_SPEED_50GB: - itr += DIV_ROUND_UP(34 * (avg_pkt_size + 24), - avg_pkt_size + 640); - break; - case ICE_AQ_LINK_SPEED_40GB: - itr += DIV_ROUND_UP(43 * (avg_pkt_size + 24), - avg_pkt_size + 640); - break; - case ICE_AQ_LINK_SPEED_25GB: - itr += DIV_ROUND_UP(68 * (avg_pkt_size + 24), - avg_pkt_size + 640); - break; - case ICE_AQ_LINK_SPEED_20GB: - itr += DIV_ROUND_UP(85 * (avg_pkt_size + 24), - avg_pkt_size + 640); - break; - case ICE_AQ_LINK_SPEED_10GB: - default: - itr += DIV_ROUND_UP(170 * (avg_pkt_size + 24), - avg_pkt_size + 640); - break; - } + struct ice_ring_container *tx = &q_vector->tx; + struct ice_ring_container *rx = &q_vector->rx; - if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) { - itr &= ICE_ITR_ADAPTIVE_LATENCY; - itr += ICE_ITR_ADAPTIVE_MAX_USECS; - } + if (ITR_IS_DYNAMIC(tx->itr_setting)) { + struct dim_sample dim_sample = {}; + u64 packets = 0, bytes = 0; + struct ice_ring *ring; - return itr; -} - -/** - * ice_update_itr - update the adaptive ITR value based on statistics - * @q_vector: structure containing interrupt and ring information - * @rc: structure containing ring performance data - * - * Stores a new ITR value based on packets and byte - * counts during the last interrupt. The advantage of per interrupt - * computation is faster updates and more accurate ITR for the current - * traffic pattern. Constants in this function were computed - * based on theoretical maximum wire speed and thresholds were set based - * on testing data as well as attempting to minimize response time - * while increasing bulk throughput. 
- */ -static void -ice_update_itr(struct ice_q_vector *q_vector, struct ice_ring_container *rc) -{ - unsigned long next_update = jiffies; - unsigned int packets, bytes, itr; - bool container_is_rx; - - if (!rc->ring || !ITR_IS_DYNAMIC(rc->itr_setting)) - return; - - /* If itr_countdown is set it means we programmed an ITR within - * the last 4 interrupt cycles. This has a side effect of us - * potentially firing an early interrupt. In order to work around - * this we need to throw out any data received for a few - * interrupts following the update. - */ - if (q_vector->itr_countdown) { - itr = rc->target_itr; - goto clear_counts; - } - - container_is_rx = (&q_vector->rx == rc); - /* For Rx we want to push the delay up and default to low latency. - * for Tx we want to pull the delay down and default to high latency. - */ - itr = container_is_rx ? - ICE_ITR_ADAPTIVE_MIN_USECS | ICE_ITR_ADAPTIVE_LATENCY : - ICE_ITR_ADAPTIVE_MAX_USECS | ICE_ITR_ADAPTIVE_LATENCY; - - /* If we didn't update within up to 1 - 2 jiffies we can assume - * that either packets are coming in so slow there hasn't been - * any work, or that there is so much work that NAPI is dealing - * with interrupt moderation and we don't need to do anything. - */ - if (time_after(next_update, rc->next_update)) - goto clear_counts; - - prefetch(q_vector->vsi->port_info); - - packets = rc->total_pkts; - bytes = rc->total_bytes; - - if (container_is_rx) { - /* If Rx there are 1 to 4 packets and bytes are less than - * 9000 assume insufficient data to use bulk rate limiting - * approach unless Tx is already in bulk rate limiting. We - * are likely latency driven. - */ - if (packets && packets < 4 && bytes < 9000 && - (q_vector->tx.target_itr & ICE_ITR_ADAPTIVE_LATENCY)) { - itr = ICE_ITR_ADAPTIVE_LATENCY; - goto adjust_by_size_and_speed; + ice_for_each_ring(ring, q_vector->tx) { + packets += ring->stats.pkts; + bytes += ring->stats.bytes; } - } else if (packets < 4) { - /* If we have Tx and Rx ITR maxed and Tx ITR is running in - * bulk mode and we are receiving 4 or fewer packets just - * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so - * that the Rx can relax. - */ - if (rc->target_itr == ICE_ITR_ADAPTIVE_MAX_USECS && - (q_vector->rx.target_itr & ICE_ITR_MASK) == - ICE_ITR_ADAPTIVE_MAX_USECS) - goto clear_counts; - } else if (packets > 32) { - /* If we have processed over 32 packets in a single interrupt - * for Tx assume we need to switch over to "bulk" mode. - */ - rc->target_itr &= ~ICE_ITR_ADAPTIVE_LATENCY; + + dim_update_sample(q_vector->total_events, packets, bytes, + &dim_sample); + + net_dim(&tx->dim, dim_sample); } - /* We have no packets to actually measure against. This means - * either one of the other queues on this vector is active or - * we are a Tx queue doing TSO with too high of an interrupt rate. - * - * Between 4 and 56 we can assume that our current interrupt delay - * is only slightly too low. As such we should increase it by a small - * fixed amount. 
- */ - if (packets < 56) { - itr = rc->target_itr + ICE_ITR_ADAPTIVE_MIN_INC; - if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) { - itr &= ICE_ITR_ADAPTIVE_LATENCY; - itr += ICE_ITR_ADAPTIVE_MAX_USECS; + if (ITR_IS_DYNAMIC(rx->itr_setting)) { + struct dim_sample dim_sample = {}; + u64 packets = 0, bytes = 0; + struct ice_ring *ring; + + ice_for_each_ring(ring, q_vector->rx) { + packets += ring->stats.pkts; + bytes += ring->stats.bytes; } - goto clear_counts; + + dim_update_sample(q_vector->total_events, packets, bytes, + &dim_sample); + + net_dim(&rx->dim, dim_sample); } - - if (packets <= 256) { - itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr); - itr &= ICE_ITR_MASK; - - /* Between 56 and 112 is our "goldilocks" zone where we are - * working out "just right". Just report that our current - * ITR is good for us. - */ - if (packets <= 112) - goto clear_counts; - - /* If packet count is 128 or greater we are likely looking - * at a slight overrun of the delay we want. Try halving - * our delay to see if that will cut the number of packets - * in half per interrupt. - */ - itr >>= 1; - itr &= ICE_ITR_MASK; - if (itr < ICE_ITR_ADAPTIVE_MIN_USECS) - itr = ICE_ITR_ADAPTIVE_MIN_USECS; - - goto clear_counts; - } - - /* The paths below assume we are dealing with a bulk ITR since - * number of packets is greater than 256. We are just going to have - * to compute a value and try to bring the count under control, - * though for smaller packet sizes there isn't much we can do as - * NAPI polling will likely be kicking in sooner rather than later. - */ - itr = ICE_ITR_ADAPTIVE_BULK; - -adjust_by_size_and_speed: - - /* based on checks above packets cannot be 0 so division is safe */ - itr = ice_adjust_itr_by_size_and_speed(q_vector->vsi->port_info, - bytes / packets, itr); - -clear_counts: - /* write back value */ - rc->target_itr = itr; - - /* next update should occur within next jiffy */ - rc->next_update = next_update + 1; - - rc->total_bytes = 0; - rc->total_pkts = 0; } /** @@ -1456,72 +1290,35 @@ static u32 ice_buildreg_itr(u16 itr_idx, u16 itr) (itr << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S)); } -/* The act of updating the ITR will cause it to immediately trigger. In order - * to prevent this from throwing off adaptive update statistics we defer the - * update so that it can only happen so often. So after either Tx or Rx are - * updated we make the adaptive scheme wait until either the ITR completely - * expires via the next_update expiration or we have been through at least - * 3 interrupts. - */ -#define ITR_COUNTDOWN_START 3 - /** - * ice_update_ena_itr - Update ITR and re-enable MSIX interrupt - * @q_vector: q_vector for which ITR is being updated and interrupt enabled + * ice_update_ena_itr - Update ITR moderation and re-enable MSI-X interrupt + * @q_vector: the vector associated with the interrupt to enable + * + * Update the net_dim() algorithm and re-enable the interrupt associated with + * this vector. + * + * If the VSI is down, the interrupt will not be re-enabled. */ static void ice_update_ena_itr(struct ice_q_vector *q_vector) { - struct ice_ring_container *tx = &q_vector->tx; - struct ice_ring_container *rx = &q_vector->rx; struct ice_vsi *vsi = q_vector->vsi; u32 itr_val; - /* when exiting WB_ON_ITR just reset the countdown and let ITR - * resume it's normal "interrupts-enabled" path + if (test_bit(ICE_DOWN, vsi->state)) + return; + + /* When exiting WB_ON_ITR, let ITR resume its normal + * interrupts-enabled path. 
*/ - if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) - q_vector->itr_countdown = 0; + if (q_vector->wb_on_itr) + q_vector->wb_on_itr = false; - /* This will do nothing if dynamic updates are not enabled */ - ice_update_itr(q_vector, tx); - ice_update_itr(q_vector, rx); + /* This will do nothing if dynamic updates are not enabled. */ + ice_net_dim(q_vector); - /* This block of logic allows us to get away with only updating - * one ITR value with each interrupt. The idea is to perform a - * pseudo-lazy update with the following criteria. - * - * 1. Rx is given higher priority than Tx if both are in same state - * 2. If we must reduce an ITR that is given highest priority. - * 3. We then give priority to increasing ITR based on amount. - */ - if (rx->target_itr < rx->current_itr) { - /* Rx ITR needs to be reduced, this is highest priority */ - itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr); - rx->current_itr = rx->target_itr; - q_vector->itr_countdown = ITR_COUNTDOWN_START; - } else if ((tx->target_itr < tx->current_itr) || - ((rx->target_itr - rx->current_itr) < - (tx->target_itr - tx->current_itr))) { - /* Tx ITR needs to be reduced, this is second priority - * Tx ITR needs to be increased more than Rx, fourth priority - */ - itr_val = ice_buildreg_itr(tx->itr_idx, tx->target_itr); - tx->current_itr = tx->target_itr; - q_vector->itr_countdown = ITR_COUNTDOWN_START; - } else if (rx->current_itr != rx->target_itr) { - /* Rx ITR needs to be increased, third priority */ - itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr); - rx->current_itr = rx->target_itr; - q_vector->itr_countdown = ITR_COUNTDOWN_START; - } else { - /* Still have to re-enable the interrupts */ - itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); - if (q_vector->itr_countdown) - q_vector->itr_countdown--; - } - - if (!test_bit(ICE_VSI_DOWN, vsi->state)) - wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); + /* net_dim() updates ITR out-of-band using a work item */ + itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); + wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); } /** @@ -1543,7 +1340,7 @@ static void ice_set_wb_on_itr(struct ice_q_vector *q_vector) struct ice_vsi *vsi = q_vector->vsi; /* already in wb_on_itr mode no need to change it */ - if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) + if (q_vector->wb_on_itr) return; /* use previously set ITR values for all of the ITR indices by @@ -1555,7 +1352,7 @@ static void ice_set_wb_on_itr(struct ice_q_vector *q_vector) GLINT_DYN_CTL_ITR_INDX_M) | GLINT_DYN_CTL_INTENA_MSK_M | GLINT_DYN_CTL_WB_ON_ITR_M); - q_vector->itr_countdown = ICE_IN_WB_ON_ITR_MODE; + q_vector->wb_on_itr = true; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 701552d88bea..ae12b4e6453b 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -339,12 +339,8 @@ static inline bool ice_ring_is_xdp(struct ice_ring *ring) struct ice_ring_container { /* head of linked-list of rings */ struct ice_ring *ring; - unsigned long next_update; /* jiffies value of next queue update */ - unsigned int total_bytes; /* total bytes processed this int */ - unsigned int total_pkts; /* total packets processed this int */ + struct dim dim; /* data for net_dim algorithm */ u16 itr_idx; /* index in the interrupt vector */ - u16 target_itr; /* value in usecs divided by the hw->itr_gran */ - u16 current_itr; /* value in usecs divided by the hw->itr_gran */ /* high bit set means 
dynamic ITR, rest is used to store user * readable ITR value in usecs and must be converted before programming * to a register. From b7306b42beaf6abdbcb49849b5254ad06321abd1 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Wed, 31 Mar 2021 14:16:58 -0700 Subject: [PATCH 06/15] ice: manage interrupts during poll exit The driver would occasionally miss that there were outstanding descriptors to clean when exiting busy/napi poll. This issue has been in the code since the introduction of the ice driver. Attempt to "catch" any remaining work by triggering a software interrupt when exiting napi poll or busy-poll. This will not cause extra interrupts in the case of normal execution. This issue was found when running sfnt-pingpong, with busy poll enabled, and typically with larger I/O sizes like > 8192, the program would occasionally report > 1 second maximums to complete a ping pong. Signed-off-by: Jesse Brandeburg Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_hw_autogen.h | 1 + drivers/net/ethernet/intel/ice/ice_txrx.c | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 67b5b9b9d009..de38a0fc9665 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -130,6 +130,7 @@ #define GLINT_DYN_CTL_ITR_INDX_M ICE_M(0x3, 3) #define GLINT_DYN_CTL_INTERVAL_S 5 #define GLINT_DYN_CTL_INTERVAL_M ICE_M(0xFFF, 5) +#define GLINT_DYN_CTL_SW_ITR_INDX_ENA_M BIT(24) #define GLINT_DYN_CTL_SW_ITR_INDX_M ICE_M(0x3, 25) #define GLINT_DYN_CTL_WB_ON_ITR_M BIT(30) #define GLINT_DYN_CTL_INTENA_MSK_M BIT(31) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 97c3efb932dc..1ce9ad02477d 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1302,6 +1302,7 @@ static u32 ice_buildreg_itr(u16 itr_idx, u16 itr) static void ice_update_ena_itr(struct ice_q_vector *q_vector) { struct ice_vsi *vsi = q_vector->vsi; + bool wb_en = q_vector->wb_on_itr; u32 itr_val; if (test_bit(ICE_DOWN, vsi->state)) @@ -1310,7 +1311,7 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector) /* When exiting WB_ON_ITR, let ITR resume its normal * interrupts-enabled path. */ - if (q_vector->wb_on_itr) + if (wb_en) q_vector->wb_on_itr = false; /* This will do nothing if dynamic updates are not enabled. */ @@ -1318,6 +1319,16 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector) /* net_dim() updates ITR out-of-band using a work item */ itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); + /* trigger an immediate software interrupt when exiting + * busy poll, to make sure to catch any pending cleanups + * that might have been missed due to interrupt state + * transition. + */ + if (wb_en) { + itr_val |= GLINT_DYN_CTL_SWINT_TRIG_M | + GLINT_DYN_CTL_SW_ITR_INDX_M | + GLINT_DYN_CTL_SW_ITR_INDX_ENA_M; + } wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); } From d59684a07e37b06295e314301c9d0c04915a52f7 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Wed, 31 Mar 2021 14:16:59 -0700 Subject: [PATCH 07/15] ice: refactor ITR data structures Use a dedicated bitfield in order to both increase the amount of checking around the length of ITR writes as well as simplify the checks of dynamic mode. Basically unpack the "high bit means dynamic" logic into bitfields. 
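For illustration, the refactor amounts to roughly the following change to struct ice_ring_container; this is a simplified sketch, with the field names and macros taken from the ice_txrx.h hunk below:

  /* before: a single u16 whose high bit doubled as the "dynamic" flag */
  u16 itr_setting;                /* ICE_ITR_DYNAMIC (0x8000) ORed in when adaptive */
  #define ITR_IS_DYNAMIC(setting) (!!((setting) & ICE_ITR_DYNAMIC))

  /* after: explicit bitfields, so no masking of the usec value is needed */
  u16 itr_setting:13;             /* usecs; ICE_ITR_MAX (8160) fits in 13 bits */
  u16 itr_reserved:2;
  u16 itr_mode:1;                 /* ITR_STATIC or ITR_DYNAMIC */
  #define ITR_IS_DYNAMIC(rc)      ((rc)->itr_mode == ITR_DYNAMIC)
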
Also, remove some unused ITR defines. Signed-off-by: Jesse Brandeburg Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_base.c | 3 ++ drivers/net/ethernet/intel/ice/ice_ethtool.c | 15 +++++----- drivers/net/ethernet/intel/ice/ice_lib.c | 7 ----- drivers/net/ethernet/intel/ice/ice_txrx.c | 4 +-- drivers/net/ethernet/intel/ice/ice_txrx.h | 30 +++++++++----------- 5 files changed, 26 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 21eb5d447d31..5985a7e5ca8a 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -113,6 +113,9 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx) q_vector->v_idx = v_idx; q_vector->tx.itr_setting = ICE_DFLT_TX_ITR; q_vector->rx.itr_setting = ICE_DFLT_RX_ITR; + q_vector->tx.itr_mode = ITR_DYNAMIC; + q_vector->rx.itr_mode = ITR_DYNAMIC; + if (vsi->type == ICE_VSI_VF) goto out; /* only set affinity_mask if the CPU is online */ diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 6c057eb6cc45..68c8ad8d157e 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3510,13 +3510,13 @@ ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type, switch (c_type) { case ICE_RX_CONTAINER: - ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc->itr_setting); - ec->rx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC; + ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc); + ec->rx_coalesce_usecs = rc->itr_setting; ec->rx_coalesce_usecs_high = rc->ring->q_vector->intrl; break; case ICE_TX_CONTAINER: - ec->use_adaptive_tx_coalesce = ITR_IS_DYNAMIC(rc->itr_setting); - ec->tx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC; + ec->use_adaptive_tx_coalesce = ITR_IS_DYNAMIC(rc); + ec->tx_coalesce_usecs = rc->itr_setting; break; default: dev_dbg(ice_pf_to_dev(pf), "Invalid c_type %d\n", c_type); @@ -3661,7 +3661,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, return -EINVAL; } - itr_setting = rc->itr_setting & ~ICE_ITR_DYNAMIC; + itr_setting = rc->itr_setting; if (coalesce_usecs != itr_setting && use_adaptive_coalesce) { netdev_info(vsi->netdev, "%s interrupt throttling cannot be changed if adaptive-%s is enabled\n", c_type_str, c_type_str); @@ -3675,8 +3675,9 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, } if (use_adaptive_coalesce) { - rc->itr_setting |= ICE_ITR_DYNAMIC; + rc->itr_mode = ITR_DYNAMIC; } else { + rc->itr_mode = ITR_STATIC; /* store user facing value how it was set */ rc->itr_setting = coalesce_usecs; /* write the change to the register */ @@ -3747,8 +3748,6 @@ ice_print_if_odd_usecs(struct net_device *netdev, u16 itr_setting, if (use_adaptive_coalesce) return; - itr_setting = ITR_TO_REG(itr_setting); - if (itr_setting != coalesce_usecs && (coalesce_usecs % 2)) netdev_info(netdev, "User set %s-usecs to %d, device only supports even values. 
Rounding down and attempting to set %s-usecs to %d\n", c_type_str, coalesce_usecs, c_type_str, diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index a080cf7bc2d1..45837e9eee86 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1816,10 +1816,6 @@ static void __ice_write_itr(struct ice_q_vector *q_vector, * ice_write_itr - write throttle rate to queue specific register * @rc: pointer to ring container * @itr: throttle rate in microseconds to write - * - * This function is resilient to having the 0x8000 bit set which - * is indicating that an ITR value is "DYNAMIC", and will write - * the correct value to the register. */ void ice_write_itr(struct ice_ring_container *rc, u16 itr) { @@ -1830,9 +1826,6 @@ void ice_write_itr(struct ice_ring_container *rc, u16 itr) q_vector = rc->ring->q_vector; - /* clear the "DYNAMIC" bit */ - itr = ITR_TO_REG(itr); - __ice_write_itr(q_vector, rc, itr); } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 1ce9ad02477d..e2b4b29ea207 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1236,7 +1236,7 @@ static void ice_net_dim(struct ice_q_vector *q_vector) struct ice_ring_container *tx = &q_vector->tx; struct ice_ring_container *rx = &q_vector->rx; - if (ITR_IS_DYNAMIC(tx->itr_setting)) { + if (ITR_IS_DYNAMIC(tx)) { struct dim_sample dim_sample = {}; u64 packets = 0, bytes = 0; struct ice_ring *ring; @@ -1252,7 +1252,7 @@ static void ice_net_dim(struct ice_q_vector *q_vector) net_dim(&tx->dim, dim_sample); } - if (ITR_IS_DYNAMIC(rx->itr_setting)) { + if (ITR_IS_DYNAMIC(rx)) { struct dim_sample dim_sample = {}; u64 packets = 0, bytes = 0; struct ice_ring *ring; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index ae12b4e6453b..c5a92ac787d6 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -223,23 +223,20 @@ enum ice_rx_dtype { #define ICE_TX_ITR ICE_IDX_ITR1 #define ICE_ITR_8K 124 #define ICE_ITR_20K 50 -#define ICE_ITR_MAX 8160 -#define ICE_DFLT_TX_ITR (ICE_ITR_20K | ICE_ITR_DYNAMIC) -#define ICE_DFLT_RX_ITR (ICE_ITR_20K | ICE_ITR_DYNAMIC) -#define ICE_ITR_DYNAMIC 0x8000 /* used as flag for itr_setting */ -#define ITR_IS_DYNAMIC(setting) (!!((setting) & ICE_ITR_DYNAMIC)) -#define ITR_TO_REG(setting) ((setting) & ~ICE_ITR_DYNAMIC) +#define ICE_ITR_MAX 8160 /* 0x1FE0 */ +#define ICE_DFLT_TX_ITR ICE_ITR_20K +#define ICE_DFLT_RX_ITR ICE_ITR_20K +enum ice_dynamic_itr { + ITR_STATIC = 0, + ITR_DYNAMIC = 1 +}; + +#define ITR_IS_DYNAMIC(rc) ((rc)->itr_mode == ITR_DYNAMIC) #define ICE_ITR_GRAN_S 1 /* ITR granularity is always 2us */ #define ICE_ITR_GRAN_US BIT(ICE_ITR_GRAN_S) #define ICE_ITR_MASK 0x1FFE /* ITR register value alignment mask */ #define ITR_REG_ALIGN(setting) ((setting) & ICE_ITR_MASK) -#define ICE_ITR_ADAPTIVE_MIN_INC 0x0002 -#define ICE_ITR_ADAPTIVE_MIN_USECS 0x0002 -#define ICE_ITR_ADAPTIVE_MAX_USECS 0x00FA -#define ICE_ITR_ADAPTIVE_LATENCY 0x8000 -#define ICE_ITR_ADAPTIVE_BULK 0x0000 - #define ICE_DFLT_INTRL 0 #define ICE_MAX_INTRL 236 @@ -341,11 +338,12 @@ struct ice_ring_container { struct ice_ring *ring; struct dim dim; /* data for net_dim algorithm */ u16 itr_idx; /* index in the interrupt vector */ - /* high bit set means dynamic ITR, rest is used to store user - * readable ITR value in usecs and must be converted before programming - 
* to a register. + /* this matches the maximum number of ITR bits, but in usec + * values, so it is shifted left one bit (bit zero is ignored) */ - u16 itr_setting; + u16 itr_setting:13; + u16 itr_reserved:2; + u16 itr_mode:1; }; struct ice_coalesce_stored { From e9c9692c8a81aacf0854f68ab54dc182f8be38e8 Mon Sep 17 00:00:00 2001 From: Scott W Taylor Date: Wed, 31 Mar 2021 14:17:00 -0700 Subject: [PATCH 08/15] ice: Reimplement module reads used by ethtool There was an excessive increment of the QSFP page, which is now fixed. Additionally, this new update now reads 8 bytes at a time and will retry each request if the module/bus is busy. Also, prevent reading from upper pages if module does not support those pages. Signed-off-by: Scott W Taylor Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 49 ++++++++++++++++---- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 68c8ad8d157e..d9ddd0bcf65f 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3914,30 +3914,33 @@ ice_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee, u8 *data) { struct ice_netdev_priv *np = netdev_priv(netdev); +#define SFF_READ_BLOCK_SIZE 8 + u8 value[SFF_READ_BLOCK_SIZE] = { 0 }; u8 addr = ICE_I2C_EEPROM_DEV_ADDR; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; enum ice_status status; bool is_sfp = false; - unsigned int i; + unsigned int i, j; u16 offset = 0; - u8 value = 0; u8 page = 0; if (!ee || !ee->len || !data) return -EINVAL; - status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, &value, 1, 0, + status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, value, 1, 0, NULL); if (status) return -EIO; - if (value == ICE_MODULE_TYPE_SFP) + if (value[0] == ICE_MODULE_TYPE_SFP) is_sfp = true; - for (i = 0; i < ee->len; i++) { + memset(data, 0, ee->len); + for (i = 0; i < ee->len; i += SFF_READ_BLOCK_SIZE) { offset = i + ee->offset; + page = 0; /* Check if we need to access the other memory page */ if (is_sfp) { @@ -3953,11 +3956,37 @@ ice_get_module_eeprom(struct net_device *netdev, } } - status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, !is_sfp, - &value, 1, 0, NULL); - if (status) - value = 0; - data[i] = value; + /* Bit 2 of EEPROM address 0x02 declares upper + * pages are disabled on QSFP modules. + * SFP modules only ever use page 0. + */ + if (page == 0 || !(data[0x2] & 0x4)) { + /* If i2c bus is busy due to slow page change or + * link management access, call can fail. This is normal. + * So we retry this a few times. 
+ */ + for (j = 0; j < 4; j++) { + status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, + !is_sfp, value, + SFF_READ_BLOCK_SIZE, + 0, NULL); + netdev_dbg(netdev, "SFF %02X %02X %02X %X = %02X%02X%02X%02X.%02X%02X%02X%02X (%X)\n", + addr, offset, page, is_sfp, + value[0], value[1], value[2], value[3], + value[4], value[5], value[6], value[7], + status); + if (status) { + usleep_range(1500, 2500); + memset(value, 0, SFF_READ_BLOCK_SIZE); + continue; + } + break; + } + + /* Make sure we have enough room for the new block */ + if ((i + SFF_READ_BLOCK_SIZE) < ee->len) + memcpy(data + i, value, SFF_READ_BLOCK_SIZE); + } } return 0; } From 80ad6dde61894dd880c3690b33eebbbc813e0276 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Wed, 31 Mar 2021 14:17:01 -0700 Subject: [PATCH 09/15] ice: print name in /proc/iomem The driver previously printed its PCI address in the name field for the pci resource, which, when displayed via /proc/iomem, would print the same thing twice. It's more useful for debugging to see the driver name, as most other modules do. Here's a diff of before and after this change: 99100000-991fffff : 0000:3b:00.1 9a000000-a04fffff : PCI Bus 0000:3b 9a000000-9bffffff : 0000:3b:00.1 - 9a000000-9bffffff : 0000:3b:00.1 + 9a000000-9bffffff : ice 9c000000-9dffffff : 0000:3b:00.0 - 9c000000-9dffffff : 0000:3b:00.0 + 9c000000-9dffffff : ice 9e000000-9effffff : 0000:3b:00.1 9f000000-9fffffff : 0000:3b:00.0 a0000000-a000ffff : 0000:3b:00.1 Signed-off-by: Jesse Brandeburg Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 1e023d163447..02672be04f78 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4002,7 +4002,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) if (err) return err; - err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), pci_name(pdev)); + err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), dev_driver_string(dev)); if (err) { dev_err(dev, "BAR0 I/O map error %d\n", err); return err; From 58623c52b4278de6ed462e6f25402457ffbdd63f Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Wed, 31 Mar 2021 14:17:02 -0700 Subject: [PATCH 10/15] ice: use local for consistency Do a minor refactor on ice_vsi_rebuild to use a local variable to store vsi->type. Signed-off-by: Jesse Brandeburg Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 45837e9eee86..0443338f9eaf 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3014,6 +3014,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) struct ice_coalesce_stored *coalesce; int prev_num_q_vectors = 0; struct ice_vf *vf = NULL; + enum ice_vsi_type vtype; enum ice_status status; struct ice_pf *pf; int ret, i; @@ -3022,7 +3023,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) return -EINVAL; pf = vsi->back; - if (vsi->type == ICE_VSI_VF) + vtype = vsi->type; + if (vtype == ICE_VSI_VF) vf = &pf->vf[vsi->vf_id]; coalesce = kcalloc(vsi->num_q_vectors, @@ -3040,7 +3042,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) * many interrupts each VF needs.
SR-IOV MSIX resources are also * cleared in the same manner. */ - if (vsi->type != ICE_VSI_VF) { + if (vtype != ICE_VSI_VF) { /* reclaim SW interrupts back to the common pool */ ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); pf->num_avail_sw_msix += vsi->num_q_vectors; @@ -3055,7 +3057,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) ice_vsi_put_qs(vsi); ice_vsi_clear_rings(vsi); ice_vsi_free_arrays(vsi); - if (vsi->type == ICE_VSI_VF) + if (vtype == ICE_VSI_VF) ice_vsi_set_num_qs(vsi, vf->vf_id); else ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID); @@ -3074,7 +3076,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) if (ret < 0) goto err_vsi; - switch (vsi->type) { + switch (vtype) { case ICE_VSI_CTRL: case ICE_VSI_PF: ret = ice_vsi_alloc_q_vectors(vsi); @@ -3101,7 +3103,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) goto err_vectors; } /* ICE_VSI_CTRL does not need RSS so skip RSS processing */ - if (vsi->type != ICE_VSI_CTRL) + if (vtype != ICE_VSI_CTRL) /* Do not exit if configuring RSS had an issue, at * least receive traffic on first queue. Hence no * need to capture return value From 1cdea9a7eae3a976adc2735bc7ce62ac07cafcdb Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Wed, 31 Mar 2021 14:17:03 -0700 Subject: [PATCH 11/15] ice: remove unused struct member The only time you can ever have a rq_last_status is if a firmware event was somehow reporting a status on the receive queue, which are generally firmware initiated events or mailbox messages from a VF. Mostly this struct member was unused. Fix this problem by still printing the value of the field in a debug print, but don't store the value forever in a struct, potentially creating opportunities for callers to use the wrong struct member. Signed-off-by: Jesse Brandeburg Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_controlq.c | 6 +++--- drivers/net/ethernet/intel/ice/ice_controlq.h | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index 0f207a42ea77..87b33bdd4960 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -1097,6 +1097,7 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, struct ice_rq_event_info *e, u16 *pending) { u16 ntc = cq->rq.next_to_clean; + enum ice_aq_err rq_last_status; enum ice_status ret_code = 0; struct ice_aq_desc *desc; struct ice_dma_mem *bi; @@ -1130,13 +1131,12 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, desc = ICE_CTL_Q_DESC(cq->rq, ntc); desc_idx = ntc; - cq->rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval); + rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval); flags = le16_to_cpu(desc->flags); if (flags & ICE_AQ_FLAG_ERR) { ret_code = ICE_ERR_AQ_ERROR; ice_debug(hw, ICE_DBG_AQ_MSG, "Control Receive Queue Event 0x%04X received with error 0x%X\n", - le16_to_cpu(desc->opcode), - cq->rq_last_status); + le16_to_cpu(desc->opcode), rq_last_status); } memcpy(&e->desc, desc, sizeof(e->desc)); datalen = le16_to_cpu(desc->datalen); diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h index 77c2307d4fb8..fe75871e48ca 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -83,7 +83,6 @@ struct ice_rq_event_info { /* Control Queue information */ struct ice_ctl_q_info { enum ice_ctl_q 
qtype; - enum ice_aq_err rq_last_status; /* last status on receive queue */ struct ice_ctl_q_ring rq; /* receive queue */ struct ice_ctl_q_ring sq; /* send queue */ u32 sq_cmd_timeout; /* send queue cmd write back timeout */ From c931c782d8465c0408e14bf031e951134c30b059 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Wed, 31 Mar 2021 14:17:04 -0700 Subject: [PATCH 12/15] ice: Set vsi->vf_id as ICE_INVAL_VFID for non VF VSI types Currently the vsi->vf_id is set only for ICE_VSI_VF and it's left as 0 for all other VSI types. This is confusing and could be problematic since 0 is a valid vf_id. Fix this by always setting non VF VSI types to ICE_INVAL_VFID. Signed-off-by: Brett Creeley Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 0443338f9eaf..eb6e352d3387 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -158,6 +158,8 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) if (vsi->type == ICE_VSI_VF) vsi->vf_id = vf_id; + else + vsi->vf_id = ICE_INVAL_VFID; switch (vsi->type) { case ICE_VSI_PF: From b370245b4b95a07433a03e06eb3d32a01ded2c5d Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Wed, 31 Mar 2021 14:17:05 -0700 Subject: [PATCH 13/15] ice: suppress false cppcheck issues Silence false errors, warnings and style issues reported by cppcheck. Signed-off-by: Bruce Allan Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_flex_pipe.c | 3 +++ drivers/net/ethernet/intel/ice/ice_nvm.c | 1 + drivers/net/ethernet/intel/ice/ice_sched.c | 1 + 3 files changed, 5 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 4b83960876f4..06ac9badee77 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -334,6 +334,7 @@ ice_boost_tcam_handler(u32 sect_type, void *section, u32 index, u32 *offset) if (sect_type != ICE_SID_RXPARSER_BOOST_TCAM) return NULL; + /* cppcheck-suppress nullPointer */ if (index > ICE_MAX_BST_TCAMS_IN_BUF) return NULL; @@ -404,6 +405,7 @@ ice_label_enum_handler(u32 __always_unused sect_type, void *section, u32 index, if (!section) return NULL; + /* cppcheck-suppress nullPointer */ if (index > ICE_MAX_LABELS_IN_BUF) return NULL; @@ -2067,6 +2069,7 @@ ice_match_prop_lst(struct list_head *list1, struct list_head *list2) count++; list_for_each_entry(tmp2, list2, list) chk_count++; + /* cppcheck-suppress knownConditionTrueFalse */ if (!count || count != chk_count) return false; diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index 75ccbfc07f99..fee37a5844cf 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -644,6 +644,7 @@ ice_get_orom_civd_data(struct ice_hw *hw, enum ice_bank_select bank, /* Verify that the simple checksum is zero */ for (i = 0; i < sizeof(tmp); i++) + /* cppcheck-suppress objectIndex */ sum += ((u8 *)&tmp)[i]; if (sum) { diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 97562051fe14..2f097637e405 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -988,6 +988,7 @@ ice_sched_add_nodes_to_layer(struct ice_port_info *pi, 
*num_nodes_added = 0; while (*num_nodes_added < num_nodes) { u16 max_child_nodes, num_added = 0; + /* cppcheck-suppress unusedVariable */ u32 temp; status = ice_sched_add_nodes_to_hw_layer(pi, tc_node, parent, From 4fe36226943b9ca99cf51573297b39644a1946d6 Mon Sep 17 00:00:00 2001 From: Paul M Stillwell Jr Date: Wed, 31 Mar 2021 14:17:07 -0700 Subject: [PATCH 14/15] ice: remove return variable We were saving the return value from ice_vsi_manage_rss_lut(), but the errors from that function are not critical so change it to return void and remove the code that saved the value. Signed-off-by: Paul M Stillwell Jr Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 8 +++----- drivers/net/ethernet/intel/ice/ice_lib.h | 2 +- drivers/net/ethernet/intel/ice/ice_main.c | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index eb6e352d3387..82e2ce23df3d 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1313,14 +1313,13 @@ err_out: * LUT, while in the event of enable request for RSS, it will reconfigure RSS * LUT. */ -int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) +void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) { - int err = 0; u8 *lut; lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) - return -ENOMEM; + return; if (ena) { if (vsi->rss_lut_user) @@ -1330,9 +1329,8 @@ int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) vsi->rss_size); } - err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size); + ice_set_rss_lut(vsi, lut, vsi->rss_table_size); kfree(lut); - return err; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index f48c5ccb8036..511c2316c40c 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -85,7 +85,7 @@ void ice_vsi_free_rx_rings(struct ice_vsi *vsi); void ice_vsi_free_tx_rings(struct ice_vsi *vsi); -int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena); +void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena); void ice_update_tx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 02672be04f78..6dbaa9099fdf 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5112,10 +5112,10 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) * separate if/else statements to guarantee each feature is checked */ if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH)) - ret = ice_vsi_manage_rss_lut(vsi, true); + ice_vsi_manage_rss_lut(vsi, true); else if (!(features & NETIF_F_RXHASH) && netdev->features & NETIF_F_RXHASH) - ret = ice_vsi_manage_rss_lut(vsi, false); + ice_vsi_manage_rss_lut(vsi, false); if ((features & NETIF_F_HW_VLAN_CTAG_RX) && !(netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) From 4c26f69d0cf966044ef0c31a87c2da68fc6d066a Mon Sep 17 00:00:00 2001 From: Paul M Stillwell Jr Date: Wed, 31 Mar 2021 14:17:08 -0700 Subject: [PATCH 15/15] ice: reduce scope of variable The scope of this variable can be reduced so do that. 
Signed-off-by: Paul M Stillwell Jr Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index a3d8d06b1e3f..e38d4adc5b8d 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -4234,7 +4234,6 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf) */ void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) { - struct pci_dev *vfdev; u16 vf_id; int pos; @@ -4243,6 +4242,8 @@ void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); if (pos) { + struct pci_dev *vfdev; + pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id); vfdev = pci_get_device(pdev->vendor, vf_id, NULL);