Merge branch 'net-generic-busy-polling'

Eric Dumazet says:

====================
net: extend busy polling support

This patch series extends busy polling range to tunnels devices,
and adds busy polling generic support to all NAPI drivers.

No need to provide ndo_busy_poll() method and extra synchronization
between ndo_busy_poll() and normal napi->poll() method.
This was proven very difficult and bug prone.

mlx5 driver is changed to support busy polling using this new method,
and a second mlx5 patch adds napi_complete_done() support and proper
SNMP accounting.

bnx2x and mlx4 drivers are converted to new infrastructure,
reducing kernel bloat and improving performance.

Latest patch, adding generic support, adds a new requirement :

 -free_netdev() and netif_napi_del() must be called from process context.

Since this might not be the case in some drivers, we might have to
either : fix the non conformant drivers (by disabling busy polling on them)
or revert this last patch.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2015-11-18 16:17:43 -05:00
commit 85c72ba1ed
41 changed files with 171 additions and 507 deletions

View File

@ -2024,7 +2024,6 @@ read_again:
skb->dev = netdev;
skb->protocol = eth_type_trans(skb, netdev);
skb_record_rx_queue(skb, channel->queue_index);
skb_mark_napi_id(skb, napi);
napi_gro_receive(napi, skb);

View File

@ -1216,7 +1216,7 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
/* Initialize SW view of the ring */
spin_lock_init(&ring->lock);
ring->priv = priv;
netif_napi_add(priv->netdev, &ring->napi, bcm_sysport_tx_poll, 64);
netif_tx_napi_add(priv->netdev, &ring->napi, bcm_sysport_tx_poll, 64);
ring->index = index;
ring->size = size;
ring->alloc_size = ring->size;

View File

@ -540,10 +540,6 @@ struct bnx2x_fastpath {
struct napi_struct napi;
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned long busy_poll_state;
#endif
union host_hc_status_block status_blk;
/* chip independent shortcuts into sb structure */
__le16 *sb_index_values;
@ -617,115 +613,6 @@ struct bnx2x_fastpath {
#define bnx2x_fp_stats(bp, fp) (&((bp)->fp_stats[(fp)->index]))
#define bnx2x_fp_qstats(bp, fp) (&((bp)->fp_stats[(fp)->index].eth_q_stats))
#ifdef CONFIG_NET_RX_BUSY_POLL
enum bnx2x_fp_state {
BNX2X_STATE_FP_NAPI = BIT(0), /* NAPI handler owns the queue */
BNX2X_STATE_FP_NAPI_REQ_BIT = 1, /* NAPI would like to own the queue */
BNX2X_STATE_FP_NAPI_REQ = BIT(1),
BNX2X_STATE_FP_POLL_BIT = 2,
BNX2X_STATE_FP_POLL = BIT(2), /* busy_poll owns the queue */
BNX2X_STATE_FP_DISABLE_BIT = 3, /* queue is dismantled */
};
static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp)
{
WRITE_ONCE(fp->busy_poll_state, 0);
}
/* called from the device poll routine to get ownership of a FP */
static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp)
{
unsigned long prev, old = READ_ONCE(fp->busy_poll_state);
while (1) {
switch (old) {
case BNX2X_STATE_FP_POLL:
/* make sure bnx2x_fp_lock_poll() wont starve us */
set_bit(BNX2X_STATE_FP_NAPI_REQ_BIT,
&fp->busy_poll_state);
/* fallthrough */
case BNX2X_STATE_FP_POLL | BNX2X_STATE_FP_NAPI_REQ:
return false;
default:
break;
}
prev = cmpxchg(&fp->busy_poll_state, old, BNX2X_STATE_FP_NAPI);
if (unlikely(prev != old)) {
old = prev;
continue;
}
return true;
}
}
static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
{
smp_wmb();
fp->busy_poll_state = 0;
}
/* called from bnx2x_low_latency_poll() */
static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
{
return cmpxchg(&fp->busy_poll_state, 0, BNX2X_STATE_FP_POLL) == 0;
}
static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
{
smp_mb__before_atomic();
clear_bit(BNX2X_STATE_FP_POLL_BIT, &fp->busy_poll_state);
}
/* true if a socket is polling */
static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp)
{
return READ_ONCE(fp->busy_poll_state) & BNX2X_STATE_FP_POLL;
}
/* false if fp is currently owned */
static inline bool bnx2x_fp_ll_disable(struct bnx2x_fastpath *fp)
{
set_bit(BNX2X_STATE_FP_DISABLE_BIT, &fp->busy_poll_state);
return !bnx2x_fp_ll_polling(fp);
}
#else
static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp)
{
}
static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp)
{
return true;
}
static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
{
}
static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
{
return false;
}
static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
{
}
static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp)
{
return false;
}
static inline bool bnx2x_fp_ll_disable(struct bnx2x_fastpath *fp)
{
return true;
}
#endif /* CONFIG_NET_RX_BUSY_POLL */
/* Use 2500 as a mini-jumbo MTU for FCoE */
#define BNX2X_FCOE_MINI_JUMBO_MTU 2500

View File

@ -46,7 +46,6 @@ static void bnx2x_add_all_napi_cnic(struct bnx2x *bp)
for_each_rx_queue_cnic(bp, i) {
netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi),
bnx2x_poll, NAPI_POLL_WEIGHT);
napi_hash_add(&bnx2x_fp(bp, i, napi));
}
}
@ -58,7 +57,6 @@ static void bnx2x_add_all_napi(struct bnx2x *bp)
for_each_eth_queue(bp, i) {
netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi),
bnx2x_poll, NAPI_POLL_WEIGHT);
napi_hash_add(&bnx2x_fp(bp, i, napi));
}
}
@ -1094,12 +1092,7 @@ reuse_rx:
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
le16_to_cpu(cqe_fp->vlan_tag));
skb_mark_napi_id(skb, &fp->napi);
if (bnx2x_fp_ll_polling(fp))
netif_receive_skb(skb);
else
napi_gro_receive(&fp->napi, skb);
napi_gro_receive(&fp->napi, skb);
next_rx:
rx_buf->data = NULL;
@ -1869,7 +1862,6 @@ static void bnx2x_napi_enable_cnic(struct bnx2x *bp)
int i;
for_each_rx_queue_cnic(bp, i) {
bnx2x_fp_busy_poll_init(&bp->fp[i]);
napi_enable(&bnx2x_fp(bp, i, napi));
}
}
@ -1879,7 +1871,6 @@ static void bnx2x_napi_enable(struct bnx2x *bp)
int i;
for_each_eth_queue(bp, i) {
bnx2x_fp_busy_poll_init(&bp->fp[i]);
napi_enable(&bnx2x_fp(bp, i, napi));
}
}
@ -1890,8 +1881,6 @@ static void bnx2x_napi_disable_cnic(struct bnx2x *bp)
for_each_rx_queue_cnic(bp, i) {
napi_disable(&bnx2x_fp(bp, i, napi));
while (!bnx2x_fp_ll_disable(&bp->fp[i]))
usleep_range(1000, 2000);
}
}
@ -1901,8 +1890,6 @@ static void bnx2x_napi_disable(struct bnx2x *bp)
for_each_eth_queue(bp, i) {
napi_disable(&bnx2x_fp(bp, i, napi));
while (!bnx2x_fp_ll_disable(&bp->fp[i]))
usleep_range(1000, 2000);
}
}
@ -3232,9 +3219,6 @@ static int bnx2x_poll(struct napi_struct *napi, int budget)
return 0;
}
#endif
if (!bnx2x_fp_lock_napi(fp))
return budget;
for_each_cos_in_tx_queue(fp, cos)
if (bnx2x_tx_queue_has_work(fp->txdata_ptr[cos]))
bnx2x_tx_int(bp, fp->txdata_ptr[cos]);
@ -3243,14 +3227,10 @@ static int bnx2x_poll(struct napi_struct *napi, int budget)
work_done += bnx2x_rx_int(fp, budget - work_done);
/* must not complete if we consumed full budget */
if (work_done >= budget) {
bnx2x_fp_unlock_napi(fp);
if (work_done >= budget)
break;
}
}
bnx2x_fp_unlock_napi(fp);
/* Fall out from the NAPI loop if needed */
if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
@ -3294,32 +3274,6 @@ static int bnx2x_poll(struct napi_struct *napi, int budget)
return work_done;
}
#ifdef CONFIG_NET_RX_BUSY_POLL
/* must be called with local_bh_disable()d */
int bnx2x_low_latency_recv(struct napi_struct *napi)
{
struct bnx2x_fastpath *fp = container_of(napi, struct bnx2x_fastpath,
napi);
struct bnx2x *bp = fp->bp;
int found = 0;
if ((bp->state == BNX2X_STATE_CLOSED) ||
(bp->state == BNX2X_STATE_ERROR) ||
(bp->dev->features & (NETIF_F_LRO | NETIF_F_GRO)))
return LL_FLUSH_FAILED;
if (!bnx2x_fp_lock_poll(fp))
return LL_FLUSH_BUSY;
if (bnx2x_has_rx_work(fp))
found = bnx2x_rx_int(fp, 4);
bnx2x_fp_unlock_poll(fp);
return found;
}
#endif
/* we split the first BD into headers and data BDs
* to ease the pain of our fellow microcode engineers
* we use one mapping for both BDs

View File

@ -569,13 +569,6 @@ int bnx2x_enable_msix(struct bnx2x *bp);
*/
int bnx2x_enable_msi(struct bnx2x *bp);
/**
* bnx2x_low_latency_recv - LL callback
*
* @napi: napi structure
*/
int bnx2x_low_latency_recv(struct napi_struct *napi);
/**
* bnx2x_alloc_mem_bp - allocate memories outsize main driver structure
*

View File

@ -13004,9 +13004,6 @@ static const struct net_device_ops bnx2x_netdev_ops = {
.ndo_fcoe_get_wwn = bnx2x_fcoe_get_wwn,
#endif
#ifdef CONFIG_NET_RX_BUSY_POLL
.ndo_busy_poll = bnx2x_low_latency_recv,
#endif
.ndo_get_phys_port_id = bnx2x_get_phys_port_id,
.ndo_set_vf_link_state = bnx2x_set_vf_link_state,
.ndo_features_check = bnx2x_features_check,

View File

@ -4227,12 +4227,10 @@ static void bnxt_init_napi(struct bnxt *bp)
bnapi = bp->bnapi[i];
netif_napi_add(bp->dev, &bnapi->napi,
bnxt_poll, 64);
napi_hash_add(&bnapi->napi);
}
} else {
bnapi = bp->bnapi[0];
netif_napi_add(bp->dev, &bnapi->napi, bnxt_poll, 64);
napi_hash_add(&bnapi->napi);
}
}

View File

@ -2041,11 +2041,11 @@ static void bcmgenet_init_tx_napi(struct bcmgenet_priv *priv)
for (i = 0; i < priv->hw_params->tx_queues; ++i) {
ring = &priv->tx_rings[i];
netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64);
netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64);
}
ring = &priv->tx_rings[DESC_INDEX];
netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64);
netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64);
}
static void bcmgenet_enable_tx_napi(struct bcmgenet_priv *priv)

View File

@ -1864,7 +1864,6 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
skb->truesize += skb->data_len;
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb_record_rx_queue(skb, rxq->rspq.idx);
skb_mark_napi_id(skb, &rxq->rspq.napi);
pi = netdev_priv(skb->dev);
if (pi->rxtstamp)
cxgb4_sgetim_to_hwtstamp(adapter, skb_hwtstamps(skb),
@ -2528,7 +2527,6 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
goto err;
netif_napi_add(dev, &iq->napi, napi_rx_handler, 64);
napi_hash_add(&iq->napi);
iq->cur_desc = iq->desc;
iq->cidx = 0;
iq->gen = 1;

View File

@ -2458,13 +2458,11 @@ static int enic_dev_init(struct enic *enic)
switch (vnic_dev_get_intr_mode(enic->vdev)) {
default:
netif_napi_add(netdev, &enic->napi[0], enic_poll, 64);
napi_hash_add(&enic->napi[0]);
break;
case VNIC_DEV_INTR_MODE_MSIX:
for (i = 0; i < enic->rq_count; i++) {
netif_napi_add(netdev, &enic->napi[i],
enic_poll_msix_rq, NAPI_POLL_WEIGHT);
napi_hash_add(&enic->napi[i]);
}
for (i = 0; i < enic->wq_count; i++)
netif_napi_add(netdev, &enic->napi[enic_cq_wq(enic, i)],

View File

@ -2184,7 +2184,6 @@ static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
skb->csum_level = rxcp->tunneled;
skb_mark_napi_id(skb, napi);
if (rxcp->vlanf)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
@ -2631,7 +2630,6 @@ static int be_evt_queues_create(struct be_adapter *adapter)
eqo->affinity_mask);
netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
BE_NAPI_WEIGHT);
napi_hash_add(&eqo->napi);
}
return 0;
}

View File

@ -1050,7 +1050,7 @@ static int fs_enet_probe(struct platform_device *ofdev)
ndev->netdev_ops = &fs_enet_netdev_ops;
ndev->watchdog_timeo = 2 * HZ;
netif_napi_add(ndev, &fep->napi, fs_enet_rx_napi, fpi->napi_weight);
netif_napi_add(ndev, &fep->napi_tx, fs_enet_tx_napi, 2);
netif_tx_napi_add(ndev, &fep->napi_tx, fs_enet_tx_napi, 2);
ndev->ethtool_ops = &fs_ethtool_ops;

View File

@ -1347,12 +1347,12 @@ static int gfar_probe(struct platform_device *ofdev)
if (priv->poll_mode == GFAR_SQ_POLLING) {
netif_napi_add(dev, &priv->gfargrp[i].napi_rx,
gfar_poll_rx_sq, GFAR_DEV_WEIGHT);
netif_napi_add(dev, &priv->gfargrp[i].napi_tx,
netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx,
gfar_poll_tx_sq, 2);
} else {
netif_napi_add(dev, &priv->gfargrp[i].napi_rx,
gfar_poll_rx, GFAR_DEV_WEIGHT);
netif_napi_add(dev, &priv->gfargrp[i].napi_tx,
netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx,
gfar_poll_tx, 2);
}
}

View File

@ -1632,7 +1632,6 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
continue;
}
#endif
skb_mark_napi_id(skb, &rx_ring->q_vector->napi);
i40e_receive_skb(rx_ring, skb, vlan_tag);
rx_desc->wb.qword1.status_error_len = 0;

View File

@ -1090,7 +1090,6 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
continue;
}
#endif
skb_mark_napi_id(skb, &rx_ring->q_vector->napi);
i40e_receive_skb(rx_ring, skb, vlan_tag);
rx_desc->wb.qword1.status_error_len = 0;

View File

@ -844,7 +844,6 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
/* initialize NAPI */
netif_napi_add(adapter->netdev, &q_vector->napi,
ixgbe_poll, 64);
napi_hash_add(&q_vector->napi);
#ifdef CONFIG_NET_RX_BUSY_POLL
/* initialize busy poll */

View File

@ -1659,6 +1659,7 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector,
struct sk_buff *skb)
{
skb_mark_napi_id(skb, &q_vector->napi);
if (ixgbe_qv_busy_polling(q_vector))
netif_receive_skb(skb);
else
@ -2123,7 +2124,6 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
}
#endif /* IXGBE_FCOE */
skb_mark_napi_id(skb, &q_vector->napi);
ixgbe_rx_skb(q_vector, skb);
/* update budget accounting */

View File

@ -2483,9 +2483,6 @@ static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter)
q_vector->v_idx = q_idx;
netif_napi_add(adapter->netdev, &q_vector->napi,
ixgbevf_poll, 64);
#ifdef CONFIG_NET_RX_BUSY_POLL
napi_hash_add(&q_vector->napi);
#endif
adapter->q_vector[q_idx] = q_vector;
}

View File

@ -155,13 +155,11 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
cq->mcq.comp = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq;
cq->mcq.event = mlx4_en_cq_event;
if (cq->is_tx) {
netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq,
NAPI_POLL_WEIGHT);
} else {
if (cq->is_tx)
netif_tx_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq,
NAPI_POLL_WEIGHT);
else
netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64);
napi_hash_add(&cq->napi);
}
napi_enable(&cq->napi);

View File

@ -337,11 +337,7 @@ static int mlx4_en_get_sset_count(struct net_device *dev, int sset)
case ETH_SS_STATS:
return bitmap_iterator_count(&it) +
(priv->tx_ring_num * 2) +
#ifdef CONFIG_NET_RX_BUSY_POLL
(priv->rx_ring_num * 5);
#else
(priv->rx_ring_num * 2);
#endif
case ETH_SS_TEST:
return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags
& MLX4_DEV_CAP_FLAG_UC_LOOPBACK) * 2;
@ -408,11 +404,6 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
for (i = 0; i < priv->rx_ring_num; i++) {
data[index++] = priv->rx_ring[i]->packets;
data[index++] = priv->rx_ring[i]->bytes;
#ifdef CONFIG_NET_RX_BUSY_POLL
data[index++] = priv->rx_ring[i]->yields;
data[index++] = priv->rx_ring[i]->misses;
data[index++] = priv->rx_ring[i]->cleaned;
#endif
}
spin_unlock_bh(&priv->stats_lock);
@ -486,14 +477,6 @@ static void mlx4_en_get_strings(struct net_device *dev,
"rx%d_packets", i);
sprintf(data + (index++) * ETH_GSTRING_LEN,
"rx%d_bytes", i);
#ifdef CONFIG_NET_RX_BUSY_POLL
sprintf(data + (index++) * ETH_GSTRING_LEN,
"rx%d_napi_yield", i);
sprintf(data + (index++) * ETH_GSTRING_LEN,
"rx%d_misses", i);
sprintf(data + (index++) * ETH_GSTRING_LEN,
"rx%d_cleaned", i);
#endif
}
break;
case ETH_SS_PRIV_FLAGS:

View File

@ -69,34 +69,6 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up)
return 0;
}
#ifdef CONFIG_NET_RX_BUSY_POLL
/* must be called with local_bh_disable()d */
static int mlx4_en_low_latency_recv(struct napi_struct *napi)
{
struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
struct net_device *dev = cq->dev;
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring];
int done;
if (!priv->port_up)
return LL_FLUSH_FAILED;
if (!mlx4_en_cq_lock_poll(cq))
return LL_FLUSH_BUSY;
done = mlx4_en_process_rx_cq(dev, cq, 4);
if (likely(done))
rx_ring->cleaned += done;
else
rx_ring->misses++;
mlx4_en_cq_unlock_poll(cq);
return done;
}
#endif /* CONFIG_NET_RX_BUSY_POLL */
#ifdef CONFIG_RFS_ACCEL
struct mlx4_en_filter {
@ -1561,8 +1533,6 @@ int mlx4_en_start_port(struct net_device *dev)
for (i = 0; i < priv->rx_ring_num; i++) {
cq = priv->rx_cq[i];
mlx4_en_cq_init_lock(cq);
err = mlx4_en_init_affinity_hint(priv, i);
if (err) {
en_err(priv, "Failed preparing IRQ affinity hint\n");
@ -1859,13 +1829,6 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
for (i = 0; i < priv->rx_ring_num; i++) {
struct mlx4_en_cq *cq = priv->rx_cq[i];
local_bh_disable();
while (!mlx4_en_cq_lock_napi(cq)) {
pr_info("CQ %d locked\n", i);
mdelay(1);
}
local_bh_enable();
napi_synchronize(&cq->napi);
mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]);
mlx4_en_deactivate_cq(priv, cq);
@ -2503,9 +2466,6 @@ static const struct net_device_ops mlx4_netdev_ops = {
.ndo_setup_tc = mlx4_en_setup_tc,
#ifdef CONFIG_RFS_ACCEL
.ndo_rx_flow_steer = mlx4_en_filter_rfs,
#endif
#ifdef CONFIG_NET_RX_BUSY_POLL
.ndo_busy_poll = mlx4_en_low_latency_recv,
#endif
.ndo_get_phys_port_id = mlx4_en_get_phys_port_id,
#ifdef CONFIG_MLX4_EN_VXLAN

View File

@ -873,10 +873,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
* - TCP/IP (v4)
* - without IP options
* - not an IP fragment
* - no LLS polling in progress
*/
if (!mlx4_en_cq_busy_polling(cq) &&
(dev->features & NETIF_F_GRO)) {
if (dev->features & NETIF_F_GRO) {
struct sk_buff *gro_skb = napi_get_frags(&cq->napi);
if (!gro_skb)
goto next;
@ -927,7 +925,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
PKT_HASH_TYPE_L3);
skb_record_rx_queue(gro_skb, cq->ring);
skb_mark_napi_id(gro_skb, &cq->napi);
if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) {
timestamp = mlx4_en_get_cqe_ts(cqe);
@ -990,13 +987,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
timestamp);
}
skb_mark_napi_id(skb, &cq->napi);
if (!mlx4_en_cq_busy_polling(cq))
napi_gro_receive(&cq->napi, skb);
else
netif_receive_skb(skb);
napi_gro_receive(&cq->napi, skb);
next:
for (nr = 0; nr < priv->num_frags; nr++)
mlx4_en_free_frag(priv, frags, nr);
@ -1038,13 +1029,8 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
struct mlx4_en_priv *priv = netdev_priv(dev);
int done;
if (!mlx4_en_cq_lock_napi(cq))
return budget;
done = mlx4_en_process_rx_cq(dev, cq, budget);
mlx4_en_cq_unlock_napi(cq);
/* If we used up all the quota - we're probably not done yet... */
if (done == budget) {
const struct cpumask *aff;

View File

@ -320,11 +320,6 @@ struct mlx4_en_rx_ring {
void *rx_info;
unsigned long bytes;
unsigned long packets;
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned long yields;
unsigned long misses;
unsigned long cleaned;
#endif
unsigned long csum_ok;
unsigned long csum_none;
unsigned long csum_complete;
@ -347,18 +342,6 @@ struct mlx4_en_cq {
struct mlx4_cqe *buf;
#define MLX4_EN_OPCODE_ERROR 0x1e
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int state;
#define MLX4_EN_CQ_STATE_IDLE 0
#define MLX4_EN_CQ_STATE_NAPI 1 /* NAPI owns this CQ */
#define MLX4_EN_CQ_STATE_POLL 2 /* poll owns this CQ */
#define MLX4_CQ_LOCKED (MLX4_EN_CQ_STATE_NAPI | MLX4_EN_CQ_STATE_POLL)
#define MLX4_EN_CQ_STATE_NAPI_YIELD 4 /* NAPI yielded this CQ */
#define MLX4_EN_CQ_STATE_POLL_YIELD 8 /* poll yielded this CQ */
#define CQ_YIELD (MLX4_EN_CQ_STATE_NAPI_YIELD | MLX4_EN_CQ_STATE_POLL_YIELD)
#define CQ_USER_PEND (MLX4_EN_CQ_STATE_POLL | MLX4_EN_CQ_STATE_POLL_YIELD)
spinlock_t poll_lock; /* protects from LLS/napi conflicts */
#endif /* CONFIG_NET_RX_BUSY_POLL */
struct irq_desc *irq_desc;
};
@ -622,115 +605,6 @@ static inline struct mlx4_cqe *mlx4_en_get_cqe(void *buf, int idx, int cqe_sz)
return buf + idx * cqe_sz;
}
#ifdef CONFIG_NET_RX_BUSY_POLL
static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq)
{
spin_lock_init(&cq->poll_lock);
cq->state = MLX4_EN_CQ_STATE_IDLE;
}
/* called from the device poll rutine to get ownership of a cq */
static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq)
{
int rc = true;
spin_lock(&cq->poll_lock);
if (cq->state & MLX4_CQ_LOCKED) {
WARN_ON(cq->state & MLX4_EN_CQ_STATE_NAPI);
cq->state |= MLX4_EN_CQ_STATE_NAPI_YIELD;
rc = false;
} else
/* we don't care if someone yielded */
cq->state = MLX4_EN_CQ_STATE_NAPI;
spin_unlock(&cq->poll_lock);
return rc;
}
/* returns true is someone tried to get the cq while napi had it */
static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq)
{
int rc = false;
spin_lock(&cq->poll_lock);
WARN_ON(cq->state & (MLX4_EN_CQ_STATE_POLL |
MLX4_EN_CQ_STATE_NAPI_YIELD));
if (cq->state & MLX4_EN_CQ_STATE_POLL_YIELD)
rc = true;
cq->state = MLX4_EN_CQ_STATE_IDLE;
spin_unlock(&cq->poll_lock);
return rc;
}
/* called from mlx4_en_low_latency_poll() */
static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq)
{
int rc = true;
spin_lock_bh(&cq->poll_lock);
if ((cq->state & MLX4_CQ_LOCKED)) {
struct net_device *dev = cq->dev;
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring];
cq->state |= MLX4_EN_CQ_STATE_POLL_YIELD;
rc = false;
rx_ring->yields++;
} else
/* preserve yield marks */
cq->state |= MLX4_EN_CQ_STATE_POLL;
spin_unlock_bh(&cq->poll_lock);
return rc;
}
/* returns true if someone tried to get the cq while it was locked */
static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq)
{
int rc = false;
spin_lock_bh(&cq->poll_lock);
WARN_ON(cq->state & (MLX4_EN_CQ_STATE_NAPI));
if (cq->state & MLX4_EN_CQ_STATE_POLL_YIELD)
rc = true;
cq->state = MLX4_EN_CQ_STATE_IDLE;
spin_unlock_bh(&cq->poll_lock);
return rc;
}
/* true if a socket is polling, even if it did not get the lock */
static inline bool mlx4_en_cq_busy_polling(struct mlx4_en_cq *cq)
{
WARN_ON(!(cq->state & MLX4_CQ_LOCKED));
return cq->state & CQ_USER_PEND;
}
#else
static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq)
{
}
static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq)
{
return true;
}
static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq)
{
return false;
}
static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq)
{
return false;
}
static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq)
{
return false;
}
static inline bool mlx4_en_cq_busy_polling(struct mlx4_en_cq *cq)
{
return false;
}
#endif /* CONFIG_NET_RX_BUSY_POLL */
#define MLX4_EN_WOL_DO_MODIFY (1ULL << 63)
void mlx4_en_update_loopback_state(struct net_device *dev,

View File

@ -564,7 +564,7 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq);
void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
int mlx5e_napi_poll(struct napi_struct *napi, int budget);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq);
bool mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq);

View File

@ -1020,6 +1020,7 @@ err_close_tx_cqs:
err_napi_del:
netif_napi_del(&c->napi);
napi_hash_del(&c->napi);
kfree(c);
return err;
@ -1033,6 +1034,10 @@ static void mlx5e_close_channel(struct mlx5e_channel *c)
mlx5e_close_cq(&c->rq.cq);
mlx5e_close_tx_cqs(c);
netif_napi_del(&c->napi);
napi_hash_del(&c->napi);
synchronize_rcu();
kfree(c);
}

View File

@ -33,6 +33,7 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <net/busy_poll.h>
#include "en.h"
static inline int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq,
@ -215,16 +216,16 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
be16_to_cpu(cqe->vlan_info));
}
bool mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
{
struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
int i;
int work_done;
/* avoid accessing cq (dma coherent memory) if not needed */
if (!test_and_clear_bit(MLX5E_CQ_HAS_CQES, &cq->flags))
return false;
return 0;
for (i = 0; i < budget; i++) {
for (work_done = 0; work_done < budget; work_done++) {
struct mlx5e_rx_wqe *wqe;
struct mlx5_cqe64 *cqe;
struct sk_buff *skb;
@ -269,10 +270,8 @@ wq_ll_pop:
/* ensure cq space is freed before enabling more cqes */
wmb();
if (i == budget) {
if (work_done == budget)
set_bit(MLX5E_CQ_HAS_CQES, &cq->flags);
return true;
}
return false;
return work_done;
}

View File

@ -54,6 +54,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
napi);
bool busy = false;
int work_done;
int i;
clear_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags);
@ -61,26 +62,26 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
for (i = 0; i < c->num_tc; i++)
busy |= mlx5e_poll_tx_cq(&c->sq[i].cq);
busy |= mlx5e_poll_rx_cq(&c->rq.cq, budget);
work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget);
busy |= work_done == budget;
busy |= mlx5e_post_rx_wqes(&c->rq);
if (busy)
return budget;
napi_complete(napi);
napi_complete_done(napi, work_done);
/* avoid losing completion event during/after polling cqs */
if (test_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags)) {
napi_schedule(napi);
return 0;
return work_done;
}
for (i = 0; i < c->num_tc; i++)
mlx5e_cq_arm(&c->sq[i].cq);
mlx5e_cq_arm(&c->rq.cq);
return 0;
return work_done;
}
void mlx5e_completion_event(struct mlx5_core_cq *mcq)

View File

@ -1488,7 +1488,6 @@ myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum)
}
myri10ge_vlan_rx(mgp->dev, va, skb);
skb_record_rx_queue(skb, ss - &mgp->ss[0]);
skb_mark_napi_id(skb, &ss->napi);
if (polling) {
int hlen;
@ -1506,6 +1505,7 @@ myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum)
skb->data_len -= hlen;
skb->tail += hlen;
skb->protocol = eth_type_trans(skb, dev);
skb_mark_napi_id(skb, &ss->napi);
netif_receive_skb(skb);
}
else
@ -3814,7 +3814,6 @@ static int myri10ge_alloc_slices(struct myri10ge_priv *mgp)
ss->dev = mgp->dev;
netif_napi_add(ss->dev, &ss->napi, myri10ge_poll,
myri10ge_napi_weight);
napi_hash_add(&ss->napi);
}
return 0;
abort:

View File

@ -1604,7 +1604,7 @@ int qlcnic_82xx_napi_add(struct qlcnic_adapter *adapter,
if (qlcnic_check_multi_tx(adapter) && !adapter->ahw->diag_test) {
for (ring = 0; ring < adapter->drv_tx_rings; ring++) {
tx_ring = &adapter->tx_ring[ring];
netif_napi_add(netdev, &tx_ring->napi, qlcnic_tx_poll,
netif_tx_napi_add(netdev, &tx_ring->napi, qlcnic_tx_poll,
NAPI_POLL_WEIGHT);
}
}
@ -2135,7 +2135,7 @@ int qlcnic_83xx_napi_add(struct qlcnic_adapter *adapter,
!(adapter->flags & QLCNIC_TX_INTR_SHARED)) {
for (ring = 0; ring < adapter->drv_tx_rings; ring++) {
tx_ring = &adapter->tx_ring[ring];
netif_napi_add(netdev, &tx_ring->napi,
netif_tx_napi_add(netdev, &tx_ring->napi,
qlcnic_83xx_msix_tx_poll,
NAPI_POLL_WEIGHT);
}

View File

@ -4998,7 +4998,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number)
dev->netdev_ops = &rocker_port_netdev_ops;
dev->ethtool_ops = &rocker_port_ethtool_ops;
dev->switchdev_ops = &rocker_port_switchdev_ops;
netif_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx,
netif_tx_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx,
NAPI_POLL_WEIGHT);
netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx,
NAPI_POLL_WEIGHT);

View File

@ -2059,7 +2059,6 @@ static void efx_init_napi_channel(struct efx_channel *channel)
channel->napi_dev = efx->net_dev;
netif_napi_add(channel->napi_dev, &channel->napi_str,
efx_poll, napi_weight);
napi_hash_add(&channel->napi_str);
efx_channel_busy_poll_init(channel);
}

View File

@ -463,7 +463,6 @@ efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
skb_record_rx_queue(skb, channel->rx_queue.core_index);
skb_mark_napi_id(skb, &channel->napi_str);
gro_result = napi_gro_frags(napi);
if (gro_result != GRO_DROP)
channel->irq_mod_score += 2;

View File

@ -2469,7 +2469,7 @@ static int cpsw_probe(struct platform_device *pdev)
ndev->netdev_ops = &cpsw_netdev_ops;
ndev->ethtool_ops = &cpsw_ethtool_ops;
netif_napi_add(ndev, &priv->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT);
netif_napi_add(ndev, &priv->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT);
netif_tx_napi_add(ndev, &priv->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT);
/* register the network device */
SET_NETDEV_DEV(ndev, &pdev->dev);

View File

@ -1990,7 +1990,7 @@ static int netcp_create_interface(struct netcp_device *netcp_device,
/* NAPI register */
netif_napi_add(ndev, &netcp->rx_napi, netcp_rx_poll, NETCP_NAPI_WEIGHT);
netif_napi_add(ndev, &netcp->tx_napi, netcp_tx_poll, NETCP_NAPI_WEIGHT);
netif_tx_napi_add(ndev, &netcp->tx_napi, netcp_tx_poll, NETCP_NAPI_WEIGHT);
/* Register the network device */
ndev->dev_id = 0;

View File

@ -516,8 +516,6 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
skb_shinfo(skb)->gso_segs = 0;
}
skb_mark_napi_id(skb, &rq->napi);
napi_gro_receive(&rq->napi, skb);
return;
@ -1612,7 +1610,6 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
vi->rq[i].pages = NULL;
netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
napi_weight);
napi_hash_add(&vi->rq[i].napi);
sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);

View File

@ -183,7 +183,7 @@ void *wil_if_alloc(struct device *dev)
netif_napi_add(ndev, &wil->napi_rx, wil6210_netdev_poll_rx,
WIL6210_NAPI_BUDGET);
netif_napi_add(ndev, &wil->napi_tx, wil6210_netdev_poll_tx,
netif_tx_napi_add(ndev, &wil->napi_tx, wil6210_netdev_poll_tx,
WIL6210_NAPI_BUDGET);
netif_tx_stop_all_queues(ndev);

View File

@ -16,6 +16,10 @@
struct hlist_head name[1 << (bits)] = \
{ [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT }
#define DEFINE_READ_MOSTLY_HASHTABLE(name, bits) \
struct hlist_head name[1 << (bits)] __read_mostly = \
{ [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT }
#define DECLARE_HASHTABLE(name, bits) \
struct hlist_head name[1 << (bits)]

View File

@ -326,7 +326,8 @@ enum {
NAPI_STATE_SCHED, /* Poll is scheduled */
NAPI_STATE_DISABLE, /* Disable pending */
NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */
NAPI_STATE_HASHED, /* In NAPI hash */
NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */
NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
};
enum gro_result {
@ -460,20 +461,14 @@ static inline void napi_complete(struct napi_struct *n)
return napi_complete_done(n, 0);
}
/**
* napi_by_id - lookup a NAPI by napi_id
* @napi_id: hashed napi_id
*
* lookup @napi_id in napi_hash table
* must be called under rcu_read_lock()
*/
struct napi_struct *napi_by_id(unsigned int napi_id);
/**
* napi_hash_add - add a NAPI to global hashtable
* @napi: napi context
*
* generate a new napi_id and store a @napi under it in napi_hash
* Used for busy polling (CONFIG_NET_RX_BUSY_POLL)
* Note: This is normally automatically done from netif_napi_add(),
* so might disappear in a future linux version.
*/
void napi_hash_add(struct napi_struct *napi);
@ -482,9 +477,14 @@ void napi_hash_add(struct napi_struct *napi);
* @napi: napi context
*
* Warning: caller must observe rcu grace period
* before freeing memory containing @napi
* before freeing memory containing @napi, if
* this function returns true.
* Note: core networking stack automatically calls it
* from netif_napi_del()
* Drivers might want to call this helper to combine all
* the needed rcu grace periods into a single one.
*/
void napi_hash_del(struct napi_struct *napi);
bool napi_hash_del(struct napi_struct *napi);
/**
* napi_disable - prevent NAPI from scheduling
@ -1947,6 +1947,26 @@ static inline void *netdev_priv(const struct net_device *dev)
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight);
/**
* netif_tx_napi_add - initialize a napi context
* @dev: network device
* @napi: napi context
* @poll: polling function
* @weight: default weight
*
* This variant of netif_napi_add() should be used from drivers using NAPI
* to exclusively poll a TX queue.
* This will avoid we add it into napi_hash[], thus polluting this hash table.
*/
static inline void netif_tx_napi_add(struct net_device *dev,
struct napi_struct *napi,
int (*poll)(struct napi_struct *, int),
int weight)
{
set_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state);
netif_napi_add(dev, napi, poll, weight);
}
/**
* netif_napi_del - remove a napi context
* @napi: napi context

View File

@ -1082,9 +1082,6 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
static inline void skb_sender_cpu_clear(struct sk_buff *skb)
{
#ifdef CONFIG_XPS
skb->sender_cpu = 0;
#endif
}
#ifdef NET_SKBUFF_DATA_USES_OFFSET

View File

@ -72,50 +72,7 @@ static inline bool busy_loop_timeout(unsigned long end_time)
return time_after(now, end_time);
}
/* when used in sock_poll() nonblock is known at compile time to be true
* so the loop and end_time will be optimized out
*/
static inline bool sk_busy_loop(struct sock *sk, int nonblock)
{
unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
const struct net_device_ops *ops;
struct napi_struct *napi;
int rc = false;
/*
* rcu read lock for napi hash
* bh so we don't race with net_rx_action
*/
rcu_read_lock_bh();
napi = napi_by_id(sk->sk_napi_id);
if (!napi)
goto out;
ops = napi->dev->netdev_ops;
if (!ops->ndo_busy_poll)
goto out;
do {
rc = ops->ndo_busy_poll(napi);
if (rc == LL_FLUSH_FAILED)
break; /* permanent failure */
if (rc > 0)
/* local bh are disabled so it is ok to use _BH */
NET_ADD_STATS_BH(sock_net(sk),
LINUX_MIB_BUSYPOLLRXPACKETS, rc);
cpu_relax();
} while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
!need_resched() && !busy_loop_timeout(end_time));
rc = !skb_queue_empty(&sk->sk_receive_queue);
out:
rcu_read_unlock_bh();
return rc;
}
bool sk_busy_loop(struct sock *sk, int nonblock);
/* used in the NIC receive handler to mark the skb */
static inline void skb_mark_napi_id(struct sk_buff *skb,

View File

@ -96,6 +96,7 @@
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/busy_poll.h>
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/dst.h>
@ -182,8 +183,8 @@ EXPORT_SYMBOL(dev_base_lock);
/* protects napi_hash addition/deletion and napi_gen_id */
static DEFINE_SPINLOCK(napi_hash_lock);
static unsigned int napi_gen_id;
static DEFINE_HASHTABLE(napi_hash, 8);
static unsigned int napi_gen_id = NR_CPUS;
static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
static seqcount_t devnet_rename_seq;
@ -3021,7 +3022,9 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
int queue_index = 0;
#ifdef CONFIG_XPS
if (skb->sender_cpu == 0)
u32 sender_cpu = skb->sender_cpu - 1;
if (sender_cpu >= (u32)NR_CPUS)
skb->sender_cpu = raw_smp_processor_id() + 1;
#endif
@ -4353,6 +4356,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
skb_mark_napi_id(skb, napi);
trace_napi_gro_receive_entry(skb);
skb_gro_reset_offset(skb);
@ -4387,6 +4391,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
if (!skb) {
skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
napi->skb = skb;
skb_mark_napi_id(skb, napi);
}
return skb;
}
@ -4661,7 +4666,7 @@ void napi_complete_done(struct napi_struct *n, int work_done)
EXPORT_SYMBOL(napi_complete_done);
/* must be called under rcu_read_lock(), as we dont take a reference */
struct napi_struct *napi_by_id(unsigned int napi_id)
static struct napi_struct *napi_by_id(unsigned int napi_id)
{
unsigned int hash = napi_id % HASH_SIZE(napi_hash);
struct napi_struct *napi;
@ -4672,43 +4677,101 @@ struct napi_struct *napi_by_id(unsigned int napi_id)
return NULL;
}
EXPORT_SYMBOL_GPL(napi_by_id);
#if defined(CONFIG_NET_RX_BUSY_POLL)
#define BUSY_POLL_BUDGET 8
bool sk_busy_loop(struct sock *sk, int nonblock)
{
unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
int (*busy_poll)(struct napi_struct *dev);
struct napi_struct *napi;
int rc = false;
rcu_read_lock();
napi = napi_by_id(sk->sk_napi_id);
if (!napi)
goto out;
/* Note: ndo_busy_poll method is optional in linux-4.5 */
busy_poll = napi->dev->netdev_ops->ndo_busy_poll;
do {
rc = 0;
local_bh_disable();
if (busy_poll) {
rc = busy_poll(napi);
} else if (napi_schedule_prep(napi)) {
void *have = netpoll_poll_lock(napi);
if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
rc = napi->poll(napi, BUSY_POLL_BUDGET);
trace_napi_poll(napi);
if (rc == BUSY_POLL_BUDGET) {
napi_complete_done(napi, rc);
napi_schedule(napi);
}
}
netpoll_poll_unlock(have);
}
if (rc > 0)
NET_ADD_STATS_BH(sock_net(sk),
LINUX_MIB_BUSYPOLLRXPACKETS, rc);
local_bh_enable();
if (rc == LL_FLUSH_FAILED)
break; /* permanent failure */
cpu_relax();
} while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
!need_resched() && !busy_loop_timeout(end_time));
rc = !skb_queue_empty(&sk->sk_receive_queue);
out:
rcu_read_unlock();
return rc;
}
EXPORT_SYMBOL(sk_busy_loop);
#endif /* CONFIG_NET_RX_BUSY_POLL */
void napi_hash_add(struct napi_struct *napi)
{
if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) {
if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||
test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
return;
spin_lock(&napi_hash_lock);
spin_lock(&napi_hash_lock);
/* 0 is not a valid id, we also skip an id that is taken
* we expect both events to be extremely rare
*/
napi->napi_id = 0;
while (!napi->napi_id) {
napi->napi_id = ++napi_gen_id;
if (napi_by_id(napi->napi_id))
napi->napi_id = 0;
}
/* 0..NR_CPUS+1 range is reserved for sender_cpu use */
do {
if (unlikely(++napi_gen_id < NR_CPUS + 1))
napi_gen_id = NR_CPUS + 1;
} while (napi_by_id(napi_gen_id));
napi->napi_id = napi_gen_id;
hlist_add_head_rcu(&napi->napi_hash_node,
&napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
hlist_add_head_rcu(&napi->napi_hash_node,
&napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
spin_unlock(&napi_hash_lock);
}
spin_unlock(&napi_hash_lock);
}
EXPORT_SYMBOL_GPL(napi_hash_add);
/* Warning : caller is responsible to make sure rcu grace period
* is respected before freeing memory containing @napi
*/
void napi_hash_del(struct napi_struct *napi)
bool napi_hash_del(struct napi_struct *napi)
{
bool rcu_sync_needed = false;
spin_lock(&napi_hash_lock);
if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state))
if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) {
rcu_sync_needed = true;
hlist_del_rcu(&napi->napi_hash_node);
}
spin_unlock(&napi_hash_lock);
return rcu_sync_needed;
}
EXPORT_SYMBOL_GPL(napi_hash_del);
@ -4744,6 +4807,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
napi->poll_owner = -1;
#endif
set_bit(NAPI_STATE_SCHED, &napi->state);
napi_hash_add(napi);
}
EXPORT_SYMBOL(netif_napi_add);
@ -4763,8 +4827,12 @@ void napi_disable(struct napi_struct *n)
}
EXPORT_SYMBOL(napi_disable);
/* Must be called in process context */
void netif_napi_del(struct napi_struct *napi)
{
might_sleep();
if (napi_hash_del(napi))
synchronize_net();
list_del_init(&napi->dev_list);
napi_free_frags(napi);
@ -7164,11 +7232,13 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
* This function does the last stage of destroying an allocated device
* interface. The reference to the device object is released.
* If this is the last reference then it will be freed.
* Must be called in process context.
*/
void free_netdev(struct net_device *dev)
{
struct napi_struct *p, *n;
might_sleep();
netif_free_tx_queues(dev);
#ifdef CONFIG_SYSFS
kvfree(dev->_rx);