ixgbe: add AF_XDP zero-copy Tx support
This patch adds zero-copy Tx support for AF_XDP sockets. It implements the ndo_xsk_async_xmit netdev ndo and performs all the Tx logic from a NAPI context. This means pulling egress packets from the Tx ring, placing the frames on the NIC HW descriptor ring and completing sent frames back to the application via the completion ring. The regular XDP Tx ring is used for AF_XDP as well. The rationale for this is as follows: XDP_REDIRECT guarantees mutual exclusion between different NAPI contexts based on CPU id. In other words, a netdev can XDP_REDIRECT to another netdev with a different NAPI context, since the operation is bound to a specific core and each core has its own hardware ring. As the AF_XDP Tx action is running in the same NAPI context and using the same ring, it will also be protected from XDP_REDIRECT actions with the exact same mechanism. As with AF_XDP Rx, all AF_XDP Tx specific functions are added to ixgbe_xsk.c. Signed-off-by: Björn Töpel <bjorn.topel@intel.com> Tested-by: William Tu <u9012063@gmail.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
This commit is contained in:
parent
05ae861450
commit
8221c5eba8
@ -3161,7 +3161,11 @@ int ixgbe_poll(struct napi_struct *napi, int budget)
|
||||
#endif
|
||||
|
||||
ixgbe_for_each_ring(ring, q_vector->tx) {
|
||||
if (!ixgbe_clean_tx_irq(q_vector, ring, budget))
|
||||
bool wd = ring->xsk_umem ?
|
||||
ixgbe_clean_xdp_tx_irq(q_vector, ring, budget) :
|
||||
ixgbe_clean_tx_irq(q_vector, ring, budget);
|
||||
|
||||
if (!wd)
|
||||
clean_complete = false;
|
||||
}
|
||||
|
||||
@ -3470,6 +3474,10 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter,
|
||||
u32 txdctl = IXGBE_TXDCTL_ENABLE;
|
||||
u8 reg_idx = ring->reg_idx;
|
||||
|
||||
ring->xsk_umem = NULL;
|
||||
if (ring_is_xdp(ring))
|
||||
ring->xsk_umem = ixgbe_xsk_umem(adapter, ring);
|
||||
|
||||
/* disable queue to avoid issues while updating state */
|
||||
IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), 0);
|
||||
IXGBE_WRITE_FLUSH(hw);
|
||||
@ -5942,6 +5950,11 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring)
|
||||
u16 i = tx_ring->next_to_clean;
|
||||
struct ixgbe_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
|
||||
|
||||
if (tx_ring->xsk_umem) {
|
||||
ixgbe_xsk_clean_tx_ring(tx_ring);
|
||||
goto out;
|
||||
}
|
||||
|
||||
while (i != tx_ring->next_to_use) {
|
||||
union ixgbe_adv_tx_desc *eop_desc, *tx_desc;
|
||||
|
||||
@ -5993,6 +6006,7 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring)
|
||||
if (!ring_is_xdp(tx_ring))
|
||||
netdev_tx_reset_queue(txring_txq(tx_ring));
|
||||
|
||||
out:
|
||||
/* reset next_to_use and next_to_clean */
|
||||
tx_ring->next_to_use = 0;
|
||||
tx_ring->next_to_clean = 0;
|
||||
@ -10348,6 +10362,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
|
||||
.ndo_features_check = ixgbe_features_check,
|
||||
.ndo_bpf = ixgbe_xdp,
|
||||
.ndo_xdp_xmit = ixgbe_xdp_xmit,
|
||||
.ndo_xsk_async_xmit = ixgbe_xsk_async_xmit,
|
||||
};
|
||||
|
||||
static void ixgbe_disable_txr_hw(struct ixgbe_adapter *adapter,
|
||||
|
@ -42,5 +42,9 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
|
||||
struct ixgbe_ring *rx_ring,
|
||||
const int budget);
|
||||
void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring);
|
||||
bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
|
||||
struct ixgbe_ring *tx_ring, int napi_budget);
|
||||
int ixgbe_xsk_async_xmit(struct net_device *dev, u32 queue_id);
|
||||
void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring);
|
||||
|
||||
#endif /* #define _IXGBE_TXRX_COMMON_H_ */
|
||||
|
@ -624,3 +624,178 @@ void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
|
||||
{
|
||||
union ixgbe_adv_tx_desc *tx_desc = NULL;
|
||||
struct ixgbe_tx_buffer *tx_bi;
|
||||
bool work_done = true;
|
||||
u32 len, cmd_type;
|
||||
dma_addr_t dma;
|
||||
|
||||
while (budget-- > 0) {
|
||||
if (unlikely(!ixgbe_desc_unused(xdp_ring))) {
|
||||
work_done = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &dma, &len))
|
||||
break;
|
||||
|
||||
dma_sync_single_for_device(xdp_ring->dev, dma, len,
|
||||
DMA_BIDIRECTIONAL);
|
||||
|
||||
tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use];
|
||||
tx_bi->bytecount = len;
|
||||
tx_bi->xdpf = NULL;
|
||||
|
||||
tx_desc = IXGBE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
|
||||
tx_desc->read.buffer_addr = cpu_to_le64(dma);
|
||||
|
||||
/* put descriptor type bits */
|
||||
cmd_type = IXGBE_ADVTXD_DTYP_DATA |
|
||||
IXGBE_ADVTXD_DCMD_DEXT |
|
||||
IXGBE_ADVTXD_DCMD_IFCS;
|
||||
cmd_type |= len | IXGBE_TXD_CMD;
|
||||
tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
|
||||
tx_desc->read.olinfo_status =
|
||||
cpu_to_le32(len << IXGBE_ADVTXD_PAYLEN_SHIFT);
|
||||
|
||||
xdp_ring->next_to_use++;
|
||||
if (xdp_ring->next_to_use == xdp_ring->count)
|
||||
xdp_ring->next_to_use = 0;
|
||||
}
|
||||
|
||||
if (tx_desc) {
|
||||
ixgbe_xdp_ring_update_tail(xdp_ring);
|
||||
xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
|
||||
}
|
||||
|
||||
return !!budget && work_done;
|
||||
}
|
||||
|
||||
static void ixgbe_clean_xdp_tx_buffer(struct ixgbe_ring *tx_ring,
|
||||
struct ixgbe_tx_buffer *tx_bi)
|
||||
{
|
||||
xdp_return_frame(tx_bi->xdpf);
|
||||
dma_unmap_single(tx_ring->dev,
|
||||
dma_unmap_addr(tx_bi, dma),
|
||||
dma_unmap_len(tx_bi, len), DMA_TO_DEVICE);
|
||||
dma_unmap_len_set(tx_bi, len, 0);
|
||||
}
|
||||
|
||||
bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
|
||||
struct ixgbe_ring *tx_ring, int napi_budget)
|
||||
{
|
||||
unsigned int total_packets = 0, total_bytes = 0;
|
||||
u32 i = tx_ring->next_to_clean, xsk_frames = 0;
|
||||
unsigned int budget = q_vector->tx.work_limit;
|
||||
struct xdp_umem *umem = tx_ring->xsk_umem;
|
||||
union ixgbe_adv_tx_desc *tx_desc;
|
||||
struct ixgbe_tx_buffer *tx_bi;
|
||||
bool xmit_done;
|
||||
|
||||
tx_bi = &tx_ring->tx_buffer_info[i];
|
||||
tx_desc = IXGBE_TX_DESC(tx_ring, i);
|
||||
i -= tx_ring->count;
|
||||
|
||||
do {
|
||||
if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
|
||||
break;
|
||||
|
||||
total_bytes += tx_bi->bytecount;
|
||||
total_packets += tx_bi->gso_segs;
|
||||
|
||||
if (tx_bi->xdpf)
|
||||
ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi);
|
||||
else
|
||||
xsk_frames++;
|
||||
|
||||
tx_bi->xdpf = NULL;
|
||||
total_bytes += tx_bi->bytecount;
|
||||
|
||||
tx_bi++;
|
||||
tx_desc++;
|
||||
i++;
|
||||
if (unlikely(!i)) {
|
||||
i -= tx_ring->count;
|
||||
tx_bi = tx_ring->tx_buffer_info;
|
||||
tx_desc = IXGBE_TX_DESC(tx_ring, 0);
|
||||
}
|
||||
|
||||
/* issue prefetch for next Tx descriptor */
|
||||
prefetch(tx_desc);
|
||||
|
||||
/* update budget accounting */
|
||||
budget--;
|
||||
} while (likely(budget));
|
||||
|
||||
i += tx_ring->count;
|
||||
tx_ring->next_to_clean = i;
|
||||
|
||||
u64_stats_update_begin(&tx_ring->syncp);
|
||||
tx_ring->stats.bytes += total_bytes;
|
||||
tx_ring->stats.packets += total_packets;
|
||||
u64_stats_update_end(&tx_ring->syncp);
|
||||
q_vector->tx.total_bytes += total_bytes;
|
||||
q_vector->tx.total_packets += total_packets;
|
||||
|
||||
if (xsk_frames)
|
||||
xsk_umem_complete_tx(umem, xsk_frames);
|
||||
|
||||
xmit_done = ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit);
|
||||
return budget > 0 && xmit_done;
|
||||
}
|
||||
|
||||
/**
 * ixgbe_xsk_async_xmit - ndo_xsk_async_xmit handler
 * @dev: net device the request is for
 * @qid: index into the adapter's XDP rings / umem table
 *
 * Marks the ring's NAPI context as missed if it is already scheduled,
 * otherwise rearms the queue interrupt of the ring's vector.
 *
 * Returns 0 on success, -ENETDOWN if the adapter is down, or -ENXIO if no
 * XDP program is set, @qid is out of range or no umem is registered for
 * @qid.
 */
int ixgbe_xsk_async_xmit(struct net_device *dev, u32 qid)
{
	struct ixgbe_adapter *adapter = netdev_priv(dev);
	struct ixgbe_q_vector *q_vector;

	if (test_bit(__IXGBE_DOWN, &adapter->state))
		return -ENETDOWN;

	/* evaluated left to right; each failing check yields -ENXIO */
	if (!READ_ONCE(adapter->xdp_prog) ||
	    qid >= adapter->num_xdp_queues ||
	    !adapter->xsk_umems ||
	    !adapter->xsk_umems[qid])
		return -ENXIO;

	q_vector = adapter->xdp_ring[qid]->q_vector;

	/* NAPI already scheduled and now flagged as missed: nothing to do */
	if (napi_if_scheduled_mark_missed(&q_vector->napi))
		return 0;

	ixgbe_irq_rearm_queues(adapter, BIT_ULL(q_vector->v_idx));

	return 0;
}
|
||||
|
||||
void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring)
|
||||
{
|
||||
u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
|
||||
struct xdp_umem *umem = tx_ring->xsk_umem;
|
||||
struct ixgbe_tx_buffer *tx_bi;
|
||||
u32 xsk_frames = 0;
|
||||
|
||||
while (ntc != ntu) {
|
||||
tx_bi = &tx_ring->tx_buffer_info[ntc];
|
||||
|
||||
if (tx_bi->xdpf)
|
||||
ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi);
|
||||
else
|
||||
xsk_frames++;
|
||||
|
||||
tx_bi->xdpf = NULL;
|
||||
|
||||
ntc++;
|
||||
if (ntc == tx_ring->count)
|
||||
ntc = 0;
|
||||
}
|
||||
|
||||
if (xsk_frames)
|
||||
xsk_umem_complete_tx(umem, xsk_frames);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user