ice: xsk: Handle SW XDP ring wrap and bump tail more often
Currently, if ice_clean_rx_irq_zc() processed the whole ring and next_to_use != 0, then ice_alloc_rx_buf_zc() would not refill the whole ring even if the XSK buffer pool would have enough free entries (either from fill ring or the internal recycle mechanism) - it is because ring wrap is not handled. Improve the logic in ice_alloc_rx_buf_zc() to address the problem above. Do not clamp the count of buffers that is passed to xsk_buff_alloc_batch() in case when next_to_use + buffer count >= rx_ring->count, but rather split it and have two calls to the mentioned function - one for the part up until the wrap and one for the part after the wrap. Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com> Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Alexander Lobakin <alexandr.lobakin@intel.com> Link: https://lore.kernel.org/bpf/20220125160446.78976-4-maciej.fijalkowski@intel.com
This commit is contained in:
parent
296f13ff38
commit
3876ff525d
@ -111,6 +111,8 @@ static inline int ice_skb_pad(void)
|
||||
(u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
|
||||
(R)->next_to_clean - (R)->next_to_use - 1)
|
||||
|
||||
#define ICE_RING_QUARTER(R) ((R)->count >> 2)
|
||||
|
||||
#define ICE_TX_FLAGS_TSO BIT(0)
|
||||
#define ICE_TX_FLAGS_HW_VLAN BIT(1)
|
||||
#define ICE_TX_FLAGS_SW_VLAN BIT(2)
|
||||
|
@ -367,33 +367,28 @@ failure:
|
||||
}
|
||||
|
||||
/**
|
||||
* ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
|
||||
* @rx_ring: Rx ring
|
||||
* ice_fill_rx_descs - pick buffers from XSK buffer pool and use it
|
||||
* @pool: XSK Buffer pool to pull the buffers from
|
||||
* @xdp: SW ring of xdp_buff that will hold the buffers
|
||||
* @rx_desc: Pointer to Rx descriptors that will be filled
|
||||
* @count: The number of buffers to allocate
|
||||
*
|
||||
* This function allocates a number of Rx buffers from the fill ring
|
||||
* or the internal recycle mechanism and places them on the Rx ring.
|
||||
*
|
||||
* Returns true if all allocations were successful, false if any fail.
|
||||
* Note that ring wrap should be handled by caller of this function.
|
||||
*
|
||||
* Returns the amount of allocated Rx descriptors
|
||||
*/
|
||||
bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
|
||||
static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
|
||||
union ice_32b_rx_flex_desc *rx_desc, u16 count)
|
||||
{
|
||||
union ice_32b_rx_flex_desc *rx_desc;
|
||||
u16 ntu = rx_ring->next_to_use;
|
||||
struct xdp_buff **xdp;
|
||||
u32 nb_buffs, i;
|
||||
dma_addr_t dma;
|
||||
u16 buffs;
|
||||
int i;
|
||||
|
||||
rx_desc = ICE_RX_DESC(rx_ring, ntu);
|
||||
xdp = ice_xdp_buf(rx_ring, ntu);
|
||||
|
||||
nb_buffs = min_t(u16, count, rx_ring->count - ntu);
|
||||
nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs);
|
||||
if (!nb_buffs)
|
||||
return false;
|
||||
|
||||
i = nb_buffs;
|
||||
while (i--) {
|
||||
buffs = xsk_buff_alloc_batch(pool, xdp, count);
|
||||
for (i = 0; i < buffs; i++) {
|
||||
dma = xsk_buff_xdp_get_dma(*xdp);
|
||||
rx_desc->read.pkt_addr = cpu_to_le64(dma);
|
||||
rx_desc->wb.status_error0 = 0;
|
||||
@ -402,13 +397,77 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
|
||||
xdp++;
|
||||
}
|
||||
|
||||
return buffs;
|
||||
}
|
||||
|
||||
/**
|
||||
* __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
|
||||
* @rx_ring: Rx ring
|
||||
* @count: The number of buffers to allocate
|
||||
*
|
||||
* Place the @count of descriptors onto Rx ring. Handle the ring wrap
|
||||
* for case where space from next_to_use up to the end of ring is less
|
||||
* than @count. Finally do a tail bump.
|
||||
*
|
||||
* Returns true if all allocations were successful, false if any fail.
|
||||
*/
|
||||
static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
|
||||
{
|
||||
union ice_32b_rx_flex_desc *rx_desc;
|
||||
u32 nb_buffs_extra = 0, nb_buffs;
|
||||
u16 ntu = rx_ring->next_to_use;
|
||||
u16 total_count = count;
|
||||
struct xdp_buff **xdp;
|
||||
|
||||
rx_desc = ICE_RX_DESC(rx_ring, ntu);
|
||||
xdp = ice_xdp_buf(rx_ring, ntu);
|
||||
|
||||
if (ntu + count >= rx_ring->count) {
|
||||
nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp,
|
||||
rx_desc,
|
||||
rx_ring->count - ntu);
|
||||
rx_desc = ICE_RX_DESC(rx_ring, 0);
|
||||
xdp = ice_xdp_buf(rx_ring, 0);
|
||||
ntu = 0;
|
||||
count -= nb_buffs_extra;
|
||||
ice_release_rx_desc(rx_ring, 0);
|
||||
}
|
||||
|
||||
nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count);
|
||||
|
||||
ntu += nb_buffs;
|
||||
if (ntu == rx_ring->count)
|
||||
ntu = 0;
|
||||
|
||||
ice_release_rx_desc(rx_ring, ntu);
|
||||
if (rx_ring->next_to_use != ntu)
|
||||
ice_release_rx_desc(rx_ring, ntu);
|
||||
|
||||
return count == nb_buffs;
|
||||
return total_count == (nb_buffs_extra + nb_buffs);
|
||||
}
|
||||
|
||||
/**
|
||||
* ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
|
||||
* @rx_ring: Rx ring
|
||||
* @count: The number of buffers to allocate
|
||||
*
|
||||
* Wrapper for internal allocation routine; figure out how many tail
|
||||
* bumps should take place based on the given threshold
|
||||
*
|
||||
* Returns true if all calls to internal alloc routine succeeded
|
||||
*/
|
||||
bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
|
||||
{
|
||||
u16 rx_thresh = ICE_RING_QUARTER(rx_ring);
|
||||
u16 batched, leftover, i, tail_bumps;
|
||||
|
||||
batched = ALIGN_DOWN(count, rx_thresh);
|
||||
tail_bumps = batched / rx_thresh;
|
||||
leftover = count & (rx_thresh - 1);
|
||||
|
||||
for (i = 0; i < tail_bumps; i++)
|
||||
if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh))
|
||||
return false;
|
||||
return __ice_alloc_rx_bufs_zc(rx_ring, leftover);
|
||||
}
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user