mirror of
https://github.com/torvalds/linux.git
synced 2024-11-27 14:41:39 +00:00
veth: Add XDP TX and REDIRECT
This allows further redirection of xdp_frames like

 NIC   -> veth--veth -> veth--veth
 (XDP)          (XDP)         (XDP)

The intermediate XDP, redirecting packets from NIC to the other veth, reuses xdp_mem_info from NIC so that page recycling of the NIC works on the destination veth's XDP.

In this way return_frame is not fully guarded by NAPI, since another NAPI handler on another cpu may use the same xdp_mem_info concurrently. Thus disable napi_direct by xdp_set_return_frame_no_direct() during the NAPI context.

v8:
- Don't use xdp_frame pointer address for data_hard_start of xdp_buff.

v4:
- Use xdp_[set|clear]_return_frame_no_direct() instead of a flag in xdp_mem_info.

v3:
- Fix double free when veth_xdp_tx() returns a positive value.
- Convert xdp_xmit and xdp_redir variables into flags.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
parent
2539650fad
commit
d1396004dd
@ -32,6 +32,10 @@
|
||||
#define VETH_RING_SIZE 256
|
||||
#define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN)
|
||||
|
||||
/* Separating two types of XDP xmit */
|
||||
#define VETH_XDP_TX BIT(0)
|
||||
#define VETH_XDP_REDIR BIT(1)
|
||||
|
||||
struct pcpu_vstats {
|
||||
u64 packets;
|
||||
u64 bytes;
|
||||
@ -45,6 +49,7 @@ struct veth_priv {
|
||||
struct bpf_prog *_xdp_prog;
|
||||
struct net_device __rcu *peer;
|
||||
atomic64_t dropped;
|
||||
struct xdp_mem_info xdp_mem;
|
||||
unsigned requested_headroom;
|
||||
bool rx_notify_masked;
|
||||
struct ptr_ring xdp_ring;
|
||||
@ -317,12 +322,44 @@ static int veth_xdp_xmit(struct net_device *dev, int n,
|
||||
return n - drops;
|
||||
}
|
||||
|
||||
/*
 * veth_xdp_flush - flush the peer's XDP receive side on behalf of @dev.
 * @dev: the veth device whose peer should be flushed.
 *
 * Looks up @dev's peer under the RCU read lock and, if the peer exists and
 * has an XDP program attached, calls __veth_xdp_flush() on the peer's
 * private data. Does nothing otherwise. veth_poll() calls this after a poll
 * round in which VETH_XDP_TX was set.
 */
static void veth_xdp_flush(struct net_device *dev)
|
||||
{
|
||||
struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
|
||||
struct net_device *rcv;
|
||||
|
||||
rcu_read_lock();
|
||||
rcv = rcu_dereference(priv->peer);
|
||||
/* No peer attached: nothing to flush. */
if (unlikely(!rcv))
|
||||
goto out;
|
||||
|
||||
rcv_priv = netdev_priv(rcv);
|
||||
/* xdp_ring is initialized on receive side? */
|
||||
/* The peer's xdp_prog pointer is used as the "XDP is set up" indicator. */
if (unlikely(!rcu_access_pointer(rcv_priv->xdp_prog)))
|
||||
goto out;
|
||||
|
||||
__veth_xdp_flush(rcv_priv);
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
 * veth_xdp_tx - transmit a single XDP buffer through @dev's xmit path.
 * @dev: the veth device to transmit on.
 * @xdp: the buffer to send; it is converted to an xdp_frame first.
 *
 * Returns -EOVERFLOW if convert_to_xdp_frame() yields NULL, otherwise the
 * return value of veth_xdp_xmit() for a one-frame batch. Callers in this
 * file treat only negative values as failure.
 */
static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
|
||||
{
|
||||
struct xdp_frame *frame = convert_to_xdp_frame(xdp);
|
||||
|
||||
/* Conversion failed, so there is no frame to hand to the xmit path. */
if (unlikely(!frame))
|
||||
return -EOVERFLOW;
|
||||
|
||||
/* Batch of one frame, flags == 0. */
return veth_xdp_xmit(dev, 1, &frame, 0);
|
||||
}
|
||||
|
||||
static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
|
||||
struct xdp_frame *frame)
|
||||
struct xdp_frame *frame,
|
||||
unsigned int *xdp_xmit)
|
||||
{
|
||||
void *hard_start = frame->data - frame->headroom;
|
||||
void *head = hard_start - sizeof(struct xdp_frame);
|
||||
int len = frame->len, delta = 0;
|
||||
struct xdp_frame orig_frame;
|
||||
struct bpf_prog *xdp_prog;
|
||||
unsigned int headroom;
|
||||
struct sk_buff *skb;
|
||||
@ -346,6 +383,29 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
|
||||
delta = frame->data - xdp.data;
|
||||
len = xdp.data_end - xdp.data;
|
||||
break;
|
||||
case XDP_TX:
|
||||
orig_frame = *frame;
|
||||
xdp.data_hard_start = head;
|
||||
xdp.rxq->mem = frame->mem;
|
||||
if (unlikely(veth_xdp_tx(priv->dev, &xdp) < 0)) {
|
||||
trace_xdp_exception(priv->dev, xdp_prog, act);
|
||||
frame = &orig_frame;
|
||||
goto err_xdp;
|
||||
}
|
||||
*xdp_xmit |= VETH_XDP_TX;
|
||||
rcu_read_unlock();
|
||||
goto xdp_xmit;
|
||||
case XDP_REDIRECT:
|
||||
orig_frame = *frame;
|
||||
xdp.data_hard_start = head;
|
||||
xdp.rxq->mem = frame->mem;
|
||||
if (xdp_do_redirect(priv->dev, &xdp, xdp_prog)) {
|
||||
frame = &orig_frame;
|
||||
goto err_xdp;
|
||||
}
|
||||
*xdp_xmit |= VETH_XDP_REDIR;
|
||||
rcu_read_unlock();
|
||||
goto xdp_xmit;
|
||||
default:
|
||||
bpf_warn_invalid_xdp_action(act);
|
||||
case XDP_ABORTED:
|
||||
@ -370,12 +430,13 @@ err:
|
||||
err_xdp:
|
||||
rcu_read_unlock();
|
||||
xdp_return_frame(frame);
|
||||
|
||||
xdp_xmit:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
|
||||
struct sk_buff *skb)
|
||||
struct sk_buff *skb,
|
||||
unsigned int *xdp_xmit)
|
||||
{
|
||||
u32 pktlen, headroom, act, metalen;
|
||||
void *orig_data, *orig_data_end;
|
||||
@ -447,6 +508,26 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
|
||||
switch (act) {
|
||||
case XDP_PASS:
|
||||
break;
|
||||
case XDP_TX:
|
||||
get_page(virt_to_page(xdp.data));
|
||||
consume_skb(skb);
|
||||
xdp.rxq->mem = priv->xdp_mem;
|
||||
if (unlikely(veth_xdp_tx(priv->dev, &xdp) < 0)) {
|
||||
trace_xdp_exception(priv->dev, xdp_prog, act);
|
||||
goto err_xdp;
|
||||
}
|
||||
*xdp_xmit |= VETH_XDP_TX;
|
||||
rcu_read_unlock();
|
||||
goto xdp_xmit;
|
||||
case XDP_REDIRECT:
|
||||
get_page(virt_to_page(xdp.data));
|
||||
consume_skb(skb);
|
||||
xdp.rxq->mem = priv->xdp_mem;
|
||||
if (xdp_do_redirect(priv->dev, &xdp, xdp_prog))
|
||||
goto err_xdp;
|
||||
*xdp_xmit |= VETH_XDP_REDIR;
|
||||
rcu_read_unlock();
|
||||
goto xdp_xmit;
|
||||
default:
|
||||
bpf_warn_invalid_xdp_action(act);
|
||||
case XDP_ABORTED:
|
||||
@ -477,9 +558,15 @@ drop:
|
||||
rcu_read_unlock();
|
||||
kfree_skb(skb);
|
||||
return NULL;
|
||||
err_xdp:
|
||||
rcu_read_unlock();
|
||||
page_frag_free(xdp.data);
|
||||
xdp_xmit:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int veth_xdp_rcv(struct veth_priv *priv, int budget)
|
||||
static int veth_xdp_rcv(struct veth_priv *priv, int budget,
|
||||
unsigned int *xdp_xmit)
|
||||
{
|
||||
int i, done = 0;
|
||||
|
||||
@ -490,10 +577,12 @@ static int veth_xdp_rcv(struct veth_priv *priv, int budget)
|
||||
if (!ptr)
|
||||
break;
|
||||
|
||||
if (veth_is_xdp_frame(ptr))
|
||||
skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr));
|
||||
else
|
||||
skb = veth_xdp_rcv_skb(priv, ptr);
|
||||
if (veth_is_xdp_frame(ptr)) {
|
||||
skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr),
|
||||
xdp_xmit);
|
||||
} else {
|
||||
skb = veth_xdp_rcv_skb(priv, ptr, xdp_xmit);
|
||||
}
|
||||
|
||||
if (skb)
|
||||
napi_gro_receive(&priv->xdp_napi, skb);
|
||||
@ -508,9 +597,11 @@ static int veth_poll(struct napi_struct *napi, int budget)
|
||||
{
|
||||
struct veth_priv *priv =
|
||||
container_of(napi, struct veth_priv, xdp_napi);
|
||||
unsigned int xdp_xmit = 0;
|
||||
int done;
|
||||
|
||||
done = veth_xdp_rcv(priv, budget);
|
||||
xdp_set_return_frame_no_direct();
|
||||
done = veth_xdp_rcv(priv, budget, &xdp_xmit);
|
||||
|
||||
if (done < budget && napi_complete_done(napi, done)) {
|
||||
/* Write rx_notify_masked before reading ptr_ring */
|
||||
@ -521,6 +612,12 @@ static int veth_poll(struct napi_struct *napi, int budget)
|
||||
}
|
||||
}
|
||||
|
||||
if (xdp_xmit & VETH_XDP_TX)
|
||||
veth_xdp_flush(priv->dev);
|
||||
if (xdp_xmit & VETH_XDP_REDIR)
|
||||
xdp_do_flush_map();
|
||||
xdp_clear_return_frame_no_direct();
|
||||
|
||||
return done;
|
||||
}
|
||||
|
||||
@ -567,6 +664,9 @@ static int veth_enable_xdp(struct net_device *dev)
|
||||
err = veth_napi_add(dev);
|
||||
if (err)
|
||||
goto err;
|
||||
|
||||
/* Save original mem info as it can be overwritten */
|
||||
priv->xdp_mem = priv->xdp_rxq.mem;
|
||||
}
|
||||
|
||||
rcu_assign_pointer(priv->xdp_prog, priv->_xdp_prog);
|
||||
@ -584,6 +684,7 @@ static void veth_disable_xdp(struct net_device *dev)
|
||||
|
||||
rcu_assign_pointer(priv->xdp_prog, NULL);
|
||||
veth_napi_del(dev);
|
||||
priv->xdp_rxq.mem = priv->xdp_mem;
|
||||
xdp_rxq_info_unreg(&priv->xdp_rxq);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user