351e158139
This patch adds support for XDP in native mode to the hv_netvsc driver, and transparently sets the XDP program on the associated VF NIC as well. Setting / unsetting an XDP program on the synthetic NIC (netvsc) propagates to the VF NIC automatically. Setting / unsetting an XDP program on the VF NIC directly is not recommended: it is not propagated to the synthetic NIC, and it may be overwritten by a later setting on the synthetic NIC. The Azure/Hyper-V synthetic NIC receive buffer doesn't provide headroom for XDP. We considered reusing the RNDIS header space, but it is too small, so we decided to copy the packets to a page buffer for XDP. Most of our VMs on Azure have Accelerated Networking (SR-IOV) enabled, so most of the packets run on the VF NIC, and the synthetic NIC is considered a fallback data path. The data copy on netvsc therefore won't impact performance significantly. An XDP program cannot run with LRO (RSC) enabled, so you need to disable LRO before running XDP (ethtool -K eth0 lro off). XDP actions not yet supported: XDP_REDIRECT. Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com> Signed-off-by: David S. Miller <davem@davemloft.net>
210 lines
4.1 KiB
C
210 lines
4.1 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/* Copyright (c) 2019, Microsoft Corporation.
|
|
*
|
|
* Author:
|
|
* Haiyang Zhang <haiyangz@microsoft.com>
|
|
*/
|
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/netdevice.h>
|
|
#include <linux/etherdevice.h>
|
|
#include <linux/ethtool.h>
|
|
#include <linux/bpf.h>
|
|
#include <linux/bpf_trace.h>
|
|
#include <linux/kernel.h>
|
|
#include <net/xdp.h>
|
|
|
|
#include <linux/mutex.h>
|
|
#include <linux/rtnetlink.h>
|
|
|
|
#include "hyperv_net.h"
|
|
|
|
u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
|
|
struct xdp_buff *xdp)
|
|
{
|
|
void *data = nvchan->rsc.data[0];
|
|
u32 len = nvchan->rsc.len[0];
|
|
struct page *page = NULL;
|
|
struct bpf_prog *prog;
|
|
u32 act = XDP_PASS;
|
|
|
|
xdp->data_hard_start = NULL;
|
|
|
|
rcu_read_lock();
|
|
prog = rcu_dereference(nvchan->bpf_prog);
|
|
|
|
if (!prog)
|
|
goto out;
|
|
|
|
/* allocate page buffer for data */
|
|
page = alloc_page(GFP_ATOMIC);
|
|
if (!page) {
|
|
act = XDP_DROP;
|
|
goto out;
|
|
}
|
|
|
|
xdp->data_hard_start = page_address(page);
|
|
xdp->data = xdp->data_hard_start + NETVSC_XDP_HDRM;
|
|
xdp_set_data_meta_invalid(xdp);
|
|
xdp->data_end = xdp->data + len;
|
|
xdp->rxq = &nvchan->xdp_rxq;
|
|
xdp->handle = 0;
|
|
|
|
memcpy(xdp->data, data, len);
|
|
|
|
act = bpf_prog_run_xdp(prog, xdp);
|
|
|
|
switch (act) {
|
|
case XDP_PASS:
|
|
case XDP_TX:
|
|
case XDP_DROP:
|
|
break;
|
|
|
|
case XDP_ABORTED:
|
|
trace_xdp_exception(ndev, prog, act);
|
|
break;
|
|
|
|
default:
|
|
bpf_warn_invalid_xdp_action(act);
|
|
}
|
|
|
|
out:
|
|
rcu_read_unlock();
|
|
|
|
if (page && act != XDP_PASS && act != XDP_TX) {
|
|
__free_page(page);
|
|
xdp->data_hard_start = NULL;
|
|
}
|
|
|
|
return act;
|
|
}
|
|
|
|
unsigned int netvsc_xdp_fraglen(unsigned int len)
|
|
{
|
|
return SKB_DATA_ALIGN(len) +
|
|
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
|
|
}
|
|
|
|
struct bpf_prog *netvsc_xdp_get(struct netvsc_device *nvdev)
|
|
{
|
|
return rtnl_dereference(nvdev->chan_table[0].bpf_prog);
|
|
}
|
|
|
|
int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog,
|
|
struct netlink_ext_ack *extack,
|
|
struct netvsc_device *nvdev)
|
|
{
|
|
struct bpf_prog *old_prog;
|
|
int buf_max, i;
|
|
|
|
old_prog = netvsc_xdp_get(nvdev);
|
|
|
|
if (!old_prog && !prog)
|
|
return 0;
|
|
|
|
buf_max = NETVSC_XDP_HDRM + netvsc_xdp_fraglen(dev->mtu + ETH_HLEN);
|
|
if (prog && buf_max > PAGE_SIZE) {
|
|
netdev_err(dev, "XDP: mtu:%u too large, buf_max:%u\n",
|
|
dev->mtu, buf_max);
|
|
NL_SET_ERR_MSG_MOD(extack, "XDP: mtu too large");
|
|
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
if (prog && (dev->features & NETIF_F_LRO)) {
|
|
netdev_err(dev, "XDP: not support LRO\n");
|
|
NL_SET_ERR_MSG_MOD(extack, "XDP: not support LRO");
|
|
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
if (prog)
|
|
bpf_prog_add(prog, nvdev->num_chn);
|
|
|
|
for (i = 0; i < nvdev->num_chn; i++)
|
|
rcu_assign_pointer(nvdev->chan_table[i].bpf_prog, prog);
|
|
|
|
if (old_prog)
|
|
for (i = 0; i < nvdev->num_chn; i++)
|
|
bpf_prog_put(old_prog);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
|
|
{
|
|
struct netdev_bpf xdp;
|
|
bpf_op_t ndo_bpf;
|
|
|
|
ASSERT_RTNL();
|
|
|
|
if (!vf_netdev)
|
|
return 0;
|
|
|
|
ndo_bpf = vf_netdev->netdev_ops->ndo_bpf;
|
|
if (!ndo_bpf)
|
|
return 0;
|
|
|
|
memset(&xdp, 0, sizeof(xdp));
|
|
|
|
xdp.command = XDP_SETUP_PROG;
|
|
xdp.prog = prog;
|
|
|
|
return ndo_bpf(vf_netdev, &xdp);
|
|
}
|
|
|
|
static u32 netvsc_xdp_query(struct netvsc_device *nvdev)
|
|
{
|
|
struct bpf_prog *prog = netvsc_xdp_get(nvdev);
|
|
|
|
if (prog)
|
|
return prog->aux->id;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
|
|
{
|
|
struct net_device_context *ndevctx = netdev_priv(dev);
|
|
struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
|
|
struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
|
|
struct netlink_ext_ack *extack = bpf->extack;
|
|
int ret;
|
|
|
|
if (!nvdev || nvdev->destroy) {
|
|
if (bpf->command == XDP_QUERY_PROG) {
|
|
bpf->prog_id = 0;
|
|
return 0; /* Query must always succeed */
|
|
} else {
|
|
return -ENODEV;
|
|
}
|
|
}
|
|
|
|
switch (bpf->command) {
|
|
case XDP_SETUP_PROG:
|
|
ret = netvsc_xdp_set(dev, bpf->prog, extack, nvdev);
|
|
|
|
if (ret)
|
|
return ret;
|
|
|
|
ret = netvsc_vf_setxdp(vf_netdev, bpf->prog);
|
|
|
|
if (ret) {
|
|
netdev_err(dev, "vf_setxdp failed:%d\n", ret);
|
|
NL_SET_ERR_MSG_MOD(extack, "vf_setxdp failed");
|
|
|
|
netvsc_xdp_set(dev, NULL, extack, nvdev);
|
|
}
|
|
|
|
return ret;
|
|
|
|
case XDP_QUERY_PROG:
|
|
bpf->prog_id = netvsc_xdp_query(nvdev);
|
|
return 0;
|
|
|
|
default:
|
|
return -EINVAL;
|
|
}
|
|
}
|