ipvlan: Introduce l3s mode

In a typical IPvlan L3 setup where master is in default-ns and
each slave is into different (slave) ns. In this setup egress
packet processing for traffic originating from slave-ns will
hit all NF_HOOKs in slave-ns as well as default-ns. However same
is not true for ingress processing. All these NF_HOOKs are
hit only in the slave-ns skipping them in the default-ns.
IPvlan in L3 mode is restrictive and if admins want to deploy
iptables rules in default-ns, this asymmetric data path makes it
impossible to do so.

This patch makes use of the l3_rcv() (added as part of l3mdev
enhancements) to perform input route lookup on RX packets without
changing the skb->dev and then uses nf_hook at NF_INET_LOCAL_IN
to change the skb->dev just before handing over skb to L4.

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
CC: David Ahern <dsa@cumulusnetworks.com>
Reviewed-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Mahesh Bandewar
2016-09-16 12:59:19 -07:00
committed by David S. Miller
parent e8bffe0cf9
commit 4fbae7d83c
6 changed files with 188 additions and 8 deletions

View File

@@ -9,24 +9,87 @@
#include "ipvlan.h"
static u32 ipvl_nf_hook_refcnt = 0;
static struct nf_hook_ops ipvl_nfops[] __read_mostly = {
{
.hook = ipvlan_nf_input,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = INT_MAX,
},
{
.hook = ipvlan_nf_input,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = INT_MAX,
},
};
static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = {
.l3mdev_l3_rcv = ipvlan_l3_rcv,
};
static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
{
ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj;
}
static void ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
static int ipvlan_register_nf_hook(void)
{
int err = 0;
if (!ipvl_nf_hook_refcnt) {
err = _nf_register_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops));
if (!err)
ipvl_nf_hook_refcnt = 1;
} else {
ipvl_nf_hook_refcnt++;
}
return err;
}
static void ipvlan_unregister_nf_hook(void)
{
WARN_ON(!ipvl_nf_hook_refcnt);
ipvl_nf_hook_refcnt--;
if (!ipvl_nf_hook_refcnt)
_nf_unregister_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops));
}
static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
{
struct ipvl_dev *ipvlan;
struct net_device *mdev = port->dev;
int err = 0;
ASSERT_RTNL();
if (port->mode != nval) {
if (nval == IPVLAN_MODE_L3S) {
/* New mode is L3S */
err = ipvlan_register_nf_hook();
if (!err) {
mdev->l3mdev_ops = &ipvl_l3mdev_ops;
mdev->priv_flags |= IFF_L3MDEV_MASTER;
} else
return err;
} else if (port->mode == IPVLAN_MODE_L3S) {
/* Old mode was L3S */
mdev->priv_flags &= ~IFF_L3MDEV_MASTER;
ipvlan_unregister_nf_hook();
mdev->l3mdev_ops = NULL;
}
list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
if (nval == IPVLAN_MODE_L3)
if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S)
ipvlan->dev->flags |= IFF_NOARP;
else
ipvlan->dev->flags &= ~IFF_NOARP;
}
port->mode = nval;
}
return err;
}
static int ipvlan_port_create(struct net_device *dev)
@@ -74,6 +137,11 @@ static void ipvlan_port_destroy(struct net_device *dev)
struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
dev->priv_flags &= ~IFF_IPVLAN_MASTER;
if (port->mode == IPVLAN_MODE_L3S) {
dev->priv_flags &= ~IFF_L3MDEV_MASTER;
ipvlan_unregister_nf_hook();
dev->l3mdev_ops = NULL;
}
netdev_rx_handler_unregister(dev);
cancel_work_sync(&port->wq);
__skb_queue_purge(&port->backlog);
@@ -132,7 +200,8 @@ static int ipvlan_open(struct net_device *dev)
struct net_device *phy_dev = ipvlan->phy_dev;
struct ipvl_addr *addr;
if (ipvlan->port->mode == IPVLAN_MODE_L3)
if (ipvlan->port->mode == IPVLAN_MODE_L3 ||
ipvlan->port->mode == IPVLAN_MODE_L3S)
dev->flags |= IFF_NOARP;
else
dev->flags &= ~IFF_NOARP;
@@ -372,13 +441,14 @@ static int ipvlan_nl_changelink(struct net_device *dev,
{
struct ipvl_dev *ipvlan = netdev_priv(dev);
struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
int err = 0;
if (data && data[IFLA_IPVLAN_MODE]) {
u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
ipvlan_set_port_mode(port, nmode);
err = ipvlan_set_port_mode(port, nmode);
}
return 0;
return err;
}
static size_t ipvlan_nl_getsize(const struct net_device *dev)
@@ -473,10 +543,13 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev,
unregister_netdevice(dev);
return err;
}
err = ipvlan_set_port_mode(port, mode);
if (err) {
unregister_netdevice(dev);
return err;
}
list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans);
ipvlan_set_port_mode(port, mode);
netif_stacked_transfer_operstate(phy_dev, dev);
return 0;
}