Merge branch 'net_next_ovs' of git://git.kernel.org/pub/scm/linux/kernel/git/pshelar/openvswitch

Pravin B Shelar says:

====================
Open vSwitch

The first two patches are related to OVS MPLS support. The rest of the
patches are mostly refactoring and minor improvements to openvswitch.

v1-v2:
 - Fix conflicts due to "gue: Remote checksum offload"
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Committed by David S. Miller on 2014-11-06 16:33:13 -05:00
commit 6b798d70d0
24 changed files with 606 additions and 245 deletions


@ -47,7 +47,6 @@ enum {
NETIF_F_GSO_SIT_BIT, /* ... SIT tunnel with TSO */
NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */
NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */
NETIF_F_GSO_MPLS_BIT, /* ... MPLS segmentation */
NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */
/**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */
NETIF_F_GSO_TUNNEL_REMCSUM_BIT,
@ -119,7 +118,6 @@ enum {
#define NETIF_F_GSO_SIT __NETIF_F(GSO_SIT)
#define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL)
#define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM)
#define NETIF_F_GSO_MPLS __NETIF_F(GSO_MPLS)
#define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM)
#define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
#define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX)
@ -183,7 +181,6 @@ enum {
NETIF_F_GSO_IPIP | \
NETIF_F_GSO_SIT | \
NETIF_F_GSO_UDP_TUNNEL | \
NETIF_F_GSO_UDP_TUNNEL_CSUM | \
NETIF_F_GSO_MPLS)
NETIF_F_GSO_UDP_TUNNEL_CSUM)
#endif /* _LINUX_NETDEV_FEATURES_H */


@ -3583,7 +3583,6 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
BUILD_BUG_ON(SKB_GSO_SIT != (NETIF_F_GSO_SIT >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL != (NETIF_F_GSO_UDP_TUNNEL >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_MPLS != (NETIF_F_GSO_MPLS >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT));
return (features & feature) == feature;


@ -372,9 +372,7 @@ enum {
SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
SKB_GSO_MPLS = 1 << 12,
SKB_GSO_TUNNEL_REMCSUM = 1 << 13,
SKB_GSO_TUNNEL_REMCSUM = 1 << 12,
};
#if BITS_PER_LONG > 32

include/net/mpls.h (new file, 39 lines)

@ -0,0 +1,39 @@
/*
* Copyright (c) 2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef _NET_MPLS_H
#define _NET_MPLS_H 1
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#define MPLS_HLEN 4
static inline bool eth_p_mpls(__be16 eth_type)
{
return eth_type == htons(ETH_P_MPLS_UC) ||
eth_type == htons(ETH_P_MPLS_MC);
}
/*
* For non-MPLS skbs this will correspond to the network header.
* For MPLS skbs it will be before the network_header as the MPLS
* label stack lies between the end of the mac header and the network
* header. That is, for MPLS skbs the end of the mac header
* is the top of the MPLS label stack.
*/
static inline unsigned char *skb_mpls_header(struct sk_buff *skb)
{
return skb_mac_header(skb) + skb->mac_len;
}
#endif
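For illustration, a minimal hypothetical helper (not part of this commit) built on skb_mpls_header(); memcpy() is used because, as in the flow extraction code below, the label stack is not guaranteed to be 4-byte aligned:

/* Hypothetical example: read the top MPLS label stack entry of an skb
 * known to carry an MPLS ethertype.
 */
static inline __be32 example_mpls_top_lse(struct sk_buff *skb)
{
	__be32 lse;

	memcpy(&lse, skb_mpls_header(skb), MPLS_HLEN);
	return lse;
}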


@ -293,6 +293,9 @@ enum ovs_key_attr {
OVS_KEY_ATTR_DP_HASH, /* u32 hash value. Value 0 indicates the hash
is not computed by the datapath. */
OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */
OVS_KEY_ATTR_MPLS, /* array of struct ovs_key_mpls.
* The implementation may restrict
* the accepted length of the array. */
#ifdef __KERNEL__
OVS_KEY_ATTR_TUNNEL_INFO, /* struct ovs_tunnel_info */
@ -340,6 +343,10 @@ struct ovs_key_ethernet {
__u8 eth_dst[ETH_ALEN];
};
struct ovs_key_mpls {
__be32 mpls_lse;
};
struct ovs_key_ipv4 {
__be32 ipv4_src;
__be32 ipv4_dst;
@ -393,9 +400,9 @@ struct ovs_key_arp {
};
struct ovs_key_nd {
__u32 nd_target[4];
__u8 nd_sll[ETH_ALEN];
__u8 nd_tll[ETH_ALEN];
__be32 nd_target[4];
__u8 nd_sll[ETH_ALEN];
__u8 nd_tll[ETH_ALEN];
};
/**
@ -483,6 +490,19 @@ enum ovs_userspace_attr {
#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
/**
* struct ovs_action_push_mpls - %OVS_ACTION_ATTR_PUSH_MPLS action argument.
* @mpls_lse: MPLS label stack entry to push.
* @mpls_ethertype: Ethertype to set in the encapsulating ethernet frame.
*
* The only values @mpls_ethertype should ever be given are %ETH_P_MPLS_UC and
* %ETH_P_MPLS_MC, indicating MPLS unicast or multicast. Others are rejected.
*/
struct ovs_action_push_mpls {
__be32 mpls_lse;
__be16 mpls_ethertype; /* Either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC */
};
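A label stack entry packs the label into bits 31-12, the traffic class into bits 11-9, the bottom-of-stack (S) flag into bit 8 and the TTL into bits 7-0, in network byte order. A hypothetical userspace caller could therefore build the argument like this (values are examples only):

/* Push label 100, TC 0, bottom of stack, TTL 64, as MPLS unicast. */
struct ovs_action_push_mpls push = {
	.mpls_lse = htonl((100 << 12) | (1 << 8) | 64),
	.mpls_ethertype = htons(ETH_P_MPLS_UC),
};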
/**
* struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument.
* @vlan_tpid: Tag protocol identifier (TPID) to push.
@ -534,6 +554,15 @@ struct ovs_action_hash {
* @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
* @OVS_ACTION_ATTR_SAMPLE: Probabilistically executes actions, as specified in
* the nested %OVS_SAMPLE_ATTR_* attributes.
* @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the
* top of the packet's MPLS label stack. Set the ethertype of the
* encapsulating frame to either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC to
* indicate the new packet contents.
* @OVS_ACTION_ATTR_POP_MPLS: Pop an MPLS label stack entry off of the
* packet's MPLS label stack. Set the encapsulating frame's ethertype to
* indicate the new packet contents. This could potentially still be
* %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there
* is no MPLS label stack, as determined by ethertype, no action is taken.
*
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@ -550,6 +579,9 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */
OVS_ACTION_ATTR_RECIRC, /* u32 recirc_id. */
OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */
OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */
OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */
__OVS_ACTION_ATTR_MAX
};


@ -118,6 +118,7 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <net/mpls.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
@ -2530,7 +2531,7 @@ static netdev_features_t net_mpls_features(struct sk_buff *skb,
netdev_features_t features,
__be16 type)
{
if (type == htons(ETH_P_MPLS_UC) || type == htons(ETH_P_MPLS_MC))
if (eth_p_mpls(type))
features &= skb->dev->mpls_features;
return features;


@ -84,7 +84,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_GSO_IPIP_BIT] = "tx-ipip-segmentation",
[NETIF_F_GSO_SIT_BIT] = "tx-sit-segmentation",
[NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
[NETIF_F_GSO_MPLS_BIT] = "tx-mpls-segmentation",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp",


@ -1223,7 +1223,6 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
SKB_GSO_TUNNEL_REMCSUM |
SKB_GSO_MPLS |
0)))
goto out;


@ -94,7 +94,6 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
SKB_GSO_SIT |
SKB_GSO_MPLS |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
SKB_GSO_TUNNEL_REMCSUM |


@ -207,8 +207,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
SKB_GSO_UDP_TUNNEL_CSUM |
SKB_GSO_TUNNEL_REMCSUM |
SKB_GSO_IPIP |
SKB_GSO_GRE | SKB_GSO_GRE_CSUM |
SKB_GSO_MPLS) ||
SKB_GSO_GRE | SKB_GSO_GRE_CSUM) ||
!(type & (SKB_GSO_UDP))))
goto out;


@ -79,7 +79,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
SKB_GSO_TUNNEL_REMCSUM |
SKB_GSO_MPLS |
SKB_GSO_TCPV6 |
0)))
goto out;


@ -46,8 +46,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
SKB_GSO_GRE |
SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
SKB_GSO_SIT |
SKB_GSO_MPLS) ||
SKB_GSO_SIT) ||
!(type & (SKB_GSO_UDP))))
goto out;


@ -34,8 +34,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
SKB_GSO_MPLS)))
SKB_GSO_IPIP)))
goto out;
/* Setup inner SKB. */


@ -30,6 +30,7 @@ config OPENVSWITCH
config OPENVSWITCH_GRE
tristate "Open vSwitch GRE tunneling support"
select NET_MPLS_GSO
depends on INET
depends on OPENVSWITCH
depends on NET_IPGRE_DEMUX


@ -28,10 +28,12 @@
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/mpls.h>
#include <net/sctp/checksum.h>
#include "datapath.h"
@ -118,6 +120,92 @@ static int make_writable(struct sk_buff *skb, int write_len)
return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
}
static int push_mpls(struct sk_buff *skb,
const struct ovs_action_push_mpls *mpls)
{
__be32 *new_mpls_lse;
struct ethhdr *hdr;
/* The networking stack does not allow simultaneous tunnel and MPLS GSO. */
if (skb->encapsulation)
return -ENOTSUPP;
if (skb_cow_head(skb, MPLS_HLEN) < 0)
return -ENOMEM;
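/* Note: skb_cow_head() above guaranteed MPLS_HLEN bytes of headroom;
 * skb_push() claims them, and the memmove() below slides the L2 header
 * down so the 4-byte gap ends up at skb_mpls_header().
 */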
skb_push(skb, MPLS_HLEN);
memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
skb->mac_len);
skb_reset_mac_header(skb);
new_mpls_lse = (__be32 *)skb_mpls_header(skb);
*new_mpls_lse = mpls->mpls_lse;
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
MPLS_HLEN, 0));
hdr = eth_hdr(skb);
hdr->h_proto = mpls->mpls_ethertype;
skb_set_inner_protocol(skb, skb->protocol);
skb->protocol = mpls->mpls_ethertype;
return 0;
}
static int pop_mpls(struct sk_buff *skb, const __be16 ethertype)
{
struct ethhdr *hdr;
int err;
err = make_writable(skb, skb->mac_len + MPLS_HLEN);
if (unlikely(err))
return err;
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->csum = csum_sub(skb->csum,
csum_partial(skb_mpls_header(skb),
MPLS_HLEN, 0));
memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
skb->mac_len);
__skb_pull(skb, MPLS_HLEN);
skb_reset_mac_header(skb);
/* skb_mpls_header() is used to locate the ethertype
* field correctly in the presence of VLAN tags.
*/
hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN);
hdr->h_proto = ethertype;
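/* Note: skb->protocol is only rewritten when it currently holds an
 * MPLS ethertype; with a VLAN tag present, skb->protocol carries the
 * VLAN TPID and the MPLS ethertype sits behind the VLAN header.
 */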
if (eth_p_mpls(skb->protocol))
skb->protocol = ethertype;
return 0;
}
static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse)
{
__be32 *stack;
int err;
err = make_writable(skb, skb->mac_len + MPLS_HLEN);
if (unlikely(err))
return err;
stack = (__be32 *)skb_mpls_header(skb);
if (skb->ip_summed == CHECKSUM_COMPLETE) {
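/* Incremental update of the complete checksum: folding in the one's
 * complement of the old LSE cancels its contribution, then the new
 * LSE is added.
 */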
__be32 diff[] = { ~(*stack), *mpls_lse };
skb->csum = ~csum_partial((char *)diff, sizeof(diff),
~skb->csum);
}
*stack = *mpls_lse;
return 0;
}
/* remove VLAN header from packet and update csum accordingly. */
static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
{
@ -140,10 +228,12 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
vlan_set_encap_proto(skb, vhdr);
skb->mac_header += VLAN_HLEN;
if (skb_network_offset(skb) < ETH_HLEN)
skb_set_network_header(skb, ETH_HLEN);
skb_reset_mac_len(skb);
/* Update mac_len for subsequent MPLS actions */
skb_reset_mac_len(skb);
return 0;
}
@ -186,6 +276,8 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
return -ENOMEM;
/* Update mac_len for subsequent MPLS actions */
skb->mac_len += VLAN_HLEN;
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->csum = csum_add(skb->csum, csum_partial(skb->data
@ -459,21 +551,14 @@ static int set_sctp(struct sk_buff *skb,
return 0;
}
static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
{
struct vport *vport;
struct vport *vport = ovs_vport_rcu(dp, out_port);
if (unlikely(!skb))
return -ENOMEM;
vport = ovs_vport_rcu(dp, out_port);
if (unlikely(!vport)) {
if (likely(vport))
ovs_vport_send(vport, skb);
else
kfree_skb(skb);
return -ENODEV;
}
ovs_vport_send(vport, skb);
return 0;
}
static int output_userspace(struct datapath *dp, struct sk_buff *skb,
@ -612,6 +697,10 @@ static int execute_set_action(struct sk_buff *skb,
case OVS_KEY_ATTR_SCTP:
err = set_sctp(skb, nla_data(nested_attr));
break;
case OVS_KEY_ATTR_MPLS:
err = set_mpls(skb, nla_data(nested_attr));
break;
}
return err;
@ -672,8 +761,12 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
a = nla_next(a, &rem)) {
int err = 0;
if (prev_port != -1) {
do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port);
if (unlikely(prev_port != -1)) {
struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);
if (out_skb)
do_output(dp, out_skb, prev_port);
prev_port = -1;
}
@ -690,6 +783,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
execute_hash(skb, key, a);
break;
case OVS_ACTION_ATTR_PUSH_MPLS:
err = push_mpls(skb, nla_data(a));
break;
case OVS_ACTION_ATTR_POP_MPLS:
err = pop_mpls(skb, nla_get_be16(a));
break;
case OVS_ACTION_ATTR_PUSH_VLAN:
err = push_vlan(skb, nla_data(a));
if (unlikely(err)) /* skb already freed. */
@ -764,14 +865,11 @@ static void process_deferred_actions(struct datapath *dp)
/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key)
struct sw_flow_actions *acts, struct sw_flow_key *key)
{
int level = this_cpu_read(exec_actions_level);
struct sw_flow_actions *acts;
int err;
acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
this_cpu_inc(exec_actions_level);
OVS_CB(skb)->egress_tun_info = NULL;
err = do_execute_actions(dp, skb, key,


@ -140,19 +140,30 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
const struct dp_upcall_info *);
/* Must be called with rcu_read_lock or ovs_mutex. */
static struct datapath *get_dp(struct net *net, int dp_ifindex)
/* Must be called with rcu_read_lock. */
static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
{
struct datapath *dp = NULL;
struct net_device *dev;
struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, dp_ifindex);
if (dev) {
struct vport *vport = ovs_internal_dev_get_vport(dev);
if (vport)
dp = vport->dp;
return vport->dp;
}
return NULL;
}
/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
* returned dp pointer valid.
*/
static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
{
struct datapath *dp;
WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
rcu_read_lock();
dp = get_dp_rcu(net, dp_ifindex);
rcu_read_unlock();
return dp;
@ -187,6 +198,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
{
struct datapath *dp = container_of(rcu, struct datapath, rcu);
ovs_flow_tbl_destroy(&dp->table);
free_percpu(dp->stats_percpu);
release_net(ovs_dp_get_net(dp));
kfree(dp->ports);
@ -245,6 +257,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
const struct vport *p = OVS_CB(skb)->input_vport;
struct datapath *dp = p->dp;
struct sw_flow *flow;
struct sw_flow_actions *sf_acts;
struct dp_stats_percpu *stats;
u64 *stats_counter;
u32 n_mask_hit;
@ -270,10 +283,10 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
goto out;
}
OVS_CB(skb)->flow = flow;
ovs_flow_stats_update(flow, key->tp.flags, skb);
sf_acts = rcu_dereference(flow->sf_acts);
ovs_execute_actions(dp, skb, sf_acts, key);
ovs_flow_stats_update(OVS_CB(skb)->flow, key->tp.flags, skb);
ovs_execute_actions(dp, skb, key);
stats_counter = &stats->n_hit;
out:
@ -362,37 +375,12 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
return err;
}
static size_t key_attr_size(void)
{
return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
+ nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
+ nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
+ nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
+ nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
+ nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
+ nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
+ nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
+ nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
+ nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
+ nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ nla_total_size(4) /* OVS_KEY_ATTR_8021Q */
+ nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */
+ nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
+ nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
}
static size_t upcall_msg_size(const struct nlattr *userdata,
unsigned int hdrlen)
{
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
+ nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
+ nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */
/* OVS_PACKET_ATTR_USERDATA */
if (userdata)
@ -512,6 +500,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
struct sw_flow_actions *acts;
struct sk_buff *packet;
struct sw_flow *flow;
struct sw_flow_actions *sf_acts;
struct datapath *dp;
struct ethhdr *eth;
struct vport *input_vport;
@ -554,25 +543,18 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
if (err)
goto err_flow_free;
acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
err = PTR_ERR(acts);
if (IS_ERR(acts))
goto err_flow_free;
err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
&flow->key, 0, &acts);
&flow->key, &acts);
if (err)
goto err_flow_free;
rcu_assign_pointer(flow->sf_acts, acts);
OVS_CB(packet)->egress_tun_info = NULL;
OVS_CB(packet)->flow = flow;
packet->priority = flow->key.phy.priority;
packet->mark = flow->key.phy.skb_mark;
rcu_read_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
err = -ENODEV;
if (!dp)
goto err_unlock;
@ -585,9 +567,10 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
goto err_unlock;
OVS_CB(packet)->input_vport = input_vport;
sf_acts = rcu_dereference(flow->sf_acts);
local_bh_disable();
err = ovs_execute_actions(dp, packet, &flow->key);
err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
local_bh_enable();
rcu_read_unlock();
@ -664,8 +647,8 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
{
return NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
+ nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
+ nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */
+ nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */
+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
+ nla_total_size(8) /* OVS_FLOW_ATTR_USED */
@ -673,58 +656,67 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
}
/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
struct sk_buff *skb, u32 portid,
u32 seq, u32 flags, u8 cmd)
static int ovs_flow_cmd_fill_match(const struct sw_flow *flow,
struct sk_buff *skb)
{
const int skb_orig_len = skb->len;
struct nlattr *start;
struct ovs_flow_stats stats;
__be16 tcp_flags;
unsigned long used;
struct ovs_header *ovs_header;
struct nlattr *nla;
int err;
ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
if (!ovs_header)
return -EMSGSIZE;
ovs_header->dp_ifindex = dp_ifindex;
/* Fill flow key. */
nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
if (!nla)
goto nla_put_failure;
return -EMSGSIZE;
err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
if (err)
goto error;
return err;
nla_nest_end(skb, nla);
/* Fill flow mask. */
nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
if (!nla)
goto nla_put_failure;
return -EMSGSIZE;
err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
if (err)
goto error;
return err;
nla_nest_end(skb, nla);
return 0;
}
/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
struct sk_buff *skb)
{
struct ovs_flow_stats stats;
__be16 tcp_flags;
unsigned long used;
ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
if (used &&
nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
goto nla_put_failure;
return -EMSGSIZE;
if (stats.n_packets &&
nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
goto nla_put_failure;
return -EMSGSIZE;
if ((u8)ntohs(tcp_flags) &&
nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
goto nla_put_failure;
return -EMSGSIZE;
return 0;
}
/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
struct sk_buff *skb, int skb_orig_len)
{
struct nlattr *start;
int err;
/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
* this is the first flow to be dumped into 'skb'. This is unusual for
@ -748,17 +740,47 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
nla_nest_end(skb, start);
else {
if (skb_orig_len)
goto error;
return err;
nla_nest_cancel(skb, start);
}
} else if (skb_orig_len)
goto nla_put_failure;
} else if (skb_orig_len) {
return -EMSGSIZE;
}
return 0;
}
/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
struct sk_buff *skb, u32 portid,
u32 seq, u32 flags, u8 cmd)
{
const int skb_orig_len = skb->len;
struct ovs_header *ovs_header;
int err;
ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
flags, cmd);
if (!ovs_header)
return -EMSGSIZE;
ovs_header->dp_ifindex = dp_ifindex;
err = ovs_flow_cmd_fill_match(flow, skb);
if (err)
goto error;
err = ovs_flow_cmd_fill_stats(flow, skb);
if (err)
goto error;
err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
if (err)
goto error;
return genlmsg_end(skb, ovs_header);
nla_put_failure:
err = -EMSGSIZE;
error:
genlmsg_cancel(skb, ovs_header);
return err;
@ -816,10 +838,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
/* Must have key and actions. */
error = -EINVAL;
if (!a[OVS_FLOW_ATTR_KEY])
if (!a[OVS_FLOW_ATTR_KEY]) {
OVS_NLERR("Flow key attribute not present in new flow.\n");
goto error;
if (!a[OVS_FLOW_ATTR_ACTIONS])
}
if (!a[OVS_FLOW_ATTR_ACTIONS]) {
OVS_NLERR("Flow actions attribute not present in new flow.\n");
goto error;
}
/* Most of the time we need to allocate a new flow, do it before
* locking.
@ -840,16 +866,11 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
/* Validate actions. */
acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
error = PTR_ERR(acts);
if (IS_ERR(acts))
goto err_kfree_flow;
error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
0, &acts);
&acts);
if (error) {
OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
goto err_kfree_acts;
goto err_kfree_flow;
}
reply = ovs_flow_cmd_alloc_info(acts, info, false);
@ -940,6 +961,7 @@ error:
return error;
}
/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
const struct sw_flow_key *key,
const struct sw_flow_mask *mask)
@ -948,15 +970,10 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
struct sw_flow_key masked_key;
int error;
acts = ovs_nla_alloc_flow_actions(nla_len(a));
if (IS_ERR(acts))
return acts;
ovs_flow_mask_key(&masked_key, key, mask);
error = ovs_nla_copy_actions(a, &masked_key, 0, &acts);
error = ovs_nla_copy_actions(a, &masked_key, &acts);
if (error) {
OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
kfree(acts);
OVS_NLERR("Actions may not be safe on all matching packets.\n");
return ERR_PTR(error);
}
@ -978,8 +995,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
/* Extract key. */
error = -EINVAL;
if (!a[OVS_FLOW_ATTR_KEY])
if (!a[OVS_FLOW_ATTR_KEY]) {
OVS_NLERR("Flow key attribute not present in set flow.\n");
goto error;
}
ovs_match_init(&match, &key, &mask);
error = ovs_nla_get_match(&match,
@ -994,10 +1013,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
error = PTR_ERR(acts);
goto error;
}
}
/* Can allocate before locking if have acts. */
if (acts) {
/* Can allocate before locking if have acts. */
reply = ovs_flow_cmd_alloc_info(acts, info, false);
if (IS_ERR(reply)) {
error = PTR_ERR(reply);
@ -1181,7 +1198,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct datapath *dp;
rcu_read_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
if (!dp) {
rcu_read_unlock();
return -ENODEV;
@ -1444,7 +1461,7 @@ err_destroy_ports_array:
err_destroy_percpu:
free_percpu(dp->stats_percpu);
err_destroy_table:
ovs_flow_tbl_destroy(&dp->table, false);
ovs_flow_tbl_destroy(&dp->table);
err_free_dp:
release_net(ovs_dp_get_net(dp));
kfree(dp);
@ -1476,8 +1493,6 @@ static void __dp_destroy(struct datapath *dp)
ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
/* RCU destroy the flow table */
ovs_flow_tbl_destroy(&dp->table, true);
call_rcu(&dp->rcu, destroy_dp_rcu);
}
@ -1945,7 +1960,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
int i, j = 0;
rcu_read_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
if (!dp) {
rcu_read_unlock();
return -ENODEV;


@ -94,14 +94,12 @@ struct datapath {
/**
* struct ovs_skb_cb - OVS data in skb CB
* @flow: The flow associated with this packet. May be %NULL if no flow.
* @egress_tun_key: Tunnel information about this packet on egress path.
* NULL if the packet is not being tunneled.
* @input_vport: The original vport packet came in on. This value is cached
* when a packet is received by OVS.
*/
struct ovs_skb_cb {
struct sw_flow *flow;
struct ovs_tunnel_info *egress_tun_info;
struct vport *input_vport;
};
@ -194,7 +192,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
u8 cmd);
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *);
struct sw_flow_actions *acts, struct sw_flow_key *);
void ovs_dp_notify_wq(struct work_struct *work);


@ -32,6 +32,7 @@
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/mpls.h>
#include <linux/sctp.h>
#include <linux/smp.h>
#include <linux/tcp.h>
@ -42,6 +43,7 @@
#include <net/ip.h>
#include <net/ip_tunnels.h>
#include <net/ipv6.h>
#include <net/mpls.h>
#include <net/ndisc.h>
#include "datapath.h"
@ -480,6 +482,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
return -ENOMEM;
skb_reset_network_header(skb);
skb_reset_mac_len(skb);
__skb_push(skb, skb->data - skb_mac_header(skb));
/* Network layer. */
@ -584,6 +587,33 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
memset(&key->ip, 0, sizeof(key->ip));
memset(&key->ipv4, 0, sizeof(key->ipv4));
}
} else if (eth_p_mpls(key->eth.type)) {
size_t stack_len = MPLS_HLEN;
/* In the presence of an MPLS label stack the end of the L2
* header and the beginning of the L3 header differ.
*
* Advance network_header to the beginning of the L3
* header. mac_len corresponds to the end of the L2 header.
*/
while (1) {
__be32 lse;
error = check_header(skb, skb->mac_len + stack_len);
if (unlikely(error))
return 0;
memcpy(&lse, skb_network_header(skb), MPLS_HLEN);
if (stack_len == MPLS_HLEN)
memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
skb_set_network_header(skb, skb->mac_len + stack_len);
if (lse & htonl(MPLS_LS_S_MASK))
break;
stack_len += MPLS_HLEN;
}
} else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */

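The loop above walks the label stack one entry at a time until the bottom-of-stack (S) bit terminates it, keeping only the top LSE in the flow key. For reference, a hypothetical decoder (not part of this commit) for an LSE already converted to host byte order, using the MPLS_LS_* masks from <linux/mpls.h>:

/* Split a host-byte-order label stack entry into its label, traffic
 * class, bottom-of-stack and TTL fields.
 */
static void example_mpls_lse_decode(u32 lse, u32 *label, u8 *tc,
				    bool *bos, u8 *ttl)
{
	*label = (lse & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
	*tc = (lse & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
	*bos = !!(lse & MPLS_LS_S_MASK);
	*ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
}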

@ -102,12 +102,17 @@ struct sw_flow_key {
__be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
__be16 type; /* Ethernet frame type. */
} eth;
struct {
u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
u8 tos; /* IP ToS. */
u8 ttl; /* IP TTL/hop limit. */
u8 frag; /* One of OVS_FRAG_TYPE_*. */
} ip;
union {
struct {
__be32 top_lse; /* top label stack entry */
} mpls;
struct {
u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
u8 tos; /* IP ToS. */
u8 ttl; /* IP TTL/hop limit. */
u8 frag; /* One of OVS_FRAG_TYPE_*. */
} ip;
};
struct {
__be16 src; /* TCP/UDP/SCTP source port. */
__be16 dst; /* TCP/UDP/SCTP destination port. */


@ -46,24 +46,22 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/mpls.h>
#include "flow_netlink.h"
static void update_range__(struct sw_flow_match *match,
size_t offset, size_t size, bool is_mask)
static void update_range(struct sw_flow_match *match,
size_t offset, size_t size, bool is_mask)
{
struct sw_flow_key_range *range = NULL;
struct sw_flow_key_range *range;
size_t start = rounddown(offset, sizeof(long));
size_t end = roundup(offset + size, sizeof(long));
if (!is_mask)
range = &match->range;
else if (match->mask)
else
range = &match->mask->range;
if (!range)
return;
if (range->start == range->end) {
range->start = start;
range->end = end;
@ -79,22 +77,20 @@ static void update_range__(struct sw_flow_match *match,
#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
do { \
update_range__(match, offsetof(struct sw_flow_key, field), \
sizeof((match)->key->field), is_mask); \
if (is_mask) { \
if ((match)->mask) \
(match)->mask->key.field = value; \
} else { \
update_range(match, offsetof(struct sw_flow_key, field), \
sizeof((match)->key->field), is_mask); \
if (is_mask) \
(match)->mask->key.field = value; \
else \
(match)->key->field = value; \
} \
} while (0)
#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \
do { \
update_range__(match, offset, len, is_mask); \
update_range(match, offset, len, is_mask); \
if (is_mask) \
memcpy((u8 *)&(match)->mask->key + offset, value_p, \
len); \
len); \
else \
memcpy((u8 *)(match)->key + offset, value_p, len); \
} while (0)
@ -103,18 +99,16 @@ static void update_range__(struct sw_flow_match *match,
SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
value_p, len, is_mask)
#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \
do { \
update_range__(match, offsetof(struct sw_flow_key, field), \
sizeof((match)->key->field), is_mask); \
if (is_mask) { \
if ((match)->mask) \
memset((u8 *)&(match)->mask->key.field, value,\
sizeof((match)->mask->key.field)); \
} else { \
#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \
do { \
update_range(match, offsetof(struct sw_flow_key, field), \
sizeof((match)->key->field), is_mask); \
if (is_mask) \
memset((u8 *)&(match)->mask->key.field, value, \
sizeof((match)->mask->key.field)); \
else \
memset((u8 *)&(match)->key->field, value, \
sizeof((match)->key->field)); \
} \
} while (0)
static bool match_validate(const struct sw_flow_match *match,
@ -134,7 +128,8 @@ static bool match_validate(const struct sw_flow_match *match,
| (1 << OVS_KEY_ATTR_ICMP)
| (1 << OVS_KEY_ATTR_ICMPV6)
| (1 << OVS_KEY_ATTR_ARP)
| (1 << OVS_KEY_ATTR_ND));
| (1 << OVS_KEY_ATTR_ND)
| (1 << OVS_KEY_ATTR_MPLS));
/* Always allowed mask fields. */
mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
@ -149,6 +144,12 @@ static bool match_validate(const struct sw_flow_match *match,
mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
}
if (eth_p_mpls(match->key->eth.type)) {
key_expected |= 1 << OVS_KEY_ATTR_MPLS;
if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
mask_allowed |= 1 << OVS_KEY_ATTR_MPLS;
}
if (match->key->eth.type == htons(ETH_P_IP)) {
key_expected |= 1 << OVS_KEY_ATTR_IPV4;
if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
@ -244,6 +245,38 @@ static bool match_validate(const struct sw_flow_match *match,
return true;
}
size_t ovs_key_attr_size(void)
{
/* Whenever adding new OVS_KEY_ FIELDS, we should consider
* updating this function.
*/
BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22);
return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
+ nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
+ nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
+ nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
+ nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
+ nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
+ nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
+ nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
+ nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
+ nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */
+ nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */
+ nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
+ nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
+ nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */
+ nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
+ nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
}
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_ENCAP] = -1,
@ -266,6 +299,7 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
[OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
[OVS_KEY_ATTR_TUNNEL] = -1,
[OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
};
static bool is_all_zero(const u8 *fp, size_t size)
@ -572,10 +606,13 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
if (is_mask)
if (is_mask) {
in_port = 0xffffffff; /* Always exact match in_port. */
else if (in_port >= DP_MAX_PORTS)
} else if (in_port >= DP_MAX_PORTS) {
OVS_NLERR("Port (%d) exceeds maximum allowable (%d).\n",
in_port, DP_MAX_PORTS);
return -EINVAL;
}
SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
*attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
@ -602,7 +639,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
const struct nlattr **a, bool is_mask)
{
int err;
u64 orig_attrs = attrs;
err = metadata_from_nlattrs(match, &attrs, a, is_mask);
if (err)
@ -634,8 +670,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
} else if (!is_mask)
SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
}
if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
__be16 eth_type;
@ -735,6 +770,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
attrs &= ~(1 << OVS_KEY_ATTR_ARP);
}
if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
const struct ovs_key_mpls *mpls_key;
mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
SW_FLOW_KEY_PUT(match, mpls.top_lse,
mpls_key->mpls_lse, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
}
if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
const struct ovs_key_tcp *tcp_key;
@ -745,15 +790,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
}
if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
SW_FLOW_KEY_PUT(match, tp.flags,
nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
is_mask);
} else {
SW_FLOW_KEY_PUT(match, tp.flags,
nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
is_mask);
}
SW_FLOW_KEY_PUT(match, tp.flags,
nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
}
@ -812,8 +851,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
attrs &= ~(1 << OVS_KEY_ATTR_ND);
}
if (attrs != 0)
if (attrs != 0) {
OVS_NLERR("Unknown key attributes (%llx).\n",
(unsigned long long)attrs);
return -EINVAL;
}
return 0;
}
@ -853,8 +895,8 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val)
* attribute specifies the mask field of the wildcarded flow.
*/
int ovs_nla_get_match(struct sw_flow_match *match,
const struct nlattr *key,
const struct nlattr *mask)
const struct nlattr *nla_key,
const struct nlattr *nla_mask)
{
const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
const struct nlattr *encap;
@ -864,7 +906,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
bool encap_valid = false;
int err;
err = parse_flow_nlattrs(key, a, &key_attrs);
err = parse_flow_nlattrs(nla_key, a, &key_attrs);
if (err)
return err;
@ -905,36 +947,43 @@ int ovs_nla_get_match(struct sw_flow_match *match,
if (err)
return err;
if (match->mask && !mask) {
/* Create an exact match mask. We need to set to 0xff all the
* 'match->mask' fields that have been touched in 'match->key'.
* We cannot simply memset 'match->mask', because padding bytes
* and fields not specified in 'match->key' should be left to 0.
* Instead, we use a stream of netlink attributes, copied from
* 'key' and set to 0xff: ovs_key_from_nlattrs() will take care
* of filling 'match->mask' appropriately.
*/
newmask = kmemdup(key, nla_total_size(nla_len(key)),
GFP_KERNEL);
if (!newmask)
return -ENOMEM;
if (match->mask) {
if (!nla_mask) {
/* Create an exact match mask. We need to set to 0xff
* all the 'match->mask' fields that have been touched
* in 'match->key'. We cannot simply memset
* 'match->mask', because padding bytes and fields not
* specified in 'match->key' should be left to 0.
* Instead, we use a stream of netlink attributes,
* copied from 'key' and set to 0xff.
* ovs_key_from_nlattrs() will take care of filling
* 'match->mask' appropriately.
*/
newmask = kmemdup(nla_key,
nla_total_size(nla_len(nla_key)),
GFP_KERNEL);
if (!newmask)
return -ENOMEM;
mask_set_nlattr(newmask, 0xff);
mask_set_nlattr(newmask, 0xff);
/* The userspace does not send tunnel attributes that are 0,
* but we should not wildcard them nonetheless.
*/
if (match->key->tun_key.ipv4_dst)
SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, 0xff, true);
/* The userspace does not send tunnel attributes that
* are 0, but we should not wildcard them nonetheless.
*/
if (match->key->tun_key.ipv4_dst)
SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
0xff, true);
mask = newmask;
}
nla_mask = newmask;
}
if (mask) {
err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs);
if (err)
goto free_newmask;
/* Always match on tci. */
SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) {
__be16 eth_type = 0;
__be16 tci = 0;
@ -1140,6 +1189,14 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
arp_key->arp_op = htons(output->ip.proto);
ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
} else if (eth_p_mpls(swkey->eth.type)) {
struct ovs_key_mpls *mpls_key;
nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
if (!nla)
goto nla_put_failure;
mpls_key = nla_data(nla);
mpls_key->mpls_lse = output->mpls.top_lse;
}
if ((swkey->eth.type == htons(ETH_P_IP) ||
@ -1226,12 +1283,14 @@ nla_put_failure:
#define MAX_ACTIONS_BUFSIZE (32 * 1024)
struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size)
static struct sw_flow_actions *nla_alloc_flow_actions(int size)
{
struct sw_flow_actions *sfa;
if (size > MAX_ACTIONS_BUFSIZE)
if (size > MAX_ACTIONS_BUFSIZE) {
OVS_NLERR("Flow action size (%u bytes) exceeds maximum", size);
return ERR_PTR(-EINVAL);
}
sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
if (!sfa)
@ -1269,7 +1328,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
new_acts_size = MAX_ACTIONS_BUFSIZE;
}
acts = ovs_nla_alloc_flow_actions(new_acts_size);
acts = nla_alloc_flow_actions(new_acts_size);
if (IS_ERR(acts))
return (void *)acts;
@ -1336,9 +1395,15 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
a->nla_len = sfa->actions_len - st_offset;
}
static int __ovs_nla_copy_actions(const struct nlattr *attr,
const struct sw_flow_key *key,
int depth, struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci);
static int validate_and_copy_sample(const struct nlattr *attr,
const struct sw_flow_key *key, int depth,
struct sw_flow_actions **sfa)
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci)
{
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
const struct nlattr *probability, *actions;
@ -1375,7 +1440,8 @@ static int validate_and_copy_sample(const struct nlattr *attr,
if (st_acts < 0)
return st_acts;
err = ovs_nla_copy_actions(actions, key, depth + 1, sfa);
err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa,
eth_type, vlan_tci);
if (err)
return err;
@ -1385,10 +1451,10 @@ static int validate_and_copy_sample(const struct nlattr *attr,
return 0;
}
static int validate_tp_port(const struct sw_flow_key *flow_key)
static int validate_tp_port(const struct sw_flow_key *flow_key,
__be16 eth_type)
{
if ((flow_key->eth.type == htons(ETH_P_IP) ||
flow_key->eth.type == htons(ETH_P_IPV6)) &&
if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) &&
(flow_key->tp.src || flow_key->tp.dst))
return 0;
@ -1483,7 +1549,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
static int validate_set(const struct nlattr *a,
const struct sw_flow_key *flow_key,
struct sw_flow_actions **sfa,
bool *set_tun)
bool *set_tun, __be16 eth_type)
{
const struct nlattr *ovs_key = nla_data(a);
int key_type = nla_type(ovs_key);
@ -1508,6 +1574,9 @@ static int validate_set(const struct nlattr *a,
break;
case OVS_KEY_ATTR_TUNNEL:
if (eth_p_mpls(eth_type))
return -EINVAL;
*set_tun = true;
err = validate_and_copy_set_tun(a, sfa);
if (err)
@ -1515,7 +1584,7 @@ static int validate_set(const struct nlattr *a,
break;
case OVS_KEY_ATTR_IPV4:
if (flow_key->eth.type != htons(ETH_P_IP))
if (eth_type != htons(ETH_P_IP))
return -EINVAL;
if (!flow_key->ip.proto)
@ -1531,7 +1600,7 @@ static int validate_set(const struct nlattr *a,
break;
case OVS_KEY_ATTR_IPV6:
if (flow_key->eth.type != htons(ETH_P_IPV6))
if (eth_type != htons(ETH_P_IPV6))
return -EINVAL;
if (!flow_key->ip.proto)
@ -1553,19 +1622,24 @@ static int validate_set(const struct nlattr *a,
if (flow_key->ip.proto != IPPROTO_TCP)
return -EINVAL;
return validate_tp_port(flow_key);
return validate_tp_port(flow_key, eth_type);
case OVS_KEY_ATTR_UDP:
if (flow_key->ip.proto != IPPROTO_UDP)
return -EINVAL;
return validate_tp_port(flow_key);
return validate_tp_port(flow_key, eth_type);
case OVS_KEY_ATTR_MPLS:
if (!eth_p_mpls(eth_type))
return -EINVAL;
break;
case OVS_KEY_ATTR_SCTP:
if (flow_key->ip.proto != IPPROTO_SCTP)
return -EINVAL;
return validate_tp_port(flow_key);
return validate_tp_port(flow_key, eth_type);
default:
return -EINVAL;
@ -1609,12 +1683,13 @@ static int copy_action(const struct nlattr *from,
return 0;
}
int ovs_nla_copy_actions(const struct nlattr *attr,
const struct sw_flow_key *key,
int depth,
struct sw_flow_actions **sfa)
static int __ovs_nla_copy_actions(const struct nlattr *attr,
const struct sw_flow_key *key,
int depth, struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci)
{
const struct nlattr *a;
bool out_tnl_port = false;
int rem, err;
if (depth >= SAMPLE_ACTION_DEPTH)
@ -1626,6 +1701,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
[OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
[OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
[OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
[OVS_ACTION_ATTR_POP_VLAN] = 0,
[OVS_ACTION_ATTR_SET] = (u32)-1,
@ -1655,6 +1732,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
case OVS_ACTION_ATTR_OUTPUT:
if (nla_get_u32(a) >= DP_MAX_PORTS)
return -EINVAL;
out_tnl_port = false;
break;
case OVS_ACTION_ATTR_HASH: {
@ -1671,6 +1750,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
}
case OVS_ACTION_ATTR_POP_VLAN:
vlan_tci = htons(0);
break;
case OVS_ACTION_ATTR_PUSH_VLAN:
@ -1679,25 +1759,73 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
return -EINVAL;
if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
return -EINVAL;
vlan_tci = vlan->vlan_tci;
break;
case OVS_ACTION_ATTR_RECIRC:
break;
case OVS_ACTION_ATTR_PUSH_MPLS: {
const struct ovs_action_push_mpls *mpls = nla_data(a);
/* The networking stack does not allow simultaneous tunnel
* and MPLS GSO.
*/
if (out_tnl_port)
return -EINVAL;
if (!eth_p_mpls(mpls->mpls_ethertype))
return -EINVAL;
/* Only allow MPLS to be pushed onto a whitelist of packet
* types for which the tag order is known.
*/
if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
(eth_type != htons(ETH_P_IP) &&
eth_type != htons(ETH_P_IPV6) &&
eth_type != htons(ETH_P_ARP) &&
eth_type != htons(ETH_P_RARP) &&
!eth_p_mpls(eth_type)))
return -EINVAL;
eth_type = mpls->mpls_ethertype;
break;
}
case OVS_ACTION_ATTR_POP_MPLS:
if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
!eth_p_mpls(eth_type))
return -EINVAL;
/* Disallow subsequent L2.5+ set and mpls_pop actions
* as there is no check here to ensure that the new
* eth_type is valid and thus set actions could
* write off the end of the packet or otherwise
* corrupt it.
*
* Support for these actions is planned using packet
* recirculation.
*/
eth_type = htons(0);
break;
case OVS_ACTION_ATTR_SET:
err = validate_set(a, key, sfa, &skip_copy);
err = validate_set(a, key, sfa,
&out_tnl_port, eth_type);
if (err)
return err;
skip_copy = out_tnl_port;
break;
case OVS_ACTION_ATTR_SAMPLE:
err = validate_and_copy_sample(a, key, depth, sfa);
err = validate_and_copy_sample(a, key, depth, sfa,
eth_type, vlan_tci);
if (err)
return err;
skip_copy = true;
break;
default:
OVS_NLERR("Unknown tunnel attribute (%d).\n", type);
return -EINVAL;
}
if (!skip_copy) {
@ -1713,6 +1841,24 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
return 0;
}
int ovs_nla_copy_actions(const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa)
{
int err;
*sfa = nla_alloc_flow_actions(nla_len(attr));
if (IS_ERR(*sfa))
return PTR_ERR(*sfa);
err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type,
key->eth.tci);
if (err)
kfree(*sfa);
return err;
}
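With allocation folded into ovs_nla_copy_actions(), callers no longer pre-allocate the actions buffer; a sketch of the resulting caller pattern (mirroring the ovs_flow_cmd_new() hunk above):

struct sw_flow_actions *acts;
int error;

error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
			     &acts);
if (error)
	goto err_kfree_flow;	/* acts was freed by the helper on error */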
static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
{
const struct nlattr *a;


@ -37,6 +37,8 @@
#include "flow.h"
size_t ovs_key_attr_size(void);
void ovs_match_init(struct sw_flow_match *match,
struct sw_flow_key *key, struct sw_flow_mask *mask);
@ -49,12 +51,11 @@ int ovs_nla_get_match(struct sw_flow_match *match,
const struct nlattr *);
int ovs_nla_copy_actions(const struct nlattr *attr,
const struct sw_flow_key *key, int depth,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa);
int ovs_nla_put_actions(const struct nlattr *attr,
int len, struct sk_buff *skb);
struct sw_flow_actions *ovs_nla_alloc_flow_actions(int actions_len);
void ovs_nla_free_flow_actions(struct sw_flow_actions *);
#endif /* flow_netlink.h */


@ -1,5 +1,5 @@
/*
* Copyright (c) 2007-2013 Nicira, Inc.
* Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@ -250,11 +250,14 @@ skip_flows:
__table_instance_destroy(ti);
}
void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
/* No need for locking; this function is called from an RCU callback
* or from the error path.
*/
void ovs_flow_tbl_destroy(struct flow_table *table)
{
struct table_instance *ti = ovsl_dereference(table->ti);
struct table_instance *ti = rcu_dereference_raw(table->ti);
table_instance_destroy(ti, deferred);
table_instance_destroy(ti, false);
}
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,


@ -62,7 +62,7 @@ void ovs_flow_free(struct sw_flow *, bool deferred);
int ovs_flow_tbl_init(struct flow_table *);
int ovs_flow_tbl_count(struct flow_table *table);
void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred);
void ovs_flow_tbl_destroy(struct flow_table *table);
int ovs_flow_tbl_flush(struct flow_table *flow_table);
int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,


@ -224,6 +224,11 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
struct net_device *netdev = netdev_vport_priv(vport)->dev;
int len;
if (unlikely(!(netdev->flags & IFF_UP))) {
kfree_skb(skb);
return 0;
}
len = skb->len;
skb_dst_drop(skb);