diff --git a/Documentation/networking/xfrm_device.txt b/Documentation/networking/xfrm_device.txt index 267f55b5f54a..a1c904dc70dc 100644 --- a/Documentation/networking/xfrm_device.txt +++ b/Documentation/networking/xfrm_device.txt @@ -111,9 +111,10 @@ the stack in xfrm_input(). xfrm_state_hold(xs); store the state information into the skb - skb->sp = secpath_dup(skb->sp); - skb->sp->xvec[skb->sp->len++] = xs; - skb->sp->olen++; + sp = secpath_set(skb); + if (!sp) return; + sp->xvec[sp->len++] = xs; + sp->olen++; indicate the success and/or error status of the offload xo = xfrm_offload(skb); diff --git a/drivers/crypto/chelsio/chcr_ipsec.c b/drivers/crypto/chelsio/chcr_ipsec.c index 461b97e2f1fd..ceaa16b8f72e 100644 --- a/drivers/crypto/chelsio/chcr_ipsec.c +++ b/drivers/crypto/chelsio/chcr_ipsec.c @@ -570,6 +570,7 @@ int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev) struct sge_eth_txq *q; struct port_info *pi; dma_addr_t addr[MAX_SKB_FRAGS + 1]; + struct sec_path *sp; bool immediate = false; if (!x->xso.offload_handle) @@ -578,7 +579,8 @@ int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev) sa_entry = (struct ipsec_sa_entry *)x->xso.offload_handle; kctx_len = sa_entry->kctx_len; - if (skb->sp->len != 1) { + sp = skb_sec_path(skb); + if (sp->len != 1) { out_free: dev_kfree_skb_any(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index 4d77f42e035c..ff85ce5791a3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -1065,11 +1065,13 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, struct ixgbe_adapter *adapter = netdev_priv(tx_ring->netdev); struct ixgbe_ipsec *ipsec = adapter->ipsec; struct xfrm_state *xs; + struct sec_path *sp; struct tx_sa *tsa; - if (unlikely(!first->skb->sp->len)) { + sp = skb_sec_path(first->skb); + if (unlikely(!sp->len)) { netdev_err(tx_ring->netdev, "%s: no xfrm state len = %d\n", - __func__, first->skb->sp->len); + __func__, sp->len); return 0; } @@ -1159,6 +1161,7 @@ void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring, struct xfrm_state *xs = NULL; struct ipv6hdr *ip6 = NULL; struct iphdr *ip4 = NULL; + struct sec_path *sp; void *daddr; __be32 spi; u8 *c_hdr; @@ -1198,12 +1201,12 @@ void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring, if (unlikely(!xs)) return; - skb->sp = secpath_dup(skb->sp); - if (unlikely(!skb->sp)) + sp = secpath_set(skb); + if (unlikely(!sp)) return; - skb->sp->xvec[skb->sp->len++] = xs; - skb->sp->olen++; + sp->xvec[sp->len++] = xs; + sp->olen++; xo = xfrm_offload(skb); xo->flags = CRYPTO_DONE; xo->status = CRYPTO_SUCCESS; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index f1e40734c975..2cd8c42d1403 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8695,7 +8695,8 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, #endif /* IXGBE_FCOE */ #ifdef CONFIG_IXGBE_IPSEC - if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx)) + if (secpath_exists(skb) && + !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx)) goto out_drop; #endif tso = ixgbe_tso(tx_ring, first, &hdr_len, &ipsec_tx); @@ -10192,7 +10193,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, */ if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) { #ifdef CONFIG_IXGBE_IPSEC - if (!skb->sp) + if (!secpath_exists(skb)) #endif features &= ~NETIF_F_TSO; } diff --git a/drivers/net/ethernet/intel/ixgbevf/ipsec.c b/drivers/net/ethernet/intel/ixgbevf/ipsec.c index e8a3231be0bf..5170dd9d8705 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ipsec.c +++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.c @@ -450,12 +450,14 @@ int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring, struct ixgbevf_adapter *adapter = netdev_priv(tx_ring->netdev); struct ixgbevf_ipsec *ipsec = adapter->ipsec; struct xfrm_state *xs; + struct sec_path *sp; struct tx_sa *tsa; u16 sa_idx; - if (unlikely(!first->skb->sp->len)) { + sp = skb_sec_path(first->skb); + if (unlikely(!sp->len)) { netdev_err(tx_ring->netdev, "%s: no xfrm state len = %d\n", - __func__, first->skb->sp->len); + __func__, sp->len); return 0; } @@ -546,6 +548,7 @@ void ixgbevf_ipsec_rx(struct ixgbevf_ring *rx_ring, struct xfrm_state *xs = NULL; struct ipv6hdr *ip6 = NULL; struct iphdr *ip4 = NULL; + struct sec_path *sp; void *daddr; __be32 spi; u8 *c_hdr; @@ -585,12 +588,12 @@ void ixgbevf_ipsec_rx(struct ixgbevf_ring *rx_ring, if (unlikely(!xs)) return; - skb->sp = secpath_dup(skb->sp); - if (unlikely(!skb->sp)) + sp = secpath_set(skb); + if (unlikely(!sp)) return; - skb->sp->xvec[skb->sp->len++] = xs; - skb->sp->olen++; + sp->xvec[sp->len++] = xs; + sp->olen++; xo = xfrm_offload(skb); xo->flags = CRYPTO_DONE; xo->status = CRYPTO_SUCCESS; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 2de81f046fb5..49e23afa05a2 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4157,7 +4157,7 @@ static int ixgbevf_xmit_frame_ring(struct sk_buff *skb, first->protocol = vlan_get_protocol(skb); #ifdef CONFIG_IXGBEVF_IPSEC - if (skb->sp && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx)) + if (secpath_exists(skb) && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx)) goto out_drop; #endif tso = ixgbevf_tso(tx_ring, first, &hdr_len, &ipsec_tx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 128a82b1dbfc..53608afd39b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -254,11 +254,13 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, struct mlx5e_ipsec_metadata *mdata; struct mlx5e_ipsec_sa_entry *sa_entry; struct xfrm_state *x; + struct sec_path *sp; if (!xo) return skb; - if (unlikely(skb->sp->len != 1)) { + sp = skb_sec_path(skb); + if (unlikely(sp->len != 1)) { atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle); goto drop; } @@ -305,10 +307,11 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb, struct mlx5e_priv *priv = netdev_priv(netdev); struct xfrm_offload *xo; struct xfrm_state *xs; + struct sec_path *sp; u32 sa_handle; - skb->sp = secpath_dup(skb->sp); - if (unlikely(!skb->sp)) { + sp = secpath_set(skb); + if (unlikely(!sp)) { atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc); return NULL; } @@ -320,8 +323,9 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb, return NULL; } - skb->sp->xvec[skb->sp->len++] = xs; - skb->sp->olen++; + sp = skb_sec_path(skb); + sp->xvec[sp->len++] = xs; + sp->olen++; xo = xfrm_offload(skb); xo->flags = CRYPTO_DONE; @@ -372,10 +376,11 @@ struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev, netdev_features_t features) { + struct sec_path *sp = skb_sec_path(skb); struct xfrm_state *x; - if (skb->sp && skb->sp->len) { - x = skb->sp->xvec[0]; + if (sp && sp->len) { + x = sp->xvec[0]; if (x && x->xso.offload_handle) return true; } diff --git a/drivers/net/netdevsim/ipsec.c b/drivers/net/netdevsim/ipsec.c index 2dcf6cc269d0..76e11d889bb6 100644 --- a/drivers/net/netdevsim/ipsec.c +++ b/drivers/net/netdevsim/ipsec.c @@ -227,18 +227,19 @@ static const struct xfrmdev_ops nsim_xfrmdev_ops = { bool nsim_ipsec_tx(struct netdevsim *ns, struct sk_buff *skb) { + struct sec_path *sp = skb_sec_path(skb); struct nsim_ipsec *ipsec = &ns->ipsec; struct xfrm_state *xs; struct nsim_sa *tsa; u32 sa_idx; /* do we even need to check this packet? */ - if (!skb->sp) + if (!sp) return true; - if (unlikely(!skb->sp->len)) { + if (unlikely(!sp->len)) { netdev_err(ns->netdev, "no xfrm state len = %d\n", - skb->sp->len); + sp->len); return false; } diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index fa0686500970..5f2614d02e03 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -17,43 +17,58 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb) skb_dst_drop(skb); } +static inline struct nf_bridge_info * +nf_bridge_info_get(const struct sk_buff *skb) +{ + return skb_ext_find(skb, SKB_EXT_BRIDGE_NF); +} + +static inline bool nf_bridge_info_exists(const struct sk_buff *skb) +{ + return skb_ext_exist(skb, SKB_EXT_BRIDGE_NF); +} + static inline int nf_bridge_get_physinif(const struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge; + const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); - if (skb->nf_bridge == NULL) + if (!nf_bridge) return 0; - nf_bridge = skb->nf_bridge; return nf_bridge->physindev ? nf_bridge->physindev->ifindex : 0; } static inline int nf_bridge_get_physoutif(const struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge; + const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); - if (skb->nf_bridge == NULL) + if (!nf_bridge) return 0; - nf_bridge = skb->nf_bridge; return nf_bridge->physoutdev ? nf_bridge->physoutdev->ifindex : 0; } static inline struct net_device * nf_bridge_get_physindev(const struct sk_buff *skb) { - return skb->nf_bridge ? skb->nf_bridge->physindev : NULL; + const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + return nf_bridge ? nf_bridge->physindev : NULL; } static inline struct net_device * nf_bridge_get_physoutdev(const struct sk_buff *skb) { - return skb->nf_bridge ? skb->nf_bridge->physoutdev : NULL; + const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + return nf_bridge ? nf_bridge->physoutdev : NULL; } static inline bool nf_bridge_in_prerouting(const struct sk_buff *skb) { - return skb->nf_bridge && skb->nf_bridge->in_prerouting; + const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + return nf_bridge && nf_bridge->in_prerouting; } #else #define br_drop_fake_rtable(skb) do { } while (0) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b1831a5ca173..3f741b04e55d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -245,6 +245,7 @@ struct iov_iter; struct napi_struct; struct bpf_prog; union bpf_attr; +struct skb_ext; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct nf_conntrack { @@ -254,7 +255,6 @@ struct nf_conntrack { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info { - refcount_t use; enum { BRNF_PROTO_UNCHANGED, BRNF_PROTO_8021Q, @@ -636,6 +636,7 @@ typedef unsigned char *sk_buff_data_t; * @queue_mapping: Queue mapping for multiqueue devices * @xmit_more: More SKBs are pending for this queue * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves + * @active_extensions: active extensions (skb_ext_id types) * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport @@ -665,6 +666,7 @@ typedef unsigned char *sk_buff_data_t; * @data: Data head pointer * @truesize: Buffer size * @users: User count - see {datagram,tcp}.c + * @extensions: allocated extensions, valid if active_extensions is nonzero */ struct sk_buff { @@ -712,14 +714,8 @@ struct sk_buff { struct list_head tcp_tsorted_anchor; }; -#ifdef CONFIG_XFRM - struct sec_path *sp; -#endif #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) unsigned long _nfct; -#endif -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - struct nf_bridge_info *nf_bridge; #endif unsigned int len, data_len; @@ -747,7 +743,9 @@ struct sk_buff { head_frag:1, xmit_more:1, pfmemalloc:1; - +#ifdef CONFIG_SKB_EXTENSIONS + __u8 active_extensions; +#endif /* fields enclosed in headers_start/headers_end are copied * using a single memcpy() in __copy_skb_header() */ @@ -869,6 +867,11 @@ struct sk_buff { *data; unsigned int truesize; refcount_t users; + +#ifdef CONFIG_SKB_EXTENSIONS + /* only useable after checking ->active_extensions != 0 */ + struct skb_ext *extensions; +#endif }; #ifdef __KERNEL__ @@ -3896,18 +3899,108 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct) atomic_inc(&nfct->use); } #endif + +#ifdef CONFIG_SKB_EXTENSIONS +enum skb_ext_id { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) -static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) + SKB_EXT_BRIDGE_NF, +#endif +#ifdef CONFIG_XFRM + SKB_EXT_SEC_PATH, +#endif + SKB_EXT_NUM, /* must be last */ +}; + +/** + * struct skb_ext - sk_buff extensions + * @refcnt: 1 on allocation, deallocated on 0 + * @offset: offset to add to @data to obtain extension address + * @chunks: size currently allocated, stored in SKB_EXT_ALIGN_SHIFT units + * @data: start of extension data, variable sized + * + * Note: offsets/lengths are stored in chunks of 8 bytes, this allows + * to use 'u8' types while allowing up to 2kb worth of extension data. + */ +struct skb_ext { + refcount_t refcnt; + u8 offset[SKB_EXT_NUM]; /* in chunks of 8 bytes */ + u8 chunks; /* same */ + char data[0] __aligned(8); +}; + +void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id); +void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id); +void __skb_ext_put(struct skb_ext *ext); + +static inline void skb_ext_put(struct sk_buff *skb) { - if (nf_bridge && refcount_dec_and_test(&nf_bridge->use)) - kfree(nf_bridge); + if (skb->active_extensions) + __skb_ext_put(skb->extensions); } -static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge) + +static inline void skb_ext_get(struct sk_buff *skb) { - if (nf_bridge) - refcount_inc(&nf_bridge->use); + if (skb->active_extensions) { + struct skb_ext *ext = skb->extensions; + + if (ext) + refcount_inc(&ext->refcnt); + } } -#endif /* CONFIG_BRIDGE_NETFILTER */ + +static inline void __skb_ext_copy(struct sk_buff *dst, + const struct sk_buff *src) +{ + dst->active_extensions = src->active_extensions; + + if (src->active_extensions) { + struct skb_ext *ext = src->extensions; + + refcount_inc(&ext->refcnt); + dst->extensions = ext; + } +} + +static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *src) +{ + skb_ext_put(dst); + __skb_ext_copy(dst, src); +} + +static inline bool __skb_ext_exist(const struct skb_ext *ext, enum skb_ext_id i) +{ + return !!ext->offset[i]; +} + +static inline bool skb_ext_exist(const struct sk_buff *skb, enum skb_ext_id id) +{ + return skb->active_extensions & (1 << id); +} + +static inline void skb_ext_del(struct sk_buff *skb, enum skb_ext_id id) +{ + if (skb_ext_exist(skb, id)) + __skb_ext_del(skb, id); +} + +static inline void *skb_ext_find(const struct sk_buff *skb, enum skb_ext_id id) +{ + if (skb_ext_exist(skb, id)) { + struct skb_ext *ext = skb->extensions; + + return (void *)ext + (ext->offset[id] << 3); + } + + return NULL; +} +#else +static inline void skb_ext_put(struct sk_buff *skb) {} +static inline void skb_ext_get(struct sk_buff *skb) {} +static inline void skb_ext_del(struct sk_buff *skb, int unused) {} +static inline void __skb_ext_copy(struct sk_buff *d, const struct sk_buff *s) {} +static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *s) {} +#endif /* CONFIG_SKB_EXTENSIONS */ + static inline void nf_reset(struct sk_buff *skb) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) @@ -3915,8 +4008,7 @@ static inline void nf_reset(struct sk_buff *skb) skb->_nfct = 0; #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - nf_bridge_put(skb->nf_bridge); - skb->nf_bridge = NULL; + skb_ext_del(skb, SKB_EXT_BRIDGE_NF); #endif } @@ -3934,7 +4026,7 @@ static inline void ipvs_reset(struct sk_buff *skb) #endif } -/* Note: This doesn't put any conntrack and bridge info in dst. */ +/* Note: This doesn't put any conntrack info in dst. */ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, bool copy) { @@ -3942,10 +4034,6 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, dst->_nfct = src->_nfct; nf_conntrack_get(skb_nfct(src)); #endif -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - dst->nf_bridge = src->nf_bridge; - nf_bridge_get(src->nf_bridge); -#endif #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES) if (copy) dst->nf_trace = src->nf_trace; @@ -3956,9 +4044,6 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(skb_nfct(dst)); -#endif -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - nf_bridge_put(dst->nf_bridge); #endif __nf_copy(dst, src, true); } @@ -3981,12 +4066,19 @@ static inline void skb_init_secmark(struct sk_buff *skb) { } #endif +static inline int secpath_exists(const struct sk_buff *skb) +{ +#ifdef CONFIG_XFRM + return skb_ext_exist(skb, SKB_EXT_SEC_PATH); +#else + return 0; +#endif +} + static inline bool skb_irq_freeable(const struct sk_buff *skb) { return !skb->destructor && -#if IS_ENABLED(CONFIG_XFRM) - !skb->sp && -#endif + !secpath_exists(skb) && !skb_nfct(skb) && !skb->_skb_refdst && !skb_has_frag_list(skb); @@ -4032,10 +4124,10 @@ static inline bool skb_get_dst_pending_confirm(const struct sk_buff *skb) return skb->dst_pending_confirm != 0; } -static inline struct sec_path *skb_sec_path(struct sk_buff *skb) +static inline struct sec_path *skb_sec_path(const struct sk_buff *skb) { #ifdef CONFIG_XFRM - return skb->sp; + return skb_ext_find(skb, SKB_EXT_SEC_PATH); #else return NULL; #endif diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index 74af19c3a8f7..4cd56808ac4e 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -6,12 +6,12 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) { - skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC); + struct nf_bridge_info *b = skb_ext_add(skb, SKB_EXT_BRIDGE_NF); - if (likely(skb->nf_bridge)) - refcount_set(&(skb->nf_bridge->use), 1); + if (b) + memset(b, 0, sizeof(*b)); - return skb->nf_bridge; + return b; } void nf_bridge_update_protocol(struct sk_buff *skb); @@ -22,12 +22,6 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, struct sock *sk, int (*okfn)(struct net *, struct sock *, struct sk_buff *)); -static inline struct nf_bridge_info * -nf_bridge_info_get(const struct sk_buff *skb) -{ - return skb->nf_bridge; -} - unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb); static inline void nf_bridge_push_encap_header(struct sk_buff *skb) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0a8d70d16918..38c232861a64 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1096,7 +1096,6 @@ struct xfrm_offload { }; struct sec_path { - refcount_t refcnt; int len; int olen; @@ -1104,41 +1103,13 @@ struct sec_path { struct xfrm_offload ovec[XFRM_MAX_OFFLOAD_DEPTH]; }; -static inline int secpath_exists(struct sk_buff *skb) -{ -#ifdef CONFIG_XFRM - return skb->sp != NULL; -#else - return 0; -#endif -} - -static inline struct sec_path * -secpath_get(struct sec_path *sp) -{ - if (sp) - refcount_inc(&sp->refcnt); - return sp; -} - -void __secpath_destroy(struct sec_path *sp); - -static inline void -secpath_put(struct sec_path *sp) -{ - if (sp && refcount_dec_and_test(&sp->refcnt)) - __secpath_destroy(sp); -} - -struct sec_path *secpath_dup(struct sec_path *src); -int secpath_set(struct sk_buff *skb); +struct sec_path *secpath_set(struct sk_buff *skb); static inline void secpath_reset(struct sk_buff *skb) { #ifdef CONFIG_XFRM - secpath_put(skb->sp); - skb->sp = NULL; + skb_ext_del(skb, SKB_EXT_SEC_PATH); #endif } @@ -1194,7 +1165,7 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, ndir, skb, family); - return (!net->xfrm.policy_count[dir] && !skb->sp) || + return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) || (skb_dst(skb)->flags & DST_NOPOLICY) || __xfrm_policy_check(sk, ndir, skb, family); } @@ -1905,14 +1876,16 @@ static inline void xfrm_states_delete(struct xfrm_state **states, int n) #ifdef CONFIG_XFRM static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb) { - return skb->sp->xvec[skb->sp->len - 1]; + struct sec_path *sp = skb_sec_path(skb); + + return sp->xvec[sp->len - 1]; } #endif static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) { #ifdef CONFIG_XFRM - struct sec_path *sp = skb->sp; + struct sec_path *sp = skb_sec_path(skb); if (!sp || !sp->olen || sp->len != sp->olen) return NULL; diff --git a/net/Kconfig b/net/Kconfig index f235edb593ba..5cb9de1aaf88 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -51,6 +51,9 @@ config NET_INGRESS config NET_EGRESS bool +config SKB_EXTENSIONS + bool + menu "Networking options" source "net/packet/Kconfig" @@ -184,6 +187,7 @@ config BRIDGE_NETFILTER depends on NETFILTER && INET depends on NETFILTER_ADVANCED select NETFILTER_FAMILY_BRIDGE + select SKB_EXTENSIONS default m ---help--- Enabling this option will let arptables resp. iptables see bridged diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index c9383c470a83..d21a23698410 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -132,10 +132,7 @@ static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); static void nf_bridge_info_free(struct sk_buff *skb) { - if (skb->nf_bridge) { - nf_bridge_put(skb->nf_bridge); - skb->nf_bridge = NULL; - } + skb_ext_del(skb, SKB_EXT_BRIDGE_NF); } static inline struct net_device *bridge_parent(const struct net_device *dev) @@ -148,19 +145,7 @@ static inline struct net_device *bridge_parent(const struct net_device *dev) static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; - - if (refcount_read(&nf_bridge->use) > 1) { - struct nf_bridge_info *tmp = nf_bridge_alloc(skb); - - if (tmp) { - memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info)); - refcount_set(&tmp->use, 1); - } - nf_bridge_put(nf_bridge); - nf_bridge = tmp; - } - return nf_bridge; + return skb_ext_add(skb, SKB_EXT_BRIDGE_NF); } unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) @@ -247,7 +232,9 @@ drop: void nf_bridge_update_protocol(struct sk_buff *skb) { - switch (skb->nf_bridge->orig_proto) { + const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + switch (nf_bridge->orig_proto) { case BRNF_PROTO_8021Q: skb->protocol = htons(ETH_P_8021Q); break; @@ -506,7 +493,6 @@ static unsigned int br_nf_pre_routing(void *priv, if (br_validate_ipv4(state->net, skb)) return NF_DROP; - nf_bridge_put(skb->nf_bridge); if (!nf_bridge_alloc(skb)) return NF_DROP; if (!setup_pre_routing(skb)) @@ -569,7 +555,8 @@ static unsigned int br_nf_forward_ip(void *priv, struct net_device *parent; u_int8_t pf; - if (!skb->nf_bridge) + nf_bridge = nf_bridge_info_get(skb); + if (!nf_bridge) return NF_ACCEPT; /* Need exclusive nf_bridge_info since we might have multiple @@ -701,7 +688,9 @@ br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { - if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE) + const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + if (nf_bridge->orig_proto == BRNF_PROTO_PPPOE) return PPPOE_SES_HLEN; return 0; } @@ -839,7 +828,9 @@ static unsigned int ip_sabotage_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (skb->nf_bridge && !skb->nf_bridge->in_prerouting && + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + if (nf_bridge && !nf_bridge->in_prerouting && !netif_is_l3_master(skb->dev)) { state->okfn(state->net, state->sk, skb); return NF_STOLEN; @@ -877,7 +868,9 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) static int br_nf_dev_xmit(struct sk_buff *skb) { - if (skb->nf_bridge && skb->nf_bridge->bridged_dnat) { + const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + if (nf_bridge && nf_bridge->bridged_dnat) { br_nf_pre_routing_finish_bridge_slow(skb); return 1; } diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 96c072e71ea2..94039f588f1d 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -224,8 +224,8 @@ unsigned int br_nf_pre_routing_ipv6(void *priv, if (br_validate_ipv6(state->net, skb)) return NF_DROP; - nf_bridge_put(skb->nf_bridge); - if (!nf_bridge_alloc(skb)) + nf_bridge = nf_bridge_alloc(skb); + if (!nf_bridge) return NF_DROP; if (!setup_pre_routing(skb)) return NF_DROP; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 40552547c69a..cb0bf4215745 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -79,6 +79,9 @@ struct kmem_cache *skbuff_head_cache __ro_after_init; static struct kmem_cache *skbuff_fclone_cache __ro_after_init; +#ifdef CONFIG_SKB_EXTENSIONS +static struct kmem_cache *skbuff_ext_cache __ro_after_init; +#endif int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; EXPORT_SYMBOL(sysctl_max_skb_frags); @@ -606,7 +609,6 @@ fastpath: void skb_release_head_state(struct sk_buff *skb) { skb_dst_drop(skb); - secpath_reset(skb); if (skb->destructor) { WARN_ON(in_irq()); skb->destructor(skb); @@ -614,9 +616,7 @@ void skb_release_head_state(struct sk_buff *skb) #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_conntrack_put(skb_nfct(skb)); #endif -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - nf_bridge_put(skb->nf_bridge); -#endif + skb_ext_put(skb); } /* Free everything but the sk_buff shell. */ @@ -796,9 +796,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->dev = old->dev; memcpy(new->cb, old->cb, sizeof(old->cb)); skb_dst_copy(new, old); -#ifdef CONFIG_XFRM - new->sp = secpath_get(old->sp); -#endif + __skb_ext_copy(new, old); __nf_copy(new, old, false); /* Note : this field could be in headers_start/headers_end section @@ -3902,6 +3900,46 @@ done: } EXPORT_SYMBOL_GPL(skb_gro_receive); +#ifdef CONFIG_SKB_EXTENSIONS +#define SKB_EXT_ALIGN_VALUE 8 +#define SKB_EXT_CHUNKSIZEOF(x) (ALIGN((sizeof(x)), SKB_EXT_ALIGN_VALUE) / SKB_EXT_ALIGN_VALUE) + +static const u8 skb_ext_type_len[] = { +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + [SKB_EXT_BRIDGE_NF] = SKB_EXT_CHUNKSIZEOF(struct nf_bridge_info), +#endif +#ifdef CONFIG_XFRM + [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path), +#endif +}; + +static __always_inline unsigned int skb_ext_total_length(void) +{ + return SKB_EXT_CHUNKSIZEOF(struct skb_ext) + +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + skb_ext_type_len[SKB_EXT_BRIDGE_NF] + +#endif +#ifdef CONFIG_XFRM + skb_ext_type_len[SKB_EXT_SEC_PATH] + +#endif + 0; +} + +static void skb_extensions_init(void) +{ + BUILD_BUG_ON(SKB_EXT_NUM >= 8); + BUILD_BUG_ON(skb_ext_total_length() > 255); + + skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache", + SKB_EXT_ALIGN_VALUE * skb_ext_total_length(), + 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, + NULL); +} +#else +static void skb_extensions_init(void) {} +#endif + void __init skb_init(void) { skbuff_head_cache = kmem_cache_create_usercopy("skbuff_head_cache", @@ -3916,6 +3954,7 @@ void __init skb_init(void) 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + skb_extensions_init(); } static int @@ -5554,3 +5593,151 @@ void skb_condense(struct sk_buff *skb) */ skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); } + +#ifdef CONFIG_SKB_EXTENSIONS +static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id) +{ + return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE); +} + +static struct skb_ext *skb_ext_alloc(void) +{ + struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC); + + if (new) { + memset(new->offset, 0, sizeof(new->offset)); + refcount_set(&new->refcnt, 1); + } + + return new; +} + +static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old, + unsigned int old_active) +{ + struct skb_ext *new; + + if (refcount_read(&old->refcnt) == 1) + return old; + + new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC); + if (!new) + return NULL; + + memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE); + refcount_set(&new->refcnt, 1); + +#ifdef CONFIG_XFRM + if (old_active & (1 << SKB_EXT_SEC_PATH)) { + struct sec_path *sp = skb_ext_get_ptr(old, SKB_EXT_SEC_PATH); + unsigned int i; + + for (i = 0; i < sp->len; i++) + xfrm_state_hold(sp->xvec[i]); + } +#endif + __skb_ext_put(old); + return new; +} + +/** + * skb_ext_add - allocate space for given extension, COW if needed + * @skb: buffer + * @id: extension to allocate space for + * + * Allocates enough space for the given extension. + * If the extension is already present, a pointer to that extension + * is returned. + * + * If the skb was cloned, COW applies and the returned memory can be + * modified without changing the extension space of clones buffers. + * + * Returns pointer to the extension or NULL on allocation failure. + */ +void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id) +{ + struct skb_ext *new, *old = NULL; + unsigned int newlen, newoff; + + if (skb->active_extensions) { + old = skb->extensions; + + new = skb_ext_maybe_cow(old, skb->active_extensions); + if (!new) + return NULL; + + if (__skb_ext_exist(old, id)) { + if (old != new) + skb->extensions = new; + goto set_active; + } + + newoff = old->chunks; + } else { + newoff = SKB_EXT_CHUNKSIZEOF(*new); + + new = skb_ext_alloc(); + if (!new) + return NULL; + } + + newlen = newoff + skb_ext_type_len[id]; + new->chunks = newlen; + new->offset[id] = newoff; + skb->extensions = new; +set_active: + skb->active_extensions |= 1 << id; + return skb_ext_get_ptr(new, id); +} +EXPORT_SYMBOL(skb_ext_add); + +#ifdef CONFIG_XFRM +static void skb_ext_put_sp(struct sec_path *sp) +{ + unsigned int i; + + for (i = 0; i < sp->len; i++) + xfrm_state_put(sp->xvec[i]); +} +#endif + +void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id) +{ + struct skb_ext *ext = skb->extensions; + + skb->active_extensions &= ~(1 << id); + if (skb->active_extensions == 0) { + skb->extensions = NULL; + __skb_ext_put(ext); +#ifdef CONFIG_XFRM + } else if (id == SKB_EXT_SEC_PATH && + refcount_read(&ext->refcnt) == 1) { + struct sec_path *sp = skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH); + + skb_ext_put_sp(sp); + sp->len = 0; +#endif + } +} +EXPORT_SYMBOL(__skb_ext_del); + +void __skb_ext_put(struct skb_ext *ext) +{ + /* If this is last clone, nothing can increment + * it after check passes. Avoids one atomic op. + */ + if (refcount_read(&ext->refcnt) == 1) + goto free_now; + + if (!refcount_dec_and_test(&ext->refcnt)) + return; +free_now: +#ifdef CONFIG_XFRM + if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH)) + skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH)); +#endif + + kmem_cache_free(skbuff_ext_cache, ext); +} +EXPORT_SYMBOL(__skb_ext_put); +#endif /* CONFIG_SKB_EXTENSIONS */ diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 9e1c840596c5..5459f41fc26f 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -125,10 +125,13 @@ static void esp_output_done(struct crypto_async_request *base, int err) void *tmp; struct xfrm_state *x; - if (xo && (xo->flags & XFRM_DEV_RESUME)) - x = skb->sp->xvec[skb->sp->len - 1]; - else + if (xo && (xo->flags & XFRM_DEV_RESUME)) { + struct sec_path *sp = skb_sec_path(skb); + + x = sp->xvec[sp->len - 1]; + } else { x = skb_dst(skb)->xfrm; + } tmp = ESP_SKB_CB(skb)->tmp; esp_ssg_unref(x, tmp); diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 58834a10c0be..8756e0e790d2 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -46,11 +46,12 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, xo = xfrm_offload(skb); if (!xo || !(xo->flags & CRYPTO_DONE)) { - err = secpath_set(skb); - if (err) + struct sec_path *sp = secpath_set(skb); + + if (!sp) goto out; - if (skb->sp->len == XFRM_MAX_DEPTH) + if (sp->len == XFRM_MAX_DEPTH) goto out; x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, @@ -59,8 +60,8 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, if (!x) goto out; - skb->sp->xvec[skb->sp->len++] = x; - skb->sp->olen++; + sp->xvec[sp->len++] = x; + sp->olen++; xo = xfrm_offload(skb); if (!xo) { @@ -114,6 +115,7 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, struct crypto_aead *aead; netdev_features_t esp_features = features; struct xfrm_offload *xo = xfrm_offload(skb); + struct sec_path *sp; if (!xo) return ERR_PTR(-EINVAL); @@ -121,7 +123,8 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) return ERR_PTR(-EINVAL); - x = skb->sp->xvec[skb->sp->len - 1]; + sp = skb_sec_path(skb); + x = sp->xvec[sp->len - 1]; aead = x->data; esph = ip_esp_hdr(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ab6618036afe..c80188875f39 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -533,6 +533,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); + skb_ext_copy(to, from); #if IS_ENABLED(CONFIG_IP_VS) to->ipvs_property = from->ipvs_property; #endif diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 5cd06ba3535d..aa8304c618b8 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -102,6 +102,7 @@ EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put); /* Send RST reply */ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) { + struct net_device *br_indev __maybe_unused; struct sk_buff *nskb; struct iphdr *niph; const struct tcphdr *oth; @@ -147,10 +148,11 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) * build the eth header using the original destination's MAC as the * source, and send the RST packet directly. */ - if (oldskb->nf_bridge) { + br_indev = nf_bridge_get_physindev(oldskb); + if (br_indev) { struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = nf_bridge_get_physindev(oldskb); + nskb->dev = br_indev; niph->tot_len = htons(nskb->len); ip_send_check(niph); if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 63b2b66f9dfa..5afe9f83374d 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -145,10 +145,13 @@ static void esp_output_done(struct crypto_async_request *base, int err) void *tmp; struct xfrm_state *x; - if (xo && (xo->flags & XFRM_DEV_RESUME)) - x = skb->sp->xvec[skb->sp->len - 1]; - else + if (xo && (xo->flags & XFRM_DEV_RESUME)) { + struct sec_path *sp = skb_sec_path(skb); + + x = sp->xvec[sp->len - 1]; + } else { x = skb_dst(skb)->xfrm; + } tmp = ESP_SKB_CB(skb)->tmp; esp_ssg_unref(x, tmp); diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 6177e2171171..d46b4eb645c2 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -68,11 +68,12 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, xo = xfrm_offload(skb); if (!xo || !(xo->flags & CRYPTO_DONE)) { - err = secpath_set(skb); - if (err) + struct sec_path *sp = secpath_set(skb); + + if (!sp) goto out; - if (skb->sp->len == XFRM_MAX_DEPTH) + if (sp->len == XFRM_MAX_DEPTH) goto out; x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, @@ -81,8 +82,8 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, if (!x) goto out; - skb->sp->xvec[skb->sp->len++] = x; - skb->sp->olen++; + sp->xvec[sp->len++] = x; + sp->olen++; xo = xfrm_offload(skb); if (!xo) { @@ -141,6 +142,7 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, struct crypto_aead *aead; netdev_features_t esp_features = features; struct xfrm_offload *xo = xfrm_offload(skb); + struct sec_path *sp; if (!xo) return ERR_PTR(-EINVAL); @@ -148,7 +150,8 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) return ERR_PTR(-EINVAL); - x = skb->sp->xvec[skb->sp->len - 1]; + sp = skb_sec_path(skb); + x = sp->xvec[sp->len - 1]; aead = x->data; esph = ip_esp_hdr(skb); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 9d55ee33b7f9..703a8e801c5c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -581,6 +581,7 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); + skb_ext_copy(to, from); skb_copy_secmark(to, from); } diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 24858402e374..b9c8a763c863 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -131,6 +131,7 @@ EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_put); void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) { + struct net_device *br_indev __maybe_unused; struct sk_buff *nskb; struct tcphdr _otcph; const struct tcphdr *otcph; @@ -197,15 +198,18 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) * build the eth header using the original destination's MAC as the * source, and send the RST packet directly. */ - if (oldskb->nf_bridge) { + br_indev = nf_bridge_get_physindev(oldskb); + if (br_indev) { struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = nf_bridge_get_physindev(oldskb); + nskb->dev = br_indev; nskb->protocol = htons(ETH_P_IPV6); ip6h->payload_len = htons(sizeof(struct tcphdr)); if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), - oeth->h_source, oeth->h_dest, nskb->len) < 0) + oeth->h_source, oeth->h_dest, nskb->len) < 0) { + kfree_skb(nskb); return; + } dev_queue_xmit(nskb); } else #endif diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 9ef490dddcea..a52cb3fc6df5 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -86,14 +86,16 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, { struct net *net = dev_net(skb->dev); struct xfrm_state *x = NULL; + struct sec_path *sp; int i = 0; - if (secpath_set(skb)) { + sp = secpath_set(skb); + if (!sp) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); goto drop; } - if (1 + skb->sp->len == XFRM_MAX_DEPTH) { + if (1 + sp->len == XFRM_MAX_DEPTH) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto drop; } @@ -145,7 +147,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, goto drop; } - skb->sp->xvec[skb->sp->len++] = x; + sp->xvec[sp->len++] = x; spin_lock(&x->lock); diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index a8c5c846aec1..3a0d6880b7c9 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -156,22 +156,20 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix) { + const struct net_device *physoutdev __maybe_unused; + const struct net_device *physindev __maybe_unused; + nf_log_buf_add(m, KERN_SOH "%c%sIN=%s OUT=%s ", '0' + loginfo->u.log.level, prefix, in ? in->name : "", out ? out->name : ""); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (skb->nf_bridge) { - const struct net_device *physindev; - const struct net_device *physoutdev; - - physindev = nf_bridge_get_physindev(skb); - if (physindev && in != physindev) - nf_log_buf_add(m, "PHYSIN=%s ", physindev->name); - physoutdev = nf_bridge_get_physoutdev(skb); - if (physoutdev && out != physoutdev) - nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name); - } + physindev = nf_bridge_get_physindev(skb); + if (physindev && in != physindev) + nf_log_buf_add(m, "PHYSIN=%s ", physindev->name); + physoutdev = nf_bridge_get_physoutdev(skb); + if (physoutdev && out != physoutdev) + nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name); #endif } EXPORT_SYMBOL_GPL(nf_log_dump_packet_common); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index d67a96a25a68..a36a77bae1d6 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -46,6 +46,24 @@ void nf_unregister_queue_handler(struct net *net) } EXPORT_SYMBOL(nf_unregister_queue_handler); +static void nf_queue_entry_release_br_nf_refs(struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + if (nf_bridge) { + struct net_device *physdev; + + physdev = nf_bridge_get_physindev(skb); + if (physdev) + dev_put(physdev); + physdev = nf_bridge_get_physoutdev(skb); + if (physdev) + dev_put(physdev); + } +#endif +} + void nf_queue_entry_release_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; @@ -57,20 +75,28 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry) dev_put(state->out); if (state->sk) sock_put(state->sk); + + nf_queue_entry_release_br_nf_refs(entry->skb); +} +EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs); + +static void nf_queue_entry_get_br_nf_refs(struct sk_buff *skb) +{ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (entry->skb->nf_bridge) { + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + + if (nf_bridge) { struct net_device *physdev; - physdev = nf_bridge_get_physindev(entry->skb); + physdev = nf_bridge_get_physindev(skb); if (physdev) - dev_put(physdev); - physdev = nf_bridge_get_physoutdev(entry->skb); + dev_hold(physdev); + physdev = nf_bridge_get_physoutdev(skb); if (physdev) - dev_put(physdev); + dev_hold(physdev); } #endif } -EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs); /* Bump dev refs so they don't vanish while packet is out */ void nf_queue_entry_get_refs(struct nf_queue_entry *entry) @@ -83,18 +109,8 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry) dev_hold(state->out); if (state->sk) sock_hold(state->sk); -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (entry->skb->nf_bridge) { - struct net_device *physdev; - physdev = nf_bridge_get_physindev(entry->skb); - if (physdev) - dev_hold(physdev); - physdev = nf_bridge_get_physoutdev(entry->skb); - if (physdev) - dev_hold(physdev); - } -#endif + nf_queue_entry_get_br_nf_refs(entry->skb); } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 1ce30efe6854..0dcc3592d053 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -727,13 +727,13 @@ nf_queue_entry_dup(struct nf_queue_entry *e) */ static void nf_bridge_adjust_skb_data(struct sk_buff *skb) { - if (skb->nf_bridge) + if (nf_bridge_info_get(skb)) __skb_push(skb, skb->network_header - skb->mac_header); } static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) { - if (skb->nf_bridge) + if (nf_bridge_info_get(skb)) __skb_pull(skb, skb->network_header - skb->mac_header); } #else @@ -904,23 +904,22 @@ nfqnl_set_mode(struct nfqnl_instance *queue, static int dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) { +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + int physinif, physoutif; + + physinif = nf_bridge_get_physinif(entry->skb); + physoutif = nf_bridge_get_physoutif(entry->skb); + + if (physinif == ifindex || physoutif == ifindex) + return 1; +#endif if (entry->state.in) if (entry->state.in->ifindex == ifindex) return 1; if (entry->state.out) if (entry->state.out->ifindex == ifindex) return 1; -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (entry->skb->nf_bridge) { - int physinif, physoutif; - physinif = nf_bridge_get_physinif(entry->skb); - physoutif = nf_bridge_get_physoutif(entry->skb); - - if (physinif == ifindex || physoutif == ifindex) - return 1; - } -#endif return 0; } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 6180626c3f80..6df486c5ebd3 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -229,7 +229,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, } #ifdef CONFIG_XFRM case NFT_META_SECPATH: - nft_reg_store8(dest, !!skb->sp); + nft_reg_store8(dest, secpath_exists(skb)); break; #endif #ifdef CONFIG_NF_TABLES_BRIDGE diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 5322609f7662..b08865ec5ed3 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -161,7 +161,7 @@ static void nft_xfrm_get_eval_in(const struct nft_xfrm *priv, struct nft_regs *regs, const struct nft_pktinfo *pkt) { - const struct sec_path *sp = pkt->skb->sp; + const struct sec_path *sp = skb_sec_path(pkt->skb); const struct xfrm_state *state; if (sp == NULL || sp->len <= priv->spnum) { diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 9d6d67b953ac..4034d70bff39 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -33,7 +33,7 @@ physdev_mt(const struct sk_buff *skb, struct xt_action_param *par) /* Not a bridged IP packet or no info available yet: * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if * the destination device will be a bridge. */ - if (!skb->nf_bridge) { + if (!nf_bridge_info_exists(skb)) { /* Return MATCH if the invert flags of the used options are on */ if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) && !(info->invert & XT_PHYSDEV_OP_BRIDGED)) diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index 13f8ccf946d6..aa84e8121c93 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -56,7 +56,7 @@ match_policy_in(const struct sk_buff *skb, const struct xt_policy_info *info, unsigned short family) { const struct xt_policy_elem *e; - const struct sec_path *sp = skb->sp; + const struct sec_path *sp = skb_sec_path(skb); int strict = info->flags & XT_POLICY_MATCH_STRICT; int i, pos; diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 140270a13d54..5d43aaa17027 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -5,6 +5,7 @@ config XFRM bool depends on NET select GRO_CELLS + select SKB_EXTENSIONS config XFRM_OFFLOAD bool diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 144c137886b1..b8736f56e7f7 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -32,6 +32,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur struct softnet_data *sd; netdev_features_t esp_features = features; struct xfrm_offload *xo = xfrm_offload(skb); + struct sec_path *sp; if (!xo) return skb; @@ -39,7 +40,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur if (!(features & NETIF_F_HW_ESP)) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); - x = skb->sp->xvec[skb->sp->len - 1]; + sp = skb_sec_path(skb); + x = sp->xvec[sp->len - 1]; if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND) return skb; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 684c0bc01e2c..6bc817359b58 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -38,8 +38,6 @@ struct xfrm_trans_cb { #define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0])) -static struct kmem_cache *secpath_cachep __ro_after_init; - static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1]; @@ -111,56 +109,24 @@ static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol, return ret; } -void __secpath_destroy(struct sec_path *sp) +struct sec_path *secpath_set(struct sk_buff *skb) { - int i; - for (i = 0; i < sp->len; i++) - xfrm_state_put(sp->xvec[i]); - kmem_cache_free(secpath_cachep, sp); -} -EXPORT_SYMBOL(__secpath_destroy); + struct sec_path *sp, *tmp = skb_ext_find(skb, SKB_EXT_SEC_PATH); -struct sec_path *secpath_dup(struct sec_path *src) -{ - struct sec_path *sp; - - sp = kmem_cache_alloc(secpath_cachep, GFP_ATOMIC); + sp = skb_ext_add(skb, SKB_EXT_SEC_PATH); if (!sp) return NULL; - sp->len = 0; - sp->olen = 0; + if (tmp) /* reused existing one (was COW'd if needed) */ + return sp; + /* allocated new secpath */ memset(sp->ovec, 0, sizeof(sp->ovec)); + sp->olen = 0; + sp->len = 0; - if (src) { - int i; - - memcpy(sp, src, sizeof(*sp)); - for (i = 0; i < sp->len; i++) - xfrm_state_hold(sp->xvec[i]); - } - refcount_set(&sp->refcnt, 1); return sp; } -EXPORT_SYMBOL(secpath_dup); - -int secpath_set(struct sk_buff *skb) -{ - struct sec_path *sp; - - /* Allocate new secpath or COW existing one. */ - if (!skb->sp || refcount_read(&skb->sp->refcnt) != 1) { - sp = secpath_dup(skb->sp); - if (!sp) - return -ENOMEM; - - if (skb->sp) - secpath_put(skb->sp); - skb->sp = sp; - } - return 0; -} EXPORT_SYMBOL(secpath_set); /* Fetch spi and seq from ipsec header */ @@ -236,6 +202,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) bool xfrm_gro = false; bool crypto_done = false; struct xfrm_offload *xo = xfrm_offload(skb); + struct sec_path *sp; if (encap_type < 0) { x = xfrm_input_state(skb); @@ -312,8 +279,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) break; } - err = secpath_set(skb); - if (err) { + sp = secpath_set(skb); + if (!sp) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); goto drop; } @@ -328,7 +295,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) daddr = (xfrm_address_t *)(skb_network_header(skb) + XFRM_SPI_SKB_CB(skb)->daddroff); do { - if (skb->sp->len == XFRM_MAX_DEPTH) { + sp = skb_sec_path(skb); + + if (sp->len == XFRM_MAX_DEPTH) { secpath_reset(skb); XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto drop; @@ -344,7 +313,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->mark = xfrm_smark_get(skb->mark, x); - skb->sp->xvec[skb->sp->len++] = x; + sp->xvec[sp->len++] = x; lock: spin_lock(&x->lock); @@ -468,8 +437,9 @@ resume: nf_reset(skb); if (decaps) { - if (skb->sp) - skb->sp->olen = 0; + sp = skb_sec_path(skb); + if (sp) + sp->olen = 0; skb_dst_drop(skb); gro_cells_receive(&gro_cells, skb); return 0; @@ -480,8 +450,9 @@ resume: err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async); if (xfrm_gro) { - if (skb->sp) - skb->sp->olen = 0; + sp = skb_sec_path(skb); + if (sp) + sp->olen = 0; skb_dst_drop(skb); gro_cells_receive(&gro_cells, skb); return err; @@ -546,11 +517,6 @@ void __init xfrm_input_init(void) if (err) gro_cells.cells = NULL; - secpath_cachep = kmem_cache_create("secpath_cache", - sizeof(struct sec_path), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL); - for_each_possible_cpu(i) { struct xfrm_trans_tasklet *trans; diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index d679fa0f44b3..6be8c7df15bb 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -251,7 +251,7 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err) struct xfrm_if *xi; bool xnet; - if (err && !skb->sp) + if (err && !secpath_exists(skb)) return 0; x = xfrm_input_state(skb); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 4ae87c5ce2e3..757c4d11983b 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -218,19 +218,16 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) if (xfrm_dev_offload_ok(skb, x)) { struct sec_path *sp; - sp = secpath_dup(skb->sp); + sp = secpath_set(skb); if (!sp) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); kfree_skb(skb); return -ENOMEM; } - if (skb->sp) - secpath_put(skb->sp); - skb->sp = sp; skb->encapsulation = 1; sp->olen++; - sp->xvec[skb->sp->len++] = x; + sp->xvec[sp->len++] = x; xfrm_state_hold(x); if (skb_is_gso(skb)) { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index be04091eb7db..d6acba07bdc9 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3200,11 +3200,12 @@ EXPORT_SYMBOL(xfrm_lookup_route); static inline int xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl) { + struct sec_path *sp = skb_sec_path(skb); struct xfrm_state *x; - if (!skb->sp || idx < 0 || idx >= skb->sp->len) + if (!sp || idx < 0 || idx >= sp->len) return 0; - x = skb->sp->xvec[idx]; + x = sp->xvec[idx]; if (!x->type->reject) return 0; return x->type->reject(x, skb, fl); @@ -3304,6 +3305,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, struct flowi fl; int xerr_idx = -1; const struct xfrm_if_cb *ifcb; + struct sec_path *sp; struct xfrm_if *xi; u32 if_id = 0; @@ -3328,11 +3330,12 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, nf_nat_decode_session(skb, &fl, family); /* First, check used SA against their selectors. */ - if (skb->sp) { + sp = skb_sec_path(skb); + if (sp) { int i; - for (i = skb->sp->len-1; i >= 0; i--) { - struct xfrm_state *x = skb->sp->xvec[i]; + for (i = sp->len - 1; i >= 0; i--) { + struct xfrm_state *x = sp->xvec[i]; if (!xfrm_selector_match(&x->sel, &fl, family)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); return 0; @@ -3359,7 +3362,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } if (!pol) { - if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { + if (sp && secpath_has_nontransport(sp, 0, &xerr_idx)) { xfrm_secpath_reject(xerr_idx, skb, &fl); XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); return 0; @@ -3388,7 +3391,6 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, #endif if (pol->action == XFRM_POLICY_ALLOW) { - struct sec_path *sp; static struct sec_path dummy; struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; @@ -3396,7 +3398,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, int ti = 0; int i, k; - if ((sp = skb->sp) == NULL) + sp = skb_sec_path(skb); + if (!sp) sp = &dummy; for (pi = 0; pi < npols; pi++) { diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 91dc3783ed94..bd7d18bdb147 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -230,7 +230,7 @@ static int selinux_xfrm_skb_sid_ingress(struct sk_buff *skb, u32 *sid, int ckall) { u32 sid_session = SECSID_NULL; - struct sec_path *sp = skb->sp; + struct sec_path *sp = skb_sec_path(skb); if (sp) { int i; @@ -408,7 +408,7 @@ int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb, struct common_audit_data *ad) { int i; - struct sec_path *sp = skb->sp; + struct sec_path *sp = skb_sec_path(skb); u32 peer_sid = SECINITSID_UNLABELED; if (sp) {