Merge branch 'linus/master' into rdma.git for-next

rdma.git merge resolution for the 4.19 merge window

Conflicts:
 drivers/infiniband/core/rdma_core.c
   - Use the rdma code and revise with the new spelling for
     atomic_fetch_add_unless
 drivers/nvme/host/rdma.c
   - Replace max_sge with max_send_sge in new blk code
 drivers/nvme/target/rdma.c
   - Use the blk code and revise to use NULL for ib_post_recv when
     appropriate
   - Replace max_sge with max_recv_sge in new blk code
 net/rds/ib_send.c
   - Use the net code and revise to use NULL for ib_post_recv when
     appropriate

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
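A short illustration of the API changes the resolutions above refer to may help when reading the diff; the sketch below is not taken from this merge, and the helper names and variables in it are hypothetical. In the 4.19 window ib_post_recv() started accepting NULL for its bad_wr argument, struct ib_device_attr's single max_sge field was split into max_send_sge and max_recv_sge, and __atomic_add_unless() was renamed to atomic_fetch_add_unless().

/* Minimal sketch, kernel C.  post_one_recv() and recv_sge_limit() are
 * hypothetical helpers used only to show the new calling conventions.
 */
#include <rdma/ib_verbs.h>

static int post_one_recv(struct ib_qp *qp, struct ib_recv_wr *wr)
{
	/* Before 4.19 a dummy out-pointer was mandatory:
	 *
	 *	struct ib_recv_wr *bad_wr;
	 *	err = ib_post_recv(qp, wr, &bad_wr);
	 *
	 * Since 4.19, NULL is accepted when the caller does not need to
	 * know which work request failed.
	 */
	return ib_post_recv(qp, wr, NULL);
}

static int recv_sge_limit(const struct ib_device_attr *attr)
{
	/* The former attr->max_sge was split into send and receive limits. */
	return attr->max_recv_sge;
}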
net/core/datagram.c
@@ -837,7 +837,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
 	struct sock *sk = sock->sk;
 	__poll_t mask;
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
+	sock_poll_wait(file, wait);
 	mask = 0;
 
 	/* exceptional events? */
net/core/dev.c (878 lines changed): file diff suppressed because it is too large.
net/core/dev_ioctl.c
@@ -284,12 +284,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 	case SIOCSIFTXQLEN:
 		if (ifr->ifr_qlen < 0)
 			return -EINVAL;
-		if (dev->tx_queue_len ^ ifr->ifr_qlen) {
-			err = dev_change_tx_queue_len(dev, ifr->ifr_qlen);
-			if (err)
-				return err;
-		}
-		return 0;
+		return dev_change_tx_queue_len(dev, ifr->ifr_qlen);
 
 	case SIOCSIFNAME:
 		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
net/core/devlink.c (1322 lines changed): file diff suppressed because it is too large.
net/core/dst.c
@@ -307,6 +307,7 @@ void metadata_dst_free(struct metadata_dst *md_dst)
 #endif
 	kfree(md_dst);
 }
+EXPORT_SYMBOL_GPL(metadata_dst_free);
 
 struct metadata_dst __percpu *
 metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags)
net/core/ethtool.c
@@ -111,6 +111,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload",
 	[NETIF_F_HW_TLS_RECORD_BIT] =	"tls-hw-record",
 	[NETIF_F_HW_TLS_TX_BIT] =	 "tls-hw-tx-offload",
+	[NETIF_F_HW_TLS_RX_BIT] =	 "tls-hw-rx-offload",
 };
 
 static const char
net/core/fib_rules.c
@@ -924,8 +924,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	return 0;
 
 errout:
-	if (nlrule)
-		kfree(nlrule);
+	kfree(nlrule);
 	rules_ops_put(ops);
 	return err;
 }
@@ -1453,30 +1453,6 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
|
||||
{
|
||||
struct bpf_prog *old_prog;
|
||||
int err;
|
||||
|
||||
if (bpf_prog_size(prog->len) > sysctl_optmem_max)
|
||||
return -ENOMEM;
|
||||
|
||||
if (sk_unhashed(sk) && sk->sk_reuseport) {
|
||||
err = reuseport_alloc(sk);
|
||||
if (err)
|
||||
return err;
|
||||
} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
|
||||
/* The socket wasn't bound with SO_REUSEPORT */
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
old_prog = reuseport_attach_prog(sk, prog);
|
||||
if (old_prog)
|
||||
bpf_prog_destroy(old_prog);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static
|
||||
struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
|
||||
{
|
||||
@@ -1550,13 +1526,15 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
|
||||
if (IS_ERR(prog))
|
||||
return PTR_ERR(prog);
|
||||
|
||||
err = __reuseport_attach_prog(prog, sk);
|
||||
if (err < 0) {
|
||||
__bpf_prog_release(prog);
|
||||
return err;
|
||||
}
|
||||
if (bpf_prog_size(prog->len) > sysctl_optmem_max)
|
||||
err = -ENOMEM;
|
||||
else
|
||||
err = reuseport_attach_prog(sk, prog);
|
||||
|
||||
return 0;
|
||||
if (err)
|
||||
__bpf_prog_release(prog);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
|
||||
@@ -1586,19 +1564,58 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
|
||||
|
||||
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
|
||||
{
|
||||
struct bpf_prog *prog = __get_bpf(ufd, sk);
|
||||
struct bpf_prog *prog;
|
||||
int err;
|
||||
|
||||
if (sock_flag(sk, SOCK_FILTER_LOCKED))
|
||||
return -EPERM;
|
||||
|
||||
prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
|
||||
if (IS_ERR(prog) && PTR_ERR(prog) == -EINVAL)
|
||||
prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
|
||||
if (IS_ERR(prog))
|
||||
return PTR_ERR(prog);
|
||||
|
||||
err = __reuseport_attach_prog(prog, sk);
|
||||
if (err < 0) {
|
||||
bpf_prog_put(prog);
|
||||
return err;
|
||||
if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) {
|
||||
/* Like other non BPF_PROG_TYPE_SOCKET_FILTER
|
||||
* bpf prog (e.g. sockmap). It depends on the
|
||||
* limitation imposed by bpf_prog_load().
|
||||
* Hence, sysctl_optmem_max is not checked.
|
||||
*/
|
||||
if ((sk->sk_type != SOCK_STREAM &&
|
||||
sk->sk_type != SOCK_DGRAM) ||
|
||||
(sk->sk_protocol != IPPROTO_UDP &&
|
||||
sk->sk_protocol != IPPROTO_TCP) ||
|
||||
(sk->sk_family != AF_INET &&
|
||||
sk->sk_family != AF_INET6)) {
|
||||
err = -ENOTSUPP;
|
||||
goto err_prog_put;
|
||||
}
|
||||
} else {
|
||||
/* BPF_PROG_TYPE_SOCKET_FILTER */
|
||||
if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
|
||||
err = -ENOMEM;
|
||||
goto err_prog_put;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
err = reuseport_attach_prog(sk, prog);
|
||||
err_prog_put:
|
||||
if (err)
|
||||
bpf_prog_put(prog);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
void sk_reuseport_prog_free(struct bpf_prog *prog)
|
||||
{
|
||||
if (!prog)
|
||||
return;
|
||||
|
||||
if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
|
||||
bpf_prog_put(prog);
|
||||
else
|
||||
bpf_prog_destroy(prog);
|
||||
}
|
||||
|
||||
struct bpf_scratchpad {
|
||||
@@ -2082,19 +2099,12 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
struct redirect_info {
|
||||
u32 ifindex;
|
||||
u32 flags;
|
||||
struct bpf_map *map;
|
||||
struct bpf_map *map_to_flush;
|
||||
unsigned long map_owner;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct redirect_info, redirect_info);
|
||||
DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
|
||||
|
||||
BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
|
||||
{
|
||||
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
|
||||
if (unlikely(flags & ~(BPF_F_INGRESS)))
|
||||
return TC_ACT_SHOT;
|
||||
@@ -2107,7 +2117,7 @@ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
|
||||
|
||||
int skb_do_redirect(struct sk_buff *skb)
|
||||
{
|
||||
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
struct net_device *dev;
|
||||
|
||||
dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
|
||||
@@ -3200,7 +3210,7 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
|
||||
|
||||
void xdp_do_flush_map(void)
|
||||
{
|
||||
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
struct bpf_map *map = ri->map_to_flush;
|
||||
|
||||
ri->map_to_flush = NULL;
|
||||
@@ -3245,7 +3255,7 @@ static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
|
||||
static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
|
||||
struct bpf_prog *xdp_prog)
|
||||
{
|
||||
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
unsigned long map_owner = ri->map_owner;
|
||||
struct bpf_map *map = ri->map;
|
||||
u32 index = ri->ifindex;
|
||||
@@ -3285,7 +3295,7 @@ err:
|
||||
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
|
||||
struct bpf_prog *xdp_prog)
|
||||
{
|
||||
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
struct net_device *fwd;
|
||||
u32 index = ri->ifindex;
|
||||
int err;
|
||||
@@ -3317,7 +3327,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
|
||||
struct xdp_buff *xdp,
|
||||
struct bpf_prog *xdp_prog)
|
||||
{
|
||||
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
unsigned long map_owner = ri->map_owner;
|
||||
struct bpf_map *map = ri->map;
|
||||
u32 index = ri->ifindex;
|
||||
@@ -3368,7 +3378,7 @@ err:
|
||||
int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
|
||||
struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
|
||||
{
|
||||
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
u32 index = ri->ifindex;
|
||||
struct net_device *fwd;
|
||||
int err = 0;
|
||||
@@ -3399,7 +3409,7 @@ EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
|
||||
|
||||
BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
|
||||
{
|
||||
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
|
||||
if (unlikely(flags))
|
||||
return XDP_ABORTED;
|
||||
@@ -3423,7 +3433,7 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
|
||||
BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
|
||||
unsigned long, map_owner)
|
||||
{
|
||||
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
|
||||
if (unlikely(flags))
|
||||
return XDP_ABORTED;
|
||||
@@ -3681,7 +3691,7 @@ BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
|
||||
if (unlikely(size > IP_TUNNEL_OPTS_MAX))
|
||||
return -ENOMEM;
|
||||
|
||||
ip_tunnel_info_opts_set(info, from, size);
|
||||
ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -3768,6 +3778,32 @@ static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
};
|
||||
|
||||
BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
|
||||
ancestor_level)
|
||||
{
|
||||
struct sock *sk = skb_to_full_sk(skb);
|
||||
struct cgroup *ancestor;
|
||||
struct cgroup *cgrp;
|
||||
|
||||
if (!sk || !sk_fullsock(sk))
|
||||
return 0;
|
||||
|
||||
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
|
||||
ancestor = cgroup_ancestor(cgrp, ancestor_level);
|
||||
if (!ancestor)
|
||||
return 0;
|
||||
|
||||
return ancestor->kn->id.id;
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
|
||||
.func = bpf_skb_ancestor_cgroup_id,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
};
|
||||
#endif
|
||||
|
||||
static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
|
||||
@@ -3814,6 +3850,30 @@ static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
};
|
||||
|
||||
BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
|
||||
{
|
||||
return sock_gen_cookie(ctx->sk);
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
|
||||
.func = bpf_get_socket_cookie_sock_addr,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
};
|
||||
|
||||
BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
|
||||
{
|
||||
return sock_gen_cookie(ctx->sk);
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
|
||||
.func = bpf_get_socket_cookie_sock_ops,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
};
|
||||
|
||||
BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
|
||||
{
|
||||
struct sock *sk = sk_to_full_sk(skb->sk);
|
||||
@@ -4544,26 +4604,28 @@ BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
|
||||
{
|
||||
struct seg6_bpf_srh_state *srh_state =
|
||||
this_cpu_ptr(&seg6_bpf_srh_states);
|
||||
struct ipv6_sr_hdr *srh = srh_state->srh;
|
||||
void *srh_tlvs, *srh_end, *ptr;
|
||||
struct ipv6_sr_hdr *srh;
|
||||
int srhoff = 0;
|
||||
|
||||
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
|
||||
if (srh == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
|
||||
srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
|
||||
srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);
|
||||
|
||||
ptr = skb->data + offset;
|
||||
if (ptr >= srh_tlvs && ptr + len <= srh_end)
|
||||
srh_state->valid = 0;
|
||||
srh_state->valid = false;
|
||||
else if (ptr < (void *)&srh->flags ||
|
||||
ptr + len > (void *)&srh->segments)
|
||||
return -EFAULT;
|
||||
|
||||
if (unlikely(bpf_try_make_writable(skb, offset + len)))
|
||||
return -EFAULT;
|
||||
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
|
||||
return -EINVAL;
|
||||
srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
|
||||
|
||||
memcpy(skb->data + offset, from, len);
|
||||
return 0;
|
||||
@@ -4579,52 +4641,78 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
|
||||
.arg4_type = ARG_CONST_SIZE
|
||||
};
|
||||
|
||||
static void bpf_update_srh_state(struct sk_buff *skb)
|
||||
{
|
||||
struct seg6_bpf_srh_state *srh_state =
|
||||
this_cpu_ptr(&seg6_bpf_srh_states);
|
||||
int srhoff = 0;
|
||||
|
||||
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) {
|
||||
srh_state->srh = NULL;
|
||||
} else {
|
||||
srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
|
||||
srh_state->hdrlen = srh_state->srh->hdrlen << 3;
|
||||
srh_state->valid = true;
|
||||
}
|
||||
}
|
||||
|
||||
BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
|
||||
u32, action, void *, param, u32, param_len)
|
||||
{
|
||||
struct seg6_bpf_srh_state *srh_state =
|
||||
this_cpu_ptr(&seg6_bpf_srh_states);
|
||||
struct ipv6_sr_hdr *srh;
|
||||
int srhoff = 0;
|
||||
int hdroff = 0;
|
||||
int err;
|
||||
|
||||
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
|
||||
return -EINVAL;
|
||||
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
|
||||
|
||||
if (!srh_state->valid) {
|
||||
if (unlikely((srh_state->hdrlen & 7) != 0))
|
||||
return -EBADMSG;
|
||||
|
||||
srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
|
||||
if (unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
|
||||
return -EBADMSG;
|
||||
|
||||
srh_state->valid = 1;
|
||||
}
|
||||
|
||||
switch (action) {
|
||||
case SEG6_LOCAL_ACTION_END_X:
|
||||
if (!seg6_bpf_has_valid_srh(skb))
|
||||
return -EBADMSG;
|
||||
if (param_len != sizeof(struct in6_addr))
|
||||
return -EINVAL;
|
||||
return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
|
||||
case SEG6_LOCAL_ACTION_END_T:
|
||||
if (!seg6_bpf_has_valid_srh(skb))
|
||||
return -EBADMSG;
|
||||
if (param_len != sizeof(int))
|
||||
return -EINVAL;
|
||||
return seg6_lookup_nexthop(skb, NULL, *(int *)param);
|
||||
case SEG6_LOCAL_ACTION_END_DT6:
|
||||
if (!seg6_bpf_has_valid_srh(skb))
|
||||
return -EBADMSG;
|
||||
if (param_len != sizeof(int))
|
||||
return -EINVAL;
|
||||
|
||||
if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0)
|
||||
return -EBADMSG;
|
||||
if (!pskb_pull(skb, hdroff))
|
||||
return -EBADMSG;
|
||||
|
||||
skb_postpull_rcsum(skb, skb_network_header(skb), hdroff);
|
||||
skb_reset_network_header(skb);
|
||||
skb_reset_transport_header(skb);
|
||||
skb->encapsulation = 0;
|
||||
|
||||
bpf_compute_data_pointers(skb);
|
||||
bpf_update_srh_state(skb);
|
||||
return seg6_lookup_nexthop(skb, NULL, *(int *)param);
|
||||
case SEG6_LOCAL_ACTION_END_B6:
|
||||
if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
|
||||
return -EBADMSG;
|
||||
err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
|
||||
param, param_len);
|
||||
if (!err)
|
||||
srh_state->hdrlen =
|
||||
((struct ipv6_sr_hdr *)param)->hdrlen << 3;
|
||||
bpf_update_srh_state(skb);
|
||||
|
||||
return err;
|
||||
case SEG6_LOCAL_ACTION_END_B6_ENCAP:
|
||||
if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
|
||||
return -EBADMSG;
|
||||
err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
|
||||
param, param_len);
|
||||
if (!err)
|
||||
srh_state->hdrlen =
|
||||
((struct ipv6_sr_hdr *)param)->hdrlen << 3;
|
||||
bpf_update_srh_state(skb);
|
||||
|
||||
return err;
|
||||
default:
|
||||
return -EINVAL;
|
||||
@@ -4646,15 +4734,14 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
|
||||
{
|
||||
struct seg6_bpf_srh_state *srh_state =
|
||||
this_cpu_ptr(&seg6_bpf_srh_states);
|
||||
struct ipv6_sr_hdr *srh = srh_state->srh;
|
||||
void *srh_end, *srh_tlvs, *ptr;
|
||||
struct ipv6_sr_hdr *srh;
|
||||
struct ipv6hdr *hdr;
|
||||
int srhoff = 0;
|
||||
int ret;
|
||||
|
||||
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
|
||||
if (unlikely(srh == NULL))
|
||||
return -EINVAL;
|
||||
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
|
||||
|
||||
srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
|
||||
((srh->first_segment + 1) << 4));
|
||||
@@ -4684,8 +4771,11 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
|
||||
hdr = (struct ipv6hdr *)skb->data;
|
||||
hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
|
||||
|
||||
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
|
||||
return -EINVAL;
|
||||
srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
|
||||
srh_state->hdrlen += len;
|
||||
srh_state->valid = 0;
|
||||
srh_state->valid = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -4753,6 +4843,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
|
||||
case BPF_FUNC_trace_printk:
|
||||
if (capable(CAP_SYS_ADMIN))
|
||||
return bpf_get_trace_printk_proto();
|
||||
/* else: fall through */
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
@@ -4767,6 +4858,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
*/
|
||||
case BPF_FUNC_get_current_uid_gid:
|
||||
return &bpf_get_current_uid_gid_proto;
|
||||
case BPF_FUNC_get_local_storage:
|
||||
return &bpf_get_local_storage_proto;
|
||||
default:
|
||||
return bpf_base_func_proto(func_id);
|
||||
}
|
||||
@@ -4789,6 +4882,10 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
case BPF_FUNC_get_socket_cookie:
|
||||
return &bpf_get_socket_cookie_sock_addr_proto;
|
||||
case BPF_FUNC_get_local_storage:
|
||||
return &bpf_get_local_storage_proto;
|
||||
default:
|
||||
return bpf_base_func_proto(func_id);
|
||||
}
|
||||
@@ -4811,6 +4908,17 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
}
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto *
|
||||
cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
{
|
||||
switch (func_id) {
|
||||
case BPF_FUNC_get_local_storage:
|
||||
return &bpf_get_local_storage_proto;
|
||||
default:
|
||||
return sk_filter_func_proto(func_id, prog);
|
||||
}
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto *
|
||||
tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
{
|
||||
@@ -4884,6 +4992,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
#ifdef CONFIG_SOCK_CGROUP_DATA
|
||||
case BPF_FUNC_skb_cgroup_id:
|
||||
return &bpf_skb_cgroup_id_proto;
|
||||
case BPF_FUNC_skb_ancestor_cgroup_id:
|
||||
return &bpf_skb_ancestor_cgroup_id_proto;
|
||||
#endif
|
||||
default:
|
||||
return bpf_base_func_proto(func_id);
|
||||
@@ -4931,6 +5041,10 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_sock_map_update_proto;
|
||||
case BPF_FUNC_sock_hash_update:
|
||||
return &bpf_sock_hash_update_proto;
|
||||
case BPF_FUNC_get_socket_cookie:
|
||||
return &bpf_get_socket_cookie_sock_ops_proto;
|
||||
case BPF_FUNC_get_local_storage:
|
||||
return &bpf_get_local_storage_proto;
|
||||
default:
|
||||
return bpf_base_func_proto(func_id);
|
||||
}
|
||||
@@ -4950,6 +5064,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_msg_cork_bytes_proto;
|
||||
case BPF_FUNC_msg_pull_data:
|
||||
return &bpf_msg_pull_data_proto;
|
||||
case BPF_FUNC_get_local_storage:
|
||||
return &bpf_get_local_storage_proto;
|
||||
default:
|
||||
return bpf_base_func_proto(func_id);
|
||||
}
|
||||
@@ -4977,6 +5093,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_sk_redirect_map_proto;
|
||||
case BPF_FUNC_sk_redirect_hash:
|
||||
return &bpf_sk_redirect_hash_proto;
|
||||
case BPF_FUNC_get_local_storage:
|
||||
return &bpf_get_local_storage_proto;
|
||||
default:
|
||||
return bpf_base_func_proto(func_id);
|
||||
}
|
||||
@@ -6781,7 +6899,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
|
||||
};
|
||||
|
||||
const struct bpf_verifier_ops cg_skb_verifier_ops = {
|
||||
.get_func_proto = sk_filter_func_proto,
|
||||
.get_func_proto = cg_skb_func_proto,
|
||||
.is_valid_access = sk_filter_is_valid_access,
|
||||
.convert_ctx_access = bpf_convert_ctx_access,
|
||||
};
|
||||
@@ -6940,3 +7058,271 @@ out:
|
||||
release_sock(sk);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_INET
|
||||
struct sk_reuseport_kern {
|
||||
struct sk_buff *skb;
|
||||
struct sock *sk;
|
||||
struct sock *selected_sk;
|
||||
void *data_end;
|
||||
u32 hash;
|
||||
u32 reuseport_id;
|
||||
bool bind_inany;
|
||||
};
|
||||
|
||||
static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
|
||||
struct sock_reuseport *reuse,
|
||||
struct sock *sk, struct sk_buff *skb,
|
||||
u32 hash)
|
||||
{
|
||||
reuse_kern->skb = skb;
|
||||
reuse_kern->sk = sk;
|
||||
reuse_kern->selected_sk = NULL;
|
||||
reuse_kern->data_end = skb->data + skb_headlen(skb);
|
||||
reuse_kern->hash = hash;
|
||||
reuse_kern->reuseport_id = reuse->reuseport_id;
|
||||
reuse_kern->bind_inany = reuse->bind_inany;
|
||||
}
|
||||
|
||||
struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
|
||||
struct bpf_prog *prog, struct sk_buff *skb,
|
||||
u32 hash)
|
||||
{
|
||||
struct sk_reuseport_kern reuse_kern;
|
||||
enum sk_action action;
|
||||
|
||||
bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, hash);
|
||||
action = BPF_PROG_RUN(prog, &reuse_kern);
|
||||
|
||||
if (action == SK_PASS)
|
||||
return reuse_kern.selected_sk;
|
||||
else
|
||||
return ERR_PTR(-ECONNREFUSED);
|
||||
}
|
||||
|
||||
BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
|
||||
struct bpf_map *, map, void *, key, u32, flags)
|
||||
{
|
||||
struct sock_reuseport *reuse;
|
||||
struct sock *selected_sk;
|
||||
|
||||
selected_sk = map->ops->map_lookup_elem(map, key);
|
||||
if (!selected_sk)
|
||||
return -ENOENT;
|
||||
|
||||
reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
|
||||
if (!reuse)
|
||||
/* selected_sk is unhashed (e.g. by close()) after the
|
||||
* above map_lookup_elem(). Treat selected_sk has already
|
||||
* been removed from the map.
|
||||
*/
|
||||
return -ENOENT;
|
||||
|
||||
if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
|
||||
struct sock *sk;
|
||||
|
||||
if (unlikely(!reuse_kern->reuseport_id))
|
||||
/* There is a small race between adding the
|
||||
* sk to the map and setting the
|
||||
* reuse_kern->reuseport_id.
|
||||
* Treat it as the sk has not been added to
|
||||
* the bpf map yet.
|
||||
*/
|
||||
return -ENOENT;
|
||||
|
||||
sk = reuse_kern->sk;
|
||||
if (sk->sk_protocol != selected_sk->sk_protocol)
|
||||
return -EPROTOTYPE;
|
||||
else if (sk->sk_family != selected_sk->sk_family)
|
||||
return -EAFNOSUPPORT;
|
||||
|
||||
/* Catch all. Likely bound to a different sockaddr. */
|
||||
return -EBADFD;
|
||||
}
|
||||
|
||||
reuse_kern->selected_sk = selected_sk;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto sk_select_reuseport_proto = {
|
||||
.func = sk_select_reuseport,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
.arg2_type = ARG_CONST_MAP_PTR,
|
||||
.arg3_type = ARG_PTR_TO_MAP_KEY,
|
||||
.arg4_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_4(sk_reuseport_load_bytes,
|
||||
const struct sk_reuseport_kern *, reuse_kern, u32, offset,
|
||||
void *, to, u32, len)
|
||||
{
|
||||
return ____bpf_skb_load_bytes(reuse_kern->skb, offset, to, len);
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto sk_reuseport_load_bytes_proto = {
|
||||
.func = sk_reuseport_load_bytes,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
.arg3_type = ARG_PTR_TO_UNINIT_MEM,
|
||||
.arg4_type = ARG_CONST_SIZE,
|
||||
};
|
||||
|
||||
BPF_CALL_5(sk_reuseport_load_bytes_relative,
|
||||
const struct sk_reuseport_kern *, reuse_kern, u32, offset,
|
||||
void *, to, u32, len, u32, start_header)
|
||||
{
|
||||
return ____bpf_skb_load_bytes_relative(reuse_kern->skb, offset, to,
|
||||
len, start_header);
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = {
|
||||
.func = sk_reuseport_load_bytes_relative,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
.arg3_type = ARG_PTR_TO_UNINIT_MEM,
|
||||
.arg4_type = ARG_CONST_SIZE,
|
||||
.arg5_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
static const struct bpf_func_proto *
|
||||
sk_reuseport_func_proto(enum bpf_func_id func_id,
|
||||
const struct bpf_prog *prog)
|
||||
{
|
||||
switch (func_id) {
|
||||
case BPF_FUNC_sk_select_reuseport:
|
||||
return &sk_select_reuseport_proto;
|
||||
case BPF_FUNC_skb_load_bytes:
|
||||
return &sk_reuseport_load_bytes_proto;
|
||||
case BPF_FUNC_skb_load_bytes_relative:
|
||||
return &sk_reuseport_load_bytes_relative_proto;
|
||||
default:
|
||||
return bpf_base_func_proto(func_id);
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
sk_reuseport_is_valid_access(int off, int size,
|
||||
enum bpf_access_type type,
|
||||
const struct bpf_prog *prog,
|
||||
struct bpf_insn_access_aux *info)
|
||||
{
|
||||
const u32 size_default = sizeof(__u32);
|
||||
|
||||
if (off < 0 || off >= sizeof(struct sk_reuseport_md) ||
|
||||
off % size || type != BPF_READ)
|
||||
return false;
|
||||
|
||||
switch (off) {
|
||||
case offsetof(struct sk_reuseport_md, data):
|
||||
info->reg_type = PTR_TO_PACKET;
|
||||
return size == sizeof(__u64);
|
||||
|
||||
case offsetof(struct sk_reuseport_md, data_end):
|
||||
info->reg_type = PTR_TO_PACKET_END;
|
||||
return size == sizeof(__u64);
|
||||
|
||||
case offsetof(struct sk_reuseport_md, hash):
|
||||
return size == size_default;
|
||||
|
||||
/* Fields that allow narrowing */
|
||||
case offsetof(struct sk_reuseport_md, eth_protocol):
|
||||
if (size < FIELD_SIZEOF(struct sk_buff, protocol))
|
||||
return false;
|
||||
/* fall through */
|
||||
case offsetof(struct sk_reuseport_md, ip_protocol):
|
||||
case offsetof(struct sk_reuseport_md, bind_inany):
|
||||
case offsetof(struct sk_reuseport_md, len):
|
||||
bpf_ctx_record_field_size(info, size_default);
|
||||
return bpf_ctx_narrow_access_ok(off, size, size_default);
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
#define SK_REUSEPORT_LOAD_FIELD(F) ({ \
|
||||
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
|
||||
si->dst_reg, si->src_reg, \
|
||||
bpf_target_off(struct sk_reuseport_kern, F, \
|
||||
FIELD_SIZEOF(struct sk_reuseport_kern, F), \
|
||||
target_size)); \
|
||||
})
|
||||
|
||||
#define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD) \
|
||||
SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \
|
||||
struct sk_buff, \
|
||||
skb, \
|
||||
SKB_FIELD)
|
||||
|
||||
#define SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \
|
||||
SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern, \
|
||||
struct sock, \
|
||||
sk, \
|
||||
SK_FIELD, BPF_SIZE, EXTRA_OFF)
|
||||
|
||||
static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
|
||||
const struct bpf_insn *si,
|
||||
struct bpf_insn *insn_buf,
|
||||
struct bpf_prog *prog,
|
||||
u32 *target_size)
|
||||
{
|
||||
struct bpf_insn *insn = insn_buf;
|
||||
|
||||
switch (si->off) {
|
||||
case offsetof(struct sk_reuseport_md, data):
|
||||
SK_REUSEPORT_LOAD_SKB_FIELD(data);
|
||||
break;
|
||||
|
||||
case offsetof(struct sk_reuseport_md, len):
|
||||
SK_REUSEPORT_LOAD_SKB_FIELD(len);
|
||||
break;
|
||||
|
||||
case offsetof(struct sk_reuseport_md, eth_protocol):
|
||||
SK_REUSEPORT_LOAD_SKB_FIELD(protocol);
|
||||
break;
|
||||
|
||||
case offsetof(struct sk_reuseport_md, ip_protocol):
|
||||
BUILD_BUG_ON(hweight_long(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
|
||||
SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
|
||||
BPF_W, 0);
|
||||
*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
|
||||
*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
|
||||
SK_FL_PROTO_SHIFT);
|
||||
/* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian
|
||||
* aware. No further narrowing or masking is needed.
|
||||
*/
|
||||
*target_size = 1;
|
||||
break;
|
||||
|
||||
case offsetof(struct sk_reuseport_md, data_end):
|
||||
SK_REUSEPORT_LOAD_FIELD(data_end);
|
||||
break;
|
||||
|
||||
case offsetof(struct sk_reuseport_md, hash):
|
||||
SK_REUSEPORT_LOAD_FIELD(hash);
|
||||
break;
|
||||
|
||||
case offsetof(struct sk_reuseport_md, bind_inany):
|
||||
SK_REUSEPORT_LOAD_FIELD(bind_inany);
|
||||
break;
|
||||
}
|
||||
|
||||
return insn - insn_buf;
|
||||
}
|
||||
|
||||
const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
|
||||
.get_func_proto = sk_reuseport_func_proto,
|
||||
.is_valid_access = sk_reuseport_is_valid_access,
|
||||
.convert_ctx_access = sk_reuseport_convert_ctx_access,
|
||||
};
|
||||
|
||||
const struct bpf_prog_ops sk_reuseport_prog_ops = {
|
||||
};
|
||||
#endif /* CONFIG_INET */
|
||||
|
||||
@@ -152,7 +152,11 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
|
||||
!dissector_uses_key(flow_dissector,
|
||||
FLOW_DISSECTOR_KEY_ENC_CONTROL) &&
|
||||
!dissector_uses_key(flow_dissector,
|
||||
FLOW_DISSECTOR_KEY_ENC_PORTS))
|
||||
FLOW_DISSECTOR_KEY_ENC_PORTS) &&
|
||||
!dissector_uses_key(flow_dissector,
|
||||
FLOW_DISSECTOR_KEY_ENC_IP) &&
|
||||
!dissector_uses_key(flow_dissector,
|
||||
FLOW_DISSECTOR_KEY_ENC_OPTS))
|
||||
return;
|
||||
|
||||
info = skb_tunnel_info(skb);
|
||||
@@ -212,6 +216,31 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
|
||||
tp->src = key->tp_src;
|
||||
tp->dst = key->tp_dst;
|
||||
}
|
||||
|
||||
if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
|
||||
struct flow_dissector_key_ip *ip;
|
||||
|
||||
ip = skb_flow_dissector_target(flow_dissector,
|
||||
FLOW_DISSECTOR_KEY_ENC_IP,
|
||||
target_container);
|
||||
ip->tos = key->tos;
|
||||
ip->ttl = key->ttl;
|
||||
}
|
||||
|
||||
if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) {
|
||||
struct flow_dissector_key_enc_opts *enc_opt;
|
||||
|
||||
enc_opt = skb_flow_dissector_target(flow_dissector,
|
||||
FLOW_DISSECTOR_KEY_ENC_OPTS,
|
||||
target_container);
|
||||
|
||||
if (info->options_len) {
|
||||
enc_opt->len = info->options_len;
|
||||
ip_tunnel_info_opts_get(enc_opt->data, info);
|
||||
enc_opt->dst_opt_type = info->key.tun_flags &
|
||||
TUNNEL_OPTIONS_PRESENT;
|
||||
}
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
|
||||
|
||||
@@ -589,7 +618,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
|
||||
struct flow_dissector_key_tags *key_tags;
|
||||
struct flow_dissector_key_vlan *key_vlan;
|
||||
enum flow_dissect_ret fdret;
|
||||
bool skip_vlan = false;
|
||||
enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
|
||||
int num_hdrs = 0;
|
||||
u8 ip_proto = 0;
|
||||
bool ret;
|
||||
@@ -748,14 +777,14 @@ proto_again:
|
||||
}
|
||||
case htons(ETH_P_8021AD):
|
||||
case htons(ETH_P_8021Q): {
|
||||
const struct vlan_hdr *vlan;
|
||||
const struct vlan_hdr *vlan = NULL;
|
||||
struct vlan_hdr _vlan;
|
||||
bool vlan_tag_present = skb && skb_vlan_tag_present(skb);
|
||||
__be16 saved_vlan_tpid = proto;
|
||||
|
||||
if (vlan_tag_present)
|
||||
if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX &&
|
||||
skb && skb_vlan_tag_present(skb)) {
|
||||
proto = skb->protocol;
|
||||
|
||||
if (!vlan_tag_present || eth_type_vlan(skb->protocol)) {
|
||||
} else {
|
||||
vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
|
||||
data, hlen, &_vlan);
|
||||
if (!vlan) {
|
||||
@@ -765,20 +794,23 @@ proto_again:
|
||||
|
||||
proto = vlan->h_vlan_encapsulated_proto;
|
||||
nhoff += sizeof(*vlan);
|
||||
if (skip_vlan) {
|
||||
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
skip_vlan = true;
|
||||
if (dissector_uses_key(flow_dissector,
|
||||
FLOW_DISSECTOR_KEY_VLAN)) {
|
||||
if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX) {
|
||||
dissector_vlan = FLOW_DISSECTOR_KEY_VLAN;
|
||||
} else if (dissector_vlan == FLOW_DISSECTOR_KEY_VLAN) {
|
||||
dissector_vlan = FLOW_DISSECTOR_KEY_CVLAN;
|
||||
} else {
|
||||
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
|
||||
break;
|
||||
}
|
||||
|
||||
if (dissector_uses_key(flow_dissector, dissector_vlan)) {
|
||||
key_vlan = skb_flow_dissector_target(flow_dissector,
|
||||
FLOW_DISSECTOR_KEY_VLAN,
|
||||
dissector_vlan,
|
||||
target_container);
|
||||
|
||||
if (vlan_tag_present) {
|
||||
if (!vlan) {
|
||||
key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
|
||||
key_vlan->vlan_priority =
|
||||
(skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT);
|
||||
@@ -789,6 +821,7 @@ proto_again:
|
||||
(ntohs(vlan->h_vlan_TCI) &
|
||||
VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
|
||||
}
|
||||
key_vlan->vlan_tpid = saved_vlan_tpid;
|
||||
}
|
||||
|
||||
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
|
||||
|
||||
@@ -112,7 +112,7 @@ static void est_timer(struct timer_list *t)
|
||||
* @bstats: basic statistics
|
||||
* @cpu_bstats: bstats per cpu
|
||||
* @rate_est: rate estimator statistics
|
||||
* @stats_lock: statistics lock
|
||||
* @lock: lock for statistics and control path
|
||||
* @running: qdisc running seqcount
|
||||
* @opt: rate estimator configuration TLV
|
||||
*
|
||||
@@ -128,7 +128,7 @@ static void est_timer(struct timer_list *t)
|
||||
int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
|
||||
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
|
||||
struct net_rate_estimator __rcu **rate_est,
|
||||
spinlock_t *stats_lock,
|
||||
spinlock_t *lock,
|
||||
seqcount_t *running,
|
||||
struct nlattr *opt)
|
||||
{
|
||||
@@ -154,19 +154,22 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
|
||||
seqcount_init(&est->seq);
|
||||
intvl_log = parm->interval + 2;
|
||||
est->bstats = bstats;
|
||||
est->stats_lock = stats_lock;
|
||||
est->stats_lock = lock;
|
||||
est->running = running;
|
||||
est->ewma_log = parm->ewma_log;
|
||||
est->intvl_log = intvl_log;
|
||||
est->cpu_bstats = cpu_bstats;
|
||||
|
||||
if (stats_lock)
|
||||
if (lock)
|
||||
local_bh_disable();
|
||||
est_fetch_counters(est, &b);
|
||||
if (stats_lock)
|
||||
if (lock)
|
||||
local_bh_enable();
|
||||
est->last_bytes = b.bytes;
|
||||
est->last_packets = b.packets;
|
||||
|
||||
if (lock)
|
||||
spin_lock_bh(lock);
|
||||
old = rcu_dereference_protected(*rate_est, 1);
|
||||
if (old) {
|
||||
del_timer_sync(&old->timer);
|
||||
@@ -179,6 +182,8 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
|
||||
mod_timer(&est->timer, est->next_jiffies);
|
||||
|
||||
rcu_assign_pointer(*rate_est, est);
|
||||
if (lock)
|
||||
spin_unlock_bh(lock);
|
||||
if (old)
|
||||
kfree_rcu(old, rcu);
|
||||
return 0;
|
||||
@@ -209,7 +214,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
|
||||
* @bstats: basic statistics
|
||||
* @cpu_bstats: bstats per cpu
|
||||
* @rate_est: rate estimator statistics
|
||||
* @stats_lock: statistics lock
|
||||
* @lock: lock for statistics and control path
|
||||
* @running: qdisc running seqcount (might be NULL)
|
||||
* @opt: rate estimator configuration TLV
|
||||
*
|
||||
@@ -221,11 +226,11 @@ EXPORT_SYMBOL(gen_kill_estimator);
|
||||
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
|
||||
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
|
||||
struct net_rate_estimator __rcu **rate_est,
|
||||
spinlock_t *stats_lock,
|
||||
spinlock_t *lock,
|
||||
seqcount_t *running, struct nlattr *opt)
|
||||
{
|
||||
return gen_new_estimator(bstats, cpu_bstats, rate_est,
|
||||
stats_lock, running, opt);
|
||||
lock, running, opt);
|
||||
}
|
||||
EXPORT_SYMBOL(gen_replace_estimator);
|
||||
|
||||
|
||||
@@ -50,10 +50,8 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
|
||||
* mixing with BH RCU lock doesn't work.
|
||||
*/
|
||||
preempt_disable();
|
||||
rcu_read_lock();
|
||||
bpf_compute_data_pointers(skb);
|
||||
ret = bpf_prog_run_save_cb(lwt->prog, skb);
|
||||
rcu_read_unlock();
|
||||
|
||||
switch (ret) {
|
||||
case BPF_OK:
|
||||
|
||||
@@ -1148,7 +1148,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
|
||||
neigh->nud_state = new;
|
||||
err = 0;
|
||||
notify = old & NUD_VALID;
|
||||
if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
|
||||
if (((old & (NUD_INCOMPLETE | NUD_PROBE)) ||
|
||||
(flags & NEIGH_UPDATE_F_ADMIN)) &&
|
||||
(new & NUD_FAILED)) {
|
||||
neigh_invalidate(neigh);
|
||||
notify = 1;
|
||||
@@ -3273,4 +3274,3 @@ static int __init neigh_init(void)
|
||||
}
|
||||
|
||||
subsys_initcall(neigh_init);
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include <linux/pm_runtime.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/of_net.h>
|
||||
#include <linux/cpu.h>
|
||||
|
||||
#include "net-sysfs.h"
|
||||
|
||||
@@ -905,11 +906,20 @@ static const void *rx_queue_namespace(struct kobject *kobj)
|
||||
return ns;
|
||||
}
|
||||
|
||||
static void rx_queue_get_ownership(struct kobject *kobj,
|
||||
kuid_t *uid, kgid_t *gid)
|
||||
{
|
||||
const struct net *net = rx_queue_namespace(kobj);
|
||||
|
||||
net_ns_get_ownership(net, uid, gid);
|
||||
}
|
||||
|
||||
static struct kobj_type rx_queue_ktype __ro_after_init = {
|
||||
.sysfs_ops = &rx_queue_sysfs_ops,
|
||||
.release = rx_queue_release,
|
||||
.default_attrs = rx_queue_default_attrs,
|
||||
.namespace = rx_queue_namespace
|
||||
.namespace = rx_queue_namespace,
|
||||
.get_ownership = rx_queue_get_ownership,
|
||||
};
|
||||
|
||||
static int rx_queue_add_kobject(struct net_device *dev, int index)
|
||||
@@ -1047,13 +1057,30 @@ static ssize_t traffic_class_show(struct netdev_queue *queue,
|
||||
char *buf)
|
||||
{
|
||||
struct net_device *dev = queue->dev;
|
||||
int index = get_netdev_queue_index(queue);
|
||||
int tc = netdev_txq_to_tc(dev, index);
|
||||
int index;
|
||||
int tc;
|
||||
|
||||
if (!netif_is_multiqueue(dev))
|
||||
return -ENOENT;
|
||||
|
||||
index = get_netdev_queue_index(queue);
|
||||
|
||||
/* If queue belongs to subordinate dev use its TC mapping */
|
||||
dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
|
||||
|
||||
tc = netdev_txq_to_tc(dev, index);
|
||||
if (tc < 0)
|
||||
return -EINVAL;
|
||||
|
||||
return sprintf(buf, "%u\n", tc);
|
||||
/* We can report the traffic class one of two ways:
|
||||
* Subordinate device traffic classes are reported with the traffic
|
||||
* class first, and then the subordinate class so for example TC0 on
|
||||
* subordinate device 2 will be reported as "0-2". If the queue
|
||||
* belongs to the root device it will be reported with just the
|
||||
* traffic class, so just "0" for TC 0 for example.
|
||||
*/
|
||||
return dev->num_tc < 0 ? sprintf(buf, "%u%d\n", tc, dev->num_tc) :
|
||||
sprintf(buf, "%u\n", tc);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_XPS
|
||||
@@ -1070,6 +1097,9 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue,
|
||||
int err, index = get_netdev_queue_index(queue);
|
||||
u32 rate = 0;
|
||||
|
||||
if (!capable(CAP_NET_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
err = kstrtou32(buf, 10, &rate);
|
||||
if (err < 0)
|
||||
return err;
|
||||
@@ -1214,10 +1244,20 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
|
||||
cpumask_var_t mask;
|
||||
unsigned long index;
|
||||
|
||||
if (!netif_is_multiqueue(dev))
|
||||
return -ENOENT;
|
||||
|
||||
index = get_netdev_queue_index(queue);
|
||||
|
||||
if (dev->num_tc) {
|
||||
/* Do not allow XPS on subordinate device directly */
|
||||
num_tc = dev->num_tc;
|
||||
if (num_tc < 0)
|
||||
return -EINVAL;
|
||||
|
||||
/* If queue belongs to subordinate dev use its map */
|
||||
dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
|
||||
|
||||
tc = netdev_txq_to_tc(dev, index);
|
||||
if (tc < 0)
|
||||
return -EINVAL;
|
||||
@@ -1227,13 +1267,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
|
||||
return -ENOMEM;
|
||||
|
||||
rcu_read_lock();
|
||||
dev_maps = rcu_dereference(dev->xps_maps);
|
||||
dev_maps = rcu_dereference(dev->xps_cpus_map);
|
||||
if (dev_maps) {
|
||||
for_each_possible_cpu(cpu) {
|
||||
int i, tci = cpu * num_tc + tc;
|
||||
struct xps_map *map;
|
||||
|
||||
map = rcu_dereference(dev_maps->cpu_map[tci]);
|
||||
map = rcu_dereference(dev_maps->attr_map[tci]);
|
||||
if (!map)
|
||||
continue;
|
||||
|
||||
@@ -1260,6 +1300,9 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
|
||||
cpumask_var_t mask;
|
||||
int err;
|
||||
|
||||
if (!netif_is_multiqueue(dev))
|
||||
return -ENOENT;
|
||||
|
||||
if (!capable(CAP_NET_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
@@ -1283,6 +1326,91 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
|
||||
|
||||
static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
|
||||
= __ATTR_RW(xps_cpus);
|
||||
|
||||
static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
|
||||
{
|
||||
struct net_device *dev = queue->dev;
|
||||
struct xps_dev_maps *dev_maps;
|
||||
unsigned long *mask, index;
|
||||
int j, len, num_tc = 1, tc = 0;
|
||||
|
||||
index = get_netdev_queue_index(queue);
|
||||
|
||||
if (dev->num_tc) {
|
||||
num_tc = dev->num_tc;
|
||||
tc = netdev_txq_to_tc(dev, index);
|
||||
if (tc < 0)
|
||||
return -EINVAL;
|
||||
}
|
||||
mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long),
|
||||
GFP_KERNEL);
|
||||
if (!mask)
|
||||
return -ENOMEM;
|
||||
|
||||
rcu_read_lock();
|
||||
dev_maps = rcu_dereference(dev->xps_rxqs_map);
|
||||
if (!dev_maps)
|
||||
goto out_no_maps;
|
||||
|
||||
for (j = -1; j = netif_attrmask_next(j, NULL, dev->num_rx_queues),
|
||||
j < dev->num_rx_queues;) {
|
||||
int i, tci = j * num_tc + tc;
|
||||
struct xps_map *map;
|
||||
|
||||
map = rcu_dereference(dev_maps->attr_map[tci]);
|
||||
if (!map)
|
||||
continue;
|
||||
|
||||
for (i = map->len; i--;) {
|
||||
if (map->queues[i] == index) {
|
||||
set_bit(j, mask);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
out_no_maps:
|
||||
rcu_read_unlock();
|
||||
|
||||
len = bitmap_print_to_pagebuf(false, buf, mask, dev->num_rx_queues);
|
||||
kfree(mask);
|
||||
|
||||
return len < PAGE_SIZE ? len : -EINVAL;
|
||||
}
|
||||
|
||||
static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
|
||||
size_t len)
|
||||
{
|
||||
struct net_device *dev = queue->dev;
|
||||
struct net *net = dev_net(dev);
|
||||
unsigned long *mask, index;
|
||||
int err;
|
||||
|
||||
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long),
|
||||
GFP_KERNEL);
|
||||
if (!mask)
|
||||
return -ENOMEM;
|
||||
|
||||
index = get_netdev_queue_index(queue);
|
||||
|
||||
err = bitmap_parse(buf, len, mask, dev->num_rx_queues);
|
||||
if (err) {
|
||||
kfree(mask);
|
||||
return err;
|
||||
}
|
||||
|
||||
cpus_read_lock();
|
||||
err = __netif_set_xps_queue(dev, mask, index, true);
|
||||
cpus_read_unlock();
|
||||
|
||||
kfree(mask);
|
||||
return err ? : len;
|
||||
}
|
||||
|
||||
static struct netdev_queue_attribute xps_rxqs_attribute __ro_after_init
|
||||
= __ATTR_RW(xps_rxqs);
|
||||
#endif /* CONFIG_XPS */
|
||||
|
||||
static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
|
||||
@@ -1290,6 +1418,7 @@ static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
|
||||
&queue_traffic_class.attr,
|
||||
#ifdef CONFIG_XPS
|
||||
&xps_cpus_attribute.attr,
|
||||
&xps_rxqs_attribute.attr,
|
||||
&queue_tx_maxrate.attr,
|
||||
#endif
|
||||
NULL
|
||||
@@ -1315,11 +1444,20 @@ static const void *netdev_queue_namespace(struct kobject *kobj)
|
||||
return ns;
|
||||
}
|
||||
|
||||
static void netdev_queue_get_ownership(struct kobject *kobj,
|
||||
kuid_t *uid, kgid_t *gid)
|
||||
{
|
||||
const struct net *net = netdev_queue_namespace(kobj);
|
||||
|
||||
net_ns_get_ownership(net, uid, gid);
|
||||
}
|
||||
|
||||
static struct kobj_type netdev_queue_ktype __ro_after_init = {
|
||||
.sysfs_ops = &netdev_queue_sysfs_ops,
|
||||
.release = netdev_queue_release,
|
||||
.default_attrs = netdev_queue_default_attrs,
|
||||
.namespace = netdev_queue_namespace,
|
||||
.get_ownership = netdev_queue_get_ownership,
|
||||
};
|
||||
|
||||
static int netdev_queue_add_kobject(struct net_device *dev, int index)
|
||||
@@ -1509,6 +1647,14 @@ static const void *net_namespace(struct device *d)
|
||||
return dev_net(dev);
|
||||
}
|
||||
|
||||
static void net_get_ownership(struct device *d, kuid_t *uid, kgid_t *gid)
|
||||
{
|
||||
struct net_device *dev = to_net_dev(d);
|
||||
const struct net *net = dev_net(dev);
|
||||
|
||||
net_ns_get_ownership(net, uid, gid);
|
||||
}
|
||||
|
||||
static struct class net_class __ro_after_init = {
|
||||
.name = "net",
|
||||
.dev_release = netdev_release,
|
||||
@@ -1516,6 +1662,7 @@ static struct class net_class __ro_after_init = {
|
||||
.dev_uevent = netdev_uevent,
|
||||
.ns_type = &net_ns_type_operations,
|
||||
.namespace = net_namespace,
|
||||
.get_ownership = net_get_ownership,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_OF_NET
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <linux/user_namespace.h>
|
||||
#include <linux/net_namespace.h>
|
||||
#include <linux/sched/task.h>
|
||||
#include <linux/uidgid.h>
|
||||
|
||||
#include <net/sock.h>
|
||||
#include <net/netlink.h>
|
||||
@@ -448,6 +449,33 @@ dec_ucounts:
|
||||
return net;
|
||||
}
|
||||
|
||||
/**
|
||||
* net_ns_get_ownership - get sysfs ownership data for @net
|
||||
* @net: network namespace in question (can be NULL)
|
||||
* @uid: kernel user ID for sysfs objects
|
||||
* @gid: kernel group ID for sysfs objects
|
||||
*
|
||||
* Returns the uid/gid pair of root in the user namespace associated with the
|
||||
* given network namespace.
|
||||
*/
|
||||
void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
|
||||
{
|
||||
if (net) {
|
||||
kuid_t ns_root_uid = make_kuid(net->user_ns, 0);
|
||||
kgid_t ns_root_gid = make_kgid(net->user_ns, 0);
|
||||
|
||||
if (uid_valid(ns_root_uid))
|
||||
*uid = ns_root_uid;
|
||||
|
||||
if (gid_valid(ns_root_gid))
|
||||
*gid = ns_root_gid;
|
||||
} else {
|
||||
*uid = GLOBAL_ROOT_UID;
|
||||
*gid = GLOBAL_ROOT_GID;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(net_ns_get_ownership);
|
||||
|
||||
static void unhash_nsid(struct net *net, struct net *last)
|
||||
{
|
||||
struct net *tmp;
|
||||
|
||||
@@ -1265,7 +1265,7 @@ static ssize_t pktgen_if_write(struct file *file,
|
||||
buf[len] = 0;
|
||||
if (strcmp(buf, pkt_dev->dst_min) != 0) {
|
||||
memset(pkt_dev->dst_min, 0, sizeof(pkt_dev->dst_min));
|
||||
strncpy(pkt_dev->dst_min, buf, len);
|
||||
strcpy(pkt_dev->dst_min, buf);
|
||||
pkt_dev->daddr_min = in_aton(pkt_dev->dst_min);
|
||||
pkt_dev->cur_daddr = pkt_dev->daddr_min;
|
||||
}
|
||||
@@ -1280,14 +1280,12 @@ static ssize_t pktgen_if_write(struct file *file,
|
||||
if (len < 0)
|
||||
return len;
|
||||
|
||||
|
||||
if (copy_from_user(buf, &user_buffer[i], len))
|
||||
return -EFAULT;
|
||||
|
||||
buf[len] = 0;
|
||||
if (strcmp(buf, pkt_dev->dst_max) != 0) {
|
||||
memset(pkt_dev->dst_max, 0, sizeof(pkt_dev->dst_max));
|
||||
strncpy(pkt_dev->dst_max, buf, len);
|
||||
strcpy(pkt_dev->dst_max, buf);
|
||||
pkt_dev->daddr_max = in_aton(pkt_dev->dst_max);
|
||||
pkt_dev->cur_daddr = pkt_dev->daddr_max;
|
||||
}
|
||||
@@ -1396,7 +1394,7 @@ static ssize_t pktgen_if_write(struct file *file,
|
||||
buf[len] = 0;
|
||||
if (strcmp(buf, pkt_dev->src_min) != 0) {
|
||||
memset(pkt_dev->src_min, 0, sizeof(pkt_dev->src_min));
|
||||
strncpy(pkt_dev->src_min, buf, len);
|
||||
strcpy(pkt_dev->src_min, buf);
|
||||
pkt_dev->saddr_min = in_aton(pkt_dev->src_min);
|
||||
pkt_dev->cur_saddr = pkt_dev->saddr_min;
|
||||
}
|
||||
@@ -1416,7 +1414,7 @@ static ssize_t pktgen_if_write(struct file *file,
|
||||
buf[len] = 0;
|
||||
if (strcmp(buf, pkt_dev->src_max) != 0) {
|
||||
memset(pkt_dev->src_max, 0, sizeof(pkt_dev->src_max));
|
||||
strncpy(pkt_dev->src_max, buf, len);
|
||||
strcpy(pkt_dev->src_max, buf);
|
||||
pkt_dev->saddr_max = in_aton(pkt_dev->src_max);
|
||||
pkt_dev->cur_saddr = pkt_dev->saddr_max;
|
||||
}
|
||||
@@ -2255,7 +2253,7 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
|
||||
x = xfrm_state_lookup_byspi(pn->net, htonl(pkt_dev->spi), AF_INET);
|
||||
} else {
|
||||
/* slow path: we dont already have xfrm_state */
|
||||
x = xfrm_stateonly_find(pn->net, DUMMY_MARK,
|
||||
x = xfrm_stateonly_find(pn->net, DUMMY_MARK, 0,
|
||||
(xfrm_address_t *)&pkt_dev->cur_daddr,
|
||||
(xfrm_address_t *)&pkt_dev->cur_saddr,
|
||||
AF_INET,
|
||||
|
||||
@@ -964,7 +964,8 @@ static size_t rtnl_xdp_size(void)
|
||||
{
|
||||
size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */
|
||||
nla_total_size(1) + /* XDP_ATTACHED */
|
||||
nla_total_size(4); /* XDP_PROG_ID */
|
||||
nla_total_size(4) + /* XDP_PROG_ID (or 1st mode) */
|
||||
nla_total_size(4); /* XDP_<mode>_PROG_ID */
|
||||
|
||||
return xdp_size;
|
||||
}
|
||||
@@ -1014,6 +1015,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
|
||||
+ nla_total_size(4) /* IFLA_IF_NETNSID */
|
||||
+ nla_total_size(4) /* IFLA_CARRIER_UP_COUNT */
|
||||
+ nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */
|
||||
+ nla_total_size(4) /* IFLA_MIN_MTU */
|
||||
+ nla_total_size(4) /* IFLA_MAX_MTU */
|
||||
+ 0;
|
||||
}
|
||||
|
||||
@@ -1353,27 +1356,51 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
|
||||
static u32 rtnl_xdp_prog_skb(struct net_device *dev)
|
||||
{
|
||||
const struct net_device_ops *ops = dev->netdev_ops;
|
||||
const struct bpf_prog *generic_xdp_prog;
|
||||
struct netdev_bpf xdp;
|
||||
|
||||
ASSERT_RTNL();
|
||||
|
||||
*prog_id = 0;
|
||||
generic_xdp_prog = rtnl_dereference(dev->xdp_prog);
|
||||
if (generic_xdp_prog) {
|
||||
*prog_id = generic_xdp_prog->aux->id;
|
||||
return XDP_ATTACHED_SKB;
|
||||
}
|
||||
if (!ops->ndo_bpf)
|
||||
return XDP_ATTACHED_NONE;
|
||||
if (!generic_xdp_prog)
|
||||
return 0;
|
||||
return generic_xdp_prog->aux->id;
|
||||
}
|
||||
|
||||
__dev_xdp_query(dev, ops->ndo_bpf, &xdp);
|
||||
*prog_id = xdp.prog_id;
|
||||
static u32 rtnl_xdp_prog_drv(struct net_device *dev)
|
||||
{
|
||||
return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG);
|
||||
}
|
||||
|
||||
return xdp.prog_attached;
|
||||
static u32 rtnl_xdp_prog_hw(struct net_device *dev)
|
||||
{
|
||||
return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
|
||||
XDP_QUERY_PROG_HW);
|
||||
}
|
||||
|
||||
static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev,
|
||||
u32 *prog_id, u8 *mode, u8 tgt_mode, u32 attr,
|
||||
u32 (*get_prog_id)(struct net_device *dev))
|
||||
{
|
||||
u32 curr_id;
|
||||
int err;
|
||||
|
||||
curr_id = get_prog_id(dev);
|
||||
if (!curr_id)
|
||||
return 0;
|
||||
|
||||
*prog_id = curr_id;
|
||||
err = nla_put_u32(skb, attr, curr_id);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (*mode != XDP_ATTACHED_NONE)
|
||||
*mode = XDP_ATTACHED_MULTI;
|
||||
else
|
||||
*mode = tgt_mode;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
|
||||
@@ -1381,17 +1408,32 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
|
||||
struct nlattr *xdp;
|
||||
u32 prog_id;
|
||||
int err;
|
||||
u8 mode;
|
||||
|
||||
xdp = nla_nest_start(skb, IFLA_XDP);
|
||||
if (!xdp)
|
||||
return -EMSGSIZE;
|
||||
|
||||
err = nla_put_u8(skb, IFLA_XDP_ATTACHED,
|
||||
rtnl_xdp_attached_mode(dev, &prog_id));
|
||||
prog_id = 0;
|
||||
mode = XDP_ATTACHED_NONE;
|
||||
err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_SKB,
IFLA_XDP_SKB_PROG_ID, rtnl_xdp_prog_skb);
if (err)
goto err_cancel;
err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_DRV,
IFLA_XDP_DRV_PROG_ID, rtnl_xdp_prog_drv);
if (err)
goto err_cancel;
err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_HW,
IFLA_XDP_HW_PROG_ID, rtnl_xdp_prog_hw);
if (err)
goto err_cancel;

if (prog_id) {
err = nla_put_u8(skb, IFLA_XDP_ATTACHED, mode);
if (err)
goto err_cancel;

if (prog_id && mode != XDP_ATTACHED_MULTI) {
err = nla_put_u32(skb, IFLA_XDP_PROG_ID, prog_id);
if (err)
goto err_cancel;
@@ -1561,6 +1603,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
netif_running(dev) ? dev->operstate : IF_OPER_DOWN) ||
nla_put_u8(skb, IFLA_LINKMODE, dev->link_mode) ||
nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
nla_put_u32(skb, IFLA_MIN_MTU, dev->min_mtu) ||
nla_put_u32(skb, IFLA_MAX_MTU, dev->max_mtu) ||
nla_put_u32(skb, IFLA_GROUP, dev->group) ||
nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) ||
nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) ||
@@ -1692,6 +1736,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_IF_NETNSID] = { .type = NLA_S32 },
[IFLA_CARRIER_UP_COUNT] = { .type = NLA_U32 },
[IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 },
[IFLA_MIN_MTU] = { .type = NLA_U32 },
[IFLA_MAX_MTU] = { .type = NLA_U32 },
};

static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2336,7 +2382,7 @@ static int do_setlink(const struct sk_buff *skb,
}

if (tb[IFLA_MTU]) {
err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
err = dev_set_mtu_ext(dev, nla_get_u32(tb[IFLA_MTU]), extack);
if (err < 0)
goto errout;
status |= DO_SETLINK_MODIFIED;

@@ -1291,7 +1291,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
}
EXPORT_SYMBOL(skb_clone);

static void skb_headers_offset_update(struct sk_buff *skb, int off)
void skb_headers_offset_update(struct sk_buff *skb, int off)
{
/* Only adjust this if it actually is csum_start rather than csum */
if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -1305,6 +1305,7 @@ static void skb_headers_offset_update(struct sk_buff *skb, int off)
skb->inner_network_header += off;
skb->inner_mac_header += off;
}
EXPORT_SYMBOL(skb_headers_offset_update);

void skb_copy_header(struct sk_buff *new, const struct sk_buff *old)
{
@@ -1715,7 +1716,7 @@ void *skb_push(struct sk_buff *skb, unsigned int len)
{
skb->data -= len;
skb->len += len;
if (unlikely(skb->data<skb->head))
if (unlikely(skb->data < skb->head))
skb_under_panic(skb, len, __builtin_return_address(0));
return skb->data;
}
@@ -2858,23 +2859,27 @@ EXPORT_SYMBOL(skb_queue_purge);
/**
* skb_rbtree_purge - empty a skb rbtree
* @root: root of the rbtree to empty
* Return value: the sum of truesizes of all purged skbs.
*
* Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
* the list and one reference dropped. This function does not take
* any lock. Synchronization should be handled by the caller (e.g., TCP
* out-of-order queue is protected by the socket lock).
*/
void skb_rbtree_purge(struct rb_root *root)
unsigned int skb_rbtree_purge(struct rb_root *root)
{
struct rb_node *p = rb_first(root);
unsigned int sum = 0;

while (p) {
struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);

p = rb_next(p);
rb_erase(&skb->rbnode, root);
sum += skb->truesize;
kfree_skb(skb);
}
return sum;
}
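
skb_rbtree_purge() now returns the total truesize it freed, so a caller can hand that amount back to socket memory accounting in one step. A minimal sketch of such a caller against a TCP-style out-of-order queue; the helper name ofo_purge_example is illustrative and not part of this merge:

static void ofo_purge_example(struct sock *sk)
{
        unsigned int freed;

        /* drop every out-of-order skb and learn how much truesize went away */
        freed = skb_rbtree_purge(&tcp_sk(sk)->out_of_order_queue);

        /* give that memory back to the socket's receive accounting */
        atomic_sub(freed, &sk->sk_rmem_alloc);
        sk_mem_uncharge(sk, freed);
}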

/**
@@ -3816,14 +3821,14 @@ err:
}
EXPORT_SYMBOL_GPL(skb_segment);

int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
{
struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
unsigned int offset = skb_gro_offset(skb);
unsigned int headlen = skb_headlen(skb);
unsigned int len = skb_gro_len(skb);
struct sk_buff *lp, *p = *head;
unsigned int delta_truesize;
struct sk_buff *lp;

if (unlikely(p->len + len >= 65536))
return -E2BIG;
@@ -4899,7 +4904,6 @@ EXPORT_SYMBOL(skb_try_coalesce);
*/
void skb_scrub_packet(struct sk_buff *skb, bool xnet)
{
skb->tstamp = 0;
skb->pkt_type = PACKET_HOST;
skb->skb_iif = 0;
skb->ignore_df = 0;
@@ -4912,8 +4916,8 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
return;

ipvs_reset(skb);
skb_orphan(skb);
skb->mark = 0;
skb->tstamp = 0;
}
EXPORT_SYMBOL_GPL(skb_scrub_packet);

106
net/core/sock.c
@@ -91,6 +91,7 @@

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <asm/unaligned.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/errqueue.h>
@@ -249,58 +250,13 @@ static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
_sock_locks("k-clock-")
};
static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
"rlock-AF_UNSPEC", "rlock-AF_UNIX" , "rlock-AF_INET" ,
"rlock-AF_AX25" , "rlock-AF_IPX" , "rlock-AF_APPLETALK",
"rlock-AF_NETROM", "rlock-AF_BRIDGE" , "rlock-AF_ATMPVC" ,
"rlock-AF_X25" , "rlock-AF_INET6" , "rlock-AF_ROSE" ,
"rlock-AF_DECnet", "rlock-AF_NETBEUI" , "rlock-AF_SECURITY" ,
"rlock-AF_KEY" , "rlock-AF_NETLINK" , "rlock-AF_PACKET" ,
"rlock-AF_ASH" , "rlock-AF_ECONET" , "rlock-AF_ATMSVC" ,
"rlock-AF_RDS" , "rlock-AF_SNA" , "rlock-AF_IRDA" ,
"rlock-AF_PPPOX" , "rlock-AF_WANPIPE" , "rlock-AF_LLC" ,
"rlock-27" , "rlock-28" , "rlock-AF_CAN" ,
"rlock-AF_TIPC" , "rlock-AF_BLUETOOTH", "rlock-AF_IUCV" ,
"rlock-AF_RXRPC" , "rlock-AF_ISDN" , "rlock-AF_PHONET" ,
"rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG" ,
"rlock-AF_NFC" , "rlock-AF_VSOCK" , "rlock-AF_KCM" ,
"rlock-AF_QIPCRTR", "rlock-AF_SMC" , "rlock-AF_XDP" ,
"rlock-AF_MAX"
_sock_locks("rlock-")
};
static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
"wlock-AF_UNSPEC", "wlock-AF_UNIX" , "wlock-AF_INET" ,
"wlock-AF_AX25" , "wlock-AF_IPX" , "wlock-AF_APPLETALK",
"wlock-AF_NETROM", "wlock-AF_BRIDGE" , "wlock-AF_ATMPVC" ,
"wlock-AF_X25" , "wlock-AF_INET6" , "wlock-AF_ROSE" ,
"wlock-AF_DECnet", "wlock-AF_NETBEUI" , "wlock-AF_SECURITY" ,
"wlock-AF_KEY" , "wlock-AF_NETLINK" , "wlock-AF_PACKET" ,
"wlock-AF_ASH" , "wlock-AF_ECONET" , "wlock-AF_ATMSVC" ,
"wlock-AF_RDS" , "wlock-AF_SNA" , "wlock-AF_IRDA" ,
"wlock-AF_PPPOX" , "wlock-AF_WANPIPE" , "wlock-AF_LLC" ,
"wlock-27" , "wlock-28" , "wlock-AF_CAN" ,
"wlock-AF_TIPC" , "wlock-AF_BLUETOOTH", "wlock-AF_IUCV" ,
"wlock-AF_RXRPC" , "wlock-AF_ISDN" , "wlock-AF_PHONET" ,
"wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG" ,
"wlock-AF_NFC" , "wlock-AF_VSOCK" , "wlock-AF_KCM" ,
"wlock-AF_QIPCRTR", "wlock-AF_SMC" , "wlock-AF_XDP" ,
"wlock-AF_MAX"
_sock_locks("wlock-")
};
static const char *const af_family_elock_key_strings[AF_MAX+1] = {
"elock-AF_UNSPEC", "elock-AF_UNIX" , "elock-AF_INET" ,
"elock-AF_AX25" , "elock-AF_IPX" , "elock-AF_APPLETALK",
"elock-AF_NETROM", "elock-AF_BRIDGE" , "elock-AF_ATMPVC" ,
"elock-AF_X25" , "elock-AF_INET6" , "elock-AF_ROSE" ,
"elock-AF_DECnet", "elock-AF_NETBEUI" , "elock-AF_SECURITY" ,
"elock-AF_KEY" , "elock-AF_NETLINK" , "elock-AF_PACKET" ,
"elock-AF_ASH" , "elock-AF_ECONET" , "elock-AF_ATMSVC" ,
"elock-AF_RDS" , "elock-AF_SNA" , "elock-AF_IRDA" ,
"elock-AF_PPPOX" , "elock-AF_WANPIPE" , "elock-AF_LLC" ,
"elock-27" , "elock-28" , "elock-AF_CAN" ,
"elock-AF_TIPC" , "elock-AF_BLUETOOTH", "elock-AF_IUCV" ,
"elock-AF_RXRPC" , "elock-AF_ISDN" , "elock-AF_PHONET" ,
"elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG" ,
"elock-AF_NFC" , "elock-AF_VSOCK" , "elock-AF_KCM" ,
"elock-AF_QIPCRTR", "elock-AF_SMC" , "elock-AF_XDP" ,
"elock-AF_MAX"
_sock_locks("elock-")
};

/*
@@ -697,6 +653,7 @@ EXPORT_SYMBOL(sk_mc_loop);
int sock_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen)
{
struct sock_txtime sk_txtime;
struct sock *sk = sock->sk;
int val;
int valbool;
@@ -1070,6 +1027,26 @@ set_rcvbuf:
}
break;

case SO_TXTIME:
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
ret = -EPERM;
} else if (optlen != sizeof(struct sock_txtime)) {
ret = -EINVAL;
} else if (copy_from_user(&sk_txtime, optval,
sizeof(struct sock_txtime))) {
ret = -EFAULT;
} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
ret = -EINVAL;
} else {
sock_valbool_flag(sk, SOCK_TXTIME, true);
sk->sk_clockid = sk_txtime.clockid;
sk->sk_txtime_deadline_mode =
!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
sk->sk_txtime_report_errors =
!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
}
break;

default:
ret = -ENOPROTOOPT;
break;
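
Userspace opts in to this with a struct sock_txtime passed to setsockopt(). A minimal sketch of enabling it on an existing socket; the fd, the choice of CLOCK_TAI and the flag are illustrative, and SO_TXTIME plus struct sock_txtime come from the 4.19-era uapi headers:

#include <stdio.h>
#include <time.h>
#include <sys/socket.h>
#include <linux/net_tstamp.h>

static int enable_txtime(int fd)
{
        /* ask the kernel to honour per-packet transmit times on CLOCK_TAI */
        struct sock_txtime cfg = {
                .clockid = CLOCK_TAI,
                .flags = SOF_TXTIME_REPORT_ERRORS,
        };

        if (setsockopt(fd, SOL_SOCKET, SO_TXTIME, &cfg, sizeof(cfg))) {
                perror("setsockopt(SO_TXTIME)");
                return -1;
        }
        return 0;
}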

@@ -1115,6 +1092,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
u64 val64;
struct linger ling;
struct timeval tm;
struct sock_txtime txtime;
} v;

int lv = sizeof(int);
@@ -1403,6 +1381,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sock_flag(sk, SOCK_ZEROCOPY);
break;

case SO_TXTIME:
lv = sizeof(v.txtime);
v.txtime.clockid = sk->sk_clockid;
v.txtime.flags |= sk->sk_txtime_deadline_mode ?
SOF_TXTIME_DEADLINE_MODE : 0;
v.txtime.flags |= sk->sk_txtime_report_errors ?
SOF_TXTIME_REPORT_ERRORS : 0;
break;

default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -2137,6 +2124,13 @@ int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
sockc->tsflags |= tsflags;
break;
case SCM_TXTIME:
if (!sock_flag(sk, SOCK_TXTIME))
return -EINVAL;
if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
return -EINVAL;
sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
break;
/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
case SCM_RIGHTS:
case SCM_CREDENTIALS:
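
The per-packet transmit time itself travels as an SCM_TXTIME control message holding one u64 of nanoseconds, matching the CMSG_LEN(sizeof(u64)) check above. A rough sendmsg() sketch for a connected socket that already enabled SO_TXTIME; the function name send_at is illustrative and assumes 4.19-era headers:

#include <string.h>
#include <stdint.h>
#include <sys/socket.h>

static ssize_t send_at(int fd, const void *buf, size_t len, uint64_t txtime_ns)
{
        char control[CMSG_SPACE(sizeof(uint64_t))] = {};
        struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
        struct msghdr msg = {
                .msg_iov = &iov,
                .msg_iovlen = 1,
                .msg_control = control,
                .msg_controllen = sizeof(control),
        };
        struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);

        /* one u64: when, in ns on the SO_TXTIME clock, this packet may leave */
        cm->cmsg_level = SOL_SOCKET;
        cm->cmsg_type = SCM_TXTIME;
        cm->cmsg_len = CMSG_LEN(sizeof(uint64_t));
        memcpy(CMSG_DATA(cm), &txtime_ns, sizeof(uint64_t));

        return sendmsg(fd, &msg, 0);
}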

@@ -2401,9 +2395,10 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
struct proto *prot = sk->sk_prot;
long allocated = sk_memory_allocated_add(sk, amt);
bool charged = true;

if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
!mem_cgroup_charge_skmem(sk->sk_memcg, amt))
!(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt)))
goto suppress_allocation;

/* Under limit. */
@@ -2461,7 +2456,8 @@ suppress_allocation:
return 1;
}

trace_sock_exceed_buf_limit(sk, prot, allocated);
if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
trace_sock_exceed_buf_limit(sk, prot, allocated, kind);

sk_memory_allocated_sub(sk, amt);

@@ -2818,6 +2814,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_pacing_rate = ~0U;
sk->sk_pacing_shift = 10;
sk->sk_incoming_cpu = -1;

sk_rx_queue_clear(sk);
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
@@ -2902,8 +2900,8 @@ EXPORT_SYMBOL(lock_sock_fast);
int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
struct timeval tv;
if (!sock_flag(sk, SOCK_TIMESTAMP))
sock_enable_timestamp(sk, SOCK_TIMESTAMP);

sock_enable_timestamp(sk, SOCK_TIMESTAMP);
tv = ktime_to_timeval(sk->sk_stamp);
if (tv.tv_sec == -1)
return -ENOENT;
@@ -2918,8 +2916,8 @@ EXPORT_SYMBOL(sock_get_timestamp);
int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
struct timespec ts;
if (!sock_flag(sk, SOCK_TIMESTAMP))
sock_enable_timestamp(sk, SOCK_TIMESTAMP);

sock_enable_timestamp(sk, SOCK_TIMESTAMP);
ts = ktime_to_timespec(sk->sk_stamp);
if (ts.tv_sec == -1)
return -ENOENT;

@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/tcp.h>
#include <linux/workqueue.h>
#include <linux/nospec.h>

#include <linux/inet_diag.h>
#include <linux/sock_diag.h>
@@ -218,6 +219,7 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)

if (req->sdiag_family >= AF_MAX)
return -EINVAL;
req->sdiag_family = array_index_nospec(req->sdiag_family, AF_MAX);

if (sock_diag_handlers[req->sdiag_family] == NULL)
sock_load_diag_module(req->sdiag_family, 0);

@@ -8,11 +8,34 @@

#include <net/sock_reuseport.h>
#include <linux/bpf.h>
#include <linux/idr.h>
#include <linux/filter.h>
#include <linux/rcupdate.h>

#define INIT_SOCKS 128

static DEFINE_SPINLOCK(reuseport_lock);
DEFINE_SPINLOCK(reuseport_lock);

#define REUSEPORT_MIN_ID 1
static DEFINE_IDA(reuseport_ida);

int reuseport_get_id(struct sock_reuseport *reuse)
{
int id;

if (reuse->reuseport_id)
return reuse->reuseport_id;

id = ida_simple_get(&reuseport_ida, REUSEPORT_MIN_ID, 0,
/* Called under reuseport_lock */
GFP_ATOMIC);
if (id < 0)
return id;

reuse->reuseport_id = id;

return reuse->reuseport_id;
}

static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
{
@@ -29,7 +52,7 @@ static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
return reuse;
}

int reuseport_alloc(struct sock *sk)
int reuseport_alloc(struct sock *sk, bool bind_inany)
{
struct sock_reuseport *reuse;

@@ -41,9 +64,17 @@ int reuseport_alloc(struct sock *sk)
/* Allocation attempts can occur concurrently via the setsockopt path
* and the bind/hash path. Nothing to do when we lose the race.
*/
if (rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock)))
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));
if (reuse) {
/* Only set reuse->bind_inany if the bind_inany is true.
* Otherwise, it will overwrite the reuse->bind_inany
* which was set by the bind/hash path.
*/
if (bind_inany)
reuse->bind_inany = bind_inany;
goto out;
}

reuse = __reuseport_alloc(INIT_SOCKS);
if (!reuse) {
@@ -53,6 +84,7 @@ int reuseport_alloc(struct sock *sk)

reuse->socks[0] = sk;
reuse->num_socks = 1;
reuse->bind_inany = bind_inany;
rcu_assign_pointer(sk->sk_reuseport_cb, reuse);

out:
@@ -78,9 +110,12 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
more_reuse->max_socks = more_socks_size;
more_reuse->num_socks = reuse->num_socks;
more_reuse->prog = reuse->prog;
more_reuse->reuseport_id = reuse->reuseport_id;
more_reuse->bind_inany = reuse->bind_inany;

memcpy(more_reuse->socks, reuse->socks,
reuse->num_socks * sizeof(struct sock *));
more_reuse->synq_overflow_ts = READ_ONCE(reuse->synq_overflow_ts);

for (i = 0; i < reuse->num_socks; ++i)
rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
@@ -99,8 +134,9 @@ static void reuseport_free_rcu(struct rcu_head *head)
struct sock_reuseport *reuse;

reuse = container_of(head, struct sock_reuseport, rcu);
if (reuse->prog)
bpf_prog_destroy(reuse->prog);
sk_reuseport_prog_free(rcu_dereference_protected(reuse->prog, 1));
if (reuse->reuseport_id)
ida_simple_remove(&reuseport_ida, reuse->reuseport_id);
kfree(reuse);
}

@@ -110,12 +146,12 @@ static void reuseport_free_rcu(struct rcu_head *head)
* @sk2: Socket belonging to the existing reuseport group.
* May return ENOMEM and not add socket to group under memory pressure.
*/
int reuseport_add_sock(struct sock *sk, struct sock *sk2)
int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
{
struct sock_reuseport *old_reuse, *reuse;

if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
int err = reuseport_alloc(sk2);
int err = reuseport_alloc(sk2, bind_inany);

if (err)
return err;
@@ -160,6 +196,14 @@ void reuseport_detach_sock(struct sock *sk)
spin_lock_bh(&reuseport_lock);
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));

/* At least one of the sk in this reuseport group is added to
* a bpf map. Notify the bpf side. The bpf map logic will
* remove the sk if it is indeed added to a bpf map.
*/
if (reuse->reuseport_id)
bpf_sk_reuseport_detach(sk);

rcu_assign_pointer(sk->sk_reuseport_cb, NULL);

for (i = 0; i < reuse->num_socks; i++) {
@@ -175,9 +219,9 @@ void reuseport_detach_sock(struct sock *sk)
}
EXPORT_SYMBOL(reuseport_detach_sock);

static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks,
struct bpf_prog *prog, struct sk_buff *skb,
int hdr_len)
static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
struct bpf_prog *prog, struct sk_buff *skb,
int hdr_len)
{
struct sk_buff *nskb = NULL;
u32 index;
@@ -238,9 +282,15 @@ struct sock *reuseport_select_sock(struct sock *sk,
/* paired with smp_wmb() in reuseport_add_sock() */
smp_rmb();

if (prog && skb)
sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
if (!prog || !skb)
goto select_by_hash;

if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, hash);
else
sk2 = run_bpf_filter(reuse, socks, prog, skb, hdr_len);

select_by_hash:
/* no bpf or invalid bpf result: fall back to hash usage */
if (!sk2)
sk2 = reuse->socks[reciprocal_scale(hash, socks)];
@@ -252,12 +302,21 @@ out:
}
EXPORT_SYMBOL(reuseport_select_sock);
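
reuseport_select_sock() now hands BPF_PROG_TYPE_SK_REUSEPORT programs to bpf_run_sk_reuseport() and keeps the hash pick as the fallback. A rough sketch of such a program, assuming a REUSEPORT_SOCKARRAY map filled in from userspace; the map name, slot choice and the bpf_helpers.h include path are illustrative:

#include <linux/bpf.h>
#include "bpf_helpers.h" /* as shipped with the selftests of this tree */

struct bpf_map_def SEC("maps") target_socks = {
        .type = BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
        .key_size = sizeof(__u32),
        .value_size = sizeof(__u32),
        .max_entries = 16,
};

SEC("sk_reuseport")
int select_sock(struct sk_reuseport_md *md)
{
        __u32 key = 0; /* always try the socket in slot 0 in this sketch */

        /* if the lookup fails, return SK_PASS with no selection so the
         * kernel falls back to its hash-based pick; SK_DROP would reject
         * the packet outright */
        bpf_sk_select_reuseport(md, &target_socks, &key, 0);
        return SK_PASS;
}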

struct bpf_prog *
reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
{
struct sock_reuseport *reuse;
struct bpf_prog *old_prog;

if (sk_unhashed(sk) && sk->sk_reuseport) {
int err = reuseport_alloc(sk, false);

if (err)
return err;
} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
/* The socket wasn't bound with SO_REUSEPORT */
return -EINVAL;
}

spin_lock_bh(&reuseport_lock);
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));
@@ -266,6 +325,7 @@ reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
rcu_assign_pointer(reuse->prog, prog);
spin_unlock_bh(&reuseport_lock);

return old_prog;
sk_reuseport_prog_free(old_prog);
return 0;
}
EXPORT_SYMBOL(reuseport_attach_prog);

@@ -397,7 +397,7 @@ int inet_pton_with_scope(struct net *net, __kernel_sa_family_t af,
break;
default:
pr_err("unexpected address family %d\n", af);
};
}

return ret;
}

@@ -3,8 +3,11 @@
* Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
* Released under terms in GPL version 2. See COPYING.
*/
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
@@ -45,8 +48,8 @@ static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
!= sizeof(u32));

/* Use cyclic increasing ID as direct hash key, see rht_bucket_index */
return key << RHT_HASH_RESERVED_SPACE;
/* Use cyclic increasing ID as direct hash key */
return key;
}

static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
@@ -327,10 +330,12 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
page = virt_to_head_page(data);
if (xa)
if (xa) {
napi_direct &= !xdp_return_frame_no_direct();
page_pool_put_page(xa->page_pool, page, napi_direct);
else
} else {
put_page(page);
}
rcu_read_unlock();
break;
case MEM_TYPE_PAGE_SHARED:
@@ -345,8 +350,7 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
rcu_read_lock();
/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
if (!WARN_ON_ONCE(!xa))
xa->zc_alloc->free(xa->zc_alloc, handle);
xa->zc_alloc->free(xa->zc_alloc, handle);
rcu_read_unlock();
default:
/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
@@ -371,3 +375,34 @@ void xdp_return_buff(struct xdp_buff *xdp)
__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle);
}
EXPORT_SYMBOL_GPL(xdp_return_buff);

int xdp_attachment_query(struct xdp_attachment_info *info,
struct netdev_bpf *bpf)
{
bpf->prog_id = info->prog ? info->prog->aux->id : 0;
bpf->prog_flags = info->prog ? info->flags : 0;
return 0;
}
EXPORT_SYMBOL_GPL(xdp_attachment_query);

bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
struct netdev_bpf *bpf)
{
if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) {
NL_SET_ERR_MSG(bpf->extack,
"program loaded with different flags");
return false;
}
return true;
}
EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok);

void xdp_attachment_setup(struct xdp_attachment_info *info,
struct netdev_bpf *bpf)
{
if (info->prog)
bpf_prog_put(info->prog);
info->prog = bpf->prog;
info->flags = bpf->flags;
}
EXPORT_SYMBOL_GPL(xdp_attachment_setup);
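
These three helpers give drivers one shared pattern for XDP_QUERY_PROG and XDP_SETUP_PROG handling. A rough sketch of a driver ndo_bpf built on them; struct my_priv, its xdp member of type struct xdp_attachment_info, and my_reprogram_hw() are illustrative assumptions, not code from this merge:

static int my_ndo_bpf(struct net_device *dev, struct netdev_bpf *bpf)
{
        struct my_priv *priv = netdev_priv(dev);

        switch (bpf->command) {
        case XDP_QUERY_PROG:
                /* report the currently attached prog id and flags */
                return xdp_attachment_query(&priv->xdp, bpf);
        case XDP_SETUP_PROG:
                /* refuse to replace a program installed with different flags */
                if (!xdp_attachment_flags_ok(&priv->xdp, bpf))
                        return -EBUSY;
                my_reprogram_hw(priv, bpf->prog);
                /* record the new prog/flags and drop the old reference */
                xdp_attachment_setup(&priv->xdp, bpf);
                return 0;
        default:
                return -EINVAL;
        }
}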