Merge branch 'linus/master' into rdma.git for-next

rdma.git merge resolution for the 4.19 merge window

Conflicts:
 drivers/infiniband/core/rdma_core.c
   - Use the rdma code and revise with the new spelling for
     atomic_fetch_add_unless
 drivers/nvme/host/rdma.c
   - Replace max_sge with max_send_sge in new blk code
 drivers/nvme/target/rdma.c
   - Use the blk code and revise to use NULL for ib_post_recv when
     appropriate
   - Replace max_sge with max_recv_sge in new blk code
 net/rds/ib_send.c
   - Use the net code and revise to use NULL for ib_post_recv when
     appropriate

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
This commit is contained in:
Jason Gunthorpe
2018-08-16 14:13:03 -06:00
7051 changed files with 339696 additions and 129229 deletions

View File

@@ -837,7 +837,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
struct sock *sk = sock->sk;
__poll_t mask;
sock_poll_wait(file, sk_sleep(sk), wait);
sock_poll_wait(file, wait);
mask = 0;
/* exceptional events? */

File diff suppressed because it is too large Load Diff

View File

@@ -284,12 +284,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
case SIOCSIFTXQLEN:
if (ifr->ifr_qlen < 0)
return -EINVAL;
if (dev->tx_queue_len ^ ifr->ifr_qlen) {
err = dev_change_tx_queue_len(dev, ifr->ifr_qlen);
if (err)
return err;
}
return 0;
return dev_change_tx_queue_len(dev, ifr->ifr_qlen);
case SIOCSIFNAME:
ifr->ifr_newname[IFNAMSIZ-1] = '\0';

File diff suppressed because it is too large Load Diff

View File

@@ -307,6 +307,7 @@ void metadata_dst_free(struct metadata_dst *md_dst)
#endif
kfree(md_dst);
}
EXPORT_SYMBOL_GPL(metadata_dst_free);
struct metadata_dst __percpu *
metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags)

View File

@@ -111,6 +111,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload",
[NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record",
[NETIF_F_HW_TLS_TX_BIT] = "tls-hw-tx-offload",
[NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload",
};
static const char

View File

@@ -924,8 +924,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
errout:
if (nlrule)
kfree(nlrule);
kfree(nlrule);
rules_ops_put(ops);
return err;
}

View File

@@ -1453,30 +1453,6 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
return 0;
}
static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
{
struct bpf_prog *old_prog;
int err;
if (bpf_prog_size(prog->len) > sysctl_optmem_max)
return -ENOMEM;
if (sk_unhashed(sk) && sk->sk_reuseport) {
err = reuseport_alloc(sk);
if (err)
return err;
} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
/* The socket wasn't bound with SO_REUSEPORT */
return -EINVAL;
}
old_prog = reuseport_attach_prog(sk, prog);
if (old_prog)
bpf_prog_destroy(old_prog);
return 0;
}
static
struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
{
@@ -1550,13 +1526,15 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
if (IS_ERR(prog))
return PTR_ERR(prog);
err = __reuseport_attach_prog(prog, sk);
if (err < 0) {
__bpf_prog_release(prog);
return err;
}
if (bpf_prog_size(prog->len) > sysctl_optmem_max)
err = -ENOMEM;
else
err = reuseport_attach_prog(sk, prog);
return 0;
if (err)
__bpf_prog_release(prog);
return err;
}
static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
@@ -1586,19 +1564,58 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
{
struct bpf_prog *prog = __get_bpf(ufd, sk);
struct bpf_prog *prog;
int err;
if (sock_flag(sk, SOCK_FILTER_LOCKED))
return -EPERM;
prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
if (IS_ERR(prog) && PTR_ERR(prog) == -EINVAL)
prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
if (IS_ERR(prog))
return PTR_ERR(prog);
err = __reuseport_attach_prog(prog, sk);
if (err < 0) {
bpf_prog_put(prog);
return err;
if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) {
/* Like other non BPF_PROG_TYPE_SOCKET_FILTER
* bpf prog (e.g. sockmap). It depends on the
* limitation imposed by bpf_prog_load().
* Hence, sysctl_optmem_max is not checked.
*/
if ((sk->sk_type != SOCK_STREAM &&
sk->sk_type != SOCK_DGRAM) ||
(sk->sk_protocol != IPPROTO_UDP &&
sk->sk_protocol != IPPROTO_TCP) ||
(sk->sk_family != AF_INET &&
sk->sk_family != AF_INET6)) {
err = -ENOTSUPP;
goto err_prog_put;
}
} else {
/* BPF_PROG_TYPE_SOCKET_FILTER */
if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
err = -ENOMEM;
goto err_prog_put;
}
}
return 0;
err = reuseport_attach_prog(sk, prog);
err_prog_put:
if (err)
bpf_prog_put(prog);
return err;
}
void sk_reuseport_prog_free(struct bpf_prog *prog)
{
if (!prog)
return;
if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
bpf_prog_put(prog);
else
bpf_prog_destroy(prog);
}
struct bpf_scratchpad {
@@ -2082,19 +2099,12 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
.arg3_type = ARG_ANYTHING,
};
struct redirect_info {
u32 ifindex;
u32 flags;
struct bpf_map *map;
struct bpf_map *map_to_flush;
unsigned long map_owner;
};
static DEFINE_PER_CPU(struct redirect_info, redirect_info);
DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
if (unlikely(flags & ~(BPF_F_INGRESS)))
return TC_ACT_SHOT;
@@ -2107,7 +2117,7 @@ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
int skb_do_redirect(struct sk_buff *skb)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct net_device *dev;
dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
@@ -3200,7 +3210,7 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
void xdp_do_flush_map(void)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct bpf_map *map = ri->map_to_flush;
ri->map_to_flush = NULL;
@@ -3245,7 +3255,7 @@ static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
unsigned long map_owner = ri->map_owner;
struct bpf_map *map = ri->map;
u32 index = ri->ifindex;
@@ -3285,7 +3295,7 @@ err:
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct net_device *fwd;
u32 index = ri->ifindex;
int err;
@@ -3317,7 +3327,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
unsigned long map_owner = ri->map_owner;
struct bpf_map *map = ri->map;
u32 index = ri->ifindex;
@@ -3368,7 +3378,7 @@ err:
int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
u32 index = ri->ifindex;
struct net_device *fwd;
int err = 0;
@@ -3399,7 +3409,7 @@ EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
if (unlikely(flags))
return XDP_ABORTED;
@@ -3423,7 +3433,7 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
unsigned long, map_owner)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
if (unlikely(flags))
return XDP_ABORTED;
@@ -3681,7 +3691,7 @@ BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
if (unlikely(size > IP_TUNNEL_OPTS_MAX))
return -ENOMEM;
ip_tunnel_info_opts_set(info, from, size);
ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
return 0;
}
@@ -3768,6 +3778,32 @@ static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
};
BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
ancestor_level)
{
struct sock *sk = skb_to_full_sk(skb);
struct cgroup *ancestor;
struct cgroup *cgrp;
if (!sk || !sk_fullsock(sk))
return 0;
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
ancestor = cgroup_ancestor(cgrp, ancestor_level);
if (!ancestor)
return 0;
return ancestor->kn->id.id;
}
static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
.func = bpf_skb_ancestor_cgroup_id,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
};
#endif
static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
@@ -3814,6 +3850,30 @@ static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
{
return sock_gen_cookie(ctx->sk);
}
static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
.func = bpf_get_socket_cookie_sock_addr,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
};
BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
{
return sock_gen_cookie(ctx->sk);
}
static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
.func = bpf_get_socket_cookie_sock_ops,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
};
BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
{
struct sock *sk = sk_to_full_sk(skb->sk);
@@ -4544,26 +4604,28 @@ BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
struct ipv6_sr_hdr *srh = srh_state->srh;
void *srh_tlvs, *srh_end, *ptr;
struct ipv6_sr_hdr *srh;
int srhoff = 0;
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
if (srh == NULL)
return -EINVAL;
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);
ptr = skb->data + offset;
if (ptr >= srh_tlvs && ptr + len <= srh_end)
srh_state->valid = 0;
srh_state->valid = false;
else if (ptr < (void *)&srh->flags ||
ptr + len > (void *)&srh->segments)
return -EFAULT;
if (unlikely(bpf_try_make_writable(skb, offset + len)))
return -EFAULT;
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
return -EINVAL;
srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
memcpy(skb->data + offset, from, len);
return 0;
@@ -4579,52 +4641,78 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
.arg4_type = ARG_CONST_SIZE
};
static void bpf_update_srh_state(struct sk_buff *skb)
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
int srhoff = 0;
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) {
srh_state->srh = NULL;
} else {
srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh_state->hdrlen = srh_state->srh->hdrlen << 3;
srh_state->valid = true;
}
}
BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
u32, action, void *, param, u32, param_len)
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
struct ipv6_sr_hdr *srh;
int srhoff = 0;
int hdroff = 0;
int err;
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
return -EINVAL;
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
if (!srh_state->valid) {
if (unlikely((srh_state->hdrlen & 7) != 0))
return -EBADMSG;
srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
if (unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
return -EBADMSG;
srh_state->valid = 1;
}
switch (action) {
case SEG6_LOCAL_ACTION_END_X:
if (!seg6_bpf_has_valid_srh(skb))
return -EBADMSG;
if (param_len != sizeof(struct in6_addr))
return -EINVAL;
return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
case SEG6_LOCAL_ACTION_END_T:
if (!seg6_bpf_has_valid_srh(skb))
return -EBADMSG;
if (param_len != sizeof(int))
return -EINVAL;
return seg6_lookup_nexthop(skb, NULL, *(int *)param);
case SEG6_LOCAL_ACTION_END_DT6:
if (!seg6_bpf_has_valid_srh(skb))
return -EBADMSG;
if (param_len != sizeof(int))
return -EINVAL;
if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0)
return -EBADMSG;
if (!pskb_pull(skb, hdroff))
return -EBADMSG;
skb_postpull_rcsum(skb, skb_network_header(skb), hdroff);
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
skb->encapsulation = 0;
bpf_compute_data_pointers(skb);
bpf_update_srh_state(skb);
return seg6_lookup_nexthop(skb, NULL, *(int *)param);
case SEG6_LOCAL_ACTION_END_B6:
if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
return -EBADMSG;
err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
param, param_len);
if (!err)
srh_state->hdrlen =
((struct ipv6_sr_hdr *)param)->hdrlen << 3;
bpf_update_srh_state(skb);
return err;
case SEG6_LOCAL_ACTION_END_B6_ENCAP:
if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
return -EBADMSG;
err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
param, param_len);
if (!err)
srh_state->hdrlen =
((struct ipv6_sr_hdr *)param)->hdrlen << 3;
bpf_update_srh_state(skb);
return err;
default:
return -EINVAL;
@@ -4646,15 +4734,14 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
struct ipv6_sr_hdr *srh = srh_state->srh;
void *srh_end, *srh_tlvs, *ptr;
struct ipv6_sr_hdr *srh;
struct ipv6hdr *hdr;
int srhoff = 0;
int ret;
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
if (unlikely(srh == NULL))
return -EINVAL;
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
((srh->first_segment + 1) << 4));
@@ -4684,8 +4771,11 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
hdr = (struct ipv6hdr *)skb->data;
hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
return -EINVAL;
srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh_state->hdrlen += len;
srh_state->valid = 0;
srh_state->valid = false;
return 0;
}
@@ -4753,6 +4843,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_trace_printk:
if (capable(CAP_SYS_ADMIN))
return bpf_get_trace_printk_proto();
/* else: fall through */
default:
return NULL;
}
@@ -4767,6 +4858,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
*/
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -4789,6 +4882,10 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
default:
return NULL;
}
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_sock_addr_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -4811,6 +4908,17 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
}
static const struct bpf_func_proto *
cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
default:
return sk_filter_func_proto(func_id, prog);
}
}
static const struct bpf_func_proto *
tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -4884,6 +4992,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#ifdef CONFIG_SOCK_CGROUP_DATA
case BPF_FUNC_skb_cgroup_id:
return &bpf_skb_cgroup_id_proto;
case BPF_FUNC_skb_ancestor_cgroup_id:
return &bpf_skb_ancestor_cgroup_id_proto;
#endif
default:
return bpf_base_func_proto(func_id);
@@ -4931,6 +5041,10 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sock_map_update_proto;
case BPF_FUNC_sock_hash_update:
return &bpf_sock_hash_update_proto;
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_sock_ops_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -4950,6 +5064,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_msg_cork_bytes_proto;
case BPF_FUNC_msg_pull_data:
return &bpf_msg_pull_data_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -4977,6 +5093,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_redirect_map_proto;
case BPF_FUNC_sk_redirect_hash:
return &bpf_sk_redirect_hash_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -6781,7 +6899,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
};
const struct bpf_verifier_ops cg_skb_verifier_ops = {
.get_func_proto = sk_filter_func_proto,
.get_func_proto = cg_skb_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
};
@@ -6940,3 +7058,271 @@ out:
release_sock(sk);
return ret;
}
#ifdef CONFIG_INET
struct sk_reuseport_kern {
struct sk_buff *skb;
struct sock *sk;
struct sock *selected_sk;
void *data_end;
u32 hash;
u32 reuseport_id;
bool bind_inany;
};
static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
struct sock_reuseport *reuse,
struct sock *sk, struct sk_buff *skb,
u32 hash)
{
reuse_kern->skb = skb;
reuse_kern->sk = sk;
reuse_kern->selected_sk = NULL;
reuse_kern->data_end = skb->data + skb_headlen(skb);
reuse_kern->hash = hash;
reuse_kern->reuseport_id = reuse->reuseport_id;
reuse_kern->bind_inany = reuse->bind_inany;
}
struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
struct bpf_prog *prog, struct sk_buff *skb,
u32 hash)
{
struct sk_reuseport_kern reuse_kern;
enum sk_action action;
bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, hash);
action = BPF_PROG_RUN(prog, &reuse_kern);
if (action == SK_PASS)
return reuse_kern.selected_sk;
else
return ERR_PTR(-ECONNREFUSED);
}
BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
struct bpf_map *, map, void *, key, u32, flags)
{
struct sock_reuseport *reuse;
struct sock *selected_sk;
selected_sk = map->ops->map_lookup_elem(map, key);
if (!selected_sk)
return -ENOENT;
reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
if (!reuse)
/* selected_sk is unhashed (e.g. by close()) after the
* above map_lookup_elem(). Treat selected_sk has already
* been removed from the map.
*/
return -ENOENT;
if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
struct sock *sk;
if (unlikely(!reuse_kern->reuseport_id))
/* There is a small race between adding the
* sk to the map and setting the
* reuse_kern->reuseport_id.
* Treat it as the sk has not been added to
* the bpf map yet.
*/
return -ENOENT;
sk = reuse_kern->sk;
if (sk->sk_protocol != selected_sk->sk_protocol)
return -EPROTOTYPE;
else if (sk->sk_family != selected_sk->sk_family)
return -EAFNOSUPPORT;
/* Catch all. Likely bound to a different sockaddr. */
return -EBADFD;
}
reuse_kern->selected_sk = selected_sk;
return 0;
}
static const struct bpf_func_proto sk_select_reuseport_proto = {
.func = sk_select_reuseport,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_PTR_TO_MAP_KEY,
.arg4_type = ARG_ANYTHING,
};
BPF_CALL_4(sk_reuseport_load_bytes,
const struct sk_reuseport_kern *, reuse_kern, u32, offset,
void *, to, u32, len)
{
return ____bpf_skb_load_bytes(reuse_kern->skb, offset, to, len);
}
static const struct bpf_func_proto sk_reuseport_load_bytes_proto = {
.func = sk_reuseport_load_bytes,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_UNINIT_MEM,
.arg4_type = ARG_CONST_SIZE,
};
BPF_CALL_5(sk_reuseport_load_bytes_relative,
const struct sk_reuseport_kern *, reuse_kern, u32, offset,
void *, to, u32, len, u32, start_header)
{
return ____bpf_skb_load_bytes_relative(reuse_kern->skb, offset, to,
len, start_header);
}
static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = {
.func = sk_reuseport_load_bytes_relative,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_UNINIT_MEM,
.arg4_type = ARG_CONST_SIZE,
.arg5_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *
sk_reuseport_func_proto(enum bpf_func_id func_id,
const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_sk_select_reuseport:
return &sk_select_reuseport_proto;
case BPF_FUNC_skb_load_bytes:
return &sk_reuseport_load_bytes_proto;
case BPF_FUNC_skb_load_bytes_relative:
return &sk_reuseport_load_bytes_relative_proto;
default:
return bpf_base_func_proto(func_id);
}
}
static bool
sk_reuseport_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const u32 size_default = sizeof(__u32);
if (off < 0 || off >= sizeof(struct sk_reuseport_md) ||
off % size || type != BPF_READ)
return false;
switch (off) {
case offsetof(struct sk_reuseport_md, data):
info->reg_type = PTR_TO_PACKET;
return size == sizeof(__u64);
case offsetof(struct sk_reuseport_md, data_end):
info->reg_type = PTR_TO_PACKET_END;
return size == sizeof(__u64);
case offsetof(struct sk_reuseport_md, hash):
return size == size_default;
/* Fields that allow narrowing */
case offsetof(struct sk_reuseport_md, eth_protocol):
if (size < FIELD_SIZEOF(struct sk_buff, protocol))
return false;
/* fall through */
case offsetof(struct sk_reuseport_md, ip_protocol):
case offsetof(struct sk_reuseport_md, bind_inany):
case offsetof(struct sk_reuseport_md, len):
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
default:
return false;
}
}
#define SK_REUSEPORT_LOAD_FIELD(F) ({ \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
si->dst_reg, si->src_reg, \
bpf_target_off(struct sk_reuseport_kern, F, \
FIELD_SIZEOF(struct sk_reuseport_kern, F), \
target_size)); \
})
#define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD) \
SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \
struct sk_buff, \
skb, \
SKB_FIELD)
#define SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \
SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern, \
struct sock, \
sk, \
SK_FIELD, BPF_SIZE, EXTRA_OFF)
static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
switch (si->off) {
case offsetof(struct sk_reuseport_md, data):
SK_REUSEPORT_LOAD_SKB_FIELD(data);
break;
case offsetof(struct sk_reuseport_md, len):
SK_REUSEPORT_LOAD_SKB_FIELD(len);
break;
case offsetof(struct sk_reuseport_md, eth_protocol):
SK_REUSEPORT_LOAD_SKB_FIELD(protocol);
break;
case offsetof(struct sk_reuseport_md, ip_protocol):
BUILD_BUG_ON(hweight_long(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
BPF_W, 0);
*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
SK_FL_PROTO_SHIFT);
/* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian
* aware. No further narrowing or masking is needed.
*/
*target_size = 1;
break;
case offsetof(struct sk_reuseport_md, data_end):
SK_REUSEPORT_LOAD_FIELD(data_end);
break;
case offsetof(struct sk_reuseport_md, hash):
SK_REUSEPORT_LOAD_FIELD(hash);
break;
case offsetof(struct sk_reuseport_md, bind_inany):
SK_REUSEPORT_LOAD_FIELD(bind_inany);
break;
}
return insn - insn_buf;
}
const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
.get_func_proto = sk_reuseport_func_proto,
.is_valid_access = sk_reuseport_is_valid_access,
.convert_ctx_access = sk_reuseport_convert_ctx_access,
};
const struct bpf_prog_ops sk_reuseport_prog_ops = {
};
#endif /* CONFIG_INET */

View File

@@ -152,7 +152,11 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
!dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_CONTROL) &&
!dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_PORTS))
FLOW_DISSECTOR_KEY_ENC_PORTS) &&
!dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_IP) &&
!dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_OPTS))
return;
info = skb_tunnel_info(skb);
@@ -212,6 +216,31 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
tp->src = key->tp_src;
tp->dst = key->tp_dst;
}
if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
struct flow_dissector_key_ip *ip;
ip = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_IP,
target_container);
ip->tos = key->tos;
ip->ttl = key->ttl;
}
if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) {
struct flow_dissector_key_enc_opts *enc_opt;
enc_opt = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_ENC_OPTS,
target_container);
if (info->options_len) {
enc_opt->len = info->options_len;
ip_tunnel_info_opts_get(enc_opt->data, info);
enc_opt->dst_opt_type = info->key.tun_flags &
TUNNEL_OPTIONS_PRESENT;
}
}
}
EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
@@ -589,7 +618,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan;
enum flow_dissect_ret fdret;
bool skip_vlan = false;
enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
int num_hdrs = 0;
u8 ip_proto = 0;
bool ret;
@@ -748,14 +777,14 @@ proto_again:
}
case htons(ETH_P_8021AD):
case htons(ETH_P_8021Q): {
const struct vlan_hdr *vlan;
const struct vlan_hdr *vlan = NULL;
struct vlan_hdr _vlan;
bool vlan_tag_present = skb && skb_vlan_tag_present(skb);
__be16 saved_vlan_tpid = proto;
if (vlan_tag_present)
if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX &&
skb && skb_vlan_tag_present(skb)) {
proto = skb->protocol;
if (!vlan_tag_present || eth_type_vlan(skb->protocol)) {
} else {
vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
data, hlen, &_vlan);
if (!vlan) {
@@ -765,20 +794,23 @@ proto_again:
proto = vlan->h_vlan_encapsulated_proto;
nhoff += sizeof(*vlan);
if (skip_vlan) {
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
break;
}
}
skip_vlan = true;
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_VLAN)) {
if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX) {
dissector_vlan = FLOW_DISSECTOR_KEY_VLAN;
} else if (dissector_vlan == FLOW_DISSECTOR_KEY_VLAN) {
dissector_vlan = FLOW_DISSECTOR_KEY_CVLAN;
} else {
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
break;
}
if (dissector_uses_key(flow_dissector, dissector_vlan)) {
key_vlan = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_VLAN,
dissector_vlan,
target_container);
if (vlan_tag_present) {
if (!vlan) {
key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
key_vlan->vlan_priority =
(skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT);
@@ -789,6 +821,7 @@ proto_again:
(ntohs(vlan->h_vlan_TCI) &
VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
}
key_vlan->vlan_tpid = saved_vlan_tpid;
}
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;

View File

@@ -112,7 +112,7 @@ static void est_timer(struct timer_list *t)
* @bstats: basic statistics
* @cpu_bstats: bstats per cpu
* @rate_est: rate estimator statistics
* @stats_lock: statistics lock
* @lock: lock for statistics and control path
* @running: qdisc running seqcount
* @opt: rate estimator configuration TLV
*
@@ -128,7 +128,7 @@ static void est_timer(struct timer_list *t)
int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct net_rate_estimator __rcu **rate_est,
spinlock_t *stats_lock,
spinlock_t *lock,
seqcount_t *running,
struct nlattr *opt)
{
@@ -154,19 +154,22 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
seqcount_init(&est->seq);
intvl_log = parm->interval + 2;
est->bstats = bstats;
est->stats_lock = stats_lock;
est->stats_lock = lock;
est->running = running;
est->ewma_log = parm->ewma_log;
est->intvl_log = intvl_log;
est->cpu_bstats = cpu_bstats;
if (stats_lock)
if (lock)
local_bh_disable();
est_fetch_counters(est, &b);
if (stats_lock)
if (lock)
local_bh_enable();
est->last_bytes = b.bytes;
est->last_packets = b.packets;
if (lock)
spin_lock_bh(lock);
old = rcu_dereference_protected(*rate_est, 1);
if (old) {
del_timer_sync(&old->timer);
@@ -179,6 +182,8 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
mod_timer(&est->timer, est->next_jiffies);
rcu_assign_pointer(*rate_est, est);
if (lock)
spin_unlock_bh(lock);
if (old)
kfree_rcu(old, rcu);
return 0;
@@ -209,7 +214,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
* @bstats: basic statistics
* @cpu_bstats: bstats per cpu
* @rate_est: rate estimator statistics
* @stats_lock: statistics lock
* @lock: lock for statistics and control path
* @running: qdisc running seqcount (might be NULL)
* @opt: rate estimator configuration TLV
*
@@ -221,11 +226,11 @@ EXPORT_SYMBOL(gen_kill_estimator);
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct net_rate_estimator __rcu **rate_est,
spinlock_t *stats_lock,
spinlock_t *lock,
seqcount_t *running, struct nlattr *opt)
{
return gen_new_estimator(bstats, cpu_bstats, rate_est,
stats_lock, running, opt);
lock, running, opt);
}
EXPORT_SYMBOL(gen_replace_estimator);

View File

@@ -50,10 +50,8 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
* mixing with BH RCU lock doesn't work.
*/
preempt_disable();
rcu_read_lock();
bpf_compute_data_pointers(skb);
ret = bpf_prog_run_save_cb(lwt->prog, skb);
rcu_read_unlock();
switch (ret) {
case BPF_OK:

View File

@@ -1148,7 +1148,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
neigh->nud_state = new;
err = 0;
notify = old & NUD_VALID;
if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
if (((old & (NUD_INCOMPLETE | NUD_PROBE)) ||
(flags & NEIGH_UPDATE_F_ADMIN)) &&
(new & NUD_FAILED)) {
neigh_invalidate(neigh);
notify = 1;
@@ -3273,4 +3274,3 @@ static int __init neigh_init(void)
}
subsys_initcall(neigh_init);

View File

@@ -26,6 +26,7 @@
#include <linux/pm_runtime.h>
#include <linux/of.h>
#include <linux/of_net.h>
#include <linux/cpu.h>
#include "net-sysfs.h"
@@ -905,11 +906,20 @@ static const void *rx_queue_namespace(struct kobject *kobj)
return ns;
}
static void rx_queue_get_ownership(struct kobject *kobj,
kuid_t *uid, kgid_t *gid)
{
const struct net *net = rx_queue_namespace(kobj);
net_ns_get_ownership(net, uid, gid);
}
static struct kobj_type rx_queue_ktype __ro_after_init = {
.sysfs_ops = &rx_queue_sysfs_ops,
.release = rx_queue_release,
.default_attrs = rx_queue_default_attrs,
.namespace = rx_queue_namespace
.namespace = rx_queue_namespace,
.get_ownership = rx_queue_get_ownership,
};
static int rx_queue_add_kobject(struct net_device *dev, int index)
@@ -1047,13 +1057,30 @@ static ssize_t traffic_class_show(struct netdev_queue *queue,
char *buf)
{
struct net_device *dev = queue->dev;
int index = get_netdev_queue_index(queue);
int tc = netdev_txq_to_tc(dev, index);
int index;
int tc;
if (!netif_is_multiqueue(dev))
return -ENOENT;
index = get_netdev_queue_index(queue);
/* If queue belongs to subordinate dev use its TC mapping */
dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
tc = netdev_txq_to_tc(dev, index);
if (tc < 0)
return -EINVAL;
return sprintf(buf, "%u\n", tc);
/* We can report the traffic class one of two ways:
* Subordinate device traffic classes are reported with the traffic
* class first, and then the subordinate class so for example TC0 on
* subordinate device 2 will be reported as "0-2". If the queue
* belongs to the root device it will be reported with just the
* traffic class, so just "0" for TC 0 for example.
*/
return dev->num_tc < 0 ? sprintf(buf, "%u%d\n", tc, dev->num_tc) :
sprintf(buf, "%u\n", tc);
}
#ifdef CONFIG_XPS
@@ -1070,6 +1097,9 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue,
int err, index = get_netdev_queue_index(queue);
u32 rate = 0;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
err = kstrtou32(buf, 10, &rate);
if (err < 0)
return err;
@@ -1214,10 +1244,20 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
cpumask_var_t mask;
unsigned long index;
if (!netif_is_multiqueue(dev))
return -ENOENT;
index = get_netdev_queue_index(queue);
if (dev->num_tc) {
/* Do not allow XPS on subordinate device directly */
num_tc = dev->num_tc;
if (num_tc < 0)
return -EINVAL;
/* If queue belongs to subordinate dev use its map */
dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
tc = netdev_txq_to_tc(dev, index);
if (tc < 0)
return -EINVAL;
@@ -1227,13 +1267,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
return -ENOMEM;
rcu_read_lock();
dev_maps = rcu_dereference(dev->xps_maps);
dev_maps = rcu_dereference(dev->xps_cpus_map);
if (dev_maps) {
for_each_possible_cpu(cpu) {
int i, tci = cpu * num_tc + tc;
struct xps_map *map;
map = rcu_dereference(dev_maps->cpu_map[tci]);
map = rcu_dereference(dev_maps->attr_map[tci]);
if (!map)
continue;
@@ -1260,6 +1300,9 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
cpumask_var_t mask;
int err;
if (!netif_is_multiqueue(dev))
return -ENOENT;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -1283,6 +1326,91 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
= __ATTR_RW(xps_cpus);
static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
{
struct net_device *dev = queue->dev;
struct xps_dev_maps *dev_maps;
unsigned long *mask, index;
int j, len, num_tc = 1, tc = 0;
index = get_netdev_queue_index(queue);
if (dev->num_tc) {
num_tc = dev->num_tc;
tc = netdev_txq_to_tc(dev, index);
if (tc < 0)
return -EINVAL;
}
mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long),
GFP_KERNEL);
if (!mask)
return -ENOMEM;
rcu_read_lock();
dev_maps = rcu_dereference(dev->xps_rxqs_map);
if (!dev_maps)
goto out_no_maps;
for (j = -1; j = netif_attrmask_next(j, NULL, dev->num_rx_queues),
j < dev->num_rx_queues;) {
int i, tci = j * num_tc + tc;
struct xps_map *map;
map = rcu_dereference(dev_maps->attr_map[tci]);
if (!map)
continue;
for (i = map->len; i--;) {
if (map->queues[i] == index) {
set_bit(j, mask);
break;
}
}
}
out_no_maps:
rcu_read_unlock();
len = bitmap_print_to_pagebuf(false, buf, mask, dev->num_rx_queues);
kfree(mask);
return len < PAGE_SIZE ? len : -EINVAL;
}
static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
size_t len)
{
struct net_device *dev = queue->dev;
struct net *net = dev_net(dev);
unsigned long *mask, index;
int err;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long),
GFP_KERNEL);
if (!mask)
return -ENOMEM;
index = get_netdev_queue_index(queue);
err = bitmap_parse(buf, len, mask, dev->num_rx_queues);
if (err) {
kfree(mask);
return err;
}
cpus_read_lock();
err = __netif_set_xps_queue(dev, mask, index, true);
cpus_read_unlock();
kfree(mask);
return err ? : len;
}
static struct netdev_queue_attribute xps_rxqs_attribute __ro_after_init
= __ATTR_RW(xps_rxqs);
#endif /* CONFIG_XPS */
static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
@@ -1290,6 +1418,7 @@ static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
&queue_traffic_class.attr,
#ifdef CONFIG_XPS
&xps_cpus_attribute.attr,
&xps_rxqs_attribute.attr,
&queue_tx_maxrate.attr,
#endif
NULL
@@ -1315,11 +1444,20 @@ static const void *netdev_queue_namespace(struct kobject *kobj)
return ns;
}
static void netdev_queue_get_ownership(struct kobject *kobj,
kuid_t *uid, kgid_t *gid)
{
const struct net *net = netdev_queue_namespace(kobj);
net_ns_get_ownership(net, uid, gid);
}
static struct kobj_type netdev_queue_ktype __ro_after_init = {
.sysfs_ops = &netdev_queue_sysfs_ops,
.release = netdev_queue_release,
.default_attrs = netdev_queue_default_attrs,
.namespace = netdev_queue_namespace,
.get_ownership = netdev_queue_get_ownership,
};
static int netdev_queue_add_kobject(struct net_device *dev, int index)
@@ -1509,6 +1647,14 @@ static const void *net_namespace(struct device *d)
return dev_net(dev);
}
static void net_get_ownership(struct device *d, kuid_t *uid, kgid_t *gid)
{
struct net_device *dev = to_net_dev(d);
const struct net *net = dev_net(dev);
net_ns_get_ownership(net, uid, gid);
}
static struct class net_class __ro_after_init = {
.name = "net",
.dev_release = netdev_release,
@@ -1516,6 +1662,7 @@ static struct class net_class __ro_after_init = {
.dev_uevent = netdev_uevent,
.ns_type = &net_ns_type_operations,
.namespace = net_namespace,
.get_ownership = net_get_ownership,
};
#ifdef CONFIG_OF_NET

View File

@@ -17,6 +17,7 @@
#include <linux/user_namespace.h>
#include <linux/net_namespace.h>
#include <linux/sched/task.h>
#include <linux/uidgid.h>
#include <net/sock.h>
#include <net/netlink.h>
@@ -448,6 +449,33 @@ dec_ucounts:
return net;
}
/**
* net_ns_get_ownership - get sysfs ownership data for @net
* @net: network namespace in question (can be NULL)
* @uid: kernel user ID for sysfs objects
* @gid: kernel group ID for sysfs objects
*
* Returns the uid/gid pair of root in the user namespace associated with the
* given network namespace.
*/
void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
{
if (net) {
kuid_t ns_root_uid = make_kuid(net->user_ns, 0);
kgid_t ns_root_gid = make_kgid(net->user_ns, 0);
if (uid_valid(ns_root_uid))
*uid = ns_root_uid;
if (gid_valid(ns_root_gid))
*gid = ns_root_gid;
} else {
*uid = GLOBAL_ROOT_UID;
*gid = GLOBAL_ROOT_GID;
}
}
EXPORT_SYMBOL_GPL(net_ns_get_ownership);
static void unhash_nsid(struct net *net, struct net *last)
{
struct net *tmp;

View File

@@ -1265,7 +1265,7 @@ static ssize_t pktgen_if_write(struct file *file,
buf[len] = 0;
if (strcmp(buf, pkt_dev->dst_min) != 0) {
memset(pkt_dev->dst_min, 0, sizeof(pkt_dev->dst_min));
strncpy(pkt_dev->dst_min, buf, len);
strcpy(pkt_dev->dst_min, buf);
pkt_dev->daddr_min = in_aton(pkt_dev->dst_min);
pkt_dev->cur_daddr = pkt_dev->daddr_min;
}
@@ -1280,14 +1280,12 @@ static ssize_t pktgen_if_write(struct file *file,
if (len < 0)
return len;
if (copy_from_user(buf, &user_buffer[i], len))
return -EFAULT;
buf[len] = 0;
if (strcmp(buf, pkt_dev->dst_max) != 0) {
memset(pkt_dev->dst_max, 0, sizeof(pkt_dev->dst_max));
strncpy(pkt_dev->dst_max, buf, len);
strcpy(pkt_dev->dst_max, buf);
pkt_dev->daddr_max = in_aton(pkt_dev->dst_max);
pkt_dev->cur_daddr = pkt_dev->daddr_max;
}
@@ -1396,7 +1394,7 @@ static ssize_t pktgen_if_write(struct file *file,
buf[len] = 0;
if (strcmp(buf, pkt_dev->src_min) != 0) {
memset(pkt_dev->src_min, 0, sizeof(pkt_dev->src_min));
strncpy(pkt_dev->src_min, buf, len);
strcpy(pkt_dev->src_min, buf);
pkt_dev->saddr_min = in_aton(pkt_dev->src_min);
pkt_dev->cur_saddr = pkt_dev->saddr_min;
}
@@ -1416,7 +1414,7 @@ static ssize_t pktgen_if_write(struct file *file,
buf[len] = 0;
if (strcmp(buf, pkt_dev->src_max) != 0) {
memset(pkt_dev->src_max, 0, sizeof(pkt_dev->src_max));
strncpy(pkt_dev->src_max, buf, len);
strcpy(pkt_dev->src_max, buf);
pkt_dev->saddr_max = in_aton(pkt_dev->src_max);
pkt_dev->cur_saddr = pkt_dev->saddr_max;
}
@@ -2255,7 +2253,7 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
x = xfrm_state_lookup_byspi(pn->net, htonl(pkt_dev->spi), AF_INET);
} else {
/* slow path: we dont already have xfrm_state */
x = xfrm_stateonly_find(pn->net, DUMMY_MARK,
x = xfrm_stateonly_find(pn->net, DUMMY_MARK, 0,
(xfrm_address_t *)&pkt_dev->cur_daddr,
(xfrm_address_t *)&pkt_dev->cur_saddr,
AF_INET,

View File

@@ -964,7 +964,8 @@ static size_t rtnl_xdp_size(void)
{
size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */
nla_total_size(1) + /* XDP_ATTACHED */
nla_total_size(4); /* XDP_PROG_ID */
nla_total_size(4) + /* XDP_PROG_ID (or 1st mode) */
nla_total_size(4); /* XDP_<mode>_PROG_ID */
return xdp_size;
}
@@ -1014,6 +1015,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_IF_NETNSID */
+ nla_total_size(4) /* IFLA_CARRIER_UP_COUNT */
+ nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */
+ nla_total_size(4) /* IFLA_MIN_MTU */
+ nla_total_size(4) /* IFLA_MAX_MTU */
+ 0;
}
@@ -1353,27 +1356,51 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
return 0;
}
static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
static u32 rtnl_xdp_prog_skb(struct net_device *dev)
{
const struct net_device_ops *ops = dev->netdev_ops;
const struct bpf_prog *generic_xdp_prog;
struct netdev_bpf xdp;
ASSERT_RTNL();
*prog_id = 0;
generic_xdp_prog = rtnl_dereference(dev->xdp_prog);
if (generic_xdp_prog) {
*prog_id = generic_xdp_prog->aux->id;
return XDP_ATTACHED_SKB;
}
if (!ops->ndo_bpf)
return XDP_ATTACHED_NONE;
if (!generic_xdp_prog)
return 0;
return generic_xdp_prog->aux->id;
}
__dev_xdp_query(dev, ops->ndo_bpf, &xdp);
*prog_id = xdp.prog_id;
static u32 rtnl_xdp_prog_drv(struct net_device *dev)
{
return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG);
}
return xdp.prog_attached;
static u32 rtnl_xdp_prog_hw(struct net_device *dev)
{
return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
XDP_QUERY_PROG_HW);
}
static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev,
u32 *prog_id, u8 *mode, u8 tgt_mode, u32 attr,
u32 (*get_prog_id)(struct net_device *dev))
{
u32 curr_id;
int err;
curr_id = get_prog_id(dev);
if (!curr_id)
return 0;
*prog_id = curr_id;
err = nla_put_u32(skb, attr, curr_id);
if (err)
return err;
if (*mode != XDP_ATTACHED_NONE)
*mode = XDP_ATTACHED_MULTI;
else
*mode = tgt_mode;
return 0;
}
static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1381,17 +1408,32 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
struct nlattr *xdp;
u32 prog_id;
int err;
u8 mode;
xdp = nla_nest_start(skb, IFLA_XDP);
if (!xdp)
return -EMSGSIZE;
err = nla_put_u8(skb, IFLA_XDP_ATTACHED,
rtnl_xdp_attached_mode(dev, &prog_id));
prog_id = 0;
mode = XDP_ATTACHED_NONE;
err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_SKB,
IFLA_XDP_SKB_PROG_ID, rtnl_xdp_prog_skb);
if (err)
goto err_cancel;
err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_DRV,
IFLA_XDP_DRV_PROG_ID, rtnl_xdp_prog_drv);
if (err)
goto err_cancel;
err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_HW,
IFLA_XDP_HW_PROG_ID, rtnl_xdp_prog_hw);
if (err)
goto err_cancel;
if (prog_id) {
err = nla_put_u8(skb, IFLA_XDP_ATTACHED, mode);
if (err)
goto err_cancel;
if (prog_id && mode != XDP_ATTACHED_MULTI) {
err = nla_put_u32(skb, IFLA_XDP_PROG_ID, prog_id);
if (err)
goto err_cancel;
@@ -1561,6 +1603,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
netif_running(dev) ? dev->operstate : IF_OPER_DOWN) ||
nla_put_u8(skb, IFLA_LINKMODE, dev->link_mode) ||
nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
nla_put_u32(skb, IFLA_MIN_MTU, dev->min_mtu) ||
nla_put_u32(skb, IFLA_MAX_MTU, dev->max_mtu) ||
nla_put_u32(skb, IFLA_GROUP, dev->group) ||
nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) ||
nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) ||
@@ -1692,6 +1736,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_IF_NETNSID] = { .type = NLA_S32 },
[IFLA_CARRIER_UP_COUNT] = { .type = NLA_U32 },
[IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 },
[IFLA_MIN_MTU] = { .type = NLA_U32 },
[IFLA_MAX_MTU] = { .type = NLA_U32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2336,7 +2382,7 @@ static int do_setlink(const struct sk_buff *skb,
}
if (tb[IFLA_MTU]) {
err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
err = dev_set_mtu_ext(dev, nla_get_u32(tb[IFLA_MTU]), extack);
if (err < 0)
goto errout;
status |= DO_SETLINK_MODIFIED;

View File

@@ -1291,7 +1291,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
}
EXPORT_SYMBOL(skb_clone);
static void skb_headers_offset_update(struct sk_buff *skb, int off)
void skb_headers_offset_update(struct sk_buff *skb, int off)
{
/* Only adjust this if it actually is csum_start rather than csum */
if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -1305,6 +1305,7 @@ static void skb_headers_offset_update(struct sk_buff *skb, int off)
skb->inner_network_header += off;
skb->inner_mac_header += off;
}
EXPORT_SYMBOL(skb_headers_offset_update);
void skb_copy_header(struct sk_buff *new, const struct sk_buff *old)
{
@@ -1715,7 +1716,7 @@ void *skb_push(struct sk_buff *skb, unsigned int len)
{
skb->data -= len;
skb->len += len;
if (unlikely(skb->data<skb->head))
if (unlikely(skb->data < skb->head))
skb_under_panic(skb, len, __builtin_return_address(0));
return skb->data;
}
@@ -2858,23 +2859,27 @@ EXPORT_SYMBOL(skb_queue_purge);
/**
* skb_rbtree_purge - empty a skb rbtree
* @root: root of the rbtree to empty
* Return value: the sum of truesizes of all purged skbs.
*
* Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
* the list and one reference dropped. This function does not take
* any lock. Synchronization should be handled by the caller (e.g., TCP
* out-of-order queue is protected by the socket lock).
*/
void skb_rbtree_purge(struct rb_root *root)
unsigned int skb_rbtree_purge(struct rb_root *root)
{
struct rb_node *p = rb_first(root);
unsigned int sum = 0;
while (p) {
struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
p = rb_next(p);
rb_erase(&skb->rbnode, root);
sum += skb->truesize;
kfree_skb(skb);
}
return sum;
}
/**
@@ -3816,14 +3821,14 @@ err:
}
EXPORT_SYMBOL_GPL(skb_segment);
int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
{
struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
unsigned int offset = skb_gro_offset(skb);
unsigned int headlen = skb_headlen(skb);
unsigned int len = skb_gro_len(skb);
struct sk_buff *lp, *p = *head;
unsigned int delta_truesize;
struct sk_buff *lp;
if (unlikely(p->len + len >= 65536))
return -E2BIG;
@@ -4899,7 +4904,6 @@ EXPORT_SYMBOL(skb_try_coalesce);
*/
void skb_scrub_packet(struct sk_buff *skb, bool xnet)
{
skb->tstamp = 0;
skb->pkt_type = PACKET_HOST;
skb->skb_iif = 0;
skb->ignore_df = 0;
@@ -4912,8 +4916,8 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
return;
ipvs_reset(skb);
skb_orphan(skb);
skb->mark = 0;
skb->tstamp = 0;
}
EXPORT_SYMBOL_GPL(skb_scrub_packet);

View File

@@ -91,6 +91,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <asm/unaligned.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/errqueue.h>
@@ -249,58 +250,13 @@ static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
_sock_locks("k-clock-")
};
static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
"rlock-AF_UNSPEC", "rlock-AF_UNIX" , "rlock-AF_INET" ,
"rlock-AF_AX25" , "rlock-AF_IPX" , "rlock-AF_APPLETALK",
"rlock-AF_NETROM", "rlock-AF_BRIDGE" , "rlock-AF_ATMPVC" ,
"rlock-AF_X25" , "rlock-AF_INET6" , "rlock-AF_ROSE" ,
"rlock-AF_DECnet", "rlock-AF_NETBEUI" , "rlock-AF_SECURITY" ,
"rlock-AF_KEY" , "rlock-AF_NETLINK" , "rlock-AF_PACKET" ,
"rlock-AF_ASH" , "rlock-AF_ECONET" , "rlock-AF_ATMSVC" ,
"rlock-AF_RDS" , "rlock-AF_SNA" , "rlock-AF_IRDA" ,
"rlock-AF_PPPOX" , "rlock-AF_WANPIPE" , "rlock-AF_LLC" ,
"rlock-27" , "rlock-28" , "rlock-AF_CAN" ,
"rlock-AF_TIPC" , "rlock-AF_BLUETOOTH", "rlock-AF_IUCV" ,
"rlock-AF_RXRPC" , "rlock-AF_ISDN" , "rlock-AF_PHONET" ,
"rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG" ,
"rlock-AF_NFC" , "rlock-AF_VSOCK" , "rlock-AF_KCM" ,
"rlock-AF_QIPCRTR", "rlock-AF_SMC" , "rlock-AF_XDP" ,
"rlock-AF_MAX"
_sock_locks("rlock-")
};
static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
"wlock-AF_UNSPEC", "wlock-AF_UNIX" , "wlock-AF_INET" ,
"wlock-AF_AX25" , "wlock-AF_IPX" , "wlock-AF_APPLETALK",
"wlock-AF_NETROM", "wlock-AF_BRIDGE" , "wlock-AF_ATMPVC" ,
"wlock-AF_X25" , "wlock-AF_INET6" , "wlock-AF_ROSE" ,
"wlock-AF_DECnet", "wlock-AF_NETBEUI" , "wlock-AF_SECURITY" ,
"wlock-AF_KEY" , "wlock-AF_NETLINK" , "wlock-AF_PACKET" ,
"wlock-AF_ASH" , "wlock-AF_ECONET" , "wlock-AF_ATMSVC" ,
"wlock-AF_RDS" , "wlock-AF_SNA" , "wlock-AF_IRDA" ,
"wlock-AF_PPPOX" , "wlock-AF_WANPIPE" , "wlock-AF_LLC" ,
"wlock-27" , "wlock-28" , "wlock-AF_CAN" ,
"wlock-AF_TIPC" , "wlock-AF_BLUETOOTH", "wlock-AF_IUCV" ,
"wlock-AF_RXRPC" , "wlock-AF_ISDN" , "wlock-AF_PHONET" ,
"wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG" ,
"wlock-AF_NFC" , "wlock-AF_VSOCK" , "wlock-AF_KCM" ,
"wlock-AF_QIPCRTR", "wlock-AF_SMC" , "wlock-AF_XDP" ,
"wlock-AF_MAX"
_sock_locks("wlock-")
};
static const char *const af_family_elock_key_strings[AF_MAX+1] = {
"elock-AF_UNSPEC", "elock-AF_UNIX" , "elock-AF_INET" ,
"elock-AF_AX25" , "elock-AF_IPX" , "elock-AF_APPLETALK",
"elock-AF_NETROM", "elock-AF_BRIDGE" , "elock-AF_ATMPVC" ,
"elock-AF_X25" , "elock-AF_INET6" , "elock-AF_ROSE" ,
"elock-AF_DECnet", "elock-AF_NETBEUI" , "elock-AF_SECURITY" ,
"elock-AF_KEY" , "elock-AF_NETLINK" , "elock-AF_PACKET" ,
"elock-AF_ASH" , "elock-AF_ECONET" , "elock-AF_ATMSVC" ,
"elock-AF_RDS" , "elock-AF_SNA" , "elock-AF_IRDA" ,
"elock-AF_PPPOX" , "elock-AF_WANPIPE" , "elock-AF_LLC" ,
"elock-27" , "elock-28" , "elock-AF_CAN" ,
"elock-AF_TIPC" , "elock-AF_BLUETOOTH", "elock-AF_IUCV" ,
"elock-AF_RXRPC" , "elock-AF_ISDN" , "elock-AF_PHONET" ,
"elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG" ,
"elock-AF_NFC" , "elock-AF_VSOCK" , "elock-AF_KCM" ,
"elock-AF_QIPCRTR", "elock-AF_SMC" , "elock-AF_XDP" ,
"elock-AF_MAX"
_sock_locks("elock-")
};
/*
@@ -697,6 +653,7 @@ EXPORT_SYMBOL(sk_mc_loop);
int sock_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen)
{
struct sock_txtime sk_txtime;
struct sock *sk = sock->sk;
int val;
int valbool;
@@ -1070,6 +1027,26 @@ set_rcvbuf:
}
break;
case SO_TXTIME:
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
ret = -EPERM;
} else if (optlen != sizeof(struct sock_txtime)) {
ret = -EINVAL;
} else if (copy_from_user(&sk_txtime, optval,
sizeof(struct sock_txtime))) {
ret = -EFAULT;
} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
ret = -EINVAL;
} else {
sock_valbool_flag(sk, SOCK_TXTIME, true);
sk->sk_clockid = sk_txtime.clockid;
sk->sk_txtime_deadline_mode =
!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
sk->sk_txtime_report_errors =
!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
}
break;
default:
ret = -ENOPROTOOPT;
break;
@@ -1115,6 +1092,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
u64 val64;
struct linger ling;
struct timeval tm;
struct sock_txtime txtime;
} v;
int lv = sizeof(int);
@@ -1403,6 +1381,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sock_flag(sk, SOCK_ZEROCOPY);
break;
case SO_TXTIME:
lv = sizeof(v.txtime);
v.txtime.clockid = sk->sk_clockid;
v.txtime.flags |= sk->sk_txtime_deadline_mode ?
SOF_TXTIME_DEADLINE_MODE : 0;
v.txtime.flags |= sk->sk_txtime_report_errors ?
SOF_TXTIME_REPORT_ERRORS : 0;
break;
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -2137,6 +2124,13 @@ int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
sockc->tsflags |= tsflags;
break;
case SCM_TXTIME:
if (!sock_flag(sk, SOCK_TXTIME))
return -EINVAL;
if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
return -EINVAL;
sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
break;
/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
case SCM_RIGHTS:
case SCM_CREDENTIALS:
@@ -2401,9 +2395,10 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
struct proto *prot = sk->sk_prot;
long allocated = sk_memory_allocated_add(sk, amt);
bool charged = true;
if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
!mem_cgroup_charge_skmem(sk->sk_memcg, amt))
!(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt)))
goto suppress_allocation;
/* Under limit. */
@@ -2461,7 +2456,8 @@ suppress_allocation:
return 1;
}
trace_sock_exceed_buf_limit(sk, prot, allocated);
if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
trace_sock_exceed_buf_limit(sk, prot, allocated, kind);
sk_memory_allocated_sub(sk, amt);
@@ -2818,6 +2814,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_pacing_rate = ~0U;
sk->sk_pacing_shift = 10;
sk->sk_incoming_cpu = -1;
sk_rx_queue_clear(sk);
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
@@ -2902,8 +2900,8 @@ EXPORT_SYMBOL(lock_sock_fast);
int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
struct timeval tv;
if (!sock_flag(sk, SOCK_TIMESTAMP))
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
tv = ktime_to_timeval(sk->sk_stamp);
if (tv.tv_sec == -1)
return -ENOENT;
@@ -2918,8 +2916,8 @@ EXPORT_SYMBOL(sock_get_timestamp);
int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
struct timespec ts;
if (!sock_flag(sk, SOCK_TIMESTAMP))
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
ts = ktime_to_timespec(sk->sk_stamp);
if (ts.tv_sec == -1)
return -ENOENT;

View File

@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/tcp.h>
#include <linux/workqueue.h>
#include <linux/nospec.h>
#include <linux/inet_diag.h>
#include <linux/sock_diag.h>
@@ -218,6 +219,7 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
if (req->sdiag_family >= AF_MAX)
return -EINVAL;
req->sdiag_family = array_index_nospec(req->sdiag_family, AF_MAX);
if (sock_diag_handlers[req->sdiag_family] == NULL)
sock_load_diag_module(req->sdiag_family, 0);

View File

@@ -8,11 +8,34 @@
#include <net/sock_reuseport.h>
#include <linux/bpf.h>
#include <linux/idr.h>
#include <linux/filter.h>
#include <linux/rcupdate.h>
#define INIT_SOCKS 128
static DEFINE_SPINLOCK(reuseport_lock);
DEFINE_SPINLOCK(reuseport_lock);
#define REUSEPORT_MIN_ID 1
static DEFINE_IDA(reuseport_ida);
int reuseport_get_id(struct sock_reuseport *reuse)
{
int id;
if (reuse->reuseport_id)
return reuse->reuseport_id;
id = ida_simple_get(&reuseport_ida, REUSEPORT_MIN_ID, 0,
/* Called under reuseport_lock */
GFP_ATOMIC);
if (id < 0)
return id;
reuse->reuseport_id = id;
return reuse->reuseport_id;
}
static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
{
@@ -29,7 +52,7 @@ static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
return reuse;
}
int reuseport_alloc(struct sock *sk)
int reuseport_alloc(struct sock *sk, bool bind_inany)
{
struct sock_reuseport *reuse;
@@ -41,9 +64,17 @@ int reuseport_alloc(struct sock *sk)
/* Allocation attempts can occur concurrently via the setsockopt path
* and the bind/hash path. Nothing to do when we lose the race.
*/
if (rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock)))
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));
if (reuse) {
/* Only set reuse->bind_inany if the bind_inany is true.
* Otherwise, it will overwrite the reuse->bind_inany
* which was set by the bind/hash path.
*/
if (bind_inany)
reuse->bind_inany = bind_inany;
goto out;
}
reuse = __reuseport_alloc(INIT_SOCKS);
if (!reuse) {
@@ -53,6 +84,7 @@ int reuseport_alloc(struct sock *sk)
reuse->socks[0] = sk;
reuse->num_socks = 1;
reuse->bind_inany = bind_inany;
rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
out:
@@ -78,9 +110,12 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
more_reuse->max_socks = more_socks_size;
more_reuse->num_socks = reuse->num_socks;
more_reuse->prog = reuse->prog;
more_reuse->reuseport_id = reuse->reuseport_id;
more_reuse->bind_inany = reuse->bind_inany;
memcpy(more_reuse->socks, reuse->socks,
reuse->num_socks * sizeof(struct sock *));
more_reuse->synq_overflow_ts = READ_ONCE(reuse->synq_overflow_ts);
for (i = 0; i < reuse->num_socks; ++i)
rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
@@ -99,8 +134,9 @@ static void reuseport_free_rcu(struct rcu_head *head)
struct sock_reuseport *reuse;
reuse = container_of(head, struct sock_reuseport, rcu);
if (reuse->prog)
bpf_prog_destroy(reuse->prog);
sk_reuseport_prog_free(rcu_dereference_protected(reuse->prog, 1));
if (reuse->reuseport_id)
ida_simple_remove(&reuseport_ida, reuse->reuseport_id);
kfree(reuse);
}
@@ -110,12 +146,12 @@ static void reuseport_free_rcu(struct rcu_head *head)
* @sk2: Socket belonging to the existing reuseport group.
* May return ENOMEM and not add socket to group under memory pressure.
*/
int reuseport_add_sock(struct sock *sk, struct sock *sk2)
int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
{
struct sock_reuseport *old_reuse, *reuse;
if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
int err = reuseport_alloc(sk2);
int err = reuseport_alloc(sk2, bind_inany);
if (err)
return err;
@@ -160,6 +196,14 @@ void reuseport_detach_sock(struct sock *sk)
spin_lock_bh(&reuseport_lock);
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));
/* At least one of the sk in this reuseport group is added to
* a bpf map. Notify the bpf side. The bpf map logic will
* remove the sk if it is indeed added to a bpf map.
*/
if (reuse->reuseport_id)
bpf_sk_reuseport_detach(sk);
rcu_assign_pointer(sk->sk_reuseport_cb, NULL);
for (i = 0; i < reuse->num_socks; i++) {
@@ -175,9 +219,9 @@ void reuseport_detach_sock(struct sock *sk)
}
EXPORT_SYMBOL(reuseport_detach_sock);
static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks,
struct bpf_prog *prog, struct sk_buff *skb,
int hdr_len)
static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
struct bpf_prog *prog, struct sk_buff *skb,
int hdr_len)
{
struct sk_buff *nskb = NULL;
u32 index;
@@ -238,9 +282,15 @@ struct sock *reuseport_select_sock(struct sock *sk,
/* paired with smp_wmb() in reuseport_add_sock() */
smp_rmb();
if (prog && skb)
sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
if (!prog || !skb)
goto select_by_hash;
if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, hash);
else
sk2 = run_bpf_filter(reuse, socks, prog, skb, hdr_len);
select_by_hash:
/* no bpf or invalid bpf result: fall back to hash usage */
if (!sk2)
sk2 = reuse->socks[reciprocal_scale(hash, socks)];
@@ -252,12 +302,21 @@ out:
}
EXPORT_SYMBOL(reuseport_select_sock);
struct bpf_prog *
reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
{
struct sock_reuseport *reuse;
struct bpf_prog *old_prog;
if (sk_unhashed(sk) && sk->sk_reuseport) {
int err = reuseport_alloc(sk, false);
if (err)
return err;
} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
/* The socket wasn't bound with SO_REUSEPORT */
return -EINVAL;
}
spin_lock_bh(&reuseport_lock);
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));
@@ -266,6 +325,7 @@ reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
rcu_assign_pointer(reuse->prog, prog);
spin_unlock_bh(&reuseport_lock);
return old_prog;
sk_reuseport_prog_free(old_prog);
return 0;
}
EXPORT_SYMBOL(reuseport_attach_prog);

View File

@@ -397,7 +397,7 @@ int inet_pton_with_scope(struct net *net, __kernel_sa_family_t af,
break;
default:
pr_err("unexpected address family %d\n", af);
};
}
return ret;
}

View File

@@ -3,8 +3,11 @@
* Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
* Released under terms in GPL version 2. See COPYING.
*/
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
@@ -45,8 +48,8 @@ static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
!= sizeof(u32));
/* Use cyclic increasing ID as direct hash key, see rht_bucket_index */
return key << RHT_HASH_RESERVED_SPACE;
/* Use cyclic increasing ID as direct hash key */
return key;
}
static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
@@ -327,10 +330,12 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
page = virt_to_head_page(data);
if (xa)
if (xa) {
napi_direct &= !xdp_return_frame_no_direct();
page_pool_put_page(xa->page_pool, page, napi_direct);
else
} else {
put_page(page);
}
rcu_read_unlock();
break;
case MEM_TYPE_PAGE_SHARED:
@@ -345,8 +350,7 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
rcu_read_lock();
/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
if (!WARN_ON_ONCE(!xa))
xa->zc_alloc->free(xa->zc_alloc, handle);
xa->zc_alloc->free(xa->zc_alloc, handle);
rcu_read_unlock();
default:
/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
@@ -371,3 +375,34 @@ void xdp_return_buff(struct xdp_buff *xdp)
__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle);
}
EXPORT_SYMBOL_GPL(xdp_return_buff);
int xdp_attachment_query(struct xdp_attachment_info *info,
struct netdev_bpf *bpf)
{
bpf->prog_id = info->prog ? info->prog->aux->id : 0;
bpf->prog_flags = info->prog ? info->flags : 0;
return 0;
}
EXPORT_SYMBOL_GPL(xdp_attachment_query);
bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
struct netdev_bpf *bpf)
{
if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) {
NL_SET_ERR_MSG(bpf->extack,
"program loaded with different flags");
return false;
}
return true;
}
EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok);
void xdp_attachment_setup(struct xdp_attachment_info *info,
struct netdev_bpf *bpf)
{
if (info->prog)
bpf_prog_put(info->prog);
info->prog = bpf->prog;
info->flags = bpf->flags;
}
EXPORT_SYMBOL_GPL(xdp_attachment_setup);