Alexei Starovoitov says:

====================
pull-request: bpf-next 2022-03-21 v2

We've added 137 non-merge commits during the last 17 day(s) which contain
a total of 143 files changed, 7123 insertions(+), 1092 deletions(-).

The main changes are:

1) Custom SEC() handling in libbpf, from Andrii.

2) subskeleton support, from Delyan.

3) Use btf_tag to recognize __percpu pointers in the verifier, from Hao.

4) Fix net.core.bpf_jit_harden race, from Hou.

5) Fix bpf_sk_lookup remote_port on big-endian, from Jakub.

6) Introduce fprobe (multi kprobe) _without_ arch bits, from Masami.
The arch specific bits will come later.

7) Introduce multi_kprobe bpf programs on top of fprobe, from Jiri.

8) Enable non-atomic allocations in local storage, from Joanne.

9) Various var_off ptr_to_btf_id fixed, from Kumar.

10) bpf_ima_file_hash helper, from Roberto.

11) Add "live packet" mode for XDP in BPF_PROG_RUN, from Toke.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (137 commits)
  selftests/bpf: Fix kprobe_multi test.
  Revert "rethook: x86: Add rethook x86 implementation"
  Revert "arm64: rethook: Add arm64 rethook implementation"
  Revert "powerpc: Add rethook support"
  Revert "ARM: rethook: Add rethook arm implementation"
  bpftool: Fix a bug in subskeleton code generation
  bpf: Fix bpf_prog_pack when PMU_SIZE is not defined
  bpf: Fix bpf_prog_pack for multi-node setup
  bpf: Fix warning for cast from restricted gfp_t in verifier
  bpf, arm: Fix various typos in comments
  libbpf: Close fd in bpf_object__reuse_map
  bpftool: Fix print error when show bpf map
  bpf: Fix kprobe_multi return probe backtrace
  Revert "bpf: Add support to inline bpf_get_func_ip helper on x86"
  bpf: Simplify check in btf_parse_hdr()
  selftests/bpf/test_lirc_mode2.sh: Exit with proper code
  bpf: Check for NULL return from bpf_get_btf_vmlinux
  selftests/bpf: Test skipping stacktrace
  bpf: Adjust BPF stack helper functions to accommodate skip > 0
  bpf: Select proper size for bpf_prog_pack
  ...
====================

Link: https://lore.kernel.org/r/20220322050159.5507-1-alexei.starovoitov@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2022-03-22 10:36:56 -07:00
143 changed files with 7139 additions and 1108 deletions

View File

@@ -141,7 +141,7 @@ static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
if (sock) {
sdata = bpf_local_storage_update(
sock->sk, (struct bpf_local_storage_map *)map, value,
map_flags);
map_flags, GFP_ATOMIC);
sockfd_put(sock);
return PTR_ERR_OR_ZERO(sdata);
}
@@ -172,7 +172,7 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
{
struct bpf_local_storage_elem *copy_selem;
copy_selem = bpf_selem_alloc(smap, newsk, NULL, true);
copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC);
if (!copy_selem)
return NULL;
@@ -230,7 +230,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
bpf_selem_link_map(smap, copy_selem);
bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
} else {
ret = bpf_local_storage_alloc(newsk, smap, copy_selem);
ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
if (ret) {
kfree(copy_selem);
atomic_sub(smap->elem_size,
@@ -255,8 +255,9 @@ out:
return ret;
}
BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
void *, value, u64, flags)
/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
void *, value, u64, flags, gfp_t, gfp_flags)
{
struct bpf_local_storage_data *sdata;
@@ -277,7 +278,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
refcount_inc_not_zero(&sk->sk_refcnt)) {
sdata = bpf_local_storage_update(
sk, (struct bpf_local_storage_map *)map, value,
BPF_NOEXIST);
BPF_NOEXIST, gfp_flags);
/* sk must be a fullsock (guaranteed by verifier),
* so sock_gen_put() is unnecessary.
*/
@@ -405,6 +406,8 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
btf_vmlinux = bpf_get_btf_vmlinux();
if (IS_ERR_OR_NULL(btf_vmlinux))
return false;
btf_id = prog->aux->attach_btf_id;
t = btf_type_by_id(btf_vmlinux, btf_id);
tname = btf_name_by_offset(btf_vmlinux, t->name_off);
@@ -417,14 +420,16 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
return false;
}
BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
void *, value, u64, flags)
/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
void *, value, u64, flags, gfp_t, gfp_flags)
{
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (in_hardirq() || in_nmi())
return (unsigned long)NULL;
return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags);
return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
gfp_flags);
}
BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,

View File

@@ -7388,36 +7388,36 @@ static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = {
.arg3_type = ARG_ANYTHING,
};
BPF_CALL_3(bpf_skb_set_delivery_time, struct sk_buff *, skb,
u64, dtime, u32, dtime_type)
BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb,
u64, tstamp, u32, tstamp_type)
{
/* skb_clear_delivery_time() is done for inet protocol */
if (skb->protocol != htons(ETH_P_IP) &&
skb->protocol != htons(ETH_P_IPV6))
return -EOPNOTSUPP;
switch (dtime_type) {
case BPF_SKB_DELIVERY_TIME_MONO:
if (!dtime)
switch (tstamp_type) {
case BPF_SKB_TSTAMP_DELIVERY_MONO:
if (!tstamp)
return -EINVAL;
skb->tstamp = dtime;
skb->tstamp = tstamp;
skb->mono_delivery_time = 1;
break;
case BPF_SKB_DELIVERY_TIME_NONE:
if (dtime)
case BPF_SKB_TSTAMP_UNSPEC:
if (tstamp)
return -EINVAL;
skb->tstamp = 0;
skb->mono_delivery_time = 0;
break;
default:
return -EOPNOTSUPP;
return -EINVAL;
}
return 0;
}
static const struct bpf_func_proto bpf_skb_set_delivery_time_proto = {
.func = bpf_skb_set_delivery_time,
static const struct bpf_func_proto bpf_skb_set_tstamp_proto = {
.func = bpf_skb_set_tstamp,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
@@ -7786,8 +7786,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_gen_syncookie_proto;
case BPF_FUNC_sk_assign:
return &bpf_sk_assign_proto;
case BPF_FUNC_skb_set_delivery_time:
return &bpf_skb_set_delivery_time_proto;
case BPF_FUNC_skb_set_tstamp:
return &bpf_skb_set_tstamp_proto;
#endif
default:
return bpf_sk_base_func_proto(func_id);
@@ -8127,9 +8127,9 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
return false;
info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
break;
case offsetof(struct __sk_buff, delivery_time_type):
case offsetof(struct __sk_buff, tstamp_type):
return false;
case offsetofend(struct __sk_buff, delivery_time_type) ... offsetof(struct __sk_buff, hwtstamp) - 1:
case offsetofend(struct __sk_buff, tstamp_type) ... offsetof(struct __sk_buff, hwtstamp) - 1:
/* Explicitly prohibit access to padding in __sk_buff. */
return false;
default:
@@ -8484,14 +8484,14 @@ static bool tc_cls_act_is_valid_access(int off, int size,
break;
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
case offsetof(struct __sk_buff, delivery_time_type):
case offsetof(struct __sk_buff, tstamp_type):
/* The convert_ctx_access() on reading and writing
* __sk_buff->tstamp depends on whether the bpf prog
* has used __sk_buff->delivery_time_type or not.
* Thus, we need to set prog->delivery_time_access
* has used __sk_buff->tstamp_type or not.
* Thus, we need to set prog->tstamp_type_access
* earlier during is_valid_access() here.
*/
((struct bpf_prog *)prog)->delivery_time_access = 1;
((struct bpf_prog *)prog)->tstamp_type_access = 1;
return size == sizeof(__u8);
}
@@ -8888,42 +8888,22 @@ static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
static struct bpf_insn *bpf_convert_dtime_type_read(const struct bpf_insn *si,
struct bpf_insn *insn)
static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
struct bpf_insn *insn)
{
__u8 value_reg = si->dst_reg;
__u8 skb_reg = si->src_reg;
/* AX is needed because src_reg and dst_reg could be the same */
__u8 tmp_reg = BPF_REG_AX;
*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
SKB_MONO_DELIVERY_TIME_OFFSET);
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
SKB_MONO_DELIVERY_TIME_MASK);
*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 2);
/* value_reg = BPF_SKB_DELIVERY_TIME_MONO */
*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_DELIVERY_TIME_MONO);
*insn++ = BPF_JMP_A(IS_ENABLED(CONFIG_NET_CLS_ACT) ? 10 : 5);
*insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, skb_reg,
offsetof(struct sk_buff, tstamp));
*insn++ = BPF_JMP_IMM(BPF_JNE, tmp_reg, 0, 2);
/* value_reg = BPF_SKB_DELIVERY_TIME_NONE */
*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_DELIVERY_TIME_NONE);
*insn++ = BPF_JMP_A(IS_ENABLED(CONFIG_NET_CLS_ACT) ? 6 : 1);
#ifdef CONFIG_NET_CLS_ACT
*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, TC_AT_INGRESS_OFFSET);
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, TC_AT_INGRESS_MASK);
*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 2);
/* At ingress, value_reg = 0 */
*insn++ = BPF_MOV32_IMM(value_reg, 0);
PKT_VLAN_PRESENT_OFFSET);
*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
SKB_MONO_DELIVERY_TIME_MASK, 2);
*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC);
*insn++ = BPF_JMP_A(1);
#endif
*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO);
/* value_reg = BPF_SKB_DELIVERYT_TIME_UNSPEC */
*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_DELIVERY_TIME_UNSPEC);
/* 15 insns with CONFIG_NET_CLS_ACT */
return insn;
}
@@ -8956,21 +8936,22 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
__u8 skb_reg = si->src_reg;
#ifdef CONFIG_NET_CLS_ACT
if (!prog->delivery_time_access) {
/* If the tstamp_type is read,
* the bpf prog is aware the tstamp could have delivery time.
* Thus, read skb->tstamp as is if tstamp_type_access is true.
*/
if (!prog->tstamp_type_access) {
/* AX is needed because src_reg and dst_reg could be the same */
__u8 tmp_reg = BPF_REG_AX;
*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, TC_AT_INGRESS_OFFSET);
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, TC_AT_INGRESS_MASK);
*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 5);
/* @ingress, read __sk_buff->tstamp as the (rcv) timestamp,
* so check the skb->mono_delivery_time.
*/
*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
SKB_MONO_DELIVERY_TIME_OFFSET);
*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, PKT_VLAN_PRESENT_OFFSET);
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
SKB_MONO_DELIVERY_TIME_MASK);
*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 2);
/* skb->mono_delivery_time is set, read 0 as the (rcv) timestamp. */
TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK);
*insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg,
TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2);
/* skb->tc_at_ingress && skb->mono_delivery_time,
* read 0 as the (rcv) timestamp.
*/
*insn++ = BPF_MOV64_IMM(value_reg, 0);
*insn++ = BPF_JMP_A(1);
}
@@ -8989,25 +8970,27 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
__u8 skb_reg = si->dst_reg;
#ifdef CONFIG_NET_CLS_ACT
if (!prog->delivery_time_access) {
/* If the tstamp_type is read,
* the bpf prog is aware the tstamp could have delivery time.
* Thus, write skb->tstamp as is if tstamp_type_access is true.
* Otherwise, writing at ingress will have to clear the
* mono_delivery_time bit also.
*/
if (!prog->tstamp_type_access) {
__u8 tmp_reg = BPF_REG_AX;
*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, TC_AT_INGRESS_OFFSET);
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, TC_AT_INGRESS_MASK);
*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 3);
/* Writing __sk_buff->tstamp at ingress as the (rcv) timestamp.
* Clear the skb->mono_delivery_time.
*/
*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
SKB_MONO_DELIVERY_TIME_OFFSET);
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
~SKB_MONO_DELIVERY_TIME_MASK);
*insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg,
SKB_MONO_DELIVERY_TIME_OFFSET);
*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, PKT_VLAN_PRESENT_OFFSET);
/* Writing __sk_buff->tstamp as ingress, goto <clear> */
*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1);
/* goto <store> */
*insn++ = BPF_JMP_A(2);
/* <clear>: mono_delivery_time */
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK);
*insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, PKT_VLAN_PRESENT_OFFSET);
}
#endif
/* skb->tstamp = tstamp */
/* <store>: skb->tstamp = tstamp */
*insn++ = BPF_STX_MEM(BPF_DW, skb_reg, value_reg,
offsetof(struct sk_buff, tstamp));
return insn;
@@ -9326,8 +9309,8 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
insn = bpf_convert_tstamp_read(prog, si, insn);
break;
case offsetof(struct __sk_buff, delivery_time_type):
insn = bpf_convert_dtime_type_read(si, insn);
case offsetof(struct __sk_buff, tstamp_type):
insn = bpf_convert_tstamp_type_read(si, insn);
break;
case offsetof(struct __sk_buff, gso_segs):
@@ -11006,13 +10989,24 @@ static bool sk_lookup_is_valid_access(int off, int size,
case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
case offsetof(struct bpf_sk_lookup, remote_port) ...
offsetof(struct bpf_sk_lookup, local_ip4) - 1:
case bpf_ctx_range(struct bpf_sk_lookup, local_port):
case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex):
bpf_ctx_record_field_size(info, sizeof(__u32));
return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
/* Allow 4-byte access to 2-byte field for backward compatibility */
if (size == sizeof(__u32))
return true;
bpf_ctx_record_field_size(info, sizeof(__be16));
return bpf_ctx_narrow_access_ok(off, size, sizeof(__be16));
case offsetofend(struct bpf_sk_lookup, remote_port) ...
offsetof(struct bpf_sk_lookup, local_ip4) - 1:
/* Allow access to zero padding for backward compatibility */
bpf_ctx_record_field_size(info, sizeof(__u16));
return bpf_ctx_narrow_access_ok(off, size, sizeof(__u16));
default:
return false;
}
@@ -11094,6 +11088,11 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
sport, 2, target_size));
break;
case offsetofend(struct bpf_sk_lookup, remote_port):
*target_size = 2;
*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
break;
case offsetof(struct bpf_sk_lookup, local_port):
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
bpf_target_off(struct bpf_sk_lookup_kern,

View File

@@ -27,6 +27,7 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
int elem_first_coalesce)
{
struct page_frag *pfrag = sk_page_frag(sk);
u32 osize = msg->sg.size;
int ret = 0;
len -= msg->sg.size;
@@ -35,13 +36,17 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
u32 orig_offset;
int use, i;
if (!sk_page_frag_refill(sk, pfrag))
return -ENOMEM;
if (!sk_page_frag_refill(sk, pfrag)) {
ret = -ENOMEM;
goto msg_trim;
}
orig_offset = pfrag->offset;
use = min_t(int, len, pfrag->size - orig_offset);
if (!sk_wmem_schedule(sk, use))
return -ENOMEM;
if (!sk_wmem_schedule(sk, use)) {
ret = -ENOMEM;
goto msg_trim;
}
i = msg->sg.end;
sk_msg_iter_var_prev(i);
@@ -71,6 +76,10 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
}
return ret;
msg_trim:
sk_msg_trim(sk, msg, osize);
return ret;
}
EXPORT_SYMBOL_GPL(sk_msg_alloc);

View File

@@ -529,6 +529,7 @@ void xdp_return_buff(struct xdp_buff *xdp)
out:
__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
}
EXPORT_SYMBOL_GPL(xdp_return_buff);
/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
void __xdp_release_frame(void *data, struct xdp_mem_info *mem)