Merge branch 'bpf-helper-improvements'
Daniel Borkmann says:

====================
BPF updates

This set contains various updates for eBPF, i.e. the addition of a generic
csum helper function and other misc bits that mostly improve existing helpers
and ease programming with eBPF on cls_bpf. For more details, please see the
individual patches. The set is rebased on top of
http://patchwork.ozlabs.org/patch/584465/.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
commit 9c572dc483
@@ -65,6 +65,7 @@ enum bpf_arg_type {
	 */
	ARG_PTR_TO_STACK,	/* any pointer to eBPF program stack */
	ARG_CONST_STACK_SIZE,	/* number of bytes accessed from stack */
+	ARG_CONST_STACK_SIZE_OR_ZERO, /* number of bytes accessed from stack or 0 */

	ARG_PTR_TO_CTX,		/* pointer to context */
	ARG_ANYTHING,		/* any (initialized) argument is ok */
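For illustration, a helper that takes an optional buffer pairs ARG_PTR_TO_STACK with the new ARG_CONST_STACK_SIZE_OR_ZERO in its function proto; the real in-tree user added by this series is bpf_csum_diff_proto further down. A minimal, purely hypothetical kernel-side sketch (bpf_example_helper does not exist in the tree):

  /* Hypothetical helper: accepts either a stack buffer plus its size,
   * or NULL plus a constant size of 0, which the verifier permits via
   * ARG_CONST_STACK_SIZE_OR_ZERO.
   */
  static u64 bpf_example_helper(u64 r1, u64 size, u64 r3, u64 r4, u64 r5)
  {
  	u8 *buf = (u8 *) (long) r1;

  	if (!size)	/* NULL/0 case */
  		return 0;

  	return buf[0];
  }

  static const struct bpf_func_proto bpf_example_helper_proto = {
  	.func		= bpf_example_helper,
  	.gpl_only	= false,
  	.ret_type	= RET_INTEGER,
  	.arg1_type	= ARG_PTR_TO_STACK,
  	.arg2_type	= ARG_CONST_STACK_SIZE_OR_ZERO,
  };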
@@ -2630,6 +2630,13 @@ static inline int skb_clone_writable(const struct sk_buff *skb, unsigned int len
	       skb_headroom(skb) + len <= skb->hdr_len;
 }

+static inline int skb_try_make_writable(struct sk_buff *skb,
+					unsigned int write_len)
+{
+	return skb_cloned(skb) && !skb_clone_writable(skb, write_len) &&
+	       pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+}
+
 static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom,
			    int cloned)
 {
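The new helper wraps the skb_cloned()/skb_clone_writable()/pskb_expand_head() dance that several callers open-code and that the tc action and BPF helper hunks below now replace. The typical call pattern, as a rough sketch (the caller name and error code are illustrative):

  /* Make sure the first 'len' bytes of the skb are private and writable
   * before modifying them; a non-zero return means the skb could not be
   * uncloned, so the caller should bail out.
   */
  static int example_rewrite(struct sk_buff *skb, unsigned int len)
  {
  	if (skb_try_make_writable(skb, len))
  		return -EFAULT;

  	memset(skb->data, 0, len);	/* now safe to write */
  	return 0;
  }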
@@ -287,6 +287,17 @@ enum bpf_func_id {
	 * Return: >= 0 stackid on success or negative error
	 */
	BPF_FUNC_get_stackid,
+
+	/**
+	 * bpf_csum_diff(from, from_size, to, to_size, seed) - calculate csum diff
+	 * @from: raw from buffer
+	 * @from_size: length of from buffer
+	 * @to: raw to buffer
+	 * @to_size: length of to buffer
+	 * @seed: optional seed
+	 * Return: csum result
+	 */
+	BPF_FUNC_csum_diff,
	__BPF_FUNC_MAX_ID,
 };

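From a cls_bpf program, the helper is typically used to precompute the checksum delta for a header rewrite. A minimal sketch, assuming the usual samples/bpf-style helper declaration (function and variable names are illustrative, not part of this patch):

  static int (*bpf_csum_diff)(void *from, int from_size, void *to,
  			    int to_size, int seed) =
  	(void *) BPF_FUNC_csum_diff;

  /* Checksum delta for rewriting a 4-byte IPv4 address; feed the result
   * into bpf_l3_csum_replace()/bpf_l4_csum_replace() afterwards.
   */
  static inline int csum_diff_addr_rewrite(__be32 old_ip, __be32 new_ip)
  {
  	return bpf_csum_diff(&old_ip, sizeof(old_ip),
  			     &new_ip, sizeof(new_ip), 0);
  }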
@@ -302,6 +313,7 @@ enum bpf_func_id {

 /* BPF_FUNC_l4_csum_replace flags. */
 #define BPF_F_PSEUDO_HDR		(1ULL << 4)
+#define BPF_F_MARK_MANGLED_0		(1ULL << 5)

 /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
 #define BPF_F_INGRESS			(1ULL << 0)
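BPF_F_MARK_MANGLED_0 is aimed at protocols like UDP over IPv4 where a checksum of 0 means "no checksum": with the flag set, bpf_l4_csum_replace() leaves a zero checksum untouched and writes CSUM_MANGLED_0 instead of 0 after an update (see the bpf_l4_csum_replace() hunk below). A hedged program-side sketch, with illustrative declarations and offsets:

  static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to,
  				  int flags) =
  	(void *) BPF_FUNC_l4_csum_replace;

  /* Fix up the UDP checksum after a 2-byte port rewrite; the low bits of
   * the flags carry the field size (BPF_F_HDR_FIELD_MASK).
   */
  static inline int fixup_udp_port_csum(struct __sk_buff *skb, int csum_off,
  				      __be16 old_port, __be16 new_port)
  {
  	return bpf_l4_csum_replace(skb, csum_off, old_port, new_port,
  				   BPF_F_MARK_MANGLED_0 | sizeof(new_port));
  }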
@@ -779,15 +779,24 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
  * bytes from that pointer, make sure that it's within stack boundary
  * and all elements of stack are initialized
  */
-static int check_stack_boundary(struct verifier_env *env,
-				int regno, int access_size)
+static int check_stack_boundary(struct verifier_env *env, int regno,
+				int access_size, bool zero_size_allowed)
 {
	struct verifier_state *state = &env->cur_state;
	struct reg_state *regs = state->regs;
	int off, i;

-	if (regs[regno].type != PTR_TO_STACK)
+	if (regs[regno].type != PTR_TO_STACK) {
+		if (zero_size_allowed && access_size == 0 &&
+		    regs[regno].type == CONST_IMM &&
+		    regs[regno].imm == 0)
+			return 0;
+
+		verbose("R%d type=%s expected=%s\n", regno,
+			reg_type_str[regs[regno].type],
+			reg_type_str[PTR_TO_STACK]);
		return -EACCES;
+	}

	off = regs[regno].imm;
	if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
@@ -830,15 +839,24 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
		return 0;
	}

-	if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY ||
+	if (arg_type == ARG_PTR_TO_MAP_KEY ||
	    arg_type == ARG_PTR_TO_MAP_VALUE) {
		expected_type = PTR_TO_STACK;
-	} else if (arg_type == ARG_CONST_STACK_SIZE) {
+	} else if (arg_type == ARG_CONST_STACK_SIZE ||
+		   arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
		expected_type = CONST_IMM;
	} else if (arg_type == ARG_CONST_MAP_PTR) {
		expected_type = CONST_PTR_TO_MAP;
	} else if (arg_type == ARG_PTR_TO_CTX) {
		expected_type = PTR_TO_CTX;
+	} else if (arg_type == ARG_PTR_TO_STACK) {
+		expected_type = PTR_TO_STACK;
+		/* One exception here. In case function allows for NULL to be
+		 * passed in as argument, it's a CONST_IMM type. Final test
+		 * happens during stack boundary checking.
+		 */
+		if (reg->type == CONST_IMM && reg->imm == 0)
+			expected_type = CONST_IMM;
	} else {
		verbose("unsupported arg_type %d\n", arg_type);
		return -EFAULT;
@@ -868,8 +886,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
			verbose("invalid map_ptr to access map->key\n");
			return -EACCES;
		}
-		err = check_stack_boundary(env, regno, (*mapp)->key_size);
-
+		err = check_stack_boundary(env, regno, (*mapp)->key_size,
+					   false);
	} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
		/* bpf_map_xxx(..., map_ptr, ..., value) call:
		 * check [value, value + map->value_size) validity
@@ -879,9 +897,12 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
			verbose("invalid map_ptr to access map->value\n");
			return -EACCES;
		}
-		err = check_stack_boundary(env, regno, (*mapp)->value_size);
+		err = check_stack_boundary(env, regno, (*mapp)->value_size,
+					   false);
+	} else if (arg_type == ARG_CONST_STACK_SIZE ||
+		   arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
+		bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO);

-	} else if (arg_type == ARG_CONST_STACK_SIZE) {
		/* bpf_xxx(..., buf, len) call will access 'len' bytes
		 * from stack pointer 'buf'. Check it
		 * note: regno == len, regno - 1 == buf
@@ -891,7 +912,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
			verbose("ARG_CONST_STACK_SIZE cannot be first argument\n");
			return -EACCES;
		}
-		err = check_stack_boundary(env, regno - 1, reg->imm);
+		err = check_stack_boundary(env, regno - 1, reg->imm,
+					   zero_size_allowed);
	}

	return err;
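Net effect of the verifier changes above: a (NULL, 0) pair is now accepted where a helper declares ARG_PTR_TO_STACK followed by ARG_CONST_STACK_SIZE_OR_ZERO, e.g. to "push" new bytes into an existing checksum without supplying an old buffer. A sketch, reusing the bpf_csum_diff() declaration from the earlier example (names are illustrative):

  static inline int push_pseudo_hdr_words(__be32 saddr, __be32 daddr, int seed)
  {
  	__be32 words[2] = { saddr, daddr };

  	/* from == NULL with from_size == 0 is only accepted because the
  	 * verifier now treats the (CONST_IMM 0, size 0) pair as a valid
  	 * zero-sized stack access.
  	 */
  	return bpf_csum_diff(NULL, 0, words, sizeof(words), seed);
  }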
@@ -530,12 +530,14 @@ do_pass:
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
			break;

-		/* RET_K, RET_A are remaped into 2 insns. */
+		/* RET_K is remaped into 2 insns. RET_A case doesn't need an
+		 * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
+		 */
		case BPF_RET | BPF_A:
		case BPF_RET | BPF_K:
-			*insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ?
-						BPF_K : BPF_X, BPF_REG_0,
-						BPF_REG_A, fp->k);
+			if (BPF_RVAL(fp->code) == BPF_K)
+				*insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
+							0, fp->k);
			*insn = BPF_EXIT_INSN();
			break;

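In terms of emitted instructions (macros from include/linux/filter.h; the constant is just an example), the converter output now looks roughly like this:

  /* classic "ret #0xffff": K still needs to be moved into R0 */
  struct bpf_insn ret_k_example[] = {
  	BPF_MOV32_IMM(BPF_REG_0, 0xffff),
  	BPF_EXIT_INSN(),
  };

  /* classic "ret a": no mov needed, the converter keeps A in R0 */
  struct bpf_insn ret_a_example[] = {
  	BPF_EXIT_INSN(),
  };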
@@ -1333,15 +1335,22 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
	return 0;
 }

-#define BPF_LDST_LEN 16U
+struct bpf_scratchpad {
+	union {
+		__be32 diff[MAX_BPF_STACK / sizeof(__be32)];
+		u8     buff[MAX_BPF_STACK];
+	};
+};
+
+static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);

 static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
 {
+	struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
	struct sk_buff *skb = (struct sk_buff *) (long) r1;
	int offset = (int) r2;
	void *from = (void *) (long) r3;
	unsigned int len = (unsigned int) r4;
-	char buf[BPF_LDST_LEN];
	void *ptr;

	if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM)))
@@ -1355,14 +1364,12 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
	 *
	 * so check for invalid 'offset' and too large 'len'
	 */
-	if (unlikely((u32) offset > 0xffff || len > sizeof(buf)))
+	if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff)))
		return -EFAULT;
+	if (unlikely(skb_try_make_writable(skb, offset + len)))
+		return -EFAULT;

-	if (unlikely(skb_cloned(skb) &&
-		     !skb_clone_writable(skb, offset + len)))
-		return -EFAULT;
-
-	ptr = skb_header_pointer(skb, offset, len, buf);
+	ptr = skb_header_pointer(skb, offset, len, sp->buff);
	if (unlikely(!ptr))
		return -EFAULT;

@@ -1371,7 +1378,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)

	memcpy(ptr, from, len);

-	if (ptr == buf)
+	if (ptr == sp->buff)
		/* skb_store_bits cannot return -EFAULT here */
		skb_store_bits(skb, offset, ptr, len);

@@ -1400,7 +1407,7 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
	unsigned int len = (unsigned int) r4;
	void *ptr;

-	if (unlikely((u32) offset > 0xffff || len > BPF_LDST_LEN))
+	if (unlikely((u32) offset > 0xffff || len > MAX_BPF_STACK))
		return -EFAULT;

	ptr = skb_header_pointer(skb, offset, len, to);
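Visible effect for programs: bpf_skb_store_bytes() and bpf_skb_load_bytes() now accept buffers up to MAX_BPF_STACK (512) bytes instead of the previous 16-byte BPF_LDST_LEN limit. A small sketch with an illustrative 64-byte load, using the usual samples/bpf-style declaration:

  static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
  	(void *) BPF_FUNC_skb_load_bytes;

  static inline int load_first_64_bytes(struct __sk_buff *skb)
  {
  	char buf[64];

  	/* 64 bytes > the old 16-byte limit, still well below MAX_BPF_STACK */
  	return bpf_skb_load_bytes(skb, 0, buf, sizeof(buf));
  }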
@@ -1432,9 +1439,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
		return -EINVAL;
	if (unlikely((u32) offset > 0xffff))
		return -EFAULT;
-
-	if (unlikely(skb_cloned(skb) &&
-		     !skb_clone_writable(skb, offset + sizeof(sum))))
+	if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
		return -EFAULT;

	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1474,23 +1479,31 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
 {
	struct sk_buff *skb = (struct sk_buff *) (long) r1;
	bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
+	bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
	int offset = (int) r2;
	__sum16 sum, *ptr;

-	if (unlikely(flags & ~(BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
+	if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR |
+			       BPF_F_HDR_FIELD_MASK)))
		return -EINVAL;
	if (unlikely((u32) offset > 0xffff))
		return -EFAULT;
-
-	if (unlikely(skb_cloned(skb) &&
-		     !skb_clone_writable(skb, offset + sizeof(sum))))
+	if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
		return -EFAULT;

	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
	if (unlikely(!ptr))
		return -EFAULT;
+	if (is_mmzero && !*ptr)
+		return 0;

	switch (flags & BPF_F_HDR_FIELD_MASK) {
+	case 0:
+		if (unlikely(from != 0))
+			return -EINVAL;
+
+		inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
+		break;
	case 2:
		inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
		break;
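The new "case 0" path is what lets bpf_csum_diff() feed bpf_l4_csum_replace(): with a field size of 0 in the flags, 'from' must be 0 and 'to' carries a precomputed delta, which the kernel applies via inet_proto_csum_replace_by_diff(). A sketch reusing the helper declarations from the earlier examples (offsets are illustrative):

  static inline int fixup_tcp_csum_for_addr(struct __sk_buff *skb, int csum_off,
  					  __be32 old_ip, __be32 new_ip)
  {
  	int diff = bpf_csum_diff(&old_ip, sizeof(old_ip),
  				 &new_ip, sizeof(new_ip), 0);

  	/* field size 0 selects the diff path; addresses are part of the
  	 * TCP pseudo header, hence BPF_F_PSEUDO_HDR.
  	 */
  	return bpf_l4_csum_replace(skb, csum_off, 0, diff, BPF_F_PSEUDO_HDR);
  }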
@@ -1501,6 +1514,8 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
		return -EINVAL;
	}

+	if (is_mmzero && !*ptr)
+		*ptr = CSUM_MANGLED_0;
	if (ptr == &sum)
		/* skb_store_bits guaranteed to not return -EFAULT here */
		skb_store_bits(skb, offset, ptr, sizeof(sum));
@@ -1519,6 +1534,45 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = {
	.arg5_type	= ARG_ANYTHING,
 };

+static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed)
+{
+	struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
+	u64 diff_size = from_size + to_size;
+	__be32 *from = (__be32 *) (long) r1;
+	__be32 *to = (__be32 *) (long) r3;
+	int i, j = 0;
+
+	/* This is quite flexible, some examples:
+	 *
+	 * from_size == 0, to_size > 0,  seed := csum --> pushing data
+	 * from_size > 0,  to_size == 0, seed := csum --> pulling data
+	 * from_size > 0,  to_size > 0,  seed := 0    --> diffing data
+	 *
+	 * Even for diffing, from_size and to_size don't need to be equal.
+	 */
+	if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
+		     diff_size > sizeof(sp->diff)))
+		return -EINVAL;
+
+	for (i = 0; i < from_size / sizeof(__be32); i++, j++)
+		sp->diff[j] = ~from[i];
+	for (i = 0; i < to_size / sizeof(__be32); i++, j++)
+		sp->diff[j] = to[i];
+
+	return csum_partial(sp->diff, diff_size, seed);
+}
+
+const struct bpf_func_proto bpf_csum_diff_proto = {
+	.func		= bpf_csum_diff,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_STACK,
+	.arg2_type	= ARG_CONST_STACK_SIZE_OR_ZERO,
+	.arg3_type	= ARG_PTR_TO_STACK,
+	.arg4_type	= ARG_CONST_STACK_SIZE_OR_ZERO,
+	.arg5_type	= ARG_ANYTHING,
+};
+
 static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
 {
	struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2;
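From the program side, the three shapes described in the comment map onto the following calls (a sketch; declarations as in the earlier examples, sizes must be multiples of 4 and from_size + to_size bounded by the per-CPU scratchpad, i.e. MAX_BPF_STACK bytes):

  static inline void csum_diff_shapes(__be32 *old4, __be32 *new4, int old_csum)
  {
  	int push, pull, diff;

  	push = bpf_csum_diff(NULL, 0, new4, 4, old_csum);	/* add data    */
  	pull = bpf_csum_diff(old4, 4, NULL, 0, old_csum);	/* remove data */
  	diff = bpf_csum_diff(old4, 4, new4, 4, 0);		/* old -> new  */

  	(void)push; (void)pull; (void)diff;
  }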
@@ -1682,6 +1736,13 @@ bool bpf_helper_changes_skb_data(void *func)
		return true;
	if (func == bpf_skb_vlan_pop)
		return true;
+	if (func == bpf_skb_store_bytes)
+		return true;
+	if (func == bpf_l3_csum_replace)
+		return true;
+	if (func == bpf_l4_csum_replace)
+		return true;
+
	return false;
 }

@@ -1849,6 +1910,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
		return &bpf_skb_store_bytes_proto;
	case BPF_FUNC_skb_load_bytes:
		return &bpf_skb_load_bytes_proto;
+	case BPF_FUNC_csum_diff:
+		return &bpf_csum_diff_proto;
	case BPF_FUNC_l3_csum_replace:
		return &bpf_l3_csum_replace_proto;
	case BPF_FUNC_l4_csum_replace:
@@ -105,9 +105,7 @@ static void *tcf_csum_skb_nextlayer(struct sk_buff *skb,
	int hl = ihl + jhl;

	if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) ||
-	    (skb_cloned(skb) &&
-	     !skb_clone_writable(skb, hl + ntkoff) &&
-	     pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+	    skb_try_make_writable(skb, hl + ntkoff))
		return NULL;
	else
		return (void *)(skb_network_header(skb) + ihl);
@@ -365,9 +363,7 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
	}

	if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) {
-		if (skb_cloned(skb) &&
-		    !skb_clone_writable(skb, sizeof(*iph) + ntkoff) &&
-		    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+		if (skb_try_make_writable(skb, sizeof(*iph) + ntkoff))
			goto fail;

		ip_send_check(ip_hdr(skb));
@@ -126,9 +126,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
		addr = iph->daddr;

	if (!((old_addr ^ addr) & mask)) {
-		if (skb_cloned(skb) &&
-		    !skb_clone_writable(skb, sizeof(*iph) + noff) &&
-		    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+		if (skb_try_make_writable(skb, sizeof(*iph) + noff))
			goto drop;

		new_addr &= mask;
@@ -156,9 +154,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
		struct tcphdr *tcph;

		if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) ||
-		    (skb_cloned(skb) &&
-		     !skb_clone_writable(skb, ihl + sizeof(*tcph) + noff) &&
-		     pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+		    skb_try_make_writable(skb, ihl + sizeof(*tcph) + noff))
			goto drop;

		tcph = (void *)(skb_network_header(skb) + ihl);
@@ -171,9 +167,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
		struct udphdr *udph;

		if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) ||
-		    (skb_cloned(skb) &&
-		     !skb_clone_writable(skb, ihl + sizeof(*udph) + noff) &&
-		     pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+		    skb_try_make_writable(skb, ihl + sizeof(*udph) + noff))
			goto drop;

		udph = (void *)(skb_network_header(skb) + ihl);
@@ -213,10 +207,8 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
		if ((old_addr ^ addr) & mask)
			break;

-		if (skb_cloned(skb) &&
-		    !skb_clone_writable(skb, ihl + sizeof(*icmph) +
-					     sizeof(*iph) + noff) &&
-		    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+		if (skb_try_make_writable(skb, ihl + sizeof(*icmph) +
+						sizeof(*iph) + noff))
			goto drop;

		icmph = (void *)(skb_network_header(skb) + ihl);