diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a9b1875212f6..a7080c86fa76 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -392,10 +392,18 @@ enum bpf_type_flag { MEM_UNINIT = BIT(7 + BPF_BASE_TYPE_BITS), + /* DYNPTR points to memory local to the bpf program. */ + DYNPTR_TYPE_LOCAL = BIT(8 + BPF_BASE_TYPE_BITS), + + /* DYNPTR points to a ringbuf record. */ + DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; +#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF) + /* Max number of base types. */ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -438,6 +446,7 @@ enum bpf_arg_type { ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ ARG_PTR_TO_KPTR, /* pointer to referenced kptr */ + ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */ __BPF_ARG_TYPE_MAX, /* Extended arg_types. */ @@ -479,6 +488,7 @@ enum bpf_return_type { RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM, + RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM, RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is @@ -2225,6 +2235,9 @@ extern const struct bpf_func_proto bpf_ringbuf_reserve_proto; extern const struct bpf_func_proto bpf_ringbuf_submit_proto; extern const struct bpf_func_proto bpf_ringbuf_discard_proto; extern const struct bpf_func_proto bpf_ringbuf_query_proto; +extern const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto; +extern const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto; +extern const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto; extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto; @@ -2376,4 +2389,33 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, u32 **bin_buf, u32 num_args); void bpf_bprintf_cleanup(void); +/* the implementation of the opaque uapi struct bpf_dynptr */ +struct bpf_dynptr_kern { + void *data; + /* Size represents the number of usable bytes of dynptr data. + * If for example the offset is at 4 for a local dynptr whose data is + * of type u64, the number of usable bytes is 4. + * + * The upper 8 bits are reserved. It is as follows: + * Bits 0 - 23 = size + * Bits 24 - 30 = dynptr type + * Bit 31 = whether dynptr is read-only + */ + u32 size; + u32 offset; +} __aligned(8); + +enum bpf_dynptr_type { + BPF_DYNPTR_TYPE_INVALID, + /* Points to memory that is local to the bpf program */ + BPF_DYNPTR_TYPE_LOCAL, + /* Underlying data is a ringbuf record */ + BPF_DYNPTR_TYPE_RINGBUF, +}; + +void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, + enum bpf_dynptr_type type, u32 offset, u32 size); +void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); +int bpf_dynptr_check_size(u32 size); + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 1f1e7f2ea967..e8439f6cbe57 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -72,6 +72,18 @@ struct bpf_reg_state { u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */ + /* For dynptr stack slots */ + struct { + enum bpf_dynptr_type type; + /* A dynptr is 16 bytes so it takes up 2 stack slots. + * We need to track which slot is the first slot + * to protect against cases where the user may try to + * pass in an address starting at the second slot of the + * dynptr. + */ + bool first_slot; + } dynptr; + /* Max size from any of the above. */ struct { unsigned long raw1; @@ -88,6 +100,8 @@ struct bpf_reg_state { * for the purpose of tracking that it's freed. * For PTR_TO_SOCKET this is used to share which pointers retain the * same reference to the socket, to determine proper reference freeing. + * For stack slots that are dynptrs, this is used to track references to + * the dynptr to determine proper reference freeing. */ u32 id; /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned @@ -174,9 +188,15 @@ enum bpf_stack_slot_type { STACK_SPILL, /* register spilled into stack */ STACK_MISC, /* BPF program wrote some data into this slot */ STACK_ZERO, /* BPF program wrote constant zero */ + /* A dynptr is stored in this stack slot. The type of dynptr + * is stored in bpf_stack_state->spilled_ptr.dynptr.type + */ + STACK_DYNPTR, }; #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ +#define BPF_DYNPTR_SIZE sizeof(struct bpf_dynptr_kern) +#define BPF_DYNPTR_NR_SLOTS (BPF_DYNPTR_SIZE / BPF_REG_SIZE) struct bpf_stack_state { struct bpf_reg_state spilled_ptr; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 56688bee20d9..f4009dbdf62d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5178,6 +5178,77 @@ union bpf_attr { * Dynamically cast a *sk* pointer to a *mptcp_sock* pointer. * Return * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Get a dynptr to local memory *data*. + * + * *data* must be a ptr to a map value. + * The maximum *size* supported is DYNPTR_MAX_SIZE. + * *flags* is currently unused. + * Return + * 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE, + * -EINVAL if flags is not 0. + * + * long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Reserve *size* bytes of payload in a ring buffer *ringbuf* + * through the dynptr interface. *flags* must be 0. + * + * Please note that a corresponding bpf_ringbuf_submit_dynptr or + * bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the + * reservation fails. This is enforced by the verifier. + * Return + * 0 on success, or a negative error in case of failure. + * + * void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Submit reserved ring buffer sample, pointed to by *data*, + * through the dynptr interface. This is a no-op if the dynptr is + * invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_submit'. + * Return + * Nothing. Always succeeds. + * + * void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Discard reserved ring buffer sample through the dynptr + * interface. This is a no-op if the dynptr is invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_discard'. + * Return + * Nothing. Always succeeds. + * + * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset) + * Description + * Read *len* bytes from *src* into *dst*, starting from *offset* + * into *src*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *src*'s data, -EINVAL if *src* is an invalid dynptr. + * + * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len) + * Description + * Write *len* bytes from *src* into *dst*, starting from *offset* + * into *dst*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* + * is a read-only dynptr. + * + * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) + * Description + * Get a pointer to the underlying dynptr data. + * + * *len* must be a statically known value. The returned data slice + * is invalidated whenever the dynptr is invalidated. + * Return + * Pointer to the underlying dynptr data, NULL if the dynptr is + * read-only, if the dynptr is invalid, or if the offset and length + * is out of bounds. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5377,6 +5448,13 @@ union bpf_attr { FN(kptr_xchg), \ FN(map_lookup_percpu_elem), \ FN(skc_to_mptcp_sock), \ + FN(dynptr_from_mem), \ + FN(ringbuf_reserve_dynptr), \ + FN(ringbuf_submit_dynptr), \ + FN(ringbuf_discard_dynptr), \ + FN(dynptr_read), \ + FN(dynptr_write), \ + FN(dynptr_data), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -6528,6 +6606,11 @@ struct bpf_timer { __u64 :64; } __attribute__((aligned(8))); +struct bpf_dynptr { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index bad96131a510..225806a02efb 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1412,6 +1412,169 @@ const struct bpf_func_proto bpf_kptr_xchg_proto = { .arg2_btf_id = BPF_PTR_POISON, }; +/* Since the upper 8 bits of dynptr->size is reserved, the + * maximum supported size is 2^24 - 1. + */ +#define DYNPTR_MAX_SIZE ((1UL << 24) - 1) +#define DYNPTR_TYPE_SHIFT 28 +#define DYNPTR_SIZE_MASK 0xFFFFFF +#define DYNPTR_RDONLY_BIT BIT(31) + +static bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr) +{ + return ptr->size & DYNPTR_RDONLY_BIT; +} + +static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type) +{ + ptr->size |= type << DYNPTR_TYPE_SHIFT; +} + +static u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr) +{ + return ptr->size & DYNPTR_SIZE_MASK; +} + +int bpf_dynptr_check_size(u32 size) +{ + return size > DYNPTR_MAX_SIZE ? -E2BIG : 0; +} + +void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, + enum bpf_dynptr_type type, u32 offset, u32 size) +{ + ptr->data = data; + ptr->offset = offset; + ptr->size = size; + bpf_dynptr_set_type(ptr, type); +} + +void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) +{ + memset(ptr, 0, sizeof(*ptr)); +} + +static int bpf_dynptr_check_off_len(struct bpf_dynptr_kern *ptr, u32 offset, u32 len) +{ + u32 size = bpf_dynptr_get_size(ptr); + + if (len > size || offset > size - len) + return -E2BIG; + + return 0; +} + +BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr) +{ + int err; + + err = bpf_dynptr_check_size(size); + if (err) + goto error; + + /* flags is currently unsupported */ + if (flags) { + err = -EINVAL; + goto error; + } + + bpf_dynptr_init(ptr, data, BPF_DYNPTR_TYPE_LOCAL, 0, size); + + return 0; + +error: + bpf_dynptr_set_null(ptr); + return err; +} + +const struct bpf_func_proto bpf_dynptr_from_mem_proto = { + .func = bpf_dynptr_from_mem, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT, +}; + +BPF_CALL_4(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src, u32, offset) +{ + int err; + + if (!src->data) + return -EINVAL; + + err = bpf_dynptr_check_off_len(src, offset, len); + if (err) + return err; + + memcpy(dst, src->data + src->offset + offset, len); + + return 0; +} + +const struct bpf_func_proto bpf_dynptr_read_proto = { + .func = bpf_dynptr_read, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, + .arg3_type = ARG_PTR_TO_DYNPTR, + .arg4_type = ARG_ANYTHING, +}; + +BPF_CALL_4(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src, u32, len) +{ + int err; + + if (!dst->data || bpf_dynptr_is_rdonly(dst)) + return -EINVAL; + + err = bpf_dynptr_check_off_len(dst, offset, len); + if (err) + return err; + + memcpy(dst->data + dst->offset + offset, src, len); + + return 0; +} + +const struct bpf_func_proto bpf_dynptr_write_proto = { + .func = bpf_dynptr_write, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_DYNPTR, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_CONST_SIZE_OR_ZERO, +}; + +BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len) +{ + int err; + + if (!ptr->data) + return 0; + + err = bpf_dynptr_check_off_len(ptr, offset, len); + if (err) + return 0; + + if (bpf_dynptr_is_rdonly(ptr)) + return 0; + + return (unsigned long)(ptr->data + ptr->offset + offset); +} + +const struct bpf_func_proto bpf_dynptr_data_proto = { + .func = bpf_dynptr_data, + .gpl_only = false, + .ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL, + .arg1_type = ARG_PTR_TO_DYNPTR, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_CONST_ALLOC_SIZE_OR_ZERO, +}; + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_get_current_task_btf_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; @@ -1460,12 +1623,26 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_ringbuf_discard_proto; case BPF_FUNC_ringbuf_query: return &bpf_ringbuf_query_proto; + case BPF_FUNC_ringbuf_reserve_dynptr: + return &bpf_ringbuf_reserve_dynptr_proto; + case BPF_FUNC_ringbuf_submit_dynptr: + return &bpf_ringbuf_submit_dynptr_proto; + case BPF_FUNC_ringbuf_discard_dynptr: + return &bpf_ringbuf_discard_dynptr_proto; case BPF_FUNC_for_each_map_elem: return &bpf_for_each_map_elem_proto; case BPF_FUNC_loop: return &bpf_loop_proto; case BPF_FUNC_strncmp: return &bpf_strncmp_proto; + case BPF_FUNC_dynptr_from_mem: + return &bpf_dynptr_from_mem_proto; + case BPF_FUNC_dynptr_read: + return &bpf_dynptr_read_proto; + case BPF_FUNC_dynptr_write: + return &bpf_dynptr_write_proto; + case BPF_FUNC_dynptr_data: + return &bpf_dynptr_data_proto; default: break; } diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 311264ab80c4..ded4faeca192 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -475,3 +475,81 @@ const struct bpf_func_proto bpf_ringbuf_query_proto = { .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_ANYTHING, }; + +BPF_CALL_4(bpf_ringbuf_reserve_dynptr, struct bpf_map *, map, u32, size, u64, flags, + struct bpf_dynptr_kern *, ptr) +{ + struct bpf_ringbuf_map *rb_map; + void *sample; + int err; + + if (unlikely(flags)) { + bpf_dynptr_set_null(ptr); + return -EINVAL; + } + + err = bpf_dynptr_check_size(size); + if (err) { + bpf_dynptr_set_null(ptr); + return err; + } + + rb_map = container_of(map, struct bpf_ringbuf_map, map); + + sample = __bpf_ringbuf_reserve(rb_map->rb, size); + if (!sample) { + bpf_dynptr_set_null(ptr); + return -EINVAL; + } + + bpf_dynptr_init(ptr, sample, BPF_DYNPTR_TYPE_RINGBUF, 0, size); + + return 0; +} + +const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto = { + .func = bpf_ringbuf_reserve_dynptr, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_UNINIT, +}; + +BPF_CALL_2(bpf_ringbuf_submit_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags) +{ + if (!ptr->data) + return 0; + + bpf_ringbuf_commit(ptr->data, flags, false /* discard */); + + bpf_dynptr_set_null(ptr); + + return 0; +} + +const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto = { + .func = bpf_ringbuf_submit_dynptr, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | OBJ_RELEASE, + .arg2_type = ARG_ANYTHING, +}; + +BPF_CALL_2(bpf_ringbuf_discard_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags) +{ + if (!ptr->data) + return 0; + + bpf_ringbuf_commit(ptr->data, flags, true /* discard */); + + bpf_dynptr_set_null(ptr); + + return 0; +} + +const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto = { + .func = bpf_ringbuf_discard_dynptr, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | OBJ_RELEASE, + .arg2_type = ARG_ANYTHING, +}; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 45153cbc2bd6..aedac2ac02b9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -187,6 +187,9 @@ struct bpf_verifier_stack_elem { POISON_POINTER_DELTA)) #define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV)) +static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx); +static int release_reference(struct bpf_verifier_env *env, int ref_obj_id); + static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) { return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON; @@ -259,6 +262,7 @@ struct bpf_call_arg_meta { u32 ret_btf_id; u32 subprogno; struct bpf_map_value_off_desc *kptr_off_desc; + u8 uninit_dynptr_regno; }; struct btf *btf_vmlinux; @@ -581,6 +585,7 @@ static char slot_type_char[] = { [STACK_SPILL] = 'r', [STACK_MISC] = 'm', [STACK_ZERO] = '0', + [STACK_DYNPTR] = 'd', }; static void print_liveness(struct bpf_verifier_env *env, @@ -596,6 +601,25 @@ static void print_liveness(struct bpf_verifier_env *env, verbose(env, "D"); } +static int get_spi(s32 off) +{ + return (-off - 1) / BPF_REG_SIZE; +} + +static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots) +{ + int allocated_slots = state->allocated_stack / BPF_REG_SIZE; + + /* We need to check that slots between [spi - nr_slots + 1, spi] are + * within [0, allocated_stack). + * + * Please note that the spi grows downwards. For example, a dynptr + * takes the size of two stack slots; the first slot will be at + * spi and the second slot will be at spi - 1. + */ + return spi - nr_slots + 1 >= 0 && spi < allocated_slots; +} + static struct bpf_func_state *func(struct bpf_verifier_env *env, const struct bpf_reg_state *reg) { @@ -647,6 +671,132 @@ static void mark_verifier_state_scratched(struct bpf_verifier_env *env) env->scratched_stack_slots = ~0ULL; } +static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) +{ + switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { + case DYNPTR_TYPE_LOCAL: + return BPF_DYNPTR_TYPE_LOCAL; + case DYNPTR_TYPE_RINGBUF: + return BPF_DYNPTR_TYPE_RINGBUF; + default: + return BPF_DYNPTR_TYPE_INVALID; + } +} + +static bool dynptr_type_refcounted(enum bpf_dynptr_type type) +{ + return type == BPF_DYNPTR_TYPE_RINGBUF; +} + +static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + enum bpf_arg_type arg_type, int insn_idx) +{ + struct bpf_func_state *state = func(env, reg); + enum bpf_dynptr_type type; + int spi, i, id; + + spi = get_spi(reg->off); + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) + return -EINVAL; + + for (i = 0; i < BPF_REG_SIZE; i++) { + state->stack[spi].slot_type[i] = STACK_DYNPTR; + state->stack[spi - 1].slot_type[i] = STACK_DYNPTR; + } + + type = arg_to_dynptr_type(arg_type); + if (type == BPF_DYNPTR_TYPE_INVALID) + return -EINVAL; + + state->stack[spi].spilled_ptr.dynptr.first_slot = true; + state->stack[spi].spilled_ptr.dynptr.type = type; + state->stack[spi - 1].spilled_ptr.dynptr.type = type; + + if (dynptr_type_refcounted(type)) { + /* The id is used to track proper releasing */ + id = acquire_reference_state(env, insn_idx); + if (id < 0) + return id; + + state->stack[spi].spilled_ptr.id = id; + state->stack[spi - 1].spilled_ptr.id = id; + } + + return 0; +} + +static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi, i; + + spi = get_spi(reg->off); + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) + return -EINVAL; + + for (i = 0; i < BPF_REG_SIZE; i++) { + state->stack[spi].slot_type[i] = STACK_INVALID; + state->stack[spi - 1].slot_type[i] = STACK_INVALID; + } + + /* Invalidate any slices associated with this dynptr */ + if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) { + release_reference(env, state->stack[spi].spilled_ptr.id); + state->stack[spi].spilled_ptr.id = 0; + state->stack[spi - 1].spilled_ptr.id = 0; + } + + state->stack[spi].spilled_ptr.dynptr.first_slot = false; + state->stack[spi].spilled_ptr.dynptr.type = 0; + state->stack[spi - 1].spilled_ptr.dynptr.type = 0; + + return 0; +} + +static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + int i; + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) + return true; + + for (i = 0; i < BPF_REG_SIZE; i++) { + if (state->stack[spi].slot_type[i] == STACK_DYNPTR || + state->stack[spi - 1].slot_type[i] == STACK_DYNPTR) + return false; + } + + return true; +} + +static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + enum bpf_arg_type arg_type) +{ + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + int i; + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) || + !state->stack[spi].spilled_ptr.dynptr.first_slot) + return false; + + for (i = 0; i < BPF_REG_SIZE; i++) { + if (state->stack[spi].slot_type[i] != STACK_DYNPTR || + state->stack[spi - 1].slot_type[i] != STACK_DYNPTR) + return false; + } + + /* ARG_PTR_TO_DYNPTR takes any type of dynptr */ + if (arg_type == ARG_PTR_TO_DYNPTR) + return true; + + return state->stack[spi].spilled_ptr.dynptr.type == arg_to_dynptr_type(arg_type); +} + /* The reg state of a pointer or a bounded scalar was saved when * it was spilled to the stack. */ @@ -5400,6 +5550,11 @@ static bool arg_type_is_release(enum bpf_arg_type type) return type & OBJ_RELEASE; } +static bool arg_type_is_dynptr(enum bpf_arg_type type) +{ + return base_type(type) == ARG_PTR_TO_DYNPTR; +} + static int int_ptr_type_to_size(enum bpf_arg_type type) { if (type == ARG_PTR_TO_INT) @@ -5539,6 +5694,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, [ARG_PTR_TO_TIMER] = &timer_types, [ARG_PTR_TO_KPTR] = &kptr_types, + [ARG_PTR_TO_DYNPTR] = &stack_ptr_types, }; static int check_reg_type(struct bpf_verifier_env *env, u32 regno, @@ -5628,8 +5784,13 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, bool fixed_off_ok = false; switch ((u32)type) { - case SCALAR_VALUE: /* Pointer types where reg offset is explicitly allowed: */ + case PTR_TO_STACK: + if (arg_type_is_dynptr(arg_type) && reg->off % BPF_REG_SIZE) { + verbose(env, "cannot pass in dynptr at an offset\n"); + return -EINVAL; + } + fallthrough; case PTR_TO_PACKET: case PTR_TO_PACKET_META: case PTR_TO_MAP_KEY: @@ -5639,7 +5800,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, case PTR_TO_MEM | MEM_ALLOC: case PTR_TO_BUF: case PTR_TO_BUF | MEM_RDONLY: - case PTR_TO_STACK: + case SCALAR_VALUE: /* Some of the argument types nevertheless require a * zero register offset. */ @@ -5671,6 +5832,14 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, return __check_ptr_off_reg(env, reg, regno, fixed_off_ok); } +static u32 stack_slot_get_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + + return state->stack[spi].spilled_ptr.id; +} + static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_call_arg_meta *meta, const struct bpf_func_proto *fn) @@ -5725,7 +5894,16 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, skip_type_check: if (arg_type_is_release(arg_type)) { - if (!reg->ref_obj_id && !register_is_null(reg)) { + if (arg_type_is_dynptr(arg_type)) { + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) || + !state->stack[spi].spilled_ptr.id) { + verbose(env, "arg %d is an unacquired reference\n", regno); + return -EINVAL; + } + } else if (!reg->ref_obj_id && !register_is_null(reg)) { verbose(env, "R%d must be referenced when passed to release function\n", regno); return -EINVAL; @@ -5837,6 +6015,40 @@ skip_type_check: bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta); + } else if (arg_type_is_dynptr(arg_type)) { + if (arg_type & MEM_UNINIT) { + if (!is_dynptr_reg_valid_uninit(env, reg)) { + verbose(env, "Dynptr has to be an uninitialized dynptr\n"); + return -EINVAL; + } + + /* We only support one dynptr being uninitialized at the moment, + * which is sufficient for the helper functions we have right now. + */ + if (meta->uninit_dynptr_regno) { + verbose(env, "verifier internal error: multiple uninitialized dynptr args\n"); + return -EFAULT; + } + + meta->uninit_dynptr_regno = regno; + } else if (!is_dynptr_reg_valid_init(env, reg, arg_type)) { + const char *err_extra = ""; + + switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { + case DYNPTR_TYPE_LOCAL: + err_extra = "local "; + break; + case DYNPTR_TYPE_RINGBUF: + err_extra = "ringbuf "; + break; + default: + break; + } + + verbose(env, "Expected an initialized %sdynptr as arg #%d\n", + err_extra, arg + 1); + return -EINVAL; + } } else if (arg_type_is_alloc_size(arg_type)) { if (!tnum_is_const(reg->var_off)) { verbose(env, "R%d is not a known constant'\n", @@ -5958,7 +6170,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_RINGBUF: if (func_id != BPF_FUNC_ringbuf_output && func_id != BPF_FUNC_ringbuf_reserve && - func_id != BPF_FUNC_ringbuf_query) + func_id != BPF_FUNC_ringbuf_query && + func_id != BPF_FUNC_ringbuf_reserve_dynptr && + func_id != BPF_FUNC_ringbuf_submit_dynptr && + func_id != BPF_FUNC_ringbuf_discard_dynptr) goto error; break; case BPF_MAP_TYPE_STACK_TRACE: @@ -6074,6 +6289,9 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_FUNC_ringbuf_output: case BPF_FUNC_ringbuf_reserve: case BPF_FUNC_ringbuf_query: + case BPF_FUNC_ringbuf_reserve_dynptr: + case BPF_FUNC_ringbuf_submit_dynptr: + case BPF_FUNC_ringbuf_discard_dynptr: if (map->map_type != BPF_MAP_TYPE_RINGBUF) goto error; break; @@ -6970,9 +7188,27 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs = cur_regs(env); + if (meta.uninit_dynptr_regno) { + /* we write BPF_DW bits (8 bytes) at a time */ + for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) { + err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno, + i, BPF_DW, BPF_WRITE, -1, false); + if (err) + return err; + } + + err = mark_stack_slots_dynptr(env, ®s[meta.uninit_dynptr_regno], + fn->arg_type[meta.uninit_dynptr_regno - BPF_REG_1], + insn_idx); + if (err) + return err; + } + if (meta.release_regno) { err = -EINVAL; - if (meta.ref_obj_id) + if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) + err = unmark_stack_slots_dynptr(env, ®s[meta.release_regno]); + else if (meta.ref_obj_id) err = release_reference(env, meta.ref_obj_id); /* meta.ref_obj_id can only be 0 if register that is meant to be * released is NULL, which must be > R0. @@ -7022,6 +7258,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_loop_callback_state); break; + case BPF_FUNC_dynptr_from_mem: + if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) { + verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n", + reg_type_str(env, regs[BPF_REG_1].type)); + return -EACCES; + } } if (err) @@ -7150,6 +7392,21 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].id = id; /* For release_reference() */ regs[BPF_REG_0].ref_obj_id = id; + } else if (func_id == BPF_FUNC_dynptr_data) { + int dynptr_id = 0, i; + + /* Find the id of the dynptr we're acquiring a reference to */ + for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { + if (arg_type_is_dynptr(fn->arg_type[i])) { + if (dynptr_id) { + verbose(env, "verifier internal error: multiple dynptr args in func\n"); + return -EFAULT; + } + dynptr_id = stack_slot_get_id(env, ®s[BPF_REG_1 + i]); + } + } + /* For release_reference() */ + regs[BPF_REG_0].ref_obj_id = dynptr_id; } do_refine_retval_range(regs, fn->ret_type, func_id, &meta); diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index d5452f7eb996..855b937e7585 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -634,6 +634,7 @@ class PrinterHelpers(Printer): 'struct file', 'struct bpf_timer', 'struct mptcp_sock', + 'struct bpf_dynptr', ] known_types = { '...', @@ -684,6 +685,7 @@ class PrinterHelpers(Printer): 'struct file', 'struct bpf_timer', 'struct mptcp_sock', + 'struct bpf_dynptr', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 56688bee20d9..f4009dbdf62d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5178,6 +5178,77 @@ union bpf_attr { * Dynamically cast a *sk* pointer to a *mptcp_sock* pointer. * Return * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Get a dynptr to local memory *data*. + * + * *data* must be a ptr to a map value. + * The maximum *size* supported is DYNPTR_MAX_SIZE. + * *flags* is currently unused. + * Return + * 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE, + * -EINVAL if flags is not 0. + * + * long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Reserve *size* bytes of payload in a ring buffer *ringbuf* + * through the dynptr interface. *flags* must be 0. + * + * Please note that a corresponding bpf_ringbuf_submit_dynptr or + * bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the + * reservation fails. This is enforced by the verifier. + * Return + * 0 on success, or a negative error in case of failure. + * + * void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Submit reserved ring buffer sample, pointed to by *data*, + * through the dynptr interface. This is a no-op if the dynptr is + * invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_submit'. + * Return + * Nothing. Always succeeds. + * + * void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Discard reserved ring buffer sample through the dynptr + * interface. This is a no-op if the dynptr is invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_discard'. + * Return + * Nothing. Always succeeds. + * + * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset) + * Description + * Read *len* bytes from *src* into *dst*, starting from *offset* + * into *src*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *src*'s data, -EINVAL if *src* is an invalid dynptr. + * + * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len) + * Description + * Write *len* bytes from *src* into *dst*, starting from *offset* + * into *dst*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* + * is a read-only dynptr. + * + * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) + * Description + * Get a pointer to the underlying dynptr data. + * + * *len* must be a statically known value. The returned data slice + * is invalidated whenever the dynptr is invalidated. + * Return + * Pointer to the underlying dynptr data, NULL if the dynptr is + * read-only, if the dynptr is invalid, or if the offset and length + * is out of bounds. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5377,6 +5448,13 @@ union bpf_attr { FN(kptr_xchg), \ FN(map_lookup_percpu_elem), \ FN(skc_to_mptcp_sock), \ + FN(dynptr_from_mem), \ + FN(ringbuf_reserve_dynptr), \ + FN(ringbuf_submit_dynptr), \ + FN(ringbuf_discard_dynptr), \ + FN(dynptr_read), \ + FN(dynptr_write), \ + FN(dynptr_data), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -6528,6 +6606,11 @@ struct bpf_timer { __u64 :64; } __attribute__((aligned(8))); +struct bpf_dynptr { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c new file mode 100644 index 000000000000..3c7aa82b98e2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Facebook */ + +#include +#include "dynptr_fail.skel.h" +#include "dynptr_success.skel.h" + +static size_t log_buf_sz = 1048576; /* 1 MB */ +static char obj_log_buf[1048576]; + +static struct { + const char *prog_name; + const char *expected_err_msg; +} dynptr_tests[] = { + /* failure cases */ + {"ringbuf_missing_release1", "Unreleased reference id=1"}, + {"ringbuf_missing_release2", "Unreleased reference id=2"}, + {"ringbuf_missing_release_callback", "Unreleased reference id"}, + {"use_after_invalid", "Expected an initialized dynptr as arg #3"}, + {"ringbuf_invalid_api", "type=mem expected=alloc_mem"}, + {"add_dynptr_to_map1", "invalid indirect read from stack"}, + {"add_dynptr_to_map2", "invalid indirect read from stack"}, + {"data_slice_out_of_bounds_ringbuf", "value is outside of the allowed memory range"}, + {"data_slice_out_of_bounds_map_value", "value is outside of the allowed memory range"}, + {"data_slice_use_after_release", "invalid mem access 'scalar'"}, + {"data_slice_missing_null_check1", "invalid mem access 'mem_or_null'"}, + {"data_slice_missing_null_check2", "invalid mem access 'mem_or_null'"}, + {"invalid_helper1", "invalid indirect read from stack"}, + {"invalid_helper2", "Expected an initialized dynptr as arg #3"}, + {"invalid_write1", "Expected an initialized dynptr as arg #1"}, + {"invalid_write2", "Expected an initialized dynptr as arg #3"}, + {"invalid_write3", "Expected an initialized ringbuf dynptr as arg #1"}, + {"invalid_write4", "arg 1 is an unacquired reference"}, + {"invalid_read1", "invalid read from stack"}, + {"invalid_read2", "cannot pass in dynptr at an offset"}, + {"invalid_read3", "invalid read from stack"}, + {"invalid_read4", "invalid read from stack"}, + {"invalid_offset", "invalid write to stack"}, + {"global", "type=map_value expected=fp"}, + {"release_twice", "arg 1 is an unacquired reference"}, + {"release_twice_callback", "arg 1 is an unacquired reference"}, + {"dynptr_from_mem_invalid_api", + "Unsupported reg type fp for bpf_dynptr_from_mem data"}, + + /* success cases */ + {"test_read_write", NULL}, + {"test_data_slice", NULL}, + {"test_ringbuf", NULL}, +}; + +static void verify_fail(const char *prog_name, const char *expected_err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct bpf_program *prog; + struct dynptr_fail *skel; + int err; + + opts.kernel_log_buf = obj_log_buf; + opts.kernel_log_size = log_buf_sz; + opts.kernel_log_level = 1; + + skel = dynptr_fail__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "dynptr_fail__open_opts")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + bpf_program__set_autoload(prog, true); + + bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize()); + + err = dynptr_fail__load(skel); + if (!ASSERT_ERR(err, "unexpected load success")) + goto cleanup; + + if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) { + fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg); + fprintf(stderr, "Verifier output: %s\n", obj_log_buf); + } + +cleanup: + dynptr_fail__destroy(skel); +} + +static void verify_success(const char *prog_name) +{ + struct dynptr_success *skel; + struct bpf_program *prog; + struct bpf_link *link; + + skel = dynptr_success__open(); + if (!ASSERT_OK_PTR(skel, "dynptr_success__open")) + return; + + skel->bss->pid = getpid(); + + bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize()); + + dynptr_success__load(skel); + if (!ASSERT_OK_PTR(skel, "dynptr_success__load")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "bpf_program__attach")) + goto cleanup; + + usleep(1); + + ASSERT_EQ(skel->bss->err, 0, "err"); + + bpf_link__destroy(link); + +cleanup: + dynptr_success__destroy(skel); +} + +void test_dynptr(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(dynptr_tests); i++) { + if (!test__start_subtest(dynptr_tests[i].prog_name)) + continue; + + if (dynptr_tests[i].expected_err_msg) + verify_fail(dynptr_tests[i].prog_name, + dynptr_tests[i].expected_err_msg); + else + verify_success(dynptr_tests[i].prog_name); + } +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c new file mode 100644 index 000000000000..d811cff73597 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -0,0 +1,588 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Facebook */ + +#include +#include +#include +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct test_info { + int x; + struct bpf_dynptr ptr; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct bpf_dynptr); +} array_map1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct test_info); +} array_map2 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} array_map3 SEC(".maps"); + +struct sample { + int pid; + long value; + char comm[16]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} ringbuf SEC(".maps"); + +int err, val; + +static int get_map_val_dynptr(struct bpf_dynptr *ptr) +{ + __u32 key = 0, *map_val; + + bpf_map_update_elem(&array_map3, &key, &val, 0); + + map_val = bpf_map_lookup_elem(&array_map3, &key); + if (!map_val) + return -ENOENT; + + bpf_dynptr_from_mem(map_val, sizeof(*map_val), 0, ptr); + + return 0; +} + +/* Every bpf_ringbuf_reserve_dynptr call must have a corresponding + * bpf_ringbuf_submit/discard_dynptr call + */ +SEC("?raw_tp/sys_nanosleep") +int ringbuf_missing_release1(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + /* missing a call to bpf_ringbuf_discard/submit_dynptr */ + + return 0; +} + +SEC("?raw_tp/sys_nanosleep") +int ringbuf_missing_release2(void *ctx) +{ + struct bpf_dynptr ptr1, ptr2; + struct sample *sample; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr1); + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr2); + + sample = bpf_dynptr_data(&ptr1, 0, sizeof(*sample)); + if (!sample) { + bpf_ringbuf_discard_dynptr(&ptr1, 0); + bpf_ringbuf_discard_dynptr(&ptr2, 0); + return 0; + } + + bpf_ringbuf_submit_dynptr(&ptr1, 0); + + /* missing a call to bpf_ringbuf_discard/submit_dynptr on ptr2 */ + + return 0; +} + +static int missing_release_callback_fn(__u32 index, void *data) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + /* missing a call to bpf_ringbuf_discard/submit_dynptr */ + + return 0; +} + +/* Any dynptr initialized within a callback must have bpf_dynptr_put called */ +SEC("?raw_tp/sys_nanosleep") +int ringbuf_missing_release_callback(void *ctx) +{ + bpf_loop(10, missing_release_callback_fn, NULL, 0); + return 0; +} + +/* Can't call bpf_ringbuf_submit/discard_dynptr on a non-initialized dynptr */ +SEC("?raw_tp/sys_nanosleep") +int ringbuf_release_uninit_dynptr(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_ringbuf_submit_dynptr(&ptr, 0); + + return 0; +} + +/* A dynptr can't be used after it has been invalidated */ +SEC("?raw_tp/sys_nanosleep") +int use_after_invalid(void *ctx) +{ + struct bpf_dynptr ptr; + char read_data[64]; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(read_data), 0, &ptr); + + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + /* this should fail */ + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + + return 0; +} + +/* Can't call non-dynptr ringbuf APIs on a dynptr ringbuf sample */ +SEC("?raw_tp/sys_nanosleep") +int ringbuf_invalid_api(void *ctx) +{ + struct bpf_dynptr ptr; + struct sample *sample; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr); + sample = bpf_dynptr_data(&ptr, 0, sizeof(*sample)); + if (!sample) + goto done; + + sample->pid = 123; + + /* invalid API use. need to use dynptr API to submit/discard */ + bpf_ringbuf_submit(sample, 0); + +done: + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +/* Can't add a dynptr to a map */ +SEC("?raw_tp/sys_nanosleep") +int add_dynptr_to_map1(void *ctx) +{ + struct bpf_dynptr ptr; + int key = 0; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + /* this should fail */ + bpf_map_update_elem(&array_map1, &key, &ptr, 0); + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + return 0; +} + +/* Can't add a struct with an embedded dynptr to a map */ +SEC("?raw_tp/sys_nanosleep") +int add_dynptr_to_map2(void *ctx) +{ + struct test_info x; + int key = 0; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &x.ptr); + + /* this should fail */ + bpf_map_update_elem(&array_map2, &key, &x, 0); + + bpf_ringbuf_submit_dynptr(&x.ptr, 0); + + return 0; +} + +/* A data slice can't be accessed out of bounds */ +SEC("?raw_tp/sys_nanosleep") +int data_slice_out_of_bounds_ringbuf(void *ctx) +{ + struct bpf_dynptr ptr; + void *data; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr); + + data = bpf_dynptr_data(&ptr, 0, 8); + if (!data) + goto done; + + /* can't index out of bounds of the data slice */ + val = *((char *)data + 8); + +done: + bpf_ringbuf_submit_dynptr(&ptr, 0); + return 0; +} + +SEC("?raw_tp/sys_nanosleep") +int data_slice_out_of_bounds_map_value(void *ctx) +{ + __u32 key = 0, map_val; + struct bpf_dynptr ptr; + void *data; + + get_map_val_dynptr(&ptr); + + data = bpf_dynptr_data(&ptr, 0, sizeof(map_val)); + if (!data) + return 0; + + /* can't index out of bounds of the data slice */ + val = *((char *)data + (sizeof(map_val) + 1)); + + return 0; +} + +/* A data slice can't be used after it has been released */ +SEC("?raw_tp/sys_nanosleep") +int data_slice_use_after_release(void *ctx) +{ + struct bpf_dynptr ptr; + struct sample *sample; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr); + sample = bpf_dynptr_data(&ptr, 0, sizeof(*sample)); + if (!sample) + goto done; + + sample->pid = 123; + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + /* this should fail */ + val = sample->pid; + + return 0; + +done: + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +/* A data slice must be first checked for NULL */ +SEC("?raw_tp/sys_nanosleep") +int data_slice_missing_null_check1(void *ctx) +{ + struct bpf_dynptr ptr; + void *data; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr); + + data = bpf_dynptr_data(&ptr, 0, 8); + + /* missing if (!data) check */ + + /* this should fail */ + *(__u8 *)data = 3; + + bpf_ringbuf_submit_dynptr(&ptr, 0); + return 0; +} + +/* A data slice can't be dereferenced if it wasn't checked for null */ +SEC("?raw_tp/sys_nanosleep") +int data_slice_missing_null_check2(void *ctx) +{ + struct bpf_dynptr ptr; + __u64 *data1, *data2; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr); + + data1 = bpf_dynptr_data(&ptr, 0, 8); + data2 = bpf_dynptr_data(&ptr, 0, 8); + if (data1) + /* this should fail */ + *data2 = 3; + +done: + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +/* Can't pass in a dynptr as an arg to a helper function that doesn't take in a + * dynptr argument + */ +SEC("?raw_tp/sys_nanosleep") +int invalid_helper1(void *ctx) +{ + struct bpf_dynptr ptr; + + get_map_val_dynptr(&ptr); + + /* this should fail */ + bpf_strncmp((const char *)&ptr, sizeof(ptr), "hello!"); + + return 0; +} + +/* A dynptr can't be passed into a helper function at a non-zero offset */ +SEC("?raw_tp/sys_nanosleep") +int invalid_helper2(void *ctx) +{ + struct bpf_dynptr ptr; + char read_data[64]; + + get_map_val_dynptr(&ptr); + + /* this should fail */ + bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0); + + return 0; +} + +/* A bpf_dynptr is invalidated if it's been written into */ +SEC("?raw_tp/sys_nanosleep") +int invalid_write1(void *ctx) +{ + struct bpf_dynptr ptr; + void *data; + __u8 x = 0; + + get_map_val_dynptr(&ptr); + + memcpy(&ptr, &x, sizeof(x)); + + /* this should fail */ + data = bpf_dynptr_data(&ptr, 0, 1); + + return 0; +} + +/* + * A bpf_dynptr can't be used as a dynptr if it has been written into at a fixed + * offset + */ +SEC("?raw_tp/sys_nanosleep") +int invalid_write2(void *ctx) +{ + struct bpf_dynptr ptr; + char read_data[64]; + __u8 x = 0; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr); + + memcpy((void *)&ptr + 8, &x, sizeof(x)); + + /* this should fail */ + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + return 0; +} + +/* + * A bpf_dynptr can't be used as a dynptr if it has been written into at a + * non-const offset + */ +SEC("?raw_tp/sys_nanosleep") +int invalid_write3(void *ctx) +{ + struct bpf_dynptr ptr; + char stack_buf[16]; + unsigned long len; + __u8 x = 0; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr); + + memcpy(stack_buf, &val, sizeof(val)); + len = stack_buf[0] & 0xf; + + memcpy((void *)&ptr + len, &x, sizeof(x)); + + /* this should fail */ + bpf_ringbuf_submit_dynptr(&ptr, 0); + + return 0; +} + +static int invalid_write4_callback(__u32 index, void *data) +{ + *(__u32 *)data = 123; + + return 0; +} + +/* If the dynptr is written into in a callback function, it should + * be invalidated as a dynptr + */ +SEC("?raw_tp/sys_nanosleep") +int invalid_write4(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr); + + bpf_loop(10, invalid_write4_callback, &ptr, 0); + + /* this should fail */ + bpf_ringbuf_submit_dynptr(&ptr, 0); + + return 0; +} + +/* A globally-defined bpf_dynptr can't be used (it must reside as a stack frame) */ +struct bpf_dynptr global_dynptr; +SEC("?raw_tp/sys_nanosleep") +int global(void *ctx) +{ + /* this should fail */ + bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &global_dynptr); + + bpf_ringbuf_discard_dynptr(&global_dynptr, 0); + + return 0; +} + +/* A direct read should fail */ +SEC("?raw_tp/sys_nanosleep") +int invalid_read1(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr); + + /* this should fail */ + val = *(int *)&ptr; + + bpf_ringbuf_discard_dynptr(&ptr, 0); + + return 0; +} + +/* A direct read at an offset should fail */ +SEC("?raw_tp/sys_nanosleep") +int invalid_read2(void *ctx) +{ + struct bpf_dynptr ptr; + char read_data[64]; + + get_map_val_dynptr(&ptr); + + /* this should fail */ + bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0); + + return 0; +} + +/* A direct read at an offset into the lower stack slot should fail */ +SEC("?raw_tp/sys_nanosleep") +int invalid_read3(void *ctx) +{ + struct bpf_dynptr ptr1, ptr2; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr1); + bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr2); + + /* this should fail */ + memcpy(&val, (void *)&ptr1 + 8, sizeof(val)); + + bpf_ringbuf_discard_dynptr(&ptr1, 0); + bpf_ringbuf_discard_dynptr(&ptr2, 0); + + return 0; +} + +static int invalid_read4_callback(__u32 index, void *data) +{ + /* this should fail */ + val = *(__u32 *)data; + + return 0; +} + +/* A direct read within a callback function should fail */ +SEC("?raw_tp/sys_nanosleep") +int invalid_read4(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr); + + bpf_loop(10, invalid_read4_callback, &ptr, 0); + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + return 0; +} + +/* Initializing a dynptr on an offset should fail */ +SEC("?raw_tp/sys_nanosleep") +int invalid_offset(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr + 1); + + bpf_ringbuf_discard_dynptr(&ptr, 0); + + return 0; +} + +/* Can't release a dynptr twice */ +SEC("?raw_tp/sys_nanosleep") +int release_twice(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr); + + bpf_ringbuf_discard_dynptr(&ptr, 0); + + /* this second release should fail */ + bpf_ringbuf_discard_dynptr(&ptr, 0); + + return 0; +} + +static int release_twice_callback_fn(__u32 index, void *data) +{ + /* this should fail */ + bpf_ringbuf_discard_dynptr(data, 0); + + return 0; +} + +/* Test that releasing a dynptr twice, where one of the releases happens + * within a calback function, fails + */ +SEC("?raw_tp/sys_nanosleep") +int release_twice_callback(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 32, 0, &ptr); + + bpf_ringbuf_discard_dynptr(&ptr, 0); + + bpf_loop(10, release_twice_callback_fn, &ptr, 0); + + return 0; +} + +/* Reject unsupported local mem types for dynptr_from_mem API */ +SEC("?raw_tp/sys_nanosleep") +int dynptr_from_mem_invalid_api(void *ctx) +{ + struct bpf_dynptr ptr; + int x = 0; + + /* this should fail */ + bpf_dynptr_from_mem(&x, sizeof(x), 0, &ptr); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c new file mode 100644 index 000000000000..d67be48df4b2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Facebook */ + +#include +#include +#include +#include "bpf_misc.h" +#include "errno.h" + +char _license[] SEC("license") = "GPL"; + +int pid, err, val; + +struct sample { + int pid; + int seq; + long value; + char comm[16]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} ringbuf SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} array_map SEC(".maps"); + +SEC("tp/syscalls/sys_enter_nanosleep") +int test_read_write(void *ctx) +{ + char write_data[64] = "hello there, world!!"; + char read_data[64] = {}, buf[64] = {}; + struct bpf_dynptr ptr; + int i; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr); + + /* Write data into the dynptr */ + err = err ?: bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data)); + + /* Read the data that was written into the dynptr */ + err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + + /* Ensure the data we read matches the data we wrote */ + for (i = 0; i < sizeof(read_data); i++) { + if (read_data[i] != write_data[i]) { + err = 1; + break; + } + } + + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int test_data_slice(void *ctx) +{ + __u32 key = 0, val = 235, *map_val; + struct bpf_dynptr ptr; + __u32 map_val_size; + void *data; + + map_val_size = sizeof(*map_val); + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + bpf_map_update_elem(&array_map, &key, &val, 0); + + map_val = bpf_map_lookup_elem(&array_map, &key); + if (!map_val) { + err = 1; + return 0; + } + + bpf_dynptr_from_mem(map_val, map_val_size, 0, &ptr); + + /* Try getting a data slice that is out of range */ + data = bpf_dynptr_data(&ptr, map_val_size + 1, 1); + if (data) { + err = 2; + return 0; + } + + /* Try getting more bytes than available */ + data = bpf_dynptr_data(&ptr, 0, map_val_size + 1); + if (data) { + err = 3; + return 0; + } + + data = bpf_dynptr_data(&ptr, 0, sizeof(__u32)); + if (!data) { + err = 4; + return 0; + } + + *(__u32 *)data = 999; + + err = bpf_probe_read_kernel(&val, sizeof(val), data); + if (err) + return 0; + + if (val != *(int *)data) + err = 5; + + return 0; +} + +static int ringbuf_callback(__u32 index, void *data) +{ + struct sample *sample; + + struct bpf_dynptr *ptr = (struct bpf_dynptr *)data; + + sample = bpf_dynptr_data(ptr, 0, sizeof(*sample)); + if (!sample) + err = 2; + else + sample->pid += index; + + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int test_ringbuf(void *ctx) +{ + struct bpf_dynptr ptr; + struct sample *sample; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + val = 100; + + /* check that you can reserve a dynamic size reservation */ + err = bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + sample = err ? NULL : bpf_dynptr_data(&ptr, 0, sizeof(*sample)); + if (!sample) { + err = 1; + goto done; + } + + sample->pid = 10; + + /* Can pass dynptr to callback functions */ + bpf_loop(10, ringbuf_callback, &ptr, 0); + + if (sample->pid != 55) + err = 2; + +done: + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +}