bpf: Enable non-atomic allocations in local storage
Currently, local storage memory can only be allocated atomically
(GFP_ATOMIC). This restriction is too strict for sleepable bpf
programs.

In this patch, the verifier detects whether the program is sleepable,
and passes the corresponding GFP_KERNEL or GFP_ATOMIC flag as a
5th argument to bpf_task/sk/inode_storage_get. This flag will propagate
down to the local storage functions that allocate memory.

Please note that bpf_task/sk/inode_storage_update_elem functions are
invoked by userspace applications through syscalls. Preemption is
disabled before bpf_task/sk/inode_storage_update_elem is called, which
means they will always have to allocate memory atomically.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: KP Singh <kpsingh@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20220318045553.3091807-2-joannekoong@fb.com
This commit is contained in: commit b00fa38a9c (parent a8fee96202)
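For context, a minimal sketch (not part of the patch; the map, program, and section names are illustrative) of a sleepable BPF program that exercises the new path. The program itself is unchanged by this patch: it still calls the helper with four arguments, and the verifier injects the hidden gfp_flags argument.

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	char LICENSE[] SEC("license") = "GPL";

	struct {
		__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
		__uint(map_flags, BPF_F_NO_PREALLOC);
		__type(key, int);
		__type(value, __u64);
	} open_count SEC(".maps");

	/* "lsm.s/" marks the program sleepable, so after this patch the
	 * verifier passes GFP_KERNEL to bpf_task_storage_get() behind the
	 * scenes and the F_CREATE allocation no longer has to be atomic.
	 */
	SEC("lsm.s/file_open")
	int BPF_PROG(count_open, struct file *file)
	{
		__u64 *cnt;

		cnt = bpf_task_storage_get(&open_count,
					   bpf_get_current_task_btf(), 0,
					   BPF_LOCAL_STORAGE_GET_F_CREATE);
		if (cnt)
			__sync_fetch_and_add(cnt, 1);
		return 0;
	}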
include/linux/bpf_local_storage.h

@@ -154,16 +154,17 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem);
 
 struct bpf_local_storage_elem *
 bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
-		bool charge_mem);
+		bool charge_mem, gfp_t gfp_flags);
 
 int
 bpf_local_storage_alloc(void *owner,
 			struct bpf_local_storage_map *smap,
-			struct bpf_local_storage_elem *first_selem);
+			struct bpf_local_storage_elem *first_selem,
+			gfp_t gfp_flags);
 
 struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
-			 void *value, u64 map_flags);
+			 void *value, u64 map_flags, gfp_t gfp_flags);
 
 void bpf_local_storage_free_rcu(struct rcu_head *rcu);
kernel/bpf/bpf_inode_storage.c

@@ -136,7 +136,7 @@ static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
 
 	sdata = bpf_local_storage_update(f->f_inode,
 					 (struct bpf_local_storage_map *)map,
-					 value, map_flags);
+					 value, map_flags, GFP_ATOMIC);
 	fput(f);
 	return PTR_ERR_OR_ZERO(sdata);
 }
@@ -169,8 +169,9 @@ static int bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
 	return err;
 }
 
-BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
-	   void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
+	   void *, value, u64, flags, gfp_t, gfp_flags)
 {
 	struct bpf_local_storage_data *sdata;
 
@@ -196,7 +197,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
 	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
 		sdata = bpf_local_storage_update(
 			inode, (struct bpf_local_storage_map *)map, value,
-			BPF_NOEXIST);
+			BPF_NOEXIST, gfp_flags);
 		return IS_ERR(sdata) ? (unsigned long)NULL :
 			(unsigned long)sdata->data;
 	}
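The BPF_CALL_4 to BPF_CALL_5 change does not alter the helper's UAPI: programs still see four parameters. Roughly (a simplified sketch of the BPF_CALL_x convention, not verbatim macro output), BPF_CALL_5 generates a typed inner function plus a u64-register wrapper that the interpreter/JIT actually calls; the tracing hunk below, which invokes ____bpf_sk_storage_get() directly, relies on the same convention.

	/* Simplified expansion sketch: the interpreter/JIT calls the u64
	 * wrapper; r5 carries the gfp_flags constant materialized by the
	 * verifier.
	 */
	static u64 ____bpf_inode_storage_get(struct bpf_map *map,
					     struct inode *inode, void *value,
					     u64 flags, gfp_t gfp_flags);

	u64 bpf_inode_storage_get(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
	{
		return ____bpf_inode_storage_get((struct bpf_map *)r1,
						 (struct inode *)r2,
						 (void *)r3, r4, (gfp_t)r5);
	}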
kernel/bpf/bpf_local_storage.c

@@ -63,7 +63,7 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
 
 struct bpf_local_storage_elem *
 bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
-		void *value, bool charge_mem)
+		void *value, bool charge_mem, gfp_t gfp_flags)
 {
 	struct bpf_local_storage_elem *selem;
 
@@ -71,7 +71,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
 		return NULL;
 
 	selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
-				GFP_ATOMIC | __GFP_NOWARN);
+				gfp_flags | __GFP_NOWARN);
 	if (selem) {
 		if (value)
 			memcpy(SDATA(selem)->data, value, smap->map.value_size);
@@ -282,7 +282,8 @@ static int check_flags(const struct bpf_local_storage_data *old_sdata,
 
 int bpf_local_storage_alloc(void *owner,
 			    struct bpf_local_storage_map *smap,
-			    struct bpf_local_storage_elem *first_selem)
+			    struct bpf_local_storage_elem *first_selem,
+			    gfp_t gfp_flags)
 {
 	struct bpf_local_storage *prev_storage, *storage;
 	struct bpf_local_storage **owner_storage_ptr;
@@ -293,7 +294,7 @@ int bpf_local_storage_alloc(void *owner,
 		return err;
 
 	storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
-				  GFP_ATOMIC | __GFP_NOWARN);
+				  gfp_flags | __GFP_NOWARN);
 	if (!storage) {
 		err = -ENOMEM;
 		goto uncharge;
@@ -350,10 +351,10 @@ uncharge:
  */
 struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
-			 void *value, u64 map_flags)
+			 void *value, u64 map_flags, gfp_t gfp_flags)
 {
 	struct bpf_local_storage_data *old_sdata = NULL;
-	struct bpf_local_storage_elem *selem;
+	struct bpf_local_storage_elem *selem = NULL;
 	struct bpf_local_storage *local_storage;
 	unsigned long flags;
 	int err;
@@ -365,6 +366,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		     !map_value_has_spin_lock(&smap->map)))
 		return ERR_PTR(-EINVAL);
 
+	if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
+		return ERR_PTR(-EINVAL);
+
 	local_storage = rcu_dereference_check(*owner_storage(smap, owner),
 					      bpf_rcu_lock_held());
 	if (!local_storage || hlist_empty(&local_storage->list)) {
@@ -373,11 +377,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		if (err)
 			return ERR_PTR(err);
 
-		selem = bpf_selem_alloc(smap, owner, value, true);
+		selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
 		if (!selem)
 			return ERR_PTR(-ENOMEM);
 
-		err = bpf_local_storage_alloc(owner, smap, selem);
+		err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags);
 		if (err) {
 			kfree(selem);
 			mem_uncharge(smap, owner, smap->elem_size);
@@ -404,6 +408,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		}
 	}
 
+	if (gfp_flags == GFP_KERNEL) {
+		selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
+		if (!selem)
+			return ERR_PTR(-ENOMEM);
+	}
+
 	raw_spin_lock_irqsave(&local_storage->lock, flags);
 
 	/* Recheck local_storage->list under local_storage->lock */
@@ -429,19 +439,21 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		goto unlock;
 	}
 
-	/* local_storage->lock is held. Hence, we are sure
-	 * we can unlink and uncharge the old_sdata successfully
-	 * later. Hence, instead of charging the new selem now
-	 * and then uncharge the old selem later (which may cause
-	 * a potential but unnecessary charge failure), avoid taking
-	 * a charge at all here (the "!old_sdata" check) and the
-	 * old_sdata will not be uncharged later during
-	 * bpf_selem_unlink_storage_nolock().
-	 */
-	selem = bpf_selem_alloc(smap, owner, value, !old_sdata);
-	if (!selem) {
-		err = -ENOMEM;
-		goto unlock_err;
+	if (gfp_flags != GFP_KERNEL) {
+		/* local_storage->lock is held. Hence, we are sure
+		 * we can unlink and uncharge the old_sdata successfully
+		 * later. Hence, instead of charging the new selem now
+		 * and then uncharge the old selem later (which may cause
+		 * a potential but unnecessary charge failure), avoid taking
+		 * a charge at all here (the "!old_sdata" check) and the
+		 * old_sdata will not be uncharged later during
+		 * bpf_selem_unlink_storage_nolock().
+		 */
+		selem = bpf_selem_alloc(smap, owner, value, !old_sdata, gfp_flags);
+		if (!selem) {
+			err = -ENOMEM;
+			goto unlock_err;
+		}
 	}
 
 	/* First, link the new selem to the map */
@@ -463,6 +475,10 @@ unlock:
 
 unlock_err:
 	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+	if (selem) {
+		mem_uncharge(smap, owner, smap->elem_size);
+		kfree(selem);
+	}
 	return ERR_PTR(err);
 }
 
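The bpf_local_storage_update() restructuring follows a standard kernel pattern: a GFP_KERNEL allocation may sleep, so it has to happen before raw_spin_lock_irqsave(); only atomic allocations are legal with the lock held, and a preallocation that turns out to be unneeded is released on the error path. A generic illustration of the pattern (hypothetical names, not code from this patch):

	#include <linux/slab.h>
	#include <linux/spinlock.h>
	#include <linux/list.h>

	struct demo_head {
		raw_spinlock_t lock;
		struct list_head list;
	};

	struct demo_elem {
		struct list_head node;
	};

	static struct demo_elem *demo_install(struct demo_head *h, gfp_t gfp)
	{
		struct demo_elem *e = NULL;
		unsigned long flags;

		if (gfp == GFP_KERNEL)
			e = kzalloc(sizeof(*e), gfp); /* may sleep: lock not held yet */

		raw_spin_lock_irqsave(&h->lock, flags);
		if (!e)
			e = kzalloc(sizeof(*e), GFP_ATOMIC); /* must not sleep here */
		if (e)
			list_add(&e->node, &h->list);
		raw_spin_unlock_irqrestore(&h->lock, flags);

		return e;
	}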
kernel/bpf/bpf_task_storage.c

@@ -174,7 +174,8 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
 
 	bpf_task_storage_lock();
 	sdata = bpf_local_storage_update(
-		task, (struct bpf_local_storage_map *)map, value, map_flags);
+		task, (struct bpf_local_storage_map *)map, value, map_flags,
+		GFP_ATOMIC);
 	bpf_task_storage_unlock();
 
 	err = PTR_ERR_OR_ZERO(sdata);
@@ -226,8 +227,9 @@ out:
 	return err;
 }
 
-BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
-	   task, void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
+	   task, void *, value, u64, flags, gfp_t, gfp_flags)
 {
 	struct bpf_local_storage_data *sdata;
 
@@ -250,7 +252,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
 	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
 		sdata = bpf_local_storage_update(
 			task, (struct bpf_local_storage_map *)map, value,
-			BPF_NOEXIST);
+			BPF_NOEXIST, gfp_flags);
 
 unlock:
 	bpf_task_storage_unlock();
kernel/bpf/verifier.c

@@ -13492,6 +13492,26 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			goto patch_call_imm;
 		}
 
+		if (insn->imm == BPF_FUNC_task_storage_get ||
+		    insn->imm == BPF_FUNC_sk_storage_get ||
+		    insn->imm == BPF_FUNC_inode_storage_get) {
+			if (env->prog->aux->sleepable)
+				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__s32)GFP_KERNEL);
+			else
+				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__s32)GFP_ATOMIC);
+			insn_buf[1] = *insn;
+			cnt = 2;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta += cnt - 1;
+			env->prog = prog = new_prog;
+			insn = new_prog->insnsi + i + delta;
+			goto patch_call_imm;
+		}
+
 		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
 		 * and other inlining handlers are currently limited to 64 bit
 		 * only.
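The net effect of this fixup, sketched in BPF assembler notation (not literal verifier output): every call to one of the three storage-get helpers gains one preceding instruction that loads the gfp constant into R5, the register for a helper's fifth argument.

	/* non-sleepable program: unchanged atomic behavior */
	r5 = GFP_ATOMIC			/* injected: BPF_MOV64_IMM(BPF_REG_5, ...) */
	call bpf_task_storage_get	/* r1-r4 hold the four visible arguments */

	/* sleepable program: the allocation may now block */
	r5 = GFP_KERNEL
	call bpf_task_storage_get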
net/core/bpf_sk_storage.c

@@ -141,7 +141,7 @@ static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
 	if (sock) {
 		sdata = bpf_local_storage_update(
 			sock->sk, (struct bpf_local_storage_map *)map, value,
-			map_flags);
+			map_flags, GFP_ATOMIC);
 		sockfd_put(sock);
 		return PTR_ERR_OR_ZERO(sdata);
 	}
@@ -172,7 +172,7 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
 {
 	struct bpf_local_storage_elem *copy_selem;
 
-	copy_selem = bpf_selem_alloc(smap, newsk, NULL, true);
+	copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC);
 	if (!copy_selem)
 		return NULL;
 
@@ -230,7 +230,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
 			bpf_selem_link_map(smap, copy_selem);
 			bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
 		} else {
-			ret = bpf_local_storage_alloc(newsk, smap, copy_selem);
+			ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
 			if (ret) {
 				kfree(copy_selem);
 				atomic_sub(smap->elem_size,
@@ -255,8 +255,9 @@ out:
 	return ret;
 }
 
-BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
-	   void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
+	   void *, value, u64, flags, gfp_t, gfp_flags)
 {
 	struct bpf_local_storage_data *sdata;
 
@@ -277,7 +278,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
 	    refcount_inc_not_zero(&sk->sk_refcnt)) {
 		sdata = bpf_local_storage_update(
 			sk, (struct bpf_local_storage_map *)map, value,
-			BPF_NOEXIST);
+			BPF_NOEXIST, gfp_flags);
 		/* sk must be a fullsock (guaranteed by verifier),
 		 * so sock_gen_put() is unnecessary.
 		 */
@@ -417,14 +418,16 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
 	return false;
 }
 
-BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
-	   void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
+	   void *, value, u64, flags, gfp_t, gfp_flags)
 {
 	WARN_ON_ONCE(!bpf_rcu_lock_held());
 	if (in_hardirq() || in_nmi())
 		return (unsigned long)NULL;
 
-	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags);
+	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
+						     gfp_flags);
 }
 
 BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,