Merge branch 'Sleepable local storage'
KP Singh says:

====================

Local storage is currently unusable in sleepable programs. One of the
important use cases of local_storage is to attach security (or
performance) contextual information to kernel objects in LSM / tracing
programs, to be used later in the life-cycle of the object. Sometimes
this context can only be gathered from sleepable programs (because it
needs access to __user pointers or helpers like bpf_ima_inode_hash).
Allowing local storage to be used from sleepable programs lets such
context be managed with the benefits of local_storage.

# v2 -> v3

* Fixed some RCU issues pointed out by Martin
* Added Martin's ack

# v1 -> v2

* Generalize RCU checks (will send a separate patch for updating non
  local storage code where this can be used).
* Add missing RCU lock checks from v1

====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
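As an illustration of the use case this series enables (not part of the
commit itself), here is a minimal sketch of a sleepable LSM program that
attaches context gathered by a sleepable-only helper to an inode via local
storage. The map name, hook choice, and value layout are illustrative
assumptions, not code from this series:

// Hypothetical sleepable LSM program; map/hook names are illustrative.
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

struct inode_ctx {
        __u8 hash[32];  /* e.g. an IMA hash gathered from a sleepable hook */
};

struct {
        __uint(type, BPF_MAP_TYPE_INODE_STORAGE);
        __uint(map_flags, BPF_F_NO_PREALLOC);
        __type(key, int);
        __type(value, struct inode_ctx);
} inode_ctx_map SEC(".maps");

/* "lsm.s/" marks the program as sleepable; with this series the local
 * storage helpers may be called here because the kernel side is protected
 * by RCU-tasks-trace in addition to normal RCU.
 */
SEC("lsm.s/file_open")
int BPF_PROG(file_open, struct file *file)
{
        struct inode_ctx *ctx;

        ctx = bpf_inode_storage_get(&inode_ctx_map, file->f_inode, 0,
                                    BPF_LOCAL_STORAGE_GET_F_CREATE);
        if (!ctx)
                return 0;

        /* bpf_ima_inode_hash() may sleep, hence the sleepable program type. */
        bpf_ima_inode_hash(file->f_inode, ctx->hash, sizeof(ctx->hash));
        return 0;
}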
commit 1705c62e30
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -17,6 +17,9 @@

 #define BPF_LOCAL_STORAGE_CACHE_SIZE 16

+#define bpf_rcu_lock_held() \
+        (rcu_read_lock_held() || rcu_read_lock_trace_held() || \
+         rcu_read_lock_bh_held())
 struct bpf_local_storage_map_bucket {
         struct hlist_head list;
         raw_spinlock_t lock;
@@ -162,4 +165,6 @@ struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                          void *value, u64 map_flags);

+void bpf_local_storage_free_rcu(struct rcu_head *rcu);
+
 #endif /* _BPF_LOCAL_STORAGE_H */
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -17,6 +17,7 @@
 #include <linux/bpf_lsm.h>
 #include <linux/btf_ids.h>
 #include <linux/fdtable.h>
+#include <linux/rcupdate_trace.h>

 DEFINE_BPF_STORAGE_CACHE(inode_cache);

@@ -44,7 +45,8 @@ static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode,
         if (!bsb)
                 return NULL;

-        inode_storage = rcu_dereference(bsb->storage);
+        inode_storage =
+                rcu_dereference_check(bsb->storage, bpf_rcu_lock_held());
         if (!inode_storage)
                 return NULL;

@@ -172,6 +174,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
 {
         struct bpf_local_storage_data *sdata;

+        WARN_ON_ONCE(!bpf_rcu_lock_held());
         if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
                 return (unsigned long)NULL;

@@ -204,6 +207,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
 BPF_CALL_2(bpf_inode_storage_delete,
            struct bpf_map *, map, struct inode *, inode)
 {
+        WARN_ON_ONCE(!bpf_rcu_lock_held());
         if (!inode)
                 return -EINVAL;

--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -11,6 +11,9 @@
 #include <net/sock.h>
 #include <uapi/linux/sock_diag.h>
 #include <uapi/linux/btf.h>
+#include <linux/rcupdate.h>
+#include <linux/rcupdate_trace.h>
+#include <linux/rcupdate_wait.h>

 #define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE)

@@ -81,6 +84,22 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
         return NULL;
 }

+void bpf_local_storage_free_rcu(struct rcu_head *rcu)
+{
+        struct bpf_local_storage *local_storage;
+
+        local_storage = container_of(rcu, struct bpf_local_storage, rcu);
+        kfree_rcu(local_storage, rcu);
+}
+
+static void bpf_selem_free_rcu(struct rcu_head *rcu)
+{
+        struct bpf_local_storage_elem *selem;
+
+        selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
+        kfree_rcu(selem, rcu);
+}
+
 /* local_storage->lock must be held and selem->local_storage == local_storage.
  * The caller must ensure selem->smap is still valid to be
  * dereferenced for its smap->elem_size and smap->cache_idx.
@@ -93,7 +112,7 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
         bool free_local_storage;
         void *owner;

-        smap = rcu_dereference(SDATA(selem)->smap);
+        smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
         owner = local_storage->owner;

         /* All uncharging on the owner must be done first.
@@ -118,12 +137,12 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
          *
          * Although the unlock will be done under
          * rcu_read_lock(), it is more intutivie to
-         * read if kfree_rcu(local_storage, rcu) is done
+         * read if the freeing of the storage is done
          * after the raw_spin_unlock_bh(&local_storage->lock).
          *
          * Hence, a "bool free_local_storage" is returned
-         * to the caller which then calls the kfree_rcu()
-         * after unlock.
+         * to the caller which then calls then frees the storage after
+         * all the RCU grace periods have expired.
          */
         }
         hlist_del_init_rcu(&selem->snode);
@@ -131,8 +150,7 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
                             SDATA(selem))
                 RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);

-        kfree_rcu(selem, rcu);
-
+        call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu);
         return free_local_storage;
 }

@@ -146,7 +164,8 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
                 /* selem has already been unlinked from sk */
                 return;

-        local_storage = rcu_dereference(selem->local_storage);
+        local_storage = rcu_dereference_check(selem->local_storage,
+                                              bpf_rcu_lock_held());
         raw_spin_lock_irqsave(&local_storage->lock, flags);
         if (likely(selem_linked_to_storage(selem)))
                 free_local_storage = bpf_selem_unlink_storage_nolock(
@@ -154,7 +173,8 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
         raw_spin_unlock_irqrestore(&local_storage->lock, flags);

         if (free_local_storage)
-                kfree_rcu(local_storage, rcu);
+                call_rcu_tasks_trace(&local_storage->rcu,
+                                     bpf_local_storage_free_rcu);
 }

 void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
@@ -174,7 +194,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
                 /* selem has already be unlinked from smap */
                 return;

-        smap = rcu_dereference(SDATA(selem)->smap);
+        smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
         b = select_bucket(smap, selem);
         raw_spin_lock_irqsave(&b->lock, flags);
         if (likely(selem_linked_to_map(selem)))
@@ -213,12 +233,14 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
         struct bpf_local_storage_elem *selem;

         /* Fast path (cache hit) */
-        sdata = rcu_dereference(local_storage->cache[smap->cache_idx]);
+        sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx],
+                                      bpf_rcu_lock_held());
         if (sdata && rcu_access_pointer(sdata->smap) == smap)
                 return sdata;

         /* Slow path (cache miss) */
-        hlist_for_each_entry_rcu(selem, &local_storage->list, snode)
+        hlist_for_each_entry_rcu(selem, &local_storage->list, snode,
+                                  rcu_read_lock_trace_held())
                 if (rcu_access_pointer(SDATA(selem)->smap) == smap)
                         break;

@@ -306,7 +328,8 @@ int bpf_local_storage_alloc(void *owner,
                  * bucket->list, first_selem can be freed immediately
                  * (instead of kfree_rcu) because
                  * bpf_local_storage_map_free() does a
-                 * synchronize_rcu() before walking the bucket->list.
+                 * synchronize_rcu_mult (waiting for both sleepable and
+                 * normal programs) before walking the bucket->list.
                  * Hence, no one is accessing selem from the
                  * bucket->list under rcu_read_lock().
                  */
@@ -342,7 +365,8 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                      !map_value_has_spin_lock(&smap->map)))
                 return ERR_PTR(-EINVAL);

-        local_storage = rcu_dereference(*owner_storage(smap, owner));
+        local_storage = rcu_dereference_check(*owner_storage(smap, owner),
+                                              bpf_rcu_lock_held());
         if (!local_storage || hlist_empty(&local_storage->list)) {
                 /* Very first elem for the owner */
                 err = check_flags(NULL, map_flags);
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -17,6 +17,7 @@
 #include <uapi/linux/btf.h>
 #include <linux/btf_ids.h>
 #include <linux/fdtable.h>
+#include <linux/rcupdate_trace.h>

 DEFINE_BPF_STORAGE_CACHE(task_cache);

@@ -59,7 +60,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map,
         struct bpf_local_storage *task_storage;
         struct bpf_local_storage_map *smap;

-        task_storage = rcu_dereference(task->bpf_storage);
+        task_storage =
+                rcu_dereference_check(task->bpf_storage, bpf_rcu_lock_held());
         if (!task_storage)
                 return NULL;

@@ -229,6 +231,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
 {
         struct bpf_local_storage_data *sdata;

+        WARN_ON_ONCE(!bpf_rcu_lock_held());
         if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
                 return (unsigned long)NULL;

@@ -260,6 +263,7 @@ BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
 {
         int ret;

+        WARN_ON_ONCE(!bpf_rcu_lock_held());
         if (!task)
                 return -EINVAL;

--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -11874,6 +11874,9 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
                 }
                 break;
         case BPF_MAP_TYPE_RINGBUF:
+        case BPF_MAP_TYPE_INODE_STORAGE:
+        case BPF_MAP_TYPE_SK_STORAGE:
+        case BPF_MAP_TYPE_TASK_STORAGE:
                 break;
         default:
                 verbose(env,
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -13,6 +13,7 @@
 #include <net/sock.h>
 #include <uapi/linux/sock_diag.h>
 #include <uapi/linux/btf.h>
+#include <linux/rcupdate_trace.h>

 DEFINE_BPF_STORAGE_CACHE(sk_cache);

@@ -22,7 +23,8 @@ bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
         struct bpf_local_storage *sk_storage;
         struct bpf_local_storage_map *smap;

-        sk_storage = rcu_dereference(sk->sk_bpf_storage);
+        sk_storage =
+                rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held());
         if (!sk_storage)
                 return NULL;

@@ -258,6 +260,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
 {
         struct bpf_local_storage_data *sdata;

+        WARN_ON_ONCE(!bpf_rcu_lock_held());
         if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
                 return (unsigned long)NULL;

@@ -288,6 +291,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,

 BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
 {
+        WARN_ON_ONCE(!bpf_rcu_lock_held());
         if (!sk || !sk_fullsock(sk))
                 return -EINVAL;

@@ -416,6 +420,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
 BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
            void *, value, u64, flags)
 {
+        WARN_ON_ONCE(!bpf_rcu_lock_held());
         if (in_hardirq() || in_nmi())
                 return (unsigned long)NULL;

@@ -425,6 +430,7 @@ BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
 BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
            struct sock *, sk)
 {
+        WARN_ON_ONCE(!bpf_rcu_lock_held());
         if (in_hardirq() || in_nmi())
                 return -EPERM;

--- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -28,10 +28,6 @@ static unsigned int duration;
 struct storage {
         void *inode;
         unsigned int value;
-        /* Lock ensures that spin locked versions of local stoage operations
-         * also work, most operations in this tests are still single threaded
-         */
-        struct bpf_spin_lock lock;
 };

 /* Fork and exec the provided rm binary and return the exit code of the
@@ -66,27 +62,24 @@ static int run_self_unlink(int *monitored_pid, const char *rm_path)

 static bool check_syscall_operations(int map_fd, int obj_fd)
 {
-        struct storage val = { .value = TEST_STORAGE_VALUE, .lock = { 0 } },
-                       lookup_val = { .value = 0, .lock = { 0 } };
+        struct storage val = { .value = TEST_STORAGE_VALUE },
+                       lookup_val = { .value = 0 };
         int err;

         /* Looking up an existing element should fail initially */
-        err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
-                                        BPF_F_LOCK);
+        err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
         if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
                   "err:%d errno:%d\n", err, errno))
                 return false;

         /* Create a new element */
-        err = bpf_map_update_elem(map_fd, &obj_fd, &val,
-                                  BPF_NOEXIST | BPF_F_LOCK);
+        err = bpf_map_update_elem(map_fd, &obj_fd, &val, BPF_NOEXIST);
         if (CHECK(err < 0, "bpf_map_update_elem", "err:%d errno:%d\n", err,
                   errno))
                 return false;

         /* Lookup the newly created element */
-        err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
-                                        BPF_F_LOCK);
+        err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
         if (CHECK(err < 0, "bpf_map_lookup_elem", "err:%d errno:%d", err,
                   errno))
                 return false;
@@ -102,8 +95,7 @@ static bool check_syscall_operations(int map_fd, int obj_fd)
                 return false;

         /* The lookup should fail, now that the element has been deleted */
-        err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
-                                        BPF_F_LOCK);
+        err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
         if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
                   "err:%d errno:%d\n", err, errno))
                 return false;
--- a/tools/testing/selftests/bpf/progs/local_storage.c
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -20,7 +20,6 @@ int sk_storage_result = -1;
 struct local_storage {
         struct inode *exec_inode;
         __u32 value;
-        struct bpf_spin_lock lock;
 };

 struct {
@@ -58,9 +57,7 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
                              bpf_get_current_task_btf(), 0, 0);
         if (storage) {
                 /* Don't let an executable delete itself */
-                bpf_spin_lock(&storage->lock);
                 is_self_unlink = storage->exec_inode == victim->d_inode;
-                bpf_spin_unlock(&storage->lock);
                 if (is_self_unlink)
                         return -EPERM;
         }
@@ -68,7 +65,7 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
         return 0;
 }

-SEC("lsm/inode_rename")
+SEC("lsm.s/inode_rename")
 int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
              struct inode *new_dir, struct dentry *new_dentry,
              unsigned int flags)
@@ -89,10 +86,8 @@ int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
         if (!storage)
                 return 0;

-        bpf_spin_lock(&storage->lock);
         if (storage->value != DUMMY_STORAGE_VALUE)
                 inode_storage_result = -1;
-        bpf_spin_unlock(&storage->lock);

         err = bpf_inode_storage_delete(&inode_storage_map, old_dentry->d_inode);
         if (!err)
@@ -101,7 +96,7 @@ int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
         return 0;
 }

-SEC("lsm/socket_bind")
+SEC("lsm.s/socket_bind")
 int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
              int addrlen)
 {
@@ -117,10 +112,8 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
         if (!storage)
                 return 0;

-        bpf_spin_lock(&storage->lock);
         if (storage->value != DUMMY_STORAGE_VALUE)
                 sk_storage_result = -1;
-        bpf_spin_unlock(&storage->lock);

         err = bpf_sk_storage_delete(&sk_storage_map, sock->sk);
         if (!err)
@@ -129,7 +122,7 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
         return 0;
 }

-SEC("lsm/socket_post_create")
+SEC("lsm.s/socket_post_create")
 int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
              int protocol, int kern)
 {
@@ -144,9 +137,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
         if (!storage)
                 return 0;

-        bpf_spin_lock(&storage->lock);
         storage->value = DUMMY_STORAGE_VALUE;
-        bpf_spin_unlock(&storage->lock);

         return 0;
 }
@@ -154,7 +145,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
 /* This uses the local storage to remember the inode of the binary that a
  * process was originally executing.
  */
-SEC("lsm/bprm_committed_creds")
+SEC("lsm.s/bprm_committed_creds")
 void BPF_PROG(exec, struct linux_binprm *bprm)
 {
         __u32 pid = bpf_get_current_pid_tgid() >> 32;
@@ -166,18 +157,13 @@ void BPF_PROG(exec, struct linux_binprm *bprm)
         storage = bpf_task_storage_get(&task_storage_map,
                                        bpf_get_current_task_btf(), 0,
                                        BPF_LOCAL_STORAGE_GET_F_CREATE);
-        if (storage) {
-                bpf_spin_lock(&storage->lock);
+        if (storage)
                 storage->exec_inode = bprm->file->f_inode;
-                bpf_spin_unlock(&storage->lock);
-        }

         storage = bpf_inode_storage_get(&inode_storage_map, bprm->file->f_inode,
                                         0, BPF_LOCAL_STORAGE_GET_F_CREATE);
         if (!storage)
                 return;

-        bpf_spin_lock(&storage->lock);
         storage->value = DUMMY_STORAGE_VALUE;
-        bpf_spin_unlock(&storage->lock);
 }
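The selftest changes above drop BPF_F_LOCK because the storage value no
longer embeds a bpf_spin_lock. For reference, a minimal user-space sketch
of the same syscall-side operations; the function name is hypothetical and
the map/object fds are assumed to come from elsewhere (e.g. a libbpf
skeleton):

// Hypothetical helper; map_fd is a BPF_MAP_TYPE_INODE_STORAGE map fd and
// obj_fd is an open fd for the inode used as the key, as in the selftest.
#include <errno.h>
#include <bpf/bpf.h>

/* Matches the selftest's value layout after this series: no spin lock. */
struct storage {
        void *inode;
        unsigned int value;
};

static int exercise_storage(int map_fd, int obj_fd)
{
        struct storage val = { .value = 0xbeef }, lookup_val = {};
        int err;

        /* Create the element; plain BPF_NOEXIST, no BPF_F_LOCK needed. */
        err = bpf_map_update_elem(map_fd, &obj_fd, &val, BPF_NOEXIST);
        if (err)
                return -errno;

        /* Read it back with no lookup flags. */
        err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
        if (err)
                return -errno;

        return lookup_val.value == val.value ? 0 : -EINVAL;
}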