diff options
author | Alexei Starovoitov <ast@kernel.org> | 2021-12-29 17:54:41 -0800 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2021-12-29 17:54:41 -0800 |
commit | 1705c62e300550e8cfac0d9d1c3869fb860f3162 (patch) | |
tree | 7f6f598f052e2dd7c07e9aae27c455e061fb06e2 /kernel | |
parent | 3ccdcee28415c4226de05438b4d89eb5514edf73 (diff) | |
parent | 0ae6eff2978ee118ce2b536090af0682db13bb83 (diff) |
Merge branch 'Sleepable local storage'
KP Singh says:
====================
Local storage is currently unusable in sleepable helpers. One of the
important use cases of local_storage is to attach security (or
performance) contextual information to kernel objects in LSM / tracing
programs to be used later in the life-cyle of the object.
Sometimes this context can only be gathered from sleepable programs
(because it needs accesing __user pointers or helpers like
bpf_ima_inode_hash). Allowing local storage to be used from sleepable
programs allows such context to be managed with the benefits of
local_storage.
# v2 -> v3
* Fixed some RCU issues pointed by Martin
* Added Martin's ack
# v1 -> v2
* Generalize RCU checks (will send a separate patch for updating
non local storage code where this can be used).
* Add missing RCU lock checks from v1
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/bpf_inode_storage.c | 6 | ||||
-rw-r--r-- | kernel/bpf/bpf_local_storage.c | 50 | ||||
-rw-r--r-- | kernel/bpf/bpf_task_storage.c | 6 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 3 |
4 files changed, 50 insertions, 15 deletions
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 96ceed0e0fb5..e29d9e3d853e 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -17,6 +17,7 @@ #include <linux/bpf_lsm.h> #include <linux/btf_ids.h> #include <linux/fdtable.h> +#include <linux/rcupdate_trace.h> DEFINE_BPF_STORAGE_CACHE(inode_cache); @@ -44,7 +45,8 @@ static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode, if (!bsb) return NULL; - inode_storage = rcu_dereference(bsb->storage); + inode_storage = + rcu_dereference_check(bsb->storage, bpf_rcu_lock_held()); if (!inode_storage) return NULL; @@ -172,6 +174,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, { struct bpf_local_storage_data *sdata; + WARN_ON_ONCE(!bpf_rcu_lock_held()); if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) return (unsigned long)NULL; @@ -204,6 +207,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, BPF_CALL_2(bpf_inode_storage_delete, struct bpf_map *, map, struct inode *, inode) { + WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!inode) return -EINVAL; diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index b305270b7a4b..71de2a89869c 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -11,6 +11,9 @@ #include <net/sock.h> #include <uapi/linux/sock_diag.h> #include <uapi/linux/btf.h> +#include <linux/rcupdate.h> +#include <linux/rcupdate_trace.h> +#include <linux/rcupdate_wait.h> #define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE) @@ -81,6 +84,22 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, return NULL; } +void bpf_local_storage_free_rcu(struct rcu_head *rcu) +{ + struct bpf_local_storage *local_storage; + + local_storage = container_of(rcu, struct bpf_local_storage, rcu); + kfree_rcu(local_storage, rcu); +} + +static void bpf_selem_free_rcu(struct rcu_head *rcu) +{ + struct bpf_local_storage_elem *selem; + + selem = container_of(rcu, struct bpf_local_storage_elem, rcu); + kfree_rcu(selem, rcu); +} + /* local_storage->lock must be held and selem->local_storage == local_storage. * The caller must ensure selem->smap is still valid to be * dereferenced for its smap->elem_size and smap->cache_idx. @@ -93,7 +112,7 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, bool free_local_storage; void *owner; - smap = rcu_dereference(SDATA(selem)->smap); + smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); owner = local_storage->owner; /* All uncharging on the owner must be done first. @@ -118,12 +137,12 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, * * Although the unlock will be done under * rcu_read_lock(), it is more intutivie to - * read if kfree_rcu(local_storage, rcu) is done + * read if the freeing of the storage is done * after the raw_spin_unlock_bh(&local_storage->lock). * * Hence, a "bool free_local_storage" is returned - * to the caller which then calls the kfree_rcu() - * after unlock. + * to the caller which then calls then frees the storage after + * all the RCU grace periods have expired. */ } hlist_del_init_rcu(&selem->snode); @@ -131,8 +150,7 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, SDATA(selem)) RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); - kfree_rcu(selem, rcu); - + call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu); return free_local_storage; } @@ -146,7 +164,8 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem) /* selem has already been unlinked from sk */ return; - local_storage = rcu_dereference(selem->local_storage); + local_storage = rcu_dereference_check(selem->local_storage, + bpf_rcu_lock_held()); raw_spin_lock_irqsave(&local_storage->lock, flags); if (likely(selem_linked_to_storage(selem))) free_local_storage = bpf_selem_unlink_storage_nolock( @@ -154,7 +173,8 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem) raw_spin_unlock_irqrestore(&local_storage->lock, flags); if (free_local_storage) - kfree_rcu(local_storage, rcu); + call_rcu_tasks_trace(&local_storage->rcu, + bpf_local_storage_free_rcu); } void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, @@ -174,7 +194,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) /* selem has already be unlinked from smap */ return; - smap = rcu_dereference(SDATA(selem)->smap); + smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); b = select_bucket(smap, selem); raw_spin_lock_irqsave(&b->lock, flags); if (likely(selem_linked_to_map(selem))) @@ -213,12 +233,14 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem; /* Fast path (cache hit) */ - sdata = rcu_dereference(local_storage->cache[smap->cache_idx]); + sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx], + bpf_rcu_lock_held()); if (sdata && rcu_access_pointer(sdata->smap) == smap) return sdata; /* Slow path (cache miss) */ - hlist_for_each_entry_rcu(selem, &local_storage->list, snode) + hlist_for_each_entry_rcu(selem, &local_storage->list, snode, + rcu_read_lock_trace_held()) if (rcu_access_pointer(SDATA(selem)->smap) == smap) break; @@ -306,7 +328,8 @@ int bpf_local_storage_alloc(void *owner, * bucket->list, first_selem can be freed immediately * (instead of kfree_rcu) because * bpf_local_storage_map_free() does a - * synchronize_rcu() before walking the bucket->list. + * synchronize_rcu_mult (waiting for both sleepable and + * normal programs) before walking the bucket->list. * Hence, no one is accessing selem from the * bucket->list under rcu_read_lock(). */ @@ -342,7 +365,8 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, !map_value_has_spin_lock(&smap->map))) return ERR_PTR(-EINVAL); - local_storage = rcu_dereference(*owner_storage(smap, owner)); + local_storage = rcu_dereference_check(*owner_storage(smap, owner), + bpf_rcu_lock_held()); if (!local_storage || hlist_empty(&local_storage->list)) { /* Very first elem for the owner */ err = check_flags(NULL, map_flags); diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index bb69aea1a777..5da7bed0f5f6 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -17,6 +17,7 @@ #include <uapi/linux/btf.h> #include <linux/btf_ids.h> #include <linux/fdtable.h> +#include <linux/rcupdate_trace.h> DEFINE_BPF_STORAGE_CACHE(task_cache); @@ -59,7 +60,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map, struct bpf_local_storage *task_storage; struct bpf_local_storage_map *smap; - task_storage = rcu_dereference(task->bpf_storage); + task_storage = + rcu_dereference_check(task->bpf_storage, bpf_rcu_lock_held()); if (!task_storage) return NULL; @@ -229,6 +231,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, { struct bpf_local_storage_data *sdata; + WARN_ON_ONCE(!bpf_rcu_lock_held()); if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) return (unsigned long)NULL; @@ -260,6 +263,7 @@ BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, { int ret; + WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!task) return -EINVAL; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ca5cd0de804c..133599dfe2a2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11874,6 +11874,9 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, } break; case BPF_MAP_TYPE_RINGBUF: + case BPF_MAP_TYPE_INODE_STORAGE: + case BPF_MAP_TYPE_SK_STORAGE: + case BPF_MAP_TYPE_TASK_STORAGE: break; default: verbose(env, |