summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorVlastimil Babka <vbabka@suse.cz>2024-09-13 11:08:27 +0200
committerVlastimil Babka <vbabka@suse.cz>2024-09-13 11:08:27 +0200
commita715e94dbda4ece41aac49b7b7ff8ddb55a7fe08 (patch)
tree337ca3751374479574ff2d2af58a8759b15e237b /mm
parente02147cb703412fa13dd31908c734d7fb2314f55 (diff)
parent9028cdeb38e1f37d63cb3154799dd259b67e879e (diff)
Merge branch 'slab/for-6.12/rcu_barriers' into slab/for-next
Merge most of SLUB feature work for 6.12: - Barrier for pending kfree_rcu() in kmem_cache_destroy() and associated refactoring of the destroy path (Vlastimil Babka) - CONFIG_SLUB_RCU_DEBUG to allow KASAN catching UAF bugs in SLAB_TYPESAFE_BY_RCU caches (Jann Horn) - kmem_cache_charge() for delayed kmemcg charging (Shakeel Butt)
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig.debug32
-rw-r--r--mm/kasan/common.c62
-rw-r--r--mm/kasan/kasan_test.c46
-rw-r--r--mm/slab.h7
-rw-r--r--mm/slab_common.c127
-rw-r--r--mm/slub.c139
6 files changed, 300 insertions, 113 deletions
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index afc72fde0f03..41a58536531d 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -70,6 +70,38 @@ config SLUB_DEBUG_ON
off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying
"slab_debug=-".
+config SLUB_RCU_DEBUG
+ bool "Enable UAF detection in TYPESAFE_BY_RCU caches (for KASAN)"
+ depends on SLUB_DEBUG
+ # SLUB_RCU_DEBUG should build fine without KASAN, but is currently useless
+ # without KASAN, so mark it as a dependency of KASAN for now.
+ depends on KASAN
+ default KASAN_GENERIC || KASAN_SW_TAGS
+ help
+ Make SLAB_TYPESAFE_BY_RCU caches behave approximately as if the cache
+ was not marked as SLAB_TYPESAFE_BY_RCU and every caller used
+ kfree_rcu() instead.
+
+ This is intended for use in combination with KASAN, to enable KASAN to
+ detect use-after-free accesses in such caches.
+ (KFENCE is able to do that independent of this flag.)
+
+ This might degrade performance.
+ Unfortunately this also prevents a very specific bug pattern from
+ triggering (insufficient checks against an object being recycled
+ within the RCU grace period); so this option can be turned off even on
+ KASAN builds, in case you want to test for such a bug.
+
+ If you're using this for testing bugs / fuzzing and care about
+ catching all the bugs WAY more than performance, you might want to
+ also turn on CONFIG_RCU_STRICT_GRACE_PERIOD.
+
+ WARNING:
+ This is designed as a debugging feature, not a security feature.
+ Objects are sometimes recycled without RCU delay under memory pressure.
+
+ If unsure, say N.
+
config PAGE_OWNER
bool "Track page owner"
depends on DEBUG_KERNEL && STACKTRACE_SUPPORT
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 85e7c6b4575c..ed4873e18c75 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -208,15 +208,12 @@ void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache,
return (void *)object;
}
-static inline bool poison_slab_object(struct kmem_cache *cache, void *object,
- unsigned long ip, bool init)
+/* Returns true when freeing the object is not safe. */
+static bool check_slab_allocation(struct kmem_cache *cache, void *object,
+ unsigned long ip)
{
- void *tagged_object;
-
- if (!kasan_arch_is_ready())
- return false;
+ void *tagged_object = object;
- tagged_object = object;
object = kasan_reset_tag(object);
if (unlikely(nearest_obj(cache, virt_to_slab(object), object) != object)) {
@@ -224,37 +221,47 @@ static inline bool poison_slab_object(struct kmem_cache *cache, void *object,
return true;
}
- /* RCU slabs could be legally used after free within the RCU period. */
- if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU))
- return false;
-
if (!kasan_byte_accessible(tagged_object)) {
kasan_report_invalid_free(tagged_object, ip, KASAN_REPORT_DOUBLE_FREE);
return true;
}
+ return false;
+}
+
+static inline void poison_slab_object(struct kmem_cache *cache, void *object,
+ bool init, bool still_accessible)
+{
+ void *tagged_object = object;
+
+ object = kasan_reset_tag(object);
+
+ /* RCU slabs could be legally used after free within the RCU period. */
+ if (unlikely(still_accessible))
+ return;
+
kasan_poison(object, round_up(cache->object_size, KASAN_GRANULE_SIZE),
KASAN_SLAB_FREE, init);
if (kasan_stack_collection_enabled())
kasan_save_free_info(cache, tagged_object);
+}
- return false;
+bool __kasan_slab_pre_free(struct kmem_cache *cache, void *object,
+ unsigned long ip)
+{
+ if (!kasan_arch_is_ready() || is_kfence_address(object))
+ return false;
+ return check_slab_allocation(cache, object, ip);
}
-bool __kasan_slab_free(struct kmem_cache *cache, void *object,
- unsigned long ip, bool init)
+bool __kasan_slab_free(struct kmem_cache *cache, void *object, bool init,
+ bool still_accessible)
{
- if (is_kfence_address(object))
+ if (!kasan_arch_is_ready() || is_kfence_address(object))
return false;
- /*
- * If the object is buggy, do not let slab put the object onto the
- * freelist. The object will thus never be allocated again and its
- * metadata will never get released.
- */
- if (poison_slab_object(cache, object, ip, init))
- return true;
+ poison_slab_object(cache, object, init, still_accessible);
/*
* If the object is put into quarantine, do not let slab put the object
@@ -504,11 +511,16 @@ bool __kasan_mempool_poison_object(void *ptr, unsigned long ip)
return true;
}
- if (is_kfence_address(ptr))
- return false;
+ if (is_kfence_address(ptr) || !kasan_arch_is_ready())
+ return true;
slab = folio_slab(folio);
- return !poison_slab_object(slab->slab_cache, ptr, ip, false);
+
+ if (check_slab_allocation(slab->slab_cache, ptr, ip))
+ return false;
+
+ poison_slab_object(slab->slab_cache, ptr, false, false);
+ return true;
}
void __kasan_mempool_unpoison_object(void *ptr, size_t size, unsigned long ip)
diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c
index 7b32be2a3cf0..567d33b493e2 100644
--- a/mm/kasan/kasan_test.c
+++ b/mm/kasan/kasan_test.c
@@ -996,6 +996,51 @@ static void kmem_cache_invalid_free(struct kunit *test)
kmem_cache_destroy(cache);
}
+static void kmem_cache_rcu_uaf(struct kunit *test)
+{
+ char *p;
+ size_t size = 200;
+ struct kmem_cache *cache;
+
+ KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB_RCU_DEBUG);
+
+ cache = kmem_cache_create("test_cache", size, 0, SLAB_TYPESAFE_BY_RCU,
+ NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);
+
+ p = kmem_cache_alloc(cache, GFP_KERNEL);
+ if (!p) {
+ kunit_err(test, "Allocation failed: %s\n", __func__);
+ kmem_cache_destroy(cache);
+ return;
+ }
+ *p = 1;
+
+ rcu_read_lock();
+
+ /* Free the object - this will internally schedule an RCU callback. */
+ kmem_cache_free(cache, p);
+
+ /*
+ * We should still be allowed to access the object at this point because
+ * the cache is SLAB_TYPESAFE_BY_RCU and we've been in an RCU read-side
+ * critical section since before the kmem_cache_free().
+ */
+ READ_ONCE(*p);
+
+ rcu_read_unlock();
+
+ /*
+ * Wait for the RCU callback to execute; after this, the object should
+ * have actually been freed from KASAN's perspective.
+ */
+ rcu_barrier();
+
+ KUNIT_EXPECT_KASAN_FAIL(test, READ_ONCE(*p));
+
+ kmem_cache_destroy(cache);
+}
+
static void empty_cache_ctor(void *object) { }
static void kmem_cache_double_destroy(struct kunit *test)
@@ -1937,6 +1982,7 @@ static struct kunit_case kasan_kunit_test_cases[] = {
KUNIT_CASE(kmem_cache_oob),
KUNIT_CASE(kmem_cache_double_free),
KUNIT_CASE(kmem_cache_invalid_free),
+ KUNIT_CASE(kmem_cache_rcu_uaf),
KUNIT_CASE(kmem_cache_double_destroy),
KUNIT_CASE(kmem_cache_accounted),
KUNIT_CASE(kmem_cache_bulk),
diff --git a/mm/slab.h b/mm/slab.h
index dcdb56b8e7f5..9f907e930609 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -443,6 +443,13 @@ static inline bool is_kmalloc_cache(struct kmem_cache *s)
return (s->flags & SLAB_KMALLOC);
}
+static inline bool is_kmalloc_normal(struct kmem_cache *s)
+{
+ if (!is_kmalloc_cache(s))
+ return false;
+ return !(s->flags & (SLAB_CACHE_DMA|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT));
+}
+
/* Legal flag mask for kmem_cache_create(), for various configurations */
#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
SLAB_CACHE_DMA32 | SLAB_PANIC | \
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 85afeb69b3c0..11ef221bce17 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -40,11 +40,6 @@ LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
struct kmem_cache *kmem_cache;
-static LIST_HEAD(slab_caches_to_rcu_destroy);
-static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work);
-static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
- slab_caches_to_rcu_destroy_workfn);
-
/*
* Set of flags that will prevent slab merging
*/
@@ -502,81 +497,19 @@ fail:
}
EXPORT_SYMBOL(kmem_buckets_create);
-#ifdef SLAB_SUPPORTS_SYSFS
/*
* For a given kmem_cache, kmem_cache_destroy() should only be called
* once or there will be a use-after-free problem. The actual deletion
* and release of the kobject does not need slab_mutex or cpu_hotplug_lock
* protection. So they are now done without holding those locks.
- *
- * Note that there will be a slight delay in the deletion of sysfs files
- * if kmem_cache_release() is called indrectly from a work function.
*/
static void kmem_cache_release(struct kmem_cache *s)
{
- if (slab_state >= FULL) {
- sysfs_slab_unlink(s);
+ kfence_shutdown_cache(s);
+ if (__is_defined(SLAB_SUPPORTS_SYSFS) && slab_state >= FULL)
sysfs_slab_release(s);
- } else {
+ else
slab_kmem_cache_release(s);
- }
-}
-#else
-static void kmem_cache_release(struct kmem_cache *s)
-{
- slab_kmem_cache_release(s);
-}
-#endif
-
-static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
-{
- LIST_HEAD(to_destroy);
- struct kmem_cache *s, *s2;
-
- /*
- * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
- * @slab_caches_to_rcu_destroy list. The slab pages are freed
- * through RCU and the associated kmem_cache are dereferenced
- * while freeing the pages, so the kmem_caches should be freed only
- * after the pending RCU operations are finished. As rcu_barrier()
- * is a pretty slow operation, we batch all pending destructions
- * asynchronously.
- */
- mutex_lock(&slab_mutex);
- list_splice_init(&slab_caches_to_rcu_destroy, &to_destroy);
- mutex_unlock(&slab_mutex);
-
- if (list_empty(&to_destroy))
- return;
-
- rcu_barrier();
-
- list_for_each_entry_safe(s, s2, &to_destroy, list) {
- debugfs_slab_release(s);
- kfence_shutdown_cache(s);
- kmem_cache_release(s);
- }
-}
-
-static int shutdown_cache(struct kmem_cache *s)
-{
- /* free asan quarantined objects */
- kasan_cache_shutdown(s);
-
- if (__kmem_cache_shutdown(s) != 0)
- return -EBUSY;
-
- list_del(&s->list);
-
- if (s->flags & SLAB_TYPESAFE_BY_RCU) {
- list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
- schedule_work(&slab_caches_to_rcu_destroy_work);
- } else {
- kfence_shutdown_cache(s);
- debugfs_slab_release(s);
- }
-
- return 0;
}
void slab_kmem_cache_release(struct kmem_cache *s)
@@ -588,29 +521,63 @@ void slab_kmem_cache_release(struct kmem_cache *s)
void kmem_cache_destroy(struct kmem_cache *s)
{
- int err = -EBUSY;
- bool rcu_set;
+ int err;
if (unlikely(!s) || !kasan_check_byte(s))
return;
+ /* in-flight kfree_rcu()'s may include objects from our cache */
+ kvfree_rcu_barrier();
+
+ if (IS_ENABLED(CONFIG_SLUB_RCU_DEBUG) &&
+ (s->flags & SLAB_TYPESAFE_BY_RCU)) {
+ /*
+ * Under CONFIG_SLUB_RCU_DEBUG, when objects in a
+ * SLAB_TYPESAFE_BY_RCU slab are freed, SLUB will internally
+ * defer their freeing with call_rcu().
+ * Wait for such call_rcu() invocations here before actually
+ * destroying the cache.
+ *
+ * It doesn't matter that we haven't looked at the slab refcount
+ * yet - slabs with SLAB_TYPESAFE_BY_RCU can't be merged, so
+ * the refcount should be 1 here.
+ */
+ rcu_barrier();
+ }
+
cpus_read_lock();
mutex_lock(&slab_mutex);
- rcu_set = s->flags & SLAB_TYPESAFE_BY_RCU;
-
s->refcount--;
- if (s->refcount)
- goto out_unlock;
+ if (s->refcount) {
+ mutex_unlock(&slab_mutex);
+ cpus_read_unlock();
+ return;
+ }
- err = shutdown_cache(s);
+ /* free asan quarantined objects */
+ kasan_cache_shutdown(s);
+
+ err = __kmem_cache_shutdown(s);
WARN(err, "%s %s: Slab cache still has objects when called from %pS",
__func__, s->name, (void *)_RET_IP_);
-out_unlock:
+
+ list_del(&s->list);
+
mutex_unlock(&slab_mutex);
cpus_read_unlock();
- if (!err && !rcu_set)
- kmem_cache_release(s);
+
+ if (slab_state >= FULL)
+ sysfs_slab_unlink(s);
+ debugfs_slab_release(s);
+
+ if (err)
+ return;
+
+ if (s->flags & SLAB_TYPESAFE_BY_RCU)
+ rcu_barrier();
+
+ kmem_cache_release(s);
}
EXPORT_SYMBOL(kmem_cache_destroy);
diff --git a/mm/slub.c b/mm/slub.c
index d52c88f29f69..81cea762d094 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2184,6 +2184,45 @@ void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
__memcg_slab_free_hook(s, slab, p, objects, obj_exts);
}
+
+static __fastpath_inline
+bool memcg_slab_post_charge(void *p, gfp_t flags)
+{
+ struct slabobj_ext *slab_exts;
+ struct kmem_cache *s;
+ struct folio *folio;
+ struct slab *slab;
+ unsigned long off;
+
+ folio = virt_to_folio(p);
+ if (!folio_test_slab(folio)) {
+ return folio_memcg_kmem(folio) ||
+ (__memcg_kmem_charge_page(folio_page(folio, 0), flags,
+ folio_order(folio)) == 0);
+ }
+
+ slab = folio_slab(folio);
+ s = slab->slab_cache;
+
+ /*
+ * Ignore KMALLOC_NORMAL cache to avoid possible circular dependency
+ * of slab_obj_exts being allocated from the same slab and thus the slab
+ * becoming effectively unfreeable.
+ */
+ if (is_kmalloc_normal(s))
+ return true;
+
+ /* Ignore already charged objects. */
+ slab_exts = slab_obj_exts(slab);
+ if (slab_exts) {
+ off = obj_to_index(s, slab, p);
+ if (unlikely(slab_exts[off].objcg))
+ return true;
+ }
+
+ return __memcg_slab_post_alloc_hook(s, NULL, flags, 1, &p);
+}
+
#else /* CONFIG_MEMCG */
static inline bool memcg_slab_post_alloc_hook(struct kmem_cache *s,
struct list_lru *lru,
@@ -2197,18 +2236,37 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
void **p, int objects)
{
}
+
+static inline bool memcg_slab_post_charge(void *p, gfp_t flags)
+{
+ return true;
+}
#endif /* CONFIG_MEMCG */
+#ifdef CONFIG_SLUB_RCU_DEBUG
+static void slab_free_after_rcu_debug(struct rcu_head *rcu_head);
+
+struct rcu_delayed_free {
+ struct rcu_head head;
+ void *object;
+};
+#endif
+
/*
* Hooks for other subsystems that check memory allocations. In a typical
* production configuration these hooks all should produce no code at all.
*
* Returns true if freeing of the object can proceed, false if its reuse
- * was delayed by KASAN quarantine, or it was returned to KFENCE.
+ * was delayed by CONFIG_SLUB_RCU_DEBUG or KASAN quarantine, or it was returned
+ * to KFENCE.
*/
static __always_inline
-bool slab_free_hook(struct kmem_cache *s, void *x, bool init)
+bool slab_free_hook(struct kmem_cache *s, void *x, bool init,
+ bool after_rcu_delay)
{
+ /* Are the object contents still accessible? */
+ bool still_accessible = (s->flags & SLAB_TYPESAFE_BY_RCU) && !after_rcu_delay;
+
kmemleak_free_recursive(x, s->flags);
kmsan_slab_free(s, x);
@@ -2218,7 +2276,7 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init)
debug_check_no_obj_freed(x, s->object_size);
/* Use KCSAN to help debug racy use-after-free. */
- if (!(s->flags & SLAB_TYPESAFE_BY_RCU))
+ if (!still_accessible)
__kcsan_check_access(x, s->object_size,
KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
@@ -2226,6 +2284,35 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init)
return false;
/*
+ * Give KASAN a chance to notice an invalid free operation before we
+ * modify the object.
+ */
+ if (kasan_slab_pre_free(s, x))
+ return false;
+
+#ifdef CONFIG_SLUB_RCU_DEBUG
+ if (still_accessible) {
+ struct rcu_delayed_free *delayed_free;
+
+ delayed_free = kmalloc(sizeof(*delayed_free), GFP_NOWAIT);
+ if (delayed_free) {
+ /*
+ * Let KASAN track our call stack as a "related work
+ * creation", just like if the object had been freed
+ * normally via kfree_rcu().
+ * We have to do this manually because the rcu_head is
+ * not located inside the object.
+ */
+ kasan_record_aux_stack_noalloc(x);
+
+ delayed_free->object = x;
+ call_rcu(&delayed_free->head, slab_free_after_rcu_debug);
+ return false;
+ }
+ }
+#endif /* CONFIG_SLUB_RCU_DEBUG */
+
+ /*
* As memory initialization might be integrated into KASAN,
* kasan_slab_free and initialization memset's must be
* kept together to avoid discrepancies in behavior.
@@ -2255,7 +2342,7 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init)
}
/* KASAN might put x into memory quarantine, delaying its reuse. */
- return !kasan_slab_free(s, x, init);
+ return !kasan_slab_free(s, x, init, still_accessible);
}
static __fastpath_inline
@@ -2269,7 +2356,7 @@ bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail,
bool init;
if (is_kfence_address(next)) {
- slab_free_hook(s, next, false);
+ slab_free_hook(s, next, false, false);
return false;
}
@@ -2284,7 +2371,7 @@ bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail,
next = get_freepointer(s, object);
/* If object's reuse doesn't have to be delayed */
- if (likely(slab_free_hook(s, object, init))) {
+ if (likely(slab_free_hook(s, object, init, false))) {
/* Move object to the new freelist */
set_freepointer(s, object, *head);
*head = object;
@@ -4073,6 +4160,15 @@ void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,
}
EXPORT_SYMBOL(kmem_cache_alloc_lru_noprof);
+bool kmem_cache_charge(void *objp, gfp_t gfpflags)
+{
+ if (!memcg_kmem_online())
+ return true;
+
+ return memcg_slab_post_charge(objp, gfpflags);
+}
+EXPORT_SYMBOL(kmem_cache_charge);
+
/**
* kmem_cache_alloc_node - Allocate an object on the specified node
* @s: The cache to allocate from.
@@ -4481,7 +4577,7 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
memcg_slab_free_hook(s, slab, &object, 1);
alloc_tagging_slab_free_hook(s, slab, &object, 1);
- if (likely(slab_free_hook(s, object, slab_want_init_on_free(s))))
+ if (likely(slab_free_hook(s, object, slab_want_init_on_free(s), false)))
do_slab_free(s, slab, object, object, 1, addr);
}
@@ -4490,7 +4586,7 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
static noinline
void memcg_alloc_abort_single(struct kmem_cache *s, void *object)
{
- if (likely(slab_free_hook(s, object, slab_want_init_on_free(s))))
+ if (likely(slab_free_hook(s, object, slab_want_init_on_free(s), false)))
do_slab_free(s, virt_to_slab(object), object, object, 1, _RET_IP_);
}
#endif
@@ -4509,6 +4605,33 @@ void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head,
do_slab_free(s, slab, head, tail, cnt, addr);
}
+#ifdef CONFIG_SLUB_RCU_DEBUG
+static void slab_free_after_rcu_debug(struct rcu_head *rcu_head)
+{
+ struct rcu_delayed_free *delayed_free =
+ container_of(rcu_head, struct rcu_delayed_free, head);
+ void *object = delayed_free->object;
+ struct slab *slab = virt_to_slab(object);
+ struct kmem_cache *s;
+
+ kfree(delayed_free);
+
+ if (WARN_ON(is_kfence_address(object)))
+ return;
+
+ /* find the object and the cache again */
+ if (WARN_ON(!slab))
+ return;
+ s = slab->slab_cache;
+ if (WARN_ON(!(s->flags & SLAB_TYPESAFE_BY_RCU)))
+ return;
+
+ /* resume freeing */
+ if (slab_free_hook(s, object, slab_want_init_on_free(s), true))
+ do_slab_free(s, slab, object, object, 1, _THIS_IP_);
+}
+#endif /* CONFIG_SLUB_RCU_DEBUG */
+
#ifdef CONFIG_KASAN_GENERIC
void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
{