Merge branch 'slab/for-6.12/rcu_barriers' into slab/for-next
Merge most of SLUB feature work for 6.12:

- Barrier for pending kfree_rcu() in kmem_cache_destroy() and associated
  refactoring of the destroy path (Vlastimil Babka)
- CONFIG_SLUB_RCU_DEBUG to allow KASAN catching UAF bugs in
  SLAB_TYPESAFE_BY_RCU caches (Jann Horn)
- kmem_cache_charge() for delayed kmemcg charging (Shakeel Butt)
Commit: a715e94dbd
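For context, a minimal sketch of the caller pattern this merge targets (all names here are illustrative, not from the tree): a module that frees objects of its own cache with kfree_rcu() and then destroys the cache on exit previously needed its own barrier; with this series, kmem_cache_destroy() waits for pending kfree_rcu() work itself via kvfree_rcu_barrier().

    #include <linux/slab.h>
    #include <linux/rcupdate.h>

    /* Illustrative types and names only. */
    struct my_obj {
        struct rcu_head rcu;
        int payload;
    };

    static struct kmem_cache *my_cache;

    static void my_obj_release(struct my_obj *obj)
    {
        /* Queued; actually freed after an RCU grace period. */
        kfree_rcu(obj, rcu);
    }

    static void my_module_exit(void)
    {
        /*
         * Before this series, a bare kmem_cache_destroy() here could race
         * with pending kfree_rcu() callbacks still holding objects of
         * my_cache. Now kmem_cache_destroy() calls kvfree_rcu_barrier()
         * internally, so no explicit rcu_barrier() is required.
         */
        kmem_cache_destroy(my_cache);
    }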
@@ -175,13 +175,59 @@ static __always_inline void * __must_check kasan_init_slab_obj(
     return (void *)object;
 }
 
-bool __kasan_slab_free(struct kmem_cache *s, void *object,
-            unsigned long ip, bool init);
-static __always_inline bool kasan_slab_free(struct kmem_cache *s,
-                        void *object, bool init)
+bool __kasan_slab_pre_free(struct kmem_cache *s, void *object,
+            unsigned long ip);
+/**
+ * kasan_slab_pre_free - Check whether freeing a slab object is safe.
+ * @object: Object to be freed.
+ *
+ * This function checks whether freeing the given object is safe. It may
+ * check for double-free and invalid-free bugs and report them.
+ *
+ * This function is intended only for use by the slab allocator.
+ *
+ * @Return true if freeing the object is unsafe; false otherwise.
+ */
+static __always_inline bool kasan_slab_pre_free(struct kmem_cache *s,
+                        void *object)
 {
     if (kasan_enabled())
-        return __kasan_slab_free(s, object, _RET_IP_, init);
+        return __kasan_slab_pre_free(s, object, _RET_IP_);
     return false;
 }
+
+bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init,
+               bool still_accessible);
+/**
+ * kasan_slab_free - Poison, initialize, and quarantine a slab object.
+ * @object: Object to be freed.
+ * @init: Whether to initialize the object.
+ * @still_accessible: Whether the object contents are still accessible.
+ *
+ * This function informs that a slab object has been freed and is not
+ * supposed to be accessed anymore, except when @still_accessible is set
+ * (indicating that the object is in a SLAB_TYPESAFE_BY_RCU cache and an RCU
+ * grace period might not have passed yet).
+ *
+ * For KASAN modes that have integrated memory initialization
+ * (kasan_has_integrated_init() == true), this function also initializes
+ * the object's memory. For other modes, the @init argument is ignored.
+ *
+ * This function might also take ownership of the object to quarantine it.
+ * When this happens, KASAN will defer freeing the object to a later
+ * stage and handle it internally until then. The return value indicates
+ * whether KASAN took ownership of the object.
+ *
+ * This function is intended only for use by the slab allocator.
+ *
+ * @Return true if KASAN took ownership of the object; false otherwise.
+ */
+static __always_inline bool kasan_slab_free(struct kmem_cache *s,
+                        void *object, bool init,
+                        bool still_accessible)
+{
+    if (kasan_enabled())
+        return __kasan_slab_free(s, object, init, still_accessible);
+    return false;
+}
 
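A hedged sketch of how an allocator's free path is expected to combine the two hooks above (this mirrors what slab_free_hook() in mm/slub.c does further down in this diff; the wrapper name free_one_object() is hypothetical):

    /* Hypothetical helper; mirrors the pre-free / free split used by SLUB. */
    static bool free_one_object(struct kmem_cache *s, void *object, bool init)
    {
        /* Report double-free / invalid-free before touching the object. */
        if (kasan_slab_pre_free(s, object))
            return false;   /* buggy free: keep the object off the freelist */

        /*
         * Poison and possibly quarantine. A true return from
         * kasan_slab_free() means KASAN took ownership, so the caller
         * must not reuse the object yet.
         */
        return !kasan_slab_free(s, object, init, false);
    }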
@@ -371,7 +417,14 @@ static inline void *kasan_init_slab_obj(struct kmem_cache *cache,
 {
     return (void *)object;
 }
-static inline bool kasan_slab_free(struct kmem_cache *s, void *object, bool init)
+
+static inline bool kasan_slab_pre_free(struct kmem_cache *s, void *object)
+{
+    return false;
+}
+
+static inline bool kasan_slab_free(struct kmem_cache *s, void *object,
+                   bool init, bool still_accessible)
 {
     return false;
 }
@@ -111,6 +111,11 @@ static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr)
     kvfree(ptr);
 }
 
+static inline void kvfree_rcu_barrier(void)
+{
+    rcu_barrier();
+}
+
 #ifdef CONFIG_KASAN_GENERIC
 void kvfree_call_rcu(struct rcu_head *head, void *ptr);
 #else
@@ -35,6 +35,7 @@ static inline void rcu_virt_note_context_switch(void)
 
 void synchronize_rcu_expedited(void);
 void kvfree_call_rcu(struct rcu_head *head, void *ptr);
+void kvfree_rcu_barrier(void);
 
 void rcu_barrier(void);
 void rcu_momentary_dyntick_idle(void);
@@ -547,6 +547,35 @@ void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,
                gfp_t gfpflags) __assume_slab_alignment __malloc;
 #define kmem_cache_alloc_lru(...)	alloc_hooks(kmem_cache_alloc_lru_noprof(__VA_ARGS__))
 
+/**
+ * kmem_cache_charge - memcg charge an already allocated slab memory
+ * @objp: address of the slab object to memcg charge
+ * @gfpflags: describe the allocation context
+ *
+ * kmem_cache_charge allows charging a slab object to the current memcg,
+ * primarily in cases where charging at allocation time might not be possible
+ * because the target memcg is not known (i.e. softirq context)
+ *
+ * The objp should be pointer returned by the slab allocator functions like
+ * kmalloc (with __GFP_ACCOUNT in flags) or kmem_cache_alloc. The memcg charge
+ * behavior can be controlled through gfpflags parameter, which affects how the
+ * necessary internal metadata can be allocated. Including __GFP_NOFAIL denotes
+ * that overcharging is requested instead of failure, but is not applied for the
+ * internal metadata allocation.
+ *
+ * There are several cases where it will return true even if the charging was
+ * not done:
+ * More specifically:
+ *
+ * 1. For !CONFIG_MEMCG or cgroup_disable=memory systems.
+ * 2. Already charged slab objects.
+ * 3. For slab objects from KMALLOC_NORMAL caches - allocated by kmalloc()
+ *    without __GFP_ACCOUNT
+ * 4. Allocating internal metadata has failed
+ *
+ * Return: true if charge was successful otherwise false.
+ */
+bool kmem_cache_charge(void *objp, gfp_t gfpflags);
 void kmem_cache_free(struct kmem_cache *s, void *objp);
 
 kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
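As a usage sketch for kmem_cache_charge() (hypothetical names; the real user added by this merge is inet_csk_accept() at the end of this diff): allocate while the target memcg is unknown, then charge once the owning task is known.

    /* Hypothetical example of deferred memcg charging. */
    struct request {
        int data;
    };

    static struct kmem_cache *request_cache;

    /* Called from softirq context, where the target memcg is unknown. */
    static struct request *request_alloc(void)
    {
        return kmem_cache_alloc(request_cache, GFP_ATOMIC);
    }

    /* Called later from task context, once the owner is known. */
    static int request_attach(struct request *req)
    {
        if (!kmem_cache_charge(req, GFP_KERNEL))
            return -ENOMEM;
        return 0;
    }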
@@ -3584,18 +3584,15 @@ kvfree_rcu_drain_ready(struct kfree_rcu_cpu *krcp)
 }
 
 /*
- * This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
+ * Return: %true if a work is queued, %false otherwise.
  */
-static void kfree_rcu_monitor(struct work_struct *work)
+static bool
+kvfree_rcu_queue_batch(struct kfree_rcu_cpu *krcp)
 {
-    struct kfree_rcu_cpu *krcp = container_of(work,
-        struct kfree_rcu_cpu, monitor_work.work);
     unsigned long flags;
+    bool queued = false;
     int i, j;
 
-    // Drain ready for reclaim.
-    kvfree_rcu_drain_ready(krcp);
-
     raw_spin_lock_irqsave(&krcp->lock, flags);
 
     // Attempt to start a new batch.
@@ -3634,11 +3631,27 @@ static void kfree_rcu_monitor(struct work_struct *work)
             // be that the work is in the pending state when
             // channels have been detached following by each
             // other.
-            queue_rcu_work(system_wq, &krwp->rcu_work);
+            queued = queue_rcu_work(system_wq, &krwp->rcu_work);
         }
     }
 
     raw_spin_unlock_irqrestore(&krcp->lock, flags);
+    return queued;
+}
+
+/*
+ * This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
+ */
+static void kfree_rcu_monitor(struct work_struct *work)
+{
+    struct kfree_rcu_cpu *krcp = container_of(work,
+        struct kfree_rcu_cpu, monitor_work.work);
+
+    // Drain ready for reclaim.
+    kvfree_rcu_drain_ready(krcp);
+
+    // Queue a batch for a rest.
+    kvfree_rcu_queue_batch(krcp);
 
     // If there is nothing to detach, it means that our job is
     // successfully done here. In case of having at least one
@@ -3859,6 +3872,86 @@ unlock_return:
 }
 EXPORT_SYMBOL_GPL(kvfree_call_rcu);
 
+/**
+ * kvfree_rcu_barrier - Wait until all in-flight kvfree_rcu() complete.
+ *
+ * Note that a single argument of kvfree_rcu() call has a slow path that
+ * triggers synchronize_rcu() following by freeing a pointer. It is done
+ * before the return from the function. Therefore for any single-argument
+ * call that will result in a kfree() to a cache that is to be destroyed
+ * during module exit, it is developer's responsibility to ensure that all
+ * such calls have returned before the call to kmem_cache_destroy().
+ */
+void kvfree_rcu_barrier(void)
+{
+    struct kfree_rcu_cpu_work *krwp;
+    struct kfree_rcu_cpu *krcp;
+    bool queued;
+    int i, cpu;
+
+    /*
+     * Firstly we detach objects and queue them over an RCU-batch
+     * for all CPUs. Finally queued works are flushed for each CPU.
+     *
+     * Please note. If there are outstanding batches for a particular
+     * CPU, those have to be finished first following by queuing a new.
+     */
+    for_each_possible_cpu(cpu) {
+        krcp = per_cpu_ptr(&krc, cpu);
+
+        /*
+         * Check if this CPU has any objects which have been queued for a
+         * new GP completion. If not(means nothing to detach), we are done
+         * with it. If any batch is pending/running for this "krcp", below
+         * per-cpu flush_rcu_work() waits its completion(see last step).
+         */
+        if (!need_offload_krc(krcp))
+            continue;
+
+        while (1) {
+            /*
+             * If we are not able to queue a new RCU work it means:
+             * - batches for this CPU are still in flight which should
+             *   be flushed first and then repeat;
+             * - no objects to detach, because of concurrency.
+             */
+            queued = kvfree_rcu_queue_batch(krcp);
+
+            /*
+             * Bail out, if there is no need to offload this "krcp"
+             * anymore. As noted earlier it can run concurrently.
+             */
+            if (queued || !need_offload_krc(krcp))
+                break;
+
+            /* There are ongoing batches. */
+            for (i = 0; i < KFREE_N_BATCHES; i++) {
+                krwp = &(krcp->krw_arr[i]);
+                flush_rcu_work(&krwp->rcu_work);
+            }
+        }
+    }
+
+    /*
+     * Now we guarantee that all objects are flushed.
+     */
+    for_each_possible_cpu(cpu) {
+        krcp = per_cpu_ptr(&krc, cpu);
+
+        /*
+         * A monitor work can drain ready to reclaim objects
+         * directly. Wait its completion if running or pending.
+         */
+        cancel_delayed_work_sync(&krcp->monitor_work);
+
+        for (i = 0; i < KFREE_N_BATCHES; i++) {
+            krwp = &(krcp->krw_arr[i]);
+            flush_rcu_work(&krwp->rcu_work);
+        }
+    }
+}
+EXPORT_SYMBOL_GPL(kvfree_rcu_barrier);
+
 static unsigned long
 kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 {
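To make the caveat in the comment above concrete, a small sketch with made-up names (the explicit barrier is redundant once kmem_cache_destroy() calls it itself, and the single-argument form mentioned in the comment is not covered by the barrier because it may free inline after synchronize_rcu()):

    /* Illustrative names only. */
    struct foo {
        struct rcu_head rcu;
    };

    static void foo_teardown(struct kmem_cache *foo_cache, struct foo *f)
    {
        /* Double-argument form: the free request is queued per CPU. */
        kfree_rcu(f, rcu);

        /* Wait for every queued kvfree_rcu()/kfree_rcu() to complete... */
        kvfree_rcu_barrier();

        /* ...so the backing cache can be torn down safely. */
        kmem_cache_destroy(foo_cache);
    }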
@@ -5,6 +5,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/rcupdate.h>
 #include "../mm/slab.h"
 
 static struct kunit_resource resource;
@@ -157,6 +158,34 @@ static void test_kmalloc_redzone_access(struct kunit *test)
     kmem_cache_destroy(s);
 }
 
+struct test_kfree_rcu_struct {
+    struct rcu_head rcu;
+};
+
+static void test_kfree_rcu(struct kunit *test)
+{
+    struct kmem_cache *s = test_kmem_cache_create("TestSlub_kfree_rcu",
+                sizeof(struct test_kfree_rcu_struct),
+                SLAB_NO_MERGE);
+    struct test_kfree_rcu_struct *p = kmem_cache_alloc(s, GFP_KERNEL);
+
+    kfree_rcu(p, rcu);
+    kmem_cache_destroy(s);
+
+    KUNIT_EXPECT_EQ(test, 0, slab_errors);
+}
+
+static void test_leak_destroy(struct kunit *test)
+{
+    struct kmem_cache *s = test_kmem_cache_create("TestSlub_kfree_rcu",
+                64, SLAB_NO_MERGE);
+    kmem_cache_alloc(s, GFP_KERNEL);
+
+    kmem_cache_destroy(s);
+
+    KUNIT_EXPECT_EQ(test, 1, slab_errors);
+}
+
 static int test_init(struct kunit *test)
 {
     slab_errors = 0;
@@ -177,6 +206,8 @@ static struct kunit_case test_cases[] = {
 
     KUNIT_CASE(test_clobber_redzone_free),
     KUNIT_CASE(test_kmalloc_redzone_access),
+    KUNIT_CASE(test_kfree_rcu),
+    KUNIT_CASE(test_leak_destroy),
     {}
 };
 
@@ -70,6 +70,38 @@ config SLUB_DEBUG_ON
       off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying
       "slab_debug=-".
 
+config SLUB_RCU_DEBUG
+    bool "Enable UAF detection in TYPESAFE_BY_RCU caches (for KASAN)"
+    depends on SLUB_DEBUG
+    # SLUB_RCU_DEBUG should build fine without KASAN, but is currently useless
+    # without KASAN, so mark it as a dependency of KASAN for now.
+    depends on KASAN
+    default KASAN_GENERIC || KASAN_SW_TAGS
+    help
+      Make SLAB_TYPESAFE_BY_RCU caches behave approximately as if the cache
+      was not marked as SLAB_TYPESAFE_BY_RCU and every caller used
+      kfree_rcu() instead.
+
+      This is intended for use in combination with KASAN, to enable KASAN to
+      detect use-after-free accesses in such caches.
+      (KFENCE is able to do that independent of this flag.)
+
+      This might degrade performance.
+      Unfortunately this also prevents a very specific bug pattern from
+      triggering (insufficient checks against an object being recycled
+      within the RCU grace period); so this option can be turned off even on
+      KASAN builds, in case you want to test for such a bug.
+
+      If you're using this for testing bugs / fuzzing and care about
+      catching all the bugs WAY more than performance, you might want to
+      also turn on CONFIG_RCU_STRICT_GRACE_PERIOD.
+
+      WARNING:
+      This is designed as a debugging feature, not a security feature.
+      Objects are sometimes recycled without RCU delay under memory pressure.
+
+      If unsure, say N.
+
 config PAGE_OWNER
     bool "Track page owner"
     depends on DEBUG_KERNEL && STACKTRACE_SUPPORT
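The bug class this option exposes is the one exercised by the kmem_cache_rcu_uaf() KUnit test added further down; roughly, as a sketch with made-up names:

    /* Sketch: a use-after-free that only KASAN + SLUB_RCU_DEBUG will report. */
    struct item {
        int val;
    };

    static void demo(struct kmem_cache *typesafe_cache) /* SLAB_TYPESAFE_BY_RCU */
    {
        struct item *it = kmem_cache_alloc(typesafe_cache, GFP_KERNEL);

        if (!it)
            return;

        kmem_cache_free(typesafe_cache, it);

        /* Let the grace period (and the debug-only deferred free) elapse. */
        rcu_barrier();

        /* A genuine UAF: without this option the slot may already be reused. */
        READ_ONCE(it->val);
    }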
@@ -208,15 +208,12 @@ void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache,
     return (void *)object;
 }
 
-static inline bool poison_slab_object(struct kmem_cache *cache, void *object,
-                      unsigned long ip, bool init)
+/* Returns true when freeing the object is not safe. */
+static bool check_slab_allocation(struct kmem_cache *cache, void *object,
+                  unsigned long ip)
 {
-    void *tagged_object;
+    void *tagged_object = object;
 
-    if (!kasan_arch_is_ready())
-        return false;
-
-    tagged_object = object;
     object = kasan_reset_tag(object);
 
     if (unlikely(nearest_obj(cache, virt_to_slab(object), object) != object)) {
@@ -224,37 +221,47 @@ static inline bool poison_slab_object(struct kmem_cache *cache, void *object,
         return true;
     }
 
-    /* RCU slabs could be legally used after free within the RCU period. */
-    if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU))
-        return false;
-
     if (!kasan_byte_accessible(tagged_object)) {
         kasan_report_invalid_free(tagged_object, ip, KASAN_REPORT_DOUBLE_FREE);
         return true;
     }
 
+    return false;
+}
+
+static inline void poison_slab_object(struct kmem_cache *cache, void *object,
+                      bool init, bool still_accessible)
+{
+    void *tagged_object = object;
+
+    object = kasan_reset_tag(object);
+
+    /* RCU slabs could be legally used after free within the RCU period. */
+    if (unlikely(still_accessible))
+        return;
+
     kasan_poison(object, round_up(cache->object_size, KASAN_GRANULE_SIZE),
             KASAN_SLAB_FREE, init);
 
     if (kasan_stack_collection_enabled())
         kasan_save_free_info(cache, tagged_object);
-
-    return false;
 }
 
-bool __kasan_slab_free(struct kmem_cache *cache, void *object,
-                unsigned long ip, bool init)
+bool __kasan_slab_pre_free(struct kmem_cache *cache, void *object,
+                unsigned long ip)
 {
-    if (is_kfence_address(object))
+    if (!kasan_arch_is_ready() || is_kfence_address(object))
         return false;
+    return check_slab_allocation(cache, object, ip);
+}
+
+bool __kasan_slab_free(struct kmem_cache *cache, void *object, bool init,
+               bool still_accessible)
+{
+    if (!kasan_arch_is_ready() || is_kfence_address(object))
+        return false;
 
     /*
      * If the object is buggy, do not let slab put the object onto the
      * freelist. The object will thus never be allocated again and its
      * metadata will never get released.
      */
-    if (poison_slab_object(cache, object, ip, init))
-        return true;
+    poison_slab_object(cache, object, init, still_accessible);
 
     /*
      * If the object is put into quarantine, do not let slab put the object
@@ -504,11 +511,16 @@ bool __kasan_mempool_poison_object(void *ptr, unsigned long ip)
         return true;
     }
 
-    if (is_kfence_address(ptr))
-        return false;
+    if (is_kfence_address(ptr) || !kasan_arch_is_ready())
+        return true;
 
     slab = folio_slab(folio);
-    return !poison_slab_object(slab->slab_cache, ptr, ip, false);
+
+    if (check_slab_allocation(slab->slab_cache, ptr, ip))
+        return false;
+
+    poison_slab_object(slab->slab_cache, ptr, false, false);
+    return true;
 }
 
 void __kasan_mempool_unpoison_object(void *ptr, size_t size, unsigned long ip)
@@ -996,6 +996,51 @@ static void kmem_cache_invalid_free(struct kunit *test)
     kmem_cache_destroy(cache);
 }
 
+static void kmem_cache_rcu_uaf(struct kunit *test)
+{
+    char *p;
+    size_t size = 200;
+    struct kmem_cache *cache;
+
+    KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB_RCU_DEBUG);
+
+    cache = kmem_cache_create("test_cache", size, 0, SLAB_TYPESAFE_BY_RCU,
+                  NULL);
+    KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);
+
+    p = kmem_cache_alloc(cache, GFP_KERNEL);
+    if (!p) {
+        kunit_err(test, "Allocation failed: %s\n", __func__);
+        kmem_cache_destroy(cache);
+        return;
+    }
+    *p = 1;
+
+    rcu_read_lock();
+
+    /* Free the object - this will internally schedule an RCU callback. */
+    kmem_cache_free(cache, p);
+
+    /*
+     * We should still be allowed to access the object at this point because
+     * the cache is SLAB_TYPESAFE_BY_RCU and we've been in an RCU read-side
+     * critical section since before the kmem_cache_free().
+     */
+    READ_ONCE(*p);
+
+    rcu_read_unlock();
+
+    /*
+     * Wait for the RCU callback to execute; after this, the object should
+     * have actually been freed from KASAN's perspective.
+     */
+    rcu_barrier();
+
+    KUNIT_EXPECT_KASAN_FAIL(test, READ_ONCE(*p));
+
+    kmem_cache_destroy(cache);
+}
+
 static void empty_cache_ctor(void *object) { }
 
 static void kmem_cache_double_destroy(struct kunit *test)
@@ -1937,6 +1982,7 @@ static struct kunit_case kasan_kunit_test_cases[] = {
     KUNIT_CASE(kmem_cache_oob),
     KUNIT_CASE(kmem_cache_double_free),
     KUNIT_CASE(kmem_cache_invalid_free),
+    KUNIT_CASE(kmem_cache_rcu_uaf),
     KUNIT_CASE(kmem_cache_double_destroy),
     KUNIT_CASE(kmem_cache_accounted),
     KUNIT_CASE(kmem_cache_bulk),
@@ -443,6 +443,13 @@ static inline bool is_kmalloc_cache(struct kmem_cache *s)
     return (s->flags & SLAB_KMALLOC);
 }
 
+static inline bool is_kmalloc_normal(struct kmem_cache *s)
+{
+    if (!is_kmalloc_cache(s))
+        return false;
+    return !(s->flags & (SLAB_CACHE_DMA|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT));
+}
+
 /* Legal flag mask for kmem_cache_create(), for various configurations */
 #define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
              SLAB_CACHE_DMA32 | SLAB_PANIC | \
mm/slab_common.c (127 lines changed)
@@ -40,11 +40,6 @@ LIST_HEAD(slab_caches);
 DEFINE_MUTEX(slab_mutex);
 struct kmem_cache *kmem_cache;
 
-static LIST_HEAD(slab_caches_to_rcu_destroy);
-static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work);
-static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
-            slab_caches_to_rcu_destroy_workfn);
-
 /*
  * Set of flags that will prevent slab merging
  */
@@ -502,81 +497,19 @@ fail:
 }
 EXPORT_SYMBOL(kmem_buckets_create);
 
-#ifdef SLAB_SUPPORTS_SYSFS
 /*
  * For a given kmem_cache, kmem_cache_destroy() should only be called
  * once or there will be a use-after-free problem. The actual deletion
  * and release of the kobject does not need slab_mutex or cpu_hotplug_lock
  * protection. So they are now done without holding those locks.
- *
- * Note that there will be a slight delay in the deletion of sysfs files
- * if kmem_cache_release() is called indrectly from a work function.
  */
 static void kmem_cache_release(struct kmem_cache *s)
 {
-    if (slab_state >= FULL) {
-        sysfs_slab_unlink(s);
+    kfence_shutdown_cache(s);
+    if (__is_defined(SLAB_SUPPORTS_SYSFS) && slab_state >= FULL)
         sysfs_slab_release(s);
-    } else {
+    else
         slab_kmem_cache_release(s);
-    }
 }
-#else
-static void kmem_cache_release(struct kmem_cache *s)
-{
-    slab_kmem_cache_release(s);
-}
-#endif
-
-static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
-{
-    LIST_HEAD(to_destroy);
-    struct kmem_cache *s, *s2;
-
-    /*
-     * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
-     * @slab_caches_to_rcu_destroy list. The slab pages are freed
-     * through RCU and the associated kmem_cache are dereferenced
-     * while freeing the pages, so the kmem_caches should be freed only
-     * after the pending RCU operations are finished. As rcu_barrier()
-     * is a pretty slow operation, we batch all pending destructions
-     * asynchronously.
-     */
-    mutex_lock(&slab_mutex);
-    list_splice_init(&slab_caches_to_rcu_destroy, &to_destroy);
-    mutex_unlock(&slab_mutex);
-
-    if (list_empty(&to_destroy))
-        return;
-
-    rcu_barrier();
-
-    list_for_each_entry_safe(s, s2, &to_destroy, list) {
-        debugfs_slab_release(s);
-        kfence_shutdown_cache(s);
-        kmem_cache_release(s);
-    }
-}
-
-static int shutdown_cache(struct kmem_cache *s)
-{
-    /* free asan quarantined objects */
-    kasan_cache_shutdown(s);
-
-    if (__kmem_cache_shutdown(s) != 0)
-        return -EBUSY;
-
-    list_del(&s->list);
-
-    if (s->flags & SLAB_TYPESAFE_BY_RCU) {
-        list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
-        schedule_work(&slab_caches_to_rcu_destroy_work);
-    } else {
-        kfence_shutdown_cache(s);
-        debugfs_slab_release(s);
-    }
-
-    return 0;
-}
 
 void slab_kmem_cache_release(struct kmem_cache *s)
@@ -588,29 +521,63 @@ void slab_kmem_cache_release(struct kmem_cache *s)
 
 void kmem_cache_destroy(struct kmem_cache *s)
 {
-    int err = -EBUSY;
-    bool rcu_set;
+    int err;
 
     if (unlikely(!s) || !kasan_check_byte(s))
         return;
 
+    /* in-flight kfree_rcu()'s may include objects from our cache */
+    kvfree_rcu_barrier();
+
+    if (IS_ENABLED(CONFIG_SLUB_RCU_DEBUG) &&
+        (s->flags & SLAB_TYPESAFE_BY_RCU)) {
+        /*
+         * Under CONFIG_SLUB_RCU_DEBUG, when objects in a
+         * SLAB_TYPESAFE_BY_RCU slab are freed, SLUB will internally
+         * defer their freeing with call_rcu().
+         * Wait for such call_rcu() invocations here before actually
+         * destroying the cache.
+         *
+         * It doesn't matter that we haven't looked at the slab refcount
+         * yet - slabs with SLAB_TYPESAFE_BY_RCU can't be merged, so
+         * the refcount should be 1 here.
+         */
+        rcu_barrier();
+    }
+
     cpus_read_lock();
     mutex_lock(&slab_mutex);
 
-    rcu_set = s->flags & SLAB_TYPESAFE_BY_RCU;
-
     s->refcount--;
-    if (s->refcount)
-        goto out_unlock;
+    if (s->refcount) {
+        mutex_unlock(&slab_mutex);
+        cpus_read_unlock();
+        return;
+    }
 
-    err = shutdown_cache(s);
+    /* free asan quarantined objects */
+    kasan_cache_shutdown(s);
+
+    err = __kmem_cache_shutdown(s);
     WARN(err, "%s %s: Slab cache still has objects when called from %pS",
          __func__, s->name, (void *)_RET_IP_);
-out_unlock:
+
+    list_del(&s->list);
+
     mutex_unlock(&slab_mutex);
     cpus_read_unlock();
-    if (!err && !rcu_set)
-        kmem_cache_release(s);
+
+    if (slab_state >= FULL)
+        sysfs_slab_unlink(s);
+    debugfs_slab_release(s);
+
+    if (err)
+        return;
+
+    if (s->flags & SLAB_TYPESAFE_BY_RCU)
+        rcu_barrier();
+
+    kmem_cache_release(s);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
mm/slub.c (139 lines changed)
@@ -2184,6 +2184,45 @@ void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
 
     __memcg_slab_free_hook(s, slab, p, objects, obj_exts);
 }
+
+static __fastpath_inline
+bool memcg_slab_post_charge(void *p, gfp_t flags)
+{
+    struct slabobj_ext *slab_exts;
+    struct kmem_cache *s;
+    struct folio *folio;
+    struct slab *slab;
+    unsigned long off;
+
+    folio = virt_to_folio(p);
+    if (!folio_test_slab(folio)) {
+        return folio_memcg_kmem(folio) ||
+            (__memcg_kmem_charge_page(folio_page(folio, 0), flags,
+                          folio_order(folio)) == 0);
+    }
+
+    slab = folio_slab(folio);
+    s = slab->slab_cache;
+
+    /*
+     * Ignore KMALLOC_NORMAL cache to avoid possible circular dependency
+     * of slab_obj_exts being allocated from the same slab and thus the slab
+     * becoming effectively unfreeable.
+     */
+    if (is_kmalloc_normal(s))
+        return true;
+
+    /* Ignore already charged objects. */
+    slab_exts = slab_obj_exts(slab);
+    if (slab_exts) {
+        off = obj_to_index(s, slab, p);
+        if (unlikely(slab_exts[off].objcg))
+            return true;
+    }
+
+    return __memcg_slab_post_alloc_hook(s, NULL, flags, 1, &p);
+}
+
 #else /* CONFIG_MEMCG */
 static inline bool memcg_slab_post_alloc_hook(struct kmem_cache *s,
                           struct list_lru *lru,
@@ -2197,18 +2236,37 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
                     void **p, int objects)
 {
 }
+
+static inline bool memcg_slab_post_charge(void *p, gfp_t flags)
+{
+    return true;
+}
 #endif /* CONFIG_MEMCG */
 
+#ifdef CONFIG_SLUB_RCU_DEBUG
+static void slab_free_after_rcu_debug(struct rcu_head *rcu_head);
+
+struct rcu_delayed_free {
+    struct rcu_head head;
+    void *object;
+};
+#endif
+
 /*
  * Hooks for other subsystems that check memory allocations. In a typical
  * production configuration these hooks all should produce no code at all.
  *
  * Returns true if freeing of the object can proceed, false if its reuse
- * was delayed by KASAN quarantine, or it was returned to KFENCE.
+ * was delayed by CONFIG_SLUB_RCU_DEBUG or KASAN quarantine, or it was returned
+ * to KFENCE.
 */
 static __always_inline
-bool slab_free_hook(struct kmem_cache *s, void *x, bool init)
+bool slab_free_hook(struct kmem_cache *s, void *x, bool init,
+            bool after_rcu_delay)
 {
+    /* Are the object contents still accessible? */
+    bool still_accessible = (s->flags & SLAB_TYPESAFE_BY_RCU) && !after_rcu_delay;
+
     kmemleak_free_recursive(x, s->flags);
     kmsan_slab_free(s, x);
 
@@ -2218,13 +2276,42 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init)
     debug_check_no_obj_freed(x, s->object_size);
 
     /* Use KCSAN to help debug racy use-after-free. */
-    if (!(s->flags & SLAB_TYPESAFE_BY_RCU))
+    if (!still_accessible)
         __kcsan_check_access(x, s->object_size,
                      KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
 
     if (kfence_free(x))
         return false;
 
+    /*
+     * Give KASAN a chance to notice an invalid free operation before we
+     * modify the object.
+     */
+    if (kasan_slab_pre_free(s, x))
+        return false;
+
+#ifdef CONFIG_SLUB_RCU_DEBUG
+    if (still_accessible) {
+        struct rcu_delayed_free *delayed_free;
+
+        delayed_free = kmalloc(sizeof(*delayed_free), GFP_NOWAIT);
+        if (delayed_free) {
+            /*
+             * Let KASAN track our call stack as a "related work
+             * creation", just like if the object had been freed
+             * normally via kfree_rcu().
+             * We have to do this manually because the rcu_head is
+             * not located inside the object.
+             */
+            kasan_record_aux_stack_noalloc(x);
+
+            delayed_free->object = x;
+            call_rcu(&delayed_free->head, slab_free_after_rcu_debug);
+            return false;
+        }
+    }
+#endif /* CONFIG_SLUB_RCU_DEBUG */
+
     /*
      * As memory initialization might be integrated into KASAN,
      * kasan_slab_free and initialization memset's must be
@@ -2255,7 +2342,7 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init)
 
     }
     /* KASAN might put x into memory quarantine, delaying its reuse. */
-    return !kasan_slab_free(s, x, init);
+    return !kasan_slab_free(s, x, init, still_accessible);
 }
 
 static __fastpath_inline
@@ -2269,7 +2356,7 @@ bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail,
     bool init;
 
     if (is_kfence_address(next)) {
-        slab_free_hook(s, next, false);
+        slab_free_hook(s, next, false, false);
         return false;
     }
 
@@ -2284,7 +2371,7 @@ bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail,
         next = get_freepointer(s, object);
 
         /* If object's reuse doesn't have to be delayed */
-        if (likely(slab_free_hook(s, object, init))) {
+        if (likely(slab_free_hook(s, object, init, false))) {
             /* Move object to the new freelist */
             set_freepointer(s, object, *head);
             *head = object;
@@ -4073,6 +4160,15 @@ void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,
 }
 EXPORT_SYMBOL(kmem_cache_alloc_lru_noprof);
 
+bool kmem_cache_charge(void *objp, gfp_t gfpflags)
+{
+    if (!memcg_kmem_online())
+        return true;
+
+    return memcg_slab_post_charge(objp, gfpflags);
+}
+EXPORT_SYMBOL(kmem_cache_charge);
+
 /**
  * kmem_cache_alloc_node - Allocate an object on the specified node
  * @s: The cache to allocate from.
@@ -4481,7 +4577,7 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
     memcg_slab_free_hook(s, slab, &object, 1);
     alloc_tagging_slab_free_hook(s, slab, &object, 1);
 
-    if (likely(slab_free_hook(s, object, slab_want_init_on_free(s))))
+    if (likely(slab_free_hook(s, object, slab_want_init_on_free(s), false)))
         do_slab_free(s, slab, object, object, 1, addr);
 }
 
@@ -4490,7 +4586,7 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
 static noinline
 void memcg_alloc_abort_single(struct kmem_cache *s, void *object)
 {
-    if (likely(slab_free_hook(s, object, slab_want_init_on_free(s))))
+    if (likely(slab_free_hook(s, object, slab_want_init_on_free(s), false)))
         do_slab_free(s, virt_to_slab(object), object, object, 1, _RET_IP_);
 }
 #endif
@@ -4509,6 +4605,33 @@ void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head,
     do_slab_free(s, slab, head, tail, cnt, addr);
 }
 
+#ifdef CONFIG_SLUB_RCU_DEBUG
+static void slab_free_after_rcu_debug(struct rcu_head *rcu_head)
+{
+    struct rcu_delayed_free *delayed_free =
+            container_of(rcu_head, struct rcu_delayed_free, head);
+    void *object = delayed_free->object;
+    struct slab *slab = virt_to_slab(object);
+    struct kmem_cache *s;
+
+    kfree(delayed_free);
+
+    if (WARN_ON(is_kfence_address(object)))
+        return;
+
+    /* find the object and the cache again */
+    if (WARN_ON(!slab))
+        return;
+    s = slab->slab_cache;
+    if (WARN_ON(!(s->flags & SLAB_TYPESAFE_BY_RCU)))
+        return;
+
+    /* resume freeing */
+    if (slab_free_hook(s, object, slab_want_init_on_free(s), true))
+        do_slab_free(s, slab, object, object, 1, _THIS_IP_);
+}
+#endif /* CONFIG_SLUB_RCU_DEBUG */
+
 #ifdef CONFIG_KASAN_GENERIC
 void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
 {
@@ -714,6 +714,7 @@ struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg)
 out:
     release_sock(sk);
     if (newsk && mem_cgroup_sockets_enabled) {
+        gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL;
         int amt = 0;
 
         /* atomically get the memory usage, set and charge the
@@ -731,8 +732,8 @@ out:
         }
 
         if (amt)
-            mem_cgroup_charge_skmem(newsk->sk_memcg, amt,
-                        GFP_KERNEL | __GFP_NOFAIL);
+            mem_cgroup_charge_skmem(newsk->sk_memcg, amt, gfp);
+        kmem_cache_charge(newsk, gfp);
 
         release_sock(newsk);
     }