linux/include/linux/slub_def.h
Mikulas Patocka d50d82faa0 slub: fix failure when we delete and create a slab cache
In kernel 4.17 I removed some code from dm-bufio that did slab cache
merging (commit 21bb132767: "dm bufio: remove code that merges slab
caches") - both slab and slub support merging caches with identical
attributes, so dm-bufio now just calls kmem_cache_create and relies on
implicit merging.

This uncovered a bug in the slub subsystem - if we delete a cache and
immediatelly create another cache with the same attributes, it fails
because of duplicate filename in /sys/kernel/slab/.  The slub subsystem
offloads freeing the cache to a workqueue - and if we create the new
cache before the workqueue runs, it complains because of duplicate
filename in sysfs.

This patch fixes the bug by moving the call of kobject_del from
sysfs_slab_remove_workfn to shutdown_cache.  kobject_del must be called
while we hold slab_mutex - so that the sysfs entry is deleted before a
cache with the same attributes could be created.

Running device-mapper-test-suite with:

  dmtest run --suite thin-provisioning -n /commit_failure_causes_fallback/

triggered:

  Buffer I/O error on dev dm-0, logical block 1572848, async page read
  device-mapper: thin: 253:1: metadata operation 'dm_pool_alloc_data_block' failed: error = -5
  device-mapper: thin: 253:1: aborting current metadata transaction
  sysfs: cannot create duplicate filename '/kernel/slab/:a-0000144'
  CPU: 2 PID: 1037 Comm: kworker/u48:1 Not tainted 4.17.0.snitm+ #25
  Hardware name: Supermicro SYS-1029P-WTR/X11DDW-L, BIOS 2.0a 12/06/2017
  Workqueue: dm-thin do_worker [dm_thin_pool]
  Call Trace:
   dump_stack+0x5a/0x73
   sysfs_warn_dup+0x58/0x70
   sysfs_create_dir_ns+0x77/0x80
   kobject_add_internal+0xba/0x2e0
   kobject_init_and_add+0x70/0xb0
   sysfs_slab_add+0xb1/0x250
   __kmem_cache_create+0x116/0x150
   create_cache+0xd9/0x1f0
   kmem_cache_create_usercopy+0x1c1/0x250
   kmem_cache_create+0x18/0x20
   dm_bufio_client_create+0x1ae/0x410 [dm_bufio]
   dm_block_manager_create+0x5e/0x90 [dm_persistent_data]
   __create_persistent_data_objects+0x38/0x940 [dm_thin_pool]
   dm_pool_abort_metadata+0x64/0x90 [dm_thin_pool]
   metadata_operation_failed+0x59/0x100 [dm_thin_pool]
   alloc_data_block.isra.53+0x86/0x180 [dm_thin_pool]
   process_cell+0x2a3/0x550 [dm_thin_pool]
   do_worker+0x28d/0x8f0 [dm_thin_pool]
   process_one_work+0x171/0x370
   worker_thread+0x49/0x3f0
   kthread+0xf8/0x130
   ret_from_fork+0x35/0x40
  kobject_add_internal failed for :a-0000144 with -EEXIST, don't try to register things with the same name in the same directory.
  kmem_cache_create(dm_bufio_buffer-16) failed with error -17

Link: http://lkml.kernel.org/r/alpine.LRH.2.02.1806151817130.6333@file01.intranet.prod.int.rdu2.redhat.com
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reported-by: Mike Snitzer <snitzer@redhat.com>
Tested-by: Mike Snitzer <snitzer@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-06-28 11:16:44 -07:00

186 lines
5.6 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SLUB_DEF_H
#define _LINUX_SLUB_DEF_H
/*
* SLUB : A Slab allocator without object queues.
*
* (C) 2007 SGI, Christoph Lameter
*/
#include <linux/kobject.h>
enum stat_item {
ALLOC_FASTPATH, /* Allocation from cpu slab */
ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
FREE_FASTPATH, /* Free to cpu slab */
FREE_SLOWPATH, /* Freeing not to cpu slab */
FREE_FROZEN, /* Freeing to frozen slab */
FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */
FREE_REMOVE_PARTIAL, /* Freeing removes last object */
ALLOC_FROM_PARTIAL, /* Cpu slab acquired from node partial list */
ALLOC_SLAB, /* Cpu slab acquired from page allocator */
ALLOC_REFILL, /* Refill cpu slab from slab freelist */
ALLOC_NODE_MISMATCH, /* Switching cpu slab */
FREE_SLAB, /* Slab freed to the page allocator */
CPUSLAB_FLUSH, /* Abandoning of the cpu slab */
DEACTIVATE_FULL, /* Cpu slab was full when deactivated */
DEACTIVATE_EMPTY, /* Cpu slab was empty when deactivated */
DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */
DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */
DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
DEACTIVATE_BYPASS, /* Implicit deactivation */
ORDER_FALLBACK, /* Number of times fallback was necessary */
CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
CMPXCHG_DOUBLE_FAIL, /* Number of times that cmpxchg double did not match */
CPU_PARTIAL_ALLOC, /* Used cpu partial on alloc */
CPU_PARTIAL_FREE, /* Refill cpu partial on free */
CPU_PARTIAL_NODE, /* Refill cpu partial from node partial */
CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */
NR_SLUB_STAT_ITEMS };
struct kmem_cache_cpu {
void **freelist; /* Pointer to next available object */
unsigned long tid; /* Globally unique transaction id */
struct page *page; /* The slab from which we are allocating */
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct page *partial; /* Partially allocated frozen slabs */
#endif
#ifdef CONFIG_SLUB_STATS
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
};
#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_percpu_partial(c) ((c)->partial)
#define slub_set_percpu_partial(c, p) \
({ \
slub_percpu_partial(c) = (p)->next; \
})
#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c))
#else
#define slub_percpu_partial(c) NULL
#define slub_set_percpu_partial(c, p)
#define slub_percpu_partial_read_once(c) NULL
#endif // CONFIG_SLUB_CPU_PARTIAL
/*
* Word size structure that can be atomically updated or read and that
* contains both the order and the number of objects that a slab of the
* given order would contain.
*/
struct kmem_cache_order_objects {
unsigned int x;
};
/*
* Slab cache management.
*/
struct kmem_cache {
struct kmem_cache_cpu __percpu *cpu_slab;
/* Used for retriving partial slabs etc */
slab_flags_t flags;
unsigned long min_partial;
unsigned int size; /* The size of an object including meta data */
unsigned int object_size;/* The size of an object without meta data */
unsigned int offset; /* Free pointer offset. */
#ifdef CONFIG_SLUB_CPU_PARTIAL
/* Number of per cpu partial objects to keep around */
unsigned int cpu_partial;
#endif
struct kmem_cache_order_objects oo;
/* Allocation and freeing of slabs */
struct kmem_cache_order_objects max;
struct kmem_cache_order_objects min;
gfp_t allocflags; /* gfp flags to use on each alloc */
int refcount; /* Refcount for slab cache destroy */
void (*ctor)(void *);
unsigned int inuse; /* Offset to metadata */
unsigned int align; /* Alignment */
unsigned int red_left_pad; /* Left redzone padding size */
const char *name; /* Name (only for display!) */
struct list_head list; /* List of slab caches */
#ifdef CONFIG_SYSFS
struct kobject kobj; /* For sysfs */
struct work_struct kobj_remove_work;
#endif
#ifdef CONFIG_MEMCG
struct memcg_cache_params memcg_params;
/* for propagation, maximum size of a stored attr */
unsigned int max_attr_size;
#ifdef CONFIG_SYSFS
struct kset *memcg_kset;
#endif
#endif
#ifdef CONFIG_SLAB_FREELIST_HARDENED
unsigned long random;
#endif
#ifdef CONFIG_NUMA
/*
* Defragmentation by allocating from a remote node.
*/
unsigned int remote_node_defrag_ratio;
#endif
#ifdef CONFIG_SLAB_FREELIST_RANDOM
unsigned int *random_seq;
#endif
#ifdef CONFIG_KASAN
struct kasan_cache kasan_info;
#endif
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
struct kmem_cache_node *node[MAX_NUMNODES];
};
#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_cpu_partial(s) ((s)->cpu_partial)
#define slub_set_cpu_partial(s, n) \
({ \
slub_cpu_partial(s) = (n); \
})
#else
#define slub_cpu_partial(s) (0)
#define slub_set_cpu_partial(s, n)
#endif // CONFIG_SLUB_CPU_PARTIAL
#ifdef CONFIG_SYSFS
#define SLAB_SUPPORTS_SYSFS
void sysfs_slab_unlink(struct kmem_cache *);
void sysfs_slab_release(struct kmem_cache *);
#else
static inline void sysfs_slab_unlink(struct kmem_cache *s)
{
}
static inline void sysfs_slab_release(struct kmem_cache *s)
{
}
#endif
void object_err(struct kmem_cache *s, struct page *page,
u8 *object, char *reason);
void *fixup_red_left(struct kmem_cache *s, void *p);
static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
void *x) {
void *object = x - (x - page_address(page)) % cache->size;
void *last_object = page_address(page) +
(page->objects - 1) * cache->size;
void *result = (unlikely(object > last_object)) ? last_object : object;
result = fixup_red_left(cache, result);
return result;
}
#endif /* _LINUX_SLUB_DEF_H */