From bb813f4c933ae9f887a014483690d5f8b8ec05e1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Jan 2013 14:05:56 -0800 Subject: [PATCH 1/8] init, block: try to load default elevator module early during boot This patch adds default module loading and uses it to load the default block elevator. During boot, it's called right after initramfs or initrd is made available and right before control is passed to userland. This ensures that as long as the modules are available in the usual places in initramfs, initrd or the root filesystem, the default modules are loaded as soon as possible. This will replace the on-demand elevator module loading from elevator init path. v2: Fixed build breakage when !CONFIG_BLOCK. Reported by kbuild test robot. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Arjan van de Ven Cc: Linus Torvalds Cc: Alex Riesen Cc: Fengguang We --- block/elevator.c | 16 ++++++++++++++++ include/linux/elevator.h | 5 +++++ include/linux/init.h | 1 + init/do_mounts_initrd.c | 3 +++ init/initramfs.c | 8 +++++++- init/main.c | 16 ++++++++++++++++ 6 files changed, 48 insertions(+), 1 deletion(-) diff --git a/block/elevator.c b/block/elevator.c index 9edba1b8323e..c2d61d56e0b7 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -136,6 +136,22 @@ static int __init elevator_setup(char *str) __setup("elevator=", elevator_setup); +/* called during boot to load the elevator chosen by the elevator param */ +void __init load_default_elevator_module(void) +{ + struct elevator_type *e; + + if (!chosen_elevator[0]) + return; + + spin_lock(&elv_list_lock); + e = elevator_find(chosen_elevator); + spin_unlock(&elv_list_lock); + + if (!e) + request_module("%s-iosched", chosen_elevator); +} + static struct kobj_type elv_ktype; static struct elevator_queue *elevator_alloc(struct request_queue *q, diff --git a/include/linux/elevator.h b/include/linux/elevator.h index c03af7687bb4..186620631750 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -138,6 +138,7 @@ extern void elv_drain_elevator(struct request_queue *); /* * io scheduler registration */ +extern void __init load_default_elevator_module(void); extern int elv_register(struct elevator_type *); extern void elv_unregister(struct elevator_type *); @@ -206,5 +207,9 @@ enum { INIT_LIST_HEAD(&(rq)->csd.list); \ } while (0) +#else /* CONFIG_BLOCK */ + +static inline void load_default_elevator_module(void) { } + #endif /* CONFIG_BLOCK */ #endif diff --git a/include/linux/init.h b/include/linux/init.h index a799273714ac..9230c9408d8b 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -161,6 +161,7 @@ extern unsigned int reset_devices; /* used by init/main.c */ void setup_arch(char **); void prepare_namespace(void); +void __init load_default_modules(void); extern void (*late_time_init)(void); diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 5e4ded51788e..dfe606a7dd61 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -57,6 +57,9 @@ static void __init handle_initrd(void) sys_mkdir("/old", 0700); sys_chdir("/old"); + /* try loading default modules from initrd */ + load_default_modules(); + /* * In case that a resume from disk is carried out by linuxrc or one of * its children, we need to tell the freezer not to wait for us. diff --git a/init/initramfs.c b/init/initramfs.c index 84c6bf111300..a67ef9dbda9d 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -592,7 +592,7 @@ static int __init populate_rootfs(void) initrd_end - initrd_start); if (!err) { free_initrd(); - return 0; + goto done; } else { clean_rootfs(); unpack_to_rootfs(__initramfs_start, __initramfs_size); @@ -607,6 +607,7 @@ static int __init populate_rootfs(void) sys_close(fd); free_initrd(); } + done: #else printk(KERN_INFO "Unpacking initramfs...\n"); err = unpack_to_rootfs((char *)initrd_start, @@ -615,6 +616,11 @@ static int __init populate_rootfs(void) printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err); free_initrd(); #endif + /* + * Try loading default modules from initramfs. This gives + * us a chance to load before device_initcalls. + */ + load_default_modules(); } return 0; } diff --git a/init/main.c b/init/main.c index baf1f0f5c461..18efadb11cf6 100644 --- a/init/main.c +++ b/init/main.c @@ -70,6 +70,8 @@ #include #include #include +#include +#include #include #include @@ -794,6 +796,17 @@ static void __init do_pre_smp_initcalls(void) do_one_initcall(*fn); } +/* + * This function requests modules which should be loaded by default and is + * called twice right after initrd is mounted and right before init is + * exec'd. If such modules are on either initrd or rootfs, they will be + * loaded before control is passed to userland. + */ +void __init load_default_modules(void) +{ + load_default_elevator_module(); +} + static int run_init_process(const char *init_filename) { argv_init[0] = init_filename; @@ -898,4 +911,7 @@ static void __init kernel_init_freeable(void) * we're essentially up and running. Get rid of the * initmem segments and start the user-mode stuff.. */ + + /* rootfs is available now, try loading default modules */ + load_default_modules(); } From 21c3c5d2800733b7a276725b8e1ae49a694adc1a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 22 Jan 2013 16:48:03 -0800 Subject: [PATCH 2/8] block: don't request module during elevator init Block layer allows selecting an elevator which is built as a module to be selected as system default via kernel param "elevator=". This is achieved by automatically invoking request_module() whenever a new block device is initialized and the elevator is not available. This led to an interesting deadlock problem involving async and module init. Block device probing running off an async job invokes request_module(). While the module is being loaded, it performs async_synchronize_full() which ends up waiting for the async job which is already waiting for request_module() to finish, leading to deadlock. Invoking request_module() from deep in block device init path is already nasty in itself. It seems best to avoid these situations from the beginning by moving on-demand module loading out of block init path. The previous patch made sure that the default elevator module is loaded early during boot if available. This patch removes on-demand loading of the default elevator from elevator init path. As the module would have been loaded during boot, userland-visible behavior difference should be minimal. For more details, please refer to the following thread. http://thread.gmane.org/gmane.linux.kernel/1420814 v2: The bool parameter was named @request_module which conflicted with request_module(). This built okay w/ CONFIG_MODULES because request_module() was defined as a macro. W/o CONFIG_MODULES, it causes build breakage. Rename the parameter to @try_loading. Reported by Fengguang. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Arjan van de Ven Cc: Linus Torvalds Cc: Alex Riesen Cc: Fengguang Wu --- block/elevator.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/block/elevator.c b/block/elevator.c index c2d61d56e0b7..603b2c178740 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -100,14 +100,14 @@ static void elevator_put(struct elevator_type *e) module_put(e->elevator_owner); } -static struct elevator_type *elevator_get(const char *name) +static struct elevator_type *elevator_get(const char *name, bool try_loading) { struct elevator_type *e; spin_lock(&elv_list_lock); e = elevator_find(name); - if (!e) { + if (!e && try_loading) { spin_unlock(&elv_list_lock); request_module("%s-iosched", name); spin_lock(&elv_list_lock); @@ -207,25 +207,30 @@ int elevator_init(struct request_queue *q, char *name) q->boundary_rq = NULL; if (name) { - e = elevator_get(name); + e = elevator_get(name, true); if (!e) return -EINVAL; } + /* + * Use the default elevator specified by config boot param or + * config option. Don't try to load modules as we could be running + * off async and request_module() isn't allowed from async. + */ if (!e && *chosen_elevator) { - e = elevator_get(chosen_elevator); + e = elevator_get(chosen_elevator, false); if (!e) printk(KERN_ERR "I/O scheduler %s not found\n", chosen_elevator); } if (!e) { - e = elevator_get(CONFIG_DEFAULT_IOSCHED); + e = elevator_get(CONFIG_DEFAULT_IOSCHED, false); if (!e) { printk(KERN_ERR "Default I/O scheduler not found. " \ "Using noop.\n"); - e = elevator_get("noop"); + e = elevator_get("noop", false); } } @@ -967,7 +972,7 @@ int elevator_change(struct request_queue *q, const char *name) return -ENXIO; strlcpy(elevator_name, name, sizeof(elevator_name)); - e = elevator_get(strstrip(elevator_name)); + e = elevator_get(strstrip(elevator_name), true); if (!e) { printk(KERN_ERR "elevator: type %s not found\n", elevator_name); return -EINVAL; From 0fdff3ec6d87856cdcc99e69cf42143fdd6c56b4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 22 Jan 2013 16:48:03 -0800 Subject: [PATCH 3/8] async, kmod: warn on synchronous request_module() from async workers Synchronous requet_module() from an async worker can lead to deadlock because module init path may invoke async_synchronize_full(). The async worker waits for request_module() to complete and the module loading waits for the async task to finish. This bug happened in the block layer because of default elevator auto-loading. Block layer has been updated not to do default elevator auto-loading and it has been decided to disallow synchronous request_module() from async workers. Trigger WARN_ON_ONCE() on synchronous request_module() from async workers. For more details, please refer to the following thread. http://thread.gmane.org/gmane.linux.kernel/1420814 Signed-off-by: Tejun Heo Reported-by: Alex Riesen Cc: Linus Torvalds Cc: Arjan van de Ven --- kernel/kmod.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/kmod.c b/kernel/kmod.c index 1c317e386831..ecd42b484db8 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -130,6 +131,14 @@ int __request_module(bool wait, const char *fmt, ...) #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ static int kmod_loop_msg; + /* + * We don't allow synchronous module loading from async. Module + * init may invoke async_synchronize_full() which will end up + * waiting for this task which already is waiting for the module + * loading to complete, leading to a deadlock. + */ + WARN_ON_ONCE(wait && current_is_async()); + va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); va_end(args); From 8723d5037cafea09c7242303c6c8e5d7058cec61 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Jan 2013 09:32:30 -0800 Subject: [PATCH 4/8] async: bring sanity to the use of words domain and running In the beginning, running lists were literal struct list_heads. Later on, struct async_domain was added. For some reason, while the conversion substituted list_heads with async_domains, the variable names weren't fully converted. In more places, "running" was used for struct async_domain while other places adopted new "domain" name. The situation is made much worse by having async_domain's running list named "domain" and async_entry's field pointing to async_domain named "running". So, we end up with mix of "running" and "domain" for variable names for async_domain, with the field names of async_domain and async_entry swapped between "running" and "domain". It feels almost intentionally made to be as confusing as possible. Bring some sanity by * Renaming all async_domain variables "domain". * s/async_running/async_dfl_domain/ * s/async_domain->domain/async_domain->running/ * s/async_entry->running/async_entry->domain/ Signed-off-by: Tejun Heo Cc: Arjan van de Ven Cc: Dan Williams Cc: Linus Torvalds --- include/linux/async.h | 6 ++-- kernel/async.c | 68 +++++++++++++++++++++---------------------- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/include/linux/async.h b/include/linux/async.h index 345169cfa304..34ff5c610e0e 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -19,7 +19,7 @@ typedef u64 async_cookie_t; typedef void (async_func_ptr) (void *data, async_cookie_t cookie); struct async_domain { struct list_head node; - struct list_head domain; + struct list_head running; int count; unsigned registered:1; }; @@ -29,7 +29,7 @@ struct async_domain { */ #define ASYNC_DOMAIN(_name) \ struct async_domain _name = { .node = LIST_HEAD_INIT(_name.node), \ - .domain = LIST_HEAD_INIT(_name.domain), \ + .running = LIST_HEAD_INIT(_name.running), \ .count = 0, \ .registered = 1 } @@ -39,7 +39,7 @@ struct async_domain { */ #define ASYNC_DOMAIN_EXCLUSIVE(_name) \ struct async_domain _name = { .node = LIST_HEAD_INIT(_name.node), \ - .domain = LIST_HEAD_INIT(_name.domain), \ + .running = LIST_HEAD_INIT(_name.running), \ .count = 0, \ .registered = 0 } diff --git a/kernel/async.c b/kernel/async.c index 6c68fc3fae7b..29d51d483bee 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -64,7 +64,7 @@ static async_cookie_t next_cookie = 1; #define MAX_WORK 32768 static LIST_HEAD(async_pending); -static ASYNC_DOMAIN(async_running); +static ASYNC_DOMAIN(async_dfl_domain); static LIST_HEAD(async_domains); static DEFINE_SPINLOCK(async_lock); static DEFINE_MUTEX(async_register_mutex); @@ -75,7 +75,7 @@ struct async_entry { async_cookie_t cookie; async_func_ptr *func; void *data; - struct async_domain *running; + struct async_domain *domain; }; static DECLARE_WAIT_QUEUE_HEAD(async_done); @@ -86,7 +86,7 @@ static atomic_t entry_count; /* * MUST be called with the lock held! */ -static async_cookie_t __lowest_in_progress(struct async_domain *running) +static async_cookie_t __lowest_in_progress(struct async_domain *domain) { async_cookie_t first_running = next_cookie; /* infinity value */ async_cookie_t first_pending = next_cookie; /* ditto */ @@ -96,13 +96,13 @@ static async_cookie_t __lowest_in_progress(struct async_domain *running) * Both running and pending lists are sorted but not disjoint. * Take the first cookies from both and return the min. */ - if (!list_empty(&running->domain)) { - entry = list_first_entry(&running->domain, typeof(*entry), list); + if (!list_empty(&domain->running)) { + entry = list_first_entry(&domain->running, typeof(*entry), list); first_running = entry->cookie; } list_for_each_entry(entry, &async_pending, list) { - if (entry->running == running) { + if (entry->domain == domain) { first_pending = entry->cookie; break; } @@ -111,13 +111,13 @@ static async_cookie_t __lowest_in_progress(struct async_domain *running) return min(first_running, first_pending); } -static async_cookie_t lowest_in_progress(struct async_domain *running) +static async_cookie_t lowest_in_progress(struct async_domain *domain) { unsigned long flags; async_cookie_t ret; spin_lock_irqsave(&async_lock, flags); - ret = __lowest_in_progress(running); + ret = __lowest_in_progress(domain); spin_unlock_irqrestore(&async_lock, flags); return ret; } @@ -132,11 +132,11 @@ static void async_run_entry_fn(struct work_struct *work) struct async_entry *pos; unsigned long flags; ktime_t uninitialized_var(calltime), delta, rettime; - struct async_domain *running = entry->running; + struct async_domain *domain = entry->domain; /* 1) move self to the running queue, make sure it stays sorted */ spin_lock_irqsave(&async_lock, flags); - list_for_each_entry_reverse(pos, &running->domain, list) + list_for_each_entry_reverse(pos, &domain->running, list) if (entry->cookie < pos->cookie) break; list_move_tail(&entry->list, &pos->list); @@ -162,8 +162,8 @@ static void async_run_entry_fn(struct work_struct *work) /* 3) remove self from the running queue */ spin_lock_irqsave(&async_lock, flags); list_del(&entry->list); - if (running->registered && --running->count == 0) - list_del_init(&running->node); + if (domain->registered && --domain->count == 0) + list_del_init(&domain->node); /* 4) free the entry */ kfree(entry); @@ -175,7 +175,7 @@ static void async_run_entry_fn(struct work_struct *work) wake_up(&async_done); } -static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct async_domain *running) +static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct async_domain *domain) { struct async_entry *entry; unsigned long flags; @@ -201,13 +201,13 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a INIT_WORK(&entry->work, async_run_entry_fn); entry->func = ptr; entry->data = data; - entry->running = running; + entry->domain = domain; spin_lock_irqsave(&async_lock, flags); newcookie = entry->cookie = next_cookie++; list_add_tail(&entry->list, &async_pending); - if (running->registered && running->count++ == 0) - list_add_tail(&running->node, &async_domains); + if (domain->registered && domain->count++ == 0) + list_add_tail(&domain->node, &async_domains); atomic_inc(&entry_count); spin_unlock_irqrestore(&async_lock, flags); @@ -230,7 +230,7 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a */ async_cookie_t async_schedule(async_func_ptr *ptr, void *data) { - return __async_schedule(ptr, data, &async_running); + return __async_schedule(ptr, data, &async_dfl_domain); } EXPORT_SYMBOL_GPL(async_schedule); @@ -238,18 +238,18 @@ EXPORT_SYMBOL_GPL(async_schedule); * async_schedule_domain - schedule a function for asynchronous execution within a certain domain * @ptr: function to execute asynchronously * @data: data pointer to pass to the function - * @running: running list for the domain + * @domain: the domain * * Returns an async_cookie_t that may be used for checkpointing later. - * @running may be used in the async_synchronize_*_domain() functions - * to wait within a certain synchronization domain rather than globally. - * A synchronization domain is specified via the running queue @running to use. - * Note: This function may be called from atomic or non-atomic contexts. + * @domain may be used in the async_synchronize_*_domain() functions to + * wait within a certain synchronization domain rather than globally. A + * synchronization domain is specified via @domain. Note: This function + * may be called from atomic or non-atomic contexts. */ async_cookie_t async_schedule_domain(async_func_ptr *ptr, void *data, - struct async_domain *running) + struct async_domain *domain) { - return __async_schedule(ptr, data, running); + return __async_schedule(ptr, data, domain); } EXPORT_SYMBOL_GPL(async_schedule_domain); @@ -289,7 +289,7 @@ void async_unregister_domain(struct async_domain *domain) mutex_lock(&async_register_mutex); spin_lock_irq(&async_lock); WARN_ON(!domain->registered || !list_empty(&domain->node) || - !list_empty(&domain->domain)); + !list_empty(&domain->running)); domain->registered = 0; spin_unlock_irq(&async_lock); mutex_unlock(&async_register_mutex); @@ -298,10 +298,10 @@ EXPORT_SYMBOL_GPL(async_unregister_domain); /** * async_synchronize_full_domain - synchronize all asynchronous function within a certain domain - * @domain: running list to synchronize on + * @domain: the domain to synchronize * * This function waits until all asynchronous function calls for the - * synchronization domain specified by the running list @domain have been done. + * synchronization domain specified by @domain have been done. */ void async_synchronize_full_domain(struct async_domain *domain) { @@ -312,17 +312,17 @@ EXPORT_SYMBOL_GPL(async_synchronize_full_domain); /** * async_synchronize_cookie_domain - synchronize asynchronous function calls within a certain domain with cookie checkpointing * @cookie: async_cookie_t to use as checkpoint - * @running: running list to synchronize on + * @domain: the domain to synchronize * * This function waits until all asynchronous function calls for the - * synchronization domain specified by running list @running submitted - * prior to @cookie have been done. + * synchronization domain specified by @domain submitted prior to @cookie + * have been done. */ -void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *running) +void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain) { ktime_t uninitialized_var(starttime), delta, endtime; - if (!running) + if (!domain) return; if (initcall_debug && system_state == SYSTEM_BOOTING) { @@ -330,7 +330,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain starttime = ktime_get(); } - wait_event(async_done, lowest_in_progress(running) >= cookie); + wait_event(async_done, lowest_in_progress(domain) >= cookie); if (initcall_debug && system_state == SYSTEM_BOOTING) { endtime = ktime_get(); @@ -352,7 +352,7 @@ EXPORT_SYMBOL_GPL(async_synchronize_cookie_domain); */ void async_synchronize_cookie(async_cookie_t cookie) { - async_synchronize_cookie_domain(cookie, &async_running); + async_synchronize_cookie_domain(cookie, &async_dfl_domain); } EXPORT_SYMBOL_GPL(async_synchronize_cookie); From c68eee14ec2da345e86f2778c8570759309a4a2e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Jan 2013 09:32:30 -0800 Subject: [PATCH 5/8] async: use ULLONG_MAX for infinity cookie value Currently, next_cookie is used as the infinity value. In most cases, this should work fine but it theoretically could bring subtle behavior difference between async_synchronize_full() and async_synchronize_full_domain(). async_synchronize_full() keeps waiting until there's no registered async_entry left regardless of what next_cookie was when the function was called. It guarantees that the queue is completely drained at least once before returning. However, async_synchronize_full_domain() doesn't. It synchronizes upto next_cookie and if further async jobs are queued after the next_cookie value to synchronize is decided, they won't be waited for. For unrelated async jobs, the behavior difference doesn't matter; however, if async jobs which are related (nested or otherwise) to the executing ones are queued while sychronization is in progress, the resulting behavior difference could be problematic. This can be easily fixed by using ULLONG_MAX as the infinity value instead. Define ASYNC_COOKIE_MAX as ULLONG_MAX and use it as the infinity value for synchronization. This makes async_synchronize_full_domain() fully drain the domain at least once before returning, making its behavior match async_synchronize_full(). Signed-off-by: Tejun Heo Cc: Arjan van de Ven Cc: Dan Williams Cc: Linus Torvalds --- kernel/async.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/async.c b/kernel/async.c index 29d51d483bee..a4c1a9e63b2e 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -61,7 +61,8 @@ asynchronous and synchronous parts of the kernel. static async_cookie_t next_cookie = 1; -#define MAX_WORK 32768 +#define MAX_WORK 32768 +#define ASYNC_COOKIE_MAX ULLONG_MAX /* infinity cookie */ static LIST_HEAD(async_pending); static ASYNC_DOMAIN(async_dfl_domain); @@ -88,8 +89,8 @@ static atomic_t entry_count; */ static async_cookie_t __lowest_in_progress(struct async_domain *domain) { - async_cookie_t first_running = next_cookie; /* infinity value */ - async_cookie_t first_pending = next_cookie; /* ditto */ + async_cookie_t first_running = ASYNC_COOKIE_MAX; + async_cookie_t first_pending = ASYNC_COOKIE_MAX; struct async_entry *entry; /* @@ -269,7 +270,7 @@ void async_synchronize_full(void) domain = list_first_entry(&async_domains, typeof(*domain), node); spin_unlock_irq(&async_lock); - async_synchronize_cookie_domain(next_cookie, domain); + async_synchronize_cookie_domain(ASYNC_COOKIE_MAX, domain); } while (!list_empty(&async_domains)); mutex_unlock(&async_register_mutex); } @@ -305,7 +306,7 @@ EXPORT_SYMBOL_GPL(async_unregister_domain); */ void async_synchronize_full_domain(struct async_domain *domain) { - async_synchronize_cookie_domain(next_cookie, domain); + async_synchronize_cookie_domain(ASYNC_COOKIE_MAX, domain); } EXPORT_SYMBOL_GPL(async_synchronize_full_domain); From 52722794d6a48162fd8906d54618ae60a4abdb21 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Jan 2013 09:32:30 -0800 Subject: [PATCH 6/8] async: keep pending tasks on async_domain and remove async_pending Async kept single global pending list and per-domain running lists. When an async item is queued, it's put on the global pending list. The item is moved to the per-domain running list when its execution starts. At this point, this design complicates execution and synchronization without bringing any benefit. The list only matters for synchronization which doesn't care whether a given async item is pending or executing. Also, global synchronization is done by iterating through all active registered async_domains, so the global async_pending list doesn't help anything either. Rename async_domain->running to async_domain->pending and put async items directly there and remove when execution completes. This simplifies lowest_in_progress() a lot - the first item on the pending list is the one with the lowest cookie, and async_run_entry_fn() doesn't have to mess with moving the item from pending to running. After the change, whether a domain is empty or not can be trivially determined by looking at async_domain->pending. Remove async_domain->count and use list_empty() on pending instead. Signed-off-by: Tejun Heo Cc: Arjan van de Ven Cc: Dan Williams Cc: Linus Torvalds --- include/linux/async.h | 9 +++---- kernel/async.c | 63 ++++++++++--------------------------------- 2 files changed, 17 insertions(+), 55 deletions(-) diff --git a/include/linux/async.h b/include/linux/async.h index 34ff5c610e0e..a2e3f18b2ad6 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -19,8 +19,7 @@ typedef u64 async_cookie_t; typedef void (async_func_ptr) (void *data, async_cookie_t cookie); struct async_domain { struct list_head node; - struct list_head running; - int count; + struct list_head pending; unsigned registered:1; }; @@ -29,8 +28,7 @@ struct async_domain { */ #define ASYNC_DOMAIN(_name) \ struct async_domain _name = { .node = LIST_HEAD_INIT(_name.node), \ - .running = LIST_HEAD_INIT(_name.running), \ - .count = 0, \ + .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 1 } /* @@ -39,8 +37,7 @@ struct async_domain { */ #define ASYNC_DOMAIN_EXCLUSIVE(_name) \ struct async_domain _name = { .node = LIST_HEAD_INIT(_name.node), \ - .running = LIST_HEAD_INIT(_name.running), \ - .count = 0, \ + .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 0 } extern async_cookie_t async_schedule(async_func_ptr *ptr, void *data); diff --git a/kernel/async.c b/kernel/async.c index a4c1a9e63b2e..7c9f50f436d6 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -64,7 +64,6 @@ static async_cookie_t next_cookie = 1; #define MAX_WORK 32768 #define ASYNC_COOKIE_MAX ULLONG_MAX /* infinity cookie */ -static LIST_HEAD(async_pending); static ASYNC_DOMAIN(async_dfl_domain); static LIST_HEAD(async_domains); static DEFINE_SPINLOCK(async_lock); @@ -83,42 +82,17 @@ static DECLARE_WAIT_QUEUE_HEAD(async_done); static atomic_t entry_count; - -/* - * MUST be called with the lock held! - */ -static async_cookie_t __lowest_in_progress(struct async_domain *domain) -{ - async_cookie_t first_running = ASYNC_COOKIE_MAX; - async_cookie_t first_pending = ASYNC_COOKIE_MAX; - struct async_entry *entry; - - /* - * Both running and pending lists are sorted but not disjoint. - * Take the first cookies from both and return the min. - */ - if (!list_empty(&domain->running)) { - entry = list_first_entry(&domain->running, typeof(*entry), list); - first_running = entry->cookie; - } - - list_for_each_entry(entry, &async_pending, list) { - if (entry->domain == domain) { - first_pending = entry->cookie; - break; - } - } - - return min(first_running, first_pending); -} - static async_cookie_t lowest_in_progress(struct async_domain *domain) { + async_cookie_t ret = ASYNC_COOKIE_MAX; unsigned long flags; - async_cookie_t ret; spin_lock_irqsave(&async_lock, flags); - ret = __lowest_in_progress(domain); + if (!list_empty(&domain->pending)) { + struct async_entry *first = list_first_entry(&domain->pending, + struct async_entry, list); + ret = first->cookie; + } spin_unlock_irqrestore(&async_lock, flags); return ret; } @@ -130,20 +104,11 @@ static void async_run_entry_fn(struct work_struct *work) { struct async_entry *entry = container_of(work, struct async_entry, work); - struct async_entry *pos; unsigned long flags; ktime_t uninitialized_var(calltime), delta, rettime; struct async_domain *domain = entry->domain; - /* 1) move self to the running queue, make sure it stays sorted */ - spin_lock_irqsave(&async_lock, flags); - list_for_each_entry_reverse(pos, &domain->running, list) - if (entry->cookie < pos->cookie) - break; - list_move_tail(&entry->list, &pos->list); - spin_unlock_irqrestore(&async_lock, flags); - - /* 2) run (and print duration) */ + /* 1) run (and print duration) */ if (initcall_debug && system_state == SYSTEM_BOOTING) { printk(KERN_DEBUG "calling %lli_%pF @ %i\n", (long long)entry->cookie, @@ -160,19 +125,19 @@ static void async_run_entry_fn(struct work_struct *work) (long long)ktime_to_ns(delta) >> 10); } - /* 3) remove self from the running queue */ + /* 2) remove self from the pending queues */ spin_lock_irqsave(&async_lock, flags); list_del(&entry->list); - if (domain->registered && --domain->count == 0) + if (domain->registered && list_empty(&domain->pending)) list_del_init(&domain->node); - /* 4) free the entry */ + /* 3) free the entry */ kfree(entry); atomic_dec(&entry_count); spin_unlock_irqrestore(&async_lock, flags); - /* 5) wake up any waiters */ + /* 4) wake up any waiters */ wake_up(&async_done); } @@ -206,9 +171,9 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a spin_lock_irqsave(&async_lock, flags); newcookie = entry->cookie = next_cookie++; - list_add_tail(&entry->list, &async_pending); - if (domain->registered && domain->count++ == 0) + if (domain->registered && list_empty(&domain->pending)) list_add_tail(&domain->node, &async_domains); + list_add_tail(&entry->list, &domain->pending); atomic_inc(&entry_count); spin_unlock_irqrestore(&async_lock, flags); @@ -290,7 +255,7 @@ void async_unregister_domain(struct async_domain *domain) mutex_lock(&async_register_mutex); spin_lock_irq(&async_lock); WARN_ON(!domain->registered || !list_empty(&domain->node) || - !list_empty(&domain->running)); + !list_empty(&domain->pending)); domain->registered = 0; spin_unlock_irq(&async_lock); mutex_unlock(&async_register_mutex); From 9fdb04cdc5566d6ba68283a0bebe49667ca0b0e8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Jan 2013 09:32:30 -0800 Subject: [PATCH 7/8] async: replace list of active domains with global list of pending items Global synchronization - async_synchronize_full() - is currently implemented by keeping a list of all active registered domains and syncing them one by one until no domain is active. While this isn't necessarily a complex scheme, it can easily be simplified by keeping global list of the pending items of all registered active domains instead of list of domains and simply using the globl pending list the same way as domain syncing. This patch replaces async_domains with async_global_pending and update lowest_in_progress() to use the global pending list if @domain is %NULL. async_synchronize_full_domain(NULL) is now allowed and equivalent to async_synchronize_full(). As no one is calling with NULL domain, this doesn't affect any existing users. async_register_mutex is no longer necessary and dropped. Signed-off-by: Tejun Heo Cc: Arjan van de Ven Cc: Dan Williams Cc: Linus Torvalds --- kernel/async.c | 63 +++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 34 deletions(-) diff --git a/kernel/async.c b/kernel/async.c index 7c9f50f436d6..6958000eeb44 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -64,13 +64,13 @@ static async_cookie_t next_cookie = 1; #define MAX_WORK 32768 #define ASYNC_COOKIE_MAX ULLONG_MAX /* infinity cookie */ +static LIST_HEAD(async_global_pending); /* pending from all registered doms */ static ASYNC_DOMAIN(async_dfl_domain); -static LIST_HEAD(async_domains); static DEFINE_SPINLOCK(async_lock); -static DEFINE_MUTEX(async_register_mutex); struct async_entry { - struct list_head list; + struct list_head domain_list; + struct list_head global_list; struct work_struct work; async_cookie_t cookie; async_func_ptr *func; @@ -84,15 +84,25 @@ static atomic_t entry_count; static async_cookie_t lowest_in_progress(struct async_domain *domain) { + struct async_entry *first = NULL; async_cookie_t ret = ASYNC_COOKIE_MAX; unsigned long flags; spin_lock_irqsave(&async_lock, flags); - if (!list_empty(&domain->pending)) { - struct async_entry *first = list_first_entry(&domain->pending, - struct async_entry, list); - ret = first->cookie; + + if (domain) { + if (!list_empty(&domain->pending)) + first = list_first_entry(&domain->pending, + struct async_entry, domain_list); + } else { + if (!list_empty(&async_global_pending)) + first = list_first_entry(&async_global_pending, + struct async_entry, global_list); } + + if (first) + ret = first->cookie; + spin_unlock_irqrestore(&async_lock, flags); return ret; } @@ -106,7 +116,6 @@ static void async_run_entry_fn(struct work_struct *work) container_of(work, struct async_entry, work); unsigned long flags; ktime_t uninitialized_var(calltime), delta, rettime; - struct async_domain *domain = entry->domain; /* 1) run (and print duration) */ if (initcall_debug && system_state == SYSTEM_BOOTING) { @@ -127,9 +136,8 @@ static void async_run_entry_fn(struct work_struct *work) /* 2) remove self from the pending queues */ spin_lock_irqsave(&async_lock, flags); - list_del(&entry->list); - if (domain->registered && list_empty(&domain->pending)) - list_del_init(&domain->node); + list_del_init(&entry->domain_list); + list_del_init(&entry->global_list); /* 3) free the entry */ kfree(entry); @@ -170,10 +178,14 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a entry->domain = domain; spin_lock_irqsave(&async_lock, flags); + + /* allocate cookie and queue */ newcookie = entry->cookie = next_cookie++; - if (domain->registered && list_empty(&domain->pending)) - list_add_tail(&domain->node, &async_domains); - list_add_tail(&entry->list, &domain->pending); + + list_add_tail(&entry->domain_list, &domain->pending); + if (domain->registered) + list_add_tail(&entry->global_list, &async_global_pending); + atomic_inc(&entry_count); spin_unlock_irqrestore(&async_lock, flags); @@ -226,18 +238,7 @@ EXPORT_SYMBOL_GPL(async_schedule_domain); */ void async_synchronize_full(void) { - mutex_lock(&async_register_mutex); - do { - struct async_domain *domain = NULL; - - spin_lock_irq(&async_lock); - if (!list_empty(&async_domains)) - domain = list_first_entry(&async_domains, typeof(*domain), node); - spin_unlock_irq(&async_lock); - - async_synchronize_cookie_domain(ASYNC_COOKIE_MAX, domain); - } while (!list_empty(&async_domains)); - mutex_unlock(&async_register_mutex); + async_synchronize_full_domain(NULL); } EXPORT_SYMBOL_GPL(async_synchronize_full); @@ -252,13 +253,10 @@ EXPORT_SYMBOL_GPL(async_synchronize_full); */ void async_unregister_domain(struct async_domain *domain) { - mutex_lock(&async_register_mutex); spin_lock_irq(&async_lock); - WARN_ON(!domain->registered || !list_empty(&domain->node) || - !list_empty(&domain->pending)); + WARN_ON(!domain->registered || !list_empty(&domain->pending)); domain->registered = 0; spin_unlock_irq(&async_lock); - mutex_unlock(&async_register_mutex); } EXPORT_SYMBOL_GPL(async_unregister_domain); @@ -278,7 +276,7 @@ EXPORT_SYMBOL_GPL(async_synchronize_full_domain); /** * async_synchronize_cookie_domain - synchronize asynchronous function calls within a certain domain with cookie checkpointing * @cookie: async_cookie_t to use as checkpoint - * @domain: the domain to synchronize + * @domain: the domain to synchronize (%NULL for all registered domains) * * This function waits until all asynchronous function calls for the * synchronization domain specified by @domain submitted prior to @cookie @@ -288,9 +286,6 @@ void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain { ktime_t uninitialized_var(starttime), delta, endtime; - if (!domain) - return; - if (initcall_debug && system_state == SYSTEM_BOOTING) { printk(KERN_DEBUG "async_waiting @ %i\n", task_pid_nr(current)); starttime = ktime_get(); From a0327ff0eda915be623658babacef706099c11a8 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 25 Jan 2013 10:13:59 +0000 Subject: [PATCH 8/8] async: initialise list heads to fix crash 9fdb04cdc55 ("async: replace list of active domains with global list of pending items") added a struct list_head global_list in struct async_entry, which isn't initialised. This means that if !domain->registered at __async_schedule(), then list_del_init() will be called on the list head in async_run_entry_fn with both pointers NULL, causing a crash. This is fixed by initialising both the global_list and domain_list list_heads after kzalloc'ing the entry. This was noticed due to dapm_power_widgets() which uses ASYNC_DOMAIN_EXCLUSIVE, which initialises the domain->registered to 0. Signed-off-by: James Hogan Signed-off-by: Tejun Heo Reported-by: James Hogan Reported-by: Stephen Warren --- kernel/async.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/async.c b/kernel/async.c index 6958000eeb44..8ddee2c3e5b0 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -172,6 +172,8 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a ptr(data, newcookie); return newcookie; } + INIT_LIST_HEAD(&entry->domain_list); + INIT_LIST_HEAD(&entry->global_list); INIT_WORK(&entry->work, async_run_entry_fn); entry->func = ptr; entry->data = data;