From 4f48795b6154852d07d971e402c35ecc460ddcb6 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 10 Nov 2014 09:26:29 +1030 Subject: [PATCH 1/8] module: Wait for RCU synchronizing before releasing a module Wait for RCU synchronizing on failure path of module loading before releasing struct module, because the memory of mod->list can still be accessed by list walkers (e.g. kallsyms). Signed-off-by: Masami Hiramatsu Signed-off-by: Rusty Russell --- kernel/module.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/module.c b/kernel/module.c index 88cec1ddb1e3..331b03f6b411 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3326,6 +3326,8 @@ static int load_module(struct load_info *info, const char __user *uargs, /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); wake_up_all(&module_wq); + /* Wait for RCU synchronizing before releasing mod->list. */ + synchronize_rcu(); mutex_unlock(&module_mutex); free_module: module_deallocate(mod, info); From 461e34aed0550fee706a9a28fb453830b5079ea0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 10 Nov 2014 09:27:29 +1030 Subject: [PATCH 2/8] module: Unlink module with RCU synchronizing instead of stop_machine Unlink module from module list with RCU synchronizing instead of using stop_machine(). Since module list is already protected by rcu, we don't need stop_machine() anymore. Signed-off-by: Masami Hiramatsu Signed-off-by: Rusty Russell --- kernel/module.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index 331b03f6b411..bed608b8c8a6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1697,18 +1697,6 @@ static void mod_sysfs_teardown(struct module *mod) mod_sysfs_fini(mod); } -/* - * unlink the module with the whole machine is stopped with interrupts off - * - this defends against kallsyms not taking locks - */ -static int __unlink_module(void *_mod) -{ - struct module *mod = _mod; - list_del(&mod->list); - module_bug_cleanup(mod); - return 0; -} - #ifdef CONFIG_DEBUG_SET_MODULE_RONX /* * LKM RO/NX protection: protect module's text/ro-data @@ -1860,7 +1848,11 @@ static void free_module(struct module *mod) /* Now we can delete it from the lists */ mutex_lock(&module_mutex); - stop_machine(__unlink_module, mod, NULL); + /* Unlink carefully: kallsyms could be walking list. */ + list_del_rcu(&mod->list); + /* Wait for RCU synchronizing before releasing mod->list. */ + synchronize_rcu(); + module_bug_cleanup(mod); mutex_unlock(&module_mutex); /* This may be NULL, but that's OK */ From 0286b5ea125e58b4797747f688949c05394412e8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 10 Nov 2014 09:28:29 +1030 Subject: [PATCH 3/8] lib/bug: Use RCU list ops for module_bug_list Actually since module_bug_list should be used in BUG context, we may not need this. But for someone who want to use this from normal context, this makes module_bug_list an RCU list. Signed-off-by: Masami Hiramatsu Signed-off-by: Rusty Russell --- kernel/module.c | 5 +++-- lib/bug.c | 20 ++++++++++++++------ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index bed608b8c8a6..d596a306b0a1 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1850,9 +1850,10 @@ static void free_module(struct module *mod) mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); - /* Wait for RCU synchronizing before releasing mod->list. */ - synchronize_rcu(); + /* Remove this module from bug list, this uses list_del_rcu */ module_bug_cleanup(mod); + /* Wait for RCU synchronizing before releasing mod->list and buglist. */ + synchronize_rcu(); mutex_unlock(&module_mutex); /* This may be NULL, but that's OK */ diff --git a/lib/bug.c b/lib/bug.c index d1d7c7878900..0c3bd9552b6f 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -64,16 +64,22 @@ static LIST_HEAD(module_bug_list); static const struct bug_entry *module_find_bug(unsigned long bugaddr) { struct module *mod; + const struct bug_entry *bug = NULL; - list_for_each_entry(mod, &module_bug_list, bug_list) { - const struct bug_entry *bug = mod->bug_table; + rcu_read_lock(); + list_for_each_entry_rcu(mod, &module_bug_list, bug_list) { unsigned i; + bug = mod->bug_table; for (i = 0; i < mod->num_bugs; ++i, ++bug) if (bugaddr == bug_addr(bug)) - return bug; + goto out; } - return NULL; + bug = NULL; +out: + rcu_read_unlock(); + + return bug; } void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, @@ -99,13 +105,15 @@ void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, * Strictly speaking this should have a spinlock to protect against * traversals, but since we only traverse on BUG()s, a spinlock * could potentially lead to deadlock and thus be counter-productive. + * Thus, this uses RCU to safely manipulate the bug list, since BUG + * must run in non-interruptive state. */ - list_add(&mod->bug_list, &module_bug_list); + list_add_rcu(&mod->bug_list, &module_bug_list); } void module_bug_cleanup(struct module *mod) { - list_del(&mod->bug_list); + list_del_rcu(&mod->bug_list); } #else From 2f35c41f58a978dfa44ffa102249d556caa99eeb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 10 Nov 2014 09:29:29 +1030 Subject: [PATCH 4/8] module: Replace module_ref with atomic_t refcnt Replace module_ref per-cpu complex reference counter with an atomic_t simple refcnt. This is for code simplification. Signed-off-by: Masami Hiramatsu Signed-off-by: Rusty Russell --- include/linux/module.h | 16 +------------- include/trace/events/module.h | 2 +- kernel/module.c | 39 +++++------------------------------ 3 files changed, 7 insertions(+), 50 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 71f282a4e307..ebfb0e153c6a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -210,20 +210,6 @@ enum module_state { MODULE_STATE_UNFORMED, /* Still setting it up. */ }; -/** - * struct module_ref - per cpu module reference counts - * @incs: number of module get on this cpu - * @decs: number of module put on this cpu - * - * We force an alignment on 8 or 16 bytes, so that alloc_percpu() - * put @incs/@decs in same cache line, with no extra memory cost, - * since alloc_percpu() is fine grained. - */ -struct module_ref { - unsigned long incs; - unsigned long decs; -} __attribute((aligned(2 * sizeof(unsigned long)))); - struct module { enum module_state state; @@ -367,7 +353,7 @@ struct module { /* Destruction function. */ void (*exit)(void); - struct module_ref __percpu *refptr; + atomic_t refcnt; #endif #ifdef CONFIG_CONSTRUCTORS diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 7c5cbfe3fc49..81c4c183d348 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -80,7 +80,7 @@ DECLARE_EVENT_CLASS(module_refcnt, TP_fast_assign( __entry->ip = ip; - __entry->refcnt = __this_cpu_read(mod->refptr->incs) - __this_cpu_read(mod->refptr->decs); + __entry->refcnt = atomic_read(&mod->refcnt); __assign_str(name, mod->name); ), diff --git a/kernel/module.c b/kernel/module.c index d596a306b0a1..b1d485df5ac1 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -631,15 +631,11 @@ EXPORT_TRACEPOINT_SYMBOL(module_get); /* Init the unload section of the module. */ static int module_unload_init(struct module *mod) { - mod->refptr = alloc_percpu(struct module_ref); - if (!mod->refptr) - return -ENOMEM; - INIT_LIST_HEAD(&mod->source_list); INIT_LIST_HEAD(&mod->target_list); /* Hold reference count during initialization. */ - raw_cpu_write(mod->refptr->incs, 1); + atomic_set(&mod->refcnt, 1); return 0; } @@ -721,8 +717,6 @@ static void module_unload_free(struct module *mod) kfree(use); } mutex_unlock(&module_mutex); - - free_percpu(mod->refptr); } #ifdef CONFIG_MODULE_FORCE_UNLOAD @@ -772,28 +766,7 @@ static int try_stop_module(struct module *mod, int flags, int *forced) unsigned long module_refcount(struct module *mod) { - unsigned long incs = 0, decs = 0; - int cpu; - - for_each_possible_cpu(cpu) - decs += per_cpu_ptr(mod->refptr, cpu)->decs; - /* - * ensure the incs are added up after the decs. - * module_put ensures incs are visible before decs with smp_wmb. - * - * This 2-count scheme avoids the situation where the refcount - * for CPU0 is read, then CPU0 increments the module refcount, - * then CPU1 drops that refcount, then the refcount for CPU1 is - * read. We would record a decrement but not its corresponding - * increment so we would see a low count (disaster). - * - * Rare situation? But module_refcount can be preempted, and we - * might be tallying up 4096+ CPUs. So it is not impossible. - */ - smp_rmb(); - for_each_possible_cpu(cpu) - incs += per_cpu_ptr(mod->refptr, cpu)->incs; - return incs - decs; + return (unsigned long)atomic_read(&mod->refcnt); } EXPORT_SYMBOL(module_refcount); @@ -935,7 +908,7 @@ void __module_get(struct module *module) { if (module) { preempt_disable(); - __this_cpu_inc(module->refptr->incs); + atomic_inc(&module->refcnt); trace_module_get(module, _RET_IP_); preempt_enable(); } @@ -950,7 +923,7 @@ bool try_module_get(struct module *module) preempt_disable(); if (likely(module_is_live(module))) { - __this_cpu_inc(module->refptr->incs); + atomic_inc(&module->refcnt); trace_module_get(module, _RET_IP_); } else ret = false; @@ -965,9 +938,7 @@ void module_put(struct module *module) { if (module) { preempt_disable(); - smp_wmb(); /* see comment in module_refcount */ - __this_cpu_inc(module->refptr->decs); - + atomic_dec(&module->refcnt); trace_module_put(module, _RET_IP_); preempt_enable(); } From e513cc1c07e2ab93a4514eec9833e031df3e30bb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 10 Nov 2014 09:30:29 +1030 Subject: [PATCH 5/8] module: Remove stop_machine from module unloading Remove stop_machine from module unloading by adding new reference counting algorithm. This atomic refcounter works like a semaphore, it can get (be incremented) only when the counter is not 0. When loading a module, kmodule subsystem sets the counter MODULE_REF_BASE (= 1). And when unloading the module, it subtracts MODULE_REF_BASE from the counter. If no one refers the module, the refcounter becomes 0 and we can remove the module safely. If someone referes it, we try to recover the counter by adding MODULE_REF_BASE unless the counter becomes 0, because the referrer can put the module right before recovering. If the recovering is failed, we can get the 0 refcount and it never be incremented again, it can be removed safely too. Note that __module_get() forcibly gets the module refcounter, users should use try_module_get() instead of that. Signed-off-by: Masami Hiramatsu Signed-off-by: Rusty Russell --- kernel/module.c | 69 ++++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index b1d485df5ac1..e772595d73db 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -98,7 +97,7 @@ * 1) List of modules (also safely readable with preempt_disable), * 2) module_use links, * 3) module_addr_min/module_addr_max. - * (delete uses stop_machine/add uses RCU list operations). */ + * (delete and add uses RCU list operations). */ DEFINE_MUTEX(module_mutex); EXPORT_SYMBOL_GPL(module_mutex); static LIST_HEAD(modules); @@ -628,14 +627,23 @@ static char last_unloaded_module[MODULE_NAME_LEN+1]; EXPORT_TRACEPOINT_SYMBOL(module_get); +/* MODULE_REF_BASE is the base reference count by kmodule loader. */ +#define MODULE_REF_BASE 1 + /* Init the unload section of the module. */ static int module_unload_init(struct module *mod) { + /* + * Initialize reference counter to MODULE_REF_BASE. + * refcnt == 0 means module is going. + */ + atomic_set(&mod->refcnt, MODULE_REF_BASE); + INIT_LIST_HEAD(&mod->source_list); INIT_LIST_HEAD(&mod->target_list); /* Hold reference count during initialization. */ - atomic_set(&mod->refcnt, 1); + atomic_inc(&mod->refcnt); return 0; } @@ -734,39 +742,39 @@ static inline int try_force_unload(unsigned int flags) } #endif /* CONFIG_MODULE_FORCE_UNLOAD */ -struct stopref +/* Try to release refcount of module, 0 means success. */ +static int try_release_module_ref(struct module *mod) { - struct module *mod; - int flags; - int *forced; -}; + int ret; -/* Whole machine is stopped with interrupts off when this runs. */ -static int __try_stop_module(void *_sref) -{ - struct stopref *sref = _sref; + /* Try to decrement refcnt which we set at loading */ + ret = atomic_sub_return(MODULE_REF_BASE, &mod->refcnt); + BUG_ON(ret < 0); + if (ret) + /* Someone can put this right now, recover with checking */ + ret = atomic_add_unless(&mod->refcnt, MODULE_REF_BASE, 0); - /* If it's not unused, quit unless we're forcing. */ - if (module_refcount(sref->mod) != 0) { - if (!(*sref->forced = try_force_unload(sref->flags))) - return -EWOULDBLOCK; - } - - /* Mark it as dying. */ - sref->mod->state = MODULE_STATE_GOING; - return 0; + return ret; } static int try_stop_module(struct module *mod, int flags, int *forced) { - struct stopref sref = { mod, flags, forced }; + /* If it's not unused, quit unless we're forcing. */ + if (try_release_module_ref(mod) != 0) { + *forced = try_force_unload(flags); + if (!(*forced)) + return -EWOULDBLOCK; + } - return stop_machine(__try_stop_module, &sref, NULL); + /* Mark it as dying. */ + mod->state = MODULE_STATE_GOING; + + return 0; } unsigned long module_refcount(struct module *mod) { - return (unsigned long)atomic_read(&mod->refcnt); + return (unsigned long)atomic_read(&mod->refcnt) - MODULE_REF_BASE; } EXPORT_SYMBOL(module_refcount); @@ -921,11 +929,11 @@ bool try_module_get(struct module *module) if (module) { preempt_disable(); - - if (likely(module_is_live(module))) { - atomic_inc(&module->refcnt); + /* Note: here, we can fail to get a reference */ + if (likely(module_is_live(module) && + atomic_inc_not_zero(&module->refcnt) != 0)) trace_module_get(module, _RET_IP_); - } else + else ret = false; preempt_enable(); @@ -936,9 +944,12 @@ EXPORT_SYMBOL(try_module_get); void module_put(struct module *module) { + int ret; + if (module) { preempt_disable(); - atomic_dec(&module->refcnt); + ret = atomic_dec_if_positive(&module->refcnt); + WARN_ON(ret < 0); /* Failed to put refcount */ trace_module_put(module, _RET_IP_); preempt_enable(); } From 6da0b565150b32318757062bc75834113f0508d6 Mon Sep 17 00:00:00 2001 From: Ionut Alexa Date: Mon, 10 Nov 2014 09:31:29 +1030 Subject: [PATCH 6/8] kernel:module Fix coding style errors and warnings. Fixed codin style errors and warnings. Changes printk with print_debug/warn. Changed seq_printf to seq_puts. Signed-off-by: Ionut Alexa Signed-off-by: Rusty Russell (removed bogus KERN_DEFAULT conversion) --- kernel/module.c | 53 ++++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index e772595d73db..381105b2aaae 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -157,13 +157,13 @@ static BLOCKING_NOTIFIER_HEAD(module_notify_list); * Protected by module_mutex. */ static unsigned long module_addr_min = -1UL, module_addr_max = 0; -int register_module_notifier(struct notifier_block * nb) +int register_module_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&module_notify_list, nb); } EXPORT_SYMBOL(register_module_notifier); -int unregister_module_notifier(struct notifier_block * nb) +int unregister_module_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&module_notify_list, nb); } @@ -858,8 +858,10 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod) seq_printf(m, " %lu ", module_refcount(mod)); - /* Always include a trailing , so userspace can differentiate - between this and the old multi-field proc format. */ + /* + * Always include a trailing , so userspace can differentiate + * between this and the old multi-field proc format. + */ list_for_each_entry(use, &mod->source_list, source_list) { printed_something = 1; seq_printf(m, "%s,", use->source->name); @@ -867,11 +869,11 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod) if (mod->init != NULL && mod->exit == NULL) { printed_something = 1; - seq_printf(m, "[permanent],"); + seq_puts(m, "[permanent],"); } if (!printed_something) - seq_printf(m, "-"); + seq_puts(m, "-"); } void __symbol_put(const char *symbol) @@ -960,7 +962,7 @@ EXPORT_SYMBOL(module_put); static inline void print_unload_info(struct seq_file *m, struct module *mod) { /* We don't know the usage count, or what modules are using. */ - seq_printf(m, " - -"); + seq_puts(m, " - -"); } static inline void module_unload_free(struct module *mod) @@ -1113,7 +1115,7 @@ static unsigned long maybe_relocated(unsigned long crc, static int check_version(Elf_Shdr *sechdrs, unsigned int versindex, const char *symname, - struct module *mod, + struct module *mod, const unsigned long *crc, const struct module *crc_owner) { @@ -1147,7 +1149,7 @@ static int check_version(Elf_Shdr *sechdrs, return 0; bad_version: - printk("%s: disagrees about version of symbol %s\n", + pr_warn("%s: disagrees about version of symbol %s\n", mod->name, symname); return 0; } @@ -1182,7 +1184,7 @@ static inline int same_magic(const char *amagic, const char *bmagic, static inline int check_version(Elf_Shdr *sechdrs, unsigned int versindex, const char *symname, - struct module *mod, + struct module *mod, const unsigned long *crc, const struct module *crc_owner) { @@ -1270,15 +1272,13 @@ static inline bool sect_empty(const Elf_Shdr *sect) return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0; } -struct module_sect_attr -{ +struct module_sect_attr { struct module_attribute mattr; char *name; unsigned long address; }; -struct module_sect_attrs -{ +struct module_sect_attrs { struct attribute_group grp; unsigned int nsections; struct module_sect_attr attrs[0]; @@ -1532,7 +1532,8 @@ static int module_add_modinfo_attrs(struct module *mod) (attr->test && attr->test(mod))) { memcpy(temp_attr, attr, sizeof(*temp_attr)); sysfs_attr_init(&temp_attr->attr); - error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr); + error = sysfs_create_file(&mod->mkobj.kobj, + &temp_attr->attr); ++temp_attr; } } @@ -1548,7 +1549,7 @@ static void module_remove_modinfo_attrs(struct module *mod) /* pick a field to test for end of list */ if (!attr->attr.name) break; - sysfs_remove_file(&mod->mkobj.kobj,&attr->attr); + sysfs_remove_file(&mod->mkobj.kobj, &attr->attr); if (attr->free) attr->free(mod); } @@ -1930,7 +1931,7 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) /* We compiled with -fno-common. These are not supposed to happen. */ pr_debug("Common symbol: %s\n", name); - printk("%s: please compile with -fno-common\n", + pr_warn("%s: please compile with -fno-common\n", mod->name); ret = -ENOEXEC; break; @@ -2234,7 +2235,7 @@ static char elf_type(const Elf_Sym *sym, const struct load_info *info) } static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, - unsigned int shnum) + unsigned int shnum) { const Elf_Shdr *sec; @@ -2710,7 +2711,7 @@ static int find_module_sections(struct module *mod, struct load_info *info) * This shouldn't happen with same compiler and binutils * building all parts of the module. */ - printk(KERN_WARNING "%s: has both .ctors and .init_array.\n", + pr_warn("%s: has both .ctors and .init_array.\n", mod->name); return -EINVAL; } @@ -2998,8 +2999,10 @@ static int do_init_module(struct module *mod) if (mod->init != NULL) ret = do_one_initcall(mod->init); if (ret < 0) { - /* Init routine failed: abort. Try to protect us from - buggy refcounters. */ + /* + * Init routine failed: abort. Try to protect us from + * buggy refcounters. + */ mod->state = MODULE_STATE_GOING; synchronize_sched(); module_put(mod); @@ -3151,7 +3154,7 @@ out: static int unknown_module_param_cb(char *param, char *val, const char *modname) { - /* Check for magic 'dyndbg' arg */ + /* Check for magic 'dyndbg' arg */ int ret = ddebug_dyndbg_module_param_cb(param, val, modname); if (ret != 0) pr_warn("%s: unknown parameter '%s' ignored\n", modname, param); @@ -3636,8 +3639,8 @@ static int m_show(struct seq_file *m, void *p) /* Informative for users. */ seq_printf(m, " %s", - mod->state == MODULE_STATE_GOING ? "Unloading": - mod->state == MODULE_STATE_COMING ? "Loading": + mod->state == MODULE_STATE_GOING ? "Unloading" : + mod->state == MODULE_STATE_COMING ? "Loading" : "Live"); /* Used by oprofile and other similar tools. */ seq_printf(m, " 0x%pK", mod->module_core); @@ -3646,7 +3649,7 @@ static int m_show(struct seq_file *m, void *p) if (mod->taints) seq_printf(m, " %s", module_flags(mod, buf)); - seq_printf(m, "\n"); + seq_puts(m, "\n"); return 0; } From 18eb74fa94161380c1acc9cf562cb835c4e54a25 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 10 Nov 2014 09:32:29 +1030 Subject: [PATCH 7/8] params: cleanup sysfs allocation commit 63662139e519ce06090b2759cf4a1d291b9cc0e2 attempted to patch a leak (which would only happen on OOM, ie. never), but it didn't quite work. This rewrites the code to be as simple as possible. add_sysfs_param() adds a parameter. If it fails, it's the caller's responsibility to clean up the parameters which already exist. The kzalloc-then-always-krealloc pattern is perhaps overly simplistic, but this code has clearly confused people. It worked on me... Signed-off-by: Rusty Russell --- kernel/params.c | 95 +++++++++++++++++++++++-------------------------- 1 file changed, 44 insertions(+), 51 deletions(-) diff --git a/kernel/params.c b/kernel/params.c index db97b791390f..795321aba29f 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -603,74 +603,65 @@ static __modinit int add_sysfs_param(struct module_kobject *mk, const struct kernel_param *kp, const char *name) { - struct module_param_attrs *new; - struct attribute **attrs; - int err, num; + struct module_param_attrs *new_mp; + struct attribute **new_attrs; + unsigned int i; /* We don't bother calling this with invisible parameters. */ BUG_ON(!kp->perm); if (!mk->mp) { - num = 0; - attrs = NULL; - } else { - num = mk->mp->num; - attrs = mk->mp->grp.attrs; + /* First allocation. */ + mk->mp = kzalloc(sizeof(*mk->mp), GFP_KERNEL); + if (!mk->mp) + return -ENOMEM; + mk->mp->grp.name = "parameters"; + /* NULL-terminated attribute array. */ + mk->mp->grp.attrs = kzalloc(sizeof(mk->mp->grp.attrs[0]), + GFP_KERNEL); + /* Caller will cleanup via free_module_param_attrs */ + if (!mk->mp->grp.attrs) + return -ENOMEM; } - /* Enlarge. */ - new = krealloc(mk->mp, - sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (num+1), - GFP_KERNEL); - if (!new) { - kfree(attrs); - err = -ENOMEM; - goto fail; - } - /* Despite looking like the typical realloc() bug, this is safe. - * We *want* the old 'attrs' to be freed either way, and we'll store - * the new one in the success case. */ - attrs = krealloc(attrs, sizeof(new->grp.attrs[0])*(num+2), GFP_KERNEL); - if (!attrs) { - err = -ENOMEM; - goto fail_free_new; - } + /* Enlarge allocations. */ + new_mp = krealloc(mk->mp, + sizeof(*mk->mp) + + sizeof(mk->mp->attrs[0]) * (mk->mp->num + 1), + GFP_KERNEL); + if (!new_mp) + return -ENOMEM; + mk->mp = new_mp; - /* Sysfs wants everything zeroed. */ - memset(new, 0, sizeof(*new)); - memset(&new->attrs[num], 0, sizeof(new->attrs[num])); - memset(&attrs[num], 0, sizeof(attrs[num])); - new->grp.name = "parameters"; - new->grp.attrs = attrs; + /* Extra pointer for NULL terminator */ + new_attrs = krealloc(mk->mp->grp.attrs, + sizeof(mk->mp->grp.attrs[0]) * (mk->mp->num + 2), + GFP_KERNEL); + if (!new_attrs) + return -ENOMEM; + mk->mp->grp.attrs = new_attrs; /* Tack new one on the end. */ - sysfs_attr_init(&new->attrs[num].mattr.attr); - new->attrs[num].param = kp; - new->attrs[num].mattr.show = param_attr_show; - new->attrs[num].mattr.store = param_attr_store; - new->attrs[num].mattr.attr.name = (char *)name; - new->attrs[num].mattr.attr.mode = kp->perm; - new->num = num+1; + sysfs_attr_init(&mk->mp->attrs[mk->mp->num].mattr.attr); + mk->mp->attrs[mk->mp->num].param = kp; + mk->mp->attrs[mk->mp->num].mattr.show = param_attr_show; + mk->mp->attrs[mk->mp->num].mattr.store = param_attr_store; + mk->mp->attrs[mk->mp->num].mattr.attr.name = (char *)name; + mk->mp->attrs[mk->mp->num].mattr.attr.mode = kp->perm; + mk->mp->num++; /* Fix up all the pointers, since krealloc can move us */ - for (num = 0; num < new->num; num++) - new->grp.attrs[num] = &new->attrs[num].mattr.attr; - new->grp.attrs[num] = NULL; - - mk->mp = new; + for (i = 0; i < mk->mp->num; i++) + mk->mp->grp.attrs[i] = &mk->mp->attrs[i].mattr.attr; + mk->mp->grp.attrs[mk->mp->num] = NULL; return 0; - -fail_free_new: - kfree(new); -fail: - mk->mp = NULL; - return err; } #ifdef CONFIG_MODULES static void free_module_param_attrs(struct module_kobject *mk) { - kfree(mk->mp->grp.attrs); + if (mk->mp) + kfree(mk->mp->grp.attrs); kfree(mk->mp); mk->mp = NULL; } @@ -695,8 +686,10 @@ int module_param_sysfs_setup(struct module *mod, if (kparam[i].perm == 0) continue; err = add_sysfs_param(&mod->mkobj, &kparam[i], kparam[i].name); - if (err) + if (err) { + free_module_param_attrs(&mod->mkobj); return err; + } params = true; } From b0a65b0cccd477b2fd8b7adad0ac39433df54829 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 12 Dec 2014 13:36:49 +1030 Subject: [PATCH 8/8] param: do not set store func without write perm When a module_param is defined without DAC write permissions, it can still be changed at runtime and updated. Drivers using a 0444 permission may be surprised that these values can still be changed. For drivers that want to allow updates, any S_IW* flag will set the "store" function as before. Drivers without S_IW* flags will have the "store" function unset, unforcing a read-only value. Drivers that wish neither "store" nor "get" can continue to use "0" for perms to stay out of sysfs entirely. Old behavior: # cd /sys/module/snd/parameters # ls -l total 0 -r--r--r-- 1 root root 4096 Dec 11 13:55 cards_limit -r--r--r-- 1 root root 4096 Dec 11 13:55 major -r--r--r-- 1 root root 4096 Dec 11 13:55 slots # cat major 116 # echo -1 > major -bash: major: Permission denied # chmod u+w major # echo -1 > major # cat major -1 New behavior: ... # chmod u+w major # echo -1 > major -bash: echo: write error: Input/output error Signed-off-by: Kees Cook Signed-off-by: Rusty Russell --- kernel/params.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/params.c b/kernel/params.c index 795321aba29f..0af9b2c4e56c 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -645,7 +645,9 @@ static __modinit int add_sysfs_param(struct module_kobject *mk, sysfs_attr_init(&mk->mp->attrs[mk->mp->num].mattr.attr); mk->mp->attrs[mk->mp->num].param = kp; mk->mp->attrs[mk->mp->num].mattr.show = param_attr_show; - mk->mp->attrs[mk->mp->num].mattr.store = param_attr_store; + /* Do not allow runtime DAC changes to make param writable. */ + if ((kp->perm & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0) + mk->mp->attrs[mk->mp->num].mattr.store = param_attr_store; mk->mp->attrs[mk->mp->num].mattr.attr.name = (char *)name; mk->mp->attrs[mk->mp->num].mattr.attr.mode = kp->perm; mk->mp->num++;