Merge branch 'linus' into perfcounters/core
Conflicts:
	arch/x86/include/asm/kmap_types.h
	include/linux/mm.h
	include/asm-generic/kmap_types.h

Merge reason: We crossed changes with kmap_types.h cleanups in mainline.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
@@ -11,6 +11,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
async.o
obj-y += groups.o

ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files
258	kernel/cpuset.c
@@ -97,12 +97,6 @@ struct cpuset {
|
||||
|
||||
struct cpuset *parent; /* my parent */
|
||||
|
||||
/*
|
||||
* Copy of global cpuset_mems_generation as of the most
|
||||
* recent time this cpuset changed its mems_allowed.
|
||||
*/
|
||||
int mems_generation;
|
||||
|
||||
struct fmeter fmeter; /* memory_pressure filter */
|
||||
|
||||
/* partition number for rebuild_sched_domains() */
|
||||
@@ -176,27 +170,6 @@ static inline int is_spread_slab(const struct cpuset *cs)
|
||||
return test_bit(CS_SPREAD_SLAB, &cs->flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Increment this integer everytime any cpuset changes its
|
||||
* mems_allowed value. Users of cpusets can track this generation
|
||||
* number, and avoid having to lock and reload mems_allowed unless
|
||||
* the cpuset they're using changes generation.
|
||||
*
|
||||
* A single, global generation is needed because cpuset_attach_task() could
|
||||
* reattach a task to a different cpuset, which must not have its
|
||||
* generation numbers aliased with those of that tasks previous cpuset.
|
||||
*
|
||||
* Generations are needed for mems_allowed because one task cannot
|
||||
* modify another's memory placement. So we must enable every task,
|
||||
* on every visit to __alloc_pages(), to efficiently check whether
|
||||
* its current->cpuset->mems_allowed has changed, requiring an update
|
||||
* of its current->mems_allowed.
|
||||
*
|
||||
* Since writes to cpuset_mems_generation are guarded by the cgroup lock
|
||||
* there is no need to mark it atomic.
|
||||
*/
|
||||
static int cpuset_mems_generation;
|
||||
|
||||
static struct cpuset top_cpuset = {
|
||||
.flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
|
||||
};
|
||||
@@ -228,8 +201,9 @@ static struct cpuset top_cpuset = {
|
||||
* If a task is only holding callback_mutex, then it has read-only
|
||||
* access to cpusets.
|
||||
*
|
||||
* The task_struct fields mems_allowed and mems_generation may only
|
||||
* be accessed in the context of that task, so require no locks.
|
||||
* Now, the task_struct fields mems_allowed and mempolicy may be changed
|
||||
* by other task, we use alloc_lock in the task_struct fields to protect
|
||||
* them.
|
||||
*
|
||||
* The cpuset_common_file_read() handlers only hold callback_mutex across
|
||||
* small pieces of code, such as when reading out possibly multi-word
|
||||
@@ -331,75 +305,22 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
|
||||
BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
|
||||
}
|
||||
|
||||
/**
|
||||
* cpuset_update_task_memory_state - update task memory placement
|
||||
/*
|
||||
* update task's spread flag if cpuset's page/slab spread flag is set
|
||||
*
|
||||
* If the current tasks cpusets mems_allowed changed behind our
|
||||
* backs, update current->mems_allowed, mems_generation and task NUMA
|
||||
* mempolicy to the new value.
|
||||
*
|
||||
* Task mempolicy is updated by rebinding it relative to the
|
||||
* current->cpuset if a task has its memory placement changed.
|
||||
* Do not call this routine if in_interrupt().
|
||||
*
|
||||
* Call without callback_mutex or task_lock() held. May be
|
||||
* called with or without cgroup_mutex held. Thanks in part to
|
||||
* 'the_top_cpuset_hack', the task's cpuset pointer will never
|
||||
* be NULL. This routine also might acquire callback_mutex during
|
||||
* call.
|
||||
*
|
||||
* Reading current->cpuset->mems_generation doesn't need task_lock
|
||||
* to guard the current->cpuset derefence, because it is guarded
|
||||
* from concurrent freeing of current->cpuset using RCU.
|
||||
*
|
||||
* The rcu_dereference() is technically probably not needed,
|
||||
* as I don't actually mind if I see a new cpuset pointer but
|
||||
* an old value of mems_generation. However this really only
|
||||
* matters on alpha systems using cpusets heavily. If I dropped
|
||||
* that rcu_dereference(), it would save them a memory barrier.
|
||||
* For all other arch's, rcu_dereference is a no-op anyway, and for
|
||||
* alpha systems not using cpusets, another planned optimization,
|
||||
* avoiding the rcu critical section for tasks in the root cpuset
|
||||
* which is statically allocated, so can't vanish, will make this
|
||||
* irrelevant. Better to use RCU as intended, than to engage in
|
||||
* some cute trick to save a memory barrier that is impossible to
|
||||
* test, for alpha systems using cpusets heavily, which might not
|
||||
* even exist.
|
||||
*
|
||||
* This routine is needed to update the per-task mems_allowed data,
|
||||
* within the tasks context, when it is trying to allocate memory
|
||||
* (in various mm/mempolicy.c routines) and notices that some other
|
||||
* task has been modifying its cpuset.
|
||||
* Called with callback_mutex/cgroup_mutex held
|
||||
*/
|
||||
|
||||
void cpuset_update_task_memory_state(void)
|
||||
static void cpuset_update_task_spread_flag(struct cpuset *cs,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
int my_cpusets_mem_gen;
|
||||
struct task_struct *tsk = current;
|
||||
struct cpuset *cs;
|
||||
|
||||
rcu_read_lock();
|
||||
my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
|
||||
rcu_read_unlock();
|
||||
|
||||
if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
|
||||
mutex_lock(&callback_mutex);
|
||||
task_lock(tsk);
|
||||
cs = task_cs(tsk); /* Maybe changed when task not locked */
|
||||
guarantee_online_mems(cs, &tsk->mems_allowed);
|
||||
tsk->cpuset_mems_generation = cs->mems_generation;
|
||||
if (is_spread_page(cs))
|
||||
tsk->flags |= PF_SPREAD_PAGE;
|
||||
else
|
||||
tsk->flags &= ~PF_SPREAD_PAGE;
|
||||
if (is_spread_slab(cs))
|
||||
tsk->flags |= PF_SPREAD_SLAB;
|
||||
else
|
||||
tsk->flags &= ~PF_SPREAD_SLAB;
|
||||
task_unlock(tsk);
|
||||
mutex_unlock(&callback_mutex);
|
||||
mpol_rebind_task(tsk, &tsk->mems_allowed);
|
||||
}
|
||||
if (is_spread_page(cs))
|
||||
tsk->flags |= PF_SPREAD_PAGE;
|
||||
else
|
||||
tsk->flags &= ~PF_SPREAD_PAGE;
|
||||
if (is_spread_slab(cs))
|
||||
tsk->flags |= PF_SPREAD_SLAB;
|
||||
else
|
||||
tsk->flags &= ~PF_SPREAD_SLAB;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1007,14 +928,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
|
||||
* other task, the task_struct mems_allowed that we are hacking
|
||||
* is for our current task, which must allocate new pages for that
|
||||
* migrating memory region.
|
||||
*
|
||||
* We call cpuset_update_task_memory_state() before hacking
|
||||
* our tasks mems_allowed, so that we are assured of being in
|
||||
* sync with our tasks cpuset, and in particular, callbacks to
|
||||
* cpuset_update_task_memory_state() from nested page allocations
|
||||
* won't see any mismatch of our cpuset and task mems_generation
|
||||
* values, so won't overwrite our hacked tasks mems_allowed
|
||||
* nodemask.
|
||||
*/
|
||||
|
||||
static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
|
||||
@@ -1022,22 +935,37 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
cpuset_update_task_memory_state();
|
||||
|
||||
mutex_lock(&callback_mutex);
|
||||
tsk->mems_allowed = *to;
|
||||
mutex_unlock(&callback_mutex);
|
||||
|
||||
do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
|
||||
|
||||
mutex_lock(&callback_mutex);
|
||||
guarantee_online_mems(task_cs(tsk),&tsk->mems_allowed);
|
||||
mutex_unlock(&callback_mutex);
|
||||
}
|
||||
|
||||
/*
|
||||
* Rebind task's vmas to cpuset's new mems_allowed, and migrate pages to new
|
||||
* nodes if memory_migrate flag is set. Called with cgroup_mutex held.
|
||||
* cpuset_change_task_nodemask - change task's mems_allowed and mempolicy
|
||||
* @tsk: the task to change
|
||||
* @newmems: new nodes that the task will be set
|
||||
*
|
||||
* In order to avoid seeing no nodes if the old and new nodes are disjoint,
|
||||
* we structure updates as setting all new allowed nodes, then clearing newly
|
||||
* disallowed ones.
|
||||
*
|
||||
* Called with task's alloc_lock held
|
||||
*/
|
||||
static void cpuset_change_task_nodemask(struct task_struct *tsk,
|
||||
nodemask_t *newmems)
|
||||
{
|
||||
nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
|
||||
mpol_rebind_task(tsk, &tsk->mems_allowed);
|
||||
mpol_rebind_task(tsk, newmems);
|
||||
tsk->mems_allowed = *newmems;
|
||||
}
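Editor's note: the two-step update above (OR the new nodes in first, then assign the new mask) is what keeps a concurrent reader from ever observing an empty mems_allowed when the old and new node sets are disjoint. Below is a minimal userspace sketch of the same ordering, using a plain unsigned long in place of nodemask_t; all names and values are illustrative, not kernel API.

/*
 * Standalone illustration of "set all new bits first, then drop the
 * no-longer-allowed ones".  Between the two steps the mask is never
 * empty, even when the old and new sets are disjoint.
 */
#include <assert.h>
#include <stdio.h>

static unsigned long allowed;		/* stands in for tsk->mems_allowed */

static void change_mask(unsigned long newmask)
{
	allowed |= newmask;		/* step 1: allow everything in the new set */
	assert(allowed != 0);		/* the transient mask is never empty */
	allowed = newmask;		/* step 2: clear the newly disallowed nodes */
}

int main(void)
{
	allowed = 0x3;			/* old nodes: 0 and 1 */
	change_mask(0xc);		/* new nodes: 2 and 3, disjoint from the old set */
	printf("final mask: %#lx\n", allowed);
	return 0;
}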
|
||||
|
||||
/*
|
||||
* Update task's mems_allowed and rebind its mempolicy and vmas' mempolicy
|
||||
* of it to cpuset's new mems_allowed, and migrate pages to new nodes if
|
||||
* memory_migrate flag is set. Called with cgroup_mutex held.
|
||||
*/
|
||||
static void cpuset_change_nodemask(struct task_struct *p,
|
||||
struct cgroup_scanner *scan)
|
||||
@@ -1046,12 +974,19 @@ static void cpuset_change_nodemask(struct task_struct *p,
|
||||
struct cpuset *cs;
|
||||
int migrate;
|
||||
const nodemask_t *oldmem = scan->data;
|
||||
nodemask_t newmems;
|
||||
|
||||
cs = cgroup_cs(scan->cg);
|
||||
guarantee_online_mems(cs, &newmems);
|
||||
|
||||
task_lock(p);
|
||||
cpuset_change_task_nodemask(p, &newmems);
|
||||
task_unlock(p);
|
||||
|
||||
mm = get_task_mm(p);
|
||||
if (!mm)
|
||||
return;
|
||||
|
||||
cs = cgroup_cs(scan->cg);
|
||||
migrate = is_memory_migrate(cs);
|
||||
|
||||
mpol_rebind_mm(mm, &cs->mems_allowed);
|
||||
@@ -1104,10 +1039,10 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem,
|
||||
/*
|
||||
* Handle user request to change the 'mems' memory placement
|
||||
* of a cpuset. Needs to validate the request, update the
|
||||
* cpusets mems_allowed and mems_generation, and for each
|
||||
* task in the cpuset, rebind any vma mempolicies and if
|
||||
* the cpuset is marked 'memory_migrate', migrate the tasks
|
||||
* pages to the new memory.
|
||||
* cpusets mems_allowed, and for each task in the cpuset,
|
||||
* update mems_allowed and rebind task's mempolicy and any vma
|
||||
* mempolicies and if the cpuset is marked 'memory_migrate',
|
||||
* migrate the tasks pages to the new memory.
|
||||
*
|
||||
* Call with cgroup_mutex held. May take callback_mutex during call.
|
||||
* Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
|
||||
@@ -1160,7 +1095,6 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
|
||||
|
||||
mutex_lock(&callback_mutex);
|
||||
cs->mems_allowed = trialcs->mems_allowed;
|
||||
cs->mems_generation = cpuset_mems_generation++;
|
||||
mutex_unlock(&callback_mutex);
|
||||
|
||||
update_tasks_nodemask(cs, &oldmem, &heap);
|
||||
@@ -1192,6 +1126,46 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* cpuset_change_flag - make a task's spread flags the same as its cpuset's
|
||||
* @tsk: task to be updated
|
||||
* @scan: struct cgroup_scanner containing the cgroup of the task
|
||||
*
|
||||
* Called by cgroup_scan_tasks() for each task in a cgroup.
|
||||
*
|
||||
* We don't need to re-check for the cgroup/cpuset membership, since we're
|
||||
* holding cgroup_lock() at this point.
|
||||
*/
|
||||
static void cpuset_change_flag(struct task_struct *tsk,
|
||||
struct cgroup_scanner *scan)
|
||||
{
|
||||
cpuset_update_task_spread_flag(cgroup_cs(scan->cg), tsk);
|
||||
}
|
||||
|
||||
/*
|
||||
* update_tasks_flags - update the spread flags of tasks in the cpuset.
|
||||
* @cs: the cpuset in which each task's spread flags needs to be changed
|
||||
* @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks()
|
||||
*
|
||||
* Called with cgroup_mutex held
|
||||
*
|
||||
* The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
|
||||
* calling callback functions for each.
|
||||
*
|
||||
* No return value. It's guaranteed that cgroup_scan_tasks() always returns 0
|
||||
* if @heap != NULL.
|
||||
*/
|
||||
static void update_tasks_flags(struct cpuset *cs, struct ptr_heap *heap)
|
||||
{
|
||||
struct cgroup_scanner scan;
|
||||
|
||||
scan.cg = cs->css.cgroup;
|
||||
scan.test_task = NULL;
|
||||
scan.process_task = cpuset_change_flag;
|
||||
scan.heap = heap;
|
||||
cgroup_scan_tasks(&scan);
|
||||
}
|
||||
|
||||
/*
|
||||
* update_flag - read a 0 or a 1 in a file and update associated flag
|
||||
* bit: the bit to update (see cpuset_flagbits_t)
|
||||
@@ -1205,8 +1179,10 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
|
||||
int turning_on)
|
||||
{
|
||||
struct cpuset *trialcs;
|
||||
int err;
|
||||
int balance_flag_changed;
|
||||
int spread_flag_changed;
|
||||
struct ptr_heap heap;
|
||||
int err;
|
||||
|
||||
trialcs = alloc_trial_cpuset(cs);
|
||||
if (!trialcs)
|
||||
@@ -1221,9 +1197,16 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
|
||||
if (err < 0)
|
||||
goto out;
|
||||
|
||||
err = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
|
||||
balance_flag_changed = (is_sched_load_balance(cs) !=
|
||||
is_sched_load_balance(trialcs));
|
||||
|
||||
spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
|
||||
|| (is_spread_page(cs) != is_spread_page(trialcs)));
|
||||
|
||||
mutex_lock(&callback_mutex);
|
||||
cs->flags = trialcs->flags;
|
||||
mutex_unlock(&callback_mutex);
|
||||
@@ -1231,6 +1214,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
|
||||
if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
|
||||
async_rebuild_sched_domains();
|
||||
|
||||
if (spread_flag_changed)
|
||||
update_tasks_flags(cs, &heap);
|
||||
heap_free(&heap);
|
||||
out:
|
||||
free_trial_cpuset(trialcs);
|
||||
return err;
|
||||
@@ -1372,15 +1358,20 @@ static void cpuset_attach(struct cgroup_subsys *ss,
|
||||
|
||||
if (cs == &top_cpuset) {
|
||||
cpumask_copy(cpus_attach, cpu_possible_mask);
|
||||
to = node_possible_map;
|
||||
} else {
|
||||
mutex_lock(&callback_mutex);
|
||||
guarantee_online_cpus(cs, cpus_attach);
|
||||
mutex_unlock(&callback_mutex);
|
||||
guarantee_online_mems(cs, &to);
|
||||
}
|
||||
err = set_cpus_allowed_ptr(tsk, cpus_attach);
|
||||
if (err)
|
||||
return;
|
||||
|
||||
task_lock(tsk);
|
||||
cpuset_change_task_nodemask(tsk, &to);
|
||||
task_unlock(tsk);
|
||||
cpuset_update_task_spread_flag(cs, tsk);
|
||||
|
||||
from = oldcs->mems_allowed;
|
||||
to = cs->mems_allowed;
|
||||
mm = get_task_mm(tsk);
|
||||
@@ -1442,11 +1433,9 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
|
||||
break;
|
||||
case FILE_SPREAD_PAGE:
|
||||
retval = update_flag(CS_SPREAD_PAGE, cs, val);
|
||||
cs->mems_generation = cpuset_mems_generation++;
|
||||
break;
|
||||
case FILE_SPREAD_SLAB:
|
||||
retval = update_flag(CS_SPREAD_SLAB, cs, val);
|
||||
cs->mems_generation = cpuset_mems_generation++;
|
||||
break;
|
||||
default:
|
||||
retval = -EINVAL;
|
||||
@@ -1786,8 +1775,6 @@ static struct cgroup_subsys_state *cpuset_create(
|
||||
struct cpuset *parent;
|
||||
|
||||
if (!cont->parent) {
|
||||
/* This is early initialization for the top cgroup */
|
||||
top_cpuset.mems_generation = cpuset_mems_generation++;
|
||||
return &top_cpuset.css;
|
||||
}
|
||||
parent = cgroup_cs(cont->parent);
|
||||
@@ -1799,7 +1786,6 @@ static struct cgroup_subsys_state *cpuset_create(
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
cpuset_update_task_memory_state();
|
||||
cs->flags = 0;
|
||||
if (is_spread_page(parent))
|
||||
set_bit(CS_SPREAD_PAGE, &cs->flags);
|
||||
@@ -1808,7 +1794,6 @@ static struct cgroup_subsys_state *cpuset_create(
|
||||
set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
|
||||
cpumask_clear(cs->cpus_allowed);
|
||||
nodes_clear(cs->mems_allowed);
|
||||
cs->mems_generation = cpuset_mems_generation++;
|
||||
fmeter_init(&cs->fmeter);
|
||||
cs->relax_domain_level = -1;
|
||||
|
||||
@@ -1827,8 +1812,6 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
|
||||
{
|
||||
struct cpuset *cs = cgroup_cs(cont);
|
||||
|
||||
cpuset_update_task_memory_state();
|
||||
|
||||
if (is_sched_load_balance(cs))
|
||||
update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
|
||||
|
||||
@@ -1849,21 +1832,6 @@ struct cgroup_subsys cpuset_subsys = {
|
||||
.early_init = 1,
|
||||
};
|
||||
|
||||
/*
|
||||
* cpuset_init_early - just enough so that the calls to
|
||||
* cpuset_update_task_memory_state() in early init code
|
||||
* are harmless.
|
||||
*/
|
||||
|
||||
int __init cpuset_init_early(void)
|
||||
{
|
||||
alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_NOWAIT);
|
||||
|
||||
top_cpuset.mems_generation = cpuset_mems_generation++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* cpuset_init - initialize cpusets at system boot
|
||||
*
|
||||
@@ -1874,11 +1842,13 @@ int __init cpuset_init(void)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL))
|
||||
BUG();
|
||||
|
||||
cpumask_setall(top_cpuset.cpus_allowed);
|
||||
nodes_setall(top_cpuset.mems_allowed);
|
||||
|
||||
fmeter_init(&top_cpuset.fmeter);
|
||||
top_cpuset.mems_generation = cpuset_mems_generation++;
|
||||
set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
|
||||
top_cpuset.relax_domain_level = -1;
|
||||
|
||||
|
||||
@@ -178,7 +178,7 @@ void __init fork_init(unsigned long mempages)
|
||||
/* create a slab on which task_structs can be allocated */
|
||||
task_struct_cachep =
|
||||
kmem_cache_create("task_struct", sizeof(struct task_struct),
|
||||
ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL);
|
||||
ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
|
||||
#endif
|
||||
|
||||
/* do the arch specific task caches init */
|
||||
@@ -1470,20 +1470,20 @@ void __init proc_caches_init(void)
|
||||
{
|
||||
sighand_cachep = kmem_cache_create("sighand_cache",
|
||||
sizeof(struct sighand_struct), 0,
|
||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU,
|
||||
sighand_ctor);
|
||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
|
||||
SLAB_NOTRACK, sighand_ctor);
|
||||
signal_cachep = kmem_cache_create("signal_cache",
|
||||
sizeof(struct signal_struct), 0,
|
||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
|
||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
|
||||
files_cachep = kmem_cache_create("files_cache",
|
||||
sizeof(struct files_struct), 0,
|
||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
|
||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
|
||||
fs_cachep = kmem_cache_create("fs_cache",
|
||||
sizeof(struct fs_struct), 0,
|
||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
|
||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
|
||||
mm_cachep = kmem_cache_create("mm_struct",
|
||||
sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
|
||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
|
||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
|
||||
vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
|
||||
mmap_init();
|
||||
}
|
||||
|
||||
288	kernel/groups.c (new file)
@@ -0,0 +1,288 @@
|
||||
/*
|
||||
* Supplementary group IDs
|
||||
*/
|
||||
#include <linux/cred.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
/* init to 2 - one for init_task, one to ensure it is never freed */
|
||||
struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
|
||||
|
||||
struct group_info *groups_alloc(int gidsetsize)
|
||||
{
|
||||
struct group_info *group_info;
|
||||
int nblocks;
|
||||
int i;
|
||||
|
||||
nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK;
|
||||
/* Make sure we always allocate at least one indirect block pointer */
|
||||
nblocks = nblocks ? : 1;
|
||||
group_info = kmalloc(sizeof(*group_info) + nblocks*sizeof(gid_t *), GFP_USER);
|
||||
if (!group_info)
|
||||
return NULL;
|
||||
group_info->ngroups = gidsetsize;
|
||||
group_info->nblocks = nblocks;
|
||||
atomic_set(&group_info->usage, 1);
|
||||
|
||||
if (gidsetsize <= NGROUPS_SMALL)
|
||||
group_info->blocks[0] = group_info->small_block;
|
||||
else {
|
||||
for (i = 0; i < nblocks; i++) {
|
||||
gid_t *b;
|
||||
b = (void *)__get_free_page(GFP_USER);
|
||||
if (!b)
|
||||
goto out_undo_partial_alloc;
|
||||
group_info->blocks[i] = b;
|
||||
}
|
||||
}
|
||||
return group_info;
|
||||
|
||||
out_undo_partial_alloc:
|
||||
while (--i >= 0) {
|
||||
free_page((unsigned long)group_info->blocks[i]);
|
||||
}
|
||||
kfree(group_info);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(groups_alloc);
|
||||
|
||||
void groups_free(struct group_info *group_info)
|
||||
{
|
||||
if (group_info->blocks[0] != group_info->small_block) {
|
||||
int i;
|
||||
for (i = 0; i < group_info->nblocks; i++)
|
||||
free_page((unsigned long)group_info->blocks[i]);
|
||||
}
|
||||
kfree(group_info);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(groups_free);
|
||||
|
||||
/* export the group_info to a user-space array */
|
||||
static int groups_to_user(gid_t __user *grouplist,
|
||||
const struct group_info *group_info)
|
||||
{
|
||||
int i;
|
||||
unsigned int count = group_info->ngroups;
|
||||
|
||||
for (i = 0; i < group_info->nblocks; i++) {
|
||||
unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
|
||||
unsigned int len = cp_count * sizeof(*grouplist);
|
||||
|
||||
if (copy_to_user(grouplist, group_info->blocks[i], len))
|
||||
return -EFAULT;
|
||||
|
||||
grouplist += NGROUPS_PER_BLOCK;
|
||||
count -= cp_count;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* fill a group_info from a user-space array - it must be allocated already */
|
||||
static int groups_from_user(struct group_info *group_info,
|
||||
gid_t __user *grouplist)
|
||||
{
|
||||
int i;
|
||||
unsigned int count = group_info->ngroups;
|
||||
|
||||
for (i = 0; i < group_info->nblocks; i++) {
|
||||
unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
|
||||
unsigned int len = cp_count * sizeof(*grouplist);
|
||||
|
||||
if (copy_from_user(group_info->blocks[i], grouplist, len))
|
||||
return -EFAULT;
|
||||
|
||||
grouplist += NGROUPS_PER_BLOCK;
|
||||
count -= cp_count;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* a simple Shell sort */
|
||||
static void groups_sort(struct group_info *group_info)
|
||||
{
|
||||
int base, max, stride;
|
||||
int gidsetsize = group_info->ngroups;
|
||||
|
||||
for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
|
||||
; /* nothing */
|
||||
stride /= 3;
|
||||
|
||||
while (stride) {
|
||||
max = gidsetsize - stride;
|
||||
for (base = 0; base < max; base++) {
|
||||
int left = base;
|
||||
int right = left + stride;
|
||||
gid_t tmp = GROUP_AT(group_info, right);
|
||||
|
||||
while (left >= 0 && GROUP_AT(group_info, left) > tmp) {
|
||||
GROUP_AT(group_info, right) =
|
||||
GROUP_AT(group_info, left);
|
||||
right = left;
|
||||
left -= stride;
|
||||
}
|
||||
GROUP_AT(group_info, right) = tmp;
|
||||
}
|
||||
stride /= 3;
|
||||
}
|
||||
}
|
||||
|
||||
/* a simple bsearch */
|
||||
int groups_search(const struct group_info *group_info, gid_t grp)
|
||||
{
|
||||
unsigned int left, right;
|
||||
|
||||
if (!group_info)
|
||||
return 0;
|
||||
|
||||
left = 0;
|
||||
right = group_info->ngroups;
|
||||
while (left < right) {
|
||||
unsigned int mid = (left+right)/2;
|
||||
int cmp = grp - GROUP_AT(group_info, mid);
|
||||
if (cmp > 0)
|
||||
left = mid + 1;
|
||||
else if (cmp < 0)
|
||||
right = mid;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* set_groups - Change a group subscription in a set of credentials
|
||||
* @new: The newly prepared set of credentials to alter
|
||||
* @group_info: The group list to install
|
||||
*
|
||||
* Validate a group subscription and, if valid, insert it into a set
|
||||
* of credentials.
|
||||
*/
|
||||
int set_groups(struct cred *new, struct group_info *group_info)
|
||||
{
|
||||
int retval;
|
||||
|
||||
retval = security_task_setgroups(group_info);
|
||||
if (retval)
|
||||
return retval;
|
||||
|
||||
put_group_info(new->group_info);
|
||||
groups_sort(group_info);
|
||||
get_group_info(group_info);
|
||||
new->group_info = group_info;
|
||||
return 0;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(set_groups);
|
||||
|
||||
/**
|
||||
* set_current_groups - Change current's group subscription
|
||||
* @group_info: The group list to impose
|
||||
*
|
||||
* Validate a group subscription and, if valid, impose it upon current's task
|
||||
* security record.
|
||||
*/
|
||||
int set_current_groups(struct group_info *group_info)
|
||||
{
|
||||
struct cred *new;
|
||||
int ret;
|
||||
|
||||
new = prepare_creds();
|
||||
if (!new)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = set_groups(new, group_info);
|
||||
if (ret < 0) {
|
||||
abort_creds(new);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return commit_creds(new);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(set_current_groups);
|
||||
|
||||
SYSCALL_DEFINE2(getgroups, int, gidsetsize, gid_t __user *, grouplist)
|
||||
{
|
||||
const struct cred *cred = current_cred();
|
||||
int i;
|
||||
|
||||
if (gidsetsize < 0)
|
||||
return -EINVAL;
|
||||
|
||||
/* no need to grab task_lock here; it cannot change */
|
||||
i = cred->group_info->ngroups;
|
||||
if (gidsetsize) {
|
||||
if (i > gidsetsize) {
|
||||
i = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
if (groups_to_user(grouplist, cred->group_info)) {
|
||||
i = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
out:
|
||||
return i;
|
||||
}
|
||||
|
||||
/*
|
||||
* SMP: Our groups are copy-on-write. We can set them safely
|
||||
* without another task interfering.
|
||||
*/
|
||||
|
||||
SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
|
||||
{
|
||||
struct group_info *group_info;
|
||||
int retval;
|
||||
|
||||
if (!capable(CAP_SETGID))
|
||||
return -EPERM;
|
||||
if ((unsigned)gidsetsize > NGROUPS_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
group_info = groups_alloc(gidsetsize);
|
||||
if (!group_info)
|
||||
return -ENOMEM;
|
||||
retval = groups_from_user(group_info, grouplist);
|
||||
if (retval) {
|
||||
put_group_info(group_info);
|
||||
return retval;
|
||||
}
|
||||
|
||||
retval = set_current_groups(group_info);
|
||||
put_group_info(group_info);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether we're fsgid/egid or in the supplemental group..
|
||||
*/
|
||||
int in_group_p(gid_t grp)
|
||||
{
|
||||
const struct cred *cred = current_cred();
|
||||
int retval = 1;
|
||||
|
||||
if (grp != cred->fsgid)
|
||||
retval = groups_search(cred->group_info, grp);
|
||||
return retval;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(in_group_p);
|
||||
|
||||
int in_egroup_p(gid_t grp)
|
||||
{
|
||||
const struct cred *cred = current_cred();
|
||||
int retval = 1;
|
||||
|
||||
if (grp != cred->egid)
|
||||
retval = groups_search(cred->group_info, grp);
|
||||
return retval;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(in_egroup_p);
|
||||
@@ -43,6 +43,8 @@
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/debugobjects.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/timer.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
@@ -193,12 +195,24 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
|
||||
* Switch the timer base to the current CPU when possible.
|
||||
*/
|
||||
static inline struct hrtimer_clock_base *
|
||||
switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
|
||||
switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
|
||||
int pinned)
|
||||
{
|
||||
struct hrtimer_clock_base *new_base;
|
||||
struct hrtimer_cpu_base *new_cpu_base;
|
||||
int cpu, preferred_cpu = -1;
|
||||
|
||||
new_cpu_base = &__get_cpu_var(hrtimer_bases);
|
||||
cpu = smp_processor_id();
|
||||
#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
|
||||
if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
|
||||
preferred_cpu = get_nohz_load_balancer();
|
||||
if (preferred_cpu >= 0)
|
||||
cpu = preferred_cpu;
|
||||
}
|
||||
#endif
|
||||
|
||||
again:
|
||||
new_cpu_base = &per_cpu(hrtimer_bases, cpu);
|
||||
new_base = &new_cpu_base->clock_base[base->index];
|
||||
|
||||
if (base != new_base) {
|
||||
@@ -218,6 +232,40 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
|
||||
timer->base = NULL;
|
||||
spin_unlock(&base->cpu_base->lock);
|
||||
spin_lock(&new_base->cpu_base->lock);
|
||||
|
||||
/* Optimized away for NOHZ=n SMP=n */
|
||||
if (cpu == preferred_cpu) {
|
||||
/* Calculate clock monotonic expiry time */
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
|
||||
new_base->offset);
|
||||
#else
|
||||
ktime_t expires = hrtimer_get_expires(timer);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Get the next event on target cpu from the
|
||||
* clock events layer.
|
||||
* This covers the highres=off nohz=on case as well.
|
||||
*/
|
||||
ktime_t next = clockevents_get_next_event(cpu);
|
||||
|
||||
ktime_t delta = ktime_sub(expires, next);
|
||||
|
||||
/*
|
||||
* We do not migrate the timer when it is expiring
|
||||
* before the next event on the target cpu because
|
||||
* we cannot reprogram the target cpu hardware and
|
||||
* we would cause it to fire late.
|
||||
*/
|
||||
if (delta.tv64 < 0) {
|
||||
cpu = smp_processor_id();
|
||||
spin_unlock(&new_base->cpu_base->lock);
|
||||
spin_lock(&base->cpu_base->lock);
|
||||
timer->base = base;
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
timer->base = new_base;
|
||||
}
|
||||
return new_base;
|
||||
@@ -235,7 +283,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
|
||||
return base;
|
||||
}
|
||||
|
||||
# define switch_hrtimer_base(t, b) (b)
|
||||
# define switch_hrtimer_base(t, b, p) (b)
|
||||
|
||||
#endif /* !CONFIG_SMP */
|
||||
|
||||
@@ -907,9 +955,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
|
||||
ret = remove_hrtimer(timer, base);
|
||||
|
||||
/* Switch the timer base, if necessary: */
|
||||
new_base = switch_hrtimer_base(timer, base);
|
||||
new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
|
||||
|
||||
if (mode == HRTIMER_MODE_REL) {
|
||||
if (mode & HRTIMER_MODE_REL) {
|
||||
tim = ktime_add_safe(tim, new_base->get_time());
|
||||
/*
|
||||
* CONFIG_TIME_LOW_RES is a temporary way for architectures
|
||||
|
||||
@@ -30,12 +30,16 @@
|
||||
#define all_var 0
|
||||
#endif
|
||||
|
||||
/* These will be re-linked against their real values during the second link stage */
|
||||
/*
|
||||
* These will be re-linked against their real values
|
||||
* during the second link stage.
|
||||
*/
|
||||
extern const unsigned long kallsyms_addresses[] __attribute__((weak));
|
||||
extern const u8 kallsyms_names[] __attribute__((weak));
|
||||
|
||||
/* tell the compiler that the count isn't in the small data section if the arch
|
||||
* has one (eg: FRV)
|
||||
/*
|
||||
* Tell the compiler that the count isn't in the small data section if the arch
|
||||
* has one (eg: FRV).
|
||||
*/
|
||||
extern const unsigned long kallsyms_num_syms
|
||||
__attribute__((weak, section(".rodata")));
|
||||
@@ -75,31 +79,37 @@ static int is_ksym_addr(unsigned long addr)
|
||||
return is_kernel_text(addr) || is_kernel_inittext(addr);
|
||||
}
|
||||
|
||||
/* expand a compressed symbol data into the resulting uncompressed string,
|
||||
given the offset to where the symbol is in the compressed stream */
|
||||
/*
|
||||
* Expand a compressed symbol data into the resulting uncompressed string,
|
||||
* given the offset to where the symbol is in the compressed stream.
|
||||
*/
|
||||
static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
|
||||
{
|
||||
int len, skipped_first = 0;
|
||||
const u8 *tptr, *data;
|
||||
|
||||
/* get the compressed symbol length from the first symbol byte */
|
||||
/* Get the compressed symbol length from the first symbol byte. */
|
||||
data = &kallsyms_names[off];
|
||||
len = *data;
|
||||
data++;
|
||||
|
||||
/* update the offset to return the offset for the next symbol on
|
||||
* the compressed stream */
|
||||
/*
|
||||
* Update the offset to return the offset for the next symbol on
|
||||
* the compressed stream.
|
||||
*/
|
||||
off += len + 1;
|
||||
|
||||
/* for every byte on the compressed symbol data, copy the table
|
||||
entry for that byte */
|
||||
while(len) {
|
||||
tptr = &kallsyms_token_table[ kallsyms_token_index[*data] ];
|
||||
/*
|
||||
* For every byte on the compressed symbol data, copy the table
|
||||
* entry for that byte.
|
||||
*/
|
||||
while (len) {
|
||||
tptr = &kallsyms_token_table[kallsyms_token_index[*data]];
|
||||
data++;
|
||||
len--;
|
||||
|
||||
while (*tptr) {
|
||||
if(skipped_first) {
|
||||
if (skipped_first) {
|
||||
*result = *tptr;
|
||||
result++;
|
||||
} else
|
||||
@@ -110,36 +120,46 @@ static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
|
||||
|
||||
*result = '\0';
|
||||
|
||||
/* return to offset to the next symbol */
|
||||
/* Return to offset to the next symbol. */
|
||||
return off;
|
||||
}
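Editor's note: as the comments above describe, each entry in kallsyms_names is stored as [<len>][<len> token indices], and every index is expanded through kallsyms_token_index / kallsyms_token_table. The following toy decoder illustrates that layout with a made-up two-token table; the real tables are generated at build time by scripts/kallsyms, and the real expander additionally treats the first decoded character as the symbol type.

/*
 * Toy decoder for the [len][len bytes of token indices] layout.
 * Tables here are hand-written for illustration only.
 */
#include <stdio.h>

static const char toy_token_table[] = "prin\0tk\0";		/* NUL-separated tokens */
static const unsigned short toy_token_index[] = { 0, 5 };	/* offset of each token */
static const unsigned char toy_names[] = { 2, 0, 1 };		/* one symbol: len=2, tokens 0 and 1 */

static unsigned int toy_expand_symbol(unsigned int off, char *result)
{
	int len = toy_names[off++];

	/* For every byte of compressed data, copy the table entry for that byte. */
	while (len--) {
		const char *tptr = &toy_token_table[toy_token_index[toy_names[off++]]];

		while (*tptr)
			*result++ = *tptr++;
	}
	*result = '\0';
	return off;			/* offset of the next symbol in the stream */
}

int main(void)
{
	char buf[64];

	toy_expand_symbol(0, buf);
	printf("%s\n", buf);		/* prints "printk" */
	return 0;
}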
|
||||
|
||||
/* get symbol type information. This is encoded as a single char at the
|
||||
* begining of the symbol name */
|
||||
/*
|
||||
* Get symbol type information. This is encoded as a single char at the
|
||||
* beginning of the symbol name.
|
||||
*/
|
||||
static char kallsyms_get_symbol_type(unsigned int off)
|
||||
{
|
||||
/* get just the first code, look it up in the token table, and return the
|
||||
* first char from this token */
|
||||
return kallsyms_token_table[ kallsyms_token_index[ kallsyms_names[off+1] ] ];
|
||||
/*
|
||||
* Get just the first code, look it up in the token table,
|
||||
* and return the first char from this token.
|
||||
*/
|
||||
return kallsyms_token_table[kallsyms_token_index[kallsyms_names[off + 1]]];
|
||||
}
|
||||
|
||||
|
||||
/* find the offset on the compressed stream given and index in the
|
||||
* kallsyms array */
|
||||
/*
|
||||
* Find the offset on the compressed stream given and index in the
|
||||
* kallsyms array.
|
||||
*/
|
||||
static unsigned int get_symbol_offset(unsigned long pos)
|
||||
{
|
||||
const u8 *name;
|
||||
int i;
|
||||
|
||||
/* use the closest marker we have. We have markers every 256 positions,
|
||||
* so that should be close enough */
|
||||
name = &kallsyms_names[ kallsyms_markers[pos>>8] ];
|
||||
/*
|
||||
* Use the closest marker we have. We have markers every 256 positions,
|
||||
* so that should be close enough.
|
||||
*/
|
||||
name = &kallsyms_names[kallsyms_markers[pos >> 8]];
|
||||
|
||||
/* sequentially scan all the symbols up to the point we're searching for.
|
||||
* Every symbol is stored in a [<len>][<len> bytes of data] format, so we
|
||||
* just need to add the len to the current pointer for every symbol we
|
||||
* wish to skip */
|
||||
for(i = 0; i < (pos&0xFF); i++)
|
||||
/*
|
||||
* Sequentially scan all the symbols up to the point we're searching
|
||||
* for. Every symbol is stored in a [<len>][<len> bytes of data] format,
|
||||
* so we just need to add the len to the current pointer for every
|
||||
* symbol we wish to skip.
|
||||
*/
|
||||
for (i = 0; i < (pos & 0xFF); i++)
|
||||
name = name + (*name) + 1;
|
||||
|
||||
return name - kallsyms_names;
|
||||
@@ -190,7 +210,7 @@ static unsigned long get_symbol_pos(unsigned long addr,
|
||||
/* This kernel should never had been booted. */
|
||||
BUG_ON(!kallsyms_addresses);
|
||||
|
||||
/* do a binary search on the sorted kallsyms_addresses array */
|
||||
/* Do a binary search on the sorted kallsyms_addresses array. */
|
||||
low = 0;
|
||||
high = kallsyms_num_syms;
|
||||
|
||||
@@ -203,15 +223,15 @@ static unsigned long get_symbol_pos(unsigned long addr,
|
||||
}
|
||||
|
||||
/*
|
||||
* search for the first aliased symbol. Aliased
|
||||
* symbols are symbols with the same address
|
||||
* Search for the first aliased symbol. Aliased
|
||||
* symbols are symbols with the same address.
|
||||
*/
|
||||
while (low && kallsyms_addresses[low-1] == kallsyms_addresses[low])
|
||||
--low;
|
||||
|
||||
symbol_start = kallsyms_addresses[low];
|
||||
|
||||
/* Search for next non-aliased symbol */
|
||||
/* Search for next non-aliased symbol. */
|
||||
for (i = low + 1; i < kallsyms_num_syms; i++) {
|
||||
if (kallsyms_addresses[i] > symbol_start) {
|
||||
symbol_end = kallsyms_addresses[i];
|
||||
@@ -219,7 +239,7 @@ static unsigned long get_symbol_pos(unsigned long addr,
|
||||
}
|
||||
}
|
||||
|
||||
/* if we found no next symbol, we use the end of the section */
|
||||
/* If we found no next symbol, we use the end of the section. */
|
||||
if (!symbol_end) {
|
||||
if (is_kernel_inittext(addr))
|
||||
symbol_end = (unsigned long)_einittext;
|
||||
@@ -252,10 +272,10 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
|
||||
|
||||
/*
|
||||
* Lookup an address
|
||||
* - modname is set to NULL if it's in the kernel
|
||||
* - we guarantee that the returned name is valid until we reschedule even if
|
||||
* it resides in a module
|
||||
* - we also guarantee that modname will be valid until rescheduled
|
||||
* - modname is set to NULL if it's in the kernel.
|
||||
* - We guarantee that the returned name is valid until we reschedule even if
*   it resides in a module.
|
||||
* - We also guarantee that modname will be valid until rescheduled.
|
||||
*/
|
||||
const char *kallsyms_lookup(unsigned long addr,
|
||||
unsigned long *symbolsize,
|
||||
@@ -276,7 +296,7 @@ const char *kallsyms_lookup(unsigned long addr,
|
||||
return namebuf;
|
||||
}
|
||||
|
||||
/* see if it's in a module */
|
||||
/* See if it's in a module. */
|
||||
return module_address_lookup(addr, symbolsize, offset, modname,
|
||||
namebuf);
|
||||
}
|
||||
@@ -294,7 +314,7 @@ int lookup_symbol_name(unsigned long addr, char *symname)
|
||||
kallsyms_expand_symbol(get_symbol_offset(pos), symname);
|
||||
return 0;
|
||||
}
|
||||
/* see if it's in a module */
|
||||
/* See if it's in a module. */
|
||||
return lookup_module_symbol_name(addr, symname);
|
||||
}
|
||||
|
||||
@@ -313,7 +333,7 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
|
||||
modname[0] = '\0';
|
||||
return 0;
|
||||
}
|
||||
/* see if it's in a module */
|
||||
/* See if it's in a module. */
|
||||
return lookup_module_symbol_attrs(addr, size, offset, modname, name);
|
||||
}
|
||||
|
||||
@@ -342,6 +362,7 @@ int sprint_symbol(char *buffer, unsigned long address)
|
||||
|
||||
return len;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sprint_symbol);
|
||||
|
||||
/* Look up a kernel symbol and print it to the kernel messages. */
|
||||
void __print_symbol(const char *fmt, unsigned long address)
|
||||
@@ -352,13 +373,13 @@ void __print_symbol(const char *fmt, unsigned long address)
|
||||
|
||||
printk(fmt, buffer);
|
||||
}
|
||||
EXPORT_SYMBOL(__print_symbol);
|
||||
|
||||
/* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
|
||||
struct kallsym_iter
|
||||
{
|
||||
struct kallsym_iter {
|
||||
loff_t pos;
|
||||
unsigned long value;
|
||||
unsigned int nameoff; /* If iterating in core kernel symbols */
|
||||
unsigned int nameoff; /* If iterating in core kernel symbols. */
|
||||
char type;
|
||||
char name[KSYM_NAME_LEN];
|
||||
char module_name[MODULE_NAME_LEN];
|
||||
@@ -404,7 +425,7 @@ static int update_iter(struct kallsym_iter *iter, loff_t pos)
|
||||
iter->pos = pos;
|
||||
return get_ksymbol_mod(iter);
|
||||
}
|
||||
|
||||
|
||||
/* If we're not on the desired position, reset to new position. */
|
||||
if (pos != iter->pos)
|
||||
reset_iter(iter, pos);
|
||||
@@ -439,23 +460,25 @@ static int s_show(struct seq_file *m, void *p)
|
||||
{
|
||||
struct kallsym_iter *iter = m->private;
|
||||
|
||||
/* Some debugging symbols have no name. Ignore them. */
|
||||
/* Some debugging symbols have no name. Ignore them. */
|
||||
if (!iter->name[0])
|
||||
return 0;
|
||||
|
||||
if (iter->module_name[0]) {
|
||||
char type;
|
||||
|
||||
/* Label it "global" if it is exported,
|
||||
* "local" if not exported. */
|
||||
/*
|
||||
* Label it "global" if it is exported,
|
||||
* "local" if not exported.
|
||||
*/
|
||||
type = iter->exported ? toupper(iter->type) :
|
||||
tolower(iter->type);
|
||||
seq_printf(m, "%0*lx %c %s\t[%s]\n",
|
||||
(int)(2*sizeof(void*)),
|
||||
(int)(2 * sizeof(void *)),
|
||||
iter->value, type, iter->name, iter->module_name);
|
||||
} else
|
||||
seq_printf(m, "%0*lx %c %s\n",
|
||||
(int)(2*sizeof(void*)),
|
||||
(int)(2 * sizeof(void *)),
|
||||
iter->value, iter->type, iter->name);
|
||||
return 0;
|
||||
}
|
||||
@@ -469,9 +492,11 @@ static const struct seq_operations kallsyms_op = {
|
||||
|
||||
static int kallsyms_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
/* We keep iterator in m->private, since normal case is to
|
||||
/*
|
||||
* We keep iterator in m->private, since normal case is to
|
||||
* s_start from where we left off, so we avoid doing
|
||||
* using get_symbol_offset for every symbol */
|
||||
* using get_symbol_offset for every symbol.
|
||||
*/
|
||||
struct kallsym_iter *iter;
|
||||
int ret;
|
||||
|
||||
@@ -500,7 +525,4 @@ static int __init kallsyms_init(void)
|
||||
proc_create("kallsyms", 0444, NULL, &kallsyms_operations);
|
||||
return 0;
|
||||
}
|
||||
__initcall(kallsyms_init);
|
||||
|
||||
EXPORT_SYMBOL(__print_symbol);
|
||||
EXPORT_SYMBOL_GPL(sprint_symbol);
|
||||
device_initcall(kallsyms_init);
|
||||
|
||||
@@ -72,9 +72,9 @@ struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock)
|
||||
|
||||
/*
|
||||
* round up to the next power of 2, since our 'let the indices
|
||||
* wrap' tachnique works only in this case.
|
||||
* wrap' technique works only in this case.
|
||||
*/
|
||||
if (size & (size - 1)) {
|
||||
if (!is_power_of_2(size)) {
|
||||
BUG_ON(size > 0x80000000);
|
||||
size = roundup_pow_of_two(size);
|
||||
}
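Editor's note: the power-of-two requirement above exists because kfifo leaves its in/out counters free-running and masks them with (size - 1) only when the buffer is indexed; that masking is equivalent to a modulo only when size is a power of two. Below is a standalone sketch of the same trick, not the kfifo API itself; names and sizes are illustrative.

/*
 * Ring buffer with free-running unsigned counters.  The indices are only
 * reduced with "& (FIFO_SIZE - 1)" at access time, so FIFO_SIZE must be a
 * power of two; fifo_in - fifo_out stays correct even across wraparound.
 */
#include <stdio.h>

#define FIFO_SIZE 8			/* must be a power of two */

static unsigned char buf[FIFO_SIZE];
static unsigned int fifo_in, fifo_out;	/* free-running counters */

static int fifo_put(unsigned char c)
{
	if (fifo_in - fifo_out == FIFO_SIZE)
		return 0;		/* full */
	buf[fifo_in++ & (FIFO_SIZE - 1)] = c;
	return 1;
}

static int fifo_get(unsigned char *c)
{
	if (fifo_in == fifo_out)
		return 0;		/* empty */
	*c = buf[fifo_out++ & (FIFO_SIZE - 1)];
	return 1;
}

int main(void)
{
	unsigned char c;

	for (c = 'a'; c <= 'j'; c++)
		fifo_put(c);		/* the last two puts are dropped: buffer is full */
	while (fifo_get(&c))
		putchar(c);		/* prints "abcdefgh" */
	putchar('\n');
	return 0;
}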
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/cpuset.h>
|
||||
#include <linux/unistd.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/module.h>
|
||||
@@ -236,6 +237,7 @@ int kthreadd(void *unused)
|
||||
ignore_signals(tsk);
|
||||
set_user_nice(tsk, KTHREAD_NICE_LEVEL);
|
||||
set_cpus_allowed_ptr(tsk, cpu_all_mask);
|
||||
set_mems_allowed(node_possible_map);
|
||||
|
||||
current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
|
||||
|
||||
|
||||
@@ -2899,7 +2899,7 @@ void print_modules(void)
|
||||
struct module *mod;
|
||||
char buf[8];
|
||||
|
||||
printk("Modules linked in:");
|
||||
printk(KERN_DEFAULT "Modules linked in:");
|
||||
/* Most callers should already have preempt disabled, but make sure */
|
||||
preempt_disable();
|
||||
list_for_each_entry_rcu(mod, &modules, list)
|
||||
|
||||
@@ -34,7 +34,7 @@ static struct sysrq_key_op sysrq_poweroff_op = {
|
||||
.handler = handle_poweroff,
|
||||
.help_msg = "powerOff",
|
||||
.action_msg = "Power Off",
|
||||
.enable_mask = SYSRQ_ENABLE_BOOT,
|
||||
.enable_mask = SYSRQ_ENABLE_BOOT,
|
||||
};
|
||||
|
||||
static int pm_sysrq_init(void)
|
||||
|
||||
@@ -117,9 +117,12 @@ int freeze_processes(void)
|
||||
if (error)
|
||||
goto Exit;
|
||||
printk("done.");
|
||||
|
||||
oom_killer_disable();
|
||||
Exit:
|
||||
BUG_ON(in_atomic());
|
||||
printk("\n");
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -145,6 +148,8 @@ static void thaw_tasks(bool nosig_only)
|
||||
|
||||
void thaw_processes(void)
|
||||
{
|
||||
oom_killer_enable();
|
||||
|
||||
printk("Restarting tasks ... ");
|
||||
thaw_tasks(true);
|
||||
thaw_tasks(false);
|
||||
|
||||
@@ -687,20 +687,35 @@ asmlinkage int vprintk(const char *fmt, va_list args)
|
||||
sizeof(printk_buf) - printed_len, fmt, args);
|
||||
|
||||
|
||||
p = printk_buf;
|
||||
|
||||
/* Do we have a loglevel in the string? */
|
||||
if (p[0] == '<') {
|
||||
unsigned char c = p[1];
|
||||
if (c && p[2] == '>') {
|
||||
switch (c) {
|
||||
case '0' ... '7': /* loglevel */
|
||||
current_log_level = c - '0';
|
||||
/* Fallthrough - make sure we're on a new line */
|
||||
case 'd': /* KERN_DEFAULT */
|
||||
if (!new_text_line) {
|
||||
emit_log_char('\n');
|
||||
new_text_line = 1;
|
||||
}
|
||||
/* Fallthrough - skip the loglevel */
|
||||
case 'c': /* KERN_CONT */
|
||||
p += 3;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy the output into log_buf. If the caller didn't provide
|
||||
* appropriate log level tags, we insert them here
|
||||
*/
|
||||
for (p = printk_buf; *p; p++) {
|
||||
for ( ; *p; p++) {
|
||||
if (new_text_line) {
|
||||
/* If a token, set current_log_level and skip over */
|
||||
if (p[0] == '<' && p[1] >= '0' && p[1] <= '7' &&
|
||||
p[2] == '>') {
|
||||
current_log_level = p[1] - '0';
|
||||
p += 3;
|
||||
printed_len -= 3;
|
||||
}
|
||||
|
||||
/* Always output the token */
|
||||
emit_log_char('<');
|
||||
emit_log_char(current_log_level + '0');
|
||||
|
||||
@@ -365,7 +365,7 @@ static int __cpuinit profile_cpu_callback(struct notifier_block *info,
|
||||
node = cpu_to_node(cpu);
|
||||
per_cpu(cpu_profile_flip, cpu) = 0;
|
||||
if (!per_cpu(cpu_profile_hits, cpu)[1]) {
|
||||
page = alloc_pages_node(node,
|
||||
page = alloc_pages_exact_node(node,
|
||||
GFP_KERNEL | __GFP_ZERO,
|
||||
0);
|
||||
if (!page)
|
||||
@@ -373,7 +373,7 @@ static int __cpuinit profile_cpu_callback(struct notifier_block *info,
|
||||
per_cpu(cpu_profile_hits, cpu)[1] = page_address(page);
|
||||
}
|
||||
if (!per_cpu(cpu_profile_hits, cpu)[0]) {
|
||||
page = alloc_pages_node(node,
|
||||
page = alloc_pages_exact_node(node,
|
||||
GFP_KERNEL | __GFP_ZERO,
|
||||
0);
|
||||
if (!page)
|
||||
@@ -564,14 +564,14 @@ static int create_hash_tables(void)
|
||||
int node = cpu_to_node(cpu);
|
||||
struct page *page;
|
||||
|
||||
page = alloc_pages_node(node,
|
||||
page = alloc_pages_exact_node(node,
|
||||
GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
|
||||
0);
|
||||
if (!page)
|
||||
goto out_cleanup;
|
||||
per_cpu(cpu_profile_hits, cpu)[1]
|
||||
= (struct profile_hit *)page_address(page);
|
||||
page = alloc_pages_node(node,
|
||||
page = alloc_pages_exact_node(node,
|
||||
GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
|
||||
0);
|
||||
if (!page)
|
||||
|
||||
@@ -902,7 +902,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
|
||||
* Returns:
|
||||
* 0 on success
|
||||
* -EINTR when interrupted by a signal
|
||||
* -ETIMEOUT when the timeout expired
|
||||
* -ETIMEDOUT when the timeout expired
|
||||
* -EDEADLK when the lock would deadlock (when deadlock detection is on)
|
||||
*/
|
||||
int
|
||||
|
||||
@@ -240,7 +240,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
|
||||
hard = hrtimer_get_expires(&rt_b->rt_period_timer);
|
||||
delta = ktime_to_ns(ktime_sub(hard, soft));
|
||||
__hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
|
||||
HRTIMER_MODE_ABS, 0);
|
||||
HRTIMER_MODE_ABS_PINNED, 0);
|
||||
}
|
||||
spin_unlock(&rt_b->rt_runtime_lock);
|
||||
}
|
||||
@@ -1155,7 +1155,7 @@ static __init void init_hrtick(void)
|
||||
static void hrtick_start(struct rq *rq, u64 delay)
|
||||
{
|
||||
__hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
|
||||
HRTIMER_MODE_REL, 0);
|
||||
HRTIMER_MODE_REL_PINNED, 0);
|
||||
}
|
||||
|
||||
static inline void init_hrtick(void)
|
||||
@@ -4397,6 +4397,11 @@ static struct {
|
||||
.load_balancer = ATOMIC_INIT(-1),
|
||||
};
|
||||
|
||||
int get_nohz_load_balancer(void)
|
||||
{
|
||||
return atomic_read(&nohz.load_balancer);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
|
||||
/**
|
||||
* lowest_flag_domain - Return lowest sched_domain containing flag.
|
||||
@@ -9029,6 +9034,8 @@ void __init sched_init_smp(void)
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
const_debug unsigned int sysctl_timer_migration = 1;
|
||||
|
||||
int in_sched_functions(unsigned long addr)
|
||||
{
|
||||
return in_lock_functions(addr) ||
|
||||
|
||||
@@ -832,6 +832,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
|
||||
{
|
||||
struct sigpending *pending;
|
||||
struct sigqueue *q;
|
||||
int override_rlimit;
|
||||
|
||||
trace_sched_signal_send(sig, t);
|
||||
|
||||
@@ -863,9 +864,13 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
|
||||
make sure at least one signal gets delivered and don't
|
||||
pass on the info struct. */
|
||||
|
||||
q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
|
||||
(is_si_special(info) ||
|
||||
info->si_code >= 0)));
|
||||
if (sig < SIGRTMIN)
|
||||
override_rlimit = (is_si_special(info) || info->si_code >= 0);
|
||||
else
|
||||
override_rlimit = 0;
|
||||
|
||||
q = __sigqueue_alloc(t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE,
|
||||
override_rlimit);
|
||||
if (q) {
|
||||
list_add_tail(&q->list, &pending->list);
|
||||
switch ((unsigned long) info) {
|
||||
|
||||
@@ -318,6 +318,15 @@ cant_get_ref:
|
||||
}
|
||||
EXPORT_SYMBOL(slow_work_enqueue);
|
||||
|
||||
/*
|
||||
* Schedule a cull of the thread pool at some time in the near future
|
||||
*/
|
||||
static void slow_work_schedule_cull(void)
|
||||
{
|
||||
mod_timer(&slow_work_cull_timer,
|
||||
round_jiffies(jiffies + SLOW_WORK_CULL_TIMEOUT));
|
||||
}
|
||||
|
||||
/*
|
||||
* Worker thread culling algorithm
|
||||
*/
|
||||
@@ -335,8 +344,7 @@ static bool slow_work_cull_thread(void)
|
||||
list_empty(&vslow_work_queue) &&
|
||||
atomic_read(&slow_work_thread_count) >
|
||||
slow_work_min_threads) {
|
||||
mod_timer(&slow_work_cull_timer,
|
||||
jiffies + SLOW_WORK_CULL_TIMEOUT);
|
||||
slow_work_schedule_cull();
|
||||
do_cull = true;
|
||||
}
|
||||
}
|
||||
@@ -393,8 +401,7 @@ static int slow_work_thread(void *_data)
|
||||
list_empty(&vslow_work_queue) &&
|
||||
atomic_read(&slow_work_thread_count) >
|
||||
slow_work_min_threads)
|
||||
mod_timer(&slow_work_cull_timer,
|
||||
jiffies + SLOW_WORK_CULL_TIMEOUT);
|
||||
slow_work_schedule_cull();
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -458,7 +465,7 @@ static void slow_work_new_thread_execute(struct slow_work *work)
|
||||
if (atomic_dec_and_test(&slow_work_thread_count))
|
||||
BUG(); /* we're running on a slow work thread... */
|
||||
mod_timer(&slow_work_oom_timer,
|
||||
jiffies + SLOW_WORK_OOM_TIMEOUT);
|
||||
round_jiffies(jiffies + SLOW_WORK_OOM_TIMEOUT));
|
||||
} else {
|
||||
/* ratelimit the starting of new threads */
|
||||
mod_timer(&slow_work_oom_timer, jiffies + 1);
|
||||
@@ -502,8 +509,7 @@ static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
|
||||
if (n < 0 && !slow_work_may_not_start_new_thread)
|
||||
slow_work_enqueue(&slow_work_new_thread);
|
||||
else if (n > 0)
|
||||
mod_timer(&slow_work_cull_timer,
|
||||
jiffies + SLOW_WORK_CULL_TIMEOUT);
|
||||
slow_work_schedule_cull();
|
||||
}
|
||||
mutex_unlock(&slow_work_user_lock);
|
||||
}
|
||||
@@ -529,8 +535,7 @@ static int slow_work_max_threads_sysctl(struct ctl_table *table, int write,
|
||||
atomic_read(&slow_work_thread_count);
|
||||
|
||||
if (n < 0)
|
||||
mod_timer(&slow_work_cull_timer,
|
||||
jiffies + SLOW_WORK_CULL_TIMEOUT);
|
||||
slow_work_schedule_cull();
|
||||
}
|
||||
mutex_unlock(&slow_work_user_lock);
|
||||
}
|
||||
|
||||
@@ -382,6 +382,17 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
|
||||
|
||||
EXPORT_SYMBOL(__tasklet_hi_schedule);
|
||||
|
||||
void __tasklet_hi_schedule_first(struct tasklet_struct *t)
|
||||
{
|
||||
BUG_ON(!irqs_disabled());
|
||||
|
||||
t->next = __get_cpu_var(tasklet_hi_vec).head;
|
||||
__get_cpu_var(tasklet_hi_vec).head = t;
|
||||
__raise_softirq_irqoff(HI_SOFTIRQ);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(__tasklet_hi_schedule_first);
|
||||
|
||||
static void tasklet_action(struct softirq_action *a)
|
||||
{
|
||||
struct tasklet_struct *list;
|
||||
|
||||
283	kernel/sys.c
@@ -1113,289 +1113,6 @@ out:
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Supplementary group IDs
|
||||
*/
|
||||
|
||||
/* init to 2 - one for init_task, one to ensure it is never freed */
|
||||
struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
|
||||
|
||||
struct group_info *groups_alloc(int gidsetsize)
|
||||
{
|
||||
struct group_info *group_info;
|
||||
int nblocks;
|
||||
int i;
|
||||
|
||||
nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK;
|
||||
/* Make sure we always allocate at least one indirect block pointer */
|
||||
nblocks = nblocks ? : 1;
|
||||
group_info = kmalloc(sizeof(*group_info) + nblocks*sizeof(gid_t *), GFP_USER);
|
||||
if (!group_info)
|
||||
return NULL;
|
||||
group_info->ngroups = gidsetsize;
|
||||
group_info->nblocks = nblocks;
|
||||
atomic_set(&group_info->usage, 1);
|
||||
|
||||
if (gidsetsize <= NGROUPS_SMALL)
|
||||
group_info->blocks[0] = group_info->small_block;
|
||||
else {
|
||||
for (i = 0; i < nblocks; i++) {
|
||||
gid_t *b;
|
||||
b = (void *)__get_free_page(GFP_USER);
|
||||
if (!b)
|
||||
goto out_undo_partial_alloc;
|
||||
group_info->blocks[i] = b;
|
||||
}
|
||||
}
|
||||
return group_info;
|
||||
|
||||
out_undo_partial_alloc:
|
||||
while (--i >= 0) {
|
||||
free_page((unsigned long)group_info->blocks[i]);
|
||||
}
|
||||
kfree(group_info);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(groups_alloc);
|
||||
|
||||
void groups_free(struct group_info *group_info)
|
||||
{
|
||||
if (group_info->blocks[0] != group_info->small_block) {
|
||||
int i;
|
||||
for (i = 0; i < group_info->nblocks; i++)
|
||||
free_page((unsigned long)group_info->blocks[i]);
|
||||
}
|
||||
kfree(group_info);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(groups_free);
|
||||
|
||||
/* export the group_info to a user-space array */
|
||||
static int groups_to_user(gid_t __user *grouplist,
|
||||
const struct group_info *group_info)
|
||||
{
|
||||
int i;
|
||||
unsigned int count = group_info->ngroups;
|
||||
|
||||
for (i = 0; i < group_info->nblocks; i++) {
|
||||
unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
|
||||
unsigned int len = cp_count * sizeof(*grouplist);
|
||||
|
||||
if (copy_to_user(grouplist, group_info->blocks[i], len))
|
||||
return -EFAULT;
|
||||
|
||||
grouplist += NGROUPS_PER_BLOCK;
|
||||
count -= cp_count;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* fill a group_info from a user-space array - it must be allocated already */
|
||||
static int groups_from_user(struct group_info *group_info,
|
||||
gid_t __user *grouplist)
|
||||
{
|
||||
int i;
|
||||
unsigned int count = group_info->ngroups;
|
||||
|
||||
for (i = 0; i < group_info->nblocks; i++) {
|
||||
unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
|
||||
unsigned int len = cp_count * sizeof(*grouplist);
|
||||
|
||||
if (copy_from_user(group_info->blocks[i], grouplist, len))
|
||||
return -EFAULT;
|
||||
|
||||
grouplist += NGROUPS_PER_BLOCK;
|
||||
count -= cp_count;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* a simple Shell sort */
|
||||
static void groups_sort(struct group_info *group_info)
|
||||
{
|
||||
int base, max, stride;
|
||||
int gidsetsize = group_info->ngroups;
|
||||
|
||||
for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
|
||||
; /* nothing */
|
||||
stride /= 3;
|
||||
|
||||
while (stride) {
|
||||
max = gidsetsize - stride;
|
||||
for (base = 0; base < max; base++) {
|
||||
int left = base;
|
||||
int right = left + stride;
|
||||
gid_t tmp = GROUP_AT(group_info, right);
|
||||
|
||||
while (left >= 0 && GROUP_AT(group_info, left) > tmp) {
|
||||
GROUP_AT(group_info, right) =
|
||||
GROUP_AT(group_info, left);
|
||||
right = left;
|
||||
left -= stride;
|
||||
}
|
||||
GROUP_AT(group_info, right) = tmp;
|
||||
}
|
||||
stride /= 3;
|
||||
}
|
||||
}

/* a simple bsearch */
int groups_search(const struct group_info *group_info, gid_t grp)
{
	unsigned int left, right;

	if (!group_info)
		return 0;

	left = 0;
	right = group_info->ngroups;
	while (left < right) {
		unsigned int mid = (left+right)/2;
		int cmp = grp - GROUP_AT(group_info, mid);
		if (cmp > 0)
			left = mid + 1;
		else if (cmp < 0)
			right = mid;
		else
			return 1;
	}
	return 0;
}
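
groups_search() only works because groups_sort() has run first; the two bounds converge on the slot where grp would live and membership is answered with 1 or 0. A hedged user-space equivalent over a sorted gid_t array (gid_member is an illustrative name, not a kernel helper; direct comparisons are used instead of the subtraction above, which sidesteps any wraparound question with unsigned gids):

#include <sys/types.h>

/* binary search over a sorted gid array; returns 1 if grp is present */
int gid_member(const gid_t *g, unsigned int ngroups, gid_t grp)
{
	unsigned int left = 0, right = ngroups;

	while (left < right) {
		unsigned int mid = (left + right) / 2;

		if (grp > g[mid])
			left = mid + 1;
		else if (grp < g[mid])
			right = mid;
		else
			return 1;
	}
	return 0;
}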

/**
 * set_groups - Change a group subscription in a set of credentials
 * @new: The newly prepared set of credentials to alter
 * @group_info: The group list to install
 *
 * Validate a group subscription and, if valid, insert it into a set
 * of credentials.
 */
int set_groups(struct cred *new, struct group_info *group_info)
{
	int retval;

	retval = security_task_setgroups(group_info);
	if (retval)
		return retval;

	put_group_info(new->group_info);
	groups_sort(group_info);
	get_group_info(group_info);
	new->group_info = group_info;
	return 0;
}

EXPORT_SYMBOL(set_groups);

/**
 * set_current_groups - Change current's group subscription
 * @group_info: The group list to impose
 *
 * Validate a group subscription and, if valid, impose it upon current's task
 * security record.
 */
int set_current_groups(struct group_info *group_info)
{
	struct cred *new;
	int ret;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;

	ret = set_groups(new, group_info);
	if (ret < 0) {
		abort_creds(new);
		return ret;
	}

	return commit_creds(new);
}

EXPORT_SYMBOL(set_current_groups);

SYSCALL_DEFINE2(getgroups, int, gidsetsize, gid_t __user *, grouplist)
{
	const struct cred *cred = current_cred();
	int i;

	if (gidsetsize < 0)
		return -EINVAL;

	/* no need to grab task_lock here; it cannot change */
	i = cred->group_info->ngroups;
	if (gidsetsize) {
		if (i > gidsetsize) {
			i = -EINVAL;
			goto out;
		}
		if (groups_to_user(grouplist, cred->group_info)) {
			i = -EFAULT;
			goto out;
		}
	}
out:
	return i;
}

/*
 * SMP: Our groups are copy-on-write. We can set them safely
 * without another task interfering.
 */

SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
{
	struct group_info *group_info;
	int retval;

	if (!capable(CAP_SETGID))
		return -EPERM;
	if ((unsigned)gidsetsize > NGROUPS_MAX)
		return -EINVAL;

	group_info = groups_alloc(gidsetsize);
	if (!group_info)
		return -ENOMEM;
	retval = groups_from_user(group_info, grouplist);
	if (retval) {
		put_group_info(group_info);
		return retval;
	}

	retval = set_current_groups(group_info);
	put_group_info(group_info);

	return retval;
}
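
From user space these two syscalls are normally reached through the libc wrappers. A minimal sketch, error handling kept short, that reads the calling process's supplementary groups and then tries to write the same list back (the setgroups() call typically fails with EPERM without CAP_SETGID):

#include <grp.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	gid_t *list;
	int i, n;

	/* ask how many supplementary groups the process has */
	n = getgroups(0, NULL);
	if (n < 0) {
		perror("getgroups");
		return EXIT_FAILURE;
	}

	list = calloc(n ? n : 1, sizeof(*list));
	if (!list)
		return EXIT_FAILURE;

	/* fetch the list itself */
	if (getgroups(n, list) < 0) {
		perror("getgroups");
		free(list);
		return EXIT_FAILURE;
	}
	for (i = 0; i < n; i++)
		printf("group %u\n", (unsigned)list[i]);

	/* writing the list back needs CAP_SETGID */
	if (setgroups(n, list) < 0)
		perror("setgroups");

	free(list);
	return EXIT_SUCCESS;
}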

/*
 * Check whether we're fsgid/egid or in the supplemental group..
 */
int in_group_p(gid_t grp)
{
	const struct cred *cred = current_cred();
	int retval = 1;

	if (grp != cred->fsgid)
		retval = groups_search(cred->group_info, grp);
	return retval;
}

EXPORT_SYMBOL(in_group_p);

int in_egroup_p(gid_t grp)
{
	const struct cred *cred = current_cred();
	int retval = 1;

	if (grp != cred->egid)
		retval = groups_search(cred->group_info, grp);
	return retval;
}

EXPORT_SYMBOL(in_egroup_p);

DECLARE_RWSEM(uts_sem);

SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)

@@ -27,6 +27,7 @@
#include <linux/security.h>
#include <linux/ctype.h>
#include <linux/utsname.h>
#include <linux/kmemcheck.h>
#include <linux/smp_lock.h>
#include <linux/fs.h>
#include <linux/init.h>
@@ -328,6 +329,14 @@ static struct ctl_table kern_table[] = {
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "timer_migration",
		.data		= &sysctl_timer_migration,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
	{
		.ctl_name	= CTL_UNNUMBERED,
@@ -959,6 +968,17 @@ static struct ctl_table kern_table[] = {
		.proc_handler	= &proc_dointvec,
	},
#endif
#ifdef CONFIG_KMEMCHECK
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "kmemcheck",
		.data		= &kmemcheck_enabled,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif

/*
 * NOTE: do not add new entries to this table unless you have read
 * Documentation/sysctl/ctl_unnumbered.txt
@@ -1317,7 +1337,6 @@ static struct ctl_table vm_table[] = {
		.extra2		= &one,
	},
#endif
#ifdef CONFIG_UNEVICTABLE_LRU
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "scan_unevictable_pages",
@@ -1326,7 +1345,6 @@ static struct ctl_table vm_table[] = {
		.mode		= 0644,
		.proc_handler	= &scan_unevictable_handler,
	},
#endif
/*
 * NOTE: do not add new entries to this table unless you have read
 * Documentation/sysctl/ctl_unnumbered.txt

@@ -18,6 +18,7 @@
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysdev.h>
#include <linux/tick.h>

/* The registered clock event devices */
static LIST_HEAD(clockevent_devices);
@@ -54,6 +55,7 @@ unsigned long clockevent_delta2ns(unsigned long latch,

	return (unsigned long) clc;
}
EXPORT_SYMBOL_GPL(clockevent_delta2ns);

/**
 * clockevents_set_mode - set the operating mode of a clock event device
@@ -187,6 +189,7 @@ void clockevents_register_device(struct clock_event_device *dev)

	spin_unlock(&clockevents_lock);
}
EXPORT_SYMBOL_GPL(clockevents_register_device);

/*
 * Noop handler when we shut down an event device
@@ -251,4 +254,15 @@ void clockevents_notify(unsigned long reason, void *arg)
	spin_unlock(&clockevents_lock);
}
EXPORT_SYMBOL_GPL(clockevents_notify);

ktime_t clockevents_get_next_event(int cpu)
{
	struct tick_device *td;
	struct clock_event_device *dev;

	td = &per_cpu(tick_cpu_device, cpu);
	dev = td->evtdev;

	return dev->next_event;
}
#endif

@@ -509,6 +509,18 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
		}
	}

	/*
	 * Check to make sure we don't switch to a non-highres capable
	 * clocksource if the tick code is in oneshot mode (highres or nohz)
	 */
	if (tick_oneshot_mode_active() &&
	    !(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) {
		printk(KERN_WARNING "%s clocksource is not HRT compatible. "
			"Cannot switch while in HRT/NOHZ mode\n", ovr->name);
		ovr = NULL;
		override_name[0] = 0;
	}

	/* Reselect, when the override name has changed */
	if (ovr != clocksource_override) {
		clocksource_override = ovr;
@@ -537,7 +549,13 @@ sysfs_show_available_clocksources(struct sys_device *dev,

	spin_lock_irq(&clocksource_lock);
	list_for_each_entry(src, &clocksource_list, list) {
		count += snprintf(buf + count,
		/*
		 * Don't show non-HRES clocksource if the tick code is
		 * in one shot mode (highres=on or nohz=on)
		 */
		if (!tick_oneshot_mode_active() ||
		    (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
			count += snprintf(buf + count,
				  max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
				  "%s ", src->name);
	}

@@ -27,7 +27,7 @@
 * timer stops in C3 state.
 */

struct tick_device tick_broadcast_device;
static struct tick_device tick_broadcast_device;
/* FIXME: Use cpumask_var_t. */
static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
static DECLARE_BITMAP(tmpmask, NR_CPUS);

@@ -128,6 +128,23 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
	return 0;
}

/**
 * tick_check_oneshot_mode - check whether the system is in oneshot mode
 *
 * returns 1 when either nohz or highres are enabled. otherwise 0.
 */
int tick_oneshot_mode_active(void)
{
	unsigned long flags;
	int ret;

	local_irq_save(flags);
	ret = __get_cpu_var(tick_cpu_device).mode == TICKDEV_MODE_ONESHOT;
	local_irq_restore(flags);

	return ret;
}

#ifdef CONFIG_HIGH_RES_TIMERS
/**
 * tick_init_highres - switch to high resolution mode

@@ -349,7 +349,7 @@ void tick_nohz_stop_sched_tick(int inidle)

		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
			hrtimer_start(&ts->sched_timer, expires,
				      HRTIMER_MODE_ABS);
				      HRTIMER_MODE_ABS_PINNED);
			/* Check, if the timer was already in the past */
			if (hrtimer_active(&ts->sched_timer))
				goto out;
@@ -395,7 +395,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)

		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
			hrtimer_start_expires(&ts->sched_timer,
					      HRTIMER_MODE_ABS);
					      HRTIMER_MODE_ABS_PINNED);
			/* Check, if the timer was already in the past */
			if (hrtimer_active(&ts->sched_timer))
				break;
@@ -698,7 +698,8 @@ void tick_setup_sched_timer(void)

	for (;;) {
		hrtimer_forward(&ts->sched_timer, now, tick_period);
		hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS);
		hrtimer_start_expires(&ts->sched_timer,
				      HRTIMER_MODE_ABS_PINNED);
		/* Check, if the timer was already in the past */
		if (hrtimer_active(&ts->sched_timer))
			break;

@@ -38,6 +38,7 @@
#include <linux/tick.h>
#include <linux/kallsyms.h>
#include <linux/perf_counter.h>
#include <linux/sched.h>

#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -605,13 +606,12 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
}

static inline int
__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
__mod_timer(struct timer_list *timer, unsigned long expires,
		bool pending_only, int pinned)
{
	struct tvec_base *base, *new_base;
	unsigned long flags;
	int ret;

	ret = 0;
	int ret = 0 , cpu;

	timer_stats_timer_set_start_info(timer);
	BUG_ON(!timer->function);
@@ -630,6 +630,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)

	new_base = __get_cpu_var(tvec_bases);

	cpu = smp_processor_id();

#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
		int preferred_cpu = get_nohz_load_balancer();

		if (preferred_cpu >= 0)
			cpu = preferred_cpu;
	}
#endif
	new_base = per_cpu(tvec_bases, cpu);

	if (base != new_base) {
		/*
		 * We are trying to schedule the timer on the local CPU.
@@ -669,7 +681,7 @@ out_unlock:
 */
int mod_timer_pending(struct timer_list *timer, unsigned long expires)
{
	return __mod_timer(timer, expires, true);
	return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
}
EXPORT_SYMBOL(mod_timer_pending);

@@ -703,10 +715,32 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
	if (timer->expires == expires && timer_pending(timer))
		return 1;

	return __mod_timer(timer, expires, false);
	return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
}
EXPORT_SYMBOL(mod_timer);

/**
 * mod_timer_pinned - modify a timer's timeout
 * @timer: the timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer_pinned() is a way to update the expire field of an
 * active timer (if the timer is inactive it will be activated)
 * and not allow the timer to be migrated to a different CPU.
 *
 * mod_timer_pinned(timer, expires) is equivalent to:
 *
 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 */
int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
{
	if (timer->expires == expires && timer_pending(timer))
		return 1;

	return __mod_timer(timer, expires, false, TIMER_PINNED);
}
EXPORT_SYMBOL(mod_timer_pinned);
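
With timer migration enabled, a plain mod_timer() on an idle CPU may be moved to the nohz load balancer's CPU, while mod_timer_pinned() keeps the timer on the CPU that armed it. A hedged, module-style sketch of how a caller of that era might use the new variant; the module boilerplate and names (example_timer, example_fn) are illustrative only and not part of this patch:

#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/timer.h>

static struct timer_list example_timer;

static void example_fn(unsigned long data)
{
	/* re-arm one second out, staying on the CPU that queued us */
	mod_timer_pinned(&example_timer, jiffies + HZ);
}

static int __init example_init(void)
{
	setup_timer(&example_timer, example_fn, 0);
	/* first arming: an ordinary mod_timer() may be migrated if this
	 * CPU is idle and timer_migration is enabled */
	mod_timer(&example_timer, jiffies + HZ);
	return 0;
}

static void __exit example_exit(void)
{
	del_timer_sync(&example_timer);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");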

/**
 * add_timer - start a timer
 * @timer: the timer to be added
@@ -757,6 +791,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
	wake_up_idle_cpu(cpu);
	spin_unlock_irqrestore(&base->lock, flags);
}
EXPORT_SYMBOL_GPL(add_timer_on);

/**
 * del_timer - deactive a timer.
@@ -1016,6 +1051,9 @@ cascade:
	index = slot = timer_jiffies & TVN_MASK;
	do {
		list_for_each_entry(nte, varp->vec + slot, entry) {
			if (tbase_get_deferrable(nte->base))
				continue;

			found = 1;
			if (time_before(nte->expires, expires))
				expires = nte->expires;
@@ -1306,7 +1344,7 @@ signed long __sched schedule_timeout(signed long timeout)
	expire = timeout + jiffies;

	setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
	__mod_timer(&timer, expire, false);
	__mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
	schedule();
	del_singleshot_timer_sync(&timer);

@@ -147,7 +147,7 @@ config IRQSOFF_TRACER
	  disabled by default and can be runtime (re-)started
	  via:

	      echo 0 > /debugfs/tracing/tracing_max_latency
	      echo 0 > /sys/kernel/debug/tracing/tracing_max_latency

	  (Note that kernel size and overhead increases with this option
	  enabled. This option and the preempt-off timing option can be
@@ -168,7 +168,7 @@ config PREEMPT_TRACER
	  disabled by default and can be runtime (re-)started
	  via:

	      echo 0 > /debugfs/tracing/tracing_max_latency
	      echo 0 > /sys/kernel/debug/tracing/tracing_max_latency

	  (Note that kernel size and overhead increases with this option
	  enabled. This option and the irqs-off timing option can be
@@ -261,7 +261,7 @@ config PROFILE_ANNOTATED_BRANCHES
	  This tracer profiles all the the likely and unlikely macros
	  in the kernel. It will display the results in:

	  /debugfs/tracing/profile_annotated_branch
	  /sys/kernel/debug/tracing/profile_annotated_branch

	  Note: this will add a significant overhead, only turn this
	  on if you need to profile the system's use of these macros.
@@ -274,7 +274,7 @@ config PROFILE_ALL_BRANCHES
	  taken in the kernel is recorded whether it hit or miss.
	  The results will be displayed in:

	  /debugfs/tracing/profile_branch
	  /sys/kernel/debug/tracing/profile_branch

	  This option also enables the likely/unlikely profiler.

@@ -323,7 +323,7 @@ config STACK_TRACER
	select KALLSYMS
	help
	  This special tracer records the maximum stack footprint of the
	  kernel and displays it in debugfs/tracing/stack_trace.
	  kernel and displays it in /sys/kernel/debug/tracing/stack_trace.

	  This tracer works by hooking into every function call that the
	  kernel executes, and keeping a maximum stack depth value and

@@ -10,6 +10,7 @@
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kmemcheck.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
@@ -1270,6 +1271,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
	if (tail < BUF_PAGE_SIZE) {
		/* Mark the rest of the page with padding */
		event = __rb_page_index(tail_page, tail);
		kmemcheck_annotate_bitfield(event, bitfield);
		rb_event_set_padding(event);
	}

@@ -1327,6 +1329,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
		return NULL;

	event = __rb_page_index(tail_page, tail);
	kmemcheck_annotate_bitfield(event, bitfield);
	rb_update_event(event, type, length);

	/* The passed in type is zero for DATA */

@@ -344,7 +344,7 @@ static raw_spinlock_t ftrace_max_lock =
/*
 * Copy the new maximum trace into the separate maximum-trace
 * structure. (this way the maximum trace is permanently saved,
 * for later retrieval via /debugfs/tracing/latency_trace)
 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
 */
static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
@@ -2414,21 +2414,20 @@ static const struct file_operations tracing_iter_fops = {

static const char readme_msg[] =
	"tracing mini-HOWTO:\n\n"
	"# mkdir /debug\n"
	"# mount -t debugfs nodev /debug\n\n"
	"# cat /debug/tracing/available_tracers\n"
	"# mount -t debugfs nodev /sys/kernel/debug\n\n"
	"# cat /sys/kernel/debug/tracing/available_tracers\n"
	"wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
	"# cat /debug/tracing/current_tracer\n"
	"# cat /sys/kernel/debug/tracing/current_tracer\n"
	"nop\n"
	"# echo sched_switch > /debug/tracing/current_tracer\n"
	"# cat /debug/tracing/current_tracer\n"
	"# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n"
	"# cat /sys/kernel/debug/tracing/current_tracer\n"
	"sched_switch\n"
	"# cat /debug/tracing/trace_options\n"
	"# cat /sys/kernel/debug/tracing/trace_options\n"
	"noprint-parent nosym-offset nosym-addr noverbose\n"
	"# echo print-parent > /debug/tracing/trace_options\n"
	"# echo 1 > /debug/tracing/tracing_enabled\n"
	"# cat /debug/tracing/trace > /tmp/trace.txt\n"
	"# echo 0 > /debug/tracing/tracing_enabled\n"
	"# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
	"# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n"
	"# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n"
	"# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n"
;

static ssize_t

@@ -203,7 +203,8 @@ static void start_stack_timer(void *unused)
	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer->function = stack_trace_timer_fn;

	hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
		      HRTIMER_MODE_REL_PINNED);
}

static void start_stack_timers(void)
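
The *_PINNED hrtimer modes used here keep the expiry on the CPU that started the timer even when timer migration is active, which is what a per-CPU sampling timer wants. A hedged module-style sketch of the pattern (sample_timer and sample_cb are invented names, not taken from the patch):

#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/ktime.h>
#include <linux/module.h>

static struct hrtimer sample_timer;

static enum hrtimer_restart sample_cb(struct hrtimer *t)
{
	/* one-shot: do the per-CPU sampling work here and stop */
	return HRTIMER_NORESTART;
}

static int __init sample_init(void)
{
	hrtimer_init(&sample_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	sample_timer.function = sample_cb;
	/* relative 1 ms expiry, pinned to the current CPU */
	hrtimer_start(&sample_timer, ktime_set(0, 1000000),
		      HRTIMER_MODE_REL_PINNED);
	return 0;
}

static void __exit sample_exit(void)
{
	hrtimer_cancel(&sample_timer);
}

module_init(sample_init);
module_exit(sample_exit);
MODULE_LICENSE("GPL");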

@@ -75,21 +75,6 @@ static void uid_hash_remove(struct user_struct *up)
	put_user_ns(up->user_ns);
}

static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
{
	struct user_struct *user;
	struct hlist_node *h;

	hlist_for_each_entry(user, h, hashent, uidhash_node) {
		if (user->uid == uid) {
			atomic_inc(&user->__count);
			return user;
		}
	}

	return NULL;
}

#ifdef CONFIG_USER_SCHED

static void sched_destroy_user(struct user_struct *up)
@@ -119,6 +104,23 @@ static int sched_create_user(struct user_struct *up) { return 0; }

#if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS)

static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
{
	struct user_struct *user;
	struct hlist_node *h;

	hlist_for_each_entry(user, h, hashent, uidhash_node) {
		if (user->uid == uid) {
			/* possibly resurrect an "almost deleted" object */
			if (atomic_inc_return(&user->__count) == 1)
				cancel_delayed_work(&user->work);
			return user;
		}
	}

	return NULL;
}

static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */
static DEFINE_MUTEX(uids_mutex);

@@ -283,12 +285,12 @@ int __init uids_sysfs_init(void)
	return uids_user_create(&root_user);
}

/* work function to remove sysfs directory for a user and free up
/* delayed work function to remove sysfs directory for a user and free up
 * corresponding structures.
 */
static void cleanup_user_struct(struct work_struct *w)
{
	struct user_struct *up = container_of(w, struct user_struct, work);
	struct user_struct *up = container_of(w, struct user_struct, work.work);
	unsigned long flags;
	int remove_user = 0;

@@ -297,15 +299,12 @@ static void cleanup_user_struct(struct work_struct *w)
	 */
	uids_mutex_lock();

	local_irq_save(flags);

	if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
	spin_lock_irqsave(&uidhash_lock, flags);
	if (atomic_read(&up->__count) == 0) {
		uid_hash_remove(up);
		remove_user = 1;
		spin_unlock_irqrestore(&uidhash_lock, flags);
	} else {
		local_irq_restore(flags);
	}
	spin_unlock_irqrestore(&uidhash_lock, flags);

	if (!remove_user)
		goto done;
@@ -331,16 +330,28 @@ done:
 */
static void free_user(struct user_struct *up, unsigned long flags)
{
	/* restore back the count */
	atomic_inc(&up->__count);
	spin_unlock_irqrestore(&uidhash_lock, flags);

	INIT_WORK(&up->work, cleanup_user_struct);
	schedule_work(&up->work);
	INIT_DELAYED_WORK(&up->work, cleanup_user_struct);
	schedule_delayed_work(&up->work, msecs_to_jiffies(1000));
}
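
free_user() now defers the teardown through a delayed work item so that a uid which reappears within the window can cancel the work and reuse the object, as uid_hash_find() above does. A hedged standalone sketch of that INIT_DELAYED_WORK/container_of pattern (struct demo_obj and the demo_* names are invented for illustration):

#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct demo_obj {
	int id;
	struct delayed_work work;
};

static struct demo_obj *obj;

static void demo_cleanup(struct work_struct *w)
{
	/* w points at work.work, so step back out to the container */
	struct demo_obj *o = container_of(w, struct demo_obj, work.work);

	pr_info("delayed cleanup for demo object %d\n", o->id);
	o->id = 0;	/* stand-in for the real teardown */
}

static int __init demo_init(void)
{
	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	if (!obj)
		return -ENOMEM;
	obj->id = 1;

	INIT_DELAYED_WORK(&obj->work, demo_cleanup);
	/* defer the teardown; a later "user" could still call
	 * cancel_delayed_work() within this window to keep the object */
	schedule_delayed_work(&obj->work, msecs_to_jiffies(1000));
	return 0;
}

static void __exit demo_exit(void)
{
	cancel_delayed_work_sync(&obj->work);
	kfree(obj);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");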

#else	/* CONFIG_USER_SCHED && CONFIG_SYSFS */

static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
{
	struct user_struct *user;
	struct hlist_node *h;

	hlist_for_each_entry(user, h, hashent, uidhash_node) {
		if (user->uid == uid) {
			atomic_inc(&user->__count);
			return user;
		}
	}

	return NULL;
}

int uids_sysfs_init(void) { return 0; }
static inline int uids_user_create(struct user_struct *up) { return 0; }
static inline void uids_mutex_lock(void) { }