Merge branch 'linus' into perfcounters/core

Conflicts:
	arch/x86/include/asm/kmap_types.h
	include/linux/mm.h
	include/asm-generic/kmap_types.h
Merge reason: our changes crossed with the kmap_types.h cleanups in mainline.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Author: Ingo Molnar <mingo@elte.hu>
Date:   2009-06-17 13:06:17 +02:00
3698 changed files with 293610 additions and 85501 deletions

kernel/Makefile

@@ -11,6 +11,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
async.o
obj-y += groups.o
ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files

kernel/cpuset.c

@@ -97,12 +97,6 @@ struct cpuset {
struct cpuset *parent; /* my parent */
/*
* Copy of global cpuset_mems_generation as of the most
* recent time this cpuset changed its mems_allowed.
*/
int mems_generation;
struct fmeter fmeter; /* memory_pressure filter */
/* partition number for rebuild_sched_domains() */
@@ -176,27 +170,6 @@ static inline int is_spread_slab(const struct cpuset *cs)
return test_bit(CS_SPREAD_SLAB, &cs->flags);
}
/*
* Increment this integer everytime any cpuset changes its
* mems_allowed value. Users of cpusets can track this generation
* number, and avoid having to lock and reload mems_allowed unless
* the cpuset they're using changes generation.
*
* A single, global generation is needed because cpuset_attach_task() could
* reattach a task to a different cpuset, which must not have its
* generation numbers aliased with those of that tasks previous cpuset.
*
* Generations are needed for mems_allowed because one task cannot
* modify another's memory placement. So we must enable every task,
* on every visit to __alloc_pages(), to efficiently check whether
* its current->cpuset->mems_allowed has changed, requiring an update
* of its current->mems_allowed.
*
* Since writes to cpuset_mems_generation are guarded by the cgroup lock
* there is no need to mark it atomic.
*/
static int cpuset_mems_generation;
static struct cpuset top_cpuset = {
.flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
};
@@ -228,8 +201,9 @@ static struct cpuset top_cpuset = {
* If a task is only holding callback_mutex, then it has read-only
* access to cpusets.
*
* The task_struct fields mems_allowed and mems_generation may only
* be accessed in the context of that task, so require no locks.
* Now, the task_struct fields mems_allowed and mempolicy may be changed
* by other task, we use alloc_lock in the task_struct fields to protect
* them.
*
* The cpuset_common_file_read() handlers only hold callback_mutex across
* small pieces of code, such as when reading out possibly multi-word
@@ -331,75 +305,22 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
}
/**
* cpuset_update_task_memory_state - update task memory placement
/*
* update task's spread flag if cpuset's page/slab spread flag is set
*
* If the current tasks cpusets mems_allowed changed behind our
* backs, update current->mems_allowed, mems_generation and task NUMA
* mempolicy to the new value.
*
* Task mempolicy is updated by rebinding it relative to the
* current->cpuset if a task has its memory placement changed.
* Do not call this routine if in_interrupt().
*
* Call without callback_mutex or task_lock() held. May be
* called with or without cgroup_mutex held. Thanks in part to
* 'the_top_cpuset_hack', the task's cpuset pointer will never
* be NULL. This routine also might acquire callback_mutex during
* call.
*
* Reading current->cpuset->mems_generation doesn't need task_lock
* to guard the current->cpuset derefence, because it is guarded
* from concurrent freeing of current->cpuset using RCU.
*
* The rcu_dereference() is technically probably not needed,
* as I don't actually mind if I see a new cpuset pointer but
* an old value of mems_generation. However this really only
* matters on alpha systems using cpusets heavily. If I dropped
* that rcu_dereference(), it would save them a memory barrier.
* For all other arch's, rcu_dereference is a no-op anyway, and for
* alpha systems not using cpusets, another planned optimization,
* avoiding the rcu critical section for tasks in the root cpuset
* which is statically allocated, so can't vanish, will make this
* irrelevant. Better to use RCU as intended, than to engage in
* some cute trick to save a memory barrier that is impossible to
* test, for alpha systems using cpusets heavily, which might not
* even exist.
*
* This routine is needed to update the per-task mems_allowed data,
* within the tasks context, when it is trying to allocate memory
* (in various mm/mempolicy.c routines) and notices that some other
* task has been modifying its cpuset.
* Called with callback_mutex/cgroup_mutex held
*/
void cpuset_update_task_memory_state(void)
static void cpuset_update_task_spread_flag(struct cpuset *cs,
struct task_struct *tsk)
{
int my_cpusets_mem_gen;
struct task_struct *tsk = current;
struct cpuset *cs;
rcu_read_lock();
my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
rcu_read_unlock();
if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
mutex_lock(&callback_mutex);
task_lock(tsk);
cs = task_cs(tsk); /* Maybe changed when task not locked */
guarantee_online_mems(cs, &tsk->mems_allowed);
tsk->cpuset_mems_generation = cs->mems_generation;
if (is_spread_page(cs))
tsk->flags |= PF_SPREAD_PAGE;
else
tsk->flags &= ~PF_SPREAD_PAGE;
if (is_spread_slab(cs))
tsk->flags |= PF_SPREAD_SLAB;
else
tsk->flags &= ~PF_SPREAD_SLAB;
task_unlock(tsk);
mutex_unlock(&callback_mutex);
mpol_rebind_task(tsk, &tsk->mems_allowed);
}
if (is_spread_page(cs))
tsk->flags |= PF_SPREAD_PAGE;
else
tsk->flags &= ~PF_SPREAD_PAGE;
if (is_spread_slab(cs))
tsk->flags |= PF_SPREAD_SLAB;
else
tsk->flags &= ~PF_SPREAD_SLAB;
}
/*
@@ -1007,14 +928,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
* other task, the task_struct mems_allowed that we are hacking
* is for our current task, which must allocate new pages for that
* migrating memory region.
*
* We call cpuset_update_task_memory_state() before hacking
* our tasks mems_allowed, so that we are assured of being in
* sync with our tasks cpuset, and in particular, callbacks to
* cpuset_update_task_memory_state() from nested page allocations
* won't see any mismatch of our cpuset and task mems_generation
* values, so won't overwrite our hacked tasks mems_allowed
* nodemask.
*/
static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
@@ -1022,22 +935,37 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
{
struct task_struct *tsk = current;
cpuset_update_task_memory_state();
mutex_lock(&callback_mutex);
tsk->mems_allowed = *to;
mutex_unlock(&callback_mutex);
do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
mutex_lock(&callback_mutex);
guarantee_online_mems(task_cs(tsk),&tsk->mems_allowed);
mutex_unlock(&callback_mutex);
}
/*
* Rebind task's vmas to cpuset's new mems_allowed, and migrate pages to new
* nodes if memory_migrate flag is set. Called with cgroup_mutex held.
* cpuset_change_task_nodemask - change task's mems_allowed and mempolicy
* @tsk: the task to change
* @newmems: new nodes that the task will be set
*
* In order to avoid seeing no nodes if the old and new nodes are disjoint,
* we structure updates as setting all new allowed nodes, then clearing newly
* disallowed ones.
*
* Called with task's alloc_lock held
*/
static void cpuset_change_task_nodemask(struct task_struct *tsk,
nodemask_t *newmems)
{
nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
mpol_rebind_task(tsk, &tsk->mems_allowed);
mpol_rebind_task(tsk, newmems);
tsk->mems_allowed = *newmems;
}
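
Because the union is installed before the final assignment, a concurrent reader of tsk->mems_allowed never observes an empty mask even when the old and new node sets are disjoint. A minimal userspace sketch of that ordering, with a plain bitmask standing in for nodemask_t:

#include <stdio.h>

/* Sketch: retarget 'allowed' from its old mask to 'newmask' without
 * ever exposing an empty intermediate value. */
static void change_mask(unsigned long *allowed, unsigned long newmask)
{
	*allowed |= newmask;	/* step 1: union - old nodes still allowed */
	/* ... mempolicy rebinding happens against the union here ... */
	*allowed = newmask;	/* step 2: drop the newly disallowed nodes */
}

int main(void)
{
	unsigned long allowed = 0x3;	/* nodes 0-1 */
	change_mask(&allowed, 0xc);	/* move to the disjoint nodes 2-3 */
	printf("%#lx\n", allowed);	/* prints 0xc */
	return 0;
}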
/*
* Update task's mems_allowed and rebind its mempolicy and vmas' mempolicy
* of it to cpuset's new mems_allowed, and migrate pages to new nodes if
* memory_migrate flag is set. Called with cgroup_mutex held.
*/
static void cpuset_change_nodemask(struct task_struct *p,
struct cgroup_scanner *scan)
@@ -1046,12 +974,19 @@ static void cpuset_change_nodemask(struct task_struct *p,
struct cpuset *cs;
int migrate;
const nodemask_t *oldmem = scan->data;
nodemask_t newmems;
cs = cgroup_cs(scan->cg);
guarantee_online_mems(cs, &newmems);
task_lock(p);
cpuset_change_task_nodemask(p, &newmems);
task_unlock(p);
mm = get_task_mm(p);
if (!mm)
return;
cs = cgroup_cs(scan->cg);
migrate = is_memory_migrate(cs);
mpol_rebind_mm(mm, &cs->mems_allowed);
@@ -1104,10 +1039,10 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem,
/*
* Handle user request to change the 'mems' memory placement
* of a cpuset. Needs to validate the request, update the
* cpusets mems_allowed and mems_generation, and for each
* task in the cpuset, rebind any vma mempolicies and if
* the cpuset is marked 'memory_migrate', migrate the tasks
* pages to the new memory.
* cpusets mems_allowed, and for each task in the cpuset,
* update mems_allowed and rebind task's mempolicy and any vma
* mempolicies and if the cpuset is marked 'memory_migrate',
* migrate the tasks pages to the new memory.
*
* Call with cgroup_mutex held. May take callback_mutex during call.
* Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
@@ -1160,7 +1095,6 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
mutex_lock(&callback_mutex);
cs->mems_allowed = trialcs->mems_allowed;
cs->mems_generation = cpuset_mems_generation++;
mutex_unlock(&callback_mutex);
update_tasks_nodemask(cs, &oldmem, &heap);
@@ -1192,6 +1126,46 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
return 0;
}
/*
* cpuset_change_flag - make a task's spread flags the same as its cpuset's
* @tsk: task to be updated
* @scan: struct cgroup_scanner containing the cgroup of the task
*
* Called by cgroup_scan_tasks() for each task in a cgroup.
*
* We don't need to re-check for the cgroup/cpuset membership, since we're
* holding cgroup_lock() at this point.
*/
static void cpuset_change_flag(struct task_struct *tsk,
struct cgroup_scanner *scan)
{
cpuset_update_task_spread_flag(cgroup_cs(scan->cg), tsk);
}
/*
* update_tasks_flags - update the spread flags of tasks in the cpuset.
* @cs: the cpuset in which each task's spread flags needs to be changed
* @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks()
*
* Called with cgroup_mutex held
*
* The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
* calling callback functions for each.
*
* No return value. It's guaranteed that cgroup_scan_tasks() always returns 0
* if @heap != NULL.
*/
static void update_tasks_flags(struct cpuset *cs, struct ptr_heap *heap)
{
struct cgroup_scanner scan;
scan.cg = cs->css.cgroup;
scan.test_task = NULL;
scan.process_task = cpuset_change_flag;
scan.heap = heap;
cgroup_scan_tasks(&scan);
}
/*
* update_flag - read a 0 or a 1 in a file and update associated flag
* bit: the bit to update (see cpuset_flagbits_t)
@@ -1205,8 +1179,10 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
int turning_on)
{
struct cpuset *trialcs;
int err;
int balance_flag_changed;
int spread_flag_changed;
struct ptr_heap heap;
int err;
trialcs = alloc_trial_cpuset(cs);
if (!trialcs)
@@ -1221,9 +1197,16 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
if (err < 0)
goto out;
err = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
if (err < 0)
goto out;
balance_flag_changed = (is_sched_load_balance(cs) !=
is_sched_load_balance(trialcs));
spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
|| (is_spread_page(cs) != is_spread_page(trialcs)));
mutex_lock(&callback_mutex);
cs->flags = trialcs->flags;
mutex_unlock(&callback_mutex);
@@ -1231,6 +1214,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
async_rebuild_sched_domains();
if (spread_flag_changed)
update_tasks_flags(cs, &heap);
heap_free(&heap);
out:
free_trial_cpuset(trialcs);
return err;
@@ -1372,15 +1358,20 @@ static void cpuset_attach(struct cgroup_subsys *ss,
if (cs == &top_cpuset) {
cpumask_copy(cpus_attach, cpu_possible_mask);
to = node_possible_map;
} else {
mutex_lock(&callback_mutex);
guarantee_online_cpus(cs, cpus_attach);
mutex_unlock(&callback_mutex);
guarantee_online_mems(cs, &to);
}
err = set_cpus_allowed_ptr(tsk, cpus_attach);
if (err)
return;
task_lock(tsk);
cpuset_change_task_nodemask(tsk, &to);
task_unlock(tsk);
cpuset_update_task_spread_flag(cs, tsk);
from = oldcs->mems_allowed;
to = cs->mems_allowed;
mm = get_task_mm(tsk);
@@ -1442,11 +1433,9 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
break;
case FILE_SPREAD_PAGE:
retval = update_flag(CS_SPREAD_PAGE, cs, val);
cs->mems_generation = cpuset_mems_generation++;
break;
case FILE_SPREAD_SLAB:
retval = update_flag(CS_SPREAD_SLAB, cs, val);
cs->mems_generation = cpuset_mems_generation++;
break;
default:
retval = -EINVAL;
@@ -1786,8 +1775,6 @@ static struct cgroup_subsys_state *cpuset_create(
struct cpuset *parent;
if (!cont->parent) {
/* This is early initialization for the top cgroup */
top_cpuset.mems_generation = cpuset_mems_generation++;
return &top_cpuset.css;
}
parent = cgroup_cs(cont->parent);
@@ -1799,7 +1786,6 @@ static struct cgroup_subsys_state *cpuset_create(
return ERR_PTR(-ENOMEM);
}
cpuset_update_task_memory_state();
cs->flags = 0;
if (is_spread_page(parent))
set_bit(CS_SPREAD_PAGE, &cs->flags);
@@ -1808,7 +1794,6 @@ static struct cgroup_subsys_state *cpuset_create(
set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
cpumask_clear(cs->cpus_allowed);
nodes_clear(cs->mems_allowed);
cs->mems_generation = cpuset_mems_generation++;
fmeter_init(&cs->fmeter);
cs->relax_domain_level = -1;
@@ -1827,8 +1812,6 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
{
struct cpuset *cs = cgroup_cs(cont);
cpuset_update_task_memory_state();
if (is_sched_load_balance(cs))
update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
@@ -1849,21 +1832,6 @@ struct cgroup_subsys cpuset_subsys = {
.early_init = 1,
};
/*
* cpuset_init_early - just enough so that the calls to
* cpuset_update_task_memory_state() in early init code
* are harmless.
*/
int __init cpuset_init_early(void)
{
alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_NOWAIT);
top_cpuset.mems_generation = cpuset_mems_generation++;
return 0;
}
/**
* cpuset_init - initialize cpusets at system boot
*
@@ -1874,11 +1842,13 @@ int __init cpuset_init(void)
{
int err = 0;
if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL))
BUG();
cpumask_setall(top_cpuset.cpus_allowed);
nodes_setall(top_cpuset.mems_allowed);
fmeter_init(&top_cpuset.fmeter);
top_cpuset.mems_generation = cpuset_mems_generation++;
set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
top_cpuset.relax_domain_level = -1;

kernel/fork.c

@@ -178,7 +178,7 @@ void __init fork_init(unsigned long mempages)
/* create a slab on which task_structs can be allocated */
task_struct_cachep =
kmem_cache_create("task_struct", sizeof(struct task_struct),
ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL);
ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
#endif
/* do the arch specific task caches init */
@@ -1470,20 +1470,20 @@ void __init proc_caches_init(void)
{
sighand_cachep = kmem_cache_create("sighand_cache",
sizeof(struct sighand_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU,
sighand_ctor);
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
SLAB_NOTRACK, sighand_ctor);
signal_cachep = kmem_cache_create("signal_cache",
sizeof(struct signal_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
files_cachep = kmem_cache_create("files_cache",
sizeof(struct files_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
fs_cachep = kmem_cache_create("fs_cache",
sizeof(struct fs_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
mm_cachep = kmem_cache_create("mm_struct",
sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
mmap_init();
}

kernel/groups.c (new file, 288 lines)

@@ -0,0 +1,288 @@
/*
* Supplementary group IDs
*/
#include <linux/cred.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <asm/uaccess.h>
/* init to 2 - one for init_task, one to ensure it is never freed */
struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
struct group_info *groups_alloc(int gidsetsize)
{
struct group_info *group_info;
int nblocks;
int i;
nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK;
/* Make sure we always allocate at least one indirect block pointer */
nblocks = nblocks ? : 1;
group_info = kmalloc(sizeof(*group_info) + nblocks*sizeof(gid_t *), GFP_USER);
if (!group_info)
return NULL;
group_info->ngroups = gidsetsize;
group_info->nblocks = nblocks;
atomic_set(&group_info->usage, 1);
if (gidsetsize <= NGROUPS_SMALL)
group_info->blocks[0] = group_info->small_block;
else {
for (i = 0; i < nblocks; i++) {
gid_t *b;
b = (void *)__get_free_page(GFP_USER);
if (!b)
goto out_undo_partial_alloc;
group_info->blocks[i] = b;
}
}
return group_info;
out_undo_partial_alloc:
while (--i >= 0) {
free_page((unsigned long)group_info->blocks[i]);
}
kfree(group_info);
return NULL;
}
EXPORT_SYMBOL(groups_alloc);
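
The nblocks computation is the standard ceiling-division idiom, with the GNU ?: fallback guaranteeing at least one indirect block pointer even for an empty set. A quick userspace check of the arithmetic (the NGROUPS_PER_BLOCK value is assumed for illustration):

#include <assert.h>

#define NGROUPS_PER_BLOCK 1024	/* assumed value, for illustration only */

int main(void)
{
	int gidsetsize = 2500;
	int nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK;

	assert(nblocks == 3);		/* ceil(2500 / 1024) */
	nblocks = (0 + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK;
	assert((nblocks ? : 1) == 1);	/* empty set still gets one block */
	return 0;
}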
void groups_free(struct group_info *group_info)
{
if (group_info->blocks[0] != group_info->small_block) {
int i;
for (i = 0; i < group_info->nblocks; i++)
free_page((unsigned long)group_info->blocks[i]);
}
kfree(group_info);
}
EXPORT_SYMBOL(groups_free);
/* export the group_info to a user-space array */
static int groups_to_user(gid_t __user *grouplist,
const struct group_info *group_info)
{
int i;
unsigned int count = group_info->ngroups;
for (i = 0; i < group_info->nblocks; i++) {
unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
unsigned int len = cp_count * sizeof(*grouplist);
if (copy_to_user(grouplist, group_info->blocks[i], len))
return -EFAULT;
grouplist += NGROUPS_PER_BLOCK;
count -= cp_count;
}
return 0;
}
/* fill a group_info from a user-space array - it must be allocated already */
static int groups_from_user(struct group_info *group_info,
gid_t __user *grouplist)
{
int i;
unsigned int count = group_info->ngroups;
for (i = 0; i < group_info->nblocks; i++) {
unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
unsigned int len = cp_count * sizeof(*grouplist);
if (copy_from_user(group_info->blocks[i], grouplist, len))
return -EFAULT;
grouplist += NGROUPS_PER_BLOCK;
count -= cp_count;
}
return 0;
}
/* a simple Shell sort */
static void groups_sort(struct group_info *group_info)
{
int base, max, stride;
int gidsetsize = group_info->ngroups;
for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
; /* nothing */
stride /= 3;
while (stride) {
max = gidsetsize - stride;
for (base = 0; base < max; base++) {
int left = base;
int right = left + stride;
gid_t tmp = GROUP_AT(group_info, right);
while (left >= 0 && GROUP_AT(group_info, left) > tmp) {
GROUP_AT(group_info, right) =
GROUP_AT(group_info, left);
right = left;
left -= stride;
}
GROUP_AT(group_info, right) = tmp;
}
stride /= 3;
}
}
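
The stride sequence is Knuth's h = 3h + 1 (1, 4, 13, 40, ...), grown past the set size and shrunk by /3 between passes. The same loop, extracted as a standalone sketch over a plain array (gid_demo_t is a stand-in type):

typedef unsigned int gid_demo_t;	/* stand-in for gid_t */

void shell_sort_demo(gid_demo_t *a, int n)
{
	int base, max, stride;

	for (stride = 1; stride < n; stride = 3 * stride + 1)
		;		/* grow: 1, 4, 13, 40, ... */
	stride /= 3;		/* step back to the largest useful stride */

	while (stride) {
		max = n - stride;
		for (base = 0; base < max; base++) {
			int left = base;
			int right = left + stride;
			gid_demo_t tmp = a[right];

			/* insertion sort within one stride chain */
			while (left >= 0 && a[left] > tmp) {
				a[right] = a[left];
				right = left;
				left -= stride;
			}
			a[right] = tmp;
		}
		stride /= 3;
	}
}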
/* a simple bsearch */
int groups_search(const struct group_info *group_info, gid_t grp)
{
unsigned int left, right;
if (!group_info)
return 0;
left = 0;
right = group_info->ngroups;
while (left < right) {
unsigned int mid = (left+right)/2;
int cmp = grp - GROUP_AT(group_info, mid);
if (cmp > 0)
left = mid + 1;
else if (cmp < 0)
right = mid;
else
return 1;
}
return 0;
}
/**
* set_groups - Change a group subscription in a set of credentials
* @new: The newly prepared set of credentials to alter
* @group_info: The group list to install
*
* Validate a group subscription and, if valid, insert it into a set
* of credentials.
*/
int set_groups(struct cred *new, struct group_info *group_info)
{
int retval;
retval = security_task_setgroups(group_info);
if (retval)
return retval;
put_group_info(new->group_info);
groups_sort(group_info);
get_group_info(group_info);
new->group_info = group_info;
return 0;
}
EXPORT_SYMBOL(set_groups);
/**
* set_current_groups - Change current's group subscription
* @group_info: The group list to impose
*
* Validate a group subscription and, if valid, impose it upon current's task
* security record.
*/
int set_current_groups(struct group_info *group_info)
{
struct cred *new;
int ret;
new = prepare_creds();
if (!new)
return -ENOMEM;
ret = set_groups(new, group_info);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
}
EXPORT_SYMBOL(set_current_groups);
SYSCALL_DEFINE2(getgroups, int, gidsetsize, gid_t __user *, grouplist)
{
const struct cred *cred = current_cred();
int i;
if (gidsetsize < 0)
return -EINVAL;
/* no need to grab task_lock here; it cannot change */
i = cred->group_info->ngroups;
if (gidsetsize) {
if (i > gidsetsize) {
i = -EINVAL;
goto out;
}
if (groups_to_user(grouplist, cred->group_info)) {
i = -EFAULT;
goto out;
}
}
out:
return i;
}
/*
* SMP: Our groups are copy-on-write. We can set them safely
* without another task interfering.
*/
SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
{
struct group_info *group_info;
int retval;
if (!capable(CAP_SETGID))
return -EPERM;
if ((unsigned)gidsetsize > NGROUPS_MAX)
return -EINVAL;
group_info = groups_alloc(gidsetsize);
if (!group_info)
return -ENOMEM;
retval = groups_from_user(group_info, grouplist);
if (retval) {
put_group_info(group_info);
return retval;
}
retval = set_current_groups(group_info);
put_group_info(group_info);
return retval;
}
/*
* Check whether we're fsgid/egid or in the supplemental group.
*/
int in_group_p(gid_t grp)
{
const struct cred *cred = current_cred();
int retval = 1;
if (grp != cred->fsgid)
retval = groups_search(cred->group_info, grp);
return retval;
}
EXPORT_SYMBOL(in_group_p);
int in_egroup_p(gid_t grp)
{
const struct cred *cred = current_cred();
int retval = 1;
if (grp != cred->egid)
retval = groups_search(cred->group_info, grp);
return retval;
}
EXPORT_SYMBOL(in_egroup_p);

kernel/hrtimer.c

@@ -43,6 +43,8 @@
#include <linux/seq_file.h>
#include <linux/err.h>
#include <linux/debugobjects.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <asm/uaccess.h>
@@ -193,12 +195,24 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
* Switch the timer base to the current CPU when possible.
*/
static inline struct hrtimer_clock_base *
switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
int pinned)
{
struct hrtimer_clock_base *new_base;
struct hrtimer_cpu_base *new_cpu_base;
int cpu, preferred_cpu = -1;
new_cpu_base = &__get_cpu_var(hrtimer_bases);
cpu = smp_processor_id();
#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
preferred_cpu = get_nohz_load_balancer();
if (preferred_cpu >= 0)
cpu = preferred_cpu;
}
#endif
again:
new_cpu_base = &per_cpu(hrtimer_bases, cpu);
new_base = &new_cpu_base->clock_base[base->index];
if (base != new_base) {
@@ -218,6 +232,40 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
timer->base = NULL;
spin_unlock(&base->cpu_base->lock);
spin_lock(&new_base->cpu_base->lock);
/* Optimized away for NOHZ=n SMP=n */
if (cpu == preferred_cpu) {
/* Calculate clock monotonic expiry time */
#ifdef CONFIG_HIGH_RES_TIMERS
ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
new_base->offset);
#else
ktime_t expires = hrtimer_get_expires(timer);
#endif
/*
* Get the next event on target cpu from the
* clock events layer.
* This covers the highres=off nohz=on case as well.
*/
ktime_t next = clockevents_get_next_event(cpu);
ktime_t delta = ktime_sub(expires, next);
/*
* We do not migrate the timer when it is expiring
* before the next event on the target cpu because
* we cannot reprogram the target cpu hardware and
* we would cause it to fire late.
*/
if (delta.tv64 < 0) {
cpu = smp_processor_id();
spin_unlock(&new_base->cpu_base->lock);
spin_lock(&base->cpu_base->lock);
timer->base = base;
goto again;
}
}
timer->base = new_base;
}
return new_base;
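
The migration decision above reduces to one signed comparison between the timer's expiry and the target CPU's next programmed event. A minimal sketch of just that test, with plain nanosecond values standing in for ktime_t:

#include <stdbool.h>

typedef long long ns_t;		/* stand-in for ktime_t, in nanoseconds */

/* May this timer move to the target CPU? */
static bool may_migrate(ns_t expires, ns_t target_next_event)
{
	/*
	 * A negative delta means the timer fires before the target CPU's
	 * next programmed event; since we cannot reprogram the remote
	 * hardware, migrating would make the timer fire late.
	 */
	return expires - target_next_event >= 0;
}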
@@ -235,7 +283,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
return base;
}
# define switch_hrtimer_base(t, b) (b)
# define switch_hrtimer_base(t, b, p) (b)
#endif /* !CONFIG_SMP */
@@ -907,9 +955,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
ret = remove_hrtimer(timer, base);
/* Switch the timer base, if necessary: */
new_base = switch_hrtimer_base(timer, base);
new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
if (mode == HRTIMER_MODE_REL) {
if (mode & HRTIMER_MODE_REL) {
tim = ktime_add_safe(tim, new_base->get_time());
/*
* CONFIG_TIME_LOW_RES is a temporary way for architectures

kernel/kallsyms.c

@@ -30,12 +30,16 @@
#define all_var 0
#endif
/* These will be re-linked against their real values during the second link stage */
/*
* These will be re-linked against their real values
* during the second link stage.
*/
extern const unsigned long kallsyms_addresses[] __attribute__((weak));
extern const u8 kallsyms_names[] __attribute__((weak));
/* tell the compiler that the count isn't in the small data section if the arch
* has one (eg: FRV)
/*
* Tell the compiler that the count isn't in the small data section if the arch
* has one (eg: FRV).
*/
extern const unsigned long kallsyms_num_syms
__attribute__((weak, section(".rodata")));
@@ -75,31 +79,37 @@ static int is_ksym_addr(unsigned long addr)
return is_kernel_text(addr) || is_kernel_inittext(addr);
}
/* expand a compressed symbol data into the resulting uncompressed string,
given the offset to where the symbol is in the compressed stream */
/*
* Expand a compressed symbol data into the resulting uncompressed string,
* given the offset to where the symbol is in the compressed stream.
*/
static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
{
int len, skipped_first = 0;
const u8 *tptr, *data;
/* get the compressed symbol length from the first symbol byte */
/* Get the compressed symbol length from the first symbol byte. */
data = &kallsyms_names[off];
len = *data;
data++;
/* update the offset to return the offset for the next symbol on
* the compressed stream */
/*
* Update the offset to return the offset for the next symbol on
* the compressed stream.
*/
off += len + 1;
/* for every byte on the compressed symbol data, copy the table
entry for that byte */
while(len) {
tptr = &kallsyms_token_table[ kallsyms_token_index[*data] ];
/*
* For every byte on the compressed symbol data, copy the table
* entry for that byte.
*/
while (len) {
tptr = &kallsyms_token_table[kallsyms_token_index[*data]];
data++;
len--;
while (*tptr) {
if(skipped_first) {
if (skipped_first) {
*result = *tptr;
result++;
} else
@@ -110,36 +120,46 @@ static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
*result = '\0';
/* return to offset to the next symbol */
/* Return to offset to the next symbol. */
return off;
}
/* get symbol type information. This is encoded as a single char at the
* begining of the symbol name */
/*
* Get symbol type information. This is encoded as a single char at the
* beginning of the symbol name.
*/
static char kallsyms_get_symbol_type(unsigned int off)
{
/* get just the first code, look it up in the token table, and return the
* first char from this token */
return kallsyms_token_table[ kallsyms_token_index[ kallsyms_names[off+1] ] ];
/*
* Get just the first code, look it up in the token table,
* and return the first char from this token.
*/
return kallsyms_token_table[kallsyms_token_index[kallsyms_names[off + 1]]];
}
/* find the offset on the compressed stream given and index in the
* kallsyms array */
/*
* Find the offset on the compressed stream given an index in the
* kallsyms array.
*/
static unsigned int get_symbol_offset(unsigned long pos)
{
const u8 *name;
int i;
/* use the closest marker we have. We have markers every 256 positions,
* so that should be close enough */
name = &kallsyms_names[ kallsyms_markers[pos>>8] ];
/*
* Use the closest marker we have. We have markers every 256 positions,
* so that should be close enough.
*/
name = &kallsyms_names[kallsyms_markers[pos >> 8]];
/* sequentially scan all the symbols up to the point we're searching for.
* Every symbol is stored in a [<len>][<len> bytes of data] format, so we
* just need to add the len to the current pointer for every symbol we
* wish to skip */
for(i = 0; i < (pos&0xFF); i++)
/*
* Sequentially scan all the symbols up to the point we're searching
* for. Every symbol is stored in a [<len>][<len> bytes of data] format,
* so we just need to add the len to the current pointer for every
* symbol we wish to skip.
*/
for (i = 0; i < (pos & 0xFF); i++)
name = name + (*name) + 1;
return name - kallsyms_names;
@@ -190,7 +210,7 @@ static unsigned long get_symbol_pos(unsigned long addr,
/* This kernel should never had been booted. */
BUG_ON(!kallsyms_addresses);
/* do a binary search on the sorted kallsyms_addresses array */
/* Do a binary search on the sorted kallsyms_addresses array. */
low = 0;
high = kallsyms_num_syms;
@@ -203,15 +223,15 @@ static unsigned long get_symbol_pos(unsigned long addr,
}
/*
* search for the first aliased symbol. Aliased
* symbols are symbols with the same address
* Search for the first aliased symbol. Aliased
* symbols are symbols with the same address.
*/
while (low && kallsyms_addresses[low-1] == kallsyms_addresses[low])
--low;
symbol_start = kallsyms_addresses[low];
/* Search for next non-aliased symbol */
/* Search for next non-aliased symbol. */
for (i = low + 1; i < kallsyms_num_syms; i++) {
if (kallsyms_addresses[i] > symbol_start) {
symbol_end = kallsyms_addresses[i];
@@ -219,7 +239,7 @@ static unsigned long get_symbol_pos(unsigned long addr,
}
}
/* if we found no next symbol, we use the end of the section */
/* If we found no next symbol, we use the end of the section. */
if (!symbol_end) {
if (is_kernel_inittext(addr))
symbol_end = (unsigned long)_einittext;
@@ -252,10 +272,10 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
/*
* Lookup an address
* - modname is set to NULL if it's in the kernel
* - we guarantee that the returned name is valid until we reschedule even if
* it resides in a module
* - we also guarantee that modname will be valid until rescheduled
* - modname is set to NULL if it's in the kernel.
* - We guarantee that the returned name is valid until we reschedule,
*   even if it resides in a module.
* - We also guarantee that modname will be valid until rescheduled.
*/
const char *kallsyms_lookup(unsigned long addr,
unsigned long *symbolsize,
@@ -276,7 +296,7 @@ const char *kallsyms_lookup(unsigned long addr,
return namebuf;
}
/* see if it's in a module */
/* See if it's in a module. */
return module_address_lookup(addr, symbolsize, offset, modname,
namebuf);
}
@@ -294,7 +314,7 @@ int lookup_symbol_name(unsigned long addr, char *symname)
kallsyms_expand_symbol(get_symbol_offset(pos), symname);
return 0;
}
/* see if it's in a module */
/* See if it's in a module. */
return lookup_module_symbol_name(addr, symname);
}
@@ -313,7 +333,7 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
modname[0] = '\0';
return 0;
}
/* see if it's in a module */
/* See if it's in a module. */
return lookup_module_symbol_attrs(addr, size, offset, modname, name);
}
@@ -342,6 +362,7 @@ int sprint_symbol(char *buffer, unsigned long address)
return len;
}
EXPORT_SYMBOL_GPL(sprint_symbol);
/* Look up a kernel symbol and print it to the kernel messages. */
void __print_symbol(const char *fmt, unsigned long address)
@@ -352,13 +373,13 @@ void __print_symbol(const char *fmt, unsigned long address)
printk(fmt, buffer);
}
EXPORT_SYMBOL(__print_symbol);
/* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
struct kallsym_iter
{
struct kallsym_iter {
loff_t pos;
unsigned long value;
unsigned int nameoff; /* If iterating in core kernel symbols */
unsigned int nameoff; /* If iterating in core kernel symbols. */
char type;
char name[KSYM_NAME_LEN];
char module_name[MODULE_NAME_LEN];
@@ -404,7 +425,7 @@ static int update_iter(struct kallsym_iter *iter, loff_t pos)
iter->pos = pos;
return get_ksymbol_mod(iter);
}
/* If we're not on the desired position, reset to new position. */
if (pos != iter->pos)
reset_iter(iter, pos);
@@ -439,23 +460,25 @@ static int s_show(struct seq_file *m, void *p)
{
struct kallsym_iter *iter = m->private;
/* Some debugging symbols have no name. Ignore them. */
if (!iter->name[0])
return 0;
if (iter->module_name[0]) {
char type;
/* Label it "global" if it is exported,
* "local" if not exported. */
/*
* Label it "global" if it is exported,
* "local" if not exported.
*/
type = iter->exported ? toupper(iter->type) :
tolower(iter->type);
seq_printf(m, "%0*lx %c %s\t[%s]\n",
(int)(2*sizeof(void*)),
(int)(2 * sizeof(void *)),
iter->value, type, iter->name, iter->module_name);
} else
seq_printf(m, "%0*lx %c %s\n",
(int)(2*sizeof(void*)),
(int)(2 * sizeof(void *)),
iter->value, iter->type, iter->name);
return 0;
}
@@ -469,9 +492,11 @@ static const struct seq_operations kallsyms_op = {
static int kallsyms_open(struct inode *inode, struct file *file)
{
/* We keep iterator in m->private, since normal case is to
/*
* We keep iterator in m->private, since normal case is to
* s_start from where we left off, so we avoid doing
* using get_symbol_offset for every symbol */
* using get_symbol_offset for every symbol.
*/
struct kallsym_iter *iter;
int ret;
@@ -500,7 +525,4 @@ static int __init kallsyms_init(void)
proc_create("kallsyms", 0444, NULL, &kallsyms_operations);
return 0;
}
__initcall(kallsyms_init);
EXPORT_SYMBOL(__print_symbol);
EXPORT_SYMBOL_GPL(sprint_symbol);
device_initcall(kallsyms_init);

kernel/kfifo.c

@@ -72,9 +72,9 @@ struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock)
/*
* round up to the next power of 2, since our 'let the indices
* wrap' tachnique works only in this case.
* wrap' technique works only in this case.
*/
if (size & (size - 1)) {
if (!is_power_of_2(size)) {
BUG_ON(size > 0x80000000);
size = roundup_pow_of_two(size);
}
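
is_power_of_2(n) is the named form of the old open-coded test ((n & (n - 1)) == 0 for nonzero n); the rewrite only makes the intent explicit. A small userspace check of both the test and the roundup, assuming 32-bit sizes:

#include <assert.h>

static int is_pow2(unsigned int n)
{
	return n != 0 && (n & (n - 1)) == 0;
}

static unsigned int roundup_pow2(unsigned int n)
{
	unsigned int r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

int main(void)
{
	assert(is_pow2(4096) && !is_pow2(5000));
	assert(roundup_pow2(5000) == 8192);
	return 0;
}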

kernel/kthread.c

@@ -9,6 +9,7 @@
#include <linux/kthread.h>
#include <linux/completion.h>
#include <linux/err.h>
#include <linux/cpuset.h>
#include <linux/unistd.h>
#include <linux/file.h>
#include <linux/module.h>
@@ -236,6 +237,7 @@ int kthreadd(void *unused)
ignore_signals(tsk);
set_user_nice(tsk, KTHREAD_NICE_LEVEL);
set_cpus_allowed_ptr(tsk, cpu_all_mask);
set_mems_allowed(node_possible_map);
current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;

kernel/module.c

@@ -2899,7 +2899,7 @@ void print_modules(void)
struct module *mod;
char buf[8];
printk("Modules linked in:");
printk(KERN_DEFAULT "Modules linked in:");
/* Most callers should already have preempt disabled, but make sure */
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list)

kernel/power/poweroff.c

@@ -34,7 +34,7 @@ static struct sysrq_key_op sysrq_poweroff_op = {
.handler = handle_poweroff,
.help_msg = "powerOff",
.action_msg = "Power Off",
.enable_mask = SYSRQ_ENABLE_BOOT,
};
static int pm_sysrq_init(void)

kernel/power/process.c

@@ -117,9 +117,12 @@ int freeze_processes(void)
if (error)
goto Exit;
printk("done.");
oom_killer_disable();
Exit:
BUG_ON(in_atomic());
printk("\n");
return error;
}
@@ -145,6 +148,8 @@ static void thaw_tasks(bool nosig_only)
void thaw_processes(void)
{
oom_killer_enable();
printk("Restarting tasks ... ");
thaw_tasks(true);
thaw_tasks(false);

kernel/printk.c

@@ -687,20 +687,35 @@ asmlinkage int vprintk(const char *fmt, va_list args)
sizeof(printk_buf) - printed_len, fmt, args);
p = printk_buf;
/* Do we have a loglevel in the string? */
if (p[0] == '<') {
unsigned char c = p[1];
if (c && p[2] == '>') {
switch (c) {
case '0' ... '7': /* loglevel */
current_log_level = c - '0';
/* Fallthrough - make sure we're on a new line */
case 'd': /* KERN_DEFAULT */
if (!new_text_line) {
emit_log_char('\n');
new_text_line = 1;
}
/* Fallthrough - skip the loglevel */
case 'c': /* KERN_CONT */
p += 3;
break;
}
}
}
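
The token scan above peels any "<0>".."<7>", "<d>" or "<c>" prefix off the formatted string once, instead of re-checking inside the copy loop. A hedged userspace sketch of the same classification (parse_log_prefix is a hypothetical helper, not the kernel function):

/* Hypothetical helper: classify a "<c>" prefix, return chars to skip. */
static int parse_log_prefix(const char *p, int *level)
{
	if (p[0] == '<' && p[1] && p[2] == '>') {
		switch (p[1]) {
		case '0' ... '7':	/* explicit loglevel */
			*level = p[1] - '0';
			return 3;
		case 'd':		/* KERN_DEFAULT: keep default level */
		case 'c':		/* KERN_CONT: continuation, no newline */
			return 3;
		}
	}
	return 0;	/* no token; caller emits the default level itself */
}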
/*
* Copy the output into log_buf. If the caller didn't provide
* appropriate log level tags, we insert them here
*/
for (p = printk_buf; *p; p++) {
for ( ; *p; p++) {
if (new_text_line) {
/* If a token, set current_log_level and skip over */
if (p[0] == '<' && p[1] >= '0' && p[1] <= '7' &&
p[2] == '>') {
current_log_level = p[1] - '0';
p += 3;
printed_len -= 3;
}
/* Always output the token */
emit_log_char('<');
emit_log_char(current_log_level + '0');

kernel/profile.c

@@ -365,7 +365,7 @@ static int __cpuinit profile_cpu_callback(struct notifier_block *info,
node = cpu_to_node(cpu);
per_cpu(cpu_profile_flip, cpu) = 0;
if (!per_cpu(cpu_profile_hits, cpu)[1]) {
page = alloc_pages_node(node,
page = alloc_pages_exact_node(node,
GFP_KERNEL | __GFP_ZERO,
0);
if (!page)
@@ -373,7 +373,7 @@ static int __cpuinit profile_cpu_callback(struct notifier_block *info,
per_cpu(cpu_profile_hits, cpu)[1] = page_address(page);
}
if (!per_cpu(cpu_profile_hits, cpu)[0]) {
page = alloc_pages_node(node,
page = alloc_pages_exact_node(node,
GFP_KERNEL | __GFP_ZERO,
0);
if (!page)
@@ -564,14 +564,14 @@ static int create_hash_tables(void)
int node = cpu_to_node(cpu);
struct page *page;
page = alloc_pages_node(node,
page = alloc_pages_exact_node(node,
GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
0);
if (!page)
goto out_cleanup;
per_cpu(cpu_profile_hits, cpu)[1]
= (struct profile_hit *)page_address(page);
page = alloc_pages_node(node,
page = alloc_pages_exact_node(node,
GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
0);
if (!page)

kernel/rtmutex.c

@@ -902,7 +902,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
* Returns:
* 0 on success
* -EINTR when interrupted by a signal
* -ETIMEOUT when the timeout expired
* -ETIMEDOUT when the timeout expired
* -EDEADLK when the lock would deadlock (when deadlock detection is on)
*/
int

kernel/sched.c

@@ -240,7 +240,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
hard = hrtimer_get_expires(&rt_b->rt_period_timer);
delta = ktime_to_ns(ktime_sub(hard, soft));
__hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
HRTIMER_MODE_ABS, 0);
HRTIMER_MODE_ABS_PINNED, 0);
}
spin_unlock(&rt_b->rt_runtime_lock);
}
@@ -1155,7 +1155,7 @@ static __init void init_hrtick(void)
static void hrtick_start(struct rq *rq, u64 delay)
{
__hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
HRTIMER_MODE_REL, 0);
HRTIMER_MODE_REL_PINNED, 0);
}
static inline void init_hrtick(void)
@@ -4397,6 +4397,11 @@ static struct {
.load_balancer = ATOMIC_INIT(-1),
};
int get_nohz_load_balancer(void)
{
return atomic_read(&nohz.load_balancer);
}
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
/**
* lowest_flag_domain - Return lowest sched_domain containing flag.
@@ -9029,6 +9034,8 @@ void __init sched_init_smp(void)
}
#endif /* CONFIG_SMP */
const_debug unsigned int sysctl_timer_migration = 1;
int in_sched_functions(unsigned long addr)
{
return in_lock_functions(addr) ||

kernel/signal.c

@@ -832,6 +832,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
{
struct sigpending *pending;
struct sigqueue *q;
int override_rlimit;
trace_sched_signal_send(sig, t);
@@ -863,9 +864,13 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
make sure at least one signal gets delivered and don't
pass on the info struct. */
q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
(is_si_special(info) ||
info->si_code >= 0)));
if (sig < SIGRTMIN)
override_rlimit = (is_si_special(info) || info->si_code >= 0);
else
override_rlimit = 0;
q = __sigqueue_alloc(t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE,
override_rlimit);
if (q) {
list_add_tail(&q->list, &pending->list);
switch ((unsigned long) info) {

kernel/slow-work.c

@@ -318,6 +318,15 @@ cant_get_ref:
}
EXPORT_SYMBOL(slow_work_enqueue);
/*
* Schedule a cull of the thread pool at some time in the near future
*/
static void slow_work_schedule_cull(void)
{
mod_timer(&slow_work_cull_timer,
round_jiffies(jiffies + SLOW_WORK_CULL_TIMEOUT));
}
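
round_jiffies() exists to batch unrelated timers onto whole-second boundaries so an otherwise idle CPU takes one wakeup instead of several; the cull and OOM timers are not deadline-sensitive, so rounding them is free power savings. A simplified sketch of the rounding idea (the real helper also applies a per-CPU offset; DEMO_HZ is an assumed tick rate):

#define DEMO_HZ 250	/* assumed tick rate, for illustration */

/* Round a jiffies value up to the next whole-second boundary. */
static unsigned long demo_round_jiffies(unsigned long j)
{
	unsigned long rem = j % DEMO_HZ;

	return rem ? j + (DEMO_HZ - rem) : j;
}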
/*
* Worker thread culling algorithm
*/
@@ -335,8 +344,7 @@ static bool slow_work_cull_thread(void)
list_empty(&vslow_work_queue) &&
atomic_read(&slow_work_thread_count) >
slow_work_min_threads) {
mod_timer(&slow_work_cull_timer,
jiffies + SLOW_WORK_CULL_TIMEOUT);
slow_work_schedule_cull();
do_cull = true;
}
}
@@ -393,8 +401,7 @@ static int slow_work_thread(void *_data)
list_empty(&vslow_work_queue) &&
atomic_read(&slow_work_thread_count) >
slow_work_min_threads)
mod_timer(&slow_work_cull_timer,
jiffies + SLOW_WORK_CULL_TIMEOUT);
slow_work_schedule_cull();
continue;
}
@@ -458,7 +465,7 @@ static void slow_work_new_thread_execute(struct slow_work *work)
if (atomic_dec_and_test(&slow_work_thread_count))
BUG(); /* we're running on a slow work thread... */
mod_timer(&slow_work_oom_timer,
jiffies + SLOW_WORK_OOM_TIMEOUT);
round_jiffies(jiffies + SLOW_WORK_OOM_TIMEOUT));
} else {
/* ratelimit the starting of new threads */
mod_timer(&slow_work_oom_timer, jiffies + 1);
@@ -502,8 +509,7 @@ static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
if (n < 0 && !slow_work_may_not_start_new_thread)
slow_work_enqueue(&slow_work_new_thread);
else if (n > 0)
mod_timer(&slow_work_cull_timer,
jiffies + SLOW_WORK_CULL_TIMEOUT);
slow_work_schedule_cull();
}
mutex_unlock(&slow_work_user_lock);
}
@@ -529,8 +535,7 @@ static int slow_work_max_threads_sysctl(struct ctl_table *table, int write,
atomic_read(&slow_work_thread_count);
if (n < 0)
mod_timer(&slow_work_cull_timer,
jiffies + SLOW_WORK_CULL_TIMEOUT);
slow_work_schedule_cull();
}
mutex_unlock(&slow_work_user_lock);
}

kernel/softirq.c

@@ -382,6 +382,17 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
EXPORT_SYMBOL(__tasklet_hi_schedule);
void __tasklet_hi_schedule_first(struct tasklet_struct *t)
{
BUG_ON(!irqs_disabled());
t->next = __get_cpu_var(tasklet_hi_vec).head;
__get_cpu_var(tasklet_hi_vec).head = t;
__raise_softirq_irqoff(HI_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_hi_schedule_first);
static void tasklet_action(struct softirq_action *a)
{
struct tasklet_struct *list;

kernel/sys.c

@@ -1113,289 +1113,6 @@ out:
return err;
}
/*
* Supplementary group IDs
*/
/* init to 2 - one for init_task, one to ensure it is never freed */
struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
struct group_info *groups_alloc(int gidsetsize)
{
struct group_info *group_info;
int nblocks;
int i;
nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK;
/* Make sure we always allocate at least one indirect block pointer */
nblocks = nblocks ? : 1;
group_info = kmalloc(sizeof(*group_info) + nblocks*sizeof(gid_t *), GFP_USER);
if (!group_info)
return NULL;
group_info->ngroups = gidsetsize;
group_info->nblocks = nblocks;
atomic_set(&group_info->usage, 1);
if (gidsetsize <= NGROUPS_SMALL)
group_info->blocks[0] = group_info->small_block;
else {
for (i = 0; i < nblocks; i++) {
gid_t *b;
b = (void *)__get_free_page(GFP_USER);
if (!b)
goto out_undo_partial_alloc;
group_info->blocks[i] = b;
}
}
return group_info;
out_undo_partial_alloc:
while (--i >= 0) {
free_page((unsigned long)group_info->blocks[i]);
}
kfree(group_info);
return NULL;
}
EXPORT_SYMBOL(groups_alloc);
void groups_free(struct group_info *group_info)
{
if (group_info->blocks[0] != group_info->small_block) {
int i;
for (i = 0; i < group_info->nblocks; i++)
free_page((unsigned long)group_info->blocks[i]);
}
kfree(group_info);
}
EXPORT_SYMBOL(groups_free);
/* export the group_info to a user-space array */
static int groups_to_user(gid_t __user *grouplist,
const struct group_info *group_info)
{
int i;
unsigned int count = group_info->ngroups;
for (i = 0; i < group_info->nblocks; i++) {
unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
unsigned int len = cp_count * sizeof(*grouplist);
if (copy_to_user(grouplist, group_info->blocks[i], len))
return -EFAULT;
grouplist += NGROUPS_PER_BLOCK;
count -= cp_count;
}
return 0;
}
/* fill a group_info from a user-space array - it must be allocated already */
static int groups_from_user(struct group_info *group_info,
gid_t __user *grouplist)
{
int i;
unsigned int count = group_info->ngroups;
for (i = 0; i < group_info->nblocks; i++) {
unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
unsigned int len = cp_count * sizeof(*grouplist);
if (copy_from_user(group_info->blocks[i], grouplist, len))
return -EFAULT;
grouplist += NGROUPS_PER_BLOCK;
count -= cp_count;
}
return 0;
}
/* a simple Shell sort */
static void groups_sort(struct group_info *group_info)
{
int base, max, stride;
int gidsetsize = group_info->ngroups;
for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
; /* nothing */
stride /= 3;
while (stride) {
max = gidsetsize - stride;
for (base = 0; base < max; base++) {
int left = base;
int right = left + stride;
gid_t tmp = GROUP_AT(group_info, right);
while (left >= 0 && GROUP_AT(group_info, left) > tmp) {
GROUP_AT(group_info, right) =
GROUP_AT(group_info, left);
right = left;
left -= stride;
}
GROUP_AT(group_info, right) = tmp;
}
stride /= 3;
}
}
/* a simple bsearch */
int groups_search(const struct group_info *group_info, gid_t grp)
{
unsigned int left, right;
if (!group_info)
return 0;
left = 0;
right = group_info->ngroups;
while (left < right) {
unsigned int mid = (left+right)/2;
int cmp = grp - GROUP_AT(group_info, mid);
if (cmp > 0)
left = mid + 1;
else if (cmp < 0)
right = mid;
else
return 1;
}
return 0;
}
/**
* set_groups - Change a group subscription in a set of credentials
* @new: The newly prepared set of credentials to alter
* @group_info: The group list to install
*
* Validate a group subscription and, if valid, insert it into a set
* of credentials.
*/
int set_groups(struct cred *new, struct group_info *group_info)
{
int retval;
retval = security_task_setgroups(group_info);
if (retval)
return retval;
put_group_info(new->group_info);
groups_sort(group_info);
get_group_info(group_info);
new->group_info = group_info;
return 0;
}
EXPORT_SYMBOL(set_groups);
/**
* set_current_groups - Change current's group subscription
* @group_info: The group list to impose
*
* Validate a group subscription and, if valid, impose it upon current's task
* security record.
*/
int set_current_groups(struct group_info *group_info)
{
struct cred *new;
int ret;
new = prepare_creds();
if (!new)
return -ENOMEM;
ret = set_groups(new, group_info);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
}
EXPORT_SYMBOL(set_current_groups);
SYSCALL_DEFINE2(getgroups, int, gidsetsize, gid_t __user *, grouplist)
{
const struct cred *cred = current_cred();
int i;
if (gidsetsize < 0)
return -EINVAL;
/* no need to grab task_lock here; it cannot change */
i = cred->group_info->ngroups;
if (gidsetsize) {
if (i > gidsetsize) {
i = -EINVAL;
goto out;
}
if (groups_to_user(grouplist, cred->group_info)) {
i = -EFAULT;
goto out;
}
}
out:
return i;
}
/*
* SMP: Our groups are copy-on-write. We can set them safely
* without another task interfering.
*/
SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
{
struct group_info *group_info;
int retval;
if (!capable(CAP_SETGID))
return -EPERM;
if ((unsigned)gidsetsize > NGROUPS_MAX)
return -EINVAL;
group_info = groups_alloc(gidsetsize);
if (!group_info)
return -ENOMEM;
retval = groups_from_user(group_info, grouplist);
if (retval) {
put_group_info(group_info);
return retval;
}
retval = set_current_groups(group_info);
put_group_info(group_info);
return retval;
}
/*
* Check whether we're fsgid/egid or in the supplemental group..
*/
int in_group_p(gid_t grp)
{
const struct cred *cred = current_cred();
int retval = 1;
if (grp != cred->fsgid)
retval = groups_search(cred->group_info, grp);
return retval;
}
EXPORT_SYMBOL(in_group_p);
int in_egroup_p(gid_t grp)
{
const struct cred *cred = current_cred();
int retval = 1;
if (grp != cred->egid)
retval = groups_search(cred->group_info, grp);
return retval;
}
EXPORT_SYMBOL(in_egroup_p);
DECLARE_RWSEM(uts_sem);
SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)

kernel/sysctl.c

@@ -27,6 +27,7 @@
#include <linux/security.h>
#include <linux/ctype.h>
#include <linux/utsname.h>
#include <linux/kmemcheck.h>
#include <linux/smp_lock.h>
#include <linux/fs.h>
#include <linux/init.h>
@@ -328,6 +329,14 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "timer_migration",
.data = &sysctl_timer_migration,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
#endif
{
.ctl_name = CTL_UNNUMBERED,
@@ -959,6 +968,17 @@ static struct ctl_table kern_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
#ifdef CONFIG_KMEMCHECK
{
.ctl_name = CTL_UNNUMBERED,
.procname = "kmemcheck",
.data = &kmemcheck_enabled,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
#endif
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt
@@ -1317,7 +1337,6 @@ static struct ctl_table vm_table[] = {
.extra2 = &one,
},
#endif
#ifdef CONFIG_UNEVICTABLE_LRU
{
.ctl_name = CTL_UNNUMBERED,
.procname = "scan_unevictable_pages",
@@ -1326,7 +1345,6 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = &scan_unevictable_handler,
},
#endif
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt

kernel/time/clockevents.c

@@ -18,6 +18,7 @@
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysdev.h>
#include <linux/tick.h>
/* The registered clock event devices */
static LIST_HEAD(clockevent_devices);
@@ -54,6 +55,7 @@ unsigned long clockevent_delta2ns(unsigned long latch,
return (unsigned long) clc;
}
EXPORT_SYMBOL_GPL(clockevent_delta2ns);
/**
* clockevents_set_mode - set the operating mode of a clock event device
@@ -187,6 +189,7 @@ void clockevents_register_device(struct clock_event_device *dev)
spin_unlock(&clockevents_lock);
}
EXPORT_SYMBOL_GPL(clockevents_register_device);
/*
* Noop handler when we shut down an event device
@@ -251,4 +254,15 @@ void clockevents_notify(unsigned long reason, void *arg)
spin_unlock(&clockevents_lock);
}
EXPORT_SYMBOL_GPL(clockevents_notify);
ktime_t clockevents_get_next_event(int cpu)
{
struct tick_device *td;
struct clock_event_device *dev;
td = &per_cpu(tick_cpu_device, cpu);
dev = td->evtdev;
return dev->next_event;
}
#endif

kernel/time/clocksource.c

@@ -509,6 +509,18 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
}
}
/*
* Check to make sure we don't switch to a non-highres capable
* clocksource if the tick code is in oneshot mode (highres or nohz)
*/
if (tick_oneshot_mode_active() &&
!(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) {
printk(KERN_WARNING "%s clocksource is not HRT compatible. "
"Cannot switch while in HRT/NOHZ mode\n", ovr->name);
ovr = NULL;
override_name[0] = 0;
}
/* Reselect, when the override name has changed */
if (ovr != clocksource_override) {
clocksource_override = ovr;
@@ -537,7 +549,13 @@ sysfs_show_available_clocksources(struct sys_device *dev,
spin_lock_irq(&clocksource_lock);
list_for_each_entry(src, &clocksource_list, list) {
count += snprintf(buf + count,
/*
* Don't show non-HRES clocksource if the tick code is
* in one shot mode (highres=on or nohz=on)
*/
if (!tick_oneshot_mode_active() ||
(src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
count += snprintf(buf + count,
max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
"%s ", src->name);
}

kernel/time/tick-broadcast.c

@@ -27,7 +27,7 @@
* timer stops in C3 state.
*/
struct tick_device tick_broadcast_device;
static struct tick_device tick_broadcast_device;
/* FIXME: Use cpumask_var_t. */
static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
static DECLARE_BITMAP(tmpmask, NR_CPUS);

kernel/time/tick-oneshot.c

@@ -128,6 +128,23 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
return 0;
}
/**
* tick_oneshot_mode_active - check whether the system is in oneshot mode
*
* Returns 1 when either nohz or highres are enabled, otherwise 0.
*/
int tick_oneshot_mode_active(void)
{
unsigned long flags;
int ret;
local_irq_save(flags);
ret = __get_cpu_var(tick_cpu_device).mode == TICKDEV_MODE_ONESHOT;
local_irq_restore(flags);
return ret;
}
#ifdef CONFIG_HIGH_RES_TIMERS
/**
* tick_init_highres - switch to high resolution mode

kernel/time/tick-sched.c

@@ -349,7 +349,7 @@ void tick_nohz_stop_sched_tick(int inidle)
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
hrtimer_start(&ts->sched_timer, expires,
HRTIMER_MODE_ABS);
HRTIMER_MODE_ABS_PINNED);
/* Check, if the timer was already in the past */
if (hrtimer_active(&ts->sched_timer))
goto out;
@@ -395,7 +395,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
hrtimer_start_expires(&ts->sched_timer,
HRTIMER_MODE_ABS);
HRTIMER_MODE_ABS_PINNED);
/* Check, if the timer was already in the past */
if (hrtimer_active(&ts->sched_timer))
break;
@@ -698,7 +698,8 @@ void tick_setup_sched_timer(void)
for (;;) {
hrtimer_forward(&ts->sched_timer, now, tick_period);
hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS);
hrtimer_start_expires(&ts->sched_timer,
HRTIMER_MODE_ABS_PINNED);
/* Check, if the timer was already in the past */
if (hrtimer_active(&ts->sched_timer))
break;

kernel/timer.c

@@ -38,6 +38,7 @@
#include <linux/tick.h>
#include <linux/kallsyms.h>
#include <linux/perf_counter.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -605,13 +606,12 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
}
static inline int
__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
__mod_timer(struct timer_list *timer, unsigned long expires,
bool pending_only, int pinned)
{
struct tvec_base *base, *new_base;
unsigned long flags;
int ret;
ret = 0;
int ret = 0, cpu;
timer_stats_timer_set_start_info(timer);
BUG_ON(!timer->function);
@@ -630,6 +630,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
new_base = __get_cpu_var(tvec_bases);
cpu = smp_processor_id();
#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
int preferred_cpu = get_nohz_load_balancer();
if (preferred_cpu >= 0)
cpu = preferred_cpu;
}
#endif
new_base = per_cpu(tvec_bases, cpu);
if (base != new_base) {
/*
* We are trying to schedule the timer on the local CPU.
@@ -669,7 +681,7 @@ out_unlock:
*/
int mod_timer_pending(struct timer_list *timer, unsigned long expires)
{
return __mod_timer(timer, expires, true);
return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
}
EXPORT_SYMBOL(mod_timer_pending);
@@ -703,10 +715,32 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
if (timer->expires == expires && timer_pending(timer))
return 1;
return __mod_timer(timer, expires, false);
return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
}
EXPORT_SYMBOL(mod_timer);
/**
* mod_timer_pinned - modify a timer's timeout
* @timer: the timer to be modified
* @expires: new timeout in jiffies
*
* mod_timer_pinned() is a way to update the expire field of an
* active timer (if the timer is inactive it will be activated)
* and not allow the timer to be migrated to a different CPU.
*
* mod_timer_pinned(timer, expires) is equivalent to:
*
* del_timer(timer); timer->expires = expires; add_timer(timer);
*/
int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
{
if (timer->expires == expires && timer_pending(timer))
return 1;
return __mod_timer(timer, expires, false, TIMER_PINNED);
}
EXPORT_SYMBOL(mod_timer_pinned);
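
For reference, a minimal usage sketch of the new interface, assuming a timer whose handler touches per-CPU state (my_timer and friends are hypothetical names):

#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list my_timer;	/* hypothetical example timer */

static void my_timer_fn(unsigned long data)
{
	/* imagine this touches per-CPU state, so it must not migrate */
}

static void arm_my_timer(void)
{
	setup_timer(&my_timer, my_timer_fn, 0);
	/* fire in ~1s, and stay on this CPU even if it goes idle */
	mod_timer_pinned(&my_timer, jiffies + HZ);
}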
/**
* add_timer - start a timer
* @timer: the timer to be added
@@ -757,6 +791,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
wake_up_idle_cpu(cpu);
spin_unlock_irqrestore(&base->lock, flags);
}
EXPORT_SYMBOL_GPL(add_timer_on);
/**
* del_timer - deactivate a timer.
@@ -1016,6 +1051,9 @@ cascade:
index = slot = timer_jiffies & TVN_MASK;
do {
list_for_each_entry(nte, varp->vec + slot, entry) {
if (tbase_get_deferrable(nte->base))
continue;
found = 1;
if (time_before(nte->expires, expires))
expires = nte->expires;
@@ -1306,7 +1344,7 @@ signed long __sched schedule_timeout(signed long timeout)
expire = timeout + jiffies;
setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
__mod_timer(&timer, expire, false);
__mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
schedule();
del_singleshot_timer_sync(&timer);

kernel/trace/Kconfig

@@ -147,7 +147,7 @@ config IRQSOFF_TRACER
disabled by default and can be runtime (re-)started
via:
echo 0 > /debugfs/tracing/tracing_max_latency
echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
(Note that kernel size and overhead increase with this option
enabled. This option and the preempt-off timing option can be
@@ -168,7 +168,7 @@ config PREEMPT_TRACER
disabled by default and can be runtime (re-)started
via:
echo 0 > /debugfs/tracing/tracing_max_latency
echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
(Note that kernel size and overhead increase with this option
enabled. This option and the irqs-off timing option can be
@@ -261,7 +261,7 @@ config PROFILE_ANNOTATED_BRANCHES
This tracer profiles all the likely and unlikely macros
in the kernel. It will display the results in:
/debugfs/tracing/profile_annotated_branch
/sys/kernel/debug/tracing/profile_annotated_branch
Note: this will add a significant overhead, only turn this
on if you need to profile the system's use of these macros.
@@ -274,7 +274,7 @@ config PROFILE_ALL_BRANCHES
taken in the kernel is recorded, whether it hit or missed.
The results will be displayed in:
/debugfs/tracing/profile_branch
/sys/kernel/debug/tracing/profile_branch
This option also enables the likely/unlikely profiler.
@@ -323,7 +323,7 @@ config STACK_TRACER
select KALLSYMS
help
This special tracer records the maximum stack footprint of the
kernel and displays it in debugfs/tracing/stack_trace.
kernel and displays it in /sys/kernel/debug/tracing/stack_trace.
This tracer works by hooking into every function call that the
kernel executes, and keeping a maximum stack depth value and
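All of these help texts now point at the canonical debugfs mount point. A tiny userspace sketch of the documented reset operation (assumes debugfs is mounted at /sys/kernel/debug):

#include <stdio.h>

int main(void)
{
	/* equivalent of: echo 0 > .../tracing/tracing_max_latency */
	FILE *f = fopen("/sys/kernel/debug/tracing/tracing_max_latency", "w");

	if (!f)
		return 1;
	fputs("0\n", f);
	return fclose(f) ? 1 : 0;
}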

View File

@@ -10,6 +10,7 @@
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kmemcheck.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
@@ -1270,6 +1271,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
if (tail < BUF_PAGE_SIZE) {
/* Mark the rest of the page with padding */
event = __rb_page_index(tail_page, tail);
kmemcheck_annotate_bitfield(event, bitfield);
rb_event_set_padding(event);
}
@@ -1327,6 +1329,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
return NULL;
event = __rb_page_index(tail_page, tail);
kmemcheck_annotate_bitfield(event, bitfield);
rb_update_event(event, type, length);
/* The passed in type is zero for DATA */
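kmemcheck_annotate_bitfield() marks the event's bitfield as initialized so kmemcheck does not flag reads of its padding bits. The pattern it relies on, sketched with an assumed field layout (see the real struct ring_buffer_event for the exact fields):

struct sample_event {
	kmemcheck_bitfield_begin(bitfield);
	u32 type_len:5, time_delta:27;
	kmemcheck_bitfield_end(bitfield);
	u32 array[];
};

static void sample_event_init(struct sample_event *event)
{
	/* tell kmemcheck every bit between the markers is initialized */
	kmemcheck_annotate_bitfield(event, bitfield);
}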

View File

@@ -344,7 +344,7 @@ static raw_spinlock_t ftrace_max_lock =
/*
* Copy the new maximum trace into the separate maximum-trace
* structure. (this way the maximum trace is permanently saved,
* for later retrieval via /debugfs/tracing/latency_trace)
* for later retrieval via /sys/kernel/debug/tracing/latency_trace)
*/
static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
@@ -2414,21 +2414,20 @@ static const struct file_operations tracing_iter_fops = {
static const char readme_msg[] =
"tracing mini-HOWTO:\n\n"
"# mkdir /debug\n"
"# mount -t debugfs nodev /debug\n\n"
"# cat /debug/tracing/available_tracers\n"
"# mount -t debugfs nodev /sys/kernel/debug\n\n"
"# cat /sys/kernel/debug/tracing/available_tracers\n"
"wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
"# cat /debug/tracing/current_tracer\n"
"# cat /sys/kernel/debug/tracing/current_tracer\n"
"nop\n"
"# echo sched_switch > /debug/tracing/current_tracer\n"
"# cat /debug/tracing/current_tracer\n"
"# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n"
"# cat /sys/kernel/debug/tracing/current_tracer\n"
"sched_switch\n"
"# cat /debug/tracing/trace_options\n"
"# cat /sys/kernel/debug/tracing/trace_options\n"
"noprint-parent nosym-offset nosym-addr noverbose\n"
"# echo print-parent > /debug/tracing/trace_options\n"
"# echo 1 > /debug/tracing/tracing_enabled\n"
"# cat /debug/tracing/trace > /tmp/trace.txt\n"
"# echo 0 > /debug/tracing/tracing_enabled\n"
"# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
"# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n"
"# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n"
"# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n"
;
static ssize_t

View File

@@ -203,7 +203,8 @@ static void start_stack_timer(void *unused)
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = stack_trace_timer_fn;
hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
hrtimer_start(hrtimer, ns_to_ktime(sample_period),
HRTIMER_MODE_REL_PINNED);
}
static void start_stack_timers(void)

View File

@@ -75,21 +75,6 @@ static void uid_hash_remove(struct user_struct *up)
put_user_ns(up->user_ns);
}
static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
{
struct user_struct *user;
struct hlist_node *h;
hlist_for_each_entry(user, h, hashent, uidhash_node) {
if (user->uid == uid) {
atomic_inc(&user->__count);
return user;
}
}
return NULL;
}
#ifdef CONFIG_USER_SCHED
static void sched_destroy_user(struct user_struct *up)
@@ -119,6 +104,23 @@ static int sched_create_user(struct user_struct *up) { return 0; }
#if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS)
static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
{
struct user_struct *user;
struct hlist_node *h;
hlist_for_each_entry(user, h, hashent, uidhash_node) {
if (user->uid == uid) {
/* possibly resurrect an "almost deleted" object */
if (atomic_inc_return(&user->__count) == 1)
cancel_delayed_work(&user->work);
return user;
}
}
return NULL;
}
static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */
static DEFINE_MUTEX(uids_mutex);
@@ -283,12 +285,12 @@ int __init uids_sysfs_init(void)
return uids_user_create(&root_user);
}
/* work function to remove sysfs directory for a user and free up
/* delayed work function to remove sysfs directory for a user and free up
* corresponding structures.
*/
static void cleanup_user_struct(struct work_struct *w)
{
struct user_struct *up = container_of(w, struct user_struct, work);
struct user_struct *up = container_of(w, struct user_struct, work.work);
unsigned long flags;
int remove_user = 0;
@@ -297,15 +299,12 @@ static void cleanup_user_struct(struct work_struct *w)
*/
uids_mutex_lock();
local_irq_save(flags);
if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
spin_lock_irqsave(&uidhash_lock, flags);
if (atomic_read(&up->__count) == 0) {
uid_hash_remove(up);
remove_user = 1;
spin_unlock_irqrestore(&uidhash_lock, flags);
} else {
local_irq_restore(flags);
}
spin_unlock_irqrestore(&uidhash_lock, flags);
if (!remove_user)
goto done;
@@ -331,16 +330,28 @@ done:
*/
static void free_user(struct user_struct *up, unsigned long flags)
{
/* restore back the count */
atomic_inc(&up->__count);
spin_unlock_irqrestore(&uidhash_lock, flags);
INIT_WORK(&up->work, cleanup_user_struct);
schedule_work(&up->work);
INIT_DELAYED_WORK(&up->work, cleanup_user_struct);
schedule_delayed_work(&up->work, msecs_to_jiffies(1000));
}
#else /* CONFIG_USER_SCHED && CONFIG_SYSFS */
static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
{
struct user_struct *user;
struct hlist_node *h;
hlist_for_each_entry(user, h, hashent, uidhash_node) {
if (user->uid == uid) {
atomic_inc(&user->__count);
return user;
}
}
return NULL;
}
int uids_sysfs_init(void) { return 0; }
static inline int uids_user_create(struct user_struct *up) { return 0; }
static inline void uids_mutex_lock(void) { }
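The user.c changes above implement a deferred-free-with-resurrection pattern; condensed into a generic sketch (names invented, and the locking the real code does under uidhash_lock is elided for brevity):

struct obj {
	atomic_t count;
	struct delayed_work work;
};

static void obj_cleanup(struct work_struct *w);	/* frees the object */

static void obj_put(struct obj *o)
{
	/* last reference: schedule the free instead of doing it inline */
	if (atomic_dec_and_test(&o->count)) {
		INIT_DELAYED_WORK(&o->work, obj_cleanup);
		schedule_delayed_work(&o->work, msecs_to_jiffies(1000));
	}
}

static struct obj *obj_get(struct obj *o)
{
	/* 0 -> 1 means the deferred free has not run yet: resurrect */
	if (atomic_inc_return(&o->count) == 1)
		cancel_delayed_work(&o->work);
	return o;
}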