Merge branch 'linux-2.6'
This commit is contained in:
@@ -54,3 +54,5 @@ config HZ
|
||||
default 300 if HZ_300
|
||||
default 1000 if HZ_1000
|
||||
|
||||
config SCHED_HRTICK
|
||||
def_bool HIGH_RES_TIMERS && X86
|
||||
|
||||
@@ -52,14 +52,13 @@ config PREEMPT
|
||||
|
||||
endchoice
|
||||
|
||||
config PREEMPT_BKL
|
||||
bool "Preempt The Big Kernel Lock"
|
||||
depends on SMP || PREEMPT
|
||||
config RCU_TRACE
|
||||
bool "Enable tracing for RCU - currently stats in debugfs"
|
||||
select DEBUG_FS
|
||||
default y
|
||||
help
|
||||
This option reduces the latency of the kernel by making the
|
||||
big kernel lock preemptible.
|
||||
This option provides tracing in RCU which presents stats
|
||||
in debugfs for debugging RCU implementation.
|
||||
|
||||
Say Y here if you are building a kernel for a desktop system.
|
||||
Say Y here if you want to enable RCU tracing
|
||||
Say N if you are unsure.
|
||||
|
||||
|
||||
@@ -36,6 +36,7 @@ obj-$(CONFIG_KALLSYMS) += kallsyms.o
|
||||
obj-$(CONFIG_PM) += power/
|
||||
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
|
||||
obj-$(CONFIG_KEXEC) += kexec.o
|
||||
obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
|
||||
obj-$(CONFIG_COMPAT) += compat.o
|
||||
obj-$(CONFIG_CGROUPS) += cgroup.o
|
||||
obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
|
||||
@@ -43,6 +44,7 @@ obj-$(CONFIG_CPUSETS) += cpuset.o
|
||||
obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
|
||||
obj-$(CONFIG_IKCONFIG) += configs.o
|
||||
obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
|
||||
obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
|
||||
obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
|
||||
obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
|
||||
obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
|
||||
@@ -52,11 +54,17 @@ obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
|
||||
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
|
||||
obj-$(CONFIG_SECCOMP) += seccomp.o
|
||||
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
|
||||
obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o
|
||||
obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o
|
||||
ifeq ($(CONFIG_PREEMPT_RCU),y)
|
||||
obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o
|
||||
endif
|
||||
obj-$(CONFIG_RELAY) += relay.o
|
||||
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
|
||||
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
|
||||
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
|
||||
obj-$(CONFIG_MARKERS) += marker.o
|
||||
obj-$(CONFIG_LATENCYTOP) += latencytop.o
|
||||
|
||||
ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
|
||||
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
|
||||
|
||||
48
kernel/backtracetest.c
Normal file
48
kernel/backtracetest.c
Normal file
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Simple stack backtrace regression test module
|
||||
*
|
||||
* (C) Copyright 2008 Intel Corporation
|
||||
* Author: Arjan van de Ven <arjan@linux.intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; version 2
|
||||
* of the License.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/delay.h>
|
||||
|
||||
static struct timer_list backtrace_timer;
|
||||
|
||||
static void backtrace_test_timer(unsigned long data)
|
||||
{
|
||||
printk("Testing a backtrace from irq context.\n");
|
||||
printk("The following trace is a kernel self test and not a bug!\n");
|
||||
dump_stack();
|
||||
}
|
||||
static int backtrace_regression_test(void)
|
||||
{
|
||||
printk("====[ backtrace testing ]===========\n");
|
||||
printk("Testing a backtrace from process context.\n");
|
||||
printk("The following trace is a kernel self test and not a bug!\n");
|
||||
dump_stack();
|
||||
|
||||
init_timer(&backtrace_timer);
|
||||
backtrace_timer.function = backtrace_test_timer;
|
||||
mod_timer(&backtrace_timer, jiffies + 10);
|
||||
|
||||
msleep(10);
|
||||
printk("====[ end of backtrace testing ]====\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void exitf(void)
|
||||
{
|
||||
}
|
||||
|
||||
module_init(backtrace_regression_test);
|
||||
module_exit(exitf);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
|
||||
166
kernel/cpu.c
166
kernel/cpu.c
@@ -15,9 +15,8 @@
|
||||
#include <linux/stop_machine.h>
|
||||
#include <linux/mutex.h>
|
||||
|
||||
/* This protects CPUs going up and down... */
|
||||
/* Serializes the updates to cpu_online_map, cpu_present_map */
|
||||
static DEFINE_MUTEX(cpu_add_remove_lock);
|
||||
static DEFINE_MUTEX(cpu_bitmask_lock);
|
||||
|
||||
static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
|
||||
|
||||
@@ -26,52 +25,123 @@ static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
|
||||
*/
|
||||
static int cpu_hotplug_disabled;
|
||||
|
||||
static struct {
|
||||
struct task_struct *active_writer;
|
||||
struct mutex lock; /* Synchronizes accesses to refcount, */
|
||||
/*
|
||||
* Also blocks the new readers during
|
||||
* an ongoing cpu hotplug operation.
|
||||
*/
|
||||
int refcount;
|
||||
wait_queue_head_t writer_queue;
|
||||
} cpu_hotplug;
|
||||
|
||||
#define writer_exists() (cpu_hotplug.active_writer != NULL)
|
||||
|
||||
void __init cpu_hotplug_init(void)
|
||||
{
|
||||
cpu_hotplug.active_writer = NULL;
|
||||
mutex_init(&cpu_hotplug.lock);
|
||||
cpu_hotplug.refcount = 0;
|
||||
init_waitqueue_head(&cpu_hotplug.writer_queue);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
|
||||
/* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
|
||||
static struct task_struct *recursive;
|
||||
static int recursive_depth;
|
||||
|
||||
void lock_cpu_hotplug(void)
|
||||
void get_online_cpus(void)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
if (tsk == recursive) {
|
||||
static int warnings = 10;
|
||||
if (warnings) {
|
||||
printk(KERN_ERR "Lukewarm IQ detected in hotplug locking\n");
|
||||
WARN_ON(1);
|
||||
warnings--;
|
||||
}
|
||||
recursive_depth++;
|
||||
might_sleep();
|
||||
if (cpu_hotplug.active_writer == current)
|
||||
return;
|
||||
}
|
||||
mutex_lock(&cpu_bitmask_lock);
|
||||
recursive = tsk;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(lock_cpu_hotplug);
|
||||
mutex_lock(&cpu_hotplug.lock);
|
||||
cpu_hotplug.refcount++;
|
||||
mutex_unlock(&cpu_hotplug.lock);
|
||||
|
||||
void unlock_cpu_hotplug(void)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(get_online_cpus);
|
||||
|
||||
void put_online_cpus(void)
|
||||
{
|
||||
WARN_ON(recursive != current);
|
||||
if (recursive_depth) {
|
||||
recursive_depth--;
|
||||
if (cpu_hotplug.active_writer == current)
|
||||
return;
|
||||
}
|
||||
recursive = NULL;
|
||||
mutex_unlock(&cpu_bitmask_lock);
|
||||
mutex_lock(&cpu_hotplug.lock);
|
||||
cpu_hotplug.refcount--;
|
||||
|
||||
if (unlikely(writer_exists()) && !cpu_hotplug.refcount)
|
||||
wake_up(&cpu_hotplug.writer_queue);
|
||||
|
||||
mutex_unlock(&cpu_hotplug.lock);
|
||||
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
|
||||
EXPORT_SYMBOL_GPL(put_online_cpus);
|
||||
|
||||
#endif /* CONFIG_HOTPLUG_CPU */
|
||||
|
||||
/*
|
||||
* The following two API's must be used when attempting
|
||||
* to serialize the updates to cpu_online_map, cpu_present_map.
|
||||
*/
|
||||
void cpu_maps_update_begin(void)
|
||||
{
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
}
|
||||
|
||||
void cpu_maps_update_done(void)
|
||||
{
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* This ensures that the hotplug operation can begin only when the
|
||||
* refcount goes to zero.
|
||||
*
|
||||
* Note that during a cpu-hotplug operation, the new readers, if any,
|
||||
* will be blocked by the cpu_hotplug.lock
|
||||
*
|
||||
* Since cpu_maps_update_begin is always called after invoking
|
||||
* cpu_maps_update_begin, we can be sure that only one writer is active.
|
||||
*
|
||||
* Note that theoretically, there is a possibility of a livelock:
|
||||
* - Refcount goes to zero, last reader wakes up the sleeping
|
||||
* writer.
|
||||
* - Last reader unlocks the cpu_hotplug.lock.
|
||||
* - A new reader arrives at this moment, bumps up the refcount.
|
||||
* - The writer acquires the cpu_hotplug.lock finds the refcount
|
||||
* non zero and goes to sleep again.
|
||||
*
|
||||
* However, this is very difficult to achieve in practice since
|
||||
* get_online_cpus() not an api which is called all that often.
|
||||
*
|
||||
*/
|
||||
static void cpu_hotplug_begin(void)
|
||||
{
|
||||
DECLARE_WAITQUEUE(wait, current);
|
||||
|
||||
mutex_lock(&cpu_hotplug.lock);
|
||||
|
||||
cpu_hotplug.active_writer = current;
|
||||
add_wait_queue_exclusive(&cpu_hotplug.writer_queue, &wait);
|
||||
while (cpu_hotplug.refcount) {
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
mutex_unlock(&cpu_hotplug.lock);
|
||||
schedule();
|
||||
mutex_lock(&cpu_hotplug.lock);
|
||||
}
|
||||
remove_wait_queue_locked(&cpu_hotplug.writer_queue, &wait);
|
||||
}
|
||||
|
||||
static void cpu_hotplug_done(void)
|
||||
{
|
||||
cpu_hotplug.active_writer = NULL;
|
||||
mutex_unlock(&cpu_hotplug.lock);
|
||||
}
|
||||
/* Need to know about CPUs going up/down? */
|
||||
int __cpuinit register_cpu_notifier(struct notifier_block *nb)
|
||||
{
|
||||
int ret;
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_begin();
|
||||
ret = raw_notifier_chain_register(&cpu_chain, nb);
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_done();
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -81,9 +151,9 @@ EXPORT_SYMBOL(register_cpu_notifier);
|
||||
|
||||
void unregister_cpu_notifier(struct notifier_block *nb)
|
||||
{
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_begin();
|
||||
raw_notifier_chain_unregister(&cpu_chain, nb);
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_done();
|
||||
}
|
||||
EXPORT_SYMBOL(unregister_cpu_notifier);
|
||||
|
||||
@@ -147,7 +217,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
|
||||
if (!cpu_online(cpu))
|
||||
return -EINVAL;
|
||||
|
||||
raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu);
|
||||
cpu_hotplug_begin();
|
||||
err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
|
||||
hcpu, -1, &nr_calls);
|
||||
if (err == NOTIFY_BAD) {
|
||||
@@ -166,9 +236,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
|
||||
cpu_clear(cpu, tmp);
|
||||
set_cpus_allowed(current, tmp);
|
||||
|
||||
mutex_lock(&cpu_bitmask_lock);
|
||||
p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
|
||||
mutex_unlock(&cpu_bitmask_lock);
|
||||
|
||||
if (IS_ERR(p) || cpu_online(cpu)) {
|
||||
/* CPU didn't die: tell everyone. Can't complain. */
|
||||
@@ -202,7 +270,7 @@ out_thread:
|
||||
out_allowed:
|
||||
set_cpus_allowed(current, old_allowed);
|
||||
out_release:
|
||||
raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu);
|
||||
cpu_hotplug_done();
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -210,13 +278,13 @@ int cpu_down(unsigned int cpu)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_begin();
|
||||
if (cpu_hotplug_disabled)
|
||||
err = -EBUSY;
|
||||
else
|
||||
err = _cpu_down(cpu, 0);
|
||||
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_done();
|
||||
return err;
|
||||
}
|
||||
#endif /*CONFIG_HOTPLUG_CPU*/
|
||||
@@ -231,7 +299,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
|
||||
if (cpu_online(cpu) || !cpu_present(cpu))
|
||||
return -EINVAL;
|
||||
|
||||
raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu);
|
||||
cpu_hotplug_begin();
|
||||
ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
|
||||
-1, &nr_calls);
|
||||
if (ret == NOTIFY_BAD) {
|
||||
@@ -243,9 +311,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
|
||||
}
|
||||
|
||||
/* Arch-specific enabling code. */
|
||||
mutex_lock(&cpu_bitmask_lock);
|
||||
ret = __cpu_up(cpu);
|
||||
mutex_unlock(&cpu_bitmask_lock);
|
||||
if (ret != 0)
|
||||
goto out_notify;
|
||||
BUG_ON(!cpu_online(cpu));
|
||||
@@ -257,7 +323,7 @@ out_notify:
|
||||
if (ret != 0)
|
||||
__raw_notifier_call_chain(&cpu_chain,
|
||||
CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
|
||||
raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu);
|
||||
cpu_hotplug_done();
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -275,13 +341,13 @@ int __cpuinit cpu_up(unsigned int cpu)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_begin();
|
||||
if (cpu_hotplug_disabled)
|
||||
err = -EBUSY;
|
||||
else
|
||||
err = _cpu_up(cpu, 0);
|
||||
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_done();
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -292,7 +358,7 @@ int disable_nonboot_cpus(void)
|
||||
{
|
||||
int cpu, first_cpu, error = 0;
|
||||
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_begin();
|
||||
first_cpu = first_cpu(cpu_online_map);
|
||||
/* We take down all of the non-boot CPUs in one shot to avoid races
|
||||
* with the userspace trying to use the CPU hotplug at the same time
|
||||
@@ -319,7 +385,7 @@ int disable_nonboot_cpus(void)
|
||||
} else {
|
||||
printk(KERN_ERR "Non-boot CPUs are not disabled\n");
|
||||
}
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_done();
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -328,7 +394,7 @@ void enable_nonboot_cpus(void)
|
||||
int cpu, error;
|
||||
|
||||
/* Allow everyone to use the CPU hotplug again */
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_begin();
|
||||
cpu_hotplug_disabled = 0;
|
||||
if (cpus_empty(frozen_cpus))
|
||||
goto out;
|
||||
@@ -344,6 +410,6 @@ void enable_nonboot_cpus(void)
|
||||
}
|
||||
cpus_clear(frozen_cpus);
|
||||
out:
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_done();
|
||||
}
|
||||
#endif /* CONFIG_PM_SLEEP_SMP */
|
||||
|
||||
@@ -537,10 +537,10 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
|
||||
*
|
||||
* Call with cgroup_mutex held. May take callback_mutex during
|
||||
* call due to the kfifo_alloc() and kmalloc() calls. May nest
|
||||
* a call to the lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
|
||||
* a call to the get_online_cpus()/put_online_cpus() pair.
|
||||
* Must not be called holding callback_mutex, because we must not
|
||||
* call lock_cpu_hotplug() while holding callback_mutex. Elsewhere
|
||||
* the kernel nests callback_mutex inside lock_cpu_hotplug() calls.
|
||||
* call get_online_cpus() while holding callback_mutex. Elsewhere
|
||||
* the kernel nests callback_mutex inside get_online_cpus() calls.
|
||||
* So the reverse nesting would risk an ABBA deadlock.
|
||||
*
|
||||
* The three key local variables below are:
|
||||
@@ -691,9 +691,9 @@ restart:
|
||||
|
||||
rebuild:
|
||||
/* Have scheduler rebuild sched domains */
|
||||
lock_cpu_hotplug();
|
||||
get_online_cpus();
|
||||
partition_sched_domains(ndoms, doms);
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
|
||||
done:
|
||||
if (q && !IS_ERR(q))
|
||||
@@ -1617,10 +1617,10 @@ static struct cgroup_subsys_state *cpuset_create(
|
||||
*
|
||||
* If the cpuset being removed has its flag 'sched_load_balance'
|
||||
* enabled, then simulate turning sched_load_balance off, which
|
||||
* will call rebuild_sched_domains(). The lock_cpu_hotplug()
|
||||
* will call rebuild_sched_domains(). The get_online_cpus()
|
||||
* call in rebuild_sched_domains() must not be made while holding
|
||||
* callback_mutex. Elsewhere the kernel nests callback_mutex inside
|
||||
* lock_cpu_hotplug() calls. So the reverse nesting would risk an
|
||||
* get_online_cpus() calls. So the reverse nesting would risk an
|
||||
* ABBA deadlock.
|
||||
*/
|
||||
|
||||
|
||||
@@ -46,7 +46,8 @@ int core_kernel_text(unsigned long addr)
|
||||
addr <= (unsigned long)_etext)
|
||||
return 1;
|
||||
|
||||
if (addr >= (unsigned long)_sinittext &&
|
||||
if (system_state == SYSTEM_BOOTING &&
|
||||
addr >= (unsigned long)_sinittext &&
|
||||
addr <= (unsigned long)_einittext)
|
||||
return 1;
|
||||
return 0;
|
||||
|
||||
@@ -51,6 +51,7 @@
|
||||
#include <linux/random.h>
|
||||
#include <linux/tty.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/blkdev.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/pgalloc.h>
|
||||
@@ -392,6 +393,7 @@ void fastcall __mmdrop(struct mm_struct *mm)
|
||||
destroy_context(mm);
|
||||
free_mm(mm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__mmdrop);
|
||||
|
||||
/*
|
||||
* Decrement the use count and release all resources for an mm.
|
||||
@@ -791,6 +793,31 @@ out:
|
||||
return error;
|
||||
}
|
||||
|
||||
static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
|
||||
{
|
||||
#ifdef CONFIG_BLOCK
|
||||
struct io_context *ioc = current->io_context;
|
||||
|
||||
if (!ioc)
|
||||
return 0;
|
||||
/*
|
||||
* Share io context with parent, if CLONE_IO is set
|
||||
*/
|
||||
if (clone_flags & CLONE_IO) {
|
||||
tsk->io_context = ioc_task_link(ioc);
|
||||
if (unlikely(!tsk->io_context))
|
||||
return -ENOMEM;
|
||||
} else if (ioprio_valid(ioc->ioprio)) {
|
||||
tsk->io_context = alloc_io_context(GFP_KERNEL, -1);
|
||||
if (unlikely(!tsk->io_context))
|
||||
return -ENOMEM;
|
||||
|
||||
tsk->io_context->ioprio = ioc->ioprio;
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper to unshare the files of the current task.
|
||||
* We don't want to expose copy_files internals to
|
||||
@@ -1045,6 +1072,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
||||
copy_flags(clone_flags, p);
|
||||
INIT_LIST_HEAD(&p->children);
|
||||
INIT_LIST_HEAD(&p->sibling);
|
||||
#ifdef CONFIG_PREEMPT_RCU
|
||||
p->rcu_read_lock_nesting = 0;
|
||||
p->rcu_flipctr_idx = 0;
|
||||
#endif /* #ifdef CONFIG_PREEMPT_RCU */
|
||||
p->vfork_done = NULL;
|
||||
spin_lock_init(&p->alloc_lock);
|
||||
|
||||
@@ -1059,6 +1090,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
||||
p->prev_utime = cputime_zero;
|
||||
p->prev_stime = cputime_zero;
|
||||
|
||||
#ifdef CONFIG_DETECT_SOFTLOCKUP
|
||||
p->last_switch_count = 0;
|
||||
p->last_switch_timestamp = 0;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_TASK_XACCT
|
||||
p->rchar = 0; /* I/O counter: bytes read */
|
||||
p->wchar = 0; /* I/O counter: bytes written */
|
||||
@@ -1147,15 +1183,17 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
||||
goto bad_fork_cleanup_mm;
|
||||
if ((retval = copy_namespaces(clone_flags, p)))
|
||||
goto bad_fork_cleanup_keys;
|
||||
if ((retval = copy_io(clone_flags, p)))
|
||||
goto bad_fork_cleanup_namespaces;
|
||||
retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
|
||||
if (retval)
|
||||
goto bad_fork_cleanup_namespaces;
|
||||
goto bad_fork_cleanup_io;
|
||||
|
||||
if (pid != &init_struct_pid) {
|
||||
retval = -ENOMEM;
|
||||
pid = alloc_pid(task_active_pid_ns(p));
|
||||
if (!pid)
|
||||
goto bad_fork_cleanup_namespaces;
|
||||
goto bad_fork_cleanup_io;
|
||||
|
||||
if (clone_flags & CLONE_NEWPID) {
|
||||
retval = pid_ns_prepare_proc(task_active_pid_ns(p));
|
||||
@@ -1196,6 +1234,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
||||
#ifdef TIF_SYSCALL_EMU
|
||||
clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
|
||||
#endif
|
||||
clear_all_latency_tracing(p);
|
||||
|
||||
/* Our parent execution domain becomes current domain
|
||||
These must match for thread signalling to apply */
|
||||
@@ -1224,9 +1263,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
||||
/* Need tasklist lock for parent etc handling! */
|
||||
write_lock_irq(&tasklist_lock);
|
||||
|
||||
/* for sys_ioprio_set(IOPRIO_WHO_PGRP) */
|
||||
p->ioprio = current->ioprio;
|
||||
|
||||
/*
|
||||
* The task hasn't been attached yet, so its cpus_allowed mask will
|
||||
* not be changed, nor will its assigned CPU.
|
||||
@@ -1237,6 +1273,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
||||
* parent's CPU). This avoids alot of nasty races.
|
||||
*/
|
||||
p->cpus_allowed = current->cpus_allowed;
|
||||
p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
|
||||
if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
|
||||
!cpu_online(task_cpu(p))))
|
||||
set_task_cpu(p, smp_processor_id());
|
||||
@@ -1317,6 +1354,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
||||
bad_fork_free_pid:
|
||||
if (pid != &init_struct_pid)
|
||||
free_pid(pid);
|
||||
bad_fork_cleanup_io:
|
||||
put_io_context(p->io_context);
|
||||
bad_fork_cleanup_namespaces:
|
||||
exit_task_namespaces(p);
|
||||
bad_fork_cleanup_keys:
|
||||
|
||||
284
kernel/hrtimer.c
284
kernel/hrtimer.c
@@ -325,6 +325,22 @@ unsigned long ktime_divns(const ktime_t kt, s64 div)
|
||||
}
|
||||
#endif /* BITS_PER_LONG >= 64 */
|
||||
|
||||
/*
|
||||
* Check, whether the timer is on the callback pending list
|
||||
*/
|
||||
static inline int hrtimer_cb_pending(const struct hrtimer *timer)
|
||||
{
|
||||
return timer->state & HRTIMER_STATE_PENDING;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove a timer from the callback pending list
|
||||
*/
|
||||
static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
|
||||
{
|
||||
list_del_init(&timer->cb_entry);
|
||||
}
|
||||
|
||||
/* High resolution timer related functions */
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
|
||||
@@ -493,22 +509,6 @@ void hres_timers_resume(void)
|
||||
retrigger_next_event(NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check, whether the timer is on the callback pending list
|
||||
*/
|
||||
static inline int hrtimer_cb_pending(const struct hrtimer *timer)
|
||||
{
|
||||
return timer->state & HRTIMER_STATE_PENDING;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove a timer from the callback pending list
|
||||
*/
|
||||
static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
|
||||
{
|
||||
list_del_init(&timer->cb_entry);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the high resolution related parts of cpu_base
|
||||
*/
|
||||
@@ -516,7 +516,6 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
|
||||
{
|
||||
base->expires_next.tv64 = KTIME_MAX;
|
||||
base->hres_active = 0;
|
||||
INIT_LIST_HEAD(&base->cb_pending);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -524,7 +523,6 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
|
||||
*/
|
||||
static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
|
||||
{
|
||||
INIT_LIST_HEAD(&timer->cb_entry);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -618,10 +616,13 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline int hrtimer_cb_pending(struct hrtimer *timer) { return 0; }
|
||||
static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) { }
|
||||
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
|
||||
static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
|
||||
static inline int hrtimer_reprogram(struct hrtimer *timer,
|
||||
struct hrtimer_clock_base *base)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_HIGH_RES_TIMERS */
|
||||
|
||||
@@ -1001,6 +1002,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
|
||||
clock_id = CLOCK_MONOTONIC;
|
||||
|
||||
timer->base = &cpu_base->clock_base[clock_id];
|
||||
INIT_LIST_HEAD(&timer->cb_entry);
|
||||
hrtimer_init_timer_hres(timer);
|
||||
|
||||
#ifdef CONFIG_TIMER_STATS
|
||||
@@ -1030,6 +1032,85 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hrtimer_get_res);
|
||||
|
||||
static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
|
||||
{
|
||||
spin_lock_irq(&cpu_base->lock);
|
||||
|
||||
while (!list_empty(&cpu_base->cb_pending)) {
|
||||
enum hrtimer_restart (*fn)(struct hrtimer *);
|
||||
struct hrtimer *timer;
|
||||
int restart;
|
||||
|
||||
timer = list_entry(cpu_base->cb_pending.next,
|
||||
struct hrtimer, cb_entry);
|
||||
|
||||
timer_stats_account_hrtimer(timer);
|
||||
|
||||
fn = timer->function;
|
||||
__remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
|
||||
spin_unlock_irq(&cpu_base->lock);
|
||||
|
||||
restart = fn(timer);
|
||||
|
||||
spin_lock_irq(&cpu_base->lock);
|
||||
|
||||
timer->state &= ~HRTIMER_STATE_CALLBACK;
|
||||
if (restart == HRTIMER_RESTART) {
|
||||
BUG_ON(hrtimer_active(timer));
|
||||
/*
|
||||
* Enqueue the timer, allow reprogramming of the event
|
||||
* device
|
||||
*/
|
||||
enqueue_hrtimer(timer, timer->base, 1);
|
||||
} else if (hrtimer_active(timer)) {
|
||||
/*
|
||||
* If the timer was rearmed on another CPU, reprogram
|
||||
* the event device.
|
||||
*/
|
||||
if (timer->base->first == &timer->node)
|
||||
hrtimer_reprogram(timer, timer->base);
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&cpu_base->lock);
|
||||
}
|
||||
|
||||
static void __run_hrtimer(struct hrtimer *timer)
|
||||
{
|
||||
struct hrtimer_clock_base *base = timer->base;
|
||||
struct hrtimer_cpu_base *cpu_base = base->cpu_base;
|
||||
enum hrtimer_restart (*fn)(struct hrtimer *);
|
||||
int restart;
|
||||
|
||||
__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
|
||||
timer_stats_account_hrtimer(timer);
|
||||
|
||||
fn = timer->function;
|
||||
if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
|
||||
/*
|
||||
* Used for scheduler timers, avoid lock inversion with
|
||||
* rq->lock and tasklist_lock.
|
||||
*
|
||||
* These timers are required to deal with enqueue expiry
|
||||
* themselves and are not allowed to migrate.
|
||||
*/
|
||||
spin_unlock(&cpu_base->lock);
|
||||
restart = fn(timer);
|
||||
spin_lock(&cpu_base->lock);
|
||||
} else
|
||||
restart = fn(timer);
|
||||
|
||||
/*
|
||||
* Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid
|
||||
* reprogramming of the event hardware. This happens at the end of this
|
||||
* function anyway.
|
||||
*/
|
||||
if (restart != HRTIMER_NORESTART) {
|
||||
BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
|
||||
enqueue_hrtimer(timer, base, 0);
|
||||
}
|
||||
timer->state &= ~HRTIMER_STATE_CALLBACK;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
|
||||
/*
|
||||
@@ -1087,21 +1168,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
|
||||
continue;
|
||||
}
|
||||
|
||||
__remove_hrtimer(timer, base,
|
||||
HRTIMER_STATE_CALLBACK, 0);
|
||||
timer_stats_account_hrtimer(timer);
|
||||
|
||||
/*
|
||||
* Note: We clear the CALLBACK bit after
|
||||
* enqueue_hrtimer to avoid reprogramming of
|
||||
* the event hardware. This happens at the end
|
||||
* of this function anyway.
|
||||
*/
|
||||
if (timer->function(timer) != HRTIMER_NORESTART) {
|
||||
BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
|
||||
enqueue_hrtimer(timer, base, 0);
|
||||
}
|
||||
timer->state &= ~HRTIMER_STATE_CALLBACK;
|
||||
__run_hrtimer(timer);
|
||||
}
|
||||
spin_unlock(&cpu_base->lock);
|
||||
base++;
|
||||
@@ -1122,98 +1189,11 @@ void hrtimer_interrupt(struct clock_event_device *dev)
|
||||
|
||||
static void run_hrtimer_softirq(struct softirq_action *h)
|
||||
{
|
||||
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
|
||||
|
||||
spin_lock_irq(&cpu_base->lock);
|
||||
|
||||
while (!list_empty(&cpu_base->cb_pending)) {
|
||||
enum hrtimer_restart (*fn)(struct hrtimer *);
|
||||
struct hrtimer *timer;
|
||||
int restart;
|
||||
|
||||
timer = list_entry(cpu_base->cb_pending.next,
|
||||
struct hrtimer, cb_entry);
|
||||
|
||||
timer_stats_account_hrtimer(timer);
|
||||
|
||||
fn = timer->function;
|
||||
__remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
|
||||
spin_unlock_irq(&cpu_base->lock);
|
||||
|
||||
restart = fn(timer);
|
||||
|
||||
spin_lock_irq(&cpu_base->lock);
|
||||
|
||||
timer->state &= ~HRTIMER_STATE_CALLBACK;
|
||||
if (restart == HRTIMER_RESTART) {
|
||||
BUG_ON(hrtimer_active(timer));
|
||||
/*
|
||||
* Enqueue the timer, allow reprogramming of the event
|
||||
* device
|
||||
*/
|
||||
enqueue_hrtimer(timer, timer->base, 1);
|
||||
} else if (hrtimer_active(timer)) {
|
||||
/*
|
||||
* If the timer was rearmed on another CPU, reprogram
|
||||
* the event device.
|
||||
*/
|
||||
if (timer->base->first == &timer->node)
|
||||
hrtimer_reprogram(timer, timer->base);
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&cpu_base->lock);
|
||||
run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
|
||||
}
|
||||
|
||||
#endif /* CONFIG_HIGH_RES_TIMERS */
|
||||
|
||||
/*
|
||||
* Expire the per base hrtimer-queue:
|
||||
*/
|
||||
static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
|
||||
int index)
|
||||
{
|
||||
struct rb_node *node;
|
||||
struct hrtimer_clock_base *base = &cpu_base->clock_base[index];
|
||||
|
||||
if (!base->first)
|
||||
return;
|
||||
|
||||
if (base->get_softirq_time)
|
||||
base->softirq_time = base->get_softirq_time();
|
||||
|
||||
spin_lock_irq(&cpu_base->lock);
|
||||
|
||||
while ((node = base->first)) {
|
||||
struct hrtimer *timer;
|
||||
enum hrtimer_restart (*fn)(struct hrtimer *);
|
||||
int restart;
|
||||
|
||||
timer = rb_entry(node, struct hrtimer, node);
|
||||
if (base->softirq_time.tv64 <= timer->expires.tv64)
|
||||
break;
|
||||
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
WARN_ON_ONCE(timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ);
|
||||
#endif
|
||||
timer_stats_account_hrtimer(timer);
|
||||
|
||||
fn = timer->function;
|
||||
__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
|
||||
spin_unlock_irq(&cpu_base->lock);
|
||||
|
||||
restart = fn(timer);
|
||||
|
||||
spin_lock_irq(&cpu_base->lock);
|
||||
|
||||
timer->state &= ~HRTIMER_STATE_CALLBACK;
|
||||
if (restart != HRTIMER_NORESTART) {
|
||||
BUG_ON(hrtimer_active(timer));
|
||||
enqueue_hrtimer(timer, base, 0);
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&cpu_base->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from timer softirq every jiffy, expire hrtimers:
|
||||
*
|
||||
@@ -1221,10 +1201,9 @@ static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
|
||||
* softirq context in case the hrtimer initialization failed or has
|
||||
* not been done yet.
|
||||
*/
|
||||
void hrtimer_run_queues(void)
|
||||
void hrtimer_run_pending(void)
|
||||
{
|
||||
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
|
||||
int i;
|
||||
|
||||
if (hrtimer_hres_active())
|
||||
return;
|
||||
@@ -1238,8 +1217,54 @@ void hrtimer_run_queues(void)
|
||||
* deadlock vs. xtime_lock.
|
||||
*/
|
||||
if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
|
||||
if (hrtimer_switch_to_hres())
|
||||
return;
|
||||
hrtimer_switch_to_hres();
|
||||
|
||||
run_hrtimer_pending(cpu_base);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from hardirq context every jiffy
|
||||
*/
|
||||
static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
|
||||
int index)
|
||||
{
|
||||
struct rb_node *node;
|
||||
struct hrtimer_clock_base *base = &cpu_base->clock_base[index];
|
||||
|
||||
if (!base->first)
|
||||
return;
|
||||
|
||||
if (base->get_softirq_time)
|
||||
base->softirq_time = base->get_softirq_time();
|
||||
|
||||
spin_lock(&cpu_base->lock);
|
||||
|
||||
while ((node = base->first)) {
|
||||
struct hrtimer *timer;
|
||||
|
||||
timer = rb_entry(node, struct hrtimer, node);
|
||||
if (base->softirq_time.tv64 <= timer->expires.tv64)
|
||||
break;
|
||||
|
||||
if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
|
||||
__remove_hrtimer(timer, base, HRTIMER_STATE_PENDING, 0);
|
||||
list_add_tail(&timer->cb_entry,
|
||||
&base->cpu_base->cb_pending);
|
||||
continue;
|
||||
}
|
||||
|
||||
__run_hrtimer(timer);
|
||||
}
|
||||
spin_unlock(&cpu_base->lock);
|
||||
}
|
||||
|
||||
void hrtimer_run_queues(void)
|
||||
{
|
||||
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
|
||||
int i;
|
||||
|
||||
if (hrtimer_hres_active())
|
||||
return;
|
||||
|
||||
hrtimer_get_softirq_time(cpu_base);
|
||||
|
||||
@@ -1268,7 +1293,7 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
|
||||
sl->timer.function = hrtimer_wakeup;
|
||||
sl->task = task;
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_RESTART;
|
||||
sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -1279,6 +1304,8 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
|
||||
do {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
hrtimer_start(&t->timer, t->timer.expires, mode);
|
||||
if (!hrtimer_active(&t->timer))
|
||||
t->task = NULL;
|
||||
|
||||
if (likely(t->task))
|
||||
schedule();
|
||||
@@ -1389,6 +1416,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
|
||||
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
|
||||
cpu_base->clock_base[i].cpu_base = cpu_base;
|
||||
|
||||
INIT_LIST_HEAD(&cpu_base->cb_pending);
|
||||
hrtimer_init_hres(cpu_base);
|
||||
}
|
||||
|
||||
|
||||
@@ -479,6 +479,9 @@ void free_irq(unsigned int irq, void *dev_id)
|
||||
return;
|
||||
}
|
||||
printk(KERN_ERR "Trying to free already-free IRQ %d\n", irq);
|
||||
#ifdef CONFIG_DEBUG_SHIRQ
|
||||
dump_stack();
|
||||
#endif
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -75,6 +75,18 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
|
||||
|
||||
#endif
|
||||
|
||||
static int irq_spurious_read(char *page, char **start, off_t off,
|
||||
int count, int *eof, void *data)
|
||||
{
|
||||
struct irq_desc *d = &irq_desc[(long) data];
|
||||
return sprintf(page, "count %u\n"
|
||||
"unhandled %u\n"
|
||||
"last_unhandled %u ms\n",
|
||||
d->irq_count,
|
||||
d->irqs_unhandled,
|
||||
jiffies_to_msecs(d->last_unhandled));
|
||||
}
|
||||
|
||||
#define MAX_NAMELEN 128
|
||||
|
||||
static int name_unique(unsigned int irq, struct irqaction *new_action)
|
||||
@@ -118,6 +130,7 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
|
||||
void register_irq_proc(unsigned int irq)
|
||||
{
|
||||
char name [MAX_NAMELEN];
|
||||
struct proc_dir_entry *entry;
|
||||
|
||||
if (!root_irq_dir ||
|
||||
(irq_desc[irq].chip == &no_irq_chip) ||
|
||||
@@ -132,8 +145,6 @@ void register_irq_proc(unsigned int irq)
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
{
|
||||
struct proc_dir_entry *entry;
|
||||
|
||||
/* create /proc/irq/<irq>/smp_affinity */
|
||||
entry = create_proc_entry("smp_affinity", 0600, irq_desc[irq].dir);
|
||||
|
||||
@@ -144,6 +155,12 @@ void register_irq_proc(unsigned int irq)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
entry = create_proc_entry("spurious", 0444, irq_desc[irq].dir);
|
||||
if (entry) {
|
||||
entry->data = (void *)(long)irq;
|
||||
entry->read_proc = irq_spurious_read;
|
||||
}
|
||||
}
|
||||
|
||||
#undef MAX_NAMELEN
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/moduleparam.h>
|
||||
|
||||
static int irqfixup __read_mostly;
|
||||
|
||||
@@ -225,6 +226,8 @@ int noirqdebug_setup(char *str)
|
||||
}
|
||||
|
||||
__setup("noirqdebug", noirqdebug_setup);
|
||||
module_param(noirqdebug, bool, 0644);
|
||||
MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");
|
||||
|
||||
static int __init irqfixup_setup(char *str)
|
||||
{
|
||||
@@ -236,6 +239,8 @@ static int __init irqfixup_setup(char *str)
|
||||
}
|
||||
|
||||
__setup("irqfixup", irqfixup_setup);
|
||||
module_param(irqfixup, int, 0644);
|
||||
MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode 2: irqpoll mode");
|
||||
|
||||
static int __init irqpoll_setup(char *str)
|
||||
{
|
||||
|
||||
@@ -233,10 +233,11 @@ static unsigned long get_symbol_pos(unsigned long addr,
|
||||
int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
|
||||
unsigned long *offset)
|
||||
{
|
||||
char namebuf[KSYM_NAME_LEN];
|
||||
if (is_ksym_addr(addr))
|
||||
return !!get_symbol_pos(addr, symbolsize, offset);
|
||||
|
||||
return !!module_address_lookup(addr, symbolsize, offset, NULL);
|
||||
return !!module_address_lookup(addr, symbolsize, offset, NULL, namebuf);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -251,8 +252,6 @@ const char *kallsyms_lookup(unsigned long addr,
|
||||
unsigned long *offset,
|
||||
char **modname, char *namebuf)
|
||||
{
|
||||
const char *msym;
|
||||
|
||||
namebuf[KSYM_NAME_LEN - 1] = 0;
|
||||
namebuf[0] = 0;
|
||||
|
||||
@@ -268,10 +267,8 @@ const char *kallsyms_lookup(unsigned long addr,
|
||||
}
|
||||
|
||||
/* see if it's in a module */
|
||||
msym = module_address_lookup(addr, symbolsize, offset, modname);
|
||||
if (msym)
|
||||
return strncpy(namebuf, msym, KSYM_NAME_LEN - 1);
|
||||
|
||||
return module_address_lookup(addr, symbolsize, offset, modname,
|
||||
namebuf);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
@@ -824,6 +824,8 @@ static int __init init_kprobes(void)
|
||||
if (!err)
|
||||
err = register_die_notifier(&kprobe_exceptions_nb);
|
||||
|
||||
if (!err)
|
||||
init_test_probes();
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
@@ -17,30 +17,34 @@
|
||||
#include <linux/sched.h>
|
||||
|
||||
#define KERNEL_ATTR_RO(_name) \
|
||||
static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
|
||||
static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
|
||||
|
||||
#define KERNEL_ATTR_RW(_name) \
|
||||
static struct subsys_attribute _name##_attr = \
|
||||
static struct kobj_attribute _name##_attr = \
|
||||
__ATTR(_name, 0644, _name##_show, _name##_store)
|
||||
|
||||
#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
|
||||
/* current uevent sequence number */
|
||||
static ssize_t uevent_seqnum_show(struct kset *kset, char *page)
|
||||
static ssize_t uevent_seqnum_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(page, "%llu\n", (unsigned long long)uevent_seqnum);
|
||||
return sprintf(buf, "%llu\n", (unsigned long long)uevent_seqnum);
|
||||
}
|
||||
KERNEL_ATTR_RO(uevent_seqnum);
|
||||
|
||||
/* uevent helper program, used during early boo */
|
||||
static ssize_t uevent_helper_show(struct kset *kset, char *page)
|
||||
static ssize_t uevent_helper_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(page, "%s\n", uevent_helper);
|
||||
return sprintf(buf, "%s\n", uevent_helper);
|
||||
}
|
||||
static ssize_t uevent_helper_store(struct kset *kset, const char *page, size_t count)
|
||||
static ssize_t uevent_helper_store(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
if (count+1 > UEVENT_HELPER_PATH_LEN)
|
||||
return -ENOENT;
|
||||
memcpy(uevent_helper, page, count);
|
||||
memcpy(uevent_helper, buf, count);
|
||||
uevent_helper[count] = '\0';
|
||||
if (count && uevent_helper[count-1] == '\n')
|
||||
uevent_helper[count-1] = '\0';
|
||||
@@ -50,21 +54,24 @@ KERNEL_ATTR_RW(uevent_helper);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KEXEC
|
||||
static ssize_t kexec_loaded_show(struct kset *kset, char *page)
|
||||
static ssize_t kexec_loaded_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(page, "%d\n", !!kexec_image);
|
||||
return sprintf(buf, "%d\n", !!kexec_image);
|
||||
}
|
||||
KERNEL_ATTR_RO(kexec_loaded);
|
||||
|
||||
static ssize_t kexec_crash_loaded_show(struct kset *kset, char *page)
|
||||
static ssize_t kexec_crash_loaded_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(page, "%d\n", !!kexec_crash_image);
|
||||
return sprintf(buf, "%d\n", !!kexec_crash_image);
|
||||
}
|
||||
KERNEL_ATTR_RO(kexec_crash_loaded);
|
||||
|
||||
static ssize_t vmcoreinfo_show(struct kset *kset, char *page)
|
||||
static ssize_t vmcoreinfo_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(page, "%lx %x\n",
|
||||
return sprintf(buf, "%lx %x\n",
|
||||
paddr_vmcoreinfo_note(),
|
||||
(unsigned int)vmcoreinfo_max_size);
|
||||
}
|
||||
@@ -94,8 +101,8 @@ static struct bin_attribute notes_attr = {
|
||||
.read = ¬es_read,
|
||||
};
|
||||
|
||||
decl_subsys(kernel, NULL, NULL);
|
||||
EXPORT_SYMBOL_GPL(kernel_subsys);
|
||||
struct kobject *kernel_kobj;
|
||||
EXPORT_SYMBOL_GPL(kernel_kobj);
|
||||
|
||||
static struct attribute * kernel_attrs[] = {
|
||||
#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
|
||||
@@ -116,24 +123,39 @@ static struct attribute_group kernel_attr_group = {
|
||||
|
||||
static int __init ksysfs_init(void)
|
||||
{
|
||||
int error = subsystem_register(&kernel_subsys);
|
||||
if (!error)
|
||||
error = sysfs_create_group(&kernel_subsys.kobj,
|
||||
&kernel_attr_group);
|
||||
int error;
|
||||
|
||||
if (!error && notes_size > 0) {
|
||||
kernel_kobj = kobject_create_and_add("kernel", NULL);
|
||||
if (!kernel_kobj) {
|
||||
error = -ENOMEM;
|
||||
goto exit;
|
||||
}
|
||||
error = sysfs_create_group(kernel_kobj, &kernel_attr_group);
|
||||
if (error)
|
||||
goto kset_exit;
|
||||
|
||||
if (notes_size > 0) {
|
||||
notes_attr.size = notes_size;
|
||||
error = sysfs_create_bin_file(&kernel_subsys.kobj,
|
||||
¬es_attr);
|
||||
error = sysfs_create_bin_file(kernel_kobj, ¬es_attr);
|
||||
if (error)
|
||||
goto group_exit;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create "/sys/kernel/uids" directory and corresponding root user's
|
||||
* directory under it.
|
||||
*/
|
||||
if (!error)
|
||||
error = uids_kobject_init();
|
||||
/* create the /sys/kernel/uids/ directory */
|
||||
error = uids_sysfs_init();
|
||||
if (error)
|
||||
goto notes_exit;
|
||||
|
||||
return 0;
|
||||
|
||||
notes_exit:
|
||||
if (notes_size > 0)
|
||||
sysfs_remove_bin_file(kernel_kobj, ¬es_attr);
|
||||
group_exit:
|
||||
sysfs_remove_group(kernel_kobj, &kernel_attr_group);
|
||||
kset_exit:
|
||||
kobject_put(kernel_kobj);
|
||||
exit:
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
@@ -15,6 +15,8 @@
|
||||
#include <linux/mutex.h>
|
||||
#include <asm/semaphore.h>
|
||||
|
||||
#define KTHREAD_NICE_LEVEL (-5)
|
||||
|
||||
static DEFINE_SPINLOCK(kthread_create_lock);
|
||||
static LIST_HEAD(kthread_create_list);
|
||||
struct task_struct *kthreadd_task;
|
||||
@@ -94,10 +96,18 @@ static void create_kthread(struct kthread_create_info *create)
|
||||
if (pid < 0) {
|
||||
create->result = ERR_PTR(pid);
|
||||
} else {
|
||||
struct sched_param param = { .sched_priority = 0 };
|
||||
wait_for_completion(&create->started);
|
||||
read_lock(&tasklist_lock);
|
||||
create->result = find_task_by_pid(pid);
|
||||
read_unlock(&tasklist_lock);
|
||||
/*
|
||||
* root may have changed our (kthreadd's) priority or CPU mask.
|
||||
* The kernel thread should not inherit these properties.
|
||||
*/
|
||||
sched_setscheduler(create->result, SCHED_NORMAL, ¶m);
|
||||
set_user_nice(create->result, KTHREAD_NICE_LEVEL);
|
||||
set_cpus_allowed(create->result, CPU_MASK_ALL);
|
||||
}
|
||||
complete(&create->done);
|
||||
}
|
||||
@@ -221,7 +231,7 @@ int kthreadd(void *unused)
|
||||
/* Setup a clean context for our children to inherit. */
|
||||
set_task_comm(tsk, "kthreadd");
|
||||
ignore_signals(tsk);
|
||||
set_user_nice(tsk, -5);
|
||||
set_user_nice(tsk, KTHREAD_NICE_LEVEL);
|
||||
set_cpus_allowed(tsk, CPU_MASK_ALL);
|
||||
|
||||
current->flags |= PF_NOFREEZE;
|
||||
|
||||
239
kernel/latencytop.c
Normal file
239
kernel/latencytop.c
Normal file
@@ -0,0 +1,239 @@
|
||||
/*
|
||||
* latencytop.c: Latency display infrastructure
|
||||
*
|
||||
* (C) Copyright 2008 Intel Corporation
|
||||
* Author: Arjan van de Ven <arjan@linux.intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; version 2
|
||||
* of the License.
|
||||
*/
|
||||
#include <linux/latencytop.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/stacktrace.h>
|
||||
|
||||
static DEFINE_SPINLOCK(latency_lock);
|
||||
|
||||
#define MAXLR 128
|
||||
static struct latency_record latency_record[MAXLR];
|
||||
|
||||
int latencytop_enabled;
|
||||
|
||||
void clear_all_latency_tracing(struct task_struct *p)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (!latencytop_enabled)
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&latency_lock, flags);
|
||||
memset(&p->latency_record, 0, sizeof(p->latency_record));
|
||||
p->latency_record_count = 0;
|
||||
spin_unlock_irqrestore(&latency_lock, flags);
|
||||
}
|
||||
|
||||
static void clear_global_latency_tracing(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&latency_lock, flags);
|
||||
memset(&latency_record, 0, sizeof(latency_record));
|
||||
spin_unlock_irqrestore(&latency_lock, flags);
|
||||
}
|
||||
|
||||
static void __sched
|
||||
account_global_scheduler_latency(struct task_struct *tsk, struct latency_record *lat)
|
||||
{
|
||||
int firstnonnull = MAXLR + 1;
|
||||
int i;
|
||||
|
||||
if (!latencytop_enabled)
|
||||
return;
|
||||
|
||||
/* skip kernel threads for now */
|
||||
if (!tsk->mm)
|
||||
return;
|
||||
|
||||
for (i = 0; i < MAXLR; i++) {
|
||||
int q;
|
||||
int same = 1;
|
||||
/* Nothing stored: */
|
||||
if (!latency_record[i].backtrace[0]) {
|
||||
if (firstnonnull > i)
|
||||
firstnonnull = i;
|
||||
continue;
|
||||
}
|
||||
for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) {
|
||||
if (latency_record[i].backtrace[q] !=
|
||||
lat->backtrace[q])
|
||||
same = 0;
|
||||
if (same && lat->backtrace[q] == 0)
|
||||
break;
|
||||
if (same && lat->backtrace[q] == ULONG_MAX)
|
||||
break;
|
||||
}
|
||||
if (same) {
|
||||
latency_record[i].count++;
|
||||
latency_record[i].time += lat->time;
|
||||
if (lat->time > latency_record[i].max)
|
||||
latency_record[i].max = lat->time;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
i = firstnonnull;
|
||||
if (i >= MAXLR - 1)
|
||||
return;
|
||||
|
||||
/* Allocted a new one: */
|
||||
memcpy(&latency_record[i], lat, sizeof(struct latency_record));
|
||||
}
|
||||
|
||||
static inline void store_stacktrace(struct task_struct *tsk, struct latency_record *lat)
|
||||
{
|
||||
struct stack_trace trace;
|
||||
|
||||
memset(&trace, 0, sizeof(trace));
|
||||
trace.max_entries = LT_BACKTRACEDEPTH;
|
||||
trace.entries = &lat->backtrace[0];
|
||||
trace.skip = 0;
|
||||
save_stack_trace_tsk(tsk, &trace);
|
||||
}
|
||||
|
||||
void __sched
|
||||
account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
|
||||
{
|
||||
unsigned long flags;
|
||||
int i, q;
|
||||
struct latency_record lat;
|
||||
|
||||
if (!latencytop_enabled)
|
||||
return;
|
||||
|
||||
/* Long interruptible waits are generally user requested... */
|
||||
if (inter && usecs > 5000)
|
||||
return;
|
||||
|
||||
memset(&lat, 0, sizeof(lat));
|
||||
lat.count = 1;
|
||||
lat.time = usecs;
|
||||
lat.max = usecs;
|
||||
store_stacktrace(tsk, &lat);
|
||||
|
||||
spin_lock_irqsave(&latency_lock, flags);
|
||||
|
||||
account_global_scheduler_latency(tsk, &lat);
|
||||
|
||||
/*
|
||||
* short term hack; if we're > 32 we stop; future we recycle:
|
||||
*/
|
||||
tsk->latency_record_count++;
|
||||
if (tsk->latency_record_count >= LT_SAVECOUNT)
|
||||
goto out_unlock;
|
||||
|
||||
for (i = 0; i < LT_SAVECOUNT ; i++) {
|
||||
struct latency_record *mylat;
|
||||
int same = 1;
|
||||
mylat = &tsk->latency_record[i];
|
||||
for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) {
|
||||
if (mylat->backtrace[q] !=
|
||||
lat.backtrace[q])
|
||||
same = 0;
|
||||
if (same && lat.backtrace[q] == 0)
|
||||
break;
|
||||
if (same && lat.backtrace[q] == ULONG_MAX)
|
||||
break;
|
||||
}
|
||||
if (same) {
|
||||
mylat->count++;
|
||||
mylat->time += lat.time;
|
||||
if (lat.time > mylat->max)
|
||||
mylat->max = lat.time;
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocated a new one: */
|
||||
i = tsk->latency_record_count;
|
||||
memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));
|
||||
|
||||
out_unlock:
|
||||
spin_unlock_irqrestore(&latency_lock, flags);
|
||||
}
|
||||
|
||||
static int lstats_show(struct seq_file *m, void *v)
|
||||
{
|
||||
int i;
|
||||
|
||||
seq_puts(m, "Latency Top version : v0.1\n");
|
||||
|
||||
for (i = 0; i < MAXLR; i++) {
|
||||
if (latency_record[i].backtrace[0]) {
|
||||
int q;
|
||||
seq_printf(m, "%i %li %li ",
|
||||
latency_record[i].count,
|
||||
latency_record[i].time,
|
||||
latency_record[i].max);
|
||||
for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
|
||||
char sym[KSYM_NAME_LEN];
|
||||
char *c;
|
||||
if (!latency_record[i].backtrace[q])
|
||||
break;
|
||||
if (latency_record[i].backtrace[q] == ULONG_MAX)
|
||||
break;
|
||||
sprint_symbol(sym, latency_record[i].backtrace[q]);
|
||||
c = strchr(sym, '+');
|
||||
if (c)
|
||||
*c = 0;
|
||||
seq_printf(m, "%s ", sym);
|
||||
}
|
||||
seq_printf(m, "\n");
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
lstats_write(struct file *file, const char __user *buf, size_t count,
|
||||
loff_t *offs)
|
||||
{
|
||||
clear_global_latency_tracing();
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static int lstats_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
return single_open(filp, lstats_show, NULL);
|
||||
}
|
||||
|
||||
static struct file_operations lstats_fops = {
|
||||
.open = lstats_open,
|
||||
.read = seq_read,
|
||||
.write = lstats_write,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
static int __init init_lstats_procfs(void)
|
||||
{
|
||||
struct proc_dir_entry *pe;
|
||||
|
||||
pe = create_proc_entry("latency_stats", 0644, NULL);
|
||||
if (!pe)
|
||||
return -ENOMEM;
|
||||
|
||||
pe->proc_fops = &lstats_fops;
|
||||
|
||||
return 0;
|
||||
}
|
||||
__initcall(init_lstats_procfs);
|
||||
@@ -2932,7 +2932,7 @@ static void zap_class(struct lock_class *class)
|
||||
|
||||
}
|
||||
|
||||
static inline int within(void *addr, void *start, unsigned long size)
|
||||
static inline int within(const void *addr, void *start, unsigned long size)
|
||||
{
|
||||
return addr >= start && addr < start + size;
|
||||
}
|
||||
@@ -2955,9 +2955,12 @@ void lockdep_free_key_range(void *start, unsigned long size)
|
||||
head = classhash_table + i;
|
||||
if (list_empty(head))
|
||||
continue;
|
||||
list_for_each_entry_safe(class, next, head, hash_entry)
|
||||
list_for_each_entry_safe(class, next, head, hash_entry) {
|
||||
if (within(class->key, start, size))
|
||||
zap_class(class);
|
||||
else if (within(class->name, start, size))
|
||||
zap_class(class);
|
||||
}
|
||||
}
|
||||
|
||||
if (locked)
|
||||
@@ -3203,7 +3206,11 @@ retry:
|
||||
|
||||
EXPORT_SYMBOL_GPL(debug_show_all_locks);
|
||||
|
||||
void debug_show_held_locks(struct task_struct *task)
|
||||
/*
|
||||
* Careful: only use this function if you are sure that
|
||||
* the task cannot run in parallel!
|
||||
*/
|
||||
void __debug_show_held_locks(struct task_struct *task)
|
||||
{
|
||||
if (unlikely(!debug_locks)) {
|
||||
printk("INFO: lockdep is turned off.\n");
|
||||
@@ -3211,6 +3218,12 @@ void debug_show_held_locks(struct task_struct *task)
|
||||
}
|
||||
lockdep_print_held_locks(task);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__debug_show_held_locks);
|
||||
|
||||
void debug_show_held_locks(struct task_struct *task)
|
||||
{
|
||||
__debug_show_held_locks(task);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(debug_show_held_locks);
|
||||
|
||||
|
||||
261
kernel/module.c
261
kernel/module.c
@@ -47,8 +47,6 @@
|
||||
#include <asm/cacheflush.h>
|
||||
#include <linux/license.h>
|
||||
|
||||
extern int module_sysfs_initialized;
|
||||
|
||||
#if 0
|
||||
#define DEBUGP printk
|
||||
#else
|
||||
@@ -67,6 +65,9 @@ extern int module_sysfs_initialized;
|
||||
static DEFINE_MUTEX(module_mutex);
|
||||
static LIST_HEAD(modules);
|
||||
|
||||
/* Waiting for a module to finish initializing? */
|
||||
static DECLARE_WAIT_QUEUE_HEAD(module_wq);
|
||||
|
||||
static BLOCKING_NOTIFIER_HEAD(module_notify_list);
|
||||
|
||||
int register_module_notifier(struct notifier_block * nb)
|
||||
@@ -86,8 +87,11 @@ EXPORT_SYMBOL(unregister_module_notifier);
|
||||
static inline int strong_try_module_get(struct module *mod)
|
||||
{
|
||||
if (mod && mod->state == MODULE_STATE_COMING)
|
||||
return -EBUSY;
|
||||
if (try_module_get(mod))
|
||||
return 0;
|
||||
return try_module_get(mod);
|
||||
else
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
static inline void add_taint_module(struct module *mod, unsigned flag)
|
||||
@@ -426,6 +430,14 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr,
|
||||
return find_sec(hdr, sechdrs, secstrings, ".data.percpu");
|
||||
}
|
||||
|
||||
static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
memcpy(pcpudest + per_cpu_offset(cpu), from, size);
|
||||
}
|
||||
|
||||
static int percpu_modinit(void)
|
||||
{
|
||||
pcpu_num_used = 2;
|
||||
@@ -498,6 +510,8 @@ static struct module_attribute modinfo_##field = { \
|
||||
MODINFO_ATTR(version);
|
||||
MODINFO_ATTR(srcversion);
|
||||
|
||||
static char last_unloaded_module[MODULE_NAME_LEN+1];
|
||||
|
||||
#ifdef CONFIG_MODULE_UNLOAD
|
||||
/* Init the unload section of the module. */
|
||||
static void module_unload_init(struct module *mod)
|
||||
@@ -539,11 +553,21 @@ static int already_uses(struct module *a, struct module *b)
|
||||
static int use_module(struct module *a, struct module *b)
|
||||
{
|
||||
struct module_use *use;
|
||||
int no_warn;
|
||||
int no_warn, err;
|
||||
|
||||
if (b == NULL || already_uses(a, b)) return 1;
|
||||
|
||||
if (!strong_try_module_get(b))
|
||||
/* If we're interrupted or time out, we fail. */
|
||||
if (wait_event_interruptible_timeout(
|
||||
module_wq, (err = strong_try_module_get(b)) != -EBUSY,
|
||||
30 * HZ) <= 0) {
|
||||
printk("%s: gave up waiting for init of module %s.\n",
|
||||
a->name, b->name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* If strong_try_module_get() returned a different error, we fail. */
|
||||
if (err)
|
||||
return 0;
|
||||
|
||||
DEBUGP("Allocating new usage for %s.\n", a->name);
|
||||
@@ -721,6 +745,8 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
|
||||
mod->exit();
|
||||
mutex_lock(&module_mutex);
|
||||
}
|
||||
/* Store the name of the last unloaded module for diagnostic purposes */
|
||||
strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
|
||||
free_module(mod);
|
||||
|
||||
out:
|
||||
@@ -814,7 +840,7 @@ static inline void module_unload_free(struct module *mod)
|
||||
|
||||
static inline int use_module(struct module *a, struct module *b)
|
||||
{
|
||||
return strong_try_module_get(b);
|
||||
return strong_try_module_get(b) == 0;
|
||||
}
|
||||
|
||||
static inline void module_unload_init(struct module *mod)
|
||||
@@ -1122,7 +1148,7 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect,
|
||||
++loaded;
|
||||
}
|
||||
|
||||
notes_attrs->dir = kobject_add_dir(&mod->mkobj.kobj, "notes");
|
||||
notes_attrs->dir = kobject_create_and_add("notes", &mod->mkobj.kobj);
|
||||
if (!notes_attrs->dir)
|
||||
goto out;
|
||||
|
||||
@@ -1212,6 +1238,7 @@ void module_remove_modinfo_attrs(struct module *mod)
|
||||
int mod_sysfs_init(struct module *mod)
|
||||
{
|
||||
int err;
|
||||
struct kobject *kobj;
|
||||
|
||||
if (!module_sysfs_initialized) {
|
||||
printk(KERN_ERR "%s: module sysfs not initialized\n",
|
||||
@@ -1219,15 +1246,25 @@ int mod_sysfs_init(struct module *mod)
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj));
|
||||
err = kobject_set_name(&mod->mkobj.kobj, "%s", mod->name);
|
||||
if (err)
|
||||
|
||||
kobj = kset_find_obj(module_kset, mod->name);
|
||||
if (kobj) {
|
||||
printk(KERN_ERR "%s: module is already loaded\n", mod->name);
|
||||
kobject_put(kobj);
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
kobj_set_kset_s(&mod->mkobj, module_subsys);
|
||||
}
|
||||
|
||||
mod->mkobj.mod = mod;
|
||||
|
||||
kobject_init(&mod->mkobj.kobj);
|
||||
memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj));
|
||||
mod->mkobj.kobj.kset = module_kset;
|
||||
err = kobject_init_and_add(&mod->mkobj.kobj, &module_ktype, NULL,
|
||||
"%s", mod->name);
|
||||
if (err)
|
||||
kobject_put(&mod->mkobj.kobj);
|
||||
|
||||
/* delay uevent until full sysfs population */
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
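The mod_sysfs_init() hunk above moves from kobject_set_name()/kobject_init()/kobject_add() to the combined kobject_init_and_add() call. A hedged, self-contained sketch of that API follows, with an invented demo_obj/demo_ktype; the kset assignment mirrors mod->mkobj.kobj.kset above.

#include <linux/kobject.h>
#include <linux/slab.h>

struct demo_obj {
	struct kobject kobj;
	int value;
};

static void demo_release(struct kobject *kobj)
{
	kfree(container_of(kobj, struct demo_obj, kobj));
}

static struct kobj_type demo_ktype = {
	.release = demo_release,
};

static struct demo_obj *demo_create(struct kset *kset, const char *name)
{
	struct demo_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	int err;

	if (!obj)
		return NULL;

	obj->kobj.kset = kset;			/* parent defaults to the kset's kobject */
	err = kobject_init_and_add(&obj->kobj, &demo_ktype, NULL, "%s", name);
	if (err) {
		kobject_put(&obj->kobj);	/* demo_release() frees the memory */
		return NULL;
	}
	return obj;
}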
@@ -1238,12 +1275,7 @@ int mod_sysfs_setup(struct module *mod,
|
||||
{
|
||||
int err;
|
||||
|
||||
/* delay uevent until full sysfs population */
|
||||
err = kobject_add(&mod->mkobj.kobj);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
mod->holders_dir = kobject_add_dir(&mod->mkobj.kobj, "holders");
|
||||
mod->holders_dir = kobject_create_and_add("holders", &mod->mkobj.kobj);
|
||||
if (!mod->holders_dir) {
|
||||
err = -ENOMEM;
|
||||
goto out_unreg;
|
||||
@@ -1263,11 +1295,9 @@ int mod_sysfs_setup(struct module *mod,
|
||||
out_unreg_param:
|
||||
module_param_sysfs_remove(mod);
|
||||
out_unreg_holders:
|
||||
kobject_unregister(mod->holders_dir);
|
||||
kobject_put(mod->holders_dir);
|
||||
out_unreg:
|
||||
kobject_del(&mod->mkobj.kobj);
|
||||
kobject_put(&mod->mkobj.kobj);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
@@ -1276,9 +1306,20 @@ static void mod_kobject_remove(struct module *mod)
|
||||
{
|
||||
module_remove_modinfo_attrs(mod);
|
||||
module_param_sysfs_remove(mod);
|
||||
kobject_unregister(mod->mkobj.drivers_dir);
|
||||
kobject_unregister(mod->holders_dir);
|
||||
kobject_unregister(&mod->mkobj.kobj);
|
||||
kobject_put(mod->mkobj.drivers_dir);
|
||||
kobject_put(mod->holders_dir);
|
||||
kobject_put(&mod->mkobj.kobj);
|
||||
}
|
||||
|
||||
/*
|
||||
* link the module while the whole machine is stopped with interrupts off
|
||||
* - this defends against kallsyms not taking locks
|
||||
*/
|
||||
static int __link_module(void *_mod)
|
||||
{
|
||||
struct module *mod = _mod;
|
||||
list_add(&mod->list, &modules);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1330,7 +1371,7 @@ void *__symbol_get(const char *symbol)
|
||||
|
||||
preempt_disable();
|
||||
value = __find_symbol(symbol, &owner, &crc, 1);
|
||||
if (value && !strong_try_module_get(owner))
|
||||
if (value && strong_try_module_get(owner) != 0)
|
||||
value = 0;
|
||||
preempt_enable();
|
||||
|
||||
@@ -1884,16 +1925,16 @@ static struct module *load_module(void __user *umod,
|
||||
/* Now we've moved module, initialize linked lists, etc. */
|
||||
module_unload_init(mod);
|
||||
|
||||
/* Initialize kobject, so we can reference it. */
|
||||
/* add kobject, so we can reference it. */
|
||||
err = mod_sysfs_init(mod);
|
||||
if (err)
|
||||
goto cleanup;
|
||||
goto free_unload;
|
||||
|
||||
/* Set up license info based on the info section */
|
||||
set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
|
||||
|
||||
if (strcmp(mod->name, "ndiswrapper") == 0)
|
||||
add_taint(TAINT_PROPRIETARY_MODULE);
|
||||
add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
|
||||
if (strcmp(mod->name, "driverloader") == 0)
|
||||
add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
|
||||
|
||||
@@ -2023,6 +2064,11 @@ static struct module *load_module(void __user *umod,
|
||||
printk(KERN_WARNING "%s: Ignoring obsolete parameters\n",
|
||||
mod->name);
|
||||
|
||||
/* Now sew it into the lists so we can get lockdep and oops
|
||||
* info during argument parsing. No one should access us, since
|
||||
* strong_try_module_get() will fail. */
|
||||
stop_machine_run(__link_module, mod, NR_CPUS);
|
||||
|
||||
/* Size of section 0 is 0, so this works well if no params */
|
||||
err = parse_args(mod->name, mod->args,
|
||||
(struct kernel_param *)
|
||||
@@ -2031,7 +2077,7 @@ static struct module *load_module(void __user *umod,
|
||||
/ sizeof(struct kernel_param),
|
||||
NULL);
|
||||
if (err < 0)
|
||||
goto arch_cleanup;
|
||||
goto unlink;
|
||||
|
||||
err = mod_sysfs_setup(mod,
|
||||
(struct kernel_param *)
|
||||
@@ -2039,7 +2085,7 @@ static struct module *load_module(void __user *umod,
|
||||
sechdrs[setupindex].sh_size
|
||||
/ sizeof(struct kernel_param));
|
||||
if (err < 0)
|
||||
goto arch_cleanup;
|
||||
goto unlink;
|
||||
add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
|
||||
add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
|
||||
|
||||
@@ -2054,9 +2100,13 @@ static struct module *load_module(void __user *umod,
|
||||
/* Done! */
|
||||
return mod;
|
||||
|
||||
arch_cleanup:
|
||||
unlink:
|
||||
stop_machine_run(__unlink_module, mod, NR_CPUS);
|
||||
module_arch_cleanup(mod);
|
||||
cleanup:
|
||||
kobject_del(&mod->mkobj.kobj);
|
||||
kobject_put(&mod->mkobj.kobj);
|
||||
free_unload:
|
||||
module_unload_free(mod);
|
||||
module_free(mod, mod->module_init);
|
||||
free_core:
|
||||
@@ -2076,17 +2126,6 @@ static struct module *load_module(void __user *umod,
|
||||
goto free_hdr;
|
||||
}
|
||||
|
||||
/*
|
||||
* link the module while the whole machine is stopped with interrupts off
|
||||
* - this defends against kallsyms not taking locks
|
||||
*/
|
||||
static int __link_module(void *_mod)
|
||||
{
|
||||
struct module *mod = _mod;
|
||||
list_add(&mod->list, &modules);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* This is where the real work happens */
|
||||
asmlinkage long
|
||||
sys_init_module(void __user *umod,
|
||||
@@ -2111,10 +2150,6 @@ sys_init_module(void __user *umod,
|
||||
return PTR_ERR(mod);
|
||||
}
|
||||
|
||||
/* Now sew it into the lists. They won't access us, since
|
||||
strong_try_module_get() will fail. */
|
||||
stop_machine_run(__link_module, mod, NR_CPUS);
|
||||
|
||||
/* Drop lock so they can recurse */
|
||||
mutex_unlock(&module_mutex);
|
||||
|
||||
@@ -2133,6 +2168,7 @@ sys_init_module(void __user *umod,
|
||||
mutex_lock(&module_mutex);
|
||||
free_module(mod);
|
||||
mutex_unlock(&module_mutex);
|
||||
wake_up(&module_wq);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -2147,6 +2183,7 @@ sys_init_module(void __user *umod,
|
||||
mod->init_size = 0;
|
||||
mod->init_text_size = 0;
|
||||
mutex_unlock(&module_mutex);
|
||||
wake_up(&module_wq);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -2211,14 +2248,13 @@ static const char *get_ksymbol(struct module *mod,
|
||||
return mod->strtab + mod->symtab[best].st_name;
|
||||
}
|
||||
|
||||
/* For kallsyms to ask for address resolution. NULL means not found.
|
||||
We don't lock, as this is used for oops resolution and races are a
|
||||
lesser concern. */
|
||||
/* FIXME: Risky: returns a pointer into a module w/o lock */
|
||||
const char *module_address_lookup(unsigned long addr,
|
||||
unsigned long *size,
|
||||
unsigned long *offset,
|
||||
char **modname)
|
||||
/* For kallsyms to ask for address resolution. NULL means not found. Careful
|
||||
* not to lock to avoid deadlock on oopses, simply disable preemption. */
|
||||
char *module_address_lookup(unsigned long addr,
|
||||
unsigned long *size,
|
||||
unsigned long *offset,
|
||||
char **modname,
|
||||
char *namebuf)
|
||||
{
|
||||
struct module *mod;
|
||||
const char *ret = NULL;
|
||||
@@ -2233,8 +2269,13 @@ const char *module_address_lookup(unsigned long addr,
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* Make a copy in here where it's safe */
|
||||
if (ret) {
|
||||
strncpy(namebuf, ret, KSYM_NAME_LEN - 1);
|
||||
ret = namebuf;
|
||||
}
|
||||
preempt_enable();
|
||||
return ret;
|
||||
return (char *)ret;
|
||||
}
|
||||
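With the new signature above, module_address_lookup() copies the symbol name into a caller-supplied buffer instead of returning a pointer into an unlocked module. The following caller is illustrative only (not part of the patch), assuming nothing beyond KSYM_NAME_LEN and the signature introduced here.

#include <linux/kallsyms.h>
#include <linux/module.h>

static void demo_print_symbol(unsigned long addr)
{
	char namebuf[KSYM_NAME_LEN];
	unsigned long size, offset;
	char *modname;
	char *name;

	/* name, if non-NULL, now points into namebuf, not into the module */
	name = module_address_lookup(addr, &size, &offset, &modname, namebuf);
	if (name)
		printk(KERN_INFO "%lx is %s+%#lx [%s]\n",
		       addr, name, offset, modname ? modname : "vmlinux");
}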
|
||||
int lookup_module_symbol_name(unsigned long addr, char *symname)
|
||||
@@ -2362,21 +2403,30 @@ static void m_stop(struct seq_file *m, void *p)
|
||||
mutex_unlock(&module_mutex);
|
||||
}
|
||||
|
||||
static char *taint_flags(unsigned int taints, char *buf)
|
||||
static char *module_flags(struct module *mod, char *buf)
|
||||
{
|
||||
int bx = 0;
|
||||
|
||||
if (taints) {
|
||||
if (mod->taints ||
|
||||
mod->state == MODULE_STATE_GOING ||
|
||||
mod->state == MODULE_STATE_COMING) {
|
||||
buf[bx++] = '(';
|
||||
if (taints & TAINT_PROPRIETARY_MODULE)
|
||||
if (mod->taints & TAINT_PROPRIETARY_MODULE)
|
||||
buf[bx++] = 'P';
|
||||
if (taints & TAINT_FORCED_MODULE)
|
||||
if (mod->taints & TAINT_FORCED_MODULE)
|
||||
buf[bx++] = 'F';
|
||||
/*
|
||||
* TAINT_FORCED_RMMOD: could be added.
|
||||
* TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't
|
||||
* apply to modules.
|
||||
*/
|
||||
|
||||
/* Show a - for module-is-being-unloaded */
|
||||
if (mod->state == MODULE_STATE_GOING)
|
||||
buf[bx++] = '-';
|
||||
/* Show a + for module-is-being-loaded */
|
||||
if (mod->state == MODULE_STATE_COMING)
|
||||
buf[bx++] = '+';
|
||||
buf[bx++] = ')';
|
||||
}
|
||||
buf[bx] = '\0';
|
||||
@@ -2403,7 +2453,7 @@ static int m_show(struct seq_file *m, void *p)
|
||||
|
||||
/* Taints info */
|
||||
if (mod->taints)
|
||||
seq_printf(m, " %s", taint_flags(mod->taints, buf));
|
||||
seq_printf(m, " %s", module_flags(mod, buf));
|
||||
|
||||
seq_printf(m, "\n");
|
||||
return 0;
|
||||
@@ -2498,97 +2548,12 @@ void print_modules(void)
|
||||
|
||||
printk("Modules linked in:");
|
||||
list_for_each_entry(mod, &modules, list)
|
||||
printk(" %s%s", mod->name, taint_flags(mod->taints, buf));
|
||||
printk(" %s%s", mod->name, module_flags(mod, buf));
|
||||
if (last_unloaded_module[0])
|
||||
printk(" [last unloaded: %s]", last_unloaded_module);
|
||||
printk("\n");
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SYSFS
|
||||
static char *make_driver_name(struct device_driver *drv)
|
||||
{
|
||||
char *driver_name;
|
||||
|
||||
driver_name = kmalloc(strlen(drv->name) + strlen(drv->bus->name) + 2,
|
||||
GFP_KERNEL);
|
||||
if (!driver_name)
|
||||
return NULL;
|
||||
|
||||
sprintf(driver_name, "%s:%s", drv->bus->name, drv->name);
|
||||
return driver_name;
|
||||
}
|
||||
|
||||
static void module_create_drivers_dir(struct module_kobject *mk)
|
||||
{
|
||||
if (!mk || mk->drivers_dir)
|
||||
return;
|
||||
|
||||
mk->drivers_dir = kobject_add_dir(&mk->kobj, "drivers");
|
||||
}
|
||||
|
||||
void module_add_driver(struct module *mod, struct device_driver *drv)
|
||||
{
|
||||
char *driver_name;
|
||||
int no_warn;
|
||||
struct module_kobject *mk = NULL;
|
||||
|
||||
if (!drv)
|
||||
return;
|
||||
|
||||
if (mod)
|
||||
mk = &mod->mkobj;
|
||||
else if (drv->mod_name) {
|
||||
struct kobject *mkobj;
|
||||
|
||||
/* Lookup built-in module entry in /sys/modules */
|
||||
mkobj = kset_find_obj(&module_subsys, drv->mod_name);
|
||||
if (mkobj) {
|
||||
mk = container_of(mkobj, struct module_kobject, kobj);
|
||||
/* remember our module structure */
|
||||
drv->mkobj = mk;
|
||||
/* kset_find_obj took a reference */
|
||||
kobject_put(mkobj);
|
||||
}
|
||||
}
|
||||
|
||||
if (!mk)
|
||||
return;
|
||||
|
||||
/* Don't check return codes; these calls are idempotent */
|
||||
no_warn = sysfs_create_link(&drv->kobj, &mk->kobj, "module");
|
||||
driver_name = make_driver_name(drv);
|
||||
if (driver_name) {
|
||||
module_create_drivers_dir(mk);
|
||||
no_warn = sysfs_create_link(mk->drivers_dir, &drv->kobj,
|
||||
driver_name);
|
||||
kfree(driver_name);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(module_add_driver);
|
||||
|
||||
void module_remove_driver(struct device_driver *drv)
|
||||
{
|
||||
struct module_kobject *mk = NULL;
|
||||
char *driver_name;
|
||||
|
||||
if (!drv)
|
||||
return;
|
||||
|
||||
sysfs_remove_link(&drv->kobj, "module");
|
||||
|
||||
if (drv->owner)
|
||||
mk = &drv->owner->mkobj;
|
||||
else if (drv->mkobj)
|
||||
mk = drv->mkobj;
|
||||
if (mk && mk->drivers_dir) {
|
||||
driver_name = make_driver_name(drv);
|
||||
if (driver_name) {
|
||||
sysfs_remove_link(mk->drivers_dir, driver_name);
|
||||
kfree(driver_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(module_remove_driver);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_MODVERSIONS
|
||||
/* Generate the signature for struct module here, too, for modversions. */
|
||||
void struct_module(struct module *mod) { return; }
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include <linux/kexec.h>
|
||||
#include <linux/debug_locks.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/kallsyms.h>
|
||||
|
||||
int panic_on_oops;
|
||||
int tainted;
|
||||
@@ -280,6 +281,13 @@ static int init_oops_id(void)
|
||||
}
|
||||
late_initcall(init_oops_id);
|
||||
|
||||
static void print_oops_end_marker(void)
|
||||
{
|
||||
init_oops_id();
|
||||
printk(KERN_WARNING "---[ end trace %016llx ]---\n",
|
||||
(unsigned long long)oops_id);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called when the architecture exits its oops handler, after printing
|
||||
* everything.
|
||||
@@ -287,11 +295,26 @@ late_initcall(init_oops_id);
|
||||
void oops_exit(void)
|
||||
{
|
||||
do_oops_enter_exit();
|
||||
init_oops_id();
|
||||
printk(KERN_WARNING "---[ end trace %016llx ]---\n",
|
||||
(unsigned long long)oops_id);
|
||||
print_oops_end_marker();
|
||||
}
|
||||
|
||||
#ifdef WANT_WARN_ON_SLOWPATH
|
||||
void warn_on_slowpath(const char *file, int line)
|
||||
{
|
||||
char function[KSYM_SYMBOL_LEN];
|
||||
unsigned long caller = (unsigned long) __builtin_return_address(0);
|
||||
sprint_symbol(function, caller);
|
||||
|
||||
printk(KERN_WARNING "------------[ cut here ]------------\n");
|
||||
printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file,
|
||||
line, function);
|
||||
print_modules();
|
||||
dump_stack();
|
||||
print_oops_end_marker();
|
||||
}
|
||||
EXPORT_SYMBOL(warn_on_slowpath);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
/*
|
||||
* Called when gcc's -fstack-protector feature is used, and
|
||||
|
||||
@@ -376,8 +376,6 @@ int param_get_string(char *buffer, struct kernel_param *kp)
|
||||
|
||||
extern struct kernel_param __start___param[], __stop___param[];
|
||||
|
||||
#define MAX_KBUILD_MODNAME KOBJ_NAME_LEN
|
||||
|
||||
struct param_attribute
|
||||
{
|
||||
struct module_attribute mattr;
|
||||
@@ -472,7 +470,7 @@ param_sysfs_setup(struct module_kobject *mk,
|
||||
sizeof(mp->grp.attrs[0]));
|
||||
size[1] = (valid_attrs + 1) * sizeof(mp->grp.attrs[0]);
|
||||
|
||||
mp = kmalloc(size[0] + size[1], GFP_KERNEL);
|
||||
mp = kzalloc(size[0] + size[1], GFP_KERNEL);
|
||||
if (!mp)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
@@ -560,11 +558,10 @@ static void __init kernel_param_sysfs_setup(const char *name,
|
||||
BUG_ON(!mk);
|
||||
|
||||
mk->mod = THIS_MODULE;
|
||||
kobj_set_kset_s(mk, module_subsys);
|
||||
kobject_set_name(&mk->kobj, name);
|
||||
kobject_init(&mk->kobj);
|
||||
ret = kobject_add(&mk->kobj);
|
||||
mk->kobj.kset = module_kset;
|
||||
ret = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, "%s", name);
|
||||
if (ret) {
|
||||
kobject_put(&mk->kobj);
|
||||
printk(KERN_ERR "Module '%s' failed to be added to sysfs, "
|
||||
"error number %d\n", name, ret);
|
||||
printk(KERN_ERR "The system will be unstable now.\n");
|
||||
@@ -588,7 +585,7 @@ static void __init param_sysfs_builtin(void)
|
||||
{
|
||||
struct kernel_param *kp, *kp_begin = NULL;
|
||||
unsigned int i, name_len, count = 0;
|
||||
char modname[MAX_KBUILD_MODNAME + 1] = "";
|
||||
char modname[MODULE_NAME_LEN + 1] = "";
|
||||
|
||||
for (i=0; i < __stop___param - __start___param; i++) {
|
||||
char *dot;
|
||||
@@ -596,12 +593,12 @@ static void __init param_sysfs_builtin(void)
|
||||
|
||||
kp = &__start___param[i];
|
||||
max_name_len =
|
||||
min_t(size_t, MAX_KBUILD_MODNAME, strlen(kp->name));
|
||||
min_t(size_t, MODULE_NAME_LEN, strlen(kp->name));
|
||||
|
||||
dot = memchr(kp->name, '.', max_name_len);
|
||||
if (!dot) {
|
||||
DEBUGP("couldn't find period in first %d characters "
|
||||
"of %s\n", MAX_KBUILD_MODNAME, kp->name);
|
||||
"of %s\n", MODULE_NAME_LEN, kp->name);
|
||||
continue;
|
||||
}
|
||||
name_len = dot - kp->name;
|
||||
@@ -679,8 +676,6 @@ static struct sysfs_ops module_sysfs_ops = {
|
||||
.store = module_attr_store,
|
||||
};
|
||||
|
||||
static struct kobj_type module_ktype;
|
||||
|
||||
static int uevent_filter(struct kset *kset, struct kobject *kobj)
|
||||
{
|
||||
struct kobj_type *ktype = get_ktype(kobj);
|
||||
@@ -694,21 +689,11 @@ static struct kset_uevent_ops module_uevent_ops = {
|
||||
.filter = uevent_filter,
|
||||
};
|
||||
|
||||
decl_subsys(module, &module_ktype, &module_uevent_ops);
|
||||
struct kset *module_kset;
|
||||
int module_sysfs_initialized;
|
||||
|
||||
static void module_release(struct kobject *kobj)
|
||||
{
|
||||
/*
|
||||
* Stupid empty release function to allow the memory for the kobject to
|
||||
* be properly cleaned up. This will not need to be present for 2.6.25
|
||||
* with the upcoming kobject core rework.
|
||||
*/
|
||||
}
|
||||
|
||||
static struct kobj_type module_ktype = {
|
||||
struct kobj_type module_ktype = {
|
||||
.sysfs_ops = &module_sysfs_ops,
|
||||
.release = module_release,
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -716,13 +701,11 @@ static struct kobj_type module_ktype = {
|
||||
*/
|
||||
static int __init param_sysfs_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = subsystem_register(&module_subsys);
|
||||
if (ret < 0) {
|
||||
printk(KERN_WARNING "%s (%d): subsystem_register error: %d\n",
|
||||
__FILE__, __LINE__, ret);
|
||||
return ret;
|
||||
module_kset = kset_create_and_add("module", &module_uevent_ops, NULL);
|
||||
if (!module_kset) {
|
||||
printk(KERN_WARNING "%s (%d): error creating kset\n",
|
||||
__FILE__, __LINE__);
|
||||
return -ENOMEM;
|
||||
}
|
||||
module_sysfs_initialized = 1;
|
||||
|
||||
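param_sysfs_init() above now builds /sys/module with kset_create_and_add() instead of decl_subsys()/subsystem_register(). A tiny hedged sketch of that call with an invented "demo" kset:

#include <linux/init.h>
#include <linux/errno.h>
#include <linux/kobject.h>

static struct kset *demo_kset;

static int __init demo_kset_init(void)
{
	/* creates /sys/demo; NULL uevent ops for brevity (module_kset above
	 * passes &module_uevent_ops) and NULL parent */
	demo_kset = kset_create_and_add("demo", NULL, NULL);
	return demo_kset ? 0 : -ENOMEM;
}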
@@ -732,14 +715,7 @@ static int __init param_sysfs_init(void)
|
||||
}
|
||||
subsys_initcall(param_sysfs_init);
|
||||
|
||||
#else
|
||||
#if 0
|
||||
static struct sysfs_ops module_sysfs_ops = {
|
||||
.show = NULL,
|
||||
.store = NULL,
|
||||
};
|
||||
#endif
|
||||
#endif
|
||||
#endif /* CONFIG_SYSFS */
|
||||
|
||||
EXPORT_SYMBOL(param_set_byte);
|
||||
EXPORT_SYMBOL(param_get_byte);
|
||||
|
||||
@@ -967,6 +967,7 @@ static void check_thread_timers(struct task_struct *tsk,
|
||||
{
|
||||
int maxfire;
|
||||
struct list_head *timers = tsk->cpu_timers;
|
||||
struct signal_struct *const sig = tsk->signal;
|
||||
|
||||
maxfire = 20;
|
||||
tsk->it_prof_expires = cputime_zero;
|
||||
@@ -1011,6 +1012,35 @@ static void check_thread_timers(struct task_struct *tsk,
|
||||
t->firing = 1;
|
||||
list_move_tail(&t->entry, firing);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for the special case thread timers.
|
||||
*/
|
||||
if (sig->rlim[RLIMIT_RTTIME].rlim_cur != RLIM_INFINITY) {
|
||||
unsigned long hard = sig->rlim[RLIMIT_RTTIME].rlim_max;
|
||||
unsigned long *soft = &sig->rlim[RLIMIT_RTTIME].rlim_cur;
|
||||
|
||||
if (hard != RLIM_INFINITY &&
|
||||
tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
|
||||
/*
|
||||
* At the hard limit, we just die.
|
||||
* No need to calculate anything else now.
|
||||
*/
|
||||
__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
|
||||
return;
|
||||
}
|
||||
if (tsk->rt.timeout > DIV_ROUND_UP(*soft, USEC_PER_SEC/HZ)) {
|
||||
/*
|
||||
* At the soft limit, send a SIGXCPU every second.
|
||||
*/
|
||||
if (sig->rlim[RLIMIT_RTTIME].rlim_cur
|
||||
< sig->rlim[RLIMIT_RTTIME].rlim_max) {
|
||||
sig->rlim[RLIMIT_RTTIME].rlim_cur +=
|
||||
USEC_PER_SEC;
|
||||
}
|
||||
__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
|
||||
}
|
||||
}
|
||||
}
|
||||
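The new RLIMIT_RTTIME handling above delivers SIGXCPU once the soft limit of unblocked real-time CPU time (in microseconds) is exceeded and SIGKILL at the hard limit. A userspace illustration of setting that limit follows; it is not part of the patch and the values are arbitrary.

#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
	struct rlimit rl = {
		.rlim_cur = 500000,	/* soft: SIGXCPU after ~0.5s of unblocked RT CPU time */
		.rlim_max = 1000000,	/* hard: SIGKILL after ~1s */
	};

	if (setrlimit(RLIMIT_RTTIME, &rl) != 0) {
		perror("setrlimit(RLIMIT_RTTIME)");
		return 1;
	}
	/* the limit only matters once the task runs SCHED_FIFO or SCHED_RR */
	printf("RLIMIT_RTTIME set\n");
	return 0;
}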
|
||||
/*
|
||||
|
||||
@@ -567,7 +567,8 @@ static const char * const hibernation_modes[] = {
|
||||
* supports it (as determined by having hibernation_ops).
|
||||
*/
|
||||
|
||||
static ssize_t disk_show(struct kset *kset, char *buf)
|
||||
static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
int i;
|
||||
char *start = buf;
|
||||
@@ -597,7 +598,8 @@ static ssize_t disk_show(struct kset *kset, char *buf)
|
||||
}
|
||||
|
||||
|
||||
static ssize_t disk_store(struct kset *kset, const char *buf, size_t n)
|
||||
static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t n)
|
||||
{
|
||||
int error = 0;
|
||||
int i;
|
||||
@@ -642,13 +644,15 @@ static ssize_t disk_store(struct kset *kset, const char *buf, size_t n)
|
||||
|
||||
power_attr(disk);
|
||||
|
||||
static ssize_t resume_show(struct kset *kset, char *buf)
|
||||
static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device),
|
||||
MINOR(swsusp_resume_device));
|
||||
}
|
||||
|
||||
static ssize_t resume_store(struct kset *kset, const char *buf, size_t n)
|
||||
static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t n)
|
||||
{
|
||||
unsigned int maj, min;
|
||||
dev_t res;
|
||||
@@ -674,12 +678,14 @@ static ssize_t resume_store(struct kset *kset, const char *buf, size_t n)
|
||||
|
||||
power_attr(resume);
|
||||
|
||||
static ssize_t image_size_show(struct kset *kset, char *buf)
|
||||
static ssize_t image_size_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
return sprintf(buf, "%lu\n", image_size);
|
||||
}
|
||||
|
||||
static ssize_t image_size_store(struct kset *kset, const char *buf, size_t n)
|
||||
static ssize_t image_size_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t n)
|
||||
{
|
||||
unsigned long size;
|
||||
|
||||
@@ -708,7 +714,7 @@ static struct attribute_group attr_group = {
|
||||
|
||||
static int __init pm_disk_init(void)
|
||||
{
|
||||
return sysfs_create_group(&power_subsys.kobj, &attr_group);
|
||||
return sysfs_create_group(power_kobj, &attr_group);
|
||||
}
|
||||
|
||||
core_initcall(pm_disk_init);
|
||||
|
||||
@@ -276,8 +276,7 @@ EXPORT_SYMBOL(pm_suspend);
|
||||
|
||||
#endif /* CONFIG_SUSPEND */
|
||||
|
||||
decl_subsys(power,NULL,NULL);
|
||||
|
||||
struct kobject *power_kobj;
|
||||
|
||||
/**
|
||||
* state - control system power state.
|
||||
@@ -290,7 +289,8 @@ decl_subsys(power,NULL,NULL);
|
||||
* proper enumerated value, and initiates a suspend transition.
|
||||
*/
|
||||
|
||||
static ssize_t state_show(struct kset *kset, char *buf)
|
||||
static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
char *s = buf;
|
||||
#ifdef CONFIG_SUSPEND
|
||||
@@ -311,7 +311,8 @@ static ssize_t state_show(struct kset *kset, char *buf)
|
||||
return (s - buf);
|
||||
}
|
||||
|
||||
static ssize_t state_store(struct kset *kset, const char *buf, size_t n)
|
||||
static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t n)
|
||||
{
|
||||
#ifdef CONFIG_SUSPEND
|
||||
suspend_state_t state = PM_SUSPEND_STANDBY;
|
||||
@@ -348,13 +349,15 @@ power_attr(state);
|
||||
#ifdef CONFIG_PM_TRACE
|
||||
int pm_trace_enabled;
|
||||
|
||||
static ssize_t pm_trace_show(struct kset *kset, char *buf)
|
||||
static ssize_t pm_trace_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
return sprintf(buf, "%d\n", pm_trace_enabled);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
pm_trace_store(struct kset *kset, const char *buf, size_t n)
|
||||
pm_trace_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t n)
|
||||
{
|
||||
int val;
|
||||
|
||||
@@ -386,10 +389,10 @@ static struct attribute_group attr_group = {
|
||||
|
||||
static int __init pm_init(void)
|
||||
{
|
||||
int error = subsystem_register(&power_subsys);
|
||||
if (!error)
|
||||
error = sysfs_create_group(&power_subsys.kobj,&attr_group);
|
||||
return error;
|
||||
power_kobj = kobject_create_and_add("power", NULL);
|
||||
if (!power_kobj)
|
||||
return -ENOMEM;
|
||||
return sysfs_create_group(power_kobj, &attr_group);
|
||||
}
|
||||
|
||||
core_initcall(pm_init);
|
||||
|
||||
@@ -54,7 +54,7 @@ extern int pfn_is_nosave(unsigned long);
|
||||
extern struct mutex pm_mutex;
|
||||
|
||||
#define power_attr(_name) \
|
||||
static struct subsys_attribute _name##_attr = { \
|
||||
static struct kobj_attribute _name##_attr = { \
|
||||
.attr = { \
|
||||
.name = __stringify(_name), \
|
||||
.mode = 0644, \
|
||||
@@ -63,8 +63,6 @@ static struct subsys_attribute _name##_attr = { \
|
||||
.store = _name##_store, \
|
||||
}
|
||||
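power_attr() now generates a struct kobj_attribute wired to _name##_show/_name##_store. A hedged usage sketch with an invented attribute name "foo", matching the show/store prototypes used throughout kernel/power above:

#include <linux/kernel.h>
#include <linux/kobject.h>

static ssize_t foo_show(struct kobject *kobj, struct kobj_attribute *attr,
			char *buf)
{
	return sprintf(buf, "%d\n", 42);
}

static ssize_t foo_store(struct kobject *kobj, struct kobj_attribute *attr,
			 const char *buf, size_t n)
{
	return n;			/* accept and ignore the input in this sketch */
}

power_attr(foo);			/* expands to struct kobj_attribute foo_attr */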
|
||||
extern struct kset power_subsys;
|
||||
|
||||
/* Preferred image size in bytes (default 500 MB) */
|
||||
extern unsigned long image_size;
|
||||
extern int in_suspend;
|
||||
|
||||
@@ -36,6 +36,13 @@
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
/*
|
||||
* Architectures can override it:
|
||||
*/
|
||||
void __attribute__((weak)) early_printk(const char *fmt, ...)
|
||||
{
|
||||
}
|
||||
|
||||
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
|
||||
|
||||
/* printk's without a loglevel use this.. */
|
||||
@@ -573,11 +580,6 @@ static int __init printk_time_setup(char *str)
|
||||
|
||||
__setup("time", printk_time_setup);
|
||||
|
||||
__attribute__((weak)) unsigned long long printk_clock(void)
|
||||
{
|
||||
return sched_clock();
|
||||
}
|
||||
|
||||
/* Check if we have any console registered that can be called early in boot. */
|
||||
static int have_callable_console(void)
|
||||
{
|
||||
@@ -628,30 +630,57 @@ asmlinkage int printk(const char *fmt, ...)
|
||||
/* cpu currently holding logbuf_lock */
|
||||
static volatile unsigned int printk_cpu = UINT_MAX;
|
||||
|
||||
const char printk_recursion_bug_msg [] =
|
||||
KERN_CRIT "BUG: recent printk recursion!\n";
|
||||
static int printk_recursion_bug;
|
||||
|
||||
asmlinkage int vprintk(const char *fmt, va_list args)
|
||||
{
|
||||
unsigned long flags;
|
||||
int printed_len;
|
||||
char *p;
|
||||
static char printk_buf[1024];
|
||||
static int log_level_unknown = 1;
|
||||
static char printk_buf[1024];
|
||||
|
||||
unsigned long flags;
|
||||
int printed_len = 0;
|
||||
int this_cpu;
|
||||
char *p;
|
||||
|
||||
boot_delay_msec();
|
||||
|
||||
preempt_disable();
|
||||
if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id())
|
||||
/* If a crash is occurring during printk() on this CPU,
|
||||
* make sure we can't deadlock */
|
||||
zap_locks();
|
||||
|
||||
/* This stops the holder of console_sem just where we want him */
|
||||
raw_local_irq_save(flags);
|
||||
this_cpu = smp_processor_id();
|
||||
|
||||
/*
|
||||
* Ouch, printk recursed into itself!
|
||||
*/
|
||||
if (unlikely(printk_cpu == this_cpu)) {
|
||||
/*
|
||||
* If a crash is occurring during printk() on this CPU,
|
||||
* then try to get the crash message out but make sure
|
||||
* we can't deadlock. Otherwise just return to avoid the
|
||||
* recursion and return - but flag the recursion so that
|
||||
* it can be printed at the next appropriate moment:
|
||||
*/
|
||||
if (!oops_in_progress) {
|
||||
printk_recursion_bug = 1;
|
||||
goto out_restore_irqs;
|
||||
}
|
||||
zap_locks();
|
||||
}
|
||||
|
||||
lockdep_off();
|
||||
spin_lock(&logbuf_lock);
|
||||
printk_cpu = smp_processor_id();
|
||||
printk_cpu = this_cpu;
|
||||
|
||||
if (printk_recursion_bug) {
|
||||
printk_recursion_bug = 0;
|
||||
strcpy(printk_buf, printk_recursion_bug_msg);
|
||||
printed_len = sizeof(printk_recursion_bug_msg);
|
||||
}
|
||||
/* Emit the output into the temporary buffer */
|
||||
printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
|
||||
printed_len += vscnprintf(printk_buf + printed_len,
|
||||
sizeof(printk_buf), fmt, args);
|
||||
|
||||
/*
|
||||
* Copy the output into log_buf. If the caller didn't provide
|
||||
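The vprintk() changes above add a recursion guard: if the CPU that already owns the log lock re-enters printk(), the call is dropped and flagged, and the flag is reported on the next normal call. Below is a simplified sketch of that pattern; the demo_* names and demo_emit() are invented, and the real code additionally disables interrupts around the check and special-cases oops_in_progress.

#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/smp.h>

static DEFINE_SPINLOCK(demo_lock);
static volatile unsigned int demo_owner_cpu = UINT_MAX;
static int demo_recursion_noted;

void demo_emit(const char *msg);	/* hypothetical low-level output routine */

static int demo_log(const char *msg)
{
	unsigned long flags;
	unsigned int cpu = raw_smp_processor_id();

	/* re-entry on the CPU that already owns the lock: note it and bail
	 * out instead of deadlocking */
	if (demo_owner_cpu == cpu) {
		demo_recursion_noted = 1;
		return 0;
	}

	spin_lock_irqsave(&demo_lock, flags);
	demo_owner_cpu = cpu;

	if (demo_recursion_noted) {
		demo_recursion_noted = 0;
		demo_emit("BUG: recent recursion!\n");
	}
	demo_emit(msg);

	demo_owner_cpu = UINT_MAX;
	spin_unlock_irqrestore(&demo_lock, flags);
	return 1;
}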
@@ -680,7 +709,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
|
||||
loglev_char = default_message_loglevel
|
||||
+ '0';
|
||||
}
|
||||
t = printk_clock();
|
||||
t = cpu_clock(printk_cpu);
|
||||
nanosec_rem = do_div(t, 1000000000);
|
||||
tlen = sprintf(tbuf,
|
||||
"<%c>[%5lu.%06lu] ",
|
||||
@@ -744,6 +773,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
|
||||
printk_cpu = UINT_MAX;
|
||||
spin_unlock(&logbuf_lock);
|
||||
lockdep_on();
|
||||
out_restore_irqs:
|
||||
raw_local_irq_restore(flags);
|
||||
}
|
||||
|
||||
|
||||
111
kernel/profile.c
@@ -52,7 +52,7 @@ static DEFINE_PER_CPU(int, cpu_profile_flip);
|
||||
static DEFINE_MUTEX(profile_flip_mutex);
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
static int __init profile_setup(char * str)
|
||||
static int __init profile_setup(char *str)
|
||||
{
|
||||
static char __initdata schedstr[] = "schedule";
|
||||
static char __initdata sleepstr[] = "sleep";
|
||||
@@ -104,28 +104,28 @@ __setup("profile=", profile_setup);
|
||||
|
||||
void __init profile_init(void)
|
||||
{
|
||||
if (!prof_on)
|
||||
if (!prof_on)
|
||||
return;
|
||||
|
||||
|
||||
/* only text is profiled */
|
||||
prof_len = (_etext - _stext) >> prof_shift;
|
||||
prof_buffer = alloc_bootmem(prof_len*sizeof(atomic_t));
|
||||
}
|
||||
|
||||
/* Profile event notifications */
|
||||
|
||||
|
||||
#ifdef CONFIG_PROFILING
|
||||
|
||||
|
||||
static BLOCKING_NOTIFIER_HEAD(task_exit_notifier);
|
||||
static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
|
||||
static BLOCKING_NOTIFIER_HEAD(munmap_notifier);
|
||||
|
||||
void profile_task_exit(struct task_struct * task)
|
||||
|
||||
void profile_task_exit(struct task_struct *task)
|
||||
{
|
||||
blocking_notifier_call_chain(&task_exit_notifier, 0, task);
|
||||
}
|
||||
|
||||
int profile_handoff_task(struct task_struct * task)
|
||||
|
||||
int profile_handoff_task(struct task_struct *task)
|
||||
{
|
||||
int ret;
|
||||
ret = atomic_notifier_call_chain(&task_free_notifier, 0, task);
|
||||
@@ -137,52 +137,55 @@ void profile_munmap(unsigned long addr)
|
||||
blocking_notifier_call_chain(&munmap_notifier, 0, (void *)addr);
|
||||
}
|
||||
|
||||
int task_handoff_register(struct notifier_block * n)
|
||||
int task_handoff_register(struct notifier_block *n)
|
||||
{
|
||||
return atomic_notifier_chain_register(&task_free_notifier, n);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(task_handoff_register);
|
||||
|
||||
int task_handoff_unregister(struct notifier_block * n)
|
||||
int task_handoff_unregister(struct notifier_block *n)
|
||||
{
|
||||
return atomic_notifier_chain_unregister(&task_free_notifier, n);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(task_handoff_unregister);
|
||||
|
||||
int profile_event_register(enum profile_type type, struct notifier_block * n)
|
||||
int profile_event_register(enum profile_type type, struct notifier_block *n)
|
||||
{
|
||||
int err = -EINVAL;
|
||||
|
||||
switch (type) {
|
||||
case PROFILE_TASK_EXIT:
|
||||
err = blocking_notifier_chain_register(
|
||||
&task_exit_notifier, n);
|
||||
break;
|
||||
case PROFILE_MUNMAP:
|
||||
err = blocking_notifier_chain_register(
|
||||
&munmap_notifier, n);
|
||||
break;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int profile_event_unregister(enum profile_type type, struct notifier_block * n)
|
||||
{
|
||||
int err = -EINVAL;
|
||||
|
||||
switch (type) {
|
||||
case PROFILE_TASK_EXIT:
|
||||
err = blocking_notifier_chain_unregister(
|
||||
&task_exit_notifier, n);
|
||||
break;
|
||||
case PROFILE_MUNMAP:
|
||||
err = blocking_notifier_chain_unregister(
|
||||
&munmap_notifier, n);
|
||||
break;
|
||||
case PROFILE_TASK_EXIT:
|
||||
err = blocking_notifier_chain_register(
|
||||
&task_exit_notifier, n);
|
||||
break;
|
||||
case PROFILE_MUNMAP:
|
||||
err = blocking_notifier_chain_register(
|
||||
&munmap_notifier, n);
|
||||
break;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(profile_event_register);
|
||||
|
||||
int profile_event_unregister(enum profile_type type, struct notifier_block *n)
|
||||
{
|
||||
int err = -EINVAL;
|
||||
|
||||
switch (type) {
|
||||
case PROFILE_TASK_EXIT:
|
||||
err = blocking_notifier_chain_unregister(
|
||||
&task_exit_notifier, n);
|
||||
break;
|
||||
case PROFILE_MUNMAP:
|
||||
err = blocking_notifier_chain_unregister(
|
||||
&munmap_notifier, n);
|
||||
break;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(profile_event_unregister);
|
||||
|
||||
int register_timer_hook(int (*hook)(struct pt_regs *))
|
||||
{
|
||||
@@ -191,6 +194,7 @@ int register_timer_hook(int (*hook)(struct pt_regs *))
|
||||
timer_hook = hook;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_timer_hook);
|
||||
|
||||
void unregister_timer_hook(int (*hook)(struct pt_regs *))
|
||||
{
|
||||
@@ -199,13 +203,7 @@ void unregister_timer_hook(int (*hook)(struct pt_regs *))
|
||||
/* make sure all CPUs see the NULL hook */
|
||||
synchronize_sched(); /* Allow ongoing interrupts to complete. */
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(register_timer_hook);
|
||||
EXPORT_SYMBOL_GPL(unregister_timer_hook);
|
||||
EXPORT_SYMBOL_GPL(task_handoff_register);
|
||||
EXPORT_SYMBOL_GPL(task_handoff_unregister);
|
||||
EXPORT_SYMBOL_GPL(profile_event_register);
|
||||
EXPORT_SYMBOL_GPL(profile_event_unregister);
|
||||
|
||||
#endif /* CONFIG_PROFILING */
|
||||
|
||||
@@ -366,7 +364,7 @@ static int __devinit profile_cpu_callback(struct notifier_block *info,
|
||||
per_cpu(cpu_profile_hits, cpu)[0] = page_address(page);
|
||||
}
|
||||
break;
|
||||
out_free:
|
||||
out_free:
|
||||
page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
|
||||
per_cpu(cpu_profile_hits, cpu)[1] = NULL;
|
||||
__free_page(page);
|
||||
@@ -409,7 +407,6 @@ void profile_hits(int type, void *__pc, unsigned int nr_hits)
|
||||
atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]);
|
||||
}
|
||||
#endif /* !CONFIG_SMP */
|
||||
|
||||
EXPORT_SYMBOL_GPL(profile_hits);
|
||||
|
||||
void profile_tick(int type)
|
||||
@@ -427,7 +424,7 @@ void profile_tick(int type)
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/ptrace.h>
|
||||
|
||||
static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
|
||||
static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
|
||||
int count, int *eof, void *data)
|
||||
{
|
||||
int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
|
||||
@@ -437,8 +434,8 @@ static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
|
||||
return len;
|
||||
}
|
||||
|
||||
static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffer,
|
||||
unsigned long count, void *data)
|
||||
static int prof_cpu_mask_write_proc(struct file *file,
|
||||
const char __user *buffer, unsigned long count, void *data)
|
||||
{
|
||||
cpumask_t *mask = (cpumask_t *)data;
|
||||
unsigned long full_count = count, err;
|
||||
@@ -457,7 +454,8 @@ void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir)
|
||||
struct proc_dir_entry *entry;
|
||||
|
||||
/* create /proc/irq/prof_cpu_mask */
|
||||
if (!(entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir)))
|
||||
entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
|
||||
if (!entry)
|
||||
return;
|
||||
entry->data = (void *)&prof_cpu_mask;
|
||||
entry->read_proc = prof_cpu_mask_read_proc;
|
||||
@@ -475,7 +473,7 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
|
||||
{
|
||||
unsigned long p = *ppos;
|
||||
ssize_t read;
|
||||
char * pnt;
|
||||
char *pnt;
|
||||
unsigned int sample_step = 1 << prof_shift;
|
||||
|
||||
profile_flip_buffers();
|
||||
@@ -486,12 +484,12 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
|
||||
read = 0;
|
||||
|
||||
while (p < sizeof(unsigned int) && count > 0) {
|
||||
if (put_user(*((char *)(&sample_step)+p),buf))
|
||||
if (put_user(*((char *)(&sample_step)+p), buf))
|
||||
return -EFAULT;
|
||||
buf++; p++; count--; read++;
|
||||
}
|
||||
pnt = (char *)prof_buffer + p - sizeof(atomic_t);
|
||||
if (copy_to_user(buf,(void *)pnt,count))
|
||||
if (copy_to_user(buf, (void *)pnt, count))
|
||||
return -EFAULT;
|
||||
read += count;
|
||||
*ppos += read;
|
||||
@@ -508,7 +506,7 @@ static ssize_t write_profile(struct file *file, const char __user *buf,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
extern int setup_profiling_timer (unsigned int multiplier);
|
||||
extern int setup_profiling_timer(unsigned int multiplier);
|
||||
|
||||
if (count == sizeof(int)) {
|
||||
unsigned int multiplier;
|
||||
@@ -591,7 +589,8 @@ static int __init create_proc_profile(void)
|
||||
return 0;
|
||||
if (create_hash_tables())
|
||||
return -1;
|
||||
if (!(entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL)))
|
||||
entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL);
|
||||
if (!entry)
|
||||
return 0;
|
||||
entry->proc_fops = &proc_profile_operations;
|
||||
entry->size = (1+prof_len) * sizeof(atomic_t);
|
||||
|
||||
167
kernel/ptrace.c
@@ -366,12 +366,73 @@ static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data)
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
#ifdef PTRACE_SINGLESTEP
|
||||
#define is_singlestep(request) ((request) == PTRACE_SINGLESTEP)
|
||||
#else
|
||||
#define is_singlestep(request) 0
|
||||
#endif
|
||||
|
||||
#ifdef PTRACE_SINGLEBLOCK
|
||||
#define is_singleblock(request) ((request) == PTRACE_SINGLEBLOCK)
|
||||
#else
|
||||
#define is_singleblock(request) 0
|
||||
#endif
|
||||
|
||||
#ifdef PTRACE_SYSEMU
|
||||
#define is_sysemu_singlestep(request) ((request) == PTRACE_SYSEMU_SINGLESTEP)
|
||||
#else
|
||||
#define is_sysemu_singlestep(request) 0
|
||||
#endif
|
||||
|
||||
static int ptrace_resume(struct task_struct *child, long request, long data)
|
||||
{
|
||||
if (!valid_signal(data))
|
||||
return -EIO;
|
||||
|
||||
if (request == PTRACE_SYSCALL)
|
||||
set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
|
||||
else
|
||||
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
|
||||
|
||||
#ifdef TIF_SYSCALL_EMU
|
||||
if (request == PTRACE_SYSEMU || request == PTRACE_SYSEMU_SINGLESTEP)
|
||||
set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
|
||||
else
|
||||
clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
|
||||
#endif
|
||||
|
||||
if (is_singleblock(request)) {
|
||||
if (unlikely(!arch_has_block_step()))
|
||||
return -EIO;
|
||||
user_enable_block_step(child);
|
||||
} else if (is_singlestep(request) || is_sysemu_singlestep(request)) {
|
||||
if (unlikely(!arch_has_single_step()))
|
||||
return -EIO;
|
||||
user_enable_single_step(child);
|
||||
}
|
||||
else
|
||||
user_disable_single_step(child);
|
||||
|
||||
child->exit_code = data;
|
||||
wake_up_process(child);
|
||||
|
||||
return 0;
|
||||
}
|
||||
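ptrace_resume() above is what ultimately services PTRACE_SINGLESTEP, PTRACE_SYSCALL and PTRACE_CONT. The userspace sketch below (not from the patch) exercises the single-step path on a trivial child; error handling is minimal.

#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();
	int status, steps = 0;

	if (pid == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		execl("/bin/true", "true", (char *)NULL);
		_exit(1);
	}

	waitpid(pid, &status, 0);		/* child stops at exec */
	while (WIFSTOPPED(status)) {
		ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL);
		waitpid(pid, &status, 0);
		steps++;
	}
	printf("child ran for %d single steps\n", steps);
	return 0;
}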
|
||||
int ptrace_request(struct task_struct *child, long request,
|
||||
long addr, long data)
|
||||
{
|
||||
int ret = -EIO;
|
||||
|
||||
switch (request) {
|
||||
case PTRACE_PEEKTEXT:
|
||||
case PTRACE_PEEKDATA:
|
||||
return generic_ptrace_peekdata(child, addr, data);
|
||||
case PTRACE_POKETEXT:
|
||||
case PTRACE_POKEDATA:
|
||||
return generic_ptrace_pokedata(child, addr, data);
|
||||
|
||||
#ifdef PTRACE_OLDSETOPTIONS
|
||||
case PTRACE_OLDSETOPTIONS:
|
||||
#endif
|
||||
@@ -390,6 +451,26 @@ int ptrace_request(struct task_struct *child, long request,
|
||||
case PTRACE_DETACH: /* detach a process that was attached. */
|
||||
ret = ptrace_detach(child, data);
|
||||
break;
|
||||
|
||||
#ifdef PTRACE_SINGLESTEP
|
||||
case PTRACE_SINGLESTEP:
|
||||
#endif
|
||||
#ifdef PTRACE_SINGLEBLOCK
|
||||
case PTRACE_SINGLEBLOCK:
|
||||
#endif
|
||||
#ifdef PTRACE_SYSEMU
|
||||
case PTRACE_SYSEMU:
|
||||
case PTRACE_SYSEMU_SINGLESTEP:
|
||||
#endif
|
||||
case PTRACE_SYSCALL:
|
||||
case PTRACE_CONT:
|
||||
return ptrace_resume(child, request, data);
|
||||
|
||||
case PTRACE_KILL:
|
||||
if (child->exit_state) /* already dead */
|
||||
return 0;
|
||||
return ptrace_resume(child, request, SIGKILL);
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -470,6 +551,8 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
|
||||
lock_kernel();
|
||||
if (request == PTRACE_TRACEME) {
|
||||
ret = ptrace_traceme();
|
||||
if (!ret)
|
||||
arch_ptrace_attach(current);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -524,3 +607,87 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data)
|
||||
copied = access_process_vm(tsk, addr, &data, sizeof(data), 1);
|
||||
return (copied == sizeof(data)) ? 0 : -EIO;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
#include <linux/compat.h>
|
||||
|
||||
int compat_ptrace_request(struct task_struct *child, compat_long_t request,
|
||||
compat_ulong_t addr, compat_ulong_t data)
|
||||
{
|
||||
compat_ulong_t __user *datap = compat_ptr(data);
|
||||
compat_ulong_t word;
|
||||
int ret;
|
||||
|
||||
switch (request) {
|
||||
case PTRACE_PEEKTEXT:
|
||||
case PTRACE_PEEKDATA:
|
||||
ret = access_process_vm(child, addr, &word, sizeof(word), 0);
|
||||
if (ret != sizeof(word))
|
||||
ret = -EIO;
|
||||
else
|
||||
ret = put_user(word, datap);
|
||||
break;
|
||||
|
||||
case PTRACE_POKETEXT:
|
||||
case PTRACE_POKEDATA:
|
||||
ret = access_process_vm(child, addr, &data, sizeof(data), 1);
|
||||
ret = (ret != sizeof(data) ? -EIO : 0);
|
||||
break;
|
||||
|
||||
case PTRACE_GETEVENTMSG:
|
||||
ret = put_user((compat_ulong_t) child->ptrace_message, datap);
|
||||
break;
|
||||
|
||||
default:
|
||||
ret = ptrace_request(child, request, addr, data);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE
|
||||
asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
|
||||
compat_long_t addr, compat_long_t data)
|
||||
{
|
||||
struct task_struct *child;
|
||||
long ret;
|
||||
|
||||
/*
|
||||
* This lock_kernel fixes a subtle race with suid exec
|
||||
*/
|
||||
lock_kernel();
|
||||
if (request == PTRACE_TRACEME) {
|
||||
ret = ptrace_traceme();
|
||||
goto out;
|
||||
}
|
||||
|
||||
child = ptrace_get_task_struct(pid);
|
||||
if (IS_ERR(child)) {
|
||||
ret = PTR_ERR(child);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (request == PTRACE_ATTACH) {
|
||||
ret = ptrace_attach(child);
|
||||
/*
|
||||
* Some architectures need to do book-keeping after
|
||||
* a ptrace attach.
|
||||
*/
|
||||
if (!ret)
|
||||
arch_ptrace_attach(child);
|
||||
goto out_put_task_struct;
|
||||
}
|
||||
|
||||
ret = ptrace_check_attach(child, request == PTRACE_KILL);
|
||||
if (!ret)
|
||||
ret = compat_arch_ptrace(child, request, addr, data);
|
||||
|
||||
out_put_task_struct:
|
||||
put_task_struct(child);
|
||||
out:
|
||||
unlock_kernel();
|
||||
return ret;
|
||||
}
|
||||
#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */
|
||||
|
||||
#endif /* CONFIG_COMPAT */
|
||||
|
||||
575
kernel/rcuclassic.c
Normal file
@@ -0,0 +1,575 @@
|
||||
/*
|
||||
* Read-Copy Update mechanism for mutual exclusion
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright IBM Corporation, 2001
|
||||
*
|
||||
* Authors: Dipankar Sarma <dipankar@in.ibm.com>
|
||||
* Manfred Spraul <manfred@colorfullife.com>
|
||||
*
|
||||
* Based on the original work by Paul McKenney <paulmck@us.ibm.com>
|
||||
* and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
|
||||
* Papers:
|
||||
* http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
|
||||
* http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
|
||||
*
|
||||
* For detailed explanation of Read-Copy Update mechanism see -
|
||||
* Documentation/RCU
|
||||
*
|
||||
*/
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/mutex.h>
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
static struct lock_class_key rcu_lock_key;
|
||||
struct lockdep_map rcu_lock_map =
|
||||
STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
|
||||
EXPORT_SYMBOL_GPL(rcu_lock_map);
|
||||
#endif
|
||||
|
||||
|
||||
/* Definition for rcupdate control block. */
|
||||
static struct rcu_ctrlblk rcu_ctrlblk = {
|
||||
.cur = -300,
|
||||
.completed = -300,
|
||||
.lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
|
||||
.cpumask = CPU_MASK_NONE,
|
||||
};
|
||||
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
|
||||
.cur = -300,
|
||||
.completed = -300,
|
||||
.lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
|
||||
.cpumask = CPU_MASK_NONE,
|
||||
};
|
||||
|
||||
DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
|
||||
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
|
||||
|
||||
static int blimit = 10;
|
||||
static int qhimark = 10000;
|
||||
static int qlowmark = 100;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static void force_quiescent_state(struct rcu_data *rdp,
|
||||
struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
int cpu;
|
||||
cpumask_t cpumask;
|
||||
set_need_resched();
|
||||
if (unlikely(!rcp->signaled)) {
|
||||
rcp->signaled = 1;
|
||||
/*
|
||||
* Don't send IPI to itself. With irqs disabled,
|
||||
* rdp->cpu is the current cpu.
|
||||
*/
|
||||
cpumask = rcp->cpumask;
|
||||
cpu_clear(rdp->cpu, cpumask);
|
||||
for_each_cpu_mask(cpu, cpumask)
|
||||
smp_send_reschedule(cpu);
|
||||
}
|
||||
}
|
||||
#else
|
||||
static inline void force_quiescent_state(struct rcu_data *rdp,
|
||||
struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
set_need_resched();
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* call_rcu - Queue an RCU callback for invocation after a grace period.
|
||||
* @head: structure to be used for queueing the RCU updates.
|
||||
* @func: actual update function to be invoked after the grace period
|
||||
*
|
||||
* The update function will be invoked some time after a full grace
|
||||
* period elapses, in other words after all currently executing RCU
|
||||
* read-side critical sections have completed. RCU read-side critical
|
||||
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
|
||||
* and may be nested.
|
||||
*/
|
||||
void call_rcu(struct rcu_head *head,
|
||||
void (*func)(struct rcu_head *rcu))
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp;
|
||||
|
||||
head->func = func;
|
||||
head->next = NULL;
|
||||
local_irq_save(flags);
|
||||
rdp = &__get_cpu_var(rcu_data);
|
||||
*rdp->nxttail = head;
|
||||
rdp->nxttail = &head->next;
|
||||
if (unlikely(++rdp->qlen > qhimark)) {
|
||||
rdp->blimit = INT_MAX;
|
||||
force_quiescent_state(rdp, &rcu_ctrlblk);
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(call_rcu);
|
||||
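A typical call_rcu() usage matching the comment above: embed an rcu_head in the protected object and free it from the callback once a grace period has elapsed. struct demo_node and demo_free_rcu() are invented for illustration.

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_node {
	int data;
	struct rcu_head rcu;
};

static void demo_free_rcu(struct rcu_head *head)
{
	/* runs after all pre-existing RCU read-side critical sections finish */
	kfree(container_of(head, struct demo_node, rcu));
}

static void demo_delete(struct demo_node *node)
{
	/* ...unlink node from its RCU-protected structure first... */
	call_rcu(&node->rcu, demo_free_rcu);
}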
|
||||
/**
|
||||
* call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
|
||||
* @head: structure to be used for queueing the RCU updates.
|
||||
* @func: actual update function to be invoked after the grace period
|
||||
*
|
||||
* The update function will be invoked some time after a full grace
|
||||
* period elapses, in other words after all currently executing RCU
|
||||
* read-side critical sections have completed. call_rcu_bh() assumes
|
||||
* that the read-side critical sections end on completion of a softirq
|
||||
* handler. This means that read-side critical sections in process
|
||||
* context must not be interrupted by softirqs. This interface is to be
|
||||
* used when most of the read-side critical sections are in softirq context.
|
||||
* RCU read-side critical sections are delimited by rcu_read_lock() and
|
||||
* rcu_read_unlock() if in interrupt context, or rcu_read_lock_bh()
|
||||
* and rcu_read_unlock_bh(), if in process context. These may be nested.
|
||||
*/
|
||||
void call_rcu_bh(struct rcu_head *head,
|
||||
void (*func)(struct rcu_head *rcu))
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp;
|
||||
|
||||
head->func = func;
|
||||
head->next = NULL;
|
||||
local_irq_save(flags);
|
||||
rdp = &__get_cpu_var(rcu_bh_data);
|
||||
*rdp->nxttail = head;
|
||||
rdp->nxttail = &head->next;
|
||||
|
||||
if (unlikely(++rdp->qlen > qhimark)) {
|
||||
rdp->blimit = INT_MAX;
|
||||
force_quiescent_state(rdp, &rcu_bh_ctrlblk);
|
||||
}
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(call_rcu_bh);
|
||||
|
||||
/*
|
||||
* Return the number of RCU batches processed thus far. Useful
|
||||
* for debug and statistics.
|
||||
*/
|
||||
long rcu_batches_completed(void)
|
||||
{
|
||||
return rcu_ctrlblk.completed;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_batches_completed);
|
||||
|
||||
/*
|
||||
* Return the number of RCU batches processed thus far. Useful
|
||||
* for debug and statistics.
|
||||
*/
|
||||
long rcu_batches_completed_bh(void)
|
||||
{
|
||||
return rcu_bh_ctrlblk.completed;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
|
||||
|
||||
/* Raises the softirq for processing rcu_callbacks. */
|
||||
static inline void raise_rcu_softirq(void)
|
||||
{
|
||||
raise_softirq(RCU_SOFTIRQ);
|
||||
/*
|
||||
* The smp_mb() here is required to ensure that this cpu's
|
||||
* __rcu_process_callbacks() reads the most recently updated
|
||||
* value of rcu->cur.
|
||||
*/
|
||||
smp_mb();
|
||||
}
|
||||
|
||||
/*
|
||||
* Invoke the completed RCU callbacks. They are expected to be in
|
||||
* a per-cpu list.
|
||||
*/
|
||||
static void rcu_do_batch(struct rcu_data *rdp)
|
||||
{
|
||||
struct rcu_head *next, *list;
|
||||
int count = 0;
|
||||
|
||||
list = rdp->donelist;
|
||||
while (list) {
|
||||
next = list->next;
|
||||
prefetch(next);
|
||||
list->func(list);
|
||||
list = next;
|
||||
if (++count >= rdp->blimit)
|
||||
break;
|
||||
}
|
||||
rdp->donelist = list;
|
||||
|
||||
local_irq_disable();
|
||||
rdp->qlen -= count;
|
||||
local_irq_enable();
|
||||
if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
|
||||
rdp->blimit = blimit;
|
||||
|
||||
if (!rdp->donelist)
|
||||
rdp->donetail = &rdp->donelist;
|
||||
else
|
||||
raise_rcu_softirq();
|
||||
}
|
||||
|
||||
/*
|
||||
* Grace period handling:
|
||||
* The grace period handling consists out of two steps:
|
||||
* - A new grace period is started.
|
||||
* This is done by rcu_start_batch. The start is not broadcasted to
|
||||
* all cpus, they must pick this up by comparing rcp->cur with
|
||||
* rdp->quiescbatch. All cpus are recorded in the
|
||||
* rcu_ctrlblk.cpumask bitmap.
|
||||
* - All cpus must go through a quiescent state.
|
||||
* Since the start of the grace period is not broadcasted, at least two
|
||||
* calls to rcu_check_quiescent_state are required:
|
||||
* The first call just notices that a new grace period is running. The
|
||||
* following calls check if there was a quiescent state since the beginning
|
||||
* of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
|
||||
* the bitmap is empty, then the grace period is completed.
|
||||
* rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
|
||||
* period (if necessary).
|
||||
*/
|
||||
/*
|
||||
* Register a new batch of callbacks, and start it up if there is currently no
|
||||
* active batch and the batch to be registered has not already occurred.
|
||||
* Caller must hold rcu_ctrlblk.lock.
|
||||
*/
|
||||
static void rcu_start_batch(struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
if (rcp->next_pending &&
|
||||
rcp->completed == rcp->cur) {
|
||||
rcp->next_pending = 0;
|
||||
/*
|
||||
* next_pending == 0 must be visible in
|
||||
* __rcu_process_callbacks() before it can see new value of cur.
|
||||
*/
|
||||
smp_wmb();
|
||||
rcp->cur++;
|
||||
|
||||
/*
|
||||
* Accessing nohz_cpu_mask before incrementing rcp->cur needs a
|
||||
* barrier. Otherwise it can cause tickless idle CPUs to be
|
||||
* included in rcp->cpumask, which will extend graceperiods
|
||||
* unnecessarily.
|
||||
*/
|
||||
smp_mb();
|
||||
cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
|
||||
|
||||
rcp->signaled = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* cpu went through a quiescent state since the beginning of the grace period.
|
||||
* Clear it from the cpu mask and complete the grace period if it was the last
|
||||
* cpu. Start another grace period if someone has further entries pending
|
||||
*/
|
||||
static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
cpu_clear(cpu, rcp->cpumask);
|
||||
if (cpus_empty(rcp->cpumask)) {
|
||||
/* batch completed ! */
|
||||
rcp->completed = rcp->cur;
|
||||
rcu_start_batch(rcp);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the cpu has gone through a quiescent state (say context
|
||||
* switch). If so and if it already hasn't done so in this RCU
|
||||
* quiescent cycle, then indicate that it has done so.
|
||||
*/
|
||||
static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
if (rdp->quiescbatch != rcp->cur) {
|
||||
/* start new grace period: */
|
||||
rdp->qs_pending = 1;
|
||||
rdp->passed_quiesc = 0;
|
||||
rdp->quiescbatch = rcp->cur;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Grace period already completed for this cpu?
|
||||
* qs_pending is checked instead of the actual bitmap to avoid
|
||||
* cacheline thrashing.
|
||||
*/
|
||||
if (!rdp->qs_pending)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Was there a quiescent state since the beginning of the grace
|
||||
* period? If no, then exit and wait for the next call.
|
||||
*/
|
||||
if (!rdp->passed_quiesc)
|
||||
return;
|
||||
rdp->qs_pending = 0;
|
||||
|
||||
spin_lock(&rcp->lock);
|
||||
/*
|
||||
* rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
|
||||
* during cpu startup. Ignore the quiescent state.
|
||||
*/
|
||||
if (likely(rdp->quiescbatch == rcp->cur))
|
||||
cpu_quiet(rdp->cpu, rcp);
|
||||
|
||||
spin_unlock(&rcp->lock);
|
||||
}
|
||||
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
|
||||
/* warning! helper for rcu_offline_cpu. do not use elsewhere without reviewing
|
||||
* locking requirements, the list it's pulling from has to belong to a cpu
|
||||
* which is dead and hence not processing interrupts.
|
||||
*/
|
||||
static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
|
||||
struct rcu_head **tail)
|
||||
{
|
||||
local_irq_disable();
|
||||
*this_rdp->nxttail = list;
|
||||
if (list)
|
||||
this_rdp->nxttail = tail;
|
||||
local_irq_enable();
|
||||
}
|
||||
|
||||
static void __rcu_offline_cpu(struct rcu_data *this_rdp,
|
||||
struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
|
||||
{
|
||||
/* if the cpu going offline owns the grace period
|
||||
* we can block indefinitely waiting for it, so flush
|
||||
* it here
|
||||
*/
|
||||
spin_lock_bh(&rcp->lock);
|
||||
if (rcp->cur != rcp->completed)
|
||||
cpu_quiet(rdp->cpu, rcp);
|
||||
spin_unlock_bh(&rcp->lock);
|
||||
rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
|
||||
rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
|
||||
rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
|
||||
}
|
||||
|
||||
static void rcu_offline_cpu(int cpu)
|
||||
{
|
||||
struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
|
||||
struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
|
||||
|
||||
__rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
|
||||
&per_cpu(rcu_data, cpu));
|
||||
__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
|
||||
&per_cpu(rcu_bh_data, cpu));
|
||||
put_cpu_var(rcu_data);
|
||||
put_cpu_var(rcu_bh_data);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static void rcu_offline_cpu(int cpu)
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This does the RCU processing work from softirq context.
|
||||
*/
|
||||
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
|
||||
*rdp->donetail = rdp->curlist;
|
||||
rdp->donetail = rdp->curtail;
|
||||
rdp->curlist = NULL;
|
||||
rdp->curtail = &rdp->curlist;
|
||||
}
|
||||
|
||||
if (rdp->nxtlist && !rdp->curlist) {
|
||||
local_irq_disable();
|
||||
rdp->curlist = rdp->nxtlist;
|
||||
rdp->curtail = rdp->nxttail;
|
||||
rdp->nxtlist = NULL;
|
||||
rdp->nxttail = &rdp->nxtlist;
|
||||
local_irq_enable();
|
||||
|
||||
/*
|
||||
* start the next batch of callbacks
|
||||
*/
|
||||
|
||||
/* determine batch number */
|
||||
rdp->batch = rcp->cur + 1;
|
||||
/* see the comment and corresponding wmb() in
|
||||
* the rcu_start_batch()
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
if (!rcp->next_pending) {
|
||||
/* and start it/schedule start if it's a new batch */
|
||||
spin_lock(&rcp->lock);
|
||||
rcp->next_pending = 1;
|
||||
rcu_start_batch(rcp);
|
||||
spin_unlock(&rcp->lock);
|
||||
}
|
||||
}
|
||||
|
||||
rcu_check_quiescent_state(rcp, rdp);
|
||||
if (rdp->donelist)
|
||||
rcu_do_batch(rdp);
|
||||
}
|
||||
|
||||
static void rcu_process_callbacks(struct softirq_action *unused)
|
||||
{
|
||||
__rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
|
||||
__rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
|
||||
}
|
||||
|
||||
static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
|
||||
{
|
||||
/* This cpu has pending rcu entries and the grace period
|
||||
* for them has completed.
|
||||
*/
|
||||
if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
|
||||
return 1;
|
||||
|
||||
/* This cpu has no pending entries, but there are new entries */
|
||||
if (!rdp->curlist && rdp->nxtlist)
|
||||
return 1;
|
||||
|
||||
/* This cpu has finished callbacks to invoke */
|
||||
if (rdp->donelist)
|
||||
return 1;
|
||||
|
||||
/* The rcu core waits for a quiescent state from the cpu */
|
||||
if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
|
||||
return 1;
|
||||
|
||||
/* nothing to do */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if there is any immediate RCU-related work to be done
|
||||
* by the current CPU, returning 1 if so. This function is part of the
|
||||
* RCU implementation; it is -not- an exported member of the RCU API.
|
||||
*/
|
||||
int rcu_pending(int cpu)
|
||||
{
|
||||
return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
|
||||
__rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if any future RCU-related work will need to be done
|
||||
* by the current CPU, even if none need be done immediately, returning
|
||||
* 1 if so. This function is part of the RCU implementation; it is -not-
|
||||
* an exported member of the RCU API.
|
||||
*/
|
||||
int rcu_needs_cpu(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
|
||||
struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
|
||||
|
||||
return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
|
||||
}
|
||||
|
||||
void rcu_check_callbacks(int cpu, int user)
|
||||
{
|
||||
if (user ||
|
||||
(idle_cpu(cpu) && !in_softirq() &&
|
||||
hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
|
||||
rcu_qsctr_inc(cpu);
|
||||
rcu_bh_qsctr_inc(cpu);
|
||||
} else if (!in_softirq())
|
||||
rcu_bh_qsctr_inc(cpu);
|
||||
raise_rcu_softirq();
|
||||
}
|
||||
|
||||
static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
memset(rdp, 0, sizeof(*rdp));
|
||||
rdp->curtail = &rdp->curlist;
|
||||
rdp->nxttail = &rdp->nxtlist;
|
||||
rdp->donetail = &rdp->donelist;
|
||||
rdp->quiescbatch = rcp->completed;
|
||||
rdp->qs_pending = 0;
|
||||
rdp->cpu = cpu;
|
||||
rdp->blimit = blimit;
|
||||
}
|
||||
|
||||
static void __cpuinit rcu_online_cpu(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
|
||||
struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);
|
||||
|
||||
rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
|
||||
rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp);
|
||||
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
|
||||
}
|
||||
|
||||
static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
long cpu = (long)hcpu;
|
||||
|
||||
switch (action) {
|
||||
case CPU_UP_PREPARE:
|
||||
case CPU_UP_PREPARE_FROZEN:
|
||||
rcu_online_cpu(cpu);
|
||||
break;
|
||||
case CPU_DEAD:
|
||||
case CPU_DEAD_FROZEN:
|
||||
rcu_offline_cpu(cpu);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block __cpuinitdata rcu_nb = {
|
||||
.notifier_call = rcu_cpu_notify,
|
||||
};
|
||||
|
||||
/*
|
||||
* Initializes rcu mechanism. Assumed to be called early.
|
||||
* That is, before the local timer (SMP) or jiffies timer (uniprocessor) is set up.
|
||||
* Note that rcu_qsctr and friends are implicitly
|
||||
* initialized due to the choice of ``0'' for RCU_CTR_INVALID.
|
||||
*/
|
||||
void __init __rcu_init(void)
|
||||
{
|
||||
rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
|
||||
(void *)(long)smp_processor_id());
|
||||
/* Register notifier for non-boot CPUs */
|
||||
register_cpu_notifier(&rcu_nb);
|
||||
}
|
||||
|
||||
module_param(blimit, int, 0);
|
||||
module_param(qhimark, int, 0);
|
||||
module_param(qlowmark, int, 0);
|
||||
@@ -15,7 +15,7 @@
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright (C) IBM Corporation, 2001
|
||||
* Copyright IBM Corporation, 2001
|
||||
*
|
||||
* Authors: Dipankar Sarma <dipankar@in.ibm.com>
|
||||
* Manfred Spraul <manfred@colorfullife.com>
|
||||
@@ -35,572 +35,27 @@
|
||||
#include <linux/init.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/mutex.h>
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
static struct lock_class_key rcu_lock_key;
|
||||
struct lockdep_map rcu_lock_map =
|
||||
STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
|
||||
|
||||
EXPORT_SYMBOL_GPL(rcu_lock_map);
|
||||
#endif
|
||||
|
||||
/* Definition for rcupdate control block. */
|
||||
static struct rcu_ctrlblk rcu_ctrlblk = {
|
||||
.cur = -300,
|
||||
.completed = -300,
|
||||
.lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
|
||||
.cpumask = CPU_MASK_NONE,
|
||||
};
|
||||
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
|
||||
.cur = -300,
|
||||
.completed = -300,
|
||||
.lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
|
||||
.cpumask = CPU_MASK_NONE,
|
||||
};
|
||||
|
||||
DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
|
||||
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
|
||||
|
||||
/* Fake initialization required by compiler */
|
||||
static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
|
||||
static int blimit = 10;
|
||||
static int qhimark = 10000;
|
||||
static int qlowmark = 100;
|
||||
|
||||
static atomic_t rcu_barrier_cpu_count;
|
||||
static DEFINE_MUTEX(rcu_barrier_mutex);
|
||||
static struct completion rcu_barrier_completion;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static void force_quiescent_state(struct rcu_data *rdp,
|
||||
struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
int cpu;
|
||||
cpumask_t cpumask;
|
||||
set_need_resched();
|
||||
if (unlikely(!rcp->signaled)) {
|
||||
rcp->signaled = 1;
|
||||
/*
|
||||
* Don't send IPI to itself. With irqs disabled,
|
||||
* rdp->cpu is the current cpu.
|
||||
*/
|
||||
cpumask = rcp->cpumask;
|
||||
cpu_clear(rdp->cpu, cpumask);
|
||||
for_each_cpu_mask(cpu, cpumask)
|
||||
smp_send_reschedule(cpu);
|
||||
}
|
||||
}
|
||||
#else
|
||||
static inline void force_quiescent_state(struct rcu_data *rdp,
|
||||
struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
set_need_resched();
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* call_rcu - Queue an RCU callback for invocation after a grace period.
|
||||
* @head: structure to be used for queueing the RCU updates.
|
||||
* @func: actual update function to be invoked after the grace period
|
||||
*
|
||||
* The update function will be invoked some time after a full grace
|
||||
* period elapses, in other words after all currently executing RCU
|
||||
* read-side critical sections have completed. RCU read-side critical
|
||||
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
|
||||
* and may be nested.
|
||||
*/
|
||||
void fastcall call_rcu(struct rcu_head *head,
|
||||
void (*func)(struct rcu_head *rcu))
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp;
|
||||
|
||||
head->func = func;
|
||||
head->next = NULL;
|
||||
local_irq_save(flags);
|
||||
rdp = &__get_cpu_var(rcu_data);
|
||||
*rdp->nxttail = head;
|
||||
rdp->nxttail = &head->next;
|
||||
if (unlikely(++rdp->qlen > qhimark)) {
|
||||
rdp->blimit = INT_MAX;
|
||||
force_quiescent_state(rdp, &rcu_ctrlblk);
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
}
|
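/*
 * A minimal usage sketch of call_rcu(), illustrative only and not taken
 * from this patch: the caller embeds a struct rcu_head in its own object
 * and frees the object from the callback once a grace period has elapsed.
 * "struct foo", foo_reclaim() and foo_release() are hypothetical names;
 * <linux/slab.h> is assumed for kfree(), and a real caller would unlink
 * the object (e.g. with list_del_rcu()) before deferring the free.
 */
struct foo {
	int data;
	struct rcu_head rcu;
};

static void foo_reclaim(struct rcu_head *rcu)
{
	struct foo *fp = container_of(rcu, struct foo, rcu);

	kfree(fp);	/* runs only after a full grace period has elapsed */
}

static void foo_release(struct foo *fp)
{
	call_rcu(&fp->rcu, foo_reclaim);	/* defer the free to the callback */
}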
||||
|
||||
/**
|
||||
* call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
|
||||
* @head: structure to be used for queueing the RCU updates.
|
||||
* @func: actual update function to be invoked after the grace period
|
||||
*
|
||||
* The update function will be invoked some time after a full grace
|
||||
* period elapses, in other words after all currently executing RCU
|
||||
* read-side critical sections have completed. call_rcu_bh() assumes
|
||||
* that the read-side critical sections end on completion of a softirq
|
||||
* handler. This means that read-side critical sections in process
|
||||
* context must not be interrupted by softirqs. This interface is to be
|
||||
* used when most of the read-side critical sections are in softirq context.
|
||||
* RCU read-side critical sections are delimited by rcu_read_lock() and
|
||||
* rcu_read_unlock(), * if in interrupt context or rcu_read_lock_bh()
|
||||
* and rcu_read_unlock_bh(), if in process context. These may be nested.
|
||||
*/
|
||||
void fastcall call_rcu_bh(struct rcu_head *head,
|
||||
void (*func)(struct rcu_head *rcu))
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp;
|
||||
|
||||
head->func = func;
|
||||
head->next = NULL;
|
||||
local_irq_save(flags);
|
||||
rdp = &__get_cpu_var(rcu_bh_data);
|
||||
*rdp->nxttail = head;
|
||||
rdp->nxttail = &head->next;
|
||||
|
||||
if (unlikely(++rdp->qlen > qhimark)) {
|
||||
rdp->blimit = INT_MAX;
|
||||
force_quiescent_state(rdp, &rcu_bh_ctrlblk);
|
||||
}
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the number of RCU batches processed thus far. Useful
|
||||
* for debug and statistics.
|
||||
*/
|
||||
long rcu_batches_completed(void)
|
||||
{
|
||||
return rcu_ctrlblk.completed;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the number of RCU batches processed thus far. Useful
|
||||
* for debug and statistics.
|
||||
*/
|
||||
long rcu_batches_completed_bh(void)
|
||||
{
|
||||
return rcu_bh_ctrlblk.completed;
|
||||
}
|
||||
|
||||
static void rcu_barrier_callback(struct rcu_head *notused)
|
||||
{
|
||||
if (atomic_dec_and_test(&rcu_barrier_cpu_count))
|
||||
complete(&rcu_barrier_completion);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called with preemption disabled, and from cross-cpu IRQ context.
|
||||
*/
|
||||
static void rcu_barrier_func(void *notused)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
|
||||
struct rcu_head *head;
|
||||
|
||||
head = &rdp->barrier;
|
||||
atomic_inc(&rcu_barrier_cpu_count);
|
||||
call_rcu(head, rcu_barrier_callback);
|
||||
}
|
||||
|
||||
/**
|
||||
* rcu_barrier - Wait until all the in-flight RCUs are complete.
|
||||
*/
|
||||
void rcu_barrier(void)
|
||||
{
|
||||
BUG_ON(in_interrupt());
|
||||
/* Take cpucontrol mutex to protect against CPU hotplug */
|
||||
mutex_lock(&rcu_barrier_mutex);
|
||||
init_completion(&rcu_barrier_completion);
|
||||
atomic_set(&rcu_barrier_cpu_count, 0);
|
||||
on_each_cpu(rcu_barrier_func, NULL, 0, 1);
|
||||
wait_for_completion(&rcu_barrier_completion);
|
||||
mutex_unlock(&rcu_barrier_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_barrier);
|
||||
|
||||
/*
|
||||
* Invoke the completed RCU callbacks. They are expected to be in
|
||||
* a per-cpu list.
|
||||
*/
|
||||
static void rcu_do_batch(struct rcu_data *rdp)
|
||||
{
|
||||
struct rcu_head *next, *list;
|
||||
int count = 0;
|
||||
|
||||
list = rdp->donelist;
|
||||
while (list) {
|
||||
next = list->next;
|
||||
prefetch(next);
|
||||
list->func(list);
|
||||
list = next;
|
||||
if (++count >= rdp->blimit)
|
||||
break;
|
||||
}
|
||||
rdp->donelist = list;
|
||||
|
||||
local_irq_disable();
|
||||
rdp->qlen -= count;
|
||||
local_irq_enable();
|
||||
if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
|
||||
rdp->blimit = blimit;
|
||||
|
||||
if (!rdp->donelist)
|
||||
rdp->donetail = &rdp->donelist;
|
||||
else
|
||||
tasklet_schedule(&per_cpu(rcu_tasklet, rdp->cpu));
|
||||
}
|
||||
|
||||
/*
|
||||
* Grace period handling:
|
||||
* The grace period handling consists out of two steps:
|
||||
* - A new grace period is started.
|
||||
* This is done by rcu_start_batch. The start is not broadcasted to
|
||||
* all cpus, they must pick this up by comparing rcp->cur with
|
||||
* rdp->quiescbatch. All cpus are recorded in the
|
||||
* rcu_ctrlblk.cpumask bitmap.
|
||||
* - All cpus must go through a quiescent state.
|
||||
* Since the start of the grace period is not broadcasted, at least two
|
||||
* calls to rcu_check_quiescent_state are required:
|
||||
* The first call just notices that a new grace period is running. The
|
||||
* following calls check if there was a quiescent state since the beginning
|
||||
* of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
|
||||
* the bitmap is empty, then the grace period is completed.
|
||||
* rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
|
||||
* period (if necessary).
|
||||
*/
|
||||
/*
|
||||
* Register a new batch of callbacks, and start it up if there is currently no
|
||||
* active batch and the batch to be registered has not already occurred.
|
||||
* Caller must hold rcu_ctrlblk.lock.
|
||||
*/
|
||||
static void rcu_start_batch(struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
if (rcp->next_pending &&
|
||||
rcp->completed == rcp->cur) {
|
||||
rcp->next_pending = 0;
|
||||
/*
|
||||
* next_pending == 0 must be visible in
|
||||
* __rcu_process_callbacks() before it can see the new value of cur.
|
||||
*/
|
||||
smp_wmb();
|
||||
rcp->cur++;
|
||||
|
||||
/*
|
||||
* Accessing nohz_cpu_mask before incrementing rcp->cur needs a
|
||||
* barrier. Otherwise it can cause tickless idle CPUs to be
|
||||
* included in rcp->cpumask, which will extend grace periods
|
||||
* unnecessarily.
|
||||
*/
|
||||
smp_mb();
|
||||
cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
|
||||
|
||||
rcp->signaled = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* cpu went through a quiescent state since the beginning of the grace period.
|
||||
* Clear it from the cpu mask and complete the grace period if it was the last
|
||||
* cpu. Start another grace period if someone has further entries pending
|
||||
*/
|
||||
static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
cpu_clear(cpu, rcp->cpumask);
|
||||
if (cpus_empty(rcp->cpumask)) {
|
||||
/* batch completed ! */
|
||||
rcp->completed = rcp->cur;
|
||||
rcu_start_batch(rcp);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the cpu has gone through a quiescent state (say context
|
||||
* switch). If so, and if it hasn't already done so in this RCU
|
||||
* quiescent cycle, then indicate that it has done so.
|
||||
*/
|
||||
static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
if (rdp->quiescbatch != rcp->cur) {
|
||||
/* start new grace period: */
|
||||
rdp->qs_pending = 1;
|
||||
rdp->passed_quiesc = 0;
|
||||
rdp->quiescbatch = rcp->cur;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Grace period already completed for this cpu?
|
||||
* qs_pending is checked instead of the actual bitmap to avoid
|
||||
* cacheline thrashing.
|
||||
*/
|
||||
if (!rdp->qs_pending)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Was there a quiescent state since the beginning of the grace
|
||||
* period? If no, then exit and wait for the next call.
|
||||
*/
|
||||
if (!rdp->passed_quiesc)
|
||||
return;
|
||||
rdp->qs_pending = 0;
|
||||
|
||||
spin_lock(&rcp->lock);
|
||||
/*
|
||||
* rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
|
||||
* during cpu startup. Ignore the quiescent state.
|
||||
*/
|
||||
if (likely(rdp->quiescbatch == rcp->cur))
|
||||
cpu_quiet(rdp->cpu, rcp);
|
||||
|
||||
spin_unlock(&rcp->lock);
|
||||
}
|
||||
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
|
||||
/* warning! helper for rcu_offline_cpu. do not use elsewhere without reviewing
|
||||
* locking requirements, the list it's pulling from has to belong to a cpu
|
||||
* which is dead and hence not processing interrupts.
|
||||
*/
|
||||
static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
|
||||
struct rcu_head **tail)
|
||||
{
|
||||
local_irq_disable();
|
||||
*this_rdp->nxttail = list;
|
||||
if (list)
|
||||
this_rdp->nxttail = tail;
|
||||
local_irq_enable();
|
||||
}
|
||||
|
||||
static void __rcu_offline_cpu(struct rcu_data *this_rdp,
|
||||
struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
|
||||
{
|
||||
/* if the cpu going offline owns the grace period
|
||||
* we can block indefinitely waiting for it, so flush
|
||||
* it here
|
||||
*/
|
||||
spin_lock_bh(&rcp->lock);
|
||||
if (rcp->cur != rcp->completed)
|
||||
cpu_quiet(rdp->cpu, rcp);
|
||||
spin_unlock_bh(&rcp->lock);
|
||||
rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
|
||||
rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
|
||||
rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
|
||||
}
|
||||
|
||||
static void rcu_offline_cpu(int cpu)
|
||||
{
|
||||
struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
|
||||
struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
|
||||
|
||||
__rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
|
||||
&per_cpu(rcu_data, cpu));
|
||||
__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
|
||||
&per_cpu(rcu_bh_data, cpu));
|
||||
put_cpu_var(rcu_data);
|
||||
put_cpu_var(rcu_bh_data);
|
||||
tasklet_kill_immediate(&per_cpu(rcu_tasklet, cpu), cpu);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static void rcu_offline_cpu(int cpu)
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This does the RCU processing work from tasklet context.
|
||||
*/
|
||||
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
|
||||
*rdp->donetail = rdp->curlist;
|
||||
rdp->donetail = rdp->curtail;
|
||||
rdp->curlist = NULL;
|
||||
rdp->curtail = &rdp->curlist;
|
||||
}
|
||||
|
||||
if (rdp->nxtlist && !rdp->curlist) {
|
||||
local_irq_disable();
|
||||
rdp->curlist = rdp->nxtlist;
|
||||
rdp->curtail = rdp->nxttail;
|
||||
rdp->nxtlist = NULL;
|
||||
rdp->nxttail = &rdp->nxtlist;
|
||||
local_irq_enable();
|
||||
|
||||
/*
|
||||
* start the next batch of callbacks
|
||||
*/
|
||||
|
||||
/* determine batch number */
|
||||
rdp->batch = rcp->cur + 1;
|
||||
/* see the comment and corresponding wmb() in
|
||||
* the rcu_start_batch()
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
if (!rcp->next_pending) {
|
||||
/* and start it/schedule start if it's a new batch */
|
||||
spin_lock(&rcp->lock);
|
||||
rcp->next_pending = 1;
|
||||
rcu_start_batch(rcp);
|
||||
spin_unlock(&rcp->lock);
|
||||
}
|
||||
}
|
||||
|
||||
rcu_check_quiescent_state(rcp, rdp);
|
||||
if (rdp->donelist)
|
||||
rcu_do_batch(rdp);
|
||||
}
|
||||
|
||||
static void rcu_process_callbacks(unsigned long unused)
|
||||
{
|
||||
__rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
|
||||
__rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
|
||||
}
|
||||
|
||||
static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
|
||||
{
|
||||
/* This cpu has pending rcu entries and the grace period
|
||||
* for them has completed.
|
||||
*/
|
||||
if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
|
||||
return 1;
|
||||
|
||||
/* This cpu has no pending entries, but there are new entries */
|
||||
if (!rdp->curlist && rdp->nxtlist)
|
||||
return 1;
|
||||
|
||||
/* This cpu has finished callbacks to invoke */
|
||||
if (rdp->donelist)
|
||||
return 1;
|
||||
|
||||
/* The rcu core waits for a quiescent state from the cpu */
|
||||
if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
|
||||
return 1;
|
||||
|
||||
/* nothing to do */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if there is any immediate RCU-related work to be done
|
||||
* by the current CPU, returning 1 if so. This function is part of the
|
||||
* RCU implementation; it is -not- an exported member of the RCU API.
|
||||
*/
|
||||
int rcu_pending(int cpu)
|
||||
{
|
||||
return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
|
||||
__rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if any future RCU-related work will need to be done
|
||||
* by the current CPU, even if none need be done immediately, returning
|
||||
* 1 if so. This function is part of the RCU implementation; it is -not-
|
||||
* an exported member of the RCU API.
|
||||
*/
|
||||
int rcu_needs_cpu(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
|
||||
struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
|
||||
|
||||
return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
|
||||
}
|
||||
|
||||
void rcu_check_callbacks(int cpu, int user)
|
||||
{
|
||||
if (user ||
|
||||
(idle_cpu(cpu) && !in_softirq() &&
|
||||
hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
|
||||
rcu_qsctr_inc(cpu);
|
||||
rcu_bh_qsctr_inc(cpu);
|
||||
} else if (!in_softirq())
|
||||
rcu_bh_qsctr_inc(cpu);
|
||||
tasklet_schedule(&per_cpu(rcu_tasklet, cpu));
|
||||
}
|
||||
|
||||
static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
memset(rdp, 0, sizeof(*rdp));
|
||||
rdp->curtail = &rdp->curlist;
|
||||
rdp->nxttail = &rdp->nxtlist;
|
||||
rdp->donetail = &rdp->donelist;
|
||||
rdp->quiescbatch = rcp->completed;
|
||||
rdp->qs_pending = 0;
|
||||
rdp->cpu = cpu;
|
||||
rdp->blimit = blimit;
|
||||
}
|
||||
|
||||
static void __cpuinit rcu_online_cpu(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
|
||||
struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);
|
||||
|
||||
rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
|
||||
rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp);
|
||||
tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL);
|
||||
}
|
||||
|
||||
static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
long cpu = (long)hcpu;
|
||||
switch (action) {
|
||||
case CPU_UP_PREPARE:
|
||||
case CPU_UP_PREPARE_FROZEN:
|
||||
rcu_online_cpu(cpu);
|
||||
break;
|
||||
case CPU_DEAD:
|
||||
case CPU_DEAD_FROZEN:
|
||||
rcu_offline_cpu(cpu);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block __cpuinitdata rcu_nb = {
|
||||
.notifier_call = rcu_cpu_notify,
|
||||
};
|
||||
|
||||
/*
|
||||
* Initializes rcu mechanism. Assumed to be called early.
|
||||
* That is, before the local timer (SMP) or jiffies timer (uniprocessor) is set up.
|
||||
* Note that rcu_qsctr and friends are implicitly
|
||||
* initialized due to the choice of ``0'' for RCU_CTR_INVALID.
|
||||
*/
|
||||
void __init rcu_init(void)
|
||||
{
|
||||
rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
|
||||
(void *)(long)smp_processor_id());
|
||||
/* Register notifier for non-boot CPUs */
|
||||
register_cpu_notifier(&rcu_nb);
|
||||
}
|
||||
#include <linux/module.h>
|
||||
|
||||
struct rcu_synchronize {
|
||||
struct rcu_head head;
|
||||
struct completion completion;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
|
||||
static atomic_t rcu_barrier_cpu_count;
|
||||
static DEFINE_MUTEX(rcu_barrier_mutex);
|
||||
static struct completion rcu_barrier_completion;
|
||||
|
||||
/* Because of FASTCALL declaration of complete, we use this wrapper */
|
||||
static void wakeme_after_rcu(struct rcu_head *head)
|
||||
{
|
||||
@@ -618,9 +73,6 @@ static void wakeme_after_rcu(struct rcu_head *head)
|
||||
* read-side critical sections have completed. RCU read-side critical
|
||||
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
|
||||
* and may be nested.
|
||||
*
|
||||
* If your read-side code is not protected by rcu_read_lock(), do -not-
|
||||
* use synchronize_rcu().
|
||||
*/
|
||||
void synchronize_rcu(void)
|
||||
{
|
||||
@@ -633,12 +85,54 @@ void synchronize_rcu(void)
|
||||
/* Wait for it */
|
||||
wait_for_completion(&rcu.completion);
|
||||
}
|
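/*
 * A minimal updater-side sketch built on synchronize_rcu(), illustrative
 * only and not taken from this patch: unpublish the element, wait for
 * pre-existing readers, then free it. "struct bar", bar_lock and the
 * list usage are hypothetical; <linux/list.h>, <linux/spinlock.h> and
 * <linux/slab.h> are assumed.
 */
struct bar {
	struct list_head list;
	int data;
};

static DEFINE_SPINLOCK(bar_lock);

static void bar_del(struct bar *bp)
{
	spin_lock(&bar_lock);
	list_del_rcu(&bp->list);	/* unpublish: new readers cannot find it */
	spin_unlock(&bar_lock);
	synchronize_rcu();		/* wait for pre-existing readers to finish */
	kfree(bp);			/* no reader can still reference it */
}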
||||
|
||||
module_param(blimit, int, 0);
|
||||
module_param(qhimark, int, 0);
|
||||
module_param(qlowmark, int, 0);
|
||||
EXPORT_SYMBOL_GPL(rcu_batches_completed);
|
||||
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
|
||||
EXPORT_SYMBOL_GPL(call_rcu);
|
||||
EXPORT_SYMBOL_GPL(call_rcu_bh);
|
||||
EXPORT_SYMBOL_GPL(synchronize_rcu);
|
||||
|
||||
static void rcu_barrier_callback(struct rcu_head *notused)
|
||||
{
|
||||
if (atomic_dec_and_test(&rcu_barrier_cpu_count))
|
||||
complete(&rcu_barrier_completion);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called with preemption disabled, and from cross-cpu IRQ context.
|
||||
*/
|
||||
static void rcu_barrier_func(void *notused)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
|
||||
|
||||
atomic_inc(&rcu_barrier_cpu_count);
|
||||
call_rcu(head, rcu_barrier_callback);
|
||||
}
|
||||
|
||||
/**
|
||||
* rcu_barrier - Wait until all the in-flight RCUs are complete.
|
||||
*/
|
||||
void rcu_barrier(void)
|
||||
{
|
||||
BUG_ON(in_interrupt());
|
||||
/* Take cpucontrol mutex to protect against CPU hotplug */
|
||||
mutex_lock(&rcu_barrier_mutex);
|
||||
init_completion(&rcu_barrier_completion);
|
||||
atomic_set(&rcu_barrier_cpu_count, 0);
|
||||
/*
|
||||
* The queueing of callbacks in all CPUs must be atomic with
|
||||
* respect to RCU, otherwise one CPU may queue a callback,
|
||||
* wait for a grace period, decrement barrier count and call
|
||||
* complete(), while other CPUs have not yet queued anything.
|
||||
* So, we need to make sure that grace periods cannot complete
|
||||
* until all the callbacks are queued.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
on_each_cpu(rcu_barrier_func, NULL, 0, 1);
|
||||
rcu_read_unlock();
|
||||
wait_for_completion(&rcu_barrier_completion);
|
||||
mutex_unlock(&rcu_barrier_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_barrier);
|
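/*
 * A minimal sketch of why rcu_barrier() exists, illustrative only and not
 * taken from this patch: a module that queues callbacks with call_rcu()
 * typically calls rcu_barrier() on unload so that every outstanding
 * callback has run before the module text goes away. foo_module_exit()
 * is a hypothetical name.
 */
static void __exit foo_module_exit(void)
{
	/* ...stop queueing new callbacks first... */
	rcu_barrier();	/* wait for all already-queued RCU callbacks to finish */
	/* ...now it is safe to free module resources and return... */
}
module_exit(foo_module_exit);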
||||
|
||||
void __init rcu_init(void)
|
||||
{
|
||||
__rcu_init();
|
||||
}
|
||||
|
||||
|
||||
953
kernel/rcupreempt.c
Normal file
@@ -0,0 +1,953 @@
|
||||
/*
|
||||
* Read-Copy Update mechanism for mutual exclusion, realtime implementation
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright IBM Corporation, 2006
|
||||
*
|
||||
* Authors: Paul E. McKenney <paulmck@us.ibm.com>
|
||||
* With thanks to Esben Nielsen, Bill Huey, and Ingo Molnar
|
||||
* for pushing me away from locks and towards counters, and
|
||||
* to Suparna Bhattacharya for pushing me completely away
|
||||
* from atomic instructions on the read side.
|
||||
*
|
||||
* Papers: http://www.rdrop.com/users/paulmck/RCU
|
||||
*
|
||||
* Design Document: http://lwn.net/Articles/253651/
|
||||
*
|
||||
* For detailed explanation of Read-Copy Update mechanism see -
|
||||
* Documentation/RCU/ *.txt
|
||||
*
|
||||
*/
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/byteorder/swabb.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/rcupreempt_trace.h>
|
||||
|
||||
/*
|
||||
* Macro that prevents the compiler from reordering accesses, but does
|
||||
* absolutely -nothing- to prevent CPUs from reordering. This is used
|
||||
* only to mediate communication between mainline code and hardware
|
||||
* interrupt and NMI handlers.
|
||||
*/
|
||||
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
|
||||
|
||||
/*
|
||||
* PREEMPT_RCU data structures.
|
||||
*/
|
||||
|
||||
/*
|
||||
* GP_STAGES specifies the number of times the state machine has
|
||||
* to go through all the rcu_try_flip_states (see below)
|
||||
* in a single Grace Period.
|
||||
*
|
||||
* GP in GP_STAGES stands for Grace Period ;)
|
||||
*/
|
||||
#define GP_STAGES 2
|
||||
struct rcu_data {
|
||||
spinlock_t lock; /* Protect rcu_data fields. */
|
||||
long completed; /* Number of last completed batch. */
|
||||
int waitlistcount;
|
||||
struct tasklet_struct rcu_tasklet;
|
||||
struct rcu_head *nextlist;
|
||||
struct rcu_head **nexttail;
|
||||
struct rcu_head *waitlist[GP_STAGES];
|
||||
struct rcu_head **waittail[GP_STAGES];
|
||||
struct rcu_head *donelist;
|
||||
struct rcu_head **donetail;
|
||||
long rcu_flipctr[2];
|
||||
#ifdef CONFIG_RCU_TRACE
|
||||
struct rcupreempt_trace trace;
|
||||
#endif /* #ifdef CONFIG_RCU_TRACE */
|
||||
};
|
||||
|
||||
/*
|
||||
* States for rcu_try_flip() and friends.
|
||||
*/
|
||||
|
||||
enum rcu_try_flip_states {
|
||||
|
||||
/*
|
||||
* Stay here if nothing is happening. Flip the counter if something
|
||||
* starts happening. Denoted by "I"
|
||||
*/
|
||||
rcu_try_flip_idle_state,
|
||||
|
||||
/*
|
||||
* Wait here for all CPUs to notice that the counter has flipped. This
|
||||
* prevents the old set of counters from ever being incremented once
|
||||
* we leave this state, which in turn is necessary because we cannot
|
||||
* test any individual counter for zero -- we can only check the sum.
|
||||
* Denoted by "A".
|
||||
*/
|
||||
rcu_try_flip_waitack_state,
|
||||
|
||||
/*
|
||||
* Wait here for the sum of the old per-CPU counters to reach zero.
|
||||
* Denoted by "Z".
|
||||
*/
|
||||
rcu_try_flip_waitzero_state,
|
||||
|
||||
/*
|
||||
* Wait here for each of the other CPUs to execute a memory barrier.
|
||||
* This is necessary to ensure that these other CPUs really have
|
||||
* completed executing their RCU read-side critical sections, despite
|
||||
* their CPUs wildly reordering memory. Denoted by "M".
|
||||
*/
|
||||
rcu_try_flip_waitmb_state,
|
||||
};
|
||||
|
||||
struct rcu_ctrlblk {
|
||||
spinlock_t fliplock; /* Protect state-machine transitions. */
|
||||
long completed; /* Number of last completed batch. */
|
||||
enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
|
||||
the rcu state machine */
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct rcu_data, rcu_data);
|
||||
static struct rcu_ctrlblk rcu_ctrlblk = {
|
||||
.fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
|
||||
.completed = 0,
|
||||
.rcu_try_flip_state = rcu_try_flip_idle_state,
|
||||
};
|
||||
|
||||
|
||||
#ifdef CONFIG_RCU_TRACE
|
||||
static char *rcu_try_flip_state_names[] =
|
||||
{ "idle", "waitack", "waitzero", "waitmb" };
|
||||
#endif /* #ifdef CONFIG_RCU_TRACE */
|
||||
|
||||
static cpumask_t rcu_cpu_online_map __read_mostly = CPU_MASK_NONE;
|
||||
|
||||
/*
|
||||
* Enum and per-CPU flag to determine when each CPU has seen
|
||||
* the most recent counter flip.
|
||||
*/
|
||||
|
||||
enum rcu_flip_flag_values {
|
||||
rcu_flip_seen, /* Steady/initial state, last flip seen. */
|
||||
/* Only GP detector can update. */
|
||||
rcu_flipped /* Flip just completed, need confirmation. */
|
||||
/* Only corresponding CPU can update. */
|
||||
};
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_flip_flag_values, rcu_flip_flag)
|
||||
= rcu_flip_seen;
|
||||
|
||||
/*
|
||||
* Enum and per-CPU flag to determine when each CPU has executed the
|
||||
* needed memory barrier to fence in memory references from its last RCU
|
||||
* read-side critical section in the just-completed grace period.
|
||||
*/
|
||||
|
||||
enum rcu_mb_flag_values {
|
||||
rcu_mb_done, /* Steady/initial state, no mb()s required. */
|
||||
/* Only GP detector can update. */
|
||||
rcu_mb_needed /* Flip just completed, need an mb(). */
|
||||
/* Only corresponding CPU can update. */
|
||||
};
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
|
||||
= rcu_mb_done;
|
||||
|
||||
/*
|
||||
* RCU_DATA_ME: find the current CPU's rcu_data structure.
|
||||
* RCU_DATA_CPU: find the specified CPU's rcu_data structure.
|
||||
*/
|
||||
#define RCU_DATA_ME() (&__get_cpu_var(rcu_data))
|
||||
#define RCU_DATA_CPU(cpu) (&per_cpu(rcu_data, cpu))
|
||||
|
||||
/*
|
||||
* Helper macro for tracing when the appropriate rcu_data is not
|
||||
* cached in a local variable, but where the CPU number is so cached.
|
||||
*/
|
||||
#define RCU_TRACE_CPU(f, cpu) RCU_TRACE(f, &(RCU_DATA_CPU(cpu)->trace));
|
||||
|
||||
/*
|
||||
* Helper macro for tracing when the appropriate rcu_data is not
|
||||
* cached in a local variable.
|
||||
*/
|
||||
#define RCU_TRACE_ME(f) RCU_TRACE(f, &(RCU_DATA_ME()->trace));
|
||||
|
||||
/*
|
||||
* Helper macro for tracing when the appropriate rcu_data is pointed
|
||||
* to by a local variable.
|
||||
*/
|
||||
#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));
|
||||
|
||||
/*
|
||||
* Return the number of RCU batches processed thus far. Useful
|
||||
* for debug and statistics.
|
||||
*/
|
||||
long rcu_batches_completed(void)
|
||||
{
|
||||
return rcu_ctrlblk.completed;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_batches_completed);
|
||||
|
||||
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
|
||||
|
||||
void __rcu_read_lock(void)
|
||||
{
|
||||
int idx;
|
||||
struct task_struct *t = current;
|
||||
int nesting;
|
||||
|
||||
nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
|
||||
if (nesting != 0) {
|
||||
|
||||
/* An earlier rcu_read_lock() covers us, just count it. */
|
||||
|
||||
t->rcu_read_lock_nesting = nesting + 1;
|
||||
|
||||
} else {
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* We disable interrupts for the following reasons:
|
||||
* - If we get scheduling clock interrupt here, and we
|
||||
* end up acking the counter flip, it's like a promise
|
||||
* that we will never increment the old counter again.
|
||||
* Thus we will break that promise if that
|
||||
* scheduling clock interrupt happens between the time
|
||||
* we pick the .completed field and the time that we
|
||||
* increment our counter.
|
||||
*
|
||||
* - We don't want to be preempted out here.
|
||||
*
|
||||
* NMIs can still occur, of course, and might themselves
|
||||
* contain rcu_read_lock().
|
||||
*/
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
/*
|
||||
* Outermost nesting of rcu_read_lock(), so increment
|
||||
* the current counter for the current CPU. Use volatile
|
||||
* casts to prevent the compiler from reordering.
|
||||
*/
|
||||
|
||||
idx = ACCESS_ONCE(rcu_ctrlblk.completed) & 0x1;
|
||||
ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])++;
|
||||
|
||||
/*
|
||||
* Now that the per-CPU counter has been incremented, we
|
||||
* are protected from races with rcu_read_lock() invoked
|
||||
* from NMI handlers on this CPU. We can therefore safely
|
||||
* increment the nesting counter, relieving further NMIs
|
||||
* of the need to increment the per-CPU counter.
|
||||
*/
|
||||
|
||||
ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting + 1;
|
||||
|
||||
/*
|
||||
* Now that we have prevented any NMIs from storing
|
||||
* to the ->rcu_flipctr_idx, we can safely use it to
|
||||
* remember which counter to decrement in the matching
|
||||
* rcu_read_unlock().
|
||||
*/
|
||||
|
||||
ACCESS_ONCE(t->rcu_flipctr_idx) = idx;
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__rcu_read_lock);
|
||||
|
||||
void __rcu_read_unlock(void)
|
||||
{
|
||||
int idx;
|
||||
struct task_struct *t = current;
|
||||
int nesting;
|
||||
|
||||
nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
|
||||
if (nesting > 1) {
|
||||
|
||||
/*
|
||||
* We are still protected by the enclosing rcu_read_lock(),
|
||||
* so simply decrement the counter.
|
||||
*/
|
||||
|
||||
t->rcu_read_lock_nesting = nesting - 1;
|
||||
|
||||
} else {
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* Disable local interrupts to prevent the grace-period
|
||||
* detection state machine from seeing us half-done.
|
||||
* NMIs can still occur, of course, and might themselves
|
||||
* contain rcu_read_lock() and rcu_read_unlock().
|
||||
*/
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
/*
|
||||
* Outermost nesting of rcu_read_unlock(), so we must
|
||||
* decrement the current counter for the current CPU.
|
||||
* This must be done carefully, because NMIs can
|
||||
* occur at any point in this code, and any rcu_read_lock()
|
||||
* and rcu_read_unlock() pairs in the NMI handlers
|
||||
* must interact non-destructively with this code.
|
||||
* Lots of volatile casts, and -very- careful ordering.
|
||||
*
|
||||
* Changes to this code, including this one, must be
|
||||
* inspected, validated, and tested extremely carefully!!!
|
||||
*/
|
||||
|
||||
/*
|
||||
* First, pick up the index.
|
||||
*/
|
||||
|
||||
idx = ACCESS_ONCE(t->rcu_flipctr_idx);
|
||||
|
||||
/*
|
||||
* Now that we have fetched the counter index, it is
|
||||
* safe to decrement the per-task RCU nesting counter.
|
||||
* After this, any interrupts or NMIs will increment and
|
||||
* decrement the per-CPU counters.
|
||||
*/
|
||||
ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting - 1;
|
||||
|
||||
/*
|
||||
* It is now safe to decrement this task's nesting count.
|
||||
* NMIs that occur after this statement will route their
|
||||
* rcu_read_lock() calls through this "else" clause, and
|
||||
* will thus start incrementing the per-CPU counter on
|
||||
* their own. They will also clobber ->rcu_flipctr_idx,
|
||||
* but that is OK, since we have already fetched it.
|
||||
*/
|
||||
|
||||
ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])--;
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
|
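/*
 * A minimal reader-side sketch, illustrative only and not taken from this
 * patch: rcu_read_lock()/rcu_read_unlock() bracket the read-side critical
 * section and end up in __rcu_read_lock()/__rcu_read_unlock() above when
 * CONFIG_PREEMPT_RCU is set. "struct foo", gbl_foo and foo_get_data()
 * are hypothetical names.
 */
struct foo {
	int data;
};
static struct foo *gbl_foo;

static int foo_get_data(void)
{
	struct foo *fp;
	int val = -1;

	rcu_read_lock();			/* enter read-side critical section */
	fp = rcu_dereference(gbl_foo);		/* fetch RCU-protected pointer */
	if (fp)
		val = fp->data;
	rcu_read_unlock();			/* leave read-side critical section */
	return val;
}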
||||
|
||||
/*
|
||||
* If a global counter flip has occurred since the last time that we
|
||||
* advanced callbacks, advance them. Hardware interrupts must be
|
||||
* disabled when calling this function.
|
||||
*/
|
||||
static void __rcu_advance_callbacks(struct rcu_data *rdp)
|
||||
{
|
||||
int cpu;
|
||||
int i;
|
||||
int wlc = 0;
|
||||
|
||||
if (rdp->completed != rcu_ctrlblk.completed) {
|
||||
if (rdp->waitlist[GP_STAGES - 1] != NULL) {
|
||||
*rdp->donetail = rdp->waitlist[GP_STAGES - 1];
|
||||
rdp->donetail = rdp->waittail[GP_STAGES - 1];
|
||||
RCU_TRACE_RDP(rcupreempt_trace_move2done, rdp);
|
||||
}
|
||||
for (i = GP_STAGES - 2; i >= 0; i--) {
|
||||
if (rdp->waitlist[i] != NULL) {
|
||||
rdp->waitlist[i + 1] = rdp->waitlist[i];
|
||||
rdp->waittail[i + 1] = rdp->waittail[i];
|
||||
wlc++;
|
||||
} else {
|
||||
rdp->waitlist[i + 1] = NULL;
|
||||
rdp->waittail[i + 1] =
|
||||
&rdp->waitlist[i + 1];
|
||||
}
|
||||
}
|
||||
if (rdp->nextlist != NULL) {
|
||||
rdp->waitlist[0] = rdp->nextlist;
|
||||
rdp->waittail[0] = rdp->nexttail;
|
||||
wlc++;
|
||||
rdp->nextlist = NULL;
|
||||
rdp->nexttail = &rdp->nextlist;
|
||||
RCU_TRACE_RDP(rcupreempt_trace_move2wait, rdp);
|
||||
} else {
|
||||
rdp->waitlist[0] = NULL;
|
||||
rdp->waittail[0] = &rdp->waitlist[0];
|
||||
}
|
||||
rdp->waitlistcount = wlc;
|
||||
rdp->completed = rcu_ctrlblk.completed;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if this CPU needs to report that it has seen
|
||||
* the most recent counter flip, thereby declaring that all
|
||||
* subsequent rcu_read_lock() invocations will respect this flip.
|
||||
*/
|
||||
|
||||
cpu = raw_smp_processor_id();
|
||||
if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
|
||||
smp_mb(); /* Subsequent counter accesses must see new value */
|
||||
per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
|
||||
smp_mb(); /* Subsequent RCU read-side critical sections */
|
||||
/* seen -after- acknowledgement. */
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Get here when RCU is idle. Decide whether we need to
|
||||
* move out of idle state, and return non-zero if so.
|
||||
* "Straightforward" approach for the moment, might later
|
||||
* use callback-list lengths, grace-period duration, or
|
||||
* some such to determine when to exit idle state.
|
||||
* Might also need a pre-idle test that does not acquire
|
||||
* the lock, but let's get the simple case working first...
|
||||
*/
|
||||
|
||||
static int
|
||||
rcu_try_flip_idle(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_i1);
|
||||
if (!rcu_pending(smp_processor_id())) {
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_ie1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Do the flip.
|
||||
*/
|
||||
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_g1);
|
||||
rcu_ctrlblk.completed++; /* stands in for rcu_try_flip_g2 */
|
||||
|
||||
/*
|
||||
* Need a memory barrier so that other CPUs see the new
|
||||
* counter value before they see the subsequent change of all
|
||||
* the rcu_flip_flag instances to rcu_flipped.
|
||||
*/
|
||||
|
||||
smp_mb(); /* see above block comment. */
|
||||
|
||||
/* Now ask each CPU for acknowledgement of the flip. */
|
||||
|
||||
for_each_cpu_mask(cpu, rcu_cpu_online_map)
|
||||
per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for CPUs to acknowledge the flip.
|
||||
*/
|
||||
|
||||
static int
|
||||
rcu_try_flip_waitack(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
|
||||
for_each_cpu_mask(cpu, rcu_cpu_online_map)
|
||||
if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure our checks above don't bleed into subsequent
|
||||
* waiting for the sum of the counters to reach zero.
|
||||
*/
|
||||
|
||||
smp_mb(); /* see above block comment. */
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_a2);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for collective ``last'' counter to reach zero,
|
||||
* then tell all CPUs to do an end-of-grace-period memory barrier.
|
||||
*/
|
||||
|
||||
static int
|
||||
rcu_try_flip_waitzero(void)
|
||||
{
|
||||
int cpu;
|
||||
int lastidx = !(rcu_ctrlblk.completed & 0x1);
|
||||
int sum = 0;
|
||||
|
||||
/* Check to see if the sum of the "last" counters is zero. */
|
||||
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_z1);
|
||||
for_each_cpu_mask(cpu, rcu_cpu_online_map)
|
||||
sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx];
|
||||
if (sum != 0) {
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This ensures that the other CPUs see the call for
|
||||
* memory barriers -after- the sum to zero has been
|
||||
* detected here
|
||||
*/
|
||||
smp_mb(); /* ^^^^^^^^^^^^ */
|
||||
|
||||
/* Call for a memory barrier from each CPU. */
|
||||
for_each_cpu_mask(cpu, rcu_cpu_online_map)
|
||||
per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
|
||||
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for all CPUs to do their end-of-grace-period memory barrier.
|
||||
* Return 1 once all CPUs have done so.
|
||||
*/
|
||||
|
||||
static int
|
||||
rcu_try_flip_waitmb(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
|
||||
for_each_cpu_mask(cpu, rcu_cpu_online_map)
|
||||
if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
smp_mb(); /* Ensure that the above checks precede any following flip. */
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_m2);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Attempt a single flip of the counters. Remember, a single flip does
|
||||
* -not- constitute a grace period. Instead, the interval between
|
||||
* at least GP_STAGES consecutive flips is a grace period.
|
||||
*
|
||||
* If anyone is nuts enough to run this CONFIG_PREEMPT_RCU implementation
|
||||
* on a large SMP, they might want to use a hierarchical organization of
|
||||
* the per-CPU-counter pairs.
|
||||
*/
|
||||
static void rcu_try_flip(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_1);
|
||||
if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, flags))) {
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_e1);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Take the next transition(s) through the RCU grace-period
|
||||
* flip-counter state machine.
|
||||
*/
|
||||
|
||||
switch (rcu_ctrlblk.rcu_try_flip_state) {
|
||||
case rcu_try_flip_idle_state:
|
||||
if (rcu_try_flip_idle())
|
||||
rcu_ctrlblk.rcu_try_flip_state =
|
||||
rcu_try_flip_waitack_state;
|
||||
break;
|
||||
case rcu_try_flip_waitack_state:
|
||||
if (rcu_try_flip_waitack())
|
||||
rcu_ctrlblk.rcu_try_flip_state =
|
||||
rcu_try_flip_waitzero_state;
|
||||
break;
|
||||
case rcu_try_flip_waitzero_state:
|
||||
if (rcu_try_flip_waitzero())
|
||||
rcu_ctrlblk.rcu_try_flip_state =
|
||||
rcu_try_flip_waitmb_state;
|
||||
break;
|
||||
case rcu_try_flip_waitmb_state:
|
||||
if (rcu_try_flip_waitmb())
|
||||
rcu_ctrlblk.rcu_try_flip_state =
|
||||
rcu_try_flip_idle_state;
|
||||
}
|
||||
spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if this CPU needs to do a memory barrier in order to
|
||||
* ensure that any prior RCU read-side critical sections have committed
|
||||
* their counter manipulations and critical-section memory references
|
||||
* before declaring the grace period to be completed.
|
||||
*/
|
||||
static void rcu_check_mb(int cpu)
|
||||
{
|
||||
if (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed) {
|
||||
smp_mb(); /* Ensure RCU read-side accesses are visible. */
|
||||
per_cpu(rcu_mb_flag, cpu) = rcu_mb_done;
|
||||
}
|
||||
}
|
||||
|
||||
void rcu_check_callbacks(int cpu, int user)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp = RCU_DATA_CPU(cpu);
|
||||
|
||||
rcu_check_mb(cpu);
|
||||
if (rcu_ctrlblk.completed == rdp->completed)
|
||||
rcu_try_flip();
|
||||
spin_lock_irqsave(&rdp->lock, flags);
|
||||
RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
|
||||
__rcu_advance_callbacks(rdp);
|
||||
if (rdp->donelist == NULL) {
|
||||
spin_unlock_irqrestore(&rdp->lock, flags);
|
||||
} else {
|
||||
spin_unlock_irqrestore(&rdp->lock, flags);
|
||||
raise_softirq(RCU_SOFTIRQ);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Needed by dynticks, to make sure all RCU processing has finished
|
||||
* when we go idle:
|
||||
*/
|
||||
void rcu_advance_callbacks(int cpu, int user)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp = RCU_DATA_CPU(cpu);
|
||||
|
||||
if (rcu_ctrlblk.completed == rdp->completed) {
|
||||
rcu_try_flip();
|
||||
if (rcu_ctrlblk.completed == rdp->completed)
|
||||
return;
|
||||
}
|
||||
spin_lock_irqsave(&rdp->lock, flags);
|
||||
RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
|
||||
__rcu_advance_callbacks(rdp);
|
||||
spin_unlock_irqrestore(&rdp->lock, flags);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
#define rcu_offline_cpu_enqueue(srclist, srctail, dstlist, dsttail) do { \
|
||||
*dsttail = srclist; \
|
||||
if (srclist != NULL) { \
|
||||
dsttail = srctail; \
|
||||
srclist = NULL; \
|
||||
srctail = &srclist;\
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
void rcu_offline_cpu(int cpu)
|
||||
{
|
||||
int i;
|
||||
struct rcu_head *list = NULL;
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp = RCU_DATA_CPU(cpu);
|
||||
struct rcu_head **tail = &list;
|
||||
|
||||
/*
|
||||
* Remove all callbacks from the newly dead CPU, retaining order.
|
||||
* Otherwise rcu_barrier() will fail
|
||||
*/
|
||||
|
||||
spin_lock_irqsave(&rdp->lock, flags);
|
||||
rcu_offline_cpu_enqueue(rdp->donelist, rdp->donetail, list, tail);
|
||||
for (i = GP_STAGES - 1; i >= 0; i--)
|
||||
rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i],
|
||||
list, tail);
|
||||
rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail);
|
||||
spin_unlock_irqrestore(&rdp->lock, flags);
|
||||
rdp->waitlistcount = 0;
|
||||
|
||||
/* Disengage the newly dead CPU from the grace-period computation. */
|
||||
|
||||
spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
|
||||
rcu_check_mb(cpu);
|
||||
if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
|
||||
smp_mb(); /* Subsequent counter accesses must see new value */
|
||||
per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
|
||||
smp_mb(); /* Subsequent RCU read-side critical sections */
|
||||
/* seen -after- acknowledgement. */
|
||||
}
|
||||
|
||||
RCU_DATA_ME()->rcu_flipctr[0] += RCU_DATA_CPU(cpu)->rcu_flipctr[0];
|
||||
RCU_DATA_ME()->rcu_flipctr[1] += RCU_DATA_CPU(cpu)->rcu_flipctr[1];
|
||||
|
||||
RCU_DATA_CPU(cpu)->rcu_flipctr[0] = 0;
|
||||
RCU_DATA_CPU(cpu)->rcu_flipctr[1] = 0;
|
||||
|
||||
cpu_clear(cpu, rcu_cpu_online_map);
|
||||
|
||||
spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
|
||||
|
||||
/*
|
||||
* Place the removed callbacks on the current CPU's queue.
|
||||
* Make them all start a new grace period: simple approach,
|
||||
* in theory could starve a given set of callbacks, but
|
||||
* you would need to be doing some serious CPU hotplugging
|
||||
* to make this happen. If this becomes a problem, adding
|
||||
* a synchronize_rcu() to the hotplug path would be a simple
|
||||
* fix.
|
||||
*/
|
||||
|
||||
rdp = RCU_DATA_ME();
|
||||
spin_lock_irqsave(&rdp->lock, flags);
|
||||
*rdp->nexttail = list;
|
||||
if (list)
|
||||
rdp->nexttail = tail;
|
||||
spin_unlock_irqrestore(&rdp->lock, flags);
|
||||
}
|
||||
|
||||
void __devinit rcu_online_cpu(int cpu)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
|
||||
cpu_set(cpu, rcu_cpu_online_map);
|
||||
spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
|
||||
}
|
||||
|
||||
#else /* #ifdef CONFIG_HOTPLUG_CPU */
|
||||
|
||||
void rcu_offline_cpu(int cpu)
|
||||
{
|
||||
}
|
||||
|
||||
void __devinit rcu_online_cpu(int cpu)
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
|
||||
|
||||
static void rcu_process_callbacks(struct softirq_action *unused)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_head *next, *list;
|
||||
struct rcu_data *rdp = RCU_DATA_ME();
|
||||
|
||||
spin_lock_irqsave(&rdp->lock, flags);
|
||||
list = rdp->donelist;
|
||||
if (list == NULL) {
|
||||
spin_unlock_irqrestore(&rdp->lock, flags);
|
||||
return;
|
||||
}
|
||||
rdp->donelist = NULL;
|
||||
rdp->donetail = &rdp->donelist;
|
||||
RCU_TRACE_RDP(rcupreempt_trace_done_remove, rdp);
|
||||
spin_unlock_irqrestore(&rdp->lock, flags);
|
||||
while (list) {
|
||||
next = list->next;
|
||||
list->func(list);
|
||||
list = next;
|
||||
RCU_TRACE_ME(rcupreempt_trace_invoke);
|
||||
}
|
||||
}
|
||||
|
||||
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp;
|
||||
|
||||
head->func = func;
|
||||
head->next = NULL;
|
||||
local_irq_save(flags);
|
||||
rdp = RCU_DATA_ME();
|
||||
spin_lock(&rdp->lock);
|
||||
__rcu_advance_callbacks(rdp);
|
||||
*rdp->nexttail = head;
|
||||
rdp->nexttail = &head->next;
|
||||
RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
|
||||
spin_unlock(&rdp->lock);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(call_rcu);
|
||||
|
||||
/*
|
||||
* Wait until all currently running preempt_disable() code segments
|
||||
* (including hardware-irq-disable segments) complete. Note that
|
||||
* in -rt this does -not- necessarily result in all currently executing
|
||||
* interrupt -handlers- having completed.
|
||||
*/
|
||||
void __synchronize_sched(void)
|
||||
{
|
||||
cpumask_t oldmask;
|
||||
int cpu;
|
||||
|
||||
if (sched_getaffinity(0, &oldmask) < 0)
|
||||
oldmask = cpu_possible_map;
|
||||
for_each_online_cpu(cpu) {
|
||||
sched_setaffinity(0, cpumask_of_cpu(cpu));
|
||||
schedule();
|
||||
}
|
||||
sched_setaffinity(0, oldmask);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__synchronize_sched);
|
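/*
 * A minimal sketch of a typical synchronize_sched() user (which this
 * preemptible implementation backs with __synchronize_sched() above),
 * illustrative only and not taken from this patch: retire data that
 * readers access under preempt_disable(). "struct foo_ops", foo_cur_ops
 * and foo_retire_ops() are hypothetical; <linux/slab.h> is assumed.
 */
struct foo_ops {
	void (*handler)(void);
};
static struct foo_ops *foo_cur_ops;

static void foo_retire_ops(struct foo_ops *old)
{
	rcu_assign_pointer(foo_cur_ops, NULL);	/* unpublish the old ops */
	synchronize_sched();	/* every preempt-disabled reader has finished */
	kfree(old);		/* safe to free the retired structure */
}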
||||
|
||||
/*
|
||||
* Check to see if any future RCU-related work will need to be done
|
||||
* by the current CPU, even if none need be done immediately, returning
|
||||
* 1 if so. Assumes that notifiers would take care of handling any
|
||||
* outstanding requests from the RCU core.
|
||||
*
|
||||
* This function is part of the RCU implementation; it is -not-
|
||||
* an exported member of the RCU API.
|
||||
*/
|
||||
int rcu_needs_cpu(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = RCU_DATA_CPU(cpu);
|
||||
|
||||
return (rdp->donelist != NULL ||
|
||||
!!rdp->waitlistcount ||
|
||||
rdp->nextlist != NULL);
|
||||
}
|
||||
|
||||
int rcu_pending(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = RCU_DATA_CPU(cpu);
|
||||
|
||||
/* The CPU has at least one callback queued somewhere. */
|
||||
|
||||
if (rdp->donelist != NULL ||
|
||||
!!rdp->waitlistcount ||
|
||||
rdp->nextlist != NULL)
|
||||
return 1;
|
||||
|
||||
/* The RCU core needs an acknowledgement from this CPU. */
|
||||
|
||||
if ((per_cpu(rcu_flip_flag, cpu) == rcu_flipped) ||
|
||||
(per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed))
|
||||
return 1;
|
||||
|
||||
/* This CPU has fallen behind the global grace-period number. */
|
||||
|
||||
if (rdp->completed != rcu_ctrlblk.completed)
|
||||
return 1;
|
||||
|
||||
/* Nothing needed from this CPU. */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
				unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		rcu_online_cpu(cpu);
		break;
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		rcu_offline_cpu(cpu);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata rcu_nb = {
	.notifier_call = rcu_cpu_notify,
};

void __init __rcu_init(void)
{
	int cpu;
	int i;
	struct rcu_data *rdp;

	printk(KERN_NOTICE "Preemptible RCU implementation.\n");
	for_each_possible_cpu(cpu) {
		rdp = RCU_DATA_CPU(cpu);
		spin_lock_init(&rdp->lock);
		rdp->completed = 0;
		rdp->waitlistcount = 0;
		rdp->nextlist = NULL;
		rdp->nexttail = &rdp->nextlist;
		for (i = 0; i < GP_STAGES; i++) {
			rdp->waitlist[i] = NULL;
			rdp->waittail[i] = &rdp->waitlist[i];
		}
		rdp->donelist = NULL;
		rdp->donetail = &rdp->donelist;
		rdp->rcu_flipctr[0] = 0;
		rdp->rcu_flipctr[1] = 0;
	}
	register_cpu_notifier(&rcu_nb);

	/*
	 * We don't need protection against CPU-Hotplug here
	 * since
	 * a) If a CPU comes online while we are iterating over the
	 *    cpu_online_map below, we would only end up making a
	 *    duplicate call to rcu_online_cpu() which sets the corresponding
	 *    CPU's mask in the rcu_cpu_online_map.
	 *
	 * b) A CPU cannot go offline at this point in time since the user
	 *    does not have access to the sysfs interface, nor do we
	 *    suspend the system.
	 */
	for_each_online_cpu(cpu)
		rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long) cpu);

	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
}

/*
 * Deprecated, use synchronize_rcu() or synchronize_sched() instead.
 */
void synchronize_kernel(void)
{
	synchronize_rcu();
}

#ifdef CONFIG_RCU_TRACE
long *rcupreempt_flipctr(int cpu)
{
	return &RCU_DATA_CPU(cpu)->rcu_flipctr[0];
}
EXPORT_SYMBOL_GPL(rcupreempt_flipctr);

int rcupreempt_flip_flag(int cpu)
{
	return per_cpu(rcu_flip_flag, cpu);
}
EXPORT_SYMBOL_GPL(rcupreempt_flip_flag);

int rcupreempt_mb_flag(int cpu)
{
	return per_cpu(rcu_mb_flag, cpu);
}
EXPORT_SYMBOL_GPL(rcupreempt_mb_flag);

char *rcupreempt_try_flip_state_name(void)
{
	return rcu_try_flip_state_names[rcu_ctrlblk.rcu_try_flip_state];
}
EXPORT_SYMBOL_GPL(rcupreempt_try_flip_state_name);

struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu)
{
	struct rcu_data *rdp = RCU_DATA_CPU(cpu);

	return &rdp->trace;
}
EXPORT_SYMBOL_GPL(rcupreempt_trace_cpu);

#endif /* #ifdef RCU_TRACE */
330
kernel/rcupreempt_trace.c
Normal file
@@ -0,0 +1,330 @@
/*
 * Read-Copy Update tracing for realtime implementation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright IBM Corporation, 2006
 *
 * Papers: http://www.rdrop.com/users/paulmck/RCU
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *		Documentation/RCU/ *.txt
 *
 */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <asm/atomic.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/rcupdate.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/rcupreempt_trace.h>
#include <linux/debugfs.h>

static struct mutex rcupreempt_trace_mutex;
static char *rcupreempt_trace_buf;
#define RCUPREEMPT_TRACE_BUF_SIZE 4096

void rcupreempt_trace_move2done(struct rcupreempt_trace *trace)
{
	trace->done_length += trace->wait_length;
	trace->done_add += trace->wait_length;
	trace->wait_length = 0;
}
void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace)
{
	trace->wait_length += trace->next_length;
	trace->wait_add += trace->next_length;
	trace->next_length = 0;
}
void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace)
{
	atomic_inc(&trace->rcu_try_flip_1);
}
void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace)
{
	atomic_inc(&trace->rcu_try_flip_e1);
}
void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace)
{
	trace->rcu_try_flip_i1++;
}
void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace)
{
	trace->rcu_try_flip_ie1++;
}
void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace)
{
	trace->rcu_try_flip_g1++;
}
void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace)
{
	trace->rcu_try_flip_a1++;
}
void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace)
{
	trace->rcu_try_flip_ae1++;
}
void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace)
{
	trace->rcu_try_flip_a2++;
}
void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace)
{
	trace->rcu_try_flip_z1++;
}
void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace)
{
	trace->rcu_try_flip_ze1++;
}
void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace)
{
	trace->rcu_try_flip_z2++;
}
void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace)
{
	trace->rcu_try_flip_m1++;
}
void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace)
{
	trace->rcu_try_flip_me1++;
}
void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace)
{
	trace->rcu_try_flip_m2++;
}
void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace)
{
	trace->rcu_check_callbacks++;
}
void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace)
{
	trace->done_remove += trace->done_length;
	trace->done_length = 0;
}
void rcupreempt_trace_invoke(struct rcupreempt_trace *trace)
{
	atomic_inc(&trace->done_invoked);
}
void rcupreempt_trace_next_add(struct rcupreempt_trace *trace)
{
	trace->next_add++;
	trace->next_length++;
}

static void rcupreempt_trace_sum(struct rcupreempt_trace *sp)
{
	struct rcupreempt_trace *cp;
	int cpu;

	memset(sp, 0, sizeof(*sp));
	for_each_possible_cpu(cpu) {
		cp = rcupreempt_trace_cpu(cpu);
		sp->next_length += cp->next_length;
		sp->next_add += cp->next_add;
		sp->wait_length += cp->wait_length;
		sp->wait_add += cp->wait_add;
		sp->done_length += cp->done_length;
		sp->done_add += cp->done_add;
		sp->done_remove += cp->done_remove;
		atomic_set(&sp->done_invoked, atomic_read(&cp->done_invoked));
		sp->rcu_check_callbacks += cp->rcu_check_callbacks;
		atomic_set(&sp->rcu_try_flip_1,
			   atomic_read(&cp->rcu_try_flip_1));
		atomic_set(&sp->rcu_try_flip_e1,
			   atomic_read(&cp->rcu_try_flip_e1));
		sp->rcu_try_flip_i1 += cp->rcu_try_flip_i1;
		sp->rcu_try_flip_ie1 += cp->rcu_try_flip_ie1;
		sp->rcu_try_flip_g1 += cp->rcu_try_flip_g1;
		sp->rcu_try_flip_a1 += cp->rcu_try_flip_a1;
		sp->rcu_try_flip_ae1 += cp->rcu_try_flip_ae1;
		sp->rcu_try_flip_a2 += cp->rcu_try_flip_a2;
		sp->rcu_try_flip_z1 += cp->rcu_try_flip_z1;
		sp->rcu_try_flip_ze1 += cp->rcu_try_flip_ze1;
		sp->rcu_try_flip_z2 += cp->rcu_try_flip_z2;
		sp->rcu_try_flip_m1 += cp->rcu_try_flip_m1;
		sp->rcu_try_flip_me1 += cp->rcu_try_flip_me1;
		sp->rcu_try_flip_m2 += cp->rcu_try_flip_m2;
	}
}

static ssize_t rcustats_read(struct file *filp, char __user *buffer,
				size_t count, loff_t *ppos)
{
	struct rcupreempt_trace trace;
	ssize_t bcount;
	int cnt = 0;

	rcupreempt_trace_sum(&trace);
	mutex_lock(&rcupreempt_trace_mutex);
	snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt,
		 "ggp=%ld rcc=%ld\n",
		 rcu_batches_completed(),
		 trace.rcu_check_callbacks);
	snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt,
		 "na=%ld nl=%ld wa=%ld wl=%ld da=%ld dl=%ld dr=%ld di=%d\n"
		 "1=%d e1=%d i1=%ld ie1=%ld g1=%ld a1=%ld ae1=%ld a2=%ld\n"
		 "z1=%ld ze1=%ld z2=%ld m1=%ld me1=%ld m2=%ld\n",

		 trace.next_add, trace.next_length,
		 trace.wait_add, trace.wait_length,
		 trace.done_add, trace.done_length,
		 trace.done_remove, atomic_read(&trace.done_invoked),
		 atomic_read(&trace.rcu_try_flip_1),
		 atomic_read(&trace.rcu_try_flip_e1),
		 trace.rcu_try_flip_i1, trace.rcu_try_flip_ie1,
		 trace.rcu_try_flip_g1,
		 trace.rcu_try_flip_a1, trace.rcu_try_flip_ae1,
		 trace.rcu_try_flip_a2,
		 trace.rcu_try_flip_z1, trace.rcu_try_flip_ze1,
		 trace.rcu_try_flip_z2,
		 trace.rcu_try_flip_m1, trace.rcu_try_flip_me1,
		 trace.rcu_try_flip_m2);
	bcount = simple_read_from_buffer(buffer, count, ppos,
			rcupreempt_trace_buf, strlen(rcupreempt_trace_buf));
	mutex_unlock(&rcupreempt_trace_mutex);
	return bcount;
}

static ssize_t rcugp_read(struct file *filp, char __user *buffer,
				size_t count, loff_t *ppos)
{
	long oldgp = rcu_batches_completed();
	ssize_t bcount;

	mutex_lock(&rcupreempt_trace_mutex);
	synchronize_rcu();
	snprintf(rcupreempt_trace_buf, RCUPREEMPT_TRACE_BUF_SIZE,
		"oldggp=%ld newggp=%ld\n", oldgp, rcu_batches_completed());
	bcount = simple_read_from_buffer(buffer, count, ppos,
			rcupreempt_trace_buf, strlen(rcupreempt_trace_buf));
	mutex_unlock(&rcupreempt_trace_mutex);
	return bcount;
}

static ssize_t rcuctrs_read(struct file *filp, char __user *buffer,
				size_t count, loff_t *ppos)
{
	int cnt = 0;
	int cpu;
	int f = rcu_batches_completed() & 0x1;
	ssize_t bcount;

	mutex_lock(&rcupreempt_trace_mutex);

	cnt += snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE,
			"CPU last cur F M\n");
	for_each_online_cpu(cpu) {
		long *flipctr = rcupreempt_flipctr(cpu);
		cnt += snprintf(&rcupreempt_trace_buf[cnt],
				RCUPREEMPT_TRACE_BUF_SIZE - cnt,
				"%3d %4ld %3ld %d %d\n",
				cpu,
				flipctr[!f],
				flipctr[f],
				rcupreempt_flip_flag(cpu),
				rcupreempt_mb_flag(cpu));
	}
	cnt += snprintf(&rcupreempt_trace_buf[cnt],
			RCUPREEMPT_TRACE_BUF_SIZE - cnt,
			"ggp = %ld, state = %s\n",
			rcu_batches_completed(),
			rcupreempt_try_flip_state_name());
	cnt += snprintf(&rcupreempt_trace_buf[cnt],
			RCUPREEMPT_TRACE_BUF_SIZE - cnt,
			"\n");
	bcount = simple_read_from_buffer(buffer, count, ppos,
			rcupreempt_trace_buf, strlen(rcupreempt_trace_buf));
	mutex_unlock(&rcupreempt_trace_mutex);
	return bcount;
}

static struct file_operations rcustats_fops = {
	.owner = THIS_MODULE,
	.read = rcustats_read,
};

static struct file_operations rcugp_fops = {
	.owner = THIS_MODULE,
	.read = rcugp_read,
};

static struct file_operations rcuctrs_fops = {
	.owner = THIS_MODULE,
	.read = rcuctrs_read,
};

static struct dentry *rcudir, *statdir, *ctrsdir, *gpdir;
static int rcupreempt_debugfs_init(void)
{
	rcudir = debugfs_create_dir("rcu", NULL);
	if (!rcudir)
		goto out;
	statdir = debugfs_create_file("rcustats", 0444, rcudir,
						NULL, &rcustats_fops);
	if (!statdir)
		goto free_out;

	gpdir = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops);
	if (!gpdir)
		goto free_out;

	ctrsdir = debugfs_create_file("rcuctrs", 0444, rcudir,
						NULL, &rcuctrs_fops);
	if (!ctrsdir)
		goto free_out;
	return 0;
free_out:
	if (statdir)
		debugfs_remove(statdir);
	if (gpdir)
		debugfs_remove(gpdir);
	debugfs_remove(rcudir);
out:
	return 1;
}

static int __init rcupreempt_trace_init(void)
{
	mutex_init(&rcupreempt_trace_mutex);
	rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL);
	if (!rcupreempt_trace_buf)
		return 1;
	return rcupreempt_debugfs_init();
}

static void __exit rcupreempt_trace_cleanup(void)
{
	debugfs_remove(statdir);
	debugfs_remove(gpdir);
	debugfs_remove(ctrsdir);
	debugfs_remove(rcudir);
	kfree(rcupreempt_trace_buf);
}


module_init(rcupreempt_trace_init);
module_exit(rcupreempt_trace_cleanup);

@@ -726,11 +726,11 @@ static void rcu_torture_shuffle_tasks(void)
	cpumask_t tmp_mask = CPU_MASK_ALL;
	int i;

	lock_cpu_hotplug();
	get_online_cpus();

	/* No point in shuffling if there is only one online CPU (ex: UP) */
	if (num_online_cpus() == 1) {
		unlock_cpu_hotplug();
		put_online_cpus();
		return;
	}

@@ -762,7 +762,7 @@ static void rcu_torture_shuffle_tasks(void)
	else
		rcu_idle_cpu--;

	unlock_cpu_hotplug();
	put_online_cpus();
}

/* Shuffle tasks across CPUs, with the intent of allowing each CPU in the

@@ -394,7 +394,7 @@ static SYSDEV_ATTR(status, 0600, sysfs_test_status, NULL);
static SYSDEV_ATTR(command, 0600, NULL, sysfs_test_command);

static struct sysdev_class rttest_sysclass = {
	set_kset_name("rttest"),
	.name = "rttest",
};

static int init_test_thread(int id)

1406
kernel/sched.c
File diff suppressed because it is too large
@@ -179,6 +179,7 @@ static void print_cpu(struct seq_file *m, int cpu)
	PN(prev_clock_raw);
	P(clock_warps);
	P(clock_overflows);
	P(clock_underflows);
	P(clock_deep_idle_events);
	PN(clock_max_delta);
	P(cpu_load[0]);
@@ -299,6 +300,8 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
	PN(se.exec_max);
	PN(se.slice_max);
	PN(se.wait_max);
	PN(se.wait_sum);
	P(se.wait_count);
	P(sched_info.bkl_count);
	P(se.nr_migrations);
	P(se.nr_migrations_cold);
@@ -366,6 +369,8 @@ void proc_sched_set_task(struct task_struct *p)
{
#ifdef CONFIG_SCHEDSTATS
	p->se.wait_max = 0;
	p->se.wait_sum = 0;
	p->se.wait_count = 0;
	p->se.sleep_max = 0;
	p->se.sum_sleep_runtime = 0;
	p->se.block_max = 0;

@@ -20,6 +20,8 @@
|
||||
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
|
||||
*/
|
||||
|
||||
#include <linux/latencytop.h>
|
||||
|
||||
/*
|
||||
* Targeted preemption latency for CPU-bound tasks:
|
||||
* (default: 20ms * (1 + ilog(ncpus)), units: nanoseconds)
|
||||
@@ -248,8 +250,8 @@ static u64 __sched_period(unsigned long nr_running)
|
||||
unsigned long nr_latency = sched_nr_latency;
|
||||
|
||||
if (unlikely(nr_running > nr_latency)) {
|
||||
period = sysctl_sched_min_granularity;
|
||||
period *= nr_running;
|
||||
do_div(period, nr_latency);
|
||||
}
|
||||
|
||||
return period;
|
||||
@@ -383,6 +385,9 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
schedstat_set(se->wait_max, max(se->wait_max,
|
||||
rq_of(cfs_rq)->clock - se->wait_start));
|
||||
schedstat_set(se->wait_count, se->wait_count + 1);
|
||||
schedstat_set(se->wait_sum, se->wait_sum +
|
||||
rq_of(cfs_rq)->clock - se->wait_start);
|
||||
schedstat_set(se->wait_start, 0);
|
||||
}
|
||||
|
||||
@@ -434,6 +439,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
if (se->sleep_start) {
|
||||
u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
|
||||
struct task_struct *tsk = task_of(se);
|
||||
|
||||
if ((s64)delta < 0)
|
||||
delta = 0;
|
||||
@@ -443,9 +449,12 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
|
||||
se->sleep_start = 0;
|
||||
se->sum_sleep_runtime += delta;
|
||||
|
||||
account_scheduler_latency(tsk, delta >> 10, 1);
|
||||
}
|
||||
if (se->block_start) {
|
||||
u64 delta = rq_of(cfs_rq)->clock - se->block_start;
|
||||
struct task_struct *tsk = task_of(se);
|
||||
|
||||
if ((s64)delta < 0)
|
||||
delta = 0;
|
||||
@@ -462,11 +471,11 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
* time that the task spent sleeping:
|
||||
*/
|
||||
if (unlikely(prof_on == SLEEP_PROFILING)) {
|
||||
struct task_struct *tsk = task_of(se);
|
||||
|
||||
profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk),
|
||||
delta >> 20);
|
||||
}
|
||||
account_scheduler_latency(tsk, delta >> 10, 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@@ -642,13 +651,29 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
|
||||
cfs_rq->curr = NULL;
|
||||
}
|
||||
|
||||
static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
|
||||
static void
|
||||
entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
|
||||
{
|
||||
/*
|
||||
* Update run-time statistics of the 'current'.
|
||||
*/
|
||||
update_curr(cfs_rq);
|
||||
|
||||
#ifdef CONFIG_SCHED_HRTICK
|
||||
/*
|
||||
* queued ticks are scheduled to match the slice, so don't bother
|
||||
* validating it and just reschedule.
|
||||
*/
|
||||
if (queued)
|
||||
return resched_task(rq_of(cfs_rq)->curr);
|
||||
/*
|
||||
* don't let the period tick interfere with the hrtick preemption
|
||||
*/
|
||||
if (!sched_feat(DOUBLE_TICK) &&
|
||||
hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
|
||||
return;
|
||||
#endif
|
||||
|
||||
if (cfs_rq->nr_running > 1 || !sched_feat(WAKEUP_PREEMPT))
|
||||
check_preempt_tick(cfs_rq, curr);
|
||||
}
|
||||
@@ -690,7 +715,7 @@ static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
|
||||
|
||||
/* Iterate thr' all leaf cfs_rq's on a runqueue */
|
||||
#define for_each_leaf_cfs_rq(rq, cfs_rq) \
|
||||
list_for_each_entry(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
|
||||
list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
|
||||
|
||||
/* Do the two (enqueued) entities belong to the same group ? */
|
||||
static inline int
|
||||
@@ -707,6 +732,8 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
|
||||
return se->parent;
|
||||
}
|
||||
|
||||
#define GROUP_IMBALANCE_PCT 20
|
||||
|
||||
#else /* CONFIG_FAIR_GROUP_SCHED */
|
||||
|
||||
#define for_each_sched_entity(se) \
|
||||
@@ -752,6 +779,43 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
|
||||
|
||||
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
||||
|
||||
#ifdef CONFIG_SCHED_HRTICK
|
||||
static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
int requeue = rq->curr == p;
|
||||
struct sched_entity *se = &p->se;
|
||||
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
||||
|
||||
WARN_ON(task_rq(p) != rq);
|
||||
|
||||
if (hrtick_enabled(rq) && cfs_rq->nr_running > 1) {
|
||||
u64 slice = sched_slice(cfs_rq, se);
|
||||
u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
|
||||
s64 delta = slice - ran;
|
||||
|
||||
if (delta < 0) {
|
||||
if (rq->curr == p)
|
||||
resched_task(p);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't schedule slices shorter than 10000ns, that just
|
||||
* doesn't make sense. Rely on vruntime for fairness.
|
||||
*/
|
||||
if (!requeue)
|
||||
delta = max(10000LL, delta);
|
||||
|
||||
hrtick_start(rq, delta, requeue);
|
||||
}
|
||||
}
|
||||
#else
|
||||
static inline void
|
||||
hrtick_start_fair(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The enqueue_task method is called before nr_running is
|
||||
* increased. Here we update the fair scheduling stats and
|
||||
@@ -760,15 +824,28 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
|
||||
static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
|
||||
{
|
||||
struct cfs_rq *cfs_rq;
|
||||
struct sched_entity *se = &p->se;
|
||||
struct sched_entity *se = &p->se,
|
||||
*topse = NULL; /* Highest schedulable entity */
|
||||
int incload = 1;
|
||||
|
||||
for_each_sched_entity(se) {
|
||||
if (se->on_rq)
|
||||
topse = se;
|
||||
if (se->on_rq) {
|
||||
incload = 0;
|
||||
break;
|
||||
}
|
||||
cfs_rq = cfs_rq_of(se);
|
||||
enqueue_entity(cfs_rq, se, wakeup);
|
||||
wakeup = 1;
|
||||
}
|
||||
/* Increment cpu load if we just enqueued the first task of a group on
|
||||
* 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
|
||||
* at the highest grouping level.
|
||||
*/
|
||||
if (incload)
|
||||
inc_cpu_load(rq, topse->load.weight);
|
||||
|
||||
hrtick_start_fair(rq, rq->curr);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -779,16 +856,30 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
|
||||
static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
|
||||
{
|
||||
struct cfs_rq *cfs_rq;
|
||||
struct sched_entity *se = &p->se;
|
||||
struct sched_entity *se = &p->se,
|
||||
*topse = NULL; /* Highest schedulable entity */
|
||||
int decload = 1;
|
||||
|
||||
for_each_sched_entity(se) {
|
||||
topse = se;
|
||||
cfs_rq = cfs_rq_of(se);
|
||||
dequeue_entity(cfs_rq, se, sleep);
|
||||
/* Don't dequeue parent if it has other entities besides us */
|
||||
if (cfs_rq->load.weight)
|
||||
if (cfs_rq->load.weight) {
|
||||
if (parent_entity(se))
|
||||
decload = 0;
|
||||
break;
|
||||
}
|
||||
sleep = 1;
|
||||
}
|
||||
/* Decrement cpu load if we just dequeued the last task of a group on
|
||||
* 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
|
||||
* at the highest grouping level.
|
||||
*/
|
||||
if (decload)
|
||||
dec_cpu_load(rq, topse->load.weight);
|
||||
|
||||
hrtick_start_fair(rq, rq->curr);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -835,6 +926,154 @@ static void yield_task_fair(struct rq *rq)
|
||||
se->vruntime = rightmost->vruntime + 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* wake_idle() will wake a task on an idle cpu if task->cpu is
|
||||
* not idle and an idle cpu is available. The span of cpus to
|
||||
* search starts with cpus closest then further out as needed,
|
||||
* so we always favor a closer, idle cpu.
|
||||
*
|
||||
* Returns the CPU we should wake onto.
|
||||
*/
|
||||
#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
|
||||
static int wake_idle(int cpu, struct task_struct *p)
|
||||
{
|
||||
cpumask_t tmp;
|
||||
struct sched_domain *sd;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* If it is idle, then it is the best cpu to run this task.
|
||||
*
|
||||
* This cpu is also the best, if it has more than one task already.
|
||||
* Siblings must be also busy(in most cases) as they didn't already
|
||||
* pickup the extra load from this cpu and hence we need not check
|
||||
* sibling runqueue info. This will avoid the checks and cache miss
|
||||
* penalities associated with that.
|
||||
*/
|
||||
if (idle_cpu(cpu) || cpu_rq(cpu)->nr_running > 1)
|
||||
return cpu;
|
||||
|
||||
for_each_domain(cpu, sd) {
|
||||
if (sd->flags & SD_WAKE_IDLE) {
|
||||
cpus_and(tmp, sd->span, p->cpus_allowed);
|
||||
for_each_cpu_mask(i, tmp) {
|
||||
if (idle_cpu(i)) {
|
||||
if (i != task_cpu(p)) {
|
||||
schedstat_inc(p,
|
||||
se.nr_wakeups_idle);
|
||||
}
|
||||
return i;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return cpu;
|
||||
}
|
||||
#else
|
||||
static inline int wake_idle(int cpu, struct task_struct *p)
|
||||
{
|
||||
return cpu;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static int select_task_rq_fair(struct task_struct *p, int sync)
|
||||
{
|
||||
int cpu, this_cpu;
|
||||
struct rq *rq;
|
||||
struct sched_domain *sd, *this_sd = NULL;
|
||||
int new_cpu;
|
||||
|
||||
cpu = task_cpu(p);
|
||||
rq = task_rq(p);
|
||||
this_cpu = smp_processor_id();
|
||||
new_cpu = cpu;
|
||||
|
||||
if (cpu == this_cpu)
|
||||
goto out_set_cpu;
|
||||
|
||||
for_each_domain(this_cpu, sd) {
|
||||
if (cpu_isset(cpu, sd->span)) {
|
||||
this_sd = sd;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
|
||||
goto out_set_cpu;
|
||||
|
||||
/*
|
||||
* Check for affine wakeup and passive balancing possibilities.
|
||||
*/
|
||||
if (this_sd) {
|
||||
int idx = this_sd->wake_idx;
|
||||
unsigned int imbalance;
|
||||
unsigned long load, this_load;
|
||||
|
||||
imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
|
||||
|
||||
load = source_load(cpu, idx);
|
||||
this_load = target_load(this_cpu, idx);
|
||||
|
||||
new_cpu = this_cpu; /* Wake to this CPU if we can */
|
||||
|
||||
if (this_sd->flags & SD_WAKE_AFFINE) {
|
||||
unsigned long tl = this_load;
|
||||
unsigned long tl_per_task;
|
||||
|
||||
/*
|
||||
* Attract cache-cold tasks on sync wakeups:
|
||||
*/
|
||||
if (sync && !task_hot(p, rq->clock, this_sd))
|
||||
goto out_set_cpu;
|
||||
|
||||
schedstat_inc(p, se.nr_wakeups_affine_attempts);
|
||||
tl_per_task = cpu_avg_load_per_task(this_cpu);
|
||||
|
||||
/*
|
||||
* If sync wakeup then subtract the (maximum possible)
|
||||
* effect of the currently running task from the load
|
||||
* of the current CPU:
|
||||
*/
|
||||
if (sync)
|
||||
tl -= current->se.load.weight;
|
||||
|
||||
if ((tl <= load &&
|
||||
tl + target_load(cpu, idx) <= tl_per_task) ||
|
||||
100*(tl + p->se.load.weight) <= imbalance*load) {
|
||||
/*
|
||||
* This domain has SD_WAKE_AFFINE and
|
||||
* p is cache cold in this domain, and
|
||||
* there is no bad imbalance.
|
||||
*/
|
||||
schedstat_inc(this_sd, ttwu_move_affine);
|
||||
schedstat_inc(p, se.nr_wakeups_affine);
|
||||
goto out_set_cpu;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Start passive balancing when half the imbalance_pct
|
||||
* limit is reached.
|
||||
*/
|
||||
if (this_sd->flags & SD_WAKE_BALANCE) {
|
||||
if (imbalance*this_load <= 100*load) {
|
||||
schedstat_inc(this_sd, ttwu_move_balance);
|
||||
schedstat_inc(p, se.nr_wakeups_passive);
|
||||
goto out_set_cpu;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
|
||||
out_set_cpu:
|
||||
return wake_idle(new_cpu, p);
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
|
||||
/*
|
||||
* Preempt the current task with a newly woken task if needed:
|
||||
*/
|
||||
@@ -876,6 +1115,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
|
||||
|
||||
static struct task_struct *pick_next_task_fair(struct rq *rq)
|
||||
{
|
||||
struct task_struct *p;
|
||||
struct cfs_rq *cfs_rq = &rq->cfs;
|
||||
struct sched_entity *se;
|
||||
|
||||
@@ -887,7 +1127,10 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
|
||||
cfs_rq = group_cfs_rq(se);
|
||||
} while (cfs_rq);
|
||||
|
||||
return task_of(se);
|
||||
p = task_of(se);
|
||||
hrtick_start_fair(rq, p);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -944,25 +1187,6 @@ static struct task_struct *load_balance_next_fair(void *arg)
|
||||
return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
struct sched_entity *curr;
|
||||
struct task_struct *p;
|
||||
|
||||
if (!cfs_rq->nr_running)
|
||||
return MAX_PRIO;
|
||||
|
||||
curr = cfs_rq->curr;
|
||||
if (!curr)
|
||||
curr = __pick_next_entity(cfs_rq);
|
||||
|
||||
p = task_of(curr);
|
||||
|
||||
return p->prio;
|
||||
}
|
||||
#endif
|
||||
|
||||
static unsigned long
|
||||
load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
unsigned long max_load_move,
|
||||
@@ -972,28 +1196,45 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
struct cfs_rq *busy_cfs_rq;
|
||||
long rem_load_move = max_load_move;
|
||||
struct rq_iterator cfs_rq_iterator;
|
||||
unsigned long load_moved;
|
||||
|
||||
cfs_rq_iterator.start = load_balance_start_fair;
|
||||
cfs_rq_iterator.next = load_balance_next_fair;
|
||||
|
||||
for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
struct cfs_rq *this_cfs_rq;
|
||||
long imbalance;
|
||||
unsigned long maxload;
|
||||
struct cfs_rq *this_cfs_rq = busy_cfs_rq->tg->cfs_rq[this_cpu];
|
||||
unsigned long maxload, task_load, group_weight;
|
||||
unsigned long thisload, per_task_load;
|
||||
struct sched_entity *se = busy_cfs_rq->tg->se[busiest->cpu];
|
||||
|
||||
this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
|
||||
task_load = busy_cfs_rq->load.weight;
|
||||
group_weight = se->load.weight;
|
||||
|
||||
imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
|
||||
/* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
|
||||
if (imbalance <= 0)
|
||||
/*
|
||||
* 'group_weight' is contributed by tasks of total weight
|
||||
* 'task_load'. To move 'rem_load_move' worth of weight only,
|
||||
* we need to move a maximum task load of:
|
||||
*
|
||||
* maxload = (remload / group_weight) * task_load;
|
||||
*/
|
||||
maxload = (rem_load_move * task_load) / group_weight;
|
||||
|
||||
if (!maxload || !task_load)
|
||||
continue;
|
||||
|
||||
/* Don't pull more than imbalance/2 */
|
||||
imbalance /= 2;
|
||||
maxload = min(rem_load_move, imbalance);
|
||||
per_task_load = task_load / busy_cfs_rq->nr_running;
|
||||
/*
|
||||
* balance_tasks will try to forcibly move atleast one task if
|
||||
* possible (because of SCHED_LOAD_SCALE_FUZZ). Avoid that if
|
||||
* maxload is less than GROUP_IMBALANCE_FUZZ% the per_task_load.
|
||||
*/
|
||||
if (100 * maxload < GROUP_IMBALANCE_PCT * per_task_load)
|
||||
continue;
|
||||
|
||||
*this_best_prio = cfs_rq_best_prio(this_cfs_rq);
|
||||
/* Disable priority-based load balance */
|
||||
*this_best_prio = 0;
|
||||
thisload = this_cfs_rq->load.weight;
|
||||
#else
|
||||
# define maxload rem_load_move
|
||||
#endif
|
||||
@@ -1002,11 +1243,33 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
* load_balance_[start|next]_fair iterators
|
||||
*/
|
||||
cfs_rq_iterator.arg = busy_cfs_rq;
|
||||
rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
|
||||
load_moved = balance_tasks(this_rq, this_cpu, busiest,
|
||||
maxload, sd, idle, all_pinned,
|
||||
this_best_prio,
|
||||
&cfs_rq_iterator);
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
/*
|
||||
* load_moved holds the task load that was moved. The
|
||||
* effective (group) weight moved would be:
|
||||
* load_moved_eff = load_moved/task_load * group_weight;
|
||||
*/
|
||||
load_moved = (group_weight * load_moved) / task_load;
|
||||
|
||||
/* Adjust shares on both cpus to reflect load_moved */
|
||||
group_weight -= load_moved;
|
||||
set_se_shares(se, group_weight);
|
||||
|
||||
se = busy_cfs_rq->tg->se[this_cpu];
|
||||
if (!thisload)
|
||||
group_weight = load_moved;
|
||||
else
|
||||
group_weight = se->load.weight + load_moved;
|
||||
set_se_shares(se, group_weight);
|
||||
#endif
|
||||
|
||||
rem_load_move -= load_moved;
|
||||
|
||||
if (rem_load_move <= 0)
|
||||
break;
|
||||
}
|
||||
@@ -1042,14 +1305,14 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
/*
|
||||
* scheduler tick hitting a task of our scheduling class:
|
||||
*/
|
||||
static void task_tick_fair(struct rq *rq, struct task_struct *curr)
|
||||
static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
|
||||
{
|
||||
struct cfs_rq *cfs_rq;
|
||||
struct sched_entity *se = &curr->se;
|
||||
|
||||
for_each_sched_entity(se) {
|
||||
cfs_rq = cfs_rq_of(se);
|
||||
entity_tick(cfs_rq, se);
|
||||
entity_tick(cfs_rq, se, queued);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1087,6 +1350,42 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
|
||||
resched_task(rq->curr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Priority of the task has changed. Check to see if we preempt
|
||||
* the current task.
|
||||
*/
|
||||
static void prio_changed_fair(struct rq *rq, struct task_struct *p,
|
||||
int oldprio, int running)
|
||||
{
|
||||
/*
|
||||
* Reschedule if we are currently running on this runqueue and
|
||||
* our priority decreased, or if we are not currently running on
|
||||
* this runqueue and our priority is higher than the current's
|
||||
*/
|
||||
if (running) {
|
||||
if (p->prio > oldprio)
|
||||
resched_task(rq->curr);
|
||||
} else
|
||||
check_preempt_curr(rq, p);
|
||||
}
|
||||
|
||||
/*
|
||||
* We switched to the sched_fair class.
|
||||
*/
|
||||
static void switched_to_fair(struct rq *rq, struct task_struct *p,
|
||||
int running)
|
||||
{
|
||||
/*
|
||||
* We were most likely switched from sched_rt, so
|
||||
* kick off the schedule if running, otherwise just see
|
||||
* if we can still preempt the current task.
|
||||
*/
|
||||
if (running)
|
||||
resched_task(rq->curr);
|
||||
else
|
||||
check_preempt_curr(rq, p);
|
||||
}
|
||||
|
||||
/* Account for a task changing its policy or group.
|
||||
*
|
||||
* This routine is mostly called to set cfs_rq->curr field when a task
|
||||
@@ -1108,6 +1407,9 @@ static const struct sched_class fair_sched_class = {
|
||||
.enqueue_task = enqueue_task_fair,
|
||||
.dequeue_task = dequeue_task_fair,
|
||||
.yield_task = yield_task_fair,
|
||||
#ifdef CONFIG_SMP
|
||||
.select_task_rq = select_task_rq_fair,
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
.check_preempt_curr = check_preempt_wakeup,
|
||||
|
||||
@@ -1122,6 +1424,9 @@ static const struct sched_class fair_sched_class = {
|
||||
.set_curr_task = set_curr_task_fair,
|
||||
.task_tick = task_tick_fair,
|
||||
.task_new = task_new_fair,
|
||||
|
||||
.prio_changed = prio_changed_fair,
|
||||
.switched_to = switched_to_fair,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
@@ -1132,7 +1437,9 @@ static void print_cfs_stats(struct seq_file *m, int cpu)
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
print_cfs_rq(m, cpu, &cpu_rq(cpu)->cfs);
|
||||
#endif
|
||||
rcu_read_lock();
|
||||
for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
|
||||
print_cfs_rq(m, cpu, cfs_rq);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -5,6 +5,12 @@
|
||||
* handled in sched_fair.c)
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static int select_task_rq_idle(struct task_struct *p, int sync)
|
||||
{
|
||||
return task_cpu(p); /* IDLE tasks as never migrated */
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
/*
|
||||
* Idle tasks are unconditionally rescheduled:
|
||||
*/
|
||||
@@ -55,7 +61,7 @@ move_one_task_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
}
|
||||
#endif
|
||||
|
||||
static void task_tick_idle(struct rq *rq, struct task_struct *curr)
|
||||
static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -63,6 +69,33 @@ static void set_curr_task_idle(struct rq *rq)
|
||||
{
|
||||
}
|
||||
|
||||
static void switched_to_idle(struct rq *rq, struct task_struct *p,
|
||||
int running)
|
||||
{
|
||||
/* Can this actually happen?? */
|
||||
if (running)
|
||||
resched_task(rq->curr);
|
||||
else
|
||||
check_preempt_curr(rq, p);
|
||||
}
|
||||
|
||||
static void prio_changed_idle(struct rq *rq, struct task_struct *p,
|
||||
int oldprio, int running)
|
||||
{
|
||||
/* This can happen for hot plug CPUS */
|
||||
|
||||
/*
|
||||
* Reschedule if we are currently running on this runqueue and
|
||||
* our priority decreased, or if we are not currently running on
|
||||
* this runqueue and our priority is higher than the current's
|
||||
*/
|
||||
if (running) {
|
||||
if (p->prio > oldprio)
|
||||
resched_task(rq->curr);
|
||||
} else
|
||||
check_preempt_curr(rq, p);
|
||||
}
|
||||
|
||||
/*
|
||||
* Simple, special scheduling class for the per-CPU idle tasks:
|
||||
*/
|
||||
@@ -72,6 +105,9 @@ const struct sched_class idle_sched_class = {
|
||||
|
||||
/* dequeue is not valid, we print a debug message there: */
|
||||
.dequeue_task = dequeue_task_idle,
|
||||
#ifdef CONFIG_SMP
|
||||
.select_task_rq = select_task_rq_idle,
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
.check_preempt_curr = check_preempt_curr_idle,
|
||||
|
||||
@@ -85,5 +121,9 @@ const struct sched_class idle_sched_class = {
|
||||
|
||||
.set_curr_task = set_curr_task_idle,
|
||||
.task_tick = task_tick_idle,
|
||||
|
||||
.prio_changed = prio_changed_idle,
|
||||
.switched_to = switched_to_idle,
|
||||
|
||||
/* no .task_new for idle tasks */
|
||||
};
|
||||
|
||||
1142
kernel/sched_rt.c
File diff suppressed because it is too large
@@ -733,13 +733,13 @@ static void print_fatal_signal(struct pt_regs *regs, int signr)
		current->comm, task_pid_nr(current), signr);

#if defined(__i386__) && !defined(__arch_um__)
	printk("code at %08lx: ", regs->eip);
	printk("code at %08lx: ", regs->ip);
	{
		int i;
		for (i = 0; i < 16; i++) {
			unsigned char insn;

			__get_user(insn, (unsigned char *)(regs->eip + i));
			__get_user(insn, (unsigned char *)(regs->ip + i));
			printk("%02x ", insn);
		}
	}

@@ -3,7 +3,9 @@
 *
 * Copyright (C) 1992 Linus Torvalds
 *
 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 * Distribute under GPLv2.
 *
 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#include <linux/module.h>

@@ -278,9 +280,14 @@ asmlinkage void do_softirq(void)
 */
void irq_enter(void)
{
#ifdef CONFIG_NO_HZ
	int cpu = smp_processor_id();
	if (idle_cpu(cpu) && !in_interrupt())
		tick_nohz_stop_idle(cpu);
#endif
	__irq_enter();
#ifdef CONFIG_NO_HZ
	if (idle_cpu(smp_processor_id()))
	if (idle_cpu(cpu))
		tick_nohz_update_jiffies();
#endif
}

||||
*/
|
||||
#include <linux/mm.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/nmi.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/freezer.h>
|
||||
@@ -23,8 +24,8 @@ static DEFINE_PER_CPU(unsigned long, touch_timestamp);
|
||||
static DEFINE_PER_CPU(unsigned long, print_timestamp);
|
||||
static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
|
||||
|
||||
static int did_panic;
|
||||
int softlockup_thresh = 10;
|
||||
static int __read_mostly did_panic;
|
||||
unsigned long __read_mostly softlockup_thresh = 60;
|
||||
|
||||
static int
|
||||
softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
|
||||
@@ -45,7 +46,7 @@ static struct notifier_block panic_block = {
|
||||
*/
|
||||
static unsigned long get_timestamp(int this_cpu)
|
||||
{
|
||||
return cpu_clock(this_cpu) >> 30; /* 2^30 ~= 10^9 */
|
||||
return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */
|
||||
}
|
||||
|
||||
void touch_softlockup_watchdog(void)
|
||||
@@ -100,11 +101,7 @@ void softlockup_tick(void)
|
||||
|
||||
now = get_timestamp(this_cpu);
|
||||
|
||||
/* Wake up the high-prio watchdog task every second: */
|
||||
if (now > (touch_timestamp + 1))
|
||||
wake_up_process(per_cpu(watchdog_task, this_cpu));
|
||||
|
||||
/* Warn about unreasonable 10+ seconds delays: */
|
||||
/* Warn about unreasonable delays: */
|
||||
if (now <= (touch_timestamp + softlockup_thresh))
|
||||
return;
|
||||
|
||||
@@ -121,12 +118,94 @@ void softlockup_tick(void)
|
||||
spin_unlock(&print_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Have a reasonable limit on the number of tasks checked:
|
||||
*/
|
||||
unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
|
||||
|
||||
/*
|
||||
* Zero means infinite timeout - no checking done:
|
||||
*/
|
||||
unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
|
||||
|
||||
unsigned long __read_mostly sysctl_hung_task_warnings = 10;
|
||||
|
||||
/*
|
||||
* Only do the hung-tasks check on one CPU:
|
||||
*/
|
||||
static int check_cpu __read_mostly = -1;
|
||||
|
||||
static void check_hung_task(struct task_struct *t, unsigned long now)
|
||||
{
|
||||
unsigned long switch_count = t->nvcsw + t->nivcsw;
|
||||
|
||||
if (t->flags & PF_FROZEN)
|
||||
return;
|
||||
|
||||
if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
|
||||
t->last_switch_count = switch_count;
|
||||
t->last_switch_timestamp = now;
|
||||
return;
|
||||
}
|
||||
if ((long)(now - t->last_switch_timestamp) <
|
||||
sysctl_hung_task_timeout_secs)
|
||||
return;
|
||||
if (sysctl_hung_task_warnings < 0)
|
||||
return;
|
||||
sysctl_hung_task_warnings--;
|
||||
|
||||
/*
|
||||
* Ok, the task did not get scheduled for more than 2 minutes,
|
||||
* complain:
|
||||
*/
|
||||
printk(KERN_ERR "INFO: task %s:%d blocked for more than "
|
||||
"%ld seconds.\n", t->comm, t->pid,
|
||||
sysctl_hung_task_timeout_secs);
|
||||
printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
|
||||
" disables this message.\n");
|
||||
sched_show_task(t);
|
||||
__debug_show_held_locks(t);
|
||||
|
||||
t->last_switch_timestamp = now;
|
||||
touch_nmi_watchdog();
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
|
||||
* a really long time (120 seconds). If that happens, print out
|
||||
* a warning.
|
||||
*/
|
||||
static void check_hung_uninterruptible_tasks(int this_cpu)
|
||||
{
|
||||
int max_count = sysctl_hung_task_check_count;
|
||||
unsigned long now = get_timestamp(this_cpu);
|
||||
struct task_struct *g, *t;
|
||||
|
||||
/*
|
||||
* If the system crashed already then all bets are off,
|
||||
* do not report extra hung tasks:
|
||||
*/
|
||||
if ((tainted & TAINT_DIE) || did_panic)
|
||||
return;
|
||||
|
||||
read_lock(&tasklist_lock);
|
||||
do_each_thread(g, t) {
|
||||
if (!--max_count)
|
||||
break;
|
||||
if (t->state & TASK_UNINTERRUPTIBLE)
|
||||
check_hung_task(t, now);
|
||||
} while_each_thread(g, t);
|
||||
|
||||
read_unlock(&tasklist_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* The watchdog thread - runs every second and touches the timestamp.
|
||||
*/
|
||||
static int watchdog(void *__bind_cpu)
|
||||
{
|
||||
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
|
||||
int this_cpu = (long)__bind_cpu;
|
||||
|
||||
sched_setscheduler(current, SCHED_FIFO, ¶m);
|
||||
|
||||
@@ -135,13 +214,18 @@ static int watchdog(void *__bind_cpu)
|
||||
|
||||
/*
|
||||
* Run briefly once per second to reset the softlockup timestamp.
|
||||
* If this gets delayed for more than 10 seconds then the
|
||||
* If this gets delayed for more than 60 seconds then the
|
||||
* debug-printout triggers in softlockup_tick().
|
||||
*/
|
||||
while (!kthread_should_stop()) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
touch_softlockup_watchdog();
|
||||
schedule();
|
||||
msleep_interruptible(10000);
|
||||
|
||||
if (this_cpu != check_cpu)
|
||||
continue;
|
||||
|
||||
if (sysctl_hung_task_timeout_secs)
|
||||
check_hung_uninterruptible_tasks(this_cpu);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -171,6 +255,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||
break;
|
||||
case CPU_ONLINE:
|
||||
case CPU_ONLINE_FROZEN:
|
||||
check_cpu = any_online_cpu(cpu_online_map);
|
||||
wake_up_process(per_cpu(watchdog_task, hotcpu));
|
||||
break;
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
@@ -181,6 +266,15 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||
/* Unbind so it can run. Fall thru. */
|
||||
kthread_bind(per_cpu(watchdog_task, hotcpu),
|
||||
any_online_cpu(cpu_online_map));
|
||||
case CPU_DOWN_PREPARE:
|
||||
case CPU_DOWN_PREPARE_FROZEN:
|
||||
if (hotcpu == check_cpu) {
|
||||
cpumask_t temp_cpu_online_map = cpu_online_map;
|
||||
|
||||
cpu_clear(hotcpu, temp_cpu_online_map);
|
||||
check_cpu = any_online_cpu(temp_cpu_online_map);
|
||||
}
|
||||
break;
|
||||
case CPU_DEAD:
|
||||
case CPU_DEAD_FROZEN:
|
||||
p = per_cpu(watchdog_task, hotcpu);
|
||||
|
||||
@@ -65,8 +65,7 @@ EXPORT_SYMBOL(_write_trylock);
 * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
 * not re-enabled during lock-acquire (which the preempt-spin-ops do):
 */
#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \
	defined(CONFIG_DEBUG_LOCK_ALLOC)
#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)

void __lockfunc _read_lock(rwlock_t *lock)
{

@@ -203,13 +203,13 @@ int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
	int ret;

	/* No CPUs can come up or down during this. */
	lock_cpu_hotplug();
	get_online_cpus();
	p = __stop_machine_run(fn, data, cpu);
	if (!IS_ERR(p))
		ret = kthread_stop(p);
	else
		ret = PTR_ERR(p);
	unlock_cpu_hotplug();
	put_online_cpus();

	return ret;
}

267
kernel/sysctl.c
@@ -53,6 +53,7 @@
|
||||
#ifdef CONFIG_X86
|
||||
#include <asm/nmi.h>
|
||||
#include <asm/stacktrace.h>
|
||||
#include <asm/io.h>
|
||||
#endif
|
||||
|
||||
static int deprecated_sysctl_warning(struct __sysctl_args *args);
|
||||
@@ -81,6 +82,7 @@ extern int compat_log;
|
||||
extern int maps_protect;
|
||||
extern int sysctl_stat_interval;
|
||||
extern int audit_argv_kb;
|
||||
extern int latencytop_enabled;
|
||||
|
||||
/* Constants used for minimum and maximum */
|
||||
#ifdef CONFIG_DETECT_SOFTLOCKUP
|
||||
@@ -156,8 +158,16 @@ static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *
|
||||
#endif
|
||||
|
||||
static struct ctl_table root_table[];
|
||||
static struct ctl_table_header root_table_header =
|
||||
{ root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
|
||||
static struct ctl_table_root sysctl_table_root;
|
||||
static struct ctl_table_header root_table_header = {
|
||||
.ctl_table = root_table,
|
||||
.ctl_entry = LIST_HEAD_INIT(sysctl_table_root.header_list),
|
||||
.root = &sysctl_table_root,
|
||||
};
|
||||
static struct ctl_table_root sysctl_table_root = {
|
||||
.root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
|
||||
.header_list = LIST_HEAD_INIT(root_table_header.ctl_entry),
|
||||
};
|
||||
|
||||
static struct ctl_table kern_table[];
|
||||
static struct ctl_table vm_table[];
|
||||
@@ -191,14 +201,6 @@ static struct ctl_table root_table[] = {
|
||||
.mode = 0555,
|
||||
.child = vm_table,
|
||||
},
|
||||
#ifdef CONFIG_NET
|
||||
{
|
||||
.ctl_name = CTL_NET,
|
||||
.procname = "net",
|
||||
.mode = 0555,
|
||||
.child = net_table,
|
||||
},
|
||||
#endif
|
||||
{
|
||||
.ctl_name = CTL_FS,
|
||||
.procname = "fs",
|
||||
@@ -306,9 +308,43 @@ static struct ctl_table kern_table[] = {
|
||||
.procname = "sched_nr_migrate",
|
||||
.data = &sysctl_sched_nr_migrate,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 644,
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "sched_rt_period_ms",
|
||||
.data = &sysctl_sched_rt_period,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "sched_rt_ratio",
|
||||
.data = &sysctl_sched_rt_ratio,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "sched_min_bal_int_shares",
|
||||
.data = &sysctl_sched_min_bal_int_shares,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "sched_max_bal_int_shares",
|
||||
.data = &sysctl_sched_max_bal_int_shares,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
#endif
|
||||
#endif
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
@@ -382,6 +418,15 @@ static struct ctl_table kern_table[] = {
|
||||
.proc_handler = &proc_dointvec_taint,
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_LATENCYTOP
|
||||
{
|
||||
.procname = "latencytop",
|
||||
.data = &latencytop_enabled,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_SECURITY_CAPABILITIES
|
||||
{
|
||||
.procname = "cap-bound",
|
||||
@@ -683,6 +728,14 @@ static struct ctl_table kern_table[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "io_delay_type",
|
||||
.data = &io_delay_type,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
#endif
|
||||
#if defined(CONFIG_MMU)
|
||||
{
|
||||
@@ -728,13 +781,40 @@ static struct ctl_table kern_table[] = {
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "softlockup_thresh",
|
||||
.data = &softlockup_thresh,
|
||||
.maxlen = sizeof(int),
|
||||
.maxlen = sizeof(unsigned long),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec_minmax,
|
||||
.proc_handler = &proc_doulongvec_minmax,
|
||||
.strategy = &sysctl_intvec,
|
||||
.extra1 = &one,
|
||||
.extra2 = &sixty,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "hung_task_check_count",
|
||||
.data = &sysctl_hung_task_check_count,
|
||||
.maxlen = sizeof(unsigned long),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_doulongvec_minmax,
|
||||
.strategy = &sysctl_intvec,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "hung_task_timeout_secs",
|
||||
.data = &sysctl_hung_task_timeout_secs,
|
||||
.maxlen = sizeof(unsigned long),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_doulongvec_minmax,
|
||||
.strategy = &sysctl_intvec,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "hung_task_warnings",
|
||||
.data = &sysctl_hung_task_warnings,
|
||||
.maxlen = sizeof(unsigned long),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_doulongvec_minmax,
|
||||
.strategy = &sysctl_intvec,
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_COMPAT
|
||||
{
|
||||
@@ -1300,12 +1380,27 @@ void sysctl_head_finish(struct ctl_table_header *head)
|
||||
spin_unlock(&sysctl_lock);
|
||||
}
|
||||
|
||||
struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
|
||||
static struct list_head *
|
||||
lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
|
||||
{
|
||||
struct list_head *header_list;
|
||||
header_list = &root->header_list;
|
||||
if (root->lookup)
|
||||
header_list = root->lookup(root, namespaces);
|
||||
return header_list;
|
||||
}
|
||||
|
||||
struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
|
||||
struct ctl_table_header *prev)
|
||||
{
|
||||
struct ctl_table_root *root;
|
||||
struct list_head *header_list;
|
||||
struct ctl_table_header *head;
|
||||
struct list_head *tmp;
|
||||
|
||||
spin_lock(&sysctl_lock);
|
||||
if (prev) {
|
||||
head = prev;
|
||||
tmp = &prev->ctl_entry;
|
||||
unuse_table(prev);
|
||||
goto next;
|
||||
@@ -1319,14 +1414,38 @@ struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
|
||||
spin_unlock(&sysctl_lock);
|
||||
return head;
|
||||
next:
|
||||
root = head->root;
|
||||
tmp = tmp->next;
|
||||
if (tmp == &root_table_header.ctl_entry)
|
||||
break;
|
||||
header_list = lookup_header_list(root, namespaces);
|
||||
if (tmp != header_list)
|
||||
continue;
|
||||
|
||||
do {
|
||||
root = list_entry(root->root_list.next,
|
||||
struct ctl_table_root, root_list);
|
||||
if (root == &sysctl_table_root)
|
||||
goto out;
|
||||
header_list = lookup_header_list(root, namespaces);
|
||||
} while (list_empty(header_list));
|
||||
tmp = header_list->next;
|
||||
}
|
||||
out:
|
||||
spin_unlock(&sysctl_lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
|
||||
{
|
||||
return __sysctl_head_next(current->nsproxy, prev);
|
||||
}
|
||||
|
||||
void register_sysctl_root(struct ctl_table_root *root)
|
||||
{
|
||||
spin_lock(&sysctl_lock);
|
||||
list_add_tail(&root->root_list, &sysctl_table_root.root_list);
|
||||
spin_unlock(&sysctl_lock);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SYSCTL_SYSCALL
|
||||
int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
|
||||
void __user *newval, size_t newlen)
|
||||
@@ -1483,18 +1602,21 @@ static __init int sysctl_init(void)
|
||||
{
|
||||
int err;
|
||||
sysctl_set_parent(NULL, root_table);
|
||||
err = sysctl_check_table(root_table);
|
||||
err = sysctl_check_table(current->nsproxy, root_table);
|
||||
return 0;
|
||||
}
|
||||
|
||||
core_initcall(sysctl_init);
|
||||
|
||||
/**
|
||||
* register_sysctl_table - register a sysctl hierarchy
|
||||
* __register_sysctl_paths - register a sysctl hierarchy
|
||||
* @root: List of sysctl headers to register on
|
||||
* @namespaces: Data to compute which lists of sysctl entries are visible
|
||||
* @path: The path to the directory the sysctl table is in.
|
||||
* @table: the top-level table structure
|
||||
*
|
||||
* Register a sysctl table hierarchy. @table should be a filled in ctl_table
|
||||
* array. An entry with a ctl_name of 0 terminates the table.
|
||||
* array. A completely 0 filled entry terminates the table.
|
||||
*
|
||||
* The members of the &struct ctl_table structure are used as follows:
|
||||
*
|
||||
@@ -1557,25 +1679,99 @@ core_initcall(sysctl_init);
|
||||
* This routine returns %NULL on a failure to register, and a pointer
|
||||
* to the table header on success.
|
||||
*/
|
||||
struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
|
||||
struct ctl_table_header *__register_sysctl_paths(
|
||||
struct ctl_table_root *root,
|
||||
struct nsproxy *namespaces,
|
||||
const struct ctl_path *path, struct ctl_table *table)
|
||||
{
|
||||
struct ctl_table_header *tmp;
|
||||
tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
|
||||
if (!tmp)
|
||||
struct list_head *header_list;
|
||||
struct ctl_table_header *header;
|
||||
struct ctl_table *new, **prevp;
|
||||
unsigned int n, npath;
|
||||
|
||||
/* Count the path components */
|
||||
for (npath = 0; path[npath].ctl_name || path[npath].procname; ++npath)
|
||||
;
|
||||
|
||||
/*
|
||||
* For each path component, allocate a 2-element ctl_table array.
|
||||
* The first array element will be filled with the sysctl entry
|
||||
* for this, the second will be the sentinel (ctl_name == 0).
|
||||
*
|
||||
* We allocate everything in one go so that we don't have to
|
||||
* worry about freeing additional memory in unregister_sysctl_table.
|
||||
*/
|
||||
header = kzalloc(sizeof(struct ctl_table_header) +
|
||||
(2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
|
||||
if (!header)
|
||||
return NULL;
|
||||
tmp->ctl_table = table;
|
||||
INIT_LIST_HEAD(&tmp->ctl_entry);
|
||||
tmp->used = 0;
|
||||
tmp->unregistering = NULL;
|
||||
sysctl_set_parent(NULL, table);
|
||||
if (sysctl_check_table(tmp->ctl_table)) {
|
||||
kfree(tmp);
|
||||
|
||||
new = (struct ctl_table *) (header + 1);
|
||||
|
||||
/* Now connect the dots */
|
||||
prevp = &header->ctl_table;
|
||||
for (n = 0; n < npath; ++n, ++path) {
|
||||
/* Copy the procname */
|
||||
new->procname = path->procname;
|
||||
new->ctl_name = path->ctl_name;
|
||||
new->mode = 0555;
|
||||
|
||||
*prevp = new;
|
||||
prevp = &new->child;
|
||||
|
||||
new += 2;
|
||||
}
|
||||
*prevp = table;
|
||||
header->ctl_table_arg = table;
|
||||
|
||||
INIT_LIST_HEAD(&header->ctl_entry);
|
||||
header->used = 0;
|
||||
header->unregistering = NULL;
|
||||
header->root = root;
|
||||
sysctl_set_parent(NULL, header->ctl_table);
|
||||
if (sysctl_check_table(namespaces, header->ctl_table)) {
|
||||
kfree(header);
|
||||
return NULL;
|
||||
}
|
||||
spin_lock(&sysctl_lock);
|
||||
list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
|
||||
header_list = lookup_header_list(root, namespaces);
|
||||
list_add_tail(&header->ctl_entry, header_list);
|
||||
spin_unlock(&sysctl_lock);
|
||||
return tmp;
|
||||
|
||||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
* register_sysctl_table_path - register a sysctl table hierarchy
|
||||
* @path: The path to the directory the sysctl table is in.
|
||||
* @table: the top-level table structure
|
||||
*
|
||||
* Register a sysctl table hierarchy. @table should be a filled in ctl_table
|
||||
* array. A completely 0 filled entry terminates the table.
|
||||
*
|
||||
* See __register_sysctl_paths for more details.
|
||||
*/
|
||||
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
|
||||
struct ctl_table *table)
|
||||
{
|
||||
return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
|
||||
path, table);
|
||||
}
|
||||
|
||||
/**
|
||||
* register_sysctl_table - register a sysctl table hierarchy
|
||||
* @table: the top-level table structure
|
||||
*
|
||||
* Register a sysctl table hierarchy. @table should be a filled in ctl_table
|
||||
* array. A completely 0 filled entry terminates the table.
|
||||
*
|
||||
* See register_sysctl_paths for more details.
|
||||
*/
|
||||
struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
|
||||
{
|
||||
static const struct ctl_path null_path[] = { {} };
|
||||
|
||||
return register_sysctl_paths(null_path, table);
|
||||
}
|
||||
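For illustration, a minimal caller-side sketch of the interface registered above. The table, path and variable names are invented for the example; the structure fields (ctl_name, procname, data, maxlen, mode, proc_handler), the zero-filled sentinel and the register_sysctl_paths() signature follow what the kernel-doc and code above define.

#include <linux/sysctl.h>
#include <linux/init.h>
#include <linux/errno.h>

/* Hypothetical example: expose /proc/sys/kernel/example_value through
 * register_sysctl_paths(). Not part of this patch. */
static int example_value;

static struct ctl_table example_table[] = {
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "example_value",
		.data		= &example_value,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{}	/* completely 0 filled entry terminates the table */
};

static const struct ctl_path example_path[] = {
	{ .procname = "kernel", .ctl_name = CTL_KERN },
	{}	/* sentinel */
};

static struct ctl_table_header *example_header;

static int __init example_sysctl_init(void)
{
	example_header = register_sysctl_paths(example_path, example_table);
	return example_header ? 0 : -ENOMEM;
}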
|
||||
/**
|
||||
@@ -1604,6 +1800,12 @@ struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
|
||||
struct ctl_table *table)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void unregister_sysctl_table(struct ctl_table_header * table)
|
||||
{
|
||||
}
|
||||
@@ -2662,6 +2864,7 @@ EXPORT_SYMBOL(proc_dostring);
|
||||
EXPORT_SYMBOL(proc_doulongvec_minmax);
|
||||
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
|
||||
EXPORT_SYMBOL(register_sysctl_table);
|
||||
EXPORT_SYMBOL(register_sysctl_paths);
|
||||
EXPORT_SYMBOL(sysctl_intvec);
|
||||
EXPORT_SYMBOL(sysctl_jiffies);
|
||||
EXPORT_SYMBOL(sysctl_ms_jiffies);
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
#include <linux/stat.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include "../arch/s390/appldata/appldata.h"
|
||||
#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
|
||||
#include <linux/sunrpc/debug.h>
|
||||
#include <linux/string.h>
|
||||
@@ -1343,7 +1342,8 @@ static void sysctl_repair_table(struct ctl_table *table)
|
||||
}
|
||||
}
|
||||
|
||||
static struct ctl_table *sysctl_check_lookup(struct ctl_table *table)
|
||||
static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces,
|
||||
struct ctl_table *table)
|
||||
{
|
||||
struct ctl_table_header *head;
|
||||
struct ctl_table *ref, *test;
|
||||
@@ -1351,8 +1351,8 @@ static struct ctl_table *sysctl_check_lookup(struct ctl_table *table)
|
||||
|
||||
depth = sysctl_depth(table);
|
||||
|
||||
for (head = sysctl_head_next(NULL); head;
|
||||
head = sysctl_head_next(head)) {
|
||||
for (head = __sysctl_head_next(namespaces, NULL); head;
|
||||
head = __sysctl_head_next(namespaces, head)) {
|
||||
cur_depth = depth;
|
||||
ref = head->ctl_table;
|
||||
repeat:
|
||||
@@ -1397,13 +1397,14 @@ static void set_fail(const char **fail, struct ctl_table *table, const char *str
|
||||
*fail = str;
|
||||
}
|
||||
|
||||
static int sysctl_check_dir(struct ctl_table *table)
|
||||
static int sysctl_check_dir(struct nsproxy *namespaces,
|
||||
struct ctl_table *table)
|
||||
{
|
||||
struct ctl_table *ref;
|
||||
int error;
|
||||
|
||||
error = 0;
|
||||
ref = sysctl_check_lookup(table);
|
||||
ref = sysctl_check_lookup(namespaces, table);
|
||||
if (ref) {
|
||||
int match = 0;
|
||||
if ((!table->procname && !ref->procname) ||
|
||||
@@ -1428,11 +1429,12 @@ static int sysctl_check_dir(struct ctl_table *table)
|
||||
return error;
|
||||
}
|
||||
|
||||
static void sysctl_check_leaf(struct ctl_table *table, const char **fail)
|
||||
static void sysctl_check_leaf(struct nsproxy *namespaces,
|
||||
struct ctl_table *table, const char **fail)
|
||||
{
|
||||
struct ctl_table *ref;
|
||||
|
||||
ref = sysctl_check_lookup(table);
|
||||
ref = sysctl_check_lookup(namespaces, table);
|
||||
if (ref && (ref != table))
|
||||
set_fail(fail, table, "Sysctl already exists");
|
||||
}
|
||||
@@ -1456,7 +1458,7 @@ static void sysctl_check_bin_path(struct ctl_table *table, const char **fail)
|
||||
}
|
||||
}
|
||||
|
||||
int sysctl_check_table(struct ctl_table *table)
|
||||
int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
|
||||
{
|
||||
int error = 0;
|
||||
for (; table->ctl_name || table->procname; table++) {
|
||||
@@ -1486,7 +1488,7 @@ int sysctl_check_table(struct ctl_table *table)
|
||||
set_fail(&fail, table, "Directory with extra1");
|
||||
if (table->extra2)
|
||||
set_fail(&fail, table, "Directory with extra2");
|
||||
if (sysctl_check_dir(table))
|
||||
if (sysctl_check_dir(namespaces, table))
|
||||
set_fail(&fail, table, "Inconsistent directory names");
|
||||
} else {
|
||||
if ((table->strategy == sysctl_data) ||
|
||||
@@ -1535,7 +1537,7 @@ int sysctl_check_table(struct ctl_table *table)
|
||||
if (!table->procname && table->proc_handler)
|
||||
set_fail(&fail, table, "proc_handler without procname");
|
||||
#endif
|
||||
sysctl_check_leaf(table, &fail);
|
||||
sysctl_check_leaf(namespaces, table, &fail);
|
||||
}
|
||||
sysctl_check_bin_path(table, &fail);
|
||||
if (fail) {
|
||||
@@ -1543,7 +1545,7 @@ int sysctl_check_table(struct ctl_table *table)
|
||||
error = -EINVAL;
|
||||
}
|
||||
if (table->child)
|
||||
error |= sysctl_check_table(table->child);
|
||||
error |= sysctl_check_table(namespaces, table->child);
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
216
kernel/test_kprobes.c
Normal file
@@ -0,0 +1,216 @@
|
||||
/*
|
||||
* test_kprobes.c - simple sanity test for *probes
|
||||
*
|
||||
* Copyright IBM Corp. 2008
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
|
||||
* the GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/random.h>
|
||||
|
||||
#define div_factor 3
|
||||
|
||||
static u32 rand1, preh_val, posth_val, jph_val;
|
||||
static int errors, handler_errors, num_tests;
|
||||
|
||||
static noinline u32 kprobe_target(u32 value)
|
||||
{
|
||||
/*
|
||||
* gcc ignores noinline on some architectures unless we stuff
|
||||
* sufficient lard into the function. The get_kprobe() here is
|
||||
* just for that.
|
||||
*
|
||||
* NOTE: We aren't concerned about the correctness of get_kprobe()
|
||||
* here; hence, this call is neither under !preempt nor with the
|
||||
* kprobe_mutex held. This is fine(tm)
|
||||
*/
|
||||
if (get_kprobe((void *)0xdeadbeef))
|
||||
printk(KERN_INFO "Kprobe smoke test: probe on 0xdeadbeef!\n");
|
||||
|
||||
return (value / div_factor);
|
||||
}
|
||||
|
||||
static int kp_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
{
|
||||
preh_val = (rand1 / div_factor);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kp_post_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
unsigned long flags)
|
||||
{
|
||||
if (preh_val != (rand1 / div_factor)) {
|
||||
handler_errors++;
|
||||
printk(KERN_ERR "Kprobe smoke test failed: "
|
||||
"incorrect value in post_handler\n");
|
||||
}
|
||||
posth_val = preh_val + div_factor;
|
||||
}
|
||||
|
||||
static struct kprobe kp = {
|
||||
.symbol_name = "kprobe_target",
|
||||
.pre_handler = kp_pre_handler,
|
||||
.post_handler = kp_post_handler
|
||||
};
|
||||
|
||||
static int test_kprobe(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = register_kprobe(&kp);
|
||||
if (ret < 0) {
|
||||
printk(KERN_ERR "Kprobe smoke test failed: "
|
||||
"register_kprobe returned %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = kprobe_target(rand1);
|
||||
unregister_kprobe(&kp);
|
||||
|
||||
if (preh_val == 0) {
|
||||
printk(KERN_ERR "Kprobe smoke test failed: "
|
||||
"kprobe pre_handler not called\n");
|
||||
handler_errors++;
|
||||
}
|
||||
|
||||
if (posth_val == 0) {
|
||||
printk(KERN_ERR "Kprobe smoke test failed: "
|
||||
"kprobe post_handler not called\n");
|
||||
handler_errors++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u32 j_kprobe_target(u32 value)
|
||||
{
|
||||
if (value != rand1) {
|
||||
handler_errors++;
|
||||
printk(KERN_ERR "Kprobe smoke test failed: "
|
||||
"incorrect value in jprobe handler\n");
|
||||
}
|
||||
|
||||
jph_val = rand1;
|
||||
jprobe_return();
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct jprobe jp = {
|
||||
.entry = j_kprobe_target,
|
||||
.kp.symbol_name = "kprobe_target"
|
||||
};
|
||||
|
||||
static int test_jprobe(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = register_jprobe(&jp);
|
||||
if (ret < 0) {
|
||||
printk(KERN_ERR "Kprobe smoke test failed: "
|
||||
"register_jprobe returned %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = kprobe_target(rand1);
|
||||
unregister_jprobe(&jp);
|
||||
if (jph_val == 0) {
|
||||
printk(KERN_ERR "Kprobe smoke test failed: "
|
||||
"jprobe handler not called\n");
|
||||
handler_errors++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KRETPROBES
|
||||
static u32 krph_val;
|
||||
|
||||
static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
|
||||
{
|
||||
unsigned long ret = regs_return_value(regs);
|
||||
|
||||
if (ret != (rand1 / div_factor)) {
|
||||
handler_errors++;
|
||||
printk(KERN_ERR "Kprobe smoke test failed: "
|
||||
"incorrect value in kretprobe handler\n");
|
||||
}
|
||||
|
||||
krph_val = (rand1 / div_factor);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct kretprobe rp = {
|
||||
.handler = return_handler,
|
||||
.kp.symbol_name = "kprobe_target"
|
||||
};
|
||||
|
||||
static int test_kretprobe(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = register_kretprobe(&rp);
|
||||
if (ret < 0) {
|
||||
printk(KERN_ERR "Kprobe smoke test failed: "
|
||||
"register_kretprobe returned %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = kprobe_target(rand1);
|
||||
unregister_kretprobe(&rp);
|
||||
if (krph_val == 0) {
|
||||
printk(KERN_ERR "Kprobe smoke test failed: "
|
||||
"kretprobe handler not called\n");
|
||||
handler_errors++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_KRETPROBES */
|
||||
|
||||
int init_test_probes(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
do {
|
||||
rand1 = random32();
|
||||
} while (rand1 <= div_factor);
|
||||
|
||||
printk(KERN_INFO "Kprobe smoke test started\n");
|
||||
num_tests++;
|
||||
ret = test_kprobe();
|
||||
if (ret < 0)
|
||||
errors++;
|
||||
|
||||
num_tests++;
|
||||
ret = test_jprobe();
|
||||
if (ret < 0)
|
||||
errors++;
|
||||
|
||||
#ifdef CONFIG_KRETPROBES
|
||||
num_tests++;
|
||||
ret = test_kretprobe();
|
||||
if (ret < 0)
|
||||
errors++;
|
||||
#endif /* CONFIG_KRETPROBES */
|
||||
|
||||
if (errors)
|
||||
printk(KERN_ERR "BUG: Kprobe smoke test: %d out of "
|
||||
"%d tests failed\n", errors, num_tests);
|
||||
else if (handler_errors)
|
||||
printk(KERN_ERR "BUG: Kprobe smoke test: %d error(s) "
|
||||
"running handlers\n", handler_errors);
|
||||
else
|
||||
printk(KERN_INFO "Kprobe smoke test passed successfully\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
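How the new self-test is invoked is not shown in this excerpt; roughly, the kprobes initialisation path calls init_test_probes() when CONFIG_KPROBES_SANITY_TEST is enabled. A hedged sketch of that wiring (the function name init_kprobes_example is invented; only the #ifdef pattern is the point):

static int __init init_kprobes_example(void)
{
	int err = 0;

	/* ... normal kprobes bring-up would happen here ... */
#ifdef CONFIG_KPROBES_SANITY_TEST
	if (!err)
		err = init_test_probes();	/* runs the smoke tests above */
#endif
	return err;
}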
@@ -41,6 +41,11 @@ unsigned long clockevent_delta2ns(unsigned long latch,
|
||||
{
|
||||
u64 clc = ((u64) latch << evt->shift);
|
||||
|
||||
if (unlikely(!evt->mult)) {
|
||||
evt->mult = 1;
|
||||
WARN_ON(1);
|
||||
}
|
||||
|
||||
do_div(clc, evt->mult);
|
||||
if (clc < 1000)
|
||||
clc = 1000;
|
||||
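The hunk above guards clockevent_delta2ns() against a zero mult. For reference, the conversion it performs is ns = (latch << shift) / mult, clamped to at least 1000 ns; a standalone sketch with made-up shift/mult values, not the kernel function itself:

#include <stdint.h>

/* Standalone illustration of the latch -> nanoseconds conversion,
 * including the new "mult == 0" fixup. Values are illustrative only. */
static uint64_t delta2ns(unsigned long latch, unsigned int shift, unsigned int mult)
{
	uint64_t clc = (uint64_t)latch << shift;

	if (mult == 0)	/* the guard added by this hunk */
		mult = 1;

	clc /= mult;
	return clc < 1000 ? 1000 : clc;
}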
@@ -151,6 +156,14 @@ static void clockevents_notify_released(void)
|
||||
void clockevents_register_device(struct clock_event_device *dev)
|
||||
{
|
||||
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
|
||||
/*
|
||||
* A nsec2cyc multiplier of 0 is invalid and we'd crash
|
||||
* on it, so fix it up and emit a warning:
|
||||
*/
|
||||
if (unlikely(!dev->mult)) {
|
||||
dev->mult = 1;
|
||||
WARN_ON(1);
|
||||
}
|
||||
|
||||
spin_lock(&clockevents_lock);
|
||||
|
||||
|
||||
@@ -142,8 +142,13 @@ static void clocksource_watchdog(unsigned long data)
|
||||
}
|
||||
|
||||
if (!list_empty(&watchdog_list)) {
|
||||
__mod_timer(&watchdog_timer,
|
||||
watchdog_timer.expires + WATCHDOG_INTERVAL);
|
||||
/* Cycle through CPUs to check if the CPUs stay synchronized to
|
||||
* each other. */
|
||||
int next_cpu = next_cpu(raw_smp_processor_id(), cpu_online_map);
|
||||
if (next_cpu >= NR_CPUS)
|
||||
next_cpu = first_cpu(cpu_online_map);
|
||||
watchdog_timer.expires += WATCHDOG_INTERVAL;
|
||||
add_timer_on(&watchdog_timer, next_cpu);
|
||||
}
|
||||
spin_unlock(&watchdog_lock);
|
||||
}
|
||||
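The change above makes the clocksource watchdog timer hop across online CPUs instead of staying on one, so the CPUs are cross-checked against each other. The selection logic, extracted as a sketch (next_cpu(), first_cpu() and cpu_online_map are the cpumask helpers already used in the hunk; the wrapper name is invented):

/* Sketch of the round-robin selection used above: advance to the next
 * online CPU, wrapping back to the first one when we run off the end. */
static int pick_next_watchdog_cpu(int this_cpu)
{
	int cpu = next_cpu(this_cpu, cpu_online_map);

	if (cpu >= NR_CPUS)
		cpu = first_cpu(cpu_online_map);
	return cpu;
}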
@@ -165,7 +170,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
|
||||
if (!started && watchdog) {
|
||||
watchdog_last = watchdog->read();
|
||||
watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
|
||||
add_timer(&watchdog_timer);
|
||||
add_timer_on(&watchdog_timer, first_cpu(cpu_online_map));
|
||||
}
|
||||
} else {
|
||||
if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
|
||||
@@ -175,7 +180,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
|
||||
if (watchdog)
|
||||
del_timer(&watchdog_timer);
|
||||
watchdog = cs;
|
||||
init_timer(&watchdog_timer);
|
||||
init_timer_deferrable(&watchdog_timer);
|
||||
watchdog_timer.function = clocksource_watchdog;
|
||||
|
||||
/* Reset watchdog cycles */
|
||||
@@ -186,7 +191,8 @@ static void clocksource_check_watchdog(struct clocksource *cs)
|
||||
watchdog_last = watchdog->read();
|
||||
watchdog_timer.expires =
|
||||
jiffies + WATCHDOG_INTERVAL;
|
||||
add_timer(&watchdog_timer);
|
||||
add_timer_on(&watchdog_timer,
|
||||
first_cpu(cpu_online_map));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -331,6 +337,21 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
|
||||
spin_unlock_irqrestore(&clocksource_lock, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* clocksource_unregister - remove a registered clocksource
|
||||
*/
|
||||
void clocksource_unregister(struct clocksource *cs)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&clocksource_lock, flags);
|
||||
list_del(&cs->list);
|
||||
if (clocksource_override == cs)
|
||||
clocksource_override = NULL;
|
||||
next_clocksource = select_clocksource();
|
||||
spin_unlock_irqrestore(&clocksource_lock, flags);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SYSFS
|
||||
/**
|
||||
* sysfs_show_current_clocksources - sysfs interface for current clocksource
|
||||
@@ -441,7 +462,7 @@ static SYSDEV_ATTR(available_clocksource, 0600,
|
||||
sysfs_show_available_clocksources, NULL);
|
||||
|
||||
static struct sysdev_class clocksource_sysclass = {
|
||||
set_kset_name("clocksource"),
|
||||
.name = "clocksource",
|
||||
};
|
||||
|
||||
static struct sys_device device_clocksource = {
|
||||
|
||||
@@ -126,9 +126,9 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
|
||||
/*
|
||||
* Broadcast the event to the cpus, which are set in the mask
|
||||
*/
|
||||
int tick_do_broadcast(cpumask_t mask)
|
||||
static void tick_do_broadcast(cpumask_t mask)
|
||||
{
|
||||
int ret = 0, cpu = smp_processor_id();
|
||||
int cpu = smp_processor_id();
|
||||
struct tick_device *td;
|
||||
|
||||
/*
|
||||
@@ -138,7 +138,6 @@ int tick_do_broadcast(cpumask_t mask)
|
||||
cpu_clear(cpu, mask);
|
||||
td = &per_cpu(tick_cpu_device, cpu);
|
||||
td->evtdev->event_handler(td->evtdev);
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
if (!cpus_empty(mask)) {
|
||||
@@ -151,9 +150,7 @@ int tick_do_broadcast(cpumask_t mask)
|
||||
cpu = first_cpu(mask);
|
||||
td = &per_cpu(tick_cpu_device, cpu);
|
||||
td->evtdev->broadcast(mask);
|
||||
ret = 1;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -70,8 +70,6 @@ static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
|
||||
* Broadcasting support
|
||||
*/
|
||||
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
|
||||
extern int tick_do_broadcast(cpumask_t mask);
|
||||
|
||||
extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
|
||||
extern int tick_check_broadcast_device(struct clock_event_device *dev);
|
||||
extern int tick_is_broadcast_device(struct clock_event_device *dev);
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
*
|
||||
* Started by: Thomas Gleixner and Ingo Molnar
|
||||
*
|
||||
* For licencing details see kernel-base/COPYING
|
||||
* Distribute under GPLv2.
|
||||
*/
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/err.h>
|
||||
@@ -143,6 +143,44 @@ void tick_nohz_update_jiffies(void)
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
void tick_nohz_stop_idle(int cpu)
|
||||
{
|
||||
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
|
||||
|
||||
if (ts->idle_active) {
|
||||
ktime_t now, delta;
|
||||
now = ktime_get();
|
||||
delta = ktime_sub(now, ts->idle_entrytime);
|
||||
ts->idle_lastupdate = now;
|
||||
ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
|
||||
ts->idle_active = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static ktime_t tick_nohz_start_idle(int cpu)
|
||||
{
|
||||
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
|
||||
ktime_t now, delta;
|
||||
|
||||
now = ktime_get();
|
||||
if (ts->idle_active) {
|
||||
delta = ktime_sub(now, ts->idle_entrytime);
|
||||
ts->idle_lastupdate = now;
|
||||
ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
|
||||
}
|
||||
ts->idle_entrytime = now;
|
||||
ts->idle_active = 1;
|
||||
return now;
|
||||
}
|
||||
|
||||
u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
|
||||
{
|
||||
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
|
||||
|
||||
*last_update_time = ktime_to_us(ts->idle_lastupdate);
|
||||
return ktime_to_us(ts->idle_sleeptime);
|
||||
}
|
||||
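get_cpu_idle_time_us() above exports the accumulated per-CPU idle time (and the time of the last update) in microseconds. A hedged sketch of a possible consumer; the helper below is invented for illustration and is not part of the patch:

/* Report how many microseconds the CPU spent idle since the previous
 * call. *prev_idle_us must be zero-initialised by the caller. */
static u64 idle_us_since_last_call(int cpu, u64 *prev_idle_us)
{
	u64 last_update_us, idle_us, delta;

	idle_us = get_cpu_idle_time_us(cpu, &last_update_us);
	delta = idle_us - *prev_idle_us;
	*prev_idle_us = idle_us;

	return delta;
}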
|
||||
/**
|
||||
* tick_nohz_stop_sched_tick - stop the idle tick from the idle task
|
||||
*
|
||||
@@ -153,14 +191,16 @@ void tick_nohz_update_jiffies(void)
|
||||
void tick_nohz_stop_sched_tick(void)
|
||||
{
|
||||
unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
|
||||
unsigned long rt_jiffies;
|
||||
struct tick_sched *ts;
|
||||
ktime_t last_update, expires, now, delta;
|
||||
ktime_t last_update, expires, now;
|
||||
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
|
||||
int cpu;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
cpu = smp_processor_id();
|
||||
now = tick_nohz_start_idle(cpu);
|
||||
ts = &per_cpu(tick_cpu_sched, cpu);
|
||||
|
||||
/*
|
||||
@@ -192,19 +232,7 @@ void tick_nohz_stop_sched_tick(void)
|
||||
}
|
||||
}
|
||||
|
||||
now = ktime_get();
|
||||
/*
|
||||
* When called from irq_exit we need to account the idle sleep time
|
||||
* correctly.
|
||||
*/
|
||||
if (ts->tick_stopped) {
|
||||
delta = ktime_sub(now, ts->idle_entrytime);
|
||||
ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
|
||||
}
|
||||
|
||||
ts->idle_entrytime = now;
|
||||
ts->idle_calls++;
|
||||
|
||||
/* Read jiffies and the time when jiffies were updated last */
|
||||
do {
|
||||
seq = read_seqbegin(&xtime_lock);
|
||||
@@ -216,6 +244,10 @@ void tick_nohz_stop_sched_tick(void)
|
||||
next_jiffies = get_next_timer_interrupt(last_jiffies);
|
||||
delta_jiffies = next_jiffies - last_jiffies;
|
||||
|
||||
rt_jiffies = rt_needs_cpu(cpu);
|
||||
if (rt_jiffies && rt_jiffies < delta_jiffies)
|
||||
delta_jiffies = rt_jiffies;
|
||||
|
||||
if (rcu_needs_cpu(cpu))
|
||||
delta_jiffies = 1;
|
||||
/*
|
||||
@@ -291,7 +323,7 @@ void tick_nohz_stop_sched_tick(void)
|
||||
/* Check, if the timer was already in the past */
|
||||
if (hrtimer_active(&ts->sched_timer))
|
||||
goto out;
|
||||
} else if(!tick_program_event(expires, 0))
|
||||
} else if (!tick_program_event(expires, 0))
|
||||
goto out;
|
||||
/*
|
||||
* We are past the event already. So we crossed a
|
||||
@@ -332,23 +364,22 @@ void tick_nohz_restart_sched_tick(void)
|
||||
int cpu = smp_processor_id();
|
||||
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
|
||||
unsigned long ticks;
|
||||
ktime_t now, delta;
|
||||
|
||||
if (!ts->tick_stopped)
|
||||
return;
|
||||
|
||||
/* Update jiffies first */
|
||||
now = ktime_get();
|
||||
ktime_t now;
|
||||
|
||||
local_irq_disable();
|
||||
tick_nohz_stop_idle(cpu);
|
||||
|
||||
if (!ts->tick_stopped) {
|
||||
local_irq_enable();
|
||||
return;
|
||||
}
|
||||
|
||||
/* Update jiffies first */
|
||||
select_nohz_load_balancer(0);
|
||||
now = ktime_get();
|
||||
tick_do_update_jiffies64(now);
|
||||
cpu_clear(cpu, nohz_cpu_mask);
|
||||
|
||||
/* Account the idle time */
|
||||
delta = ktime_sub(now, ts->idle_entrytime);
|
||||
ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
|
||||
|
||||
/*
|
||||
* We stopped the tick in idle. Update process times would miss the
|
||||
* time we slept as update_process_times does only a 1 tick
|
||||
@@ -502,14 +533,13 @@ static inline void tick_nohz_switch_to_nohz(void) { }
|
||||
*/
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
/*
|
||||
* We rearm the timer until we get disabled by the idle code
|
||||
* We rearm the timer until we get disabled by the idle code.
|
||||
* Called with interrupts disabled and timer->base->cpu_base->lock held.
|
||||
*/
|
||||
static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
|
||||
{
|
||||
struct tick_sched *ts =
|
||||
container_of(timer, struct tick_sched, sched_timer);
|
||||
struct hrtimer_cpu_base *base = timer->base->cpu_base;
|
||||
struct pt_regs *regs = get_irq_regs();
|
||||
ktime_t now = ktime_get();
|
||||
int cpu = smp_processor_id();
|
||||
@@ -547,15 +577,8 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
|
||||
touch_softlockup_watchdog();
|
||||
ts->idle_jiffies++;
|
||||
}
|
||||
/*
|
||||
* update_process_times() might take tasklist_lock, hence
|
||||
* drop the base lock. sched-tick hrtimers are per-CPU and
|
||||
* never accessible by userspace APIs, so this is safe to do.
|
||||
*/
|
||||
spin_unlock(&base->lock);
|
||||
update_process_times(user_mode(regs));
|
||||
profile_tick(CPU_PROFILING);
|
||||
spin_lock(&base->lock);
|
||||
}
|
||||
|
||||
/* Do not restart, when we are in the idle loop */
|
||||
|
||||
@@ -82,13 +82,12 @@ static inline s64 __get_nsec_offset(void)
|
||||
}
|
||||
|
||||
/**
|
||||
* __get_realtime_clock_ts - Returns the time of day in a timespec
|
||||
* getnstimeofday - Returns the time of day in a timespec
|
||||
* @ts: pointer to the timespec to be set
|
||||
*
|
||||
* Returns the time of day in a timespec. Used by
|
||||
* do_gettimeofday() and get_realtime_clock_ts().
|
||||
* Returns the time of day in a timespec.
|
||||
*/
|
||||
static inline void __get_realtime_clock_ts(struct timespec *ts)
|
||||
void getnstimeofday(struct timespec *ts)
|
||||
{
|
||||
unsigned long seq;
|
||||
s64 nsecs;
|
||||
@@ -104,30 +103,19 @@ static inline void __get_realtime_clock_ts(struct timespec *ts)
|
||||
timespec_add_ns(ts, nsecs);
|
||||
}
|
||||
|
||||
/**
|
||||
* getnstimeofday - Returns the time of day in a timespec
|
||||
* @ts: pointer to the timespec to be set
|
||||
*
|
||||
* Returns the time of day in a timespec.
|
||||
*/
|
||||
void getnstimeofday(struct timespec *ts)
|
||||
{
|
||||
__get_realtime_clock_ts(ts);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(getnstimeofday);
|
||||
|
||||
/**
|
||||
* do_gettimeofday - Returns the time of day in a timeval
|
||||
* @tv: pointer to the timeval to be set
|
||||
*
|
||||
* NOTE: Users should be converted to using get_realtime_clock_ts()
|
||||
* NOTE: Users should be converted to using getnstimeofday()
|
||||
*/
|
||||
void do_gettimeofday(struct timeval *tv)
|
||||
{
|
||||
struct timespec now;
|
||||
|
||||
__get_realtime_clock_ts(&now);
|
||||
getnstimeofday(&now);
|
||||
tv->tv_sec = now.tv_sec;
|
||||
tv->tv_usec = now.tv_nsec/1000;
|
||||
}
|
||||
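With getnstimeofday() now a first-class export (do_gettimeofday() above is reduced to a thin wrapper around it), nanosecond-resolution callers can use it directly. An illustrative helper, not part of the patch:

#include <linux/time.h>

/* Illustrative only: current wall-clock time in nanoseconds. */
static inline u64 example_now_ns(void)
{
	struct timespec ts;

	getnstimeofday(&ts);
	return (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}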
@@ -198,7 +186,8 @@ static void change_clocksource(void)
|
||||
|
||||
clock->error = 0;
|
||||
clock->xtime_nsec = 0;
|
||||
clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
|
||||
clocksource_calculate_interval(clock,
|
||||
(unsigned long)(current_tick_length()>>TICK_LENGTH_SHIFT));
|
||||
|
||||
tick_clock_notify();
|
||||
|
||||
@@ -255,7 +244,8 @@ void __init timekeeping_init(void)
|
||||
ntp_clear();
|
||||
|
||||
clock = clocksource_get_next();
|
||||
clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
|
||||
clocksource_calculate_interval(clock,
|
||||
(unsigned long)(current_tick_length()>>TICK_LENGTH_SHIFT));
|
||||
clock->cycle_last = clocksource_read(clock);
|
||||
|
||||
xtime.tv_sec = sec;
|
||||
@@ -335,9 +325,9 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
|
||||
|
||||
/* sysfs resume/suspend bits for timekeeping */
|
||||
static struct sysdev_class timekeeping_sysclass = {
|
||||
.name = "timekeeping",
|
||||
.resume = timekeeping_resume,
|
||||
.suspend = timekeeping_suspend,
|
||||
set_kset_name("timekeeping"),
|
||||
};
|
||||
|
||||
static struct sys_device device_timer = {
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
* the pid and cmdline from the owner process if applicable.
|
||||
*
|
||||
* Start/stop data collection:
|
||||
* # echo 1[0] >/proc/timer_stats
|
||||
* # echo [1|0] >/proc/timer_stats
|
||||
*
|
||||
* Display the information collected so far:
|
||||
* # cat /proc/timer_stats
|
||||
|
||||
@@ -58,59 +58,57 @@ EXPORT_SYMBOL(jiffies_64);
|
||||
#define TVN_MASK (TVN_SIZE - 1)
|
||||
#define TVR_MASK (TVR_SIZE - 1)
|
||||
|
||||
typedef struct tvec_s {
|
||||
struct tvec {
|
||||
struct list_head vec[TVN_SIZE];
|
||||
} tvec_t;
|
||||
};
|
||||
|
||||
typedef struct tvec_root_s {
|
||||
struct tvec_root {
|
||||
struct list_head vec[TVR_SIZE];
|
||||
} tvec_root_t;
|
||||
};
|
||||
|
||||
struct tvec_t_base_s {
|
||||
struct tvec_base {
|
||||
spinlock_t lock;
|
||||
struct timer_list *running_timer;
|
||||
unsigned long timer_jiffies;
|
||||
tvec_root_t tv1;
|
||||
tvec_t tv2;
|
||||
tvec_t tv3;
|
||||
tvec_t tv4;
|
||||
tvec_t tv5;
|
||||
struct tvec_root tv1;
|
||||
struct tvec tv2;
|
||||
struct tvec tv3;
|
||||
struct tvec tv4;
|
||||
struct tvec tv5;
|
||||
} ____cacheline_aligned;
|
||||
|
||||
typedef struct tvec_t_base_s tvec_base_t;
|
||||
|
||||
tvec_base_t boot_tvec_bases;
|
||||
struct tvec_base boot_tvec_bases;
|
||||
EXPORT_SYMBOL(boot_tvec_bases);
|
||||
static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
|
||||
static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
|
||||
|
||||
/*
|
||||
* Note that all tvec_bases is 2 byte aligned and lower bit of
|
||||
* Note that all tvec_bases are 2 byte aligned and lower bit of
|
||||
* base in timer_list is guaranteed to be zero. Use the LSB for
|
||||
* the new flag to indicate whether the timer is deferrable
|
||||
*/
|
||||
#define TBASE_DEFERRABLE_FLAG (0x1)
|
||||
|
||||
/* Functions below help us manage 'deferrable' flag */
|
||||
static inline unsigned int tbase_get_deferrable(tvec_base_t *base)
|
||||
static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
|
||||
{
|
||||
return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
|
||||
}
|
||||
|
||||
static inline tvec_base_t *tbase_get_base(tvec_base_t *base)
|
||||
static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
|
||||
{
|
||||
return ((tvec_base_t *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
|
||||
return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
|
||||
}
|
||||
|
||||
static inline void timer_set_deferrable(struct timer_list *timer)
|
||||
{
|
||||
timer->base = ((tvec_base_t *)((unsigned long)(timer->base) |
|
||||
timer->base = ((struct tvec_base *)((unsigned long)(timer->base) |
|
||||
TBASE_DEFERRABLE_FLAG));
|
||||
}
|
||||
|
||||
static inline void
|
||||
timer_set_base(struct timer_list *timer, tvec_base_t *new_base)
|
||||
timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
|
||||
{
|
||||
timer->base = (tvec_base_t *)((unsigned long)(new_base) |
|
||||
timer->base = (struct tvec_base *)((unsigned long)(new_base) |
|
||||
tbase_get_deferrable(timer->base));
|
||||
}
|
||||
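The tbase_* helpers above rely on a common idiom: because struct tvec_base is at least 2-byte aligned, the low bit of the pointer is always zero and can carry the deferrable flag. A generic, standalone sketch of the same idiom (names invented, not kernel code):

#include <stdint.h>

#define FLAG_BIT 0x1UL

static inline void *tag_ptr(void *p, int flag)	/* stash a 1-bit flag in the LSB */
{
	return (void *)((uintptr_t)p | (flag ? FLAG_BIT : 0));
}

static inline void *untag_ptr(void *p)		/* recover the real pointer */
{
	return (void *)((uintptr_t)p & ~FLAG_BIT);
}

static inline int ptr_flag(void *p)		/* read the flag back */
{
	return (int)((uintptr_t)p & FLAG_BIT);
}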
|
||||
@@ -246,7 +244,7 @@ unsigned long round_jiffies_relative(unsigned long j)
|
||||
EXPORT_SYMBOL_GPL(round_jiffies_relative);
|
||||
|
||||
|
||||
static inline void set_running_timer(tvec_base_t *base,
|
||||
static inline void set_running_timer(struct tvec_base *base,
|
||||
struct timer_list *timer)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
@@ -254,7 +252,7 @@ static inline void set_running_timer(tvec_base_t *base,
|
||||
#endif
|
||||
}
|
||||
|
||||
static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
|
||||
static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
|
||||
{
|
||||
unsigned long expires = timer->expires;
|
||||
unsigned long idx = expires - base->timer_jiffies;
|
||||
@@ -371,14 +369,14 @@ static inline void detach_timer(struct timer_list *timer,
|
||||
* possible to set timer->base = NULL and drop the lock: the timer remains
|
||||
* locked.
|
||||
*/
|
||||
static tvec_base_t *lock_timer_base(struct timer_list *timer,
|
||||
static struct tvec_base *lock_timer_base(struct timer_list *timer,
|
||||
unsigned long *flags)
|
||||
__acquires(timer->base->lock)
|
||||
{
|
||||
tvec_base_t *base;
|
||||
struct tvec_base *base;
|
||||
|
||||
for (;;) {
|
||||
tvec_base_t *prelock_base = timer->base;
|
||||
struct tvec_base *prelock_base = timer->base;
|
||||
base = tbase_get_base(prelock_base);
|
||||
if (likely(base != NULL)) {
|
||||
spin_lock_irqsave(&base->lock, *flags);
|
||||
@@ -393,7 +391,7 @@ static tvec_base_t *lock_timer_base(struct timer_list *timer,
|
||||
|
||||
int __mod_timer(struct timer_list *timer, unsigned long expires)
|
||||
{
|
||||
tvec_base_t *base, *new_base;
|
||||
struct tvec_base *base, *new_base;
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
|
||||
@@ -445,7 +443,7 @@ EXPORT_SYMBOL(__mod_timer);
|
||||
*/
|
||||
void add_timer_on(struct timer_list *timer, int cpu)
|
||||
{
|
||||
tvec_base_t *base = per_cpu(tvec_bases, cpu);
|
||||
struct tvec_base *base = per_cpu(tvec_bases, cpu);
|
||||
unsigned long flags;
|
||||
|
||||
timer_stats_timer_set_start_info(timer);
|
||||
@@ -508,7 +506,7 @@ EXPORT_SYMBOL(mod_timer);
|
||||
*/
|
||||
int del_timer(struct timer_list *timer)
|
||||
{
|
||||
tvec_base_t *base;
|
||||
struct tvec_base *base;
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
|
||||
@@ -539,7 +537,7 @@ EXPORT_SYMBOL(del_timer);
|
||||
*/
|
||||
int try_to_del_timer_sync(struct timer_list *timer)
|
||||
{
|
||||
tvec_base_t *base;
|
||||
struct tvec_base *base;
|
||||
unsigned long flags;
|
||||
int ret = -1;
|
||||
|
||||
@@ -591,7 +589,7 @@ int del_timer_sync(struct timer_list *timer)
|
||||
EXPORT_SYMBOL(del_timer_sync);
|
||||
#endif
|
||||
|
||||
static int cascade(tvec_base_t *base, tvec_t *tv, int index)
|
||||
static int cascade(struct tvec_base *base, struct tvec *tv, int index)
|
||||
{
|
||||
/* cascade all the timers from tv up one level */
|
||||
struct timer_list *timer, *tmp;
|
||||
@@ -620,7 +618,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index)
|
||||
* This function cascades all vectors and executes all expired timer
|
||||
* vectors.
|
||||
*/
|
||||
static inline void __run_timers(tvec_base_t *base)
|
||||
static inline void __run_timers(struct tvec_base *base)
|
||||
{
|
||||
struct timer_list *timer;
|
||||
|
||||
@@ -657,7 +655,7 @@ static inline void __run_timers(tvec_base_t *base)
|
||||
int preempt_count = preempt_count();
|
||||
fn(data);
|
||||
if (preempt_count != preempt_count()) {
|
||||
printk(KERN_WARNING "huh, entered %p "
|
||||
printk(KERN_ERR "huh, entered %p "
|
||||
"with preempt_count %08x, exited"
|
||||
" with %08x?\n",
|
||||
fn, preempt_count,
|
||||
@@ -678,13 +676,13 @@ static inline void __run_timers(tvec_base_t *base)
|
||||
* is used on S/390 to stop all activity when a cpu is idle.
|
||||
* This function needs to be called with interrupts disabled.
|
||||
*/
|
||||
static unsigned long __next_timer_interrupt(tvec_base_t *base)
|
||||
static unsigned long __next_timer_interrupt(struct tvec_base *base)
|
||||
{
|
||||
unsigned long timer_jiffies = base->timer_jiffies;
|
||||
unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
|
||||
int index, slot, array, found = 0;
|
||||
struct timer_list *nte;
|
||||
tvec_t *varray[4];
|
||||
struct tvec *varray[4];
|
||||
|
||||
/* Look for timer events in tv1. */
|
||||
index = slot = timer_jiffies & TVR_MASK;
|
||||
@@ -716,7 +714,7 @@ cascade:
|
||||
varray[3] = &base->tv5;
|
||||
|
||||
for (array = 0; array < 4; array++) {
|
||||
tvec_t *varp = varray[array];
|
||||
struct tvec *varp = varray[array];
|
||||
|
||||
index = slot = timer_jiffies & TVN_MASK;
|
||||
do {
|
||||
@@ -795,7 +793,7 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
|
||||
*/
|
||||
unsigned long get_next_timer_interrupt(unsigned long now)
|
||||
{
|
||||
tvec_base_t *base = __get_cpu_var(tvec_bases);
|
||||
struct tvec_base *base = __get_cpu_var(tvec_bases);
|
||||
unsigned long expires;
|
||||
|
||||
spin_lock(&base->lock);
|
||||
@@ -894,9 +892,9 @@ static inline void calc_load(unsigned long ticks)
|
||||
*/
|
||||
static void run_timer_softirq(struct softirq_action *h)
|
||||
{
|
||||
tvec_base_t *base = __get_cpu_var(tvec_bases);
|
||||
struct tvec_base *base = __get_cpu_var(tvec_bases);
|
||||
|
||||
hrtimer_run_queues();
|
||||
hrtimer_run_pending();
|
||||
|
||||
if (time_after_eq(jiffies, base->timer_jiffies))
|
||||
__run_timers(base);
|
||||
@@ -907,6 +905,7 @@ static void run_timer_softirq(struct softirq_action *h)
|
||||
*/
|
||||
void run_local_timers(void)
|
||||
{
|
||||
hrtimer_run_queues();
|
||||
raise_softirq(TIMER_SOFTIRQ);
|
||||
softlockup_tick();
|
||||
}
|
||||
@@ -1222,7 +1221,7 @@ static struct lock_class_key base_lock_keys[NR_CPUS];
|
||||
static int __cpuinit init_timers_cpu(int cpu)
|
||||
{
|
||||
int j;
|
||||
tvec_base_t *base;
|
||||
struct tvec_base *base;
|
||||
static char __cpuinitdata tvec_base_done[NR_CPUS];
|
||||
|
||||
if (!tvec_base_done[cpu]) {
|
||||
@@ -1277,7 +1276,7 @@ static int __cpuinit init_timers_cpu(int cpu)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
|
||||
static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
|
||||
{
|
||||
struct timer_list *timer;
|
||||
|
||||
@@ -1291,8 +1290,8 @@ static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
|
||||
|
||||
static void __cpuinit migrate_timers(int cpu)
|
||||
{
|
||||
tvec_base_t *old_base;
|
||||
tvec_base_t *new_base;
|
||||
struct tvec_base *old_base;
|
||||
struct tvec_base *new_base;
|
||||
int i;
|
||||
|
||||
BUG_ON(cpu_online(cpu));
|
||||
|
||||
152
kernel/user.c
@@ -115,7 +115,7 @@ static void sched_switch_user(struct task_struct *p) { }
|
||||
|
||||
#if defined(CONFIG_FAIR_USER_SCHED) && defined(CONFIG_SYSFS)
|
||||
|
||||
static struct kobject uids_kobject; /* represents /sys/kernel/uids directory */
|
||||
static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */
|
||||
static DEFINE_MUTEX(uids_mutex);
|
||||
|
||||
static inline void uids_mutex_lock(void)
|
||||
@@ -128,86 +128,83 @@ static inline void uids_mutex_unlock(void)
|
||||
mutex_unlock(&uids_mutex);
|
||||
}
|
||||
|
||||
/* return cpu shares held by the user */
|
||||
static ssize_t cpu_shares_show(struct kset *kset, char *buffer)
|
||||
/* uid directory attributes */
|
||||
static ssize_t cpu_shares_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct user_struct *up = container_of(kset, struct user_struct, kset);
|
||||
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
|
||||
|
||||
return sprintf(buffer, "%lu\n", sched_group_shares(up->tg));
|
||||
return sprintf(buf, "%lu\n", sched_group_shares(up->tg));
|
||||
}
|
||||
|
||||
/* modify cpu shares held by the user */
|
||||
static ssize_t cpu_shares_store(struct kset *kset, const char *buffer,
|
||||
size_t size)
|
||||
static ssize_t cpu_shares_store(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
const char *buf, size_t size)
|
||||
{
|
||||
struct user_struct *up = container_of(kset, struct user_struct, kset);
|
||||
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
|
||||
unsigned long shares;
|
||||
int rc;
|
||||
|
||||
sscanf(buffer, "%lu", &shares);
|
||||
sscanf(buf, "%lu", &shares);
|
||||
|
||||
rc = sched_group_set_shares(up->tg, shares);
|
||||
|
||||
return (rc ? rc : size);
|
||||
}
|
||||
|
||||
static void user_attr_init(struct subsys_attribute *sa, char *name, int mode)
|
||||
static struct kobj_attribute cpu_share_attr =
|
||||
__ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store);
|
||||
|
||||
/* default attributes per uid directory */
|
||||
static struct attribute *uids_attributes[] = {
|
||||
&cpu_share_attr.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
/* the lifetime of user_struct is not managed by the core (now) */
|
||||
static void uids_release(struct kobject *kobj)
|
||||
{
|
||||
sa->attr.name = name;
|
||||
sa->attr.mode = mode;
|
||||
sa->show = cpu_shares_show;
|
||||
sa->store = cpu_shares_store;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Create "/sys/kernel/uids/<uid>" directory and
|
||||
* "/sys/kernel/uids/<uid>/cpu_share" file for this user.
|
||||
*/
|
||||
static int user_kobject_create(struct user_struct *up)
|
||||
static struct kobj_type uids_ktype = {
|
||||
.sysfs_ops = &kobj_sysfs_ops,
|
||||
.default_attrs = uids_attributes,
|
||||
.release = uids_release,
|
||||
};
|
||||
|
||||
/* create /sys/kernel/uids/<uid>/cpu_share file for this user */
|
||||
static int uids_user_create(struct user_struct *up)
|
||||
{
|
||||
struct kset *kset = &up->kset;
|
||||
struct kobject *kobj = &kset->kobj;
|
||||
struct kobject *kobj = &up->kobj;
|
||||
int error;
|
||||
|
||||
memset(kset, 0, sizeof(struct kset));
|
||||
kobj->parent = &uids_kobject; /* create under /sys/kernel/uids dir */
|
||||
kobject_set_name(kobj, "%d", up->uid);
|
||||
kset_init(kset);
|
||||
user_attr_init(&up->user_attr, "cpu_share", 0644);
|
||||
|
||||
error = kobject_add(kobj);
|
||||
if (error)
|
||||
memset(kobj, 0, sizeof(struct kobject));
|
||||
kobj->kset = uids_kset;
|
||||
error = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid);
|
||||
if (error) {
|
||||
kobject_put(kobj);
|
||||
goto done;
|
||||
|
||||
error = sysfs_create_file(kobj, &up->user_attr.attr);
|
||||
if (error)
|
||||
kobject_del(kobj);
|
||||
}
|
||||
|
||||
kobject_uevent(kobj, KOBJ_ADD);
|
||||
|
||||
done:
|
||||
return error;
|
||||
}
|
||||
|
||||
/* create these in sysfs filesystem:
|
||||
/* create these entries in sysfs:
|
||||
* "/sys/kernel/uids" directory
|
||||
* "/sys/kernel/uids/0" directory (for root user)
|
||||
* "/sys/kernel/uids/0/cpu_share" file (for root user)
|
||||
*/
|
||||
int __init uids_kobject_init(void)
|
||||
int __init uids_sysfs_init(void)
|
||||
{
|
||||
int error;
|
||||
uids_kset = kset_create_and_add("uids", NULL, kernel_kobj);
|
||||
if (!uids_kset)
|
||||
return -ENOMEM;
|
||||
|
||||
/* create under /sys/kernel dir */
|
||||
uids_kobject.parent = &kernel_subsys.kobj;
|
||||
uids_kobject.kset = &kernel_subsys;
|
||||
kobject_set_name(&uids_kobject, "uids");
|
||||
kobject_init(&uids_kobject);
|
||||
|
||||
error = kobject_add(&uids_kobject);
|
||||
if (!error)
|
||||
error = user_kobject_create(&root_user);
|
||||
|
||||
return error;
|
||||
return uids_user_create(&root_user);
|
||||
}
|
||||
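uids_sysfs_init() and uids_user_create() above end up exposing one directory per uid under /sys/kernel/uids, each with a cpu_share attribute. A userspace sketch of poking it (assumes CONFIG_FAIR_USER_SCHED and sysfs mounted at /sys; the value 2048 is arbitrary; not part of the patch):

#include <stdio.h>

int main(void)
{
	unsigned long shares;
	FILE *f = fopen("/sys/kernel/uids/0/cpu_share", "r+");

	if (!f)
		return 1;
	if (fscanf(f, "%lu", &shares) == 1)
		printf("root's current cpu_share: %lu\n", shares);
	rewind(f);
	fprintf(f, "2048\n");	/* hand root's task group a larger share */
	fclose(f);
	return 0;
}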
|
||||
/* work function to remove sysfs directory for a user and free up
|
||||
@@ -216,7 +213,6 @@ int __init uids_kobject_init(void)
|
||||
static void remove_user_sysfs_dir(struct work_struct *w)
|
||||
{
|
||||
struct user_struct *up = container_of(w, struct user_struct, work);
|
||||
struct kobject *kobj = &up->kset.kobj;
|
||||
unsigned long flags;
|
||||
int remove_user = 0;
|
||||
|
||||
@@ -238,9 +234,9 @@ static void remove_user_sysfs_dir(struct work_struct *w)
|
||||
if (!remove_user)
|
||||
goto done;
|
||||
|
||||
sysfs_remove_file(kobj, &up->user_attr.attr);
|
||||
kobject_uevent(kobj, KOBJ_REMOVE);
|
||||
kobject_del(kobj);
|
||||
kobject_uevent(&up->kobj, KOBJ_REMOVE);
|
||||
kobject_del(&up->kobj);
|
||||
kobject_put(&up->kobj);
|
||||
|
||||
sched_destroy_user(up);
|
||||
key_put(up->uid_keyring);
|
||||
@@ -267,7 +263,8 @@ static inline void free_user(struct user_struct *up, unsigned long flags)
|
||||
|
||||
#else /* CONFIG_FAIR_USER_SCHED && CONFIG_SYSFS */
|
||||
|
||||
static inline int user_kobject_create(struct user_struct *up) { return 0; }
|
||||
int uids_sysfs_init(void) { return 0; }
|
||||
static inline int uids_user_create(struct user_struct *up) { return 0; }
|
||||
static inline void uids_mutex_lock(void) { }
|
||||
static inline void uids_mutex_unlock(void) { }
|
||||
|
||||
@@ -322,9 +319,9 @@ void free_uid(struct user_struct *up)
|
||||
struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
|
||||
{
|
||||
struct hlist_head *hashent = uidhashentry(ns, uid);
|
||||
struct user_struct *up;
|
||||
struct user_struct *up, *new;
|
||||
|
||||
/* Make uid_hash_find() + user_kobject_create() + uid_hash_insert()
|
||||
/* Make uid_hash_find() + uids_user_create() + uid_hash_insert()
|
||||
* atomic.
|
||||
*/
|
||||
uids_mutex_lock();
|
||||
@@ -334,13 +331,9 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
|
||||
spin_unlock_irq(&uidhash_lock);
|
||||
|
||||
if (!up) {
|
||||
struct user_struct *new;
|
||||
|
||||
new = kmem_cache_alloc(uid_cachep, GFP_KERNEL);
|
||||
if (!new) {
|
||||
uids_mutex_unlock();
|
||||
return NULL;
|
||||
}
|
||||
if (!new)
|
||||
goto out_unlock;
|
||||
|
||||
new->uid = uid;
|
||||
atomic_set(&new->__count, 1);
|
||||
@@ -356,28 +349,14 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
|
||||
#endif
|
||||
new->locked_shm = 0;
|
||||
|
||||
if (alloc_uid_keyring(new, current) < 0) {
|
||||
kmem_cache_free(uid_cachep, new);
|
||||
uids_mutex_unlock();
|
||||
return NULL;
|
||||
}
|
||||
if (alloc_uid_keyring(new, current) < 0)
|
||||
goto out_free_user;
|
||||
|
||||
if (sched_create_user(new) < 0) {
|
||||
key_put(new->uid_keyring);
|
||||
key_put(new->session_keyring);
|
||||
kmem_cache_free(uid_cachep, new);
|
||||
uids_mutex_unlock();
|
||||
return NULL;
|
||||
}
|
||||
if (sched_create_user(new) < 0)
|
||||
goto out_put_keys;
|
||||
|
||||
if (user_kobject_create(new)) {
|
||||
sched_destroy_user(new);
|
||||
key_put(new->uid_keyring);
|
||||
key_put(new->session_keyring);
|
||||
kmem_cache_free(uid_cachep, new);
|
||||
uids_mutex_unlock();
|
||||
return NULL;
|
||||
}
|
||||
if (uids_user_create(new))
|
||||
goto out_destroy_sched;
|
||||
|
||||
/*
|
||||
* Before adding this, check whether we raced
|
||||
@@ -405,6 +384,17 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
|
||||
uids_mutex_unlock();
|
||||
|
||||
return up;
|
||||
|
||||
out_destroy_sched:
|
||||
sched_destroy_user(new);
|
||||
out_put_keys:
|
||||
key_put(new->uid_keyring);
|
||||
key_put(new->session_keyring);
|
||||
out_free_user:
|
||||
kmem_cache_free(uid_cachep, new);
|
||||
out_unlock:
|
||||
uids_mutex_unlock();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void switch_uid(struct user_struct *new_user)
|
||||
|
||||
@@ -67,9 +67,8 @@ struct workqueue_struct {
|
||||
#endif
|
||||
};
|
||||
|
||||
/* All the per-cpu workqueues on the system, for hotplug cpu to add/remove
|
||||
threads to each one as cpus come/go. */
|
||||
static DEFINE_MUTEX(workqueue_mutex);
|
||||
/* Serializes the accesses to the list of workqueues. */
|
||||
static DEFINE_SPINLOCK(workqueue_lock);
|
||||
static LIST_HEAD(workqueues);
|
||||
|
||||
static int singlethread_cpu __read_mostly;
|
||||
@@ -592,8 +591,6 @@ EXPORT_SYMBOL(schedule_delayed_work_on);
|
||||
* Returns zero on success.
|
||||
* Returns -ve errno on failure.
|
||||
*
|
||||
* Appears to be racy against CPU hotplug.
|
||||
*
|
||||
* schedule_on_each_cpu() is very slow.
|
||||
*/
|
||||
int schedule_on_each_cpu(work_func_t func)
|
||||
@@ -605,7 +602,7 @@ int schedule_on_each_cpu(work_func_t func)
|
||||
if (!works)
|
||||
return -ENOMEM;
|
||||
|
||||
preempt_disable(); /* CPU hotplug */
|
||||
get_online_cpus();
|
||||
for_each_online_cpu(cpu) {
|
||||
struct work_struct *work = per_cpu_ptr(works, cpu);
|
||||
|
||||
@@ -613,8 +610,8 @@ int schedule_on_each_cpu(work_func_t func)
|
||||
set_bit(WORK_STRUCT_PENDING, work_data_bits(work));
|
||||
__queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work);
|
||||
}
|
||||
preempt_enable();
|
||||
flush_workqueue(keventd_wq);
|
||||
put_online_cpus();
|
||||
free_percpu(works);
|
||||
return 0;
|
||||
}
|
||||
@@ -750,8 +747,10 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
|
||||
err = create_workqueue_thread(cwq, singlethread_cpu);
|
||||
start_workqueue_thread(cwq, -1);
|
||||
} else {
|
||||
mutex_lock(&workqueue_mutex);
|
||||
get_online_cpus();
|
||||
spin_lock(&workqueue_lock);
|
||||
list_add(&wq->list, &workqueues);
|
||||
spin_unlock(&workqueue_lock);
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
cwq = init_cpu_workqueue(wq, cpu);
|
||||
@@ -760,7 +759,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
|
||||
err = create_workqueue_thread(cwq, cpu);
|
||||
start_workqueue_thread(cwq, cpu);
|
||||
}
|
||||
mutex_unlock(&workqueue_mutex);
|
||||
put_online_cpus();
|
||||
}
|
||||
|
||||
if (err) {
|
||||
@@ -775,7 +774,7 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
|
||||
{
|
||||
/*
|
||||
* Our caller is either destroy_workqueue() or CPU_DEAD,
|
||||
* workqueue_mutex protects cwq->thread
|
||||
* get_online_cpus() protects cwq->thread.
|
||||
*/
|
||||
if (cwq->thread == NULL)
|
||||
return;
|
||||
@@ -810,9 +809,11 @@ void destroy_workqueue(struct workqueue_struct *wq)
|
||||
struct cpu_workqueue_struct *cwq;
|
||||
int cpu;
|
||||
|
||||
mutex_lock(&workqueue_mutex);
|
||||
get_online_cpus();
|
||||
spin_lock(&workqueue_lock);
|
||||
list_del(&wq->list);
|
||||
mutex_unlock(&workqueue_mutex);
|
||||
spin_unlock(&workqueue_lock);
|
||||
put_online_cpus();
|
||||
|
||||
for_each_cpu_mask(cpu, *cpu_map) {
|
||||
cwq = per_cpu_ptr(wq->cpu_wq, cpu);
|
||||
@@ -835,13 +836,6 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
|
||||
action &= ~CPU_TASKS_FROZEN;
|
||||
|
||||
switch (action) {
|
||||
case CPU_LOCK_ACQUIRE:
|
||||
mutex_lock(&workqueue_mutex);
|
||||
return NOTIFY_OK;
|
||||
|
||||
case CPU_LOCK_RELEASE:
|
||||
mutex_unlock(&workqueue_mutex);
|
||||
return NOTIFY_OK;
|
||||
|
||||
case CPU_UP_PREPARE:
|
||||
cpu_set(cpu, cpu_populated_map);
|
||||
@@ -854,7 +848,8 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
|
||||
case CPU_UP_PREPARE:
|
||||
if (!create_workqueue_thread(cwq, cpu))
|
||||
break;
|
||||
printk(KERN_ERR "workqueue for %i failed\n", cpu);
|
||||
printk(KERN_ERR "workqueue [%s] for %i failed\n",
|
||||
wq->name, cpu);
|
||||
return NOTIFY_BAD;
|
||||
|
||||
case CPU_ONLINE:
|
||||
|
||||