Merge commit 'v2.6.28-rc6' into irq/urgent

This commit is contained in:
Ingo Molnar
2008-11-23 10:52:33 +01:00
668 changed files with 19601 additions and 6860 deletions

View File

@@ -11,8 +11,6 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o sched_clock.o
CFLAGS_REMOVE_sched.o = -mno-spe
ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files
CFLAGS_REMOVE_lockdep.o = -pg
@@ -21,7 +19,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
CFLAGS_REMOVE_rtmutex-debug.o = -pg
CFLAGS_REMOVE_cgroup-debug.o = -pg
CFLAGS_REMOVE_sched_clock.o = -pg
CFLAGS_REMOVE_sched.o = -mno-spe -pg
CFLAGS_REMOVE_sched.o = -pg
endif
obj-$(CONFIG_FREEZER) += freezer.o

View File

@@ -24,6 +24,7 @@ struct audit_chunk {
struct list_head trees; /* with root here */
int dead;
int count;
atomic_long_t refs;
struct rcu_head head;
struct node {
struct list_head list;
@@ -56,7 +57,8 @@ static LIST_HEAD(prune_list);
* tree is refcounted; one reference for "some rules on rules_list refer to
* it", one for each chunk with pointer to it.
*
* chunk is refcounted by embedded inotify_watch.
* chunk is refcounted by embedded inotify_watch + .refs (non-zero refcount
* of watch contributes 1 to .refs).
*
* node.index allows to get from node.list to containing chunk.
* MSB of that sucker is stolen to mark taggings that we might have to
@@ -121,6 +123,7 @@ static struct audit_chunk *alloc_chunk(int count)
INIT_LIST_HEAD(&chunk->hash);
INIT_LIST_HEAD(&chunk->trees);
chunk->count = count;
atomic_long_set(&chunk->refs, 1);
for (i = 0; i < count; i++) {
INIT_LIST_HEAD(&chunk->owners[i].list);
chunk->owners[i].index = i;
@@ -129,9 +132,8 @@ static struct audit_chunk *alloc_chunk(int count)
return chunk;
}
static void __free_chunk(struct rcu_head *rcu)
static void free_chunk(struct audit_chunk *chunk)
{
struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
int i;
for (i = 0; i < chunk->count; i++) {
@@ -141,14 +143,16 @@ static void __free_chunk(struct rcu_head *rcu)
kfree(chunk);
}
static inline void free_chunk(struct audit_chunk *chunk)
{
call_rcu(&chunk->head, __free_chunk);
}
void audit_put_chunk(struct audit_chunk *chunk)
{
put_inotify_watch(&chunk->watch);
if (atomic_long_dec_and_test(&chunk->refs))
free_chunk(chunk);
}
static void __put_chunk(struct rcu_head *rcu)
{
struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
audit_put_chunk(chunk);
}
enum {HASH_SIZE = 128};
@@ -176,7 +180,7 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode)
list_for_each_entry_rcu(p, list, hash) {
if (p->watch.inode == inode) {
get_inotify_watch(&p->watch);
atomic_long_inc(&p->refs);
return p;
}
}
@@ -194,17 +198,49 @@ int audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree)
/* tagging and untagging inodes with trees */
static void untag_chunk(struct audit_chunk *chunk, struct node *p)
static struct audit_chunk *find_chunk(struct node *p)
{
int index = p->index & ~(1U<<31);
p -= index;
return container_of(p, struct audit_chunk, owners[0]);
}
static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
struct audit_chunk *new;
struct audit_tree *owner;
int size = chunk->count - 1;
int i, j;
if (!pin_inotify_watch(&chunk->watch)) {
/*
* Filesystem is shutting down; all watches are getting
* evicted, just take it off the node list for this
* tree and let the eviction logics take care of the
* rest.
*/
owner = p->owner;
if (owner->root == chunk) {
list_del_init(&owner->same_root);
owner->root = NULL;
}
list_del_init(&p->list);
p->owner = NULL;
put_tree(owner);
return;
}
spin_unlock(&hash_lock);
/*
* pin_inotify_watch() succeeded, so the watch won't go away
* from under us.
*/
mutex_lock(&chunk->watch.inode->inotify_mutex);
if (chunk->dead) {
mutex_unlock(&chunk->watch.inode->inotify_mutex);
return;
goto out;
}
owner = p->owner;
@@ -221,7 +257,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p)
inotify_evict_watch(&chunk->watch);
mutex_unlock(&chunk->watch.inode->inotify_mutex);
put_inotify_watch(&chunk->watch);
return;
goto out;
}
new = alloc_chunk(size);
@@ -263,7 +299,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p)
inotify_evict_watch(&chunk->watch);
mutex_unlock(&chunk->watch.inode->inotify_mutex);
put_inotify_watch(&chunk->watch);
return;
goto out;
Fallback:
// do the best we can
@@ -277,6 +313,9 @@ Fallback:
put_tree(owner);
spin_unlock(&hash_lock);
mutex_unlock(&chunk->watch.inode->inotify_mutex);
out:
unpin_inotify_watch(&chunk->watch);
spin_lock(&hash_lock);
}
static int create_chunk(struct inode *inode, struct audit_tree *tree)
@@ -387,13 +426,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
return 0;
}
static struct audit_chunk *find_chunk(struct node *p)
{
int index = p->index & ~(1U<<31);
p -= index;
return container_of(p, struct audit_chunk, owners[0]);
}
static void kill_rules(struct audit_tree *tree)
{
struct audit_krule *rule, *next;
@@ -431,17 +463,10 @@ static void prune_one(struct audit_tree *victim)
spin_lock(&hash_lock);
while (!list_empty(&victim->chunks)) {
struct node *p;
struct audit_chunk *chunk;
p = list_entry(victim->chunks.next, struct node, list);
chunk = find_chunk(p);
get_inotify_watch(&chunk->watch);
spin_unlock(&hash_lock);
untag_chunk(chunk, p);
put_inotify_watch(&chunk->watch);
spin_lock(&hash_lock);
untag_chunk(p);
}
spin_unlock(&hash_lock);
put_tree(victim);
@@ -469,7 +494,6 @@ static void trim_marked(struct audit_tree *tree)
while (!list_empty(&tree->chunks)) {
struct node *node;
struct audit_chunk *chunk;
node = list_entry(tree->chunks.next, struct node, list);
@@ -477,14 +501,7 @@ static void trim_marked(struct audit_tree *tree)
if (!(node->index & (1U<<31)))
break;
chunk = find_chunk(node);
get_inotify_watch(&chunk->watch);
spin_unlock(&hash_lock);
untag_chunk(chunk, node);
put_inotify_watch(&chunk->watch);
spin_lock(&hash_lock);
untag_chunk(node);
}
if (!tree->root && !tree->goner) {
tree->goner = 1;
@@ -878,7 +895,7 @@ static void handle_event(struct inotify_watch *watch, u32 wd, u32 mask,
static void destroy_watch(struct inotify_watch *watch)
{
struct audit_chunk *chunk = container_of(watch, struct audit_chunk, watch);
free_chunk(chunk);
call_rcu(&chunk->head, __put_chunk);
}
static const struct inotify_operations rtree_inotify_ops = {

View File

@@ -1094,8 +1094,8 @@ static void audit_inotify_unregister(struct list_head *in_list)
list_for_each_entry_safe(p, n, in_list, ilist) {
list_del(&p->ilist);
inotify_rm_watch(audit_ih, &p->wdata);
/* the put matching the get in audit_do_del_rule() */
put_inotify_watch(&p->wdata);
/* the unpin matching the pin in audit_do_del_rule() */
unpin_inotify_watch(&p->wdata);
}
}
@@ -1389,9 +1389,13 @@ static inline int audit_del_rule(struct audit_entry *entry,
/* Put parent on the inotify un-registration
* list. Grab a reference before releasing
* audit_filter_mutex, to be released in
* audit_inotify_unregister(). */
list_add(&parent->ilist, &inotify_list);
get_inotify_watch(&parent->wdata);
* audit_inotify_unregister().
* If filesystem is going away, just leave
* the sucker alone, eviction will take
* care of it.
*/
if (pin_inotify_watch(&parent->wdata))
list_add(&parent->ilist, &inotify_list);
}
}
}

View File

@@ -2039,10 +2039,13 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
struct cgroup *cgrp;
struct cgroup_iter it;
struct task_struct *tsk;
/*
* Validate dentry by checking the superblock operations
* Validate dentry by checking the superblock operations,
* and make sure it's a directory.
*/
if (dentry->d_sb->s_op != &cgroup_ops)
if (dentry->d_sb->s_op != &cgroup_ops ||
!S_ISDIR(dentry->d_inode->i_mode))
goto err;
ret = 0;
@@ -2472,10 +2475,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
mutex_unlock(&cgroup_mutex);
return -EBUSY;
}
parent = cgrp->parent;
root = cgrp->root;
sb = root->sb;
mutex_unlock(&cgroup_mutex);
/*
* Call pre_destroy handlers of subsys. Notify subsystems
@@ -2483,7 +2483,14 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
*/
cgroup_call_pre_destroy(cgrp);
if (cgroup_has_css_refs(cgrp)) {
mutex_lock(&cgroup_mutex);
parent = cgrp->parent;
root = cgrp->root;
sb = root->sb;
if (atomic_read(&cgrp->count)
|| !list_empty(&cgrp->children)
|| cgroup_has_css_refs(cgrp)) {
mutex_unlock(&cgroup_mutex);
return -EBUSY;
}

View File

@@ -184,9 +184,20 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
{
struct freezer *freezer;
task_lock(task);
/*
* No lock is needed, since the task isn't on tasklist yet,
* so it can't be moved to another cgroup, which means the
* freezer won't be removed and will be valid during this
* function call.
*/
freezer = task_freezer(task);
task_unlock(task);
/*
* The root cgroup is non-freezable, so we can skip the
* following check.
*/
if (!freezer->css.cgroup->parent)
return;
spin_lock_irq(&freezer->lock);
BUG_ON(freezer->state == CGROUP_FROZEN);
@@ -331,7 +342,7 @@ static int freezer_write(struct cgroup *cgroup,
else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0)
goal_state = CGROUP_FROZEN;
else
return -EIO;
return -EINVAL;
if (!cgroup_lock_live_group(cgroup))
return -ENODEV;
@@ -350,6 +361,8 @@ static struct cftype files[] = {
static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup)
{
if (!cgroup->parent)
return 0;
return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
}

View File

@@ -36,6 +36,7 @@
#include <linux/list.h>
#include <linux/mempolicy.h>
#include <linux/mm.h>
#include <linux/memory.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/namei.h>
@@ -587,7 +588,6 @@ static int generate_sched_domains(cpumask_t **domains,
int ndoms; /* number of sched domains in result */
int nslot; /* next empty doms[] cpumask_t slot */
ndoms = 0;
doms = NULL;
dattr = NULL;
csa = NULL;
@@ -674,10 +674,8 @@ restart:
* Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
*/
doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
if (!doms) {
ndoms = 0;
if (!doms)
goto done;
}
/*
* The rest of the code, including the scheduler, can deal with
@@ -732,6 +730,13 @@ restart:
done:
kfree(csa);
/*
* Fallback to the default domain if kmalloc() failed.
* See comments in partition_sched_domains().
*/
if (doms == NULL)
ndoms = 1;
*domains = doms;
*attributes = dattr;
return ndoms;
@@ -2011,12 +2016,23 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
* Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
* See also the previous routine cpuset_track_online_cpus().
*/
void cpuset_track_online_nodes(void)
static int cpuset_track_online_nodes(struct notifier_block *self,
unsigned long action, void *arg)
{
cgroup_lock();
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
scan_for_empty_cpusets(&top_cpuset);
switch (action) {
case MEM_ONLINE:
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
break;
case MEM_OFFLINE:
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
scan_for_empty_cpusets(&top_cpuset);
break;
default:
break;
}
cgroup_unlock();
return NOTIFY_OK;
}
#endif
@@ -2032,6 +2048,7 @@ void __init cpuset_init_smp(void)
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
hotcpu_notifier(cpuset_track_online_cpus, 0);
hotplug_memory_notifier(cpuset_track_online_nodes, 10);
}
/**

View File

@@ -40,7 +40,6 @@
#include <linux/cn_proc.h>
#include <linux/mutex.h>
#include <linux/futex.h>
#include <linux/compat.h>
#include <linux/pipe_fs_i.h>
#include <linux/audit.h> /* for audit_free() */
#include <linux/resource.h>
@@ -141,6 +140,11 @@ static void __exit_signal(struct task_struct *tsk)
if (sig) {
flush_sigqueue(&sig->shared_pending);
taskstats_tgid_free(sig);
/*
* Make sure ->signal can't go away under rq->lock,
* see account_group_exec_runtime().
*/
task_rq_unlock_wait(tsk);
__cleanup_signal(sig);
}
}
@@ -1054,14 +1058,6 @@ NORET_TYPE void do_exit(long code)
exit_itimers(tsk->signal);
}
acct_collect(code, group_dead);
#ifdef CONFIG_FUTEX
if (unlikely(tsk->robust_list))
exit_robust_list(tsk);
#ifdef CONFIG_COMPAT
if (unlikely(tsk->compat_robust_list))
compat_exit_robust_list(tsk);
#endif
#endif
if (group_dead)
tty_audit_exit();
if (unlikely(tsk->audit_context))

View File

@@ -40,6 +40,7 @@
#include <linux/jiffies.h>
#include <linux/tracehook.h>
#include <linux/futex.h>
#include <linux/compat.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/rcupdate.h>
#include <linux/ptrace.h>
@@ -519,6 +520,16 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
struct completion *vfork_done = tsk->vfork_done;
/* Get rid of any futexes when releasing the mm */
#ifdef CONFIG_FUTEX
if (unlikely(tsk->robust_list))
exit_robust_list(tsk);
#ifdef CONFIG_COMPAT
if (unlikely(tsk->compat_robust_list))
compat_exit_robust_list(tsk);
#endif
#endif
/* Get rid of any cached register state */
deactivate_mm(tsk, mm);

View File

@@ -664,14 +664,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
/* Timer is expired, act upon the callback mode */
switch(timer->cb_mode) {
case HRTIMER_CB_IRQSAFE_NO_RESTART:
debug_hrtimer_deactivate(timer);
/*
* We can call the callback from here. No restart
* happens, so no danger of recursion
*/
BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
return 1;
case HRTIMER_CB_IRQSAFE_PERCPU:
case HRTIMER_CB_IRQSAFE_UNLOCKED:
/*
@@ -683,7 +675,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
*/
debug_hrtimer_deactivate(timer);
return 1;
case HRTIMER_CB_IRQSAFE:
case HRTIMER_CB_SOFTIRQ:
/*
* Move everything else into the softirq pending list !
@@ -1209,6 +1200,7 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
enum hrtimer_restart (*fn)(struct hrtimer *);
struct hrtimer *timer;
int restart;
int emulate_hardirq_ctx = 0;
timer = list_entry(cpu_base->cb_pending.next,
struct hrtimer, cb_entry);
@@ -1217,10 +1209,24 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
timer_stats_account_hrtimer(timer);
fn = timer->function;
/*
* A timer might have been added to the cb_pending list
* when it was migrated during a cpu-offline operation.
* Emulate hardirq context for such timers.
*/
if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED)
emulate_hardirq_ctx = 1;
__remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
spin_unlock_irq(&cpu_base->lock);
restart = fn(timer);
if (unlikely(emulate_hardirq_ctx)) {
local_irq_disable();
restart = fn(timer);
local_irq_enable();
} else
restart = fn(timer);
spin_lock_irq(&cpu_base->lock);

View File

@@ -304,17 +304,24 @@ int sprint_symbol(char *buffer, unsigned long address)
char *modname;
const char *name;
unsigned long offset, size;
char namebuf[KSYM_NAME_LEN];
int len;
name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
name = kallsyms_lookup(address, &size, &offset, &modname, buffer);
if (!name)
return sprintf(buffer, "0x%lx", address);
if (name != buffer)
strcpy(buffer, name);
len = strlen(buffer);
buffer += len;
if (modname)
return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
size, modname);
len += sprintf(buffer, "+%#lx/%#lx [%s]",
offset, size, modname);
else
return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
len += sprintf(buffer, "+%#lx/%#lx", offset, size);
return len;
}
/* Look up a kernel symbol and print it to the kernel messages. */

View File

@@ -72,7 +72,7 @@ static bool kprobe_enabled;
DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
static struct {
spinlock_t lock ____cacheline_aligned;
spinlock_t lock ____cacheline_aligned_in_smp;
} kretprobe_table_locks[KPROBE_TABLE_SIZE];
static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
@@ -613,30 +613,37 @@ static int __kprobes __register_kprobe(struct kprobe *p,
return -EINVAL;
p->addr = addr;
if (!kernel_text_address((unsigned long) p->addr) ||
in_kprobes_functions((unsigned long) p->addr))
preempt_disable();
if (!__kernel_text_address((unsigned long) p->addr) ||
in_kprobes_functions((unsigned long) p->addr)) {
preempt_enable();
return -EINVAL;
}
p->mod_refcounted = 0;
/*
* Check if are we probing a module.
*/
probed_mod = module_text_address((unsigned long) p->addr);
probed_mod = __module_text_address((unsigned long) p->addr);
if (probed_mod) {
struct module *calling_mod = module_text_address(called_from);
struct module *calling_mod;
calling_mod = __module_text_address(called_from);
/*
* We must allow modules to probe themself and in this case
* avoid incrementing the module refcount, so as to allow
* unloading of self probing modules.
*/
if (calling_mod && calling_mod != probed_mod) {
if (unlikely(!try_module_get(probed_mod)))
if (unlikely(!try_module_get(probed_mod))) {
preempt_enable();
return -EINVAL;
}
p->mod_refcounted = 1;
} else
probed_mod = NULL;
}
preempt_enable();
p->nmissed = 0;
INIT_LIST_HEAD(&p->list);
@@ -718,6 +725,10 @@ static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
struct kprobe *old_p;
if (p->mod_refcounted) {
/*
* Since we've already incremented refcount,
* we don't need to disable preemption.
*/
mod = module_text_address((unsigned long)p->addr);
if (mod)
module_put(mod);

View File

@@ -1308,9 +1308,10 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
*/
static inline int fastpath_timer_check(struct task_struct *tsk)
{
struct signal_struct *sig = tsk->signal;
struct signal_struct *sig;
if (unlikely(!sig))
/* tsk == current, ensure it is safe to use ->signal/sighand */
if (unlikely(tsk->exit_state))
return 0;
if (!task_cputime_zero(&tsk->cputime_expires)) {
@@ -1323,6 +1324,8 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
return 1;
}
sig = tsk->signal;
if (!task_cputime_zero(&sig->cputime_expires)) {
struct task_cputime group_sample;

View File

@@ -174,7 +174,7 @@ static void suspend_test_finish(const char *label)
* has some performance issues. The stack dump of a WARN_ON
* is more likely to get the right attention than a printk...
*/
WARN_ON(msec > (TEST_SUSPEND_SECONDS * 1000));
WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label);
}
#else

View File

@@ -544,7 +544,7 @@ static const struct file_operations proc_profile_operations = {
};
#ifdef CONFIG_SMP
static void __init profile_nop(void *unused)
static inline void profile_nop(void *unused)
{
}

View File

@@ -400,7 +400,7 @@ void relay_reset(struct rchan *chan)
}
mutex_lock(&relay_channels_mutex);
for_each_online_cpu(i)
for_each_possible_cpu(i)
if (chan->buf[i])
__relay_reset(chan->buf[i], 0);
mutex_unlock(&relay_channels_mutex);
@@ -611,10 +611,9 @@ struct rchan *relay_open(const char *base_filename,
return chan;
free_bufs:
for_each_online_cpu(i) {
if (!chan->buf[i])
break;
relay_close_buf(chan->buf[i]);
for_each_possible_cpu(i) {
if (chan->buf[i])
relay_close_buf(chan->buf[i]);
}
kref_put(&chan->kref, relay_destroy_channel);

View File

@@ -399,7 +399,7 @@ struct cfs_rq {
*/
struct sched_entity *curr, *next, *last;
unsigned long nr_spread_over;
unsigned int nr_spread_over;
#ifdef CONFIG_FAIR_GROUP_SCHED
struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */
@@ -969,6 +969,14 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
}
}
void task_rq_unlock_wait(struct task_struct *p)
{
struct rq *rq = task_rq(p);
smp_mb(); /* spin-unlock-wait is not a full memory barrier */
spin_unlock_wait(&rq->lock);
}
static void __task_rq_unlock(struct rq *rq)
__releases(rq->lock)
{
@@ -1448,6 +1456,8 @@ static unsigned long cpu_avg_load_per_task(int cpu)
if (rq->nr_running)
rq->avg_load_per_task = rq->load.weight / rq->nr_running;
else
rq->avg_load_per_task = 0;
return rq->avg_load_per_task;
}
@@ -5860,6 +5870,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
spin_lock_irqsave(&rq->lock, flags);
__sched_fork(idle);
idle->se.exec_start = sched_clock();
@@ -5867,7 +5879,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
idle->cpus_allowed = cpumask_of_cpu(cpu);
__set_task_cpu(idle, cpu);
spin_lock_irqsave(&rq->lock, flags);
rq->curr = rq->idle = idle;
#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
idle->oncpu = 1;
@@ -7778,13 +7789,14 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
*
* The passed in 'doms_new' should be kmalloc'd. This routine takes
* ownership of it and will kfree it when done with it. If the caller
* failed the kmalloc call, then it can pass in doms_new == NULL,
* and partition_sched_domains() will fallback to the single partition
* 'fallback_doms', it also forces the domains to be rebuilt.
* failed the kmalloc call, then it can pass in doms_new == NULL &&
* ndoms_new == 1, and partition_sched_domains() will fallback to
* the single partition 'fallback_doms', it also forces the domains
* to be rebuilt.
*
* If doms_new==NULL it will be replaced with cpu_online_map.
* ndoms_new==0 is a special case for destroying existing domains.
* It will not create the default domain.
* If doms_new == NULL it will be replaced with cpu_online_map.
* ndoms_new == 0 is a special case for destroying existing domains,
* and it will not create the default domain.
*
* Call with hotplug lock held
*/

View File

@@ -144,7 +144,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
last = __pick_last_entity(cfs_rq);
if (last)
max_vruntime = last->vruntime;
min_vruntime = rq->cfs.min_vruntime;
min_vruntime = cfs_rq->min_vruntime;
rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime;
spin_unlock_irqrestore(&rq->lock, flags);
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime",
@@ -161,26 +161,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
SPLIT_NS(spread0));
SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
#ifdef CONFIG_SCHEDSTATS
#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
P(yld_exp_empty);
P(yld_act_empty);
P(yld_both_empty);
P(yld_count);
P(sched_switch);
P(sched_count);
P(sched_goidle);
P(ttwu_count);
P(ttwu_local);
P(bkl_count);
#undef P
#endif
SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over",
SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over",
cfs_rq->nr_spread_over);
#ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_SMP
@@ -260,6 +242,25 @@ static void print_cpu(struct seq_file *m, int cpu)
#undef P
#undef PN
#ifdef CONFIG_SCHEDSTATS
#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
P(yld_exp_empty);
P(yld_act_empty);
P(yld_both_empty);
P(yld_count);
P(sched_switch);
P(sched_count);
P(sched_goidle);
P(ttwu_count);
P(ttwu_local);
P(bkl_count);
#undef P
#endif
print_cfs_stats(m, cpu);
print_rt_stats(m, cpu);
@@ -422,10 +423,11 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
#undef __P
{
unsigned int this_cpu = raw_smp_processor_id();
u64 t0, t1;
t0 = sched_clock();
t1 = sched_clock();
t0 = cpu_clock(this_cpu);
t1 = cpu_clock(this_cpu);
SEQ_printf(m, "%-35s:%21Ld\n",
"clock-delta", (long long)(t1-t0));
}

View File

@@ -716,6 +716,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
__enqueue_entity(cfs_rq, se);
}
static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
if (cfs_rq->last == se)
cfs_rq->last = NULL;
if (cfs_rq->next == se)
cfs_rq->next = NULL;
}
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
{
@@ -738,11 +747,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
#endif
}
if (cfs_rq->last == se)
cfs_rq->last = NULL;
if (cfs_rq->next == se)
cfs_rq->next = NULL;
clear_buddies(cfs_rq, se);
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
@@ -977,6 +982,8 @@ static void yield_task_fair(struct rq *rq)
if (unlikely(cfs_rq->nr_running == 1))
return;
clear_buddies(cfs_rq, se);
if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
update_rq_clock(rq);
/*

View File

@@ -298,9 +298,11 @@ static inline void account_group_user_time(struct task_struct *tsk,
{
struct signal_struct *sig;
sig = tsk->signal;
if (unlikely(!sig))
/* tsk == current, ensure it is safe to use ->signal */
if (unlikely(tsk->exit_state))
return;
sig = tsk->signal;
if (sig->cputime.totals) {
struct task_cputime *times;
@@ -325,9 +327,11 @@ static inline void account_group_system_time(struct task_struct *tsk,
{
struct signal_struct *sig;
sig = tsk->signal;
if (unlikely(!sig))
/* tsk == current, ensure it is safe to use ->signal */
if (unlikely(tsk->exit_state))
return;
sig = tsk->signal;
if (sig->cputime.totals) {
struct task_cputime *times;
@@ -353,8 +357,11 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
struct signal_struct *sig;
sig = tsk->signal;
/* see __exit_signal()->task_rq_unlock_wait() */
barrier();
if (unlikely(!sig))
return;
if (sig->cputime.totals) {
struct task_cputime *times;

View File

@@ -269,10 +269,11 @@ void irq_enter(void)
{
int cpu = smp_processor_id();
if (idle_cpu(cpu) && !in_interrupt())
if (idle_cpu(cpu) && !in_interrupt()) {
__irq_enter();
tick_check_idle(cpu);
__irq_enter();
} else
__irq_enter();
}
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED

View File

@@ -112,7 +112,7 @@ static int chill(void *unused)
int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
{
struct work_struct *sm_work;
int i;
int i, ret;
/* Set up initial state. */
mutex_lock(&lock);
@@ -137,8 +137,9 @@ int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
/* This will release the thread on our CPU. */
put_cpu();
flush_workqueue(stop_machine_wq);
ret = active.fnret;
mutex_unlock(&lock);
return active.fnret;
return ret;
}
int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)

View File

@@ -31,7 +31,7 @@ cond_syscall(sys_socketpair);
cond_syscall(sys_bind);
cond_syscall(sys_listen);
cond_syscall(sys_accept);
cond_syscall(sys_paccept);
cond_syscall(sys_accept4);
cond_syscall(sys_connect);
cond_syscall(sys_getsockname);
cond_syscall(sys_getpeername);

View File

@@ -568,6 +568,9 @@ static void tick_nohz_switch_to_nohz(void)
*/
static void tick_nohz_kick_tick(int cpu)
{
#if 0
/* Switch back to 2.6.27 behaviour */
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
ktime_t delta, now;
@@ -584,6 +587,7 @@ static void tick_nohz_kick_tick(int cpu)
return;
tick_nohz_restart(ts, now);
#endif
}
#else

View File

@@ -185,7 +185,6 @@ enum {
};
static int ftrace_filtered;
static int tracing_on;
static LIST_HEAD(ftrace_new_addrs);
@@ -327,96 +326,89 @@ ftrace_record_ip(unsigned long ip)
static int
__ftrace_replace_code(struct dyn_ftrace *rec,
unsigned char *old, unsigned char *new, int enable)
unsigned char *nop, int enable)
{
unsigned long ip, fl;
unsigned char *call, *old, *new;
ip = rec->ip;
if (ftrace_filtered && enable) {
/*
* If filtering is on:
*
* If this record is set to be filtered and
* is enabled then do nothing.
*
* If this record is set to be filtered and
* it is not enabled, enable it.
*
* If this record is not set to be filtered
* and it is not enabled do nothing.
*
* If this record is set not to trace then
* do nothing.
*
* If this record is set not to trace and
* it is enabled then disable it.
*
* If this record is not set to be filtered and
* it is enabled, disable it.
*/
fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE |
FTRACE_FL_ENABLED);
if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) ||
(fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) ||
!fl || (fl == FTRACE_FL_NOTRACE))
/*
* If this record is not to be traced and
* it is not enabled then do nothing.
*
* If this record is not to be traced and
* it is enabled then disabled it.
*
*/
if (rec->flags & FTRACE_FL_NOTRACE) {
if (rec->flags & FTRACE_FL_ENABLED)
rec->flags &= ~FTRACE_FL_ENABLED;
else
return 0;
} else if (ftrace_filtered && enable) {
/*
* If it is enabled disable it,
* otherwise enable it!
* Filtering is on:
*/
if (fl & FTRACE_FL_ENABLED) {
/* swap new and old */
new = old;
old = ftrace_call_replace(ip, FTRACE_ADDR);
fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED);
/* Record is filtered and enabled, do nothing */
if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
return 0;
/* Record is not filtered and is not enabled do nothing */
if (!fl)
return 0;
/* Record is not filtered but enabled, disable it */
if (fl == FTRACE_FL_ENABLED)
rec->flags &= ~FTRACE_FL_ENABLED;
} else {
new = ftrace_call_replace(ip, FTRACE_ADDR);
else
/* Otherwise record is filtered but not enabled, enable it */
rec->flags |= FTRACE_FL_ENABLED;
}
} else {
/* Disable or not filtered */
if (enable) {
/*
* If this record is set not to trace and is
* not enabled, do nothing.
*/
fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED);
if (fl == FTRACE_FL_NOTRACE)
return 0;
new = ftrace_call_replace(ip, FTRACE_ADDR);
} else
old = ftrace_call_replace(ip, FTRACE_ADDR);
if (enable) {
/* if record is enabled, do nothing */
if (rec->flags & FTRACE_FL_ENABLED)
return 0;
rec->flags |= FTRACE_FL_ENABLED;
} else {
/* if record is not enabled do nothing */
if (!(rec->flags & FTRACE_FL_ENABLED))
return 0;
rec->flags &= ~FTRACE_FL_ENABLED;
}
}
call = ftrace_call_replace(ip, FTRACE_ADDR);
if (rec->flags & FTRACE_FL_ENABLED) {
old = nop;
new = call;
} else {
old = call;
new = nop;
}
return ftrace_modify_code(ip, old, new);
}
static void ftrace_replace_code(int enable)
{
int i, failed;
unsigned char *new = NULL, *old = NULL;
unsigned char *nop = NULL;
struct dyn_ftrace *rec;
struct ftrace_page *pg;
if (enable)
old = ftrace_nop_replace();
else
new = ftrace_nop_replace();
nop = ftrace_nop_replace();
for (pg = ftrace_pages_start; pg; pg = pg->next) {
for (i = 0; i < pg->index; i++) {
@@ -434,7 +426,7 @@ static void ftrace_replace_code(int enable)
unfreeze_record(rec);
}
failed = __ftrace_replace_code(rec, old, new, enable);
failed = __ftrace_replace_code(rec, nop, enable);
if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
rec->flags |= FTRACE_FL_FAILED;
if ((system_state == SYSTEM_BOOTING) ||
@@ -506,13 +498,10 @@ static int __ftrace_modify_code(void *data)
{
int *command = data;
if (*command & FTRACE_ENABLE_CALLS) {
if (*command & FTRACE_ENABLE_CALLS)
ftrace_replace_code(1);
tracing_on = 1;
} else if (*command & FTRACE_DISABLE_CALLS) {
else if (*command & FTRACE_DISABLE_CALLS)
ftrace_replace_code(0);
tracing_on = 0;
}
if (*command & FTRACE_UPDATE_TRACE_FUNC)
ftrace_update_ftrace_func(ftrace_trace_function);
@@ -538,8 +527,7 @@ static void ftrace_startup(void)
mutex_lock(&ftrace_start_lock);
ftrace_start++;
if (ftrace_start == 1)
command |= FTRACE_ENABLE_CALLS;
command |= FTRACE_ENABLE_CALLS;
if (saved_ftrace_func != ftrace_trace_function) {
saved_ftrace_func = ftrace_trace_function;
@@ -677,7 +665,7 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
cnt = num_to_init / ENTRIES_PER_PAGE;
pr_info("ftrace: allocating %ld entries in %d pages\n",
num_to_init, cnt);
num_to_init, cnt + 1);
for (i = 0; i < cnt; i++) {
pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -738,6 +726,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
((iter->flags & FTRACE_ITER_FAILURES) &&
!(rec->flags & FTRACE_FL_FAILED)) ||
((iter->flags & FTRACE_ITER_FILTER) &&
!(rec->flags & FTRACE_FL_FILTER)) ||
((iter->flags & FTRACE_ITER_NOTRACE) &&
!(rec->flags & FTRACE_FL_NOTRACE))) {
rec = NULL;
@@ -757,13 +748,11 @@ static void *t_start(struct seq_file *m, loff_t *pos)
void *p = NULL;
loff_t l = -1;
if (*pos != iter->pos) {
for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l))
;
} else {
l = *pos;
p = t_next(m, p, &l);
}
if (*pos > iter->pos)
*pos = iter->pos;
l = *pos;
p = t_next(m, p, &l);
return p;
}
@@ -774,15 +763,21 @@ static void t_stop(struct seq_file *m, void *p)
static int t_show(struct seq_file *m, void *v)
{
struct ftrace_iterator *iter = m->private;
struct dyn_ftrace *rec = v;
char str[KSYM_SYMBOL_LEN];
int ret = 0;
if (!rec)
return 0;
kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
seq_printf(m, "%s\n", str);
ret = seq_printf(m, "%s\n", str);
if (ret < 0) {
iter->pos--;
iter->idx--;
}
return 0;
}
@@ -808,7 +803,7 @@ ftrace_avail_open(struct inode *inode, struct file *file)
return -ENOMEM;
iter->pg = ftrace_pages_start;
iter->pos = -1;
iter->pos = 0;
ret = seq_open(file, &show_ftrace_seq_ops);
if (!ret) {
@@ -895,7 +890,7 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
if (file->f_mode & FMODE_READ) {
iter->pg = ftrace_pages_start;
iter->pos = -1;
iter->pos = 0;
iter->flags = enable ? FTRACE_ITER_FILTER :
FTRACE_ITER_NOTRACE;
@@ -1186,7 +1181,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
mutex_lock(&ftrace_sysctl_lock);
mutex_lock(&ftrace_start_lock);
if (iter->filtered && ftrace_start && ftrace_enabled)
if (ftrace_start && ftrace_enabled)
ftrace_run_update_code(FTRACE_ENABLE_CALLS);
mutex_unlock(&ftrace_start_lock);
mutex_unlock(&ftrace_sysctl_lock);

View File

@@ -16,14 +16,49 @@
#include <linux/list.h>
#include <linux/fs.h>
#include "trace.h"
/* Global flag to disable all recording to ring buffers */
static int ring_buffers_off __read_mostly;
/**
* tracing_on - enable all tracing buffers
*
* This function enables all tracing buffers that may have been
* disabled with tracing_off.
*/
void tracing_on(void)
{
ring_buffers_off = 0;
}
/**
* tracing_off - turn off all tracing buffers
*
* This function stops all tracing buffers from recording data.
* It does not disable any overhead the tracers themselves may
* be causing. This function simply causes all recording to
* the ring buffers to fail.
*/
void tracing_off(void)
{
ring_buffers_off = 1;
}
/* Up this if you want to test the TIME_EXTENTS and normalization */
#define DEBUG_SHIFT 0
/* FIXME!!! */
u64 ring_buffer_time_stamp(int cpu)
{
u64 time;
preempt_disable_notrace();
/* shift to debug/test normalization and TIME_EXTENTS */
return sched_clock() << DEBUG_SHIFT;
time = sched_clock() << DEBUG_SHIFT;
preempt_enable_notrace();
return time;
}
void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
@@ -503,6 +538,12 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
LIST_HEAD(pages);
int i, cpu;
/*
* Always succeed at resizing a non-existent buffer:
*/
if (!buffer)
return size;
size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
size *= BUF_PAGE_SIZE;
buffer_size = buffer->pages * BUF_PAGE_SIZE;
@@ -576,6 +617,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
list_del_init(&page->list);
free_buffer_page(page);
}
mutex_unlock(&buffer->mutex);
return -ENOMEM;
}
@@ -1060,7 +1102,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
/* Did the write stamp get updated already? */
if (unlikely(ts < cpu_buffer->write_stamp))
goto again;
delta = 0;
if (test_time_stamp(delta)) {
@@ -1133,6 +1175,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
struct ring_buffer_event *event;
int cpu, resched;
if (ring_buffers_off)
return NULL;
if (atomic_read(&buffer->record_disabled))
return NULL;
@@ -1249,6 +1294,9 @@ int ring_buffer_write(struct ring_buffer *buffer,
int ret = -EBUSY;
int cpu, resched;
if (ring_buffers_off)
return -EBUSY;
if (atomic_read(&buffer->record_disabled))
return -EBUSY;
@@ -2070,3 +2118,69 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
return 0;
}
static ssize_t
rb_simple_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
int *p = filp->private_data;
char buf[64];
int r;
/* !ring_buffers_off == tracing_on */
r = sprintf(buf, "%d\n", !*p);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
static ssize_t
rb_simple_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
int *p = filp->private_data;
char buf[64];
long val;
int ret;
if (cnt >= sizeof(buf))
return -EINVAL;
if (copy_from_user(&buf, ubuf, cnt))
return -EFAULT;
buf[cnt] = 0;
ret = strict_strtoul(buf, 10, &val);
if (ret < 0)
return ret;
/* !ring_buffers_off == tracing_on */
*p = !val;
(*ppos)++;
return cnt;
}
static struct file_operations rb_simple_fops = {
.open = tracing_open_generic,
.read = rb_simple_read,
.write = rb_simple_write,
};
static __init int rb_init_debugfs(void)
{
struct dentry *d_tracer;
struct dentry *entry;
d_tracer = tracing_init_dentry();
entry = debugfs_create_file("tracing_on", 0644, d_tracer,
&ring_buffers_off, &rb_simple_fops);
if (!entry)
pr_warning("Could not create debugfs 'tracing_on' entry\n");
return 0;
}
fs_initcall(rb_init_debugfs);

View File

@@ -1755,7 +1755,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
return TRACE_TYPE_HANDLED;
SEQ_PUT_FIELD_RET(s, entry->pid);
SEQ_PUT_FIELD_RET(s, iter->cpu);
SEQ_PUT_FIELD_RET(s, entry->cpu);
SEQ_PUT_FIELD_RET(s, iter->ts);
switch (entry->type) {
@@ -1936,6 +1936,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
ring_buffer_read_finish(iter->buffer_iter[cpu]);
}
mutex_unlock(&trace_types_lock);
kfree(iter);
return ERR_PTR(-ENOMEM);
}
@@ -2676,7 +2677,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
{
unsigned long val;
char buf[64];
int ret;
int ret, cpu;
struct trace_array *tr = filp->private_data;
if (cnt >= sizeof(buf))
@@ -2704,6 +2705,14 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
goto out;
}
/* disable all cpu buffers */
for_each_tracing_cpu(cpu) {
if (global_trace.data[cpu])
atomic_inc(&global_trace.data[cpu]->disabled);
if (max_tr.data[cpu])
atomic_inc(&max_tr.data[cpu]->disabled);
}
if (val != global_trace.entries) {
ret = ring_buffer_resize(global_trace.buffer, val);
if (ret < 0) {
@@ -2735,6 +2744,13 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
if (tracing_disabled)
cnt = -ENOMEM;
out:
for_each_tracing_cpu(cpu) {
if (global_trace.data[cpu])
atomic_dec(&global_trace.data[cpu]->disabled);
if (max_tr.data[cpu])
atomic_dec(&max_tr.data[cpu]->disabled);
}
max_tr.entries = global_trace.entries;
mutex_unlock(&trace_types_lock);