Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (46 commits)
  llist: Add back llist_add_batch() and llist_del_first() prototypes
  sched: Don't use tasklist_lock for debug prints
  sched: Warn on rt throttling
  sched: Unify the ->cpus_allowed mask copy
  sched: Wrap scheduler p->cpus_allowed access
  sched: Request for idle balance during nohz idle load balance
  sched: Use resched IPI to kick off the nohz idle balance
  sched: Fix idle_cpu()
  llist: Remove cpu_relax() usage in cmpxchg loops
  sched: Convert to struct llist
  llist: Add llist_next()
  irq_work: Use llist in the struct irq_work logic
  llist: Return whether list is empty before adding in llist_add()
  llist: Move cpu_relax() to after the cmpxchg()
  llist: Remove the platform-dependent NMI checks
  llist: Make some llist functions inline
  sched, tracing: Show PREEMPT_ACTIVE state in trace_sched_switch
  sched: Remove redundant test in check_preempt_tick()
  sched: Add documentation for bandwidth control
  sched: Return unused runtime on group dequeue
  ...
commit 8a4a8918ed

Documentation/scheduler/sched-bwc.txt | 122 (new file)
@ -0,0 +1,122 @@
CFS Bandwidth Control
=====================

[ This document only discusses CPU bandwidth control for SCHED_NORMAL.
  The SCHED_RT case is covered in Documentation/scheduler/sched-rt-group.txt ]

CFS bandwidth control is a CONFIG_FAIR_GROUP_SCHED extension which allows the
specification of the maximum CPU bandwidth available to a group or hierarchy.

The bandwidth allowed for a group is specified using a quota and period. Within
each given "period" (microseconds), a group is allowed to consume only up to
"quota" microseconds of CPU time. When the CPU bandwidth consumption of a
group exceeds this limit (for that period), the tasks belonging to its
hierarchy will be throttled and are not allowed to run again until the next
period.

A group's unused runtime is globally tracked, being refreshed with quota units
above at each period boundary. As threads consume this bandwidth it is
transferred to cpu-local "silos" on a demand basis. The amount transferred
within each of these updates is tunable and described as the "slice".

Management
----------
Quota and period are managed within the cpu subsystem via cgroupfs.

cpu.cfs_quota_us: the total available run-time within a period (in microseconds)
cpu.cfs_period_us: the length of a period (in microseconds)
cpu.stat: exports throttling statistics [explained further below]

The default values are:
	cpu.cfs_period_us=100ms
	cpu.cfs_quota_us=-1

A value of -1 for cpu.cfs_quota_us indicates that the group does not have any
bandwidth restriction in place; such a group is described as an unconstrained
bandwidth group. This represents the traditional work-conserving behavior for
CFS.

Writing any (valid) positive value(s) will enact the specified bandwidth limit.
The minimum value allowed for either the quota or the period is 1ms. There is
also an upper bound on the period length of 1s. Additional restrictions exist
when bandwidth limits are used in a hierarchical fashion; these are explained
in more detail below.

Writing any negative value to cpu.cfs_quota_us will remove the bandwidth limit
and return the group to an unconstrained state once more.

Any updates to a group's bandwidth specification will result in it becoming
unthrottled if it is in a constrained state.

System wide settings
--------------------
For efficiency run-time is transferred between the global pool and CPU local
"silos" in a batch fashion. This greatly reduces global accounting pressure
on large systems. The amount transferred each time such an update is required
is described as the "slice".

This is tunable via procfs:
	/proc/sys/kernel/sched_cfs_bandwidth_slice_us (default=5ms)

Larger slice values will reduce transfer overheads, while smaller values allow
for more fine-grained consumption.

Statistics
----------
A group's bandwidth statistics are exported via 3 fields in cpu.stat.

cpu.stat:
- nr_periods: Number of enforcement intervals that have elapsed.
- nr_throttled: Number of times the group has been throttled/limited.
- throttled_time: The total time duration (in nanoseconds) for which entities
  of the group have been throttled.

This interface is read-only.

Hierarchical considerations
---------------------------
The interface enforces that an individual entity's bandwidth is always
attainable, that is: max(c_i) <= C. However, over-subscription in the
aggregate case is explicitly allowed to enable work-conserving semantics
within a hierarchy.
	e.g. \Sum (c_i) may exceed C
[ Where C is the parent's bandwidth, and c_i its children ]

There are two ways in which a group may become throttled:
	a. it fully consumes its own quota within a period
	b. a parent's quota is fully consumed within its period

In case b) above, even though the child may have runtime remaining it will not
be allowed to run until the parent's runtime is refreshed.

Examples
--------
1. Limit a group to 1 CPU worth of runtime.

	If period is 250ms and quota is also 250ms, the group will get
	1 CPU worth of runtime every 250ms.

	# echo 250000 > cpu.cfs_quota_us /* quota = 250ms */
	# echo 250000 > cpu.cfs_period_us /* period = 250ms */

2. Limit a group to 2 CPUs worth of runtime on a multi-CPU machine.

	With 500ms period and 1000ms quota, the group can get 2 CPUs worth of
	runtime every 500ms.

	# echo 1000000 > cpu.cfs_quota_us /* quota = 1000ms */
	# echo 500000 > cpu.cfs_period_us /* period = 500ms */

	The larger period here allows for increased burst capacity.

3. Limit a group to 20% of 1 CPU.

	With 50ms period, 10ms quota will be equivalent to 20% of 1 CPU.

	# echo 10000 > cpu.cfs_quota_us /* quota = 10ms */
	# echo 50000 > cpu.cfs_period_us /* period = 50ms */

	By using a small period here we are ensuring a consistent latency
	response at the expense of burst capacity.
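
The three examples above all follow the same arithmetic: quota = desired CPU share x period (shares above 1.0 span multiple CPUs). As a rough illustration only, a hypothetical user-space helper could compute the value to be written into cpu.cfs_quota_us; nothing below is part of the kernel patch itself.

```c
#include <stdio.h>

/*
 * Hypothetical helper: compute the cpu.cfs_quota_us value needed to cap a
 * group at 'cpus' worth of CPU time for a given cpu.cfs_period_us.
 * E.g. cpus = 0.2, period_us = 50000 gives 10000 (example 3 above).
 */
static long cfs_quota_us(double cpus, long period_us)
{
	return (long)(cpus * period_us);
}

int main(void)
{
	printf("20%% of 1 CPU @ 50ms period  -> quota = %ld us\n",
	       cfs_quota_us(0.2, 50000));
	printf("2 CPUs       @ 500ms period -> quota = %ld us\n",
	       cfs_quota_us(2.0, 500000));
	return 0;
}
```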
drivers/acpi/apei/Kconfig

@ -14,7 +14,6 @@ config ACPI_APEI_GHES
 	depends on ACPI_APEI && X86
 	select ACPI_HED
 	select IRQ_WORK
-	select LLIST
 	select GENERIC_ALLOCATOR
 	help
 	  Generic Hardware Error Source provides a way to report
include/linux/irq_work.h

@ -1,20 +1,23 @@
 #ifndef _LINUX_IRQ_WORK_H
 #define _LINUX_IRQ_WORK_H
 
+#include <linux/llist.h>
+
 struct irq_work {
-	struct irq_work *next;
+	unsigned long flags;
+	struct llist_node llnode;
 	void (*func)(struct irq_work *);
 };
 
 static inline
-void init_irq_work(struct irq_work *entry, void (*func)(struct irq_work *))
+void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
 {
-	entry->next = NULL;
-	entry->func = func;
+	work->flags = 0;
+	work->func = func;
 }
 
-bool irq_work_queue(struct irq_work *entry);
+bool irq_work_queue(struct irq_work *work);
 void irq_work_run(void);
-void irq_work_sync(struct irq_work *entry);
+void irq_work_sync(struct irq_work *work);
 
 #endif /* _LINUX_IRQ_WORK_H */
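
Since the irq_work structure and its initializer change here, a minimal usage sketch may help; the callback, variable names and call sites below are illustrative assumptions, not code from this merge.

```c
#include <linux/irq_work.h>
#include <linux/printk.h>

/* Runs later in IRQ context, from irq_work_run() after the self-IPI. */
static void my_deferred_func(struct irq_work *work)
{
	pr_info("deferred irq_work executed\n");
}

static struct irq_work my_work;

static void my_init(void)
{
	/* With the new layout this clears work->flags and sets the callback. */
	init_irq_work(&my_work, my_deferred_func);
}

static void my_hot_path(void)
{
	/*
	 * May be called from hard-irq (or NMI on architectures with an
	 * NMI-safe cmpxchg); returns false if the work was already pending.
	 */
	if (!irq_work_queue(&my_work))
		pr_debug("work already queued\n");
}
```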
include/linux/llist.h

@ -35,10 +35,30 @@
  *
  * The basic atomic operation of this list is cmpxchg on long. On
  * architectures that don't have NMI-safe cmpxchg implementation, the
- * list can NOT be used in NMI handler. So code uses the list in NMI
- * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
+ * list can NOT be used in NMI handlers. So code that uses the list in
+ * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
+ *
+ * Copyright 2010,2011 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
+#include <linux/kernel.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+
 struct llist_head {
 	struct llist_node *first;
 };
@ -113,14 +133,55 @@ static inline void init_llist_head(struct llist_head *list)
  * test whether the list is empty without deleting something from the
  * list.
  */
-static inline int llist_empty(const struct llist_head *head)
+static inline bool llist_empty(const struct llist_head *head)
 {
 	return ACCESS_ONCE(head->first) == NULL;
 }
 
-void llist_add(struct llist_node *new, struct llist_head *head);
-void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
-		     struct llist_head *head);
-struct llist_node *llist_del_first(struct llist_head *head);
-struct llist_node *llist_del_all(struct llist_head *head);
+static inline struct llist_node *llist_next(struct llist_node *node)
+{
+	return node->next;
+}
+
+/**
+ * llist_add - add a new entry
+ * @new:	new entry to be added
+ * @head:	the head for your lock-less list
+ *
+ * Return whether list is empty before adding.
+ */
+static inline bool llist_add(struct llist_node *new, struct llist_head *head)
+{
+	struct llist_node *entry, *old_entry;
+
+	entry = head->first;
+	for (;;) {
+		old_entry = entry;
+		new->next = entry;
+		entry = cmpxchg(&head->first, old_entry, new);
+		if (entry == old_entry)
+			break;
+	}
+
+	return old_entry == NULL;
+}
+
+/**
+ * llist_del_all - delete all entries from lock-less list
+ * @head:	the head of lock-less list to delete all entries
+ *
+ * If list is empty, return NULL, otherwise, delete all entries and
+ * return the pointer to the first entry. The order of entries
+ * deleted is from the newest to the oldest added one.
+ */
+static inline struct llist_node *llist_del_all(struct llist_head *head)
+{
+	return xchg(&head->first, NULL);
+}
+
+extern bool llist_add_batch(struct llist_node *new_first,
+			    struct llist_node *new_last,
+			    struct llist_head *head);
+extern struct llist_node *llist_del_first(struct llist_head *head);
+
 #endif /* LLIST_H */
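
As a usage sketch for the inline llist API above (the record type, names and the kfree-based consumer are assumptions for illustration, not taken from the patch):

```c
#include <linux/llist.h>
#include <linux/slab.h>
#include <linux/printk.h>

/* Hypothetical record with an embedded lock-less list node. */
struct my_event {
	int id;
	struct llist_node llnode;
};

static struct llist_head pending_events;

static void my_events_init(void)
{
	init_llist_head(&pending_events);
}

/* Producer side: safe from IRQ context (and NMI with NMI-safe cmpxchg). */
static void my_post_event(struct my_event *ev)
{
	/* llist_add() reports whether the list was empty before the add. */
	if (llist_add(&ev->llnode, &pending_events))
		pr_debug("first pending event, kick the consumer here\n");
}

/* Consumer side: detach the whole chain at once, then walk it. */
static void my_drain_events(void)
{
	struct llist_node *node = llist_del_all(&pending_events);

	while (node) {
		struct my_event *ev = llist_entry(node, struct my_event, llnode);

		node = llist_next(node);	/* grab next before freeing */
		pr_debug("event %d (newest-first)\n", ev->id);
		kfree(ev);
	}
}
```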
include/linux/sched.h

@ -90,6 +90,7 @@ struct sched_param {
 #include <linux/task_io_accounting.h>
 #include <linux/latencytop.h>
 #include <linux/cred.h>
+#include <linux/llist.h>
 
 #include <asm/processor.h>
 
@ -1224,7 +1225,7 @@ struct task_struct {
 	unsigned int ptrace;
 
 #ifdef CONFIG_SMP
-	struct task_struct *wake_entry;
+	struct llist_node wake_entry;
 	int on_cpu;
 #endif
 	int on_rq;
@ -2035,6 +2036,10 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
 static inline void sched_autogroup_exit(struct signal_struct *sig) { }
 #endif
 
+#ifdef CONFIG_CFS_BANDWIDTH
+extern unsigned int sysctl_sched_cfs_bandwidth_slice;
+#endif
+
 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
 extern void rt_mutex_setprio(struct task_struct *p, int prio);
include/trace/events/sched.h

@ -100,7 +100,7 @@ static inline long __trace_sched_switch_state(struct task_struct *p)
 	 * For all intents and purposes a preempted task is a running task.
 	 */
 	if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
-		state = TASK_RUNNING;
+		state = TASK_RUNNING | TASK_STATE_MAX;
 #endif
 
 	return state;
@ -137,13 +137,14 @@ TRACE_EVENT(sched_switch,
 		__entry->next_prio	= next->prio;
 	),
 
-	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d",
+	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
 		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
-		__entry->prev_state ?
-		__print_flags(__entry->prev_state, "|",
+		__entry->prev_state & (TASK_STATE_MAX-1) ?
+		__print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
 				{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
 				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
 				{ 128, "W" }) : "R",
+		__entry->prev_state & TASK_STATE_MAX ? "+" : "",
 		__entry->next_comm, __entry->next_pid, __entry->next_prio)
 );
init/Kconfig | 12
@ -715,6 +715,18 @@ config FAIR_GROUP_SCHED
 	depends on CGROUP_SCHED
 	default CGROUP_SCHED
 
+config CFS_BANDWIDTH
+	bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED"
+	depends on EXPERIMENTAL
+	depends on FAIR_GROUP_SCHED
+	default n
+	help
+	  This option allows users to define CPU bandwidth rates (limits) for
+	  tasks running within the fair group scheduler.  Groups with no limit
+	  set are considered to be unconstrained and will run with no
+	  restriction.
+	  See tip/Documentation/scheduler/sched-bwc.txt for more information.
+
 config RT_GROUP_SCHED
 	bool "Group scheduling for SCHED_RR/FIFO"
 	depends on EXPERIMENTAL
@ -17,54 +17,34 @@
|
|||||||
* claimed NULL, 3 -> {pending} : claimed to be enqueued
|
* claimed NULL, 3 -> {pending} : claimed to be enqueued
|
||||||
* pending next, 3 -> {busy} : queued, pending callback
|
* pending next, 3 -> {busy} : queued, pending callback
|
||||||
* busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed
|
* busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed
|
||||||
*
|
|
||||||
* We use the lower two bits of the next pointer to keep PENDING and BUSY
|
|
||||||
* flags.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define IRQ_WORK_PENDING 1UL
|
#define IRQ_WORK_PENDING 1UL
|
||||||
#define IRQ_WORK_BUSY 2UL
|
#define IRQ_WORK_BUSY 2UL
|
||||||
#define IRQ_WORK_FLAGS 3UL
|
#define IRQ_WORK_FLAGS 3UL
|
||||||
|
|
||||||
static inline bool irq_work_is_set(struct irq_work *entry, int flags)
|
static DEFINE_PER_CPU(struct llist_head, irq_work_list);
|
||||||
{
|
|
||||||
return (unsigned long)entry->next & flags;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline struct irq_work *irq_work_next(struct irq_work *entry)
|
|
||||||
{
|
|
||||||
unsigned long next = (unsigned long)entry->next;
|
|
||||||
next &= ~IRQ_WORK_FLAGS;
|
|
||||||
return (struct irq_work *)next;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline struct irq_work *next_flags(struct irq_work *entry, int flags)
|
|
||||||
{
|
|
||||||
unsigned long next = (unsigned long)entry;
|
|
||||||
next |= flags;
|
|
||||||
return (struct irq_work *)next;
|
|
||||||
}
|
|
||||||
|
|
||||||
static DEFINE_PER_CPU(struct irq_work *, irq_work_list);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Claim the entry so that no one else will poke at it.
|
* Claim the entry so that no one else will poke at it.
|
||||||
*/
|
*/
|
||||||
static bool irq_work_claim(struct irq_work *entry)
|
static bool irq_work_claim(struct irq_work *work)
|
||||||
{
|
{
|
||||||
struct irq_work *next, *nflags;
|
unsigned long flags, nflags;
|
||||||
|
|
||||||
do {
|
for (;;) {
|
||||||
next = entry->next;
|
flags = work->flags;
|
||||||
if ((unsigned long)next & IRQ_WORK_PENDING)
|
if (flags & IRQ_WORK_PENDING)
|
||||||
return false;
|
return false;
|
||||||
nflags = next_flags(next, IRQ_WORK_FLAGS);
|
nflags = flags | IRQ_WORK_FLAGS;
|
||||||
} while (cmpxchg(&entry->next, next, nflags) != next);
|
if (cmpxchg(&work->flags, flags, nflags) == flags)
|
||||||
|
break;
|
||||||
|
cpu_relax();
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void __weak arch_irq_work_raise(void)
|
void __weak arch_irq_work_raise(void)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@ -75,20 +55,15 @@ void __weak arch_irq_work_raise(void)
|
|||||||
/*
|
/*
|
||||||
* Queue the entry and raise the IPI if needed.
|
* Queue the entry and raise the IPI if needed.
|
||||||
*/
|
*/
|
||||||
static void __irq_work_queue(struct irq_work *entry)
|
static void __irq_work_queue(struct irq_work *work)
|
||||||
{
|
{
|
||||||
struct irq_work *next;
|
bool empty;
|
||||||
|
|
||||||
preempt_disable();
|
preempt_disable();
|
||||||
|
|
||||||
do {
|
empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
|
||||||
next = __this_cpu_read(irq_work_list);
|
|
||||||
/* Can assign non-atomic because we keep the flags set. */
|
|
||||||
entry->next = next_flags(next, IRQ_WORK_FLAGS);
|
|
||||||
} while (this_cpu_cmpxchg(irq_work_list, next, entry) != next);
|
|
||||||
|
|
||||||
/* The list was empty, raise self-interrupt to start processing. */
|
/* The list was empty, raise self-interrupt to start processing. */
|
||||||
if (!irq_work_next(entry))
|
if (empty)
|
||||||
arch_irq_work_raise();
|
arch_irq_work_raise();
|
||||||
|
|
||||||
preempt_enable();
|
preempt_enable();
|
||||||
@ -100,16 +75,16 @@ static void __irq_work_queue(struct irq_work *entry)
|
|||||||
*
|
*
|
||||||
* Can be re-enqueued while the callback is still in progress.
|
* Can be re-enqueued while the callback is still in progress.
|
||||||
*/
|
*/
|
||||||
bool irq_work_queue(struct irq_work *entry)
|
bool irq_work_queue(struct irq_work *work)
|
||||||
{
|
{
|
||||||
if (!irq_work_claim(entry)) {
|
if (!irq_work_claim(work)) {
|
||||||
/*
|
/*
|
||||||
* Already enqueued, can't do!
|
* Already enqueued, can't do!
|
||||||
*/
|
*/
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
__irq_work_queue(entry);
|
__irq_work_queue(work);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(irq_work_queue);
|
EXPORT_SYMBOL_GPL(irq_work_queue);
|
||||||
@ -120,34 +95,34 @@ EXPORT_SYMBOL_GPL(irq_work_queue);
|
|||||||
*/
|
*/
|
||||||
void irq_work_run(void)
|
void irq_work_run(void)
|
||||||
{
|
{
|
||||||
struct irq_work *list;
|
struct irq_work *work;
|
||||||
|
struct llist_head *this_list;
|
||||||
|
struct llist_node *llnode;
|
||||||
|
|
||||||
if (this_cpu_read(irq_work_list) == NULL)
|
this_list = &__get_cpu_var(irq_work_list);
|
||||||
|
if (llist_empty(this_list))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
BUG_ON(!in_irq());
|
BUG_ON(!in_irq());
|
||||||
BUG_ON(!irqs_disabled());
|
BUG_ON(!irqs_disabled());
|
||||||
|
|
||||||
list = this_cpu_xchg(irq_work_list, NULL);
|
llnode = llist_del_all(this_list);
|
||||||
|
while (llnode != NULL) {
|
||||||
|
work = llist_entry(llnode, struct irq_work, llnode);
|
||||||
|
|
||||||
while (list != NULL) {
|
llnode = llist_next(llnode);
|
||||||
struct irq_work *entry = list;
|
|
||||||
|
|
||||||
list = irq_work_next(list);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Clear the PENDING bit, after this point the @entry
|
* Clear the PENDING bit, after this point the @work
|
||||||
* can be re-used.
|
* can be re-used.
|
||||||
*/
|
*/
|
||||||
entry->next = next_flags(NULL, IRQ_WORK_BUSY);
|
work->flags = IRQ_WORK_BUSY;
|
||||||
entry->func(entry);
|
work->func(work);
|
||||||
/*
|
/*
|
||||||
* Clear the BUSY bit and return to the free state if
|
* Clear the BUSY bit and return to the free state if
|
||||||
* no-one else claimed it meanwhile.
|
* no-one else claimed it meanwhile.
|
||||||
*/
|
*/
|
||||||
(void)cmpxchg(&entry->next,
|
(void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0);
|
||||||
next_flags(NULL, IRQ_WORK_BUSY),
|
|
||||||
NULL);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(irq_work_run);
|
EXPORT_SYMBOL_GPL(irq_work_run);
|
||||||
@ -156,11 +131,11 @@ EXPORT_SYMBOL_GPL(irq_work_run);
|
|||||||
* Synchronize against the irq_work @entry, ensures the entry is not
|
* Synchronize against the irq_work @entry, ensures the entry is not
|
||||||
* currently in use.
|
* currently in use.
|
||||||
*/
|
*/
|
||||||
void irq_work_sync(struct irq_work *entry)
|
void irq_work_sync(struct irq_work *work)
|
||||||
{
|
{
|
||||||
WARN_ON_ONCE(irqs_disabled());
|
WARN_ON_ONCE(irqs_disabled());
|
||||||
|
|
||||||
while (irq_work_is_set(entry, IRQ_WORK_BUSY))
|
while (work->flags & IRQ_WORK_BUSY)
|
||||||
cpu_relax();
|
cpu_relax();
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(irq_work_sync);
|
EXPORT_SYMBOL_GPL(irq_work_sync);
|
||||||
|
kernel/sched.c | 660
@ -196,10 +196,28 @@ static inline int rt_bandwidth_enabled(void)
|
|||||||
return sysctl_sched_rt_runtime >= 0;
|
return sysctl_sched_rt_runtime >= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
|
||||||
|
{
|
||||||
|
unsigned long delta;
|
||||||
|
ktime_t soft, hard, now;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
if (hrtimer_active(period_timer))
|
||||||
|
break;
|
||||||
|
|
||||||
|
now = hrtimer_cb_get_time(period_timer);
|
||||||
|
hrtimer_forward(period_timer, now, period);
|
||||||
|
|
||||||
|
soft = hrtimer_get_softexpires(period_timer);
|
||||||
|
hard = hrtimer_get_expires(period_timer);
|
||||||
|
delta = ktime_to_ns(ktime_sub(hard, soft));
|
||||||
|
__hrtimer_start_range_ns(period_timer, soft, delta,
|
||||||
|
HRTIMER_MODE_ABS_PINNED, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
|
static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
|
||||||
{
|
{
|
||||||
ktime_t now;
|
|
||||||
|
|
||||||
if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
|
if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -207,22 +225,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
raw_spin_lock(&rt_b->rt_runtime_lock);
|
raw_spin_lock(&rt_b->rt_runtime_lock);
|
||||||
for (;;) {
|
start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
|
||||||
unsigned long delta;
|
|
||||||
ktime_t soft, hard;
|
|
||||||
|
|
||||||
if (hrtimer_active(&rt_b->rt_period_timer))
|
|
||||||
break;
|
|
||||||
|
|
||||||
now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
|
|
||||||
hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
|
|
||||||
|
|
||||||
soft = hrtimer_get_softexpires(&rt_b->rt_period_timer);
|
|
||||||
hard = hrtimer_get_expires(&rt_b->rt_period_timer);
|
|
||||||
delta = ktime_to_ns(ktime_sub(hard, soft));
|
|
||||||
__hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
|
|
||||||
HRTIMER_MODE_ABS_PINNED, 0);
|
|
||||||
}
|
|
||||||
raw_spin_unlock(&rt_b->rt_runtime_lock);
|
raw_spin_unlock(&rt_b->rt_runtime_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -247,6 +250,24 @@ struct cfs_rq;
|
|||||||
|
|
||||||
static LIST_HEAD(task_groups);
|
static LIST_HEAD(task_groups);
|
||||||
|
|
||||||
|
struct cfs_bandwidth {
|
||||||
|
#ifdef CONFIG_CFS_BANDWIDTH
|
||||||
|
raw_spinlock_t lock;
|
||||||
|
ktime_t period;
|
||||||
|
u64 quota, runtime;
|
||||||
|
s64 hierarchal_quota;
|
||||||
|
u64 runtime_expires;
|
||||||
|
|
||||||
|
int idle, timer_active;
|
||||||
|
struct hrtimer period_timer, slack_timer;
|
||||||
|
struct list_head throttled_cfs_rq;
|
||||||
|
|
||||||
|
/* statistics */
|
||||||
|
int nr_periods, nr_throttled;
|
||||||
|
u64 throttled_time;
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
/* task group related information */
|
/* task group related information */
|
||||||
struct task_group {
|
struct task_group {
|
||||||
struct cgroup_subsys_state css;
|
struct cgroup_subsys_state css;
|
||||||
@ -278,6 +299,8 @@ struct task_group {
|
|||||||
#ifdef CONFIG_SCHED_AUTOGROUP
|
#ifdef CONFIG_SCHED_AUTOGROUP
|
||||||
struct autogroup *autogroup;
|
struct autogroup *autogroup;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
struct cfs_bandwidth cfs_bandwidth;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* task_group_lock serializes the addition/removal of task groups */
|
/* task_group_lock serializes the addition/removal of task groups */
|
||||||
@ -311,7 +334,7 @@ struct task_group root_task_group;
|
|||||||
/* CFS-related fields in a runqueue */
|
/* CFS-related fields in a runqueue */
|
||||||
struct cfs_rq {
|
struct cfs_rq {
|
||||||
struct load_weight load;
|
struct load_weight load;
|
||||||
unsigned long nr_running;
|
unsigned long nr_running, h_nr_running;
|
||||||
|
|
||||||
u64 exec_clock;
|
u64 exec_clock;
|
||||||
u64 min_vruntime;
|
u64 min_vruntime;
|
||||||
@ -377,9 +400,120 @@ struct cfs_rq {
|
|||||||
|
|
||||||
unsigned long load_contribution;
|
unsigned long load_contribution;
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef CONFIG_CFS_BANDWIDTH
|
||||||
|
int runtime_enabled;
|
||||||
|
u64 runtime_expires;
|
||||||
|
s64 runtime_remaining;
|
||||||
|
|
||||||
|
u64 throttled_timestamp;
|
||||||
|
int throttled, throttle_count;
|
||||||
|
struct list_head throttled_list;
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||||
|
#ifdef CONFIG_CFS_BANDWIDTH
|
||||||
|
static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
|
||||||
|
{
|
||||||
|
return &tg->cfs_bandwidth;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline u64 default_cfs_period(void);
|
||||||
|
static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun);
|
||||||
|
static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b);
|
||||||
|
|
||||||
|
static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
|
||||||
|
{
|
||||||
|
struct cfs_bandwidth *cfs_b =
|
||||||
|
container_of(timer, struct cfs_bandwidth, slack_timer);
|
||||||
|
do_sched_cfs_slack_timer(cfs_b);
|
||||||
|
|
||||||
|
return HRTIMER_NORESTART;
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
|
||||||
|
{
|
||||||
|
struct cfs_bandwidth *cfs_b =
|
||||||
|
container_of(timer, struct cfs_bandwidth, period_timer);
|
||||||
|
ktime_t now;
|
||||||
|
int overrun;
|
||||||
|
int idle = 0;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
now = hrtimer_cb_get_time(timer);
|
||||||
|
overrun = hrtimer_forward(timer, now, cfs_b->period);
|
||||||
|
|
||||||
|
if (!overrun)
|
||||||
|
break;
|
||||||
|
|
||||||
|
idle = do_sched_cfs_period_timer(cfs_b, overrun);
|
||||||
|
}
|
||||||
|
|
||||||
|
return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
|
||||||
|
{
|
||||||
|
raw_spin_lock_init(&cfs_b->lock);
|
||||||
|
cfs_b->runtime = 0;
|
||||||
|
cfs_b->quota = RUNTIME_INF;
|
||||||
|
cfs_b->period = ns_to_ktime(default_cfs_period());
|
||||||
|
|
||||||
|
INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
|
||||||
|
hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||||
|
cfs_b->period_timer.function = sched_cfs_period_timer;
|
||||||
|
hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||||
|
cfs_b->slack_timer.function = sched_cfs_slack_timer;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
|
||||||
|
{
|
||||||
|
cfs_rq->runtime_enabled = 0;
|
||||||
|
INIT_LIST_HEAD(&cfs_rq->throttled_list);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* requires cfs_b->lock, may release to reprogram timer */
|
||||||
|
static void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* The timer may be active because we're trying to set a new bandwidth
|
||||||
|
* period or because we're racing with the tear-down path
|
||||||
|
* (timer_active==0 becomes visible before the hrtimer call-back
|
||||||
|
* terminates). In either case we ensure that it's re-programmed
|
||||||
|
*/
|
||||||
|
while (unlikely(hrtimer_active(&cfs_b->period_timer))) {
|
||||||
|
raw_spin_unlock(&cfs_b->lock);
|
||||||
|
/* ensure cfs_b->lock is available while we wait */
|
||||||
|
hrtimer_cancel(&cfs_b->period_timer);
|
||||||
|
|
||||||
|
raw_spin_lock(&cfs_b->lock);
|
||||||
|
/* if someone else restarted the timer then we're done */
|
||||||
|
if (cfs_b->timer_active)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
cfs_b->timer_active = 1;
|
||||||
|
start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
|
||||||
|
{
|
||||||
|
hrtimer_cancel(&cfs_b->period_timer);
|
||||||
|
hrtimer_cancel(&cfs_b->slack_timer);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
|
||||||
|
static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
|
||||||
|
static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
|
||||||
|
|
||||||
|
static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_CFS_BANDWIDTH */
|
||||||
|
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
||||||
|
|
||||||
/* Real-Time classes' related field in a runqueue: */
|
/* Real-Time classes' related field in a runqueue: */
|
||||||
struct rt_rq {
|
struct rt_rq {
|
||||||
struct rt_prio_array active;
|
struct rt_prio_array active;
|
||||||
@ -510,7 +644,7 @@ struct rq {
|
|||||||
|
|
||||||
unsigned long cpu_power;
|
unsigned long cpu_power;
|
||||||
|
|
||||||
unsigned char idle_at_tick;
|
unsigned char idle_balance;
|
||||||
/* For active balancing */
|
/* For active balancing */
|
||||||
int post_schedule;
|
int post_schedule;
|
||||||
int active_balance;
|
int active_balance;
|
||||||
@ -520,8 +654,6 @@ struct rq {
|
|||||||
int cpu;
|
int cpu;
|
||||||
int online;
|
int online;
|
||||||
|
|
||||||
unsigned long avg_load_per_task;
|
|
||||||
|
|
||||||
u64 rt_avg;
|
u64 rt_avg;
|
||||||
u64 age_stamp;
|
u64 age_stamp;
|
||||||
u64 idle_stamp;
|
u64 idle_stamp;
|
||||||
@ -570,7 +702,7 @@ struct rq {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
struct task_struct *wake_list;
|
struct llist_head wake_list;
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1272,6 +1404,18 @@ void wake_up_idle_cpu(int cpu)
|
|||||||
smp_send_reschedule(cpu);
|
smp_send_reschedule(cpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool got_nohz_idle_kick(void)
|
||||||
|
{
|
||||||
|
return idle_cpu(smp_processor_id()) && this_rq()->nohz_balance_kick;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else /* CONFIG_NO_HZ */
|
||||||
|
|
||||||
|
static inline bool got_nohz_idle_kick(void)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* CONFIG_NO_HZ */
|
#endif /* CONFIG_NO_HZ */
|
||||||
|
|
||||||
static u64 sched_avg_period(void)
|
static u64 sched_avg_period(void)
|
||||||
@ -1471,24 +1615,28 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load)
|
|||||||
update_load_sub(&rq->load, load);
|
update_load_sub(&rq->load, load);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED)
|
#if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
|
||||||
|
(defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH)))
|
||||||
typedef int (*tg_visitor)(struct task_group *, void *);
|
typedef int (*tg_visitor)(struct task_group *, void *);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Iterate the full tree, calling @down when first entering a node and @up when
|
* Iterate task_group tree rooted at *from, calling @down when first entering a
|
||||||
* leaving it for the final time.
|
* node and @up when leaving it for the final time.
|
||||||
|
*
|
||||||
|
* Caller must hold rcu_lock or sufficient equivalent.
|
||||||
*/
|
*/
|
||||||
static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
|
static int walk_tg_tree_from(struct task_group *from,
|
||||||
|
tg_visitor down, tg_visitor up, void *data)
|
||||||
{
|
{
|
||||||
struct task_group *parent, *child;
|
struct task_group *parent, *child;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
rcu_read_lock();
|
parent = from;
|
||||||
parent = &root_task_group;
|
|
||||||
down:
|
down:
|
||||||
ret = (*down)(parent, data);
|
ret = (*down)(parent, data);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out_unlock;
|
goto out;
|
||||||
list_for_each_entry_rcu(child, &parent->children, siblings) {
|
list_for_each_entry_rcu(child, &parent->children, siblings) {
|
||||||
parent = child;
|
parent = child;
|
||||||
goto down;
|
goto down;
|
||||||
@ -1497,19 +1645,29 @@ up:
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
ret = (*up)(parent, data);
|
ret = (*up)(parent, data);
|
||||||
if (ret)
|
if (ret || parent == from)
|
||||||
goto out_unlock;
|
goto out;
|
||||||
|
|
||||||
child = parent;
|
child = parent;
|
||||||
parent = parent->parent;
|
parent = parent->parent;
|
||||||
if (parent)
|
if (parent)
|
||||||
goto up;
|
goto up;
|
||||||
out_unlock:
|
out:
|
||||||
rcu_read_unlock();
|
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Iterate the full tree, calling @down when first entering a node and @up when
|
||||||
|
* leaving it for the final time.
|
||||||
|
*
|
||||||
|
* Caller must hold rcu_lock or sufficient equivalent.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
|
||||||
|
{
|
||||||
|
return walk_tg_tree_from(&root_task_group, down, up, data);
|
||||||
|
}
|
||||||
|
|
||||||
static int tg_nop(struct task_group *tg, void *data)
|
static int tg_nop(struct task_group *tg, void *data)
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
@ -1569,11 +1727,9 @@ static unsigned long cpu_avg_load_per_task(int cpu)
|
|||||||
unsigned long nr_running = ACCESS_ONCE(rq->nr_running);
|
unsigned long nr_running = ACCESS_ONCE(rq->nr_running);
|
||||||
|
|
||||||
if (nr_running)
|
if (nr_running)
|
||||||
rq->avg_load_per_task = rq->load.weight / nr_running;
|
return rq->load.weight / nr_running;
|
||||||
else
|
|
||||||
rq->avg_load_per_task = 0;
|
|
||||||
|
|
||||||
return rq->avg_load_per_task;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_PREEMPT
|
#ifdef CONFIG_PREEMPT
|
||||||
@ -1806,7 +1962,6 @@ static void activate_task(struct rq *rq, struct task_struct *p, int flags)
|
|||||||
rq->nr_uninterruptible--;
|
rq->nr_uninterruptible--;
|
||||||
|
|
||||||
enqueue_task(rq, p, flags);
|
enqueue_task(rq, p, flags);
|
||||||
inc_nr_running(rq);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1818,7 +1973,6 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
|
|||||||
rq->nr_uninterruptible++;
|
rq->nr_uninterruptible++;
|
||||||
|
|
||||||
dequeue_task(rq, p, flags);
|
dequeue_task(rq, p, flags);
|
||||||
dec_nr_running(rq);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
|
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
|
||||||
@ -2390,11 +2544,11 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
|
|||||||
|
|
||||||
/* Look for allowed, online CPU in same node. */
|
/* Look for allowed, online CPU in same node. */
|
||||||
for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
|
for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
|
||||||
if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
|
if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
|
||||||
return dest_cpu;
|
return dest_cpu;
|
||||||
|
|
||||||
/* Any allowed, online CPU? */
|
/* Any allowed, online CPU? */
|
||||||
dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
|
dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask);
|
||||||
if (dest_cpu < nr_cpu_ids)
|
if (dest_cpu < nr_cpu_ids)
|
||||||
return dest_cpu;
|
return dest_cpu;
|
||||||
|
|
||||||
@ -2431,7 +2585,7 @@ int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
|
|||||||
* [ this allows ->select_task() to simply return task_cpu(p) and
|
* [ this allows ->select_task() to simply return task_cpu(p) and
|
||||||
* not worry about this generic constraint ]
|
* not worry about this generic constraint ]
|
||||||
*/
|
*/
|
||||||
if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
|
if (unlikely(!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) ||
|
||||||
!cpu_online(cpu)))
|
!cpu_online(cpu)))
|
||||||
cpu = select_fallback_rq(task_cpu(p), p);
|
cpu = select_fallback_rq(task_cpu(p), p);
|
||||||
|
|
||||||
@ -2556,42 +2710,26 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
static void sched_ttwu_do_pending(struct task_struct *list)
|
static void sched_ttwu_pending(void)
|
||||||
{
|
{
|
||||||
struct rq *rq = this_rq();
|
struct rq *rq = this_rq();
|
||||||
|
struct llist_node *llist = llist_del_all(&rq->wake_list);
|
||||||
|
struct task_struct *p;
|
||||||
|
|
||||||
raw_spin_lock(&rq->lock);
|
raw_spin_lock(&rq->lock);
|
||||||
|
|
||||||
while (list) {
|
while (llist) {
|
||||||
struct task_struct *p = list;
|
p = llist_entry(llist, struct task_struct, wake_entry);
|
||||||
list = list->wake_entry;
|
llist = llist_next(llist);
|
||||||
ttwu_do_activate(rq, p, 0);
|
ttwu_do_activate(rq, p, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
raw_spin_unlock(&rq->lock);
|
raw_spin_unlock(&rq->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_HOTPLUG_CPU
|
|
||||||
|
|
||||||
static void sched_ttwu_pending(void)
|
|
||||||
{
|
|
||||||
struct rq *rq = this_rq();
|
|
||||||
struct task_struct *list = xchg(&rq->wake_list, NULL);
|
|
||||||
|
|
||||||
if (!list)
|
|
||||||
return;
|
|
||||||
|
|
||||||
sched_ttwu_do_pending(list);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* CONFIG_HOTPLUG_CPU */
|
|
||||||
|
|
||||||
void scheduler_ipi(void)
|
void scheduler_ipi(void)
|
||||||
{
|
{
|
||||||
struct rq *rq = this_rq();
|
if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
|
||||||
struct task_struct *list = xchg(&rq->wake_list, NULL);
|
|
||||||
|
|
||||||
if (!list)
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -2608,25 +2746,21 @@ void scheduler_ipi(void)
|
|||||||
* somewhat pessimize the simple resched case.
|
* somewhat pessimize the simple resched case.
|
||||||
*/
|
*/
|
||||||
irq_enter();
|
irq_enter();
|
||||||
sched_ttwu_do_pending(list);
|
sched_ttwu_pending();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check if someone kicked us for doing the nohz idle load balance.
|
||||||
|
*/
|
||||||
|
if (unlikely(got_nohz_idle_kick() && !need_resched())) {
|
||||||
|
this_rq()->idle_balance = 1;
|
||||||
|
raise_softirq_irqoff(SCHED_SOFTIRQ);
|
||||||
|
}
|
||||||
irq_exit();
|
irq_exit();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ttwu_queue_remote(struct task_struct *p, int cpu)
|
static void ttwu_queue_remote(struct task_struct *p, int cpu)
|
||||||
{
|
{
|
||||||
struct rq *rq = cpu_rq(cpu);
|
if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list))
|
||||||
struct task_struct *next = rq->wake_list;
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
struct task_struct *old = next;
|
|
||||||
|
|
||||||
p->wake_entry = next;
|
|
||||||
next = cmpxchg(&rq->wake_list, old, p);
|
|
||||||
if (next == old)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!next)
|
|
||||||
smp_send_reschedule(cpu);
|
smp_send_reschedule(cpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2847,20 +2981,24 @@ void sched_fork(struct task_struct *p)
|
|||||||
*/
|
*/
|
||||||
p->state = TASK_RUNNING;
|
p->state = TASK_RUNNING;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Make sure we do not leak PI boosting priority to the child.
|
||||||
|
*/
|
||||||
|
p->prio = current->normal_prio;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Revert to default priority/policy on fork if requested.
|
* Revert to default priority/policy on fork if requested.
|
||||||
*/
|
*/
|
||||||
if (unlikely(p->sched_reset_on_fork)) {
|
if (unlikely(p->sched_reset_on_fork)) {
|
||||||
if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) {
|
if (task_has_rt_policy(p)) {
|
||||||
p->policy = SCHED_NORMAL;
|
p->policy = SCHED_NORMAL;
|
||||||
p->normal_prio = p->static_prio;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (PRIO_TO_NICE(p->static_prio) < 0) {
|
|
||||||
p->static_prio = NICE_TO_PRIO(0);
|
p->static_prio = NICE_TO_PRIO(0);
|
||||||
p->normal_prio = p->static_prio;
|
p->rt_priority = 0;
|
||||||
|
} else if (PRIO_TO_NICE(p->static_prio) < 0)
|
||||||
|
p->static_prio = NICE_TO_PRIO(0);
|
||||||
|
|
||||||
|
p->prio = p->normal_prio = __normal_prio(p);
|
||||||
set_load_weight(p);
|
set_load_weight(p);
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We don't need the reset flag anymore after the fork. It has
|
* We don't need the reset flag anymore after the fork. It has
|
||||||
@ -2869,11 +3007,6 @@ void sched_fork(struct task_struct *p)
|
|||||||
p->sched_reset_on_fork = 0;
|
p->sched_reset_on_fork = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Make sure we do not leak PI boosting priority to the child.
|
|
||||||
*/
|
|
||||||
p->prio = current->normal_prio;
|
|
||||||
|
|
||||||
if (!rt_prio(p->prio))
|
if (!rt_prio(p->prio))
|
||||||
p->sched_class = &fair_sched_class;
|
p->sched_class = &fair_sched_class;
|
||||||
|
|
||||||
@ -4116,7 +4249,7 @@ void scheduler_tick(void)
|
|||||||
perf_event_task_tick();
|
perf_event_task_tick();
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
rq->idle_at_tick = idle_cpu(cpu);
|
rq->idle_balance = idle_cpu(cpu);
|
||||||
trigger_load_balance(rq, cpu);
|
trigger_load_balance(rq, cpu);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@ -4240,7 +4373,7 @@ pick_next_task(struct rq *rq)
|
|||||||
* Optimization: we know that if all tasks are in
|
* Optimization: we know that if all tasks are in
|
||||||
* the fair class we can call that function directly:
|
* the fair class we can call that function directly:
|
||||||
*/
|
*/
|
||||||
if (likely(rq->nr_running == rq->cfs.nr_running)) {
|
if (likely(rq->nr_running == rq->cfs.h_nr_running)) {
|
||||||
p = fair_sched_class.pick_next_task(rq);
|
p = fair_sched_class.pick_next_task(rq);
|
||||||
if (likely(p))
|
if (likely(p))
|
||||||
return p;
|
return p;
|
||||||
@ -5026,7 +5159,20 @@ EXPORT_SYMBOL(task_nice);
|
|||||||
*/
|
*/
|
||||||
int idle_cpu(int cpu)
|
int idle_cpu(int cpu)
|
||||||
{
|
{
|
||||||
return cpu_curr(cpu) == cpu_rq(cpu)->idle;
|
struct rq *rq = cpu_rq(cpu);
|
||||||
|
|
||||||
|
if (rq->curr != rq->idle)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (rq->nr_running)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
if (!llist_empty(&rq->wake_list))
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -5876,7 +6022,7 @@ void show_state_filter(unsigned long state_filter)
|
|||||||
printk(KERN_INFO
|
printk(KERN_INFO
|
||||||
" task PC stack pid father\n");
|
" task PC stack pid father\n");
|
||||||
#endif
|
#endif
|
||||||
read_lock(&tasklist_lock);
|
rcu_read_lock();
|
||||||
do_each_thread(g, p) {
|
do_each_thread(g, p) {
|
||||||
/*
|
/*
|
||||||
* reset the NMI-timeout, listing all files on a slow
|
* reset the NMI-timeout, listing all files on a slow
|
||||||
@ -5892,7 +6038,7 @@ void show_state_filter(unsigned long state_filter)
|
|||||||
#ifdef CONFIG_SCHED_DEBUG
|
#ifdef CONFIG_SCHED_DEBUG
|
||||||
sysrq_sched_debug_show();
|
sysrq_sched_debug_show();
|
||||||
#endif
|
#endif
|
||||||
read_unlock(&tasklist_lock);
|
rcu_read_unlock();
|
||||||
/*
|
/*
|
||||||
* Only show locks if all tasks are dumped:
|
* Only show locks if all tasks are dumped:
|
||||||
*/
|
*/
|
||||||
@ -6007,10 +6153,9 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
|
|||||||
{
|
{
|
||||||
if (p->sched_class && p->sched_class->set_cpus_allowed)
|
if (p->sched_class && p->sched_class->set_cpus_allowed)
|
||||||
p->sched_class->set_cpus_allowed(p, new_mask);
|
p->sched_class->set_cpus_allowed(p, new_mask);
|
||||||
else {
|
|
||||||
cpumask_copy(&p->cpus_allowed, new_mask);
|
cpumask_copy(&p->cpus_allowed, new_mask);
|
||||||
p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
|
p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -6108,7 +6253,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
|
|||||||
if (task_cpu(p) != src_cpu)
|
if (task_cpu(p) != src_cpu)
|
||||||
goto done;
|
goto done;
|
||||||
/* Affinity changed (again). */
|
/* Affinity changed (again). */
|
||||||
if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
|
if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -6189,6 +6334,30 @@ static void calc_global_load_remove(struct rq *rq)
|
|||||||
rq->calc_load_active = 0;
|
rq->calc_load_active = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_CFS_BANDWIDTH
|
||||||
|
static void unthrottle_offline_cfs_rqs(struct rq *rq)
|
||||||
|
{
|
||||||
|
struct cfs_rq *cfs_rq;
|
||||||
|
|
||||||
|
for_each_leaf_cfs_rq(rq, cfs_rq) {
|
||||||
|
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
|
||||||
|
|
||||||
|
if (!cfs_rq->runtime_enabled)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* clock_task is not advancing so we just need to make sure
|
||||||
|
* there's some valid quota amount
|
||||||
|
*/
|
||||||
|
cfs_rq->runtime_remaining = cfs_b->quota;
|
||||||
|
if (cfs_rq_throttled(cfs_rq))
|
||||||
|
unthrottle_cfs_rq(cfs_rq);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static void unthrottle_offline_cfs_rqs(struct rq *rq) {}
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Migrate all tasks from the rq, sleeping tasks will be migrated by
|
* Migrate all tasks from the rq, sleeping tasks will be migrated by
|
||||||
* try_to_wake_up()->select_task_rq().
|
* try_to_wake_up()->select_task_rq().
|
||||||
@ -6214,6 +6383,9 @@ static void migrate_tasks(unsigned int dead_cpu)
|
|||||||
*/
|
*/
|
||||||
rq->stop = NULL;
|
rq->stop = NULL;
|
||||||
|
|
||||||
|
/* Ensure any throttled groups are reachable by pick_next_task */
|
||||||
|
unthrottle_offline_cfs_rqs(rq);
|
||||||
|
|
||||||
for ( ; ; ) {
|
for ( ; ; ) {
|
||||||
/*
|
/*
|
||||||
* There's this thread running, bail when that's the only
|
* There's this thread running, bail when that's the only
|
||||||
@ -7957,6 +8129,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
|
|||||||
/* allow initial update_cfs_load() to truncate */
|
/* allow initial update_cfs_load() to truncate */
|
||||||
cfs_rq->load_stamp = 1;
|
cfs_rq->load_stamp = 1;
|
||||||
#endif
|
#endif
|
||||||
|
init_cfs_rq_runtime(cfs_rq);
|
||||||
|
|
||||||
tg->cfs_rq[cpu] = cfs_rq;
|
tg->cfs_rq[cpu] = cfs_rq;
|
||||||
tg->se[cpu] = se;
|
tg->se[cpu] = se;
|
||||||
@ -8096,6 +8269,7 @@ void __init sched_init(void)
|
|||||||
* We achieve this by letting root_task_group's tasks sit
|
* We achieve this by letting root_task_group's tasks sit
|
||||||
* directly in rq->cfs (i.e root_task_group->se[] = NULL).
|
* directly in rq->cfs (i.e root_task_group->se[] = NULL).
|
||||||
*/
|
*/
|
||||||
|
init_cfs_bandwidth(&root_task_group.cfs_bandwidth);
|
||||||
init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
|
init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
|
||||||
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
||||||
|
|
||||||
@ -8125,7 +8299,6 @@ void __init sched_init(void)
|
|||||||
rq_attach_root(rq, &def_root_domain);
|
rq_attach_root(rq, &def_root_domain);
|
||||||
#ifdef CONFIG_NO_HZ
|
#ifdef CONFIG_NO_HZ
|
||||||
rq->nohz_balance_kick = 0;
|
rq->nohz_balance_kick = 0;
|
||||||
init_sched_softirq_csd(&per_cpu(remote_sched_softirq_cb, i));
|
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
init_rq_hrtick(rq);
|
init_rq_hrtick(rq);
|
||||||
@ -8336,6 +8509,8 @@ static void free_fair_sched_group(struct task_group *tg)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
|
||||||
|
|
||||||
for_each_possible_cpu(i) {
|
for_each_possible_cpu(i) {
|
||||||
if (tg->cfs_rq)
|
if (tg->cfs_rq)
|
||||||
kfree(tg->cfs_rq[i]);
|
kfree(tg->cfs_rq[i]);
|
||||||
@ -8363,6 +8538,8 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
|
|||||||
|
|
||||||
tg->shares = NICE_0_LOAD;
|
tg->shares = NICE_0_LOAD;
|
||||||
|
|
||||||
|
init_cfs_bandwidth(tg_cfs_bandwidth(tg));
|
||||||
|
|
||||||
for_each_possible_cpu(i) {
|
for_each_possible_cpu(i) {
|
||||||
cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
|
cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
|
||||||
GFP_KERNEL, cpu_to_node(i));
|
GFP_KERNEL, cpu_to_node(i));
|
||||||
@ -8638,12 +8815,7 @@ unsigned long sched_group_shares(struct task_group *tg)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_RT_GROUP_SCHED
|
#if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH)
|
||||||
/*
|
|
||||||
* Ensure that the real time constraints are schedulable.
|
|
||||||
*/
|
|
||||||
static DEFINE_MUTEX(rt_constraints_mutex);
|
|
||||||
|
|
||||||
static unsigned long to_ratio(u64 period, u64 runtime)
|
static unsigned long to_ratio(u64 period, u64 runtime)
|
||||||
{
|
{
|
||||||
if (runtime == RUNTIME_INF)
|
if (runtime == RUNTIME_INF)
|
||||||
@ -8651,6 +8823,13 @@ static unsigned long to_ratio(u64 period, u64 runtime)
|
|||||||
|
|
||||||
return div64_u64(runtime << 20, period);
|
return div64_u64(runtime << 20, period);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_RT_GROUP_SCHED
|
||||||
|
/*
|
||||||
|
* Ensure that the real time constraints are schedulable.
|
||||||
|
*/
|
||||||
|
static DEFINE_MUTEX(rt_constraints_mutex);
|
||||||
|
|
||||||
/* Must be called with tasklist_lock held */
|
/* Must be called with tasklist_lock held */
|
||||||
static inline int tg_has_rt_tasks(struct task_group *tg)
|
static inline int tg_has_rt_tasks(struct task_group *tg)
|
||||||
@ -8671,7 +8850,7 @@ struct rt_schedulable_data {
|
|||||||
u64 rt_runtime;
|
u64 rt_runtime;
|
||||||
};
|
};
|
||||||
|
|
||||||
static int tg_schedulable(struct task_group *tg, void *data)
|
static int tg_rt_schedulable(struct task_group *tg, void *data)
|
||||||
{
|
{
|
||||||
struct rt_schedulable_data *d = data;
|
struct rt_schedulable_data *d = data;
|
||||||
struct task_group *child;
|
struct task_group *child;
|
||||||
@ -8729,16 +8908,22 @@ static int tg_schedulable(struct task_group *tg, void *data)
|
|||||||
|
|
||||||
static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
|
static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
|
||||||
{
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
struct rt_schedulable_data data = {
|
struct rt_schedulable_data data = {
|
||||||
.tg = tg,
|
.tg = tg,
|
||||||
.rt_period = period,
|
.rt_period = period,
|
||||||
.rt_runtime = runtime,
|
.rt_runtime = runtime,
|
||||||
};
|
};
|
||||||
|
|
||||||
return walk_tg_tree(tg_schedulable, tg_nop, &data);
|
rcu_read_lock();
|
||||||
|
ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int tg_set_bandwidth(struct task_group *tg,
|
static int tg_set_rt_bandwidth(struct task_group *tg,
|
||||||
u64 rt_period, u64 rt_runtime)
|
u64 rt_period, u64 rt_runtime)
|
||||||
{
|
{
|
||||||
int i, err = 0;
|
int i, err = 0;
|
||||||
@ -8777,7 +8962,7 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
|
|||||||
if (rt_runtime_us < 0)
|
if (rt_runtime_us < 0)
|
||||||
rt_runtime = RUNTIME_INF;
|
rt_runtime = RUNTIME_INF;
|
||||||
|
|
||||||
return tg_set_bandwidth(tg, rt_period, rt_runtime);
|
return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
|
||||||
}
|
}
|
||||||
|
|
||||||
long sched_group_rt_runtime(struct task_group *tg)
|
long sched_group_rt_runtime(struct task_group *tg)
|
||||||
@ -8802,7 +8987,7 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
|
|||||||
if (rt_period == 0)
|
if (rt_period == 0)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
return tg_set_bandwidth(tg, rt_period, rt_runtime);
|
return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
|
||||||
}
|
}
|
||||||
|
|
||||||
long sched_group_rt_period(struct task_group *tg)
|
long sched_group_rt_period(struct task_group *tg)
|
||||||
@@ -8992,6 +9177,238 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
 
     return (u64) scale_load_down(tg->shares);
 }
+
+#ifdef CONFIG_CFS_BANDWIDTH
+static DEFINE_MUTEX(cfs_constraints_mutex);
+
+const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */
+const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */
+
+static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
+
+static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
+{
+    int i, ret = 0, runtime_enabled;
+    struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
+
+    if (tg == &root_task_group)
+        return -EINVAL;
+
+    /*
+     * Ensure we have at some amount of bandwidth every period. This is
+     * to prevent reaching a state of large arrears when throttled via
+     * entity_tick() resulting in prolonged exit starvation.
+     */
+    if (quota < min_cfs_quota_period || period < min_cfs_quota_period)
+        return -EINVAL;
+
+    /*
+     * Likewise, bound things on the otherside by preventing insane quota
+     * periods. This also allows us to normalize in computing quota
+     * feasibility.
+     */
+    if (period > max_cfs_quota_period)
+        return -EINVAL;
+
+    mutex_lock(&cfs_constraints_mutex);
+    ret = __cfs_schedulable(tg, period, quota);
+    if (ret)
+        goto out_unlock;
+
+    runtime_enabled = quota != RUNTIME_INF;
+    raw_spin_lock_irq(&cfs_b->lock);
+    cfs_b->period = ns_to_ktime(period);
+    cfs_b->quota = quota;
+
+    __refill_cfs_bandwidth_runtime(cfs_b);
+    /* restart the period timer (if active) to handle new period expiry */
+    if (runtime_enabled && cfs_b->timer_active) {
+        /* force a reprogram */
+        cfs_b->timer_active = 0;
+        __start_cfs_bandwidth(cfs_b);
+    }
+    raw_spin_unlock_irq(&cfs_b->lock);
+
+    for_each_possible_cpu(i) {
+        struct cfs_rq *cfs_rq = tg->cfs_rq[i];
+        struct rq *rq = rq_of(cfs_rq);
+
+        raw_spin_lock_irq(&rq->lock);
+        cfs_rq->runtime_enabled = runtime_enabled;
+        cfs_rq->runtime_remaining = 0;
+
+        if (cfs_rq_throttled(cfs_rq))
+            unthrottle_cfs_rq(cfs_rq);
+        raw_spin_unlock_irq(&rq->lock);
+    }
+out_unlock:
+    mutex_unlock(&cfs_constraints_mutex);
+
+    return ret;
+}
+
+int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
+{
+    u64 quota, period;
+
+    period = ktime_to_ns(tg_cfs_bandwidth(tg)->period);
+    if (cfs_quota_us < 0)
+        quota = RUNTIME_INF;
+    else
+        quota = (u64)cfs_quota_us * NSEC_PER_USEC;
+
+    return tg_set_cfs_bandwidth(tg, period, quota);
+}
+
+long tg_get_cfs_quota(struct task_group *tg)
+{
+    u64 quota_us;
+
+    if (tg_cfs_bandwidth(tg)->quota == RUNTIME_INF)
+        return -1;
+
+    quota_us = tg_cfs_bandwidth(tg)->quota;
+    do_div(quota_us, NSEC_PER_USEC);
+
+    return quota_us;
+}
+
+int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
+{
+    u64 quota, period;
+
+    period = (u64)cfs_period_us * NSEC_PER_USEC;
+    quota = tg_cfs_bandwidth(tg)->quota;
+
+    if (period <= 0)
+        return -EINVAL;
+
+    return tg_set_cfs_bandwidth(tg, period, quota);
+}
+
+long tg_get_cfs_period(struct task_group *tg)
+{
+    u64 cfs_period_us;
+
+    cfs_period_us = ktime_to_ns(tg_cfs_bandwidth(tg)->period);
+    do_div(cfs_period_us, NSEC_PER_USEC);
+
+    return cfs_period_us;
+}
+
+static s64 cpu_cfs_quota_read_s64(struct cgroup *cgrp, struct cftype *cft)
+{
+    return tg_get_cfs_quota(cgroup_tg(cgrp));
+}
+
+static int cpu_cfs_quota_write_s64(struct cgroup *cgrp, struct cftype *cftype,
+                s64 cfs_quota_us)
+{
+    return tg_set_cfs_quota(cgroup_tg(cgrp), cfs_quota_us);
+}
+
+static u64 cpu_cfs_period_read_u64(struct cgroup *cgrp, struct cftype *cft)
+{
+    return tg_get_cfs_period(cgroup_tg(cgrp));
+}
+
+static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype,
+                u64 cfs_period_us)
+{
+    return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us);
+}
+
+struct cfs_schedulable_data {
+    struct task_group *tg;
+    u64 period, quota;
+};
+
+/*
+ * normalize group quota/period to be quota/max_period
+ * note: units are usecs
+ */
+static u64 normalize_cfs_quota(struct task_group *tg,
+                   struct cfs_schedulable_data *d)
+{
+    u64 quota, period;
+
+    if (tg == d->tg) {
+        period = d->period;
+        quota = d->quota;
+    } else {
+        period = tg_get_cfs_period(tg);
+        quota = tg_get_cfs_quota(tg);
+    }
+
+    /* note: these should typically be equivalent */
+    if (quota == RUNTIME_INF || quota == -1)
+        return RUNTIME_INF;
+
+    return to_ratio(period, quota);
+}
+
+static int tg_cfs_schedulable_down(struct task_group *tg, void *data)
+{
+    struct cfs_schedulable_data *d = data;
+    struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
+    s64 quota = 0, parent_quota = -1;
+
+    if (!tg->parent) {
+        quota = RUNTIME_INF;
+    } else {
+        struct cfs_bandwidth *parent_b = tg_cfs_bandwidth(tg->parent);
+
+        quota = normalize_cfs_quota(tg, d);
+        parent_quota = parent_b->hierarchal_quota;
+
+        /*
+         * ensure max(child_quota) <= parent_quota, inherit when no
+         * limit is set
+         */
+        if (quota == RUNTIME_INF)
+            quota = parent_quota;
+        else if (parent_quota != RUNTIME_INF && quota > parent_quota)
+            return -EINVAL;
+    }
+    cfs_b->hierarchal_quota = quota;
+
+    return 0;
+}
+
+static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
+{
+    int ret;
+    struct cfs_schedulable_data data = {
+        .tg = tg,
+        .period = period,
+        .quota = quota,
+    };
+
+    if (quota != RUNTIME_INF) {
+        do_div(data.period, NSEC_PER_USEC);
+        do_div(data.quota, NSEC_PER_USEC);
+    }
+
+    rcu_read_lock();
+    ret = walk_tg_tree(tg_cfs_schedulable_down, tg_nop, &data);
+    rcu_read_unlock();
+
+    return ret;
+}
+
+static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft,
+        struct cgroup_map_cb *cb)
+{
+    struct task_group *tg = cgroup_tg(cgrp);
+    struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
+
+    cb->fill(cb, "nr_periods", cfs_b->nr_periods);
+    cb->fill(cb, "nr_throttled", cfs_b->nr_throttled);
+    cb->fill(cb, "throttled_time", cfs_b->throttled_time);
+
+    return 0;
+}
+#endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 #ifdef CONFIG_RT_GROUP_SCHED
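The feasibility check added above is plain fixed-point arithmetic: to_ratio() expresses quota/period as a 20-bit fraction, and tg_cfs_schedulable_down() rejects any child whose fraction exceeds its parent's. The following userspace sketch reproduces that arithmetic for illustration only; the values and the main() driver are assumptions, not part of the patch.

/* Sketch of the quota feasibility ratio: (quota << 20) / period,
 * with RUNTIME_INF mapping to a full 1 << 20 (one whole CPU). */
#include <stdint.h>
#include <stdio.h>

#define RUNTIME_INF ((uint64_t)~0ULL)

static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
    if (runtime == RUNTIME_INF)
        return 1ULL << 20;              /* unconstrained: 100% */

    return (runtime << 20) / period;    /* fixed-point quota/period */
}

int main(void)
{
    /* parent: 250ms quota in a 500ms period -> half a CPU (0x80000) */
    uint64_t parent = to_ratio(500000, 250000);
    /* child asking for 300ms in the same period exceeds the parent */
    uint64_t child  = to_ratio(500000, 300000);

    printf("parent=0x%llx child=0x%llx -> %s\n",
           (unsigned long long)parent, (unsigned long long)child,
           child > parent ? "-EINVAL" : "ok");
    return 0;
}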
@@ -9026,6 +9443,22 @@ static struct cftype cpu_files[] = {
         .write_u64 = cpu_shares_write_u64,
     },
 #endif
+#ifdef CONFIG_CFS_BANDWIDTH
+    {
+        .name = "cfs_quota_us",
+        .read_s64 = cpu_cfs_quota_read_s64,
+        .write_s64 = cpu_cfs_quota_write_s64,
+    },
+    {
+        .name = "cfs_period_us",
+        .read_u64 = cpu_cfs_period_read_u64,
+        .write_u64 = cpu_cfs_period_write_u64,
+    },
+    {
+        .name = "stat",
+        .read_map = cpu_stats_show,
+    },
+#endif
 #ifdef CONFIG_RT_GROUP_SCHED
     {
         .name = "rt_runtime_us",
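The cftype entries above are what appear in cgroupfs as cpu.cfs_quota_us, cpu.cfs_period_us and cpu.stat. A minimal sketch of driving them from userspace follows; the mount point and the pre-created group directory are assumptions, and error handling is reduced to the bare minimum.

#include <stdio.h>

static int write_val(const char *dir, const char *file, long long val)
{
    char path[256];
    FILE *f;

    snprintf(path, sizeof(path), "%s/%s", dir, file);
    f = fopen(path, "w");
    if (!f)
        return -1;
    fprintf(f, "%lld\n", val);
    return fclose(f);
}

int main(void)
{
    /* assumed cgroup mount point and group directory */
    const char *grp = "/sys/fs/cgroup/cpu/demo";

    /* 100ms period, 25ms quota: cap the group at 1/4 of one CPU */
    write_val(grp, "cpu.cfs_period_us", 100000);
    write_val(grp, "cpu.cfs_quota_us", 25000);
    return 0;
}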
@@ -9335,4 +9768,3 @@ struct cgroup_subsys cpuacct_subsys = {
     .subsys_id = cpuacct_subsys_id,
 };
 #endif /* CONFIG_CGROUP_CPUACCT */
-
@@ -47,9 +47,6 @@ static int convert_prio(int prio)
     return cpupri;
 }
 
-#define for_each_cpupri_active(array, idx)    \
-    for_each_set_bit(idx, array, CPUPRI_NR_PRIORITIES)
-
 /**
  * cpupri_find - find the best (lowest-pri) CPU in the system
  * @cp: The cpupri context
@@ -71,11 +68,38 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
     int idx = 0;
     int task_pri = convert_prio(p->prio);
 
-    for_each_cpupri_active(cp->pri_active, idx) {
-        struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
+    if (task_pri >= MAX_RT_PRIO)
+        return 0;
+
+    for (idx = 0; idx < task_pri; idx++) {
+        struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
+        int skip = 0;
 
-        if (idx >= task_pri)
-            break;
+        if (!atomic_read(&(vec)->count))
+            skip = 1;
+        /*
+         * When looking at the vector, we need to read the counter,
+         * do a memory barrier, then read the mask.
+         *
+         * Note: This is still all racey, but we can deal with it.
+         * Ideally, we only want to look at masks that are set.
+         *
+         * If a mask is not set, then the only thing wrong is that we
+         * did a little more work than necessary.
+         *
+         * If we read a zero count but the mask is set, because of the
+         * memory barriers, that can only happen when the highest prio
+         * task for a run queue has left the run queue, in which case,
+         * it will be followed by a pull. If the task we are processing
+         * fails to find a proper place to go, that pull request will
+         * pull this task if the run queue is running at a lower
+         * priority.
+         */
+        smp_rmb();
+
+        /* Need to do the rmb for every iteration */
+        if (skip)
+            continue;
 
         if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
             continue;
@@ -115,7 +139,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 {
     int *currpri = &cp->cpu_to_pri[cpu];
     int oldpri = *currpri;
-    unsigned long flags;
+    int do_mb = 0;
 
     newpri = convert_prio(newpri);
 
@@ -128,32 +152,46 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
      * If the cpu was currently mapped to a different value, we
      * need to map it to the new value then remove the old value.
      * Note, we must add the new value first, otherwise we risk the
-     * cpu being cleared from pri_active, and this cpu could be
-     * missed for a push or pull.
+     * cpu being missed by the priority loop in cpupri_find.
      */
     if (likely(newpri != CPUPRI_INVALID)) {
         struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
 
-        raw_spin_lock_irqsave(&vec->lock, flags);
-
         cpumask_set_cpu(cpu, vec->mask);
-        vec->count++;
-        if (vec->count == 1)
-            set_bit(newpri, cp->pri_active);
-
-        raw_spin_unlock_irqrestore(&vec->lock, flags);
+        /*
+         * When adding a new vector, we update the mask first,
+         * do a write memory barrier, and then update the count, to
+         * make sure the vector is visible when count is set.
+         */
+        smp_mb__before_atomic_inc();
+        atomic_inc(&(vec)->count);
+        do_mb = 1;
     }
     if (likely(oldpri != CPUPRI_INVALID)) {
         struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri];
 
-        raw_spin_lock_irqsave(&vec->lock, flags);
-
-        vec->count--;
-        if (!vec->count)
-            clear_bit(oldpri, cp->pri_active);
+        /*
+         * Because the order of modification of the vec->count
+         * is important, we must make sure that the update
+         * of the new prio is seen before we decrement the
+         * old prio. This makes sure that the loop sees
+         * one or the other when we raise the priority of
+         * the run queue. We don't care about when we lower the
+         * priority, as that will trigger an rt pull anyway.
+         *
+         * We only need to do a memory barrier if we updated
+         * the new priority vec.
+         */
+        if (do_mb)
+            smp_mb__after_atomic_inc();
+
+        /*
+         * When removing from the vector, we decrement the counter first
+         * do a memory barrier and then clear the mask.
+         */
+        atomic_dec(&(vec)->count);
+        smp_mb__after_atomic_inc();
         cpumask_clear_cpu(cpu, vec->mask);
-
-        raw_spin_unlock_irqrestore(&vec->lock, flags);
     }
 
     *currpri = newpri;
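The ordering rules spelled out in the comments above (writer: set the mask, write barrier, bump the count; reader: load the count, read barrier, test the mask) can be modelled outside the kernel with C11 atomics. The sketch below is only an illustration of that publish/observe pattern under assumed names, not kernel code.

#include <stdatomic.h>
#include <stdbool.h>

struct vec {
    atomic_int   count;
    atomic_ulong mask;    /* stand-in for a cpumask */
};

static void vec_add(struct vec *v, int cpu)
{
    atomic_fetch_or_explicit(&v->mask, 1UL << cpu, memory_order_relaxed);
    /* mask must be visible before the count says "non-empty" */
    atomic_fetch_add_explicit(&v->count, 1, memory_order_release);
}

static bool vec_lookup(struct vec *v, unsigned long *mask)
{
    if (!atomic_load_explicit(&v->count, memory_order_acquire))
        return false;    /* cheap skip, as in cpupri_find() */

    *mask = atomic_load_explicit(&v->mask, memory_order_relaxed);
    return *mask != 0;
}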
@@ -175,8 +213,7 @@ int cpupri_init(struct cpupri *cp)
     for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
         struct cpupri_vec *vec = &cp->pri_to_cpu[i];
 
-        raw_spin_lock_init(&vec->lock);
-        vec->count = 0;
+        atomic_set(&vec->count, 0);
         if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
             goto cleanup;
     }
@@ -4,7 +4,6 @@
 #include <linux/sched.h>
 
 #define CPUPRI_NR_PRIORITIES  (MAX_RT_PRIO + 2)
-#define CPUPRI_NR_PRI_WORDS   BITS_TO_LONGS(CPUPRI_NR_PRIORITIES)
 
 #define CPUPRI_INVALID -1
 #define CPUPRI_IDLE     0
@@ -12,14 +11,12 @@
 /* values 2-101 are RT priorities 0-99 */
 
 struct cpupri_vec {
-    raw_spinlock_t lock;
-    int count;
+    atomic_t count;
     cpumask_var_t mask;
 };
 
 struct cpupri {
     struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES];
-    long pri_active[CPUPRI_NR_PRI_WORDS];
     int cpu_to_pri[NR_CPUS];
 };
 
[File diff suppressed because it is too large]
@@ -11,11 +11,6 @@ SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1)
  */
 SCHED_FEAT(START_DEBIT, 1)
 
-/*
- * Should wakeups try to preempt running tasks.
- */
-SCHED_FEAT(WAKEUP_PREEMPT, 1)
-
 /*
  * Based on load and program behaviour, see if it makes sense to place
  * a newly woken task on the same cpu as the task that woke it --
@@ -124,21 +124,33 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
     update_rt_migration(rt_rq);
 }
 
+static inline int has_pushable_tasks(struct rq *rq)
+{
+    return !plist_head_empty(&rq->rt.pushable_tasks);
+}
+
 static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
 {
     plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
     plist_node_init(&p->pushable_tasks, p->prio);
     plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
+
+    /* Update the highest prio pushable task */
+    if (p->prio < rq->rt.highest_prio.next)
+        rq->rt.highest_prio.next = p->prio;
 }
 
 static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
 {
     plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
-}
 
-static inline int has_pushable_tasks(struct rq *rq)
-{
-    return !plist_head_empty(&rq->rt.pushable_tasks);
+    /* Update the new highest prio pushable task */
+    if (has_pushable_tasks(rq)) {
+        p = plist_first_entry(&rq->rt.pushable_tasks,
+                      struct task_struct, pushable_tasks);
+        rq->rt.highest_prio.next = p->prio;
+    } else
+        rq->rt.highest_prio.next = MAX_RT_PRIO;
 }
 
 #else
@@ -643,6 +655,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 
     if (rt_rq->rt_time > runtime) {
         rt_rq->rt_throttled = 1;
+        printk_once(KERN_WARNING "sched: RT throttling activated\n");
         if (rt_rq_throttled(rt_rq)) {
             sched_rt_rq_dequeue(rt_rq);
             return 1;
@@ -698,47 +711,13 @@ static void update_curr_rt(struct rq *rq)
 
 #if defined CONFIG_SMP
 
-static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu);
-
-static inline int next_prio(struct rq *rq)
-{
-    struct task_struct *next = pick_next_highest_task_rt(rq, rq->cpu);
-
-    if (next && rt_prio(next->prio))
-        return next->prio;
-    else
-        return MAX_RT_PRIO;
-}
-
 static void
 inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 {
     struct rq *rq = rq_of_rt_rq(rt_rq);
 
-    if (prio < prev_prio) {
-
-        /*
-         * If the new task is higher in priority than anything on the
-         * run-queue, we know that the previous high becomes our
-         * next-highest.
-         */
-        rt_rq->highest_prio.next = prev_prio;
-
-        if (rq->online)
-            cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
-
-    } else if (prio == rt_rq->highest_prio.curr)
-        /*
-         * If the next task is equal in priority to the highest on
-         * the run-queue, then we implicitly know that the next highest
-         * task cannot be any lower than current
-         */
-        rt_rq->highest_prio.next = prio;
-    else if (prio < rt_rq->highest_prio.next)
-        /*
-         * Otherwise, we need to recompute next-highest
-         */
-        rt_rq->highest_prio.next = next_prio(rq);
+    if (rq->online && prio < prev_prio)
+        cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
 }
 
 static void
|
|
||||||
static void
|
static void
|
||||||
@ -746,9 +725,6 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
|
|||||||
{
|
{
|
||||||
struct rq *rq = rq_of_rt_rq(rt_rq);
|
struct rq *rq = rq_of_rt_rq(rt_rq);
|
||||||
|
|
||||||
if (rt_rq->rt_nr_running && (prio <= rt_rq->highest_prio.next))
|
|
||||||
rt_rq->highest_prio.next = next_prio(rq);
|
|
||||||
|
|
||||||
if (rq->online && rt_rq->highest_prio.curr != prev_prio)
|
if (rq->online && rt_rq->highest_prio.curr != prev_prio)
|
||||||
cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
|
cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
|
||||||
}
|
}
|
||||||
@@ -961,6 +937,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 
     if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
         enqueue_pushable_task(rq, p);
+
+    inc_nr_running(rq);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
@@ -971,6 +949,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
     dequeue_rt_entity(rt_se);
 
     dequeue_pushable_task(rq, p);
+
+    dec_nr_running(rq);
 }
 
 /*
@@ -1017,10 +997,12 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
     struct rq *rq;
     int cpu;
 
-    if (sd_flag != SD_BALANCE_WAKE)
-        return smp_processor_id();
-
     cpu = task_cpu(p);
+
+    /* For anything but wake ups, just return the task_cpu */
+    if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
+        goto out;
+
     rq = cpu_rq(cpu);
 
     rcu_read_lock();
@@ -1059,6 +1041,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
     }
     rcu_read_unlock();
 
+out:
     return cpu;
 }
 
@@ -1178,7 +1161,6 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
 static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 {
     update_curr_rt(rq);
-    p->se.exec_start = 0;
 
     /*
      * The previous task needs to be made eligible for pushing
@@ -1198,7 +1180,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
     if (!task_running(rq, p) &&
-        (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) &&
+        (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
         (p->rt.nr_cpus_allowed > 1))
         return 1;
     return 0;
@@ -1343,7 +1325,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
          */
         if (unlikely(task_rq(task) != rq ||
                  !cpumask_test_cpu(lowest_rq->cpu,
-                           &task->cpus_allowed) ||
+                           tsk_cpus_allowed(task)) ||
                  task_running(rq, task) ||
                  !task->on_rq)) {
 
@@ -1394,6 +1376,7 @@ static int push_rt_task(struct rq *rq)
 {
     struct task_struct *next_task;
     struct rq *lowest_rq;
+    int ret = 0;
 
     if (!rq->rt.overloaded)
         return 0;
@@ -1426,7 +1409,7 @@ retry:
     if (!lowest_rq) {
         struct task_struct *task;
         /*
-         * find lock_lowest_rq releases rq->lock
+         * find_lock_lowest_rq releases rq->lock
          * so it is possible that next_task has migrated.
          *
          * We need to make sure that the task is still on the same
@@ -1436,12 +1419,11 @@ retry:
         task = pick_next_pushable_task(rq);
         if (task_cpu(next_task) == rq->cpu && task == next_task) {
             /*
-             * If we get here, the task hasn't moved at all, but
-             * it has failed to push. We will not try again,
-             * since the other cpus will pull from us when they
-             * are ready.
+             * The task hasn't migrated, and is still the next
+             * eligible task, but we failed to find a run-queue
+             * to push it to. Do not retry in this case, since
+             * other cpus will pull from us when ready.
              */
-            dequeue_pushable_task(rq, next_task);
             goto out;
         }
 
@@ -1460,6 +1442,7 @@ retry:
     deactivate_task(rq, next_task, 0);
     set_task_cpu(next_task, lowest_rq->cpu);
     activate_task(lowest_rq, next_task, 0);
+    ret = 1;
 
     resched_task(lowest_rq->curr);
 
@@ -1468,7 +1451,7 @@ retry:
 out:
     put_task_struct(next_task);
 
-    return 1;
+    return ret;
 }
 
 static void push_rt_tasks(struct rq *rq)
@@ -1626,9 +1609,6 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 
         update_rt_migration(&rq->rt);
     }
-
-    cpumask_copy(&p->cpus_allowed, new_mask);
-    p->rt.nr_cpus_allowed = weight;
 }
 
 /* Assumes rq->lock is held */
@@ -1863,4 +1843,3 @@ static void print_rt_stats(struct seq_file *m, int cpu)
     rcu_read_unlock();
 }
 #endif /* CONFIG_SCHED_DEBUG */
-
@@ -34,11 +34,13 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
 static void
 enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
 {
+    inc_nr_running(rq);
 }
 
 static void
 dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
 {
+    dec_nr_running(rq);
 }
 
 static void yield_task_stop(struct rq *rq)
@@ -379,6 +379,16 @@ static struct ctl_table kern_table[] = {
         .extra2     = &one,
     },
 #endif
+#ifdef CONFIG_CFS_BANDWIDTH
+    {
+        .procname   = "sched_cfs_bandwidth_slice_us",
+        .data       = &sysctl_sched_cfs_bandwidth_slice,
+        .maxlen     = sizeof(unsigned int),
+        .mode       = 0644,
+        .proc_handler = proc_dointvec_minmax,
+        .extra1     = &one,
+    },
+#endif
 #ifdef CONFIG_PROVE_LOCKING
     {
         .procname   = "prove_locking",
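The new tunable lands in the usual sysctl location under /proc/sys/kernel (the exact path is inferred from the kern_table entry, not stated in the patch). A tiny illustrative reader:

#include <stdio.h>

int main(void)
{
    unsigned int slice_us = 0;
    FILE *f = fopen("/proc/sys/kernel/sched_cfs_bandwidth_slice_us", "r");

    if (!f)
        return 1;
    if (fscanf(f, "%u", &slice_us) == 1)
        printf("cfs bandwidth slice: %u us\n", slice_us);
    fclose(f);
    return 0;
}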
@@ -276,7 +276,4 @@ config CORDIC
       so its calculations are in fixed point. Modules can select this
       when they require this function. Module will be called cordic.
 
-config LLIST
-    bool
-
 endmenu
@@ -22,7 +22,7 @@ lib-y += kobject.o kref.o klist.o
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
      bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
      string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \
-     bsearch.o find_last_bit.o find_next_bit.o
+     bsearch.o find_last_bit.o find_next_bit.o llist.o
 obj-y += kstrtox.o
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 
@@ -115,8 +115,6 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
 
 obj-$(CONFIG_CORDIC) += cordic.o
 
-obj-$(CONFIG_LLIST) += llist.o
-
 hostprogs-y := gen_crc32table
 clean-files := crc32table.h
 
lib/llist.c (74 lines changed)
@@ -3,8 +3,8 @@
  *
  * The basic atomic operation of this list is cmpxchg on long. On
  * architectures that don't have NMI-safe cmpxchg implementation, the
- * list can NOT be used in NMI handler. So code uses the list in NMI
- * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
+ * list can NOT be used in NMI handlers. So code that uses the list in
+ * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
  *
  * Copyright 2010,2011 Intel Corp.
  *   Author: Huang Ying <ying.huang@intel.com>
@@ -29,49 +29,29 @@
 
 #include <asm/system.h>
 
-/**
- * llist_add - add a new entry
- * @new:	new entry to be added
- * @head:	the head for your lock-less list
- */
-void llist_add(struct llist_node *new, struct llist_head *head)
-{
-    struct llist_node *entry, *old_entry;
-
-#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
-    BUG_ON(in_nmi());
-#endif
-
-    entry = head->first;
-    do {
-        old_entry = entry;
-        new->next = entry;
-        cpu_relax();
-    } while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry);
-}
-EXPORT_SYMBOL_GPL(llist_add);
-
 /**
  * llist_add_batch - add several linked entries in batch
  * @new_first:	first entry in batch to be added
  * @new_last:	last entry in batch to be added
  * @head:	the head for your lock-less list
+ *
+ * Return whether list is empty before adding.
  */
-void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
+bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
 		     struct llist_head *head)
 {
     struct llist_node *entry, *old_entry;
 
-#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
-    BUG_ON(in_nmi());
-#endif
-
     entry = head->first;
-    do {
+    for (;;) {
         old_entry = entry;
         new_last->next = entry;
-        cpu_relax();
-    } while ((entry = cmpxchg(&head->first, old_entry, new_first)) != old_entry);
+        entry = cmpxchg(&head->first, old_entry, new_first);
+        if (entry == old_entry)
+            break;
+    }
+
+    return old_entry == NULL;
 }
 EXPORT_SYMBOL_GPL(llist_add_batch);
 
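llist_add_batch() now reports whether the list was empty before the push, which is what lets a caller such as irq_work raise an IPI only for the first queued entry. Below is a userspace sketch of the same pattern with C11 atomics; the names and types are illustrative, not the kernel API.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct node {
    struct node *next;
};

struct head {
    _Atomic(struct node *) first;
};

/* Push a pre-linked chain [new_first .. new_last] with a CAS loop and
 * report whether the list was empty before the push. */
static bool push_batch(struct node *new_first, struct node *new_last,
                       struct head *h)
{
    struct node *old = atomic_load_explicit(&h->first,
                                            memory_order_relaxed);

    do {
        new_last->next = old;
    } while (!atomic_compare_exchange_weak_explicit(&h->first, &old,
                                                    new_first,
                                                    memory_order_release,
                                                    memory_order_relaxed));

    return old == NULL;    /* true: list was empty before adding */
}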
@@ -93,37 +73,17 @@ struct llist_node *llist_del_first(struct llist_head *head)
 {
     struct llist_node *entry, *old_entry, *next;
 
-#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
-    BUG_ON(in_nmi());
-#endif
-
     entry = head->first;
-    do {
+    for (;;) {
         if (entry == NULL)
             return NULL;
         old_entry = entry;
         next = entry->next;
-        cpu_relax();
-    } while ((entry = cmpxchg(&head->first, old_entry, next)) != old_entry);
+        entry = cmpxchg(&head->first, old_entry, next);
+        if (entry == old_entry)
+            break;
+    }
 
     return entry;
 }
 EXPORT_SYMBOL_GPL(llist_del_first);
-
-/**
- * llist_del_all - delete all entries from lock-less list
- * @head:	the head of lock-less list to delete all entries
- *
- * If list is empty, return NULL, otherwise, delete all entries and
- * return the pointer to the first entry. The order of entries
- * deleted is from the newest to the oldest added one.
- */
-struct llist_node *llist_del_all(struct llist_head *head)
-{
-#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
-    BUG_ON(in_nmi());
-#endif
-
-    return xchg(&head->first, NULL);
-}
-EXPORT_SYMBOL_GPL(llist_del_all);
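For completeness, the matching pop has the same CAS shape as llist_del_first() above; like the kernel function, it is only safe with a single consumer running concurrently with any number of producers (otherwise the classic ABA problem applies). Again an illustrative userspace sketch, not kernel code.

#include <stdatomic.h>
#include <stddef.h>

struct node {
    struct node *next;
};

struct head {
    _Atomic(struct node *) first;
};

static struct node *pop_first(struct head *h)
{
    struct node *old = atomic_load_explicit(&h->first,
                                            memory_order_acquire);

    do {
        if (old == NULL)
            return NULL;
    } while (!atomic_compare_exchange_weak_explicit(&h->first, &old,
                                                    old->next,
                                                    memory_order_acquire,
                                                    memory_order_acquire));

    return old;
}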
|
@ -22,7 +22,7 @@ notrace unsigned int debug_smp_processor_id(void)
|
|||||||
* Kernel threads bound to a single CPU can safely use
|
* Kernel threads bound to a single CPU can safely use
|
||||||
* smp_processor_id():
|
* smp_processor_id():
|
||||||
*/
|
*/
|
||||||
if (cpumask_equal(¤t->cpus_allowed, cpumask_of(this_cpu)))
|
if (cpumask_equal(tsk_cpus_allowed(current), cpumask_of(this_cpu)))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
/*
|
/*
|
||||||