Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler changes from Ingo Molnar:
"Main changes:
- Scheduler-side full-dynticks preparation changes (user-space execution
is undisturbed and receives no timer IRQs) that convert the cputime
accounting code to be full-dynticks ready, from Frederic Weisbecker.
- Initial sched.h split-up changes, by Clark Williams
- select_idle_sibling() performance improvement by Mike Galbraith:
" 1 tbench pair (worst case) in a 10 core + SMT package:
pre 15.22 MB/sec 1 procs
post 252.01 MB/sec 1 procs "
- sched_rr_get_interval() ABI fix/change. We think this detail is not
used by apps (so it's not an ABI in practice), but let's keep it
under observation (see the user-space sketch below).
- Misc RT scheduling cleanups and optimizations"
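For context on the sched_rr_get_interval() note above, a minimal user-space
sketch (illustration only, not part of this merge) that queries the SCHED_RR
timeslice for the calling thread; the sched_rr_timeslice_ms knob added by this
series is expected to change the value reported here:

/*
 * Illustrative user-space program: switch the calling thread to SCHED_RR
 * (needs CAP_SYS_NICE) and print the round-robin timeslice reported by
 * sched_rr_get_interval().
 */
#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct sched_param sp = { .sched_priority = 1 };
	struct timespec ts;

	if (sched_setscheduler(0, SCHED_RR, &sp))
		perror("sched_setscheduler");	/* insufficient privileges? */

	if (sched_rr_get_interval(0, &ts))
		perror("sched_rr_get_interval");
	else
		printf("RR timeslice: %ld.%09ld s\n",
		       (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}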
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
sched/rt: Add <linux/sched/rt.h> header to <linux/init_task.h>
cputime: Remove irqsave from seqlock readers
sched, powerpc: Fix sched.h split-up build failure
cputime: Restore CPU_ACCOUNTING config defaults for PPC64
sched/rt: Move rt specific bits into new header file
sched/rt: Add a tuning knob to allow changing SCHED_RR timeslice
sched: Move sched.h sysctl bits into separate header
sched: Fix signedness bug in yield_to()
sched: Fix select_idle_sibling() bouncing cow syndrome
sched/rt: Further simplify pick_rt_task()
sched/rt: Do not account zero delta_exec in update_curr_rt()
cputime: Safely read cputime of full dynticks CPUs
kvm: Prepare to add generic guest entry/exit callbacks
cputime: Use accessors to read task cputime stats
cputime: Allow dynamic switch between tick/virtual based cputime accounting
cputime: Generic on-demand virtual cputime accounting
cputime: Move default nsecs_to_cputime() to jiffies based cputime file
cputime: Librarize per nsecs resolution cputime definitions
cputime: Avoid multiplication overflow on utime scaling
context_tracking: Export context state for generic vtime
...
Fix up conflict in kernel/context_tracking.c due to comment additions.
include/asm-generic/cputime.h
@@ -4,66 +4,12 @@
#include <linux/time.h>
#include <linux/jiffies.h>

typedef unsigned long __nocast cputime_t;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
# include <asm-generic/cputime_jiffies.h>
#endif

#define cputime_one_jiffy jiffies_to_cputime(1)
#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct)
#define cputime_to_scaled(__ct) (__ct)
#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz)

typedef u64 __nocast cputime64_t;

#define cputime64_to_jiffies64(__ct) (__force u64)(__ct)
#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif)

#define nsecs_to_cputime64(__ct) \
	jiffies64_to_cputime64(nsecs_to_jiffies64(__ct))


/*
 * Convert cputime to microseconds and back.
 */
#define cputime_to_usecs(__ct) \
	jiffies_to_usecs(cputime_to_jiffies(__ct))
#define usecs_to_cputime(__usec) \
	jiffies_to_cputime(usecs_to_jiffies(__usec))
#define usecs_to_cputime64(__usec) \
	jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))

/*
 * Convert cputime to seconds and back.
 */
#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ)
#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ)

/*
 * Convert cputime to timespec and back.
 */
#define timespec_to_cputime(__val) \
	jiffies_to_cputime(timespec_to_jiffies(__val))
#define cputime_to_timespec(__ct,__val) \
	jiffies_to_timespec(cputime_to_jiffies(__ct),__val)

/*
 * Convert cputime to timeval and back.
 */
#define timeval_to_cputime(__val) \
	jiffies_to_cputime(timeval_to_jiffies(__val))
#define cputime_to_timeval(__ct,__val) \
	jiffies_to_timeval(cputime_to_jiffies(__ct),__val)

/*
 * Convert cputime to clock and back.
 */
#define cputime_to_clock_t(__ct) \
	jiffies_to_clock_t(cputime_to_jiffies(__ct))
#define clock_t_to_cputime(__x) \
	jiffies_to_cputime(clock_t_to_jiffies(__x))

/*
 * Convert cputime64 to clock.
 */
#define cputime64_to_clock_t(__ct) \
	jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
# include <asm-generic/cputime_nsecs.h>
#endif

#endif
include/asm-generic/cputime_jiffies.h (new file, 72 lines)
@@ -0,0 +1,72 @@
#ifndef _ASM_GENERIC_CPUTIME_JIFFIES_H
#define _ASM_GENERIC_CPUTIME_JIFFIES_H

typedef unsigned long __nocast cputime_t;

#define cputime_one_jiffy jiffies_to_cputime(1)
#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct)
#define cputime_to_scaled(__ct) (__ct)
#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz)

typedef u64 __nocast cputime64_t;

#define cputime64_to_jiffies64(__ct) (__force u64)(__ct)
#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif)


/*
 * Convert nanoseconds to cputime
 */
#define nsecs_to_cputime64(__nsec) \
	jiffies64_to_cputime64(nsecs_to_jiffies64(__nsec))
#define nsecs_to_cputime(__nsec) \
	jiffies_to_cputime(nsecs_to_jiffies(__nsec))


/*
 * Convert cputime to microseconds and back.
 */
#define cputime_to_usecs(__ct) \
	jiffies_to_usecs(cputime_to_jiffies(__ct))
#define usecs_to_cputime(__usec) \
	jiffies_to_cputime(usecs_to_jiffies(__usec))
#define usecs_to_cputime64(__usec) \
	jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))

/*
 * Convert cputime to seconds and back.
 */
#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ)
#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ)

/*
 * Convert cputime to timespec and back.
 */
#define timespec_to_cputime(__val) \
	jiffies_to_cputime(timespec_to_jiffies(__val))
#define cputime_to_timespec(__ct,__val) \
	jiffies_to_timespec(cputime_to_jiffies(__ct),__val)

/*
 * Convert cputime to timeval and back.
 */
#define timeval_to_cputime(__val) \
	jiffies_to_cputime(timeval_to_jiffies(__val))
#define cputime_to_timeval(__ct,__val) \
	jiffies_to_timeval(cputime_to_jiffies(__ct),__val)

/*
 * Convert cputime to clock and back.
 */
#define cputime_to_clock_t(__ct) \
	jiffies_to_clock_t(cputime_to_jiffies(__ct))
#define clock_t_to_cputime(__x) \
	jiffies_to_cputime(clock_t_to_jiffies(__x))

/*
 * Convert cputime64 to clock.
 */
#define cputime64_to_clock_t(__ct) \
	jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))

#endif
include/asm-generic/cputime_nsecs.h (new file, 104 lines)
@@ -0,0 +1,104 @@
/*
 * Definitions for measuring cputime in nsecs resolution.
 *
 * Based on <arch/ia64/include/asm/cputime.h>
 *
 * Copyright (C) 2007 FUJITSU LIMITED
 * Copyright (C) 2007 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#ifndef _ASM_GENERIC_CPUTIME_NSECS_H
#define _ASM_GENERIC_CPUTIME_NSECS_H

typedef u64 __nocast cputime_t;
typedef u64 __nocast cputime64_t;

#define cputime_one_jiffy jiffies_to_cputime(1)

/*
 * Convert cputime <-> jiffies (HZ)
 */
#define cputime_to_jiffies(__ct) \
	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
#define cputime_to_scaled(__ct) (__ct)
#define jiffies_to_cputime(__jif) \
	(__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
#define cputime64_to_jiffies64(__ct) \
	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
#define jiffies64_to_cputime64(__jif) \
	(__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))


/*
 * Convert cputime <-> nanoseconds
 */
#define nsecs_to_cputime(__nsecs) ((__force u64)(__nsecs))


/*
 * Convert cputime <-> microseconds
 */
#define cputime_to_usecs(__ct) \
	((__force u64)(__ct) / NSEC_PER_USEC)
#define usecs_to_cputime(__usecs) \
	(__force cputime_t)((__usecs) * NSEC_PER_USEC)
#define usecs_to_cputime64(__usecs) \
	(__force cputime64_t)((__usecs) * NSEC_PER_USEC)

/*
 * Convert cputime <-> seconds
 */
#define cputime_to_secs(__ct) \
	((__force u64)(__ct) / NSEC_PER_SEC)
#define secs_to_cputime(__secs) \
	(__force cputime_t)((__secs) * NSEC_PER_SEC)

/*
 * Convert cputime <-> timespec (nsec)
 */
static inline cputime_t timespec_to_cputime(const struct timespec *val)
{
	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
	return (__force cputime_t) ret;
}
static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
{
	val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
	val->tv_nsec = (__force u64) ct % NSEC_PER_SEC;
}

/*
 * Convert cputime <-> timeval (msec)
 */
static inline cputime_t timeval_to_cputime(struct timeval *val)
{
	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
	return (__force cputime_t) ret;
}
static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
{
	val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
	val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC;
}

/*
 * Convert cputime <-> clock (USER_HZ)
 */
#define cputime_to_clock_t(__ct) \
	((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ))
#define clock_t_to_cputime(__x) \
	(__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))

/*
 * Convert cputime64 to clock.
 */
#define cputime64_to_clock_t(__ct) \
	cputime_to_clock_t((__force cputime_t)__ct)

#endif
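To see what the nsec-resolution variant buys, a standalone sketch (plain
user-space C with 64-bit arithmetic, not kernel code; names only mirror the
macros above) of the microsecond conversions:

/*
 * Standalone illustration of the nsec-resolution conversions above:
 * cputime is just a nanosecond count, so the conversions are plain
 * integer divisions/multiplications.
 */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_USEC 1000ULL

static uint64_t cputime_to_usecs(uint64_t ct)     { return ct / NSEC_PER_USEC; }
static uint64_t usecs_to_cputime(uint64_t usecs)  { return usecs * NSEC_PER_USEC; }

int main(void)
{
	uint64_t ct = 1500000000ULL;	/* 1.5 s of CPU time, in ns */

	printf("%llu ns -> %llu us\n",
	       (unsigned long long)ct,
	       (unsigned long long)cputime_to_usecs(ct));
	printf("2500 us -> %llu ns\n",
	       (unsigned long long)usecs_to_cputime(2500));
	return 0;
}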
include/linux/context_tracking.h
@@ -3,12 +3,40 @@

#ifdef CONFIG_CONTEXT_TRACKING
#include <linux/sched.h>
#include <linux/percpu.h>

struct context_tracking {
	/*
	 * When active is false, probes are unset in order
	 * to minimize overhead: TIF flags are cleared
	 * and calls to user_enter/exit are ignored. This
	 * may be further optimized using static keys.
	 */
	bool active;
	enum {
		IN_KERNEL = 0,
		IN_USER,
	} state;
};

DECLARE_PER_CPU(struct context_tracking, context_tracking);

static inline bool context_tracking_in_user(void)
{
	return __this_cpu_read(context_tracking.state) == IN_USER;
}

static inline bool context_tracking_active(void)
{
	return __this_cpu_read(context_tracking.active);
}

extern void user_enter(void);
extern void user_exit(void);
extern void context_tracking_task_switch(struct task_struct *prev,
					 struct task_struct *next);
#else
static inline bool context_tracking_in_user(void) { return false; }
static inline void user_enter(void) { }
static inline void user_exit(void) { }
static inline void context_tracking_task_switch(struct task_struct *prev,
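A toy, single-CPU user-space model (illustration only; the real hooks are
per-CPU and live in kernel/context_tracking.c) of the state machine that
struct context_tracking above tracks:

/*
 * Toy model: user_enter()/user_exit() flip the state that
 * context_tracking_in_user() reports, and do nothing when tracking
 * is inactive. Single CPU, no per-CPU data, no probes.
 */
#include <stdbool.h>
#include <stdio.h>

enum ct_state { IN_KERNEL = 0, IN_USER };

static struct {
	bool active;		/* are the probes enabled? */
	enum ct_state state;
} context_tracking = { .active = true, .state = IN_KERNEL };

static void user_enter(void)
{
	if (context_tracking.active)
		context_tracking.state = IN_USER;
}

static void user_exit(void)
{
	if (context_tracking.active)
		context_tracking.state = IN_KERNEL;
}

static bool context_tracking_in_user(void)
{
	return context_tracking.state == IN_USER;
}

int main(void)
{
	user_enter();
	printf("in user: %d\n", context_tracking_in_user());	/* 1 */
	user_exit();
	printf("in user: %d\n", context_tracking_in_user());	/* 0 */
	return 0;
}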
include/linux/hardirq.h
@@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void);
 */
#define __irq_enter() \
	do { \
		vtime_account_irq_enter(current); \
		account_irq_enter_time(current); \
		add_preempt_count(HARDIRQ_OFFSET); \
		trace_hardirq_enter(); \
	} while (0)
@@ -169,7 +169,7 @@ extern void irq_enter(void);
#define __irq_exit() \
	do { \
		trace_hardirq_exit(); \
		vtime_account_irq_exit(current); \
		account_irq_exit_time(current); \
		sub_preempt_count(HARDIRQ_OFFSET); \
	} while (0)
include/linux/init_task.h
@@ -10,7 +10,9 @@
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/securebits.h>
#include <linux/seqlock.h>
#include <net/net_namespace.h>
#include <linux/sched/rt.h>

#ifdef CONFIG_SMP
# define INIT_PUSHABLE_TASKS(tsk) \
@@ -141,6 +143,15 @@ extern struct task_group root_task_group;
# define INIT_PERF_EVENTS(tsk)
#endif

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
# define INIT_VTIME(tsk) \
	.vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \
	.vtime_snap = 0, \
	.vtime_snap_whence = VTIME_SYS,
#else
# define INIT_VTIME(tsk)
#endif

#define INIT_TASK_COMM "swapper"

/*
@@ -210,6 +221,7 @@ extern struct task_group root_task_group;
	INIT_TRACE_RECURSION \
	INIT_TASK_RCU_PREEMPT(tsk) \
	INIT_CPUSET_SEQ \
	INIT_VTIME(tsk) \
}
include/linux/kernel_stat.h
@@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t)
extern void account_steal_time(cputime_t);
extern void account_idle_time(cputime_t);

#ifdef CONFIG_VIRT_CPU_ACCOUNTING
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static inline void account_process_tick(struct task_struct *tsk, int user)
{
	vtime_account_user(tsk);
include/linux/kvm_host.h
@@ -22,6 +22,7 @@
#include <linux/rcupdate.h>
#include <linux/ratelimit.h>
#include <linux/err.h>
#include <linux/irqflags.h>
#include <asm/signal.h>

#include <linux/kvm.h>
@@ -740,15 +741,52 @@ static inline int kvm_deassign_device(struct kvm *kvm,
}
#endif /* CONFIG_IOMMU_API */

static inline void kvm_guest_enter(void)
static inline void __guest_enter(void)
{
	BUG_ON(preemptible());
	/*
	 * This is running in ioctl context so we can avoid
	 * the call to vtime_account() with its unnecessary idle check.
	 */
	vtime_account_system_irqsafe(current);
	vtime_account_system(current);
	current->flags |= PF_VCPU;
}

static inline void __guest_exit(void)
{
	/*
	 * This is running in ioctl context so we can avoid
	 * the call to vtime_account() with its unnecessary idle check.
	 */
	vtime_account_system(current);
	current->flags &= ~PF_VCPU;
}

#ifdef CONFIG_CONTEXT_TRACKING
extern void guest_enter(void);
extern void guest_exit(void);

#else /* !CONFIG_CONTEXT_TRACKING */
static inline void guest_enter(void)
{
	__guest_enter();
}

static inline void guest_exit(void)
{
	__guest_exit();
}
#endif /* !CONFIG_CONTEXT_TRACKING */

static inline void kvm_guest_enter(void)
{
	unsigned long flags;

	BUG_ON(preemptible());

	local_irq_save(flags);
	guest_enter();
	local_irq_restore(flags);

	/* KVM does not hold any references to rcu protected data when it
	 * switches CPU into a guest mode. In fact switching to a guest mode
	 * is very similar to exiting to userspase from rcu point of view. In
@@ -761,12 +799,11 @@ static inline void kvm_guest_enter(void)

static inline void kvm_guest_exit(void)
{
	/*
	 * This is running in ioctl context so we can avoid
	 * the call to vtime_account() with its unnecessary idle check.
	 */
	vtime_account_system_irqsafe(current);
	current->flags &= ~PF_VCPU;
	unsigned long flags;

	local_irq_save(flags);
	guest_exit();
	local_irq_restore(flags);
}

/*
include/linux/sched.h
@@ -304,19 +304,6 @@ static inline void lockup_detector_init(void)
}
#endif

#ifdef CONFIG_DETECT_HUNG_TASK
extern unsigned int sysctl_hung_task_panic;
extern unsigned long sysctl_hung_task_check_count;
extern unsigned long sysctl_hung_task_timeout_secs;
extern unsigned long sysctl_hung_task_warnings;
extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
					 void __user *buffer,
					 size_t *lenp, loff_t *ppos);
#else
/* Avoid need for ifdefs elsewhere in the code */
enum { sysctl_hung_task_timeout_secs = 0 };
#endif

/* Attach to any functions which should be ignored in wchan output. */
#define __sched __attribute__((__section__(".sched.text")))

@@ -338,23 +325,6 @@ extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
struct nsproxy;
struct user_namespace;

/*
 * Default maximum number of active map areas, this limits the number of vmas
 * per mm struct. Users can overwrite this number by sysctl but there is a
 * problem.
 *
 * When a program's coredump is generated as ELF format, a section is created
 * per a vma. In ELF, the number of sections is represented in unsigned short.
 * This means the number of sections should be smaller than 65535 at coredump.
 * Because the kernel adds some informative sections to a image of program at
 * generating coredump, we need some margin. The number of extra sections is
 * 1-3 now and depends on arch. We use "5" as safe margin, here.
 */
#define MAPCOUNT_ELF_CORE_MARGIN (5)
#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)

extern int sysctl_max_map_count;

#include <linux/aio.h>

#ifdef CONFIG_MMU
@@ -1194,6 +1164,7 @@ struct sched_entity {
	/* rq "owned" by this entity/group: */
	struct cfs_rq *my_q;
#endif

	/*
	 * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
	 * removed when useful for applications beyond shares distribution (e.g.
@@ -1208,6 +1179,7 @@ struct sched_entity {
struct sched_rt_entity {
	struct list_head run_list;
	unsigned long timeout;
	unsigned long watchdog_stamp;
	unsigned int time_slice;

	struct sched_rt_entity *back;
@@ -1220,11 +1192,6 @@ struct sched_rt_entity {
#endif
};

/*
 * default timeslice is 100 msecs (used only for SCHED_RR tasks).
 * Timeslices get refilled after they expire.
 */
#define RR_TIMESLICE (100 * HZ / 1000)

struct rcu_node;

@@ -1367,6 +1334,15 @@ struct task_struct {
	cputime_t gtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
	struct cputime prev_cputime;
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
	seqlock_t vtime_seqlock;
	unsigned long long vtime_snap;
	enum {
		VTIME_SLEEPING = 0,
		VTIME_USER,
		VTIME_SYS,
	} vtime_snap_whence;
#endif
	unsigned long nvcsw, nivcsw; /* context switch counts */
	struct timespec start_time; /* monotonic time */
@@ -1622,37 +1598,6 @@ static inline void set_numabalancing_state(bool enabled)
}
#endif

/*
 * Priority of a process goes from 0..MAX_PRIO-1, valid RT
 * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
 * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
 * values are inverted: lower p->prio value means higher priority.
 *
 * The MAX_USER_RT_PRIO value allows the actual maximum
 * RT priority to be separate from the value exported to
 * user-space. This allows kernel threads to set their
 * priority to a value higher than any user task. Note:
 * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
 */

#define MAX_USER_RT_PRIO 100
#define MAX_RT_PRIO MAX_USER_RT_PRIO

#define MAX_PRIO (MAX_RT_PRIO + 40)
#define DEFAULT_PRIO (MAX_RT_PRIO + 20)

static inline int rt_prio(int prio)
{
	if (unlikely(prio < MAX_RT_PRIO))
		return 1;
	return 0;
}

static inline int rt_task(struct task_struct *p)
{
	return rt_prio(p->prio);
}

static inline struct pid *task_pid(struct task_struct *task)
{
	return task->pids[PIDTYPE_PID].pid;
@@ -1792,6 +1737,37 @@ static inline void put_task_struct(struct task_struct *t)
		__put_task_struct(t);
}

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern void task_cputime(struct task_struct *t,
			 cputime_t *utime, cputime_t *stime);
extern void task_cputime_scaled(struct task_struct *t,
				cputime_t *utimescaled, cputime_t *stimescaled);
extern cputime_t task_gtime(struct task_struct *t);
#else
static inline void task_cputime(struct task_struct *t,
				cputime_t *utime, cputime_t *stime)
{
	if (utime)
		*utime = t->utime;
	if (stime)
		*stime = t->stime;
}

static inline void task_cputime_scaled(struct task_struct *t,
				       cputime_t *utimescaled,
				       cputime_t *stimescaled)
{
	if (utimescaled)
		*utimescaled = t->utimescaled;
	if (stimescaled)
		*stimescaled = t->stimescaled;
}

static inline cputime_t task_gtime(struct task_struct *t)
{
	return t->gtime;
}
#endif
extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
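The accessor change above is easiest to see in isolation; a standalone model
(simplified types, illustration only, not kernel code) of the
!CONFIG_VIRT_CPU_ACCOUNTING_GEN fallback and how callers are expected to read
cputime through it instead of touching t->utime/t->stime directly:

/*
 * Standalone model of the task_cputime() accessor pattern: callers fetch
 * both fields through the helper, so a seqlock-protected variant can be
 * substituted when generic vtime accounting is enabled.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t cputime_t;

struct task_struct {
	cputime_t utime;
	cputime_t stime;
};

static inline void task_cputime(struct task_struct *t,
				cputime_t *utime, cputime_t *stime)
{
	if (utime)
		*utime = t->utime;
	if (stime)
		*stime = t->stime;
}

int main(void)
{
	struct task_struct t = { .utime = 42, .stime = 7 };
	cputime_t u, s;

	task_cputime(&t, &u, &s);
	printf("utime=%llu stime=%llu\n",
	       (unsigned long long)u, (unsigned long long)s);
	return 0;
}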
@@ -2033,58 +2009,7 @@ extern void wake_up_idle_cpu(int cpu);
static inline void wake_up_idle_cpu(int cpu) { }
#endif

extern unsigned int sysctl_sched_latency;
extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;

enum sched_tunable_scaling {
	SCHED_TUNABLESCALING_NONE,
	SCHED_TUNABLESCALING_LOG,
	SCHED_TUNABLESCALING_LINEAR,
	SCHED_TUNABLESCALING_END,
};
extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;

extern unsigned int sysctl_numa_balancing_scan_delay;
extern unsigned int sysctl_numa_balancing_scan_period_min;
extern unsigned int sysctl_numa_balancing_scan_period_max;
extern unsigned int sysctl_numa_balancing_scan_period_reset;
extern unsigned int sysctl_numa_balancing_scan_size;
extern unsigned int sysctl_numa_balancing_settle_count;

#ifdef CONFIG_SCHED_DEBUG
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
extern unsigned int sysctl_sched_time_avg;
extern unsigned int sysctl_timer_migration;
extern unsigned int sysctl_sched_shares_window;

int sched_proc_update_handler(struct ctl_table *table, int write,
			      void __user *buffer, size_t *length,
			      loff_t *ppos);
#endif
#ifdef CONFIG_SCHED_DEBUG
static inline unsigned int get_sysctl_timer_migration(void)
{
	return sysctl_timer_migration;
}
#else
static inline unsigned int get_sysctl_timer_migration(void)
{
	return 1;
}
#endif
extern unsigned int sysctl_sched_rt_period;
extern int sysctl_sched_rt_runtime;

int sched_rt_handler(struct ctl_table *table, int write,
		     void __user *buffer, size_t *lenp,
		     loff_t *ppos);

#ifdef CONFIG_SCHED_AUTOGROUP
extern unsigned int sysctl_sched_autogroup_enabled;

extern void sched_autogroup_create_attach(struct task_struct *p);
extern void sched_autogroup_detach(struct task_struct *p);
extern void sched_autogroup_fork(struct signal_struct *sig);
@@ -2100,30 +2025,6 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
static inline void sched_autogroup_exit(struct signal_struct *sig) { }
#endif

#ifdef CONFIG_CFS_BANDWIDTH
extern unsigned int sysctl_sched_cfs_bandwidth_slice;
#endif

#ifdef CONFIG_RT_MUTEXES
extern int rt_mutex_getprio(struct task_struct *p);
extern void rt_mutex_setprio(struct task_struct *p, int prio);
extern void rt_mutex_adjust_pi(struct task_struct *p);
static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
{
	return tsk->pi_blocked_on != NULL;
}
#else
static inline int rt_mutex_getprio(struct task_struct *p)
{
	return p->normal_prio;
}
# define rt_mutex_adjust_pi(p) do { } while (0)
static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
{
	return false;
}
#endif

extern bool yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);
@@ -2753,8 +2654,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
extern long sched_getaffinity(pid_t pid, struct cpumask *mask);

extern void normalize_rt_tasks(void);

#ifdef CONFIG_CGROUP_SCHED

extern struct task_group root_task_group;
include/linux/sched/rt.h (new file, 58 lines)
@@ -0,0 +1,58 @@
#ifndef _SCHED_RT_H
#define _SCHED_RT_H

/*
 * Priority of a process goes from 0..MAX_PRIO-1, valid RT
 * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
 * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
 * values are inverted: lower p->prio value means higher priority.
 *
 * The MAX_USER_RT_PRIO value allows the actual maximum
 * RT priority to be separate from the value exported to
 * user-space. This allows kernel threads to set their
 * priority to a value higher than any user task. Note:
 * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
 */

#define MAX_USER_RT_PRIO 100
#define MAX_RT_PRIO MAX_USER_RT_PRIO

#define MAX_PRIO (MAX_RT_PRIO + 40)
#define DEFAULT_PRIO (MAX_RT_PRIO + 20)

static inline int rt_prio(int prio)
{
	if (unlikely(prio < MAX_RT_PRIO))
		return 1;
	return 0;
}

static inline int rt_task(struct task_struct *p)
{
	return rt_prio(p->prio);
}

#ifdef CONFIG_RT_MUTEXES
extern int rt_mutex_getprio(struct task_struct *p);
extern void rt_mutex_setprio(struct task_struct *p, int prio);
extern void rt_mutex_adjust_pi(struct task_struct *p);
static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
{
	return tsk->pi_blocked_on != NULL;
}
#else
static inline int rt_mutex_getprio(struct task_struct *p)
{
	return p->normal_prio;
}
# define rt_mutex_adjust_pi(p) do { } while (0)
static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
{
	return false;
}
#endif

extern void normalize_rt_tasks(void);

#endif /* _SCHED_RT_H */
include/linux/sched/sysctl.h (new file, 110 lines)
@@ -0,0 +1,110 @@
#ifndef _SCHED_SYSCTL_H
#define _SCHED_SYSCTL_H

#ifdef CONFIG_DETECT_HUNG_TASK
extern unsigned int sysctl_hung_task_panic;
extern unsigned long sysctl_hung_task_check_count;
extern unsigned long sysctl_hung_task_timeout_secs;
extern unsigned long sysctl_hung_task_warnings;
extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
					 void __user *buffer,
					 size_t *lenp, loff_t *ppos);
#else
/* Avoid need for ifdefs elsewhere in the code */
enum { sysctl_hung_task_timeout_secs = 0 };
#endif

/*
 * Default maximum number of active map areas, this limits the number of vmas
 * per mm struct. Users can overwrite this number by sysctl but there is a
 * problem.
 *
 * When a program's coredump is generated as ELF format, a section is created
 * per a vma. In ELF, the number of sections is represented in unsigned short.
 * This means the number of sections should be smaller than 65535 at coredump.
 * Because the kernel adds some informative sections to a image of program at
 * generating coredump, we need some margin. The number of extra sections is
 * 1-3 now and depends on arch. We use "5" as safe margin, here.
 */
#define MAPCOUNT_ELF_CORE_MARGIN (5)
#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)

extern int sysctl_max_map_count;

extern unsigned int sysctl_sched_latency;
extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;

enum sched_tunable_scaling {
	SCHED_TUNABLESCALING_NONE,
	SCHED_TUNABLESCALING_LOG,
	SCHED_TUNABLESCALING_LINEAR,
	SCHED_TUNABLESCALING_END,
};
extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;

extern unsigned int sysctl_numa_balancing_scan_delay;
extern unsigned int sysctl_numa_balancing_scan_period_min;
extern unsigned int sysctl_numa_balancing_scan_period_max;
extern unsigned int sysctl_numa_balancing_scan_period_reset;
extern unsigned int sysctl_numa_balancing_scan_size;
extern unsigned int sysctl_numa_balancing_settle_count;

#ifdef CONFIG_SCHED_DEBUG
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
extern unsigned int sysctl_sched_time_avg;
extern unsigned int sysctl_timer_migration;
extern unsigned int sysctl_sched_shares_window;

int sched_proc_update_handler(struct ctl_table *table, int write,
			      void __user *buffer, size_t *length,
			      loff_t *ppos);
#endif
#ifdef CONFIG_SCHED_DEBUG
static inline unsigned int get_sysctl_timer_migration(void)
{
	return sysctl_timer_migration;
}
#else
static inline unsigned int get_sysctl_timer_migration(void)
{
	return 1;
}
#endif

/*
 * control realtime throttling:
 *
 * /proc/sys/kernel/sched_rt_period_us
 * /proc/sys/kernel/sched_rt_runtime_us
 */
extern unsigned int sysctl_sched_rt_period;
extern int sysctl_sched_rt_runtime;

#ifdef CONFIG_CFS_BANDWIDTH
extern unsigned int sysctl_sched_cfs_bandwidth_slice;
#endif

#ifdef CONFIG_SCHED_AUTOGROUP
extern unsigned int sysctl_sched_autogroup_enabled;
#endif

/*
 * default timeslice is 100 msecs (used only for SCHED_RR tasks).
 * Timeslices get refilled after they expire.
 */
#define RR_TIMESLICE (100 * HZ / 1000)

extern int sched_rr_timeslice;

extern int sched_rr_handler(struct ctl_table *table, int write,
			    void __user *buffer, size_t *lenp,
			    loff_t *ppos);

extern int sched_rt_handler(struct ctl_table *table, int write,
			    void __user *buffer, size_t *lenp,
			    loff_t *ppos);

#endif /* _SCHED_SYSCTL_H */
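As a quick worked example of the RR_TIMESLICE definition above (100 ms
expressed in jiffies, so the compile-time value depends on HZ), a standalone
sketch with the same arithmetic:

/* Mirrors (100 * HZ / 1000) for a few common HZ values; illustration only. */
#include <stdio.h>

static long rr_timeslice(long hz)
{
	return 100 * hz / 1000;		/* 100 ms worth of jiffies */
}

int main(void)
{
	printf("HZ=100  -> %ld jiffies\n", rr_timeslice(100));	/* 10  */
	printf("HZ=250  -> %ld jiffies\n", rr_timeslice(250));	/* 25  */
	printf("HZ=1000 -> %ld jiffies\n", rr_timeslice(1000));	/* 100 */
	return 0;
}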
include/linux/tsacct_kern.h
@@ -23,12 +23,15 @@ static inline void bacct_add_tsk(struct user_namespace *user_ns,
#ifdef CONFIG_TASK_XACCT
extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p);
extern void acct_update_integrals(struct task_struct *tsk);
extern void acct_account_cputime(struct task_struct *tsk);
extern void acct_clear_integrals(struct task_struct *tsk);
#else
static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
{}
static inline void acct_update_integrals(struct task_struct *tsk)
{}
static inline void acct_account_cputime(struct task_struct *tsk)
{}
static inline void acct_clear_integrals(struct task_struct *tsk)
{}
#endif /* CONFIG_TASK_XACCT */
include/linux/vtime.h
@@ -6,15 +6,46 @@ struct task_struct;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void vtime_task_switch(struct task_struct *prev);
extern void vtime_account_system(struct task_struct *tsk);
extern void vtime_account_system_irqsafe(struct task_struct *tsk);
extern void vtime_account_idle(struct task_struct *tsk);
extern void vtime_account_user(struct task_struct *tsk);
extern void vtime_account(struct task_struct *tsk);
#else
extern void vtime_account_irq_enter(struct task_struct *tsk);

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static inline bool vtime_accounting_enabled(void) { return true; }
#endif

#else /* !CONFIG_VIRT_CPU_ACCOUNTING */

static inline void vtime_task_switch(struct task_struct *prev) { }
static inline void vtime_account_system(struct task_struct *tsk) { }
static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
static inline void vtime_account(struct task_struct *tsk) { }
static inline void vtime_account_user(struct task_struct *tsk) { }
static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
static inline bool vtime_accounting_enabled(void) { return false; }
#endif

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern void arch_vtime_task_switch(struct task_struct *tsk);
extern void vtime_account_irq_exit(struct task_struct *tsk);
extern bool vtime_accounting_enabled(void);
extern void vtime_user_enter(struct task_struct *tsk);
static inline void vtime_user_exit(struct task_struct *tsk)
{
	vtime_account_user(tsk);
}
extern void vtime_guest_enter(struct task_struct *tsk);
extern void vtime_guest_exit(struct task_struct *tsk);
extern void vtime_init_idle(struct task_struct *tsk);
#else
static inline void vtime_account_irq_exit(struct task_struct *tsk)
{
	/* On hard|softirq exit we always account to hard|softirq cputime */
	vtime_account_system(tsk);
}
static inline void vtime_user_enter(struct task_struct *tsk) { }
static inline void vtime_user_exit(struct task_struct *tsk) { }
static inline void vtime_guest_enter(struct task_struct *tsk) { }
static inline void vtime_guest_exit(struct task_struct *tsk) { }
static inline void vtime_init_idle(struct task_struct *tsk) { }
#endif

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -23,25 +54,15 @@ extern void irqtime_account_irq(struct task_struct *tsk);
static inline void irqtime_account_irq(struct task_struct *tsk) { }
#endif

static inline void vtime_account_irq_enter(struct task_struct *tsk)
static inline void account_irq_enter_time(struct task_struct *tsk)
{
	/*
	 * Hardirq can interrupt idle task anytime. So we need vtime_account()
	 * that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING.
	 * Softirq can also interrupt idle task directly if it calls
	 * local_bh_enable(). Such case probably don't exist but we never know.
	 * Ksoftirqd is not concerned because idle time is flushed on context
	 * switch. Softirqs in the end of hardirqs are also not a problem because
	 * the idle time is flushed on hardirq time already.
	 */
	vtime_account(tsk);
	vtime_account_irq_enter(tsk);
	irqtime_account_irq(tsk);
}

static inline void vtime_account_irq_exit(struct task_struct *tsk)
static inline void account_irq_exit_time(struct task_struct *tsk)
{
	/* On hard|softirq exit we always account to hard|softirq cputime */
	vtime_account_system(tsk);
	vtime_account_irq_exit(tsk);
	irqtime_account_irq(tsk);
}