mirror of
https://github.com/torvalds/linux.git
synced 2024-12-26 21:02:19 +00:00
1fb497dd00
Running posix CPU timers in hard interrupt context has a few downsides: - For PREEMPT_RT it cannot work as the expiry code needs to take sighand lock, which is a 'sleeping spinlock' in RT. The original RT approach of offloading the posix CPU timer handling into a high priority thread was clumsy and provided no real benefit in general. - For fine grained accounting it's just wrong to run this in context of the timer interrupt because that way a process specific CPU time is accounted to the timer interrupt. - Long running timer interrupts caused by a large amount of expiring timers which can be created and armed by unprivileged user space. There is no hard requirement to expire them in interrupt context. If the signal is targeted at the task itself then it won't be delivered before the task returns to user space anyway. If the signal is targeted at a supervisor process then it might be slightly delayed, but posix CPU timers are inaccurate anyway due to the fact that they are tied to the tick. Provide infrastructure to schedule task work which allows splitting the posix CPU timer code into a quick check in interrupt context and a thread context expiry and signal delivery function. This has to be enabled by architectures as it requires that the architecture specific KVM implementation handles pending task work before exiting to guest mode. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@kernel.org> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lore.kernel.org/r/20200730102337.783470146@linutronix.de
178 lines
5.0 KiB
Plaintext
178 lines
5.0 KiB
Plaintext
# SPDX-License-Identifier: GPL-2.0-only
|
|
#
|
|
# Timer subsystem related configuration options
|
|
#
|
|
|
|
# Options selectable by arch Kconfig
|
|
|
|
# Watchdog function for clocksources to detect instabilities
|
|
config CLOCKSOURCE_WATCHDOG
|
|
bool
|
|
|
|
# Architecture has extra clocksource data
|
|
config ARCH_CLOCKSOURCE_DATA
|
|
bool
|
|
|
|
# Architecture has extra clocksource init called from registration
|
|
config ARCH_CLOCKSOURCE_INIT
|
|
bool
|
|
|
|
# Clocksources require validation of the clocksource against the last
|
|
# cycle update - x86/TSC misfeature
|
|
config CLOCKSOURCE_VALIDATE_LAST_CYCLE
|
|
bool
|
|
|
|
# Timekeeping vsyscall support
|
|
config GENERIC_TIME_VSYSCALL
|
|
bool
|
|
|
|
# Old style timekeeping
|
|
config ARCH_USES_GETTIMEOFFSET
|
|
bool
|
|
|
|
# The generic clock events infrastructure
|
|
config GENERIC_CLOCKEVENTS
|
|
bool
|
|
|
|
# Architecture can handle broadcast in a driver-agnostic way
|
|
config ARCH_HAS_TICK_BROADCAST
|
|
bool
|
|
|
|
# Clockevents broadcasting infrastructure
|
|
config GENERIC_CLOCKEVENTS_BROADCAST
|
|
bool
|
|
depends on GENERIC_CLOCKEVENTS
|
|
|
|
# Automatically adjust the min. reprogramming time for
|
|
# clock event device
|
|
config GENERIC_CLOCKEVENTS_MIN_ADJUST
|
|
bool
|
|
|
|
# Generic update of CMOS clock
|
|
config GENERIC_CMOS_UPDATE
|
|
bool
|
|
|
|
# Select to handle posix CPU timers from task_work
|
|
# and not from the timer interrupt context
|
|
config HAVE_POSIX_CPU_TIMERS_TASK_WORK
|
|
bool
|
|
|
|
# Handle posix CPU timers from task_work instead of the timer interrupt.
# This symbol has no prompt, so it is not user selectable: it defaults
# to y when both POSIX_TIMERS and HAVE_POSIX_CPU_TIMERS_TASK_WORK are set.
config POSIX_CPU_TIMERS_TASK_WORK
|
|
bool
|
|
default y if POSIX_TIMERS && HAVE_POSIX_CPU_TIMERS_TASK_WORK
|
|
|
|
if GENERIC_CLOCKEVENTS
|
|
menu "Timers subsystem"
|
|
|
|
# Core internal switch. Selected by NO_HZ_COMMON / HIGH_RES_TIMERS. This is
|
|
# only related to the tick functionality. Oneshot clockevent devices
|
|
# are supported independent of this.
|
|
config TICK_ONESHOT
|
|
bool
|
|
|
|
# Shared base symbol for the dynticks options. It has no prompt; it is
# selected by NO_HZ_IDLE and NO_HZ_FULL and in turn selects TICK_ONESHOT.
# Not available on architectures using ARCH_USES_GETTIMEOFFSET.
config NO_HZ_COMMON
|
|
bool
|
|
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
|
|
select TICK_ONESHOT
|
|
|
|
choice
|
|
prompt "Timer tick handling"
|
|
default NO_HZ_IDLE if NO_HZ
|
|
|
|
config HZ_PERIODIC
|
|
bool "Periodic timer ticks (constant rate, no dynticks)"
|
|
help
|
|
This option keeps the tick running periodically at a constant
|
|
rate, even when the CPU doesn't need it.
|
|
|
|
config NO_HZ_IDLE
|
|
bool "Idle dynticks system (tickless idle)"
|
|
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
|
|
select NO_HZ_COMMON
|
|
help
|
|
This option enables a tickless idle system: timer interrupts
|
|
will only trigger on an as-needed basis when the system is idle.
|
|
This is usually interesting for energy saving.
|
|
|
|
Most of the time you want to say Y here.
|
|
|
|
config NO_HZ_FULL
|
|
bool "Full dynticks system (tickless)"
|
|
# NO_HZ_COMMON dependency
|
|
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
|
|
# We need at least one periodic CPU for timekeeping
|
|
depends on SMP
|
|
depends on HAVE_CONTEXT_TRACKING
|
|
# VIRT_CPU_ACCOUNTING_GEN dependency
|
|
depends on HAVE_VIRT_CPU_ACCOUNTING_GEN
|
|
select NO_HZ_COMMON
|
|
select RCU_NOCB_CPU
|
|
select VIRT_CPU_ACCOUNTING_GEN
|
|
select IRQ_WORK
|
|
select CPU_ISOLATION
|
|
help
|
|
Adaptively try to shut down the tick whenever possible, even when
|
|
the CPU is running tasks. Typically this requires running a single
|
|
task on the CPU. Chances for running tickless are maximized when
|
|
the task mostly runs in userspace and has little kernel activity.
|
|
|
|
You need to fill up the nohz_full boot parameter with the
|
|
desired range of dynticks CPUs.
|
|
|
|
This is implemented at the expense of some overhead in user <-> kernel
|
|
transitions: syscalls, exceptions and interrupts. Even when it's
|
|
dynamically off.
|
|
|
|
Say N.
|
|
|
|
endchoice
|
|
|
|
config CONTEXT_TRACKING
|
|
bool
|
|
|
|
config CONTEXT_TRACKING_FORCE
|
|
bool "Force context tracking"
|
|
depends on CONTEXT_TRACKING
|
|
default y if !NO_HZ_FULL
|
|
help
|
|
The major prerequisite for full dynticks to work is to
|
|
support the context tracking subsystem. But there are also
|
|
other dependencies to provide in order to make the full
|
|
dynticks work.
|
|
|
|
This option stands for testing when an arch implements the
|
|
context tracking backend but doesn't yet fulfill all the
|
|
requirements to make the full dynticks feature work.
|
|
Without the full dynticks, there is no way to test the support
|
|
for context tracking and the subsystems that rely on it: RCU
|
|
userspace extended quiescent state and tickless cputime
|
|
accounting. This option copes with the absence of the full
|
|
dynticks subsystem by forcing the context tracking on all
|
|
CPUs in the system.
|
|
|
|
Say Y only if you're working on the development of an
|
|
architecture backend for the context tracking.
|
|
|
|
Say N otherwise, this option brings an overhead that you
|
|
don't want in production.
|
|
|
|
config NO_HZ
|
|
bool "Old Idle dynticks config"
|
|
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
|
|
help
|
|
This is the old config entry that enables dynticks idle.
|
|
We keep it around for a little while to enforce backward
|
|
compatibility with older config files.
|
|
|
|
config HIGH_RES_TIMERS
|
|
bool "High Resolution Timer Support"
|
|
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
|
|
select TICK_ONESHOT
|
|
help
|
|
This option enables high resolution timer support. If your
|
|
hardware is not capable then this option only increases
|
|
the size of the kernel image.
|
|
|
|
endmenu
|
|
endif
|